diff options
author | Jörg Wagner <jorwag@google.com> | 2023-08-01 13:38:22 +0000 |
---|---|---|
committer | Jörg Wagner <jorwag@google.com> | 2023-08-03 09:29:34 +0000 |
commit | dacf004cc8a4b35f5a0fb5fb67246f9cc8fdaafb (patch) | |
tree | 07484dccba43bb2c2a07626c00154751f318bd47 /mali_kbase/backend | |
parent | bce5281a0408a175137c08dc93028e2a2c0fb69b (diff) | |
download | gpu-dacf004cc8a4b35f5a0fb5fb67246f9cc8fdaafb.tar.gz |
Update KMD to 'mini release: update r44p1-01bet1 to r44p1-00dev2'
Provenance: ipdelivery@d10c137c7691a470b8b33786aec4965315db4561
Change-Id: I4fbcc669d3b8e36c8288c91fdddd8b79258b6635
Diffstat (limited to 'mali_kbase/backend')
18 files changed, 460 insertions, 273 deletions
diff --git a/mali_kbase/backend/gpu/Kbuild b/mali_kbase/backend/gpu/Kbuild index f821a6f..a06b15d 100644 --- a/mali_kbase/backend/gpu/Kbuild +++ b/mali_kbase/backend/gpu/Kbuild @@ -22,7 +22,6 @@ mali_kbase-y += \ backend/gpu/mali_kbase_cache_policy_backend.o \ backend/gpu/mali_kbase_gpuprops_backend.o \ backend/gpu/mali_kbase_irq_linux.o \ - backend/gpu/mali_kbase_js_backend.o \ backend/gpu/mali_kbase_pm_backend.o \ backend/gpu/mali_kbase_pm_driver.o \ backend/gpu/mali_kbase_pm_metrics.o \ @@ -40,7 +39,8 @@ ifeq ($(MALI_USE_CSF),0) backend/gpu/mali_kbase_jm_as.o \ backend/gpu/mali_kbase_debug_job_fault_backend.o \ backend/gpu/mali_kbase_jm_hw.o \ - backend/gpu/mali_kbase_jm_rb.o + backend/gpu/mali_kbase_jm_rb.o \ + backend/gpu/mali_kbase_js_backend.o endif diff --git a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c index 7c0abba..86539d5 100644 --- a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2016, 2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,12 +43,12 @@ void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, kbdev->current_gpu_coherency_mode = mode; if (kbasep_amba_register_present(kbdev)) { - u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); + u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(AMBA_ENABLE)); val = AMBA_ENABLE_COHERENCY_PROTOCOL_SET(val, mode); - kbase_reg_write(kbdev, AMBA_ENABLE, val); + kbase_reg_write(kbdev, GPU_CONTROL_REG(AMBA_ENABLE), val); } else - kbase_reg_write(kbdev, COHERENCY_ENABLE, mode); + kbase_reg_write(kbdev, GPU_CONTROL_REG(COHERENCY_ENABLE), mode); } u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev) @@ -69,24 +69,12 @@ void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev, bool enable) { if (kbasep_amba_register_present(kbdev)) { - u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); + u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(AMBA_ENABLE)); val = AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SET(val, enable); - kbase_reg_write(kbdev, AMBA_ENABLE, val); + kbase_reg_write(kbdev, GPU_CONTROL_REG(AMBA_ENABLE), val); } else { WARN(1, "memory_cache_support not supported"); } } - -void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable) -{ - if (kbasep_amba_register_present(kbdev)) { - u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); - - val = AMBA_ENABLE_INVALIDATE_HINT_SET(val, enable); - kbase_reg_write(kbdev, AMBA_ENABLE, val); - } else { - WARN(1, "invalidate_hint not supported"); - } -} diff --git a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h index 8cd8090..0103695 100644 --- a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h +++ b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2016, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -53,13 +53,4 @@ u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev); */ void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev, bool enable); -/** - * kbase_amba_set_invalidate_hint() - Sets AMBA invalidate hint - * in the GPU. - * @kbdev: Device pointer - * @enable: true for enable. - * - * Note: Only for arch version 12.x.1 onwards. - */ -void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable); #endif /* _KBASE_CACHE_POLICY_BACKEND_H_ */ diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c index ddd03ca..912bac5 100644 --- a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c +++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -58,8 +58,10 @@ get_clk_rate_trace_callbacks(__maybe_unused struct kbase_device *kbdev) if (WARN_ON(!kbdev) || WARN_ON(!kbdev->dev)) return callbacks; - arbiter_if_node = - of_get_property(kbdev->dev->of_node, "arbiter_if", NULL); + arbiter_if_node = of_get_property(kbdev->dev->of_node, "arbiter-if", NULL); + if (!arbiter_if_node) + arbiter_if_node = of_get_property(kbdev->dev->of_node, "arbiter_if", NULL); + /* Arbitration enabled, override the callback pointer.*/ if (arbiter_if_node) callbacks = &arb_clk_rate_trace_ops; @@ -241,7 +243,8 @@ void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev) if (!clk_rtm->clk_rate_trace_ops) return; - spin_lock_irqsave(&clk_rtm->lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock(&clk_rtm->lock); for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { struct kbase_clk_data *clk_data = clk_rtm->clks[i]; @@ -257,7 +260,8 @@ void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev) } clk_rtm->gpu_idle = false; - spin_unlock_irqrestore(&clk_rtm->lock, flags); + spin_unlock(&clk_rtm->lock); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev) diff --git a/mali_kbase/backend/gpu/mali_kbase_debug_job_fault_backend.c b/mali_kbase/backend/gpu/mali_kbase_debug_job_fault_backend.c index e121b41..cd3b29d 100644 --- a/mali_kbase/backend/gpu/mali_kbase_debug_job_fault_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_debug_job_fault_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2015, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -59,7 +59,7 @@ static int job_slot_reg_snapshot[] = { JS_CONFIG_NEXT }; -/*MMU_REG(r)*/ +/*MMU_CONTROL_REG(r)*/ static int mmu_reg_snapshot[] = { MMU_IRQ_MASK, MMU_IRQ_STATUS @@ -118,15 +118,14 @@ bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, /* get the MMU registers*/ for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) { - kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]); + kctx->reg_dump[offset] = MMU_CONTROL_REG(mmu_reg_snapshot[i]); offset += 2; } /* get the Address space registers*/ for (j = 0; j < as_number; j++) { for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) { - kctx->reg_dump[offset] = - MMU_AS_REG(j, as_reg_snapshot[i]); + kctx->reg_dump[offset] = MMU_STAGE1_REG(MMU_AS_REG(j, as_reg_snapshot[i])); offset += 2; } } diff --git a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c index ef09c6b..b95277c 100644 --- a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c +++ b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -99,7 +99,7 @@ static irqreturn_t kbase_mmu_irq_handler(int irq, void *data) atomic_inc(&kbdev->faults_pending); - val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS)); + val = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_STATUS)); #ifdef CONFIG_MALI_DEBUG if (!kbdev->pm.backend.driver_ready_for_irqs) @@ -298,7 +298,7 @@ static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data) return IRQ_NONE; } - val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS)); + val = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_STATUS)); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -310,7 +310,7 @@ static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data) kbasep_irq_test_data.triggered = 1; wake_up(&kbasep_irq_test_data.wait); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_CLEAR), val); return IRQ_HANDLED; } @@ -344,8 +344,8 @@ static int kbasep_common_test_interrupt( break; case MMU_IRQ_TAG: test_handler = kbase_mmu_irq_test_handler; - rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT); - mask_offset = MMU_REG(MMU_IRQ_MASK); + rawstat_offset = MMU_CONTROL_REG(MMU_IRQ_RAWSTAT); + mask_offset = MMU_CONTROL_REG(MMU_IRQ_MASK); break; case GPU_IRQ_TAG: /* already tested by pm_driver - bail out */ diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c index 7df2173..be1da4a 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c @@ -574,7 +574,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) count += nr_done; while (nr_done) { - if (nr_done == 1) { + if (likely(nr_done == 1)) { kbase_gpu_complete_hw(kbdev, i, completion_code, job_tail, @@ -593,6 +593,14 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) BASE_JD_EVENT_DONE, 0, &end_timestamp); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + /* Increment the end timestamp value by 1 ns to + * avoid having the same value for 'start_time_ns' + * and 'end_time_ns' for the 2nd atom whose job + * completion IRQ got merged with the 1st atom. + */ + end_timestamp = ktime_add(end_timestamp, ns_to_ktime(1)); +#endif } nr_done--; } @@ -1052,12 +1060,12 @@ static void kbase_debug_dump_registers(struct kbase_device *kbdev) i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO))); } dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", - kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)), + kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_RAWSTAT)), kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS))); dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)), kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)), - kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK))); + kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK))); dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)), kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1))); diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h index bfd55a6..380a530 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h @@ -47,7 +47,7 @@ void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code, #if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) static inline char *kbasep_make_job_slot_string(unsigned int js, char *js_string, size_t js_size) { - snprintf(js_string, js_size, "job_slot_%u", js); + (void)scnprintf(js_string, js_size, "job_slot_%u", js); return js_string; } #endif diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c index 7db2b35..efef482 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c @@ -32,6 +32,9 @@ #include <hwcnt/mali_kbase_hwcnt_context.h> #include <mali_kbase_reset_gpu.h> #include <mali_kbase_kinstr_jm.h> +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include <mali_kbase_gpu_metrics.h> +#endif #include <backend/gpu/mali_kbase_cache_policy_backend.h> #include <device/mali_kbase_device.h> #include <backend/gpu/mali_kbase_jm_internal.h> @@ -274,6 +277,59 @@ int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js) return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js); } +/** + * trace_atom_completion_for_gpu_metrics - Report the completion of atom for the + * purpose of emitting power/gpu_work_period + * tracepoint. + * + * @katom: Pointer to the atom that completed execution on GPU. + * @end_timestamp: Pointer to the timestamp of atom completion. May be NULL, in + * which case current time will be used. + * + * The function would also report the start for an atom that was in the HEAD_NEXT + * register. + * + * Note: Caller must hold the HW access lock. + */ +static inline void trace_atom_completion_for_gpu_metrics( + struct kbase_jd_atom *const katom, + ktime_t *end_timestamp) +{ +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + u64 complete_ns; + struct kbase_context *kctx = katom->kctx; + struct kbase_jd_atom *queued = + kbase_gpu_inspect(kctx->kbdev, katom->slot_nr, 1); + +#ifdef CONFIG_MALI_DEBUG + WARN_ON(!kbase_gpu_inspect(kctx->kbdev, katom->slot_nr, 0)); +#endif + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + if (unlikely(queued == katom)) + return; + + /* A protected atom and a non-protected atom cannot be in the RB_SUBMITTED + * state at the same time in the job slot ringbuffer. Atom submission state + * machine prevents the submission of a non-protected atom until all + * protected atoms have completed and GPU has exited the protected mode. + * This implies that if the queued atom is in RB_SUBMITTED state, it shall + * be a protected atom and so we can return early. + */ + if (unlikely(kbase_jd_katom_is_protected(katom))) + return; + + if (likely(end_timestamp)) + complete_ns = ktime_to_ns(*end_timestamp); + else + complete_ns = ktime_get_raw_ns(); + + kbase_gpu_metrics_ctx_end_activity(kctx, complete_ns); + if (queued && queued->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) + kbase_gpu_metrics_ctx_start_activity(queued->kctx, complete_ns); +#endif +} static void kbase_gpu_release_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom, @@ -290,6 +346,7 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, break; case KBASE_ATOM_GPU_RB_SUBMITTED: + trace_atom_completion_for_gpu_metrics(katom, end_timestamp); kbase_kinstr_jm_atom_hw_release(katom); /* Inform power management at start/finish of atom so it can * update its GPU utilisation metrics. Mark atom as not @@ -866,6 +923,9 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) for (idx = 0; idx < SLOT_RB_SIZE; idx++) { bool cores_ready; +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + bool trace_atom_submit_for_gpu_metrics = true; +#endif int ret; if (!katom[idx]) @@ -976,12 +1036,21 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) case KBASE_ATOM_GPU_RB_READY: if (idx == 1) { + enum kbase_atom_gpu_rb_state atom_0_gpu_rb_state = + katom[0]->gpu_rb_state; + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + trace_atom_submit_for_gpu_metrics = + (atom_0_gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB); +#endif + /* Only submit if head atom or previous * atom already submitted */ - if ((katom[0]->gpu_rb_state != + if ((atom_0_gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED && - katom[0]->gpu_rb_state != + atom_0_gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) break; @@ -1019,6 +1088,13 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) /* Inform platform at start/finish of atom */ kbasep_platform_event_atom_submit(katom[idx]); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + if (likely(trace_atom_submit_for_gpu_metrics && + !kbase_jd_katom_is_protected(katom[idx]))) + kbase_gpu_metrics_ctx_start_activity( + katom[idx]->kctx, + ktime_to_ns(katom[idx]->start_timestamp)); +#endif } else { if (katom[idx]->core_req & BASE_JD_REQ_PERMON) kbase_pm_release_gpu_cycle_counter_nolock(kbdev); diff --git a/mali_kbase/backend/gpu/mali_kbase_js_backend.c b/mali_kbase/backend/gpu/mali_kbase_js_backend.c index 0ed04bb..ff4e114 100644 --- a/mali_kbase/backend/gpu/mali_kbase_js_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_js_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,28 +28,18 @@ #include <mali_kbase_reset_gpu.h> #include <backend/gpu/mali_kbase_jm_internal.h> #include <backend/gpu/mali_kbase_js_internal.h> +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include <mali_kbase_gpu_metrics.h> + +#endif -#if !MALI_USE_CSF /* * Hold the runpool_mutex for this */ -static inline bool timer_callback_should_run(struct kbase_device *kbdev) +static inline bool timer_callback_should_run(struct kbase_device *kbdev, int nr_running_ctxs) { - struct kbase_backend_data *backend = &kbdev->hwaccess.backend; - int nr_running_ctxs; - lockdep_assert_held(&kbdev->js_data.runpool_mutex); - /* Timer must stop if we are suspending */ - if (backend->suspend_timer) - return false; - - /* nr_contexts_pullable is updated with the runpool_mutex. However, the - * locking in the caller gives us a barrier that ensures - * nr_contexts_pullable is up-to-date for reading - */ - nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable); - #ifdef CONFIG_MALI_DEBUG if (kbdev->js_data.softstop_always) { /* Debug support for allowing soft-stop on a single context */ @@ -273,18 +263,20 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) return HRTIMER_NORESTART; } -#endif /* !MALI_USE_CSF */ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) { -#if !MALI_USE_CSF struct kbasep_js_device_data *js_devdata = &kbdev->js_data; struct kbase_backend_data *backend = &kbdev->hwaccess.backend; unsigned long flags; + /* Timer must stop if we are suspending */ + const bool suspend_timer = backend->suspend_timer; + const int nr_running_ctxs = + atomic_read(&kbdev->js_data.nr_contexts_runnable); lockdep_assert_held(&js_devdata->runpool_mutex); - if (!timer_callback_should_run(kbdev)) { + if (suspend_timer || !timer_callback_should_run(kbdev, nr_running_ctxs)) { /* Take spinlock to force synchronisation with timer */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); backend->timer_running = false; @@ -298,7 +290,8 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) hrtimer_cancel(&backend->scheduling_timer); } - if (timer_callback_should_run(kbdev) && !backend->timer_running) { + if (!suspend_timer && timer_callback_should_run(kbdev, nr_running_ctxs) && + !backend->timer_running) { /* Take spinlock to force synchronisation with timer */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); backend->timer_running = true; @@ -309,36 +302,59 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) KBASE_KTRACE_ADD_JM(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u, 0u); } -#else /* !MALI_USE_CSF */ - CSTD_UNUSED(kbdev); -#endif /* !MALI_USE_CSF */ + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + if (unlikely(suspend_timer)) { + js_devdata->gpu_metrics_timer_needed = false; + /* Cancel the timer as System suspend is happening */ + hrtimer_cancel(&js_devdata->gpu_metrics_timer); + js_devdata->gpu_metrics_timer_running = false; + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + /* Explicitly emit the tracepoint on System suspend */ + kbase_gpu_metrics_emit_tracepoint(kbdev, ktime_get_raw_ns()); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return; + } + + if (!nr_running_ctxs) { + /* Just set the flag to not restart the timer on expiry */ + js_devdata->gpu_metrics_timer_needed = false; + return; + } + + /* There are runnable contexts so the timer is needed */ + if (!js_devdata->gpu_metrics_timer_needed) { + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + js_devdata->gpu_metrics_timer_needed = true; + /* No need to restart the timer if it is already running. */ + if (!js_devdata->gpu_metrics_timer_running) { + hrtimer_start(&js_devdata->gpu_metrics_timer, + HR_TIMER_DELAY_NSEC(kbase_gpu_metrics_get_emit_interval()), + HRTIMER_MODE_REL); + js_devdata->gpu_metrics_timer_running = true; + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } +#endif } int kbase_backend_timer_init(struct kbase_device *kbdev) { -#if !MALI_USE_CSF struct kbase_backend_data *backend = &kbdev->hwaccess.backend; hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); backend->scheduling_timer.function = timer_callback; backend->timer_running = false; -#else /* !MALI_USE_CSF */ - CSTD_UNUSED(kbdev); -#endif /* !MALI_USE_CSF */ return 0; } void kbase_backend_timer_term(struct kbase_device *kbdev) { -#if !MALI_USE_CSF struct kbase_backend_data *backend = &kbdev->hwaccess.backend; hrtimer_cancel(&backend->scheduling_timer); -#else /* !MALI_USE_CSF */ - CSTD_UNUSED(kbdev); -#endif /* !MALI_USE_CSF */ } void kbase_backend_timer_suspend(struct kbase_device *kbdev) diff --git a/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c b/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c index 9ce5075..6eedc00 100644 --- a/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c +++ b/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,8 +19,9 @@ * */ +#include <linux/version_compat_defs.h> + #include <mali_kbase.h> -#include <mali_kbase_bits.h> #include <mali_kbase_config_defaults.h> #include <device/mali_kbase_device.h> #include "mali_kbase_l2_mmu_config.h" diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c index dd16fb2..46bcdc7 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c +++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -484,13 +484,6 @@ void *gpu_device_get_data(void *model) #define signal_int(m, s) m->slots[(s)].job_complete_irq_asserted = 1 -/* SCons should pass in a default GPU, but other ways of building (e.g. - * in-tree) won't, so define one here in case. - */ -#ifndef CONFIG_MALI_NO_MALI_DEFAULT_GPU -#define CONFIG_MALI_NO_MALI_DEFAULT_GPU "tMIx" -#endif - static char *no_mali_gpu = CONFIG_MALI_NO_MALI_DEFAULT_GPU; module_param(no_mali_gpu, charp, 0000); MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as"); @@ -1378,10 +1371,10 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value) dummy->l2_config = value; } #if MALI_USE_CSF - else if (addr >= GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET) && - addr < GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET + - (CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE))) { - if (addr == GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET)) + else if (addr >= CSF_HW_DOORBELL_PAGE_OFFSET && + addr < CSF_HW_DOORBELL_PAGE_OFFSET + + (CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE)) { + if (addr == CSF_HW_DOORBELL_PAGE_OFFSET) hw_error_status.job_irq_status = JOB_IRQ_GLOBAL_IF; } else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) && (addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) { @@ -1409,13 +1402,13 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value) } } #endif - else if (addr == MMU_REG(MMU_IRQ_MASK)) { + else if (addr == MMU_CONTROL_REG(MMU_IRQ_MASK)) { hw_error_status.mmu_irq_mask = value; - } else if (addr == MMU_REG(MMU_IRQ_CLEAR)) { + } else if (addr == MMU_CONTROL_REG(MMU_IRQ_CLEAR)) { hw_error_status.mmu_irq_rawstat &= (~value); - } else if ((addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)) && (addr <= MMU_AS_REG(15, AS_STATUS))) { - int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO)) - >> 6; + } else if ((addr >= MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) && + (addr <= MMU_STAGE1_REG(MMU_AS_REG(15, AS_STATUS)))) { + int mem_addr_space = (addr - MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) >> 6; switch (addr & 0x3F) { case AS_COMMAND: @@ -1926,10 +1919,9 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) } else if (addr >= GPU_CONTROL_REG(CYCLE_COUNT_LO) && addr <= GPU_CONTROL_REG(TIMESTAMP_HI)) { *value = 0; - } else if (addr >= MMU_AS_REG(0, AS_TRANSTAB_LO) - && addr <= MMU_AS_REG(15, AS_STATUS)) { - int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO)) - >> 6; + } else if (addr >= MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO)) && + addr <= MMU_STAGE1_REG(MMU_AS_REG(15, AS_STATUS))) { + int mem_addr_space = (addr - MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) >> 6; switch (addr & 0x3F) { case AS_TRANSTAB_LO: @@ -1973,11 +1965,11 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = 0; break; } - } else if (addr == MMU_REG(MMU_IRQ_MASK)) { + } else if (addr == MMU_CONTROL_REG(MMU_IRQ_MASK)) { *value = hw_error_status.mmu_irq_mask; - } else if (addr == MMU_REG(MMU_IRQ_RAWSTAT)) { + } else if (addr == MMU_CONTROL_REG(MMU_IRQ_RAWSTAT)) { *value = hw_error_status.mmu_irq_rawstat; - } else if (addr == MMU_REG(MMU_IRQ_STATUS)) { + } else if (addr == MMU_CONTROL_REG(MMU_IRQ_STATUS)) { *value = hw_error_status.mmu_irq_mask & hw_error_status.mmu_irq_rawstat; } @@ -1985,8 +1977,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) else if (addr == IPA_CONTROL_REG(STATUS)) { *value = (ipa_control_timer_enabled << 31); } else if ((addr >= IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) && - (addr <= IPA_CONTROL_REG(VALUE_CSHW_REG_HI( - IPA_CTL_MAX_VAL_CNT_IDX)))) { + (addr <= IPA_CONTROL_REG(VALUE_CSHW_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) { u32 counter_index = (addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) >> 3; bool is_low_word = @@ -1995,8 +1986,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_CSHW, counter_index, is_low_word); } else if ((addr >= IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) && - (addr <= IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI( - IPA_CTL_MAX_VAL_CNT_IDX)))) { + (addr <= IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) { u32 counter_index = (addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) >> 3; bool is_low_word = @@ -2005,8 +1995,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_MEMSYS, counter_index, is_low_word); } else if ((addr >= IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) && - (addr <= IPA_CONTROL_REG(VALUE_TILER_REG_HI( - IPA_CTL_MAX_VAL_CNT_IDX)))) { + (addr <= IPA_CONTROL_REG(VALUE_TILER_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) { u32 counter_index = (addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) >> 3; bool is_low_word = @@ -2015,8 +2004,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_TILER, counter_index, is_low_word); } else if ((addr >= IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) && - (addr <= IPA_CONTROL_REG(VALUE_SHADER_REG_HI( - IPA_CTL_MAX_VAL_CNT_IDX)))) { + (addr <= IPA_CONTROL_REG(VALUE_SHADER_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) { u32 counter_index = (addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) >> 3; bool is_low_word = @@ -2214,16 +2202,3 @@ int gpu_model_control(void *model, return 0; } - -/** - * kbase_is_gpu_removed - Has the GPU been removed. - * @kbdev: Kbase device pointer - * - * This function would return true if the GPU has been removed. - * It is stubbed here - * Return: Always false - */ -bool kbase_is_gpu_removed(struct kbase_device *kbdev) -{ - return false; -} diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.c b/mali_kbase/backend/gpu/mali_kbase_model_linux.c index e90e4df..67e00e9 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_linux.c +++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010, 2012-2015, 2017-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -95,8 +95,7 @@ static void serve_mmu_irq(struct work_struct *work) if (atomic_cmpxchg(&kbdev->serving_mmu_irq, 1, 0) == 1) { u32 val; - while ((val = kbase_reg_read(kbdev, - MMU_REG(MMU_IRQ_STATUS)))) { + while ((val = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_STATUS)))) { /* Handle the IRQ */ kbase_mmu_interrupt(kbdev, val); } @@ -156,7 +155,7 @@ KBASE_EXPORT_TEST_API(kbase_reg_write); u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) { unsigned long flags; - u32 val; + u32 val = 0; spin_lock_irqsave(&kbdev->reg_op_lock, flags); midgard_model_read_reg(kbdev->model, offset, &val); diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c index ad86cae..f31711d 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -154,6 +154,7 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) kbdev->pm.backend.gpu_powered = false; kbdev->pm.backend.gpu_ready = false; kbdev->pm.suspending = false; + kbdev->pm.resuming = false; #ifdef CONFIG_MALI_ARBITER_SUPPORT kbase_pm_set_gpu_lost(kbdev, false); #endif @@ -575,11 +576,13 @@ static int kbase_pm_do_poweroff_sync(struct kbase_device *kbdev) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; unsigned long flags; - int ret = 0; + int ret; WARN_ON(kbdev->pm.active_count); - kbase_pm_wait_for_poweroff_work_complete(kbdev); + ret = kbase_pm_wait_for_poweroff_work_complete(kbdev); + if (ret) + return ret; kbase_pm_lock(kbdev); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -664,25 +667,6 @@ unlock_hwaccess: spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } -static bool is_poweroff_in_progress(struct kbase_device *kbdev) -{ - bool ret; - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - ret = (kbdev->pm.backend.poweroff_wait_in_progress == false); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return ret; -} - -void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev) -{ - wait_event_killable(kbdev->pm.backend.poweroff_wait, - is_poweroff_in_progress(kbdev)); -} -KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_work_complete); - /** * is_gpu_powered_down - Check whether GPU is powered down * @@ -936,7 +920,13 @@ int kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) kbase_pm_unlock(kbdev); - kbase_pm_wait_for_poweroff_work_complete(kbdev); + ret = kbase_pm_wait_for_poweroff_work_complete(kbdev); + if (ret) { +#if !MALI_USE_CSF + kbase_backend_timer_resume(kbdev); +#endif /* !MALI_USE_CSF */ + return ret; + } #endif WARN_ON(kbdev->pm.backend.gpu_powered); @@ -952,6 +942,8 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) { kbase_pm_lock(kbdev); + /* System resume callback has begun */ + kbdev->pm.resuming = true; kbdev->pm.suspending = false; #ifdef CONFIG_MALI_ARBITER_SUPPORT if (kbase_pm_is_gpu_lost(kbdev)) { @@ -966,7 +958,6 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) kbase_backend_timer_resume(kbdev); #endif /* !MALI_USE_CSF */ - wake_up_all(&kbdev->pm.resume_wait); kbase_pm_unlock(kbdev); } diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c index 0caf63e..aab4106 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -50,9 +50,6 @@ #ifdef CONFIG_MALI_ARBITER_SUPPORT #include <arbiter/mali_kbase_arbiter_pm.h> #endif /* CONFIG_MALI_ARBITER_SUPPORT */ -#if MALI_USE_CSF -#include <csf/ipa_control/mali_kbase_csf_ipa_control.h> -#endif #if MALI_USE_CSF #include <linux/delay.h> @@ -698,8 +695,8 @@ static void wait_mcu_as_inactive(struct kbase_device *kbdev) /* Wait for the AS_ACTIVE_INT bit to become 0 for the AS used by MCU FW */ while (--max_loops && - kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) & - AS_STATUS_AS_ACTIVE_INT) + kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(MCU_AS_NR, AS_STATUS))) & + AS_STATUS_AS_ACTIVE_INT) ; if (!WARN_ON_ONCE(max_loops == 0)) @@ -2315,11 +2312,11 @@ void kbase_pm_reset_complete(struct kbase_device *kbdev) #define PM_TIMEOUT_MS (5000) /* 5s */ #endif -static void kbase_pm_timed_out(struct kbase_device *kbdev) +static void kbase_pm_timed_out(struct kbase_device *kbdev, const char *timeout_msg) { unsigned long flags; - dev_err(kbdev->dev, "Power transition timed out unexpectedly\n"); + dev_err(kbdev->dev, "%s", timeout_msg); #if !MALI_USE_CSF CSTD_UNUSED(flags); dev_err(kbdev->dev, "Desired state :\n"); @@ -2405,7 +2402,7 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev) #endif if (!remaining) { - kbase_pm_timed_out(kbdev); + kbase_pm_timed_out(kbdev, "Wait for desired PM state with L2 powered timed out"); err = -ETIMEDOUT; } else if (remaining < 0) { dev_info( @@ -2445,11 +2442,11 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) #endif if (!remaining) { - kbase_pm_timed_out(kbdev); + kbase_pm_timed_out(kbdev, "Wait for power transition timed out"); err = -ETIMEDOUT; } else if (remaining < 0) { dev_info(kbdev->dev, - "Wait for desired PM state got interrupted"); + "Wait for power transition got interrupted"); err = (int)remaining; } @@ -2504,7 +2501,7 @@ int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev) #endif if (!remaining) { - kbase_pm_timed_out(kbdev); + kbase_pm_timed_out(kbdev, "Wait for cores down scaling timed out"); err = -ETIMEDOUT; } else if (remaining < 0) { dev_info( @@ -2517,6 +2514,46 @@ int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev) } #endif +static bool is_poweroff_wait_in_progress(struct kbase_device *kbdev) +{ + bool ret; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + ret = kbdev->pm.backend.poweroff_wait_in_progress; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return ret; +} + +int kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev) +{ + long remaining; +#if MALI_USE_CSF + /* gpu_poweroff_wait_work would be subjected to the kernel scheduling + * and so the wait time can't only be the function of GPU frequency. + */ + const unsigned int extra_wait_time_ms = 2000; + const long timeout = + kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT) + + extra_wait_time_ms); +#else + const long timeout = msecs_to_jiffies(PM_TIMEOUT_MS); +#endif + int err = 0; + + remaining = wait_event_timeout( + kbdev->pm.backend.poweroff_wait, + !is_poweroff_wait_in_progress(kbdev), timeout); + if (!remaining) { + kbase_pm_timed_out(kbdev, "Wait for poweroff work timed out"); + err = -ETIMEDOUT; + } + + return err; +} +KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_work_complete); + void kbase_pm_enable_interrupts(struct kbase_device *kbdev) { unsigned long flags; @@ -2534,12 +2571,12 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev) kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF); kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); #if MALI_USE_CSF /* Enable only the Page fault bits part */ - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFF); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), 0xFFFF); #else - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), 0xFFFFFFFF); #endif } @@ -2559,8 +2596,8 @@ void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev) kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0); kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), 0); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); } void kbase_pm_disable_interrupts(struct kbase_device *kbdev) @@ -2977,9 +3014,13 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) kbdev->hw_quirks_tiler = 0; kbdev->hw_quirks_mmu = 0; - if (!of_property_read_u32(np, "quirks_gpu", &kbdev->hw_quirks_gpu)) { - dev_info(kbdev->dev, - "Found quirks_gpu = [0x%x] in Devicetree\n", + /* Read the "-" versions of the properties and fall back to + * the "_" versions if these are not found + */ + + if (!of_property_read_u32(np, "quirks-gpu", &kbdev->hw_quirks_gpu) || + !of_property_read_u32(np, "quirks_gpu", &kbdev->hw_quirks_gpu)) { + dev_info(kbdev->dev, "Found quirks_gpu = [0x%x] in Devicetree\n", kbdev->hw_quirks_gpu); } else { error = kbase_set_gpu_quirks(kbdev, prod_id); @@ -2987,33 +3028,30 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) return error; } - if (!of_property_read_u32(np, "quirks_sc", - &kbdev->hw_quirks_sc)) { - dev_info(kbdev->dev, - "Found quirks_sc = [0x%x] in Devicetree\n", - kbdev->hw_quirks_sc); + if (!of_property_read_u32(np, "quirks-sc", &kbdev->hw_quirks_sc) || + !of_property_read_u32(np, "quirks_sc", &kbdev->hw_quirks_sc)) { + dev_info(kbdev->dev, "Found quirks_sc = [0x%x] in Devicetree\n", + kbdev->hw_quirks_sc); } else { error = kbase_set_sc_quirks(kbdev, prod_id); if (error) return error; } - if (!of_property_read_u32(np, "quirks_tiler", - &kbdev->hw_quirks_tiler)) { - dev_info(kbdev->dev, - "Found quirks_tiler = [0x%x] in Devicetree\n", - kbdev->hw_quirks_tiler); + if (!of_property_read_u32(np, "quirks-tiler", &kbdev->hw_quirks_tiler) || + !of_property_read_u32(np, "quirks_tiler", &kbdev->hw_quirks_tiler)) { + dev_info(kbdev->dev, "Found quirks_tiler = [0x%x] in Devicetree\n", + kbdev->hw_quirks_tiler); } else { error = kbase_set_tiler_quirks(kbdev); if (error) return error; } - if (!of_property_read_u32(np, "quirks_mmu", - &kbdev->hw_quirks_mmu)) { - dev_info(kbdev->dev, - "Found quirks_mmu = [0x%x] in Devicetree\n", - kbdev->hw_quirks_mmu); + if (!of_property_read_u32(np, "quirks-mmu", &kbdev->hw_quirks_mmu) || + !of_property_read_u32(np, "quirks_mmu", &kbdev->hw_quirks_mmu)) { + dev_info(kbdev->dev, "Found quirks_mmu = [0x%x] in Devicetree\n", + kbdev->hw_quirks_mmu); } else { error = kbase_set_mmu_quirks(kbdev); } diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h index cdc51d5..e999f9f 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -467,8 +467,10 @@ void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev); * This function effectively just waits for the @gpu_poweroff_wait_work work * item to complete, if it was enqueued. GPU may not have been powered down * before this function returns. + * + * Return: 0 on success, error code on error */ -void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev); +int kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev); /** * kbase_pm_wait_for_gpu_power_down - Wait for the GPU power down to complete @@ -857,6 +859,8 @@ static inline bool kbase_pm_mcu_is_in_desired_state(struct kbase_device *kbdev) { bool in_desired_state = true; + lockdep_assert_held(&kbdev->hwaccess_lock); + if (kbase_pm_is_mcu_desired(kbdev) && kbdev->pm.backend.mcu_state != KBASE_MCU_ON) in_desired_state = false; else if (!kbase_pm_is_mcu_desired(kbdev) && diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c index d2979e8..39a05e7 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -53,7 +53,9 @@ void kbase_pm_policy_init(struct kbase_device *kbdev) unsigned long flags; int i; - if (of_property_read_string(np, "power_policy", &power_policy_name) == 0) { + /* Read "power-policy" property and fallback to "power_policy" if not found */ + if ((of_property_read_string(np, "power-policy", &power_policy_name) == 0) || + (of_property_read_string(np, "power_policy", &power_policy_name) == 0)) { for (i = 0; i < ARRAY_SIZE(all_policy_list); i++) if (sysfs_streq(all_policy_list[i]->name, power_policy_name)) { default_policy = all_policy_list[i]; @@ -294,6 +296,8 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, bool reset_gpu = false; bool reset_op_prevented = true; struct kbase_csf_scheduler *scheduler = NULL; + u32 pwroff; + bool switching_to_always_on; #endif KBASE_DEBUG_ASSERT(kbdev != NULL); @@ -302,6 +306,16 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, KBASE_KTRACE_ADD(kbdev, PM_SET_POLICY, NULL, new_policy->id); #if MALI_USE_CSF + pwroff = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev); + switching_to_always_on = new_policy == &kbase_pm_always_on_policy_ops; + if (pwroff == 0 && !switching_to_always_on) { + dev_warn(kbdev->dev, + "power_policy: cannot switch away from always_on with mcu_shader_pwroff_timeout set to 0\n"); + dev_warn(kbdev->dev, + "power_policy: resetting mcu_shader_pwroff_timeout to default value to switch policy from always_on\n"); + kbase_csf_firmware_reset_mcu_core_pwroff_time(kbdev); + } + scheduler = &kbdev->csf.scheduler; KBASE_DEBUG_ASSERT(scheduler != NULL); diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c index 7a4d662..28365c0 100644 --- a/mali_kbase/backend/gpu/mali_kbase_time.c +++ b/mali_kbase/backend/gpu/mali_kbase_time.c @@ -29,6 +29,39 @@ #include <device/mali_kbase_device.h> #include <backend/gpu/mali_kbase_pm_internal.h> #include <mali_kbase_config_defaults.h> +#include <linux/version_compat_defs.h> + +struct kbase_timeout_info { + char *selector_str; + u64 timeout_cycles; +}; + +#if MALI_USE_CSF +static struct kbase_timeout_info timeout_info[KBASE_TIMEOUT_SELECTOR_COUNT] = { + [CSF_FIRMWARE_TIMEOUT] = { "CSF_FIRMWARE_TIMEOUT", MIN(CSF_FIRMWARE_TIMEOUT_CYCLES, + CSF_FIRMWARE_PING_TIMEOUT_CYCLES) }, + [CSF_PM_TIMEOUT] = { "CSF_PM_TIMEOUT", CSF_PM_TIMEOUT_CYCLES }, + [CSF_GPU_RESET_TIMEOUT] = { "CSF_GPU_RESET_TIMEOUT", CSF_GPU_RESET_TIMEOUT_CYCLES }, + [CSF_CSG_SUSPEND_TIMEOUT] = { "CSF_CSG_SUSPEND_TIMEOUT", CSF_CSG_SUSPEND_TIMEOUT_CYCLES }, + [CSF_FIRMWARE_BOOT_TIMEOUT] = { "CSF_FIRMWARE_BOOT_TIMEOUT", + CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES }, + [CSF_FIRMWARE_PING_TIMEOUT] = { "CSF_FIRMWARE_PING_TIMEOUT", + CSF_FIRMWARE_PING_TIMEOUT_CYCLES }, + [CSF_SCHED_PROTM_PROGRESS_TIMEOUT] = { "CSF_SCHED_PROTM_PROGRESS_TIMEOUT", + DEFAULT_PROGRESS_TIMEOUT_CYCLES }, + [MMU_AS_INACTIVE_WAIT_TIMEOUT] = { "MMU_AS_INACTIVE_WAIT_TIMEOUT", + MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES }, + [KCPU_FENCE_SIGNAL_TIMEOUT] = { "KCPU_FENCE_SIGNAL_TIMEOUT", + KCPU_FENCE_SIGNAL_TIMEOUT_CYCLES }, +}; +#else +static struct kbase_timeout_info timeout_info[KBASE_TIMEOUT_SELECTOR_COUNT] = { + [MMU_AS_INACTIVE_WAIT_TIMEOUT] = { "MMU_AS_INACTIVE_WAIT_TIMEOUT", + MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES }, + [JM_DEFAULT_JS_FREE_TIMEOUT] = { "JM_DEFAULT_JS_FREE_TIMEOUT", + JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES }, +}; +#endif void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, u64 *cycle_counter, @@ -108,94 +141,130 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, #endif } -unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, - enum kbase_timeout_selector selector) +static u64 kbase_device_get_scaling_frequency(struct kbase_device *kbdev) +{ + u64 freq_khz = kbdev->lowest_gpu_freq_khz; + + if (!freq_khz) { + dev_dbg(kbdev->dev, + "Lowest frequency uninitialized! Using reference frequency for scaling"); + return DEFAULT_REF_TIMEOUT_FREQ_KHZ; + } + + return freq_khz; +} + +void kbase_device_set_timeout_ms(struct kbase_device *kbdev, enum kbase_timeout_selector selector, + unsigned int timeout_ms) { + char *selector_str; + + if (unlikely(selector >= KBASE_TIMEOUT_SELECTOR_COUNT)) { + selector = KBASE_DEFAULT_TIMEOUT; + selector_str = timeout_info[selector].selector_str; + dev_warn(kbdev->dev, + "Unknown timeout selector passed, falling back to default: %s\n", + timeout_info[selector].selector_str); + } + selector_str = timeout_info[selector].selector_str; + + kbdev->backend_time.device_scaled_timeouts[selector] = timeout_ms; + dev_dbg(kbdev->dev, "\t%-35s: %ums\n", selector_str, timeout_ms); +} + +void kbase_device_set_timeout(struct kbase_device *kbdev, enum kbase_timeout_selector selector, + u64 timeout_cycles, u32 cycle_multiplier) +{ + u64 final_cycles; + u64 timeout; + u64 freq_khz = kbase_device_get_scaling_frequency(kbdev); + + if (unlikely(selector >= KBASE_TIMEOUT_SELECTOR_COUNT)) { + selector = KBASE_DEFAULT_TIMEOUT; + dev_warn(kbdev->dev, + "Unknown timeout selector passed, falling back to default: %s\n", + timeout_info[selector].selector_str); + } + + /* If the multiplication overflows, we will have unsigned wrap-around, and so might + * end up with a shorter timeout. In those cases, we then want to have the largest + * timeout possible that will not run into these issues. Note that this will not + * wait for U64_MAX/frequency ms, as it will be clamped to a max of UINT_MAX + * milliseconds by subsequent steps. + */ + if (check_mul_overflow(timeout_cycles, (u64)cycle_multiplier, &final_cycles)) + final_cycles = U64_MAX; + /* Timeout calculation: * dividing number of cycles by freq in KHz automatically gives value * in milliseconds. nr_cycles will have to be multiplied by 1e3 to * get result in microseconds, and 1e6 to get result in nanoseconds. */ + timeout = div_u64(final_cycles, freq_khz); + + if (unlikely(timeout > UINT_MAX)) { + dev_dbg(kbdev->dev, + "Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms", + timeout, timeout_info[selector].selector_str, + kbase_device_get_scaling_frequency(kbdev)); + timeout = UINT_MAX; + } - u64 timeout, nr_cycles = 0; - u64 freq_khz; + kbase_device_set_timeout_ms(kbdev, selector, (unsigned int)timeout); +} - /* Only for debug messages, safe default in case it's mis-maintained */ - const char *selector_str = "(unknown)"; +/** + * kbase_timeout_scaling_init - Initialize the table of scaled timeout + * values associated with a @kbase_device. + * + * @kbdev: KBase device pointer. + * + * Return: 0 on success, negative error code otherwise. + */ +static int kbase_timeout_scaling_init(struct kbase_device *kbdev) +{ + int err; + enum kbase_timeout_selector selector; - if (!kbdev->lowest_gpu_freq_khz) { - dev_dbg(kbdev->dev, - "Lowest frequency uninitialized! Using reference frequency for scaling"); - freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ; - } else { - freq_khz = kbdev->lowest_gpu_freq_khz; + /* First, we initialize the minimum and maximum device frequencies, which + * are used to compute the timeouts. + */ + err = kbase_pm_gpu_freq_init(kbdev); + if (unlikely(err < 0)) { + dev_dbg(kbdev->dev, "Could not initialize GPU frequency\n"); + return err; } - switch (selector) { - case MMU_AS_INACTIVE_WAIT_TIMEOUT: - selector_str = "MMU_AS_INACTIVE_WAIT_TIMEOUT"; - nr_cycles = MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES; - break; - case KBASE_TIMEOUT_SELECTOR_COUNT: - default: -#if !MALI_USE_CSF - WARN(1, "Invalid timeout selector used! Using default value"); - nr_cycles = JM_DEFAULT_TIMEOUT_CYCLES; - break; - case JM_DEFAULT_JS_FREE_TIMEOUT: - selector_str = "JM_DEFAULT_JS_FREE_TIMEOUT"; - nr_cycles = JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES; - break; -#else - /* Use Firmware timeout if invalid selection */ - WARN(1, - "Invalid timeout selector used! Using CSF Firmware timeout"); - fallthrough; - case CSF_FIRMWARE_TIMEOUT: - selector_str = "CSF_FIRMWARE_TIMEOUT"; - /* Any FW timeout cannot be longer than the FW ping interval, after which - * the firmware_aliveness_monitor will be triggered and may restart - * the GPU if the FW is unresponsive. + dev_dbg(kbdev->dev, "Scaling kbase timeouts:\n"); + for (selector = 0; selector < KBASE_TIMEOUT_SELECTOR_COUNT; selector++) { + u32 cycle_multiplier = 1; + u64 nr_cycles = timeout_info[selector].timeout_cycles; +#if MALI_USE_CSF + /* Special case: the scheduler progress timeout can be set manually, + * and does not have a canonical length defined in the headers. Hence, + * we query it once upon startup to get a baseline, and change it upon + * every invocation of the appropriate functions */ - nr_cycles = min(CSF_FIRMWARE_PING_TIMEOUT_CYCLES, CSF_FIRMWARE_TIMEOUT_CYCLES); - - if (nr_cycles == CSF_FIRMWARE_PING_TIMEOUT_CYCLES) - dev_warn(kbdev->dev, "Capping %s to CSF_FIRMWARE_PING_TIMEOUT\n", - selector_str); - break; - case CSF_PM_TIMEOUT: - selector_str = "CSF_PM_TIMEOUT"; - nr_cycles = CSF_PM_TIMEOUT_CYCLES; - break; - case CSF_GPU_RESET_TIMEOUT: - selector_str = "CSF_GPU_RESET_TIMEOUT"; - nr_cycles = CSF_GPU_RESET_TIMEOUT_CYCLES; - break; - case CSF_CSG_SUSPEND_TIMEOUT: - selector_str = "CSF_CSG_SUSPEND_TIMEOUT"; - nr_cycles = CSF_CSG_SUSPEND_TIMEOUT_CYCLES; - break; - case CSF_FIRMWARE_BOOT_TIMEOUT: - selector_str = "CSF_FIRMWARE_BOOT_TIMEOUT"; - nr_cycles = CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES; - break; - case CSF_FIRMWARE_PING_TIMEOUT: - selector_str = "CSF_FIRMWARE_PING_TIMEOUT"; - nr_cycles = CSF_FIRMWARE_PING_TIMEOUT_CYCLES; - break; - case CSF_SCHED_PROTM_PROGRESS_TIMEOUT: - selector_str = "CSF_SCHED_PROTM_PROGRESS_TIMEOUT"; - nr_cycles = kbase_csf_timeout_get(kbdev); - break; + if (selector == CSF_SCHED_PROTM_PROGRESS_TIMEOUT) + nr_cycles = kbase_csf_timeout_get(kbdev); #endif + + /* Since we are in control of the iteration bounds for the selector, + * we don't have to worry about bounds checking when setting the timeout. + */ + kbase_device_set_timeout(kbdev, selector, nr_cycles, cycle_multiplier); } + return 0; +} - timeout = div_u64(nr_cycles, freq_khz); - if (WARN(timeout > UINT_MAX, - "Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms", - (unsigned long long)timeout, selector_str, (unsigned long long)freq_khz)) - timeout = UINT_MAX; - return (unsigned int)timeout; +unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, enum kbase_timeout_selector selector) +{ + if (unlikely(selector >= KBASE_TIMEOUT_SELECTOR_COUNT)) { + dev_warn(kbdev->dev, "Querying wrong selector, falling back to default\n"); + selector = KBASE_DEFAULT_TIMEOUT; + } + + return kbdev->backend_time.device_scaled_timeouts[selector]; } KBASE_EXPORT_TEST_API(kbase_get_timeout_ms); @@ -247,18 +316,21 @@ static void get_cpu_gpu_time(struct kbase_device *kbdev, u64 *cpu_ts, u64 *gpu_t int kbase_backend_time_init(struct kbase_device *kbdev) { + int err = 0; #if MALI_USE_CSF u64 cpu_ts = 0; u64 gpu_ts = 0; u64 freq; u64 common_factor; + kbase_pm_register_access_enable(kbdev); get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL); freq = arch_timer_get_cntfrq(); if (!freq) { dev_warn(kbdev->dev, "arch_timer_get_rate() is zero!"); - return -EINVAL; + err = -EINVAL; + goto disable_registers; } common_factor = gcd(NSEC_PER_SEC, freq); @@ -268,12 +340,23 @@ int kbase_backend_time_init(struct kbase_device *kbdev) if (!kbdev->backend_time.divisor) { dev_warn(kbdev->dev, "CPU to GPU divisor is zero!"); - return -EINVAL; + err = -EINVAL; + goto disable_registers; } kbdev->backend_time.offset = cpu_ts - div64_u64(gpu_ts * kbdev->backend_time.multiplier, kbdev->backend_time.divisor); #endif - return 0; + if (kbase_timeout_scaling_init(kbdev)) { + dev_warn(kbdev->dev, "Could not initialize timeout scaling"); + err = -EINVAL; + } + +#if MALI_USE_CSF +disable_registers: + kbase_pm_register_access_disable(kbdev); +#endif + + return err; } |