author     Jörg Wagner <jorwag@google.com>    2023-08-31 19:15:13 +0000
committer  Jörg Wagner <jorwag@google.com>    2023-09-01 09:13:55 +0000
commit     b6fd708b3a4da86a196a61592ea3585f1aca7313 (patch)
tree       1cbe3029a45bf9869c17a5b6954e5ae074b44ac8 /mali_kbase/backend
parent     46edf1b5965d872c5f8a09c6dc3dcbff58f78a92 (diff)
parent     e61eb93296e9f940b32d4ad4b0c3a5557cbeaf17 (diff)
download   gpu-b6fd708b3a4da86a196a61592ea3585f1aca7313.tar.gz
Merge r44p1-00dev3 from partner/upstream into android13-gs-pixel-5.10-udc-qpr1
Bug: 290882327
Change-Id: I90723cbaa3f294431087587fd8025f0688e51bf2
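A recurring change in this merge is the introduction of killable variants of the power-management waits (kbase_pm_killable_wait_for_desired_state() and kbase_pm_killable_wait_for_poweroff_work_complete() in the hunks below), so that waits issued from ioctl/userspace context can be interrupted by SIGKILL. A minimal sketch of the wait-selection pattern, assuming simplified signatures and omitting the driver's timeout bookkeeping:

```c
/*
 * Sketch of the killable-wait selection used by pm_wait_for_desired_state()
 * and pm_wait_for_poweroff_work_complete() below. The wrapper name and
 * parameters are illustrative, not from the driver.
 */
#include <linux/atomic.h>
#include <linux/version.h>
#include <linux/wait.h>

static long pm_wait_sketch(wait_queue_head_t *wq, atomic_t *done,
			   long timeout_jiffies, bool killable)
{
#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE
	if (killable)
		/* Wakes early (negative return) only on fatal signals. */
		return wait_event_killable_timeout(*wq, atomic_read(done),
						   timeout_jiffies);
#endif
	/* Older kernels lack the killable variant: wait uninterruptibly. */
	return wait_event_timeout(*wq, atomic_read(done), timeout_jiffies);
}
```

As in the hunks below, callers map a zero return onto -ETIMEDOUT and a negative return onto the interrupted-wait error, and only the killable entry points request the interruptible behaviour.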
Diffstat (limited to 'mali_kbase/backend')
18 files changed, 603 insertions(+), 359 deletions(-)
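Another pattern repeated across the hunks below is the move to dash-separated devicetree property names with a fallback to the legacy underscore spellings (arbiter-if/arbiter_if, quirks-gpu/quirks_gpu, quirks-sc, quirks-tiler, quirks-mmu, power-policy/power_policy). A short sketch of the lookup order, with an illustrative helper name:

```c
/*
 * Dash-first devicetree lookup with legacy underscore fallback, as applied
 * throughout this merge. The helper name is illustrative.
 */
#include <linux/of.h>

static const void *read_arbiter_if_prop(const struct device_node *np)
{
	const void *prop = of_get_property(np, "arbiter-if", NULL);

	if (!prop)	/* fall back to the legacy underscore spelling */
		prop = of_get_property(np, "arbiter_if", NULL);
	return prop;
}
```

For u32 properties the same ordering is expressed as a short-circuit, e.g. `!of_property_read_u32(np, "quirks-gpu", &v) || !of_property_read_u32(np, "quirks_gpu", &v)`.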
diff --git a/mali_kbase/backend/gpu/Kbuild b/mali_kbase/backend/gpu/Kbuild index 7df24c3..c37cc59 100644 --- a/mali_kbase/backend/gpu/Kbuild +++ b/mali_kbase/backend/gpu/Kbuild @@ -22,7 +22,6 @@ mali_kbase-y += \ backend/gpu/mali_kbase_cache_policy_backend.o \ backend/gpu/mali_kbase_gpuprops_backend.o \ backend/gpu/mali_kbase_irq_linux.o \ - backend/gpu/mali_kbase_js_backend.o \ backend/gpu/mali_kbase_pm_backend.o \ backend/gpu/mali_kbase_pm_driver.o \ backend/gpu/mali_kbase_pm_metrics.o \ @@ -42,7 +41,8 @@ ifeq ($(MALI_USE_CSF),0) backend/gpu/mali_kbase_jm_as.o \ backend/gpu/mali_kbase_debug_job_fault_backend.o \ backend/gpu/mali_kbase_jm_hw.o \ - backend/gpu/mali_kbase_jm_rb.o + backend/gpu/mali_kbase_jm_rb.o \ + backend/gpu/mali_kbase_js_backend.o endif diff --git a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c index 7c0abba..86539d5 100644 --- a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2016, 2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,12 +43,12 @@ void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, kbdev->current_gpu_coherency_mode = mode; if (kbasep_amba_register_present(kbdev)) { - u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); + u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(AMBA_ENABLE)); val = AMBA_ENABLE_COHERENCY_PROTOCOL_SET(val, mode); - kbase_reg_write(kbdev, AMBA_ENABLE, val); + kbase_reg_write(kbdev, GPU_CONTROL_REG(AMBA_ENABLE), val); } else - kbase_reg_write(kbdev, COHERENCY_ENABLE, mode); + kbase_reg_write(kbdev, GPU_CONTROL_REG(COHERENCY_ENABLE), mode); } u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev) @@ -69,24 +69,12 @@ void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev, bool enable) { if (kbasep_amba_register_present(kbdev)) { - u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); + u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(AMBA_ENABLE)); val = AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SET(val, enable); - kbase_reg_write(kbdev, AMBA_ENABLE, val); + kbase_reg_write(kbdev, GPU_CONTROL_REG(AMBA_ENABLE), val); } else { WARN(1, "memory_cache_support not supported"); } } - -void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable) -{ - if (kbasep_amba_register_present(kbdev)) { - u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); - - val = AMBA_ENABLE_INVALIDATE_HINT_SET(val, enable); - kbase_reg_write(kbdev, AMBA_ENABLE, val); - } else { - WARN(1, "invalidate_hint not supported"); - } -} diff --git a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h index 8cd8090..0103695 100644 --- a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h +++ b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2016, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -53,13 +53,4 @@ u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev); */ void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev, bool enable); -/** - * kbase_amba_set_invalidate_hint() - Sets AMBA invalidate hint - * in the GPU. - * @kbdev: Device pointer - * @enable: true for enable. - * - * Note: Only for arch version 12.x.1 onwards. - */ -void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable); #endif /* _KBASE_CACHE_POLICY_BACKEND_H_ */ diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c index 8d09347..cca4f74 100644 --- a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c +++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c @@ -58,8 +58,10 @@ get_clk_rate_trace_callbacks(__maybe_unused struct kbase_device *kbdev) if (WARN_ON(!kbdev) || WARN_ON(!kbdev->dev)) return callbacks; - arbiter_if_node = - of_get_property(kbdev->dev->of_node, "arbiter_if", NULL); + arbiter_if_node = of_get_property(kbdev->dev->of_node, "arbiter-if", NULL); + if (!arbiter_if_node) + arbiter_if_node = of_get_property(kbdev->dev->of_node, "arbiter_if", NULL); + /* Arbitration enabled, override the callback pointer.*/ if (arbiter_if_node) callbacks = &arb_clk_rate_trace_ops; @@ -241,8 +243,7 @@ void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev) if (!clk_rtm->clk_rate_trace_ops) return; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - spin_lock(&clk_rtm->lock); + spin_lock_irqsave(&clk_rtm->lock, flags); for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { struct kbase_clk_data *clk_data = clk_rtm->clks[i]; @@ -258,8 +259,7 @@ void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev) } clk_rtm->gpu_idle = false; - spin_unlock(&clk_rtm->lock); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&clk_rtm->lock, flags); } void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev) diff --git a/mali_kbase/backend/gpu/mali_kbase_debug_job_fault_backend.c b/mali_kbase/backend/gpu/mali_kbase_debug_job_fault_backend.c index e121b41..cd3b29d 100644 --- a/mali_kbase/backend/gpu/mali_kbase_debug_job_fault_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_debug_job_fault_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2015, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -59,7 +59,7 @@ static int job_slot_reg_snapshot[] = { JS_CONFIG_NEXT }; -/*MMU_REG(r)*/ +/*MMU_CONTROL_REG(r)*/ static int mmu_reg_snapshot[] = { MMU_IRQ_MASK, MMU_IRQ_STATUS @@ -118,15 +118,14 @@ bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, /* get the MMU registers*/ for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) { - kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]); + kctx->reg_dump[offset] = MMU_CONTROL_REG(mmu_reg_snapshot[i]); offset += 2; } /* get the Address space registers*/ for (j = 0; j < as_number; j++) { for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) { - kctx->reg_dump[offset] = - MMU_AS_REG(j, as_reg_snapshot[i]); + kctx->reg_dump[offset] = MMU_STAGE1_REG(MMU_AS_REG(j, as_reg_snapshot[i])); offset += 2; } } diff --git a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c index ef09c6b..b95277c 100644 --- a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c +++ b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -99,7 +99,7 @@ static irqreturn_t kbase_mmu_irq_handler(int irq, void *data) atomic_inc(&kbdev->faults_pending); - val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS)); + val = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_STATUS)); #ifdef CONFIG_MALI_DEBUG if (!kbdev->pm.backend.driver_ready_for_irqs) @@ -298,7 +298,7 @@ static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data) return IRQ_NONE; } - val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS)); + val = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_STATUS)); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -310,7 +310,7 @@ static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data) kbasep_irq_test_data.triggered = 1; wake_up(&kbasep_irq_test_data.wait); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_CLEAR), val); return IRQ_HANDLED; } @@ -344,8 +344,8 @@ static int kbasep_common_test_interrupt( break; case MMU_IRQ_TAG: test_handler = kbase_mmu_irq_test_handler; - rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT); - mask_offset = MMU_REG(MMU_IRQ_MASK); + rawstat_offset = MMU_CONTROL_REG(MMU_IRQ_RAWSTAT); + mask_offset = MMU_CONTROL_REG(MMU_IRQ_MASK); break; case GPU_IRQ_TAG: /* already tested by pm_driver - bail out */ diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c index 72926bc..dd8f4d9 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c @@ -585,7 +585,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) count += nr_done; while (nr_done) { - if (nr_done == 1) { + if (likely(nr_done == 1)) { kbase_gpu_complete_hw(kbdev, i, completion_code, job_tail, @@ -604,6 +604,14 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) BASE_JD_EVENT_DONE, 0, &end_timestamp); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + /* Increment the end timestamp value by 1 ns to + * avoid having the same value for 'start_time_ns' + * and 'end_time_ns' for 
the 2nd atom whose job + * completion IRQ got merged with the 1st atom. + */ + end_timestamp = ktime_add(end_timestamp, ns_to_ktime(1)); +#endif } nr_done--; } @@ -1061,12 +1069,12 @@ static void kbase_debug_dump_registers(struct kbase_device *kbdev) i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO))); } dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", - kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)), + kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_RAWSTAT)), kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS))); dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)), kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)), - kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK))); + kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK))); dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)), kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1))); diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h index bfd55a6..380a530 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h @@ -47,7 +47,7 @@ void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code, #if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) static inline char *kbasep_make_job_slot_string(unsigned int js, char *js_string, size_t js_size) { - snprintf(js_string, js_size, "job_slot_%u", js); + (void)scnprintf(js_string, js_size, "job_slot_%u", js); return js_string; } #endif diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c index f4094a3..66f068a 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c @@ -32,6 +32,9 @@ #include <hwcnt/mali_kbase_hwcnt_context.h> #include <mali_kbase_reset_gpu.h> #include <mali_kbase_kinstr_jm.h> +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include <mali_kbase_gpu_metrics.h> +#endif #include <backend/gpu/mali_kbase_cache_policy_backend.h> #include <device/mali_kbase_device.h> #include <backend/gpu/mali_kbase_jm_internal.h> @@ -274,6 +277,59 @@ int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js) return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js); } +/** + * trace_atom_completion_for_gpu_metrics - Report the completion of atom for the + * purpose of emitting power/gpu_work_period + * tracepoint. + * + * @katom: Pointer to the atom that completed execution on GPU. + * @end_timestamp: Pointer to the timestamp of atom completion. May be NULL, in + * which case current time will be used. + * + * The function would also report the start for an atom that was in the HEAD_NEXT + * register. + * + * Note: Caller must hold the HW access lock. 
+ */ +static inline void trace_atom_completion_for_gpu_metrics( + struct kbase_jd_atom *const katom, + ktime_t *end_timestamp) +{ +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + u64 complete_ns; + struct kbase_context *kctx = katom->kctx; + struct kbase_jd_atom *queued = + kbase_gpu_inspect(kctx->kbdev, katom->slot_nr, 1); + +#ifdef CONFIG_MALI_DEBUG + WARN_ON(!kbase_gpu_inspect(kctx->kbdev, katom->slot_nr, 0)); +#endif + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + if (unlikely(queued == katom)) + return; + + /* A protected atom and a non-protected atom cannot be in the RB_SUBMITTED + * state at the same time in the job slot ringbuffer. Atom submission state + * machine prevents the submission of a non-protected atom until all + * protected atoms have completed and GPU has exited the protected mode. + * This implies that if the queued atom is in RB_SUBMITTED state, it shall + * be a protected atom and so we can return early. + */ + if (unlikely(kbase_jd_katom_is_protected(katom))) + return; + + if (likely(end_timestamp)) + complete_ns = ktime_to_ns(*end_timestamp); + else + complete_ns = ktime_get_raw_ns(); + + kbase_gpu_metrics_ctx_end_activity(kctx, complete_ns); + if (queued && queued->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) + kbase_gpu_metrics_ctx_start_activity(queued->kctx, complete_ns); +#endif +} static void kbase_gpu_release_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom, @@ -290,6 +346,7 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, break; case KBASE_ATOM_GPU_RB_SUBMITTED: + trace_atom_completion_for_gpu_metrics(katom, end_timestamp); kbase_kinstr_jm_atom_hw_release(katom); /* Inform power management at start/finish of atom so it can * update its GPU utilisation metrics. Mark atom as not @@ -865,6 +922,9 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) for (idx = 0; idx < SLOT_RB_SIZE; idx++) { bool cores_ready; +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + bool trace_atom_submit_for_gpu_metrics = true; +#endif int ret; if (!katom[idx]) @@ -975,12 +1035,21 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) case KBASE_ATOM_GPU_RB_READY: if (idx == 1) { + enum kbase_atom_gpu_rb_state atom_0_gpu_rb_state = + katom[0]->gpu_rb_state; + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + trace_atom_submit_for_gpu_metrics = + (atom_0_gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB); +#endif + /* Only submit if head atom or previous * atom already submitted */ - if ((katom[0]->gpu_rb_state != + if ((atom_0_gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED && - katom[0]->gpu_rb_state != + atom_0_gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) break; @@ -1017,7 +1086,15 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) &katom[idx]->start_timestamp); /* Inform platform at start/finish of atom */ + kbasep_platform_event_work_begin(katom[idx]); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + if (likely(trace_atom_submit_for_gpu_metrics && + !kbase_jd_katom_is_protected(katom[idx]))) + kbase_gpu_metrics_ctx_start_activity( + katom[idx]->kctx, + ktime_to_ns(katom[idx]->start_timestamp)); +#endif } else { if (katom[idx]->core_req & BASE_JD_REQ_PERMON) kbase_pm_release_gpu_cycle_counter_nolock(kbdev); @@ -1079,6 +1156,25 @@ kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a, KBASE_KATOM_FLAG_FAIL_BLOCKER))); } +static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + u32 action, + bool disjoint) +{ + struct 
kbase_context *kctx = katom->kctx; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; + kbase_gpu_mark_atom_for_return(kbdev, katom); + kbase_jsctx_slot_prio_blocked_set(kctx, katom->slot_nr, + katom->sched_priority); + + if (disjoint) + kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, + katom); +} + /** * kbase_gpu_irq_evict - evict a slot's JSn_HEAD_NEXT atom from the HW if it is * related to a failed JSn_HEAD atom @@ -1129,9 +1225,9 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 comple kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI)) != 0)) { kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), JS_COMMAND_NOP); - next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; if (completion_code == BASE_JD_EVENT_STOPPED) { + kbase_gpu_remove_atom(kbdev, next_katom, JS_COMMAND_SOFT_STOP, false); KBASE_TLSTREAM_TL_NRET_ATOM_LPU(kbdev, next_katom, &kbdev->gpu_props.props.raw_props.js_features [next_katom->slot_nr]); @@ -1140,10 +1236,12 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 comple KBASE_TLSTREAM_TL_NRET_CTX_LPU(kbdev, next_katom->kctx, &kbdev->gpu_props.props.raw_props.js_features [next_katom->slot_nr]); - } + } else { + next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; - if (next_katom->core_req & BASE_JD_REQ_PERMON) - kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + if (next_katom->core_req & BASE_JD_REQ_PERMON) + kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + } /* On evicting the next_katom, the last submission kctx on the * given job slot then reverts back to the one that owns katom. @@ -1528,25 +1626,6 @@ static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, unsigned int kbase_jsctx_slot_prio_blocked_set(kctx, js, katom->sched_priority); } -static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, - u32 action, - bool disjoint) -{ - struct kbase_context *kctx = katom->kctx; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; - kbase_gpu_mark_atom_for_return(kbdev, katom); - kbase_jsctx_slot_prio_blocked_set(kctx, katom->slot_nr, - katom->sched_priority); - - if (disjoint) - kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, - katom); -} - static int should_stop_x_dep_slot(struct kbase_jd_atom *katom) { if (katom->x_post_dep) { diff --git a/mali_kbase/backend/gpu/mali_kbase_js_backend.c b/mali_kbase/backend/gpu/mali_kbase_js_backend.c index 0ed04bb..ff4e114 100644 --- a/mali_kbase/backend/gpu/mali_kbase_js_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_js_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,28 +28,18 @@ #include <mali_kbase_reset_gpu.h> #include <backend/gpu/mali_kbase_jm_internal.h> #include <backend/gpu/mali_kbase_js_internal.h> +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include <mali_kbase_gpu_metrics.h> + +#endif -#if !MALI_USE_CSF /* * Hold the runpool_mutex for this */ -static inline bool timer_callback_should_run(struct kbase_device *kbdev) +static inline bool timer_callback_should_run(struct kbase_device *kbdev, int nr_running_ctxs) { - struct kbase_backend_data *backend = &kbdev->hwaccess.backend; - int nr_running_ctxs; - lockdep_assert_held(&kbdev->js_data.runpool_mutex); - /* Timer must stop if we are suspending */ - if (backend->suspend_timer) - return false; - - /* nr_contexts_pullable is updated with the runpool_mutex. However, the - * locking in the caller gives us a barrier that ensures - * nr_contexts_pullable is up-to-date for reading - */ - nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable); - #ifdef CONFIG_MALI_DEBUG if (kbdev->js_data.softstop_always) { /* Debug support for allowing soft-stop on a single context */ @@ -273,18 +263,20 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) return HRTIMER_NORESTART; } -#endif /* !MALI_USE_CSF */ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) { -#if !MALI_USE_CSF struct kbasep_js_device_data *js_devdata = &kbdev->js_data; struct kbase_backend_data *backend = &kbdev->hwaccess.backend; unsigned long flags; + /* Timer must stop if we are suspending */ + const bool suspend_timer = backend->suspend_timer; + const int nr_running_ctxs = + atomic_read(&kbdev->js_data.nr_contexts_runnable); lockdep_assert_held(&js_devdata->runpool_mutex); - if (!timer_callback_should_run(kbdev)) { + if (suspend_timer || !timer_callback_should_run(kbdev, nr_running_ctxs)) { /* Take spinlock to force synchronisation with timer */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); backend->timer_running = false; @@ -298,7 +290,8 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) hrtimer_cancel(&backend->scheduling_timer); } - if (timer_callback_should_run(kbdev) && !backend->timer_running) { + if (!suspend_timer && timer_callback_should_run(kbdev, nr_running_ctxs) && + !backend->timer_running) { /* Take spinlock to force synchronisation with timer */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); backend->timer_running = true; @@ -309,36 +302,59 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) KBASE_KTRACE_ADD_JM(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u, 0u); } -#else /* !MALI_USE_CSF */ - CSTD_UNUSED(kbdev); -#endif /* !MALI_USE_CSF */ + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + if (unlikely(suspend_timer)) { + js_devdata->gpu_metrics_timer_needed = false; + /* Cancel the timer as System suspend is happening */ + hrtimer_cancel(&js_devdata->gpu_metrics_timer); + js_devdata->gpu_metrics_timer_running = false; + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + /* Explicitly emit the tracepoint on System suspend */ + kbase_gpu_metrics_emit_tracepoint(kbdev, ktime_get_raw_ns()); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return; + } + + if (!nr_running_ctxs) { + /* Just set the flag to not restart the timer on expiry */ + js_devdata->gpu_metrics_timer_needed = false; + return; + } + + /* There are runnable contexts so the timer is needed */ + if 
(!js_devdata->gpu_metrics_timer_needed) { + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + js_devdata->gpu_metrics_timer_needed = true; + /* No need to restart the timer if it is already running. */ + if (!js_devdata->gpu_metrics_timer_running) { + hrtimer_start(&js_devdata->gpu_metrics_timer, + HR_TIMER_DELAY_NSEC(kbase_gpu_metrics_get_emit_interval()), + HRTIMER_MODE_REL); + js_devdata->gpu_metrics_timer_running = true; + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } +#endif } int kbase_backend_timer_init(struct kbase_device *kbdev) { -#if !MALI_USE_CSF struct kbase_backend_data *backend = &kbdev->hwaccess.backend; hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); backend->scheduling_timer.function = timer_callback; backend->timer_running = false; -#else /* !MALI_USE_CSF */ - CSTD_UNUSED(kbdev); -#endif /* !MALI_USE_CSF */ return 0; } void kbase_backend_timer_term(struct kbase_device *kbdev) { -#if !MALI_USE_CSF struct kbase_backend_data *backend = &kbdev->hwaccess.backend; hrtimer_cancel(&backend->scheduling_timer); -#else /* !MALI_USE_CSF */ - CSTD_UNUSED(kbdev); -#endif /* !MALI_USE_CSF */ } void kbase_backend_timer_suspend(struct kbase_device *kbdev) diff --git a/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c b/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c index 9ce5075..6eedc00 100644 --- a/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c +++ b/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,8 +19,9 @@ * */ +#include <linux/version_compat_defs.h> + #include <mali_kbase.h> -#include <mali_kbase_bits.h> #include <mali_kbase_config_defaults.h> #include <device/mali_kbase_device.h> #include "mali_kbase_l2_mmu_config.h" diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c index dd16fb2..46bcdc7 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c +++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -484,13 +484,6 @@ void *gpu_device_get_data(void *model) #define signal_int(m, s) m->slots[(s)].job_complete_irq_asserted = 1 -/* SCons should pass in a default GPU, but other ways of building (e.g. - * in-tree) won't, so define one here in case. 
- */ -#ifndef CONFIG_MALI_NO_MALI_DEFAULT_GPU -#define CONFIG_MALI_NO_MALI_DEFAULT_GPU "tMIx" -#endif - static char *no_mali_gpu = CONFIG_MALI_NO_MALI_DEFAULT_GPU; module_param(no_mali_gpu, charp, 0000); MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as"); @@ -1378,10 +1371,10 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value) dummy->l2_config = value; } #if MALI_USE_CSF - else if (addr >= GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET) && - addr < GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET + - (CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE))) { - if (addr == GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET)) + else if (addr >= CSF_HW_DOORBELL_PAGE_OFFSET && + addr < CSF_HW_DOORBELL_PAGE_OFFSET + + (CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE)) { + if (addr == CSF_HW_DOORBELL_PAGE_OFFSET) hw_error_status.job_irq_status = JOB_IRQ_GLOBAL_IF; } else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) && (addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) { @@ -1409,13 +1402,13 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value) } } #endif - else if (addr == MMU_REG(MMU_IRQ_MASK)) { + else if (addr == MMU_CONTROL_REG(MMU_IRQ_MASK)) { hw_error_status.mmu_irq_mask = value; - } else if (addr == MMU_REG(MMU_IRQ_CLEAR)) { + } else if (addr == MMU_CONTROL_REG(MMU_IRQ_CLEAR)) { hw_error_status.mmu_irq_rawstat &= (~value); - } else if ((addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)) && (addr <= MMU_AS_REG(15, AS_STATUS))) { - int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO)) - >> 6; + } else if ((addr >= MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) && + (addr <= MMU_STAGE1_REG(MMU_AS_REG(15, AS_STATUS)))) { + int mem_addr_space = (addr - MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) >> 6; switch (addr & 0x3F) { case AS_COMMAND: @@ -1926,10 +1919,9 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) } else if (addr >= GPU_CONTROL_REG(CYCLE_COUNT_LO) && addr <= GPU_CONTROL_REG(TIMESTAMP_HI)) { *value = 0; - } else if (addr >= MMU_AS_REG(0, AS_TRANSTAB_LO) - && addr <= MMU_AS_REG(15, AS_STATUS)) { - int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO)) - >> 6; + } else if (addr >= MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO)) && + addr <= MMU_STAGE1_REG(MMU_AS_REG(15, AS_STATUS))) { + int mem_addr_space = (addr - MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) >> 6; switch (addr & 0x3F) { case AS_TRANSTAB_LO: @@ -1973,11 +1965,11 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = 0; break; } - } else if (addr == MMU_REG(MMU_IRQ_MASK)) { + } else if (addr == MMU_CONTROL_REG(MMU_IRQ_MASK)) { *value = hw_error_status.mmu_irq_mask; - } else if (addr == MMU_REG(MMU_IRQ_RAWSTAT)) { + } else if (addr == MMU_CONTROL_REG(MMU_IRQ_RAWSTAT)) { *value = hw_error_status.mmu_irq_rawstat; - } else if (addr == MMU_REG(MMU_IRQ_STATUS)) { + } else if (addr == MMU_CONTROL_REG(MMU_IRQ_STATUS)) { *value = hw_error_status.mmu_irq_mask & hw_error_status.mmu_irq_rawstat; } @@ -1985,8 +1977,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) else if (addr == IPA_CONTROL_REG(STATUS)) { *value = (ipa_control_timer_enabled << 31); } else if ((addr >= IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) && - (addr <= IPA_CONTROL_REG(VALUE_CSHW_REG_HI( - IPA_CTL_MAX_VAL_CNT_IDX)))) { + (addr <= IPA_CONTROL_REG(VALUE_CSHW_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) { u32 counter_index = (addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) >> 3; bool is_low_word = @@ -1995,8 +1986,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = 
gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_CSHW, counter_index, is_low_word); } else if ((addr >= IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) && - (addr <= IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI( - IPA_CTL_MAX_VAL_CNT_IDX)))) { + (addr <= IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) { u32 counter_index = (addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) >> 3; bool is_low_word = @@ -2005,8 +1995,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_MEMSYS, counter_index, is_low_word); } else if ((addr >= IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) && - (addr <= IPA_CONTROL_REG(VALUE_TILER_REG_HI( - IPA_CTL_MAX_VAL_CNT_IDX)))) { + (addr <= IPA_CONTROL_REG(VALUE_TILER_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) { u32 counter_index = (addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) >> 3; bool is_low_word = @@ -2015,8 +2004,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_TILER, counter_index, is_low_word); } else if ((addr >= IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) && - (addr <= IPA_CONTROL_REG(VALUE_SHADER_REG_HI( - IPA_CTL_MAX_VAL_CNT_IDX)))) { + (addr <= IPA_CONTROL_REG(VALUE_SHADER_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) { u32 counter_index = (addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) >> 3; bool is_low_word = @@ -2214,16 +2202,3 @@ int gpu_model_control(void *model, return 0; } - -/** - * kbase_is_gpu_removed - Has the GPU been removed. - * @kbdev: Kbase device pointer - * - * This function would return true if the GPU has been removed. - * It is stubbed here - * Return: Always false - */ -bool kbase_is_gpu_removed(struct kbase_device *kbdev) -{ - return false; -} diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.c b/mali_kbase/backend/gpu/mali_kbase_model_linux.c index e90e4df..67e00e9 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_linux.c +++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010, 2012-2015, 2017-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -95,8 +95,7 @@ static void serve_mmu_irq(struct work_struct *work) if (atomic_cmpxchg(&kbdev->serving_mmu_irq, 1, 0) == 1) { u32 val; - while ((val = kbase_reg_read(kbdev, - MMU_REG(MMU_IRQ_STATUS)))) { + while ((val = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_STATUS)))) { /* Handle the IRQ */ kbase_mmu_interrupt(kbdev, val); } @@ -156,7 +155,7 @@ KBASE_EXPORT_TEST_API(kbase_reg_write); u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) { unsigned long flags; - u32 val; + u32 val = 0; spin_lock_irqsave(&kbdev->reg_op_lock, flags); midgard_model_read_reg(kbdev->model, offset, &val); diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c index abbb9c8..46c5ffd 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -169,6 +169,7 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) kbdev->pm.backend.gpu_powered = false; kbdev->pm.backend.gpu_ready = false; kbdev->pm.suspending = false; + kbdev->pm.resuming = false; #ifdef CONFIG_MALI_ARBITER_SUPPORT kbase_pm_set_gpu_lost(kbdev, false); #endif @@ -590,11 +591,13 @@ static int kbase_pm_do_poweroff_sync(struct kbase_device *kbdev) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; unsigned long flags; - int ret = 0; + int ret; WARN_ON(kbdev->pm.active_count); - kbase_pm_wait_for_poweroff_work_complete(kbdev); + ret = kbase_pm_wait_for_poweroff_work_complete(kbdev); + if (ret) + return ret; kbase_pm_lock(kbdev); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -679,60 +682,6 @@ unlock_hwaccess: spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } -static bool is_poweroff_in_progress(struct kbase_device *kbdev) -{ - bool ret; - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - ret = (kbdev->pm.backend.poweroff_wait_in_progress == false); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return ret; -} - -void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev) -{ -#define POWEROFF_TIMEOUT_MSEC 500 - long remaining = msecs_to_jiffies(POWEROFF_TIMEOUT_MSEC); - remaining = wait_event_killable_timeout(kbdev->pm.backend.poweroff_wait, - is_poweroff_in_progress(kbdev), remaining); - if (!remaining) { - /* If work is now pending, kbase_pm_gpu_poweroff_wait_wq() will - * definitely be called, so it's safe to continue waiting for it. - */ - if (!work_pending(&kbdev->pm.backend.gpu_poweroff_wait_work)) { - unsigned long flags; - kbasep_platform_event_core_dump(kbdev, "poweroff work timeout"); - dev_err(kbdev->dev, "failed to wait for poweroff worker after %ims", - POWEROFF_TIMEOUT_MSEC); - kbase_gpu_timeout_debug_message(kbdev); -#if MALI_USE_CSF - //csf.scheduler.state should be accessed with scheduler lock! - //callchains go through this function though holding that lock - //so just print without locking. - dev_err(kbdev->dev, "scheduler.state %d", kbdev->csf.scheduler.state); - dev_err(kbdev->dev, "Firmware ping %d", kbase_csf_firmware_ping_wait(kbdev, 0)); -#endif - //Attempt another state machine transition prompt. - dev_err(kbdev->dev, "Attempt to prompt state machine"); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_pm_update_state(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - dev_err(kbdev->dev, "GPU state after re-prompt of state machine"); - kbase_gpu_timeout_debug_message(kbdev); - - dev_err(kbdev->dev, "retrying wait, this is likely to still hang. 
%d", - is_poweroff_in_progress(kbdev)); - } - wait_event_killable(kbdev->pm.backend.poweroff_wait, - is_poweroff_in_progress(kbdev)); - } -#undef POWEROFF_TIMEOUT_MSEC -} -KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_work_complete); - /** * is_gpu_powered_down - Check whether GPU is powered down * @@ -986,7 +935,13 @@ int kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) kbase_pm_unlock(kbdev); - kbase_pm_wait_for_poweroff_work_complete(kbdev); + ret = kbase_pm_wait_for_poweroff_work_complete(kbdev); + if (ret) { +#if !MALI_USE_CSF + kbase_backend_timer_resume(kbdev); +#endif /* !MALI_USE_CSF */ + return ret; + } #endif WARN_ON(kbdev->pm.backend.gpu_powered); @@ -1002,6 +957,8 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) { kbase_pm_lock(kbdev); + /* System resume callback has begun */ + kbdev->pm.resuming = true; kbdev->pm.suspending = false; #ifdef CONFIG_MALI_ARBITER_SUPPORT if (kbase_pm_is_gpu_lost(kbdev)) { @@ -1016,7 +973,6 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) kbase_backend_timer_resume(kbdev); #endif /* !MALI_USE_CSF */ - wake_up_all(&kbdev->pm.resume_wait); kbase_pm_unlock(kbdev); } diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c index 2c69ac9..7c891c1 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -51,9 +51,6 @@ #ifdef CONFIG_MALI_ARBITER_SUPPORT #include <arbiter/mali_kbase_arbiter_pm.h> #endif /* CONFIG_MALI_ARBITER_SUPPORT */ -#if MALI_USE_CSF -#include <csf/ipa_control/mali_kbase_csf_ipa_control.h> -#endif #if MALI_USE_CSF #include <linux/delay.h> @@ -699,8 +696,8 @@ static void wait_mcu_as_inactive(struct kbase_device *kbdev) /* Wait for the AS_ACTIVE_INT bit to become 0 for the AS used by MCU FW */ while (--max_loops && - kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) & - AS_STATUS_AS_ACTIVE_INT) + kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(MCU_AS_NR, AS_STATUS))) & + AS_STATUS_AS_ACTIVE_INT) ; if (!WARN_ON_ONCE(max_loops == 0)) @@ -2442,26 +2439,29 @@ void kbase_pm_reset_complete(struct kbase_device *kbdev) #define PM_TIMEOUT_MS (5000) /* 5s */ #endif -void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev) { +void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev, const char *timeout_msg) +{ unsigned long flags; + + dev_err(kbdev->dev, "%s", timeout_msg); #if !MALI_USE_CSF CSTD_UNUSED(flags); dev_err(kbdev->dev, "Desired state :\n"); - dev_err(kbdev->dev, " Shader=%016llx\n", + dev_err(kbdev->dev, "\tShader=%016llx\n", kbdev->pm.backend.shaders_desired ? 
kbdev->pm.backend.shaders_avail : 0); #else dev_err(kbdev->dev, "GPU pm state :\n"); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - dev_err(kbdev->dev, " scheduler.pm_active_count = %d", kbdev->csf.scheduler.pm_active_count); - dev_err(kbdev->dev, " poweron_required %d pm.active_count %d invoke_poweroff_wait_wq_when_l2_off %d", + dev_err(kbdev->dev, "\tscheduler.pm_active_count = %d", kbdev->csf.scheduler.pm_active_count); + dev_err(kbdev->dev, "\tpoweron_required %d pm.active_count %d invoke_poweroff_wait_wq_when_l2_off %d", kbdev->pm.backend.poweron_required, kbdev->pm.active_count, kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off); - dev_err(kbdev->dev, " gpu_poweroff_wait_work pending %d", + dev_err(kbdev->dev, "\tgpu_poweroff_wait_work pending %d", work_pending(&kbdev->pm.backend.gpu_poweroff_wait_work)); - dev_err(kbdev->dev, " MCU desired = %d\n", + dev_err(kbdev->dev, "\tMCU desired = %d\n", kbase_pm_is_mcu_desired(kbdev)); - dev_err(kbdev->dev, " MCU sw state = %d\n", + dev_err(kbdev->dev, "\tMCU sw state = %d\n", kbdev->pm.backend.mcu_state); dev_err(kbdev->dev, "\tL2 desired = %d (locked_off: %d)\n", kbase_pm_is_l2_desired(kbdev), kbdev->pm.backend.policy_change_clamp_state_to_off); @@ -2474,17 +2474,17 @@ void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev) { spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); #endif dev_err(kbdev->dev, "Current state :\n"); - dev_err(kbdev->dev, " Shader=%08x%08x\n", + dev_err(kbdev->dev, "\tShader=%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_READY_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_READY_LO))); - dev_err(kbdev->dev, " Tiler =%08x%08x\n", + dev_err(kbdev->dev, "\tTiler =%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_READY_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_READY_LO))); - dev_err(kbdev->dev, " L2 =%08x%08x\n", + dev_err(kbdev->dev, "\tL2 =%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_READY_HI)), kbase_reg_read(kbdev, @@ -2493,17 +2493,17 @@ void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev) { kbase_csf_debug_dump_registers(kbdev); #endif dev_err(kbdev->dev, "Cores transitioning :\n"); - dev_err(kbdev->dev, " Shader=%08x%08x\n", + dev_err(kbdev->dev, "\tShader=%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG( SHADER_PWRTRANS_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG( SHADER_PWRTRANS_LO))); - dev_err(kbdev->dev, " Tiler =%08x%08x\n", + dev_err(kbdev->dev, "\tTiler =%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG( TILER_PWRTRANS_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG( TILER_PWRTRANS_LO))); - dev_err(kbdev->dev, " L2 =%08x%08x\n", + dev_err(kbdev->dev, "\tL2 =%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG( L2_PWRTRANS_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG( @@ -2512,12 +2512,9 @@ void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev) { dump_stack(); } -static void kbase_pm_timed_out(struct kbase_device *kbdev) +static void kbase_pm_timed_out(struct kbase_device *kbdev, const char *timeout_msg) { - dev_err(kbdev->dev, "Power transition timed out unexpectedly\n"); - kbase_gpu_timeout_debug_message(kbdev); - dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); - + kbase_gpu_timeout_debug_message(kbdev, timeout_msg); /* pixel: If either: * 1. L2/MCU power transition timed out, or, * 2. 
kbase state machine fell out of sync with the hw state, @@ -2530,6 +2527,7 @@ static void kbase_pm_timed_out(struct kbase_device *kbdev) * We have already lost work if we end up here, so send a powercycle to reset the hw, * which is more reliable. */ + dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR | RESET_FLAGS_FORCE_PM_HW_RESET)) @@ -2570,7 +2568,7 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev) .info = GPU_UEVENT_INFO_L2_PM_TIMEOUT }; pixel_gpu_uevent_send(kbdev, &evt); - kbase_pm_timed_out(kbdev); + kbase_pm_timed_out(kbdev, "Wait for desired PM state with L2 powered timed out"); err = -ETIMEDOUT; } else if (remaining < 0) { dev_info( @@ -2582,7 +2580,7 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev) return err; } -int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) +static int pm_wait_for_desired_state(struct kbase_device *kbdev, bool killable_wait) { unsigned long flags; long remaining; @@ -2600,31 +2598,42 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) /* Wait for cores */ #if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE - remaining = wait_event_killable_timeout( - kbdev->pm.backend.gpu_in_desired_state_wait, - kbase_pm_is_in_desired_state(kbdev), timeout); + if (killable_wait) + remaining = wait_event_killable_timeout(kbdev->pm.backend.gpu_in_desired_state_wait, + kbase_pm_is_in_desired_state(kbdev), + timeout); #else - remaining = wait_event_timeout( - kbdev->pm.backend.gpu_in_desired_state_wait, - kbase_pm_is_in_desired_state(kbdev), timeout); + killable_wait = false; #endif - + if (!killable_wait) + remaining = wait_event_timeout(kbdev->pm.backend.gpu_in_desired_state_wait, + kbase_pm_is_in_desired_state(kbdev), timeout); if (!remaining) { const struct gpu_uevent evt = { .type = GPU_UEVENT_TYPE_KMD_ERROR, .info = GPU_UEVENT_INFO_PM_TIMEOUT }; pixel_gpu_uevent_send(kbdev, &evt); - kbase_pm_timed_out(kbdev); + kbase_pm_timed_out(kbdev, "Wait for power transition timed out"); err = -ETIMEDOUT; } else if (remaining < 0) { - dev_info(kbdev->dev, - "Wait for desired PM state got interrupted"); + WARN_ON_ONCE(!killable_wait); + dev_info(kbdev->dev, "Wait for power transition got interrupted"); err = (int)remaining; } return err; } + +int kbase_pm_killable_wait_for_desired_state(struct kbase_device *kbdev) +{ + return pm_wait_for_desired_state(kbdev, true); +} + +int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) +{ + return pm_wait_for_desired_state(kbdev, false); +} KBASE_EXPORT_TEST_API(kbase_pm_wait_for_desired_state); #if MALI_USE_CSF @@ -2674,7 +2683,7 @@ int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev) #endif if (!remaining) { - kbase_pm_timed_out(kbdev); + kbase_pm_timed_out(kbdev, "Wait for cores down scaling timed out"); err = -ETIMEDOUT; } else if (remaining < 0) { dev_info( @@ -2687,6 +2696,96 @@ int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev) } #endif +static bool is_poweroff_wait_in_progress(struct kbase_device *kbdev) +{ + bool ret; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + ret = kbdev->pm.backend.poweroff_wait_in_progress; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return ret; +} + +static int pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev, bool killable_wait) +{ + long remaining; +#if MALI_USE_CSF + /* gpu_poweroff_wait_work would be subjected to the kernel scheduling + * and so the 
wait time can't only be the function of GPU frequency. + */ + const unsigned int extra_wait_time_ms = 2000; + const long timeout = kbase_csf_timeout_in_jiffies( + kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT) + extra_wait_time_ms); +#else +#ifdef CONFIG_MALI_ARBITER_SUPPORT + /* Handling of timeout error isn't supported for arbiter builds */ + const long timeout = MAX_SCHEDULE_TIMEOUT; +#else + const long timeout = msecs_to_jiffies(PM_TIMEOUT_MS); +#endif +#endif + int err = 0; + +#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE + if (killable_wait) + remaining = wait_event_killable_timeout(kbdev->pm.backend.poweroff_wait, + !is_poweroff_wait_in_progress(kbdev), + timeout); +#else + killable_wait = false; +#endif + + if (!killable_wait) + remaining = wait_event_timeout(kbdev->pm.backend.poweroff_wait, + !is_poweroff_wait_in_progress(kbdev), timeout); + if (!remaining) { + /* If work is now pending, kbase_pm_gpu_poweroff_wait_wq() will + * definitely be called, so it's safe to continue waiting for it. + */ + if (work_pending(&kbdev->pm.backend.gpu_poweroff_wait_work)) { + wait_event_killable(kbdev->pm.backend.poweroff_wait, + !is_poweroff_wait_in_progress(kbdev)); + } else { + unsigned long flags; + kbasep_platform_event_core_dump(kbdev, "poweroff work timeout"); + kbase_gpu_timeout_debug_message(kbdev, "failed to wait for poweroff worker"); +#if MALI_USE_CSF + //csf.scheduler.state should be accessed with scheduler lock! + //callchains go through this function though holding that lock + //so just print without locking. + dev_err(kbdev->dev, "scheduler.state %d", kbdev->csf.scheduler.state); + dev_err(kbdev->dev, "Firmware ping %d", kbase_csf_firmware_ping_wait(kbdev, 0)); +#endif + //Attempt another state machine transition prompt. + dev_err(kbdev->dev, "Attempt to prompt state machine"); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + kbase_gpu_timeout_debug_message(kbdev, "GPU state after re-prompt of state machine"); + err = -ETIMEDOUT; + } + } else if (remaining < 0) { + WARN_ON_ONCE(!killable_wait); + dev_info(kbdev->dev, "Wait for poweroff work got interrupted"); + err = (int)remaining; + } + return err; +} + +int kbase_pm_killable_wait_for_poweroff_work_complete(struct kbase_device *kbdev) +{ + return pm_wait_for_poweroff_work_complete(kbdev, true); +} + +int kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev) +{ + return pm_wait_for_poweroff_work_complete(kbdev, false); +} +KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_work_complete); + void kbase_pm_enable_interrupts(struct kbase_device *kbdev) { unsigned long flags; @@ -2704,12 +2803,12 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev) kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF); kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); #if MALI_USE_CSF /* Enable only the Page fault bits part */ - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFF); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), 0xFFFF); #else - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), 0xFFFFFFFF); #endif } @@ -2729,8 +2828,8 @@ void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev) kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0); kbase_reg_write(kbdev, 
JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), 0); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); } void kbase_pm_disable_interrupts(struct kbase_device *kbdev) @@ -3147,9 +3246,13 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) kbdev->hw_quirks_tiler = 0; kbdev->hw_quirks_mmu = 0; - if (!of_property_read_u32(np, "quirks_gpu", &kbdev->hw_quirks_gpu)) { - dev_info(kbdev->dev, - "Found quirks_gpu = [0x%x] in Devicetree\n", + /* Read the "-" versions of the properties and fall back to + * the "_" versions if these are not found + */ + + if (!of_property_read_u32(np, "quirks-gpu", &kbdev->hw_quirks_gpu) || + !of_property_read_u32(np, "quirks_gpu", &kbdev->hw_quirks_gpu)) { + dev_info(kbdev->dev, "Found quirks_gpu = [0x%x] in Devicetree\n", kbdev->hw_quirks_gpu); } else { error = kbase_set_gpu_quirks(kbdev, prod_id); @@ -3157,33 +3260,30 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) return error; } - if (!of_property_read_u32(np, "quirks_sc", - &kbdev->hw_quirks_sc)) { - dev_info(kbdev->dev, - "Found quirks_sc = [0x%x] in Devicetree\n", - kbdev->hw_quirks_sc); + if (!of_property_read_u32(np, "quirks-sc", &kbdev->hw_quirks_sc) || + !of_property_read_u32(np, "quirks_sc", &kbdev->hw_quirks_sc)) { + dev_info(kbdev->dev, "Found quirks_sc = [0x%x] in Devicetree\n", + kbdev->hw_quirks_sc); } else { error = kbase_set_sc_quirks(kbdev, prod_id); if (error) return error; } - if (!of_property_read_u32(np, "quirks_tiler", - &kbdev->hw_quirks_tiler)) { - dev_info(kbdev->dev, - "Found quirks_tiler = [0x%x] in Devicetree\n", - kbdev->hw_quirks_tiler); + if (!of_property_read_u32(np, "quirks-tiler", &kbdev->hw_quirks_tiler) || + !of_property_read_u32(np, "quirks_tiler", &kbdev->hw_quirks_tiler)) { + dev_info(kbdev->dev, "Found quirks_tiler = [0x%x] in Devicetree\n", + kbdev->hw_quirks_tiler); } else { error = kbase_set_tiler_quirks(kbdev); if (error) return error; } - if (!of_property_read_u32(np, "quirks_mmu", - &kbdev->hw_quirks_mmu)) { - dev_info(kbdev->dev, - "Found quirks_mmu = [0x%x] in Devicetree\n", - kbdev->hw_quirks_mmu); + if (!of_property_read_u32(np, "quirks-mmu", &kbdev->hw_quirks_mmu) || + !of_property_read_u32(np, "quirks_mmu", &kbdev->hw_quirks_mmu)) { + dev_info(kbdev->dev, "Found quirks_mmu = [0x%x] in Devicetree\n", + kbdev->hw_quirks_mmu); } else { error = kbase_set_mmu_quirks(kbdev); } diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h index 9e29236..d7f19fb 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -224,7 +224,7 @@ void kbase_pm_reset_done(struct kbase_device *kbdev); * power off in progress and kbase_pm_context_active() was called instead of * kbase_csf_scheduler_pm_active(). * - * Return: 0 on success, error code on error + * Return: 0 on success, or -ETIMEDOUT code on timeout error. 
*/ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); #else @@ -247,12 +247,27 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); * must ensure that this is not the case by, for example, calling * kbase_pm_wait_for_poweroff_work_complete() * - * Return: 0 on success, error code on error + * Return: 0 on success, or -ETIMEDOUT error code on timeout error. */ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); #endif /** + * kbase_pm_killable_wait_for_desired_state - Wait for the desired power state to be + * reached in a killable state. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This function is same as kbase_pm_wait_for_desired_state(), expect that it would + * allow the SIGKILL signal to interrupt the wait. + * This function is supposed to be called from the code that is executed in ioctl or + * Userspace context, wherever it is safe to do so. + * + * Return: 0 on success, or -ETIMEDOUT code on timeout error or -ERESTARTSYS if the + * wait was interrupted. + */ +int kbase_pm_killable_wait_for_desired_state(struct kbase_device *kbdev); + +/** * kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on * * @kbdev: The kbase device structure for the device (must be a valid pointer) @@ -467,8 +482,26 @@ void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev); * This function effectively just waits for the @gpu_poweroff_wait_work work * item to complete, if it was enqueued. GPU may not have been powered down * before this function returns. + * + * Return: 0 on success, error code on error */ -void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev); +int kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev); + +/** + * kbase_pm_killable_wait_for_poweroff_work_complete - Wait for the poweroff workqueue to + * complete in killable state. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This function is same as kbase_pm_wait_for_poweroff_work_complete(), expect that + * it would allow the SIGKILL signal to interrupt the wait. + * This function is supposed to be called from the code that is executed in ioctl or + * Userspace context, wherever it is safe to do so. + * + * Return: 0 on success, or -ETIMEDOUT code on timeout error or -ERESTARTSYS if the + * wait was interrupted. + */ +int kbase_pm_killable_wait_for_poweroff_work_complete(struct kbase_device *kbdev); /** * kbase_pm_wait_for_gpu_power_down - Wait for the GPU power down to complete @@ -857,6 +890,8 @@ static inline bool kbase_pm_mcu_is_in_desired_state(struct kbase_device *kbdev) { bool in_desired_state = true; + lockdep_assert_held(&kbdev->hwaccess_lock); + if (kbase_pm_is_mcu_desired(kbdev) && kbdev->pm.backend.mcu_state != KBASE_MCU_ON) in_desired_state = false; else if (!kbase_pm_is_mcu_desired(kbdev) && diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c index f5dc008..7d7650c 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -54,7 +54,9 @@ void kbase_pm_policy_init(struct kbase_device *kbdev) unsigned long flags; int i; - if (of_property_read_string(np, "power_policy", &power_policy_name) == 0) { + /* Read "power-policy" property and fallback to "power_policy" if not found */ + if ((of_property_read_string(np, "power-policy", &power_policy_name) == 0) || + (of_property_read_string(np, "power_policy", &power_policy_name) == 0)) { for (i = 0; i < ARRAY_SIZE(all_policy_list); i++) if (sysfs_streq(all_policy_list[i]->name, power_policy_name)) { default_policy = all_policy_list[i]; @@ -298,6 +300,8 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, bool reset_gpu = false; bool reset_op_prevented = true; struct kbase_csf_scheduler *scheduler = NULL; + u32 pwroff; + bool switching_to_always_on; #endif KBASE_DEBUG_ASSERT(kbdev != NULL); @@ -306,6 +310,16 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, KBASE_KTRACE_ADD(kbdev, PM_SET_POLICY, NULL, new_policy->id); #if MALI_USE_CSF + pwroff = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev); + switching_to_always_on = new_policy == &kbase_pm_always_on_policy_ops; + if (pwroff == 0 && !switching_to_always_on) { + dev_warn(kbdev->dev, + "power_policy: cannot switch away from always_on with mcu_shader_pwroff_timeout set to 0\n"); + dev_warn(kbdev->dev, + "power_policy: resetting mcu_shader_pwroff_timeout to default value to switch policy from always_on\n"); + kbase_csf_firmware_reset_mcu_core_pwroff_time(kbdev); + } + scheduler = &kbdev->csf.scheduler; KBASE_DEBUG_ASSERT(scheduler != NULL); diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c index 7a4d662..28365c0 100644 --- a/mali_kbase/backend/gpu/mali_kbase_time.c +++ b/mali_kbase/backend/gpu/mali_kbase_time.c @@ -29,6 +29,39 @@ #include <device/mali_kbase_device.h> #include <backend/gpu/mali_kbase_pm_internal.h> #include <mali_kbase_config_defaults.h> +#include <linux/version_compat_defs.h> + +struct kbase_timeout_info { + char *selector_str; + u64 timeout_cycles; +}; + +#if MALI_USE_CSF +static struct kbase_timeout_info timeout_info[KBASE_TIMEOUT_SELECTOR_COUNT] = { + [CSF_FIRMWARE_TIMEOUT] = { "CSF_FIRMWARE_TIMEOUT", MIN(CSF_FIRMWARE_TIMEOUT_CYCLES, + CSF_FIRMWARE_PING_TIMEOUT_CYCLES) }, + [CSF_PM_TIMEOUT] = { "CSF_PM_TIMEOUT", CSF_PM_TIMEOUT_CYCLES }, + [CSF_GPU_RESET_TIMEOUT] = { "CSF_GPU_RESET_TIMEOUT", CSF_GPU_RESET_TIMEOUT_CYCLES }, + [CSF_CSG_SUSPEND_TIMEOUT] = { "CSF_CSG_SUSPEND_TIMEOUT", CSF_CSG_SUSPEND_TIMEOUT_CYCLES }, + [CSF_FIRMWARE_BOOT_TIMEOUT] = { "CSF_FIRMWARE_BOOT_TIMEOUT", + CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES }, + [CSF_FIRMWARE_PING_TIMEOUT] = { "CSF_FIRMWARE_PING_TIMEOUT", + CSF_FIRMWARE_PING_TIMEOUT_CYCLES }, + [CSF_SCHED_PROTM_PROGRESS_TIMEOUT] = { "CSF_SCHED_PROTM_PROGRESS_TIMEOUT", + DEFAULT_PROGRESS_TIMEOUT_CYCLES }, + [MMU_AS_INACTIVE_WAIT_TIMEOUT] = { "MMU_AS_INACTIVE_WAIT_TIMEOUT", + MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES }, + [KCPU_FENCE_SIGNAL_TIMEOUT] = { "KCPU_FENCE_SIGNAL_TIMEOUT", + KCPU_FENCE_SIGNAL_TIMEOUT_CYCLES }, +}; +#else +static struct kbase_timeout_info timeout_info[KBASE_TIMEOUT_SELECTOR_COUNT] = { + [MMU_AS_INACTIVE_WAIT_TIMEOUT] = { "MMU_AS_INACTIVE_WAIT_TIMEOUT", + MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES }, + [JM_DEFAULT_JS_FREE_TIMEOUT] = { "JM_DEFAULT_JS_FREE_TIMEOUT", + JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES }, +}; +#endif void kbase_backend_get_gpu_time_norequest(struct 
 
 void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, u64 *cycle_counter,
@@ -108,94 +141,130 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
 #endif
 }
 
-unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
-                                 enum kbase_timeout_selector selector)
+static u64 kbase_device_get_scaling_frequency(struct kbase_device *kbdev)
+{
+       u64 freq_khz = kbdev->lowest_gpu_freq_khz;
+
+       if (!freq_khz) {
+               dev_dbg(kbdev->dev,
+                       "Lowest frequency uninitialized! Using reference frequency for scaling");
+               return DEFAULT_REF_TIMEOUT_FREQ_KHZ;
+       }
+
+       return freq_khz;
+}
+
+void kbase_device_set_timeout_ms(struct kbase_device *kbdev, enum kbase_timeout_selector selector,
+                                unsigned int timeout_ms)
 {
+       char *selector_str;
+
+       if (unlikely(selector >= KBASE_TIMEOUT_SELECTOR_COUNT)) {
+               selector = KBASE_DEFAULT_TIMEOUT;
+               dev_warn(kbdev->dev,
+                        "Unknown timeout selector passed, falling back to default: %s\n",
+                        timeout_info[selector].selector_str);
+       }
+       selector_str = timeout_info[selector].selector_str;
+
+       kbdev->backend_time.device_scaled_timeouts[selector] = timeout_ms;
+       dev_dbg(kbdev->dev, "\t%-35s: %ums\n", selector_str, timeout_ms);
+}
+
+void kbase_device_set_timeout(struct kbase_device *kbdev, enum kbase_timeout_selector selector,
+                             u64 timeout_cycles, u32 cycle_multiplier)
+{
+       u64 final_cycles;
+       u64 timeout;
+       u64 freq_khz = kbase_device_get_scaling_frequency(kbdev);
+
+       if (unlikely(selector >= KBASE_TIMEOUT_SELECTOR_COUNT)) {
+               selector = KBASE_DEFAULT_TIMEOUT;
+               dev_warn(kbdev->dev,
+                        "Unknown timeout selector passed, falling back to default: %s\n",
+                        timeout_info[selector].selector_str);
+       }
+
+       /* If the multiplication overflows, we will have unsigned wrap-around, and so might
+        * end up with a shorter timeout. In those cases, we then want to have the largest
+        * timeout possible that will not run into these issues. Note that this will not
+        * wait for U64_MAX/frequency ms, as it will be clamped to a max of UINT_MAX
+        * milliseconds by subsequent steps.
+        */
+       if (check_mul_overflow(timeout_cycles, (u64)cycle_multiplier, &final_cycles))
+               final_cycles = U64_MAX;
+
        /* Timeout calculation:
         * dividing number of cycles by freq in KHz automatically gives value
         * in milliseconds. nr_cycles will have to be multiplied by 1e3 to
         * get result in microseconds, and 1e6 to get result in nanoseconds.
         */
+       timeout = div_u64(final_cycles, freq_khz);
+
+       if (unlikely(timeout > UINT_MAX)) {
+               dev_dbg(kbdev->dev,
+                       "Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms",
+                       timeout, timeout_info[selector].selector_str,
+                       kbase_device_get_scaling_frequency(kbdev));
+               timeout = UINT_MAX;
+       }
 
-       u64 timeout, nr_cycles = 0;
-       u64 freq_khz;
+       kbase_device_set_timeout_ms(kbdev, selector, (unsigned int)timeout);
 }
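
kbase_device_set_timeout() turns a cycle budget into milliseconds: it scales the cycle count by a multiplier with an overflow check, divides by the scaling frequency in kHz (cycles / kHz = ms), and clamps the result to UINT_MAX. A userspace sketch of the same arithmetic, assuming a GCC/Clang toolchain for __builtin_mul_overflow() (which the kernel's check_mul_overflow() wraps) and plain 64-bit division standing in for div_u64():

/* Sketch only: overflow-checked cycles-to-milliseconds conversion. */
#include <stdint.h>
#include <limits.h>
#include <stdio.h>

static unsigned int cycles_to_ms(uint64_t cycles, uint32_t multiplier, uint64_t freq_khz)
{
        uint64_t final_cycles, ms;

        /* On multiplication wrap-around, saturate instead of silently
         * producing a shorter timeout.
         */
        if (__builtin_mul_overflow(cycles, (uint64_t)multiplier, &final_cycles))
                final_cycles = UINT64_MAX;

        /* Dividing cycles by a frequency in kHz yields milliseconds */
        ms = final_cycles / freq_khz;

        /* Clamp to what an unsigned int can carry */
        return ms > UINT_MAX ? UINT_MAX : (unsigned int)ms;
}

int main(void)
{
        /* 400e6 cycles at 100 MHz (100000 kHz) -> 4000 ms */
        printf("%u ms\n", cycles_to_ms(400000000ULL, 1, 100000ULL));
        return 0;
}
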
 
-       /* Only for debug messages, safe default in case it's mis-maintained */
-       const char *selector_str = "(unknown)";
+/**
+ * kbase_timeout_scaling_init - Initialize the table of scaled timeout
+ *                              values associated with a @kbase_device.
+ *
+ * @kbdev: KBase device pointer.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int kbase_timeout_scaling_init(struct kbase_device *kbdev)
+{
+       int err;
+       enum kbase_timeout_selector selector;
 
-       if (!kbdev->lowest_gpu_freq_khz) {
-               dev_dbg(kbdev->dev,
-                       "Lowest frequency uninitialized! Using reference frequency for scaling");
-               freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ;
-       } else {
-               freq_khz = kbdev->lowest_gpu_freq_khz;
+       /* First, we initialize the minimum and maximum device frequencies, which
+        * are used to compute the timeouts.
+        */
+       err = kbase_pm_gpu_freq_init(kbdev);
+       if (unlikely(err < 0)) {
+               dev_dbg(kbdev->dev, "Could not initialize GPU frequency\n");
+               return err;
        }
 
-       switch (selector) {
-       case MMU_AS_INACTIVE_WAIT_TIMEOUT:
-               selector_str = "MMU_AS_INACTIVE_WAIT_TIMEOUT";
-               nr_cycles = MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES;
-               break;
-       case KBASE_TIMEOUT_SELECTOR_COUNT:
-       default:
-#if !MALI_USE_CSF
-               WARN(1, "Invalid timeout selector used! Using default value");
-               nr_cycles = JM_DEFAULT_TIMEOUT_CYCLES;
-               break;
-       case JM_DEFAULT_JS_FREE_TIMEOUT:
-               selector_str = "JM_DEFAULT_JS_FREE_TIMEOUT";
-               nr_cycles = JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES;
-               break;
-#else
-               /* Use Firmware timeout if invalid selection */
-               WARN(1,
-                    "Invalid timeout selector used! Using CSF Firmware timeout");
-               fallthrough;
-       case CSF_FIRMWARE_TIMEOUT:
-               selector_str = "CSF_FIRMWARE_TIMEOUT";
-               /* Any FW timeout cannot be longer than the FW ping interval, after which
-                * the firmware_aliveness_monitor will be triggered and may restart
-                * the GPU if the FW is unresponsive.
+       dev_dbg(kbdev->dev, "Scaling kbase timeouts:\n");
+       for (selector = 0; selector < KBASE_TIMEOUT_SELECTOR_COUNT; selector++) {
+               u32 cycle_multiplier = 1;
+               u64 nr_cycles = timeout_info[selector].timeout_cycles;
+#if MALI_USE_CSF
+               /* Special case: the scheduler progress timeout can be set manually,
+                * and does not have a canonical length defined in the headers. Hence,
+                * we query it once upon startup to get a baseline, and change it upon
+                * every invocation of the appropriate functions.
                 */
-               nr_cycles = min(CSF_FIRMWARE_PING_TIMEOUT_CYCLES, CSF_FIRMWARE_TIMEOUT_CYCLES);
-
-               if (nr_cycles == CSF_FIRMWARE_PING_TIMEOUT_CYCLES)
-                       dev_warn(kbdev->dev, "Capping %s to CSF_FIRMWARE_PING_TIMEOUT\n",
-                                selector_str);
-               break;
-       case CSF_PM_TIMEOUT:
-               selector_str = "CSF_PM_TIMEOUT";
-               nr_cycles = CSF_PM_TIMEOUT_CYCLES;
-               break;
-       case CSF_GPU_RESET_TIMEOUT:
-               selector_str = "CSF_GPU_RESET_TIMEOUT";
-               nr_cycles = CSF_GPU_RESET_TIMEOUT_CYCLES;
-               break;
-       case CSF_CSG_SUSPEND_TIMEOUT:
-               selector_str = "CSF_CSG_SUSPEND_TIMEOUT";
-               nr_cycles = CSF_CSG_SUSPEND_TIMEOUT_CYCLES;
-               break;
-       case CSF_FIRMWARE_BOOT_TIMEOUT:
-               selector_str = "CSF_FIRMWARE_BOOT_TIMEOUT";
-               nr_cycles = CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES;
-               break;
-       case CSF_FIRMWARE_PING_TIMEOUT:
-               selector_str = "CSF_FIRMWARE_PING_TIMEOUT";
-               nr_cycles = CSF_FIRMWARE_PING_TIMEOUT_CYCLES;
-               break;
-       case CSF_SCHED_PROTM_PROGRESS_TIMEOUT:
-               selector_str = "CSF_SCHED_PROTM_PROGRESS_TIMEOUT";
-               nr_cycles = kbase_csf_timeout_get(kbdev);
-               break;
+               if (selector == CSF_SCHED_PROTM_PROGRESS_TIMEOUT)
+                       nr_cycles = kbase_csf_timeout_get(kbdev);
 #endif
+
+               /* Since we are in control of the iteration bounds for the selector,
+                * we don't have to worry about bounds checking when setting the timeout.
+                */
+               kbase_device_set_timeout(kbdev, selector, nr_cycles, cycle_multiplier);
        }
+       return 0;
+}
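
kbase_timeout_scaling_init() walks the whole selector enum once at init time and stores each scaled value, so later queries reduce to a bounds-checked array read with no division on the query path. Continuing the hypothetical demo_* sketch from the two examples above (demo_info[] and cycles_to_ms() are reused; none of these names are driver code):

/* Scale every table entry once at init; query later by array read. */
static unsigned int demo_scaled_ms[DEMO_TIMEOUT_SELECTOR_COUNT];

static void demo_timeout_scaling_init(uint64_t freq_khz)
{
        enum demo_timeout_selector s;

        /* The loop bounds come from the enum itself, so no per-entry
         * bounds checking is needed when storing the timeout.
         */
        for (s = 0; s < DEMO_TIMEOUT_SELECTOR_COUNT; s++)
                demo_scaled_ms[s] = cycles_to_ms(demo_info[s].timeout_cycles, 1, freq_khz);
}

/* Mirrors the reworked query path: fall back to a default selector on
 * out-of-range input, then return the precomputed value.
 */
static unsigned int demo_get_timeout_ms(enum demo_timeout_selector s)
{
        if (s >= DEMO_TIMEOUT_SELECTOR_COUNT)
                s = DEMO_FIRMWARE_TIMEOUT; /* stand-in for KBASE_DEFAULT_TIMEOUT */
        return demo_scaled_ms[s];
}
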
 
-       timeout = div_u64(nr_cycles, freq_khz);
-       if (WARN(timeout > UINT_MAX,
-                "Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms",
-                (unsigned long long)timeout, selector_str, (unsigned long long)freq_khz))
-               timeout = UINT_MAX;
-       return (unsigned int)timeout;
+unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, enum kbase_timeout_selector selector)
+{
+       if (unlikely(selector >= KBASE_TIMEOUT_SELECTOR_COUNT)) {
+               dev_warn(kbdev->dev, "Querying wrong selector, falling back to default\n");
+               selector = KBASE_DEFAULT_TIMEOUT;
+       }
+
+       return kbdev->backend_time.device_scaled_timeouts[selector];
 }
 
 KBASE_EXPORT_TEST_API(kbase_get_timeout_ms);
@@ -247,18 +316,21 @@ static void get_cpu_gpu_time(struct kbase_device *kbdev, u64 *cpu_ts, u64 *gpu_ts,
 
 int kbase_backend_time_init(struct kbase_device *kbdev)
 {
+       int err = 0;
 #if MALI_USE_CSF
        u64 cpu_ts = 0;
        u64 gpu_ts = 0;
        u64 freq;
        u64 common_factor;
 
+       kbase_pm_register_access_enable(kbdev);
        get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL);
        freq = arch_timer_get_cntfrq();
 
        if (!freq) {
                dev_warn(kbdev->dev, "arch_timer_get_rate() is zero!");
-               return -EINVAL;
+               err = -EINVAL;
+               goto disable_registers;
        }
 
        common_factor = gcd(NSEC_PER_SEC, freq);
@@ -268,12 +340,23 @@ int kbase_backend_time_init(struct kbase_device *kbdev)
 
        if (!kbdev->backend_time.divisor) {
                dev_warn(kbdev->dev, "CPU to GPU divisor is zero!");
-               return -EINVAL;
+               err = -EINVAL;
+               goto disable_registers;
        }
 
        kbdev->backend_time.offset = cpu_ts - div64_u64(gpu_ts * kbdev->backend_time.multiplier,
                                                        kbdev->backend_time.divisor);
 #endif
 
-       return 0;
+       if (kbase_timeout_scaling_init(kbdev)) {
+               dev_warn(kbdev->dev, "Could not initialize timeout scaling");
+               err = -EINVAL;
+       }
+
+#if MALI_USE_CSF
+disable_registers:
+       kbase_pm_register_access_disable(kbdev);
+#endif
+
+       return err;
 }
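
kbase_backend_time_init() now routes every failure through a single disable_registers label, so the register-access window opened by kbase_pm_register_access_enable() is closed on all exit paths. A self-contained sketch of this goto-cleanup idiom, with made-up demo_* helpers standing in for the kbase power-management calls:

/* Sketch only: single-exit cleanup via goto, as used above. */
#include <stdio.h>

static void demo_access_enable(void)  { puts("registers enabled");  }
static void demo_access_disable(void) { puts("registers disabled"); }

static int demo_time_init(unsigned long freq)
{
        int err = 0;

        /* Open the access window once... */
        demo_access_enable();

        if (!freq) {
                err = -1; /* stands in for -EINVAL */
                goto disable_registers;
        }

        /* ... further setup; any failure also jumps to disable_registers ... */

disable_registers:
        /* ... and close it on every path, success or failure */
        demo_access_disable();
        return err;
}

int main(void)
{
        printf("err=%d\n", demo_time_init(0));
        return 0;
}
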