author     Jack Diver <diverj@google.com>    2022-11-07 12:13:47 +0000
committer  Jack Diver <diverj@google.com>    2022-11-09 17:51:01 +0000
commit     e19249ece66a726b13d0ed8734c4059f364ca2f5 (patch)
tree       0bdfdcc3c70a6378c8265b03e3fba6b8120ffa21 /mali_kbase/backend
parent     34e635317dc2a91076ac341df3867ac3bdb31ef1 (diff)
download   gpu-e19249ece66a726b13d0ed8734c4059f364ca2f5.tar.gz
Revert "Revert "Merge r38p1 from upstream into partner/android13-gs-pixel-5.10-tm-qpr2""
This reverts commit 34e635317dc2a91076ac341df3867ac3bdb31ef1.
Bug: 228779790
Change-Id: Ic9d131af5568d7f55f610f255fa1c02925b18482
(cherry picked from commit 1c916e3f7c4d999f68e40c60fee6fe39418fcecd)
Diffstat (limited to 'mali_kbase/backend')
18 files changed, 797 insertions, 521 deletions
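Two structural changes dominate the diff below. First, the instrumentation backend gains a bounded readiness poll, wait_prfcnt_ready(), which is called before every PRFCNT register write so that configuration, dump, and clear requests fail with -EBUSY instead of racing an in-flight sample. Second, kbase_devfreq_init() is reworked to run kbase_ipa_init() before the core-mask table setup and to unwind through a single goto ladder on failure. The sketch below restates the readiness-poll idiom in isolation; read_gpu_status(), PRFCNT_ACTIVE_BIT, and MAX_POLL_LOOPS are illustrative stand-ins for the kbase helpers seen in the patch (kbase_reg_read(), GPU_STATUS_PRFCNT_ACTIVE, KBASE_PRFCNT_ACTIVE_MAX_LOOPS), not driver API.

```c
#include <stdio.h>

/* Illustrative stand-ins for the kbase register interface. */
#define PRFCNT_ACTIVE_BIT (1u << 2) /* assumed bit position, for the sketch only */
#define MAX_POLL_LOOPS    100000u

static unsigned int read_gpu_status(void)
{
	/* The driver reads GPU_CONTROL_REG(GPU_STATUS) here; the sketch
	 * reports the counter block as idle so the example terminates.
	 */
	return 0;
}

/* Bounded busy-wait: return 0 once the performance-counter block is idle,
 * or -EBUSY if the active bit never clears. Callers abort their register
 * writes on failure rather than racing an in-flight sample.
 */
static int wait_prfcnt_ready_sketch(void)
{
	unsigned int loops;

	for (loops = 0; loops < MAX_POLL_LOOPS; loops++) {
		if (!(read_gpu_status() & PRFCNT_ACTIVE_BIT))
			return 0;
	}
	return -16; /* -EBUSY */
}

int main(void)
{
	printf("prfcnt ready: %d\n", wait_prfcnt_ready_sketch());
	return 0;
}
```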
diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c index d6b9750..ddd03ca 100644 --- a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c +++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -72,49 +72,6 @@ get_clk_rate_trace_callbacks(__maybe_unused struct kbase_device *kbdev) return callbacks; } -int kbase_lowest_gpu_freq_init(struct kbase_device *kbdev) -{ - /* Uses default reference frequency defined in below macro */ - u64 lowest_freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ; - - /* Only check lowest frequency in cases when OPPs are used and - * present in the device tree. - */ -#ifdef CONFIG_PM_OPP - struct dev_pm_opp *opp_ptr; - unsigned long found_freq = 0; - - /* find lowest frequency OPP */ - opp_ptr = dev_pm_opp_find_freq_ceil(kbdev->dev, &found_freq); - if (IS_ERR(opp_ptr)) { - dev_err(kbdev->dev, - "No OPPs found in device tree! Scaling timeouts using %llu kHz", - (unsigned long long)lowest_freq_khz); - } else { -#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE - dev_pm_opp_put(opp_ptr); /* decrease OPP refcount */ -#endif - /* convert found frequency to KHz */ - found_freq /= 1000; - - /* If lowest frequency in OPP table is still higher - * than the reference, then keep the reference frequency - * as the one to use for scaling . - */ - if (found_freq < lowest_freq_khz) - lowest_freq_khz = found_freq; - } -#else - dev_err(kbdev->dev, - "No operating-points-v2 node or operating-points property in DT"); -#endif - - kbdev->lowest_gpu_freq_khz = lowest_freq_khz; - dev_dbg(kbdev->dev, "Lowest frequency identified is %llu kHz", - kbdev->lowest_gpu_freq_khz); - return 0; -} - static int gpu_clk_rate_change_notifier(struct notifier_block *nb, unsigned long event, void *data) { diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h index a6ee959..35b3b8d 100644 --- a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h +++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -61,20 +61,6 @@ struct kbase_clk_data { int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev); /** - * kbase_init_lowest_gpu_freq() - Find the lowest frequency that the GPU can - * run as using the device tree, and save this - * within kbdev. - * @kbdev: Pointer to kbase device. - * - * This function could be called from kbase_clk_rate_trace_manager_init, - * but is left separate as it can be called as soon as - * dev_pm_opp_of_add_table() has been called to initialize the OPP table. - * - * Return: 0 in any case. - */ -int kbase_lowest_gpu_freq_init(struct kbase_device *kbdev); - -/** * kbase_clk_rate_trace_manager_term - Terminate GPU clock rate trace manager. 
* * @kbdev: Device pointer diff --git a/mali_kbase/backend/gpu/mali_kbase_devfreq.c b/mali_kbase/backend/gpu/mali_kbase_devfreq.c index 00b32b9..09c1863 100644 --- a/mali_kbase/backend/gpu/mali_kbase_devfreq.c +++ b/mali_kbase/backend/gpu/mali_kbase_devfreq.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -57,7 +57,7 @@ static unsigned long get_voltage(struct kbase_device *kbdev, unsigned long freq) opp = dev_pm_opp_find_freq_exact(kbdev->dev, freq, true); if (IS_ERR_OR_NULL(opp)) - dev_err(kbdev->dev, "Failed to get opp (%ld)\n", PTR_ERR(opp)); + dev_err(kbdev->dev, "Failed to get opp (%d)\n", PTR_ERR_OR_ZERO(opp)); else { voltage = dev_pm_opp_get_voltage(opp); #if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE @@ -133,8 +133,8 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) rcu_read_unlock(); #endif if (IS_ERR_OR_NULL(opp)) { - dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp)); - return PTR_ERR(opp); + dev_err(dev, "Failed to get opp (%d)\n", PTR_ERR_OR_ZERO(opp)); + return IS_ERR(opp) ? PTR_ERR(opp) : -ENODEV; } #if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE dev_pm_opp_put(opp); @@ -317,6 +317,7 @@ static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev, dp->max_state = i; + /* Have the lowest clock as suspend clock. * It may be overridden by 'opp-mali-errata-1485982'. */ @@ -636,6 +637,7 @@ int kbase_devfreq_init(struct kbase_device *kbdev) struct devfreq_dev_profile *dp; int err; unsigned int i; + bool free_devfreq_freq_table = true; if (kbdev->nr_clocks == 0) { dev_err(kbdev->dev, "Clock not available for devfreq\n"); @@ -669,32 +671,35 @@ int kbase_devfreq_init(struct kbase_device *kbdev) dp->freq_table[0] / 1000; } - err = kbase_devfreq_init_core_mask_table(kbdev); +#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) + err = kbase_ipa_init(kbdev); if (err) { - kbase_devfreq_term_freq_table(kbdev); - return err; + dev_err(kbdev->dev, "IPA initialization failed"); + goto ipa_init_failed; } +#endif + + err = kbase_devfreq_init_core_mask_table(kbdev); + if (err) + goto init_core_mask_table_failed; kbdev->devfreq = devfreq_add_device(kbdev->dev, dp, "simple_ondemand", NULL); if (IS_ERR(kbdev->devfreq)) { err = PTR_ERR(kbdev->devfreq); kbdev->devfreq = NULL; - kbase_devfreq_term_core_mask_table(kbdev); - kbase_devfreq_term_freq_table(kbdev); - dev_err(kbdev->dev, "Fail to add devfreq device(%d)\n", err); - return err; + dev_err(kbdev->dev, "Fail to add devfreq device(%d)", err); + goto devfreq_add_dev_failed; } + /* Explicit free of freq table isn't needed after devfreq_add_device() */ + free_devfreq_freq_table = false; + /* Initialize devfreq suspend/resume workqueue */ err = kbase_devfreq_work_init(kbdev); if (err) { - if (devfreq_remove_device(kbdev->devfreq)) - dev_err(kbdev->dev, "Fail to rm devfreq\n"); - kbdev->devfreq = NULL; - kbase_devfreq_term_core_mask_table(kbdev); - dev_err(kbdev->dev, "Fail to init devfreq workqueue\n"); - return err; + dev_err(kbdev->dev, "Fail to init devfreq workqueue"); + goto devfreq_work_init_failed; } /* devfreq_add_device only copies a few of kbdev->dev's fields, so @@ -705,26 +710,20 @@ int kbase_devfreq_init(struct kbase_device *kbdev) err = devfreq_register_opp_notifier(kbdev->dev, 
kbdev->devfreq); if (err) { dev_err(kbdev->dev, - "Failed to register OPP notifier (%d)\n", err); + "Failed to register OPP notifier (%d)", err); goto opp_notifier_failed; } #if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) - err = kbase_ipa_init(kbdev); - if (err) { - dev_err(kbdev->dev, "IPA initialization failed\n"); - goto ipa_init_failed; - } - kbdev->devfreq_cooling = of_devfreq_cooling_register_power( kbdev->dev->of_node, kbdev->devfreq, &kbase_ipa_power_model_ops); if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) { - err = PTR_ERR(kbdev->devfreq_cooling); + err = PTR_ERR_OR_ZERO(kbdev->devfreq_cooling); dev_err(kbdev->dev, - "Failed to register cooling device (%d)\n", - err); + "Failed to register cooling device (%d)", err); + err = err == 0 ? -ENODEV : err; goto cooling_reg_failed; } #endif @@ -733,21 +732,29 @@ int kbase_devfreq_init(struct kbase_device *kbdev) #if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) cooling_reg_failed: - kbase_ipa_term(kbdev); -ipa_init_failed: devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); #endif /* CONFIG_DEVFREQ_THERMAL */ opp_notifier_failed: kbase_devfreq_work_term(kbdev); +devfreq_work_init_failed: if (devfreq_remove_device(kbdev->devfreq)) - dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); + dev_err(kbdev->dev, "Failed to terminate devfreq (%d)", err); kbdev->devfreq = NULL; +devfreq_add_dev_failed: kbase_devfreq_term_core_mask_table(kbdev); +init_core_mask_table_failed: +#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) + kbase_ipa_term(kbdev); +ipa_init_failed: +#endif + if (free_devfreq_freq_table) + kbase_devfreq_term_freq_table(kbdev); + return err; } @@ -760,8 +767,6 @@ void kbase_devfreq_term(struct kbase_device *kbdev) #if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) if (kbdev->devfreq_cooling) devfreq_cooling_unregister(kbdev->devfreq_cooling); - - kbase_ipa_term(kbdev); #endif devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); @@ -775,4 +780,8 @@ void kbase_devfreq_term(struct kbase_device *kbdev) kbdev->devfreq = NULL; kbase_devfreq_term_core_mask_table(kbdev); + +#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) + kbase_ipa_term(kbdev); +#endif } diff --git a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c index 0ea14bc..10e92ec 100644 --- a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -40,19 +40,7 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev, registers.l2_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_FEATURES)); - registers.core_features = 0; -#if !MALI_USE_CSF - /* TGOx */ - registers.core_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(CORE_FEATURES)); -#else /* !MALI_USE_CSF */ - if (!(((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) == - GPU_ID2_PRODUCT_TDUX) || - ((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) == - GPU_ID2_PRODUCT_TODX))) - registers.core_features = - kbase_reg_read(kbdev, GPU_CONTROL_REG(CORE_FEATURES)); -#endif /* MALI_USE_CSF */ + registers.tiler_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_FEATURES)); registers.mem_features = kbase_reg_read(kbdev, @@ -170,6 +158,11 @@ int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, regdump->coherency_features = coherency_features; + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CORE_FEATURES)) + regdump->core_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(CORE_FEATURES)); + else + regdump->core_features = 0; + kbase_pm_register_access_disable(kbdev); return error; diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c index 0ece571..b89b917 100644 --- a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,6 +29,20 @@ #include <device/mali_kbase_device.h> #include <backend/gpu/mali_kbase_instr_internal.h> +static int wait_prfcnt_ready(struct kbase_device *kbdev) +{ + u32 loops; + + for (loops = 0; loops < KBASE_PRFCNT_ACTIVE_MAX_LOOPS; loops++) { + const u32 prfcnt_active = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & + GPU_STATUS_PRFCNT_ACTIVE; + if (!prfcnt_active) + return 0; + } + + dev_err(kbdev->dev, "PRFCNT_ACTIVE bit stuck\n"); + return -EBUSY; +} int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, struct kbase_context *kctx, @@ -43,20 +57,20 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, /* alignment failure */ if ((enable->dump_buffer == 0ULL) || (enable->dump_buffer & (2048 - 1))) - goto out_err; + return err; spin_lock_irqsave(&kbdev->hwcnt.lock, flags); if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { /* Instrumentation is already enabled */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - goto out_err; + return err; } if (kbase_is_gpu_removed(kbdev)) { /* GPU has been removed by Arbiter */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - goto out_err; + return err; } /* Enable interrupt */ @@ -81,9 +95,19 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT; #endif + /* Wait until prfcnt config register can be written */ + err = wait_prfcnt_ready(kbdev); + if (err) + return err; + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), prfcnt_config | PRFCNT_CONFIG_MODE_OFF); + /* Wait until prfcnt is disabled before writing configuration registers */ + err = 
wait_prfcnt_ready(kbdev); + if (err) + return err; + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), enable->dump_buffer & 0xFFFFFFFF); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), @@ -111,12 +135,8 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - err = 0; - dev_dbg(kbdev->dev, "HW counters dumping set-up for context %pK", kctx); - return err; - out_err: - return err; + return 0; } static void kbasep_instr_hwc_disable_hw_prfcnt(struct kbase_device *kbdev) @@ -135,7 +155,10 @@ static void kbasep_instr_hwc_disable_hw_prfcnt(struct kbase_device *kbdev) kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~PRFCNT_SAMPLE_COMPLETED); - /* Disable the counters */ + /* Wait until prfcnt config register can be written, then disable the counters. + * Return value is ignored as we are disabling anyway. + */ + wait_prfcnt_ready(kbdev); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0); kbdev->hwcnt.kctx = NULL; @@ -146,7 +169,6 @@ static void kbasep_instr_hwc_disable_hw_prfcnt(struct kbase_device *kbdev) int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) { unsigned long flags, pm_flags; - int err = -EINVAL; struct kbase_device *kbdev = kctx->kbdev; while (1) { @@ -167,14 +189,14 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) /* Instrumentation is not enabled */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); - return err; + return -EINVAL; } if (kbdev->hwcnt.kctx != kctx) { /* Instrumentation has been setup for another context */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); - return err; + return -EINVAL; } if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) @@ -233,6 +255,11 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) */ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING; + /* Wait until prfcnt is ready to request dump */ + err = wait_prfcnt_ready(kbdev); + if (err) + goto unlock; + /* Reconfigure the dump address */ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), kbdev->hwcnt.addr & 0xFFFFFFFF); @@ -248,11 +275,8 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) dev_dbg(kbdev->dev, "HW counters dumping done for context %pK", kctx); - err = 0; - unlock: spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - return err; } KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump); @@ -346,21 +370,24 @@ int kbase_instr_hwcnt_clear(struct kbase_context *kctx) */ if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) - goto out; + goto unlock; if (kbase_is_gpu_removed(kbdev)) { /* GPU has been removed by Arbiter */ - goto out; + goto unlock; } + /* Wait until prfcnt is ready to clear */ + err = wait_prfcnt_ready(kbdev); + if (err) + goto unlock; + /* Clear the counters */ KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, 0); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_PRFCNT_CLEAR); - err = 0; - -out: +unlock: spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); return err; } diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c index 32bdf72..20905f7 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c @@ -191,9 +191,7 @@ static u64 select_job_chain(struct kbase_jd_atom *katom) return jc; } -void kbase_job_hw_submit(struct kbase_device 
*kbdev, - struct kbase_jd_atom *katom, - int js) +int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js) { struct kbase_context *kctx; u32 cfg; @@ -202,13 +200,13 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, struct slot_rb *ptr_slot_rb = &kbdev->hwaccess.backend.slot_rb[js]; lockdep_assert_held(&kbdev->hwaccess_lock); - KBASE_DEBUG_ASSERT(kbdev); - KBASE_DEBUG_ASSERT(katom); kctx = katom->kctx; /* Command register must be available */ - KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx)); + if (WARN(!kbasep_jm_is_js_free(kbdev, js, kctx), + "Attempting to assign to occupied slot %d in kctx %pK\n", js, (void *)kctx)) + return -EPERM; dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n", jc_head, (void *)katom); @@ -281,7 +279,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, /* Write an approximate start timestamp. * It's approximate because there might be a job in the HEAD register. */ - katom->start_timestamp = ktime_get(); + katom->start_timestamp = ktime_get_raw(); /* GO ! */ dev_dbg(kbdev->dev, "JS: Submitting atom %pK from ctx %pK to js[%d] with head=0x%llx", @@ -329,6 +327,8 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), JS_COMMAND_START); + + return 0; } /** @@ -393,11 +393,9 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) lockdep_assert_held(&kbdev->hwaccess_lock); - KBASE_DEBUG_ASSERT(kbdev); - KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ, NULL, NULL, 0, done); - end_timestamp = ktime_get(); + end_timestamp = ktime_get_raw(); while (done) { u32 failed = done >> 16; @@ -409,7 +407,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) * numbered interrupts before the higher numbered ones. */ i = ffs(finished) - 1; - KBASE_DEBUG_ASSERT(i >= 0); + if (WARN(i < 0, "%s: called without receiving any interrupts\n", __func__)) + break; do { int nr_done; @@ -590,7 +589,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) failed = done >> 16; finished = (done & 0xFFFF) | failed; if (done) - end_timestamp = ktime_get(); + end_timestamp = ktime_get_raw(); } while (finished & (1 << i)); kbasep_job_slot_update_head_start_timestamp(kbdev, i, @@ -619,7 +618,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, u64 job_in_head_before; u32 status_reg_after; - KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK))); + WARN_ON(action & (~JS_COMMAND_MASK)); /* Check the head pointer */ job_in_head_before = ((u64) kbase_reg_read(kbdev, @@ -697,7 +696,8 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, head, head->jc, js); break; default: - BUG(); + WARN(1, "Unknown action %d on atom %pK in kctx %pK\n", action, + (void *)target_katom, (void *)target_katom->kctx); break; } } else { @@ -726,7 +726,8 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL, 0, js); break; default: - BUG(); + WARN(1, "Unknown action %d on atom %pK in kctx %pK\n", action, + (void *)target_katom, (void *)target_katom->kctx); break; } } @@ -752,9 +753,7 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, int i; bool stop_sent = false; - KBASE_DEBUG_ASSERT(kctx != NULL); kbdev = kctx->kbdev; - KBASE_DEBUG_ASSERT(kbdev != NULL); lockdep_assert_held(&kbdev->hwaccess_lock); @@ -934,7 +933,11 @@ void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, 
int js, dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n", target_katom, sw_flags, js); - KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK)); + if (sw_flags & JS_COMMAND_MASK) { + WARN(true, "Atom %pK in kctx %pK received non-NOP flags %d\n", (void *)target_katom, + target_katom ? (void *)target_katom->kctx : NULL, sw_flags); + sw_flags &= ~((u32)JS_COMMAND_MASK); + } kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom, JS_COMMAND_SOFT_STOP | sw_flags); } @@ -1052,17 +1055,14 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) { unsigned long flags; struct kbase_device *kbdev; - ktime_t end_timestamp = ktime_get(); + ktime_t end_timestamp = ktime_get_raw(); struct kbasep_js_device_data *js_devdata; bool silent = false; u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; - KBASE_DEBUG_ASSERT(data); - kbdev = container_of(data, struct kbase_device, hwaccess.backend.reset_work); - KBASE_DEBUG_ASSERT(kbdev); js_devdata = &kbdev->js_data; if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == @@ -1097,7 +1097,7 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) return; } - KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false); + WARN(kbdev->irq_reset_flush, "%s: GPU reset already in flight\n", __func__); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); spin_lock(&kbdev->mmu_mask_change); @@ -1138,7 +1138,8 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) mutex_lock(&kbdev->pm.lock); /* We hold the pm lock, so there ought to be a current policy */ - KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy); + if (unlikely(!kbdev->pm.backend.pm_current_policy)) + dev_warn(kbdev->dev, "No power policy set!"); /* All slot have been soft-stopped and we've waited * SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we @@ -1235,8 +1236,6 @@ static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer) struct kbase_device *kbdev = container_of(timer, struct kbase_device, hwaccess.backend.reset_timer); - KBASE_DEBUG_ASSERT(kbdev); - /* Reset still pending? 
*/ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) == @@ -1257,8 +1256,6 @@ static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev) int i; int pending_jobs = 0; - KBASE_DEBUG_ASSERT(kbdev); - /* Count the number of jobs */ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i); @@ -1316,8 +1313,6 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, { int i; - KBASE_DEBUG_ASSERT(kbdev); - #ifdef CONFIG_MALI_ARBITER_SUPPORT if (kbase_pm_is_gpu_lost(kbdev)) { /* GPU access has been removed, reset will be done by @@ -1371,13 +1366,11 @@ KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu); */ void kbase_reset_gpu(struct kbase_device *kbdev) { - KBASE_DEBUG_ASSERT(kbdev); - /* Note this is an assert/atomic_set because it is a software issue for * a race to be occurring here */ - KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) == - KBASE_RESET_GPU_PREPARED); + if (WARN_ON(atomic_read(&kbdev->hwaccess.backend.reset_gpu) != KBASE_RESET_GPU_PREPARED)) + return; atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_COMMITTED); @@ -1395,13 +1388,11 @@ KBASE_EXPORT_TEST_API(kbase_reset_gpu); void kbase_reset_gpu_locked(struct kbase_device *kbdev) { - KBASE_DEBUG_ASSERT(kbdev); - /* Note this is an assert/atomic_set because it is a software issue for * a race to be occurring here */ - KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) == - KBASE_RESET_GPU_PREPARED); + if (WARN_ON(atomic_read(&kbdev->hwaccess.backend.reset_gpu) != KBASE_RESET_GPU_PREPARED)) + return; atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_COMMITTED); diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h index 1039e85..1ebb843 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2016, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016, 2018-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -76,7 +76,6 @@ static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, } #endif - /** * kbase_job_hw_submit() - Submit a job to the GPU * @kbdev: Device pointer @@ -88,10 +87,10 @@ static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, * * The following locking conditions are made on the caller: * - it must hold the hwaccess_lock + * + * Return: 0 if the job was successfully submitted to hardware, an error otherwise. 
*/ -void kbase_job_hw_submit(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, - int js); +int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js); #if !MALI_USE_CSF /** diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c index 48d1de8..4fe8046 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c @@ -346,16 +346,35 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, katom->protected_state.exit != KBASE_ATOM_EXIT_PROTECTED_CHECK) kbdev->protected_mode_transition = false; + + /* If the atom is at KBASE_ATOM_ENTER_PROTECTED_HWCNT state, it means + * one of two events prevented it from progressing to the next state and + * ultimately reach protected mode: + * - hwcnts were enabled, and the atom had to schedule a worker to + * disable them. + * - the hwcnts were already disabled, but some other error occurred. + * In the first case, if the worker has not yet completed + * (kbdev->protected_mode_hwcnt_disabled == false), we need to re-enable + * them and signal to the worker they have already been enabled + */ + if (kbase_jd_katom_is_protected(katom) && + (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_HWCNT)) { + kbdev->protected_mode_hwcnt_desired = true; + if (kbdev->protected_mode_hwcnt_disabled) { + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + kbdev->protected_mode_hwcnt_disabled = false; + } + } + /* If the atom has suspended hwcnt but has not yet entered * protected mode, then resume hwcnt now. If the GPU is now in * protected mode then hwcnt will be resumed by GPU reset so * don't resume it here. */ if (kbase_jd_katom_is_protected(katom) && - ((katom->protected_state.enter == - KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) || - (katom->protected_state.enter == - KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY))) { + ((katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) || + (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY) || + (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_FINISHED))) { WARN_ON(!kbdev->protected_mode_hwcnt_disabled); kbdev->protected_mode_hwcnt_desired = true; if (kbdev->protected_mode_hwcnt_disabled) { @@ -506,17 +525,14 @@ static int kbase_jm_protected_entry(struct kbase_device *kbdev, KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev); if (err) { /* - * Failed to switch into protected mode, resume - * GPU hwcnt and fail atom. + * Failed to switch into protected mode. + * + * At this point we expect: + * katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION && + * katom->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_FINISHED + * ==> + * kbdev->protected_mode_hwcnt_disabled = false */ - WARN_ON(!kbdev->protected_mode_hwcnt_disabled); - kbdev->protected_mode_hwcnt_desired = true; - if (kbdev->protected_mode_hwcnt_disabled) { - kbase_hwcnt_context_enable( - kbdev->hwcnt_gpu_ctx); - kbdev->protected_mode_hwcnt_disabled = false; - } - katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); /* @@ -536,12 +552,9 @@ static int kbase_jm_protected_entry(struct kbase_device *kbdev, /* * Protected mode sanity checks. 
*/ - KBASE_DEBUG_ASSERT_MSG( - kbase_jd_katom_is_protected(katom[idx]) == - kbase_gpu_in_protected_mode(kbdev), - "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", - kbase_jd_katom_is_protected(katom[idx]), - kbase_gpu_in_protected_mode(kbdev)); + WARN(kbase_jd_katom_is_protected(katom[idx]) != kbase_gpu_in_protected_mode(kbdev), + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", + kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev)); katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; @@ -951,18 +964,6 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) cores_ready = kbase_pm_cores_requested(kbdev, true); - if (katom[idx]->event_code == - BASE_JD_EVENT_PM_EVENT) { - KBASE_KTRACE_ADD_JM_SLOT_INFO( - kbdev, JM_MARK_FOR_RETURN_TO_JS, - katom[idx]->kctx, katom[idx], - katom[idx]->jc, js, - katom[idx]->event_code); - katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_RETURN_TO_JS; - break; - } - if (!cores_ready) break; @@ -1011,9 +1012,10 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) kbase_pm_request_gpu_cycle_counter_l2_is_on( kbdev); - kbase_job_hw_submit(kbdev, katom[idx], js); - katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_SUBMITTED; + if (!kbase_job_hw_submit(kbdev, katom[idx], js)) + katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_SUBMITTED; + else + break; kbasep_platform_event_work_begin(katom[idx]); @@ -1346,11 +1348,9 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, } else { char js_string[16]; - trace_gpu_sched_switch(kbasep_make_job_slot_string(js, - js_string, - sizeof(js_string)), - ktime_to_ns(ktime_get()), 0, 0, - 0); + trace_gpu_sched_switch(kbasep_make_job_slot_string(js, js_string, + sizeof(js_string)), + ktime_to_ns(ktime_get_raw()), 0, 0, 0); } } #endif @@ -1406,14 +1406,14 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) if (katom->protected_state.exit == KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) { /* protected mode sanity checks */ - KBASE_DEBUG_ASSERT_MSG( - kbase_jd_katom_is_protected(katom) == kbase_gpu_in_protected_mode(kbdev), - "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", - kbase_jd_katom_is_protected(katom), kbase_gpu_in_protected_mode(kbdev)); - KBASE_DEBUG_ASSERT_MSG( - (kbase_jd_katom_is_protected(katom) && js == 0) || - !kbase_jd_katom_is_protected(katom), - "Protected atom on JS%d not supported", js); + WARN(kbase_jd_katom_is_protected(katom) != + kbase_gpu_in_protected_mode(kbdev), + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", + kbase_jd_katom_is_protected(katom), + kbase_gpu_in_protected_mode(kbdev)); + WARN(!(kbase_jd_katom_is_protected(katom) && js == 0) && + kbase_jd_katom_is_protected(katom), + "Protected atom on JS%d not supported", js); } if ((katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) && !kbase_ctx_flag(katom->kctx, KCTX_DYING)) @@ -1804,11 +1804,9 @@ void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, base_jd_core_req core_req) { if (!kbdev->pm.active_count) { - mutex_lock(&kbdev->js_data.runpool_mutex); - mutex_lock(&kbdev->pm.lock); + kbase_pm_lock(kbdev); kbase_pm_update_active(kbdev); - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&kbdev->js_data.runpool_mutex); + kbase_pm_unlock(kbdev); } } diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c index 603ffcf..961a951 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c +++ 
b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -80,6 +80,7 @@ static bool ipa_control_timer_enabled; #endif #define LO_MASK(M) ((M) & 0xFFFFFFFF) +#define HI_MASK(M) ((M) & 0xFFFFFFFF00000000) static u32 get_implementation_register(u32 reg) { @@ -104,20 +105,15 @@ static u32 get_implementation_register(u32 reg) } struct { + spinlock_t access_lock; +#if !MALI_USE_CSF unsigned long prfcnt_base; +#endif /* !MALI_USE_CSF */ u32 *prfcnt_base_cpu; - struct kbase_device *kbdev; - struct tagged_addr *pages; - size_t page_count; u32 time; - struct { - u32 jm; - u32 tiler; - u32 l2; - u32 shader; - } prfcnt_en; + struct gpu_model_prfcnt_en prfcnt_en; u64 l2_present; u64 shader_present; @@ -181,7 +177,9 @@ struct control_reg_values_t { struct dummy_model_t { int reset_completed; int reset_completed_mask; +#if !MALI_USE_CSF int prfcnt_sample_completed; +#endif /* !MALI_USE_CSF */ int power_changed_mask; /* 2bits: _ALL,_SINGLE */ int power_changed; /* 1bit */ bool clean_caches_completed; @@ -464,6 +462,7 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, u32 event_index; u64 value = 0; u32 core; + unsigned long flags; if (WARN_ON(core_type >= KBASE_IPA_CORE_TYPE_NUM)) return 0; @@ -487,6 +486,8 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, event_index -= 4; + spin_lock_irqsave(&performance_counters.access_lock, flags); + switch (core_type) { case KBASE_IPA_CORE_TYPE_CSHW: core_count = 1; @@ -514,28 +515,46 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, event_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE; } + spin_unlock_irqrestore(&performance_counters.access_lock, flags); + if (is_low_word) return (value & U32_MAX); else return (value >> 32); } +#endif /* MALI_USE_CSF */ -void gpu_model_clear_prfcnt_values(void) +/** + * gpu_model_clear_prfcnt_values_nolock - Clear performance counter values + * + * Sets all performance counter values to zero. The performance counter access + * lock must be held when calling this function. 
+ */ +static void gpu_model_clear_prfcnt_values_nolock(void) { - memset(performance_counters.cshw_counters, 0, - sizeof(performance_counters.cshw_counters)); - - memset(performance_counters.tiler_counters, 0, - sizeof(performance_counters.tiler_counters)); - - memset(performance_counters.l2_counters, 0, - sizeof(performance_counters.l2_counters)); - + lockdep_assert_held(&performance_counters.access_lock); +#if !MALI_USE_CSF + memset(performance_counters.jm_counters, 0, sizeof(performance_counters.jm_counters)); +#else + memset(performance_counters.cshw_counters, 0, sizeof(performance_counters.cshw_counters)); +#endif /* !MALI_USE_CSF */ + memset(performance_counters.tiler_counters, 0, sizeof(performance_counters.tiler_counters)); + memset(performance_counters.l2_counters, 0, sizeof(performance_counters.l2_counters)); memset(performance_counters.shader_counters, 0, sizeof(performance_counters.shader_counters)); } + +#if MALI_USE_CSF +void gpu_model_clear_prfcnt_values(void) +{ + unsigned long flags; + + spin_lock_irqsave(&performance_counters.access_lock, flags); + gpu_model_clear_prfcnt_values_nolock(); + spin_unlock_irqrestore(&performance_counters.access_lock, flags); +} KBASE_EXPORT_TEST_API(gpu_model_clear_prfcnt_values); -#endif +#endif /* MALI_USE_CSF */ /** * gpu_model_dump_prfcnt_blocks() - Dump performance counter values to buffer @@ -545,17 +564,20 @@ KBASE_EXPORT_TEST_API(gpu_model_clear_prfcnt_values); * @block_count: Number of blocks to dump * @prfcnt_enable_mask: Counter enable mask * @blocks_present: Available blocks bit mask + * + * The performance counter access lock must be held before calling this + * function. */ -static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index, - u32 block_count, - u32 prfcnt_enable_mask, - u64 blocks_present) +static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index, u32 block_count, + u32 prfcnt_enable_mask, u64 blocks_present) { u32 block_idx, counter; u32 counter_value = 0; u32 *prfcnt_base; u32 index = 0; + lockdep_assert_held(&performance_counters.access_lock); + prfcnt_base = performance_counters.prfcnt_base_cpu; for (block_idx = 0; block_idx < block_count; block_idx++) { @@ -594,35 +616,18 @@ static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index, } } -/** - * gpu_model_sync_dummy_prfcnt() - Synchronize dumped performance counter values - * - * Used to ensure counter values are not lost if cache invalidation is performed - * prior to reading. 
- */ -static void gpu_model_sync_dummy_prfcnt(void) -{ - int i; - struct page *pg; - - for (i = 0; i < performance_counters.page_count; i++) { - pg = as_page(performance_counters.pages[i]); - kbase_sync_single_for_device(performance_counters.kbdev, - kbase_dma_addr(pg), PAGE_SIZE, - DMA_BIDIRECTIONAL); - } -} - -static void midgard_model_dump_prfcnt(void) +static void gpu_model_dump_nolock(void) { u32 index = 0; + lockdep_assert_held(&performance_counters.access_lock); + #if !MALI_USE_CSF - gpu_model_dump_prfcnt_blocks(performance_counters.jm_counters, &index, - 1, 0xffffffff, 0x1); + gpu_model_dump_prfcnt_blocks(performance_counters.jm_counters, &index, 1, + performance_counters.prfcnt_en.fe, 0x1); #else - gpu_model_dump_prfcnt_blocks(performance_counters.cshw_counters, &index, - 1, 0xffffffff, 0x1); + gpu_model_dump_prfcnt_blocks(performance_counters.cshw_counters, &index, 1, + performance_counters.prfcnt_en.fe, 0x1); #endif /* !MALI_USE_CSF */ gpu_model_dump_prfcnt_blocks(performance_counters.tiler_counters, &index, 1, @@ -637,12 +642,48 @@ static void midgard_model_dump_prfcnt(void) performance_counters.prfcnt_en.shader, performance_counters.shader_present); - gpu_model_sync_dummy_prfcnt(); + /* Counter values are cleared after each dump */ + gpu_model_clear_prfcnt_values_nolock(); /* simulate a 'long' time between samples */ performance_counters.time += 10; } +#if !MALI_USE_CSF +static void midgard_model_dump_prfcnt(void) +{ + unsigned long flags; + + spin_lock_irqsave(&performance_counters.access_lock, flags); + gpu_model_dump_nolock(); + spin_unlock_irqrestore(&performance_counters.access_lock, flags); +} +#else +void gpu_model_prfcnt_dump_request(u32 *sample_buf, struct gpu_model_prfcnt_en enable_maps) +{ + unsigned long flags; + + if (WARN_ON(!sample_buf)) + return; + + spin_lock_irqsave(&performance_counters.access_lock, flags); + performance_counters.prfcnt_base_cpu = sample_buf; + performance_counters.prfcnt_en = enable_maps; + gpu_model_dump_nolock(); + spin_unlock_irqrestore(&performance_counters.access_lock, flags); +} + +void gpu_model_glb_request_job_irq(void *model) +{ + unsigned long flags; + + spin_lock_irqsave(&hw_error_status.access_lock, flags); + hw_error_status.job_irq_status |= JOB_IRQ_GLOBAL_IF; + spin_unlock_irqrestore(&hw_error_status.access_lock, flags); + gpu_device_raise_irq(model, GPU_DUMMY_JOB_IRQ); +} +#endif /* !MALI_USE_CSF */ + static void init_register_statuses(struct dummy_model_t *dummy) { int i; @@ -673,6 +714,8 @@ static void init_register_statuses(struct dummy_model_t *dummy) static void update_register_statuses(struct dummy_model_t *dummy, int job_slot) { + lockdep_assert_held(&hw_error_status.access_lock); + if (hw_error_status.errors_mask & IS_A_JOB_ERROR) { if (job_slot == hw_error_status.current_job_slot) { #if !MALI_USE_CSF @@ -922,6 +965,7 @@ static void update_job_irq_js_state(struct dummy_model_t *dummy, int mask) { int i; + lockdep_assert_held(&hw_error_status.access_lock); pr_debug("%s", "Updating the JS_ACTIVE register"); for (i = 0; i < NUM_SLOTS; i++) { @@ -990,6 +1034,9 @@ void *midgard_model_create(const void *config) { struct dummy_model_t *dummy = NULL; + spin_lock_init(&hw_error_status.access_lock); + spin_lock_init(&performance_counters.access_lock); + dummy = kzalloc(sizeof(*dummy), GFP_KERNEL); if (dummy) { @@ -1009,14 +1056,18 @@ static void midgard_model_get_outputs(void *h) { struct dummy_model_t *dummy = (struct dummy_model_t *)h; + lockdep_assert_held(&hw_error_status.access_lock); + if (hw_error_status.job_irq_status) 
gpu_device_raise_irq(dummy, GPU_DUMMY_JOB_IRQ); if ((dummy->power_changed && dummy->power_changed_mask) || (dummy->reset_completed & dummy->reset_completed_mask) || hw_error_status.gpu_error_irq || - (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled) || - dummy->prfcnt_sample_completed) +#if !MALI_USE_CSF + dummy->prfcnt_sample_completed || +#endif + (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled)) gpu_device_raise_irq(dummy, GPU_DUMMY_GPU_IRQ); if (hw_error_status.mmu_irq_rawstat & hw_error_status.mmu_irq_mask) @@ -1028,6 +1079,8 @@ static void midgard_model_update(void *h) struct dummy_model_t *dummy = (struct dummy_model_t *)h; int i; + lockdep_assert_held(&hw_error_status.access_lock); + for (i = 0; i < NUM_SLOTS; i++) { if (!dummy->slots[i].job_active) continue; @@ -1074,6 +1127,8 @@ static void invalidate_active_jobs(struct dummy_model_t *dummy) { int i; + lockdep_assert_held(&hw_error_status.access_lock); + for (i = 0; i < NUM_SLOTS; i++) { if (dummy->slots[i].job_active) { hw_error_status.job_irq_rawstat |= (1 << (16 + i)); @@ -1085,7 +1140,11 @@ static void invalidate_active_jobs(struct dummy_model_t *dummy) u8 midgard_model_write_reg(void *h, u32 addr, u32 value) { + unsigned long flags; struct dummy_model_t *dummy = (struct dummy_model_t *)h; + + spin_lock_irqsave(&hw_error_status.access_lock, flags); + #if !MALI_USE_CSF if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) && (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) { @@ -1188,9 +1247,10 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) if (value & (1 << 17)) dummy->clean_caches_completed = false; - if (value & (1 << 16)) +#if !MALI_USE_CSF + if (value & PRFCNT_SAMPLE_COMPLETED) dummy->prfcnt_sample_completed = 0; - +#endif /* !MALI_USE_CSF */ /*update error status */ hw_error_status.gpu_error_irq &= ~(value); } else if (addr == GPU_CONTROL_REG(GPU_COMMAND)) { @@ -1214,9 +1274,11 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) pr_debug("clean caches requested"); dummy->clean_caches_completed = true; break; +#if !MALI_USE_CSF case GPU_COMMAND_PRFCNT_SAMPLE: midgard_model_dump_prfcnt(); dummy->prfcnt_sample_completed = 1; +#endif /* !MALI_USE_CSF */ default: break; } @@ -1346,20 +1408,24 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) mem_addr_space, addr, value); break; } - } else if (addr >= GPU_CONTROL_REG(PRFCNT_BASE_LO) && - addr <= GPU_CONTROL_REG(PRFCNT_MMU_L2_EN)) { + } else { switch (addr) { +#if !MALI_USE_CSF case PRFCNT_BASE_LO: - performance_counters.prfcnt_base |= value; + performance_counters.prfcnt_base = + HI_MASK(performance_counters.prfcnt_base) | value; + performance_counters.prfcnt_base_cpu = + (u32 *)(uintptr_t)performance_counters.prfcnt_base; break; case PRFCNT_BASE_HI: - performance_counters.prfcnt_base |= ((u64) value) << 32; + performance_counters.prfcnt_base = + LO_MASK(performance_counters.prfcnt_base) | (((u64)value) << 32); + performance_counters.prfcnt_base_cpu = + (u32 *)(uintptr_t)performance_counters.prfcnt_base; break; -#if !MALI_USE_CSF case PRFCNT_JM_EN: - performance_counters.prfcnt_en.jm = value; + performance_counters.prfcnt_en.fe = value; break; -#endif /* !MALI_USE_CSF */ case PRFCNT_SHADER_EN: performance_counters.prfcnt_en.shader = value; break; @@ -1369,9 +1435,7 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) case PRFCNT_MMU_L2_EN: performance_counters.prfcnt_en.l2 = value; break; - } - } else { - switch (addr) { +#endif /* !MALI_USE_CSF */ case TILER_PWRON_LO: dummy->power_on |= (value & 1) 
<< 1; /* Also ensure L2 is powered on */ @@ -1416,6 +1480,7 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) case PWR_OVERRIDE0: #if !MALI_USE_CSF case JM_CONFIG: + case PRFCNT_CONFIG: #else /* !MALI_USE_CSF */ case CSF_CONFIG: #endif /* !MALI_USE_CSF */ @@ -1434,13 +1499,18 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) midgard_model_update(dummy); midgard_model_get_outputs(dummy); + spin_unlock_irqrestore(&hw_error_status.access_lock, flags); return 1; } u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) { + unsigned long flags; struct dummy_model_t *dummy = (struct dummy_model_t *)h; + + spin_lock_irqsave(&hw_error_status.access_lock, flags); + *value = 0; /* 0 by default */ #if !MALI_USE_CSF if (addr == JOB_CONTROL_REG(JOB_IRQ_JS_STATE)) { @@ -1475,24 +1545,31 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) #endif /* !MALI_USE_CSF */ else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { *value = (dummy->reset_completed_mask << 8) | - (dummy->power_changed_mask << 9) | (1 << 7) | 1; + ((dummy->clean_caches_completed_irq_enabled ? 1u : 0u) << 17) | + (dummy->power_changed_mask << 9) | (1 << 7) | 1; pr_debug("GPU_IRQ_MASK read %x", *value); } else if (addr == GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) { *value = (dummy->power_changed << 9) | (dummy->power_changed << 10) | (dummy->reset_completed << 8) | +#if !MALI_USE_CSF + (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) | +#endif /* !MALI_USE_CSF */ ((dummy->clean_caches_completed ? 1u : 0u) << 17) | - (dummy->prfcnt_sample_completed << 16) | hw_error_status.gpu_error_irq; + hw_error_status.gpu_error_irq; pr_debug("GPU_IRQ_RAWSTAT read %x", *value); } else if (addr == GPU_CONTROL_REG(GPU_IRQ_STATUS)) { *value = ((dummy->power_changed && (dummy->power_changed_mask & 0x1)) << 9) | ((dummy->power_changed && (dummy->power_changed_mask & 0x2)) << 10) | ((dummy->reset_completed & dummy->reset_completed_mask) << 8) | +#if !MALI_USE_CSF + (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) | +#endif /* !MALI_USE_CSF */ (((dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled) ? 
1u : 0u) << 17) | - (dummy->prfcnt_sample_completed << 16) | hw_error_status.gpu_error_irq; + hw_error_status.gpu_error_irq; pr_debug("GPU_IRQ_STAT read %x", *value); } else if (addr == GPU_CONTROL_REG(GPU_STATUS)) { *value = 0; @@ -1827,6 +1904,8 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER, counter_index, is_low_word); + } else if (addr == USER_REG(LATEST_FLUSH)) { + *value = 0; } #endif else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) { @@ -1840,18 +1919,20 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) *value = 0; } + spin_unlock_irqrestore(&hw_error_status.access_lock, flags); CSTD_UNUSED(dummy); return 1; } -static u32 set_user_sample_core_type(u64 *counters, - u32 *usr_data_start, u32 usr_data_offset, - u32 usr_data_size, u32 core_count) +static u32 set_user_sample_core_type(u64 *counters, u32 *usr_data_start, u32 usr_data_offset, + u32 usr_data_size, u32 core_count) { u32 sample_size; u32 *usr_data = NULL; + lockdep_assert_held(&performance_counters.access_lock); + sample_size = core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u32); @@ -1866,11 +1947,7 @@ static u32 set_user_sample_core_type(u64 *counters, u32 i; for (i = 0; i < loop_cnt; i++) { - if (copy_from_user(&counters[i], &usr_data[i], - sizeof(u32))) { - model_error_log(KBASE_CORE, "Unable to set counter sample 2"); - break; - } + counters[i] = usr_data[i]; } } @@ -1884,6 +1961,8 @@ static u32 set_kernel_sample_core_type(u64 *counters, u32 sample_size; u64 *usr_data = NULL; + lockdep_assert_held(&performance_counters.access_lock); + sample_size = core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u64); @@ -1900,49 +1979,70 @@ static u32 set_kernel_sample_core_type(u64 *counters, } /* Counter values injected through ioctl are of 32 bits */ -void gpu_model_set_dummy_prfcnt_sample(u32 *usr_data, u32 usr_data_size) +int gpu_model_set_dummy_prfcnt_user_sample(u32 __user *data, u32 size) { + unsigned long flags; + u32 *user_data; u32 offset = 0; + if (data == NULL || size == 0 || size > KBASE_DUMMY_MODEL_COUNTER_TOTAL * sizeof(u32)) + return -EINVAL; + + /* copy_from_user might sleep so can't be called from inside a spinlock + * allocate a temporary buffer for user data and copy to that before taking + * the lock + */ + user_data = kmalloc(size, GFP_KERNEL); + if (!user_data) + return -ENOMEM; + + if (copy_from_user(user_data, data, size)) { + model_error_log(KBASE_CORE, "Unable to copy prfcnt data from userspace"); + kfree(user_data); + return -EINVAL; + } + + spin_lock_irqsave(&performance_counters.access_lock, flags); #if !MALI_USE_CSF - offset = set_user_sample_core_type(performance_counters.jm_counters, - usr_data, offset, usr_data_size, 1); + offset = set_user_sample_core_type(performance_counters.jm_counters, user_data, offset, + size, 1); #else - offset = set_user_sample_core_type(performance_counters.cshw_counters, - usr_data, offset, usr_data_size, 1); + offset = set_user_sample_core_type(performance_counters.cshw_counters, user_data, offset, + size, 1); #endif /* !MALI_USE_CSF */ - offset = set_user_sample_core_type(performance_counters.tiler_counters, - usr_data, offset, usr_data_size, - hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT)); - offset = set_user_sample_core_type(performance_counters.l2_counters, - usr_data, offset, usr_data_size, - KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS); - offset = set_user_sample_core_type(performance_counters.shader_counters, - usr_data, offset, usr_data_size, - 
KBASE_DUMMY_MODEL_MAX_SHADER_CORES); + offset = set_user_sample_core_type(performance_counters.tiler_counters, user_data, offset, + size, hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT)); + offset = set_user_sample_core_type(performance_counters.l2_counters, user_data, offset, + size, KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS); + offset = set_user_sample_core_type(performance_counters.shader_counters, user_data, offset, + size, KBASE_DUMMY_MODEL_MAX_SHADER_CORES); + spin_unlock_irqrestore(&performance_counters.access_lock, flags); + + kfree(user_data); + return 0; } /* Counter values injected through kutf are of 64 bits */ -void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *usr_data, u32 usr_data_size) +void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *data, u32 size) { + unsigned long flags; u32 offset = 0; + spin_lock_irqsave(&performance_counters.access_lock, flags); #if !MALI_USE_CSF - offset = set_kernel_sample_core_type(performance_counters.jm_counters, - usr_data, offset, usr_data_size, 1); + offset = set_kernel_sample_core_type(performance_counters.jm_counters, data, offset, size, + 1); #else - offset = set_kernel_sample_core_type(performance_counters.cshw_counters, - usr_data, offset, usr_data_size, 1); + offset = set_kernel_sample_core_type(performance_counters.cshw_counters, data, offset, size, + 1); #endif /* !MALI_USE_CSF */ - offset = set_kernel_sample_core_type(performance_counters.tiler_counters, - usr_data, offset, usr_data_size, - hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT)); - offset = set_kernel_sample_core_type(performance_counters.l2_counters, - usr_data, offset, usr_data_size, - hweight64(performance_counters.l2_present)); - offset = set_kernel_sample_core_type(performance_counters.shader_counters, - usr_data, offset, usr_data_size, - hweight64(performance_counters.shader_present)); + offset = set_kernel_sample_core_type(performance_counters.tiler_counters, data, offset, + size, hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT)); + offset = set_kernel_sample_core_type(performance_counters.l2_counters, data, offset, size, + hweight64(performance_counters.l2_present)); + offset = set_kernel_sample_core_type(performance_counters.shader_counters, data, offset, + size, hweight64(performance_counters.shader_present)); + spin_unlock_irqrestore(&performance_counters.access_lock, flags); } KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_kernel_sample); @@ -1977,21 +2077,12 @@ void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev, } KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_cores); -void gpu_model_set_dummy_prfcnt_base_cpu(u32 *base, struct kbase_device *kbdev, - struct tagged_addr *pages, - size_t page_count) -{ - performance_counters.prfcnt_base_cpu = base; - performance_counters.kbdev = kbdev; - performance_counters.pages = pages; - performance_counters.page_count = page_count; -} - int gpu_model_control(void *model, struct kbase_model_control_params *params) { struct dummy_model_t *dummy = (struct dummy_model_t *)model; int i; + unsigned long flags; if (params->command == KBASE_MC_DISABLE_JOBS) { for (i = 0; i < NUM_SLOTS; i++) @@ -2000,8 +2091,10 @@ int gpu_model_control(void *model, return -EINVAL; } + spin_lock_irqsave(&hw_error_status.access_lock, flags); midgard_model_update(dummy); midgard_model_get_outputs(dummy); + spin_unlock_irqrestore(&hw_error_status.access_lock, flags); return 0; } diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.h b/mali_kbase/backend/gpu/mali_kbase_model_dummy.h index 87690f4..8eaf1b0 100644 --- 
a/mali_kbase/backend/gpu/mali_kbase_model_dummy.h +++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.h @@ -116,6 +116,8 @@ struct kbase_error_atom { /*struct to track the system error state*/ struct error_status_t { + spinlock_t access_lock; + u32 errors_mask; u32 mmu_table_level; int faulty_mmu_as; @@ -138,6 +140,20 @@ struct error_status_t { u64 as_transtab[NUM_MMU_AS]; }; +/** + * struct gpu_model_prfcnt_en - Performance counter enable masks + * @fe: Enable mask for front-end block + * @tiler: Enable mask for tiler block + * @l2: Enable mask for L2/Memory system blocks + * @shader: Enable mask for shader core blocks + */ +struct gpu_model_prfcnt_en { + u32 fe; + u32 tiler; + u32 l2; + u32 shader; +}; + void *midgard_model_create(const void *config); void midgard_model_destroy(void *h); u8 midgard_model_write_reg(void *h, u32 addr, u32 value); @@ -148,18 +164,53 @@ int job_atom_inject_error(struct kbase_error_params *params); int gpu_model_control(void *h, struct kbase_model_control_params *params); -void gpu_model_set_dummy_prfcnt_sample(u32 *usr_data, u32 usr_data_size); -void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *usr_data, u32 usr_data_size); +/** + * gpu_model_set_dummy_prfcnt_user_sample() - Set performance counter values + * @data: Userspace pointer to array of counter values + * @size: Size of counter value array + * + * Counter values set by this function will be used for one sample dump only + * after which counters will be cleared back to zero. + * + * Return: 0 on success, else error code. + */ +int gpu_model_set_dummy_prfcnt_user_sample(u32 __user *data, u32 size); + +/** + * gpu_model_set_dummy_prfcnt_kernel_sample() - Set performance counter values + * @data: Pointer to array of counter values + * @size: Size of counter value array + * + * Counter values set by this function will be used for one sample dump only + * after which counters will be cleared back to zero. + */ +void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *data, u32 size); + void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev, u64 *l2_present, u64 *shader_present); void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev, u64 l2_present, u64 shader_present); -void gpu_model_set_dummy_prfcnt_base_cpu(u32 *base, struct kbase_device *kbdev, - struct tagged_addr *pages, - size_t page_count); + /* Clear the counter values array maintained by the dummy model */ void gpu_model_clear_prfcnt_values(void); +#if MALI_USE_CSF +/** + * gpu_model_prfcnt_dump_request() - Request performance counter sample dump. + * @sample_buf: Pointer to KBASE_DUMMY_MODEL_MAX_VALUES_PER_SAMPLE sized array + * in which to store dumped performance counter values. + * @enable_maps: Physical enable maps for performance counter blocks. + */ +void gpu_model_prfcnt_dump_request(uint32_t *sample_buf, struct gpu_model_prfcnt_en enable_maps); + +/** + * gpu_model_glb_request_job_irq() - Trigger job interrupt with global request + * flag set. + * @model: Model pointer returned by midgard_model_create(). 
+ */ +void gpu_model_glb_request_job_irq(void *model); +#endif /* MALI_USE_CSF */ + enum gpu_dummy_irq { GPU_DUMMY_JOB_IRQ, GPU_DUMMY_GPU_IRQ, diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c index 3d92251..fcf98b0 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c @@ -437,8 +437,7 @@ static void kbase_pm_l2_clock_slow(struct kbase_device *kbdev) return; /* Stop the metrics gathering framework */ - if (kbase_pm_metrics_is_active(kbdev)) - kbase_pm_metrics_stop(kbdev); + kbase_pm_metrics_stop(kbdev); /* Keep the current freq to restore it upon resume */ kbdev->previous_frequency = clk_get_rate(clk); @@ -880,7 +879,7 @@ void kbase_pm_power_changed(struct kbase_device *kbdev) kbase_pm_update_state(kbdev); #if !MALI_USE_CSF - kbase_backend_slot_update(kbdev); + kbase_backend_slot_update(kbdev); #endif /* !MALI_USE_CSF */ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -990,7 +989,7 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev) { unsigned long flags; - ktime_t end_timestamp = ktime_get(); + ktime_t end_timestamp = ktime_get_raw(); struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; if (!kbdev->arb.arb_if) @@ -1065,6 +1064,7 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->csf.scheduler.lock); lockdep_assert_held(&kbdev->pm.lock); +#ifdef CONFIG_MALI_DEBUG /* In case of no active CSG on slot, powering up L2 could be skipped and * proceed directly to suspend GPU. * ToDo: firmware has to be reloaded after wake-up as no halt command @@ -1074,6 +1074,7 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev) dev_info( kbdev->dev, "No active CSGs. Can skip the power up of L2 and go for suspension directly"); +#endif ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev); if (ret) { diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c index 7d14be9..a4d7168 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -92,29 +92,10 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) * for those cores to get powered down */ if ((core_mask & old_core_mask) != old_core_mask) { - bool can_wait; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - can_wait = kbdev->pm.backend.gpu_ready && kbase_pm_is_mcu_desired(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - /* This check is ideally not required, the wait function can - * deal with the GPU power down. But it has been added to - * address the scenario where down-scaling request comes from - * the platform specific code soon after the GPU power down - * and at the time same time application thread tries to - * power up the GPU (on the flush of GPU queue). - * The platform specific @ref callback_power_on that gets - * invoked on power up does not return until down-scaling - * request is complete. 
The check mitigates the race caused by - * the problem in platform specific code. - */ - if (likely(can_wait)) { - if (kbase_pm_wait_for_desired_state(kbdev)) { - dev_warn(kbdev->dev, - "Wait for update of core_mask from %llx to %llx failed", - old_core_mask, core_mask); - } + if (kbase_pm_wait_for_cores_down_scale(kbdev)) { + dev_warn(kbdev->dev, + "Wait for update of core_mask from %llx to %llx failed", + old_core_mask, core_mask); } } #endif diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h index a249b1e..66ca0b6 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -136,7 +136,7 @@ struct kbasep_pm_metrics { * or removed from a GPU slot. * @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device. * @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. - * @lock: spinlock protecting the kbasep_pm_metrics_data structure + * @lock: spinlock protecting the kbasep_pm_metrics_state structure * @platform_data: pointer to data controlled by platform specific code * @kbdev: pointer to kbase device for which metrics are collected * @values: The current values of the power management metrics. The @@ -145,7 +145,7 @@ struct kbasep_pm_metrics { * @initialized: tracks whether metrics_state has been initialized or not. * @timer: timer to regularly make DVFS decisions based on the power * management metrics. - * @timer_active: boolean indicating @timer is running + * @timer_state: atomic indicating current @timer state, on, off, or stopped. * @dvfs_last: values of the PM metrics from the last DVFS tick * @dvfs_diff: different between the current and previous PM metrics. */ @@ -169,7 +169,7 @@ struct kbasep_pm_metrics_state { #ifdef CONFIG_MALI_MIDGARD_DVFS bool initialized; struct hrtimer timer; - bool timer_active; + atomic_t timer_state; struct kbasep_pm_metrics dvfs_last; struct kbasep_pm_metrics dvfs_diff; #endif @@ -572,7 +572,7 @@ struct kbase_pm_backend_data { }; #if MALI_USE_CSF -/* CSF PM flag, signaling that the MCU CORE should be kept on */ +/* CSF PM flag, signaling that the MCU shader Core should be kept on */ #define CSF_DYNAMIC_PM_CORE_KEEP_ON (1 << 0) /* CSF PM flag, signaling no scheduler suspension on idle groups */ #define CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE (1 << 1) diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c index 52e228c..aab07c9 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -656,6 +656,38 @@ static void kbase_pm_enable_mcu_db_notification(struct kbase_device *kbdev) val &= ~MCU_CNTRL_DOORBELL_DISABLE_MASK; kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), val); } + +/** + * wait_mcu_as_inactive - Wait for AS used by MCU FW to get configured + * + * @kbdev: Pointer to the device. + * + * This function is called to wait for the AS used by MCU FW to get configured + * before DB notification on MCU is enabled, as a workaround for HW issue. + */ +static void wait_mcu_as_inactive(struct kbase_device *kbdev) +{ + unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TURSEHW_2716)) + return; + + /* Wait for the AS_ACTIVE_INT bit to become 0 for the AS used by MCU FW */ + while (--max_loops && + kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) & + AS_STATUS_AS_ACTIVE_INT) + ; + + if (!WARN_ON_ONCE(max_loops == 0)) + return; + + dev_err(kbdev->dev, "AS_ACTIVE_INT bit stuck for AS %d used by MCU FW", MCU_AS_NR); + + if (kbase_prepare_to_reset_gpu(kbdev, 0)) + kbase_reset_gpu(kbdev); +} #endif /** @@ -665,10 +697,10 @@ static void kbase_pm_enable_mcu_db_notification(struct kbase_device *kbdev) * @kbdev: Pointer to the device * @enable: boolean indicating to enable interrupts or not * - * The POWER_CHANGED_ALL and POWER_CHANGED_SINGLE interrupts can be disabled - * after L2 has been turned on when FW is controlling the power for the shader - * cores. Correspondingly, the interrupts can be re-enabled after the MCU has - * been disabled before the power down of L2. + * The POWER_CHANGED_ALL interrupt can be disabled after L2 has been turned on + * when FW is controlling the power for the shader cores. Correspondingly, the + * interrupts can be re-enabled after the MCU has been disabled before the + * power down of L2. 
*/ static void kbasep_pm_toggle_power_interrupt(struct kbase_device *kbdev, bool enable) { @@ -679,15 +711,15 @@ static void kbasep_pm_toggle_power_interrupt(struct kbase_device *kbdev, bool en irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); #ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS - (void)enable; /* For IFPO, we require the POWER_CHANGED_ALL interrupt to be always on */ - irq_mask |= POWER_CHANGED_ALL | POWER_CHANGED_SINGLE; -#else - if (enable) - irq_mask |= POWER_CHANGED_ALL | POWER_CHANGED_SINGLE; - else - irq_mask &= ~(POWER_CHANGED_ALL | POWER_CHANGED_SINGLE); -#endif /* CONFIG_MALI_HOST_CONTROLS_SC_RAILS */ + enable = true; +#endif + if (enable) { + irq_mask |= POWER_CHANGED_ALL; + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), POWER_CHANGED_ALL); + } else { + irq_mask &= ~POWER_CHANGED_ALL; + } kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask); } @@ -921,7 +953,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) case KBASE_MCU_ON_PEND_HALT: if (kbase_csf_firmware_mcu_halted(kbdev)) { - KBASE_KTRACE_ADD(kbdev, MCU_HALTED, NULL, + KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_MCU_HALTED, NULL, kbase_csf_ktrace_gpu_cycle_cnt(kbdev)); if (kbdev->csf.firmware_hctl_core_pwr) backend->mcu_state = @@ -968,7 +1000,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) case KBASE_MCU_ON_PEND_SLEEP: if (kbase_csf_firmware_is_mcu_in_sleep(kbdev)) { - KBASE_KTRACE_ADD(kbdev, MCU_IN_SLEEP, NULL, + KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_MCU_SLEEP, NULL, kbase_csf_ktrace_gpu_cycle_cnt(kbdev)); backend->mcu_state = KBASE_MCU_IN_SLEEP; kbase_pm_enable_db_mirror_interrupt(kbdev); @@ -984,6 +1016,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) case KBASE_MCU_IN_SLEEP: if (kbase_pm_is_mcu_desired(kbdev) && backend->l2_state == KBASE_L2_ON) { + wait_mcu_as_inactive(kbdev); KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP( kbdev, kbase_backend_get_cycle_cnt(kbdev)); kbase_pm_enable_mcu_db_notification(kbdev); @@ -994,6 +1027,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) if (!kbdev->csf.firmware_hctl_core_pwr) kbasep_pm_toggle_power_interrupt(kbdev, false); backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); } break; #endif @@ -1120,13 +1154,24 @@ static bool can_power_down_l2(struct kbase_device *kbdev) #endif } +static bool need_tiler_control(struct kbase_device *kbdev) +{ +#if MALI_USE_CSF + if (kbase_pm_no_mcu_core_pwroff(kbdev)) + return true; + else + return false; +#else + return true; +#endif +} + static int kbase_pm_l2_update_state(struct kbase_device *kbdev) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; u64 l2_present = kbdev->gpu_props.curr_config.l2_present; -#if !MALI_USE_CSF u64 tiler_present = kbdev->gpu_props.props.raw_props.tiler_present; -#endif + bool l2_power_up_done; enum kbase_l2_core_state prev_state; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -1137,24 +1182,18 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) KBASE_PM_CORE_L2); u64 l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2); - -#if !MALI_USE_CSF - u64 tiler_trans = kbase_pm_get_trans_cores(kbdev, - KBASE_PM_CORE_TILER); - u64 tiler_ready = kbase_pm_get_ready_cores(kbdev, - KBASE_PM_CORE_TILER); -#endif +#ifdef CONFIG_MALI_ARBITER_SUPPORT + u64 tiler_trans = kbase_pm_get_trans_cores( + kbdev, KBASE_PM_CORE_TILER); + u64 tiler_ready = kbase_pm_get_ready_cores( + kbdev, KBASE_PM_CORE_TILER); /* * kbase_pm_get_ready_cores 
and kbase_pm_get_trans_cores * are vulnerable to corruption if gpu is lost */ if (kbase_is_gpu_removed(kbdev) -#ifdef CONFIG_MALI_ARBITER_SUPPORT || kbase_pm_is_gpu_lost(kbdev)) { -#else - ) { -#endif backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF; backend->hwcnt_desired = false; @@ -1177,32 +1216,45 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) } break; } +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ /* mask off ready from trans in case transitions finished * between the register reads */ l2_trans &= ~l2_ready; -#if !MALI_USE_CSF - tiler_trans &= ~tiler_ready; -#endif + prev_state = backend->l2_state; switch (backend->l2_state) { case KBASE_L2_OFF: if (kbase_pm_is_l2_desired(kbdev)) { +#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) + /* Enable HW timer of IPA control before + * L2 cache is powered-up. + */ + kbase_ipa_control_handle_gpu_sleep_exit(kbdev); +#endif /* * Set the desired config for L2 before * powering it on */ kbase_pm_l2_config_override(kbdev); kbase_pbha_write_settings(kbdev); -#if !MALI_USE_CSF - /* L2 is required, power on. Powering on the - * tiler will also power the first L2 cache. - */ - kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, - tiler_present, ACTION_PWRON); + /* If Host is controlling the power for shader + * cores, then it also needs to control the + * power for Tiler. + * Powering on the tiler will also power the + * L2 cache. + */ + if (need_tiler_control(kbdev)) { + kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, tiler_present, + ACTION_PWRON); + } else { + kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_present, + ACTION_PWRON); + } +#if !MALI_USE_CSF /* If we have more than one L2 cache then we * must power them on explicitly. */ @@ -1212,30 +1264,36 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) ACTION_PWRON); /* Clear backend slot submission kctx */ kbase_pm_l2_clear_backend_slot_submit_kctx(kbdev); -#else - /* With CSF firmware, Host driver doesn't need to - * handle power management with both shader and tiler cores. - * The CSF firmware will power up the cores appropriately. - * So only power the l2 cache explicitly. - */ - kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, - l2_present, ACTION_PWRON); #endif backend->l2_state = KBASE_L2_PEND_ON; } break; case KBASE_L2_PEND_ON: -#if !MALI_USE_CSF - if (!l2_trans && l2_ready == l2_present && !tiler_trans - && tiler_ready == tiler_present) { - KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, - tiler_ready); -#else + l2_power_up_done = false; if (!l2_trans && l2_ready == l2_present) { - KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, - l2_ready); -#endif + if (need_tiler_control(kbdev)) { +#ifndef CONFIG_MALI_ARBITER_SUPPORT + u64 tiler_trans = kbase_pm_get_trans_cores( + kbdev, KBASE_PM_CORE_TILER); + u64 tiler_ready = kbase_pm_get_ready_cores( + kbdev, KBASE_PM_CORE_TILER); +#endif + + tiler_trans &= ~tiler_ready; + if (!tiler_trans && tiler_ready == tiler_present) { + KBASE_KTRACE_ADD(kbdev, + PM_CORES_CHANGE_AVAILABLE_TILER, + NULL, tiler_ready); + l2_power_up_done = true; + } + } else { + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, + l2_ready); + l2_power_up_done = true; + } + } + if (l2_power_up_done) { /* * Ensure snoops are enabled after L2 is powered * up. Note that kbase keeps track of the snoop @@ -1431,12 +1489,26 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) /* We only need to check the L2 here - if the L2 * is off then the tiler is definitely also off. 
*/ - if (!l2_trans && !l2_ready) + if (!l2_trans && !l2_ready) { +#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) + /* Allow clock gating within the GPU and prevent it + * from being seen as active during sleep. + */ + kbase_ipa_control_handle_gpu_sleep_enter(kbdev); +#endif /* L2 is now powered off */ backend->l2_state = KBASE_L2_OFF; + } } else { - if (!kbdev->cache_clean_in_progress) + if (!kbdev->cache_clean_in_progress) { +#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) + /* Allow clock gating within the GPU and prevent it + * from being seen as active during sleep. + */ + kbase_ipa_control_handle_gpu_sleep_enter(kbdev); +#endif backend->l2_state = KBASE_L2_OFF; + } } break; @@ -2293,12 +2365,14 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev) /* Wait for cores */ #if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE - remaining = wait_event_killable_timeout( + remaining = wait_event_killable_timeout(kbdev->pm.backend.gpu_in_desired_state_wait, + kbase_pm_is_in_desired_state_with_l2_powered(kbdev), + timeout); #else remaining = wait_event_timeout( -#endif kbdev->pm.backend.gpu_in_desired_state_wait, kbase_pm_is_in_desired_state_with_l2_powered(kbdev), timeout); +#endif if (!remaining) { kbase_pm_timed_out(kbdev); @@ -2353,6 +2427,66 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) } KBASE_EXPORT_TEST_API(kbase_pm_wait_for_desired_state); +#if MALI_USE_CSF +/** + * core_mask_update_done - Check if downscaling of shader cores is done + * + * @kbdev: The kbase device structure for the device. + * + * This function checks if the downscaling of cores is effectively complete. + * + * Return: true if the downscale is done. + */ +static bool core_mask_update_done(struct kbase_device *kbdev) +{ + bool update_done = false; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + /* If MCU is in stable ON state then it implies that the downscale + * request had completed. + * If MCU is not active then it implies all cores are off, so can + * consider the downscale request as complete. + */ + if ((kbdev->pm.backend.mcu_state == KBASE_MCU_ON) || + kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state)) + update_done = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return update_done; +} + +int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev) +{ + long timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT)); + long remaining; + int err = 0; + + /* Wait for core mask update to complete */ +#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE + remaining = wait_event_killable_timeout( + kbdev->pm.backend.gpu_in_desired_state_wait, + core_mask_update_done(kbdev), timeout); +#else + remaining = wait_event_timeout( + kbdev->pm.backend.gpu_in_desired_state_wait, + core_mask_update_done(kbdev), timeout); +#endif + + if (!remaining) { + kbase_pm_timed_out(kbdev); + err = -ETIMEDOUT; + } else if (remaining < 0) { + dev_info( + kbdev->dev, + "Wait for cores down scaling got interrupted"); + err = (int)remaining; + } + + return err; +} +#endif + void kbase_pm_enable_interrupts(struct kbase_device *kbdev) { unsigned long flags; @@ -2416,14 +2550,21 @@ static void update_user_reg_page_mapping(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->pm.lock); mutex_lock(&kbdev->csf.reg_lock); - if (kbdev->csf.mali_file_inode) { - /* This would zap the pte corresponding to the mapping of User - * register page for all the Kbase contexts. 
- */ - unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping, - BASEP_MEM_CSF_USER_REG_PAGE_HANDLE, - PAGE_SIZE, 1); + + /* Only if the mappings for USER page exist, update all PTEs associated to it */ + if (kbdev->csf.nr_user_page_mapped > 0) { + if (likely(kbdev->csf.mali_file_inode)) { + /* This would zap the pte corresponding to the mapping of User + * register page for all the Kbase contexts. + */ + unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping, + BASEP_MEM_CSF_USER_REG_PAGE_HANDLE, PAGE_SIZE, 1); + } else { + dev_err(kbdev->dev, + "Device file inode not exist even if USER page previously mapped"); + } } + mutex_unlock(&kbdev->csf.reg_lock); } #endif diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h index 68ded7d..cd5a6a3 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h @@ -269,6 +269,37 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); */ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev); +#if MALI_USE_CSF +/** + * kbase_pm_wait_for_cores_down_scale - Wait for the downscaling of shader cores + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This function can be called to ensure that the downscaling of cores is + * effectively complete and it would be safe to lower the voltage. + * The function assumes that caller had exercised the MCU state machine for the + * downscale request through the kbase_pm_update_state() function. + * + * This function needs to be used by the caller to safely wait for the completion + * of downscale request, instead of kbase_pm_wait_for_desired_state(). + * The downscale request would trigger a state change in MCU state machine + * and so when MCU reaches the stable ON state, it can be inferred that + * downscaling is complete. But it has been observed that the wake up of the + * waiting thread can get delayed by few milli seconds and by the time the + * thread wakes up the power down transition could have started (after the + * completion of downscale request). + * On the completion of power down transition another wake up signal would be + * sent, but again by the time thread wakes up the power up transition can begin. + * And the power up transition could then get blocked inside the platform specific + * callback_power_on() function due to the thread that called into Kbase (from the + * platform specific code) to perform the downscaling and then ended up waiting + * for the completion of downscale request. + * + * Return: 0 on success, error code on error or remaining jiffies on timeout. 
+ */ +int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev); +#endif + /** * kbase_pm_update_dynamic_cores_onoff - Update the L2 and shader power state * machines after changing shader core @@ -800,7 +831,7 @@ bool kbase_pm_no_runnables_sched_suspendable(struct kbase_device *kbdev) /** * kbase_pm_no_mcu_core_pwroff - Check whether the PM is required to keep the - * MCU core powered in accordance to the active + * MCU shader Core powered in accordance to the active * power management policy * * @kbdev: Device pointer diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c index f85b466..2df6804 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,6 +24,7 @@ */ #include <mali_kbase.h> +#include <mali_kbase_config_defaults.h> #include <mali_kbase_pm.h> #include <backend/gpu/mali_kbase_pm_internal.h> @@ -48,27 +49,51 @@ #define GPU_ACTIVE_SCALING_FACTOR ((u64)1E9) #endif +/* + * Possible state transitions + * ON -> ON | OFF | STOPPED + * STOPPED -> ON | OFF + * OFF -> ON + * + * + * ┌─e─┐┌────────────f─────────────┐ + * │ v│ v + * └───ON ──a──> STOPPED ──b──> OFF + * ^^ │ │ + * │└──────c─────┘ │ + * │ │ + * └─────────────d─────────────┘ + * + * Transition effects: + * a. None + * b. Timer expires without restart + * c. Timer is not stopped, timer period is unaffected + * d. Timer must be restarted + * e. Callback is executed and the timer is restarted + * f. Timer is cancelled, or the callback is waited on if currently executing. This is called during + tear-down and should not be subject to a race from an OFF->ON transition + */ +enum dvfs_metric_timer_state { TIMER_OFF, TIMER_STOPPED, TIMER_ON }; + #ifdef CONFIG_MALI_MIDGARD_DVFS static enum hrtimer_restart dvfs_callback(struct hrtimer *timer) { - unsigned long flags; struct kbasep_pm_metrics_state *metrics; - KBASE_DEBUG_ASSERT(timer != NULL); + if (WARN_ON(!timer)) + return HRTIMER_NORESTART; metrics = container_of(timer, struct kbasep_pm_metrics_state, timer); - kbase_pm_get_dvfs_action(metrics->kbdev); - spin_lock_irqsave(&metrics->lock, flags); + /* Transition (b) to fully off if timer was stopped, don't restart the timer in this case */ + if (atomic_cmpxchg(&metrics->timer_state, TIMER_STOPPED, TIMER_OFF) != TIMER_ON) + return HRTIMER_NORESTART; - if (metrics->timer_active) - hrtimer_start(timer, - HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period), - HRTIMER_MODE_REL); - - spin_unlock_irqrestore(&metrics->lock, flags); + kbase_pm_get_dvfs_action(metrics->kbdev); - return HRTIMER_NORESTART; + /* Set the new expiration time and restart (transition e) */ + hrtimer_forward_now(timer, HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period)); + return HRTIMER_RESTART; } #endif /* CONFIG_MALI_MIDGARD_DVFS */
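The atomic_cmpxchg() in the new dvfs_callback() is what makes transition (b) race-free without taking metrics->lock. A minimal user-space sketch of the same three-state protocol, using C11 atomics in place of the kernel's atomic_t (all names here are invented for illustration and are not part of the driver):

#include <stdatomic.h>
#include <stdio.h>

/* Illustrative stand-ins for TIMER_OFF / TIMER_STOPPED / TIMER_ON. */
enum timer_state { T_OFF, T_STOPPED, T_ON };

static _Atomic int timer_state = T_OFF;

/* Transition (a): mark a running timer as stopped; never blocks. */
static void timer_stop(void)
{
	int expected = T_ON;
	atomic_compare_exchange_strong(&timer_state, &expected, T_STOPPED);
}

/* Transition (d): re-arm only when the timer has fully reached OFF. */
static int timer_start(void)
{
	return atomic_exchange(&timer_state, T_ON) == T_OFF;
}

/* Expiry callback: transition (b) if stopped, (e) if still on. Returns 1 to re-arm. */
static int timer_expired(void)
{
	int expected = T_STOPPED;

	/* STOPPED -> OFF: expire without restarting, mirroring HRTIMER_NORESTART. */
	if (atomic_compare_exchange_strong(&timer_state, &expected, T_OFF))
		return 0;

	return expected == T_ON; /* ON: do the work and re-arm (transition e). */
}

int main(void)
{
	printf("armed=%d\n", timer_start());    /* OFF -> ON: prints armed=1 */
	timer_stop();                           /* ON -> STOPPED */
	printf("re-arm=%d\n", timer_expired()); /* STOPPED -> OFF: prints re-arm=0 */
	return 0;
}

The key property is that stopping never blocks: it only marks the timer STOPPED, and the expiry callback is the sole place the STOPPED -> OFF edge is taken, so the two can race safely.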
@@ -83,7 +108,7 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev) { KBASE_DEBUG_ASSERT(kbdev != NULL); kbdev->pm.backend.metrics.kbdev = kbdev; - kbdev->pm.backend.metrics.time_period_start = ktime_get(); + kbdev->pm.backend.metrics.time_period_start = ktime_get_raw(); kbdev->pm.backend.metrics.values.time_busy = 0; kbdev->pm.backend.metrics.values.time_idle = 0; kbdev->pm.backend.metrics.values.time_in_protm = 0; @@ -111,7 +136,7 @@ #else KBASE_DEBUG_ASSERT(kbdev != NULL); kbdev->pm.backend.metrics.kbdev = kbdev; - kbdev->pm.backend.metrics.time_period_start = ktime_get(); + kbdev->pm.backend.metrics.time_period_start = ktime_get_raw(); kbdev->pm.backend.metrics.gpu_active = false; kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; @@ -134,6 +159,7 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev) HRTIMER_MODE_REL); kbdev->pm.backend.metrics.timer.function = dvfs_callback; kbdev->pm.backend.metrics.initialized = true; + atomic_set(&kbdev->pm.backend.metrics.timer_state, TIMER_OFF); kbase_pm_metrics_start(kbdev); #endif /* CONFIG_MALI_MIDGARD_DVFS */ @@ -152,16 +178,12 @@ KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init); void kbasep_pm_metrics_term(struct kbase_device *kbdev) { #ifdef CONFIG_MALI_MIDGARD_DVFS - unsigned long flags; - KBASE_DEBUG_ASSERT(kbdev != NULL); - spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); - kbdev->pm.backend.metrics.timer_active = false; - spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); - - hrtimer_cancel(&kbdev->pm.backend.metrics.timer); + /* Cancel the timer, and block if the callback is currently executing (transition f) */ kbdev->pm.backend.metrics.initialized = false; + atomic_set(&kbdev->pm.backend.metrics.timer_state, TIMER_OFF); + hrtimer_cancel(&kbdev->pm.backend.metrics.timer); #endif /* CONFIG_MALI_MIDGARD_DVFS */ #if MALI_USE_CSF @@ -199,7 +221,7 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev) * elapsed time. The lock taken inside kbase_ipa_control_query() * function can cause lot of variation. */ - now = ktime_get(); + now = ktime_get_raw(); if (err) { dev_err(kbdev->dev, @@ -231,12 +253,14 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev) * time.
*/ if (!kbdev->pm.backend.metrics.skip_gpu_active_sanity_check) { - /* Use a margin value that is approximately 1% of the time - * difference. + /* The margin is scaled to allow for the worst-case + * scenario where the samples are maximally separated, + * plus a small offset for sampling errors. */ - u64 margin_ns = diff_ns >> 6; + u64 const MARGIN_NS = + IPA_CONTROL_TIMER_DEFAULT_VALUE_MS * NSEC_PER_MSEC * 3 / 2; - if (gpu_active_counter > (diff_ns + margin_ns)) { + if (gpu_active_counter > (diff_ns + MARGIN_NS)) { dev_info( kbdev->dev, "GPU activity takes longer than time interval: %llu ns > %llu ns", @@ -331,7 +355,7 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, #if MALI_USE_CSF kbase_pm_get_dvfs_utilisation_calc(kbdev); #else - kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get()); + kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get_raw()); #endif memset(diff, 0, sizeof(*diff)); @@ -396,57 +420,33 @@ void kbase_pm_get_dvfs_action(struct kbase_device *kbdev) bool kbase_pm_metrics_is_active(struct kbase_device *kbdev) { - bool isactive; - unsigned long flags; - KBASE_DEBUG_ASSERT(kbdev != NULL); - spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); - isactive = kbdev->pm.backend.metrics.timer_active; - spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); - - return isactive; + return atomic_read(&kbdev->pm.backend.metrics.timer_state) == TIMER_ON; } KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active); void kbase_pm_metrics_start(struct kbase_device *kbdev) { - unsigned long flags; - bool update = true; + struct kbasep_pm_metrics_state *metrics = &kbdev->pm.backend.metrics; - if (unlikely(!kbdev->pm.backend.metrics.initialized)) + if (unlikely(!metrics->initialized)) return; - spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); - if (!kbdev->pm.backend.metrics.timer_active) - kbdev->pm.backend.metrics.timer_active = true; - else - update = false; - spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); - - if (update) - hrtimer_start(&kbdev->pm.backend.metrics.timer, - HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period), - HRTIMER_MODE_REL); + /* Transition to ON, from a stopped state (transition c) */ + if (atomic_xchg(&metrics->timer_state, TIMER_ON) == TIMER_OFF) + /* Start the timer only if it's been fully stopped (transition d)*/ + hrtimer_start(&metrics->timer, HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period), + HRTIMER_MODE_REL); } void kbase_pm_metrics_stop(struct kbase_device *kbdev) { - unsigned long flags; - bool update = true; - if (unlikely(!kbdev->pm.backend.metrics.initialized)) return; - spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); - if (kbdev->pm.backend.metrics.timer_active) - kbdev->pm.backend.metrics.timer_active = false; - else - update = false; - spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); - - if (update) - hrtimer_cancel(&kbdev->pm.backend.metrics.timer); + /* Timer is Stopped if its currently on (transition a) */ + atomic_cmpxchg(&kbdev->pm.backend.metrics.timer_state, TIMER_ON, TIMER_STOPPED); } @@ -512,7 +512,7 @@ void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp) spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); if (!timestamp) { - now = ktime_get(); + now = ktime_get_raw(); timestamp = &now; } diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c index 5f16434..deeb1b5 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c @@ -310,7 +310,7 @@ void 
kbase_pm_set_policy(struct kbase_device *kbdev, mutex_lock(&kbdev->pm.backend.policy_change_lock); if (kbase_reset_gpu_prevent_and_wait(kbdev)) { - dev_warn(kbdev->dev, "Set PM policy failed to prevent gpu reset"); + dev_warn(kbdev->dev, "Set PM policy failing to prevent gpu reset"); reset_op_prevented = false; } @@ -332,7 +332,7 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, * the always_on policy, reflected by the CSF_DYNAMIC_PM_CORE_KEEP_ON * flag bit. */ - sched_suspend = kbdev->csf.firmware_inited && reset_op_prevented && + sched_suspend = reset_op_prevented && (CSF_DYNAMIC_PM_CORE_KEEP_ON & (new_policy_csf_pm_sched_flags | kbdev->pm.backend.csf_pm_sched_flags)); diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c index a83206a..5110e3d 100644 --- a/mali_kbase/backend/gpu/mali_kbase_time.c +++ b/mali_kbase/backend/gpu/mali_kbase_time.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2016, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,6 +21,9 @@ #include <mali_kbase.h> #include <mali_kbase_hwaccess_time.h> +#if MALI_USE_CSF +#include <csf/mali_kbase_csf_timeout.h> +#endif #include <device/mali_kbase_device.h> #include <backend/gpu/mali_kbase_pm_internal.h> #include <mali_kbase_config_defaults.h> @@ -113,13 +116,17 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, */ u64 timeout, nr_cycles = 0; - /* Default value to mean 'no cap' */ - u64 timeout_cap = U64_MAX; - u64 freq_khz = kbdev->lowest_gpu_freq_khz; + u64 freq_khz; + /* Only for debug messages, safe default in case it's mis-maintained */ const char *selector_str = "(unknown)"; - WARN_ON(!freq_khz); + if (WARN(!kbdev->lowest_gpu_freq_khz, + "Lowest frequency uninitialized! Using reference frequency for scaling")) { + freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ; + } else { + freq_khz = kbdev->lowest_gpu_freq_khz; + } switch (selector) { case KBASE_TIMEOUT_SELECTOR_COUNT: @@ -135,16 +142,15 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, fallthrough; case CSF_FIRMWARE_TIMEOUT: selector_str = "CSF_FIRMWARE_TIMEOUT"; - nr_cycles = CSF_FIRMWARE_TIMEOUT_CYCLES; - /* Setup a cap on CSF FW timeout to FIRMWARE_PING_INTERVAL_MS, - * if calculated timeout exceeds it. This should be adapted to - * a direct timeout comparison once the - * FIRMWARE_PING_INTERVAL_MS option is added to this timeout - * function. A compile-time check such as BUILD_BUG_ON can also - * be done once the firmware ping interval in cycles becomes - * available as a macro. + /* Any FW timeout cannot be longer than the FW ping interval, after which + * the firmware_aliveness_monitor will be triggered and may restart + * the GPU if the FW is unresponsive. */ - timeout_cap = FIRMWARE_PING_INTERVAL_MS; + nr_cycles = min(CSF_FIRMWARE_PING_TIMEOUT_CYCLES, CSF_FIRMWARE_TIMEOUT_CYCLES); + + if (nr_cycles == CSF_FIRMWARE_PING_TIMEOUT_CYCLES) + dev_warn(kbdev->dev, "Capping %s to CSF_FIRMWARE_PING_TIMEOUT\n", + selector_str); break; case CSF_PM_TIMEOUT: selector_str = "CSF_PM_TIMEOUT"; @@ -154,21 +160,33 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, selector_str = "CSF_GPU_RESET_TIMEOUT"; nr_cycles = CSF_GPU_RESET_TIMEOUT_CYCLES; break; + case CSF_CSG_SUSPEND_TIMEOUT: + selector_str = "CSF_CSG_SUSPEND_TIMEOUT"; + nr_cycles = CSF_CSG_SUSPEND_TIMEOUT_CYCLES; + break; + case CSF_FIRMWARE_BOOT_TIMEOUT: + selector_str = "CSF_FIRMWARE_BOOT_TIMEOUT"; + nr_cycles = CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES; + break; + case CSF_FIRMWARE_PING_TIMEOUT: + selector_str = "CSF_FIRMWARE_PING_TIMEOUT"; + nr_cycles = CSF_FIRMWARE_PING_TIMEOUT_CYCLES; + break; + case CSF_SCHED_PROTM_PROGRESS_TIMEOUT: + selector_str = "CSF_SCHED_PROTM_PROGRESS_TIMEOUT"; + nr_cycles = kbase_csf_timeout_get(kbdev); + break; #endif } timeout = div_u64(nr_cycles, freq_khz); - if (timeout > timeout_cap) { - dev_dbg(kbdev->dev, "Capped %s %llu to %llu", selector_str, - (unsigned long long)timeout, (unsigned long long)timeout_cap); - timeout = timeout_cap; - } if (WARN(timeout > UINT_MAX, "Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms", (unsigned long long)timeout, selector_str, (unsigned long long)freq_khz)) timeout = UINT_MAX; return (unsigned int)timeout; } +KBASE_EXPORT_TEST_API(kbase_get_timeout_ms);
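Because freq_khz is a clock rate in kilohertz, i.e. cycles per millisecond, the div_u64() above converts a cycle budget straight into milliseconds. A standalone sketch of that scaling and the final clamp (the helper name, cycle count, and frequency below are made-up example values, not the driver's real constants):

#include <limits.h>
#include <stdint.h>
#include <stdio.h>

/* Scale a GPU-cycle budget to milliseconds at the lowest clock rate.
 * freq_khz is cycles per millisecond, so plain division yields ms.
 */
static unsigned int timeout_cycles_to_ms(uint64_t nr_cycles, uint64_t freq_khz)
{
	uint64_t timeout_ms = nr_cycles / freq_khz;

	/* Mirror the driver's clamp of excessive results to UINT_MAX ms. */
	return timeout_ms > UINT_MAX ? UINT_MAX : (unsigned int)timeout_ms;
}

int main(void)
{
	/* e.g. a 100M-cycle budget at a 100 MHz (100000 kHz) floor: 1000 ms. */
	printf("%u ms\n", timeout_cycles_to_ms(100000000ULL, 100000ULL));
	return 0;
}

This also illustrates why the patch replaces the millisecond cap (timeout_cap) with a cycle-domain cap: taking min() of two cycle counts before the division keeps the whole comparison in one unit, at a single point.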
u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev) {