diff options
author | Kevin DuBois <kevindubois@google.com> | 2022-11-02 21:39:17 +0000 |
---|---|---|
committer | Kevin DuBois <kevindubois@google.com> | 2022-11-02 22:39:21 +0000 |
commit | 34e635317dc2a91076ac341df3867ac3bdb31ef1 (patch) | |
tree | cf1c0e597ce1e7dcd9b276ff4d51be60c7fdca58 /mali_kbase/backend | |
parent | 6dcd9736cbf84712dd7073dab4aea256e30517c2 (diff) | |
download | gpu-34e635317dc2a91076ac341df3867ac3bdb31ef1.tar.gz |
Revert "Merge r38p1 from upstream into partner/android13-gs-pixel-5.10-tm-qpr2"
This reverts commit 6dcd9736cbf84712dd7073dab4aea256e30517c2.
Reason for revert: UMD taking too long to merge
Bug: 228779790
Change-Id: I08b861ba3cfc8b025f653ef86b0a5ec643e5b13d
Diffstat (limited to 'mali_kbase/backend')
18 files changed, 521 insertions, 797 deletions
diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c index ddd03ca..d6b9750 100644 --- a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c +++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -72,6 +72,49 @@ get_clk_rate_trace_callbacks(__maybe_unused struct kbase_device *kbdev) return callbacks; } +int kbase_lowest_gpu_freq_init(struct kbase_device *kbdev) +{ + /* Uses default reference frequency defined in below macro */ + u64 lowest_freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ; + + /* Only check lowest frequency in cases when OPPs are used and + * present in the device tree. + */ +#ifdef CONFIG_PM_OPP + struct dev_pm_opp *opp_ptr; + unsigned long found_freq = 0; + + /* find lowest frequency OPP */ + opp_ptr = dev_pm_opp_find_freq_ceil(kbdev->dev, &found_freq); + if (IS_ERR(opp_ptr)) { + dev_err(kbdev->dev, + "No OPPs found in device tree! Scaling timeouts using %llu kHz", + (unsigned long long)lowest_freq_khz); + } else { +#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE + dev_pm_opp_put(opp_ptr); /* decrease OPP refcount */ +#endif + /* convert found frequency to KHz */ + found_freq /= 1000; + + /* If lowest frequency in OPP table is still higher + * than the reference, then keep the reference frequency + * as the one to use for scaling . + */ + if (found_freq < lowest_freq_khz) + lowest_freq_khz = found_freq; + } +#else + dev_err(kbdev->dev, + "No operating-points-v2 node or operating-points property in DT"); +#endif + + kbdev->lowest_gpu_freq_khz = lowest_freq_khz; + dev_dbg(kbdev->dev, "Lowest frequency identified is %llu kHz", + kbdev->lowest_gpu_freq_khz); + return 0; +} + static int gpu_clk_rate_change_notifier(struct notifier_block *nb, unsigned long event, void *data) { diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h index 35b3b8d..a6ee959 100644 --- a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h +++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -61,6 +61,20 @@ struct kbase_clk_data { int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev); /** + * kbase_init_lowest_gpu_freq() - Find the lowest frequency that the GPU can + * run as using the device tree, and save this + * within kbdev. + * @kbdev: Pointer to kbase device. + * + * This function could be called from kbase_clk_rate_trace_manager_init, + * but is left separate as it can be called as soon as + * dev_pm_opp_of_add_table() has been called to initialize the OPP table. + * + * Return: 0 in any case. + */ +int kbase_lowest_gpu_freq_init(struct kbase_device *kbdev); + +/** * kbase_clk_rate_trace_manager_term - Terminate GPU clock rate trace manager. * * @kbdev: Device pointer diff --git a/mali_kbase/backend/gpu/mali_kbase_devfreq.c b/mali_kbase/backend/gpu/mali_kbase_devfreq.c index 09c1863..00b32b9 100644 --- a/mali_kbase/backend/gpu/mali_kbase_devfreq.c +++ b/mali_kbase/backend/gpu/mali_kbase_devfreq.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -57,7 +57,7 @@ static unsigned long get_voltage(struct kbase_device *kbdev, unsigned long freq) opp = dev_pm_opp_find_freq_exact(kbdev->dev, freq, true); if (IS_ERR_OR_NULL(opp)) - dev_err(kbdev->dev, "Failed to get opp (%d)\n", PTR_ERR_OR_ZERO(opp)); + dev_err(kbdev->dev, "Failed to get opp (%ld)\n", PTR_ERR(opp)); else { voltage = dev_pm_opp_get_voltage(opp); #if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE @@ -133,8 +133,8 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) rcu_read_unlock(); #endif if (IS_ERR_OR_NULL(opp)) { - dev_err(dev, "Failed to get opp (%d)\n", PTR_ERR_OR_ZERO(opp)); - return IS_ERR(opp) ? PTR_ERR(opp) : -ENODEV; + dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp)); + return PTR_ERR(opp); } #if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE dev_pm_opp_put(opp); @@ -317,7 +317,6 @@ static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev, dp->max_state = i; - /* Have the lowest clock as suspend clock. * It may be overridden by 'opp-mali-errata-1485982'. */ @@ -637,7 +636,6 @@ int kbase_devfreq_init(struct kbase_device *kbdev) struct devfreq_dev_profile *dp; int err; unsigned int i; - bool free_devfreq_freq_table = true; if (kbdev->nr_clocks == 0) { dev_err(kbdev->dev, "Clock not available for devfreq\n"); @@ -671,35 +669,32 @@ int kbase_devfreq_init(struct kbase_device *kbdev) dp->freq_table[0] / 1000; } -#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) - err = kbase_ipa_init(kbdev); + err = kbase_devfreq_init_core_mask_table(kbdev); if (err) { - dev_err(kbdev->dev, "IPA initialization failed"); - goto ipa_init_failed; + kbase_devfreq_term_freq_table(kbdev); + return err; } -#endif - - err = kbase_devfreq_init_core_mask_table(kbdev); - if (err) - goto init_core_mask_table_failed; kbdev->devfreq = devfreq_add_device(kbdev->dev, dp, "simple_ondemand", NULL); if (IS_ERR(kbdev->devfreq)) { err = PTR_ERR(kbdev->devfreq); kbdev->devfreq = NULL; - dev_err(kbdev->dev, "Fail to add devfreq device(%d)", err); - goto devfreq_add_dev_failed; + kbase_devfreq_term_core_mask_table(kbdev); + kbase_devfreq_term_freq_table(kbdev); + dev_err(kbdev->dev, "Fail to add devfreq device(%d)\n", err); + return err; } - /* Explicit free of freq table isn't needed after devfreq_add_device() */ - free_devfreq_freq_table = false; - /* Initialize devfreq suspend/resume workqueue */ err = kbase_devfreq_work_init(kbdev); if (err) { - dev_err(kbdev->dev, "Fail to init devfreq workqueue"); - goto devfreq_work_init_failed; + if (devfreq_remove_device(kbdev->devfreq)) + dev_err(kbdev->dev, "Fail to rm devfreq\n"); + kbdev->devfreq = NULL; + kbase_devfreq_term_core_mask_table(kbdev); + dev_err(kbdev->dev, "Fail to init devfreq workqueue\n"); + return err; } /* devfreq_add_device only copies a few of kbdev->dev's fields, so @@ -710,20 +705,26 @@ int kbase_devfreq_init(struct kbase_device *kbdev) err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq); if (err) { dev_err(kbdev->dev, - "Failed to register OPP notifier (%d)", err); + "Failed to register OPP notifier (%d)\n", err); goto opp_notifier_failed; } #if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) + err = kbase_ipa_init(kbdev); + if (err) { + dev_err(kbdev->dev, "IPA initialization failed\n"); + goto ipa_init_failed; + } + kbdev->devfreq_cooling = of_devfreq_cooling_register_power( kbdev->dev->of_node, kbdev->devfreq, &kbase_ipa_power_model_ops); if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) { - err = PTR_ERR_OR_ZERO(kbdev->devfreq_cooling); + err = PTR_ERR(kbdev->devfreq_cooling); dev_err(kbdev->dev, - "Failed to register cooling device (%d)", err); - err = err == 0 ? -ENODEV : err; + "Failed to register cooling device (%d)\n", + err); goto cooling_reg_failed; } #endif @@ -732,29 +733,21 @@ int kbase_devfreq_init(struct kbase_device *kbdev) #if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) cooling_reg_failed: + kbase_ipa_term(kbdev); +ipa_init_failed: devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); #endif /* CONFIG_DEVFREQ_THERMAL */ opp_notifier_failed: kbase_devfreq_work_term(kbdev); -devfreq_work_init_failed: if (devfreq_remove_device(kbdev->devfreq)) - dev_err(kbdev->dev, "Failed to terminate devfreq (%d)", err); + dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); kbdev->devfreq = NULL; -devfreq_add_dev_failed: kbase_devfreq_term_core_mask_table(kbdev); -init_core_mask_table_failed: -#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) - kbase_ipa_term(kbdev); -ipa_init_failed: -#endif - if (free_devfreq_freq_table) - kbase_devfreq_term_freq_table(kbdev); - return err; } @@ -767,6 +760,8 @@ void kbase_devfreq_term(struct kbase_device *kbdev) #if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) if (kbdev->devfreq_cooling) devfreq_cooling_unregister(kbdev->devfreq_cooling); + + kbase_ipa_term(kbdev); #endif devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); @@ -780,8 +775,4 @@ void kbase_devfreq_term(struct kbase_device *kbdev) kbdev->devfreq = NULL; kbase_devfreq_term_core_mask_table(kbdev); - -#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) - kbase_ipa_term(kbdev); -#endif } diff --git a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c index 10e92ec..0ea14bc 100644 --- a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -40,7 +40,19 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev, registers.l2_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_FEATURES)); - + registers.core_features = 0; +#if !MALI_USE_CSF + /* TGOx */ + registers.core_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(CORE_FEATURES)); +#else /* !MALI_USE_CSF */ + if (!(((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) == + GPU_ID2_PRODUCT_TDUX) || + ((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) == + GPU_ID2_PRODUCT_TODX))) + registers.core_features = + kbase_reg_read(kbdev, GPU_CONTROL_REG(CORE_FEATURES)); +#endif /* MALI_USE_CSF */ registers.tiler_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_FEATURES)); registers.mem_features = kbase_reg_read(kbdev, @@ -158,11 +170,6 @@ int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, regdump->coherency_features = coherency_features; - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CORE_FEATURES)) - regdump->core_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(CORE_FEATURES)); - else - regdump->core_features = 0; - kbase_pm_register_access_disable(kbdev); return error; diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c index b89b917..0ece571 100644 --- a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,20 +29,6 @@ #include <device/mali_kbase_device.h> #include <backend/gpu/mali_kbase_instr_internal.h> -static int wait_prfcnt_ready(struct kbase_device *kbdev) -{ - u32 loops; - - for (loops = 0; loops < KBASE_PRFCNT_ACTIVE_MAX_LOOPS; loops++) { - const u32 prfcnt_active = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & - GPU_STATUS_PRFCNT_ACTIVE; - if (!prfcnt_active) - return 0; - } - - dev_err(kbdev->dev, "PRFCNT_ACTIVE bit stuck\n"); - return -EBUSY; -} int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, struct kbase_context *kctx, @@ -57,20 +43,20 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, /* alignment failure */ if ((enable->dump_buffer == 0ULL) || (enable->dump_buffer & (2048 - 1))) - return err; + goto out_err; spin_lock_irqsave(&kbdev->hwcnt.lock, flags); if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { /* Instrumentation is already enabled */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - return err; + goto out_err; } if (kbase_is_gpu_removed(kbdev)) { /* GPU has been removed by Arbiter */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - return err; + goto out_err; } /* Enable interrupt */ @@ -95,19 +81,9 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT; #endif - /* Wait until prfcnt config register can be written */ - err = wait_prfcnt_ready(kbdev); - if (err) - return err; - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), prfcnt_config | PRFCNT_CONFIG_MODE_OFF); - /* Wait until prfcnt is disabled before writing configuration registers */ - err = wait_prfcnt_ready(kbdev); - if (err) - return err; - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), enable->dump_buffer & 0xFFFFFFFF); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), @@ -135,8 +111,12 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + err = 0; + dev_dbg(kbdev->dev, "HW counters dumping set-up for context %pK", kctx); - return 0; + return err; + out_err: + return err; } static void kbasep_instr_hwc_disable_hw_prfcnt(struct kbase_device *kbdev) @@ -155,10 +135,7 @@ static void kbasep_instr_hwc_disable_hw_prfcnt(struct kbase_device *kbdev) kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~PRFCNT_SAMPLE_COMPLETED); - /* Wait until prfcnt config register can be written, then disable the counters. - * Return value is ignored as we are disabling anyway. - */ - wait_prfcnt_ready(kbdev); + /* Disable the counters */ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0); kbdev->hwcnt.kctx = NULL; @@ -169,6 +146,7 @@ static void kbasep_instr_hwc_disable_hw_prfcnt(struct kbase_device *kbdev) int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) { unsigned long flags, pm_flags; + int err = -EINVAL; struct kbase_device *kbdev = kctx->kbdev; while (1) { @@ -189,14 +167,14 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) /* Instrumentation is not enabled */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); - return -EINVAL; + return err; } if (kbdev->hwcnt.kctx != kctx) { /* Instrumentation has been setup for another context */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); - return -EINVAL; + return err; } if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) @@ -255,11 +233,6 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) */ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING; - /* Wait until prfcnt is ready to request dump */ - err = wait_prfcnt_ready(kbdev); - if (err) - goto unlock; - /* Reconfigure the dump address */ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), kbdev->hwcnt.addr & 0xFFFFFFFF); @@ -275,8 +248,11 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) dev_dbg(kbdev->dev, "HW counters dumping done for context %pK", kctx); + err = 0; + unlock: spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + return err; } KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump); @@ -370,24 +346,21 @@ int kbase_instr_hwcnt_clear(struct kbase_context *kctx) */ if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) - goto unlock; + goto out; if (kbase_is_gpu_removed(kbdev)) { /* GPU has been removed by Arbiter */ - goto unlock; + goto out; } - /* Wait until prfcnt is ready to clear */ - err = wait_prfcnt_ready(kbdev); - if (err) - goto unlock; - /* Clear the counters */ KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, 0); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_PRFCNT_CLEAR); -unlock: + err = 0; + +out: spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); return err; } diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c index 20905f7..32bdf72 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c @@ -191,7 +191,9 @@ static u64 select_job_chain(struct kbase_jd_atom *katom) return jc; } -int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js) +void kbase_job_hw_submit(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + int js) { struct kbase_context *kctx; u32 cfg; @@ -200,13 +202,13 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, struct slot_rb *ptr_slot_rb = &kbdev->hwaccess.backend.slot_rb[js]; lockdep_assert_held(&kbdev->hwaccess_lock); + KBASE_DEBUG_ASSERT(kbdev); + KBASE_DEBUG_ASSERT(katom); kctx = katom->kctx; /* Command register must be available */ - if (WARN(!kbasep_jm_is_js_free(kbdev, js, kctx), - "Attempting to assign to occupied slot %d in kctx %pK\n", js, (void *)kctx)) - return -EPERM; + KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx)); dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n", jc_head, (void *)katom); @@ -279,7 +281,7 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, /* Write an approximate start timestamp. * It's approximate because there might be a job in the HEAD register. */ - katom->start_timestamp = ktime_get_raw(); + katom->start_timestamp = ktime_get(); /* GO ! */ dev_dbg(kbdev->dev, "JS: Submitting atom %pK from ctx %pK to js[%d] with head=0x%llx", @@ -327,8 +329,6 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), JS_COMMAND_START); - - return 0; } /** @@ -393,9 +393,11 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) lockdep_assert_held(&kbdev->hwaccess_lock); + KBASE_DEBUG_ASSERT(kbdev); + KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ, NULL, NULL, 0, done); - end_timestamp = ktime_get_raw(); + end_timestamp = ktime_get(); while (done) { u32 failed = done >> 16; @@ -407,8 +409,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) * numbered interrupts before the higher numbered ones. */ i = ffs(finished) - 1; - if (WARN(i < 0, "%s: called without receiving any interrupts\n", __func__)) - break; + KBASE_DEBUG_ASSERT(i >= 0); do { int nr_done; @@ -589,7 +590,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) failed = done >> 16; finished = (done & 0xFFFF) | failed; if (done) - end_timestamp = ktime_get_raw(); + end_timestamp = ktime_get(); } while (finished & (1 << i)); kbasep_job_slot_update_head_start_timestamp(kbdev, i, @@ -618,7 +619,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, u64 job_in_head_before; u32 status_reg_after; - WARN_ON(action & (~JS_COMMAND_MASK)); + KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK))); /* Check the head pointer */ job_in_head_before = ((u64) kbase_reg_read(kbdev, @@ -696,8 +697,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, head, head->jc, js); break; default: - WARN(1, "Unknown action %d on atom %pK in kctx %pK\n", action, - (void *)target_katom, (void *)target_katom->kctx); + BUG(); break; } } else { @@ -726,8 +726,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL, 0, js); break; default: - WARN(1, "Unknown action %d on atom %pK in kctx %pK\n", action, - (void *)target_katom, (void *)target_katom->kctx); + BUG(); break; } } @@ -753,7 +752,9 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, int i; bool stop_sent = false; + KBASE_DEBUG_ASSERT(kctx != NULL); kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev != NULL); lockdep_assert_held(&kbdev->hwaccess_lock); @@ -933,11 +934,7 @@ void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n", target_katom, sw_flags, js); - if (sw_flags & JS_COMMAND_MASK) { - WARN(true, "Atom %pK in kctx %pK received non-NOP flags %d\n", (void *)target_katom, - target_katom ? (void *)target_katom->kctx : NULL, sw_flags); - sw_flags &= ~((u32)JS_COMMAND_MASK); - } + KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK)); kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom, JS_COMMAND_SOFT_STOP | sw_flags); } @@ -1055,14 +1052,17 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) { unsigned long flags; struct kbase_device *kbdev; - ktime_t end_timestamp = ktime_get_raw(); + ktime_t end_timestamp = ktime_get(); struct kbasep_js_device_data *js_devdata; bool silent = false; u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; + KBASE_DEBUG_ASSERT(data); + kbdev = container_of(data, struct kbase_device, hwaccess.backend.reset_work); + KBASE_DEBUG_ASSERT(kbdev); js_devdata = &kbdev->js_data; if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == @@ -1097,7 +1097,7 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) return; } - WARN(kbdev->irq_reset_flush, "%s: GPU reset already in flight\n", __func__); + KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); spin_lock(&kbdev->mmu_mask_change); @@ -1138,8 +1138,7 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) mutex_lock(&kbdev->pm.lock); /* We hold the pm lock, so there ought to be a current policy */ - if (unlikely(!kbdev->pm.backend.pm_current_policy)) - dev_warn(kbdev->dev, "No power policy set!"); + KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy); /* All slot have been soft-stopped and we've waited * SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we @@ -1236,6 +1235,8 @@ static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer) struct kbase_device *kbdev = container_of(timer, struct kbase_device, hwaccess.backend.reset_timer); + KBASE_DEBUG_ASSERT(kbdev); + /* Reset still pending? */ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) == @@ -1256,6 +1257,8 @@ static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev) int i; int pending_jobs = 0; + KBASE_DEBUG_ASSERT(kbdev); + /* Count the number of jobs */ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i); @@ -1313,6 +1316,8 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, { int i; + KBASE_DEBUG_ASSERT(kbdev); + #ifdef CONFIG_MALI_ARBITER_SUPPORT if (kbase_pm_is_gpu_lost(kbdev)) { /* GPU access has been removed, reset will be done by @@ -1366,11 +1371,13 @@ KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu); */ void kbase_reset_gpu(struct kbase_device *kbdev) { + KBASE_DEBUG_ASSERT(kbdev); + /* Note this is an assert/atomic_set because it is a software issue for * a race to be occurring here */ - if (WARN_ON(atomic_read(&kbdev->hwaccess.backend.reset_gpu) != KBASE_RESET_GPU_PREPARED)) - return; + KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) == + KBASE_RESET_GPU_PREPARED); atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_COMMITTED); @@ -1388,11 +1395,13 @@ KBASE_EXPORT_TEST_API(kbase_reset_gpu); void kbase_reset_gpu_locked(struct kbase_device *kbdev) { + KBASE_DEBUG_ASSERT(kbdev); + /* Note this is an assert/atomic_set because it is a software issue for * a race to be occurring here */ - if (WARN_ON(atomic_read(&kbdev->hwaccess.backend.reset_gpu) != KBASE_RESET_GPU_PREPARED)) - return; + KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) == + KBASE_RESET_GPU_PREPARED); atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_COMMITTED); diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h index 1ebb843..1039e85 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2016, 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016, 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -76,6 +76,7 @@ static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, } #endif + /** * kbase_job_hw_submit() - Submit a job to the GPU * @kbdev: Device pointer @@ -87,10 +88,10 @@ static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, * * The following locking conditions are made on the caller: * - it must hold the hwaccess_lock - * - * Return: 0 if the job was successfully submitted to hardware, an error otherwise. */ -int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js); +void kbase_job_hw_submit(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + int js); #if !MALI_USE_CSF /** diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c index 4fe8046..48d1de8 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c @@ -346,35 +346,16 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, katom->protected_state.exit != KBASE_ATOM_EXIT_PROTECTED_CHECK) kbdev->protected_mode_transition = false; - - /* If the atom is at KBASE_ATOM_ENTER_PROTECTED_HWCNT state, it means - * one of two events prevented it from progressing to the next state and - * ultimately reach protected mode: - * - hwcnts were enabled, and the atom had to schedule a worker to - * disable them. - * - the hwcnts were already disabled, but some other error occurred. - * In the first case, if the worker has not yet completed - * (kbdev->protected_mode_hwcnt_disabled == false), we need to re-enable - * them and signal to the worker they have already been enabled - */ - if (kbase_jd_katom_is_protected(katom) && - (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_HWCNT)) { - kbdev->protected_mode_hwcnt_desired = true; - if (kbdev->protected_mode_hwcnt_disabled) { - kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); - kbdev->protected_mode_hwcnt_disabled = false; - } - } - /* If the atom has suspended hwcnt but has not yet entered * protected mode, then resume hwcnt now. If the GPU is now in * protected mode then hwcnt will be resumed by GPU reset so * don't resume it here. */ if (kbase_jd_katom_is_protected(katom) && - ((katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) || - (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY) || - (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_FINISHED))) { + ((katom->protected_state.enter == + KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) || + (katom->protected_state.enter == + KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY))) { WARN_ON(!kbdev->protected_mode_hwcnt_disabled); kbdev->protected_mode_hwcnt_desired = true; if (kbdev->protected_mode_hwcnt_disabled) { @@ -525,14 +506,17 @@ static int kbase_jm_protected_entry(struct kbase_device *kbdev, KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev); if (err) { /* - * Failed to switch into protected mode. - * - * At this point we expect: - * katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION && - * katom->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_FINISHED - * ==> - * kbdev->protected_mode_hwcnt_disabled = false + * Failed to switch into protected mode, resume + * GPU hwcnt and fail atom. */ + WARN_ON(!kbdev->protected_mode_hwcnt_disabled); + kbdev->protected_mode_hwcnt_desired = true; + if (kbdev->protected_mode_hwcnt_disabled) { + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); + kbdev->protected_mode_hwcnt_disabled = false; + } + katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); /* @@ -552,9 +536,12 @@ static int kbase_jm_protected_entry(struct kbase_device *kbdev, /* * Protected mode sanity checks. */ - WARN(kbase_jd_katom_is_protected(katom[idx]) != kbase_gpu_in_protected_mode(kbdev), - "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", - kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev)); + KBASE_DEBUG_ASSERT_MSG( + kbase_jd_katom_is_protected(katom[idx]) == + kbase_gpu_in_protected_mode(kbdev), + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", + kbase_jd_katom_is_protected(katom[idx]), + kbase_gpu_in_protected_mode(kbdev)); katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; @@ -964,6 +951,18 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) cores_ready = kbase_pm_cores_requested(kbdev, true); + if (katom[idx]->event_code == + BASE_JD_EVENT_PM_EVENT) { + KBASE_KTRACE_ADD_JM_SLOT_INFO( + kbdev, JM_MARK_FOR_RETURN_TO_JS, + katom[idx]->kctx, katom[idx], + katom[idx]->jc, js, + katom[idx]->event_code); + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_RETURN_TO_JS; + break; + } + if (!cores_ready) break; @@ -1012,10 +1011,9 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) kbase_pm_request_gpu_cycle_counter_l2_is_on( kbdev); - if (!kbase_job_hw_submit(kbdev, katom[idx], js)) - katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_SUBMITTED; - else - break; + kbase_job_hw_submit(kbdev, katom[idx], js); + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_SUBMITTED; kbasep_platform_event_work_begin(katom[idx]); @@ -1348,9 +1346,11 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, } else { char js_string[16]; - trace_gpu_sched_switch(kbasep_make_job_slot_string(js, js_string, - sizeof(js_string)), - ktime_to_ns(ktime_get_raw()), 0, 0, 0); + trace_gpu_sched_switch(kbasep_make_job_slot_string(js, + js_string, + sizeof(js_string)), + ktime_to_ns(ktime_get()), 0, 0, + 0); } } #endif @@ -1406,14 +1406,14 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) if (katom->protected_state.exit == KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) { /* protected mode sanity checks */ - WARN(kbase_jd_katom_is_protected(katom) != - kbase_gpu_in_protected_mode(kbdev), - "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", - kbase_jd_katom_is_protected(katom), - kbase_gpu_in_protected_mode(kbdev)); - WARN(!(kbase_jd_katom_is_protected(katom) && js == 0) && - kbase_jd_katom_is_protected(katom), - "Protected atom on JS%d not supported", js); + KBASE_DEBUG_ASSERT_MSG( + kbase_jd_katom_is_protected(katom) == kbase_gpu_in_protected_mode(kbdev), + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", + kbase_jd_katom_is_protected(katom), kbase_gpu_in_protected_mode(kbdev)); + KBASE_DEBUG_ASSERT_MSG( + (kbase_jd_katom_is_protected(katom) && js == 0) || + !kbase_jd_katom_is_protected(katom), + "Protected atom on JS%d not supported", js); } if ((katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) && !kbase_ctx_flag(katom->kctx, KCTX_DYING)) @@ -1804,9 +1804,11 @@ void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, base_jd_core_req core_req) { if (!kbdev->pm.active_count) { - kbase_pm_lock(kbdev); + mutex_lock(&kbdev->js_data.runpool_mutex); + mutex_lock(&kbdev->pm.lock); kbase_pm_update_active(kbdev); - kbase_pm_unlock(kbdev); + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&kbdev->js_data.runpool_mutex); } } diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c index 961a951..603ffcf 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c +++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -80,7 +80,6 @@ static bool ipa_control_timer_enabled; #endif #define LO_MASK(M) ((M) & 0xFFFFFFFF) -#define HI_MASK(M) ((M) & 0xFFFFFFFF00000000) static u32 get_implementation_register(u32 reg) { @@ -105,15 +104,20 @@ static u32 get_implementation_register(u32 reg) } struct { - spinlock_t access_lock; -#if !MALI_USE_CSF unsigned long prfcnt_base; -#endif /* !MALI_USE_CSF */ u32 *prfcnt_base_cpu; + struct kbase_device *kbdev; + struct tagged_addr *pages; + size_t page_count; u32 time; - struct gpu_model_prfcnt_en prfcnt_en; + struct { + u32 jm; + u32 tiler; + u32 l2; + u32 shader; + } prfcnt_en; u64 l2_present; u64 shader_present; @@ -177,9 +181,7 @@ struct control_reg_values_t { struct dummy_model_t { int reset_completed; int reset_completed_mask; -#if !MALI_USE_CSF int prfcnt_sample_completed; -#endif /* !MALI_USE_CSF */ int power_changed_mask; /* 2bits: _ALL,_SINGLE */ int power_changed; /* 1bit */ bool clean_caches_completed; @@ -462,7 +464,6 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, u32 event_index; u64 value = 0; u32 core; - unsigned long flags; if (WARN_ON(core_type >= KBASE_IPA_CORE_TYPE_NUM)) return 0; @@ -486,8 +487,6 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, event_index -= 4; - spin_lock_irqsave(&performance_counters.access_lock, flags); - switch (core_type) { case KBASE_IPA_CORE_TYPE_CSHW: core_count = 1; @@ -515,46 +514,28 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, event_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE; } - spin_unlock_irqrestore(&performance_counters.access_lock, flags); - if (is_low_word) return (value & U32_MAX); else return (value >> 32); } -#endif /* MALI_USE_CSF */ -/** - * gpu_model_clear_prfcnt_values_nolock - Clear performance counter values - * - * Sets all performance counter values to zero. The performance counter access - * lock must be held when calling this function. - */ -static void gpu_model_clear_prfcnt_values_nolock(void) -{ - lockdep_assert_held(&performance_counters.access_lock); -#if !MALI_USE_CSF - memset(performance_counters.jm_counters, 0, sizeof(performance_counters.jm_counters)); -#else - memset(performance_counters.cshw_counters, 0, sizeof(performance_counters.cshw_counters)); -#endif /* !MALI_USE_CSF */ - memset(performance_counters.tiler_counters, 0, sizeof(performance_counters.tiler_counters)); - memset(performance_counters.l2_counters, 0, sizeof(performance_counters.l2_counters)); - memset(performance_counters.shader_counters, 0, - sizeof(performance_counters.shader_counters)); -} - -#if MALI_USE_CSF void gpu_model_clear_prfcnt_values(void) { - unsigned long flags; + memset(performance_counters.cshw_counters, 0, + sizeof(performance_counters.cshw_counters)); - spin_lock_irqsave(&performance_counters.access_lock, flags); - gpu_model_clear_prfcnt_values_nolock(); - spin_unlock_irqrestore(&performance_counters.access_lock, flags); + memset(performance_counters.tiler_counters, 0, + sizeof(performance_counters.tiler_counters)); + + memset(performance_counters.l2_counters, 0, + sizeof(performance_counters.l2_counters)); + + memset(performance_counters.shader_counters, 0, + sizeof(performance_counters.shader_counters)); } KBASE_EXPORT_TEST_API(gpu_model_clear_prfcnt_values); -#endif /* MALI_USE_CSF */ +#endif /** * gpu_model_dump_prfcnt_blocks() - Dump performance counter values to buffer @@ -564,20 +545,17 @@ KBASE_EXPORT_TEST_API(gpu_model_clear_prfcnt_values); * @block_count: Number of blocks to dump * @prfcnt_enable_mask: Counter enable mask * @blocks_present: Available blocks bit mask - * - * The performance counter access lock must be held before calling this - * function. */ -static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index, u32 block_count, - u32 prfcnt_enable_mask, u64 blocks_present) +static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index, + u32 block_count, + u32 prfcnt_enable_mask, + u64 blocks_present) { u32 block_idx, counter; u32 counter_value = 0; u32 *prfcnt_base; u32 index = 0; - lockdep_assert_held(&performance_counters.access_lock); - prfcnt_base = performance_counters.prfcnt_base_cpu; for (block_idx = 0; block_idx < block_count; block_idx++) { @@ -616,18 +594,35 @@ static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index, u32 block_ } } -static void gpu_model_dump_nolock(void) +/** + * gpu_model_sync_dummy_prfcnt() - Synchronize dumped performance counter values + * + * Used to ensure counter values are not lost if cache invalidation is performed + * prior to reading. + */ +static void gpu_model_sync_dummy_prfcnt(void) { - u32 index = 0; + int i; + struct page *pg; + + for (i = 0; i < performance_counters.page_count; i++) { + pg = as_page(performance_counters.pages[i]); + kbase_sync_single_for_device(performance_counters.kbdev, + kbase_dma_addr(pg), PAGE_SIZE, + DMA_BIDIRECTIONAL); + } +} - lockdep_assert_held(&performance_counters.access_lock); +static void midgard_model_dump_prfcnt(void) +{ + u32 index = 0; #if !MALI_USE_CSF - gpu_model_dump_prfcnt_blocks(performance_counters.jm_counters, &index, 1, - performance_counters.prfcnt_en.fe, 0x1); + gpu_model_dump_prfcnt_blocks(performance_counters.jm_counters, &index, + 1, 0xffffffff, 0x1); #else - gpu_model_dump_prfcnt_blocks(performance_counters.cshw_counters, &index, 1, - performance_counters.prfcnt_en.fe, 0x1); + gpu_model_dump_prfcnt_blocks(performance_counters.cshw_counters, &index, + 1, 0xffffffff, 0x1); #endif /* !MALI_USE_CSF */ gpu_model_dump_prfcnt_blocks(performance_counters.tiler_counters, &index, 1, @@ -642,48 +637,12 @@ static void gpu_model_dump_nolock(void) performance_counters.prfcnt_en.shader, performance_counters.shader_present); - /* Counter values are cleared after each dump */ - gpu_model_clear_prfcnt_values_nolock(); + gpu_model_sync_dummy_prfcnt(); /* simulate a 'long' time between samples */ performance_counters.time += 10; } -#if !MALI_USE_CSF -static void midgard_model_dump_prfcnt(void) -{ - unsigned long flags; - - spin_lock_irqsave(&performance_counters.access_lock, flags); - gpu_model_dump_nolock(); - spin_unlock_irqrestore(&performance_counters.access_lock, flags); -} -#else -void gpu_model_prfcnt_dump_request(u32 *sample_buf, struct gpu_model_prfcnt_en enable_maps) -{ - unsigned long flags; - - if (WARN_ON(!sample_buf)) - return; - - spin_lock_irqsave(&performance_counters.access_lock, flags); - performance_counters.prfcnt_base_cpu = sample_buf; - performance_counters.prfcnt_en = enable_maps; - gpu_model_dump_nolock(); - spin_unlock_irqrestore(&performance_counters.access_lock, flags); -} - -void gpu_model_glb_request_job_irq(void *model) -{ - unsigned long flags; - - spin_lock_irqsave(&hw_error_status.access_lock, flags); - hw_error_status.job_irq_status |= JOB_IRQ_GLOBAL_IF; - spin_unlock_irqrestore(&hw_error_status.access_lock, flags); - gpu_device_raise_irq(model, GPU_DUMMY_JOB_IRQ); -} -#endif /* !MALI_USE_CSF */ - static void init_register_statuses(struct dummy_model_t *dummy) { int i; @@ -714,8 +673,6 @@ static void init_register_statuses(struct dummy_model_t *dummy) static void update_register_statuses(struct dummy_model_t *dummy, int job_slot) { - lockdep_assert_held(&hw_error_status.access_lock); - if (hw_error_status.errors_mask & IS_A_JOB_ERROR) { if (job_slot == hw_error_status.current_job_slot) { #if !MALI_USE_CSF @@ -965,7 +922,6 @@ static void update_job_irq_js_state(struct dummy_model_t *dummy, int mask) { int i; - lockdep_assert_held(&hw_error_status.access_lock); pr_debug("%s", "Updating the JS_ACTIVE register"); for (i = 0; i < NUM_SLOTS; i++) { @@ -1034,9 +990,6 @@ void *midgard_model_create(const void *config) { struct dummy_model_t *dummy = NULL; - spin_lock_init(&hw_error_status.access_lock); - spin_lock_init(&performance_counters.access_lock); - dummy = kzalloc(sizeof(*dummy), GFP_KERNEL); if (dummy) { @@ -1056,18 +1009,14 @@ static void midgard_model_get_outputs(void *h) { struct dummy_model_t *dummy = (struct dummy_model_t *)h; - lockdep_assert_held(&hw_error_status.access_lock); - if (hw_error_status.job_irq_status) gpu_device_raise_irq(dummy, GPU_DUMMY_JOB_IRQ); if ((dummy->power_changed && dummy->power_changed_mask) || (dummy->reset_completed & dummy->reset_completed_mask) || hw_error_status.gpu_error_irq || -#if !MALI_USE_CSF - dummy->prfcnt_sample_completed || -#endif - (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled)) + (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled) || + dummy->prfcnt_sample_completed) gpu_device_raise_irq(dummy, GPU_DUMMY_GPU_IRQ); if (hw_error_status.mmu_irq_rawstat & hw_error_status.mmu_irq_mask) @@ -1079,8 +1028,6 @@ static void midgard_model_update(void *h) struct dummy_model_t *dummy = (struct dummy_model_t *)h; int i; - lockdep_assert_held(&hw_error_status.access_lock); - for (i = 0; i < NUM_SLOTS; i++) { if (!dummy->slots[i].job_active) continue; @@ -1127,8 +1074,6 @@ static void invalidate_active_jobs(struct dummy_model_t *dummy) { int i; - lockdep_assert_held(&hw_error_status.access_lock); - for (i = 0; i < NUM_SLOTS; i++) { if (dummy->slots[i].job_active) { hw_error_status.job_irq_rawstat |= (1 << (16 + i)); @@ -1140,11 +1085,7 @@ static void invalidate_active_jobs(struct dummy_model_t *dummy) u8 midgard_model_write_reg(void *h, u32 addr, u32 value) { - unsigned long flags; struct dummy_model_t *dummy = (struct dummy_model_t *)h; - - spin_lock_irqsave(&hw_error_status.access_lock, flags); - #if !MALI_USE_CSF if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) && (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) { @@ -1247,10 +1188,9 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) if (value & (1 << 17)) dummy->clean_caches_completed = false; -#if !MALI_USE_CSF - if (value & PRFCNT_SAMPLE_COMPLETED) + if (value & (1 << 16)) dummy->prfcnt_sample_completed = 0; -#endif /* !MALI_USE_CSF */ + /*update error status */ hw_error_status.gpu_error_irq &= ~(value); } else if (addr == GPU_CONTROL_REG(GPU_COMMAND)) { @@ -1274,11 +1214,9 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) pr_debug("clean caches requested"); dummy->clean_caches_completed = true; break; -#if !MALI_USE_CSF case GPU_COMMAND_PRFCNT_SAMPLE: midgard_model_dump_prfcnt(); dummy->prfcnt_sample_completed = 1; -#endif /* !MALI_USE_CSF */ default: break; } @@ -1408,24 +1346,20 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) mem_addr_space, addr, value); break; } - } else { + } else if (addr >= GPU_CONTROL_REG(PRFCNT_BASE_LO) && + addr <= GPU_CONTROL_REG(PRFCNT_MMU_L2_EN)) { switch (addr) { -#if !MALI_USE_CSF case PRFCNT_BASE_LO: - performance_counters.prfcnt_base = - HI_MASK(performance_counters.prfcnt_base) | value; - performance_counters.prfcnt_base_cpu = - (u32 *)(uintptr_t)performance_counters.prfcnt_base; + performance_counters.prfcnt_base |= value; break; case PRFCNT_BASE_HI: - performance_counters.prfcnt_base = - LO_MASK(performance_counters.prfcnt_base) | (((u64)value) << 32); - performance_counters.prfcnt_base_cpu = - (u32 *)(uintptr_t)performance_counters.prfcnt_base; + performance_counters.prfcnt_base |= ((u64) value) << 32; break; +#if !MALI_USE_CSF case PRFCNT_JM_EN: - performance_counters.prfcnt_en.fe = value; + performance_counters.prfcnt_en.jm = value; break; +#endif /* !MALI_USE_CSF */ case PRFCNT_SHADER_EN: performance_counters.prfcnt_en.shader = value; break; @@ -1435,7 +1369,9 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) case PRFCNT_MMU_L2_EN: performance_counters.prfcnt_en.l2 = value; break; -#endif /* !MALI_USE_CSF */ + } + } else { + switch (addr) { case TILER_PWRON_LO: dummy->power_on |= (value & 1) << 1; /* Also ensure L2 is powered on */ @@ -1480,7 +1416,6 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) case PWR_OVERRIDE0: #if !MALI_USE_CSF case JM_CONFIG: - case PRFCNT_CONFIG: #else /* !MALI_USE_CSF */ case CSF_CONFIG: #endif /* !MALI_USE_CSF */ @@ -1499,18 +1434,13 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) midgard_model_update(dummy); midgard_model_get_outputs(dummy); - spin_unlock_irqrestore(&hw_error_status.access_lock, flags); return 1; } u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) { - unsigned long flags; struct dummy_model_t *dummy = (struct dummy_model_t *)h; - - spin_lock_irqsave(&hw_error_status.access_lock, flags); - *value = 0; /* 0 by default */ #if !MALI_USE_CSF if (addr == JOB_CONTROL_REG(JOB_IRQ_JS_STATE)) { @@ -1545,31 +1475,24 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) #endif /* !MALI_USE_CSF */ else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { *value = (dummy->reset_completed_mask << 8) | - ((dummy->clean_caches_completed_irq_enabled ? 1u : 0u) << 17) | - (dummy->power_changed_mask << 9) | (1 << 7) | 1; + (dummy->power_changed_mask << 9) | (1 << 7) | 1; pr_debug("GPU_IRQ_MASK read %x", *value); } else if (addr == GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) { *value = (dummy->power_changed << 9) | (dummy->power_changed << 10) | (dummy->reset_completed << 8) | -#if !MALI_USE_CSF - (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) | -#endif /* !MALI_USE_CSF */ ((dummy->clean_caches_completed ? 1u : 0u) << 17) | - hw_error_status.gpu_error_irq; + (dummy->prfcnt_sample_completed << 16) | hw_error_status.gpu_error_irq; pr_debug("GPU_IRQ_RAWSTAT read %x", *value); } else if (addr == GPU_CONTROL_REG(GPU_IRQ_STATUS)) { *value = ((dummy->power_changed && (dummy->power_changed_mask & 0x1)) << 9) | ((dummy->power_changed && (dummy->power_changed_mask & 0x2)) << 10) | ((dummy->reset_completed & dummy->reset_completed_mask) << 8) | -#if !MALI_USE_CSF - (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) | -#endif /* !MALI_USE_CSF */ (((dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled) ? 1u : 0u) << 17) | - hw_error_status.gpu_error_irq; + (dummy->prfcnt_sample_completed << 16) | hw_error_status.gpu_error_irq; pr_debug("GPU_IRQ_STAT read %x", *value); } else if (addr == GPU_CONTROL_REG(GPU_STATUS)) { *value = 0; @@ -1904,8 +1827,6 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER, counter_index, is_low_word); - } else if (addr == USER_REG(LATEST_FLUSH)) { - *value = 0; } #endif else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) { @@ -1919,20 +1840,18 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) *value = 0; } - spin_unlock_irqrestore(&hw_error_status.access_lock, flags); CSTD_UNUSED(dummy); return 1; } -static u32 set_user_sample_core_type(u64 *counters, u32 *usr_data_start, u32 usr_data_offset, - u32 usr_data_size, u32 core_count) +static u32 set_user_sample_core_type(u64 *counters, + u32 *usr_data_start, u32 usr_data_offset, + u32 usr_data_size, u32 core_count) { u32 sample_size; u32 *usr_data = NULL; - lockdep_assert_held(&performance_counters.access_lock); - sample_size = core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u32); @@ -1947,7 +1866,11 @@ static u32 set_user_sample_core_type(u64 *counters, u32 *usr_data_start, u32 usr u32 i; for (i = 0; i < loop_cnt; i++) { - counters[i] = usr_data[i]; + if (copy_from_user(&counters[i], &usr_data[i], + sizeof(u32))) { + model_error_log(KBASE_CORE, "Unable to set counter sample 2"); + break; + } } } @@ -1961,8 +1884,6 @@ static u32 set_kernel_sample_core_type(u64 *counters, u32 sample_size; u64 *usr_data = NULL; - lockdep_assert_held(&performance_counters.access_lock); - sample_size = core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u64); @@ -1979,70 +1900,49 @@ static u32 set_kernel_sample_core_type(u64 *counters, } /* Counter values injected through ioctl are of 32 bits */ -int gpu_model_set_dummy_prfcnt_user_sample(u32 __user *data, u32 size) +void gpu_model_set_dummy_prfcnt_sample(u32 *usr_data, u32 usr_data_size) { - unsigned long flags; - u32 *user_data; u32 offset = 0; - if (data == NULL || size == 0 || size > KBASE_DUMMY_MODEL_COUNTER_TOTAL * sizeof(u32)) - return -EINVAL; - - /* copy_from_user might sleep so can't be called from inside a spinlock - * allocate a temporary buffer for user data and copy to that before taking - * the lock - */ - user_data = kmalloc(size, GFP_KERNEL); - if (!user_data) - return -ENOMEM; - - if (copy_from_user(user_data, data, size)) { - model_error_log(KBASE_CORE, "Unable to copy prfcnt data from userspace"); - kfree(user_data); - return -EINVAL; - } - - spin_lock_irqsave(&performance_counters.access_lock, flags); #if !MALI_USE_CSF - offset = set_user_sample_core_type(performance_counters.jm_counters, user_data, offset, - size, 1); + offset = set_user_sample_core_type(performance_counters.jm_counters, + usr_data, offset, usr_data_size, 1); #else - offset = set_user_sample_core_type(performance_counters.cshw_counters, user_data, offset, - size, 1); + offset = set_user_sample_core_type(performance_counters.cshw_counters, + usr_data, offset, usr_data_size, 1); #endif /* !MALI_USE_CSF */ - offset = set_user_sample_core_type(performance_counters.tiler_counters, user_data, offset, - size, hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT)); - offset = set_user_sample_core_type(performance_counters.l2_counters, user_data, offset, - size, KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS); - offset = set_user_sample_core_type(performance_counters.shader_counters, user_data, offset, - size, KBASE_DUMMY_MODEL_MAX_SHADER_CORES); - spin_unlock_irqrestore(&performance_counters.access_lock, flags); - - kfree(user_data); - return 0; + offset = set_user_sample_core_type(performance_counters.tiler_counters, + usr_data, offset, usr_data_size, + hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT)); + offset = set_user_sample_core_type(performance_counters.l2_counters, + usr_data, offset, usr_data_size, + KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS); + offset = set_user_sample_core_type(performance_counters.shader_counters, + usr_data, offset, usr_data_size, + KBASE_DUMMY_MODEL_MAX_SHADER_CORES); } /* Counter values injected through kutf are of 64 bits */ -void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *data, u32 size) +void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *usr_data, u32 usr_data_size) { - unsigned long flags; u32 offset = 0; - spin_lock_irqsave(&performance_counters.access_lock, flags); #if !MALI_USE_CSF - offset = set_kernel_sample_core_type(performance_counters.jm_counters, data, offset, size, - 1); + offset = set_kernel_sample_core_type(performance_counters.jm_counters, + usr_data, offset, usr_data_size, 1); #else - offset = set_kernel_sample_core_type(performance_counters.cshw_counters, data, offset, size, - 1); + offset = set_kernel_sample_core_type(performance_counters.cshw_counters, + usr_data, offset, usr_data_size, 1); #endif /* !MALI_USE_CSF */ - offset = set_kernel_sample_core_type(performance_counters.tiler_counters, data, offset, - size, hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT)); - offset = set_kernel_sample_core_type(performance_counters.l2_counters, data, offset, size, - hweight64(performance_counters.l2_present)); - offset = set_kernel_sample_core_type(performance_counters.shader_counters, data, offset, - size, hweight64(performance_counters.shader_present)); - spin_unlock_irqrestore(&performance_counters.access_lock, flags); + offset = set_kernel_sample_core_type(performance_counters.tiler_counters, + usr_data, offset, usr_data_size, + hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT)); + offset = set_kernel_sample_core_type(performance_counters.l2_counters, + usr_data, offset, usr_data_size, + hweight64(performance_counters.l2_present)); + offset = set_kernel_sample_core_type(performance_counters.shader_counters, + usr_data, offset, usr_data_size, + hweight64(performance_counters.shader_present)); } KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_kernel_sample); @@ -2077,12 +1977,21 @@ void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev, } KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_cores); +void gpu_model_set_dummy_prfcnt_base_cpu(u32 *base, struct kbase_device *kbdev, + struct tagged_addr *pages, + size_t page_count) +{ + performance_counters.prfcnt_base_cpu = base; + performance_counters.kbdev = kbdev; + performance_counters.pages = pages; + performance_counters.page_count = page_count; +} + int gpu_model_control(void *model, struct kbase_model_control_params *params) { struct dummy_model_t *dummy = (struct dummy_model_t *)model; int i; - unsigned long flags; if (params->command == KBASE_MC_DISABLE_JOBS) { for (i = 0; i < NUM_SLOTS; i++) @@ -2091,10 +2000,8 @@ int gpu_model_control(void *model, return -EINVAL; } - spin_lock_irqsave(&hw_error_status.access_lock, flags); midgard_model_update(dummy); midgard_model_get_outputs(dummy); - spin_unlock_irqrestore(&hw_error_status.access_lock, flags); return 0; } diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.h b/mali_kbase/backend/gpu/mali_kbase_model_dummy.h index 8eaf1b0..87690f4 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.h +++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.h @@ -116,8 +116,6 @@ struct kbase_error_atom { /*struct to track the system error state*/ struct error_status_t { - spinlock_t access_lock; - u32 errors_mask; u32 mmu_table_level; int faulty_mmu_as; @@ -140,20 +138,6 @@ struct error_status_t { u64 as_transtab[NUM_MMU_AS]; }; -/** - * struct gpu_model_prfcnt_en - Performance counter enable masks - * @fe: Enable mask for front-end block - * @tiler: Enable mask for tiler block - * @l2: Enable mask for L2/Memory system blocks - * @shader: Enable mask for shader core blocks - */ -struct gpu_model_prfcnt_en { - u32 fe; - u32 tiler; - u32 l2; - u32 shader; -}; - void *midgard_model_create(const void *config); void midgard_model_destroy(void *h); u8 midgard_model_write_reg(void *h, u32 addr, u32 value); @@ -164,53 +148,18 @@ int job_atom_inject_error(struct kbase_error_params *params); int gpu_model_control(void *h, struct kbase_model_control_params *params); -/** - * gpu_model_set_dummy_prfcnt_user_sample() - Set performance counter values - * @data: Userspace pointer to array of counter values - * @size: Size of counter value array - * - * Counter values set by this function will be used for one sample dump only - * after which counters will be cleared back to zero. - * - * Return: 0 on success, else error code. - */ -int gpu_model_set_dummy_prfcnt_user_sample(u32 __user *data, u32 size); - -/** - * gpu_model_set_dummy_prfcnt_kernel_sample() - Set performance counter values - * @data: Pointer to array of counter values - * @size: Size of counter value array - * - * Counter values set by this function will be used for one sample dump only - * after which counters will be cleared back to zero. - */ -void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *data, u32 size); - +void gpu_model_set_dummy_prfcnt_sample(u32 *usr_data, u32 usr_data_size); +void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *usr_data, u32 usr_data_size); void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev, u64 *l2_present, u64 *shader_present); void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev, u64 l2_present, u64 shader_present); - +void gpu_model_set_dummy_prfcnt_base_cpu(u32 *base, struct kbase_device *kbdev, + struct tagged_addr *pages, + size_t page_count); /* Clear the counter values array maintained by the dummy model */ void gpu_model_clear_prfcnt_values(void); -#if MALI_USE_CSF -/** - * gpu_model_prfcnt_dump_request() - Request performance counter sample dump. - * @sample_buf: Pointer to KBASE_DUMMY_MODEL_MAX_VALUES_PER_SAMPLE sized array - * in which to store dumped performance counter values. - * @enable_maps: Physical enable maps for performance counter blocks. - */ -void gpu_model_prfcnt_dump_request(uint32_t *sample_buf, struct gpu_model_prfcnt_en enable_maps); - -/** - * gpu_model_glb_request_job_irq() - Trigger job interrupt with global request - * flag set. - * @model: Model pointer returned by midgard_model_create(). - */ -void gpu_model_glb_request_job_irq(void *model); -#endif /* MALI_USE_CSF */ - enum gpu_dummy_irq { GPU_DUMMY_JOB_IRQ, GPU_DUMMY_GPU_IRQ, diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c index fcf98b0..3d92251 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c @@ -437,7 +437,8 @@ static void kbase_pm_l2_clock_slow(struct kbase_device *kbdev) return; /* Stop the metrics gathering framework */ - kbase_pm_metrics_stop(kbdev); + if (kbase_pm_metrics_is_active(kbdev)) + kbase_pm_metrics_stop(kbdev); /* Keep the current freq to restore it upon resume */ kbdev->previous_frequency = clk_get_rate(clk); @@ -879,7 +880,7 @@ void kbase_pm_power_changed(struct kbase_device *kbdev) kbase_pm_update_state(kbdev); #if !MALI_USE_CSF - kbase_backend_slot_update(kbdev); + kbase_backend_slot_update(kbdev); #endif /* !MALI_USE_CSF */ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -989,7 +990,7 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev) { unsigned long flags; - ktime_t end_timestamp = ktime_get_raw(); + ktime_t end_timestamp = ktime_get(); struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; if (!kbdev->arb.arb_if) @@ -1064,7 +1065,6 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->csf.scheduler.lock); lockdep_assert_held(&kbdev->pm.lock); -#ifdef CONFIG_MALI_DEBUG /* In case of no active CSG on slot, powering up L2 could be skipped and * proceed directly to suspend GPU. * ToDo: firmware has to be reloaded after wake-up as no halt command @@ -1074,7 +1074,6 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev) dev_info( kbdev->dev, "No active CSGs. Can skip the power up of L2 and go for suspension directly"); -#endif ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev); if (ret) { diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c index a4d7168..7d14be9 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -92,10 +92,29 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) * for those cores to get powered down */ if ((core_mask & old_core_mask) != old_core_mask) { - if (kbase_pm_wait_for_cores_down_scale(kbdev)) { - dev_warn(kbdev->dev, - "Wait for update of core_mask from %llx to %llx failed", - old_core_mask, core_mask); + bool can_wait; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + can_wait = kbdev->pm.backend.gpu_ready && kbase_pm_is_mcu_desired(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* This check is ideally not required, the wait function can + * deal with the GPU power down. But it has been added to + * address the scenario where down-scaling request comes from + * the platform specific code soon after the GPU power down + * and at the time same time application thread tries to + * power up the GPU (on the flush of GPU queue). + * The platform specific @ref callback_power_on that gets + * invoked on power up does not return until down-scaling + * request is complete. The check mitigates the race caused by + * the problem in platform specific code. + */ + if (likely(can_wait)) { + if (kbase_pm_wait_for_desired_state(kbdev)) { + dev_warn(kbdev->dev, + "Wait for update of core_mask from %llx to %llx failed", + old_core_mask, core_mask); + } } } #endif diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h index 66ca0b6..a249b1e 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -136,7 +136,7 @@ struct kbasep_pm_metrics { * or removed from a GPU slot. * @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device. * @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. - * @lock: spinlock protecting the kbasep_pm_metrics_state structure + * @lock: spinlock protecting the kbasep_pm_metrics_data structure * @platform_data: pointer to data controlled by platform specific code * @kbdev: pointer to kbase device for which metrics are collected * @values: The current values of the power management metrics. The @@ -145,7 +145,7 @@ struct kbasep_pm_metrics { * @initialized: tracks whether metrics_state has been initialized or not. * @timer: timer to regularly make DVFS decisions based on the power * management metrics. - * @timer_state: atomic indicating current @timer state, on, off, or stopped. + * @timer_active: boolean indicating @timer is running * @dvfs_last: values of the PM metrics from the last DVFS tick * @dvfs_diff: different between the current and previous PM metrics. */ @@ -169,7 +169,7 @@ struct kbasep_pm_metrics_state { #ifdef CONFIG_MALI_MIDGARD_DVFS bool initialized; struct hrtimer timer; - atomic_t timer_state; + bool timer_active; struct kbasep_pm_metrics dvfs_last; struct kbasep_pm_metrics dvfs_diff; #endif @@ -572,7 +572,7 @@ struct kbase_pm_backend_data { }; #if MALI_USE_CSF -/* CSF PM flag, signaling that the MCU shader Core should be kept on */ +/* CSF PM flag, signaling that the MCU CORE should be kept on */ #define CSF_DYNAMIC_PM_CORE_KEEP_ON (1 << 0) /* CSF PM flag, signaling no scheduler suspension on idle groups */ #define CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE (1 << 1) diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c index aab07c9..52e228c 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -656,38 +656,6 @@ static void kbase_pm_enable_mcu_db_notification(struct kbase_device *kbdev) val &= ~MCU_CNTRL_DOORBELL_DISABLE_MASK; kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), val); } - -/** - * wait_mcu_as_inactive - Wait for AS used by MCU FW to get configured - * - * @kbdev: Pointer to the device. - * - * This function is called to wait for the AS used by MCU FW to get configured - * before DB notification on MCU is enabled, as a workaround for HW issue. - */ -static void wait_mcu_as_inactive(struct kbase_device *kbdev) -{ - unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TURSEHW_2716)) - return; - - /* Wait for the AS_ACTIVE_INT bit to become 0 for the AS used by MCU FW */ - while (--max_loops && - kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) & - AS_STATUS_AS_ACTIVE_INT) - ; - - if (!WARN_ON_ONCE(max_loops == 0)) - return; - - dev_err(kbdev->dev, "AS_ACTIVE_INT bit stuck for AS %d used by MCU FW", MCU_AS_NR); - - if (kbase_prepare_to_reset_gpu(kbdev, 0)) - kbase_reset_gpu(kbdev); -} #endif /** @@ -697,10 +665,10 @@ static void wait_mcu_as_inactive(struct kbase_device *kbdev) * @kbdev: Pointer to the device * @enable: boolean indicating to enable interrupts or not * - * The POWER_CHANGED_ALL interrupt can be disabled after L2 has been turned on - * when FW is controlling the power for the shader cores. Correspondingly, the - * interrupts can be re-enabled after the MCU has been disabled before the - * power down of L2. + * The POWER_CHANGED_ALL and POWER_CHANGED_SINGLE interrupts can be disabled + * after L2 has been turned on when FW is controlling the power for the shader + * cores. Correspondingly, the interrupts can be re-enabled after the MCU has + * been disabled before the power down of L2. */ static void kbasep_pm_toggle_power_interrupt(struct kbase_device *kbdev, bool enable) { @@ -711,15 +679,15 @@ static void kbasep_pm_toggle_power_interrupt(struct kbase_device *kbdev, bool en irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); #ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS + (void)enable; /* For IFPO, we require the POWER_CHANGED_ALL interrupt to be always on */ - enable = true; -#endif - if (enable) { - irq_mask |= POWER_CHANGED_ALL; - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), POWER_CHANGED_ALL); - } else { - irq_mask &= ~POWER_CHANGED_ALL; - } + irq_mask |= POWER_CHANGED_ALL | POWER_CHANGED_SINGLE; +#else + if (enable) + irq_mask |= POWER_CHANGED_ALL | POWER_CHANGED_SINGLE; + else + irq_mask &= ~(POWER_CHANGED_ALL | POWER_CHANGED_SINGLE); +#endif /* CONFIG_MALI_HOST_CONTROLS_SC_RAILS */ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask); } @@ -953,7 +921,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) case KBASE_MCU_ON_PEND_HALT: if (kbase_csf_firmware_mcu_halted(kbdev)) { - KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_MCU_HALTED, NULL, + KBASE_KTRACE_ADD(kbdev, MCU_HALTED, NULL, kbase_csf_ktrace_gpu_cycle_cnt(kbdev)); if (kbdev->csf.firmware_hctl_core_pwr) backend->mcu_state = @@ -1000,7 +968,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) case KBASE_MCU_ON_PEND_SLEEP: if (kbase_csf_firmware_is_mcu_in_sleep(kbdev)) { - KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_MCU_SLEEP, NULL, + KBASE_KTRACE_ADD(kbdev, MCU_IN_SLEEP, NULL, kbase_csf_ktrace_gpu_cycle_cnt(kbdev)); backend->mcu_state = KBASE_MCU_IN_SLEEP; kbase_pm_enable_db_mirror_interrupt(kbdev); @@ -1016,7 +984,6 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) case KBASE_MCU_IN_SLEEP: if (kbase_pm_is_mcu_desired(kbdev) && backend->l2_state == KBASE_L2_ON) { - wait_mcu_as_inactive(kbdev); KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP( kbdev, kbase_backend_get_cycle_cnt(kbdev)); kbase_pm_enable_mcu_db_notification(kbdev); @@ -1027,7 +994,6 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) if (!kbdev->csf.firmware_hctl_core_pwr) kbasep_pm_toggle_power_interrupt(kbdev, false); backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; - kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); } break; #endif @@ -1154,24 +1120,13 @@ static bool can_power_down_l2(struct kbase_device *kbdev) #endif } -static bool need_tiler_control(struct kbase_device *kbdev) -{ -#if MALI_USE_CSF - if (kbase_pm_no_mcu_core_pwroff(kbdev)) - return true; - else - return false; -#else - return true; -#endif -} - static int kbase_pm_l2_update_state(struct kbase_device *kbdev) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; u64 l2_present = kbdev->gpu_props.curr_config.l2_present; +#if !MALI_USE_CSF u64 tiler_present = kbdev->gpu_props.props.raw_props.tiler_present; - bool l2_power_up_done; +#endif enum kbase_l2_core_state prev_state; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -1182,18 +1137,24 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) KBASE_PM_CORE_L2); u64 l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2); -#ifdef CONFIG_MALI_ARBITER_SUPPORT - u64 tiler_trans = kbase_pm_get_trans_cores( - kbdev, KBASE_PM_CORE_TILER); - u64 tiler_ready = kbase_pm_get_ready_cores( - kbdev, KBASE_PM_CORE_TILER); + +#if !MALI_USE_CSF + u64 tiler_trans = kbase_pm_get_trans_cores(kbdev, + KBASE_PM_CORE_TILER); + u64 tiler_ready = kbase_pm_get_ready_cores(kbdev, + KBASE_PM_CORE_TILER); +#endif /* * kbase_pm_get_ready_cores and kbase_pm_get_trans_cores * are vulnerable to corruption if gpu is lost */ if (kbase_is_gpu_removed(kbdev) +#ifdef CONFIG_MALI_ARBITER_SUPPORT || kbase_pm_is_gpu_lost(kbdev)) { +#else + ) { +#endif backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF; backend->hwcnt_desired = false; @@ -1216,45 +1177,32 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) } break; } -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ /* mask off ready from trans in case transitions finished * between the register reads */ l2_trans &= ~l2_ready; - +#if !MALI_USE_CSF + tiler_trans &= ~tiler_ready; +#endif prev_state = backend->l2_state; switch (backend->l2_state) { case KBASE_L2_OFF: if (kbase_pm_is_l2_desired(kbdev)) { -#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) - /* Enable HW timer of IPA control before - * L2 cache is powered-up. - */ - kbase_ipa_control_handle_gpu_sleep_exit(kbdev); -#endif /* * Set the desired config for L2 before * powering it on */ kbase_pm_l2_config_override(kbdev); kbase_pbha_write_settings(kbdev); - - /* If Host is controlling the power for shader - * cores, then it also needs to control the - * power for Tiler. - * Powering on the tiler will also power the - * L2 cache. - */ - if (need_tiler_control(kbdev)) { - kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, tiler_present, - ACTION_PWRON); - } else { - kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_present, - ACTION_PWRON); - } #if !MALI_USE_CSF + /* L2 is required, power on. Powering on the + * tiler will also power the first L2 cache. + */ + kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, + tiler_present, ACTION_PWRON); + /* If we have more than one L2 cache then we * must power them on explicitly. */ @@ -1264,36 +1212,30 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) ACTION_PWRON); /* Clear backend slot submission kctx */ kbase_pm_l2_clear_backend_slot_submit_kctx(kbdev); +#else + /* With CSF firmware, Host driver doesn't need to + * handle power management with both shader and tiler cores. + * The CSF firmware will power up the cores appropriately. + * So only power the l2 cache explicitly. + */ + kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, + l2_present, ACTION_PWRON); #endif backend->l2_state = KBASE_L2_PEND_ON; } break; case KBASE_L2_PEND_ON: - l2_power_up_done = false; +#if !MALI_USE_CSF + if (!l2_trans && l2_ready == l2_present && !tiler_trans + && tiler_ready == tiler_present) { + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, + tiler_ready); +#else if (!l2_trans && l2_ready == l2_present) { - if (need_tiler_control(kbdev)) { -#ifndef CONFIG_MALI_ARBITER_SUPPORT - u64 tiler_trans = kbase_pm_get_trans_cores( - kbdev, KBASE_PM_CORE_TILER); - u64 tiler_ready = kbase_pm_get_ready_cores( - kbdev, KBASE_PM_CORE_TILER); -#endif - - tiler_trans &= ~tiler_ready; - if (!tiler_trans && tiler_ready == tiler_present) { - KBASE_KTRACE_ADD(kbdev, - PM_CORES_CHANGE_AVAILABLE_TILER, - NULL, tiler_ready); - l2_power_up_done = true; - } - } else { - KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, - l2_ready); - l2_power_up_done = true; - } - } - if (l2_power_up_done) { + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, + l2_ready); +#endif /* * Ensure snoops are enabled after L2 is powered * up. Note that kbase keeps track of the snoop @@ -1489,26 +1431,12 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) /* We only need to check the L2 here - if the L2 * is off then the tiler is definitely also off. */ - if (!l2_trans && !l2_ready) { -#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) - /* Allow clock gating within the GPU and prevent it - * from being seen as active during sleep. - */ - kbase_ipa_control_handle_gpu_sleep_enter(kbdev); -#endif + if (!l2_trans && !l2_ready) /* L2 is now powered off */ backend->l2_state = KBASE_L2_OFF; - } } else { - if (!kbdev->cache_clean_in_progress) { -#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) - /* Allow clock gating within the GPU and prevent it - * from being seen as active during sleep. - */ - kbase_ipa_control_handle_gpu_sleep_enter(kbdev); -#endif + if (!kbdev->cache_clean_in_progress) backend->l2_state = KBASE_L2_OFF; - } } break; @@ -2365,14 +2293,12 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev) /* Wait for cores */ #if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE - remaining = wait_event_killable_timeout(kbdev->pm.backend.gpu_in_desired_state_wait, - kbase_pm_is_in_desired_state_with_l2_powered(kbdev), - timeout); + remaining = wait_event_killable_timeout( #else remaining = wait_event_timeout( +#endif kbdev->pm.backend.gpu_in_desired_state_wait, kbase_pm_is_in_desired_state_with_l2_powered(kbdev), timeout); -#endif if (!remaining) { kbase_pm_timed_out(kbdev); @@ -2427,66 +2353,6 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) } KBASE_EXPORT_TEST_API(kbase_pm_wait_for_desired_state); -#if MALI_USE_CSF -/** - * core_mask_update_done - Check if downscaling of shader cores is done - * - * @kbdev: The kbase device structure for the device. - * - * This function checks if the downscaling of cores is effectively complete. - * - * Return: true if the downscale is done. - */ -static bool core_mask_update_done(struct kbase_device *kbdev) -{ - bool update_done = false; - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - /* If MCU is in stable ON state then it implies that the downscale - * request had completed. - * If MCU is not active then it implies all cores are off, so can - * consider the downscale request as complete. - */ - if ((kbdev->pm.backend.mcu_state == KBASE_MCU_ON) || - kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state)) - update_done = true; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return update_done; -} - -int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev) -{ - long timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT)); - long remaining; - int err = 0; - - /* Wait for core mask update to complete */ -#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE - remaining = wait_event_killable_timeout( - kbdev->pm.backend.gpu_in_desired_state_wait, - core_mask_update_done(kbdev), timeout); -#else - remaining = wait_event_timeout( - kbdev->pm.backend.gpu_in_desired_state_wait, - core_mask_update_done(kbdev), timeout); -#endif - - if (!remaining) { - kbase_pm_timed_out(kbdev); - err = -ETIMEDOUT; - } else if (remaining < 0) { - dev_info( - kbdev->dev, - "Wait for cores down scaling got interrupted"); - err = (int)remaining; - } - - return err; -} -#endif - void kbase_pm_enable_interrupts(struct kbase_device *kbdev) { unsigned long flags; @@ -2550,21 +2416,14 @@ static void update_user_reg_page_mapping(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->pm.lock); mutex_lock(&kbdev->csf.reg_lock); - - /* Only if the mappings for USER page exist, update all PTEs associated to it */ - if (kbdev->csf.nr_user_page_mapped > 0) { - if (likely(kbdev->csf.mali_file_inode)) { - /* This would zap the pte corresponding to the mapping of User - * register page for all the Kbase contexts. - */ - unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping, - BASEP_MEM_CSF_USER_REG_PAGE_HANDLE, PAGE_SIZE, 1); - } else { - dev_err(kbdev->dev, - "Device file inode not exist even if USER page previously mapped"); - } + if (kbdev->csf.mali_file_inode) { + /* This would zap the pte corresponding to the mapping of User + * register page for all the Kbase contexts. + */ + unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping, + BASEP_MEM_CSF_USER_REG_PAGE_HANDLE, + PAGE_SIZE, 1); } - mutex_unlock(&kbdev->csf.reg_lock); } #endif diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h index cd5a6a3..68ded7d 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h @@ -269,37 +269,6 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); */ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev); -#if MALI_USE_CSF -/** - * kbase_pm_wait_for_cores_down_scale - Wait for the downscaling of shader cores - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * - * This function can be called to ensure that the downscaling of cores is - * effectively complete and it would be safe to lower the voltage. - * The function assumes that caller had exercised the MCU state machine for the - * downscale request through the kbase_pm_update_state() function. - * - * This function needs to be used by the caller to safely wait for the completion - * of downscale request, instead of kbase_pm_wait_for_desired_state(). - * The downscale request would trigger a state change in MCU state machine - * and so when MCU reaches the stable ON state, it can be inferred that - * downscaling is complete. But it has been observed that the wake up of the - * waiting thread can get delayed by few milli seconds and by the time the - * thread wakes up the power down transition could have started (after the - * completion of downscale request). - * On the completion of power down transition another wake up signal would be - * sent, but again by the time thread wakes up the power up transition can begin. - * And the power up transition could then get blocked inside the platform specific - * callback_power_on() function due to the thread that called into Kbase (from the - * platform specific code) to perform the downscaling and then ended up waiting - * for the completion of downscale request. - * - * Return: 0 on success, error code on error or remaining jiffies on timeout. - */ -int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev); -#endif - /** * kbase_pm_update_dynamic_cores_onoff - Update the L2 and shader power state * machines after changing shader core @@ -831,7 +800,7 @@ bool kbase_pm_no_runnables_sched_suspendable(struct kbase_device *kbdev) /** * kbase_pm_no_mcu_core_pwroff - Check whether the PM is required to keep the - * MCU shader Core powered in accordance to the active + * MCU core powered in accordance to the active * power management policy * * @kbdev: Device pointer diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c index 2df6804..f85b466 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,7 +24,6 @@ */ #include <mali_kbase.h> -#include <mali_kbase_config_defaults.h> #include <mali_kbase_pm.h> #include <backend/gpu/mali_kbase_pm_internal.h> @@ -49,51 +48,27 @@ #define GPU_ACTIVE_SCALING_FACTOR ((u64)1E9) #endif -/* - * Possible state transitions - * ON -> ON | OFF | STOPPED - * STOPPED -> ON | OFF - * OFF -> ON - * - * - * ┌─e─┐┌────────────f─────────────┐ - * │ v│ v - * └───ON ──a──> STOPPED ──b──> OFF - * ^^ │ │ - * │└──────c─────┘ │ - * │ │ - * └─────────────d─────────────┘ - * - * Transition effects: - * a. None - * b. Timer expires without restart - * c. Timer is not stopped, timer period is unaffected - * d. Timer must be restarted - * e. Callback is executed and the timer is restarted - * f. Timer is cancelled, or the callback is waited on if currently executing. This is called during - * tear-down and should not be subject to a race from an OFF->ON transition - */ -enum dvfs_metric_timer_state { TIMER_OFF, TIMER_STOPPED, TIMER_ON }; - #ifdef CONFIG_MALI_MIDGARD_DVFS static enum hrtimer_restart dvfs_callback(struct hrtimer *timer) { + unsigned long flags; struct kbasep_pm_metrics_state *metrics; - if (WARN_ON(!timer)) - return HRTIMER_NORESTART; + KBASE_DEBUG_ASSERT(timer != NULL); metrics = container_of(timer, struct kbasep_pm_metrics_state, timer); + kbase_pm_get_dvfs_action(metrics->kbdev); - /* Transition (b) to fully off if timer was stopped, don't restart the timer in this case */ - if (atomic_cmpxchg(&metrics->timer_state, TIMER_STOPPED, TIMER_OFF) != TIMER_ON) - return HRTIMER_NORESTART; + spin_lock_irqsave(&metrics->lock, flags); - kbase_pm_get_dvfs_action(metrics->kbdev); + if (metrics->timer_active) + hrtimer_start(timer, + HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period), + HRTIMER_MODE_REL); + + spin_unlock_irqrestore(&metrics->lock, flags); - /* Set the new expiration time and restart (transition e) */ - hrtimer_forward_now(timer, HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period)); - return HRTIMER_RESTART; + return HRTIMER_NORESTART; } #endif /* CONFIG_MALI_MIDGARD_DVFS */ @@ -108,7 +83,7 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev) KBASE_DEBUG_ASSERT(kbdev != NULL); kbdev->pm.backend.metrics.kbdev = kbdev; - kbdev->pm.backend.metrics.time_period_start = ktime_get_raw(); + kbdev->pm.backend.metrics.time_period_start = ktime_get(); kbdev->pm.backend.metrics.values.time_busy = 0; kbdev->pm.backend.metrics.values.time_idle = 0; kbdev->pm.backend.metrics.values.time_in_protm = 0; @@ -136,7 +111,7 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev) #else KBASE_DEBUG_ASSERT(kbdev != NULL); kbdev->pm.backend.metrics.kbdev = kbdev; - kbdev->pm.backend.metrics.time_period_start = ktime_get_raw(); + kbdev->pm.backend.metrics.time_period_start = ktime_get(); kbdev->pm.backend.metrics.gpu_active = false; kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; @@ -159,7 +134,6 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev) HRTIMER_MODE_REL); kbdev->pm.backend.metrics.timer.function = dvfs_callback; kbdev->pm.backend.metrics.initialized = true; - atomic_set(&kbdev->pm.backend.metrics.timer_state, TIMER_OFF); kbase_pm_metrics_start(kbdev); #endif /* CONFIG_MALI_MIDGARD_DVFS */ @@ -178,12 +152,16 @@ KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init); void kbasep_pm_metrics_term(struct kbase_device *kbdev) { #ifdef CONFIG_MALI_MIDGARD_DVFS + unsigned long flags; + KBASE_DEBUG_ASSERT(kbdev != NULL); - /* Cancel the timer, and block if the callback is currently executing (transition f) */ - kbdev->pm.backend.metrics.initialized = false; - atomic_set(&kbdev->pm.backend.metrics.timer_state, TIMER_OFF); + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + kbdev->pm.backend.metrics.timer_active = false; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + hrtimer_cancel(&kbdev->pm.backend.metrics.timer); + kbdev->pm.backend.metrics.initialized = false; #endif /* CONFIG_MALI_MIDGARD_DVFS */ #if MALI_USE_CSF @@ -221,7 +199,7 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev) * elapsed time. The lock taken inside kbase_ipa_control_query() * function can cause lot of variation. */ - now = ktime_get_raw(); + now = ktime_get(); if (err) { dev_err(kbdev->dev, @@ -253,14 +231,12 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev) * time. */ if (!kbdev->pm.backend.metrics.skip_gpu_active_sanity_check) { - /* The margin is scaled to allow for the worst-case - * scenario where the samples are maximally separated, - * plus a small offset for sampling errors. + /* Use a margin value that is approximately 1% of the time + * difference. */ - u64 const MARGIN_NS = - IPA_CONTROL_TIMER_DEFAULT_VALUE_MS * NSEC_PER_MSEC * 3 / 2; + u64 margin_ns = diff_ns >> 6; - if (gpu_active_counter > (diff_ns + MARGIN_NS)) { + if (gpu_active_counter > (diff_ns + margin_ns)) { dev_info( kbdev->dev, "GPU activity takes longer than time interval: %llu ns > %llu ns", @@ -355,7 +331,7 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, #if MALI_USE_CSF kbase_pm_get_dvfs_utilisation_calc(kbdev); #else - kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get_raw()); + kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get()); #endif memset(diff, 0, sizeof(*diff)); @@ -420,33 +396,57 @@ void kbase_pm_get_dvfs_action(struct kbase_device *kbdev) bool kbase_pm_metrics_is_active(struct kbase_device *kbdev) { + bool isactive; + unsigned long flags; + KBASE_DEBUG_ASSERT(kbdev != NULL); - return atomic_read(&kbdev->pm.backend.metrics.timer_state) == TIMER_ON; + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + isactive = kbdev->pm.backend.metrics.timer_active; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + + return isactive; } KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active); void kbase_pm_metrics_start(struct kbase_device *kbdev) { - struct kbasep_pm_metrics_state *metrics = &kbdev->pm.backend.metrics; + unsigned long flags; + bool update = true; - if (unlikely(!metrics->initialized)) + if (unlikely(!kbdev->pm.backend.metrics.initialized)) return; - /* Transition to ON, from a stopped state (transition c) */ - if (atomic_xchg(&metrics->timer_state, TIMER_ON) == TIMER_OFF) - /* Start the timer only if it's been fully stopped (transition d)*/ - hrtimer_start(&metrics->timer, HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period), - HRTIMER_MODE_REL); + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + if (!kbdev->pm.backend.metrics.timer_active) + kbdev->pm.backend.metrics.timer_active = true; + else + update = false; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + + if (update) + hrtimer_start(&kbdev->pm.backend.metrics.timer, + HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period), + HRTIMER_MODE_REL); } void kbase_pm_metrics_stop(struct kbase_device *kbdev) { + unsigned long flags; + bool update = true; + if (unlikely(!kbdev->pm.backend.metrics.initialized)) return; - /* Timer is Stopped if its currently on (transition a) */ - atomic_cmpxchg(&kbdev->pm.backend.metrics.timer_state, TIMER_ON, TIMER_STOPPED); + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + if (kbdev->pm.backend.metrics.timer_active) + kbdev->pm.backend.metrics.timer_active = false; + else + update = false; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + + if (update) + hrtimer_cancel(&kbdev->pm.backend.metrics.timer); } @@ -512,7 +512,7 @@ void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp) spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); if (!timestamp) { - now = ktime_get_raw(); + now = ktime_get(); timestamp = &now; } diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c index deeb1b5..5f16434 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c @@ -310,7 +310,7 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, mutex_lock(&kbdev->pm.backend.policy_change_lock); if (kbase_reset_gpu_prevent_and_wait(kbdev)) { - dev_warn(kbdev->dev, "Set PM policy failing to prevent gpu reset"); + dev_warn(kbdev->dev, "Set PM policy failed to prevent gpu reset"); reset_op_prevented = false; } @@ -332,7 +332,7 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, * the always_on policy, reflected by the CSF_DYNAMIC_PM_CORE_KEEP_ON * flag bit. */ - sched_suspend = reset_op_prevented && + sched_suspend = kbdev->csf.firmware_inited && reset_op_prevented && (CSF_DYNAMIC_PM_CORE_KEEP_ON & (new_policy_csf_pm_sched_flags | kbdev->pm.backend.csf_pm_sched_flags)); diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c index 5110e3d..a83206a 100644 --- a/mali_kbase/backend/gpu/mali_kbase_time.c +++ b/mali_kbase/backend/gpu/mali_kbase_time.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016, 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,9 +21,6 @@ #include <mali_kbase.h> #include <mali_kbase_hwaccess_time.h> -#if MALI_USE_CSF -#include <csf/mali_kbase_csf_timeout.h> -#endif #include <device/mali_kbase_device.h> #include <backend/gpu/mali_kbase_pm_internal.h> #include <mali_kbase_config_defaults.h> @@ -116,17 +113,13 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, */ u64 timeout, nr_cycles = 0; - u64 freq_khz; - + /* Default value to mean 'no cap' */ + u64 timeout_cap = U64_MAX; + u64 freq_khz = kbdev->lowest_gpu_freq_khz; /* Only for debug messages, safe default in case it's mis-maintained */ const char *selector_str = "(unknown)"; - if (WARN(!kbdev->lowest_gpu_freq_khz, - "Lowest frequency uninitialized! Using reference frequency for scaling")) { - freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ; - } else { - freq_khz = kbdev->lowest_gpu_freq_khz; - } + WARN_ON(!freq_khz); switch (selector) { case KBASE_TIMEOUT_SELECTOR_COUNT: @@ -142,15 +135,16 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, fallthrough; case CSF_FIRMWARE_TIMEOUT: selector_str = "CSF_FIRMWARE_TIMEOUT"; - /* Any FW timeout cannot be longer than the FW ping interval, after which - * the firmware_aliveness_monitor will be triggered and may restart - * the GPU if the FW is unresponsive. + nr_cycles = CSF_FIRMWARE_TIMEOUT_CYCLES; + /* Setup a cap on CSF FW timeout to FIRMWARE_PING_INTERVAL_MS, + * if calculated timeout exceeds it. This should be adapted to + * a direct timeout comparison once the + * FIRMWARE_PING_INTERVAL_MS option is added to this timeout + * function. A compile-time check such as BUILD_BUG_ON can also + * be done once the firmware ping interval in cycles becomes + * available as a macro. */ - nr_cycles = min(CSF_FIRMWARE_PING_TIMEOUT_CYCLES, CSF_FIRMWARE_TIMEOUT_CYCLES); - - if (nr_cycles == CSF_FIRMWARE_PING_TIMEOUT_CYCLES) - dev_warn(kbdev->dev, "Capping %s to CSF_FIRMWARE_PING_TIMEOUT\n", - selector_str); + timeout_cap = FIRMWARE_PING_INTERVAL_MS; break; case CSF_PM_TIMEOUT: selector_str = "CSF_PM_TIMEOUT"; @@ -160,33 +154,21 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, selector_str = "CSF_GPU_RESET_TIMEOUT"; nr_cycles = CSF_GPU_RESET_TIMEOUT_CYCLES; break; - case CSF_CSG_SUSPEND_TIMEOUT: - selector_str = "CSF_CSG_SUSPEND_TIMEOUT"; - nr_cycles = CSF_CSG_SUSPEND_TIMEOUT_CYCLES; - break; - case CSF_FIRMWARE_BOOT_TIMEOUT: - selector_str = "CSF_FIRMWARE_BOOT_TIMEOUT"; - nr_cycles = CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES; - break; - case CSF_FIRMWARE_PING_TIMEOUT: - selector_str = "CSF_FIRMWARE_PING_TIMEOUT"; - nr_cycles = CSF_FIRMWARE_PING_TIMEOUT_CYCLES; - break; - case CSF_SCHED_PROTM_PROGRESS_TIMEOUT: - selector_str = "CSF_SCHED_PROTM_PROGRESS_TIMEOUT"; - nr_cycles = kbase_csf_timeout_get(kbdev); - break; #endif } timeout = div_u64(nr_cycles, freq_khz); + if (timeout > timeout_cap) { + dev_dbg(kbdev->dev, "Capped %s %llu to %llu", selector_str, + (unsigned long long)timeout, (unsigned long long)timeout_cap); + timeout = timeout_cap; + } if (WARN(timeout > UINT_MAX, "Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms", (unsigned long long)timeout, selector_str, (unsigned long long)freq_khz)) timeout = UINT_MAX; return (unsigned int)timeout; } -KBASE_EXPORT_TEST_API(kbase_get_timeout_ms); u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev) { |