author     Vamsidhar reddy Gaddam <gvamsi@google.com>    2023-12-20 12:42:26 +0000
committer  Vamsidhar reddy Gaddam <gvamsi@google.com>    2024-01-05 09:19:17 +0000
commit     11473542814286e59a89a70c969fb50a25ba921f (patch)
tree       bd4aa60e7d3dc895d82a36fcea0026569e3a04aa /mali_kbase/backend
parent     8768eedce66a4373c96f35c8dfb73d4668703180 (diff)
parent     049a542207ed694271316782397b78b2e202086a (diff)
download   gpu-11473542814286e59a89a70c969fb50a25ba921f.tar.gz
Merge branch 'upstream' into HEAD
Update KMD to R47P0
Bug: 315267052
Test: Outlined in go/pixel-gpu-kmd-r47p0
Change-Id: I89454c4c862033fe330b260a9bc6cc777a3ca231
Signed-off-by: Vamsidhar reddy Gaddam <gvamsi@google.com>
Diffstat (limited to 'mali_kbase/backend')
38 files changed, 2755 insertions, 3109 deletions
diff --git a/mali_kbase/backend/gpu/mali_kbase_backend_config.h b/mali_kbase/backend/gpu/mali_kbase_backend_config.h
deleted file mode 100644
index 6924fdb..0000000
--- a/mali_kbase/backend/gpu/mali_kbase_backend_config.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- *
- * (C) COPYRIGHT 2014-2018, 2020-2021 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-/*
- * Backend specific configuration
- */
-
-#ifndef _KBASE_BACKEND_CONFIG_H_
-#define _KBASE_BACKEND_CONFIG_H_
-
-#endif /* _KBASE_BACKEND_CONFIG_H_ */
-
diff --git a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c
index 86539d5..aa84364 100644
--- a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c
@@ -22,59 +22,43 @@
 #include "backend/gpu/mali_kbase_cache_policy_backend.h"
 #include <device/mali_kbase_device.h>
-/**
- * kbasep_amba_register_present() - Check AMBA_<> register is present
- * in the GPU.
- * @kbdev: Device pointer
- *
- * Note: Only for arch version 12.x.1 onwards.
- *
- * Return: true if AMBA_FEATURES/ENABLE registers are present.
- */ -static bool kbasep_amba_register_present(struct kbase_device *kbdev) +void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, u32 mode) { - return (ARCH_MAJOR_REV_REG(kbdev->gpu_props.props.raw_props.gpu_id) >= - GPU_ID2_ARCH_MAJOR_REV_MAKE(12, 1)); -} -void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, - u32 mode) -{ kbdev->current_gpu_coherency_mode = mode; - if (kbasep_amba_register_present(kbdev)) { - u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(AMBA_ENABLE)); +#if MALI_USE_CSF + if (kbdev->gpu_props.gpu_id.arch_id >= GPU_ID_ARCH_MAKE(12, 0, 1)) { + /* AMBA_ENABLE present from 12.0.1 */ + u32 val = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(AMBA_ENABLE)); val = AMBA_ENABLE_COHERENCY_PROTOCOL_SET(val, mode); - kbase_reg_write(kbdev, GPU_CONTROL_REG(AMBA_ENABLE), val); - } else - kbase_reg_write(kbdev, GPU_CONTROL_REG(COHERENCY_ENABLE), mode); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(AMBA_ENABLE), val); + } else { + /* Fallback to COHERENCY_ENABLE for older versions */ + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(COHERENCY_ENABLE), mode); + } +#else /* MALI_USE_CSF */ + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(COHERENCY_ENABLE), mode); +#endif /* MALI_USE_CSF */ } -u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev) +void kbase_amba_set_shareable_cache_support(struct kbase_device *kbdev) { - u32 coherency_features; - - if (kbasep_amba_register_present(kbdev)) - coherency_features = - kbase_reg_read(kbdev, GPU_CONTROL_REG(AMBA_FEATURES)); - else - coherency_features = kbase_reg_read( - kbdev, GPU_CONTROL_REG(COHERENCY_FEATURES)); - - return coherency_features; -} +#if MALI_USE_CSF -void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev, - bool enable) -{ - if (kbasep_amba_register_present(kbdev)) { - u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(AMBA_ENABLE)); + /* AMBA registers only present from 12.0.1 */ + if (kbdev->gpu_props.gpu_id.arch_id < GPU_ID_ARCH_MAKE(12, 0, 1)) + return; - val = AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SET(val, enable); - kbase_reg_write(kbdev, GPU_CONTROL_REG(AMBA_ENABLE), val); + if (kbdev->system_coherency != COHERENCY_NONE) { + u32 val = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(AMBA_FEATURES)); - } else { - WARN(1, "memory_cache_support not supported"); + if (AMBA_FEATURES_SHAREABLE_CACHE_SUPPORT_GET(val)) { + val = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(AMBA_ENABLE)); + val = AMBA_ENABLE_SHAREABLE_CACHE_SUPPORT_SET(val, 1); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(AMBA_ENABLE), val); + } } +#endif /* MALI_USE_CSF */ } diff --git a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h index 0103695..7317f14 100644 --- a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h +++ b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h @@ -22,8 +22,9 @@ #ifndef _KBASE_CACHE_POLICY_BACKEND_H_ #define _KBASE_CACHE_POLICY_BACKEND_H_ -#include "mali_kbase.h" -#include <uapi/gpu/arm/midgard/mali_base_kernel.h> +#include <linux/types.h> + +struct kbase_device; /** * kbase_cache_set_coherency_mode() - Sets the system coherency mode @@ -31,26 +32,14 @@ * @kbdev: Device pointer * @mode: Coherency mode. COHERENCY_ACE/ACE_LITE */ -void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, - u32 mode); - -/** - * kbase_cache_get_coherency_features() - Get the coherency features - * in the GPU. 
- * @kbdev: Device pointer - * - * Return: Register value to be returned - */ -u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev); +void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, u32 mode); /** - * kbase_amba_set_memory_cache_support() - Sets AMBA memory cache support - * in the GPU. + * kbase_amba_set_shareable_cache_support() - Sets AMBA shareable cache support + * in the GPU. * @kbdev: Device pointer - * @enable: true for enable. * * Note: Only for arch version 12.x.1 onwards. */ -void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev, - bool enable); +void kbase_amba_set_shareable_cache_support(struct kbase_device *kbdev); #endif /* _KBASE_CACHE_POLICY_BACKEND_H_ */ diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c index cca4f74..e47dd44 100644 --- a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c +++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c @@ -74,8 +74,7 @@ get_clk_rate_trace_callbacks(__maybe_unused struct kbase_device *kbdev) return callbacks; } -static int gpu_clk_rate_change_notifier(struct notifier_block *nb, - unsigned long event, void *data) +static int gpu_clk_rate_change_notifier(struct notifier_block *nb, unsigned long event, void *data) { struct kbase_gpu_clk_notifier_data *ndata = data; struct kbase_clk_data *clk_data = @@ -88,10 +87,9 @@ static int gpu_clk_rate_change_notifier(struct notifier_block *nb, spin_lock_irqsave(&clk_rtm->lock, flags); if (event == POST_RATE_CHANGE) { - if (!clk_rtm->gpu_idle && - (clk_data->clock_val != ndata->new_rate)) { - kbase_clk_rate_trace_manager_notify_all( - clk_rtm, clk_data->index, ndata->new_rate); + if (!clk_rtm->gpu_idle && (clk_data->clock_val != ndata->new_rate)) { + kbase_clk_rate_trace_manager_notify_all(clk_rtm, clk_data->index, + ndata->new_rate); } clk_data->clock_val = ndata->new_rate; @@ -101,8 +99,7 @@ static int gpu_clk_rate_change_notifier(struct notifier_block *nb, return NOTIFY_DONE; } -static int gpu_clk_data_init(struct kbase_device *kbdev, - void *gpu_clk_handle, unsigned int index) +static int gpu_clk_data_init(struct kbase_device *kbdev, void *gpu_clk_handle, unsigned int index) { struct kbase_clk_rate_trace_op_conf *callbacks; struct kbase_clk_data *clk_data; @@ -111,44 +108,42 @@ static int gpu_clk_data_init(struct kbase_device *kbdev, callbacks = get_clk_rate_trace_callbacks(kbdev); - if (WARN_ON(!callbacks) || - WARN_ON(!gpu_clk_handle) || + if (WARN_ON(!callbacks) || WARN_ON(!gpu_clk_handle) || WARN_ON(index >= BASE_MAX_NR_CLOCKS_REGULATORS)) return -EINVAL; clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL); if (!clk_data) { - dev_err(kbdev->dev, "Failed to allocate data for clock enumerated at index %u", index); + dev_err(kbdev->dev, "Failed to allocate data for clock enumerated at index %u", + index); return -ENOMEM; } clk_data->index = (u8)index; clk_data->gpu_clk_handle = gpu_clk_handle; /* Store the initial value of clock */ - clk_data->clock_val = - callbacks->get_gpu_clk_rate(kbdev, gpu_clk_handle); + clk_data->clock_val = callbacks->get_gpu_clk_rate(kbdev, gpu_clk_handle); { /* At the initialization time, GPU is powered off. 
*/ unsigned long flags; spin_lock_irqsave(&clk_rtm->lock, flags); - kbase_clk_rate_trace_manager_notify_all( - clk_rtm, clk_data->index, 0); + kbase_clk_rate_trace_manager_notify_all(clk_rtm, clk_data->index, 0); spin_unlock_irqrestore(&clk_rtm->lock, flags); } clk_data->clk_rtm = clk_rtm; clk_rtm->clks[index] = clk_data; - clk_data->clk_rate_change_nb.notifier_call = - gpu_clk_rate_change_notifier; + clk_data->clk_rate_change_nb.notifier_call = gpu_clk_rate_change_notifier; if (callbacks->gpu_clk_notifier_register) - ret = callbacks->gpu_clk_notifier_register(kbdev, - gpu_clk_handle, &clk_data->clk_rate_change_nb); + ret = callbacks->gpu_clk_notifier_register(kbdev, gpu_clk_handle, + &clk_data->clk_rate_change_nb); if (ret) { - dev_err(kbdev->dev, "Failed to register notifier for clock enumerated at index %u", index); + dev_err(kbdev->dev, "Failed to register notifier for clock enumerated at index %u", + index); kfree(clk_data); } @@ -176,8 +171,7 @@ int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev) clk_rtm->gpu_idle = true; for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { - void *gpu_clk_handle = - callbacks->enumerate_gpu_clk(kbdev, i); + void *gpu_clk_handle = callbacks->enumerate_gpu_clk(kbdev, i); if (!gpu_clk_handle) break; @@ -202,8 +196,8 @@ int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev) error: while (i--) { clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister( - kbdev, clk_rtm->clks[i]->gpu_clk_handle, - &clk_rtm->clks[i]->clk_rate_change_nb); + kbdev, clk_rtm->clks[i]->gpu_clk_handle, + &clk_rtm->clks[i]->clk_rate_change_nb); kfree(clk_rtm->clks[i]); } @@ -225,9 +219,9 @@ void kbase_clk_rate_trace_manager_term(struct kbase_device *kbdev) break; if (clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister) - clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister - (kbdev, clk_rtm->clks[i]->gpu_clk_handle, - &clk_rtm->clks[i]->clk_rate_change_nb); + clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister( + kbdev, clk_rtm->clks[i]->gpu_clk_handle, + &clk_rtm->clks[i]->clk_rate_change_nb); kfree(clk_rtm->clks[i]); } @@ -254,8 +248,8 @@ void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev) if (unlikely(!clk_data->clock_val)) continue; - kbase_clk_rate_trace_manager_notify_all( - clk_rtm, clk_data->index, clk_data->clock_val); + kbase_clk_rate_trace_manager_notify_all(clk_rtm, clk_data->index, + clk_data->clock_val); } clk_rtm->gpu_idle = false; @@ -282,18 +276,15 @@ void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev) if (unlikely(!clk_data->clock_val)) continue; - kbase_clk_rate_trace_manager_notify_all( - clk_rtm, clk_data->index, 0); + kbase_clk_rate_trace_manager_notify_all(clk_rtm, clk_data->index, 0); } clk_rtm->gpu_idle = true; spin_unlock_irqrestore(&clk_rtm->lock, flags); } -void kbase_clk_rate_trace_manager_notify_all( - struct kbase_clk_rate_trace_manager *clk_rtm, - u32 clk_index, - unsigned long new_rate) +void kbase_clk_rate_trace_manager_notify_all(struct kbase_clk_rate_trace_manager *clk_rtm, + u32 clk_index, unsigned long new_rate) { struct kbase_clk_rate_listener *pos; struct kbase_device *kbdev; @@ -302,8 +293,8 @@ void kbase_clk_rate_trace_manager_notify_all( kbdev = container_of(clk_rtm, struct kbase_device, pm.clk_rtm); - dev_dbg(kbdev->dev, "%s - GPU clock %u rate changed to %lu, pid: %d", - __func__, clk_index, new_rate, current->pid); + dev_dbg(kbdev->dev, "%s - GPU clock %u rate changed to %lu, pid: %d", __func__, clk_index, + new_rate, current->pid); /* Raise standard 
`power/gpu_frequency` ftrace event */ { diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h index 35b3b8d..81a1e15 100644 --- a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h +++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -90,9 +90,9 @@ void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev); * * kbase_clk_rate_trace_manager:lock must be held by the caller. */ -static inline void kbase_clk_rate_trace_manager_subscribe_no_lock( - struct kbase_clk_rate_trace_manager *clk_rtm, - struct kbase_clk_rate_listener *listener) +static inline void +kbase_clk_rate_trace_manager_subscribe_no_lock(struct kbase_clk_rate_trace_manager *clk_rtm, + struct kbase_clk_rate_listener *listener) { lockdep_assert_held(&clk_rtm->lock); list_add(&listener->node, &clk_rtm->listeners); @@ -104,15 +104,14 @@ static inline void kbase_clk_rate_trace_manager_subscribe_no_lock( * @clk_rtm: Clock rate manager instance. * @listener: Listener handle */ -static inline void kbase_clk_rate_trace_manager_subscribe( - struct kbase_clk_rate_trace_manager *clk_rtm, - struct kbase_clk_rate_listener *listener) +static inline void +kbase_clk_rate_trace_manager_subscribe(struct kbase_clk_rate_trace_manager *clk_rtm, + struct kbase_clk_rate_listener *listener) { unsigned long flags; spin_lock_irqsave(&clk_rtm->lock, flags); - kbase_clk_rate_trace_manager_subscribe_no_lock( - clk_rtm, listener); + kbase_clk_rate_trace_manager_subscribe_no_lock(clk_rtm, listener); spin_unlock_irqrestore(&clk_rtm->lock, flags); } @@ -122,9 +121,9 @@ static inline void kbase_clk_rate_trace_manager_subscribe( * @clk_rtm: Clock rate manager instance. * @listener: Listener handle */ -static inline void kbase_clk_rate_trace_manager_unsubscribe( - struct kbase_clk_rate_trace_manager *clk_rtm, - struct kbase_clk_rate_listener *listener) +static inline void +kbase_clk_rate_trace_manager_unsubscribe(struct kbase_clk_rate_trace_manager *clk_rtm, + struct kbase_clk_rate_listener *listener) { unsigned long flags; @@ -145,10 +144,7 @@ static inline void kbase_clk_rate_trace_manager_unsubscribe( * This function is exported to be used by clock rate trace test * portal. 
*/ -void kbase_clk_rate_trace_manager_notify_all( - struct kbase_clk_rate_trace_manager *clk_rtm, - u32 clock_index, - unsigned long new_rate); +void kbase_clk_rate_trace_manager_notify_all(struct kbase_clk_rate_trace_manager *clk_rtm, + u32 clock_index, unsigned long new_rate); #endif /* _KBASE_CLK_RATE_TRACE_MGR_ */ - diff --git a/mali_kbase/backend/gpu/mali_kbase_debug_job_fault_backend.c b/mali_kbase/backend/gpu/mali_kbase_debug_job_fault_backend.c index cd3b29d..af8d1e3 100644 --- a/mali_kbase/backend/gpu/mali_kbase_debug_job_fault_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_debug_job_fault_backend.c @@ -21,71 +21,51 @@ #include <mali_kbase.h> #include <device/mali_kbase_device.h> +#include <hw_access/mali_kbase_hw_access.h> #include "mali_kbase_debug_job_fault.h" #if IS_ENABLED(CONFIG_DEBUG_FS) /*GPU_CONTROL_REG(r)*/ -static int gpu_control_reg_snapshot[] = { - GPU_ID, - SHADER_READY_LO, - SHADER_READY_HI, - TILER_READY_LO, - TILER_READY_HI, - L2_READY_LO, - L2_READY_HI -}; +static unsigned int gpu_control_reg_snapshot[] = { GPU_CONTROL_ENUM(GPU_ID), + GPU_CONTROL_ENUM(SHADER_READY), + GPU_CONTROL_ENUM(TILER_READY), + GPU_CONTROL_ENUM(L2_READY) }; /* JOB_CONTROL_REG(r) */ -static int job_control_reg_snapshot[] = { - JOB_IRQ_MASK, - JOB_IRQ_STATUS -}; +static unsigned int job_control_reg_snapshot[] = { JOB_CONTROL_ENUM(JOB_IRQ_MASK), + JOB_CONTROL_ENUM(JOB_IRQ_STATUS) }; /* JOB_SLOT_REG(n,r) */ -static int job_slot_reg_snapshot[] = { - JS_HEAD_LO, - JS_HEAD_HI, - JS_TAIL_LO, - JS_TAIL_HI, - JS_AFFINITY_LO, - JS_AFFINITY_HI, - JS_CONFIG, - JS_STATUS, - JS_HEAD_NEXT_LO, - JS_HEAD_NEXT_HI, - JS_AFFINITY_NEXT_LO, - JS_AFFINITY_NEXT_HI, - JS_CONFIG_NEXT +static unsigned int job_slot_reg_snapshot[] = { + JOB_SLOT_ENUM(0, HEAD) - JOB_SLOT_BASE_ENUM(0), + JOB_SLOT_ENUM(0, TAIL) - JOB_SLOT_BASE_ENUM(0), + JOB_SLOT_ENUM(0, AFFINITY) - JOB_SLOT_BASE_ENUM(0), + JOB_SLOT_ENUM(0, CONFIG) - JOB_SLOT_BASE_ENUM(0), + JOB_SLOT_ENUM(0, STATUS) - JOB_SLOT_BASE_ENUM(0), + JOB_SLOT_ENUM(0, HEAD_NEXT) - JOB_SLOT_BASE_ENUM(0), + JOB_SLOT_ENUM(0, AFFINITY_NEXT) - JOB_SLOT_BASE_ENUM(0), + JOB_SLOT_ENUM(0, CONFIG_NEXT) - JOB_SLOT_BASE_ENUM(0) }; /*MMU_CONTROL_REG(r)*/ -static int mmu_reg_snapshot[] = { - MMU_IRQ_MASK, - MMU_IRQ_STATUS -}; +static unsigned int mmu_reg_snapshot[] = { MMU_CONTROL_ENUM(IRQ_MASK), + MMU_CONTROL_ENUM(IRQ_STATUS) }; /* MMU_AS_REG(n,r) */ -static int as_reg_snapshot[] = { - AS_TRANSTAB_LO, - AS_TRANSTAB_HI, - AS_TRANSCFG_LO, - AS_TRANSCFG_HI, - AS_MEMATTR_LO, - AS_MEMATTR_HI, - AS_FAULTSTATUS, - AS_FAULTADDRESS_LO, - AS_FAULTADDRESS_HI, - AS_STATUS -}; - -bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, - int reg_range) +static unsigned int as_reg_snapshot[] = { MMU_AS_ENUM(0, TRANSTAB) - MMU_AS_BASE_ENUM(0), + MMU_AS_ENUM(0, TRANSCFG) - MMU_AS_BASE_ENUM(0), + MMU_AS_ENUM(0, MEMATTR) - MMU_AS_BASE_ENUM(0), + MMU_AS_ENUM(0, FAULTSTATUS) - MMU_AS_BASE_ENUM(0), + MMU_AS_ENUM(0, FAULTADDRESS) - MMU_AS_BASE_ENUM(0), + MMU_AS_ENUM(0, STATUS) - MMU_AS_BASE_ENUM(0) }; + +bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, int reg_range) { - int i, j; + uint i, j; int offset = 0; - int slot_number; - int as_number; + uint slot_number; + uint as_number; if (kctx->reg_dump == NULL) return false; @@ -94,50 +74,61 @@ bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, as_number = kctx->kbdev->gpu_props.num_address_spaces; /* get the GPU control registers*/ - for (i = 0; i < sizeof(gpu_control_reg_snapshot)/4; i++) { - 
kctx->reg_dump[offset] = - GPU_CONTROL_REG(gpu_control_reg_snapshot[i]); - offset += 2; + for (i = 0; i < ARRAY_SIZE(gpu_control_reg_snapshot); i++) { + kctx->reg_dump[offset] = gpu_control_reg_snapshot[i]; + if (kbase_reg_is_size64(kctx->kbdev, kctx->reg_dump[offset])) + offset += 4; + else + offset += 2; } /* get the Job control registers*/ - for (i = 0; i < sizeof(job_control_reg_snapshot)/4; i++) { - kctx->reg_dump[offset] = - JOB_CONTROL_REG(job_control_reg_snapshot[i]); - offset += 2; + for (i = 0; i < ARRAY_SIZE(job_control_reg_snapshot); i++) { + kctx->reg_dump[offset] = job_control_reg_snapshot[i]; + if (kbase_reg_is_size64(kctx->kbdev, kctx->reg_dump[offset])) + offset += 4; + else + offset += 2; } /* get the Job Slot registers*/ - for (j = 0; j < slot_number; j++) { - for (i = 0; i < sizeof(job_slot_reg_snapshot)/4; i++) { - kctx->reg_dump[offset] = - JOB_SLOT_REG(j, job_slot_reg_snapshot[i]); - offset += 2; + for (j = 0; j < slot_number; j++) { + for (i = 0; i < ARRAY_SIZE(job_slot_reg_snapshot); i++) { + kctx->reg_dump[offset] = JOB_SLOT_BASE_OFFSET(j) + job_slot_reg_snapshot[i]; + if (kbase_reg_is_size64(kctx->kbdev, kctx->reg_dump[offset])) + offset += 4; + else + offset += 2; } } /* get the MMU registers*/ - for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) { - kctx->reg_dump[offset] = MMU_CONTROL_REG(mmu_reg_snapshot[i]); - offset += 2; + for (i = 0; i < ARRAY_SIZE(mmu_reg_snapshot); i++) { + kctx->reg_dump[offset] = mmu_reg_snapshot[i]; + if (kbase_reg_is_size64(kctx->kbdev, kctx->reg_dump[offset])) + offset += 4; + else + offset += 2; } /* get the Address space registers*/ for (j = 0; j < as_number; j++) { - for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) { - kctx->reg_dump[offset] = MMU_STAGE1_REG(MMU_AS_REG(j, as_reg_snapshot[i])); - offset += 2; + for (i = 0; i < ARRAY_SIZE(as_reg_snapshot); i++) { + kctx->reg_dump[offset] = MMU_AS_BASE_OFFSET(j) + as_reg_snapshot[i]; + if (kbase_reg_is_size64(kctx->kbdev, kctx->reg_dump[offset])) + offset += 4; + else + offset += 2; } } - WARN_ON(offset >= (reg_range*2/4)); + WARN_ON(offset >= (reg_range * 2 / 4)); /* set the termination flag*/ kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG; kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG; - dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n", - offset); + dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n", offset); return true; } @@ -145,18 +136,32 @@ bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx) { int offset = 0; + u32 reg_enum; + u64 val64; if (kctx->reg_dump == NULL) return false; while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) { - kctx->reg_dump[offset+1] = - kbase_reg_read(kctx->kbdev, - kctx->reg_dump[offset]); - offset += 2; + reg_enum = kctx->reg_dump[offset]; + /* Get register offset from enum */ + kbase_reg_get_offset(kctx->kbdev, reg_enum, &kctx->reg_dump[offset]); + + if (kbase_reg_is_size64(kctx->kbdev, reg_enum)) { + val64 = kbase_reg_read64(kctx->kbdev, reg_enum); + + /* offset computed offset to get _HI offset */ + kctx->reg_dump[offset + 2] = kctx->reg_dump[offset] + 4; + + kctx->reg_dump[offset + 1] = (u32)(val64 & 0xFFFFFFFF); + kctx->reg_dump[offset + 3] = (u32)(val64 >> 32); + offset += 4; + } else { + kctx->reg_dump[offset + 1] = kbase_reg_read32(kctx->kbdev, reg_enum); + offset += 2; + } } return true; } - #endif diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_defs.h 
b/mali_kbase/backend/gpu/mali_kbase_defs.h index 136aa52..bff0b72 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_defs.h +++ b/mali_kbase/backend/gpu/mali_kbase_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -50,7 +50,7 @@ struct rb_entry { * u64 for serving as tagged value. * @kctx: Pointer to kbase context. */ -#define SLOT_RB_TAG_KCTX(kctx) (u64)((uintptr_t)(kctx)) +#define SLOT_RB_TAG_KCTX(kctx) ((u64)(uintptr_t)(kctx)) /** * struct slot_rb - Slot ringbuffer * @entries: Ringbuffer entries @@ -109,21 +109,21 @@ struct kbase_backend_data { atomic_t reset_gpu; /* The GPU reset isn't pending */ -#define KBASE_RESET_GPU_NOT_PENDING 0 +#define KBASE_RESET_GPU_NOT_PENDING 0 /* kbase_prepare_to_reset_gpu has been called */ -#define KBASE_RESET_GPU_PREPARED 1 +#define KBASE_RESET_GPU_PREPARED 1 /* kbase_reset_gpu has been called - the reset will now definitely happen * within the timeout period */ -#define KBASE_RESET_GPU_COMMITTED 2 -/* The GPU reset process is currently occuring (timeout has expired or +#define KBASE_RESET_GPU_COMMITTED 2 +/* The GPU reset process is currently occurring (timeout has expired or * kbasep_try_reset_gpu_early was called) */ -#define KBASE_RESET_GPU_HAPPENING 3 +#define KBASE_RESET_GPU_HAPPENING 3 /* Reset the GPU silently, used when resetting the GPU as part of normal * behavior (e.g. when exiting protected mode). */ -#define KBASE_RESET_GPU_SILENT 4 +#define KBASE_RESET_GPU_SILENT 4 struct workqueue_struct *reset_workq; struct work_struct reset_work; wait_queue_head_t reset_wait; diff --git a/mali_kbase/backend/gpu/mali_kbase_devfreq.c b/mali_kbase/backend/gpu/mali_kbase_devfreq.c index a389cd9..905d188 100644 --- a/mali_kbase/backend/gpu/mali_kbase_devfreq.c +++ b/mali_kbase/backend/gpu/mali_kbase_devfreq.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -73,8 +73,8 @@ static unsigned long get_voltage(struct kbase_device *kbdev, unsigned long freq) return voltage; } -void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq, - u64 *core_mask, unsigned long *freqs, unsigned long *volts) +void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq, u64 *core_mask, + unsigned long *freqs, unsigned long *volts) { unsigned int i; @@ -84,10 +84,8 @@ void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq, *core_mask = kbdev->devfreq_table[i].core_mask; for (j = 0; j < kbdev->nr_clocks; j++) { - freqs[j] = - kbdev->devfreq_table[i].real_freqs[j]; - volts[j] = - kbdev->devfreq_table[i].opp_volts[j]; + freqs[j] = kbdev->devfreq_table[i].real_freqs[j]; + volts[j] = kbdev->devfreq_table[i].opp_volts[j]; } break; @@ -100,7 +98,7 @@ void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq, if (i == kbdev->num_opps) { unsigned long voltage = get_voltage(kbdev, freq); - *core_mask = kbdev->gpu_props.props.raw_props.shader_present; + *core_mask = kbdev->gpu_props.shader_present; for (i = 0; i < kbdev->nr_clocks; i++) { freqs[i] = freq; @@ -109,17 +107,16 @@ void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq, } } -static int -kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) +static int kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) { struct kbase_device *kbdev = dev_get_drvdata(dev); struct dev_pm_opp *opp; unsigned long nominal_freq; - unsigned long freqs[BASE_MAX_NR_CLOCKS_REGULATORS] = {0}; + unsigned long freqs[BASE_MAX_NR_CLOCKS_REGULATORS] = { 0 }; #if IS_ENABLED(CONFIG_REGULATOR) - unsigned long original_freqs[BASE_MAX_NR_CLOCKS_REGULATORS] = {0}; + unsigned long original_freqs[BASE_MAX_NR_CLOCKS_REGULATORS] = { 0 }; #endif - unsigned long volts[BASE_MAX_NR_CLOCKS_REGULATORS] = {0}; + unsigned long volts[BASE_MAX_NR_CLOCKS_REGULATORS] = { 0 }; unsigned int i; u64 core_mask; @@ -148,8 +145,7 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) return 0; } - kbase_devfreq_opp_translate(kbdev, nominal_freq, &core_mask, - freqs, volts); + kbase_devfreq_opp_translate(kbdev, nominal_freq, &core_mask, freqs, volts); #if IS_ENABLED(CONFIG_REGULATOR) /* Regulators and clocks work in pairs: every clock has a regulator, @@ -167,18 +163,16 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) * (even if that happens for a short transition interval). 
*/ for (i = 0; i < kbdev->nr_clocks; i++) { - if (kbdev->regulators[i] && - kbdev->current_voltages[i] != volts[i] && - kbdev->current_freqs[i] < freqs[i]) { + if (kbdev->regulators[i] && kbdev->current_voltages[i] != volts[i] && + kbdev->current_freqs[i] < freqs[i]) { int err; - err = regulator_set_voltage(kbdev->regulators[i], - volts[i], volts[i]); + err = regulator_set_voltage(kbdev->regulators[i], volts[i], volts[i]); if (!err) { kbdev->current_voltages[i] = volts[i]; } else { - dev_err(dev, "Failed to increase voltage (%d) (target %lu)\n", - err, volts[i]); + dev_err(dev, "Failed to increase voltage (%d) (target %lu)\n", err, + volts[i]); return err; } } @@ -196,8 +190,8 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) #endif kbdev->current_freqs[i] = freqs[i]; } else { - dev_err(dev, "Failed to set clock %lu (target %lu)\n", - freqs[i], *target_freq); + dev_err(dev, "Failed to set clock %lu (target %lu)\n", freqs[i], + *target_freq); return err; } } @@ -207,18 +201,16 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) #if IS_ENABLED(CONFIG_REGULATOR) for (i = 0; i < kbdev->nr_clocks; i++) { - if (kbdev->regulators[i] && - kbdev->current_voltages[i] != volts[i] && - original_freqs[i] > freqs[i]) { + if (kbdev->regulators[i] && kbdev->current_voltages[i] != volts[i] && + original_freqs[i] > freqs[i]) { int err; - err = regulator_set_voltage(kbdev->regulators[i], - volts[i], volts[i]); + err = regulator_set_voltage(kbdev->regulators[i], volts[i], volts[i]); if (!err) { kbdev->current_voltages[i] = volts[i]; } else { - dev_err(dev, "Failed to decrease voltage (%d) (target %lu)\n", - err, volts[i]); + dev_err(dev, "Failed to decrease voltage (%d) (target %lu)\n", err, + volts[i]); return err; } } @@ -241,8 +233,7 @@ void kbase_devfreq_force_freq(struct kbase_device *kbdev, unsigned long freq) kbase_devfreq_target(kbdev->dev, &target_freq, 0); } -static int -kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq) +static int kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq) { struct kbase_device *kbdev = dev_get_drvdata(dev); @@ -251,8 +242,7 @@ kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq) return 0; } -static int -kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat) +static int kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat) { struct kbase_device *kbdev = dev_get_drvdata(dev); struct kbasep_pm_metrics diff; @@ -271,11 +261,10 @@ kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat) return 0; } -static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev, - struct devfreq_dev_profile *dp) +static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev, struct devfreq_dev_profile *dp) { int count; - int i = 0; + unsigned int i = 0; unsigned long freq; struct dev_pm_opp *opp; @@ -289,15 +278,14 @@ static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev, if (count < 0) return count; - dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]), - GFP_KERNEL); + dp->freq_table = kmalloc_array((size_t)count, sizeof(dp->freq_table[0]), GFP_KERNEL); if (!dp->freq_table) return -ENOMEM; #if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE rcu_read_lock(); #endif - for (i = 0, freq = ULONG_MAX; i < count; i++, freq--) { + for (i = 0, freq = ULONG_MAX; i < (unsigned int)count; i++, freq--) { opp = dev_pm_opp_find_freq_floor(kbdev->dev, &freq); if (IS_ERR(opp)) break; @@ -311,9 +299,8 @@ static int 
kbase_devfreq_init_freq_table(struct kbase_device *kbdev, rcu_read_unlock(); #endif - if (count != i) - dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n", - count, i); + if ((unsigned int)count != i) + dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%u\n", count, i); dp->max_state = i; @@ -357,8 +344,7 @@ static void kbase_devfreq_exit(struct device *dev) kbase_devfreq_term_freq_table(kbdev); } -static void kbasep_devfreq_read_suspend_clock(struct kbase_device *kbdev, - struct device_node *node) +static void kbasep_devfreq_read_suspend_clock(struct kbase_device *kbdev, struct device_node *node) { u64 freq = 0; int err = 0; @@ -387,8 +373,7 @@ static void kbasep_devfreq_read_suspend_clock(struct kbase_device *kbdev, return; kbdev->pm.backend.gpu_clock_suspend_freq = freq; - dev_info(kbdev->dev, - "suspend clock %llu by opp-mali-errata-1485982", freq); + dev_info(kbdev->dev, "suspend clock %llu by opp-mali-errata-1485982", freq); } static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) @@ -403,12 +388,12 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) */ return 0; #else - struct device_node *opp_node = of_parse_phandle(kbdev->dev->of_node, - "operating-points-v2", 0); + struct device_node *opp_node = + of_parse_phandle(kbdev->dev->of_node, "operating-points-v2", 0); struct device_node *node; - int i = 0; + unsigned int i = 0; int count; - u64 shader_present = kbdev->gpu_props.props.raw_props.shader_present; + u64 shader_present = kbdev->gpu_props.shader_present; if (!opp_node) return 0; @@ -416,15 +401,14 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) return 0; count = dev_pm_opp_get_opp_count(kbdev->dev); - kbdev->devfreq_table = kmalloc_array(count, - sizeof(struct kbase_devfreq_opp), GFP_KERNEL); + kbdev->devfreq_table = + kmalloc_array((size_t)count, sizeof(struct kbase_devfreq_opp), GFP_KERNEL); if (!kbdev->devfreq_table) return -ENOMEM; for_each_available_child_of_node(opp_node, node) { const void *core_count_p; - u64 core_mask, opp_freq, - real_freqs[BASE_MAX_NR_CLOCKS_REGULATORS]; + u64 core_mask, opp_freq, real_freqs[BASE_MAX_NR_CLOCKS_REGULATORS]; int err; #if IS_ENABLED(CONFIG_REGULATOR) u32 opp_volts[BASE_MAX_NR_CLOCKS_REGULATORS]; @@ -436,30 +420,27 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) err = of_property_read_u64(node, "opp-hz", &opp_freq); if (err) { - dev_warn(kbdev->dev, "Failed to read opp-hz property with error %d\n", - err); + dev_warn(kbdev->dev, "Failed to read opp-hz property with error %d\n", err); continue; } - #if BASE_MAX_NR_CLOCKS_REGULATORS > 1 - err = of_property_read_u64_array(node, "opp-hz-real", - real_freqs, kbdev->nr_clocks); + err = of_property_read_u64_array(node, "opp-hz-real", real_freqs, kbdev->nr_clocks); #else WARN_ON(kbdev->nr_clocks != 1); err = of_property_read_u64(node, "opp-hz-real", real_freqs); #endif if (err < 0) { dev_warn(kbdev->dev, "Failed to read opp-hz-real property with error %d\n", - err); + err); continue; } #if IS_ENABLED(CONFIG_REGULATOR) - err = of_property_read_u32_array(node, - "opp-microvolt", opp_volts, kbdev->nr_regulators); + err = of_property_read_u32_array(node, "opp-microvolt", opp_volts, + kbdev->nr_regulators); if (err < 0) { - dev_warn(kbdev->dev, "Failed to read opp-microvolt property with error %d\n", - err); + dev_warn(kbdev->dev, + "Failed to read opp-microvolt property with error %d\n", err); continue; } #endif @@ -467,16 +448,16 @@ static int 
kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) if (of_property_read_u64(node, "opp-core-mask", &core_mask)) core_mask = shader_present; if (core_mask != shader_present && corestack_driver_control) { - - dev_warn(kbdev->dev, "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU\n", - opp_freq); + dev_warn( + kbdev->dev, + "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU\n", + opp_freq); continue; } core_count_p = of_get_property(node, "opp-core-count", NULL); if (core_count_p) { - u64 remaining_core_mask = - kbdev->gpu_props.props.raw_props.shader_present; + u64 remaining_core_mask = kbdev->gpu_props.shader_present; int core_count = be32_to_cpup(core_count_p); core_mask = 0; @@ -489,8 +470,8 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) return -ENODEV; } - core_mask |= (1ull << (core-1)); - remaining_core_mask &= ~(1ull << (core-1)); + core_mask |= (1ull << (core - 1)); + remaining_core_mask &= ~(1ull << (core - 1)); } } @@ -502,24 +483,22 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) kbdev->devfreq_table[i].opp_freq = opp_freq; kbdev->devfreq_table[i].core_mask = core_mask; if (kbdev->nr_clocks > 0) { - int j; + unsigned int j; for (j = 0; j < kbdev->nr_clocks; j++) - kbdev->devfreq_table[i].real_freqs[j] = - real_freqs[j]; + kbdev->devfreq_table[i].real_freqs[j] = real_freqs[j]; } #if IS_ENABLED(CONFIG_REGULATOR) if (kbdev->nr_regulators > 0) { - int j; + unsigned int j; for (j = 0; j < kbdev->nr_regulators; j++) - kbdev->devfreq_table[i].opp_volts[j] = - opp_volts[j]; + kbdev->devfreq_table[i].opp_volts[j] = opp_volts[j]; } #endif - dev_info(kbdev->dev, "OPP %d : opp_freq=%llu core_mask=%llx\n", - i, opp_freq, core_mask); + dev_info(kbdev->dev, "OPP %d : opp_freq=%llu core_mask=%llx\n", i, opp_freq, + core_mask); i++; } @@ -552,10 +531,9 @@ static const char *kbase_devfreq_req_type_name(enum kbase_devfreq_work_type type static void kbase_devfreq_suspend_resume_worker(struct work_struct *work) { - struct kbase_devfreq_queue_info *info = container_of(work, - struct kbase_devfreq_queue_info, work); - struct kbase_device *kbdev = container_of(info, struct kbase_device, - devfreq_queue); + struct kbase_devfreq_queue_info *info = + container_of(work, struct kbase_devfreq_queue_info, work); + struct kbase_device *kbdev = container_of(info, struct kbase_device, devfreq_queue); unsigned long flags; enum kbase_devfreq_work_type type, acted_type; @@ -565,8 +543,7 @@ static void kbase_devfreq_suspend_resume_worker(struct work_struct *work) acted_type = kbdev->devfreq_queue.acted_type; dev_dbg(kbdev->dev, "Worker handles queued req: %s (acted: %s)\n", - kbase_devfreq_req_type_name(type), - kbase_devfreq_req_type_name(acted_type)); + kbase_devfreq_req_type_name(type), kbase_devfreq_req_type_name(acted_type)); switch (type) { case DEVFREQ_WORK_SUSPEND: case DEVFREQ_WORK_RESUME: @@ -586,8 +563,7 @@ static void kbase_devfreq_suspend_resume_worker(struct work_struct *work) } } -void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, - enum kbase_devfreq_work_type work_type) +void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, enum kbase_devfreq_work_type work_type) { unsigned long flags; @@ -596,12 +572,10 @@ void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, /* Skip enqueuing a work if workqueue has already been terminated. 
*/ if (likely(kbdev->devfreq_queue.workq)) { kbdev->devfreq_queue.req_type = work_type; - queue_work(kbdev->devfreq_queue.workq, - &kbdev->devfreq_queue.work); + queue_work(kbdev->devfreq_queue.workq, &kbdev->devfreq_queue.work); } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - dev_dbg(kbdev->dev, "Enqueuing devfreq req: %s\n", - kbase_devfreq_req_type_name(work_type)); + dev_dbg(kbdev->dev, "Enqueuing devfreq req: %s\n", kbase_devfreq_req_type_name(work_type)); } static int kbase_devfreq_work_init(struct kbase_device *kbdev) @@ -613,8 +587,7 @@ static int kbase_devfreq_work_init(struct kbase_device *kbdev) if (!kbdev->devfreq_queue.workq) return -ENOMEM; - INIT_WORK(&kbdev->devfreq_queue.work, - kbase_devfreq_suspend_resume_worker); + INIT_WORK(&kbdev->devfreq_queue.work, kbase_devfreq_suspend_resume_worker); return 0; } @@ -645,10 +618,7 @@ int kbase_devfreq_init(struct kbase_device *kbdev) for (i = 0; i < kbdev->nr_clocks; i++) { if (kbdev->clocks[i]) - kbdev->current_freqs[i] = - clk_get_rate(kbdev->clocks[i]); - else - kbdev->current_freqs[i] = 0; + kbdev->current_freqs[i] = clk_get_rate(kbdev->clocks[i]); } kbdev->current_nominal_freq = kbdev->current_freqs[0]; @@ -666,8 +636,7 @@ int kbase_devfreq_init(struct kbase_device *kbdev) if (dp->max_state > 0) { /* Record the maximum frequency possible */ - kbdev->gpu_props.props.core_props.gpu_freq_khz_max = - dp->freq_table[0] / 1000; + kbdev->gpu_props.gpu_freq_khz_max = dp->freq_table[0] / 1000; } #if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) @@ -682,8 +651,7 @@ int kbase_devfreq_init(struct kbase_device *kbdev) if (err) goto init_core_mask_table_failed; - kbdev->devfreq = devfreq_add_device(kbdev->dev, dp, - "simple_ondemand", NULL); + kbdev->devfreq = devfreq_add_device(kbdev->dev, dp, "simple_ondemand", NULL); if (IS_ERR(kbdev->devfreq)) { err = PTR_ERR(kbdev->devfreq); kbdev->devfreq = NULL; @@ -708,20 +676,16 @@ int kbase_devfreq_init(struct kbase_device *kbdev) err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq); if (err) { - dev_err(kbdev->dev, - "Failed to register OPP notifier (%d)", err); + dev_err(kbdev->dev, "Failed to register OPP notifier (%d)", err); goto opp_notifier_failed; } #if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) kbdev->devfreq_cooling = of_devfreq_cooling_register_power( - kbdev->dev->of_node, - kbdev->devfreq, - &kbase_ipa_power_model_ops); + kbdev->dev->of_node, kbdev->devfreq, &kbase_ipa_power_model_ops); if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) { err = PTR_ERR_OR_ZERO(kbdev->devfreq_cooling); - dev_err(kbdev->dev, - "Failed to register cooling device (%d)", err); + dev_err(kbdev->dev, "Failed to register cooling device (%d)", err); err = err == 0 ? -ENODEV : err; goto cooling_reg_failed; } diff --git a/mali_kbase/backend/gpu/mali_kbase_devfreq.h b/mali_kbase/backend/gpu/mali_kbase_devfreq.h index ac88b02..d75157c 100644 --- a/mali_kbase/backend/gpu/mali_kbase_devfreq.h +++ b/mali_kbase/backend/gpu/mali_kbase_devfreq.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014, 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,6 +22,14 @@ #ifndef _BASE_DEVFREQ_H_ #define _BASE_DEVFREQ_H_ +/** + * kbase_devfreq_init - Initialize kbase device for DevFreq. 
+ * @kbdev: Device pointer + * + * This function must be called only when a kbase device is initialized. + * + * Return: 0 on success. + */ int kbase_devfreq_init(struct kbase_device *kbdev); void kbase_devfreq_term(struct kbase_device *kbdev); @@ -39,8 +47,7 @@ void kbase_devfreq_force_freq(struct kbase_device *kbdev, unsigned long freq); * @kbdev: Device pointer * @work_type: The type of the devfreq work item, i.e. suspend or resume */ -void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, - enum kbase_devfreq_work_type work_type); +void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, enum kbase_devfreq_work_type work_type); /** * kbase_devfreq_opp_translate - Translate nominal OPP frequency from devicetree @@ -57,6 +64,6 @@ void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, * untranslated frequency (and corresponding voltage) and all cores enabled. * The voltages returned are in micro Volts (uV). */ -void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq, - u64 *core_mask, unsigned long *freqs, unsigned long *volts); +void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq, u64 *core_mask, + unsigned long *freqs, unsigned long *volts); #endif /* _BASE_DEVFREQ_H_ */ diff --git a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c index 10e92ec..414ad54 100644 --- a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,175 +25,112 @@ #include <mali_kbase.h> #include <device/mali_kbase_device.h> -#include <backend/gpu/mali_kbase_pm_internal.h> -#include <backend/gpu/mali_kbase_cache_policy_backend.h> #include <mali_kbase_hwaccess_gpuprops.h> +#include <mali_kbase_gpuprops_private_types.h> -int kbase_backend_gpuprops_get(struct kbase_device *kbdev, - struct kbase_gpuprops_regdump *regdump) +int kbase_backend_gpuprops_get(struct kbase_device *kbdev, struct kbasep_gpuprops_regdump *regdump) { - int i; - struct kbase_gpuprops_regdump registers = { 0 }; - - /* Fill regdump with the content of the relevant registers */ - registers.gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)); - - registers.l2_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_FEATURES)); - - registers.tiler_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_FEATURES)); - registers.mem_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(MEM_FEATURES)); - registers.mmu_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(MMU_FEATURES)); - registers.as_present = kbase_reg_read(kbdev, - GPU_CONTROL_REG(AS_PRESENT)); + uint i; + + /* regdump is zero intiialized, individual entries do not need to be explicitly set */ + regdump->gpu_id = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(GPU_ID)); + + regdump->shader_present = kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(SHADER_PRESENT)); + regdump->tiler_present = kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(TILER_PRESENT)); + regdump->l2_present = kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(L2_PRESENT)); + if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(AS_PRESENT))) + regdump->as_present = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(AS_PRESENT)); + if 
(kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(STACK_PRESENT))) + regdump->stack_present = kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(STACK_PRESENT)); + #if !MALI_USE_CSF - registers.js_present = kbase_reg_read(kbdev, - GPU_CONTROL_REG(JS_PRESENT)); -#else /* !MALI_USE_CSF */ - registers.js_present = 0; + regdump->js_present = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(JS_PRESENT)); + /* Not a valid register on TMIX */ + + /* TGOx specific register */ + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_TLS_ALLOC)) + regdump->thread_tls_alloc = + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(THREAD_TLS_ALLOC)); #endif /* !MALI_USE_CSF */ - for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) + regdump->thread_max_threads = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(THREAD_MAX_THREADS)); + regdump->thread_max_workgroup_size = + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(THREAD_MAX_WORKGROUP_SIZE)); + regdump->thread_max_barrier_size = + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(THREAD_MAX_BARRIER_SIZE)); + regdump->thread_features = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(THREAD_FEATURES)); + + /* Feature Registers */ + /* AMBA_FEATURES enum is mapped to COHERENCY_FEATURES enum */ + regdump->coherency_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(COHERENCY_FEATURES)); + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CORE_FEATURES)) + regdump->core_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(CORE_FEATURES)); + +#if MALI_USE_CSF + if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(GPU_FEATURES))) + regdump->gpu_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(GPU_FEATURES)); +#endif /* MALI_USE_CSF */ + + regdump->tiler_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(TILER_FEATURES)); + regdump->l2_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(L2_FEATURES)); + regdump->mem_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(MEM_FEATURES)); + regdump->mmu_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(MMU_FEATURES)); + #if !MALI_USE_CSF - registers.js_features[i] = kbase_reg_read(kbdev, - GPU_CONTROL_REG(JS_FEATURES_REG(i))); -#else /* !MALI_USE_CSF */ - registers.js_features[i] = 0; + for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) + regdump->js_features[i] = kbase_reg_read32(kbdev, GPU_JS_FEATURES_OFFSET(i)); #endif /* !MALI_USE_CSF */ - for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) - registers.texture_features[i] = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i))); - - registers.thread_max_threads = kbase_reg_read(kbdev, - GPU_CONTROL_REG(THREAD_MAX_THREADS)); - registers.thread_max_workgroup_size = kbase_reg_read(kbdev, - GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE)); - registers.thread_max_barrier_size = kbase_reg_read(kbdev, - GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE)); - registers.thread_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(THREAD_FEATURES)); - registers.thread_tls_alloc = kbase_reg_read(kbdev, - GPU_CONTROL_REG(THREAD_TLS_ALLOC)); - - registers.shader_present_lo = kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_PRESENT_LO)); - registers.shader_present_hi = kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_PRESENT_HI)); - - registers.tiler_present_lo = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_PRESENT_LO)); - registers.tiler_present_hi = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_PRESENT_HI)); - - registers.l2_present_lo = kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_PRESENT_LO)); - registers.l2_present_hi = kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_PRESENT_HI)); - - registers.stack_present_lo = kbase_reg_read(kbdev, - GPU_CONTROL_REG(STACK_PRESENT_LO)); - 
registers.stack_present_hi = kbase_reg_read(kbdev, - GPU_CONTROL_REG(STACK_PRESENT_HI)); - - if (registers.gpu_id >= GPU_ID2_PRODUCT_MAKE(11, 8, 5, 2)) { - registers.gpu_features_lo = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_FEATURES_LO)); - registers.gpu_features_hi = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_FEATURES_HI)); - } else { - registers.gpu_features_lo = 0; - registers.gpu_features_hi = 0; +#if MALI_USE_CSF +#endif /* MALI_USE_CSF */ + { + for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) + regdump->texture_features[i] = + kbase_reg_read32(kbdev, GPU_TEXTURE_FEATURES_OFFSET(i)); } - if (!kbase_is_gpu_removed(kbdev)) { - *regdump = registers; - return 0; - } else + if (kbase_is_gpu_removed(kbdev)) return -EIO; + return 0; } int kbase_backend_gpuprops_get_curr_config(struct kbase_device *kbdev, - struct kbase_current_config_regdump *curr_config_regdump) + struct kbase_current_config_regdump *curr_config_regdump) { if (WARN_ON(!kbdev) || WARN_ON(!curr_config_regdump)) return -EINVAL; - curr_config_regdump->mem_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(MEM_FEATURES)); - - curr_config_regdump->shader_present_lo = kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_PRESENT_LO)); - curr_config_regdump->shader_present_hi = kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_PRESENT_HI)); - - curr_config_regdump->l2_present_lo = kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_PRESENT_LO)); - curr_config_regdump->l2_present_hi = kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_PRESENT_HI)); + curr_config_regdump->mem_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(MEM_FEATURES)); + curr_config_regdump->l2_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(L2_FEATURES)); + curr_config_regdump->shader_present = + kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(SHADER_PRESENT)); + curr_config_regdump->l2_present = kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(L2_PRESENT)); if (kbase_is_gpu_removed(kbdev)) return -EIO; return 0; - -} - -int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, - struct kbase_gpuprops_regdump *regdump) -{ - u32 coherency_features; - int error = 0; - - /* Ensure we can access the GPU registers */ - kbase_pm_register_access_enable(kbdev); - - coherency_features = kbase_cache_get_coherency_features(kbdev); - - if (kbase_is_gpu_removed(kbdev)) - error = -EIO; - - regdump->coherency_features = coherency_features; - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CORE_FEATURES)) - regdump->core_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(CORE_FEATURES)); - else - regdump->core_features = 0; - - kbase_pm_register_access_disable(kbdev); - - return error; } int kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev, - struct kbase_gpuprops_regdump *regdump) + struct kbasep_gpuprops_regdump *regdump) { if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) { - u32 l2_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_FEATURES)); - u32 l2_config = - kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG)); - u32 asn_hash[ASN_HASH_COUNT] = { - 0, - }; - int i; - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH)) { - for (i = 0; i < ASN_HASH_COUNT; i++) - asn_hash[i] = kbase_reg_read( - kbdev, GPU_CONTROL_REG(ASN_HASH(i))); + regdump->l2_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(L2_FEATURES)); + regdump->l2_config = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(L2_CONFIG)); + +#if MALI_USE_CSF + if (kbase_hw_has_l2_slice_hash_feature(kbdev)) { + uint i; + for (i = 0; i < GPU_L2_SLICE_HASH_COUNT; i++) + regdump->l2_slice_hash[i] = + 
kbase_reg_read32(kbdev, GPU_L2_SLICE_HASH_OFFSET(i)); } +#endif /* MALI_USE_CSF */ if (kbase_is_gpu_removed(kbdev)) return -EIO; - - regdump->l2_features = l2_features; - regdump->l2_config = l2_config; - for (i = 0; i < ASN_HASH_COUNT; i++) - regdump->l2_asn_hash[i] = asn_hash[i]; } return 0; diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c index b89b917..71c4b71 100644 --- a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,29 +24,28 @@ */ #include <mali_kbase.h> -#include <gpu/mali_kbase_gpu_regmap.h> +#include <hw_access/mali_kbase_hw_access_regmap.h> #include <mali_kbase_hwaccess_instr.h> #include <device/mali_kbase_device.h> #include <backend/gpu/mali_kbase_instr_internal.h> static int wait_prfcnt_ready(struct kbase_device *kbdev) { - u32 loops; - - for (loops = 0; loops < KBASE_PRFCNT_ACTIVE_MAX_LOOPS; loops++) { - const u32 prfcnt_active = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & - GPU_STATUS_PRFCNT_ACTIVE; - if (!prfcnt_active) - return 0; + u32 val; + const u32 timeout_us = + kbase_get_timeout_ms(kbdev, KBASE_PRFCNT_ACTIVE_TIMEOUT) * USEC_PER_MSEC; + const int err = kbase_reg_poll32_timeout(kbdev, GPU_CONTROL_ENUM(GPU_STATUS), val, + !(val & GPU_STATUS_PRFCNT_ACTIVE), 0, timeout_us, + false); + if (err) { + dev_err(kbdev->dev, "PRFCNT_ACTIVE bit stuck\n"); + return -EBUSY; } - - dev_err(kbdev->dev, "PRFCNT_ACTIVE bit stuck\n"); - return -EBUSY; + return 0; } -int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, - struct kbase_context *kctx, - struct kbase_instr_hwcnt_enable *enable) +int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, struct kbase_context *kctx, + struct kbase_instr_hwcnt_enable *enable) { unsigned long flags; int err = -EINVAL; @@ -74,9 +73,9 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, } /* Enable interrupt */ - irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | - PRFCNT_SAMPLE_COMPLETED); + irq_mask = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK)); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK), + irq_mask | PRFCNT_SAMPLE_COMPLETED); /* In use, this context is the owner */ kbdev->hwcnt.kctx = kctx; @@ -87,12 +86,12 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); /* Configure */ - prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT; + prfcnt_config = (u32)kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT; #ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS - prfcnt_config |= kbdev->hwcnt.backend.override_counter_set + prfcnt_config |= (u32)kbdev->hwcnt.backend.override_counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT; #else - prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT; + prfcnt_config |= (u32)enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT; #endif /* Wait until prfcnt config register can be written */ @@ -100,32 +99,25 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, if (err) return err; - kbase_reg_write(kbdev, 
GPU_CONTROL_REG(PRFCNT_CONFIG), - prfcnt_config | PRFCNT_CONFIG_MODE_OFF); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_CONFIG), + prfcnt_config | PRFCNT_CONFIG_MODE_OFF); /* Wait until prfcnt is disabled before writing configuration registers */ err = wait_prfcnt_ready(kbdev); if (err) return err; - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), - enable->dump_buffer & 0xFFFFFFFF); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), - enable->dump_buffer >> 32); + kbase_reg_write64(kbdev, GPU_CONTROL_ENUM(PRFCNT_BASE), enable->dump_buffer); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), - enable->fe_bm); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_JM_EN), enable->fe_bm); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), - enable->shader_bm); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), - enable->mmu_l2_bm); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_SHADER_EN), enable->shader_bm); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_MMU_L2_EN), enable->mmu_l2_bm); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), - enable->tiler_bm); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_TILER_EN), enable->tiler_bm); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), - prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_CONFIG), + prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL); spin_lock_irqsave(&kbdev->hwcnt.lock, flags); @@ -151,15 +143,16 @@ static void kbasep_instr_hwc_disable_hw_prfcnt(struct kbase_device *kbdev) return; /* Disable interrupt */ - irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); + irq_mask = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK)); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~PRFCNT_SAMPLE_COMPLETED); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK), + irq_mask & ~PRFCNT_SAMPLE_COMPLETED); /* Wait until prfcnt config register can be written, then disable the counters. * Return value is ignored as we are disabling anyway. 
*/ wait_prfcnt_ready(kbdev); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(PRFCNT_CONFIG), 0); kbdev->hwcnt.kctx = NULL; kbdev->hwcnt.addr = 0ULL; @@ -206,8 +199,7 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); /* Ongoing dump/setup - wait for its completion */ - wait_event(kbdev->hwcnt.backend.wait, - kbdev->hwcnt.backend.triggered != 0); + wait_event(kbdev->hwcnt.backend.wait, kbdev->hwcnt.backend.triggered != 0); } kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; @@ -218,8 +210,7 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); - dev_dbg(kbdev->dev, "HW counters dumping disabled for context %pK", - kctx); + dev_dbg(kbdev->dev, "HW counters dumping disabled for context %pK", kctx); return 0; } @@ -261,28 +252,22 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) goto unlock; /* Reconfigure the dump address */ - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), - kbdev->hwcnt.addr & 0xFFFFFFFF); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), - kbdev->hwcnt.addr >> 32); + kbase_reg_write64(kbdev, GPU_CONTROL_ENUM(PRFCNT_BASE), kbdev->hwcnt.addr); /* Start dumping */ - KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, - kbdev->hwcnt.addr); + KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, kbdev->hwcnt.addr); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_PRFCNT_SAMPLE); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), GPU_COMMAND_PRFCNT_SAMPLE); dev_dbg(kbdev->dev, "HW counters dumping done for context %pK", kctx); - unlock: +unlock: spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); return err; } KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump); -bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, - bool * const success) +bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, bool *const success) { unsigned long flags; bool complete = false; @@ -335,8 +320,7 @@ int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx) int err; /* Wait for dump & cache clean to complete */ - wait_event(kbdev->hwcnt.backend.wait, - kbdev->hwcnt.backend.triggered != 0); + wait_event(kbdev->hwcnt.backend.wait, kbdev->hwcnt.backend.triggered != 0); spin_lock_irqsave(&kbdev->hwcnt.lock, flags); @@ -347,8 +331,7 @@ int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx) err = -EIO; } else { /* Dump done */ - KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == - KBASE_INSTR_STATE_IDLE); + KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE); err = 0; } @@ -368,8 +351,7 @@ int kbase_instr_hwcnt_clear(struct kbase_context *kctx) /* Check it's the context previously set up and we're not in IDLE * state. 
*/ - if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state != - KBASE_INSTR_STATE_IDLE) + if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) goto unlock; if (kbase_is_gpu_removed(kbdev)) { @@ -384,8 +366,7 @@ int kbase_instr_hwcnt_clear(struct kbase_context *kctx) /* Clear the counters */ KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, 0); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_PRFCNT_CLEAR); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), GPU_COMMAND_PRFCNT_CLEAR); unlock: spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); @@ -443,8 +424,6 @@ int kbase_instr_backend_init(struct kbase_device *kbdev) init_waitqueue_head(&kbdev->hwcnt.backend.wait); - kbdev->hwcnt.backend.triggered = 0; - #ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS /* Use the build time option for the override default. */ #if defined(CONFIG_MALI_PRFCNT_SET_SECONDARY) @@ -474,8 +453,7 @@ void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev) * Valid inputs are the values accepted bythe SET_SELECT bits of the * PRFCNT_CONFIG register as defined in the architecture specification. */ - debugfs_create_u8("hwcnt_set_select", 0644, - kbdev->mali_debugfs_directory, + debugfs_create_u8("hwcnt_set_select", 0644, kbdev->mali_debugfs_directory, (u8 *)&kbdev->hwcnt.backend.override_counter_set); } #endif diff --git a/mali_kbase/backend/gpu/mali_kbase_irq_internal.h b/mali_kbase/backend/gpu/mali_kbase_irq_internal.h index 66cda8c..4374793 100644 --- a/mali_kbase/backend/gpu/mali_kbase_irq_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_irq_internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2015, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,8 +26,29 @@ #ifndef _KBASE_IRQ_INTERNAL_H_ #define _KBASE_IRQ_INTERNAL_H_ +/* GPU IRQ Tags */ +#define JOB_IRQ_TAG 0 +#define MMU_IRQ_TAG 1 +#define GPU_IRQ_TAG 2 + +/** + * kbase_install_interrupts - Install IRQs handlers. + * + * @kbdev: The kbase device + * + * This function must be called once only when a kbase device is initialized. + * + * Return: 0 on success. Error code (negative) on failure. + */ int kbase_install_interrupts(struct kbase_device *kbdev); +/** + * kbase_release_interrupts - Uninstall IRQs handlers. + * + * @kbdev: The kbase device + * + * This function needs to be called when a kbase device is terminated. + */ void kbase_release_interrupts(struct kbase_device *kbdev); /** @@ -37,11 +58,52 @@ void kbase_release_interrupts(struct kbase_device *kbdev); */ void kbase_synchronize_irqs(struct kbase_device *kbdev); -int kbasep_common_test_interrupt_handlers( - struct kbase_device * const kbdev); +#ifdef CONFIG_MALI_DEBUG +#if IS_ENABLED(CONFIG_MALI_REAL_HW) +/** + * kbase_validate_interrupts - Validate interrupts + * + * @kbdev: The kbase device + * + * This function will be called once when a kbase device is initialized + * to check whether interrupt handlers are configured appropriately. + * If interrupt numbers and/or flags defined in the device tree are + * incorrect, then the validation might fail. + * The whold device initialization will fail if it returns error code. + * + * Return: 0 on success. Error code (negative) on failure. 
+ */ +int kbase_validate_interrupts(struct kbase_device *const kbdev); +#endif /* CONFIG_MALI_REAL_HW */ +#endif /* CONFIG_MALI_DEBUG */ + +/** + * kbase_get_interrupt_handler - Return default interrupt handler + * @kbdev: Kbase device + * @irq_tag: Tag to choose the handler + * + * If single interrupt line is used the combined interrupt handler + * will be returned regardless of irq_tag. Otherwise the corresponding + * interrupt handler will be returned. + * + * Return: Interrupt handler corresponding to the tag. NULL on failure. + */ +irq_handler_t kbase_get_interrupt_handler(struct kbase_device *kbdev, u32 irq_tag); -irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val); -int kbase_set_custom_irq_handler(struct kbase_device *kbdev, - irq_handler_t custom_handler, int irq_type); +/** + * kbase_set_custom_irq_handler - Set a custom IRQ handler + * + * @kbdev: The kbase device for which the handler is to be registered + * @custom_handler: Handler to be registered + * @irq_tag: Interrupt tag + * + * Register given interrupt handler for requested interrupt tag + * In the case where irq handler is not specified, the default handler shall be + * registered + * + * Return: 0 case success, error code otherwise + */ +int kbase_set_custom_irq_handler(struct kbase_device *kbdev, irq_handler_t custom_handler, + u32 irq_tag); #endif /* _KBASE_IRQ_INTERNAL_H_ */ diff --git a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c index b95277c..0edbe75 100644 --- a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c +++ b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c @@ -26,20 +26,15 @@ #include <linux/interrupt.h> #if IS_ENABLED(CONFIG_MALI_REAL_HW) - -/* GPU IRQ Tags */ -#define JOB_IRQ_TAG 0 -#define MMU_IRQ_TAG 1 -#define GPU_IRQ_TAG 2 - static void *kbase_tag(void *ptr, u32 tag) { - return (void *)(((uintptr_t) ptr) | tag); + return (void *)(((uintptr_t)ptr) | tag); } +#endif static void *kbase_untag(void *ptr) { - return (void *)(((uintptr_t) ptr) & ~3); + return (void *)(((uintptr_t)ptr) & ~(uintptr_t)3); } static irqreturn_t kbase_job_irq_handler(int irq, void *data) @@ -56,13 +51,7 @@ static irqreturn_t kbase_job_irq_handler(int irq, void *data) return IRQ_NONE; } - val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS)); - -#ifdef CONFIG_MALI_DEBUG - if (!kbdev->pm.backend.driver_ready_for_irqs) - dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", - __func__, irq, val); -#endif /* CONFIG_MALI_DEBUG */ + val = kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_STATUS)); if (!val) { spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -99,13 +88,8 @@ static irqreturn_t kbase_mmu_irq_handler(int irq, void *data) atomic_inc(&kbdev->faults_pending); - val = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_STATUS)); + val = kbase_reg_read32(kbdev, MMU_CONTROL_ENUM(IRQ_STATUS)); -#ifdef CONFIG_MALI_DEBUG - if (!kbdev->pm.backend.driver_ready_for_irqs) - dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", - __func__, irq, val); -#endif /* CONFIG_MALI_DEBUG */ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (!val) { @@ -122,11 +106,13 @@ static irqreturn_t kbase_mmu_irq_handler(int irq, void *data) return IRQ_HANDLED; } -static irqreturn_t kbase_gpu_irq_handler(int irq, void *data) + +static irqreturn_t kbase_gpuonly_irq_handler(int irq, void *data) { unsigned long flags; struct kbase_device *kbdev = kbase_untag(data); - u32 val; + u32 gpu_irq_status; + irqreturn_t irq_state = IRQ_NONE; 
spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -136,104 +122,105 @@ static irqreturn_t kbase_gpu_irq_handler(int irq, void *data) return IRQ_NONE; } - val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS)); + gpu_irq_status = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_STATUS)); -#ifdef CONFIG_MALI_DEBUG - if (!kbdev->pm.backend.driver_ready_for_irqs) - dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", - __func__, irq, val); -#endif /* CONFIG_MALI_DEBUG */ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - if (!val) - return IRQ_NONE; - - dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + if (gpu_irq_status) { + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, gpu_irq_status); + kbase_gpu_interrupt(kbdev, gpu_irq_status); - kbase_gpu_interrupt(kbdev, val); + irq_state = IRQ_HANDLED; + } - return IRQ_HANDLED; + return irq_state; } -static irq_handler_t kbase_handler_table[] = { - [JOB_IRQ_TAG] = kbase_job_irq_handler, - [MMU_IRQ_TAG] = kbase_mmu_irq_handler, - [GPU_IRQ_TAG] = kbase_gpu_irq_handler, -}; - -#ifdef CONFIG_MALI_DEBUG -#define JOB_IRQ_HANDLER JOB_IRQ_TAG -#define GPU_IRQ_HANDLER GPU_IRQ_TAG +/** + * kbase_gpu_irq_handler - GPU interrupt handler + * @irq: IRQ number + * @data: Data associated with this IRQ (i.e. kbdev) + * + * Return: IRQ_HANDLED if any interrupt request has been successfully handled. + * IRQ_NONE otherwise. + */ +static irqreturn_t kbase_gpu_irq_handler(int irq, void *data) +{ + irqreturn_t irq_state = kbase_gpuonly_irq_handler(irq, data); + return irq_state; +} /** - * kbase_gpu_irq_test_handler - Variant (for test) of kbase_gpu_irq_handler() + * kbase_combined_irq_handler - Combined interrupt handler for all interrupts * @irq: IRQ number * @data: Data associated with this IRQ (i.e. kbdev) - * @val: Value of the GPU_CONTROL_REG(GPU_IRQ_STATUS) * - * Handle the GPU device interrupt source requests reflected in the - * given source bit-pattern. The test code caller is responsible for - * undertaking the required device power maintenace. + * This handler will be used for the GPU with single interrupt line. * - * Return: IRQ_HANDLED if the requests are from the GPU device, - * IRQ_NONE otherwise + * Return: IRQ_HANDLED if any interrupt request has been successfully handled. + * IRQ_NONE otherwise. 
*/ -irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val) +static irqreturn_t kbase_combined_irq_handler(int irq, void *data) { - struct kbase_device *kbdev = kbase_untag(data); + irqreturn_t irq_state = IRQ_NONE; - if (!val) - return IRQ_NONE; + if (kbase_job_irq_handler(irq, data) == IRQ_HANDLED) + irq_state = IRQ_HANDLED; + if (kbase_mmu_irq_handler(irq, data) == IRQ_HANDLED) + irq_state = IRQ_HANDLED; + if (kbase_gpu_irq_handler(irq, data) == IRQ_HANDLED) + irq_state = IRQ_HANDLED; - dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + return irq_state; +} - kbase_gpu_interrupt(kbdev, val); +static irq_handler_t kbase_handler_table[] = { + [JOB_IRQ_TAG] = kbase_job_irq_handler, + [MMU_IRQ_TAG] = kbase_mmu_irq_handler, + [GPU_IRQ_TAG] = kbase_gpu_irq_handler, +}; - return IRQ_HANDLED; +irq_handler_t kbase_get_interrupt_handler(struct kbase_device *kbdev, u32 irq_tag) +{ + if (kbdev->nr_irqs == 1) + return kbase_combined_irq_handler; + else if (irq_tag < ARRAY_SIZE(kbase_handler_table)) + return kbase_handler_table[irq_tag]; + else + return NULL; } -KBASE_EXPORT_TEST_API(kbase_gpu_irq_test_handler); - -/** - * kbase_set_custom_irq_handler - Set a custom IRQ handler - * @kbdev: Device for which the handler is to be registered - * @custom_handler: Handler to be registered - * @irq_type: Interrupt type - * - * Registers given interrupt handler for requested interrupt type - * In the case where irq handler is not specified, the default handler shall be - * registered - * - * Return: 0 case success, error code otherwise - */ -int kbase_set_custom_irq_handler(struct kbase_device *kbdev, - irq_handler_t custom_handler, - int irq_type) +#if IS_ENABLED(CONFIG_MALI_REAL_HW) +#ifdef CONFIG_MALI_DEBUG +int kbase_set_custom_irq_handler(struct kbase_device *kbdev, irq_handler_t custom_handler, + u32 irq_tag) { int result = 0; - irq_handler_t requested_irq_handler = NULL; + irq_handler_t handler = custom_handler; + const int irq = (kbdev->nr_irqs == 1) ? 0 : irq_tag; - KBASE_DEBUG_ASSERT((irq_type >= JOB_IRQ_HANDLER) && - (irq_type <= GPU_IRQ_HANDLER)); + if (unlikely(!((irq_tag >= JOB_IRQ_TAG) && (irq_tag <= GPU_IRQ_TAG)))) { + dev_err(kbdev->dev, "Invalid irq_tag (%d)\n", irq_tag); + return -EINVAL; + } /* Release previous handler */ - if (kbdev->irqs[irq_type].irq) - free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type)); + if (kbdev->irqs[irq].irq) + free_irq(kbdev->irqs[irq].irq, kbase_tag(kbdev, irq)); - requested_irq_handler = (custom_handler != NULL) ? - custom_handler : - kbase_handler_table[irq_type]; + /* If a custom handler isn't provided use the default handler */ + if (!handler) + handler = kbase_get_interrupt_handler(kbdev, irq_tag); - if (request_irq(kbdev->irqs[irq_type].irq, requested_irq_handler, - kbdev->irqs[irq_type].flags | IRQF_SHARED, - dev_name(kbdev->dev), - kbase_tag(kbdev, irq_type)) != 0) { + if (request_irq(kbdev->irqs[irq].irq, handler, + kbdev->irqs[irq].flags | ((kbdev->nr_irqs == 1) ? 
0 : IRQF_SHARED), + dev_name(kbdev->dev), kbase_tag(kbdev, irq)) != 0) { result = -EINVAL; - dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", - kbdev->irqs[irq_type].irq, irq_type); -#if IS_ENABLED(CONFIG_SPARSE_IRQ) - dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); -#endif /* CONFIG_SPARSE_IRQ */ + dev_err(kbdev->dev, "Can't request interrupt %u (index %u)\n", kbdev->irqs[irq].irq, + irq_tag); + if (IS_ENABLED(CONFIG_SPARSE_IRQ)) + dev_err(kbdev->dev, + "CONFIG_SPARSE_IRQ enabled - is the interrupt number correct for this config?\n"); } return result; @@ -251,7 +238,7 @@ struct kbasep_irq_test { static struct kbasep_irq_test kbasep_irq_test_data; -#define IRQ_TEST_TIMEOUT 500 +#define IRQ_TEST_TIMEOUT 500 static irqreturn_t kbase_job_irq_test_handler(int irq, void *data) { @@ -267,7 +254,7 @@ static irqreturn_t kbase_job_irq_test_handler(int irq, void *data) return IRQ_NONE; } - val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS)); + val = kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_STATUS)); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -279,7 +266,7 @@ static irqreturn_t kbase_job_irq_test_handler(int irq, void *data) kbasep_irq_test_data.triggered = 1; wake_up(&kbasep_irq_test_data.wait); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val); + kbase_reg_write32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_CLEAR), val); return IRQ_HANDLED; } @@ -298,7 +285,7 @@ static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data) return IRQ_NONE; } - val = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_STATUS)); + val = kbase_reg_read32(kbdev, MMU_CONTROL_ENUM(IRQ_STATUS)); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -310,15 +297,14 @@ static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data) kbasep_irq_test_data.triggered = 1; wake_up(&kbasep_irq_test_data.wait); - kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_CLEAR), val); + kbase_reg_write32(kbdev, MMU_CONTROL_ENUM(IRQ_CLEAR), val); return IRQ_HANDLED; } static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer) { - struct kbasep_irq_test *test_data = container_of(timer, - struct kbasep_irq_test, timer); + struct kbasep_irq_test *test_data = container_of(timer, struct kbasep_irq_test, timer); test_data->timeout = 1; test_data->triggered = 1; @@ -326,96 +312,105 @@ static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer) return HRTIMER_NORESTART; } -static int kbasep_common_test_interrupt( - struct kbase_device * const kbdev, u32 tag) +/** + * validate_interrupt - Validate an interrupt + * @kbdev: Kbase device + * @tag: Tag to choose the interrupt + * + * To validate the settings for the interrupt, write a value on RAWSTAT + * register to trigger interrupt. Then with custom interrupt handler + * check whether the interrupt happens within reasonable time. + * + * Return: 0 if validating interrupt succeeds. + */ +static int validate_interrupt(struct kbase_device *const kbdev, u32 tag) { int err = 0; - irq_handler_t test_handler; - + irq_handler_t handler; + const int irq = (kbdev->nr_irqs == 1) ? 
0 : tag; u32 old_mask_val; u16 mask_offset; u16 rawstat_offset; switch (tag) { case JOB_IRQ_TAG: - test_handler = kbase_job_irq_test_handler; - rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT); - mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK); + handler = kbase_job_irq_test_handler; + rawstat_offset = JOB_CONTROL_ENUM(JOB_IRQ_RAWSTAT); + mask_offset = JOB_CONTROL_ENUM(JOB_IRQ_MASK); break; case MMU_IRQ_TAG: - test_handler = kbase_mmu_irq_test_handler; - rawstat_offset = MMU_CONTROL_REG(MMU_IRQ_RAWSTAT); - mask_offset = MMU_CONTROL_REG(MMU_IRQ_MASK); + handler = kbase_mmu_irq_test_handler; + rawstat_offset = MMU_CONTROL_ENUM(IRQ_RAWSTAT); + mask_offset = MMU_CONTROL_ENUM(IRQ_MASK); break; case GPU_IRQ_TAG: /* already tested by pm_driver - bail out */ - default: return 0; + default: + dev_err(kbdev->dev, "Invalid tag (%d)\n", tag); + return -EINVAL; } /* store old mask */ - old_mask_val = kbase_reg_read(kbdev, mask_offset); + old_mask_val = kbase_reg_read32(kbdev, mask_offset); /* mask interrupts */ - kbase_reg_write(kbdev, mask_offset, 0x0); + kbase_reg_write32(kbdev, mask_offset, 0x0); - if (kbdev->irqs[tag].irq) { + if (kbdev->irqs[irq].irq) { /* release original handler and install test handler */ - if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) { + if (kbase_set_custom_irq_handler(kbdev, handler, tag) != 0) { err = -EINVAL; } else { kbasep_irq_test_data.timeout = 0; - hrtimer_init(&kbasep_irq_test_data.timer, - CLOCK_MONOTONIC, HRTIMER_MODE_REL); - kbasep_irq_test_data.timer.function = - kbasep_test_interrupt_timeout; + hrtimer_init(&kbasep_irq_test_data.timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + kbasep_irq_test_data.timer.function = kbasep_test_interrupt_timeout; /* trigger interrupt */ - kbase_reg_write(kbdev, mask_offset, 0x1); - kbase_reg_write(kbdev, rawstat_offset, 0x1); + kbase_reg_write32(kbdev, mask_offset, 0x1); + kbase_reg_write32(kbdev, rawstat_offset, 0x1); hrtimer_start(&kbasep_irq_test_data.timer, - HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT), - HRTIMER_MODE_REL); + HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT), HRTIMER_MODE_REL); - wait_event(kbasep_irq_test_data.wait, - kbasep_irq_test_data.triggered != 0); + wait_event(kbasep_irq_test_data.wait, kbasep_irq_test_data.triggered != 0); if (kbasep_irq_test_data.timeout != 0) { - dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n", - kbdev->irqs[tag].irq, tag); + dev_err(kbdev->dev, "Interrupt %u (index %u) didn't reach CPU.\n", + kbdev->irqs[irq].irq, irq); err = -EINVAL; } else { - dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n", - kbdev->irqs[tag].irq, tag); + dev_dbg(kbdev->dev, "Interrupt %u (index %u) reached CPU.\n", + kbdev->irqs[irq].irq, irq); } hrtimer_cancel(&kbasep_irq_test_data.timer); kbasep_irq_test_data.triggered = 0; /* mask interrupts */ - kbase_reg_write(kbdev, mask_offset, 0x0); + kbase_reg_write32(kbdev, mask_offset, 0x0); /* release test handler */ - free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag)); + free_irq(kbdev->irqs[irq].irq, kbase_tag(kbdev, irq)); } /* restore original interrupt */ - if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag], - kbdev->irqs[tag].flags | IRQF_SHARED, - dev_name(kbdev->dev), kbase_tag(kbdev, tag))) { - dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n", - kbdev->irqs[tag].irq, tag); + if (request_irq(kbdev->irqs[irq].irq, kbase_get_interrupt_handler(kbdev, tag), + kbdev->irqs[irq].flags | ((kbdev->nr_irqs == 1) ? 
0 : IRQF_SHARED), + dev_name(kbdev->dev), kbase_tag(kbdev, irq))) { + dev_err(kbdev->dev, "Can't restore original interrupt %u (index %u)\n", + kbdev->irqs[irq].irq, tag); err = -EINVAL; } } /* restore old mask */ - kbase_reg_write(kbdev, mask_offset, old_mask_val); + kbase_reg_write32(kbdev, mask_offset, old_mask_val); return err; } -int kbasep_common_test_interrupt_handlers( - struct kbase_device * const kbdev) +#if IS_ENABLED(CONFIG_MALI_REAL_HW) +int kbase_validate_interrupts(struct kbase_device *const kbdev) { int err; @@ -425,63 +420,64 @@ int kbasep_common_test_interrupt_handlers( /* A suspend won't happen during startup/insmod */ kbase_pm_context_active(kbdev); - err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG); + err = validate_interrupt(kbdev, JOB_IRQ_TAG); if (err) { - dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n"); + dev_err(kbdev->dev, + "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n"); goto out; } - err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG); + err = validate_interrupt(kbdev, MMU_IRQ_TAG); if (err) { - dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. Check interrupt assignments.\n"); + dev_err(kbdev->dev, + "Interrupt MMU_IRQ didn't reach CPU. Check interrupt assignments.\n"); goto out; } dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n"); - out: +out: kbase_pm_context_idle(kbdev); return err; } +#endif /* CONFIG_MALI_REAL_HW */ #endif /* CONFIG_MALI_DEBUG */ int kbase_install_interrupts(struct kbase_device *kbdev) { - u32 nr = ARRAY_SIZE(kbase_handler_table); - int err; u32 i; - for (i = 0; i < nr; i++) { - err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i], - kbdev->irqs[i].flags | IRQF_SHARED, - dev_name(kbdev->dev), - kbase_tag(kbdev, i)); - if (err) { - dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", - kbdev->irqs[i].irq, i); -#if IS_ENABLED(CONFIG_SPARSE_IRQ) - dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); -#endif /* CONFIG_SPARSE_IRQ */ + for (i = 0; i < kbdev->nr_irqs; i++) { + const int result = request_irq( + kbdev->irqs[i].irq, kbase_get_interrupt_handler(kbdev, i), + kbdev->irqs[i].flags | ((kbdev->nr_irqs == 1) ? 
0 : IRQF_SHARED), + dev_name(kbdev->dev), kbase_tag(kbdev, i)); + if (result) { + dev_err(kbdev->dev, "Can't request interrupt %u (index %u)\n", + kbdev->irqs[i].irq, i); goto release; } } return 0; - release: +release: + if (IS_ENABLED(CONFIG_SPARSE_IRQ)) + dev_err(kbdev->dev, + "CONFIG_SPARSE_IRQ enabled - is the interrupt number correct for this config?\n"); + while (i-- > 0) free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i)); - return err; + return -EINVAL; } void kbase_release_interrupts(struct kbase_device *kbdev) { - u32 nr = ARRAY_SIZE(kbase_handler_table); u32 i; - for (i = 0; i < nr; i++) { + for (i = 0; i < kbdev->nr_irqs; i++) { if (kbdev->irqs[i].irq) free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i)); } @@ -489,10 +485,9 @@ void kbase_release_interrupts(struct kbase_device *kbdev) void kbase_synchronize_irqs(struct kbase_device *kbdev) { - u32 nr = ARRAY_SIZE(kbase_handler_table); u32 i; - for (i = 0; i < nr; i++) { + for (i = 0; i < kbdev->nr_irqs; i++) { if (kbdev->irqs[i].irq) synchronize_irq(kbdev->irqs[i].irq); } diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_as.c b/mali_kbase/backend/gpu/mali_kbase_jm_as.c index 7059c84..1add461 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_as.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_as.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -52,6 +52,8 @@ static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev, { struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + CSTD_UNUSED(current_as); + lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); lockdep_assert_held(&js_devdata->runpool_mutex); lockdep_assert_held(&kbdev->hwaccess_lock); @@ -88,8 +90,7 @@ bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, struct kbase_contex return false; } -void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, - struct kbase_context *kctx) +void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, struct kbase_context *kctx) { int as_nr = kctx->as_nr; @@ -111,13 +112,14 @@ void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, kbase_js_runpool_dec_context_count(kbdev, kctx); } -void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, - struct kbase_context *kctx) +void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, struct kbase_context *kctx) { + CSTD_UNUSED(kbdev); + CSTD_UNUSED(kctx); } -int kbase_backend_find_and_release_free_address_space( - struct kbase_device *kbdev, struct kbase_context *kctx) +int kbase_backend_find_and_release_free_address_space(struct kbase_device *kbdev, + struct kbase_context *kctx) { struct kbasep_js_device_data *js_devdata; struct kbasep_js_kctx_info *js_kctx_info; @@ -146,12 +148,11 @@ int kbase_backend_find_and_release_free_address_space( * descheduled. 
*/ if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) && - atomic_read(&as_kctx->refcount) == 1) { + atomic_read(&as_kctx->refcount) == 1) { if (!kbase_ctx_sched_inc_refcount_nolock(as_kctx)) { WARN(1, "Failed to retain active context\n"); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, - flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&js_devdata->runpool_mutex); rt_mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); @@ -168,7 +169,6 @@ int kbase_backend_find_and_release_free_address_space( mutex_unlock(&js_devdata->runpool_mutex); rt_mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - /* Release context from address space */ rt_mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex); mutex_lock(&js_devdata->runpool_mutex); @@ -176,9 +176,7 @@ int kbase_backend_find_and_release_free_address_space( kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx); if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) { - kbasep_js_runpool_requeue_or_kill_ctx(kbdev, - as_kctx, - true); + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, as_kctx, true); mutex_unlock(&js_devdata->runpool_mutex); rt_mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex); @@ -206,9 +204,7 @@ int kbase_backend_find_and_release_free_address_space( return KBASEP_AS_NR_INVALID; } -bool kbase_backend_use_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx, - int as_nr) +bool kbase_backend_use_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr) { struct kbasep_js_device_data *js_devdata; struct kbase_as *new_address_space = NULL; diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c index dd8f4d9..43292f1 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c @@ -25,7 +25,7 @@ #include <mali_kbase.h> #include <mali_kbase_config.h> -#include <gpu/mali_kbase_gpu_regmap.h> +#include <hw_access/mali_kbase_hw_access_regmap.h> #include <tl/mali_kbase_tracepoints.h> #include <mali_linux_trace.h> #include <mali_kbase_hw.h> @@ -41,8 +41,8 @@ #include <mali_kbase_regs_history_debugfs.h> static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev); -static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev, - const u64 affinity, const u64 limited_core_mask); +static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev, const u64 affinity, + const u64 limited_core_mask); static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req core_req, unsigned int js, const u64 limited_core_mask) @@ -50,33 +50,21 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req u64 affinity; bool skip_affinity_check = false; - if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) == - BASE_JD_REQ_T) { + if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) == BASE_JD_REQ_T) { /* Tiler-only atom, affinity value can be programed as 0 */ affinity = 0; skip_affinity_check = true; - } else if ((core_req & (BASE_JD_REQ_COHERENT_GROUP | - BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) { - unsigned int num_core_groups = kbdev->gpu_props.num_core_groups; - struct mali_base_gpu_coherent_group_info *coherency_info = - &kbdev->gpu_props.props.coherency_info; + } else if ((core_req & + (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) { + affinity = kbdev->pm.backend.shaders_avail & kbdev->pm.debug_core_mask[js]; - affinity = kbdev->pm.backend.shaders_avail & - kbdev->pm.debug_core_mask[js]; - - /* JS2 on a dual core group system 
targets core group 1. All - * other cases target core group 0. + /* Bifrost onwards GPUs only have 1 coherent group which is equal to + * shader_present */ - if (js == 2 && num_core_groups > 1) - affinity &= coherency_info->group[1].core_mask; - else if (num_core_groups > 1) - affinity &= coherency_info->group[0].core_mask; - else - affinity &= kbdev->gpu_props.curr_config.shader_present; + affinity &= kbdev->gpu_props.curr_config.shader_present; } else { /* Use all cores */ - affinity = kbdev->pm.backend.shaders_avail & - kbdev->pm.debug_core_mask[js]; + affinity = kbdev->pm.backend.shaders_avail & kbdev->pm.debug_core_mask[js]; } if (core_req & BASE_JD_REQ_LIMITED_CORE_MASK) { @@ -86,8 +74,7 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req if (unlikely(!affinity && !skip_affinity_check)) { #ifdef CONFIG_MALI_DEBUG - u64 shaders_ready = - kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); + u64 shaders_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); WARN_ON(!(shaders_ready & kbdev->pm.backend.shaders_avail)); #endif @@ -96,7 +83,8 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req if (core_req & BASE_JD_REQ_LIMITED_CORE_MASK) { /* Limiting affinity again to make sure it only enables shader cores with backed TLS memory. */ - affinity = kbasep_apply_limited_core_mask(kbdev, affinity, limited_core_mask); + affinity = + kbasep_apply_limited_core_mask(kbdev, affinity, limited_core_mask); #ifdef CONFIG_MALI_DEBUG /* affinity should never be 0 */ @@ -105,10 +93,7 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req } } - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), - affinity & 0xFFFFFFFF); - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI), - affinity >> 32); + kbase_reg_write64(kbdev, JOB_SLOT_OFFSET(js, AFFINITY_NEXT), affinity); return affinity; } @@ -140,8 +125,8 @@ static u64 select_job_chain(struct kbase_jd_atom *katom) return jc; compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <= - ARRAY_SIZE(kctx->jctx.renderpasses), - "Should check invalid access to renderpasses"); + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); rp = &kctx->jctx.renderpasses[katom->renderpass_id]; /* We can read a subset of renderpass state without holding @@ -182,9 +167,8 @@ static u64 select_job_chain(struct kbase_jd_atom *katom) break; } - dev_dbg(kctx->kbdev->dev, - "Selected job chain 0x%llx for end atom %pK in state %d\n", - jc, (void *)katom, (int)rp->state); + dev_dbg(kctx->kbdev->dev, "Selected job chain 0x%llx for end atom %pK in state %d\n", jc, + (void *)katom, (int)rp->state); katom->jc = jc; return jc; @@ -193,17 +177,14 @@ static u64 select_job_chain(struct kbase_jd_atom *katom) static inline bool kbasep_jm_wait_js_free(struct kbase_device *kbdev, unsigned int js, struct kbase_context *kctx) { - const ktime_t wait_loop_start = ktime_get_raw(); - const s64 max_timeout = (s64)kbdev->js_data.js_free_wait_time_ms; - s64 diff = 0; - + u32 val; + const u32 timeout_us = kbdev->js_data.js_free_wait_time_ms * USEC_PER_MSEC; /* wait for the JS_COMMAND_NEXT register to reach the given status value */ - do { - if (!kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT))) - return true; + const int err = kbase_reg_poll32_timeout(kbdev, JOB_SLOT_OFFSET(js, COMMAND_NEXT), val, + !val, 0, timeout_us, false); - diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start)); - } while (diff < max_timeout); + if (!err) + 
return true; dev_err(kbdev->dev, "Timeout in waiting for job slot %u to become free for ctx %d_%u", js, kctx->tgid, kctx->id); @@ -227,21 +208,17 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, if (!kbasep_jm_wait_js_free(kbdev, js, kctx)) return -EPERM; - dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n", - jc_head, (void *)katom); + dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n", jc_head, + (void *)katom); - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), - jc_head & 0xFFFFFFFF); - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), - jc_head >> 32); + kbase_reg_write64(kbdev, JOB_SLOT_OFFSET(js, HEAD_NEXT), jc_head); - affinity = kbase_job_write_affinity(kbdev, katom->core_req, js, - kctx->limited_core_mask); + affinity = kbase_job_write_affinity(kbdev, katom->core_req, js, kctx->limited_core_mask); /* start MMU, medium priority, cache clean/flush on end, clean/flush on * start */ - cfg = kctx->as_nr; + cfg = (u32)kctx->as_nr; if(!kbase_jd_katom_is_protected(katom)) { if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION) && @@ -300,11 +277,10 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, ptr_slot_rb->job_chain_flag = false; } - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg); + kbase_reg_write32(kbdev, JOB_SLOT_OFFSET(js, CONFIG_NEXT), cfg); if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT), - katom->flush_id); + kbase_reg_write32(kbdev, JOB_SLOT_OFFSET(js, FLUSH_ID_NEXT), katom->flush_id); /* Write an approximate start timestamp. * It's approximate because there might be a job in the HEAD register. @@ -313,27 +289,18 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, /* GO ! 
*/ dev_dbg(kbdev->dev, "JS: Submitting atom %pK from ctx %pK to js[%d] with head=0x%llx", - katom, kctx, js, jc_head); + katom, kctx, js, jc_head); - KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, - (u32)affinity); + KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, (u32)affinity); - KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, kctx, - js, kbase_jd_atom_id(kctx, katom), TL_JS_EVENT_START); + KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, kctx, js, kbase_jd_atom_id(kctx, katom), + TL_JS_EVENT_START); - KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(kbdev, katom, jc_head, - affinity, cfg); - KBASE_TLSTREAM_TL_RET_CTX_LPU( - kbdev, - kctx, - &kbdev->gpu_props.props.raw_props.js_features[ - katom->slot_nr]); + KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(kbdev, katom, jc_head, affinity, cfg); + KBASE_TLSTREAM_TL_RET_CTX_LPU(kbdev, kctx, &kbdev->gpu_props.js_features[katom->slot_nr]); KBASE_TLSTREAM_TL_RET_ATOM_AS(kbdev, katom, &kbdev->as[kctx->as_nr]); - KBASE_TLSTREAM_TL_RET_ATOM_LPU( - kbdev, - katom, - &kbdev->gpu_props.props.raw_props.js_features[js], - "ctx_nr,atom_nr"); + KBASE_TLSTREAM_TL_RET_ATOM_LPU(kbdev, katom, &kbdev->gpu_props.js_features[js], + "ctx_nr,atom_nr"); kbase_kinstr_jm_atom_hw_submit(katom); /* Update the slot's last katom submission kctx */ @@ -344,19 +311,16 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, /* If this is the only job on the slot, trace it as starting */ char js_string[16]; - trace_gpu_sched_switch( - kbasep_make_job_slot_string(js, js_string, - sizeof(js_string)), - ktime_to_ns(katom->start_timestamp), - (u32)katom->kctx->id, 0, katom->work_id); + trace_gpu_sched_switch(kbasep_make_job_slot_string(js, js_string, + sizeof(js_string)), + ktime_to_ns(katom->start_timestamp), (u32)katom->kctx->id, 0, + katom->work_id); } #endif - trace_sysgraph_gpu(SGR_SUBMIT, kctx->id, - kbase_jd_atom_id(kctx, katom), js); + trace_sysgraph_gpu(SGR_SUBMIT, kctx->id, kbase_jd_atom_id(kctx, katom), js); - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), - JS_COMMAND_START); + kbase_reg_write32(kbdev, JOB_SLOT_OFFSET(js, COMMAND_NEXT), JS_COMMAND_START); return 0; } @@ -383,8 +347,7 @@ static void kbasep_job_slot_update_head_start_timestamp(struct kbase_device *kbd /* Checking the HEAD position for the job slot */ katom = kbase_gpu_inspect(kbdev, js, 0); if (katom != NULL) { - timestamp_diff = ktime_sub(end_timestamp, - katom->start_timestamp); + timestamp_diff = ktime_sub(end_timestamp, katom->start_timestamp); if (ktime_to_ns(timestamp_diff) >= 0) { /* Only update the timestamp if it's a better estimate * than what's currently stored. This is because our @@ -407,9 +370,7 @@ static void kbasep_job_slot_update_head_start_timestamp(struct kbase_device *kbd */ static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, unsigned int js) { - KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP( - kbdev, - &kbdev->gpu_props.props.raw_props.js_features[js]); + KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(kbdev, &kbdev->gpu_props.js_features[js]); } void kbase_job_done(struct kbase_device *kbdev, u32 done) @@ -433,45 +394,37 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) /* Note: This is inherently unfair, as we always check for lower * numbered interrupts before the higher numbered ones. 
*/ - i = ffs(finished) - 1; + i = (unsigned int)ffs((int)finished) - 1u; do { - int nr_done; + u32 nr_done; u32 active; - u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */ + u32 completion_code = BASE_JD_EVENT_DONE; /* assume OK */ u64 job_tail = 0; if (failed & (1u << i)) { /* read out the job slot status code if the job * slot reported failure */ - completion_code = kbase_reg_read(kbdev, - JOB_SLOT_REG(i, JS_STATUS)); + completion_code = + kbase_reg_read32(kbdev, JOB_SLOT_OFFSET(i, STATUS)); if (completion_code == BASE_JD_EVENT_STOPPED) { u64 job_head; - KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT( - kbdev, NULL, - i, 0, TL_JS_EVENT_SOFT_STOP); + KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, NULL, i, 0, + TL_JS_EVENT_SOFT_STOP); - kbasep_trace_tl_event_lpu_softstop( - kbdev, i); + kbasep_trace_tl_event_lpu_softstop(kbdev, i); /* Soft-stopped job - read the value of * JS<n>_TAIL so that the job chain can * be resumed */ - job_tail = (u64)kbase_reg_read(kbdev, - JOB_SLOT_REG(i, JS_TAIL_LO)) | - ((u64)kbase_reg_read(kbdev, - JOB_SLOT_REG(i, JS_TAIL_HI)) - << 32); - job_head = (u64)kbase_reg_read(kbdev, - JOB_SLOT_REG(i, JS_HEAD_LO)) | - ((u64)kbase_reg_read(kbdev, - JOB_SLOT_REG(i, JS_HEAD_HI)) - << 32); + job_tail = + kbase_reg_read64(kbdev, JOB_SLOT_OFFSET(i, TAIL)); + job_head = + kbase_reg_read64(kbdev, JOB_SLOT_OFFSET(i, HEAD)); /* For a soft-stopped job chain js_tail should * same as the js_head, but if not then the * job chain was incorrectly marked as @@ -482,15 +435,13 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) if (job_tail != job_head) completion_code = BASE_JD_EVENT_UNKNOWN; - } else if (completion_code == - BASE_JD_EVENT_NOT_STARTED) { + } else if (completion_code == BASE_JD_EVENT_NOT_STARTED) { /* PRLAM-10673 can cause a TERMINATED * job to come back as NOT_STARTED, * but the error interrupt helps us * detect it */ - completion_code = - BASE_JD_EVENT_TERMINATED; + completion_code = BASE_JD_EVENT_TERMINATED; } kbase_gpu_irq_evict(kbdev, i, completion_code); @@ -503,20 +454,17 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3076)) { if (completion_code == BASE_JD_EVENT_JOB_BUS_FAULT) { if (kbase_prepare_to_reset_gpu_locked( - kbdev, - RESET_FLAGS_NONE)) + kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu_locked(kbdev); } } } - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), - done & ((1 << i) | (1 << (i + 16)))); - active = kbase_reg_read(kbdev, - JOB_CONTROL_REG(JOB_IRQ_JS_STATE)); + kbase_reg_write32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_CLEAR), + done & ((1u << i) | (1u << (i + 16)))); + active = kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_JS_STATE)); - if (((active >> i) & 1) == 0 && - (((done >> (i + 16)) & 1) == 0)) { + if (((active >> i) & 1) == 0 && (((done >> (i + 16)) & 1) == 0)) { /* There is a potential race we must work * around: * @@ -557,8 +505,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) * have prevented any futher jobs from starting * execution. 
*/ - u32 rawstat = kbase_reg_read(kbdev, - JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)); + u32 rawstat = + kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_RAWSTAT)); if ((rawstat >> (i + 16)) & 1) { /* There is a failed job that we've @@ -568,16 +516,14 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) } } - dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n", - completion_code); + dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n", completion_code); nr_done = kbase_backend_nr_atoms_submitted(kbdev, i); nr_done -= (active >> i) & 1; nr_done -= (active >> (i + 16)) & 1; - if (nr_done <= 0) { - dev_warn(kbdev->dev, "Spurious interrupt on slot %d", - i); + if (nr_done == 0 || nr_done > SLOT_RB_SIZE) { + dev_warn(kbdev->dev, "Spurious interrupt on slot %u", i); goto spurious; } @@ -586,10 +532,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) while (nr_done) { if (likely(nr_done == 1)) { - kbase_gpu_complete_hw(kbdev, i, - completion_code, - job_tail, - &end_timestamp); + kbase_gpu_complete_hw(kbdev, i, completion_code, job_tail, + &end_timestamp); kbase_jm_try_kick_all(kbdev); } else { /* More than one job has completed. @@ -600,10 +544,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) * slot to complete until the failed job * is cleared from the IRQ status. */ - kbase_gpu_complete_hw(kbdev, i, - BASE_JD_EVENT_DONE, - 0, - &end_timestamp); + kbase_gpu_complete_hw(kbdev, i, BASE_JD_EVENT_DONE, 0, + &end_timestamp); #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) /* Increment the end timestamp value by 1 ns to * avoid having the same value for 'start_time_ns' @@ -615,22 +557,19 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) } nr_done--; } - spurious: - done = kbase_reg_read(kbdev, - JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)); +spurious: + done = kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_RAWSTAT)); failed = done >> 16; finished = (done & 0xFFFF) | failed; if (done) end_timestamp = ktime_get_raw(); - } while (finished & (1 << i)); + } while (finished & (1u << i)); - kbasep_job_slot_update_head_start_timestamp(kbdev, i, - end_timestamp); + kbasep_job_slot_update_head_start_timestamp(kbdev, i, end_timestamp); } - if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == - KBASE_RESET_GPU_COMMITTED) { + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_COMMITTED) { /* If we're trying to reset the GPU then we might be able to do * it early (without waiting for a timeout) because some jobs * have completed @@ -649,24 +588,22 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, uns u64 job_in_head_before; u32 status_reg_after; - WARN_ON(action & (~JS_COMMAND_MASK)); + WARN_ON(action & (~(u32)JS_COMMAND_MASK)); /* Check the head pointer */ - job_in_head_before = ((u64) kbase_reg_read(kbdev, - JOB_SLOT_REG(js, JS_HEAD_LO))) - | (((u64) kbase_reg_read(kbdev, - JOB_SLOT_REG(js, JS_HEAD_HI))) - << 32); - status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS)); + job_in_head_before = kbase_reg_read64(kbdev, JOB_SLOT_OFFSET(js, HEAD)); + status_reg_before = kbase_reg_read32(kbdev, JOB_SLOT_OFFSET(js, STATUS)); #endif if (action == JS_COMMAND_SOFT_STOP) { if (kbase_jd_katom_is_protected(target_katom)) { #ifdef CONFIG_MALI_DEBUG dev_dbg(kbdev->dev, - "Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%x", - (unsigned int)core_reqs); -#endif /* CONFIG_MALI_DEBUG */ + "Attempt made to soft-stop a job that cannot be soft-stopped. 
core_reqs = 0x%x", + (unsigned int)core_reqs); +#else + CSTD_UNUSED(core_reqs); +#endif /* CONFIG_MALI_DEBUG */ return; } @@ -678,23 +615,21 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, uns /* Mark the point where we issue the soft-stop command */ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(kbdev, target_katom); - action = (target_katom->atom_flags & - KBASE_KATOM_FLAGS_JOBCHAIN) ? - JS_COMMAND_SOFT_STOP_1 : - JS_COMMAND_SOFT_STOP_0; + action = (target_katom->atom_flags & KBASE_KATOM_FLAGS_JOBCHAIN) ? + JS_COMMAND_SOFT_STOP_1 : + JS_COMMAND_SOFT_STOP_0; } else if (action == JS_COMMAND_HARD_STOP) { target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED; - action = (target_katom->atom_flags & - KBASE_KATOM_FLAGS_JOBCHAIN) ? - JS_COMMAND_HARD_STOP_1 : - JS_COMMAND_HARD_STOP_0; + action = (target_katom->atom_flags & KBASE_KATOM_FLAGS_JOBCHAIN) ? + JS_COMMAND_HARD_STOP_1 : + JS_COMMAND_HARD_STOP_0; } - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action); + kbase_reg_write32(kbdev, JOB_SLOT_OFFSET(js, COMMAND), action); #if KBASE_KTRACE_ENABLE - status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS)); + status_reg_after = kbase_reg_read32(kbdev, JOB_SLOT_OFFSET(js, STATUS)); if (status_reg_after == BASE_JD_EVENT_ACTIVE) { struct kbase_jd_atom *head; struct kbase_context *head_kctx; @@ -707,7 +642,8 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, uns head_kctx = head->kctx; if (status_reg_before == BASE_JD_EVENT_ACTIVE) - KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, head_kctx, head, job_in_head_before, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, head_kctx, head, + job_in_head_before, js); else KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, 0, js); @@ -716,19 +652,23 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, uns KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP, head_kctx, head, head->jc, js); break; case JS_COMMAND_SOFT_STOP_0: - KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx, head, head->jc, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx, head, head->jc, + js); break; case JS_COMMAND_SOFT_STOP_1: - KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx, head, head->jc, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx, head, head->jc, + js); break; case JS_COMMAND_HARD_STOP: KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP, head_kctx, head, head->jc, js); break; case JS_COMMAND_HARD_STOP_0: - KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_0, head_kctx, head, head->jc, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_0, head_kctx, head, head->jc, + js); break; case JS_COMMAND_HARD_STOP_1: - KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, head, head->jc, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, head, head->jc, + js); break; default: WARN(1, "Unknown action %d on atom %pK in kctx %pK\n", action, @@ -737,7 +677,8 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, uns } } else { if (status_reg_before == BASE_JD_EVENT_ACTIVE) - KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, job_in_head_before, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, + job_in_head_before, js); else KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, 0, js); @@ -781,7 +722,7 @@ void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx) } void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, - struct 
kbase_jd_atom *target_katom) + struct kbase_jd_atom *target_katom) { struct kbase_device *kbdev; unsigned int target_js = target_katom->slot_nr; @@ -802,9 +743,7 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, if (kbase_js_atom_runs_before(kbdev, target_katom, slot_katom, KBASE_ATOM_ORDERING_FLAG_SEQNR)) { if (!stop_sent) - KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED( - kbdev, - target_katom); + KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED(kbdev, target_katom); kbase_job_slot_softstop(kbdev, target_js, slot_katom); stop_sent = true; @@ -812,8 +751,7 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, } } -static int softstop_start_rp_nolock( - struct kbase_context *kctx, struct kbase_va_region *reg) +static int softstop_start_rp_nolock(struct kbase_context *kctx, struct kbase_va_region *reg) { struct kbase_device *const kbdev = kctx->kbdev; struct kbase_jd_atom *katom; @@ -829,33 +767,30 @@ static int softstop_start_rp_nolock( } if (!(katom->core_req & BASE_JD_REQ_START_RENDERPASS)) { - dev_dbg(kctx->kbdev->dev, - "Atom %pK on job slot is not start RP\n", (void *)katom); + dev_dbg(kctx->kbdev->dev, "Atom %pK on job slot is not start RP\n", (void *)katom); return -EPERM; } compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <= - ARRAY_SIZE(kctx->jctx.renderpasses), - "Should check invalid access to renderpasses"); + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); rp = &kctx->jctx.renderpasses[katom->renderpass_id]; - if (WARN_ON(rp->state != KBASE_JD_RP_START && - rp->state != KBASE_JD_RP_RETRY)) + if (WARN_ON(rp->state != KBASE_JD_RP_START && rp->state != KBASE_JD_RP_RETRY)) return -EINVAL; - dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %pK\n", - (int)rp->state, (void *)reg); + dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %pK\n", (int)rp->state, (void *)reg); if (WARN_ON(katom != rp->start_katom)) return -EINVAL; - dev_dbg(kctx->kbdev->dev, "Adding region %pK to list %pK\n", - (void *)reg, (void *)&rp->oom_reg_list); + dev_dbg(kctx->kbdev->dev, "Adding region %pK to list %pK\n", (void *)reg, + (void *)&rp->oom_reg_list); list_move_tail(®->link, &rp->oom_reg_list); dev_dbg(kctx->kbdev->dev, "Added region to list\n"); - rp->state = (rp->state == KBASE_JD_RP_START ? - KBASE_JD_RP_PEND_OOM : KBASE_JD_RP_RETRY_PEND_OOM); + rp->state = (rp->state == KBASE_JD_RP_START ? KBASE_JD_RP_PEND_OOM : + KBASE_JD_RP_RETRY_PEND_OOM); kbase_job_slot_softstop(kbdev, 1, katom); @@ -863,7 +798,7 @@ static int softstop_start_rp_nolock( } int kbase_job_slot_softstop_start_rp(struct kbase_context *const kctx, - struct kbase_va_region *const reg) + struct kbase_va_region *const reg) { struct kbase_device *const kbdev = kctx->kbdev; int err; @@ -881,14 +816,12 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) struct kbase_device *kbdev = kctx->kbdev; unsigned long timeout = msecs_to_jiffies(ZAP_TIMEOUT); - timeout = wait_event_timeout(kctx->jctx.zero_jobs_wait, - kctx->jctx.job_nr == 0, timeout); + timeout = wait_event_timeout(kctx->jctx.zero_jobs_wait, kctx->jctx.job_nr == 0, + (long)timeout); if (timeout != 0) - timeout = wait_event_timeout( - kctx->jctx.sched_info.ctx.is_scheduled_wait, - !kbase_ctx_flag(kctx, KCTX_SCHEDULED), - timeout); + timeout = wait_event_timeout(kctx->jctx.sched_info.ctx.is_scheduled_wait, + !kbase_ctx_flag(kctx, KCTX_SCHEDULED), (long)timeout); /* Neither wait timed out; all done! 
*/ if (timeout != 0) @@ -920,8 +853,7 @@ u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev) if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) { rt_mutex_lock(&kbdev->pm.lock); if (kbdev->pm.backend.gpu_powered) - flush_id = kbase_reg_read(kbdev, - GPU_CONTROL_REG(LATEST_FLUSH)); + flush_id = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(LATEST_FLUSH)); rt_mutex_unlock(&kbdev->pm.lock); } @@ -946,7 +878,6 @@ void kbase_job_slot_term(struct kbase_device *kbdev) } KBASE_EXPORT_TEST_API(kbase_job_slot_term); - /** * kbase_job_slot_softstop_swflags - Soft-stop a job with flags * @kbdev: The kbase device @@ -965,8 +896,8 @@ KBASE_EXPORT_TEST_API(kbase_job_slot_term); void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, unsigned int js, struct kbase_jd_atom *target_katom, u32 sw_flags) { - dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n", - target_katom, sw_flags, js); + dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n", target_katom, sw_flags, + js); if (sw_flags & JS_COMMAND_MASK) { WARN(true, "Atom %pK in kctx %pK received non-NOP flags %d\n", (void *)target_katom, @@ -974,11 +905,11 @@ void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, unsigned int js sw_flags &= ~((u32)JS_COMMAND_MASK); } kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom, - JS_COMMAND_SOFT_STOP | sw_flags); + JS_COMMAND_SOFT_STOP | sw_flags); } -void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, - struct kbase_jd_atom *target_katom) +void kbase_job_slot_softstop(struct kbase_device *kbdev, unsigned int js, + struct kbase_jd_atom *target_katom) { kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u); } @@ -988,22 +919,22 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, unsigned int js, { struct kbase_device *kbdev = kctx->kbdev; - kbase_backend_soft_hard_stop_slot(kbdev, kctx, js, - target_katom, - JS_COMMAND_HARD_STOP); + kbase_backend_soft_hard_stop_slot(kbdev, kctx, js, target_katom, + JS_COMMAND_HARD_STOP); } void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, - base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom) + base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom) { u32 hw_action = action & JS_COMMAND_MASK; + CSTD_UNUSED(core_reqs); + /* For soft-stop, don't enter if soft-stop not allowed, or isn't * causing disjoint. 
*/ - if (hw_action == JS_COMMAND_SOFT_STOP && - (kbase_jd_katom_is_protected(target_katom) || - (0 == (action & JS_COMMAND_SW_CAUSES_DISJOINT)))) + if (hw_action == JS_COMMAND_SOFT_STOP && (kbase_jd_katom_is_protected(target_katom) || + (0 == (action & JS_COMMAND_SW_CAUSES_DISJOINT)))) return; /* Nothing to do if already logged disjoint state on this atom */ @@ -1014,8 +945,7 @@ void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, kbase_disjoint_state_up(kbdev); } -void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, - struct kbase_jd_atom *target_katom) +void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, struct kbase_jd_atom *target_katom) { if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) { target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT; @@ -1025,65 +955,70 @@ void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, int kbase_reset_gpu_prevent_and_wait(struct kbase_device *kbdev) { + CSTD_UNUSED(kbdev); WARN(true, "%s Not implemented for JM GPUs", __func__); return -EINVAL; } int kbase_reset_gpu_try_prevent(struct kbase_device *kbdev) { + CSTD_UNUSED(kbdev); WARN(true, "%s Not implemented for JM GPUs", __func__); return -EINVAL; } void kbase_reset_gpu_allow(struct kbase_device *kbdev) { + CSTD_UNUSED(kbdev); WARN(true, "%s Not implemented for JM GPUs", __func__); } void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev) { + CSTD_UNUSED(kbdev); WARN(true, "%s Not implemented for JM GPUs", __func__); } void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev) { + CSTD_UNUSED(kbdev); WARN(true, "%s Not implemented for JM GPUs", __func__); } static void kbase_debug_dump_registers(struct kbase_device *kbdev) { - int i; + unsigned int i; kbase_io_history_dump(kbdev); dev_err(kbdev->dev, "Register state:"); dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x", - kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)), - kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS))); + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_RAWSTAT)), + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_STATUS))); dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x", - kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)), - kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE))); + kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_RAWSTAT)), + kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_JS_STATE))); for (i = 0; i < 3; i++) { - dev_err(kbdev->dev, " JS%d_STATUS=0x%08x JS%d_HEAD_LO=0x%08x", - i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS)), - i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO))); + dev_err(kbdev->dev, " JS%u_STATUS=0x%08x JS%u_HEAD=0x%016llx", i, + kbase_reg_read32(kbdev, JOB_SLOT_OFFSET(i, STATUS)), i, + kbase_reg_read64(kbdev, JOB_SLOT_OFFSET(i, HEAD))); } dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", - kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_RAWSTAT)), - kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS))); + kbase_reg_read32(kbdev, MMU_CONTROL_ENUM(IRQ_RAWSTAT)), + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_FAULTSTATUS))); dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", - kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)), - kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)), - kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK))); + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK)), + kbase_reg_read32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_MASK)), + kbase_reg_read32(kbdev, 
MMU_CONTROL_ENUM(IRQ_MASK))); dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", - kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)), - kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1))); + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(PWR_OVERRIDE0)), + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(PWR_OVERRIDE1))); dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x", - kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG)), - kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG))); + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(SHADER_CONFIG)), + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(L2_MMU_CONFIG))); dev_err(kbdev->dev, " TILER_CONFIG=0x%08x JM_CONFIG=0x%08x", - kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG)), - kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG))); + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(TILER_CONFIG)), + kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(JM_CONFIG))); } static void kbasep_reset_timeout_worker(struct work_struct *data) @@ -1093,15 +1028,12 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) ktime_t end_timestamp = ktime_get_raw(); struct kbasep_js_device_data *js_devdata; bool silent = false; - u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; - kbdev = container_of(data, struct kbase_device, - hwaccess.backend.reset_work); + kbdev = container_of(data, struct kbase_device, hwaccess.backend.reset_work); js_devdata = &kbdev->js_data; - if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == - KBASE_RESET_GPU_SILENT) + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_SILENT) silent = true; KBASE_KTRACE_ADD_JM(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0); @@ -1118,12 +1050,11 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); if (kbase_pm_context_active_handle_suspend(kbdev, - KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { /* This would re-activate the GPU. 
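/*
 * Editorial note, not part of the patch: throughout these files the update
 * replaces the untyped kbase_reg_read()/kbase_reg_write() helpers and the
 * GPU_CONTROL_REG()/JOB_CONTROL_REG()/JOB_SLOT_REG() macros with
 * width-explicit kbase_reg_read32()/read64()/write32() accessors and
 * enum/offset based lookups, so 64-bit registers no longer need separate
 * LO/HI accesses. An illustrative before/after for one such register (the
 * wrapper function is hypothetical; the register names appear elsewhere in
 * this patch):
 */
static bool example_head_next_busy(struct kbase_device *kbdev, unsigned int js)
{
#if 0	/* before this patch: two 32-bit reads of the LO/HI pair */
	return kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO)) != 0 ||
	       kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI)) != 0;
#else	/* after this patch: a single 64-bit read via the offset helpers */
	return kbase_reg_read64(kbdev, JOB_SLOT_OFFSET(js, HEAD_NEXT)) != 0;
#endif
}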
Since it's already idle, * there's no need to reset it */ - atomic_set(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_NOT_PENDING); + atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING); kbase_disjoint_state_down(kbdev); wake_up(&kbdev->hwaccess.backend.reset_wait); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -1161,14 +1092,15 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) kbdev->irq_reset_flush = false; if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) { - /* Ensure that L2 is not transitioning when we send the reset - * command - */ - while (--max_loops && kbase_pm_get_trans_cores(kbdev, - KBASE_PM_CORE_L2)) - ; - - WARN(!max_loops, "L2 power transition timed out while trying to reset\n"); + u64 val; + const u32 timeout_us = + kbase_get_timeout_ms(kbdev, KBASE_CLEAN_CACHE_TIMEOUT) * USEC_PER_MSEC; + /* Ensure that L2 is not transitioning when we send the reset command */ + const int err = read_poll_timeout_atomic(kbase_pm_get_trans_cores, val, !val, 0, + timeout_us, false, kbdev, + KBASE_PM_CORE_L2); + + WARN(err, "L2 power transition timed out while trying to reset\n"); } rt_mutex_lock(&kbdev->pm.lock); @@ -1183,8 +1115,7 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) */ if (!silent) - dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", - RESET_TIMEOUT); + dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", RESET_TIMEOUT); /* Output the state of some interesting registers to help in the * debugging of GPU resets @@ -1240,8 +1171,7 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) rt_mutex_unlock(&kbdev->pm.lock); - atomic_set(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_NOT_PENDING); + atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING); wake_up(&kbdev->hwaccess.backend.reset_wait); if (!silent) @@ -1268,15 +1198,14 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer) { - struct kbase_device *kbdev = container_of(timer, struct kbase_device, - hwaccess.backend.reset_timer); + struct kbase_device *kbdev = + container_of(timer, struct kbase_device, hwaccess.backend.reset_timer); /* Reset still pending? 
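/*
 * Editorial note, not part of the patch: the BASE_HW_ISSUE_TMIX_8463 hunk
 * above replaces an iteration-bounded busy-wait (KBASE_CLEAN_CACHE_MAX_LOOPS)
 * with read_poll_timeout_atomic() from <linux/iopoll.h>, so the wait is now
 * bounded in time, with the budget taken from
 * kbase_get_timeout_ms(kbdev, KBASE_CLEAN_CACHE_TIMEOUT). A minimal sketch
 * of the same polling pattern (the wrapper function is hypothetical):
 */
static int example_wait_l2_not_in_transition(struct kbase_device *kbdev,
					     u32 timeout_us)
{
	u64 trans;

	/* Spin (no sleeping) until kbase_pm_get_trans_cores() reports no L2
	 * cores in transition, or until timeout_us elapses; returns 0 on
	 * success and -ETIMEDOUT otherwise.
	 */
	return read_poll_timeout_atomic(kbase_pm_get_trans_cores, trans, !trans,
					0, timeout_us, false, kbdev,
					KBASE_PM_CORE_L2);
}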
*/ - if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) == - KBASE_RESET_GPU_COMMITTED) + if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_COMMITTED, + KBASE_RESET_GPU_HAPPENING) == KBASE_RESET_GPU_COMMITTED) queue_work(kbdev->hwaccess.backend.reset_workq, - &kbdev->hwaccess.backend.reset_work); + &kbdev->hwaccess.backend.reset_work); return HRTIMER_NORESTART; } @@ -1289,7 +1218,7 @@ static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer) static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev) { unsigned int i; - int pending_jobs = 0; + u32 pending_jobs = 0; /* Count the number of jobs */ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) @@ -1310,15 +1239,13 @@ static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev) * been called), and that no other thread beat this thread to starting * the reset */ - if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) != - KBASE_RESET_GPU_COMMITTED) { + if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_COMMITTED, + KBASE_RESET_GPU_HAPPENING) != KBASE_RESET_GPU_COMMITTED) { /* Reset has already occurred */ return; } - queue_work(kbdev->hwaccess.backend.reset_workq, - &kbdev->hwaccess.backend.reset_work); + queue_work(kbdev->hwaccess.backend.reset_workq, &kbdev->hwaccess.backend.reset_work); } static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev) @@ -1343,10 +1270,9 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev) * false - Another thread is performing a reset, kbase_reset_gpu should * not be called. */ -bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, - unsigned int flags) +bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, unsigned int flags) { - int i; + unsigned int i; #ifdef CONFIG_MALI_ARBITER_SUPPORT if (kbase_pm_is_gpu_lost(kbdev)) { @@ -1360,10 +1286,8 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR) kbase_instr_hwcnt_on_unrecoverable_error(kbdev); - if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_NOT_PENDING, - KBASE_RESET_GPU_PREPARED) != - KBASE_RESET_GPU_NOT_PENDING) { + if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING, + KBASE_RESET_GPU_PREPARED) != KBASE_RESET_GPU_NOT_PENDING) { /* Some other thread is already resetting the GPU */ return false; } @@ -1406,15 +1330,14 @@ void kbase_reset_gpu(struct kbase_device *kbdev) */ if (WARN_ON(atomic_read(&kbdev->hwaccess.backend.reset_gpu) != KBASE_RESET_GPU_PREPARED)) return; - atomic_set(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_COMMITTED); + atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_COMMITTED); - dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n", - kbdev->reset_timeout_ms); + dev_err(kbdev->dev, + "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n", + kbdev->reset_timeout_ms); hrtimer_start(&kbdev->hwaccess.backend.reset_timer, - HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms), - HRTIMER_MODE_REL); + HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms), HRTIMER_MODE_REL); /* Try resetting early */ kbasep_try_reset_gpu_early(kbdev); @@ -1428,14 +1351,13 @@ void kbase_reset_gpu_locked(struct kbase_device *kbdev) */ if 
(WARN_ON(atomic_read(&kbdev->hwaccess.backend.reset_gpu) != KBASE_RESET_GPU_PREPARED)) return; - atomic_set(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_COMMITTED); + atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_COMMITTED); - dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n", - kbdev->reset_timeout_ms); + dev_err(kbdev->dev, + "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n", + kbdev->reset_timeout_ms); hrtimer_start(&kbdev->hwaccess.backend.reset_timer, - HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms), - HRTIMER_MODE_REL); + HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms), HRTIMER_MODE_REL); /* Try resetting early */ kbasep_try_reset_gpu_early_locked(kbdev); @@ -1443,26 +1365,22 @@ void kbase_reset_gpu_locked(struct kbase_device *kbdev) int kbase_reset_gpu_silent(struct kbase_device *kbdev) { - if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_NOT_PENDING, - KBASE_RESET_GPU_SILENT) != - KBASE_RESET_GPU_NOT_PENDING) { + if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING, + KBASE_RESET_GPU_SILENT) != KBASE_RESET_GPU_NOT_PENDING) { /* Some other thread is already resetting the GPU */ return -EAGAIN; } kbase_disjoint_state_up(kbdev); - queue_work(kbdev->hwaccess.backend.reset_workq, - &kbdev->hwaccess.backend.reset_work); + queue_work(kbdev->hwaccess.backend.reset_workq, &kbdev->hwaccess.backend.reset_work); return 0; } bool kbase_reset_gpu_is_active(struct kbase_device *kbdev) { - if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == - KBASE_RESET_GPU_NOT_PENDING) + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_NOT_PENDING) return false; return true; @@ -1476,8 +1394,7 @@ bool kbase_reset_gpu_is_not_pending(struct kbase_device *kbdev) int kbase_reset_gpu_wait(struct kbase_device *kbdev) { wait_event(kbdev->hwaccess.backend.reset_wait, - atomic_read(&kbdev->hwaccess.backend.reset_gpu) - == KBASE_RESET_GPU_NOT_PENDING); + atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_NOT_PENDING); return 0; } @@ -1485,18 +1402,14 @@ KBASE_EXPORT_TEST_API(kbase_reset_gpu_wait); int kbase_reset_gpu_init(struct kbase_device *kbdev) { - kbdev->hwaccess.backend.reset_workq = alloc_workqueue( - "Mali reset workqueue", 0, 1); + kbdev->hwaccess.backend.reset_workq = alloc_workqueue("Mali reset workqueue", 0, 1); if (kbdev->hwaccess.backend.reset_workq == NULL) return -ENOMEM; - INIT_WORK(&kbdev->hwaccess.backend.reset_work, - kbasep_reset_timeout_worker); + INIT_WORK(&kbdev->hwaccess.backend.reset_work, kbasep_reset_timeout_worker); - hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL); - kbdev->hwaccess.backend.reset_timer.function = - kbasep_reset_timer_callback; + hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + kbdev->hwaccess.backend.reset_timer.function = kbasep_reset_timer_callback; return 0; } @@ -1506,17 +1419,15 @@ void kbase_reset_gpu_term(struct kbase_device *kbdev) destroy_workqueue(kbdev->hwaccess.backend.reset_workq); } -static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev, - const u64 affinity, const u64 limited_core_mask) +static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev, const u64 affinity, + const u64 limited_core_mask) { const u64 result = affinity & limited_core_mask; #ifdef CONFIG_MALI_DEBUG dev_dbg(kbdev->dev, - "Limiting affinity due to 
BASE_JD_REQ_LIMITED_CORE_MASK from 0x%lx to 0x%lx (mask is 0x%lx)\n", - (unsigned long)affinity, - (unsigned long)result, - (unsigned long)limited_core_mask); + "Limiting affinity due to BASE_JD_REQ_LIMITED_CORE_MASK from 0x%lx to 0x%lx (mask is 0x%lx)\n", + (unsigned long)affinity, (unsigned long)result, (unsigned long)limited_core_mask); #else CSTD_UNUSED(kbdev); #endif diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h index 380a530..2b95e97 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h @@ -41,8 +41,8 @@ * @job_tail: Job tail address reported by GPU * @end_timestamp: Timestamp of job completion */ -void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code, - u64 job_tail, ktime_t *end_timestamp); +void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code, u64 job_tail, + ktime_t *end_timestamp); #if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) static inline char *kbasep_make_job_slot_string(unsigned int js, char *js_string, size_t js_size) @@ -74,8 +74,8 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, * on the specified atom * @kbdev: Device pointer * @js: Job slot to stop on - * @action: The action to perform, either JSn_COMMAND_HARD_STOP or - * JSn_COMMAND_SOFT_STOP + * @action: The action to perform, either JS_COMMAND_HARD_STOP or + * JS_COMMAND_SOFT_STOP * @core_reqs: Core requirements of atom to stop * @target_katom: Atom to stop * @@ -94,8 +94,8 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, uns * @kctx: Context pointer. May be NULL * @katom: Specific atom to stop. May be NULL * @js: Job slot to hard stop - * @action: The action to perform, either JSn_COMMAND_HARD_STOP or - * JSn_COMMAND_SOFT_STOP + * @action: The action to perform, either JS_COMMAND_HARD_STOP or + * JS_COMMAND_SOFT_STOP * * If no context is provided then all jobs on the slot will be soft or hard * stopped. diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c index 66f068a..d938079 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c @@ -47,7 +47,7 @@ * * Note: HW access lock must be held */ -#define SLOT_RB_EMPTY(rb) (rb->write_idx == rb->read_idx) +#define SLOT_RB_EMPTY(rb) (rb->write_idx == rb->read_idx) /** * SLOT_RB_ENTRIES - Return number of atoms currently in the specified ringbuffer. 
@@ -58,9 +58,8 @@ */ #define SLOT_RB_ENTRIES(rb) ((int)(s8)(rb->write_idx - rb->read_idx)) -static void kbase_gpu_release_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, - ktime_t *end_timestamp); +static void kbase_gpu_release_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom, + ktime_t *end_timestamp); /** * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer @@ -69,8 +68,7 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, * * Context: Caller must hold the HW access lock */ -static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) +static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr]; @@ -161,9 +159,9 @@ bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev) return false; } -int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, unsigned int js) +u32 kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, unsigned int js) { - int nr = 0; + u32 nr = 0; int i; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -171,8 +169,7 @@ int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, unsigned int js for (i = 0; i < SLOT_RB_SIZE; i++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); - if (katom && (katom->gpu_rb_state == - KBASE_ATOM_GPU_RB_SUBMITTED)) + if (katom && (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED)) nr++; } @@ -223,10 +220,9 @@ static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, unsigned i */ static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure) { - if (katom->gpu_rb_state >= - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION && - ((kbase_jd_katom_is_protected(katom) && secure) || - (!kbase_jd_katom_is_protected(katom) && !secure))) + if (katom->gpu_rb_state >= KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION && + ((kbase_jd_katom_is_protected(katom) && secure) || + (!kbase_jd_katom_is_protected(katom) && !secure))) return true; return false; @@ -242,8 +238,7 @@ static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure) * * Return: true if any atoms are in the given state, false otherwise */ -static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, - bool secure) +static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, bool secure) { unsigned int js; @@ -251,8 +246,7 @@ static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, int i; for (i = 0; i < SLOT_RB_SIZE; i++) { - struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, - js, i); + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); if (katom) { if (check_secure_atom(katom, secure)) @@ -268,8 +262,7 @@ int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js) { lockdep_assert_held(&kbdev->hwaccess_lock); - if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) != - KBASE_RESET_GPU_NOT_PENDING) { + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) != KBASE_RESET_GPU_NOT_PENDING) { /* The GPU is being reset - so prevent submission */ return 0; } @@ -291,15 +284,13 @@ int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js) * * Note: Caller must hold the HW access lock. 
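/*
 * Editorial note, not part of the patch: the slot ringbuffer indices are
 * small free-running counters, so SLOT_RB_ENTRIES() in the hunk above
 * measures occupancy by subtracting them and reinterpreting the low 8 bits
 * as signed, which survives wrap-around as long as the ring never holds 128
 * or more entries (this backend keeps at most two atoms per slot, inspected
 * as indices 0 and 1). A standalone demonstration of the arithmetic, with
 * the index width modelled as 8 bits:
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint8_t write_idx = 1;	/* has wrapped past 255 */
	uint8_t read_idx = 255;	/* still two entries behind */

	/* Same cast chain as SLOT_RB_ENTRIES(): unsigned difference, then
	 * reinterpret as signed 8-bit.
	 */
	int entries = (int)(int8_t)(write_idx - read_idx);

	assert(entries == 2);
	return 0;
}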
*/ -static inline void trace_atom_completion_for_gpu_metrics( - struct kbase_jd_atom *const katom, - ktime_t *end_timestamp) +static inline void trace_atom_completion_for_gpu_metrics(struct kbase_jd_atom *const katom, + ktime_t *end_timestamp) { #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) u64 complete_ns; struct kbase_context *kctx = katom->kctx; - struct kbase_jd_atom *queued = - kbase_gpu_inspect(kctx->kbdev, katom->slot_nr, 1); + struct kbase_jd_atom *queued = kbase_gpu_inspect(kctx->kbdev, katom->slot_nr, 1); #ifdef CONFIG_MALI_DEBUG WARN_ON(!kbase_gpu_inspect(kctx->kbdev, katom->slot_nr, 0)); @@ -328,12 +319,14 @@ static inline void trace_atom_completion_for_gpu_metrics( kbase_gpu_metrics_ctx_end_activity(kctx, complete_ns); if (queued && queued->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) kbase_gpu_metrics_ctx_start_activity(queued->kctx, complete_ns); +#else + CSTD_UNUSED(katom); + CSTD_UNUSED(end_timestamp); #endif } -static void kbase_gpu_release_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, - ktime_t *end_timestamp) +static void kbase_gpu_release_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom, + ktime_t *end_timestamp) { struct kbase_context *kctx = katom->kctx; @@ -361,12 +354,10 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, kbase_pm_release_gpu_cycle_counter_nolock(kbdev); KBASE_TLSTREAM_TL_NRET_ATOM_LPU(kbdev, katom, - &kbdev->gpu_props.props.raw_props.js_features - [katom->slot_nr]); + &kbdev->gpu_props.js_features[katom->slot_nr]); KBASE_TLSTREAM_TL_NRET_ATOM_AS(kbdev, katom, &kbdev->as[kctx->as_nr]); KBASE_TLSTREAM_TL_NRET_CTX_LPU(kbdev, kctx, - &kbdev->gpu_props.props.raw_props.js_features - [katom->slot_nr]); + &kbdev->gpu_props.js_features[katom->slot_nr]); /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ fallthrough; @@ -378,30 +369,23 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: if (kbase_jd_katom_is_protected(katom) && - (katom->protected_state.enter != - KBASE_ATOM_ENTER_PROTECTED_CHECK) && - (katom->protected_state.enter != - KBASE_ATOM_ENTER_PROTECTED_HWCNT)) { + (katom->protected_state.enter != KBASE_ATOM_ENTER_PROTECTED_CHECK) && + (katom->protected_state.enter != KBASE_ATOM_ENTER_PROTECTED_HWCNT)) { kbase_pm_protected_override_disable(kbdev); kbase_pm_update_cores_state_nolock(kbdev); } if (kbase_jd_katom_is_protected(katom) && - (katom->protected_state.enter == - KBASE_ATOM_ENTER_PROTECTED_IDLE_L2)) + (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_IDLE_L2)) kbase_pm_protected_entry_override_disable(kbdev); if (!kbase_jd_katom_is_protected(katom) && - (katom->protected_state.exit != - KBASE_ATOM_EXIT_PROTECTED_CHECK) && - (katom->protected_state.exit != - KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT)) { + (katom->protected_state.exit != KBASE_ATOM_EXIT_PROTECTED_CHECK) && + (katom->protected_state.exit != KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT)) { kbase_pm_protected_override_disable(kbdev); kbase_pm_update_cores_state_nolock(kbdev); } - if (katom->protected_state.enter != - KBASE_ATOM_ENTER_PROTECTED_CHECK || - katom->protected_state.exit != - KBASE_ATOM_EXIT_PROTECTED_CHECK) + if (katom->protected_state.enter != KBASE_ATOM_ENTER_PROTECTED_CHECK || + katom->protected_state.exit != KBASE_ATOM_EXIT_PROTECTED_CHECK) kbdev->protected_mode_transition = false; /* If the atom is at KBASE_ATOM_ENTER_PROTECTED_HWCNT state, it means @@ -435,18 +419,15 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, 
WARN_ON(!kbdev->protected_mode_hwcnt_disabled); kbdev->protected_mode_hwcnt_desired = true; if (kbdev->protected_mode_hwcnt_disabled) { - kbase_hwcnt_context_enable( - kbdev->hwcnt_gpu_ctx); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); kbdev->protected_mode_hwcnt_disabled = false; } } if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { - if (katom->atom_flags & - KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) { + if (katom->atom_flags & KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) { kbase_pm_protected_l2_override(kbdev, false); - katom->atom_flags &= - ~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; + katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; } } @@ -466,14 +447,12 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; } -static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) +static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { lockdep_assert_held(&kbdev->hwaccess_lock); - KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_MARK_FOR_RETURN_TO_JS, - katom->kctx, katom, katom->jc, - katom->slot_nr, katom->event_code); + KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_MARK_FOR_RETURN_TO_JS, katom->kctx, katom, + katom->jc, katom->slot_nr, katom->event_code); kbase_gpu_release_atom(kbdev, katom, NULL); katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS; } @@ -494,8 +473,7 @@ static inline bool other_slots_busy(struct kbase_device *kbdev, unsigned int js) if (slot == js) continue; - if (kbase_gpu_nr_atoms_on_slot_min(kbdev, slot, - KBASE_ATOM_GPU_RB_SUBMITTED)) + if (kbase_gpu_nr_atoms_on_slot_min(kbdev, slot, KBASE_ATOM_GPU_RB_SUBMITTED)) return true; } @@ -527,16 +505,14 @@ static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->hwaccess_lock); WARN_ONCE(!kbdev->protected_ops, - "Cannot enter protected mode: protected callbacks not specified.\n"); + "Cannot enter protected mode: protected callbacks not specified.\n"); if (kbdev->protected_ops) { /* Switch GPU to protected mode */ - err = kbdev->protected_ops->protected_mode_enable( - kbdev->protected_dev); + err = kbdev->protected_ops->protected_mode_enable(kbdev->protected_dev); if (err) { - dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n", - err); + dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n", err); } else { kbdev->protected_mode = true; kbase_ipa_protection_mode_switch_event(kbdev); @@ -551,7 +527,7 @@ static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->hwaccess_lock); WARN_ONCE(!kbdev->protected_ops, - "Cannot exit protected mode: protected callbacks not specified.\n"); + "Cannot exit protected mode: protected callbacks not specified.\n"); if (!kbdev->protected_ops) return -EINVAL; @@ -561,8 +537,8 @@ static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev) return kbase_reset_gpu_silent(kbdev); } -static int kbase_jm_protected_entry(struct kbase_device *kbdev, - struct kbase_jd_atom **katom, int idx, int js) +static int kbase_jm_protected_entry(struct kbase_device *kbdev, struct kbase_jd_atom **katom, + int idx, unsigned int js) { int err = 0; @@ -597,8 +573,7 @@ static int kbase_jm_protected_entry(struct kbase_device *kbdev, * already removed - as atoms must be returned * in order. 
*/ - if (idx == 0 || katom[0]->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + if (idx == 0 || katom[0]->gpu_rb_state == KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { kbase_gpu_dequeue_atom(kbdev, js, NULL); kbase_jm_return_atom_to_js(kbdev, katom[idx]); } @@ -612,14 +587,13 @@ static int kbase_jm_protected_entry(struct kbase_device *kbdev, WARN(kbase_jd_katom_is_protected(katom[idx]) != kbase_gpu_in_protected_mode(kbdev), "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev)); - katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_READY; + katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; return err; } -static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, - struct kbase_jd_atom **katom, int idx, int js) +static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, struct kbase_jd_atom **katom, + int idx, unsigned int js) { int err = 0; @@ -639,8 +613,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, */ WARN_ON(kbdev->protected_mode_hwcnt_disabled); - katom[idx]->protected_state.enter = - KBASE_ATOM_ENTER_PROTECTED_HWCNT; + katom[idx]->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_HWCNT; kbdev->protected_mode_transition = true; @@ -650,16 +623,14 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, /* See if we can get away with disabling hwcnt atomically */ kbdev->protected_mode_hwcnt_desired = false; if (!kbdev->protected_mode_hwcnt_disabled) { - if (kbase_hwcnt_context_disable_atomic( - kbdev->hwcnt_gpu_ctx)) + if (kbase_hwcnt_context_disable_atomic(kbdev->hwcnt_gpu_ctx)) kbdev->protected_mode_hwcnt_disabled = true; } /* We couldn't disable atomically, so kick off a worker */ if (!kbdev->protected_mode_hwcnt_disabled) { - kbase_hwcnt_context_queue_work( - kbdev->hwcnt_gpu_ctx, - &kbdev->protected_mode_hwcnt_disable_work); + kbase_hwcnt_context_queue_work(kbdev->hwcnt_gpu_ctx, + &kbdev->protected_mode_hwcnt_disable_work); return -EAGAIN; } @@ -675,8 +646,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, * Entering protected mode requires us to power down the L2, * and drop out of fully coherent mode. */ - katom[idx]->protected_state.enter = - KBASE_ATOM_ENTER_PROTECTED_IDLE_L2; + katom[idx]->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_IDLE_L2; kbase_pm_protected_override_enable(kbdev); /* @@ -701,11 +671,9 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, return -EAGAIN; } - if (kbase_pm_get_ready_cores(kbdev, - KBASE_PM_CORE_L2) || - kbase_pm_get_trans_cores(kbdev, - KBASE_PM_CORE_L2) || - kbase_is_gpu_removed(kbdev)) { + if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) || + kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2) || + kbase_is_gpu_removed(kbdev)) { /* * The L2 is still powered, wait for all * the users to finish with it before doing @@ -715,8 +683,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, } } - katom[idx]->protected_state.enter = - KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY; + katom[idx]->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY; /* ***TRANSITION TO HIGHER STATE*** */ fallthrough; @@ -741,12 +708,10 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, * Set the flag on the atom that additional * L2 references are taken. 
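/*
 * Editorial note, not part of the patch: kbase_jm_enter_protected_mode()
 * above is written as a resumable state machine - the atom records its
 * progress in protected_state.enter, each case falls through once its
 * precondition holds, and returning -EAGAIN hands control back so a later
 * kbase_backend_slot_update() retries from the recorded stage. A
 * stripped-down sketch of that shape (stage names abbreviated, the real
 * preconditions elided, kernel headers assumed for fallthrough and EAGAIN):
 */
enum example_enter_stage {
	EXAMPLE_CHECK,
	EXAMPLE_HWCNT,
	EXAMPLE_IDLE_L2,
	EXAMPLE_SET_COHERENCY,
	EXAMPLE_FINISHED,
};

static int example_enter_protected(enum example_enter_stage *stage,
				   bool hwcnt_disabled, bool l2_idle)
{
	switch (*stage) {
	case EXAMPLE_CHECK:
		*stage = EXAMPLE_HWCNT;
		fallthrough;
	case EXAMPLE_HWCNT:
		if (!hwcnt_disabled)
			return -EAGAIN;	/* retried on a later slot update */
		*stage = EXAMPLE_IDLE_L2;
		fallthrough;
	case EXAMPLE_IDLE_L2:
		if (!l2_idle)
			return -EAGAIN;	/* wait for the L2 to power down */
		*stage = EXAMPLE_SET_COHERENCY;
		fallthrough;
	case EXAMPLE_SET_COHERENCY:
		*stage = EXAMPLE_FINISHED;
		fallthrough;
	case EXAMPLE_FINISHED:
		break;
	}
	return 0;
}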
*/ - katom[idx]->atom_flags |= - KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; + katom[idx]->atom_flags |= KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; } - katom[idx]->protected_state.enter = - KBASE_ATOM_ENTER_PROTECTED_FINISHED; + katom[idx]->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_FINISHED; if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) return -EAGAIN; @@ -764,10 +729,8 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, * Remove additional L2 reference and reset * the atom flag which denotes it. */ - if (katom[idx]->atom_flags & - KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) { - kbase_pm_protected_l2_override(kbdev, - false); + if (katom[idx]->atom_flags & KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) { + kbase_pm_protected_l2_override(kbdev, false); katom[idx]->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; } @@ -793,8 +756,8 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, return 0; } -static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, - struct kbase_jd_atom **katom, int idx, int js) +static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, struct kbase_jd_atom **katom, + int idx, unsigned int js) { int err = 0; @@ -815,8 +778,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, * needs to be powered down to ensure it's not active when the * reset is issued. */ - katom[idx]->protected_state.exit = - KBASE_ATOM_EXIT_PROTECTED_IDLE_L2; + katom[idx]->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_IDLE_L2; kbdev->protected_mode_transition = true; kbase_pm_protected_override_enable(kbdev); @@ -832,8 +794,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, */ return -EAGAIN; } - katom[idx]->protected_state.exit = - KBASE_ATOM_EXIT_PROTECTED_RESET; + katom[idx]->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_RESET; /* ***TRANSITION TO HIGHER STATE*** */ fallthrough; @@ -860,8 +821,8 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, /* Only return if head atom or previous atom * already removed - as atoms must be returned in order */ - if (idx == 0 || katom[0]->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + if (idx == 0 || + katom[0]->gpu_rb_state == KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { kbase_gpu_dequeue_atom(kbdev, js, NULL); kbase_jm_return_atom_to_js(kbdev, katom[idx]); } @@ -872,16 +833,14 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, WARN_ON(!kbdev->protected_mode_hwcnt_disabled); kbdev->protected_mode_hwcnt_desired = true; if (kbdev->protected_mode_hwcnt_disabled) { - kbase_hwcnt_context_enable( - kbdev->hwcnt_gpu_ctx); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); kbdev->protected_mode_hwcnt_disabled = false; } return -EINVAL; } - katom[idx]->protected_state.exit = - KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT; + katom[idx]->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT; /* ***TRANSITION TO HIGHER STATE*** */ fallthrough; @@ -905,8 +864,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->hwaccess_lock); #ifdef CONFIG_MALI_ARBITER_SUPPORT - if (kbase_reset_gpu_is_active(kbdev) || - kbase_is_gpu_removed(kbdev)) + if (kbase_reset_gpu_is_active(kbdev) || kbase_is_gpu_removed(kbdev)) #else if (kbase_reset_gpu_is_active(kbdev)) #endif @@ -946,15 +904,12 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) /* ***TRANSITION TO HIGHER STATE*** */ fallthrough; case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: - if (kbase_gpu_check_secure_atoms(kbdev, - 
!kbase_jd_katom_is_protected( - katom[idx]))) + if (kbase_gpu_check_secure_atoms( + kbdev, !kbase_jd_katom_is_protected(katom[idx]))) break; - if ((idx == 1) && (kbase_jd_katom_is_protected( - katom[0]) != - kbase_jd_katom_is_protected( - katom[1]))) + if ((idx == 1) && (kbase_jd_katom_is_protected(katom[0]) != + kbase_jd_katom_is_protected(katom[1]))) break; if (kbdev->protected_mode_transition) @@ -976,22 +931,19 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) */ if (!kbase_gpu_in_protected_mode(kbdev) && - kbase_jd_katom_is_protected(katom[idx])) { + kbase_jd_katom_is_protected(katom[idx])) { /* Atom needs to transition into protected mode. */ - ret = kbase_jm_enter_protected_mode(kbdev, - katom, idx, js); + ret = kbase_jm_enter_protected_mode(kbdev, katom, idx, js); if (ret) break; } else if (kbase_gpu_in_protected_mode(kbdev) && - !kbase_jd_katom_is_protected(katom[idx])) { + !kbase_jd_katom_is_protected(katom[idx])) { /* Atom needs to transition out of protected mode. */ - ret = kbase_jm_exit_protected_mode(kbdev, - katom, idx, js); + ret = kbase_jm_exit_protected_mode(kbdev, katom, idx, js); if (ret) break; } - katom[idx]->protected_state.exit = - KBASE_ATOM_EXIT_PROTECTED_CHECK; + katom[idx]->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; /* Atom needs no protected mode transition. */ @@ -1002,33 +954,29 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) fallthrough; case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: if (katom[idx]->will_fail_event_code) { - kbase_gpu_mark_atom_for_return(kbdev, - katom[idx]); + kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); /* Set EVENT_DONE so this atom will be * completed, not unpulled. */ - katom[idx]->event_code = - BASE_JD_EVENT_DONE; + katom[idx]->event_code = BASE_JD_EVENT_DONE; /* Only return if head atom or previous * atom already removed - as atoms must * be returned in order. */ - if (idx == 0 || katom[0]->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { kbase_gpu_dequeue_atom(kbdev, js, NULL); kbase_jm_return_atom_to_js(kbdev, katom[idx]); } break; } - cores_ready = kbase_pm_cores_requested(kbdev, - true); + cores_ready = kbase_pm_cores_requested(kbdev, true); if (!cores_ready) break; - katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_READY; + katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; /* ***TRANSITION TO HIGHER STATE*** */ fallthrough; @@ -1047,26 +995,23 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) /* Only submit if head atom or previous * atom already submitted */ - if ((atom_0_gpu_rb_state != - KBASE_ATOM_GPU_RB_SUBMITTED && - atom_0_gpu_rb_state != - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) + if ((atom_0_gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED && + atom_0_gpu_rb_state != + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) break; /* If intra-slot serialization in use * then don't submit atom to NEXT slot */ - if (kbdev->serialize_jobs & - KBASE_SERIALIZE_INTRA_SLOT) + if (kbdev->serialize_jobs & KBASE_SERIALIZE_INTRA_SLOT) break; } /* If inter-slot serialization in use then don't * submit atom if any other slots are in use */ - if ((kbdev->serialize_jobs & - KBASE_SERIALIZE_INTER_SLOT) && - other_slots_busy(kbdev, js)) + if ((kbdev->serialize_jobs & KBASE_SERIALIZE_INTER_SLOT) && + other_slots_busy(kbdev, js)) break; /* Check if this job needs the cycle counter @@ -1083,7 +1028,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) * metrics. 
*/ kbase_pm_metrics_update(kbdev, - &katom[idx]->start_timestamp); + &katom[idx]->start_timestamp); /* Inform platform at start/finish of atom */ @@ -1112,11 +1057,10 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) * already removed - as atoms must be returned * in order */ - if (idx == 0 || katom[0]->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + if (idx == 0 || + katom[0]->gpu_rb_state == KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { kbase_gpu_dequeue_atom(kbdev, js, NULL); - kbase_jm_return_atom_to_js(kbdev, - katom[idx]); + kbase_jm_return_atom_to_js(kbdev, katom[idx]); } break; } @@ -1124,9 +1068,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) } } - -void kbase_backend_run_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) +void kbase_backend_run_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { lockdep_assert_held(&kbdev->hwaccess_lock); dev_dbg(kbdev->dev, "Backend running atom %pK\n", (void *)katom); @@ -1145,21 +1087,17 @@ void kbase_backend_run_atom(struct kbase_device *kbdev, * * Return: true if @katom_b might depend on @katom_a, false if it cannot depend. */ -static inline bool -kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a, - const struct kbase_jd_atom *katom_b) +static inline bool kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a, + const struct kbase_jd_atom *katom_b) { if (katom_a->kctx != katom_b->kctx) return false; - return (katom_b->pre_dep || - (katom_b->atom_flags & (KBASE_KATOM_FLAG_X_DEP_BLOCKED | - KBASE_KATOM_FLAG_FAIL_BLOCKER))); + return (katom_b->pre_dep || (katom_b->atom_flags & (KBASE_KATOM_FLAG_X_DEP_BLOCKED | + KBASE_KATOM_FLAG_FAIL_BLOCKER))); } -static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, - u32 action, - bool disjoint) +static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom, + u32 action, bool disjoint) { struct kbase_context *kctx = katom->kctx; @@ -1167,17 +1105,15 @@ static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; kbase_gpu_mark_atom_for_return(kbdev, katom); - kbase_jsctx_slot_prio_blocked_set(kctx, katom->slot_nr, - katom->sched_priority); + kbase_jsctx_slot_prio_blocked_set(kctx, katom->slot_nr, katom->sched_priority); if (disjoint) - kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, - katom); + kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom); } /** - * kbase_gpu_irq_evict - evict a slot's JSn_HEAD_NEXT atom from the HW if it is - * related to a failed JSn_HEAD atom + * kbase_gpu_irq_evict - evict a slot's JS_HEAD_NEXT atom from the HW if it is + * related to a failed JS_HEAD atom * @kbdev: kbase device * @js: job slot to check * @completion_code: completion code of the failed atom @@ -1186,18 +1122,18 @@ static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, * unlike other failure codes we _can_ re-run them. * * This forms step 1 in a 2-step process of removing any related atoms from a - * slot's JSn_HEAD_NEXT (ringbuffer index 1), should there have - * been a 'failure' on an atom in JSn_HEAD (ringbuffer index 0). + * slot's JS_HEAD_NEXT (ringbuffer index 1), should there have + * been a 'failure' on an atom in JS_HEAD (ringbuffer index 0). * * This step only removes the atoms from the HW, and marks them as * (potentially) ready to run again. 
* - * Step 2 is on marking the JSn_HEAD atom as complete + * Step 2 is on marking the JS_HEAD atom as complete * (kbase_gpu_complete_hw()), to dequeue said atoms and return them to the JS * as appropriate, or re-submit them. * * Hence, this function must evict at a minimum the atoms related to the atom - * in JSn_HEAD that kbase_gpu_complete_hw() will also dequeue. It is acceptable + * in JS_HEAD that kbase_gpu_complete_hw() will also dequeue. It is acceptable * if this function evicts more atoms than kbase_gpu_complete_hw() dequeues, as * the next kbase_backend_slot_update() will resubmit any remaining. * @@ -1217,25 +1153,22 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 comple } next_katom = kbase_gpu_inspect(kbdev, js, 1); - if (next_katom && - next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED && + if (next_katom && next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED && (kbase_rb_atom_might_depend(katom, next_katom) || kbase_js_atom_runs_before(kbdev, katom, next_katom, 0u)) && - (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO)) != 0 || - kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI)) != 0)) { - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), - JS_COMMAND_NOP); + kbase_reg_read64(kbdev, JOB_SLOT_OFFSET(js, HEAD_NEXT)) != 0) { + kbase_reg_write32(kbdev, JOB_SLOT_OFFSET(js, COMMAND_NEXT), JS_COMMAND_NOP); if (completion_code == BASE_JD_EVENT_STOPPED) { kbase_gpu_remove_atom(kbdev, next_katom, JS_COMMAND_SOFT_STOP, false); - KBASE_TLSTREAM_TL_NRET_ATOM_LPU(kbdev, next_katom, - &kbdev->gpu_props.props.raw_props.js_features - [next_katom->slot_nr]); - KBASE_TLSTREAM_TL_NRET_ATOM_AS(kbdev, next_katom, &kbdev->as - [next_katom->kctx->as_nr]); - KBASE_TLSTREAM_TL_NRET_CTX_LPU(kbdev, next_katom->kctx, - &kbdev->gpu_props.props.raw_props.js_features - [next_katom->slot_nr]); + KBASE_TLSTREAM_TL_NRET_ATOM_LPU( + kbdev, next_katom, + &kbdev->gpu_props.js_features[next_katom->slot_nr]); + KBASE_TLSTREAM_TL_NRET_ATOM_AS(kbdev, next_katom, + &kbdev->as[next_katom->kctx->as_nr]); + KBASE_TLSTREAM_TL_NRET_CTX_LPU( + kbdev, next_katom->kctx, + &kbdev->gpu_props.js_features[next_katom->slot_nr]); } else { next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; @@ -1258,24 +1191,24 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 comple } /** - * kbase_gpu_complete_hw - complete the atom in a slot's JSn_HEAD + * kbase_gpu_complete_hw - complete the atom in a slot's JS_HEAD * @kbdev: kbase device * @js: job slot to check * @completion_code: completion code of the completed atom - * @job_tail: value read from JSn_TAIL, for STOPPED atoms + * @job_tail: value read from JS_TAIL, for STOPPED atoms * @end_timestamp: pointer to approximate ktime value when the katom completed * * Among other operations, this also executes step 2 of a 2-step process of - * removing any related atoms from a slot's JSn_HEAD_NEXT (ringbuffer index 1), - * should there have been a 'failure' on an atom in JSn_HEAD (ringbuffer index + * removing any related atoms from a slot's JS_HEAD_NEXT (ringbuffer index 1), + * should there have been a 'failure' on an atom in JS_HEAD (ringbuffer index * 0). The first step is done in kbase_gpu_irq_evict(). * * Note: 'STOPPED' atoms are considered 'failed', as they are in the HW, but * unlike other failure codes we _can_ re-run them. 
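/*
 * Editorial note, not part of the patch: the kernel-doc above describes a
 * two-step recovery - kbase_gpu_irq_evict() first pulls any related atom
 * back out of JS_HEAD_NEXT (after writing a NOP into COMMAND_NEXT), then
 * kbase_gpu_complete_hw() retires the failed JS_HEAD atom and returns the
 * evicted work to the job scheduler. A condensed, hypothetical caller
 * showing just the ordering (the real job-IRQ path carries considerably
 * more logic):
 */
static void example_handle_failed_slot(struct kbase_device *kbdev,
				       unsigned int js, u32 completion_code,
				       u64 job_tail, ktime_t *end_timestamp)
{
	/* Step 1: evict whatever is queued behind the failed atom. */
	kbase_gpu_irq_evict(kbdev, js, completion_code);

	/* Step 2: complete the JS_HEAD atom and hand evicted atoms back. */
	kbase_gpu_complete_hw(kbdev, js, completion_code, job_tail,
			      end_timestamp);
}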
* - * When the JSn_HEAD atom is considered to be 'failed', then this will dequeue + * When the JS_HEAD atom is considered to be 'failed', then this will dequeue * and return to the JS some (usually all) of the atoms evicted from the HW - * during the kbase_gpu_irq_evict() for that JSn_HEAD atom. If it dequeues an + * during the kbase_gpu_irq_evict() for that JS_HEAD atom. If it dequeues an * atom, that atom must not have been running or must already be evicted, as * otherwise we would be in the incorrect state of having an atom both running * on the HW and returned to the JS. @@ -1294,8 +1227,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 comp kctx = katom->kctx; - dev_dbg(kbdev->dev, - "Atom %pK completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n", + dev_dbg(kbdev->dev, "Atom %pK completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n", (void *)katom, completion_code, job_tail, js); lockdep_assert_held(&kbdev->hwaccess_lock); @@ -1306,15 +1238,13 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 comp */ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8438)) { if (completion_code == BASE_JD_EVENT_STOPPED && - (katom->atom_flags & - KBASE_KATOM_FLAG_BEEN_HARD_STOPPED)) { + (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_HARD_STOPPED)) { completion_code = BASE_JD_EVENT_TERMINATED; } } if ((katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) && - completion_code != BASE_JD_EVENT_DONE && - !(completion_code & BASE_JD_SW_EVENT)) { + completion_code != BASE_JD_EVENT_DONE && !(completion_code & BASE_JD_SW_EVENT)) { /* When a job chain fails, on a T60x or when * BASE_JD_REQ_SKIP_CACHE_END is set, the GPU cache is not * flushed. To prevent future evictions causing possible memory @@ -1327,8 +1257,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 comp katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); if (completion_code == BASE_JD_EVENT_STOPPED) { - struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, - 0); + struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, 0); /* * Dequeue next atom from ringbuffers on same slot if required. @@ -1336,10 +1265,8 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 comp * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that * the atoms on this slot are returned in the correct order. */ - if (next_katom && - kbase_js_atom_runs_before(kbdev, katom, next_katom, 0u)) { - WARN_ON(next_katom->gpu_rb_state == - KBASE_ATOM_GPU_RB_SUBMITTED); + if (next_katom && kbase_js_atom_runs_before(kbdev, katom, next_katom, 0u)) { + WARN_ON(next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED); kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); kbase_jm_return_atom_to_js(kbdev, next_katom); } @@ -1347,11 +1274,10 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 comp struct kbasep_js_device_data *js_devdata = &kbdev->js_data; unsigned int i; - if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) { + if (!kbase_ctx_flag(katom->kctx, KCTX_DYING) && + !kbase_ctx_flag(katom->kctx, KCTX_PAGE_FAULT_REPORT_SKIP)) { dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", - js, completion_code, - kbase_gpu_exception_name( - completion_code)); + js, completion_code, kbase_gpu_exception_name(completion_code)); } @@ -1367,66 +1293,53 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 comp * atom. 
*/ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) { - struct kbase_jd_atom *katom_idx0 = - kbase_gpu_inspect(kbdev, i, 0); - struct kbase_jd_atom *katom_idx1 = - kbase_gpu_inspect(kbdev, i, 1); - - if (katom_idx0 && - kbase_rb_atom_might_depend(katom, katom_idx0) && - katom_idx0->gpu_rb_state != - KBASE_ATOM_GPU_RB_SUBMITTED) { + struct kbase_jd_atom *katom_idx0 = kbase_gpu_inspect(kbdev, i, 0); + struct kbase_jd_atom *katom_idx1 = kbase_gpu_inspect(kbdev, i, 1); + + if (katom_idx0 && kbase_rb_atom_might_depend(katom, katom_idx0) && + katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { /* Dequeue katom_idx0 from ringbuffer */ kbase_gpu_dequeue_atom(kbdev, i, end_timestamp); - if (katom_idx1 && kbase_rb_atom_might_depend( - katom, katom_idx1) && - katom_idx0->gpu_rb_state != - KBASE_ATOM_GPU_RB_SUBMITTED) { + if (katom_idx1 && kbase_rb_atom_might_depend(katom, katom_idx1) && + katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { /* Dequeue katom_idx1 from ringbuffer */ - kbase_gpu_dequeue_atom(kbdev, i, - end_timestamp); + kbase_gpu_dequeue_atom(kbdev, i, end_timestamp); - katom_idx1->event_code = - BASE_JD_EVENT_STOPPED; - kbase_jm_return_atom_to_js(kbdev, - katom_idx1); + katom_idx1->event_code = BASE_JD_EVENT_STOPPED; + kbase_jm_return_atom_to_js(kbdev, katom_idx1); } katom_idx0->event_code = BASE_JD_EVENT_STOPPED; kbase_jm_return_atom_to_js(kbdev, katom_idx0); - } else if (katom_idx1 && kbase_rb_atom_might_depend( - katom, katom_idx1) && - katom_idx1->gpu_rb_state != - KBASE_ATOM_GPU_RB_SUBMITTED) { + } else if (katom_idx1 && kbase_rb_atom_might_depend(katom, katom_idx1) && + katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { /* Can not dequeue this atom yet - will be * dequeued when atom at idx0 completes */ katom_idx1->event_code = BASE_JD_EVENT_STOPPED; - kbase_gpu_mark_atom_for_return(kbdev, - katom_idx1); + kbase_gpu_mark_atom_for_return(kbdev, katom_idx1); } } } - KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc, js, completion_code); + KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc, js, + completion_code); if (job_tail != 0 && job_tail != katom->jc) { /* Some of the job has been executed */ - dev_dbg(kbdev->dev, - "Update job chain address of atom %pK to resume from 0x%llx\n", + dev_dbg(kbdev->dev, "Update job chain address of atom %pK to resume from 0x%llx\n", (void *)katom, job_tail); katom->jc = job_tail; - KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx, - katom, job_tail, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx, katom, job_tail, js); } /* Only update the event code for jobs that weren't cancelled */ if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED) katom->event_code = (enum base_jd_event_code)completion_code; - /* Complete the job, and start new ones + /* Complete the job, and start new ones * * Also defer remaining work onto the workqueue: * - Re-queue Soft-stopped jobs @@ -1437,19 +1350,15 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 comp #if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) { /* The atom in the HEAD */ - struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, - 0); + struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, 0); - if (next_katom && next_katom->gpu_rb_state == - KBASE_ATOM_GPU_RB_SUBMITTED) { + if (next_katom && next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) { char js_string[16]; - trace_gpu_sched_switch(kbasep_make_job_slot_string(js, - js_string, - sizeof(js_string)), - 
ktime_to_ns(*end_timestamp), - (u32)next_katom->kctx->id, 0, - next_katom->work_id); + trace_gpu_sched_switch(kbasep_make_job_slot_string(js, js_string, + sizeof(js_string)), + ktime_to_ns(*end_timestamp), + (u32)next_katom->kctx->id, 0, next_katom->work_id); } else { char js_string[16]; @@ -1469,14 +1378,13 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 comp katom = kbase_jm_complete(kbdev, katom, end_timestamp); if (katom) { - dev_dbg(kbdev->dev, - "Cross-slot dependency %pK has become runnable.\n", + dev_dbg(kbdev->dev, "Cross-slot dependency %pK has become runnable.\n", (void *)katom); /* Check if there are lower priority jobs to soft stop */ kbase_job_slot_ctx_priority_check_locked(kctx, katom); - kbase_jm_try_kick(kbdev, 1 << katom->slot_nr); + kbase_jm_try_kick(kbdev, 1UL << katom->slot_nr); } /* For partial shader core off L2 cache flush */ @@ -1502,14 +1410,12 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) int idx; for (idx = 0; idx < SLOT_RB_SIZE; idx++) { - struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, - js, atom_idx); + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, atom_idx); bool keep_in_jm_rb = false; if (!katom) break; - if (katom->protected_state.exit == - KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) { + if (katom->protected_state.exit == KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) { /* protected mode sanity checks */ WARN(kbase_jd_katom_is_protected(katom) != kbase_gpu_in_protected_mode(kbdev), @@ -1573,9 +1479,9 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) * should_stop_next_atom - given a soft/hard stop action, determine if the next * atom on a slot should be stopped * @kbdev: kbase devices - * @head_katom: atom currently in the JSn_HEAD - * @next_katom: atom currently in the JSn_HEAD_NEXT - * @action: JS_COMMAND_<...> action for soft/hard-stop + * @head_katom: atom currently in the JS_HEAD + * @next_katom: atom currently in the JS_HEAD_NEXT + * @action: COMMAND_<...> action for soft/hard-stop * * This is used in cases where @head_katom is the target of the soft/hard-stop. 
* It only makes sense to call this when @head_katom and @next_katom are from @@ -1586,16 +1492,14 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) */ static bool should_stop_next_atom(struct kbase_device *kbdev, const struct kbase_jd_atom *head_katom, - const struct kbase_jd_atom *next_katom, - u32 action) + const struct kbase_jd_atom *next_katom, u32 action) { bool ret = false; u32 hw_action = action & JS_COMMAND_MASK; switch (hw_action) { case JS_COMMAND_SOFT_STOP: - ret = kbase_js_atom_runs_before(kbdev, head_katom, next_katom, - 0u); + ret = kbase_js_atom_runs_before(kbdev, head_katom, next_katom, 0u); break; case JS_COMMAND_HARD_STOP: /* Unlike soft-stop, a hard-stop targeting a particular atom @@ -1621,8 +1525,7 @@ static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, unsigned int u32 hw_action = action & JS_COMMAND_MASK; kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom); - kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action, - katom->core_req, katom); + kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action, katom->core_req, katom); kbase_jsctx_slot_prio_blocked_set(kctx, js, katom->sched_priority); } @@ -1631,11 +1534,9 @@ static int should_stop_x_dep_slot(struct kbase_jd_atom *katom) if (katom->x_post_dep) { struct kbase_jd_atom *dep_atom = katom->x_post_dep; - if (dep_atom->gpu_rb_state != - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB && - dep_atom->gpu_rb_state != - KBASE_ATOM_GPU_RB_RETURN_TO_JS) - return dep_atom->slot_nr; + if (dep_atom->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB && + dep_atom->gpu_rb_state != KBASE_ATOM_GPU_RB_RETURN_TO_JS) + return (int)dep_atom->slot_nr; } return -1; } @@ -1679,13 +1580,12 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_ katom_idx0_valid = (katom_idx0 && (!kctx || kctx_idx0 == kctx)); katom_idx1_valid = (katom_idx1 && (!kctx || kctx_idx1 == kctx)); } - /* If there's an atom in JSn_HEAD_NEXT that we haven't already decided - * to stop, but we're stopping the JSn_HEAD atom, see if they are + /* If there's an atom in JS_HEAD_NEXT that we haven't already decided + * to stop, but we're stopping the JS_HEAD atom, see if they are * related/ordered in some way that would require the same stop action */ if (!katom_idx1_valid && katom_idx0_valid && katom_idx1) - katom_idx1_valid = should_stop_next_atom(kbdev, katom_idx0, - katom_idx1, action); + katom_idx1_valid = should_stop_next_atom(kbdev, katom_idx0, katom_idx1, action); if (katom_idx0_valid) stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0); @@ -1698,78 +1598,59 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_ kbase_gpu_dequeue_atom(kbdev, js, NULL); if (katom_idx1_valid) { kbase_gpu_dequeue_atom(kbdev, js, NULL); - katom_idx1->event_code = - BASE_JD_EVENT_REMOVED_FROM_NEXT; + katom_idx1->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; kbase_jm_return_atom_to_js(kbdev, katom_idx1); - kbase_jsctx_slot_prio_blocked_set(kctx_idx1, js, - prio_idx1); + kbase_jsctx_slot_prio_blocked_set(kctx_idx1, js, prio_idx1); } - katom_idx0->event_code = - BASE_JD_EVENT_REMOVED_FROM_NEXT; + katom_idx0->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; kbase_jm_return_atom_to_js(kbdev, katom_idx0); - kbase_jsctx_slot_prio_blocked_set(kctx_idx0, js, - prio_idx0); + kbase_jsctx_slot_prio_blocked_set(kctx_idx0, js, prio_idx0); } else { /* katom_idx0 is on GPU */ - if (katom_idx1_valid && katom_idx1->gpu_rb_state == - KBASE_ATOM_GPU_RB_SUBMITTED) { + if (katom_idx1_valid 
&& + katom_idx1->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) { /* katom_idx0 and katom_idx1 are on GPU */ - if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, - JS_COMMAND_NEXT)) == 0) { + if (kbase_reg_read32(kbdev, JOB_SLOT_OFFSET(js, COMMAND_NEXT)) == + 0) { /* idx0 has already completed - stop * idx1 if needed */ if (katom_idx1_valid) { - kbase_gpu_stop_atom(kbdev, js, - katom_idx1, - action); + kbase_gpu_stop_atom(kbdev, js, katom_idx1, action); ret = true; } } else { /* idx1 is in NEXT registers - attempt * to remove */ - kbase_reg_write(kbdev, - JOB_SLOT_REG(js, - JS_COMMAND_NEXT), - JS_COMMAND_NOP); - - if (kbase_reg_read(kbdev, - JOB_SLOT_REG(js, - JS_HEAD_NEXT_LO)) - != 0 || - kbase_reg_read(kbdev, - JOB_SLOT_REG(js, - JS_HEAD_NEXT_HI)) - != 0) { + kbase_reg_write32(kbdev, JOB_SLOT_OFFSET(js, COMMAND_NEXT), + JS_COMMAND_NOP); + + if (kbase_reg_read64(kbdev, + JOB_SLOT_OFFSET(js, HEAD_NEXT)) != 0) { /* idx1 removed successfully, * will be handled in IRQ */ - kbase_gpu_remove_atom(kbdev, - katom_idx1, - action, true); + kbase_gpu_remove_atom(kbdev, katom_idx1, action, + true); /* Revert the last_context. */ kbdev->hwaccess.backend.slot_rb[js] .last_kctx_tagged = SLOT_RB_TAG_KCTX(katom_idx0->kctx); stop_x_dep_idx1 = - should_stop_x_dep_slot(katom_idx1); + should_stop_x_dep_slot(katom_idx1); /* stop idx0 if still on GPU */ - kbase_gpu_stop_atom(kbdev, js, - katom_idx0, - action); + kbase_gpu_stop_atom(kbdev, js, katom_idx0, action); ret = true; } else if (katom_idx1_valid) { /* idx0 has already completed, * stop idx1 if needed */ - kbase_gpu_stop_atom(kbdev, js, - katom_idx1, - action); + kbase_gpu_stop_atom(kbdev, js, katom_idx1, action); ret = true; } } @@ -1777,8 +1658,7 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_ /* idx1 not on GPU but must be dequeued*/ /* idx1 will be handled in IRQ */ - kbase_gpu_remove_atom(kbdev, katom_idx1, action, - false); + kbase_gpu_remove_atom(kbdev, katom_idx1, action, false); /* stop idx0 */ /* This will be repeated for anything removed * from the next registers, since their normal @@ -1787,14 +1667,12 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_ * don't actually do a hard stop on the head * atom */ - kbase_gpu_stop_atom(kbdev, js, katom_idx0, - action); + kbase_gpu_stop_atom(kbdev, js, katom_idx0, action); ret = true; } else { /* no atom in idx1 */ /* just stop idx0 */ - kbase_gpu_stop_atom(kbdev, js, katom_idx0, - action); + kbase_gpu_stop_atom(kbdev, js, katom_idx0, action); ret = true; } } @@ -1802,77 +1680,62 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_ if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { /* Mark for return */ /* idx1 will be returned once idx0 completes */ - kbase_gpu_remove_atom(kbdev, katom_idx1, action, - false); + kbase_gpu_remove_atom(kbdev, katom_idx1, action, false); } else { /* idx1 is on GPU */ - if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, - JS_COMMAND_NEXT)) == 0) { + if (kbase_reg_read32(kbdev, JOB_SLOT_OFFSET(js, COMMAND_NEXT)) == 0) { /* idx0 has already completed - stop idx1 */ - kbase_gpu_stop_atom(kbdev, js, katom_idx1, - action); + kbase_gpu_stop_atom(kbdev, js, katom_idx1, action); ret = true; } else { /* idx1 is in NEXT registers - attempt to * remove */ - kbase_reg_write(kbdev, JOB_SLOT_REG(js, - JS_COMMAND_NEXT), - JS_COMMAND_NOP); - - if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, - JS_HEAD_NEXT_LO)) != 0 || - kbase_reg_read(kbdev, JOB_SLOT_REG(js, - JS_HEAD_NEXT_HI)) != 0) { + 
kbase_reg_write32(kbdev, JOB_SLOT_OFFSET(js, COMMAND_NEXT), + JS_COMMAND_NOP); + + if (kbase_reg_read64(kbdev, JOB_SLOT_OFFSET(js, HEAD_NEXT)) != 0) { /* idx1 removed successfully, will be * handled in IRQ once idx0 completes */ - kbase_gpu_remove_atom(kbdev, katom_idx1, - action, - false); + kbase_gpu_remove_atom(kbdev, katom_idx1, action, false); /* Revert the last_context, or mark as purged */ kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = kctx_idx0 ? SLOT_RB_TAG_KCTX(katom_idx0->kctx) : - SLOT_RB_TAG_PURGED; + SLOT_RB_TAG_PURGED; } else { /* idx0 has already completed - stop * idx1 */ - kbase_gpu_stop_atom(kbdev, js, - katom_idx1, - action); + kbase_gpu_stop_atom(kbdev, js, katom_idx1, action); ret = true; } } } } - if (stop_x_dep_idx0 != -1) - kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0, - NULL, action); + kbase_backend_soft_hard_stop_slot(kbdev, kctx, (unsigned int)stop_x_dep_idx0, NULL, + action); if (stop_x_dep_idx1 != -1) - kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1, - NULL, action); + kbase_backend_soft_hard_stop_slot(kbdev, kctx, (unsigned int)stop_x_dep_idx1, NULL, + action); return ret; } -void kbase_backend_cache_clean(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) +void kbase_backend_cache_clean(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { if (katom->need_cache_flush_cores_retained) { - kbase_gpu_start_cache_clean(kbdev, - GPU_COMMAND_CACHE_CLN_INV_FULL); + kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_FULL); kbase_gpu_wait_cache_clean(kbdev); katom->need_cache_flush_cores_retained = false; } } -void kbase_backend_complete_wq(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) +void kbase_backend_complete_wq(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { /* * If cache flush required due to HW workaround then perform the flush @@ -1881,9 +1744,10 @@ void kbase_backend_complete_wq(struct kbase_device *kbdev, kbase_backend_cache_clean(kbdev, katom); } -void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, - base_jd_core_req core_req) +void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, base_jd_core_req core_req) { + CSTD_UNUSED(core_req); + if (!kbdev->pm.active_count) { kbase_pm_lock(kbdev); kbase_pm_update_active(kbdev); @@ -1904,9 +1768,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev) int idx; for (idx = 0; idx < SLOT_RB_SIZE; idx++) { - struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, - js, - idx); + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, idx); if (katom) dev_info(kbdev->dev, " js%u idx%d : katom=%pK gpu_rb_state=%d\n", diff --git a/mali_kbase/backend/gpu/mali_kbase_js_backend.c b/mali_kbase/backend/gpu/mali_kbase_js_backend.c index ff4e114..2afc06a 100644 --- a/mali_kbase/backend/gpu/mali_kbase_js_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_js_backend.c @@ -30,7 +30,6 @@ #include <backend/gpu/mali_kbase_js_internal.h> #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) #include <mali_kbase_gpu_metrics.h> - #endif /* @@ -45,7 +44,7 @@ static inline bool timer_callback_should_run(struct kbase_device *kbdev, int nr_ /* Debug support for allowing soft-stop on a single context */ return true; } -#endif /* CONFIG_MALI_DEBUG */ +#endif /* CONFIG_MALI_DEBUG */ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) { /* Timeouts would have to be 4x longer (due to micro- @@ -59,19 +58,16 @@ static inline bool timer_callback_should_run(struct kbase_device *kbdev, int nr_ * don't check 
KBASEP_JS_CTX_ATTR_NON_COMPUTE). */ { - int nr_compute_ctxs = - kbasep_js_ctx_attr_count_on_runpool(kbdev, - KBASEP_JS_CTX_ATTR_COMPUTE); - int nr_noncompute_ctxs = nr_running_ctxs - - nr_compute_ctxs; - - return (bool) (nr_compute_ctxs >= 2 || - nr_noncompute_ctxs > 0); + int nr_compute_ctxs = kbasep_js_ctx_attr_count_on_runpool( + kbdev, KBASEP_JS_CTX_ATTR_COMPUTE); + int nr_noncompute_ctxs = nr_running_ctxs - nr_compute_ctxs; + + return (bool)(nr_compute_ctxs >= 2 || nr_noncompute_ctxs > 0); } } else { /* Run the timer callback whenever you have at least 1 context */ - return (bool) (nr_running_ctxs > 0); + return (bool)(nr_running_ctxs > 0); } } @@ -86,8 +82,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) KBASE_DEBUG_ASSERT(timer != NULL); - backend = container_of(timer, struct kbase_backend_data, - scheduling_timer); + backend = container_of(timer, struct kbase_backend_data, scheduling_timer); kbdev = container_of(backend, struct kbase_device, hwaccess.backend); js_devdata = &kbdev->js_data; @@ -109,22 +104,15 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) u32 ticks = atom->ticks++; #if !defined(CONFIG_MALI_JOB_DUMP) && !defined(CONFIG_MALI_VECTOR_DUMP) - u32 soft_stop_ticks, hard_stop_ticks, - gpu_reset_ticks; + u32 soft_stop_ticks, hard_stop_ticks, gpu_reset_ticks; if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { - soft_stop_ticks = - js_devdata->soft_stop_ticks_cl; - hard_stop_ticks = - js_devdata->hard_stop_ticks_cl; - gpu_reset_ticks = - js_devdata->gpu_reset_ticks_cl; + soft_stop_ticks = js_devdata->soft_stop_ticks_cl; + hard_stop_ticks = js_devdata->hard_stop_ticks_cl; + gpu_reset_ticks = js_devdata->gpu_reset_ticks_cl; } else { - soft_stop_ticks = - js_devdata->soft_stop_ticks; - hard_stop_ticks = - js_devdata->hard_stop_ticks_ss; - gpu_reset_ticks = - js_devdata->gpu_reset_ticks_ss; + soft_stop_ticks = js_devdata->soft_stop_ticks; + hard_stop_ticks = js_devdata->hard_stop_ticks_ss; + gpu_reset_ticks = js_devdata->gpu_reset_ticks_ss; } /* If timeouts have been changed then ensure @@ -134,8 +122,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) * races between this worker and the thread * changing the timeouts. */ - if (backend->timeouts_updated && - ticks > soft_stop_ticks) + if (backend->timeouts_updated && ticks > soft_stop_ticks) ticks = atom->ticks = soft_stop_ticks; /* Job is Soft-Stoppable */ @@ -147,7 +134,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) */ #if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS int disjoint_threshold = - KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD; + KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD; u32 softstop_flags = 0u; dev_dbg(kbdev->dev, "Soft-stop"); @@ -168,13 +155,12 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) * older value and register a disjoint * event when we try soft-stopping */ - if (js_devdata->nr_user_contexts_running - >= disjoint_threshold) - softstop_flags |= - JS_COMMAND_SW_CAUSES_DISJOINT; + if (js_devdata->nr_user_contexts_running >= + disjoint_threshold) + softstop_flags |= JS_COMMAND_SW_CAUSES_DISJOINT; - kbase_job_slot_softstop_swflags(kbdev, - s, atom, softstop_flags); + kbase_job_slot_softstop_swflags(kbdev, s, atom, + softstop_flags); #endif } else if (ticks == hard_stop_ticks) { /* Job has been scheduled for at least @@ -183,14 +169,12 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) * now. Hard stop the slot. 
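
/*
 * Illustrative sketch, not part of the kbase sources: the timer_callback
 * hunks above compare an atom's accumulated tick count against three
 * per-class thresholds (soft stop, hard stop, GPU reset) in increasing
 * order. A minimal stand-alone model of that decision, with hypothetical
 * names and u32 as in the surrounding kernel code, could look like this.
 */
enum sketch_js_action {
        SKETCH_JS_NONE,
        SKETCH_JS_SOFT_STOP,
        SKETCH_JS_HARD_STOP,
        SKETCH_JS_RESET
};

static enum sketch_js_action sketch_pick_action(u32 ticks, u32 soft_ticks, u32 hard_ticks, u32 reset_ticks)
{
        /* The tunables are assumed to satisfy soft_ticks < hard_ticks <
         * reset_ticks, as the js_devdata fields used above do; each action
         * fires once, on the tick where the count first reaches it.
         */
        if (ticks == soft_ticks)
                return SKETCH_JS_SOFT_STOP;
        if (ticks == hard_ticks)
                return SKETCH_JS_HARD_STOP;
        if (ticks == reset_ticks)
                return SKETCH_JS_RESET;
        return SKETCH_JS_NONE;
}
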
*/ #if !KBASE_DISABLE_SCHEDULING_HARD_STOPS - int ms = - js_devdata->scheduling_period_ns - / 1000000u; - dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", - (unsigned long)ticks, - (unsigned long)ms); - kbase_job_slot_hardstop(atom->kctx, s, - atom); + u32 ms = js_devdata->scheduling_period_ns / 1000000u; + dev_warn( + kbdev->dev, + "JS: Job Hard-Stopped (took more than %u ticks at %u ms/tick)", + ticks, ms); + kbase_job_slot_hardstop(atom->kctx, s, atom); #endif } else if (ticks == gpu_reset_ticks) { /* Job has been scheduled for at least @@ -201,7 +185,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) */ reset_needed = true; } -#else /* !CONFIG_MALI_JOB_DUMP */ +#else /* !CONFIG_MALI_JOB_DUMP */ /* NOTE: During CONFIG_MALI_JOB_DUMP, we use * the alternate timeouts, which makes the hard- * stop and GPU reset timeout much longer. We @@ -214,24 +198,20 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) * CONFIG_MALI_JOB_DUMP, however. */ dev_dbg(kbdev->dev, "Soft-stop"); - } else if (ticks == - js_devdata->hard_stop_ticks_dumping) { + } else if (ticks == js_devdata->hard_stop_ticks_dumping) { /* Job has been scheduled for at least * js_devdata->hard_stop_ticks_dumping * ticks. Hard stop the slot. */ #if !KBASE_DISABLE_SCHEDULING_HARD_STOPS - int ms = - js_devdata->scheduling_period_ns - / 1000000u; - dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", - (unsigned long)ticks, - (unsigned long)ms); - kbase_job_slot_hardstop(atom->kctx, s, - atom); + u32 ms = js_devdata->scheduling_period_ns / 1000000u; + dev_warn( + kbdev->dev, + "JS: Job Hard-Stopped (took more than %u ticks at %u ms/tick)", + ticks, ms); + kbase_job_slot_hardstop(atom->kctx, s, atom); #endif - } else if (ticks == - js_devdata->gpu_reset_ticks_dumping) { + } else if (ticks == js_devdata->gpu_reset_ticks_dumping) { /* Job has been scheduled for at least * js_devdata->gpu_reset_ticks_dumping * ticks. It should have left the GPU by @@ -240,12 +220,13 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) */ reset_needed = true; } -#endif /* !CONFIG_MALI_JOB_DUMP */ +#endif /* !CONFIG_MALI_JOB_DUMP */ } } } if (reset_needed) { - dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issuing GPU soft-reset to resolve."); + dev_err(kbdev->dev, + "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). 
Issuing GPU soft-reset to resolve."); if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu_locked(kbdev); @@ -254,8 +235,8 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) if (backend->timer_running) hrtimer_start(&backend->scheduling_timer, - HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), - HRTIMER_MODE_REL); + HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), + HRTIMER_MODE_REL); backend->timeouts_updated = false; @@ -271,8 +252,7 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) unsigned long flags; /* Timer must stop if we are suspending */ const bool suspend_timer = backend->suspend_timer; - const int nr_running_ctxs = - atomic_read(&kbdev->js_data.nr_contexts_runnable); + const int nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable); lockdep_assert_held(&js_devdata->runpool_mutex); @@ -297,8 +277,8 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) backend->timer_running = true; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); hrtimer_start(&backend->scheduling_timer, - HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), - HRTIMER_MODE_REL); + HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), + HRTIMER_MODE_REL); KBASE_KTRACE_ADD_JM(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u, 0u); } @@ -329,7 +309,7 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) /* No need to restart the timer if it is already running. */ if (!js_devdata->gpu_metrics_timer_running) { hrtimer_start(&js_devdata->gpu_metrics_timer, - HR_TIMER_DELAY_NSEC(kbase_gpu_metrics_get_emit_interval()), + HR_TIMER_DELAY_NSEC(kbase_gpu_metrics_get_tp_emit_interval()), HRTIMER_MODE_REL); js_devdata->gpu_metrics_timer_running = true; } @@ -342,8 +322,7 @@ int kbase_backend_timer_init(struct kbase_device *kbdev) { struct kbase_backend_data *backend = &kbdev->hwaccess.backend; - hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL); + hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); backend->scheduling_timer.function = timer_callback; backend->timer_running = false; diff --git a/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c b/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c index 6eedc00..b0dcf67 100644 --- a/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c +++ b/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c @@ -62,43 +62,30 @@ struct l2_mmu_config_limit { */ static const struct l2_mmu_config_limit limits[] = { /* GPU, read, write */ - {GPU_ID2_PRODUCT_LBEX, - {0, GENMASK(10, 5), 5}, - {0, GENMASK(16, 12), 12} }, - {GPU_ID2_PRODUCT_TBEX, - {0, GENMASK(10, 5), 5}, - {0, GENMASK(16, 12), 12} }, - {GPU_ID2_PRODUCT_TBAX, - {0, GENMASK(10, 5), 5}, - {0, GENMASK(16, 12), 12} }, - {GPU_ID2_PRODUCT_TTRX, - {0, GENMASK(12, 7), 7}, - {0, GENMASK(17, 13), 13} }, - {GPU_ID2_PRODUCT_TNAX, - {0, GENMASK(12, 7), 7}, - {0, GENMASK(17, 13), 13} }, - {GPU_ID2_PRODUCT_TGOX, - {KBASE_3BIT_AID_32, GENMASK(14, 12), 12}, - {KBASE_3BIT_AID_32, GENMASK(17, 15), 15} }, - {GPU_ID2_PRODUCT_TNOX, - {KBASE_3BIT_AID_32, GENMASK(14, 12), 12}, - {KBASE_3BIT_AID_32, GENMASK(17, 15), 15} }, + { GPU_ID_PRODUCT_LBEX, { 0, GENMASK(10, 5), 5 }, { 0, GENMASK(16, 12), 12 } }, + { GPU_ID_PRODUCT_TBEX, { 0, GENMASK(10, 5), 5 }, { 0, GENMASK(16, 12), 12 } }, + { GPU_ID_PRODUCT_TBAX, { 0, GENMASK(10, 5), 5 }, { 0, GENMASK(16, 12), 12 } }, + { GPU_ID_PRODUCT_TTRX, { 0, GENMASK(12, 7), 7 }, { 0, GENMASK(17, 13), 13 } }, + { GPU_ID_PRODUCT_TNAX, { 0, GENMASK(12, 7), 7 }, { 0, 
GENMASK(17, 13), 13 } }, + { GPU_ID_PRODUCT_TGOX, + { KBASE_3BIT_AID_32, GENMASK(14, 12), 12 }, + { KBASE_3BIT_AID_32, GENMASK(17, 15), 15 } }, + { GPU_ID_PRODUCT_TNOX, + { KBASE_3BIT_AID_32, GENMASK(14, 12), 12 }, + { KBASE_3BIT_AID_32, GENMASK(17, 15), 15 } }, }; int kbase_set_mmu_quirks(struct kbase_device *kbdev) { /* All older GPUs had 2 bits for both fields, this is a default */ - struct l2_mmu_config_limit limit = { - 0, /* Any GPU not in the limits array defined above */ - {KBASE_AID_32, GENMASK(25, 24), 24}, - {KBASE_AID_32, GENMASK(27, 26), 26} - }; - u32 product_model, gpu_id; - u32 mmu_config; - int i; + struct l2_mmu_config_limit limit = { 0, /* Any GPU not in the limits array defined above */ + { KBASE_AID_32, GENMASK(25, 24), 24 }, + { KBASE_AID_32, GENMASK(27, 26), 26 } }; + u32 product_model; + u32 mmu_config = 0; + unsigned int i; - gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - product_model = gpu_id & GPU_ID2_PRODUCT_MODEL; + product_model = kbdev->gpu_props.gpu_id.product_model; /* Limit the GPU bus bandwidth if the platform needs this. */ for (i = 0; i < ARRAY_SIZE(limits); i++) { @@ -108,7 +95,8 @@ int kbase_set_mmu_quirks(struct kbase_device *kbdev) } } - mmu_config = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG)); + if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(L2_MMU_CONFIG))) + mmu_config = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(L2_MMU_CONFIG)); if (kbase_is_gpu_removed(kbdev)) return -EIO; @@ -116,7 +104,7 @@ int kbase_set_mmu_quirks(struct kbase_device *kbdev) mmu_config &= ~(limit.read.mask | limit.write.mask); /* Can't use FIELD_PREP() macro here as the mask isn't constant */ mmu_config |= (limit.read.value << limit.read.shift) | - (limit.write.value << limit.write.shift); + (limit.write.value << limit.write.shift); kbdev->hw_quirks_mmu = mmu_config; diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c index 46bcdc7..c340760 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c +++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c @@ -63,10 +63,13 @@ */ #include <mali_kbase.h> #include <device/mali_kbase_device.h> -#include <gpu/mali_kbase_gpu_regmap.h> +#include <hw_access/mali_kbase_hw_access_regmap.h> +#include <hw_access/mali_kbase_hw_access_regmap_legacy.h> #include <backend/gpu/mali_kbase_model_linux.h> #include <mali_kbase_mem_linux.h> +#include <asm/arch_timer.h> + #if MALI_USE_CSF #include <csf/mali_kbase_csf_firmware.h> @@ -77,23 +80,25 @@ /* Array for storing the value of SELECT register for each type of core */ static u64 ipa_ctl_select_config[KBASE_IPA_CORE_TYPE_NUM]; -static bool ipa_control_timer_enabled; +static u32 ipa_control_timer_enabled; #endif -#define LO_MASK(M) ((M) & 0xFFFFFFFF) -#if !MALI_USE_CSF -#define HI_MASK(M) ((M) & 0xFFFFFFFF00000000) +#if MALI_USE_CSF +static u32 sysc_alloc_regs[SYSC_ALLOC_COUNT]; #endif +#define LO_MASK(M) ((M)&0xFFFFFFFF) +#define HI_MASK(M) ((M)&0xFFFFFFFF00000000) + /* Construct a value for the THREAD_FEATURES register, *except* the two most - * significant bits, which are set to IMPLEMENTATION_MODEL in + * significant bits, which are set to THREAD_FEATURES_IMPLEMENTATION_TECHNOLOGY_SOFTWARE in * midgard_model_read_reg(). 
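
/*
 * Illustrative sketch, not part of the kbase sources: the CSF variant of
 * THREAD_FEATURES_PARTIAL above packs MAX_REGISTERS into the low bits and
 * MAX_TASK_QUEUE starting at bit 24, leaving the two most significant bits
 * for the implementation-technology field filled in elsewhere. Hypothetical
 * pack/unpack helpers under that assumed layout (u32 as in the surrounding
 * kernel code):
 */
static inline u32 sketch_thread_features_pack(u32 max_registers, u32 max_task_queue)
{
        return max_registers | (max_task_queue << 24);
}

static inline u32 sketch_thread_features_task_queue(u32 thread_features)
{
        /* Drop the two implementation-technology bits (31:30) before shifting. */
        return (thread_features & ~(3u << 30)) >> 24;
}
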
*/ #if MALI_USE_CSF -#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \ +#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \ ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 24)) #else -#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \ +#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \ ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 16) | ((MAX_TG_SPLIT) << 24)) #endif @@ -109,7 +114,7 @@ struct error_status_t hw_error_status; * @thread_max_barrier_size: Maximum number of threads per barrier * @thread_features: Thread features, NOT INCLUDING the 2 * most-significant bits, which are always set to - * IMPLEMENTATION_MODEL. + * THREAD_FEATURES_IMPLEMENTATION_TECHNOLOGY_SOFTWARE. * @core_features: Core features * @tiler_features: Tiler features * @mmu_features: MMU features @@ -139,33 +144,40 @@ struct control_reg_values_t { struct job_slot { int job_active; int job_queued; - int job_complete_irq_asserted; - int job_irq_mask; + u32 job_complete_irq_asserted; + u32 job_irq_mask; int job_disabled; }; +enum pwr_on_index { + INDEX_L2, + INDEX_TILER, + INDEX_SHADER, + INDEX_STACK, + INDEX_DOMAIN_COUNT +}; + struct dummy_model_t { int reset_completed; int reset_completed_mask; #if !MALI_USE_CSF int prfcnt_sample_completed; #endif /* !MALI_USE_CSF */ - int power_changed_mask; /* 2bits: _ALL,_SINGLE */ - int power_changed; /* 1bit */ + int power_changed_mask; /* 2 bits: _ALL,_SINGLE */ + int power_changed; /* 1 bit */ bool clean_caches_completed; bool clean_caches_completed_irq_enabled; #if MALI_USE_CSF bool flush_pa_range_completed; bool flush_pa_range_completed_irq_enabled; #endif - int power_on; /* 6bits: SHADER[4],TILER,L2 */ - u32 stack_power_on_lo; + uint32_t domain_power_on[INDEX_DOMAIN_COUNT]; u32 coherency_enable; unsigned int job_irq_js_state; struct job_slot slots[NUM_SLOTS]; const struct control_reg_values_t *control_reg_values; u32 l2_config; - void *data; + struct kbase_device *kbdev; }; /* Array associating GPU names with control register values. 
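
/*
 * Illustrative sketch, not part of the kbase sources: the new pwr_on_index
 * enum and domain_power_on[] array above replace the packed power_on
 * bitfield, and the PWRON/PWROFF handlers later in this file OR in, or
 * clear, only bits that are actually present for each domain. A stand-alone
 * model of that pattern, with hypothetical names:
 */
enum sketch_pwr_domain {
        SKETCH_PWR_L2,
        SKETCH_PWR_TILER,
        SKETCH_PWR_SHADER,
        SKETCH_PWR_STACK,
        SKETCH_PWR_COUNT
};

struct sketch_pwr_state {
        u32 on[SKETCH_PWR_COUNT];      /* cores currently powered on, per domain */
        u32 present[SKETCH_PWR_COUNT]; /* cores physically present, per domain */
};

static void sketch_pwron(struct sketch_pwr_state *s, enum sketch_pwr_domain d, u32 req)
{
        s->on[d] |= req & s->present[d]; /* requests for absent cores are ignored */
}

static void sketch_pwroff(struct sketch_pwr_state *s, enum sketch_pwr_domain d, u32 req)
{
        s->on[d] &= ~(req & s->present[d]);
}
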
The first @@ -399,7 +411,7 @@ static const struct control_reg_values_t all_control_reg_values[] = { .gpu_features_lo = 0xf, .gpu_features_hi = 0, .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTUX, - .stack_present = 0xF, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { .name = "tTIx", @@ -415,7 +427,23 @@ static const struct control_reg_values_t all_control_reg_values[] = { .gpu_features_lo = 0xf, .gpu_features_hi = 0, .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTIX, - .stack_present = 0xF, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, + }, + { + .name = "tKRx", + .gpu_id = GPU_ID2_MAKE(13, 8, 1, 0, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x800, + .thread_max_workgroup_size = 0x400, + .thread_max_barrier_size = 0x400, + .thread_features = THREAD_FEATURES_PARTIAL(0x10000, 16, 0), + .core_features = 0x1, /* core_1e64fma4tex */ + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0xf, + .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TKRX, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, }; @@ -439,12 +467,21 @@ static struct { u64 cshw_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; #endif /* !MALI_USE_CSF */ u64 tiler_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; - u64 l2_counters[KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS * - KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; - u64 shader_counters[KBASE_DUMMY_MODEL_MAX_SHADER_CORES * - KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; + u64 l2_counters[KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS * KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; + u64 shader_counters[KBASE_DUMMY_MODEL_MAX_SHADER_CORES * KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; } performance_counters; +/** + * get_implementation_register - Returns the value of the register + * + * @reg: Register address + * @control_reg_values: Struct containing the implementations of the registers + * + * Registers of the dummy model are implemented in the control_reg_values_t struct + * We are only concerned with the lower 32 bits in the dummy model + * + * Return: value of the register for the current control_reg_values_t + */ static u32 get_implementation_register(u32 reg, const struct control_reg_values_t *const control_reg_values) { @@ -457,46 +494,42 @@ static u32 get_implementation_register(u32 reg, return LO_MASK(DUMMY_IMPLEMENTATION_L2_PRESENT); case GPU_CONTROL_REG(STACK_PRESENT_LO): return LO_MASK(control_reg_values->stack_present); - - case GPU_CONTROL_REG(SHADER_PRESENT_HI): - case GPU_CONTROL_REG(TILER_PRESENT_HI): - case GPU_CONTROL_REG(L2_PRESENT_HI): - case GPU_CONTROL_REG(STACK_PRESENT_HI): - /* *** FALLTHROUGH *** */ default: return 0; } } + void gpu_device_set_data(void *model, void *data) { struct dummy_model_t *dummy = (struct dummy_model_t *)model; - dummy->data = data; + dummy->kbdev = data; } void *gpu_device_get_data(void *model) { struct dummy_model_t *dummy = (struct dummy_model_t *)model; - return dummy->data; + return dummy->kbdev; } -#define signal_int(m, s) m->slots[(s)].job_complete_irq_asserted = 1 +#define signal_int(m, s) m->slots[(s)].job_complete_irq_asserted = 1u static char *no_mali_gpu = CONFIG_MALI_NO_MALI_DEFAULT_GPU; module_param(no_mali_gpu, charp, 0000); MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as"); #if MALI_USE_CSF -static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, - u32 cnt_idx, bool is_low_word) +static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, u32 cnt_idx, + bool is_low_word) { u64 *counters_data; u32 core_count = 0; u32 
event_index; u64 value = 0; u32 core; + u32 num_cores = 1; unsigned long flags; if (WARN_ON(core_type >= KBASE_IPA_CORE_TYPE_NUM)) @@ -505,12 +538,13 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, if (WARN_ON(cnt_idx >= KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS)) return 0; - event_index = - (ipa_ctl_select_config[core_type] >> (cnt_idx * 8)) & 0xFF; + event_index = (ipa_ctl_select_config[core_type] >> (cnt_idx * 8)) & 0xFF; - /* Currently only primary counter blocks are supported */ - if (WARN_ON(event_index >= - (KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS + KBASE_DUMMY_MODEL_COUNTER_PER_CORE))) + if (core_type == KBASE_IPA_CORE_TYPE_SHADER) + num_cores = KBASE_DUMMY_MODEL_MAX_SHADER_CORES; + + if (WARN_ON(event_index >= (KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS + + KBASE_DUMMY_MODEL_COUNTER_PER_CORE * num_cores))) return 0; /* The actual events start index 4 onwards. Spec also says PRFCNT_EN, @@ -618,11 +652,10 @@ static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index, u32 block_ for (block_idx = 0; block_idx < block_count; block_idx++) { /* only dump values if core is present */ - if (!(blocks_present & (1 << block_idx))) { + if (!(blocks_present & (1U << block_idx))) { #if MALI_USE_CSF /* if CSF dump zeroed out block */ - memset(&prfcnt_base[*out_index], 0, - KBASE_DUMMY_MODEL_BLOCK_SIZE); + memset(&prfcnt_base[*out_index], 0, KBASE_DUMMY_MODEL_BLOCK_SIZE); *out_index += KBASE_DUMMY_MODEL_VALUES_PER_BLOCK; #endif /* MALI_USE_CSF */ continue; @@ -630,25 +663,22 @@ static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index, u32 block_ /* write the header */ prfcnt_base[*out_index] = performance_counters.time++; - prfcnt_base[*out_index+2] = prfcnt_enable_mask; + prfcnt_base[*out_index + 2] = prfcnt_enable_mask; *out_index += KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS; /* write the counters */ - for (counter = 0; - counter < KBASE_DUMMY_MODEL_COUNTER_PER_CORE; - counter++) { + for (counter = 0; counter < KBASE_DUMMY_MODEL_COUNTER_PER_CORE; counter++) { /* HW counter values retrieved through * PRFCNT_SAMPLE request are of 32 bits only. 
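
/*
 * Illustrative sketch, not part of the kbase sources: in the dump loop
 * above, each block is written as a small header (timestamp in dword 0,
 * the PRFCNT enable mask in dword 2) followed by one value per counter,
 * with hardware-reported values truncated to 32 bits. A simplified
 * stand-alone version with hypothetical names and sizes; the real code
 * also offsets the enable-mask bit index by the header size, which this
 * sketch glosses over.
 */
#define SKETCH_HEADER_DWORDS 4
#define SKETCH_COUNTERS_PER_BLOCK 60

static u32 sketch_dump_block(u64 *out, u32 out_index, const u64 *values, u32 enable_mask, u64 timestamp)
{
        u32 c;

        out[out_index] = timestamp;       /* dword 0: sample timestamp */
        out[out_index + 2] = enable_mask; /* dword 2: enable mask      */
        out_index += SKETCH_HEADER_DWORDS;

        for (c = 0; c < SKETCH_COUNTERS_PER_BLOCK; c++) {
                if (enable_mask & (1u << (c & 31)))
                        out[out_index + c] = (u32)values[c];
        }
        return out_index + SKETCH_COUNTERS_PER_BLOCK;
}
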
*/ counter_value = (u32)values[index++]; if (KBASE_DUMMY_MODEL_COUNTER_ENABLED( - prfcnt_enable_mask, (counter + - KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS))) { - prfcnt_base[*out_index + counter] = - counter_value; + prfcnt_enable_mask, + (counter + KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS))) { + prfcnt_base[*out_index + counter] = counter_value; } } - *out_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE; + *out_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE; } } @@ -665,16 +695,15 @@ static void gpu_model_dump_nolock(void) gpu_model_dump_prfcnt_blocks(performance_counters.cshw_counters, &index, 1, performance_counters.prfcnt_en.fe, 0x1); #endif /* !MALI_USE_CSF */ - gpu_model_dump_prfcnt_blocks(performance_counters.tiler_counters, - &index, 1, + gpu_model_dump_prfcnt_blocks(performance_counters.tiler_counters, &index, 1, performance_counters.prfcnt_en.tiler, DUMMY_IMPLEMENTATION_TILER_PRESENT); gpu_model_dump_prfcnt_blocks(performance_counters.l2_counters, &index, KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS, performance_counters.prfcnt_en.l2, performance_counters.l2_present); - gpu_model_dump_prfcnt_blocks(performance_counters.shader_counters, - &index, KBASE_DUMMY_MODEL_MAX_SHADER_CORES, + gpu_model_dump_prfcnt_blocks(performance_counters.shader_counters, &index, + KBASE_DUMMY_MODEL_MAX_SHADER_CORES, performance_counters.prfcnt_en.shader, performance_counters.shader_present); @@ -685,6 +714,11 @@ static void gpu_model_dump_nolock(void) performance_counters.time += 10; } +static void gpu_model_raise_irq(void *model, u32 irq) +{ + gpu_device_raise_irq(model, irq); +} + #if !MALI_USE_CSF static void midgard_model_dump_prfcnt(void) { @@ -716,7 +750,7 @@ void gpu_model_glb_request_job_irq(void *model) spin_lock_irqsave(&hw_error_status.access_lock, flags); hw_error_status.job_irq_status |= JOB_IRQ_GLOBAL_IF; spin_unlock_irqrestore(&hw_error_status.access_lock, flags); - gpu_device_raise_irq(model, MODEL_LINUX_JOB_IRQ); + gpu_model_raise_irq(model, MODEL_LINUX_JOB_IRQ); } #endif /* !MALI_USE_CSF */ @@ -734,21 +768,19 @@ static void init_register_statuses(struct dummy_model_t *dummy) for (i = 0; i < NUM_SLOTS; i++) { hw_error_status.js_status[i] = 0; - hw_error_status.job_irq_rawstat |= - (dummy->slots[i].job_complete_irq_asserted) << i; - hw_error_status.job_irq_status |= - (dummy->slots[i].job_complete_irq_asserted) << i; + hw_error_status.job_irq_rawstat |= (dummy->slots[i].job_complete_irq_asserted) << i; + hw_error_status.job_irq_status |= (dummy->slots[i].job_complete_irq_asserted) << i; } for (i = 0; i < NUM_MMU_AS; i++) { hw_error_status.as_command[i] = 0; hw_error_status.as_faultstatus[i] = 0; - hw_error_status.mmu_irq_mask |= 1 << i; + hw_error_status.mmu_irq_mask |= (1u << i); } performance_counters.time = 0; } -static void update_register_statuses(struct dummy_model_t *dummy, unsigned int job_slot) +static void update_register_statuses(struct dummy_model_t *dummy, u32 job_slot) { lockdep_assert_held(&hw_error_status.access_lock); @@ -758,21 +790,17 @@ static void update_register_statuses(struct dummy_model_t *dummy, unsigned int j if (hw_error_status.js_status[job_slot] == 0) { /* status reg is clean; it can be written */ - switch (hw_error_status.errors_mask & - IS_A_JOB_ERROR) { + switch (hw_error_status.errors_mask & IS_A_JOB_ERROR) { case KBASE_JOB_INTERRUPTED: - hw_error_status.js_status[job_slot] = - JS_STATUS_INTERRUPTED; + hw_error_status.js_status[job_slot] = JS_STATUS_INTERRUPTED; break; case KBASE_JOB_STOPPED: - hw_error_status.js_status[job_slot] = - JS_STATUS_STOPPED; + 
hw_error_status.js_status[job_slot] = JS_STATUS_STOPPED; break; case KBASE_JOB_TERMINATED: - hw_error_status.js_status[job_slot] = - JS_STATUS_TERMINATED; + hw_error_status.js_status[job_slot] = JS_STATUS_TERMINATED; break; case KBASE_JOB_CONFIG_FAULT: @@ -781,18 +809,15 @@ static void update_register_statuses(struct dummy_model_t *dummy, unsigned int j break; case KBASE_JOB_POWER_FAULT: - hw_error_status.js_status[job_slot] = - JS_STATUS_POWER_FAULT; + hw_error_status.js_status[job_slot] = JS_STATUS_POWER_FAULT; break; case KBASE_JOB_READ_FAULT: - hw_error_status.js_status[job_slot] = - JS_STATUS_READ_FAULT; + hw_error_status.js_status[job_slot] = JS_STATUS_READ_FAULT; break; case KBASE_JOB_WRITE_FAULT: - hw_error_status.js_status[job_slot] = - JS_STATUS_WRITE_FAULT; + hw_error_status.js_status[job_slot] = JS_STATUS_WRITE_FAULT; break; case KBASE_JOB_AFFINITY_FAULT: @@ -801,8 +826,7 @@ static void update_register_statuses(struct dummy_model_t *dummy, unsigned int j break; case KBASE_JOB_BUS_FAULT: - hw_error_status.js_status[job_slot] = - JS_STATUS_BUS_FAULT; + hw_error_status.js_status[job_slot] = JS_STATUS_BUS_FAULT; break; case KBASE_INSTR_INVALID_PC: @@ -861,14 +885,13 @@ static void update_register_statuses(struct dummy_model_t *dummy, unsigned int j break; case KBASE_UNKNOWN: - hw_error_status.js_status[job_slot] = - JS_STATUS_UNKNOWN; + hw_error_status.js_status[job_slot] = JS_STATUS_UNKNOWN; break; default: model_error_log(KBASE_CORE, - "\nAtom Chain 0x%llx: Invalid Error Mask!", - hw_error_status.current_jc); + "\nAtom Chain 0x%llx: Invalid Error Mask!", + hw_error_status.current_jc); break; } } @@ -876,60 +899,50 @@ static void update_register_statuses(struct dummy_model_t *dummy, unsigned int j /* we set JOB_FAIL_<n> */ hw_error_status.job_irq_rawstat |= - (dummy->slots[job_slot].job_complete_irq_asserted) << - (job_slot + 16); + (dummy->slots[job_slot].job_complete_irq_asserted) + << (job_slot + 16); hw_error_status.job_irq_status |= - (((dummy->slots[job_slot].job_complete_irq_asserted) << - (job_slot)) & - (dummy->slots[job_slot].job_irq_mask << - job_slot)) << 16; + (((dummy->slots[job_slot].job_complete_irq_asserted) + << (job_slot)) & + (dummy->slots[job_slot].job_irq_mask << job_slot)) + << 16; } else { hw_error_status.job_irq_rawstat |= - (dummy->slots[job_slot].job_complete_irq_asserted) << - job_slot; + (dummy->slots[job_slot].job_complete_irq_asserted) << job_slot; hw_error_status.job_irq_status |= - ((dummy->slots[job_slot].job_complete_irq_asserted) << - (job_slot)) & - (dummy->slots[job_slot].job_irq_mask << - job_slot); + ((dummy->slots[job_slot].job_complete_irq_asserted) << (job_slot)) & + (dummy->slots[job_slot].job_irq_mask << job_slot); } } else { hw_error_status.job_irq_rawstat |= - (dummy->slots[job_slot].job_complete_irq_asserted) << - job_slot; + (dummy->slots[job_slot].job_complete_irq_asserted) << job_slot; hw_error_status.job_irq_status |= - ((dummy->slots[job_slot].job_complete_irq_asserted) << - (job_slot)) & + ((dummy->slots[job_slot].job_complete_irq_asserted) << (job_slot)) & (dummy->slots[job_slot].job_irq_mask << job_slot); - } /* end of job register statuses */ + } /* end of job register statuses */ if (hw_error_status.errors_mask & IS_A_MMU_ERROR) { - int i; + u32 i; for (i = 0; i < NUM_MMU_AS; i++) { if (i == hw_error_status.faulty_mmu_as) { if (hw_error_status.as_faultstatus[i] == 0) { - u32 status = - hw_error_status.as_faultstatus[i]; + u32 status = hw_error_status.as_faultstatus[i]; /* status reg is clean; it can be * written */ - switch 
(hw_error_status.errors_mask & - IS_A_MMU_ERROR) { + switch (hw_error_status.errors_mask & IS_A_MMU_ERROR) { case KBASE_TRANSLATION_FAULT: /* 0xCm means TRANSLATION FAULT * (m is mmu_table_level) */ - status = - ((1 << 7) | (1 << 6) | - hw_error_status.mmu_table_level - ); + status = ((1 << 7) | (1 << 6) | + hw_error_status.mmu_table_level); break; case KBASE_PERMISSION_FAULT: /*0xC8 means PERMISSION FAULT */ - status = ((1 << 7) | (1 << 6) | - (1 << 3)); + status = ((1 << 7) | (1 << 6) | (1 << 3)); break; case KBASE_TRANSTAB_BUS_FAULT: @@ -937,38 +950,35 @@ static void update_register_statuses(struct dummy_model_t *dummy, unsigned int j * BUS FAULT (m is * mmu_table_level) */ - status = ((1 << 7) | (1 << 6) | - (1 << 4) | - hw_error_status.mmu_table_level - ); + status = ((1 << 7) | (1 << 6) | (1 << 4) | + hw_error_status.mmu_table_level); break; case KBASE_ACCESS_FLAG: /* 0xD8 means ACCESS FLAG */ - status = ((1 << 7) | (1 << 6) | - (1 << 4) | (1 << 3)); + status = + ((1 << 7) | (1 << 6) | (1 << 4) | (1 << 3)); break; default: - model_error_log(KBASE_CORE, - "\nAtom Chain 0x%llx: Invalid Error Mask!", - hw_error_status.current_jc); + model_error_log( + KBASE_CORE, + "\nAtom Chain 0x%llx: Invalid Error Mask!", + hw_error_status.current_jc); break; } - hw_error_status.as_faultstatus[i] = - status; + hw_error_status.as_faultstatus[i] = status; } - if (hw_error_status.errors_mask & - KBASE_TRANSTAB_BUS_FAULT) + if (hw_error_status.errors_mask & KBASE_TRANSTAB_BUS_FAULT) hw_error_status.mmu_irq_rawstat |= - 1 << (16 + i); /* bus error */ + 1u << (16 + i); /* bus error */ else hw_error_status.mmu_irq_rawstat |= - 1 << i; /* page fault */ + (1u << i); /* page fault */ } } - } /*end of mmu register statuses */ + } /*end of mmu register statuses */ if (hw_error_status.errors_mask & IS_A_GPU_ERROR) { if (hw_error_status.gpu_fault_status) { /* not the first GPU error reported */ @@ -977,23 +987,22 @@ static void update_register_statuses(struct dummy_model_t *dummy, unsigned int j hw_error_status.gpu_error_irq |= 1; switch (hw_error_status.errors_mask & IS_A_GPU_ERROR) { case KBASE_DELAYED_BUS_FAULT: - hw_error_status.gpu_fault_status = (1 << 7); + hw_error_status.gpu_fault_status = (1u << 7); break; case KBASE_SHAREABILITY_FAULT: - hw_error_status.gpu_fault_status = (1 << 7) | - (1 << 3); + hw_error_status.gpu_fault_status = (1u << 7) | (1u << 3); break; default: model_error_log(KBASE_CORE, - "\nAtom Chain 0x%llx: Invalid Error Mask!", + "\nAtom Chain 0x%llx: Invalid Error Mask!", hw_error_status.current_jc); break; } } } - hw_error_status.errors_mask = 0; /*clear error mask */ + hw_error_status.errors_mask = 0; /*clear error mask */ } #if !MALI_USE_CSF @@ -1010,22 +1019,19 @@ static void update_job_irq_js_state(struct dummy_model_t *dummy, int mask) if ((mask & (1 << i)) || (mask & (1 << (i + 16)))) { /* clear the bits we're updating */ - dummy->job_irq_js_state &= ~((1 << (16 + i)) | - (1 << i)); + dummy->job_irq_js_state &= ~((1 << (16 + i)) | (1 << i)); if (hw_error_status.js_status[i]) { - dummy->job_irq_js_state |= next_busy << - (i + 16); + dummy->job_irq_js_state |= next_busy << (i + 16); if (mask & (1 << (i + 16))) { /* clear job slot status */ hw_error_status.js_status[i] = 0; /* continue execution of jobchain */ - dummy->slots[i].job_active = - dummy->slots[i].job_queued; + dummy->slots[i].job_active = dummy->slots[i].job_queued; } } else { /* set bits if needed */ - dummy->job_irq_js_state |= ((slot_active << i) | - (next_busy << (i + 16))); + dummy->job_irq_js_state |= + ((slot_active 
<< i) | (next_busy << (i + 16))); } } } @@ -1063,7 +1069,7 @@ static const struct control_reg_values_t *find_control_reg_values(const char *gp } for (i = 0; i < ARRAY_SIZE(all_control_reg_values); ++i) { - const struct control_reg_values_t * const fcrv = &all_control_reg_values[i]; + const struct control_reg_values_t *const fcrv = &all_control_reg_values[i]; if (!strcmp(fcrv->name, gpu)) { ret = fcrv; @@ -1074,8 +1080,8 @@ static const struct control_reg_values_t *find_control_reg_values(const char *gp if (!ret) { ret = &all_control_reg_values[0]; - pr_warn("Couldn't find control register values for GPU %s; using default %s\n", - gpu, ret->name); + pr_warn("Couldn't find control register values for GPU %s; using default %s\n", gpu, + ret->name); } return ret; @@ -1094,10 +1100,12 @@ void *midgard_model_create(struct kbase_device *kbdev) dummy->job_irq_js_state = 0; init_register_statuses(dummy); dummy->control_reg_values = find_control_reg_values(no_mali_gpu); - performance_counters.l2_present = get_implementation_register( - GPU_CONTROL_REG(L2_PRESENT_LO), dummy->control_reg_values); - performance_counters.shader_present = get_implementation_register( - GPU_CONTROL_REG(SHADER_PRESENT_LO), dummy->control_reg_values); + { + performance_counters.l2_present = get_implementation_register( + GPU_CONTROL_REG(L2_PRESENT_LO), dummy->control_reg_values); + performance_counters.shader_present = get_implementation_register( + GPU_CONTROL_REG(SHADER_PRESENT_LO), dummy->control_reg_values); + } gpu_device_set_data(dummy, kbdev); @@ -1119,7 +1127,7 @@ static void midgard_model_get_outputs(void *h) lockdep_assert_held(&hw_error_status.access_lock); if (hw_error_status.job_irq_status) - gpu_device_raise_irq(dummy, MODEL_LINUX_JOB_IRQ); + gpu_model_raise_irq(dummy, MODEL_LINUX_JOB_IRQ); if ((dummy->power_changed && dummy->power_changed_mask) || (dummy->reset_completed & dummy->reset_completed_mask) || @@ -1130,16 +1138,16 @@ static void midgard_model_get_outputs(void *h) (dummy->flush_pa_range_completed && dummy->flush_pa_range_completed_irq_enabled) || #endif (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled)) - gpu_device_raise_irq(dummy, MODEL_LINUX_GPU_IRQ); + gpu_model_raise_irq(dummy, MODEL_LINUX_GPU_IRQ); if (hw_error_status.mmu_irq_rawstat & hw_error_status.mmu_irq_mask) - gpu_device_raise_irq(dummy, MODEL_LINUX_MMU_IRQ); + gpu_model_raise_irq(dummy, MODEL_LINUX_MMU_IRQ); } static void midgard_model_update(void *h) { struct dummy_model_t *dummy = (struct dummy_model_t *)h; - int i; + u32 i; lockdep_assert_held(&hw_error_status.access_lock); @@ -1157,8 +1165,8 @@ static void midgard_model_update(void *h) * as we will overwrite the register status of the job in * the head registers - which has not yet been read */ - if ((hw_error_status.job_irq_rawstat & (1 << (i + 16))) || - (hw_error_status.job_irq_rawstat & (1 << i))) { + if ((hw_error_status.job_irq_rawstat & (1u << (i + 16))) || + (hw_error_status.job_irq_rawstat & (1u << i))) { continue; } @@ -1166,10 +1174,10 @@ static void midgard_model_update(void *h) signal_int(dummy, i); #ifdef CONFIG_MALI_ERROR_INJECT midgard_set_error(i); -#endif /* CONFIG_MALI_ERROR_INJECT */ +#endif /* CONFIG_MALI_ERROR_INJECT */ update_register_statuses(dummy, i); /*if this job slot returned failures we cannot use it */ - if (hw_error_status.job_irq_rawstat & (1 << (i + 16))) { + if (hw_error_status.job_irq_rawstat & (1u << (i + 16))) { dummy->slots[i].job_active = 0; continue; } @@ -1177,9 +1185,9 @@ static void midgard_model_update(void *h) 
dummy->slots[i].job_active = dummy->slots[i].job_queued; dummy->slots[i].job_queued = 0; if (dummy->slots[i].job_active) { - if (hw_error_status.job_irq_rawstat & (1 << (i + 16))) + if (hw_error_status.job_irq_rawstat & (1u << (i + 16))) model_error_log(KBASE_CORE, - "\natom %lld running a job on a dirty slot", + "\natom %lld running a job on a dirty slot", hw_error_status.current_jc); } } @@ -1193,9 +1201,9 @@ static void invalidate_active_jobs(struct dummy_model_t *dummy) for (i = 0; i < NUM_SLOTS; i++) { if (dummy->slots[i].job_active) { - hw_error_status.job_irq_rawstat |= (1 << (16 + i)); + hw_error_status.job_irq_rawstat |= (1u << (16 + i)); - hw_error_status.js_status[i] = 0x7f; /*UNKNOWN*/ + hw_error_status.js_status[i] = 0x7f; /*UNKNOWN*/ } } } @@ -1208,64 +1216,55 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value) spin_lock_irqsave(&hw_error_status.access_lock, flags); #if !MALI_USE_CSF - if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) && - (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) { + if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) && (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) { unsigned int slot_idx = (addr >> 7) & 0xf; KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS); if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_LO)) { - hw_error_status.current_jc &= - ~((u64) (0xFFFFFFFF)); - hw_error_status.current_jc |= (u64) value; + hw_error_status.current_jc &= ~((u64)(0xFFFFFFFF)); + hw_error_status.current_jc |= (u64)value; } if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_HI)) { - hw_error_status.current_jc &= (u64) 0xFFFFFFFF; - hw_error_status.current_jc |= - ((u64) value) << 32; + hw_error_status.current_jc &= (u64)0xFFFFFFFF; + hw_error_status.current_jc |= ((u64)value) << 32; } - if (addr == JOB_SLOT_REG(slot_idx, JS_COMMAND_NEXT) && - value == 1) { + if (addr == JOB_SLOT_REG(slot_idx, JS_COMMAND_NEXT) && value == 1) { pr_debug("%s", "start detected"); KBASE_DEBUG_ASSERT(!dummy->slots[slot_idx].job_active || - !dummy->slots[slot_idx].job_queued); + !dummy->slots[slot_idx].job_queued); if ((dummy->slots[slot_idx].job_active) || - (hw_error_status.job_irq_rawstat & - (1 << (slot_idx + 16)))) { - pr_debug("~~~~~~~~~~~ Start: job slot is already active or there are IRQ pending ~~~~~~~~~" - ); + (hw_error_status.job_irq_rawstat & (1 << (slot_idx + 16)))) { + pr_debug( + "~~~~~~~~~~~ Start: job slot is already active or there are IRQ pending ~~~~~~~~~"); dummy->slots[slot_idx].job_queued = 1; } else { dummy->slots[slot_idx].job_active = 1; } } - if (addr == JOB_SLOT_REG(slot_idx, JS_COMMAND_NEXT) && value == - 0) + if (addr == JOB_SLOT_REG(slot_idx, JS_COMMAND_NEXT) && value == 0) dummy->slots[slot_idx].job_queued = 0; if ((addr == JOB_SLOT_REG(slot_idx, JS_COMMAND)) && - (value == JS_COMMAND_SOFT_STOP || - value == JS_COMMAND_HARD_STOP)) { + (value == JS_COMMAND_SOFT_STOP || value == JS_COMMAND_HARD_STOP)) { /*dummy->slots[slot_idx].job_active = 0; */ hw_error_status.current_job_slot = slot_idx; if (value == JS_COMMAND_SOFT_STOP) { hw_error_status.errors_mask = KBASE_JOB_STOPPED; - } else { /*value == 3 */ + } else { /*value == 3 */ if (dummy->slots[slot_idx].job_disabled != 0) { - pr_debug("enabling slot after HARD_STOP" - ); + pr_debug("enabling slot after HARD_STOP"); dummy->slots[slot_idx].job_disabled = 0; } - hw_error_status.errors_mask = - KBASE_JOB_TERMINATED; + hw_error_status.errors_mask = KBASE_JOB_TERMINATED; } } } else if (addr == JOB_CONTROL_REG(JOB_IRQ_CLEAR)) { int i; for (i = 0; i < NUM_SLOTS; i++) { - if (value & ((1 << i) | (1 << (i + 16)))) + if (value & ((1u << 
i) | (1u << (i + 16)))) dummy->slots[i].job_complete_irq_asserted = 0; /* hw_error_status.js_status[i] is cleared in * update_job_irq_js_state @@ -1282,17 +1281,19 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value) for (i = 0; i < NUM_SLOTS; i++) dummy->slots[i].job_irq_mask = (value >> i) & 0x01; pr_debug("job irq mask to value %x", value); - } else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { -#else /* !MALI_USE_CSF */ +#else /* MALI_USE_CSF */ if (addr == JOB_CONTROL_REG(JOB_IRQ_CLEAR)) { pr_debug("%s", "job irq cleared"); hw_error_status.job_irq_rawstat &= ~(value); hw_error_status.job_irq_status &= ~(value); + } else if (addr == JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)) { + hw_error_status.job_irq_rawstat |= value; + hw_error_status.job_irq_status |= value; } else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) { /* ignore JOB_IRQ_MASK as it is handled by CSFFW */ - } else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { #endif /* !MALI_USE_CSF */ + } else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { pr_debug("GPU_IRQ_MASK set to 0x%x", value); dummy->reset_completed_mask = (value >> 8) & 0x01; dummy->power_changed_mask = (value >> 9) & 0x03; @@ -1303,14 +1304,14 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value) } else if (addr == GPU_CONTROL_REG(COHERENCY_ENABLE)) { dummy->coherency_enable = value; } else if (addr == GPU_CONTROL_REG(GPU_IRQ_CLEAR)) { - if (value & (1 << 8)) { + if (value & RESET_COMPLETED) { pr_debug("%s", "gpu RESET_COMPLETED irq cleared"); dummy->reset_completed = 0; } - if (value & (3 << 9)) + if (value & (POWER_CHANGED_SINGLE | POWER_CHANGED_ALL)) dummy->power_changed = 0; - if (value & (1 << 17)) + if (value & CLEAN_CACHES_COMPLETED) dummy->clean_caches_completed = false; #if MALI_USE_CSF @@ -1369,29 +1370,31 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value) #endif } else if (addr == GPU_CONTROL_REG(L2_CONFIG)) { dummy->l2_config = value; - } #if MALI_USE_CSF - else if (addr >= CSF_HW_DOORBELL_PAGE_OFFSET && - addr < CSF_HW_DOORBELL_PAGE_OFFSET + - (CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE)) { + } else if (addr >= CSF_HW_DOORBELL_PAGE_OFFSET && + addr < CSF_HW_DOORBELL_PAGE_OFFSET + + (CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE)) { if (addr == CSF_HW_DOORBELL_PAGE_OFFSET) hw_error_status.job_irq_status = JOB_IRQ_GLOBAL_IF; } else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) && (addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) { + u32 alloc_reg = (addr - GPU_CONTROL_REG(SYSC_ALLOC0)) >> 2; + + sysc_alloc_regs[alloc_reg] = value; + } else if ((addr >= GPU_CONTROL_REG(L2_SLICE_HASH_0)) && + (addr < GPU_CONTROL_REG(L2_SLICE_HASH(L2_SLICE_HASH_COUNT)))) { /* Do nothing */ - } else if ((addr >= GPU_CONTROL_REG(ASN_HASH_0)) && - (addr < GPU_CONTROL_REG(ASN_HASH(ASN_HASH_COUNT)))) { - /* Do nothing */ - } else if (addr == IPA_CONTROL_REG(COMMAND)) { + } else if (addr == IPA_CONTROL_REG(COMMAND) + ) { pr_debug("Received IPA_CONTROL command"); - } else if (addr == IPA_CONTROL_REG(TIMER)) { - ipa_control_timer_enabled = value ? true : false; + } else if (addr == IPA_CONTROL_REG(TIMER) + ) { + ipa_control_timer_enabled = value ? 
1U : 0U; } else if ((addr >= IPA_CONTROL_REG(SELECT_CSHW_LO)) && (addr <= IPA_CONTROL_REG(SELECT_SHADER_HI))) { - enum kbase_ipa_core_type core_type = (enum kbase_ipa_core_type)( - (addr - IPA_CONTROL_REG(SELECT_CSHW_LO)) >> 3); - bool is_low_word = - !((addr - IPA_CONTROL_REG(SELECT_CSHW_LO)) & 7); + enum kbase_ipa_core_type core_type = + (enum kbase_ipa_core_type)((addr - IPA_CONTROL_REG(SELECT_CSHW_LO)) >> 3); + bool is_low_word = !((addr - IPA_CONTROL_REG(SELECT_CSHW_LO)) & 7); if (is_low_word) { ipa_ctl_select_config[core_type] &= ~(u64)U32_MAX; @@ -1400,87 +1403,72 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value) ipa_ctl_select_config[core_type] &= U32_MAX; ipa_ctl_select_config[core_type] |= ((u64)value << 32); } - } #endif - else if (addr == MMU_CONTROL_REG(MMU_IRQ_MASK)) { + } else if (addr == MMU_CONTROL_REG(MMU_IRQ_MASK)) { hw_error_status.mmu_irq_mask = value; } else if (addr == MMU_CONTROL_REG(MMU_IRQ_CLEAR)) { hw_error_status.mmu_irq_rawstat &= (~value); } else if ((addr >= MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) && (addr <= MMU_STAGE1_REG(MMU_AS_REG(15, AS_STATUS)))) { - int mem_addr_space = (addr - MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) >> 6; + u32 mem_addr_space = (addr - MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) >> 6; switch (addr & 0x3F) { case AS_COMMAND: - switch (value) { - case AS_COMMAND_NOP: - hw_error_status.as_command[mem_addr_space] = - value; + switch (AS_COMMAND_COMMAND_GET(value)) { + case AS_COMMAND_COMMAND_NOP: + hw_error_status.as_command[mem_addr_space] = value; break; - case AS_COMMAND_UPDATE: - hw_error_status.as_command[mem_addr_space] = - value; - if ((hw_error_status.as_faultstatus[ - mem_addr_space]) - && ((hw_error_status.as_transtab[ - mem_addr_space] & 0x3) != 0)) { - model_error_log(KBASE_CORE, - "\n ERROR: AS_COMMAND issued UPDATE on error condition before AS_TRANSTAB been set to unmapped\n" - ); - } else if ((hw_error_status.as_faultstatus[ - mem_addr_space]) - && ((hw_error_status.as_transtab[ - mem_addr_space] & 0x3) == 0)) { - + case AS_COMMAND_COMMAND_UPDATE: + hw_error_status.as_command[mem_addr_space] = value; + if ((hw_error_status.as_faultstatus[mem_addr_space]) && + ((hw_error_status.as_transtab[mem_addr_space] & 0x3) != 0)) { + model_error_log( + KBASE_CORE, + "\n ERROR: AS_COMMAND issued UPDATE on error condition before AS_TRANSTAB been set to unmapped\n"); + } else if ((hw_error_status.as_faultstatus[mem_addr_space]) && + ((hw_error_status.as_transtab[mem_addr_space] & 0x3) == + 0)) { /*invalidate all active jobs */ invalidate_active_jobs(dummy); /* error handled */ - hw_error_status.as_faultstatus[ - mem_addr_space] = 0; + hw_error_status.as_faultstatus[mem_addr_space] = 0; } break; - case AS_COMMAND_LOCK: - case AS_COMMAND_UNLOCK: - hw_error_status.as_command[mem_addr_space] = - value; + case AS_COMMAND_COMMAND_LOCK: + case AS_COMMAND_COMMAND_UNLOCK: + hw_error_status.as_command[mem_addr_space] = value; break; - case AS_COMMAND_FLUSH_PT: - case AS_COMMAND_FLUSH_MEM: - if (hw_error_status.as_command[mem_addr_space] - != AS_COMMAND_LOCK) - model_error_log(KBASE_CORE, - "\n ERROR: AS_COMMAND issued FLUSH without LOCKING before\n" - ); + case AS_COMMAND_COMMAND_FLUSH_PT: + case AS_COMMAND_COMMAND_FLUSH_MEM: + if (hw_error_status.as_command[mem_addr_space] != + AS_COMMAND_COMMAND_LOCK) + model_error_log( + KBASE_CORE, + "\n ERROR: AS_COMMAND issued FLUSH without LOCKING before\n"); else /* error handled if any */ - hw_error_status.as_faultstatus[ - mem_addr_space] = 0; - 
hw_error_status.as_command[mem_addr_space] = - value; + hw_error_status.as_faultstatus[mem_addr_space] = 0; + hw_error_status.as_command[mem_addr_space] = value; break; default: model_error_log(KBASE_CORE, - "\n WARNING: UNRECOGNIZED AS_COMMAND 0x%x\n", - value); + "\n WARNING: UNRECOGNIZED AS_COMMAND 0x%x\n", + value); break; } break; case AS_TRANSTAB_LO: - hw_error_status.as_transtab[mem_addr_space] &= - ~((u64) (0xffffffff)); - hw_error_status.as_transtab[mem_addr_space] |= - (u64) value; + hw_error_status.as_transtab[mem_addr_space] &= ~((u64)(0xffffffff)); + hw_error_status.as_transtab[mem_addr_space] |= (u64)value; break; case AS_TRANSTAB_HI: - hw_error_status.as_transtab[mem_addr_space] &= - (u64) 0xffffffff; - hw_error_status.as_transtab[mem_addr_space] |= - ((u64) value) << 32; + hw_error_status.as_transtab[mem_addr_space] &= (u64)0xffffffff; + hw_error_status.as_transtab[mem_addr_space] |= ((u64)value) << 32; break; case AS_LOCKADDR_LO: @@ -1493,9 +1481,10 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value) break; default: - model_error_log(KBASE_CORE, + model_error_log( + KBASE_CORE, "Dummy model register access: Writing unsupported MMU #%d register 0x%x value 0x%x\n", - mem_addr_space, addr, value); + mem_addr_space, addr, value); break; } } else { @@ -1527,64 +1516,71 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value) break; #endif /* !MALI_USE_CSF */ case TILER_PWRON_LO: - dummy->power_on |= (value & 1) << 1; + dummy->domain_power_on[INDEX_TILER] |= value & + DUMMY_IMPLEMENTATION_TILER_PRESENT; /* Also ensure L2 is powered on */ - dummy->power_on |= value & 1; + fallthrough; + case L2_PWRON_LO: + dummy->domain_power_on[INDEX_L2] |= value & DUMMY_IMPLEMENTATION_L2_PRESENT; dummy->power_changed = 1; break; case SHADER_PWRON_LO: - dummy->power_on |= - (value & dummy->control_reg_values->shader_present) << 2; - dummy->power_changed = 1; - break; - case L2_PWRON_LO: - dummy->power_on |= value & 1; + dummy->domain_power_on[INDEX_SHADER] |= + value & dummy->control_reg_values->shader_present; dummy->power_changed = 1; break; case STACK_PWRON_LO: - dummy->stack_power_on_lo |= value; + dummy->domain_power_on[INDEX_STACK] |= + value & dummy->control_reg_values->stack_present; dummy->power_changed = 1; break; + + case L2_PWROFF_LO: + dummy->domain_power_on[INDEX_L2] &= + ~(value & DUMMY_IMPLEMENTATION_L2_PRESENT); + /* Also ensure tiler is powered off */ + fallthrough; case TILER_PWROFF_LO: - dummy->power_on &= ~((value & 1) << 1); + dummy->domain_power_on[INDEX_TILER] &= + ~(value & DUMMY_IMPLEMENTATION_TILER_PRESENT); dummy->power_changed = 1; break; case SHADER_PWROFF_LO: - dummy->power_on &= - ~((value & dummy->control_reg_values->shader_present) << 2); - dummy->power_changed = 1; - break; - case L2_PWROFF_LO: - dummy->power_on &= ~(value & 1); - /* Also ensure tiler is powered off */ - dummy->power_on &= ~((value & 1) << 1); + dummy->domain_power_on[INDEX_SHADER] &= + ~(value & dummy->control_reg_values->shader_present); dummy->power_changed = 1; break; case STACK_PWROFF_LO: - dummy->stack_power_on_lo &= ~value; + dummy->domain_power_on[INDEX_STACK] &= + ~(value & dummy->control_reg_values->stack_present); dummy->power_changed = 1; break; + case TILER_PWRON_HI: + case SHADER_PWRON_HI: + case L2_PWRON_HI: case TILER_PWROFF_HI: case SHADER_PWROFF_HI: case L2_PWROFF_HI: case PWR_KEY: case PWR_OVERRIDE0: -#if !MALI_USE_CSF +#if MALI_USE_CSF + case SHADER_PWRFEATURES: + case CSF_CONFIG: +#else /* !MALI_USE_CSF */ case JM_CONFIG: case PRFCNT_CONFIG: -#else /* 
!MALI_USE_CSF */ - case CSF_CONFIG: -#endif /* !MALI_USE_CSF */ +#endif /* MALI_USE_CSF */ case SHADER_CONFIG: case TILER_CONFIG: case L2_MMU_CONFIG: /* Writes ignored */ break; default: - model_error_log(KBASE_CORE, + model_error_log( + KBASE_CORE, "Dummy model register access: Writing unsupported register 0x%x value 0x%x\n", - addr, value); + addr, value); break; } } @@ -1601,7 +1597,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) spin_lock_irqsave(&hw_error_status.access_lock, flags); - *value = 0; /* 0 by default */ + *value = 0; /* 0 by default */ #if !MALI_USE_CSF if (addr == JOB_CONTROL_REG(JOB_IRQ_JS_STATE)) { pr_debug("%s", "JS_ACTIVE being read"); @@ -1619,60 +1615,57 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) } else if (addr == JOB_CONTROL_REG(JOB_IRQ_STATUS)) { *value = hw_error_status.job_irq_status; pr_debug("JS_IRQ_STATUS being read %x", *value); - } #if !MALI_USE_CSF - else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) { + } else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) { int i; *value = 0; for (i = 0; i < NUM_SLOTS; i++) *value |= dummy->slots[i].job_irq_mask << i; pr_debug("JS_IRQ_MASK being read %x", *value); - } #else /* !MALI_USE_CSF */ - else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) - ; /* ignore JOB_IRQ_MASK as it is handled by CSFFW */ + } else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) { + /* ignore JOB_IRQ_MASK as it is handled by CSFFW */ #endif /* !MALI_USE_CSF */ - else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { + } else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { *value = (dummy->reset_completed_mask << 8) | ((dummy->clean_caches_completed_irq_enabled ? 1u : 0u) << 17) | #if MALI_USE_CSF ((dummy->flush_pa_range_completed_irq_enabled ? 1u : 0u) << 20) | #endif - (dummy->power_changed_mask << 9) | (1 << 7) | 1; + (dummy->power_changed_mask << 9) | (1u << 7) | 1u; pr_debug("GPU_IRQ_MASK read %x", *value); } else if (addr == GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) { - *value = (dummy->power_changed << 9) | (dummy->power_changed << 10) | - (dummy->reset_completed << 8) | -#if !MALI_USE_CSF - (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) | -#endif /* !MALI_USE_CSF */ - ((dummy->clean_caches_completed ? 1u : 0u) << 17) | + *value = ((dummy->clean_caches_completed ? 1u : 0u) << 17) | #if MALI_USE_CSF ((dummy->flush_pa_range_completed ? 1u : 0u) << 20) | +#else + (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) | #endif hw_error_status.gpu_error_irq; + *value |= (dummy->power_changed << 9) | (dummy->power_changed << 10) | + (dummy->reset_completed << 8); + pr_debug("GPU_IRQ_RAWSTAT read %x", *value); } else if (addr == GPU_CONTROL_REG(GPU_IRQ_STATUS)) { - *value = ((dummy->power_changed && (dummy->power_changed_mask & 0x1)) << 9) | - ((dummy->power_changed && (dummy->power_changed_mask & 0x2)) << 10) | - ((dummy->reset_completed & dummy->reset_completed_mask) << 8) | -#if !MALI_USE_CSF - (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) | -#endif /* !MALI_USE_CSF */ - (((dummy->clean_caches_completed && + *value = (((dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled) ? - 1u : - 0u) + 1u : + 0u) << 17) | #if MALI_USE_CSF (((dummy->flush_pa_range_completed && dummy->flush_pa_range_completed_irq_enabled) ? - 1u : - 0u) + 1u : + 0u) << 20) | +#else + (dummy->prfcnt_sample_completed ? 
PRFCNT_SAMPLE_COMPLETED : 0) | #endif hw_error_status.gpu_error_irq; + *value |= ((dummy->power_changed && (dummy->power_changed_mask & 0x1)) << 9) | + ((dummy->power_changed && (dummy->power_changed_mask & 0x2)) << 10) | + ((dummy->reset_completed & dummy->reset_completed_mask) << 8); pr_debug("GPU_IRQ_STAT read %x", *value); } else if (addr == GPU_CONTROL_REG(GPU_STATUS)) { *value = 0; @@ -1684,18 +1677,17 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = hw_error_status.gpu_fault_status; } else if (addr == GPU_CONTROL_REG(L2_CONFIG)) { *value = dummy->l2_config; - } #if MALI_USE_CSF - else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) && - (addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) { - *value = 0; - } else if ((addr >= GPU_CONTROL_REG(ASN_HASH_0)) && - (addr < GPU_CONTROL_REG(ASN_HASH(ASN_HASH_COUNT)))) { + } else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) && + (addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) { + u32 alloc_reg = (addr - GPU_CONTROL_REG(SYSC_ALLOC0)) >> 2; + *value = sysc_alloc_regs[alloc_reg]; + } else if ((addr >= GPU_CONTROL_REG(L2_SLICE_HASH_0)) && + (addr < GPU_CONTROL_REG(L2_SLICE_HASH(L2_SLICE_HASH_COUNT)))) { *value = 0; - } #endif - else if ((addr >= GPU_CONTROL_REG(SHADER_PRESENT_LO)) && - (addr <= GPU_CONTROL_REG(L2_MMU_CONFIG))) { + } else if ((addr >= GPU_CONTROL_REG(SHADER_PRESENT_LO)) && + (addr <= GPU_CONTROL_REG(L2_MMU_CONFIG))) { switch (addr) { case GPU_CONTROL_REG(SHADER_PRESENT_LO): case GPU_CONTROL_REG(SHADER_PRESENT_HI): @@ -1708,22 +1700,22 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = get_implementation_register(addr, dummy->control_reg_values); break; case GPU_CONTROL_REG(SHADER_READY_LO): - *value = (dummy->power_on >> 0x02) & + *value = (dummy->domain_power_on[INDEX_SHADER]) & get_implementation_register(GPU_CONTROL_REG(SHADER_PRESENT_LO), dummy->control_reg_values); break; case GPU_CONTROL_REG(TILER_READY_LO): - *value = (dummy->power_on >> 0x01) & + *value = (dummy->domain_power_on[INDEX_TILER]) & get_implementation_register(GPU_CONTROL_REG(TILER_PRESENT_LO), dummy->control_reg_values); break; case GPU_CONTROL_REG(L2_READY_LO): - *value = dummy->power_on & + *value = dummy->domain_power_on[INDEX_L2] & get_implementation_register(GPU_CONTROL_REG(L2_PRESENT_LO), dummy->control_reg_values); break; case GPU_CONTROL_REG(STACK_READY_LO): - *value = dummy->stack_power_on_lo & + *value = dummy->domain_power_on[INDEX_STACK] & get_implementation_register(GPU_CONTROL_REG(STACK_PRESENT_LO), dummy->control_reg_values); break; @@ -1732,38 +1724,33 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) case GPU_CONTROL_REG(TILER_READY_HI): case GPU_CONTROL_REG(L2_READY_HI): case GPU_CONTROL_REG(STACK_READY_HI): - *value = 0; - break; - case GPU_CONTROL_REG(SHADER_PWRTRANS_LO): - case GPU_CONTROL_REG(SHADER_PWRTRANS_HI): - case GPU_CONTROL_REG(TILER_PWRTRANS_LO): - case GPU_CONTROL_REG(TILER_PWRTRANS_HI): case GPU_CONTROL_REG(L2_PWRTRANS_LO): case GPU_CONTROL_REG(L2_PWRTRANS_HI): + case GPU_CONTROL_REG(TILER_PWRTRANS_LO): + case GPU_CONTROL_REG(TILER_PWRTRANS_HI): + case GPU_CONTROL_REG(SHADER_PWRTRANS_LO): + case GPU_CONTROL_REG(SHADER_PWRTRANS_HI): case GPU_CONTROL_REG(STACK_PWRTRANS_LO): case GPU_CONTROL_REG(STACK_PWRTRANS_HI): - *value = 0; - break; - case GPU_CONTROL_REG(SHADER_PWRACTIVE_LO): - case GPU_CONTROL_REG(SHADER_PWRACTIVE_HI): - case GPU_CONTROL_REG(TILER_PWRACTIVE_LO): - case GPU_CONTROL_REG(TILER_PWRACTIVE_HI): case GPU_CONTROL_REG(L2_PWRACTIVE_LO): case 
GPU_CONTROL_REG(L2_PWRACTIVE_HI): - *value = 0; - break; + case GPU_CONTROL_REG(TILER_PWRACTIVE_LO): + case GPU_CONTROL_REG(TILER_PWRACTIVE_HI): + case GPU_CONTROL_REG(SHADER_PWRACTIVE_LO): + case GPU_CONTROL_REG(SHADER_PWRACTIVE_HI): -#if !MALI_USE_CSF - case GPU_CONTROL_REG(JM_CONFIG): -#else /* !MALI_USE_CSF */ +#if MALI_USE_CSF + case GPU_CONTROL_REG(SHADER_PWRFEATURES): case GPU_CONTROL_REG(CSF_CONFIG): -#endif /* !MALI_USE_CSF */ - +#else /* !MALI_USE_CSF */ + case GPU_CONTROL_REG(JM_CONFIG): +#endif /* MALI_USE_CSF */ case GPU_CONTROL_REG(SHADER_CONFIG): case GPU_CONTROL_REG(TILER_CONFIG): case GPU_CONTROL_REG(L2_MMU_CONFIG): + case GPU_CONTROL_REG(THREAD_TLS_ALLOC): *value = 0; break; @@ -1774,43 +1761,39 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = dummy->coherency_enable; break; - case GPU_CONTROL_REG(THREAD_TLS_ALLOC): - *value = 0; - break; - default: - model_error_log(KBASE_CORE, - "Dummy model register access: Reading unknown control reg 0x%x\n", - addr); + model_error_log( + KBASE_CORE, + "Dummy model register access: Reading unknown control reg 0x%x\n", + addr); break; } #if !MALI_USE_CSF } else if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) && - (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) { + (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) { int slot_idx = (addr >> 7) & 0xf; int sub_reg = addr & 0x7F; KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS); switch (sub_reg) { case JS_HEAD_NEXT_LO: - *value = (u32) ((hw_error_status.current_jc) & - 0xFFFFFFFF); + *value = (u32)((hw_error_status.current_jc) & 0xFFFFFFFF); break; case JS_HEAD_NEXT_HI: - *value = (u32) (hw_error_status.current_jc >> 32); + *value = (u32)(hw_error_status.current_jc >> 32); break; case JS_STATUS: if (hw_error_status.js_status[slot_idx]) *value = hw_error_status.js_status[slot_idx]; else /* 0x08 means active, 0x00 idle */ - *value = (dummy->slots[slot_idx].job_active) - << 3; + *value = (dummy->slots[slot_idx].job_active) << 3; break; case JS_COMMAND_NEXT: *value = dummy->slots[slot_idx].job_queued; break; - /* The dummy model does not implement these registers + /** + * The dummy model does not implement these registers * avoid printing error messages */ case JS_HEAD_HI: @@ -1821,20 +1804,19 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) break; default: - model_error_log(KBASE_CORE, + model_error_log( + KBASE_CORE, "Dummy model register access: unknown job slot reg 0x%02X being read\n", - sub_reg); + sub_reg); break; } -#endif /* !MALI_USE_CSF */ - } else if (addr == GPU_CONTROL_REG(AS_PRESENT)) { - *value = dummy->control_reg_values->as_present; -#if !MALI_USE_CSF } else if (addr == GPU_CONTROL_REG(JS_PRESENT)) { *value = 0x7; #endif /* !MALI_USE_CSF */ + } else if (addr == GPU_CONTROL_REG(AS_PRESENT)) { + *value = dummy->control_reg_values->as_present; } else if (addr >= GPU_CONTROL_REG(TEXTURE_FEATURES_0) && - addr <= GPU_CONTROL_REG(TEXTURE_FEATURES_3)) { + addr <= GPU_CONTROL_REG(TEXTURE_FEATURES_3)) { switch (addr) { case GPU_CONTROL_REG(TEXTURE_FEATURES_0): *value = 0xfffff; @@ -1854,7 +1836,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) } #if !MALI_USE_CSF } else if (addr >= GPU_CONTROL_REG(JS0_FEATURES) && - addr <= GPU_CONTROL_REG(JS15_FEATURES)) { + addr <= GPU_CONTROL_REG(JS15_FEATURES)) { switch (addr) { case GPU_CONTROL_REG(JS0_FEATURES): *value = 0x20e; @@ -1873,8 +1855,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) break; } #endif /* !MALI_USE_CSF */ - } else if (addr >= GPU_CONTROL_REG(L2_FEATURES) - 
&& addr <= GPU_CONTROL_REG(MMU_FEATURES)) { + } else if (addr >= GPU_CONTROL_REG(L2_FEATURES) && addr <= GPU_CONTROL_REG(MMU_FEATURES)) { switch (addr) { case GPU_CONTROL_REG(L2_FEATURES): *value = 0x6100206; @@ -1899,12 +1880,12 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = dummy->control_reg_values->mmu_features; break; } - } else if (addr >= GPU_CONTROL_REG(THREAD_MAX_THREADS) - && addr <= GPU_CONTROL_REG(THREAD_FEATURES)) { + } else if (addr >= GPU_CONTROL_REG(THREAD_MAX_THREADS) && + addr <= GPU_CONTROL_REG(THREAD_FEATURES)) { switch (addr) { case GPU_CONTROL_REG(THREAD_FEATURES): - *value = dummy->control_reg_values->thread_features - | (IMPLEMENTATION_MODEL << 30); + *value = dummy->control_reg_values->thread_features | + (THREAD_FEATURES_IMPLEMENTATION_TECHNOLOGY_SOFTWARE << 30); break; case GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE): *value = dummy->control_reg_values->thread_max_barrier_size; @@ -1916,24 +1897,20 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = dummy->control_reg_values->thread_max_threads; break; } - } else if (addr >= GPU_CONTROL_REG(CYCLE_COUNT_LO) - && addr <= GPU_CONTROL_REG(TIMESTAMP_HI)) { + } else if (addr >= GPU_CONTROL_REG(CYCLE_COUNT_LO) && + addr <= GPU_CONTROL_REG(TIMESTAMP_HI)) { *value = 0; } else if (addr >= MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO)) && addr <= MMU_STAGE1_REG(MMU_AS_REG(15, AS_STATUS))) { - int mem_addr_space = (addr - MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) >> 6; + u32 mem_addr_space = (addr - MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) >> 6; switch (addr & 0x3F) { case AS_TRANSTAB_LO: - *value = (u32) - (hw_error_status.as_transtab[mem_addr_space] & - 0xffffffff); + *value = (u32)(hw_error_status.as_transtab[mem_addr_space] & 0xffffffff); break; case AS_TRANSTAB_HI: - *value = (u32) - (hw_error_status.as_transtab[mem_addr_space] >> - 32); + *value = (u32)(hw_error_status.as_transtab[mem_addr_space] >> 32); break; case AS_STATUS: @@ -1942,8 +1919,8 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) case AS_FAULTSTATUS: if (mem_addr_space == hw_error_status.faulty_mmu_as) - *value = hw_error_status.as_faultstatus[ - hw_error_status.faulty_mmu_as]; + *value = hw_error_status + .as_faultstatus[hw_error_status.faulty_mmu_as]; else *value = 0; break; @@ -1959,9 +1936,10 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) break; default: - model_error_log(KBASE_CORE, - "Dummy model register access: Reading unsupported MMU #%d register 0x%x. Returning 0\n", - mem_addr_space, addr); + model_error_log( + KBASE_CORE, + "Dummy model register access: Reading unsupported MMU #%u register 0x%x. 
Returning 0\n", + mem_addr_space, addr); *value = 0; break; } @@ -1970,58 +1948,50 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) } else if (addr == MMU_CONTROL_REG(MMU_IRQ_RAWSTAT)) { *value = hw_error_status.mmu_irq_rawstat; } else if (addr == MMU_CONTROL_REG(MMU_IRQ_STATUS)) { - *value = hw_error_status.mmu_irq_mask & - hw_error_status.mmu_irq_rawstat; - } + *value = hw_error_status.mmu_irq_mask & hw_error_status.mmu_irq_rawstat; #if MALI_USE_CSF - else if (addr == IPA_CONTROL_REG(STATUS)) { + } else if (addr == IPA_CONTROL_REG(STATUS) + ) { *value = (ipa_control_timer_enabled << 31); } else if ((addr >= IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) && (addr <= IPA_CONTROL_REG(VALUE_CSHW_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) { - u32 counter_index = - (addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) >> 3; - bool is_low_word = - !((addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) & 7); + u32 counter_index = (addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) >> 3; + bool is_low_word = !((addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) & 7); - *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_CSHW, - counter_index, is_low_word); + *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_CSHW, counter_index, + is_low_word); } else if ((addr >= IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) && (addr <= IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) { - u32 counter_index = - (addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) >> 3; - bool is_low_word = - !((addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) & 7); + u32 counter_index = (addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) >> 3; + bool is_low_word = !((addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) & 7); - *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_MEMSYS, - counter_index, is_low_word); + *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_MEMSYS, counter_index, + is_low_word); } else if ((addr >= IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) && (addr <= IPA_CONTROL_REG(VALUE_TILER_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) { - u32 counter_index = - (addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) >> 3; - bool is_low_word = - !((addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) & 7); + u32 counter_index = (addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) >> 3; + bool is_low_word = !((addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) & 7); - *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_TILER, - counter_index, is_low_word); + *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_TILER, counter_index, + is_low_word); } else if ((addr >= IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) && (addr <= IPA_CONTROL_REG(VALUE_SHADER_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) { - u32 counter_index = - (addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) >> 3; - bool is_low_word = - !((addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) & 7); + u32 counter_index = (addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) >> 3; + bool is_low_word = !((addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) & 7); + + *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER, counter_index, + is_low_word); - *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER, - counter_index, is_low_word); - } #endif - else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) { + } else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) { *value = dummy->control_reg_values->gpu_features_lo; } else if (addr == GPU_CONTROL_REG(GPU_FEATURES_HI)) { *value = dummy->control_reg_values->gpu_features_hi; } else { - model_error_log(KBASE_CORE, + model_error_log( + KBASE_CORE, "Dummy model 
register access: Reading unsupported register 0x%x. Returning 0\n", - addr); + addr); *value = 0; } @@ -2037,11 +2007,9 @@ static u32 set_user_sample_core_type(u64 *counters, u32 *usr_data_start, u32 usr lockdep_assert_held(&performance_counters.access_lock); - sample_size = - core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u32); + sample_size = core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u32); - if ((usr_data_size >= usr_data_offset) && - (sample_size <= usr_data_size - usr_data_offset)) + if ((usr_data_size >= usr_data_offset) && (sample_size <= usr_data_size - usr_data_offset)) usr_data = usr_data_start + (usr_data_offset / sizeof(u32)); if (!usr_data) @@ -2058,20 +2026,17 @@ static u32 set_user_sample_core_type(u64 *counters, u32 *usr_data_start, u32 usr return usr_data_offset + sample_size; } -static u32 set_kernel_sample_core_type(u64 *counters, - u64 *usr_data_start, u32 usr_data_offset, - u32 usr_data_size, u32 core_count) +static u32 set_kernel_sample_core_type(u64 *counters, u64 *usr_data_start, u32 usr_data_offset, + u32 usr_data_size, u32 core_count) { u32 sample_size; u64 *usr_data = NULL; lockdep_assert_held(&performance_counters.access_lock); - sample_size = - core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u64); + sample_size = core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u64); - if ((usr_data_size >= usr_data_offset) && - (sample_size <= usr_data_size - usr_data_offset)) + if ((usr_data_size >= usr_data_offset) && (sample_size <= usr_data_size - usr_data_offset)) usr_data = usr_data_start + (usr_data_offset / sizeof(u64)); if (!usr_data) @@ -2150,8 +2115,8 @@ void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *data, u32 size) } KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_kernel_sample); -void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev, - u64 *l2_present, u64 *shader_present) +void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev, u64 *l2_present, + u64 *shader_present) { if (shader_present) *shader_present = performance_counters.shader_present; @@ -2160,12 +2125,12 @@ void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev, } KBASE_EXPORT_TEST_API(gpu_model_get_dummy_prfcnt_cores); -void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev, - u64 l2_present, u64 shader_present) +void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev, u64 l2_present, + u64 shader_present) { - if (WARN_ON(!l2_present || !shader_present - || hweight64(l2_present) > KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS - || hweight64(shader_present) > KBASE_DUMMY_MODEL_MAX_SHADER_CORES)) + if (WARN_ON(!l2_present || !shader_present || + hweight64(l2_present) > KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS || + hweight64(shader_present) > KBASE_DUMMY_MODEL_MAX_SHADER_CORES)) return; performance_counters.l2_present = l2_present; @@ -2174,15 +2139,14 @@ void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev, /* Update the GPU properties used by vinstr to calculate the counter * dump buffer size. 
*/ - kbdev->gpu_props.props.l2_props.num_l2_slices = hweight64(l2_present); - kbdev->gpu_props.props.coherency_info.group[0].core_mask = shader_present; + kbdev->gpu_props.num_l2_slices = hweight64(l2_present); + kbdev->gpu_props.coherency_info.group.core_mask = shader_present; kbdev->gpu_props.curr_config.l2_slices = hweight64(l2_present); kbdev->gpu_props.curr_config.shader_present = shader_present; } KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_cores); -int gpu_model_control(void *model, - struct kbase_model_control_params *params) +int gpu_model_control(void *model, struct kbase_model_control_params *params) { struct dummy_model_t *dummy = (struct dummy_model_t *)model; int i; @@ -2202,3 +2166,9 @@ int gpu_model_control(void *model, return 0; } + +u64 midgard_model_arch_timer_get_cntfrq(void *h) +{ + CSTD_UNUSED(h); + return arch_timer_get_cntfrq(); +} diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.h b/mali_kbase/backend/gpu/mali_kbase_model_dummy.h index 2a3351b..cdd8102 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.h +++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2015, 2017-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,54 +43,55 @@ #define model_error_log(module, ...) pr_err(__VA_ARGS__) -#define NUM_SLOTS 4 /*number of job slots */ +#define NUM_SLOTS 4 /*number of job slots */ /*Errors Mask Codes*/ /* each bit of errors_mask is associated to a specific error: * NON FAULT STATUS CODES: only the following are implemented since the others * represent normal working statuses */ -#define KBASE_JOB_INTERRUPTED (1<<0) -#define KBASE_JOB_STOPPED (1<<1) -#define KBASE_JOB_TERMINATED (1<<2) +#define KBASE_JOB_INTERRUPTED (1 << 0) +#define KBASE_JOB_STOPPED (1 << 1) +#define KBASE_JOB_TERMINATED (1 << 2) /* JOB EXCEPTIONS: */ -#define KBASE_JOB_CONFIG_FAULT (1<<3) -#define KBASE_JOB_POWER_FAULT (1<<4) -#define KBASE_JOB_READ_FAULT (1<<5) -#define KBASE_JOB_WRITE_FAULT (1<<6) -#define KBASE_JOB_AFFINITY_FAULT (1<<7) -#define KBASE_JOB_BUS_FAULT (1<<8) -#define KBASE_INSTR_INVALID_PC (1<<9) -#define KBASE_INSTR_INVALID_ENC (1<<10) -#define KBASE_INSTR_TYPE_MISMATCH (1<<11) -#define KBASE_INSTR_OPERAND_FAULT (1<<12) -#define KBASE_INSTR_TLS_FAULT (1<<13) -#define KBASE_INSTR_BARRIER_FAULT (1<<14) -#define KBASE_INSTR_ALIGN_FAULT (1<<15) -#define KBASE_DATA_INVALID_FAULT (1<<16) -#define KBASE_TILE_RANGE_FAULT (1<<17) -#define KBASE_ADDR_RANGE_FAULT (1<<18) -#define KBASE_OUT_OF_MEMORY (1<<19) -#define KBASE_UNKNOWN (1<<20) +#define KBASE_JOB_CONFIG_FAULT (1 << 3) +#define KBASE_JOB_POWER_FAULT (1 << 4) +#define KBASE_JOB_READ_FAULT (1 << 5) +#define KBASE_JOB_WRITE_FAULT (1 << 6) +#define KBASE_JOB_AFFINITY_FAULT (1 << 7) +#define KBASE_JOB_BUS_FAULT (1 << 8) +#define KBASE_INSTR_INVALID_PC (1 << 9) +#define KBASE_INSTR_INVALID_ENC (1 << 10) +#define KBASE_INSTR_TYPE_MISMATCH (1 << 11) +#define KBASE_INSTR_OPERAND_FAULT (1 << 12) +#define KBASE_INSTR_TLS_FAULT (1 << 13) +#define KBASE_INSTR_BARRIER_FAULT (1 << 14) +#define KBASE_INSTR_ALIGN_FAULT (1 << 15) +#define KBASE_DATA_INVALID_FAULT (1 << 16) +#define KBASE_TILE_RANGE_FAULT (1 << 17) +#define KBASE_ADDR_RANGE_FAULT (1 << 18) +#define KBASE_OUT_OF_MEMORY (1 << 19) +#define KBASE_UNKNOWN (1 
<< 20) /* GPU EXCEPTIONS:*/ -#define KBASE_DELAYED_BUS_FAULT (1<<21) -#define KBASE_SHAREABILITY_FAULT (1<<22) +#define KBASE_DELAYED_BUS_FAULT (1 << 21) +#define KBASE_SHAREABILITY_FAULT (1 << 22) /* MMU EXCEPTIONS:*/ -#define KBASE_TRANSLATION_FAULT (1<<23) -#define KBASE_PERMISSION_FAULT (1<<24) -#define KBASE_TRANSTAB_BUS_FAULT (1<<25) -#define KBASE_ACCESS_FLAG (1<<26) +#define KBASE_TRANSLATION_FAULT (1 << 23) +#define KBASE_PERMISSION_FAULT (1 << 24) +#define KBASE_TRANSTAB_BUS_FAULT (1 << 25) +#define KBASE_ACCESS_FLAG (1 << 26) /* generic useful bitmasks */ #define IS_A_JOB_ERROR ((KBASE_UNKNOWN << 1) - KBASE_JOB_INTERRUPTED) #define IS_A_MMU_ERROR ((KBASE_ACCESS_FLAG << 1) - KBASE_TRANSLATION_FAULT) -#define IS_A_GPU_ERROR (KBASE_DELAYED_BUS_FAULT|KBASE_SHAREABILITY_FAULT) +#define IS_A_GPU_ERROR (KBASE_DELAYED_BUS_FAULT | KBASE_SHAREABILITY_FAULT) /* number of possible MMU address spaces */ -#define NUM_MMU_AS 16 /* total number of MMU address spaces as in +#define NUM_MMU_AS \ + 16 /* total number of MMU address spaces as in * MMU_IRQ_RAWSTAT register */ @@ -133,10 +134,10 @@ struct error_status_t { u32 errors_mask; u32 mmu_table_level; - int faulty_mmu_as; + u32 faulty_mmu_as; u64 current_jc; - int current_job_slot; + u32 current_job_slot; u32 job_irq_rawstat; u32 job_irq_status; @@ -167,10 +168,9 @@ struct gpu_model_prfcnt_en { u32 shader; }; -void midgard_set_error(int job_slot); +void midgard_set_error(u32 job_slot); int job_atom_inject_error(struct kbase_error_params *params); -int gpu_model_control(void *h, - struct kbase_model_control_params *params); +int gpu_model_control(void *h, struct kbase_model_control_params *params); /** * gpu_model_set_dummy_prfcnt_user_sample() - Set performance counter values @@ -194,10 +194,10 @@ int gpu_model_set_dummy_prfcnt_user_sample(u32 __user *data, u32 size); */ void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *data, u32 size); -void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev, - u64 *l2_present, u64 *shader_present); -void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev, - u64 l2_present, u64 shader_present); +void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev, u64 *l2_present, + u64 *shader_present); +void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev, u64 l2_present, + u64 shader_present); /* Clear the counter values array maintained by the dummy model */ void gpu_model_clear_prfcnt_values(void); diff --git a/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c b/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c index f310cc7..86d4e26 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c +++ b/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c @@ -56,44 +56,37 @@ static void gpu_generate_error(void) /* pick up a faulty mmu address space */ hw_error_status.faulty_mmu_as = prandom_u32() % NUM_MMU_AS; /* pick up an mmu table level */ - hw_error_status.mmu_table_level = - 1 + (prandom_u32() % MAX_MMU_TABLE_LEVEL); - hw_error_status.errors_mask = - (u32)(1 << (prandom_u32() % TOTAL_FAULTS)); + hw_error_status.mmu_table_level = 1 + (prandom_u32() % MAX_MMU_TABLE_LEVEL); + hw_error_status.errors_mask = (u32)(1 << (prandom_u32() % TOTAL_FAULTS)); /*is there also one or more errors? 
*/ if ((prandom_u32() % 100) < multiple_error_probability) { - errors_num = 1 + (prandom_u32() % - (MAX_CONCURRENT_FAULTS - 1)); + errors_num = 1 + (prandom_u32() % (MAX_CONCURRENT_FAULTS - 1)); while (errors_num-- > 0) { u32 temp_mask; - temp_mask = (u32)( - 1 << (prandom_u32() % TOTAL_FAULTS)); + temp_mask = (u32)(1 << (prandom_u32() % TOTAL_FAULTS)); /* below we check that no bit of the same error * type is set again in the error mask */ if ((temp_mask & IS_A_JOB_ERROR) && - (hw_error_status.errors_mask & - IS_A_JOB_ERROR)) { + (hw_error_status.errors_mask & IS_A_JOB_ERROR)) { errors_num++; continue; } if ((temp_mask & IS_A_MMU_ERROR) && - (hw_error_status.errors_mask & - IS_A_MMU_ERROR)) { + (hw_error_status.errors_mask & IS_A_MMU_ERROR)) { errors_num++; continue; } if ((temp_mask & IS_A_GPU_ERROR) && - (hw_error_status.errors_mask & - IS_A_GPU_ERROR)) { + (hw_error_status.errors_mask & IS_A_GPU_ERROR)) { errors_num++; continue; } /* this error mask is already set */ if ((hw_error_status.errors_mask | temp_mask) == - hw_error_status.errors_mask) { + hw_error_status.errors_mask) { errors_num++; continue; } @@ -114,8 +107,7 @@ int job_atom_inject_error(struct kbase_error_params *params) if (!new_elem) { model_error_log(KBASE_CORE, - "\njob_atom_inject_error: kzalloc failed for new_elem\n" - ); + "\njob_atom_inject_error: kzalloc failed for new_elem\n"); return -ENOMEM; } new_elem->params.jc = params->jc; @@ -124,7 +116,7 @@ int job_atom_inject_error(struct kbase_error_params *params) new_elem->params.faulty_mmu_as = params->faulty_mmu_as; /*circular list below */ - if (error_track_list == NULL) { /*no elements */ + if (error_track_list == NULL) { /*no elements */ error_track_list = new_elem; new_elem->next = error_track_list; } else { @@ -139,7 +131,7 @@ int job_atom_inject_error(struct kbase_error_params *params) return 0; } -void midgard_set_error(int job_slot) +void midgard_set_error(u32 job_slot) { #ifdef CONFIG_MALI_ERROR_INJECT_RANDOM gpu_generate_error(); @@ -154,12 +146,9 @@ void midgard_set_error(int job_slot) /* found a faulty atom matching with the * current one */ - hw_error_status.errors_mask = - walker->params.errors_mask; - hw_error_status.mmu_table_level = - walker->params.mmu_table_level; - hw_error_status.faulty_mmu_as = - walker->params.faulty_mmu_as; + hw_error_status.errors_mask = walker->params.errors_mask; + hw_error_status.mmu_table_level = walker->params.mmu_table_level; + hw_error_status.faulty_mmu_as = walker->params.faulty_mmu_as; hw_error_status.current_job_slot = job_slot; if (walker->next == walker) { @@ -179,5 +168,5 @@ void midgard_set_error(int job_slot) walker = walker->next; } while (auxiliar->next != error_track_list); } -#endif /* CONFIG_MALI_ERROR_INJECT_RANDOM */ +#endif /* CONFIG_MALI_ERROR_INJECT_RANDOM */ } diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.c b/mali_kbase/backend/gpu/mali_kbase_model_linux.c index 67e00e9..fa12e52 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_linux.c +++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.c @@ -24,7 +24,7 @@ */ #include <mali_kbase.h> -#include <gpu/mali_kbase_gpu_regmap.h> +#include <hw_access/mali_kbase_hw_access_regmap.h> #include "backend/gpu/mali_kbase_model_linux.h" #include "device/mali_kbase_device.h" @@ -37,72 +37,39 @@ struct model_irq_data { struct work_struct work; }; -static void serve_job_irq(struct work_struct *work) -{ - struct model_irq_data *data = container_of(work, struct model_irq_data, - work); - struct kbase_device *kbdev = data->kbdev; - - /* Make sure no 
worker is already serving this IRQ */ - while (atomic_cmpxchg(&kbdev->serving_job_irq, 1, 0) == 1) { - u32 val; - - while ((val = kbase_reg_read(kbdev, - JOB_CONTROL_REG(JOB_IRQ_STATUS)))) { - unsigned long flags; - - /* Handle the IRQ */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -#if MALI_USE_CSF - kbase_csf_interrupt(kbdev, val); -#else - kbase_job_done(kbdev, val); -#endif - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - } +#define DEFINE_SERVE_IRQ(irq_handler) \ + static void serve_##irq_handler(struct work_struct *work) \ + { \ + struct model_irq_data *data = container_of(work, struct model_irq_data, work); \ + struct kbase_device *kbdev = data->kbdev; \ + irq_handler(kbdev); \ + kmem_cache_free(kbdev->irq_slab, data); \ } - kmem_cache_free(kbdev->irq_slab, data); +static void job_irq(struct kbase_device *kbdev) +{ + /* Make sure no worker is already serving this IRQ */ + while (atomic_cmpxchg(&kbdev->serving_job_irq, 1, 0) == 1) + kbase_get_interrupt_handler(kbdev, JOB_IRQ_TAG)(0, kbdev); } +DEFINE_SERVE_IRQ(job_irq) -static void serve_gpu_irq(struct work_struct *work) +static void gpu_irq(struct kbase_device *kbdev) { - struct model_irq_data *data = container_of(work, struct model_irq_data, - work); - struct kbase_device *kbdev = data->kbdev; - /* Make sure no worker is already serving this IRQ */ - while (atomic_cmpxchg(&kbdev->serving_gpu_irq, 1, 0) == 1) { - u32 val; - - while ((val = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_IRQ_STATUS)))) { - /* Handle the IRQ */ - kbase_gpu_interrupt(kbdev, val); - } - } - - kmem_cache_free(kbdev->irq_slab, data); + while (atomic_cmpxchg(&kbdev->serving_gpu_irq, 1, 0) == 1) + kbase_get_interrupt_handler(kbdev, GPU_IRQ_TAG)(0, kbdev); } +DEFINE_SERVE_IRQ(gpu_irq) -static void serve_mmu_irq(struct work_struct *work) +static void mmu_irq(struct kbase_device *kbdev) { - struct model_irq_data *data = container_of(work, struct model_irq_data, - work); - struct kbase_device *kbdev = data->kbdev; - /* Make sure no worker is already serving this IRQ */ - if (atomic_cmpxchg(&kbdev->serving_mmu_irq, 1, 0) == 1) { - u32 val; - - while ((val = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_STATUS)))) { - /* Handle the IRQ */ - kbase_mmu_interrupt(kbdev, val); - } - } - - kmem_cache_free(kbdev->irq_slab, data); + while (atomic_cmpxchg(&kbdev->serving_mmu_irq, 1, 0) == 1) + kbase_get_interrupt_handler(kbdev, MMU_IRQ_TAG)(0, kbdev); } +DEFINE_SERVE_IRQ(mmu_irq) + void gpu_device_raise_irq(void *model, u32 irq) { @@ -141,30 +108,6 @@ void gpu_device_raise_irq(void *model, u32 irq) queue_work(kbdev->irq_workq, &data->work); } -void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value) -{ - unsigned long flags; - - spin_lock_irqsave(&kbdev->reg_op_lock, flags); - midgard_model_write_reg(kbdev->model, offset, value); - spin_unlock_irqrestore(&kbdev->reg_op_lock, flags); -} - -KBASE_EXPORT_TEST_API(kbase_reg_write); - -u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) -{ - unsigned long flags; - u32 val = 0; - - spin_lock_irqsave(&kbdev->reg_op_lock, flags); - midgard_model_read_reg(kbdev->model, offset, &val); - spin_unlock_irqrestore(&kbdev->reg_op_lock, flags); - - return val; -} -KBASE_EXPORT_TEST_API(kbase_reg_read); - int kbase_install_interrupts(struct kbase_device *kbdev) { KBASE_DEBUG_ASSERT(kbdev); @@ -177,13 +120,16 @@ int kbase_install_interrupts(struct kbase_device *kbdev) if (kbdev->irq_workq == NULL) return -ENOMEM; - kbdev->irq_slab = kmem_cache_create("dummy_irq_slab", - sizeof(struct model_irq_data), 0, 0, 
NULL); + kbdev->irq_slab = + kmem_cache_create("dummy_irq_slab", sizeof(struct model_irq_data), 0, 0, NULL); if (kbdev->irq_slab == NULL) { destroy_workqueue(kbdev->irq_workq); return -ENOMEM; } + kbdev->nr_irqs = 3; + + return 0; } @@ -202,25 +148,14 @@ void kbase_synchronize_irqs(struct kbase_device *kbdev) KBASE_EXPORT_TEST_API(kbase_synchronize_irqs); -int kbase_set_custom_irq_handler(struct kbase_device *kbdev, - irq_handler_t custom_handler, - int irq_type) +int kbase_set_custom_irq_handler(struct kbase_device *kbdev, irq_handler_t custom_handler, + u32 irq_tag) { return 0; } KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler); -irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val) -{ - if (!val) - return IRQ_NONE; - - return IRQ_HANDLED; -} - -KBASE_EXPORT_TEST_API(kbase_gpu_irq_test_handler); - int kbase_gpu_device_create(struct kbase_device *kbdev) { kbdev->model = midgard_model_create(kbdev); diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.h b/mali_kbase/backend/gpu/mali_kbase_model_linux.h index 4cf1235..65eb620 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_linux.h +++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -117,6 +117,15 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value); void midgard_model_read_reg(void *h, u32 addr, u32 *const value); /** + * midgard_model_arch_timer_get_cntfrq - Get Model specific System Timer Frequency + * + * @h: Model handle. + * + * Return: Frequency in Hz + */ +u64 midgard_model_arch_timer_get_cntfrq(void *h); + +/** * gpu_device_raise_irq() - Private IRQ raise function. * * @model: Model handle. diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_always_on.c b/mali_kbase/backend/gpu/mali_kbase_pm_always_on.c index bbf6290..37c35ee 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_always_on.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_always_on.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2015, 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,11 +28,13 @@ static bool always_on_shaders_needed(struct kbase_device *kbdev) { + CSTD_UNUSED(kbdev); return true; } static bool always_on_get_core_active(struct kbase_device *kbdev) { + CSTD_UNUSED(kbdev); return true; } @@ -58,15 +60,15 @@ static void always_on_term(struct kbase_device *kbdev) * and name. 
*/ const struct kbase_pm_policy kbase_pm_always_on_policy_ops = { - "always_on", /* name */ - always_on_init, /* init */ - always_on_term, /* term */ - always_on_shaders_needed, /* shaders_needed */ - always_on_get_core_active, /* get_core_active */ - NULL, /* handle_event */ - KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */ + "always_on", /* name */ + always_on_init, /* init */ + always_on_term, /* term */ + always_on_shaders_needed, /* shaders_needed */ + always_on_get_core_active, /* get_core_active */ + NULL, /* handle_event */ + KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */ #if MALI_USE_CSF - ALWAYS_ON_PM_SCHED_FLAGS, /* pm_sched_flags */ + ALWAYS_ON_PM_SCHED_FLAGS, /* pm_sched_flags */ #endif }; diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_always_on.h b/mali_kbase/backend/gpu/mali_kbase_pm_always_on.h index 98d35da..d0c209b 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_always_on.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_always_on.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2015, 2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -75,4 +75,3 @@ struct kbasep_pm_policy_always_on { extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops; #endif /* MALI_KBASE_PM_ALWAYS_ON_H */ - diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c index 46c5ffd..0cb205b 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c @@ -24,7 +24,7 @@ */ #include <mali_kbase.h> -#include <gpu/mali_kbase_gpu_regmap.h> +#include <hw_access/mali_kbase_hw_access_regmap.h> #include <mali_kbase_config_defaults.h> #include <mali_kbase_pm.h> @@ -33,6 +33,7 @@ #include <backend/gpu/mali_kbase_js_internal.h> #include <backend/gpu/mali_kbase_jm_internal.h> #else +#include <linux/version_compat_defs.h> #include <linux/pm_runtime.h> #include <mali_kbase_reset_gpu.h> #endif /* !MALI_USE_CSF */ @@ -42,6 +43,7 @@ #include <mali_kbase_dummy_job_wa.h> #include <backend/gpu/mali_kbase_irq_internal.h> + static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data); static void kbase_pm_hwcnt_disable_worker(struct work_struct *data); static void kbase_pm_gpu_clock_control_worker(struct work_struct *data); @@ -51,23 +53,18 @@ int kbase_pm_runtime_init(struct kbase_device *kbdev) struct kbase_pm_callback_conf *callbacks; callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; + + if (callbacks) { - kbdev->pm.backend.callback_power_on = - callbacks->power_on_callback; - kbdev->pm.backend.callback_power_off = - callbacks->power_off_callback; - kbdev->pm.backend.callback_power_suspend = - callbacks->power_suspend_callback; - kbdev->pm.backend.callback_power_resume = - callbacks->power_resume_callback; - kbdev->pm.callback_power_runtime_init = - callbacks->power_runtime_init_callback; - kbdev->pm.callback_power_runtime_term = - callbacks->power_runtime_term_callback; - kbdev->pm.backend.callback_power_runtime_on = - callbacks->power_runtime_on_callback; + kbdev->pm.backend.callback_power_on = callbacks->power_on_callback; + kbdev->pm.backend.callback_power_off = callbacks->power_off_callback; + kbdev->pm.backend.callback_power_suspend = callbacks->power_suspend_callback; + kbdev->pm.backend.callback_power_resume = 
callbacks->power_resume_callback; + kbdev->pm.callback_power_runtime_init = callbacks->power_runtime_init_callback; + kbdev->pm.callback_power_runtime_term = callbacks->power_runtime_term_callback; + kbdev->pm.backend.callback_power_runtime_on = callbacks->power_runtime_on_callback; kbdev->pm.backend.callback_power_runtime_off = - callbacks->power_runtime_off_callback; + callbacks->power_runtime_off_callback; kbdev->pm.backend.callback_power_runtime_idle = callbacks->power_runtime_idle_callback; kbdev->pm.backend.callback_soft_reset = @@ -75,7 +72,7 @@ int kbase_pm_runtime_init(struct kbase_device *kbdev) kbdev->pm.backend.callback_hardware_reset = callbacks->hardware_reset_callback; kbdev->pm.backend.callback_power_runtime_gpu_idle = - callbacks->power_runtime_gpu_idle_callback; + callbacks->power_runtime_gpu_idle_callback; kbdev->pm.backend.callback_power_runtime_gpu_active = callbacks->power_runtime_gpu_active_callback; #ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS @@ -157,25 +154,17 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) kbase_pm_init_event_log(kbdev); - kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait", - WQ_HIGHPRI | WQ_UNBOUND, 1); + kbdev->pm.backend.gpu_poweroff_wait_wq = + alloc_workqueue("kbase_pm_poweroff_wait", WQ_HIGHPRI | WQ_UNBOUND, 1); if (!kbdev->pm.backend.gpu_poweroff_wait_wq) return -ENOMEM; - INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work, - kbase_pm_gpu_poweroff_wait_wq); + INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work, kbase_pm_gpu_poweroff_wait_wq); kbdev->pm.backend.ca_cores_enabled = ~0ull; - kbdev->pm.backend.gpu_powered = false; - kbdev->pm.backend.gpu_ready = false; - kbdev->pm.suspending = false; - kbdev->pm.resuming = false; #ifdef CONFIG_MALI_ARBITER_SUPPORT kbase_pm_set_gpu_lost(kbdev, false); #endif -#ifdef CONFIG_MALI_DEBUG - kbdev->pm.backend.driver_ready_for_irqs = false; -#endif /* CONFIG_MALI_DEBUG */ init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait); #if !MALI_USE_CSF @@ -203,6 +192,7 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) init_waitqueue_head(&kbdev->pm.backend.poweroff_wait); + if (kbase_pm_ca_init(kbdev) != 0) goto workq_fail; @@ -211,10 +201,8 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) if (kbase_pm_state_machine_init(kbdev) != 0) goto pm_state_machine_fail; - kbdev->pm.backend.hwcnt_desired = false; kbdev->pm.backend.hwcnt_disabled = true; - INIT_WORK(&kbdev->pm.backend.hwcnt_disable_work, - kbase_pm_hwcnt_disable_worker); + INIT_WORK(&kbdev->pm.backend.hwcnt_disable_work, kbase_pm_hwcnt_disable_worker); kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); #if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) @@ -223,37 +211,30 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) !kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TURSEHW_1997) && kbdev->pm.backend.callback_power_runtime_gpu_active && kbdev->pm.backend.callback_power_runtime_gpu_idle; -#endif - if (IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED)) { - kbdev->pm.backend.l2_always_on = false; - kbdev->pm.backend.gpu_clock_slow_down_wa = false; + kbdev->pm.backend.apply_hw_issue_TITANHW_2938_wa = + kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TITANHW_2938) && + kbdev->pm.backend.gpu_sleep_supported; +#endif + if (IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED)) return 0; - } /* WA1: L2 always_on for GPUs being affected by GPU2017-1336 */ if (!IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE)) { - kbdev->pm.backend.gpu_clock_slow_down_wa = false; if 
(kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2017_1336)) kbdev->pm.backend.l2_always_on = true; - else - kbdev->pm.backend.l2_always_on = false; return 0; } /* WA3: Clock slow down for GPUs being affected by GPU2017-1336 */ - kbdev->pm.backend.l2_always_on = false; if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2017_1336)) { kbdev->pm.backend.gpu_clock_slow_down_wa = true; - kbdev->pm.backend.gpu_clock_suspend_freq = 0; kbdev->pm.backend.gpu_clock_slow_down_desired = true; - kbdev->pm.backend.gpu_clock_slowed_down = false; INIT_WORK(&kbdev->pm.backend.gpu_clock_control_work, - kbase_pm_gpu_clock_control_worker); - } else - kbdev->pm.backend.gpu_clock_slow_down_wa = false; + kbase_pm_gpu_clock_control_worker); + } return 0; @@ -295,6 +276,58 @@ void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume) */ } +#if MALI_USE_CSF +static bool wait_cond_mmu_fault_handling_in_gpu_poweroff_wait_wq(struct kbase_device *kbdev, + int faults_pending) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + bool cond = false; + + kbase_pm_lock(kbdev); + cond = backend->poweron_required || (faults_pending == 0); + kbase_pm_unlock(kbdev); + + return cond; +} +#endif + +static void wait_for_mmu_fault_handling_in_gpu_poweroff_wait_wq(struct kbase_device *kbdev) +{ +#if MALI_USE_CSF + bool reset_triggered = false; + int ret = 0; + + lockdep_assert_held(&kbdev->pm.lock); + + do { + const u64 timeout_us = kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT) * USEC_PER_MSEC; + const unsigned long delay_us = 10; + int faults_pending = 0; + + kbase_pm_unlock(kbdev); + ret = read_poll_timeout_atomic( + atomic_read, faults_pending, + wait_cond_mmu_fault_handling_in_gpu_poweroff_wait_wq(kbdev, faults_pending), + delay_us, timeout_us, false, &kbdev->faults_pending); + kbase_pm_lock(kbdev); + + if (ret && !reset_triggered) { + dev_err(kbdev->dev, + "Wait for fault handling timed-out in gpu_poweroff_wait_wq"); + if (kbase_prepare_to_reset_gpu(kbdev, + RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) { + kbase_reset_gpu(kbdev); + reset_triggered = true; + } + } + } while (ret); +#else + kbase_pm_unlock(kbdev); + kbase_flush_mmu_wqs(kbdev); + kbase_pm_lock(kbdev); +#endif +} + static void pm_handle_power_off(struct kbase_device *kbdev) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; @@ -315,8 +348,7 @@ static void pm_handle_power_off(struct kbase_device *kbdev) return; } #endif - WARN_ON(backend->shaders_state != - KBASE_SHADERS_OFF_CORESTACK_OFF || + WARN_ON(backend->shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF || backend->l2_state != KBASE_L2_OFF); #if MALI_USE_CSF mcu_state = backend->mcu_state; @@ -340,9 +372,7 @@ static void pm_handle_power_off(struct kbase_device *kbdev) * process. Interrupts are disabled so no more faults * should be generated at this point. 
*/ - kbase_pm_unlock(kbdev); - kbase_flush_mmu_wqs(kbdev); - kbase_pm_lock(kbdev); + wait_for_mmu_fault_handling_in_gpu_poweroff_wait_wq(kbdev); #ifdef CONFIG_MALI_ARBITER_SUPPORT /* poweron_required may have changed while pm lock @@ -367,8 +397,8 @@ static void pm_handle_power_off(struct kbase_device *kbdev) static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) { - struct kbase_device *kbdev = container_of(data, struct kbase_device, - pm.backend.gpu_poweroff_wait_work); + struct kbase_device *kbdev = + container_of(data, struct kbase_device, pm.backend.gpu_poweroff_wait_work); struct kbase_pm_device_data *pm = &kbdev->pm; struct kbase_pm_backend_data *backend = &pm->backend; unsigned long flags; @@ -429,8 +459,7 @@ static void kbase_pm_l2_clock_slow(struct kbase_device *kbdev) kbdev->previous_frequency = kbdev->current_nominal_freq; /* Slow down GPU clock to the suspend clock*/ - kbase_devfreq_force_freq(kbdev, - kbdev->pm.backend.gpu_clock_suspend_freq); + kbase_devfreq_force_freq(kbdev, kbdev->pm.backend.gpu_clock_suspend_freq); #elif defined(CONFIG_MALI_MIDGARD_DVFS) /* CONFIG_MALI_DEVFREQ */ @@ -444,8 +473,7 @@ static void kbase_pm_l2_clock_slow(struct kbase_device *kbdev) kbdev->previous_frequency = clk_get_rate(clk); /* Slow down GPU clock to the suspend clock*/ - if (WARN_ON_ONCE(clk_set_rate(clk, - kbdev->pm.backend.gpu_clock_suspend_freq))) + if (WARN_ON_ONCE(clk_set_rate(clk, kbdev->pm.backend.gpu_clock_suspend_freq))) dev_err(kbdev->dev, "Failed to set suspend freq\n"); #endif /* CONFIG_MALI_MIDGARD_DVFS */ @@ -475,8 +503,7 @@ static void kbase_pm_l2_clock_normalize(struct kbase_device *kbdev) /* Restore GPU clock */ if (WARN_ON_ONCE(clk_set_rate(clk, kbdev->previous_frequency))) - dev_err(kbdev->dev, "Failed to restore freq (%lu)\n", - kbdev->previous_frequency); + dev_err(kbdev->dev, "Failed to restore freq (%lu)\n", kbdev->previous_frequency); /* Restart the metrics gathering framework */ kbase_pm_metrics_start(kbdev); @@ -486,8 +513,8 @@ static void kbase_pm_l2_clock_normalize(struct kbase_device *kbdev) static void kbase_pm_gpu_clock_control_worker(struct work_struct *data) { - struct kbase_device *kbdev = container_of(data, struct kbase_device, - pm.backend.gpu_clock_control_work); + struct kbase_device *kbdev = + container_of(data, struct kbase_device, pm.backend.gpu_clock_control_work); struct kbase_pm_device_data *pm = &kbdev->pm; struct kbase_pm_backend_data *backend = &pm->backend; unsigned long flags; @@ -495,12 +522,10 @@ static void kbase_pm_gpu_clock_control_worker(struct work_struct *data) /* Determine if GPU clock control is required */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (!backend->gpu_clock_slowed_down && - backend->gpu_clock_slow_down_desired) { + if (!backend->gpu_clock_slowed_down && backend->gpu_clock_slow_down_desired) { slow_down = true; backend->gpu_clock_slowed_down = true; - } else if (backend->gpu_clock_slowed_down && - !backend->gpu_clock_slow_down_desired) { + } else if (backend->gpu_clock_slowed_down && !backend->gpu_clock_slow_down_desired) { normalize = true; backend->gpu_clock_slowed_down = false; } @@ -523,8 +548,8 @@ static void kbase_pm_gpu_clock_control_worker(struct work_struct *data) static void kbase_pm_hwcnt_disable_worker(struct work_struct *data) { - struct kbase_device *kbdev = container_of(data, struct kbase_device, - pm.backend.hwcnt_disable_work); + struct kbase_device *kbdev = + container_of(data, struct kbase_device, pm.backend.hwcnt_disable_work); struct kbase_pm_device_data *pm = &kbdev->pm; 
struct kbase_pm_backend_data *backend = &pm->backend; unsigned long flags; @@ -604,7 +629,6 @@ static int kbase_pm_do_poweroff_sync(struct kbase_device *kbdev) WARN_ON(backend->poweroff_wait_in_progress); WARN_ON(backend->gpu_sleep_mode_active); if (backend->gpu_powered) { - backend->mcu_desired = false; backend->l2_desired = false; kbase_pm_update_state(kbdev); @@ -612,9 +636,8 @@ static int kbase_pm_do_poweroff_sync(struct kbase_device *kbdev) ret = kbase_pm_wait_for_desired_state(kbdev); if (ret) { - dev_warn( - kbdev->dev, - "Wait for pm state change failed on synchronous power off"); + dev_warn(kbdev->dev, + "Wait for pm state change failed on synchronous power off"); ret = -EBUSY; goto out; } @@ -623,8 +646,7 @@ static int kbase_pm_do_poweroff_sync(struct kbase_device *kbdev) * throughout and so need to invoke the idle callback before * the power down. */ - if (backend->callback_power_runtime_gpu_idle && - !backend->gpu_idled) { + if (backend->callback_power_runtime_gpu_idle && !backend->gpu_idled) { backend->callback_power_runtime_gpu_idle(kbdev); backend->gpu_idled = true; } @@ -703,13 +725,11 @@ static bool is_gpu_powered_down(struct kbase_device *kbdev) void kbase_pm_wait_for_gpu_power_down(struct kbase_device *kbdev) { - wait_event_killable(kbdev->pm.backend.poweroff_wait, - is_gpu_powered_down(kbdev)); + wait_event_killable(kbdev->pm.backend.poweroff_wait, is_gpu_powered_down(kbdev)); } KBASE_EXPORT_TEST_API(kbase_pm_wait_for_gpu_power_down); -int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, - unsigned int flags) +int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, unsigned int flags) { unsigned long irq_flags; int ret; @@ -730,8 +750,7 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, return ret; } #if MALI_USE_CSF - kbdev->pm.debug_core_mask = - kbdev->gpu_props.props.raw_props.shader_present; + kbdev->pm.debug_core_mask = kbdev->gpu_props.shader_present; spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); /* Set the initial value for 'shaders_avail'. It would be later * modified only from the MCU state machine, when the shader core @@ -743,9 +762,8 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); #else kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] = - kbdev->pm.debug_core_mask[1] = - kbdev->pm.debug_core_mask[2] = - kbdev->gpu_props.props.raw_props.shader_present; + kbdev->pm.debug_core_mask[1] = kbdev->pm.debug_core_mask[2] = + kbdev->gpu_props.shader_present; #endif /* Pretend the GPU is active to prevent a power policy turning the GPU @@ -762,20 +780,11 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, } #endif - spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, - irq_flags); + spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, irq_flags); /* Ensure cycle counter is off */ kbdev->pm.backend.gpu_cycle_counter_requests = 0; - spin_unlock_irqrestore( - &kbdev->pm.backend.gpu_cycle_counter_requests_lock, - irq_flags); + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, irq_flags); - /* We are ready to receive IRQ's now as power policy is set up, so - * enable them now. 
- */ -#ifdef CONFIG_MALI_DEBUG - kbdev->pm.backend.driver_ready_for_irqs = true; -#endif kbase_pm_enable_interrupts(kbdev); WARN_ON(!kbdev->pm.backend.gpu_powered); @@ -880,23 +889,23 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask) } KBASE_EXPORT_TEST_API(kbase_pm_set_debug_core_mask); #else -void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, - u64 new_core_mask_js0, u64 new_core_mask_js1, - u64 new_core_mask_js2) +void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask_js0, + u64 new_core_mask_js1, u64 new_core_mask_js2) { lockdep_assert_held(&kbdev->hwaccess_lock); lockdep_assert_held(&kbdev->pm.lock); if (kbase_dummy_job_wa_enabled(kbdev)) { - dev_warn_once(kbdev->dev, "Change of core mask not supported for slot 0 as dummy job WA is enabled"); + dev_warn_once( + kbdev->dev, + "Change of core mask not supported for slot 0 as dummy job WA is enabled"); new_core_mask_js0 = kbdev->pm.debug_core_mask[0]; } kbdev->pm.debug_core_mask[0] = new_core_mask_js0; kbdev->pm.debug_core_mask[1] = new_core_mask_js1; kbdev->pm.debug_core_mask[2] = new_core_mask_js2; - kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 | - new_core_mask_js2; + kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 | new_core_mask_js2; kbase_pm_update_dynamic_cores_onoff(kbdev); } @@ -938,7 +947,9 @@ int kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) ret = kbase_pm_wait_for_poweroff_work_complete(kbdev); if (ret) { #if !MALI_USE_CSF + mutex_lock(&kbdev->js_data.runpool_mutex); kbase_backend_timer_resume(kbdev); + mutex_unlock(&kbdev->js_data.runpool_mutex); #endif /* !MALI_USE_CSF */ return ret; } @@ -950,7 +961,7 @@ int kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) if (kbdev->pm.backend.callback_power_suspend) kbdev->pm.backend.callback_power_suspend(kbdev); - return ret; + return 0; } void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) @@ -980,7 +991,9 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev) { unsigned long flags; +#if !MALI_USE_CSF ktime_t end_timestamp = ktime_get_raw(); +#endif struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; if (!kbdev->arb.arb_if) @@ -988,42 +1001,43 @@ void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev) rt_mutex_lock(&kbdev->pm.lock); mutex_lock(&arb_vm_state->vm_state_lock); - if (kbdev->pm.backend.gpu_powered && - !kbase_pm_is_gpu_lost(kbdev)) { + if (kbdev->pm.backend.gpu_powered && !kbase_pm_is_gpu_lost(kbdev)) { kbase_pm_set_gpu_lost(kbdev, true); /* GPU is no longer mapped to VM. 
So no interrupts will * be received and Mali registers have been replaced by * dummy RAM */ - WARN(!kbase_is_gpu_removed(kbdev), - "GPU is still available after GPU lost event\n"); + WARN(!kbase_is_gpu_removed(kbdev), "GPU is still available after GPU lost event\n"); /* Full GPU reset will have been done by hypervisor, so * cancel */ - atomic_set(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_NOT_PENDING); + atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING); hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); kbase_synchronize_irqs(kbdev); /* Clear all jobs running on the GPU */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbdev->protected_mode = false; +#if !MALI_USE_CSF kbase_backend_reset(kbdev, &end_timestamp); kbase_pm_metrics_update(kbdev, NULL); +#endif kbase_pm_update_state(kbdev); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#if !MALI_USE_CSF /* Cancel any pending HWC dumps */ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING || - kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { + kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT; kbdev->hwcnt.backend.triggered = 1; wake_up(&kbdev->hwcnt.backend.wait); } spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); +#endif } mutex_unlock(&arb_vm_state->vm_state_lock); rt_mutex_unlock(&kbdev->pm.lock); @@ -1041,6 +1055,7 @@ int kbase_pm_force_mcu_wakeup_after_sleep(struct kbase_device *kbdev) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); /* Set the override flag to force the power up of L2 cache */ kbdev->pm.backend.gpu_wakeup_override = true; + kbdev->pm.backend.runtime_suspend_abort_reason = ABORT_REASON_NONE; kbase_pm_update_state(kbdev); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -1070,22 +1085,33 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev) ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev); if (ret) { spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - dev_warn( - kbdev->dev, - "Waiting for MCU to wake up failed on runtime suspend"); + dev_warn(kbdev->dev, "Waiting for MCU to wake up failed on runtime suspend"); kbdev->pm.backend.gpu_wakeup_override = false; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return ret; } - /* Check if a Doorbell mirror interrupt occurred meanwhile */ + /* Check if a Doorbell mirror interrupt occurred meanwhile. + * Also check if GPU idle work item is pending. If FW had sent the GPU idle notification + * after the wake up of MCU then it can be assumed that Userspace submission didn't make + * GPU non-idle, so runtime suspend doesn't need to be aborted. 
+ */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (kbdev->pm.backend.gpu_sleep_mode_active && - kbdev->pm.backend.exit_gpu_sleep_mode) { - dev_dbg(kbdev->dev, "DB mirror interrupt occurred during runtime suspend after L2 power up"); - kbdev->pm.backend.gpu_wakeup_override = false; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - return -EBUSY; + if (kbdev->pm.backend.gpu_sleep_mode_active && kbdev->pm.backend.exit_gpu_sleep_mode && + !work_pending(&kbdev->csf.scheduler.gpu_idle_work)) { + u32 glb_req = + kbase_csf_firmware_global_input_read(&kbdev->csf.global_iface, GLB_REQ); + u32 glb_ack = kbase_csf_firmware_global_output(&kbdev->csf.global_iface, GLB_ACK); + + /* Only abort the runtime suspend if GPU idle event is not pending */ + if (!((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK)) { + dev_dbg(kbdev->dev, + "DB mirror interrupt occurred during runtime suspend after L2 power up"); + kbdev->pm.backend.gpu_wakeup_override = false; + kbdev->pm.backend.runtime_suspend_abort_reason = ABORT_REASON_DB_MIRROR_IRQ; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return -EBUSY; + } } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); /* Need to release the kbdev->pm.lock to avoid lock ordering issue @@ -1105,8 +1131,7 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev) /* After re-acquiring the kbdev->pm.lock, check if the device * became active (or active then idle) meanwhile. */ - if (kbdev->pm.active_count || - kbdev->pm.backend.poweroff_wait_in_progress) { + if (kbdev->pm.active_count || kbdev->pm.backend.poweroff_wait_in_progress) { dev_dbg(kbdev->dev, "Device became active on runtime suspend after suspending Scheduler"); ret = -EBUSY; @@ -1181,17 +1206,16 @@ int kbase_pm_handle_runtime_suspend(struct kbase_device *kbdev) * the fact that pm.lock is released before invoking Scheduler function * to suspend the CSGs. */ - if (kbdev->pm.active_count || - kbdev->pm.backend.poweroff_wait_in_progress) { + if (kbdev->pm.active_count || kbdev->pm.backend.poweroff_wait_in_progress) { dev_dbg(kbdev->dev, "Device became active on runtime suspend"); ret = -EBUSY; goto unlock; } spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (kbdev->pm.backend.gpu_sleep_mode_active && - kbdev->pm.backend.exit_gpu_sleep_mode) { - dev_dbg(kbdev->dev, "DB mirror interrupt occurred during runtime suspend before L2 power up"); + if (kbdev->pm.backend.gpu_sleep_mode_active && kbdev->pm.backend.exit_gpu_sleep_mode) { + dev_dbg(kbdev->dev, + "DB mirror interrupt occurred during runtime suspend before L2 power up"); ret = -EBUSY; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); goto unlock; @@ -1209,7 +1233,8 @@ int kbase_pm_handle_runtime_suspend(struct kbase_device *kbdev) /* Disable interrupts and turn off the GPU clocks */ if (!kbase_pm_clock_off(kbdev)) { - dev_warn(kbdev->dev, "Failed to turn off GPU clocks on runtime suspend, MMU faults pending"); + dev_warn(kbdev->dev, + "Failed to turn off GPU clocks on runtime suspend, MMU faults pending"); WARN_ON(!kbdev->poweroff_pending); /* Previous call to kbase_pm_clock_off() would have disabled diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c index b02f77f..6dc9638 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2023 ARM Limited. 
All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -37,8 +37,7 @@ int kbase_pm_ca_init(struct kbase_device *kbdev) if (kbdev->current_core_mask) pm_backend->ca_cores_enabled = kbdev->current_core_mask; else - pm_backend->ca_cores_enabled = - kbdev->gpu_props.props.raw_props.shader_present; + pm_backend->ca_cores_enabled = kbdev->gpu_props.shader_present; #endif return 0; @@ -46,6 +45,7 @@ int kbase_pm_ca_init(struct kbase_device *kbdev) void kbase_pm_ca_term(struct kbase_device *kbdev) { + CSTD_UNUSED(kbdev); } #ifdef CONFIG_MALI_DEVFREQ @@ -70,13 +70,15 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) old_core_mask = pm_backend->ca_cores_enabled; #else if (!(core_mask & kbdev->pm.debug_core_mask_all)) { - dev_err(kbdev->dev, "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n", - core_mask, kbdev->pm.debug_core_mask_all); + dev_err(kbdev->dev, + "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n", + core_mask, kbdev->pm.debug_core_mask_all); goto unlock; } if (kbase_dummy_job_wa_enabled(kbdev)) { - dev_err_once(kbdev->dev, "Dynamic core scaling not supported as dummy job WA is enabled"); + dev_err_once(kbdev->dev, + "Dynamic core scaling not supported as dummy job WA is enabled"); goto unlock; } #endif /* MALI_USE_CSF */ @@ -98,8 +100,7 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) } #endif - dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n", - pm_backend->ca_cores_enabled); + dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n", pm_backend->ca_cores_enabled); return; unlock: @@ -108,13 +109,19 @@ unlock: KBASE_EXPORT_TEST_API(kbase_devfreq_set_core_mask); #endif -u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) +u64 kbase_pm_ca_get_debug_core_mask(struct kbase_device *kbdev) { #if MALI_USE_CSF - u64 debug_core_mask = kbdev->pm.debug_core_mask; + return kbdev->pm.debug_core_mask; #else - u64 debug_core_mask = kbdev->pm.debug_core_mask_all; + return kbdev->pm.debug_core_mask_all; #endif +} +KBASE_EXPORT_TEST_API(kbase_pm_ca_get_debug_core_mask); + +u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) +{ + u64 debug_core_mask = kbase_pm_ca_get_debug_core_mask(kbdev); lockdep_assert_held(&kbdev->hwaccess_lock); @@ -125,12 +132,10 @@ u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) * to limit it to be a subgroup of the curr config, otherwise the * shaders state machine on the PM does not evolve. */ - return kbdev->gpu_props.curr_config.shader_present & - kbdev->pm.backend.ca_cores_enabled & - debug_core_mask; + return kbdev->gpu_props.curr_config.shader_present & kbdev->pm.backend.ca_cores_enabled & + debug_core_mask; #else - return kbdev->gpu_props.curr_config.shader_present & - debug_core_mask; + return kbdev->gpu_props.curr_config.shader_present & debug_core_mask; #endif } diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.h b/mali_kbase/backend/gpu/mali_kbase_pm_ca.h index 90dcaf5..37d1020 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -58,6 +58,17 @@ void kbase_pm_ca_term(struct kbase_device *kbdev); u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev); /** + * kbase_pm_ca_get_debug_core_mask - Get debug core mask. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Returns a mask of the currently selected shader cores. + * + * Return: The bit mask of user-selected cores + */ +u64 kbase_pm_ca_get_debug_core_mask(struct kbase_device *kbdev); + +/** * kbase_pm_ca_update_core_status - Update core status * * @kbdev: The kbase device structure for the device (must be @@ -71,7 +82,7 @@ u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev); * Calls into the core availability policy */ void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready, - u64 cores_transitioning); + u64 cores_transitioning); /** * kbase_pm_ca_get_instr_core_mask - Get the PM state sync-ed shaders core mask diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca_devfreq.h b/mali_kbase/backend/gpu/mali_kbase_pm_ca_devfreq.h index d1e4b53..cc27739 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_ca_devfreq.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca_devfreq.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -56,4 +56,3 @@ extern const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops; void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask); #endif /* MALI_KBASE_PM_CA_DEVFREQ_H */ - diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_coarse_demand.c b/mali_kbase/backend/gpu/mali_kbase_pm_coarse_demand.c index f40b753..cd2d65b 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_coarse_demand.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_coarse_demand.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -52,15 +52,15 @@ static void coarse_demand_term(struct kbase_device *kbdev) * and name. 
*/ const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = { - "coarse_demand", /* name */ - coarse_demand_init, /* init */ - coarse_demand_term, /* term */ - coarse_demand_shaders_needed, /* shaders_needed */ - coarse_demand_get_core_active, /* get_core_active */ - NULL, /* handle_event */ - KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */ + "coarse_demand", /* name */ + coarse_demand_init, /* init */ + coarse_demand_term, /* term */ + coarse_demand_shaders_needed, /* shaders_needed */ + coarse_demand_get_core_active, /* get_core_active */ + NULL, /* handle_event */ + KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */ #if MALI_USE_CSF - COARSE_ON_DEMAND_PM_SCHED_FLAGS, /* pm_sched_flags */ + COARSE_ON_DEMAND_PM_SCHED_FLAGS, /* pm_sched_flags */ #endif }; diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h index 66ca0b6..9b17092 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,6 +30,8 @@ #include "mali_kbase_pm_coarse_demand.h" #include "mali_kbase_pm_adaptive.h" +#include <hw_access/mali_kbase_hw_access_regmap.h> + #if defined(CONFIG_PM_RUNTIME) || defined(CONFIG_PM) #define KBASE_PM_RUNTIME 1 #endif @@ -50,17 +52,22 @@ struct kbase_jd_atom; * - kbase_pm_get_present_cores() * - kbase_pm_get_active_cores() * - kbase_pm_get_trans_cores() - * - kbase_pm_get_ready_cores(). + * - kbase_pm_get_ready_cores() + * - kbase_pm_get_state() + * - core_type_to_reg() + * - pwr_cmd_constructor() + * - valid_to_power_up() + * - valid_to_power_down() + * - kbase_pm_invoke() * - * They specify which type of core should be acted on. These values are set in - * a manner that allows core_type_to_reg() function to be simpler and more - * efficient. + * They specify which type of core should be acted on. */ + enum kbase_pm_core_type { - KBASE_PM_CORE_L2 = L2_PRESENT_LO, - KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO, - KBASE_PM_CORE_TILER = TILER_PRESENT_LO, - KBASE_PM_CORE_STACK = STACK_PRESENT_LO + KBASE_PM_CORE_L2 = GPU_CONTROL_ENUM(L2_PRESENT), + KBASE_PM_CORE_SHADER = GPU_CONTROL_ENUM(SHADER_PRESENT), + KBASE_PM_CORE_TILER = GPU_CONTROL_ENUM(TILER_PRESENT), + KBASE_PM_CORE_STACK = GPU_CONTROL_ENUM(STACK_PRESENT) }; /* @@ -68,7 +75,7 @@ enum kbase_pm_core_type { * state machine. */ enum kbase_l2_core_state { -#define KBASEP_L2_STATE(n) KBASE_L2_ ## n, +#define KBASEP_L2_STATE(n) KBASE_L2_##n, #include "mali_kbase_pm_l2_states.h" #undef KBASEP_L2_STATE }; @@ -78,7 +85,7 @@ enum kbase_l2_core_state { * enum kbase_mcu_state - The states used for the MCU state machine. */ enum kbase_mcu_state { -#define KBASEP_MCU_STATE(n) KBASE_MCU_ ## n, +#define KBASEP_MCU_STATE(n) KBASE_MCU_##n, #include "mali_kbase_pm_mcu_states.h" #undef KBASEP_MCU_STATE }; @@ -88,12 +95,27 @@ enum kbase_mcu_state { * enum kbase_shader_core_state - The states used for the shaders' state machine. 
*/ enum kbase_shader_core_state { -#define KBASEP_SHADER_STATE(n) KBASE_SHADERS_ ## n, +#define KBASEP_SHADER_STATE(n) KBASE_SHADERS_##n, #include "mali_kbase_pm_shader_states.h" #undef KBASEP_SHADER_STATE }; /** + * enum kbase_pm_runtime_suspend_abort_reason - Reason why runtime suspend was aborted + * after the wake up of MCU. + * + * @ABORT_REASON_NONE: Not aborted + * @ABORT_REASON_DB_MIRROR_IRQ: Runtime suspend was aborted due to DB_MIRROR irq. + * @ABORT_REASON_NON_IDLE_CGS: Runtime suspend was aborted as CSGs were detected as non-idle after + * their suspension. + */ +enum kbase_pm_runtime_suspend_abort_reason { + ABORT_REASON_NONE, + ABORT_REASON_DB_MIRROR_IRQ, + ABORT_REASON_NON_IDLE_CGS +}; + +/** * struct kbasep_pm_metrics - Metrics data collected for use by the power * management framework. * @@ -299,9 +321,6 @@ struct kbase_pm_event_log { * states and transitions. * @cg1_disabled: Set if the policy wants to keep the second core group * powered off - * @driver_ready_for_irqs: Debug state indicating whether sufficient - * initialization of the driver has occurred to handle - * IRQs * @metrics: Structure to hold metrics for the GPU * @shader_tick_timer: Structure to hold the shader poweroff tick timer state * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress. @@ -347,6 +366,8 @@ struct kbase_pm_event_log { * @callback_power_off_sc_rails: Callback invoked to turn off the shader core * power rails. See &struct kbase_pm_callback_conf. * @ca_cores_enabled: Cores that are currently available + * @apply_hw_issue_TITANHW_2938_wa: Indicates if the workaround for BASE_HW_ISSUE_TITANHW_2938 + * needs to be applied when unmapping memory from GPU. * @mcu_state: The current state of the micro-control unit, only applicable * to GPUs that have such a component * @l2_state: The current state of the L2 cache state machine. See @@ -415,6 +436,17 @@ struct kbase_pm_event_log { * mode for the saving the HW state before power down. * @db_mirror_interrupt_enabled: Flag tracking if the Doorbell mirror interrupt * is enabled or not. + * @runtime_suspend_abort_reason: Tracks if the runtime suspend was aborted, + * after the wake up of MCU, due to the DB_MIRROR irq + * or non-idle CSGs. Tracking is done to avoid + * redundant transition of MCU to sleep state after the + * abort of runtime suspend and before the resumption + * of scheduling. + * @l2_force_off_after_mcu_halt: Flag to indicate that L2 cache power down is + * must after performing the MCU halt. Flag is set + * immediately after the MCU halt and cleared + * after the L2 cache power down. MCU can't be + * re-enabled whilst the flag is set. 
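Aside: the L2, MCU and shader state enums above are not written out by hand; each is expanded from a single state list header (mali_kbase_pm_l2_states.h and friends) through an X-macro, and the same lists are re-expanded later in this patch to build the ktrace logging switches and state-name tables. A self-contained illustration of the pattern, using an invented COLOUR list rather than the driver's headers:

#include <stdio.h>

/* Single source of truth; kbase keeps lists like this in separate headers. */
#define COLOUR_LIST \
	COLOUR(RED)   \
	COLOUR(GREEN) \
	COLOUR(BLUE)

/* First expansion: the enum. */
enum colour {
#define COLOUR(n) COLOUR_##n,
	COLOUR_LIST
#undef COLOUR
};

/* Second expansion: a matching name table (kbase does the same for debug strings). */
static const char *const colour_names[] = {
#define COLOUR(n) #n,
	COLOUR_LIST
#undef COLOUR
};

int main(void)
{
	printf("%s\n", colour_names[COLOUR_GREEN]); /* prints "GREEN" */
	return 0;
}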
* @in_reset: True if a GPU is resetting and normal power manager operation is * suspended * @partial_shaderoff: True if we want to partial power off shader cores, @@ -480,10 +512,6 @@ struct kbase_pm_backend_data { bool cg1_disabled; -#ifdef CONFIG_MALI_DEBUG - bool driver_ready_for_irqs; -#endif /* CONFIG_MALI_DEBUG */ - struct kbasep_pm_metrics_state metrics; struct kbasep_pm_tick_timer_state shader_tick_timer; @@ -516,6 +544,7 @@ struct kbase_pm_backend_data { u64 ca_cores_enabled; #if MALI_USE_CSF + bool apply_hw_issue_TITANHW_2938_wa; enum kbase_mcu_state mcu_state; #endif enum kbase_l2_core_state l2_state; @@ -542,7 +571,10 @@ struct kbase_pm_backend_data { bool gpu_idled; bool gpu_wakeup_override; bool db_mirror_interrupt_enabled; + enum kbase_pm_runtime_suspend_abort_reason runtime_suspend_abort_reason; #endif + + bool l2_force_off_after_mcu_halt; #endif bool l2_desired; bool l2_always_on; @@ -573,16 +605,16 @@ struct kbase_pm_backend_data { #if MALI_USE_CSF /* CSF PM flag, signaling that the MCU shader Core should be kept on */ -#define CSF_DYNAMIC_PM_CORE_KEEP_ON (1 << 0) +#define CSF_DYNAMIC_PM_CORE_KEEP_ON (1 << 0) /* CSF PM flag, signaling no scheduler suspension on idle groups */ #define CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE (1 << 1) /* CSF PM flag, signaling no scheduler suspension on no runnable groups */ #define CSF_DYNAMIC_PM_SCHED_NO_SUSPEND (1 << 2) /* The following flags corresponds to existing defined PM policies */ -#define ALWAYS_ON_PM_SCHED_FLAGS (CSF_DYNAMIC_PM_CORE_KEEP_ON | \ - CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE | \ - CSF_DYNAMIC_PM_SCHED_NO_SUSPEND) +#define ALWAYS_ON_PM_SCHED_FLAGS \ + (CSF_DYNAMIC_PM_CORE_KEEP_ON | CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE | \ + CSF_DYNAMIC_PM_SCHED_NO_SUSPEND) #define COARSE_ON_DEMAND_PM_SCHED_FLAGS (0) #define ADAPTIVE_PM_SCHED_FLAGS (0) #if !MALI_CUSTOMER_RELEASE @@ -624,7 +656,7 @@ enum kbase_pm_policy_event { * @KBASE_PM_POLICY_EVENT_TIMER_MISS: Indicates that the GPU did not * become active before the Shader Tick Timer timeout occurred. */ - KBASE_PM_POLICY_EVENT_TIMER_MISS, + KBASE_PM_POLICY_EVENT_TIMER_MISS }; /** @@ -708,8 +740,7 @@ struct kbase_pm_policy { * valid pointer) * @event: The id of the power event that has occurred */ - void (*handle_event)(struct kbase_device *kbdev, - enum kbase_pm_policy_event event); + void (*handle_event)(struct kbase_device *kbdev, enum kbase_pm_policy_event event); enum kbase_pm_policy_id id; diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c index 7c891c1..bd592a1 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c @@ -25,7 +25,7 @@ #include <mali_kbase.h> #include <mali_kbase_config_defaults.h> -#include <gpu/mali_kbase_gpu_regmap.h> +#include <hw_access/mali_kbase_hw_access_regmap.h> #include <tl/mali_kbase_tracepoints.h> #include <mali_kbase_pm.h> #include <mali_kbase_config_defaults.h> @@ -65,10 +65,10 @@ bool corestack_driver_control; /* Default value of 0/false */ #endif module_param(corestack_driver_control, bool, 0444); MODULE_PARM_DESC(corestack_driver_control, - "Let the driver power on/off the GPU core stack independently " - "without involving the Power Domain Controller. This should " - "only be enabled on platforms for which integration of the PDC " - "to the Mali GPU is known to be problematic."); + "Let the driver power on/off the GPU core stack independently " + "without involving the Power Domain Controller. 
This should " + "only be enabled on platforms for which integration of the PDC " + "to the Mali GPU is known to be problematic."); KBASE_EXPORT_TEST_API(corestack_driver_control); /** @@ -86,22 +86,21 @@ KBASE_EXPORT_TEST_API(corestack_driver_control); * and more efficient. */ enum kbasep_pm_action { - ACTION_PRESENT = 0, - ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO), - ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO), - ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO), - ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO), - ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO) + ACTION_PRESENT, + ACTION_READY, + ACTION_PWRON, + ACTION_PWROFF, + ACTION_PWRTRANS, + ACTION_PWRACTIVE }; -static u64 kbase_pm_get_state( - struct kbase_device *kbdev, - enum kbase_pm_core_type core_type, - enum kbasep_pm_action action); +static u64 kbase_pm_get_state(struct kbase_device *kbdev, enum kbase_pm_core_type core_type, + enum kbasep_pm_action action); static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev); #if MALI_USE_CSF + bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev) { lockdep_assert_held(&kbdev->hwaccess_lock); @@ -109,12 +108,15 @@ bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev) if (unlikely(!kbdev->csf.firmware_inited)) return false; - if (kbdev->csf.scheduler.pm_active_count && - kbdev->pm.backend.mcu_desired) + if (kbdev->pm.backend.l2_force_off_after_mcu_halt) + return false; + + if (kbdev->csf.scheduler.pm_active_count && kbdev->pm.backend.mcu_desired) return true; #ifdef KBASE_PM_RUNTIME - if (kbdev->pm.backend.gpu_wakeup_override) + if (kbdev->pm.backend.gpu_wakeup_override || + kbdev->pm.backend.runtime_suspend_abort_reason != ABORT_REASON_NONE) return true; #endif @@ -123,8 +125,7 @@ bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev) * unless policy changing transition needs it off. 
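Aside: each PM policy carries a pm_sched_flags word built from the CSF_DYNAMIC_PM_* bits defined in the hunk above; always_on sets all three bits, while coarse_demand and adaptive set none, and predicates such as kbase_pm_no_mcu_core_pwroff() consult individual bits. A small sketch of that flag handling; the bit values mirror the defines in this patch, but the test function is a simplified stand-in rather than the driver's implementation.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Bit values mirror the CSF_DYNAMIC_PM_* defines in the hunk above. */
#define PM_CORE_KEEP_ON      (1u << 0)
#define PM_SCHED_IGNORE_IDLE (1u << 1)
#define PM_SCHED_NO_SUSPEND  (1u << 2)

#define ALWAYS_ON_FLAGS        (PM_CORE_KEEP_ON | PM_SCHED_IGNORE_IDLE | PM_SCHED_NO_SUSPEND)
#define COARSE_ON_DEMAND_FLAGS 0u

/* Simplified stand-in: cores are kept powered whenever the current policy
 * sets the KEEP_ON bit in its scheduling flags.
 */
static bool keep_cores_on(uint32_t policy_flags)
{
	return (policy_flags & PM_CORE_KEEP_ON) != 0;
}

int main(void)
{
	printf("always_on: %d\n", keep_cores_on(ALWAYS_ON_FLAGS));            /* 1 */
	printf("coarse_demand: %d\n", keep_cores_on(COARSE_ON_DEMAND_FLAGS)); /* 0 */
	return 0;
}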
*/ - return (kbdev->pm.backend.mcu_desired && - kbase_pm_no_mcu_core_pwroff(kbdev) && + return (kbdev->pm.backend.mcu_desired && kbase_pm_no_mcu_core_pwroff(kbdev) && !kbdev->pm.backend.policy_change_clamp_state_to_off); } #endif @@ -136,11 +137,10 @@ bool kbase_pm_is_l2_desired(struct kbase_device *kbdev) return false; if (kbdev->pm.backend.protected_transition_override && - kbdev->pm.backend.protected_l2_override) + kbdev->pm.backend.protected_l2_override) return true; - if (kbdev->pm.backend.protected_transition_override && - !kbdev->pm.backend.shaders_desired) + if (kbdev->pm.backend.protected_transition_override && !kbdev->pm.backend.shaders_desired) return false; #else if (unlikely(kbdev->pm.backend.policy_change_clamp_state_to_off)) @@ -174,8 +174,7 @@ int kbase_pm_protected_entry_override_enable(struct kbase_device *kbdev) WARN_ON(!kbdev->protected_mode_transition); - if (kbdev->pm.backend.l2_always_on && - (kbdev->system_coherency == COHERENCY_ACE)) { + if (kbdev->pm.backend.l2_always_on && (kbdev->system_coherency == COHERENCY_ACE)) { WARN_ON(kbdev->pm.backend.protected_entry_transition_override); /* @@ -198,8 +197,7 @@ void kbase_pm_protected_entry_override_disable(struct kbase_device *kbdev) WARN_ON(!kbdev->protected_mode_transition); - if (kbdev->pm.backend.l2_always_on && - (kbdev->system_coherency == COHERENCY_ACE)) { + if (kbdev->pm.backend.l2_always_on && (kbdev->system_coherency == COHERENCY_ACE)) { WARN_ON(!kbdev->pm.backend.protected_entry_transition_override); kbdev->pm.backend.protected_entry_transition_override = false; @@ -222,9 +220,38 @@ void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override) } #endif + +#define ACTION_TYPE_TO_REG_GPU_CONTROL(core_name) \ + { \ + switch (action) { \ + case ACTION_PRESENT: \ + reg = GPU_CONTROL_ENUM(core_name##_PRESENT); \ + break; \ + case ACTION_READY: \ + reg = GPU_CONTROL_ENUM(core_name##_READY); \ + break; \ + case ACTION_PWRON: \ + reg = GPU_CONTROL_ENUM(core_name##_PWRON); \ + break; \ + case ACTION_PWROFF: \ + reg = GPU_CONTROL_ENUM(core_name##_PWROFF); \ + break; \ + case ACTION_PWRTRANS: \ + reg = GPU_CONTROL_ENUM(core_name##_PWRTRANS); \ + break; \ + case ACTION_PWRACTIVE: \ + reg = GPU_CONTROL_ENUM(core_name##_PWRACTIVE); \ + break; \ + default: \ + dev_err(kbdev->dev, "Invalid action"); \ + break; \ + } \ + } + /** * core_type_to_reg - Decode a core type and action to a register. * + * @kbdev: The kbase device for the core * @core_type: The type of core * @action: The type of action * @@ -236,37 +263,104 @@ void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override) * Return: The register offset of the _LO register that performs an action of * type @action on a core of type @core_type. 
*/ -static u32 core_type_to_reg(enum kbase_pm_core_type core_type, - enum kbasep_pm_action action) -{ - if (corestack_driver_control) { - if (core_type == KBASE_PM_CORE_STACK) { - switch (action) { - case ACTION_PRESENT: - return STACK_PRESENT_LO; - case ACTION_READY: - return STACK_READY_LO; - case ACTION_PWRON: - return STACK_PWRON_LO; - case ACTION_PWROFF: - return STACK_PWROFF_LO; - case ACTION_PWRTRANS: - return STACK_PWRTRANS_LO; - default: - WARN(1, "Invalid action for core type\n"); +static u32 core_type_to_reg(struct kbase_device *kbdev, enum kbase_pm_core_type core_type, + enum kbasep_pm_action action) +{ + u32 reg = 0x0; + + { + switch (core_type) { + case KBASE_PM_CORE_L2: + ACTION_TYPE_TO_REG_GPU_CONTROL(L2); + break; + case KBASE_PM_CORE_SHADER: + ACTION_TYPE_TO_REG_GPU_CONTROL(SHADER); + break; + case KBASE_PM_CORE_TILER: + ACTION_TYPE_TO_REG_GPU_CONTROL(TILER); + break; + case KBASE_PM_CORE_STACK: + if (corestack_driver_control) { + switch (action) { + case ACTION_PRESENT: + reg = GPU_CONTROL_ENUM(STACK_PRESENT); + break; + case ACTION_READY: + reg = GPU_CONTROL_ENUM(STACK_READY); + break; + case ACTION_PWRON: + reg = GPU_CONTROL_ENUM(STACK_PWRON); + break; + case ACTION_PWROFF: + reg = GPU_CONTROL_ENUM(STACK_PWROFF); + break; + case ACTION_PWRTRANS: + reg = GPU_CONTROL_ENUM(STACK_PWRTRANS); + break; + default: + dev_err(kbdev->dev, "Invalid action for core stack"); + } } + break; + default: + dev_err(kbdev->dev, "Invalid core type for gpu control"); + break; } } - return (u32)core_type + (u32)action; + WARN_ON(!reg); + return reg; } -#if IS_ENABLED(CONFIG_ARM64) +#if !MALI_USE_CSF +/** + * map_core_type_to_tl_pm_state - Map core type to TL_PM_STATE. + * + * @kbdev: The kbase device for the core + * @core_type: The type of core + * + * Given a core_type (defined by kbase_pm_core_type) this function will return + * TL_PM_STATE_*, which is a mapping of core_type to respective core type timeline value. + * + * Return: Core type timeline value. + */ +__pure static u32 map_core_type_to_tl_pm_state(struct kbase_device *kbdev, + enum kbase_pm_core_type core_type) +{ + u32 map = 0; + + switch (core_type) { + case KBASE_PM_CORE_L2: + map = TL_PM_STATE_L2; + break; + case KBASE_PM_CORE_SHADER: + map = TL_PM_STATE_SHADER; + break; + case KBASE_PM_CORE_TILER: + map = TL_PM_STATE_TILER; + break; + case KBASE_PM_CORE_STACK: + if (corestack_driver_control) + map = TL_PM_STATE_STACK; + break; + default: + dev_err(kbdev->dev, "Invalid core type"); + } + + /* Core stack might not change default value */ + WARN_ON(!map); + return map; +} +#endif + +#if IS_ENABLED(CONFIG_ARM64) && !MALI_USE_CSF + static void mali_cci_flush_l2(struct kbase_device *kbdev) { + u32 val; const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED; - u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS; - u32 raw; + const u32 timeout_us = + kbase_get_timeout_ms(kbdev, KBASE_CLEAN_CACHE_TIMEOUT) * USEC_PER_MSEC; /* * Note that we don't take the cache flush mutex here since @@ -276,22 +370,17 @@ static void mali_cci_flush_l2(struct kbase_device *kbdev) * to be called from. */ - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CACHE_CLN_INV_L2); - - raw = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), GPU_COMMAND_CACHE_CLN_INV_L2); /* Wait for cache flush to complete before continuing, exit on * gpu resets or loop expiry. 
*/ - while (((raw & mask) == 0) && --loops) { - raw = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)); - } + kbase_reg_poll32_timeout(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_RAWSTAT), val, val & mask, 0, + timeout_us, false); } #endif + /** * kbase_pm_invoke - Invokes an action on a core set * @@ -301,24 +390,18 @@ static void mali_cci_flush_l2(struct kbase_device *kbdev) * @action: The action to perform on the cores * * This function performs the action given by @action on a set of cores of a - * type given by @core_type. It is a static function used by - * kbase_pm_transition_core_type() + * type given by @core_type. */ -static void kbase_pm_invoke(struct kbase_device *kbdev, - enum kbase_pm_core_type core_type, - u64 cores, - enum kbasep_pm_action action) +static void kbase_pm_invoke(struct kbase_device *kbdev, enum kbase_pm_core_type core_type, + u64 cores, enum kbasep_pm_action action) { u32 reg; - u32 lo = cores & 0xFFFFFFFF; - u32 hi = (cores >> 32) & 0xFFFFFFFF; lockdep_assert_held(&kbdev->hwaccess_lock); - reg = core_type_to_reg(core_type, action); - - KBASE_DEBUG_ASSERT(reg); + reg = core_type_to_reg(kbdev, core_type, action); +#if !MALI_USE_CSF if (cores) { u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY); @@ -326,9 +409,12 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, state |= cores; else if (action == ACTION_PWROFF) state &= ~cores; - KBASE_TLSTREAM_AUX_PM_STATE(kbdev, core_type, state); + + KBASE_TLSTREAM_AUX_PM_STATE(kbdev, map_core_type_to_tl_pm_state(kbdev, core_type), + state); } +#endif /* Tracing */ if (cores) { if (action == ACTION_PWRON) @@ -363,18 +449,13 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, } } - if (kbase_dummy_job_wa_enabled(kbdev) && - action == ACTION_PWRON && + if (kbase_dummy_job_wa_enabled(kbdev) && action == ACTION_PWRON && core_type == KBASE_PM_CORE_SHADER && - !(kbdev->dummy_job_wa.flags & - KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER)) { + !(kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER)) { kbase_dummy_job_wa_execute(kbdev, cores); - } else { - if (lo != 0) - kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo); - if (hi != 0) - kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi); } + else + kbase_reg_write64(kbdev, reg, cores); } /** @@ -391,49 +472,40 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, * * Return: A bit mask specifying the state of the cores */ -static u64 kbase_pm_get_state(struct kbase_device *kbdev, - enum kbase_pm_core_type core_type, - enum kbasep_pm_action action) +static u64 kbase_pm_get_state(struct kbase_device *kbdev, enum kbase_pm_core_type core_type, + enum kbasep_pm_action action) { - u32 reg; - u32 lo, hi; - - reg = core_type_to_reg(core_type, action); + u32 reg = core_type_to_reg(kbdev, core_type, action); - KBASE_DEBUG_ASSERT(reg); - - lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg)); - hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4)); - - return (((u64) hi) << 32) | ((u64) lo); + return kbase_reg_read64(kbdev, reg); } /** * kbase_pm_get_present_cores - Get the cores that are present * * @kbdev: Kbase device - * @type: The type of cores to query + * @core_type: The type of cores to query * * Return: Bitmask of the cores that are present */ -u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, - enum kbase_pm_core_type type) +u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, enum kbase_pm_core_type core_type) { - KBASE_DEBUG_ASSERT(kbdev != NULL); + if (WARN_ON(!kbdev)) + return 0; - switch (type) { + switch (core_type) { 
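Aside: kbase_pm_invoke() and kbase_pm_get_state() previously issued two 32-bit accesses to the _LO/_HI register pair and stitched the halves together; this patch switches them to the single kbase_reg_read64()/kbase_reg_write64() helpers. A small sketch of the equivalent composition and split, with plain variables standing in for the registers:

#include <stdint.h>
#include <stdio.h>

/* Combine the _LO/_HI halves the way the removed code did. */
static uint64_t make64(uint32_t lo, uint32_t hi)
{
	return ((uint64_t)hi << 32) | (uint64_t)lo;
}

/* Split a 64-bit core mask back into the two 32-bit register values. */
static void split64(uint64_t value, uint32_t *lo, uint32_t *hi)
{
	*lo = (uint32_t)(value & 0xFFFFFFFFu);
	*hi = (uint32_t)(value >> 32);
}

int main(void)
{
	uint32_t lo, hi;
	uint64_t cores = make64(0x0000000Fu, 0x00000001u); /* 0x1_0000000F */

	split64(cores, &lo, &hi);
	printf("cores=0x%llX lo=0x%X hi=0x%X\n", (unsigned long long)cores, lo, hi);
	return 0;
}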
case KBASE_PM_CORE_L2: return kbdev->gpu_props.curr_config.l2_present; case KBASE_PM_CORE_SHADER: return kbdev->gpu_props.curr_config.shader_present; case KBASE_PM_CORE_TILER: - return kbdev->gpu_props.props.raw_props.tiler_present; + return kbdev->gpu_props.tiler_present; case KBASE_PM_CORE_STACK: - return kbdev->gpu_props.props.raw_props.stack_present; + return kbdev->gpu_props.stack_present; default: break; } - KBASE_DEBUG_ASSERT(0); + WARN_ON(1); return 0; } @@ -445,14 +517,13 @@ KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores); * (busy processing work) * * @kbdev: Kbase device - * @type: The type of cores to query + * @core_type: The type of cores to query * * Return: Bitmask of cores that are active */ -u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, - enum kbase_pm_core_type type) +u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, enum kbase_pm_core_type core_type) { - return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE); + return kbase_pm_get_state(kbdev, core_type, ACTION_PWRACTIVE); } KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores); @@ -462,14 +533,13 @@ KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores); * power states * * @kbdev: Kbase device - * @type: The type of cores to query + * @core_type: The type of cores to query * * Return: Bitmask of cores that are transitioning */ -u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, - enum kbase_pm_core_type type) +u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, enum kbase_pm_core_type core_type) { - return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS); + return kbase_pm_get_state(kbdev, core_type, ACTION_PWRTRANS); } KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores); @@ -478,18 +548,17 @@ KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores); * kbase_pm_get_ready_cores - Get the cores that are powered on * * @kbdev: Kbase device - * @type: The type of cores to query + * @core_type: The type of cores to query * * Return: Bitmask of cores that are ready (powered on) */ -u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, - enum kbase_pm_core_type type) +u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, enum kbase_pm_core_type core_type) { u64 result; - result = kbase_pm_get_state(kbdev, type, ACTION_READY); + result = kbase_pm_get_state(kbdev, core_type, ACTION_READY); - switch (type) { + switch (core_type) { case KBASE_PM_CORE_SHADER: KBASE_KTRACE_ADD(kbdev, PM_CORES_POWERED, NULL, result); break; @@ -521,8 +590,7 @@ static void kbase_pm_trigger_hwcnt_disable(struct kbase_device *kbdev) backend->hwcnt_disabled = true; } else { - kbase_hwcnt_context_queue_work(kbdev->hwcnt_gpu_ctx, - &backend->hwcnt_disable_work); + kbase_hwcnt_context_queue_work(kbdev->hwcnt_gpu_ctx, &backend->hwcnt_disable_work); } } @@ -538,9 +606,9 @@ static void kbase_pm_l2_config_override(struct kbase_device *kbdev) #if MALI_USE_CSF if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU)) { - val = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG)); - kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_CONFIG), - L2_CONFIG_PBHA_HWU_SET(val, kbdev->pbha_propagate_bits)); + val = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(L2_CONFIG)); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(L2_CONFIG), + L2_CONFIG_PBHA_HWU_SET(val, kbdev->pbha_propagate_bits)); } #endif /* MALI_USE_CSF */ @@ -552,7 +620,7 @@ static void kbase_pm_l2_config_override(struct kbase_device *kbdev) (!kbdev->l2_hash_values_override)) return; - val = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG)); + val = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(L2_CONFIG)); if 
(kbdev->l2_size_override) { val &= ~L2_CONFIG_SIZE_MASK; @@ -560,26 +628,29 @@ static void kbase_pm_l2_config_override(struct kbase_device *kbdev) } if (kbdev->l2_hash_override) { - WARN_ON(kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH)); + WARN_ON(kbase_hw_has_l2_slice_hash_feature(kbdev)); val &= ~L2_CONFIG_HASH_MASK; val |= (kbdev->l2_hash_override << L2_CONFIG_HASH_SHIFT); } else if (kbdev->l2_hash_values_override) { - int i; +#if MALI_USE_CSF + uint i; - WARN_ON(!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH)); - val &= ~L2_CONFIG_ASN_HASH_ENABLE_MASK; - val |= (0x1 << L2_CONFIG_ASN_HASH_ENABLE_SHIFT); + WARN_ON(!kbase_hw_has_l2_slice_hash_feature(kbdev)); - for (i = 0; i < ASN_HASH_COUNT; i++) { - dev_dbg(kbdev->dev, "Program 0x%x to ASN_HASH[%d]\n", + val &= ~L2_CONFIG_L2_SLICE_HASH_ENABLE_MASK; + val |= (0x1 << L2_CONFIG_L2_SLICE_HASH_ENABLE_SHIFT); + for (i = 0; i < GPU_L2_SLICE_HASH_COUNT; i++) { + /* L2_SLICE_HASH and ASN_HASH alias each other */ + dev_dbg(kbdev->dev, "Program 0x%x to ASN_HASH[%u]\n", kbdev->l2_hash_values[i], i); - kbase_reg_write(kbdev, GPU_CONTROL_REG(ASN_HASH(i)), - kbdev->l2_hash_values[i]); + kbase_reg_write32(kbdev, GPU_L2_SLICE_HASH_OFFSET(i), + kbdev->l2_hash_values[i]); } +#endif /* MALI_USE_CSF */ } dev_dbg(kbdev->dev, "Program 0x%x to L2_CONFIG\n", val); - kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_CONFIG), val); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(L2_CONFIG), val); } static void kbase_pm_control_gpu_clock(struct kbase_device *kbdev) @@ -605,18 +676,20 @@ static const char *kbase_mcu_state_to_string(enum kbase_mcu_state state) return strings[state]; } -static -void kbase_ktrace_log_mcu_state(struct kbase_device *kbdev, enum kbase_mcu_state state) +static void kbase_ktrace_log_mcu_state(struct kbase_device *kbdev, enum kbase_mcu_state state) { #if KBASE_KTRACE_ENABLE switch (state) { -#define KBASEP_MCU_STATE(n) \ - case KBASE_MCU_ ## n: \ - KBASE_KTRACE_ADD(kbdev, PM_MCU_ ## n, NULL, state); \ +#define KBASEP_MCU_STATE(n) \ + case KBASE_MCU_##n: \ + KBASE_KTRACE_ADD(kbdev, PM_MCU_##n, NULL, state); \ break; #include "mali_kbase_pm_mcu_states.h" #undef KBASEP_MCU_STATE } +#else + CSTD_UNUSED(kbdev); + CSTD_UNUSED(state); #endif } @@ -636,21 +709,19 @@ static inline bool kbase_pm_handle_mcu_core_attr_update(struct kbase_device *kbd if (unlikely(kbdev->csf.firmware_hctl_core_pwr)) return false; - core_mask_update = - backend->shaders_avail != backend->shaders_desired_mask; + core_mask_update = backend->shaders_avail != backend->shaders_desired_mask; timer_update = kbdev->csf.mcu_core_pwroff_dur_count != - kbdev->csf.mcu_core_pwroff_reg_shadow; + kbdev->csf.mcu_core_pwroff_reg_shadow; if (core_mask_update || timer_update) - kbase_csf_firmware_update_core_attr(kbdev, timer_update, - core_mask_update, backend->shaders_desired_mask); + kbase_csf_firmware_update_core_attr(kbdev, timer_update, core_mask_update, + backend->shaders_desired_mask); return (core_mask_update || timer_update); } -bool kbase_pm_is_mcu_inactive(struct kbase_device *kbdev, - enum kbase_mcu_state state) +bool kbase_pm_is_mcu_inactive(struct kbase_device *kbdev, enum kbase_mcu_state state) { lockdep_assert_held(&kbdev->hwaccess_lock); @@ -669,12 +740,12 @@ bool kbase_pm_is_mcu_inactive(struct kbase_device *kbdev, */ static void kbase_pm_enable_mcu_db_notification(struct kbase_device *kbdev) { - u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_CONTROL)); + u32 val = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(MCU_CONTROL)); lockdep_assert_held(&kbdev->hwaccess_lock); 
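Aside: several bounded busy-wait loops (KBASE_CLEAN_CACHE_MAX_LOOPS, KBASE_AS_INACTIVE_MAX_LOOPS) are replaced in this patch by kbase_reg_poll32_timeout(), which re-reads a register until a condition holds or a microsecond timeout expires. A rough standalone sketch of that poll-until-or-timeout shape; the mock register source and the iteration-based "clock" are stand-ins, not the driver's helper.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ETIMEDOUT 110

/* Mock register: reports "active" for the first few reads, then clears. */
static uint32_t mock_read_status(void)
{
	static int reads;
	return (++reads < 4) ? 0x1u : 0x0u; /* bit0 = AS_ACTIVE_INT-style busy flag */
}

/* Poll until cond(val) holds or timeout_us elapses, checking every delay_us.
 * Simplified: time is advanced per iteration instead of reading a clock.
 */
static int poll32_timeout(uint32_t (*read)(void), bool (*cond)(uint32_t), uint32_t *val,
			  unsigned int delay_us, unsigned int timeout_us)
{
	unsigned int elapsed_us = 0;

	for (;;) {
		*val = read();
		if (cond(*val))
			return 0;
		if (elapsed_us >= timeout_us)
			return -ETIMEDOUT;
		elapsed_us += delay_us; /* a real helper would delay and read a clock here */
	}
}

static bool as_inactive(uint32_t status)
{
	return (status & 0x1u) == 0;
}

int main(void)
{
	uint32_t val;
	int err = poll32_timeout(mock_read_status, as_inactive, &val, 10, 1000);

	printf("err=%d status=0x%X\n", err, val);
	return 0;
}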
val &= ~MCU_CNTRL_DOORBELL_DISABLE_MASK; - kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), val); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(MCU_CONTROL), val); } /** @@ -687,20 +758,20 @@ static void kbase_pm_enable_mcu_db_notification(struct kbase_device *kbdev) */ static void wait_mcu_as_inactive(struct kbase_device *kbdev) { - unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; - + u32 val; + int err; + const u32 timeout_us = + kbase_get_timeout_ms(kbdev, KBASE_AS_INACTIVE_TIMEOUT) * USEC_PER_MSEC; lockdep_assert_held(&kbdev->hwaccess_lock); if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TURSEHW_2716)) return; /* Wait for the AS_ACTIVE_INT bit to become 0 for the AS used by MCU FW */ - while (--max_loops && - kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(MCU_AS_NR, AS_STATUS))) & - AS_STATUS_AS_ACTIVE_INT) - ; - - if (!WARN_ON_ONCE(max_loops == 0)) + err = kbase_reg_poll32_timeout(kbdev, MMU_AS_OFFSET(MCU_AS_NR, STATUS), val, + !(val & AS_STATUS_AS_ACTIVE_INT_MASK), 10, timeout_us, + false); + if (!WARN_ON_ONCE(err == -ETIMEDOUT)) return; dev_err(kbdev->dev, "AS_ACTIVE_INT bit stuck for AS %d used by MCU FW", MCU_AS_NR); @@ -728,7 +799,8 @@ static void kbasep_pm_toggle_power_interrupt(struct kbase_device *kbdev, bool en lockdep_assert_held(&kbdev->hwaccess_lock); - irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); + + irq_mask = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK)); #ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS /* For IFPO, we require the POWER_CHANGED_ALL interrupt to be always on */ @@ -736,14 +808,83 @@ static void kbasep_pm_toggle_power_interrupt(struct kbase_device *kbdev, bool en #endif if (enable) { irq_mask |= POWER_CHANGED_ALL; - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), POWER_CHANGED_ALL); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_CLEAR), POWER_CHANGED_ALL); } else { irq_mask &= ~POWER_CHANGED_ALL; } - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK), irq_mask); } + +#if MALI_USE_CSF +/** + * hctl_shader_cores_active - Check the shader cores are active + * + * @kbdev: Pointer to the device. + * @shaders_ready: Bitmask of shader cores that are ready. + * + * This function is called to check if the required shader cores are active. + * + * Return: true if required shader cores are active, otherwise false. + */ +static bool hctl_shader_cores_active(struct kbase_device *kbdev, u64 shaders_ready) +{ + const u64 shaders_active = kbase_pm_get_active_cores(kbdev, KBASE_PM_CORE_SHADER); + const u64 shaders_to_disable = shaders_ready & ~kbdev->pm.backend.shaders_desired_mask; + bool cores_are_active = !!(shaders_to_disable & shaders_active); + + + return cores_are_active; +} + +/** + * hctl_shader_cores_power_up_done - Check the shader cores are powered up. + * + * @kbdev: Pointer to the device + * @shaders_ready: Bitmask of shader cores that are ready. + * @shaders_trans: Bitmask of shader cores that are transitioning. + * @shaders_avail: Bitmask of shader cores that are allowed to be used. + * + * This function is called to check if the required number of shader cores have been + * powered up. + * + * Return: true if power up is complete for required shader cores, otherwise false. 
+ */ +static bool hctl_shader_cores_power_up_done(struct kbase_device *kbdev, u64 shaders_ready, + u64 shaders_trans, u64 shaders_avail) +{ + if (shaders_trans || shaders_ready != shaders_avail) + return false; + + + return true; +} + +/* + * hctl_shader_cores_power_down_done - Check the shader cores are powered down + * + * @kbdev: Pointer to the device + * @shaders_ready: Bitmask of shader cores that are ready. + * @shaders_trans: Bitmask of shader cores that are transitioning. + * @shaders_avail: Bitmask of shader cores that are allowed to be used. + * + * This function is called to check if the required number of shader cores have been + * powered down. + * + * Return: true if power down is complete for required shader cores, otherwise false. + */ +static bool hctl_shader_cores_power_down_done(struct kbase_device *kbdev, u64 shaders_ready, + u64 shaders_trans, u64 shaders_avail) +{ + if (shaders_trans || shaders_ready != shaders_avail) + return false; + + + return true; +} +#endif /* MALI_USE_CSF */ + static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; @@ -783,21 +924,18 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) case KBASE_MCU_PEND_ON_RELOAD: if (kbdev->csf.firmware_reloaded) { - backend->shaders_desired_mask = - kbase_pm_ca_get_core_mask(kbdev); + backend->shaders_desired_mask = kbase_pm_ca_get_core_mask(kbdev); kbase_csf_firmware_global_reinit(kbdev, - backend->shaders_desired_mask); + backend->shaders_desired_mask); if (!kbdev->csf.firmware_hctl_core_pwr) kbasep_pm_toggle_power_interrupt(kbdev, false); - backend->mcu_state = - KBASE_MCU_ON_GLB_REINIT_PEND; + backend->mcu_state = KBASE_MCU_ON_GLB_REINIT_PEND; } break; case KBASE_MCU_ON_GLB_REINIT_PEND: if (kbase_csf_firmware_global_reinit_complete(kbdev)) { - backend->shaders_avail = - backend->shaders_desired_mask; + backend->shaders_avail = backend->shaders_desired_mask; backend->pm_shaders_core_mask = 0; if (kbdev->csf.firmware_hctl_core_pwr) { #ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS @@ -809,9 +947,8 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) } #endif kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, - backend->shaders_avail, ACTION_PWRON); - backend->mcu_state = - KBASE_MCU_HCTL_SHADERS_PEND_ON; + backend->shaders_avail, ACTION_PWRON); + backend->mcu_state = KBASE_MCU_HCTL_SHADERS_PEND_ON; } else backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; #if IS_ENABLED(CONFIG_MALI_CORESIGHT) @@ -829,16 +966,15 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) break; case KBASE_MCU_HCTL_SHADERS_PEND_ON: - if (!shaders_trans && - shaders_ready == backend->shaders_avail) { - /* Cores now stable, notify MCU the stable mask */ - kbase_csf_firmware_update_core_attr(kbdev, - false, true, shaders_ready); + if (!hctl_shader_cores_power_up_done(kbdev, shaders_ready, shaders_trans, + backend->shaders_avail)) + break; - backend->pm_shaders_core_mask = shaders_ready; - backend->mcu_state = - KBASE_MCU_HCTL_CORES_NOTIFY_PEND; - } + /* Cores now stable, notify MCU the stable mask */ + kbase_csf_firmware_update_core_attr(kbdev, false, true, shaders_ready); + + backend->pm_shaders_core_mask = shaders_ready; + backend->mcu_state = KBASE_MCU_HCTL_CORES_NOTIFY_PEND; break; case KBASE_MCU_HCTL_CORES_NOTIFY_PEND: @@ -853,6 +989,11 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) unsigned long flags; kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_hwcnt_backend_csf_set_hw_availability( + 
&kbdev->hwcnt_gpu_iface, + kbdev->gpu_props.curr_config.l2_slices, + kbdev->gpu_props.curr_config.shader_present & + kbdev->pm.debug_core_mask); kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); kbase_csf_scheduler_spin_unlock(kbdev, flags); backend->hwcnt_disabled = false; @@ -871,8 +1012,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) backend->hwcnt_desired = false; if (!backend->hwcnt_disabled) kbase_pm_trigger_hwcnt_disable(kbdev); - backend->mcu_state = - KBASE_MCU_HCTL_MCU_ON_RECHECK; + backend->mcu_state = KBASE_MCU_HCTL_MCU_ON_RECHECK; } } else if (kbase_pm_handle_mcu_core_attr_update(kbdev)) backend->mcu_state = KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND; @@ -908,16 +1048,14 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, backend->shaders_avail & ~shaders_ready, ACTION_PWRON); - backend->mcu_state = - KBASE_MCU_HCTL_SHADERS_PEND_ON; + backend->mcu_state = KBASE_MCU_HCTL_SHADERS_PEND_ON; } else if (~backend->shaders_desired_mask & shaders_ready) { kbase_csf_firmware_update_core_attr(kbdev, false, true, backend->shaders_desired_mask); backend->mcu_state = KBASE_MCU_HCTL_CORES_DOWN_SCALE_NOTIFY_PEND; } else { - backend->mcu_state = - KBASE_MCU_HCTL_SHADERS_PEND_ON; + backend->mcu_state = KBASE_MCU_HCTL_SHADERS_PEND_ON; } break; @@ -929,30 +1067,25 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) } break; - case KBASE_MCU_HCTL_CORE_INACTIVE_PEND: - { - u64 active_cores = kbase_pm_get_active_cores( - kbdev, - KBASE_PM_CORE_SHADER); - u64 cores_to_disable = shaders_ready & - ~backend->shaders_desired_mask; + case KBASE_MCU_HCTL_CORE_INACTIVE_PEND: { + if (hctl_shader_cores_active(kbdev, shaders_ready)) + break; - if (!(cores_to_disable & active_cores)) { - kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, - cores_to_disable, - ACTION_PWROFF); - backend->shaders_avail = backend->shaders_desired_mask; - backend->mcu_state = KBASE_MCU_HCTL_SHADERS_CORE_OFF_PEND; - } - } - break; + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, + shaders_ready & ~backend->shaders_desired_mask, + ACTION_PWROFF); + backend->shaders_avail = backend->shaders_desired_mask; + backend->mcu_state = KBASE_MCU_HCTL_SHADERS_CORE_OFF_PEND; + } break; case KBASE_MCU_HCTL_SHADERS_CORE_OFF_PEND: - if (!shaders_trans && shaders_ready == backend->shaders_avail) { - /* Cores now stable */ - backend->pm_shaders_core_mask = shaders_ready; - backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; - } + if (!hctl_shader_cores_power_down_done(kbdev, shaders_ready, shaders_trans, + backend->shaders_avail)) + break; + + /* Cores now stable */ + backend->pm_shaders_core_mask = shaders_ready; + backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; break; case KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND: @@ -972,7 +1105,6 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) if (!backend->hwcnt_disabled) kbase_pm_trigger_hwcnt_disable(kbdev); - if (backend->hwcnt_disabled) { #ifdef KBASE_PM_RUNTIME if (backend->gpu_sleep_mode_active) @@ -1026,34 +1158,40 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) break; case KBASE_MCU_ON_PEND_HALT: - if (kbase_csf_firmware_mcu_halted(kbdev)) { + if (kbase_csf_firmware_mcu_halt_req_complete(kbdev)) { KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_MCU_HALTED, NULL, - kbase_csf_ktrace_gpu_cycle_cnt(kbdev)); + kbase_csf_ktrace_gpu_cycle_cnt(kbdev)); if (kbdev->csf.firmware_hctl_core_pwr) - backend->mcu_state = - KBASE_MCU_HCTL_SHADERS_READY_OFF; + backend->mcu_state = KBASE_MCU_HCTL_SHADERS_READY_OFF; else 
backend->mcu_state = KBASE_MCU_POWER_DOWN; } break; case KBASE_MCU_HCTL_SHADERS_READY_OFF: - kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, - shaders_ready, ACTION_PWROFF); - backend->mcu_state = - KBASE_MCU_HCTL_SHADERS_PEND_OFF; + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, shaders_ready, ACTION_PWROFF); + backend->mcu_state = KBASE_MCU_HCTL_SHADERS_PEND_OFF; break; case KBASE_MCU_HCTL_SHADERS_PEND_OFF: - if (!shaders_trans && !shaders_ready) { - backend->pm_shaders_core_mask = 0; - backend->mcu_state = KBASE_MCU_POWER_DOWN; - } + if (!hctl_shader_cores_power_down_done(kbdev, shaders_ready, shaders_trans, + 0)) + break; + + backend->pm_shaders_core_mask = 0; + backend->mcu_state = KBASE_MCU_POWER_DOWN; break; case KBASE_MCU_POWER_DOWN: - kbase_csf_firmware_disable_mcu(kbdev); - backend->mcu_state = KBASE_MCU_PEND_OFF; + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TITANHW_2922)) { + if (!kbdev->csf.firmware_hctl_core_pwr) + kbasep_pm_toggle_power_interrupt(kbdev, true); + backend->mcu_state = KBASE_MCU_OFF; + backend->l2_force_off_after_mcu_halt = true; + } else { + kbase_csf_firmware_disable_mcu(kbdev); + backend->mcu_state = KBASE_MCU_PEND_OFF; + } break; case KBASE_MCU_PEND_OFF: @@ -1075,7 +1213,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) case KBASE_MCU_ON_PEND_SLEEP: if (kbase_csf_firmware_is_mcu_in_sleep(kbdev)) { KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_MCU_SLEEP, NULL, - kbase_csf_ktrace_gpu_cycle_cnt(kbdev)); + kbase_csf_ktrace_gpu_cycle_cnt(kbdev)); backend->mcu_state = KBASE_MCU_IN_SLEEP; kbase_pm_enable_db_mirror_interrupt(kbdev); kbase_csf_scheduler_reval_idleness_post_sleep(kbdev); @@ -1084,12 +1222,21 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) */ if (!kbdev->csf.firmware_hctl_core_pwr) kbasep_pm_toggle_power_interrupt(kbdev, true); + /* If Scheduler's PM refcount is not zero then the early wakeup + * on reaching the sleep state can be skipped as the waiting thread + * (like Scheduler kthread) would be interested in MCU being + * turned ON. + * In the more regular flow, the refcount is very likely to be zero + * and there would be no waiters. The wake_up() call won't have an + * effect if there are no waiters. 
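Aside: the new hctl_shader_cores_* helpers used by the MCU state machine above all reduce to the same bitmask tests: a power transition is complete once nothing is still transitioning and the READY mask equals the target mask, and cores may only be switched off once none of the cores slated for removal are still active. A tiny sketch of those two predicates with arbitrary example masks:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors hctl_shader_cores_power_up_done()/_down_done(): done when no core is
 * mid-transition and the ready mask matches the requested mask exactly.
 */
static bool power_transition_done(uint64_t trans, uint64_t ready, uint64_t target)
{
	return !trans && ready == target;
}

/* Mirrors hctl_shader_cores_active(): cores we want to disable must no longer
 * be executing work before a PWROFF may be issued.
 */
static bool cores_to_disable_still_active(uint64_t ready, uint64_t desired, uint64_t active)
{
	return ((ready & ~desired) & active) != 0;
}

int main(void)
{
	printf("%d\n", power_transition_done(0x0, 0x0F, 0x0F));          /* 1: all four cores ready */
	printf("%d\n", power_transition_done(0x4, 0x0B, 0x0F));          /* 0: core 2 still transitioning */
	printf("%d\n", cores_to_disable_still_active(0x0F, 0x03, 0x04)); /* 1: core 2 still busy */
	return 0;
}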
+ */ + if (likely(!kbdev->csf.scheduler.pm_active_count)) + wake_up(&backend->gpu_in_desired_state_wait); } break; case KBASE_MCU_IN_SLEEP: - if (kbase_pm_is_mcu_desired(kbdev) && - backend->l2_state == KBASE_L2_ON) { + if (kbase_pm_is_mcu_desired(kbdev) && backend->l2_state == KBASE_L2_ON) { wait_mcu_as_inactive(kbdev); KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP( kbdev, kbase_backend_get_cycle_cnt(kbdev)); @@ -1117,8 +1264,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) break; default: - WARN(1, "Invalid state in mcu_state: %d", - backend->mcu_state); + WARN(1, "Invalid state in mcu_state: %d", backend->mcu_state); } if (backend->mcu_state != prev_state) { @@ -1149,11 +1295,9 @@ static void core_idle_worker(struct work_struct *work) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); while (backend->gpu_powered && (backend->mcu_state == KBASE_MCU_HCTL_CORE_INACTIVE_PEND)) { const unsigned int core_inactive_wait_ms = 1; - u64 active_cores = kbase_pm_get_active_cores(kbdev, KBASE_PM_CORE_SHADER); u64 shaders_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); - u64 cores_to_disable = shaders_ready & ~backend->shaders_desired_mask; - if (!(cores_to_disable & active_cores)) { + if (!hctl_shader_cores_active(kbdev, shaders_ready)) { kbase_pm_update_state(kbdev); break; } @@ -1205,18 +1349,21 @@ static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state) return strings[state]; } -static -void kbase_ktrace_log_l2_core_state(struct kbase_device *kbdev, enum kbase_l2_core_state state) +static void kbase_ktrace_log_l2_core_state(struct kbase_device *kbdev, + enum kbase_l2_core_state state) { #if KBASE_KTRACE_ENABLE switch (state) { -#define KBASEP_L2_STATE(n) \ - case KBASE_L2_ ## n: \ - KBASE_KTRACE_ADD(kbdev, PM_L2_ ## n, NULL, state); \ +#define KBASEP_L2_STATE(n) \ + case KBASE_L2_##n: \ + KBASE_KTRACE_ADD(kbdev, PM_L2_##n, NULL, state); \ break; #include "mali_kbase_pm_l2_states.h" #undef KBASEP_L2_STATE } +#else + CSTD_UNUSED(kbdev); + CSTD_UNUSED(state); #endif } @@ -1250,30 +1397,25 @@ static void wait_as_active_int(struct kbase_device *kbdev) { #if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_NO_MALI) int as_no; - + u32 val; + int err; + const u32 timeout_us = + kbase_get_timeout_ms(kbdev, KBASE_AS_INACTIVE_TIMEOUT) * USEC_PER_MSEC; lockdep_assert_held(&kbdev->hwaccess_lock); - if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3878)) + if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TURSEHW_2716)) return; for (as_no = 0; as_no != kbdev->nr_hw_address_spaces; as_no++) { - unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; /* Wait for the AS_ACTIVE_INT bit to become 0 for the AS. * The wait is actually needed only for the enabled AS. */ - while (--max_loops && - kbase_reg_read(kbdev, MMU_AS_REG(as_no, AS_STATUS)) & - AS_STATUS_AS_ACTIVE_INT) - ; -#ifdef CONFIG_MALI_DEBUG - /* For a disabled AS the loop should run for a single iteration only. 
*/ - if (!kbdev->as_to_kctx[as_no] && (max_loops != (KBASE_AS_INACTIVE_MAX_LOOPS -1))) - dev_warn(kbdev->dev, "AS_ACTIVE_INT bit found to be set for disabled AS %d", as_no); -#endif - - if (max_loops) + err = kbase_reg_poll32_timeout(kbdev, MMU_AS_OFFSET(as_no, STATUS), val, + !(val & AS_STATUS_AS_ACTIVE_INT_MASK), 10, timeout_us, + false); + if (!WARN_ON_ONCE(err == -ETIMEDOUT)) continue; dev_warn(kbdev->dev, "AS_ACTIVE_INT bit stuck for AS %d", as_no); @@ -1287,15 +1429,8 @@ static void wait_as_active_int(struct kbase_device *kbdev) static bool can_power_down_l2(struct kbase_device *kbdev) { -#if MALI_USE_CSF - /* Due to the HW issue GPU2019-3878, need to prevent L2 power off - * whilst MMU command is in progress. - * Also defer the power-down if MMU is in process of page migration. - */ - return !kbdev->mmu_hw_operation_in_progress && !kbdev->mmu_page_migrate_in_progress; -#else + /* Defer the power-down if MMU is in process of page migration. */ return !kbdev->mmu_page_migrate_in_progress; -#endif } static bool can_power_up_l2(struct kbase_device *kbdev) @@ -1314,15 +1449,61 @@ static bool need_tiler_control(struct kbase_device *kbdev) else return false; #else + CSTD_UNUSED(kbdev); return true; #endif } +/** + * hctl_l2_power_down - Initiate power down of L2 cache + * + * @kbdev: The kbase device structure for the device. + * + * This function initiates the power down of L2 cache when Host controls the power + * for Tiler block. The function expects that power down of Tiler to already have + * been initiated and it triggers the L2 power down only after the power down for + * Tiler is complete. + * The function shall be called only if L2 is in ready state. + */ +static void hctl_l2_power_down(struct kbase_device *kbdev) +{ +} + +/** + * hctl_tiler_power_up_done - Check and/or initiate power up of Tiler + * + * @kbdev: The kbase device structure for the device. + * + * This function initiates the power up of Tiler, when Host controls the power + * for Tiler block, but only if the caller hasn't already triggered the power up + * of Tiler. + * The function shall be called only if L2 is in ready state. + * + * Return: true if power up is complete for Tiler, otherwise false. 
+ */ +static bool hctl_tiler_power_up_done(struct kbase_device *kbdev) +{ + u64 tiler_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_TILER); + const u64 tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER); + + tiler_trans &= ~tiler_ready; + if (tiler_trans) + return false; + + if (!tiler_ready) { + return false; + } + + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, tiler_ready); + return true; +} + + static int kbase_pm_l2_update_state(struct kbase_device *kbdev) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; u64 l2_present = kbdev->gpu_props.curr_config.l2_present; - u64 tiler_present = kbdev->gpu_props.props.raw_props.tiler_present; + u64 tiler_present = kbdev->gpu_props.tiler_present; bool l2_power_up_done; enum kbase_l2_core_state prev_state; @@ -1330,10 +1511,8 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) do { /* Get current state */ - u64 l2_trans = kbase_pm_get_trans_cores(kbdev, - KBASE_PM_CORE_L2); - u64 l2_ready = kbase_pm_get_ready_cores(kbdev, - KBASE_PM_CORE_L2); + u64 l2_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2); + u64 l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2); #ifdef CONFIG_MALI_ARBITER_SUPPORT /* @@ -1341,8 +1520,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) * are vulnerable to corruption if gpu is lost */ if (kbase_is_gpu_removed(kbdev) || kbase_pm_is_gpu_lost(kbdev)) { - backend->shaders_state = - KBASE_SHADERS_OFF_CORESTACK_OFF; + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF; backend->hwcnt_desired = false; if (!backend->hwcnt_disabled) { /* Don't progress until hw counters are disabled @@ -1352,10 +1530,9 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) * the hardware. This step is needed to keep the HW * counters in a consistent state after a GPU lost. */ - backend->l2_state = - KBASE_L2_ON_HWCNT_DISABLE; + backend->l2_state = KBASE_L2_ON_HWCNT_DISABLE; KBASE_KTRACE_ADD(kbdev, PM_L2_ON_HWCNT_DISABLE, NULL, - backend->l2_state); + backend->l2_state); kbase_pm_trigger_hwcnt_disable(kbdev); } @@ -1384,7 +1561,9 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) /* Enable HW timer of IPA control before * L2 cache is powered-up. */ - kbase_ipa_control_handle_gpu_sleep_exit(kbdev); + { + kbase_ipa_control_handle_gpu_sleep_exit(kbdev); + } #endif /* * Set the desired config for L2 before @@ -1400,9 +1579,10 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) * L2 cache. */ if (need_tiler_control(kbdev)) { - kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, tiler_present, - ACTION_PWRON); + kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, + tiler_present, ACTION_PWRON); } else { + kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_present, ACTION_PWRON); } @@ -1411,8 +1591,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) * must power them on explicitly. 
*/ if (l2_present != 1) - kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, - l2_present & ~1, + kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_present & ~1ULL, ACTION_PWRON); /* Clear backend slot submission kctx */ kbase_pm_l2_clear_backend_slot_submit_kctx(kbdev); @@ -1425,18 +1604,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) l2_power_up_done = false; if (!l2_trans && l2_ready == l2_present) { if (need_tiler_control(kbdev)) { - u64 tiler_trans = kbase_pm_get_trans_cores( - kbdev, KBASE_PM_CORE_TILER); - u64 tiler_ready = kbase_pm_get_ready_cores( - kbdev, KBASE_PM_CORE_TILER); - tiler_trans &= ~tiler_ready; - - if (!tiler_trans && tiler_ready == tiler_present) { - KBASE_KTRACE_ADD(kbdev, - PM_CORES_CHANGE_AVAILABLE_TILER, - NULL, tiler_ready); - l2_power_up_done = true; - } + l2_power_up_done = hctl_tiler_power_up_done(kbdev); } else { KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, l2_ready); @@ -1455,11 +1623,9 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) * hardware counters. */ if (kbdev->pm.backend.gpu_clock_slow_down_wa) - backend->l2_state = - KBASE_L2_RESTORE_CLOCKS; + backend->l2_state = KBASE_L2_RESTORE_CLOCKS; else - backend->l2_state = - KBASE_L2_ON_HWCNT_ENABLE; + backend->l2_state = KBASE_L2_ON_HWCNT_ENABLE; /* Now that the L2 is on, the shaders can start * powering on if they're required. The obvious @@ -1504,8 +1670,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) #if !MALI_USE_CSF backend->hwcnt_desired = true; if (backend->hwcnt_disabled) { - kbase_hwcnt_context_enable( - kbdev->hwcnt_gpu_ctx); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); backend->hwcnt_disabled = false; } #endif @@ -1575,8 +1740,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) if (backend->hwcnt_disabled) { if (kbdev->pm.backend.gpu_clock_slow_down_wa) - backend->l2_state = - KBASE_L2_SLOW_DOWN_CLOCKS; + backend->l2_state = KBASE_L2_SLOW_DOWN_CLOCKS; else backend->l2_state = KBASE_L2_POWER_DOWN; } @@ -1612,12 +1776,9 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) else if (can_power_down_l2(kbdev)) { if (!backend->l2_always_on) { wait_as_active_int(kbdev); - /* Powering off the L2 will also power off the - * tiler. - */ - kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, - l2_present, - ACTION_PWROFF); + /* Powering off the L2 will also power off the tiler. */ + kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_present, + ACTION_PWROFF); } else /* If L2 cache is powered then we must flush it * before we power off the GPU. Normally this @@ -1636,42 +1797,53 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) break; case KBASE_L2_PEND_OFF: - if (!backend->l2_always_on) { - /* We only need to check the L2 here - if the L2 - * is off then the tiler is definitely also off. - */ - if (!l2_trans && !l2_ready) { -#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) - /* Allow clock gating within the GPU and prevent it - * from being seen as active during sleep. - */ - kbase_ipa_control_handle_gpu_sleep_enter(kbdev); -#endif - /* L2 is now powered off */ - backend->l2_state = KBASE_L2_OFF; + if (likely(!backend->l2_always_on)) { + if (need_tiler_control(kbdev) && l2_ready) { + hctl_l2_power_down(kbdev); + break; } - } else { - if (!kbdev->cache_clean_in_progress) { -#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) - /* Allow clock gating within the GPU and prevent it - * from being seen as active during sleep. 
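Aside: the change from "l2_present & ~1" to "l2_present & ~1ULL" above makes the integer width of the mask explicit. With plain ~1 the result is still correct because the int value -2 sign-extends when converted to u64, but the near-identical ~1u would silently clear the upper 32 bits of a 64-bit presence mask. A short demonstration of the three spellings; the sample mask is made up.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical 64-bit presence mask with a bit above bit 31 set. */
	uint64_t l2_present = 0x100000003ULL;

	/* int ~1 is -2, which sign-extends to 0xFFFFFFFFFFFFFFFE: correct, but implicit. */
	printf("& ~1    = 0x%llX\n", (unsigned long long)(l2_present & ~1));
	/* unsigned int 0xFFFFFFFE zero-extends: upper 32 bits are wrongly cleared. */
	printf("& ~1u   = 0x%llX\n", (unsigned long long)(l2_present & ~1u));
	/* Explicit 64-bit literal: correct and obviously so. */
	printf("& ~1ULL = 0x%llX\n", (unsigned long long)(l2_present & ~1ULL));
	return 0;
}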
- */ - kbase_ipa_control_handle_gpu_sleep_enter(kbdev); + + if (l2_trans || l2_ready) + break; + } else if (kbdev->cache_clean_in_progress) + break; +#if MALI_USE_CSF +#if defined(KBASE_PM_RUNTIME) + /* Allow clock gating within the GPU and prevent it + * from being seen as active during sleep. + */ + { + kbase_ipa_control_handle_gpu_sleep_enter(kbdev); + } #endif - backend->l2_state = KBASE_L2_OFF; - } + /* Disabling MCU after L2 cache power down is to address + * BASE_HW_ISSUE_TITANHW_2922 hardware issue. + */ + if (backend->l2_force_off_after_mcu_halt) { + kbase_csf_firmware_disable_mcu(kbdev); + kbase_csf_firmware_disable_mcu_wait(kbdev); + WARN_ON_ONCE(backend->mcu_state != KBASE_MCU_OFF); + backend->l2_force_off_after_mcu_halt = false; } +#endif + /* L2 is now powered off */ + backend->l2_state = KBASE_L2_OFF; + break; case KBASE_L2_RESET_WAIT: /* Reset complete */ - if (!backend->in_reset) + if (!backend->in_reset) { +#if MALI_USE_CSF + backend->l2_force_off_after_mcu_halt = false; +#endif backend->l2_state = KBASE_L2_OFF; + } + break; default: - WARN(1, "Invalid state in l2_state: %d", - backend->l2_state); + WARN(1, "Invalid state in l2_state: %d", backend->l2_state); } if (backend->l2_state != prev_state) { @@ -1683,18 +1855,17 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) dev_dbg(kbdev->dev, "L2 state transition: %s to %s\n", kbase_l2_core_state_to_string(prev_state), - kbase_l2_core_state_to_string( - backend->l2_state)); + kbase_l2_core_state_to_string(backend->l2_state)); kbase_ktrace_log_l2_core_state(kbdev, backend->l2_state); } } while (backend->l2_state != prev_state); if (kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off && - backend->l2_state == KBASE_L2_OFF) { + backend->l2_state == KBASE_L2_OFF) { kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = false; queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq, - &kbdev->pm.backend.gpu_poweroff_wait_work); + &kbdev->pm.backend.gpu_poweroff_wait_work); } return 0; @@ -1703,10 +1874,10 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) static void shader_poweroff_timer_stop_callback(struct work_struct *data) { unsigned long flags; - struct kbasep_pm_tick_timer_state *stt = container_of(data, - struct kbasep_pm_tick_timer_state, work); - struct kbase_device *kbdev = container_of(stt, struct kbase_device, - pm.backend.shader_tick_timer); + struct kbasep_pm_tick_timer_state *stt = + container_of(data, struct kbasep_pm_tick_timer_state, work); + struct kbase_device *kbdev = + container_of(stt, struct kbase_device, pm.backend.shader_tick_timer); hrtimer_cancel(&stt->timer); @@ -1746,8 +1917,7 @@ static void shader_poweroff_timer_stop_callback(struct work_struct *data) */ static void shader_poweroff_timer_queue_cancel(struct kbase_device *kbdev) { - struct kbasep_pm_tick_timer_state *stt = - &kbdev->pm.backend.shader_tick_timer; + struct kbasep_pm_tick_timer_state *stt = &kbdev->pm.backend.shader_tick_timer; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -1760,8 +1930,7 @@ static void shader_poweroff_timer_queue_cancel(struct kbase_device *kbdev) } #if !MALI_USE_CSF -static const char *kbase_shader_core_state_to_string( - enum kbase_shader_core_state state) +static const char *kbase_shader_core_state_to_string(enum kbase_shader_core_state state) { const char *const strings[] = { #define KBASEP_SHADER_STATE(n) #n, @@ -1777,8 +1946,7 @@ static const char *kbase_shader_core_state_to_string( static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) { struct kbase_pm_backend_data 
*backend = &kbdev->pm.backend; - struct kbasep_pm_tick_timer_state *stt = - &kbdev->pm.backend.shader_tick_timer; + struct kbasep_pm_tick_timer_state *stt = &kbdev->pm.backend.shader_tick_timer; enum kbase_shader_core_state prev_state; u64 stacks_avail = 0; @@ -1808,12 +1976,11 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) */ if (kbase_is_gpu_removed(kbdev) #ifdef CONFIG_MALI_ARBITER_SUPPORT - || kbase_pm_is_gpu_lost(kbdev)) { + || kbase_pm_is_gpu_lost(kbdev)) { #else - ) { + ) { #endif - backend->shaders_state = - KBASE_SHADERS_OFF_CORESTACK_OFF; + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF; dev_dbg(kbdev->dev, "GPU lost has occurred - shaders off\n"); break; } @@ -1832,14 +1999,11 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) * except at certain points where we can handle it, * i.e. off and SHADERS_ON_CORESTACK_ON. */ - backend->shaders_desired_mask = - kbase_pm_ca_get_core_mask(kbdev); + backend->shaders_desired_mask = kbase_pm_ca_get_core_mask(kbdev); backend->pm_shaders_core_mask = 0; - if (backend->shaders_desired && - backend->l2_state == KBASE_L2_ON) { - if (backend->hwcnt_desired && - !backend->hwcnt_disabled) { + if (backend->shaders_desired && backend->l2_state == KBASE_L2_ON) { + if (backend->hwcnt_desired && !backend->hwcnt_disabled) { /* Trigger a hwcounter dump */ backend->hwcnt_desired = false; kbase_pm_trigger_hwcnt_disable(kbdev); @@ -1847,10 +2011,8 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) if (backend->hwcnt_disabled) { if (corestack_driver_control) { - kbase_pm_invoke(kbdev, - KBASE_PM_CORE_STACK, - stacks_avail, - ACTION_PWRON); + kbase_pm_invoke(kbdev, KBASE_PM_CORE_STACK, + stacks_avail, ACTION_PWRON); } backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_PEND_ON; @@ -1860,16 +2022,14 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) case KBASE_SHADERS_OFF_CORESTACK_PEND_ON: if (!stacks_trans && stacks_ready == stacks_avail) { - backend->shaders_avail = - backend->shaders_desired_mask; - kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, - backend->shaders_avail, ACTION_PWRON); + backend->shaders_avail = backend->shaders_desired_mask; + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, backend->shaders_avail, + ACTION_PWRON); if (backend->pm_current_policy && backend->pm_current_policy->handle_event) backend->pm_current_policy->handle_event( - kbdev, - KBASE_PM_POLICY_EVENT_POWER_ON); + kbdev, KBASE_PM_POLICY_EVENT_POWER_ON); backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; } @@ -1877,21 +2037,19 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) case KBASE_SHADERS_PEND_ON_CORESTACK_ON: if (!shaders_trans && shaders_ready == backend->shaders_avail) { - KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, shaders_ready); + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, + shaders_ready); backend->pm_shaders_core_mask = shaders_ready; backend->hwcnt_desired = true; if (backend->hwcnt_disabled) { #if MALI_USE_CSF unsigned long flags; - kbase_csf_scheduler_spin_lock(kbdev, - &flags); + kbase_csf_scheduler_spin_lock(kbdev, &flags); #endif - kbase_hwcnt_context_enable( - kbdev->hwcnt_gpu_ctx); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); #if MALI_USE_CSF - kbase_csf_scheduler_spin_unlock(kbdev, - flags); + kbase_csf_scheduler_spin_unlock(kbdev, flags); #endif backend->hwcnt_disabled = false; } @@ -1901,23 +2059,20 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) break; case 
KBASE_SHADERS_ON_CORESTACK_ON: - backend->shaders_desired_mask = - kbase_pm_ca_get_core_mask(kbdev); + backend->shaders_desired_mask = kbase_pm_ca_get_core_mask(kbdev); /* If shaders to change state, trigger a counter dump */ if (!backend->shaders_desired || - (backend->shaders_desired_mask != shaders_ready)) { + (backend->shaders_desired_mask != shaders_ready)) { backend->hwcnt_desired = false; if (!backend->hwcnt_disabled) kbase_pm_trigger_hwcnt_disable(kbdev); - backend->shaders_state = - KBASE_SHADERS_ON_CORESTACK_ON_RECHECK; + backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON_RECHECK; } break; case KBASE_SHADERS_ON_CORESTACK_ON_RECHECK: - backend->shaders_desired_mask = - kbase_pm_ca_get_core_mask(kbdev); + backend->shaders_desired_mask = kbase_pm_ca_get_core_mask(kbdev); if (!backend->hwcnt_disabled) { /* Wait for being disabled */ @@ -1926,17 +2081,15 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) if (backend->pm_current_policy && backend->pm_current_policy->handle_event) backend->pm_current_policy->handle_event( - kbdev, - KBASE_PM_POLICY_EVENT_IDLE); + kbdev, KBASE_PM_POLICY_EVENT_IDLE); if (kbdev->pm.backend.protected_transition_override || #ifdef CONFIG_MALI_ARBITER_SUPPORT - kbase_pm_is_suspending(kbdev) || - kbase_pm_is_gpu_lost(kbdev) || + kbase_pm_is_suspending(kbdev) || kbase_pm_is_gpu_lost(kbdev) || #endif /* CONFIG_MALI_ARBITER_SUPPORT */ - !stt->configured_ticks || - WARN_ON(stt->cancel_queued)) { - backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; + !stt->configured_ticks || WARN_ON(stt->cancel_queued)) { + backend->shaders_state = + KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; } else { stt->remaining_ticks = stt->configured_ticks; stt->needed = true; @@ -1954,11 +2107,11 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) * before the timeout expires. 
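/*
 * Illustrative side note (not from the patch): the shader_tick_timer logic
 * being reflowed here implements a power-off hysteresis. When the shaders go
 * idle, remaining_ticks is loaded from configured_ticks and an hrtimer
 * decrements it; only if the count reaches zero with no new work do the
 * shaders proceed to power off (POLICY_EVENT_TIMER_MISS), while work arriving
 * earlier keeps them on (POLICY_EVENT_TIMER_HIT). A minimal user-space model,
 * with names assumed for illustration:
 */
#include <stdbool.h>
#include <stdio.h>

struct tick_timer_model {
	unsigned int configured_ticks;
	unsigned int remaining_ticks;
	bool needed; /* true while the hysteresis window is open */
};

static void model_go_idle(struct tick_timer_model *stt)
{
	stt->remaining_ticks = stt->configured_ticks;
	stt->needed = true;
}

/* Returns true when the hysteresis expired and shaders may power off. */
static bool model_tick(struct tick_timer_model *stt, bool new_work)
{
	if (new_work) {			/* TIMER_HIT: keep shaders on */
		stt->needed = false;
		return false;
	}
	if (stt->remaining_ticks && --stt->remaining_ticks == 0) {
		stt->needed = false;	/* TIMER_MISS: window elapsed */
		return true;
	}
	return false;
}

int main(void)
{
	struct tick_timer_model stt = { .configured_ticks = 2 };

	model_go_idle(&stt);
	printf("%d\n", model_tick(&stt, false)); /* 0: one tick left */
	printf("%d\n", model_tick(&stt, false)); /* 1: hysteresis expired */
	return 0;
}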
*/ if (!hrtimer_active(&stt->timer)) - hrtimer_start(&stt->timer, - stt->configured_interval, - HRTIMER_MODE_REL); + hrtimer_start(&stt->timer, stt->configured_interval, + HRTIMER_MODE_REL); - backend->shaders_state = KBASE_SHADERS_WAIT_OFF_CORESTACK_ON; + backend->shaders_state = + KBASE_SHADERS_WAIT_OFF_CORESTACK_ON; } } else if (backend->shaders_desired_mask & ~shaders_ready) { /* set cores ready but not available to @@ -1971,14 +2124,11 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, backend->shaders_avail & ~shaders_ready, ACTION_PWRON); - backend->shaders_state = - KBASE_SHADERS_PEND_ON_CORESTACK_ON; + backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; } else if (shaders_ready & ~backend->shaders_desired_mask) { - backend->shaders_state = - KBASE_SHADERS_WAIT_GPU_IDLE; + backend->shaders_state = KBASE_SHADERS_WAIT_GPU_IDLE; } else { - backend->shaders_state = - KBASE_SHADERS_PEND_ON_CORESTACK_ON; + backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; } break; @@ -1992,8 +2142,7 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) if (backend->pm_current_policy && backend->pm_current_policy->handle_event) backend->pm_current_policy->handle_event( - kbdev, - KBASE_PM_POLICY_EVENT_TIMER_HIT); + kbdev, KBASE_PM_POLICY_EVENT_TIMER_HIT); stt->remaining_ticks = 0; backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON_RECHECK; @@ -2001,13 +2150,11 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) if (backend->pm_current_policy && backend->pm_current_policy->handle_event) backend->pm_current_policy->handle_event( - kbdev, - KBASE_PM_POLICY_EVENT_TIMER_MISS); + kbdev, KBASE_PM_POLICY_EVENT_TIMER_MISS); backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; #ifdef CONFIG_MALI_ARBITER_SUPPORT - } else if (kbase_pm_is_suspending(kbdev) || - kbase_pm_is_gpu_lost(kbdev)) { + } else if (kbase_pm_is_suspending(kbdev) || kbase_pm_is_gpu_lost(kbdev)) { backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; #endif /* CONFIG_MALI_ARBITER_SUPPORT */ } @@ -2029,20 +2176,17 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) shader_poweroff_timer_queue_cancel(kbdev); if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) { - kbase_gpu_start_cache_clean_nolock( - kbdev, GPU_COMMAND_CACHE_CLN_INV_L2); - backend->shaders_state = - KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON; + kbase_gpu_start_cache_clean_nolock(kbdev, + GPU_COMMAND_CACHE_CLN_INV_L2); + backend->shaders_state = KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON; } else { - backend->shaders_state = - KBASE_SHADERS_READY_OFF_CORESTACK_ON; + backend->shaders_state = KBASE_SHADERS_READY_OFF_CORESTACK_ON; } break; case KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON: if (!kbdev->cache_clean_in_progress) - backend->shaders_state = - KBASE_SHADERS_READY_OFF_CORESTACK_ON; + backend->shaders_state = KBASE_SHADERS_READY_OFF_CORESTACK_ON; break; @@ -2060,15 +2204,16 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) WARN_ON(backend->shaders_desired_mask & ~shaders_ready); WARN_ON(!(backend->shaders_desired_mask & shaders_ready)); - backend->shaders_avail = - backend->shaders_desired_mask; + backend->shaders_avail = backend->shaders_desired_mask; kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, - shaders_ready & ~backend->shaders_avail, ACTION_PWROFF); + shaders_ready & ~backend->shaders_avail, + ACTION_PWROFF); backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; - 
KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, (shaders_ready & ~backend->shaders_avail)); + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, + (shaders_ready & ~backend->shaders_avail)); } else { - kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, - shaders_ready, ACTION_PWROFF); + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, shaders_ready, + ACTION_PWROFF); KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, 0u); @@ -2079,8 +2224,8 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) case KBASE_SHADERS_PEND_OFF_CORESTACK_ON: if (!shaders_trans && !shaders_ready) { if (corestack_driver_control) - kbase_pm_invoke(kbdev, KBASE_PM_CORE_STACK, - stacks_avail, ACTION_PWROFF); + kbase_pm_invoke(kbdev, KBASE_PM_CORE_STACK, stacks_avail, + ACTION_PWROFF); backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_PEND_OFF; } @@ -2095,18 +2240,16 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) #if MALI_USE_CSF unsigned long flags; - kbase_csf_scheduler_spin_lock(kbdev, - &flags); + kbase_csf_scheduler_spin_lock(kbdev, &flags); #endif - kbase_hwcnt_context_enable( - kbdev->hwcnt_gpu_ctx); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); #if MALI_USE_CSF - kbase_csf_scheduler_spin_unlock(kbdev, - flags); + kbase_csf_scheduler_spin_unlock(kbdev, flags); #endif backend->hwcnt_disabled = false; } - backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; + backend->shaders_state = + KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; } break; @@ -2118,7 +2261,8 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) case KBASE_SHADERS_RESET_WAIT: /* Reset complete */ if (!backend->in_reset) - backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; + backend->shaders_state = + KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; break; } @@ -2131,8 +2275,7 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) dev_dbg(kbdev->dev, "Shader state transition: %s to %s\n", kbase_shader_core_state_to_string(prev_state), - kbase_shader_core_state_to_string( - backend->shaders_state)); + kbase_shader_core_state_to_string(backend->shaders_state)); } } while (backend->shaders_state != prev_state); @@ -2151,10 +2294,10 @@ static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev) #if !MALI_USE_CSF if (kbdev->pm.backend.shaders_desired && - kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON) + kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON) in_desired_state = false; else if (!kbdev->pm.backend.shaders_desired && - kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) + kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) in_desired_state = false; #else in_desired_state &= kbase_pm_mcu_is_in_desired_state(kbdev); @@ -2175,21 +2318,21 @@ static bool kbase_pm_is_in_desired_state(struct kbase_device *kbdev) return in_desired_state; } -static bool kbase_pm_is_in_desired_state_with_l2_powered( - struct kbase_device *kbdev) +static bool kbase_pm_is_in_desired_state_with_l2_powered(struct kbase_device *kbdev) { bool in_desired_state = false; unsigned long flags; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (kbase_pm_is_in_desired_state_nolock(kbdev) && - (kbdev->pm.backend.l2_state == KBASE_L2_ON)) + (kbdev->pm.backend.l2_state == KBASE_L2_ON)) in_desired_state = true; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return in_desired_state; } +#if !MALI_USE_CSF static void kbase_pm_trace_power_state(struct kbase_device 
*kbdev) { lockdep_assert_held(&kbdev->hwaccess_lock); @@ -2210,37 +2353,23 @@ static void kbase_pm_trace_power_state(struct kbase_device *kbdev) } } - KBASE_TLSTREAM_AUX_PM_STATE( - kbdev, - KBASE_PM_CORE_L2, - kbase_pm_get_ready_cores( - kbdev, KBASE_PM_CORE_L2)); - KBASE_TLSTREAM_AUX_PM_STATE( - kbdev, - KBASE_PM_CORE_SHADER, - kbase_pm_get_ready_cores( - kbdev, KBASE_PM_CORE_SHADER)); - KBASE_TLSTREAM_AUX_PM_STATE( - kbdev, - KBASE_PM_CORE_TILER, - kbase_pm_get_ready_cores( - kbdev, - KBASE_PM_CORE_TILER)); + KBASE_TLSTREAM_AUX_PM_STATE(kbdev, TL_PM_STATE_L2, + kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2)); + KBASE_TLSTREAM_AUX_PM_STATE(kbdev, TL_PM_STATE_SHADER, + kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER)); + KBASE_TLSTREAM_AUX_PM_STATE(kbdev, TL_PM_STATE_TILER, + kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER)); if (corestack_driver_control) - KBASE_TLSTREAM_AUX_PM_STATE( - kbdev, - KBASE_PM_CORE_STACK, - kbase_pm_get_ready_cores( - kbdev, - KBASE_PM_CORE_STACK)); + KBASE_TLSTREAM_AUX_PM_STATE(kbdev, TL_PM_STATE_STACK, + kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK)); } +#endif void kbase_pm_update_state(struct kbase_device *kbdev) { #if !MALI_USE_CSF - enum kbase_shader_core_state prev_shaders_state = - kbdev->pm.backend.shaders_state; + enum kbase_shader_core_state prev_shaders_state = kbdev->pm.backend.shaders_state; #else enum kbase_mcu_state prev_mcu_state = kbdev->pm.backend.mcu_state; #endif @@ -2262,11 +2391,10 @@ void kbase_pm_update_state(struct kbase_device *kbdev) * the L2. */ if (prev_shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF && - kbdev->pm.backend.shaders_state == - KBASE_SHADERS_OFF_CORESTACK_OFF) { + kbdev->pm.backend.shaders_state == KBASE_SHADERS_OFF_CORESTACK_OFF) { if (kbase_pm_l2_update_state(kbdev)) return; - } + } #else if (kbase_pm_mcu_update_state(kbdev)) return; @@ -2279,31 +2407,30 @@ void kbase_pm_update_state(struct kbase_device *kbdev) #endif if (kbase_pm_is_in_desired_state_nolock(kbdev)) { - KBASE_KTRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, - kbdev->pm.backend.shaders_avail); + KBASE_KTRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, kbdev->pm.backend.shaders_avail); +#if !MALI_USE_CSF kbase_pm_trace_power_state(kbdev); +#endif KBASE_KTRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, 0); wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait); } } -static enum hrtimer_restart -shader_tick_timer_callback(struct hrtimer *timer) +static enum hrtimer_restart shader_tick_timer_callback(struct hrtimer *timer) { - struct kbasep_pm_tick_timer_state *stt = container_of(timer, - struct kbasep_pm_tick_timer_state, timer); - struct kbase_device *kbdev = container_of(stt, struct kbase_device, - pm.backend.shader_tick_timer); + struct kbasep_pm_tick_timer_state *stt = + container_of(timer, struct kbasep_pm_tick_timer_state, timer); + struct kbase_device *kbdev = + container_of(stt, struct kbase_device, pm.backend.shader_tick_timer); struct kbase_pm_backend_data *backend = &kbdev->pm.backend; unsigned long flags; enum hrtimer_restart restart = HRTIMER_NORESTART; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (stt->remaining_ticks && - backend->shaders_state == KBASE_SHADERS_WAIT_OFF_CORESTACK_ON) { + if (stt->remaining_ticks && backend->shaders_state == KBASE_SHADERS_WAIT_OFF_CORESTACK_ON) { stt->remaining_ticks--; /* If the remaining ticks just changed from 1 to 0, invoke the @@ -2333,7 +2460,6 @@ int kbase_pm_state_machine_init(struct kbase_device *kbdev) INIT_WORK(&stt->work, shader_poweroff_timer_stop_callback); - stt->needed = false; 
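/*
 * Illustrative side note (not from the patch): on the job-manager (non-CSF)
 * path, kbase_pm_is_in_desired_state_nolock() reduces to the pure predicate
 * modelled below - shaders desired means the state machine must have reached
 * SHADERS_ON_CORESTACK_ON, not desired means SHADERS_OFF_CORESTACK_OFF. The
 * real routine also folds in the L2 state, and the *_with_l2_powered variant
 * additionally requires KBASE_L2_ON. Enum values here are assumed.
 */
#include <stdbool.h>
#include <stdio.h>

enum shaders_state_model {
	SHADERS_OFF_CORESTACK_OFF,
	SHADERS_ON_CORESTACK_ON,
	SHADERS_TRANSITIONING,
};

static bool shaders_in_desired_state(bool shaders_desired, enum shaders_state_model state)
{
	if (shaders_desired)
		return state == SHADERS_ON_CORESTACK_ON;
	return state == SHADERS_OFF_CORESTACK_OFF;
}

int main(void)
{
	printf("%d\n", shaders_in_desired_state(true, SHADERS_ON_CORESTACK_ON)); /* 1 */
	printf("%d\n", shaders_in_desired_state(true, SHADERS_TRANSITIONING));   /* 0 */
	return 0;
}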
hrtimer_init(&stt->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); stt->timer.function = shader_tick_timer_callback; stt->configured_interval = HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS); @@ -2341,6 +2467,7 @@ int kbase_pm_state_machine_init(struct kbase_device *kbdev) stt->configured_ticks = stt->default_ticks; #if MALI_USE_CSF + kbdev->pm.backend.core_idle_wq = alloc_workqueue("coreoff_wq", WQ_HIGHPRI | WQ_UNBOUND, 1); if (!kbdev->pm.backend.core_idle_wq) { destroy_workqueue(stt->wq); @@ -2400,8 +2527,7 @@ void kbase_pm_reset_start_locked(struct kbase_device *kbdev) */ backend->hwcnt_desired = false; if (!backend->hwcnt_disabled) { - WARN_ON(!kbase_hwcnt_context_disable_atomic( - kbdev->hwcnt_gpu_ctx)); + WARN_ON(!kbase_hwcnt_context_disable_atomic(kbdev->hwcnt_gpu_ctx)); backend->hwcnt_disabled = true; } @@ -2448,7 +2574,7 @@ void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev, const char *tim CSTD_UNUSED(flags); dev_err(kbdev->dev, "Desired state :\n"); dev_err(kbdev->dev, "\tShader=%016llx\n", - kbdev->pm.backend.shaders_desired ? kbdev->pm.backend.shaders_avail : 0); + kbdev->pm.backend.shaders_desired ? kbdev->pm.backend.shaders_avail : 0); #else dev_err(kbdev->dev, "GPU pm state :\n"); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -2459,10 +2585,8 @@ void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev, const char *tim kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off); dev_err(kbdev->dev, "\tgpu_poweroff_wait_work pending %d", work_pending(&kbdev->pm.backend.gpu_poweroff_wait_work)); - dev_err(kbdev->dev, "\tMCU desired = %d\n", - kbase_pm_is_mcu_desired(kbdev)); - dev_err(kbdev->dev, "\tMCU sw state = %d\n", - kbdev->pm.backend.mcu_state); + dev_err(kbdev->dev, "\tMCU desired = %d\n", kbase_pm_is_mcu_desired(kbdev)); + dev_err(kbdev->dev, "\tMCU sw state = %d\n", kbdev->pm.backend.mcu_state); dev_err(kbdev->dev, "\tL2 desired = %d (locked_off: %d)\n", kbase_pm_is_l2_desired(kbdev), kbdev->pm.backend.policy_change_clamp_state_to_off); dev_err(kbdev->dev, "\tL2 sw state = %d\n", @@ -2474,40 +2598,22 @@ void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev, const char *tim spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); #endif dev_err(kbdev->dev, "Current state :\n"); - dev_err(kbdev->dev, "\tShader=%08x%08x\n", - kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_READY_HI)), - kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_READY_LO))); - dev_err(kbdev->dev, "\tTiler =%08x%08x\n", - kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_READY_HI)), - kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_READY_LO))); - dev_err(kbdev->dev, "\tL2 =%08x%08x\n", - kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_READY_HI)), - kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_READY_LO))); + dev_err(kbdev->dev, "\tShader=%016llx\n", + kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(SHADER_READY))); + dev_err(kbdev->dev, "\tTiler =%016llx\n", + kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(TILER_READY))); + dev_err(kbdev->dev, "\tL2 =%016llx\n", + kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(L2_READY))); #if MALI_USE_CSF kbase_csf_debug_dump_registers(kbdev); #endif dev_err(kbdev->dev, "Cores transitioning :\n"); - dev_err(kbdev->dev, "\tShader=%08x%08x\n", - kbase_reg_read(kbdev, GPU_CONTROL_REG( - SHADER_PWRTRANS_HI)), - kbase_reg_read(kbdev, GPU_CONTROL_REG( - SHADER_PWRTRANS_LO))); - dev_err(kbdev->dev, "\tTiler =%08x%08x\n", - kbase_reg_read(kbdev, GPU_CONTROL_REG( - TILER_PWRTRANS_HI)), - kbase_reg_read(kbdev, GPU_CONTROL_REG( - TILER_PWRTRANS_LO))); - dev_err(kbdev->dev, "\tL2 
=%08x%08x\n", - kbase_reg_read(kbdev, GPU_CONTROL_REG( - L2_PWRTRANS_HI)), - kbase_reg_read(kbdev, GPU_CONTROL_REG( - L2_PWRTRANS_LO))); + dev_err(kbdev->dev, "\tShader=%016llx\n", + kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(SHADER_PWRTRANS))); + dev_err(kbdev->dev, "\tTiler =%016llx\n", + kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(TILER_PWRTRANS))); + dev_err(kbdev->dev, "\tL2 =%016llx\n", + kbase_reg_read64(kbdev, GPU_CONTROL_ENUM(L2_PWRTRANS))); dump_stack(); } @@ -2546,7 +2652,8 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); #if MALI_USE_CSF - timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT)); + timeout = (unsigned long)kbase_csf_timeout_in_jiffies( + kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT)); #else timeout = msecs_to_jiffies(PM_TIMEOUT_MS); #endif @@ -2555,11 +2662,11 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev) #if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE remaining = wait_event_killable_timeout(kbdev->pm.backend.gpu_in_desired_state_wait, kbase_pm_is_in_desired_state_with_l2_powered(kbdev), - timeout); + (long)timeout); #else - remaining = wait_event_timeout( - kbdev->pm.backend.gpu_in_desired_state_wait, - kbase_pm_is_in_desired_state_with_l2_powered(kbdev), timeout); + remaining = wait_event_timeout(kbdev->pm.backend.gpu_in_desired_state_wait, + kbase_pm_is_in_desired_state_with_l2_powered(kbdev), + (long)timeout); #endif if (!remaining) { @@ -2571,9 +2678,7 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev) kbase_pm_timed_out(kbdev, "Wait for desired PM state with L2 powered timed out"); err = -ETIMEDOUT; } else if (remaining < 0) { - dev_info( - kbdev->dev, - "Wait for desired PM state with L2 powered got interrupted"); + dev_info(kbdev->dev, "Wait for desired PM state with L2 powered got interrupted"); err = (int)remaining; } @@ -2587,7 +2692,7 @@ static int pm_wait_for_desired_state(struct kbase_device *kbdev, bool killable_w #if MALI_USE_CSF long timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT)); #else - long timeout = msecs_to_jiffies(PM_TIMEOUT_MS); + long timeout = (long)msecs_to_jiffies(PM_TIMEOUT_MS); #endif int err = 0; @@ -2673,22 +2778,18 @@ int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev) /* Wait for core mask update to complete */ #if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE - remaining = wait_event_killable_timeout( - kbdev->pm.backend.gpu_in_desired_state_wait, - core_mask_update_done(kbdev), timeout); + remaining = wait_event_killable_timeout(kbdev->pm.backend.gpu_in_desired_state_wait, + core_mask_update_done(kbdev), timeout); #else - remaining = wait_event_timeout( - kbdev->pm.backend.gpu_in_desired_state_wait, - core_mask_update_done(kbdev), timeout); + remaining = wait_event_timeout(kbdev->pm.backend.gpu_in_desired_state_wait, + core_mask_update_done(kbdev), timeout); #endif if (!remaining) { kbase_pm_timed_out(kbdev, "Wait for cores down scaling timed out"); err = -ETIMEDOUT; } else if (remaining < 0) { - dev_info( - kbdev->dev, - "Wait for cores down scaling got interrupted"); + dev_info(kbdev->dev, "Wait for cores down scaling got interrupted"); err = (int)remaining; } @@ -2723,7 +2824,7 @@ static int pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev, bool k /* Handling of timeout error isn't supported for arbiter builds */ const long timeout = MAX_SCHEDULE_TIMEOUT; #else - const long timeout = msecs_to_jiffies(PM_TIMEOUT_MS); + const long timeout 
= (long)msecs_to_jiffies(PM_TIMEOUT_MS); #endif #endif int err = 0; @@ -2796,20 +2897,21 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev) * and unmask them all. */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK), GPU_IRQ_REG_ALL); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF); + kbase_reg_write32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_CLEAR), 0xFFFFFFFF); + kbase_reg_write32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_MASK), 0xFFFFFFFF); - kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); + kbase_reg_write32(kbdev, MMU_CONTROL_ENUM(IRQ_CLEAR), 0xFFFFFFFF); #if MALI_USE_CSF /* Enable only the Page fault bits part */ - kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), 0xFFFF); + kbase_reg_write32(kbdev, MMU_CONTROL_ENUM(IRQ_MASK), 0xFFFF); #else - kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), 0xFFFFFFFF); + kbase_reg_write32(kbdev, MMU_CONTROL_ENUM(IRQ_MASK), 0xFFFFFFFF); #endif + } KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts); @@ -2823,13 +2925,14 @@ void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev) */ lockdep_assert_held(&kbdev->hwaccess_lock); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK), 0); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL); + kbase_reg_write32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_MASK), 0); + kbase_reg_write32(kbdev, JOB_CONTROL_ENUM(JOB_IRQ_CLEAR), 0xFFFFFFFF); + + kbase_reg_write32(kbdev, MMU_CONTROL_ENUM(IRQ_MASK), 0); + kbase_reg_write32(kbdev, MMU_CONTROL_ENUM(IRQ_CLEAR), 0xFFFFFFFF); - kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), 0); - kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); } void kbase_pm_disable_interrupts(struct kbase_device *kbdev) @@ -2895,8 +2998,7 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) #ifdef CONFIG_MALI_ARBITER_SUPPORT if (WARN_ON(kbase_pm_is_gpu_lost(kbdev))) { - dev_err(kbdev->dev, - "%s: Cannot power up while GPU lost", __func__); + dev_err(kbdev->dev, "%s: Cannot power up while GPU lost", __func__); return; } #endif @@ -2946,16 +3048,14 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) #ifdef CONFIG_MALI_ARBITER_SUPPORT else { if (kbdev->arb.arb_if) { - struct kbase_arbiter_vm_state *arb_vm_state = - kbdev->pm.arb_vm_state; + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; /* In the case that the GPU has just been granted by * the Arbiter, a reset will have already been done. * However, it is still necessary to initialize the GPU. */ if (arb_vm_state->vm_arb_starting) - kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS | - PM_NO_RESET); + kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS | PM_NO_RESET); } } /* @@ -2963,8 +3063,7 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) * that a repartitioning occurred. In this case the current config * should be read again. 
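/*
 * Illustrative side note (not from the patch): the debug dump above stops
 * stitching 64-bit core masks together from separate *_HI/*_LO 32-bit reads
 * printed as "%08x%08x", and instead prints a single kbase_reg_read64()
 * value as "%016llx". The two presentations are equivalent, as this small
 * stand-alone check demonstrates.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t combine_halves(uint32_t hi, uint32_t lo)
{
	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	uint32_t hi = 0x00000001, lo = 0x0000000f;

	printf("%08" PRIx32 "%08" PRIx32 "\n", hi, lo);      /* 000000010000000f */
	printf("%016" PRIx64 "\n", combine_halves(hi, lo));  /* 000000010000000f */
	return 0;
}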
*/ - kbase_gpuprops_get_curr_config_props(kbdev, - &kbdev->gpu_props.curr_config); + kbase_gpuprops_get_curr_config_props(kbdev, &kbdev->gpu_props.curr_config); #endif /* CONFIG_MALI_ARBITER_SUPPORT */ mutex_lock(&kbdev->mmu_hw_mutex); @@ -2973,12 +3072,10 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&kbdev->mmu_hw_mutex); - if (kbdev->dummy_job_wa.flags & - KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) { + if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) { spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_dummy_job_wa_execute(kbdev, - kbase_pm_get_present_cores(kbdev, - KBASE_PM_CORE_SHADER)); + kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER)); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } @@ -2990,14 +3087,16 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) backend->gpu_ready = true; backend->l2_desired = true; #if MALI_USE_CSF - if (reset_required) { - /* GPU reset was done after the power on, so send the post - * reset event instead. This is okay as GPU power off event - * is same as pre GPU reset event. - */ - kbase_ipa_control_handle_gpu_reset_post(kbdev); - } else { - kbase_ipa_control_handle_gpu_power_on(kbdev); + { + if (reset_required) { + /* GPU reset was done after the power on, so send the post + * reset event instead. This is okay as GPU power off event + * is same as pre GPU reset event. + */ + kbase_ipa_control_handle_gpu_reset_post(kbdev); + } else { + kbase_ipa_control_handle_gpu_power_on(kbdev); + } } #endif kbase_pm_update_state(kbdev); @@ -3052,14 +3151,16 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev) kbase_pm_cache_snoop_disable(kbdev); #if MALI_USE_CSF - kbase_ipa_control_handle_gpu_power_off(kbdev); + { + kbase_ipa_control_handle_gpu_power_off(kbdev); + } #endif if (kbase_is_gpu_removed(kbdev) #ifdef CONFIG_MALI_ARBITER_SUPPORT - || kbase_pm_is_gpu_lost(kbdev)) { + || kbase_pm_is_gpu_lost(kbdev)) { #else - ) { + ) { #endif /* Ensure we unblock any threads that are stuck waiting * for the GPU @@ -3114,8 +3215,7 @@ static void kbase_pm_wait_for_reset(struct kbase_device *kbdev) { lockdep_assert_held(&kbdev->pm.lock); - wait_event(kbdev->pm.backend.reset_done_wait, - (kbdev->pm.backend.reset_done)); + wait_event(kbdev->pm.backend.reset_done_wait, (kbdev->pm.backend.reset_done)); kbdev->pm.backend.reset_done = false; } @@ -3136,26 +3236,24 @@ static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer) return HRTIMER_NORESTART; } -static int kbase_set_gpu_quirks(struct kbase_device *kbdev, const u32 prod_id) +static int kbase_set_gpu_quirks(struct kbase_device *kbdev) { #if MALI_USE_CSF - kbdev->hw_quirks_gpu = - kbase_reg_read(kbdev, GPU_CONTROL_REG(CSF_CONFIG)); + if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(CSF_CONFIG))) + kbdev->hw_quirks_gpu = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(CSF_CONFIG)); #else - u32 hw_quirks_gpu = kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG)); + u32 hw_quirks_gpu = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(JM_CONFIG)); - if (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == GPU_ID2_PRODUCT_TMIX) { + if (kbdev->gpu_props.gpu_id.product_model == GPU_ID_PRODUCT_TMIX) { /* Only for tMIx */ u32 coherency_features; - coherency_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(COHERENCY_FEATURES)); + coherency_features = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(COHERENCY_FEATURES)); /* (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly * documented for tMIx so force 
correct value here. */ - if (coherency_features == - COHERENCY_FEATURE_BIT(COHERENCY_ACE)) { + if (coherency_features == COHERENCY_FEATURE_BIT(COHERENCY_ACE)) { hw_quirks_gpu |= (COHERENCY_ACE_LITE | COHERENCY_ACE) << JM_FORCE_COHERENCY_FEATURES_SHIFT; } @@ -3168,11 +3266,10 @@ static int kbase_set_gpu_quirks(struct kbase_device *kbdev, const u32 prod_id) #endif /* !MALI_USE_CSF */ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_IDVS_GROUP_SIZE)) { - int default_idvs_group_size = 0xF; + u32 default_idvs_group_size = 0xF; u32 group_size = 0; - if (of_property_read_u32(kbdev->dev->of_node, "idvs-group-size", - &group_size)) + if (of_property_read_u32(kbdev->dev->of_node, "idvs-group-size", &group_size)) group_size = default_idvs_group_size; if (group_size > IDVS_GROUP_MAX_SIZE) { @@ -3192,19 +3289,16 @@ static int kbase_set_gpu_quirks(struct kbase_device *kbdev, const u32 prod_id) return 0; } -static int kbase_set_sc_quirks(struct kbase_device *kbdev, const u32 prod_id) +static int kbase_set_sc_quirks(struct kbase_device *kbdev) { - u32 hw_quirks_sc = kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_CONFIG)); + u32 hw_quirks_sc = 0; + + if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(SHADER_CONFIG))) + hw_quirks_sc = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(SHADER_CONFIG)); if (kbase_is_gpu_removed(kbdev)) return -EIO; - if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */ - hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE; - else if (prod_id >= 0x750 && prod_id <= 0x880) /* T76x, T8xx */ - hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES; - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_2968_TTRX_3162)) hw_quirks_sc |= SC_VAR_ALGORITHM; @@ -3218,8 +3312,10 @@ static int kbase_set_sc_quirks(struct kbase_device *kbdev, const u32 prod_id) static int kbase_set_tiler_quirks(struct kbase_device *kbdev) { - u32 hw_quirks_tiler = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_CONFIG)); + u32 hw_quirks_tiler = 0; + + if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(TILER_CONFIG))) + hw_quirks_tiler = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(TILER_CONFIG)); if (kbase_is_gpu_removed(kbdev)) return -EIO; @@ -3236,9 +3332,6 @@ static int kbase_set_tiler_quirks(struct kbase_device *kbdev) static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) { struct device_node *np = kbdev->dev->of_node; - const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - const u32 prod_id = - (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT; int error = 0; kbdev->hw_quirks_gpu = 0; @@ -3255,7 +3348,7 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) dev_info(kbdev->dev, "Found quirks_gpu = [0x%x] in Devicetree\n", kbdev->hw_quirks_gpu); } else { - error = kbase_set_gpu_quirks(kbdev, prod_id); + error = kbase_set_gpu_quirks(kbdev); if (error) return error; } @@ -3265,7 +3358,7 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) dev_info(kbdev->dev, "Found quirks_sc = [0x%x] in Devicetree\n", kbdev->hw_quirks_sc); } else { - error = kbase_set_sc_quirks(kbdev, prod_id); + error = kbase_set_sc_quirks(kbdev); if (error) return error; } @@ -3282,7 +3375,7 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) if (!of_property_read_u32(np, "quirks-mmu", &kbdev->hw_quirks_mmu) || !of_property_read_u32(np, "quirks_mmu", &kbdev->hw_quirks_mmu)) { - dev_info(kbdev->dev, "Found quirks_mmu = [0x%x] in Devicetree\n", + dev_info(kbdev->dev, "Found MMU quirks = [0x%x] in Devicetree\n", kbdev->hw_quirks_mmu); } else { error = kbase_set_mmu_quirks(kbdev); @@ 
-3293,27 +3386,26 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) { - kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), - kbdev->hw_quirks_sc); + if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(SHADER_CONFIG))) + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(SHADER_CONFIG), kbdev->hw_quirks_sc); - kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG), - kbdev->hw_quirks_tiler); + if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(TILER_CONFIG))) + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(TILER_CONFIG), kbdev->hw_quirks_tiler); - kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), - kbdev->hw_quirks_mmu); + if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(L2_MMU_CONFIG))) + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(L2_MMU_CONFIG), kbdev->hw_quirks_mmu); #if MALI_USE_CSF - kbase_reg_write(kbdev, GPU_CONTROL_REG(CSF_CONFIG), - kbdev->hw_quirks_gpu); + if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(CSF_CONFIG))) + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(CSF_CONFIG), kbdev->hw_quirks_gpu); #else - kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG), - kbdev->hw_quirks_gpu); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(JM_CONFIG), kbdev->hw_quirks_gpu); #endif } void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) { - if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) && - !kbdev->cci_snoop_enabled) { +#if !MALI_USE_CSF + if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) && !kbdev->cci_snoop_enabled) { #if IS_ENABLED(CONFIG_ARM64) if (kbdev->snoop_enable_smc != 0) kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0); @@ -3321,10 +3413,12 @@ void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n"); kbdev->cci_snoop_enabled = true; } +#endif /* !MALI_USE_CSF */ } void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev) { +#if !MALI_USE_CSF if (kbdev->cci_snoop_enabled) { #if IS_ENABLED(CONFIG_ARM64) if (kbdev->snoop_disable_smc != 0) { @@ -3335,6 +3429,7 @@ void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev) dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n"); kbdev->cci_snoop_enabled = false; } +#endif /* !MALI_USE_CSF */ } #if !MALI_USE_CSF @@ -3402,6 +3497,7 @@ static int kbase_pm_hw_reset(struct kbase_device *kbdev) static int kbase_pm_do_reset(struct kbase_device *kbdev) { struct kbasep_reset_timeout_data rtdata; + u32 reg_offset, reg_val; int ret; #if MALI_USE_CSF @@ -3416,8 +3512,12 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev, kbdev); + reg_offset = GPU_CONTROL_ENUM(GPU_IRQ_MASK); + reg_val = RESET_COMPLETED; + /* Unmask the reset complete interrupt only */ - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED); + kbase_reg_write32(kbdev, reg_offset, reg_val); + if (kbdev->pm.backend.callback_soft_reset) { ret = kbdev->pm.backend.callback_soft_reset(kbdev); @@ -3426,8 +3526,10 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) else if (ret > 0) return 0; } else { - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_SOFT_RESET); + { + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), + GPU_COMMAND_SOFT_RESET); + } } /* Initialize a structure for tracking the status of the reset */ @@ -3438,8 +3540,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); rtdata.timer.function = kbasep_reset_timeout; - hrtimer_start(&rtdata.timer, 
HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), - HRTIMER_MODE_REL); + hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), HRTIMER_MODE_REL); /* Wait for the RESET_COMPLETED interrupt to be raised */ kbase_pm_wait_for_reset(kbdev); @@ -3451,15 +3552,19 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) return 0; } + reg_offset = GPU_CONTROL_ENUM(GPU_IRQ_RAWSTAT); + reg_val = RESET_COMPLETED; + + /* No interrupt has been received - check if the RAWSTAT register says - * the reset has completed + * the reset has completed. */ - if ((kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & - RESET_COMPLETED)) { + if (kbase_reg_read32(kbdev, reg_offset) & reg_val) { /* The interrupt is set in the RAWSTAT; this suggests that the * interrupts are not getting to the CPU */ - dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n"); + dev_err(kbdev->dev, + "Reset interrupt didn't reach CPU. Check interrupt assignments.\n"); /* If interrupts aren't working we can't continue. */ destroy_hrtimer_on_stack(&rtdata.timer); return -EINVAL; @@ -3477,17 +3582,20 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) #ifdef CONFIG_MALI_ARBITER_SUPPORT if (!kbdev->arb.arb_if) { #endif /* CONFIG_MALI_ARBITER_SUPPORT */ - dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n", - RESET_TIMEOUT); + dev_err(kbdev->dev, + "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n", + RESET_TIMEOUT); KBASE_KTRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, 0); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_HARD_RESET); + + { + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), + GPU_COMMAND_HARD_RESET); + } /* Restart the timer to wait for the hard reset to complete */ rtdata.timed_out = false; - hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), - HRTIMER_MODE_REL); + hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), HRTIMER_MODE_REL); /* Wait for the RESET_COMPLETED interrupt to be raised */ kbase_pm_wait_for_reset(kbdev); @@ -3503,7 +3611,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms) GPU_IRQ_RAWSTAT: %d\n", - RESET_TIMEOUT, kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT))); + RESET_TIMEOUT, kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_RAWSTAT))); /* Last resort, trigger a hardware reset of the GPU */ return kbase_pm_hw_reset(kbdev); @@ -3516,8 +3624,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) int kbase_pm_protected_mode_enable(struct kbase_device *const kbdev) { - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_SET_PROTECTED_MODE); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), GPU_COMMAND_SET_PROTECTED_MODE); return 0; } @@ -3548,6 +3655,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) kbase_pm_turn_on_sc_power_rails_locked(kbdev); #endif + /* Ensure interrupts are off to begin with, this also clears any * outstanding interrupts */ @@ -3567,8 +3675,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) #ifdef CONFIG_MALI_ARBITER_SUPPORT if (!(flags & PM_NO_RESET)) #endif /* CONFIG_MALI_ARBITER_SUPPORT */ - err = kbdev->protected_ops->protected_mode_disable( - kbdev->protected_dev); + err = kbdev->protected_ops->protected_mode_disable(kbdev->protected_dev); spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); #if MALI_USE_CSF @@ -3588,6 +3695,7 @@ int 
kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) if (err) goto exit; + if (flags & PM_HW_ISSUES_DETECT) { err = kbase_pm_hw_issues_detect(kbdev); if (err) @@ -3596,28 +3704,25 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) kbase_pm_hw_issues_apply(kbdev); kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency); + kbase_amba_set_shareable_cache_support(kbdev); /* Sanity check protected mode was left after reset */ - WARN_ON(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & - GPU_STATUS_PROTECTED_MODE_ACTIVE); + WARN_ON(kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_STATUS)) & + GPU_STATUS_PROTECTED_MODE_ACTIVE); /* If cycle counter was in use re-enable it, enable_irqs will only be * false when called from kbase_pm_powerup */ - if (kbdev->pm.backend.gpu_cycle_counter_requests && - (flags & PM_ENABLE_IRQS)) { + if (kbdev->pm.backend.gpu_cycle_counter_requests && (flags & PM_ENABLE_IRQS)) { kbase_pm_enable_interrupts(kbdev); /* Re-enable the counters if we need to */ - spin_lock_irqsave( - &kbdev->pm.backend.gpu_cycle_counter_requests_lock, - irq_flags); + spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, irq_flags); if (kbdev->pm.backend.gpu_cycle_counter_requests) - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CYCLE_COUNT_START); - spin_unlock_irqrestore( - &kbdev->pm.backend.gpu_cycle_counter_requests_lock, - irq_flags); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), + GPU_COMMAND_CYCLE_COUNT_START); + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, + irq_flags); kbase_pm_disable_interrupts(kbdev); } @@ -3652,34 +3757,30 @@ exit: * When this function is called the l2 cache must be on - i.e., the GPU must be * on. */ -static void -kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev) +static void kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev) { unsigned long flags; - spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, - flags); + spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, flags); ++kbdev->pm.backend.gpu_cycle_counter_requests; if (kbdev->pm.backend.gpu_cycle_counter_requests == 1) - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CYCLE_COUNT_START); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), + GPU_COMMAND_CYCLE_COUNT_START); else { /* This might happen after GPU reset. * Then counter needs to be kicked. 
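/*
 * Illustrative side note (not from the patch): the cycle-counter requests
 * handled above are reference counted under gpu_cycle_counter_requests_lock.
 * CYCLE_COUNT_START is issued only on the 0 -> 1 transition, and the release
 * path further down issues CYCLE_COUNT_STOP on the 1 -> 0 transition. A
 * minimal stand-alone model of that counting, with assumed names:
 */
#include <stdio.h>

struct cycle_counter_model {
	unsigned int requests;
};

/* Returns 1 when the caller must send CYCLE_COUNT_START. */
static int cycle_counter_request(struct cycle_counter_model *m)
{
	return ++m->requests == 1;
}

/* Returns 1 when the caller must send CYCLE_COUNT_STOP. */
static int cycle_counter_release(struct cycle_counter_model *m)
{
	return --m->requests == 0;
}

int main(void)
{
	struct cycle_counter_model m = { 0 };

	printf("%d\n", cycle_counter_request(&m)); /* 1: first user, start counter */
	printf("%d\n", cycle_counter_request(&m)); /* 0: already running */
	printf("%d\n", cycle_counter_release(&m)); /* 0: still one user left */
	printf("%d\n", cycle_counter_release(&m)); /* 1: last user, stop counter */
	return 0;
}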
*/ -#if !IS_ENABLED(CONFIG_MALI_NO_MALI) - if (!(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & +#if !IS_ENABLED(CONFIG_MALI_NO_MALI) && !MALI_USE_CSF + if (!(kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_STATUS)) & GPU_STATUS_CYCLE_COUNT_ACTIVE)) { - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CYCLE_COUNT_START); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), + GPU_COMMAND_CYCLE_COUNT_START); } #endif } - spin_unlock_irqrestore( - &kbdev->pm.backend.gpu_cycle_counter_requests_lock, - flags); + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, flags); } void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev) @@ -3688,8 +3789,7 @@ void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev) KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); - KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < - INT_MAX); + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < INT_MAX); kbase_pm_wait_for_l2_powered(kbdev); @@ -3704,8 +3804,7 @@ void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev) KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); - KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < - INT_MAX); + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < INT_MAX); kbase_pm_request_gpu_cycle_counter_do_request(kbdev); } @@ -3720,20 +3819,17 @@ void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->hwaccess_lock); - spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, - flags); + spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, flags); KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests > 0); --kbdev->pm.backend.gpu_cycle_counter_requests; if (kbdev->pm.backend.gpu_cycle_counter_requests == 0) - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CYCLE_COUNT_STOP); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_COMMAND), + GPU_COMMAND_CYCLE_COUNT_STOP); - spin_unlock_irqrestore( - &kbdev->pm.backend.gpu_cycle_counter_requests_lock, - flags); + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, flags); } void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev) diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h index d7f19fb..c2d7bdb 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h @@ -31,7 +31,6 @@ #include "backend/gpu/mali_kbase_pm_ca.h" #include "mali_kbase_pm_policy.h" - /** * kbase_pm_dev_idle - The GPU is idle. 
* @@ -56,7 +55,7 @@ void kbase_pm_dev_activate(struct kbase_device *kbdev); * * @kbdev: The kbase device structure for the device (must be a valid * pointer) - * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * @core_type: The type of core (see the enum kbase_pm_core_type enumeration) * * This function can be called by the active power policy to return a bitmask of * the cores (of a specified type) present in the GPU device and also a count of @@ -64,15 +63,14 @@ void kbase_pm_dev_activate(struct kbase_device *kbdev); * * Return: The bit mask of cores present */ -u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, - enum kbase_pm_core_type type); +u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, enum kbase_pm_core_type core_type); /** * kbase_pm_get_active_cores - Get details of the cores that are currently * active in the device. * * @kbdev: The kbase device structure for the device (must be a valid pointer) - * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * @core_type: The type of core (see the enum kbase_pm_core_type enumeration) * * This function can be called by the active power policy to return a bitmask of * the cores (of a specified type) that are actively processing work (i.e. @@ -80,15 +78,14 @@ u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, * * Return: The bit mask of active cores */ -u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, - enum kbase_pm_core_type type); +u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, enum kbase_pm_core_type core_type); /** * kbase_pm_get_trans_cores - Get details of the cores that are currently * transitioning between power states. * * @kbdev: The kbase device structure for the device (must be a valid pointer) - * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * @core_type: The type of core (see the enum kbase_pm_core_type enumeration) * * This function can be called by the active power policy to return a bitmask of * the cores (of a specified type) that are currently transitioning between @@ -96,15 +93,14 @@ u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, * * Return: The bit mask of transitioning cores */ -u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, - enum kbase_pm_core_type type); +u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, enum kbase_pm_core_type core_type); /** * kbase_pm_get_ready_cores - Get details of the cores that are currently * powered and ready for jobs. * * @kbdev: The kbase device structure for the device (must be a valid pointer) - * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * @core_type: The type of core (see the enum kbase_pm_core_type enumeration) * * This function can be called by the active power policy to return a bitmask of * the cores (of a specified type) that are powered and ready for jobs (they may @@ -112,8 +108,7 @@ u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, * * Return: The bit mask of ready cores */ -u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, - enum kbase_pm_core_type type); +u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, enum kbase_pm_core_type core_type); /** * kbase_pm_clock_on - Turn the clock for the device on, and enable device @@ -348,6 +343,8 @@ void kbase_pm_update_state(struct kbase_device *kbdev); * shader poweroff timer * @kbdev: Device pointer * + * This function must be called only when a kbase device is initialized. 
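/*
 * Illustrative side note (not from the patch): the kbase_pm_get_*_cores()
 * helpers whose kernel-doc is being fixed up here all return a u64 bitmask
 * with one bit per core of the requested type. A caller can derive both a
 * core count and a per-core iteration from that mask, for example (the mask
 * value below is made up; in-kernel code would use hweight64() for the count):
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t present = 0x0000000000000017ULL; /* example: cores 0, 1, 2, 4 */
	unsigned int count = 0;
	unsigned int bit;
	uint64_t mask;

	for (mask = present; mask; mask &= mask - 1) /* clear lowest set bit */
		count++;
	printf("cores present: %u\n", count); /* 4 */

	for (bit = 0; bit < 64; bit++)
		if (present & (1ULL << bit))
			printf("core %u present\n", bit); /* 0, 1, 2, 4 */

	return 0;
}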
+ * * Return: 0 on success, error code on error */ int kbase_pm_state_machine_init(struct kbase_device *kbdev); @@ -375,8 +372,8 @@ void kbase_pm_update_cores_state(struct kbase_device *kbdev); * kbasep_pm_metrics_init - Initialize the metrics gathering framework. * @kbdev: The kbase device structure for the device (must be a valid pointer) * - * This must be called before other metric gathering APIs are called. - * + * This function must be called only when a kbase device is initialized and + * also must be called before other metric gathering APIs are called. * * Return: 0 on success, error code on error */ @@ -517,8 +514,9 @@ void kbase_pm_wait_for_gpu_power_down(struct kbase_device *kbdev); * @kbdev: The kbase device structure for the device (must be a valid pointer) * * Setup the power management callbacks and initialize/enable the runtime-pm - * for the Mali GPU platform device, using the callback function. This must be - * called before the kbase_pm_register_access_enable() function. + * for the Mali GPU platform device, using the callback function. + * This function must be called only when a kbase device is initialized and + * also must be called before the kbase_pm_register_access_enable() function. * * Return: 0 on success, error code on error */ @@ -602,8 +600,7 @@ void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume); void kbase_pm_do_poweroff(struct kbase_device *kbdev); #if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) -void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, - struct kbasep_pm_metrics *last, +void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, struct kbasep_pm_metrics *last, struct kbasep_pm_metrics *diff); #endif /* defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) */ @@ -638,8 +635,8 @@ int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation); * * Return: Returns 0 on failure and non zero on success. */ -int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, - u32 util_gl_share, u32 util_cl_share[2]); +int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, + u32 util_cl_share[2]); #endif #endif /* CONFIG_MALI_MIDGARD_DVFS */ @@ -654,8 +651,7 @@ void kbase_pm_power_changed(struct kbase_device *kbdev); * * Caller must hold hwaccess_lock */ -void kbase_pm_metrics_update(struct kbase_device *kbdev, - ktime_t *now); +void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *now); /** * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU @@ -823,8 +819,7 @@ bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev); * * Return: true if MCU is inactive */ -bool kbase_pm_is_mcu_inactive(struct kbase_device *kbdev, - enum kbase_mcu_state state); +bool kbase_pm_is_mcu_inactive(struct kbase_device *kbdev, enum kbase_mcu_state state); /** * kbase_pm_idle_groups_sched_suspendable - Check whether the scheduler can be @@ -835,13 +830,11 @@ bool kbase_pm_is_mcu_inactive(struct kbase_device *kbdev, * * Return: true if allowed to enter the suspended state. 
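/*
 * Illustrative side note (not from the patch): the inline helpers being
 * reflowed in this header are plain predicates over the CSF power-management
 * scheduling flags - the scheduler may only be suspended while the
 * corresponding "don't suspend" bit is clear. A reduced model, with bit
 * positions assumed for illustration:
 */
#include <stdbool.h>
#include <stdio.h>

#define MODEL_SCHED_IGNORE_IDLE	(1u << 0)
#define MODEL_SCHED_NO_SUSPEND	(1u << 1)

static bool idle_groups_suspendable(unsigned int sched_flags)
{
	return !(sched_flags & MODEL_SCHED_IGNORE_IDLE);
}

static bool no_runnables_suspendable(unsigned int sched_flags)
{
	return !(sched_flags & MODEL_SCHED_NO_SUSPEND);
}

int main(void)
{
	unsigned int flags = MODEL_SCHED_NO_SUSPEND;

	printf("%d\n", idle_groups_suspendable(flags));  /* 1 */
	printf("%d\n", no_runnables_suspendable(flags)); /* 0 */
	return 0;
}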
*/ -static inline -bool kbase_pm_idle_groups_sched_suspendable(struct kbase_device *kbdev) +static inline bool kbase_pm_idle_groups_sched_suspendable(struct kbase_device *kbdev) { lockdep_assert_held(&kbdev->hwaccess_lock); - return !(kbdev->pm.backend.csf_pm_sched_flags & - CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE); + return !(kbdev->pm.backend.csf_pm_sched_flags & CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE); } /** @@ -853,13 +846,11 @@ bool kbase_pm_idle_groups_sched_suspendable(struct kbase_device *kbdev) * * Return: true if allowed to enter the suspended state. */ -static inline -bool kbase_pm_no_runnables_sched_suspendable(struct kbase_device *kbdev) +static inline bool kbase_pm_no_runnables_sched_suspendable(struct kbase_device *kbdev) { lockdep_assert_held(&kbdev->hwaccess_lock); - return !(kbdev->pm.backend.csf_pm_sched_flags & - CSF_DYNAMIC_PM_SCHED_NO_SUSPEND); + return !(kbdev->pm.backend.csf_pm_sched_flags & CSF_DYNAMIC_PM_SCHED_NO_SUSPEND); } /** @@ -875,8 +866,7 @@ static inline bool kbase_pm_no_mcu_core_pwroff(struct kbase_device *kbdev) { lockdep_assert_held(&kbdev->hwaccess_lock); - return kbdev->pm.backend.csf_pm_sched_flags & - CSF_DYNAMIC_PM_CORE_KEEP_ON; + return kbdev->pm.backend.csf_pm_sched_flags & CSF_DYNAMIC_PM_CORE_KEEP_ON; } /** @@ -996,13 +986,12 @@ static inline void kbase_pm_enable_db_mirror_interrupt(struct kbase_device *kbde lockdep_assert_held(&kbdev->hwaccess_lock); if (!kbdev->pm.backend.db_mirror_interrupt_enabled) { - u32 irq_mask = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_IRQ_MASK)); + u32 irq_mask = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK)); WARN_ON(irq_mask & DOORBELL_MIRROR); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), - irq_mask | DOORBELL_MIRROR); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK), + irq_mask | DOORBELL_MIRROR); kbdev->pm.backend.db_mirror_interrupt_enabled = true; } } @@ -1020,11 +1009,10 @@ static inline void kbase_pm_disable_db_mirror_interrupt(struct kbase_device *kbd lockdep_assert_held(&kbdev->hwaccess_lock); if (kbdev->pm.backend.db_mirror_interrupt_enabled) { - u32 irq_mask = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_IRQ_MASK)); + u32 irq_mask = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK)); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), - irq_mask & ~DOORBELL_MIRROR); + kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_MASK), + irq_mask & ~DOORBELL_MIRROR); kbdev->pm.backend.db_mirror_interrupt_enabled = false; } } @@ -1053,4 +1041,5 @@ static inline bool kbase_pm_l2_allow_mmu_page_migration(struct kbase_device *kbd return (backend->l2_state != KBASE_L2_PEND_ON && backend->l2_state != KBASE_L2_PEND_OFF); } + #endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c index 5d98bd7..e89b188 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,7 +43,7 @@ * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly * under 11s. 
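/*
 * Illustrative side note (not from the patch): checking the arithmetic in the
 * KBASE_PM_TIME_SHIFT comment nearby. Busy/idle time is accumulated in 32-bit
 * counters after a right shift of 8 bits, so the quoted maximum sampling
 * period is 2^(32+8) ns / 100, which is indeed just under 11 seconds.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t max_ns = (1ULL << (32 + 8)) / 100;

	printf("%llu ns = %.3f s\n", (unsigned long long)max_ns,
	       (double)max_ns / 1e9); /* 10995116277 ns = 10.995 s */
	return 0;
}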
Exceeding this will cause overflow */ -#define KBASE_PM_TIME_SHIFT 8 +#define KBASE_PM_TIME_SHIFT 8 #endif #if MALI_USE_CSF @@ -111,9 +111,6 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev) KBASE_DEBUG_ASSERT(kbdev != NULL); kbdev->pm.backend.metrics.kbdev = kbdev; kbdev->pm.backend.metrics.time_period_start = ktime_get_raw(); - kbdev->pm.backend.metrics.values.time_busy = 0; - kbdev->pm.backend.metrics.values.time_idle = 0; - kbdev->pm.backend.metrics.values.time_in_protm = 0; perf_counter.scaling_factor = GPU_ACTIVE_SCALING_FACTOR; @@ -126,39 +123,21 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev) /* We need the GPU_ACTIVE counter */ perf_counter.idx = GPU_ACTIVE_CNT_IDX; - err = kbase_ipa_control_register( - kbdev, &perf_counter, NUM_PERF_COUNTERS, - &kbdev->pm.backend.metrics.ipa_control_client); + err = kbase_ipa_control_register(kbdev, &perf_counter, NUM_PERF_COUNTERS, + &kbdev->pm.backend.metrics.ipa_control_client); if (err) { - dev_err(kbdev->dev, - "Failed to register IPA with kbase_ipa_control: err=%d", - err); + dev_err(kbdev->dev, "Failed to register IPA with kbase_ipa_control: err=%d", err); return -1; } #else KBASE_DEBUG_ASSERT(kbdev != NULL); kbdev->pm.backend.metrics.kbdev = kbdev; kbdev->pm.backend.metrics.time_period_start = ktime_get_raw(); - - kbdev->pm.backend.metrics.gpu_active = false; - kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; - kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; - kbdev->pm.backend.metrics.active_gl_ctx[0] = 0; - kbdev->pm.backend.metrics.active_gl_ctx[1] = 0; - kbdev->pm.backend.metrics.active_gl_ctx[2] = 0; - - kbdev->pm.backend.metrics.values.time_busy = 0; - kbdev->pm.backend.metrics.values.time_idle = 0; - kbdev->pm.backend.metrics.values.busy_cl[0] = 0; - kbdev->pm.backend.metrics.values.busy_cl[1] = 0; - kbdev->pm.backend.metrics.values.busy_gl = 0; - #endif spin_lock_init(&kbdev->pm.backend.metrics.lock); #ifdef CONFIG_MALI_MIDGARD_DVFS - hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL); + hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); kbdev->pm.backend.metrics.timer.function = dvfs_callback; kbdev->pm.backend.metrics.initialized = true; atomic_set(&kbdev->pm.backend.metrics.timer_state, TIMER_OFF); @@ -189,8 +168,9 @@ void kbasep_pm_metrics_term(struct kbase_device *kbdev) #endif /* CONFIG_MALI_MIDGARD_DVFS */ #if MALI_USE_CSF - kbase_ipa_control_unregister( - kbdev, kbdev->pm.backend.metrics.ipa_control_client); + kbase_ipa_control_unregister(kbdev, kbdev->pm.backend.metrics.ipa_control_client); +#else + CSTD_UNUSED(kbdev); #endif } @@ -213,9 +193,8 @@ static bool kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev) /* Query IPA_CONTROL for the latest GPU-active and protected-time * info. */ - err = kbase_ipa_control_query( - kbdev, kbdev->pm.backend.metrics.ipa_control_client, - &gpu_active_counter, 1, &protected_time); + err = kbase_ipa_control_query(kbdev, kbdev->pm.backend.metrics.ipa_control_client, + &gpu_active_counter, 1, &protected_time); /* Read the timestamp after reading the GPU_ACTIVE counter value. 
* This ensures the time gap between the 2 reads is consistent for @@ -226,15 +205,13 @@ static bool kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev) now = ktime_get_raw(); if (err) { - dev_err(kbdev->dev, - "Failed to query the increment of GPU_ACTIVE counter: err=%d", + dev_err(kbdev->dev, "Failed to query the increment of GPU_ACTIVE counter: err=%d", err); } else { u64 diff_ns; s64 diff_ns_signed; u32 ns_time; - ktime_t diff = ktime_sub( - now, kbdev->pm.backend.metrics.time_period_start); + ktime_t diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start); diff_ns_signed = ktime_to_ns(diff); @@ -307,17 +284,14 @@ static bool kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev) */ gpu_active_counter = MIN(gpu_active_counter, ns_time); - kbdev->pm.backend.metrics.values.time_busy += - gpu_active_counter; + kbdev->pm.backend.metrics.values.time_busy += gpu_active_counter; - kbdev->pm.backend.metrics.values.time_idle += - ns_time - gpu_active_counter; + kbdev->pm.backend.metrics.values.time_idle += ns_time - gpu_active_counter; /* Also make time in protected mode available explicitly, * so users of this data have this info, too. */ - kbdev->pm.backend.metrics.values.time_in_protm += - protected_time; + kbdev->pm.backend.metrics.values.time_in_protm += protected_time; } kbdev->pm.backend.metrics.time_period_start = now; @@ -325,8 +299,7 @@ static bool kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev) } #endif /* defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) */ #else -static bool kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, - ktime_t now) +static bool kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, ktime_t now) { ktime_t diff; @@ -337,7 +310,7 @@ static bool kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, return false; if (kbdev->pm.backend.metrics.gpu_active) { - u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); + u32 ns_time = (u32)(ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); kbdev->pm.backend.metrics.values.time_busy += ns_time; if (kbdev->pm.backend.metrics.active_cl_ctx[0]) @@ -358,11 +331,10 @@ static bool kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, kbdev->pm.backend.metrics.time_period_start = now; return true; } -#endif /* MALI_USE_CSF */ +#endif /* MALI_USE_CSF */ #if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) -void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, - struct kbasep_pm_metrics *last, +void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, struct kbasep_pm_metrics *last, struct kbasep_pm_metrics *diff) { struct kbasep_pm_metrics *cur = &kbdev->pm.backend.metrics.values; @@ -412,11 +384,9 @@ void kbase_pm_get_dvfs_action(struct kbase_device *kbdev) diff = &kbdev->pm.backend.metrics.dvfs_diff; - kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last, - diff); + kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last, diff); - utilisation = (100 * diff->time_busy) / - max(diff->time_busy + diff->time_idle, 1u); + utilisation = (100 * diff->time_busy) / max(diff->time_busy + diff->time_idle, 1u); #if !MALI_USE_CSF busy = max(diff->busy_gl + diff->busy_cl[0] + diff->busy_cl[1], 1u); @@ -425,8 +395,7 @@ void kbase_pm_get_dvfs_action(struct kbase_device *kbdev) util_cl_share[0] = (100 * diff->busy_cl[0]) / busy; util_cl_share[1] = (100 * diff->busy_cl[1]) / busy; - kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, - util_cl_share); + 
kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, util_cl_share); #else /* Note that, at present, we don't pass protected-mode time to the * platform here. It's unlikely to be useful, however, as the platform @@ -469,7 +438,6 @@ void kbase_pm_metrics_stop(struct kbase_device *kbdev) atomic_cmpxchg(&kbdev->pm.backend.metrics.timer_state, TIMER_ON, TIMER_STOPPED); } - #endif /* CONFIG_MALI_MIDGARD_DVFS */ #if !MALI_USE_CSF @@ -502,12 +470,12 @@ static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev) if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) katom = kbase_gpu_inspect(kbdev, js, 1); - if (katom && katom->gpu_rb_state == - KBASE_ATOM_GPU_RB_SUBMITTED) { + if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) { if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { - int device_nr = (katom->core_req & - BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) - ? katom->device_nr : 0; + u32 device_nr = + (katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) ? + katom->device_nr : + 0; if (!WARN_ON(device_nr >= 2)) kbdev->pm.backend.metrics.active_cl_ctx[device_nr] = 1; } else { diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c index 7d7650c..070fd27 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c @@ -24,7 +24,7 @@ */ #include <mali_kbase.h> -#include <gpu/mali_kbase_gpu_regmap.h> +#include <hw_access/mali_kbase_hw_access_regmap.h> #include <mali_kbase_pm.h> #include <backend/gpu/mali_kbase_pm_internal.h> #include <mali_kbase_reset_gpu.h> @@ -52,7 +52,7 @@ void kbase_pm_policy_init(struct kbase_device *kbdev) struct device_node *np = kbdev->dev->of_node; const char *power_policy_name; unsigned long flags; - int i; + unsigned int i; /* Read "power-policy" property and fallback to "power_policy" if not found */ if ((of_property_read_string(np, "power-policy", &power_policy_name) == 0) || @@ -106,13 +106,13 @@ void kbase_pm_update_active(struct kbase_device *kbdev) active = backend->pm_current_policy->get_core_active(kbdev); WARN((kbase_pm_is_active(kbdev) && !active), - "GPU is active but policy '%s' is indicating that it can be powered off", - kbdev->pm.backend.pm_current_policy->name); + "GPU is active but policy '%s' is indicating that it can be powered off", + kbdev->pm.backend.pm_current_policy->name); if (active) { /* Power on the GPU and any cores requested by the policy */ if (!pm->backend.invoke_poweroff_wait_wq_when_l2_off && - pm->backend.poweroff_wait_in_progress) { + pm->backend.poweroff_wait_in_progress) { KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); pm->backend.poweron_required = true; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -213,7 +213,8 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) #endif if (kbdev->pm.backend.shaders_desired != shaders_desired) { - KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, kbdev->pm.backend.shaders_desired); + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, + kbdev->pm.backend.shaders_desired); kbdev->pm.backend.shaders_desired = shaders_desired; kbase_pm_update_state(kbdev); @@ -231,9 +232,10 @@ void kbase_pm_update_cores_state(struct kbase_device *kbdev) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } -int kbase_pm_list_policies(struct kbase_device *kbdev, - const struct kbase_pm_policy * const **list) +size_t kbase_pm_list_policies(struct kbase_device *kbdev, + const struct kbase_pm_policy *const **list) { + CSTD_UNUSED(kbdev); if 
(list) *list = all_policy_list; @@ -265,32 +267,29 @@ static int policy_change_wait_for_L2_off(struct kbase_device *kbdev) * for host control of shader cores. */ #if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE - remaining = wait_event_killable_timeout( - kbdev->pm.backend.gpu_in_desired_state_wait, - kbdev->pm.backend.l2_state == KBASE_L2_OFF, timeout); + remaining = wait_event_killable_timeout(kbdev->pm.backend.gpu_in_desired_state_wait, + kbdev->pm.backend.l2_state == KBASE_L2_OFF, + timeout); #else - remaining = wait_event_timeout( - kbdev->pm.backend.gpu_in_desired_state_wait, - kbdev->pm.backend.l2_state == KBASE_L2_OFF, timeout); + remaining = wait_event_timeout(kbdev->pm.backend.gpu_in_desired_state_wait, + kbdev->pm.backend.l2_state == KBASE_L2_OFF, timeout); #endif if (!remaining) { err = -ETIMEDOUT; } else if (remaining < 0) { - dev_info(kbdev->dev, - "Wait for L2_off got interrupted"); + dev_info(kbdev->dev, "Wait for L2_off got interrupted"); err = (int)remaining; } - dev_dbg(kbdev->dev, "%s: err=%d mcu_state=%d, L2_state=%d\n", __func__, - err, kbdev->pm.backend.mcu_state, kbdev->pm.backend.l2_state); + dev_dbg(kbdev->dev, "%s: err=%d mcu_state=%d, L2_state=%d\n", __func__, err, + kbdev->pm.backend.mcu_state, kbdev->pm.backend.l2_state); return err; } #endif -void kbase_pm_set_policy(struct kbase_device *kbdev, - const struct kbase_pm_policy *new_policy) +void kbase_pm_set_policy(struct kbase_device *kbdev, const struct kbase_pm_policy *new_policy) { const struct kbase_pm_policy *old_policy; unsigned long flags; @@ -300,7 +299,7 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, bool reset_gpu = false; bool reset_op_prevented = true; struct kbase_csf_scheduler *scheduler = NULL; - u32 pwroff; + u64 pwroff_ns; bool switching_to_always_on; #endif @@ -310,12 +309,14 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, KBASE_KTRACE_ADD(kbdev, PM_SET_POLICY, NULL, new_policy->id); #if MALI_USE_CSF - pwroff = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev); + pwroff_ns = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev); switching_to_always_on = new_policy == &kbase_pm_always_on_policy_ops; - if (pwroff == 0 && !switching_to_always_on) { - dev_warn(kbdev->dev, + if (pwroff_ns == 0 && !switching_to_always_on) { + dev_warn( + kbdev->dev, "power_policy: cannot switch away from always_on with mcu_shader_pwroff_timeout set to 0\n"); - dev_warn(kbdev->dev, + dev_warn( + kbdev->dev, "power_policy: resetting mcu_shader_pwroff_timeout to default value to switch policy from always_on\n"); kbase_csf_firmware_reset_mcu_core_pwroff_time(kbdev); } @@ -391,8 +392,7 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, if (old_policy->term) old_policy->term(kbdev); - memset(&kbdev->pm.backend.pm_policy_data, 0, - sizeof(union kbase_pm_policy_data)); + memset(&kbdev->pm.backend.pm_policy_data, 0, sizeof(union kbase_pm_policy_data)); KBASE_KTRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, new_policy->id); if (new_policy->init) diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.h b/mali_kbase/backend/gpu/mali_kbase_pm_policy.h index e811365..aa9ed9c 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2015, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -80,8 +80,7 @@ void kbase_pm_update_cores(struct kbase_device *kbdev); * Return: true if the request to the HW was successfully made else false if the * request is still pending. */ -static inline bool kbase_pm_cores_requested(struct kbase_device *kbdev, - bool shader_required) +static inline bool kbase_pm_cores_requested(struct kbase_device *kbdev, bool shader_required) { lockdep_assert_held(&kbdev->hwaccess_lock); @@ -89,14 +88,14 @@ static inline bool kbase_pm_cores_requested(struct kbase_device *kbdev, * available, and shaders are definitely not powered. */ if (kbdev->pm.backend.l2_state != KBASE_L2_PEND_ON && - kbdev->pm.backend.l2_state != KBASE_L2_ON && - kbdev->pm.backend.l2_state != KBASE_L2_ON_HWCNT_ENABLE) + kbdev->pm.backend.l2_state != KBASE_L2_ON && + kbdev->pm.backend.l2_state != KBASE_L2_ON_HWCNT_ENABLE) return false; if (shader_required && - kbdev->pm.backend.shaders_state != KBASE_SHADERS_PEND_ON_CORESTACK_ON && - kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON && - kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON_RECHECK) + kbdev->pm.backend.shaders_state != KBASE_SHADERS_PEND_ON_CORESTACK_ON && + kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON && + kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON_RECHECK) return false; return true; diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c index 28365c0..dfdf469 100644 --- a/mali_kbase/backend/gpu/mali_kbase_time.c +++ b/mali_kbase/backend/gpu/mali_kbase_time.c @@ -22,7 +22,6 @@ #include <mali_kbase.h> #include <mali_kbase_hwaccess_time.h> #if MALI_USE_CSF -#include <asm/arch_timer.h> #include <linux/gcd.h> #include <csf/mali_kbase_csf_timeout.h> #endif @@ -30,6 +29,11 @@ #include <backend/gpu/mali_kbase_pm_internal.h> #include <mali_kbase_config_defaults.h> #include <linux/version_compat_defs.h> +#include <asm/arch_timer.h> + +#if !IS_ENABLED(CONFIG_MALI_REAL_HW) +#include <backend/gpu/mali_kbase_model_linux.h> +#endif struct kbase_timeout_info { char *selector_str; @@ -53,6 +57,15 @@ static struct kbase_timeout_info timeout_info[KBASE_TIMEOUT_SELECTOR_COUNT] = { MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES }, [KCPU_FENCE_SIGNAL_TIMEOUT] = { "KCPU_FENCE_SIGNAL_TIMEOUT", KCPU_FENCE_SIGNAL_TIMEOUT_CYCLES }, + [KBASE_PRFCNT_ACTIVE_TIMEOUT] = { "KBASE_PRFCNT_ACTIVE_TIMEOUT", + KBASE_PRFCNT_ACTIVE_TIMEOUT_CYCLES }, + [KBASE_CLEAN_CACHE_TIMEOUT] = { "KBASE_CLEAN_CACHE_TIMEOUT", + KBASE_CLEAN_CACHE_TIMEOUT_CYCLES }, + [KBASE_AS_INACTIVE_TIMEOUT] = { "KBASE_AS_INACTIVE_TIMEOUT", + KBASE_AS_INACTIVE_TIMEOUT_CYCLES }, + [IPA_INACTIVE_TIMEOUT] = { "IPA_INACTIVE_TIMEOUT", IPA_INACTIVE_TIMEOUT_CYCLES }, + [CSF_FIRMWARE_STOP_TIMEOUT] = { "CSF_FIRMWARE_STOP_TIMEOUT", + CSF_FIRMWARE_STOP_TIMEOUT_CYCLES }, }; #else static struct kbase_timeout_info timeout_info[KBASE_TIMEOUT_SELECTOR_COUNT] = { @@ -60,30 +73,23 @@ static struct kbase_timeout_info timeout_info[KBASE_TIMEOUT_SELECTOR_COUNT] = { MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES }, [JM_DEFAULT_JS_FREE_TIMEOUT] = { "JM_DEFAULT_JS_FREE_TIMEOUT", JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES }, + [KBASE_PRFCNT_ACTIVE_TIMEOUT] = { "KBASE_PRFCNT_ACTIVE_TIMEOUT", + KBASE_PRFCNT_ACTIVE_TIMEOUT_CYCLES }, + [KBASE_CLEAN_CACHE_TIMEOUT] = { "KBASE_CLEAN_CACHE_TIMEOUT", + KBASE_CLEAN_CACHE_TIMEOUT_CYCLES }, + [KBASE_AS_INACTIVE_TIMEOUT] = { 
"KBASE_AS_INACTIVE_TIMEOUT", + KBASE_AS_INACTIVE_TIMEOUT_CYCLES }, }; #endif -void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, - u64 *cycle_counter, - u64 *system_time, - struct timespec64 *ts) +void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, u64 *cycle_counter, + u64 *system_time, struct timespec64 *ts) { - u32 hi1, hi2; - if (cycle_counter) *cycle_counter = kbase_backend_get_cycle_cnt(kbdev); if (system_time) { - /* Read hi, lo, hi to ensure a coherent u64 */ - do { - hi1 = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TIMESTAMP_HI)); - *system_time = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TIMESTAMP_LO)); - hi2 = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TIMESTAMP_HI)); - } while (hi1 != hi2); - *system_time |= (((u64) hi1) << 32); + *system_time = kbase_reg_read64_coherent(kbdev, GPU_CONTROL_ENUM(TIMESTAMP)); } /* Record the CPU's idea of current time */ @@ -113,7 +119,7 @@ static bool timedwait_cycle_count_active(struct kbase_device *kbdev) const unsigned long remaining = jiffies + msecs_to_jiffies(timeout); while (time_is_after_jiffies(remaining)) { - if ((kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & + if ((kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_STATUS)) & GPU_STATUS_CYCLE_COUNT_ACTIVE)) { success = true; break; @@ -124,18 +130,15 @@ static bool timedwait_cycle_count_active(struct kbase_device *kbdev) } #endif -void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, - u64 *system_time, struct timespec64 *ts) +void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, u64 *system_time, + struct timespec64 *ts) { #if !MALI_USE_CSF kbase_pm_request_gpu_cycle_counter(kbdev); - WARN_ONCE(kbdev->pm.backend.l2_state != KBASE_L2_ON, - "L2 not powered up"); - WARN_ONCE((!timedwait_cycle_count_active(kbdev)), - "Timed out on CYCLE_COUNT_ACTIVE"); + WARN_ONCE(kbdev->pm.backend.l2_state != KBASE_L2_ON, "L2 not powered up"); + WARN_ONCE((!timedwait_cycle_count_active(kbdev)), "Timed out on CYCLE_COUNT_ACTIVE"); #endif - kbase_backend_get_gpu_time_norequest(kbdev, cycle_counter, system_time, - ts); + kbase_backend_get_gpu_time_norequest(kbdev, cycle_counter, system_time, ts); #if !MALI_USE_CSF kbase_pm_release_gpu_cycle_counter(kbdev); #endif @@ -270,19 +273,7 @@ KBASE_EXPORT_TEST_API(kbase_get_timeout_ms); u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev) { - u32 hi1, hi2, lo; - - /* Read hi, lo, hi to ensure a coherent u64 */ - do { - hi1 = kbase_reg_read(kbdev, - GPU_CONTROL_REG(CYCLE_COUNT_HI)); - lo = kbase_reg_read(kbdev, - GPU_CONTROL_REG(CYCLE_COUNT_LO)); - hi2 = kbase_reg_read(kbdev, - GPU_CONTROL_REG(CYCLE_COUNT_HI)); - } while (hi1 != hi2); - - return lo | (((u64) hi1) << 32); + return kbase_reg_read64_coherent(kbdev, GPU_CONTROL_ENUM(CYCLE_COUNT)); } #if MALI_USE_CSF @@ -310,10 +301,23 @@ static void get_cpu_gpu_time(struct kbase_device *kbdev, u64 *cpu_ts, u64 *gpu_t kbase_backend_get_gpu_time(kbdev, gpu_cycle, gpu_ts, &ts); if (cpu_ts) - *cpu_ts = ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; + *cpu_ts = (u64)(ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec); } #endif +u64 kbase_arch_timer_get_cntfrq(struct kbase_device *kbdev) +{ + u64 freq = arch_timer_get_cntfrq(); + +#if !IS_ENABLED(CONFIG_MALI_REAL_HW) + freq = midgard_model_arch_timer_get_cntfrq(kbdev->model); +#endif + + dev_dbg(kbdev->dev, "System Timer Freq = %lluHz", freq); + + return freq; +} + int kbase_backend_time_init(struct kbase_device *kbdev) { int err = 0; @@ -325,7 +329,7 @@ int kbase_backend_time_init(struct kbase_device 
*kbdev) kbase_pm_register_access_enable(kbdev); get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL); - freq = arch_timer_get_cntfrq(); + freq = kbase_arch_timer_get_cntfrq(kbdev); if (!freq) { dev_warn(kbdev->dev, "arch_timer_get_rate() is zero!"); @@ -344,8 +348,9 @@ int kbase_backend_time_init(struct kbase_device *kbdev) goto disable_registers; } - kbdev->backend_time.offset = cpu_ts - div64_u64(gpu_ts * kbdev->backend_time.multiplier, - kbdev->backend_time.divisor); + kbdev->backend_time.offset = + (s64)(cpu_ts - div64_u64(gpu_ts * kbdev->backend_time.multiplier, + kbdev->backend_time.divisor)); #endif if (kbase_timeout_scaling_init(kbdev)) { |
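The last hunk above records a CPU/GPU time correlation at init: offset is chosen so that a GPU timestamp scaled by multiplier/divisor lands on the CPU's monotonic-raw clock. A minimal standalone sketch of how such an offset would then be applied when converting a GPU timestamp to CPU nanoseconds; the struct and helper names are invented for illustration, the multiplier/divisor derivation in the comments is an assumption based on the gcd include added earlier, and only the offset arithmetic mirrors the hunk:

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the fields set up by kbase_backend_time_init() (illustration only). */
struct backend_time_sketch {
	uint64_t multiplier; /* e.g. NSEC_PER_SEC / gcd(NSEC_PER_SEC, freq) */
	uint64_t divisor;    /* e.g. freq / gcd(NSEC_PER_SEC, freq) */
	int64_t offset;      /* cpu_ts - gpu_ts * multiplier / divisor, captured at init */
};

static uint64_t gpu_ts_to_cpu_ns(const struct backend_time_sketch *t, uint64_t gpu_ts)
{
	/* Scale the GPU timestamp to nanoseconds, then shift onto the CPU clock. */
	return (gpu_ts * t->multiplier) / t->divisor + (uint64_t)t->offset;
}

int main(void)
{
	/* Example: a 52 MHz system counter, so 1e9/52e6 reduces to 250/13. */
	struct backend_time_sketch t = { .multiplier = 250, .divisor = 13, .offset = 1000000 };

	printf("gpu_ts=52000000 -> cpu_ns=%llu\n",
	       (unsigned long long)gpu_ts_to_cpu_ns(&t, 52000000)); /* 1001000000 */
	return 0;
}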