author     Debarshi Dutta <debarshid@google.com>   2023-06-02 13:36:22 +0000
committer  Debarshi Dutta <debarshid@google.com>   2023-07-12 18:55:15 +0000
commit     20fff721667a227b3d6decf9dbc3798476390302 (patch)
tree       fba7129be28198dc2af1fb34fe0ec3a9ec0ce572 /mali_kbase/backend
parent     9e12ba5986f91fa0192b1ab55fafcea5e9b37094 (diff)
download   gpu-20fff721667a227b3d6decf9dbc3798476390302.tar.gz
Merge upstream DDK R43P0 KMD
Merge DDK version R43P0 from upstream branch
Provenance: 48a9c7e25986318c8475bc245de51e7bec2606e8 (ipdelivery/EAC/v_r43p0)
VX504X08X-BU-00000-r43p0-01eac0 - Valhall Android DDK
VX504X08X-BU-60000-r43p0-01eac0 - Valhall Android Document Bundle
VX504X08X-DC-11001-r43p0-01eac0 - Valhall Android DDK Software Errata
VX504X08X-SW-99006-r43p0-01eac0 - Valhall Android Renderscript AOSP parts
Bug: 278174418
Commit-Topic: R43P0_KMD
Signed-off-by: Debarshi Dutta <debarshid@google.com>
Change-Id: I84fb19e7ce5f28e735d44a4993d51bd985aac80b
Diffstat (limited to 'mali_kbase/backend')
24 files changed, 921 insertions, 462 deletions
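One notable behavioural change in this update is in the mali_kbase_jm_hw.c hunk below: the one-shot kbasep_jm_is_js_free() check before hardware submission is replaced by a bounded polling wait, kbasep_jm_wait_js_free(), which re-reads JS_COMMAND_NEXT until the slot drains or a timeout expires. The following standalone sketch illustrates only that timed-poll pattern; the stubbed register read, the timeout constant and the helper names are placeholders for illustration, not the driver's code.

```c
/*
 * Userspace sketch of the timed "wait for a free job slot" pattern used by
 * kbasep_jm_wait_js_free() in the mali_kbase_jm_hw.c hunk below.
 * The register read is stubbed and all names are illustrative.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define JS_FREE_WAIT_TIME_MS 100 /* stand-in for kbdev->js_data.js_free_wait_time_ms */

/* Stub for reading JS_COMMAND_NEXT: pretend the slot drains after a few polls. */
static uint32_t read_js_command_next(unsigned int js)
{
	static int polls_left = 3;
	(void)js;
	return polls_left-- > 0 ? 1u : 0u;
}

static int64_t now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (int64_t)ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
}

/* Poll until JS_COMMAND_NEXT reads zero (slot free) or the timeout expires,
 * checking the elapsed time only after each failed read, as the driver does.
 */
static bool wait_js_free(unsigned int js)
{
	const int64_t start = now_ms();

	do {
		if (!read_js_command_next(js))
			return true;
	} while (now_ms() - start < JS_FREE_WAIT_TIME_MS);

	fprintf(stderr, "Timeout waiting for job slot %u to become free\n", js);
	return false;
}

int main(void)
{
	printf("slot 0 free: %s\n", wait_js_free(0) ? "yes" : "no");
	return 0;
}
```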
diff --git a/mali_kbase/backend/gpu/Kbuild b/mali_kbase/backend/gpu/Kbuild index 611b16b..7df24c3 100644 --- a/mali_kbase/backend/gpu/Kbuild +++ b/mali_kbase/backend/gpu/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -49,8 +49,12 @@ endif mali_kbase-$(CONFIG_MALI_DEVFREQ) += \ backend/gpu/mali_kbase_devfreq.o -# Dummy model +ifneq ($(CONFIG_MALI_REAL_HW),y) + mali_kbase-y += backend/gpu/mali_kbase_model_linux.o +endif + +# NO_MALI Dummy model interface mali_kbase-$(CONFIG_MALI_NO_MALI) += backend/gpu/mali_kbase_model_dummy.o -mali_kbase-$(CONFIG_MALI_NO_MALI) += backend/gpu/mali_kbase_model_linux.o # HW error simulation mali_kbase-$(CONFIG_MALI_NO_MALI) += backend/gpu/mali_kbase_model_error_generator.o + diff --git a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c index 9587c70..7c0abba 100644 --- a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2016, 2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016, 2018, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,12 +22,32 @@ #include "backend/gpu/mali_kbase_cache_policy_backend.h" #include <device/mali_kbase_device.h> +/** + * kbasep_amba_register_present() - Check AMBA_<> register is present + * in the GPU. + * @kbdev: Device pointer + * + * Note: Only for arch version 12.x.1 onwards. + * + * Return: true if AMBA_FEATURES/ENABLE registers are present. 
+ */ +static bool kbasep_amba_register_present(struct kbase_device *kbdev) +{ + return (ARCH_MAJOR_REV_REG(kbdev->gpu_props.props.raw_props.gpu_id) >= + GPU_ID2_ARCH_MAJOR_REV_MAKE(12, 1)); +} void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, u32 mode) { kbdev->current_gpu_coherency_mode = mode; + if (kbasep_amba_register_present(kbdev)) { + u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); + + val = AMBA_ENABLE_COHERENCY_PROTOCOL_SET(val, mode); + kbase_reg_write(kbdev, AMBA_ENABLE, val); + } else kbase_reg_write(kbdev, COHERENCY_ENABLE, mode); } @@ -35,9 +55,38 @@ u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev) { u32 coherency_features; + if (kbasep_amba_register_present(kbdev)) + coherency_features = + kbase_reg_read(kbdev, GPU_CONTROL_REG(AMBA_FEATURES)); + else coherency_features = kbase_reg_read( kbdev, GPU_CONTROL_REG(COHERENCY_FEATURES)); return coherency_features; } +void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev, + bool enable) +{ + if (kbasep_amba_register_present(kbdev)) { + u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); + + val = AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SET(val, enable); + kbase_reg_write(kbdev, AMBA_ENABLE, val); + + } else { + WARN(1, "memory_cache_support not supported"); + } +} + +void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable) +{ + if (kbasep_amba_register_present(kbdev)) { + u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); + + val = AMBA_ENABLE_INVALIDATE_HINT_SET(val, enable); + kbase_reg_write(kbdev, AMBA_ENABLE, val); + } else { + WARN(1, "invalidate_hint not supported"); + } +} diff --git a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h index 13c79d6..8cd8090 100644 --- a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h +++ b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2016, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,4 +43,23 @@ void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, */ u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev); +/** + * kbase_amba_set_memory_cache_support() - Sets AMBA memory cache support + * in the GPU. + * @kbdev: Device pointer + * @enable: true for enable. + * + * Note: Only for arch version 12.x.1 onwards. + */ +void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev, + bool enable); +/** + * kbase_amba_set_invalidate_hint() - Sets AMBA invalidate hint + * in the GPU. + * @kbdev: Device pointer + * @enable: true for enable. + * + * Note: Only for arch version 12.x.1 onwards. 
+ */ +void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable); #endif /* _KBASE_CACHE_POLICY_BACKEND_H_ */ diff --git a/mali_kbase/backend/gpu/mali_kbase_devfreq.c b/mali_kbase/backend/gpu/mali_kbase_devfreq.c index 09c1863..a389cd9 100644 --- a/mali_kbase/backend/gpu/mali_kbase_devfreq.c +++ b/mali_kbase/backend/gpu/mali_kbase_devfreq.c @@ -631,7 +631,6 @@ static void kbase_devfreq_work_term(struct kbase_device *kbdev) destroy_workqueue(workq); } - int kbase_devfreq_init(struct kbase_device *kbdev) { struct devfreq_dev_profile *dp; diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_defs.h b/mali_kbase/backend/gpu/mali_kbase_instr_defs.h index 7190f42..bd2eb8a 100644 --- a/mali_kbase/backend/gpu/mali_kbase_instr_defs.h +++ b/mali_kbase/backend/gpu/mali_kbase_instr_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014, 2016, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2016, 2018-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,7 +26,7 @@ #ifndef _KBASE_INSTR_DEFS_H_ #define _KBASE_INSTR_DEFS_H_ -#include <mali_kbase_hwcnt_gpu.h> +#include <hwcnt/mali_kbase_hwcnt_gpu.h> /* * Instrumentation State Machine States diff --git a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c index a29f7ef..ef09c6b 100644 --- a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c +++ b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2016, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,12 +25,12 @@ #include <linux/interrupt.h> -#if !IS_ENABLED(CONFIG_MALI_NO_MALI) +#if IS_ENABLED(CONFIG_MALI_REAL_HW) /* GPU IRQ Tags */ -#define JOB_IRQ_TAG 0 -#define MMU_IRQ_TAG 1 -#define GPU_IRQ_TAG 2 +#define JOB_IRQ_TAG 0 +#define MMU_IRQ_TAG 1 +#define GPU_IRQ_TAG 2 static void *kbase_tag(void *ptr, u32 tag) { @@ -163,7 +163,6 @@ static irq_handler_t kbase_handler_table[] = { #ifdef CONFIG_MALI_DEBUG #define JOB_IRQ_HANDLER JOB_IRQ_TAG -#define MMU_IRQ_HANDLER MMU_IRQ_TAG #define GPU_IRQ_HANDLER GPU_IRQ_TAG /** @@ -501,4 +500,4 @@ void kbase_synchronize_irqs(struct kbase_device *kbdev) KBASE_EXPORT_TEST_API(kbase_synchronize_irqs); -#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */ +#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_as.c b/mali_kbase/backend/gpu/mali_kbase_jm_as.c index 309e5c7..7059c84 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_as.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_as.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -67,9 +67,8 @@ static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev, kbase_js_runpool_inc_context_count(kbdev, kctx); } -bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) +bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, struct kbase_context *kctx, + unsigned int js) { int i; @@ -240,4 +239,3 @@ bool kbase_backend_use_ctx(struct kbase_device *kbdev, return true; } - diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c index 3062597..72926bc 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,7 +34,7 @@ #include <mali_kbase_ctx_sched.h> #include <mali_kbase_kinstr_jm.h> #include <mali_kbase_hwaccess_instr.h> -#include <mali_kbase_hwcnt_context.h> +#include <hwcnt/mali_kbase_hwcnt_context.h> #include <device/mali_kbase_device.h> #include <backend/gpu/mali_kbase_irq_internal.h> #include <backend/gpu/mali_kbase_jm_internal.h> @@ -44,9 +44,8 @@ static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev); static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev, const u64 affinity, const u64 limited_core_mask); -static u64 kbase_job_write_affinity(struct kbase_device *kbdev, - base_jd_core_req core_req, - int js, const u64 limited_core_mask) +static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req core_req, + unsigned int js, const u64 limited_core_mask) { u64 affinity; bool skip_affinity_check = false; @@ -191,7 +190,28 @@ static u64 select_job_chain(struct kbase_jd_atom *katom) return jc; } -int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js) +static inline bool kbasep_jm_wait_js_free(struct kbase_device *kbdev, unsigned int js, + struct kbase_context *kctx) +{ + const ktime_t wait_loop_start = ktime_get_raw(); + const s64 max_timeout = (s64)kbdev->js_data.js_free_wait_time_ms; + s64 diff = 0; + + /* wait for the JS_COMMAND_NEXT register to reach the given status value */ + do { + if (!kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT))) + return true; + + diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start)); + } while (diff < max_timeout); + + dev_err(kbdev->dev, "Timeout in waiting for job slot %u to become free for ctx %d_%u", js, + kctx->tgid, kctx->id); + + return false; +} + +int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, unsigned int js) { struct kbase_context *kctx; u32 cfg; @@ -204,8 +224,7 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, kctx = katom->kctx; /* Command register must be available */ - if (WARN(!kbasep_jm_is_js_free(kbdev, js, kctx), - "Attempting to assign to occupied slot %d in kctx %pK\n", js, (void *)kctx)) + if (!kbasep_jm_wait_js_free(kbdev, js, kctx)) return -EPERM; dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n", @@ -355,10 +374,8 @@ int kbase_job_hw_submit(struct kbase_device 
*kbdev, struct kbase_jd_atom *katom, * work out the best estimate (which might still result in an over-estimate to * the calculated time spent) */ -static void kbasep_job_slot_update_head_start_timestamp( - struct kbase_device *kbdev, - int js, - ktime_t end_timestamp) +static void kbasep_job_slot_update_head_start_timestamp(struct kbase_device *kbdev, unsigned int js, + ktime_t end_timestamp) { ktime_t timestamp_diff; struct kbase_jd_atom *katom; @@ -388,8 +405,7 @@ static void kbasep_job_slot_update_head_start_timestamp( * Make a tracepoint call to the instrumentation module informing that * softstop happened on given lpu (job slot). */ -static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, - int js) +static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, unsigned int js) { KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP( kbdev, @@ -398,7 +414,6 @@ static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, void kbase_job_done(struct kbase_device *kbdev, u32 done) { - int i; u32 count = 0; ktime_t end_timestamp; @@ -409,6 +424,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) end_timestamp = ktime_get_raw(); while (done) { + unsigned int i; u32 failed = done >> 16; /* treat failed slots as finished slots */ @@ -418,8 +434,6 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) * numbered interrupts before the higher numbered ones. */ i = ffs(finished) - 1; - if (WARN(i < 0, "%s: called without receiving any interrupts\n", __func__)) - break; do { int nr_done; @@ -618,11 +632,9 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ_END, NULL, NULL, 0, count); } -void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, - int js, - u32 action, - base_jd_core_req core_reqs, - struct kbase_jd_atom *target_katom) +void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, unsigned int js, + u32 action, base_jd_core_req core_reqs, + struct kbase_jd_atom *target_katom) { #if KBASE_KTRACE_ENABLE u32 status_reg_before; @@ -680,6 +692,10 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, struct kbase_context *head_kctx; head = kbase_gpu_inspect(kbdev, js, 0); + if (unlikely(!head)) { + dev_err(kbdev->dev, "Can't get a katom from js(%d)\n", js); + return; + } head_kctx = head->kctx; if (status_reg_before == BASE_JD_EVENT_ACTIVE) @@ -748,7 +764,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx) { struct kbase_device *kbdev = kctx->kbdev; - int i; + unsigned int i; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -760,7 +776,7 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, struct kbase_jd_atom *target_katom) { struct kbase_device *kbdev; - int target_js = target_katom->slot_nr; + unsigned int target_js = target_katom->slot_nr; int i; bool stop_sent = false; @@ -938,8 +954,8 @@ KBASE_EXPORT_TEST_API(kbase_job_slot_term); * * Where possible any job in the next register is evicted before the soft-stop. 
*/ -void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, - struct kbase_jd_atom *target_katom, u32 sw_flags) +void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, unsigned int js, + struct kbase_jd_atom *target_katom, u32 sw_flags) { dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n", target_katom, sw_flags, js); @@ -959,8 +975,8 @@ void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u); } -void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, - struct kbase_jd_atom *target_katom) +void kbase_job_slot_hardstop(struct kbase_context *kctx, unsigned int js, + struct kbase_jd_atom *target_katom) { struct kbase_device *kbdev = kctx->kbdev; @@ -1264,7 +1280,7 @@ static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer) static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev) { - int i; + unsigned int i; int pending_jobs = 0; /* Count the number of jobs */ @@ -1444,6 +1460,11 @@ bool kbase_reset_gpu_is_active(struct kbase_device *kbdev) return true; } +bool kbase_reset_gpu_is_not_pending(struct kbase_device *kbdev) +{ + return atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_NOT_PENDING; +} + int kbase_reset_gpu_wait(struct kbase_device *kbdev) { wait_event(kbdev->hwaccess.backend.reset_wait, diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h index 1ebb843..bfd55a6 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2016, 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016, 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,21 +34,6 @@ #include <device/mali_kbase_device.h> /** - * kbase_job_submit_nolock() - Submit a job to a certain job-slot - * @kbdev: Device pointer - * @katom: Atom to submit - * @js: Job slot to submit on - * - * The caller must check kbasep_jm_is_submit_slots_free() != false before - * calling this. 
- * - * The following locking conditions are made on the caller: - * - it must hold the hwaccess_lock - */ -void kbase_job_submit_nolock(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, int js); - -/** * kbase_job_done_slot() - Complete the head job on a particular job-slot * @kbdev: Device pointer * @s: Job slot @@ -60,22 +45,13 @@ void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code, u64 job_tail, ktime_t *end_timestamp); #if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) -static inline char *kbasep_make_job_slot_string(int js, char *js_string, - size_t js_size) +static inline char *kbasep_make_job_slot_string(unsigned int js, char *js_string, size_t js_size) { - snprintf(js_string, js_size, "job_slot_%i", js); + snprintf(js_string, js_size, "job_slot_%u", js); return js_string; } #endif -#if !MALI_USE_CSF -static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, - struct kbase_context *kctx) -{ - return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT)); -} -#endif - /** * kbase_job_hw_submit() - Submit a job to the GPU * @kbdev: Device pointer @@ -90,7 +66,7 @@ static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, * * Return: 0 if the job was successfully submitted to hardware, an error otherwise. */ -int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js); +int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, unsigned int js); #if !MALI_USE_CSF /** @@ -106,11 +82,9 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, * The following locking conditions are made on the caller: * - it must hold the hwaccess_lock */ -void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, - int js, - u32 action, - base_jd_core_req core_reqs, - struct kbase_jd_atom *target_katom); +void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, unsigned int js, + u32 action, base_jd_core_req core_reqs, + struct kbase_jd_atom *target_katom); #endif /* !MALI_USE_CSF */ /** @@ -134,11 +108,8 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, * * Return: true if an atom was stopped, false otherwise */ -bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js, - struct kbase_jd_atom *katom, - u32 action); +bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_context *kctx, + unsigned int js, struct kbase_jd_atom *katom, u32 action); /** * kbase_job_slot_init - Initialise job slot framework diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c index 4fe8046..f4094a3 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,7 +29,7 @@ #include <mali_kbase_jm.h> #include <mali_kbase_js.h> #include <tl/mali_kbase_tracepoints.h> -#include <mali_kbase_hwcnt_context.h> +#include <hwcnt/mali_kbase_hwcnt_context.h> #include <mali_kbase_reset_gpu.h> #include <mali_kbase_kinstr_jm.h> #include <backend/gpu/mali_kbase_cache_policy_backend.h> @@ -93,9 +93,8 @@ static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev, * * Return: Atom removed from ringbuffer */ -static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, - int js, - ktime_t *end_timestamp) +static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, unsigned int js, + ktime_t *end_timestamp) { struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; struct kbase_jd_atom *katom; @@ -118,8 +117,7 @@ static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, return katom; } -struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, - int idx) +struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, unsigned int js, int idx) { struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; @@ -131,8 +129,7 @@ struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom; } -struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, - int js) +struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, unsigned int js) { struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; @@ -144,12 +141,13 @@ struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev) { - int js; - int i; + unsigned int js; lockdep_assert_held(&kbdev->hwaccess_lock); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + int i; + for (i = 0; i < SLOT_RB_SIZE; i++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); @@ -160,7 +158,7 @@ bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev) return false; } -int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js) +int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, unsigned int js) { int nr = 0; int i; @@ -178,7 +176,7 @@ int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js) return nr; } -int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js) +int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, unsigned int js) { int nr = 0; int i; @@ -193,8 +191,8 @@ int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js) return nr; } -static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js, - enum kbase_atom_gpu_rb_state min_rb_state) +static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, unsigned int js, + enum kbase_atom_gpu_rb_state min_rb_state) { int nr = 0; int i; @@ -244,9 +242,11 @@ static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure) static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, bool secure) { - int js, i; + unsigned int js; for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + int i; + for (i = 0; i < SLOT_RB_SIZE; i++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); @@ -261,7 +261,7 @@ static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, return false; } -int 
kbase_backend_slot_free(struct kbase_device *kbdev, int js) +int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js) { lockdep_assert_held(&kbdev->hwaccess_lock); @@ -429,9 +429,9 @@ static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev, * * Return: true if any slots other than @js are busy, false otherwise */ -static inline bool other_slots_busy(struct kbase_device *kbdev, int js) +static inline bool other_slots_busy(struct kbase_device *kbdev, unsigned int js) { - int slot; + unsigned int slot; for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) { if (slot == js) @@ -843,7 +843,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, void kbase_backend_slot_update(struct kbase_device *kbdev) { - int js; + unsigned int js; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -1000,36 +1000,34 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) other_slots_busy(kbdev, js)) break; -#ifdef CONFIG_MALI_GEM5_BUILD - if (!kbasep_jm_is_js_free(kbdev, js, - katom[idx]->kctx)) - break; -#endif /* Check if this job needs the cycle counter * enabled before submission */ if (katom[idx]->core_req & BASE_JD_REQ_PERMON) - kbase_pm_request_gpu_cycle_counter_l2_is_on( - kbdev); + kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev); - if (!kbase_job_hw_submit(kbdev, katom[idx], js)) + if (!kbase_job_hw_submit(kbdev, katom[idx], js)) { katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_SUBMITTED; - else - break; - kbasep_platform_event_work_begin(katom[idx]); + /* Inform power management at start/finish of + * atom so it can update its GPU utilisation + * metrics. + */ + kbase_pm_metrics_update(kbdev, + &katom[idx]->start_timestamp); + + /* Inform platform at start/finish of atom */ + kbasep_platform_event_work_begin(katom[idx]); + } else { + if (katom[idx]->core_req & BASE_JD_REQ_PERMON) + kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + + break; + } /* ***TRANSITION TO HIGHER STATE*** */ fallthrough; case KBASE_ATOM_GPU_RB_SUBMITTED: - - /* Inform power management at start/finish of - * atom so it can update its GPU utilisation - * metrics. - */ - kbase_pm_metrics_update(kbdev, - &katom[idx]->start_timestamp); - break; case KBASE_ATOM_GPU_RB_RETURN_TO_JS: @@ -1109,8 +1107,7 @@ kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a, * * Return: true if an atom was evicted, false otherwise. */ -bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, - u32 completion_code) +bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 completion_code) { struct kbase_jd_atom *katom; struct kbase_jd_atom *next_katom; @@ -1118,6 +1115,10 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, lockdep_assert_held(&kbdev->hwaccess_lock); katom = kbase_gpu_inspect(kbdev, js, 0); + if (!katom) { + dev_err(kbdev->dev, "Can't get a katom from js(%u)\n", js); + return false; + } next_katom = kbase_gpu_inspect(kbdev, js, 1); if (next_katom && @@ -1181,13 +1182,19 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, * otherwise we would be in the incorrect state of having an atom both running * on the HW and returned to the JS. 
*/ -void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, - u32 completion_code, - u64 job_tail, - ktime_t *end_timestamp) + +void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 completion_code, + u64 job_tail, ktime_t *end_timestamp) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); - struct kbase_context *kctx = katom->kctx; + struct kbase_context *kctx = NULL; + + if (unlikely(!katom)) { + dev_err(kbdev->dev, "Can't get a katom from js(%d)\n", js); + return; + } + + kctx = katom->kctx; dev_dbg(kbdev->dev, "Atom %pK completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n", @@ -1240,7 +1247,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, } } else if (completion_code != BASE_JD_EVENT_DONE) { struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - int i; + unsigned int i; if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) { dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", @@ -1385,7 +1392,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) { - int js; + unsigned int js; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -1413,7 +1420,7 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) kbase_gpu_in_protected_mode(kbdev)); WARN(!(kbase_jd_katom_is_protected(katom) && js == 0) && kbase_jd_katom_is_protected(katom), - "Protected atom on JS%d not supported", js); + "Protected atom on JS%u not supported", js); } if ((katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) && !kbase_ctx_flag(katom->kctx, KCTX_DYING)) @@ -1509,10 +1516,8 @@ static bool should_stop_next_atom(struct kbase_device *kbdev, return ret; } -static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, - int js, - struct kbase_jd_atom *katom, - u32 action) +static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, unsigned int js, + struct kbase_jd_atom *katom, u32 action) { struct kbase_context *kctx = katom->kctx; u32 hw_action = action & JS_COMMAND_MASK; @@ -1556,11 +1561,8 @@ static int should_stop_x_dep_slot(struct kbase_jd_atom *katom) return -1; } -bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js, - struct kbase_jd_atom *katom, - u32 action) +bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_context *kctx, + unsigned int js, struct kbase_jd_atom *katom, u32 action) { struct kbase_jd_atom *katom_idx0; struct kbase_context *kctx_idx0 = NULL; @@ -1813,7 +1815,7 @@ void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, void kbase_gpu_dump_slots(struct kbase_device *kbdev) { unsigned long flags; - int js; + unsigned int js; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -1828,12 +1830,10 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev) idx); if (katom) - dev_info(kbdev->dev, - " js%d idx%d : katom=%pK gpu_rb_state=%d\n", - js, idx, katom, katom->gpu_rb_state); + dev_info(kbdev->dev, " js%u idx%d : katom=%pK gpu_rb_state=%d\n", + js, idx, katom, katom->gpu_rb_state); else - dev_info(kbdev->dev, " js%d idx%d : empty\n", - js, idx); + dev_info(kbdev->dev, " js%u idx%d : empty\n", js, idx); } } @@ -1842,7 +1842,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev) void kbase_backend_slot_kctx_purge_locked(struct kbase_device *kbdev, struct kbase_context *kctx) { - int js; + unsigned int js; bool tracked = false; lockdep_assert_held(&kbdev->hwaccess_lock); diff --git 
a/mali_kbase/backend/gpu/mali_kbase_jm_rb.h b/mali_kbase/backend/gpu/mali_kbase_jm_rb.h index d3ff203..32be0bf 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.h +++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -40,8 +40,7 @@ * * Return: true if job evicted from NEXT registers, false otherwise */ -bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, - u32 completion_code); +bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 completion_code); /** * kbase_gpu_complete_hw - Complete an atom on job slot js @@ -53,10 +52,8 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, * completed * @end_timestamp: Time of completion */ -void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, - u32 completion_code, - u64 job_tail, - ktime_t *end_timestamp); +void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 completion_code, + u64 job_tail, ktime_t *end_timestamp); /** * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer @@ -68,8 +65,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, * Return: The atom at that position in the ringbuffer * or NULL if no atom present */ -struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, - int idx); +struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, unsigned int js, int idx); /** * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers diff --git a/mali_kbase/backend/gpu/mali_kbase_js_backend.c b/mali_kbase/backend/gpu/mali_kbase_js_backend.c index 02d7cdb..0ed04bb 100644 --- a/mali_kbase/backend/gpu/mali_kbase_js_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_js_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -91,7 +91,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) struct kbase_device *kbdev; struct kbasep_js_device_data *js_devdata; struct kbase_backend_data *backend; - int s; + unsigned int s; bool reset_needed = false; KBASE_DEBUG_ASSERT(timer != NULL); @@ -365,4 +365,3 @@ void kbase_backend_timeouts_changed(struct kbase_device *kbdev) backend->timeouts_updated = true; } - diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c index 961a951..dd16fb2 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c +++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c @@ -62,8 +62,9 @@ * document */ #include <mali_kbase.h> +#include <device/mali_kbase_device.h> #include <gpu/mali_kbase_gpu_regmap.h> -#include <backend/gpu/mali_kbase_model_dummy.h> +#include <backend/gpu/mali_kbase_model_linux.h> #include <mali_kbase_mem_linux.h> #if MALI_USE_CSF @@ -80,67 +81,23 @@ static bool ipa_control_timer_enabled; #endif #define LO_MASK(M) ((M) & 0xFFFFFFFF) -#define HI_MASK(M) ((M) & 0xFFFFFFFF00000000) - -static u32 get_implementation_register(u32 reg) -{ - switch (reg) { - case GPU_CONTROL_REG(SHADER_PRESENT_LO): - return LO_MASK(DUMMY_IMPLEMENTATION_SHADER_PRESENT); - case GPU_CONTROL_REG(TILER_PRESENT_LO): - return LO_MASK(DUMMY_IMPLEMENTATION_TILER_PRESENT); - case GPU_CONTROL_REG(L2_PRESENT_LO): - return LO_MASK(DUMMY_IMPLEMENTATION_L2_PRESENT); - case GPU_CONTROL_REG(STACK_PRESENT_LO): - return LO_MASK(DUMMY_IMPLEMENTATION_STACK_PRESENT); - - case GPU_CONTROL_REG(SHADER_PRESENT_HI): - case GPU_CONTROL_REG(TILER_PRESENT_HI): - case GPU_CONTROL_REG(L2_PRESENT_HI): - case GPU_CONTROL_REG(STACK_PRESENT_HI): - /* *** FALLTHROUGH *** */ - default: - return 0; - } -} - -struct { - spinlock_t access_lock; #if !MALI_USE_CSF - unsigned long prfcnt_base; -#endif /* !MALI_USE_CSF */ - u32 *prfcnt_base_cpu; - - u32 time; - - struct gpu_model_prfcnt_en prfcnt_en; - - u64 l2_present; - u64 shader_present; +#define HI_MASK(M) ((M) & 0xFFFFFFFF00000000) +#endif -#if !MALI_USE_CSF - u64 jm_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; +/* Construct a value for the THREAD_FEATURES register, *except* the two most + * significant bits, which are set to IMPLEMENTATION_MODEL in + * midgard_model_read_reg(). 
+ */ +#if MALI_USE_CSF +#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \ + ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 24)) #else - u64 cshw_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; -#endif /* !MALI_USE_CSF */ - u64 tiler_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; - u64 l2_counters[KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS * - KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; - u64 shader_counters[KBASE_DUMMY_MODEL_MAX_SHADER_CORES * - KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; - -} performance_counters = { - .l2_present = DUMMY_IMPLEMENTATION_L2_PRESENT, - .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, -}; +#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \ + ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 16) | ((MAX_TG_SPLIT) << 24)) +#endif -struct job_slot { - int job_active; - int job_queued; - int job_complete_irq_asserted; - int job_irq_mask; - int job_disabled; -}; +struct error_status_t hw_error_status; /** * struct control_reg_values_t - control register values specific to the GPU being 'emulated' @@ -158,6 +115,9 @@ struct job_slot { * @mmu_features: MMU features * @gpu_features_lo: GPU features (low) * @gpu_features_hi: GPU features (high) + * @shader_present: Available shader bitmap + * @stack_present: Core stack present bitmap + * */ struct control_reg_values_t { const char *name; @@ -172,6 +132,16 @@ struct control_reg_values_t { u32 mmu_features; u32 gpu_features_lo; u32 gpu_features_hi; + u32 shader_present; + u32 stack_present; +}; + +struct job_slot { + int job_active; + int job_queued; + int job_complete_irq_asserted; + int job_irq_mask; + int job_disabled; }; struct dummy_model_t { @@ -184,6 +154,10 @@ struct dummy_model_t { int power_changed; /* 1bit */ bool clean_caches_completed; bool clean_caches_completed_irq_enabled; +#if MALI_USE_CSF + bool flush_pa_range_completed; + bool flush_pa_range_completed_irq_enabled; +#endif int power_on; /* 6bits: SHADER[4],TILER,L2 */ u32 stack_power_on_lo; u32 coherency_enable; @@ -194,45 +168,6 @@ struct dummy_model_t { void *data; }; -void gpu_device_set_data(void *model, void *data) -{ - struct dummy_model_t *dummy = (struct dummy_model_t *)model; - - dummy->data = data; -} - -void *gpu_device_get_data(void *model) -{ - struct dummy_model_t *dummy = (struct dummy_model_t *)model; - - return dummy->data; -} - -#define signal_int(m, s) m->slots[(s)].job_complete_irq_asserted = 1 - -/* SCons should pass in a default GPU, but other ways of building (e.g. - * in-tree) won't, so define one here in case. - */ -#ifndef CONFIG_MALI_NO_MALI_DEFAULT_GPU -#define CONFIG_MALI_NO_MALI_DEFAULT_GPU "tMIx" -#endif - -static char *no_mali_gpu = CONFIG_MALI_NO_MALI_DEFAULT_GPU; -module_param(no_mali_gpu, charp, 0000); -MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as"); - -/* Construct a value for the THREAD_FEATURES register, *except* the two most - * significant bits, which are set to IMPLEMENTATION_MODEL in - * midgard_model_read_reg(). - */ -#if MALI_USE_CSF -#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \ - ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 24)) -#else -#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \ - ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 16) | ((MAX_TG_SPLIT) << 24)) -#endif - /* Array associating GPU names with control register values. The first * one is used in the case of no match. 
*/ @@ -249,6 +184,8 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0, .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { .name = "tHEx", @@ -262,6 +199,8 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0, .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { .name = "tSIx", @@ -275,6 +214,8 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2821, .gpu_features_lo = 0, .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { .name = "tDVx", @@ -288,6 +229,8 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2821, .gpu_features_lo = 0, .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { .name = "tNOx", @@ -301,6 +244,8 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0, .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { .name = "tGOx_r0p0", @@ -314,6 +259,8 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0, .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { .name = "tGOx_r1p0", @@ -328,6 +275,8 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2823, .gpu_features_lo = 0, .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { .name = "tTRx", @@ -341,6 +290,8 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0, .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { .name = "tNAx", @@ -354,6 +305,8 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0, .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { .name = "tBEx", @@ -367,6 +320,8 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0, .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TBEX, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { .name = "tBAx", @@ -380,19 +335,8 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0, .gpu_features_hi = 0, - }, - { - .name = "tDUx", - .gpu_id = GPU_ID2_MAKE(10, 2, 0, 1, 0, 0, 0), - .as_present = 0xFF, - .thread_max_threads = 0x180, - .thread_max_workgroup_size = 0x180, - .thread_max_barrier_size = 0x180, - .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), - .tiler_features = 0x809, - .mmu_features = 0x2830, - .gpu_features_lo = 0, - .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = 
DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { .name = "tODx", @@ -406,6 +350,8 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0, .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TODX, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { .name = "tGRx", @@ -420,6 +366,8 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0, .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { .name = "tVAx", @@ -434,6 +382,8 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0, .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { .name = "tTUx", @@ -448,10 +398,102 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0xf, .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTUX, + .stack_present = 0xF, + }, + { + .name = "tTIx", + .gpu_id = GPU_ID2_MAKE(12, 8, 1, 0, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x800, + .thread_max_workgroup_size = 0x400, + .thread_max_barrier_size = 0x400, + .thread_features = THREAD_FEATURES_PARTIAL(0x10000, 16, 0), + .core_features = 0x1, /* core_1e64fma4tex */ + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0xf, + .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTIX, + .stack_present = 0xF, }, }; -struct error_status_t hw_error_status; +static struct { + spinlock_t access_lock; +#if !MALI_USE_CSF + unsigned long prfcnt_base; +#endif /* !MALI_USE_CSF */ + u32 *prfcnt_base_cpu; + + u32 time; + + struct gpu_model_prfcnt_en prfcnt_en; + + u64 l2_present; + u64 shader_present; + +#if !MALI_USE_CSF + u64 jm_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; +#else + u64 cshw_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; +#endif /* !MALI_USE_CSF */ + u64 tiler_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; + u64 l2_counters[KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS * + KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; + u64 shader_counters[KBASE_DUMMY_MODEL_MAX_SHADER_CORES * + KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; +} performance_counters; + +static u32 get_implementation_register(u32 reg, + const struct control_reg_values_t *const control_reg_values) +{ + switch (reg) { + case GPU_CONTROL_REG(SHADER_PRESENT_LO): + return LO_MASK(control_reg_values->shader_present); + case GPU_CONTROL_REG(TILER_PRESENT_LO): + return LO_MASK(DUMMY_IMPLEMENTATION_TILER_PRESENT); + case GPU_CONTROL_REG(L2_PRESENT_LO): + return LO_MASK(DUMMY_IMPLEMENTATION_L2_PRESENT); + case GPU_CONTROL_REG(STACK_PRESENT_LO): + return LO_MASK(control_reg_values->stack_present); + + case GPU_CONTROL_REG(SHADER_PRESENT_HI): + case GPU_CONTROL_REG(TILER_PRESENT_HI): + case GPU_CONTROL_REG(L2_PRESENT_HI): + case GPU_CONTROL_REG(STACK_PRESENT_HI): + /* *** FALLTHROUGH *** */ + default: + return 0; + } +} + +void gpu_device_set_data(void *model, void *data) +{ + struct dummy_model_t *dummy = (struct dummy_model_t *)model; + + dummy->data = data; +} + +void *gpu_device_get_data(void *model) +{ + struct dummy_model_t *dummy = (struct dummy_model_t *)model; + + return dummy->data; +} + +#define signal_int(m, s) m->slots[(s)].job_complete_irq_asserted = 1 + +/* SCons should pass in a default GPU, but 
other ways of building (e.g. + * in-tree) won't, so define one here in case. + */ +#ifndef CONFIG_MALI_NO_MALI_DEFAULT_GPU +#define CONFIG_MALI_NO_MALI_DEFAULT_GPU "tMIx" +#endif + +static char *no_mali_gpu = CONFIG_MALI_NO_MALI_DEFAULT_GPU; +module_param(no_mali_gpu, charp, 0000); +MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as"); #if MALI_USE_CSF static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, @@ -474,17 +516,18 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, (ipa_ctl_select_config[core_type] >> (cnt_idx * 8)) & 0xFF; /* Currently only primary counter blocks are supported */ - if (WARN_ON(event_index >= 64)) + if (WARN_ON(event_index >= + (KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS + KBASE_DUMMY_MODEL_COUNTER_PER_CORE))) return 0; /* The actual events start index 4 onwards. Spec also says PRFCNT_EN, * TIMESTAMP_LO or TIMESTAMP_HI pseudo-counters do not make sense for * IPA counters. If selected, the value returned for them will be zero. */ - if (WARN_ON(event_index <= 3)) + if (WARN_ON(event_index < KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS)) return 0; - event_index -= 4; + event_index -= KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS; spin_lock_irqsave(&performance_counters.access_lock, flags); @@ -680,7 +723,7 @@ void gpu_model_glb_request_job_irq(void *model) spin_lock_irqsave(&hw_error_status.access_lock, flags); hw_error_status.job_irq_status |= JOB_IRQ_GLOBAL_IF; spin_unlock_irqrestore(&hw_error_status.access_lock, flags); - gpu_device_raise_irq(model, GPU_DUMMY_JOB_IRQ); + gpu_device_raise_irq(model, MODEL_LINUX_JOB_IRQ); } #endif /* !MALI_USE_CSF */ @@ -712,7 +755,7 @@ static void init_register_statuses(struct dummy_model_t *dummy) performance_counters.time = 0; } -static void update_register_statuses(struct dummy_model_t *dummy, int job_slot) +static void update_register_statuses(struct dummy_model_t *dummy, unsigned int job_slot) { lockdep_assert_held(&hw_error_status.access_lock); @@ -1011,6 +1054,21 @@ static const struct control_reg_values_t *find_control_reg_values(const char *gp size_t i; const struct control_reg_values_t *ret = NULL; + /* Edge case for tGOx, as it has 2 entries in the table for its R0 and R1 + * revisions respectively. As none of them are named "tGOx" the name comparison + * needs to be fixed in these cases. CONFIG_GPU_HWVER should be one of "r0p0" + * or "r1p0" and is derived from the DDK's build configuration. In cases + * where it is unavailable, it defaults to tGOx r1p0. 
+ */ + if (!strcmp(gpu, "tGOx")) { +#ifdef CONFIG_GPU_HWVER + if (!strcmp(CONFIG_GPU_HWVER, "r0p0")) + gpu = "tGOx_r0p0"; + else if (!strcmp(CONFIG_GPU_HWVER, "r1p0")) +#endif /* CONFIG_GPU_HWVER defined */ + gpu = "tGOx_r1p0"; + } + for (i = 0; i < ARRAY_SIZE(all_control_reg_values); ++i) { const struct control_reg_values_t * const fcrv = &all_control_reg_values[i]; @@ -1030,7 +1088,7 @@ static const struct control_reg_values_t *find_control_reg_values(const char *gp return ret; } -void *midgard_model_create(const void *config) +void *midgard_model_create(struct kbase_device *kbdev) { struct dummy_model_t *dummy = NULL; @@ -1043,7 +1101,16 @@ void *midgard_model_create(const void *config) dummy->job_irq_js_state = 0; init_register_statuses(dummy); dummy->control_reg_values = find_control_reg_values(no_mali_gpu); + performance_counters.l2_present = get_implementation_register( + GPU_CONTROL_REG(L2_PRESENT_LO), dummy->control_reg_values); + performance_counters.shader_present = get_implementation_register( + GPU_CONTROL_REG(SHADER_PRESENT_LO), dummy->control_reg_values); + + gpu_device_set_data(dummy, kbdev); + + dev_info(kbdev->dev, "Using Dummy Model"); } + return dummy; } @@ -1059,19 +1126,21 @@ static void midgard_model_get_outputs(void *h) lockdep_assert_held(&hw_error_status.access_lock); if (hw_error_status.job_irq_status) - gpu_device_raise_irq(dummy, GPU_DUMMY_JOB_IRQ); + gpu_device_raise_irq(dummy, MODEL_LINUX_JOB_IRQ); if ((dummy->power_changed && dummy->power_changed_mask) || (dummy->reset_completed & dummy->reset_completed_mask) || hw_error_status.gpu_error_irq || #if !MALI_USE_CSF dummy->prfcnt_sample_completed || +#else + (dummy->flush_pa_range_completed && dummy->flush_pa_range_completed_irq_enabled) || #endif (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled)) - gpu_device_raise_irq(dummy, GPU_DUMMY_GPU_IRQ); + gpu_device_raise_irq(dummy, MODEL_LINUX_GPU_IRQ); if (hw_error_status.mmu_irq_rawstat & hw_error_status.mmu_irq_mask) - gpu_device_raise_irq(dummy, GPU_DUMMY_MMU_IRQ); + gpu_device_raise_irq(dummy, MODEL_LINUX_MMU_IRQ); } static void midgard_model_update(void *h) @@ -1138,7 +1207,7 @@ static void invalidate_active_jobs(struct dummy_model_t *dummy) } } -u8 midgard_model_write_reg(void *h, u32 addr, u32 value) +void midgard_model_write_reg(void *h, u32 addr, u32 value) { unsigned long flags; struct dummy_model_t *dummy = (struct dummy_model_t *)h; @@ -1148,7 +1217,7 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) #if !MALI_USE_CSF if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) && (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) { - int slot_idx = (addr >> 7) & 0xf; + unsigned int slot_idx = (addr >> 7) & 0xf; KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS); if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_LO)) { @@ -1235,6 +1304,9 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) dummy->reset_completed_mask = (value >> 8) & 0x01; dummy->power_changed_mask = (value >> 9) & 0x03; dummy->clean_caches_completed_irq_enabled = (value & (1u << 17)) != 0u; +#if MALI_USE_CSF + dummy->flush_pa_range_completed_irq_enabled = (value & (1u << 20)) != 0u; +#endif } else if (addr == GPU_CONTROL_REG(COHERENCY_ENABLE)) { dummy->coherency_enable = value; } else if (addr == GPU_CONTROL_REG(GPU_IRQ_CLEAR)) { @@ -1247,10 +1319,17 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) if (value & (1 << 17)) dummy->clean_caches_completed = false; -#if !MALI_USE_CSF - if (value & PRFCNT_SAMPLE_COMPLETED) + +#if MALI_USE_CSF + if (value & (1u << 20)) + 
dummy->flush_pa_range_completed = false; +#endif /* MALI_USE_CSF */ + +#if !MALI_USE_CSF + if (value & PRFCNT_SAMPLE_COMPLETED) /* (1 << 16) */ dummy->prfcnt_sample_completed = 0; #endif /* !MALI_USE_CSF */ + /*update error status */ hw_error_status.gpu_error_irq &= ~(value); } else if (addr == GPU_CONTROL_REG(GPU_COMMAND)) { @@ -1274,7 +1353,15 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) pr_debug("clean caches requested"); dummy->clean_caches_completed = true; break; -#if !MALI_USE_CSF +#if MALI_USE_CSF + case GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2: + case GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC: + case GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_FULL: + pr_debug("pa range flush requested"); + dummy->flush_pa_range_completed = true; + break; +#endif /* MALI_USE_CSF */ +#if !MALI_USE_CSF case GPU_COMMAND_PRFCNT_SAMPLE: midgard_model_dump_prfcnt(); dummy->prfcnt_sample_completed = 1; @@ -1282,6 +1369,11 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) default: break; } +#if MALI_USE_CSF + } else if (addr >= GPU_CONTROL_REG(GPU_COMMAND_ARG0_LO) && + addr <= GPU_CONTROL_REG(GPU_COMMAND_ARG1_HI)) { + /* Writes ignored */ +#endif } else if (addr == GPU_CONTROL_REG(L2_CONFIG)) { dummy->l2_config = value; } @@ -1291,6 +1383,12 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) (CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE))) { if (addr == GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET)) hw_error_status.job_irq_status = JOB_IRQ_GLOBAL_IF; + } else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) && + (addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) { + /* Do nothing */ + } else if ((addr >= GPU_CONTROL_REG(ASN_HASH_0)) && + (addr < GPU_CONTROL_REG(ASN_HASH(ASN_HASH_COUNT)))) { + /* Do nothing */ } else if (addr == IPA_CONTROL_REG(COMMAND)) { pr_debug("Received IPA_CONTROL command"); } else if (addr == IPA_CONTROL_REG(TIMER)) { @@ -1315,8 +1413,7 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) hw_error_status.mmu_irq_mask = value; } else if (addr == MMU_REG(MMU_IRQ_CLEAR)) { hw_error_status.mmu_irq_rawstat &= (~value); - } else if ((addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)) && - (addr <= MMU_AS_REG(15, AS_STATUS))) { + } else if ((addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)) && (addr <= MMU_AS_REG(15, AS_STATUS))) { int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO)) >> 6; @@ -1443,7 +1540,8 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) dummy->power_changed = 1; break; case SHADER_PWRON_LO: - dummy->power_on |= (value & 0xF) << 2; + dummy->power_on |= + (value & dummy->control_reg_values->shader_present) << 2; dummy->power_changed = 1; break; case L2_PWRON_LO: @@ -1459,7 +1557,8 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) dummy->power_changed = 1; break; case SHADER_PWROFF_LO: - dummy->power_on &= ~((value & 0xF) << 2); + dummy->power_on &= + ~((value & dummy->control_reg_values->shader_present) << 2); dummy->power_changed = 1; break; case L2_PWROFF_LO: @@ -1500,11 +1599,9 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) midgard_model_update(dummy); midgard_model_get_outputs(dummy); spin_unlock_irqrestore(&hw_error_status.access_lock, flags); - - return 1; } -u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) +void midgard_model_read_reg(void *h, u32 addr, u32 *const value) { unsigned long flags; struct dummy_model_t *dummy = (struct dummy_model_t *)h; @@ -1546,6 +1643,9 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { *value = 
(dummy->reset_completed_mask << 8) | ((dummy->clean_caches_completed_irq_enabled ? 1u : 0u) << 17) | +#if MALI_USE_CSF + ((dummy->flush_pa_range_completed_irq_enabled ? 1u : 0u) << 20) | +#endif (dummy->power_changed_mask << 9) | (1 << 7) | 1; pr_debug("GPU_IRQ_MASK read %x", *value); } else if (addr == GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) { @@ -1555,6 +1655,9 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) | #endif /* !MALI_USE_CSF */ ((dummy->clean_caches_completed ? 1u : 0u) << 17) | +#if MALI_USE_CSF + ((dummy->flush_pa_range_completed ? 1u : 0u) << 20) | +#endif hw_error_status.gpu_error_irq; pr_debug("GPU_IRQ_RAWSTAT read %x", *value); } else if (addr == GPU_CONTROL_REG(GPU_IRQ_STATUS)) { @@ -1569,6 +1672,13 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) 1u : 0u) << 17) | +#if MALI_USE_CSF + (((dummy->flush_pa_range_completed && + dummy->flush_pa_range_completed_irq_enabled) ? + 1u : + 0u) + << 20) | +#endif hw_error_status.gpu_error_irq; pr_debug("GPU_IRQ_STAT read %x", *value); } else if (addr == GPU_CONTROL_REG(GPU_STATUS)) { @@ -1581,8 +1691,18 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) *value = hw_error_status.gpu_fault_status; } else if (addr == GPU_CONTROL_REG(L2_CONFIG)) { *value = dummy->l2_config; - } else if ((addr >= GPU_CONTROL_REG(SHADER_PRESENT_LO)) && - (addr <= GPU_CONTROL_REG(L2_MMU_CONFIG))) { + } +#if MALI_USE_CSF + else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) && + (addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) { + *value = 0; + } else if ((addr >= GPU_CONTROL_REG(ASN_HASH_0)) && + (addr < GPU_CONTROL_REG(ASN_HASH(ASN_HASH_COUNT)))) { + *value = 0; + } +#endif + else if ((addr >= GPU_CONTROL_REG(SHADER_PRESENT_LO)) && + (addr <= GPU_CONTROL_REG(L2_MMU_CONFIG))) { switch (addr) { case GPU_CONTROL_REG(SHADER_PRESENT_LO): case GPU_CONTROL_REG(SHADER_PRESENT_HI): @@ -1592,27 +1712,27 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) case GPU_CONTROL_REG(L2_PRESENT_HI): case GPU_CONTROL_REG(STACK_PRESENT_LO): case GPU_CONTROL_REG(STACK_PRESENT_HI): - *value = get_implementation_register(addr); + *value = get_implementation_register(addr, dummy->control_reg_values); break; case GPU_CONTROL_REG(SHADER_READY_LO): *value = (dummy->power_on >> 0x02) & - get_implementation_register( - GPU_CONTROL_REG(SHADER_PRESENT_LO)); + get_implementation_register(GPU_CONTROL_REG(SHADER_PRESENT_LO), + dummy->control_reg_values); break; case GPU_CONTROL_REG(TILER_READY_LO): *value = (dummy->power_on >> 0x01) & - get_implementation_register( - GPU_CONTROL_REG(TILER_PRESENT_LO)); + get_implementation_register(GPU_CONTROL_REG(TILER_PRESENT_LO), + dummy->control_reg_values); break; case GPU_CONTROL_REG(L2_READY_LO): *value = dummy->power_on & - get_implementation_register( - GPU_CONTROL_REG(L2_PRESENT_LO)); + get_implementation_register(GPU_CONTROL_REG(L2_PRESENT_LO), + dummy->control_reg_values); break; case GPU_CONTROL_REG(STACK_READY_LO): *value = dummy->stack_power_on_lo & - get_implementation_register( - GPU_CONTROL_REG(STACK_PRESENT_LO)); + get_implementation_register(GPU_CONTROL_REG(STACK_PRESENT_LO), + dummy->control_reg_values); break; case GPU_CONTROL_REG(SHADER_READY_HI): @@ -1904,8 +2024,6 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER, counter_index, is_low_word); - } else if (addr == USER_REG(LATEST_FLUSH)) { - *value = 0; } #endif else if (addr == 
GPU_CONTROL_REG(GPU_FEATURES_LO)) { @@ -1921,8 +2039,6 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) spin_unlock_irqrestore(&hw_error_status.access_lock, flags); CSTD_UNUSED(dummy); - - return 1; } static u32 set_user_sample_core_type(u64 *counters, u32 *usr_data_start, u32 usr_data_offset, @@ -2098,3 +2214,16 @@ int gpu_model_control(void *model, return 0; } + +/** + * kbase_is_gpu_removed - Has the GPU been removed. + * @kbdev: Kbase device pointer + * + * This function would return true if the GPU has been removed. + * It is stubbed here + * Return: Always false + */ +bool kbase_is_gpu_removed(struct kbase_device *kbdev) +{ + return false; +} diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.h b/mali_kbase/backend/gpu/mali_kbase_model_dummy.h index 8eaf1b0..2a3351b 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.h +++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.h @@ -21,11 +21,24 @@ /* * Dummy Model interface + * + * Support for NO_MALI dummy Model interface. + * + * +-----------------------------------+ + * | Kbase read/write/IRQ | + * +-----------------------------------+ + * | Model Linux Framework | + * +-----------------------------------+ + * | Model Dummy interface definitions | + * +-----------------+-----------------+ + * | Fake R/W | Fake IRQ | + * +-----------------+-----------------+ */ #ifndef _KBASE_MODEL_DUMMY_H_ #define _KBASE_MODEL_DUMMY_H_ +#include <uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_linux.h> #include <uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h> #define model_error_log(module, ...) pr_err(__VA_ARGS__) @@ -154,11 +167,6 @@ struct gpu_model_prfcnt_en { u32 shader; }; -void *midgard_model_create(const void *config); -void midgard_model_destroy(void *h); -u8 midgard_model_write_reg(void *h, u32 addr, u32 value); -u8 midgard_model_read_reg(void *h, u32 addr, - u32 * const value); void midgard_set_error(int job_slot); int job_atom_inject_error(struct kbase_error_params *params); int gpu_model_control(void *h, @@ -211,17 +219,6 @@ void gpu_model_prfcnt_dump_request(uint32_t *sample_buf, struct gpu_model_prfcnt void gpu_model_glb_request_job_irq(void *model); #endif /* MALI_USE_CSF */ -enum gpu_dummy_irq { - GPU_DUMMY_JOB_IRQ, - GPU_DUMMY_GPU_IRQ, - GPU_DUMMY_MMU_IRQ -}; - -void gpu_device_raise_irq(void *model, - enum gpu_dummy_irq irq); -void gpu_device_set_data(void *model, void *data); -void *gpu_device_get_data(void *model); - extern struct error_status_t hw_error_status; #endif diff --git a/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c b/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c index 3440460..f310cc7 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c +++ b/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2015, 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015, 2018-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,7 +21,21 @@ #include <mali_kbase.h> #include <linux/random.h> -#include "backend/gpu/mali_kbase_model_dummy.h" +#include "backend/gpu/mali_kbase_model_linux.h" + +static struct kbase_error_atom *error_track_list; + +#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM + +/** Kernel 6.1.0 has dropped prandom_u32(), use get_random_u32() */ +#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) +#define prandom_u32 get_random_u32 +#endif + +/*following error probability are set quite high in order to stress the driver*/ +static unsigned int error_probability = 50; /* to be set between 0 and 100 */ +/* probability to have multiple error give that there is an error */ +static unsigned int multiple_error_probability = 50; /* all the error conditions supported by the model */ #define TOTAL_FAULTS 27 @@ -30,16 +44,6 @@ /* worst case scenario is <1 MMU fault + 1 job fault + 2 GPU faults> */ #define MAX_CONCURRENT_FAULTS 3 -static struct kbase_error_atom *error_track_list; - -unsigned int rand_seed; - -/*following error probability are set quite high in order to stress the driver*/ -unsigned int error_probability = 50; /* to be set between 0 and 100 */ -/* probability to have multiple error give that there is an error */ -unsigned int multiple_error_probability = 50; - -#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM /** * gpu_generate_error - Generate GPU error */ diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.c b/mali_kbase/backend/gpu/mali_kbase_model_linux.c index 7887cb2..e90e4df 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_linux.c +++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.c @@ -20,12 +20,12 @@ */ /* - * Model interface + * Model Linux Framework interfaces. */ #include <mali_kbase.h> #include <gpu/mali_kbase_gpu_regmap.h> -#include <backend/gpu/mali_kbase_model_dummy.h> + #include "backend/gpu/mali_kbase_model_linux.h" #include "device/mali_kbase_device.h" #include "mali_kbase_irq_internal.h" @@ -105,8 +105,7 @@ static void serve_mmu_irq(struct work_struct *work) kmem_cache_free(kbdev->irq_slab, data); } -void gpu_device_raise_irq(void *model, - enum gpu_dummy_irq irq) +void gpu_device_raise_irq(void *model, u32 irq) { struct model_irq_data *data; struct kbase_device *kbdev = gpu_device_get_data(model); @@ -120,15 +119,15 @@ void gpu_device_raise_irq(void *model, data->kbdev = kbdev; switch (irq) { - case GPU_DUMMY_JOB_IRQ: + case MODEL_LINUX_JOB_IRQ: INIT_WORK(&data->work, serve_job_irq); atomic_set(&kbdev->serving_job_irq, 1); break; - case GPU_DUMMY_GPU_IRQ: + case MODEL_LINUX_GPU_IRQ: INIT_WORK(&data->work, serve_gpu_irq); atomic_set(&kbdev->serving_gpu_irq, 1); break; - case GPU_DUMMY_MMU_IRQ: + case MODEL_LINUX_MMU_IRQ: INIT_WORK(&data->work, serve_mmu_irq); atomic_set(&kbdev->serving_mmu_irq, 1); break; @@ -165,22 +164,8 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) return val; } - KBASE_EXPORT_TEST_API(kbase_reg_read); -/** - * kbase_is_gpu_removed - Has the GPU been removed. - * @kbdev: Kbase device pointer - * - * This function would return true if the GPU has been removed. 
- * It is stubbed here - * Return: Always false - */ -bool kbase_is_gpu_removed(struct kbase_device *kbdev) -{ - return false; -} - int kbase_install_interrupts(struct kbase_device *kbdev) { KBASE_DEBUG_ASSERT(kbdev); @@ -239,16 +224,12 @@ KBASE_EXPORT_TEST_API(kbase_gpu_irq_test_handler); int kbase_gpu_device_create(struct kbase_device *kbdev) { - kbdev->model = midgard_model_create(NULL); + kbdev->model = midgard_model_create(kbdev); if (kbdev->model == NULL) return -ENOMEM; - gpu_device_set_data(kbdev->model, kbdev); - spin_lock_init(&kbdev->reg_op_lock); - dev_warn(kbdev->dev, "Using Dummy Model"); - return 0; } diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.h b/mali_kbase/backend/gpu/mali_kbase_model_linux.h index dcb2e7c..4cf1235 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_linux.h +++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,13 +20,132 @@ */ /* - * Model interface + * Model Linux Framework interfaces. + * + * This framework is used to provide generic Kbase Models interfaces. + * Note: Backends cannot be used together; the selection is done at build time. + * + * - Without Model Linux Framework: + * +-----------------------------+ + * | Kbase read/write/IRQ | + * +-----------------------------+ + * | HW interface definitions | + * +-----------------------------+ + * + * - With Model Linux Framework: + * +-----------------------------+ + * | Kbase read/write/IRQ | + * +-----------------------------+ + * | Model Linux Framework | + * +-----------------------------+ + * | Model interface definitions | + * +-----------------------------+ */ #ifndef _KBASE_MODEL_LINUX_H_ #define _KBASE_MODEL_LINUX_H_ +/* + * Include Model definitions + */ + +#if IS_ENABLED(CONFIG_MALI_NO_MALI) +#include <backend/gpu/mali_kbase_model_dummy.h> +#endif /* IS_ENABLED(CONFIG_MALI_NO_MALI) */ + +#if !IS_ENABLED(CONFIG_MALI_REAL_HW) +/** + * kbase_gpu_device_create() - Generic create function. + * + * @kbdev: Kbase device. + * + * Specific model hook is implemented by midgard_model_create() + * + * Return: 0 on success, error code otherwise. + */ int kbase_gpu_device_create(struct kbase_device *kbdev); + +/** + * kbase_gpu_device_destroy() - Generic create function. + * + * @kbdev: Kbase device. + * + * Specific model hook is implemented by midgard_model_destroy() + */ void kbase_gpu_device_destroy(struct kbase_device *kbdev); -#endif /* _KBASE_MODEL_LINUX_H_ */ +/** + * midgard_model_create() - Private create function. + * + * @kbdev: Kbase device. + * + * This hook is specific to the model built in Kbase. + * + * Return: Model handle. + */ +void *midgard_model_create(struct kbase_device *kbdev); + +/** + * midgard_model_destroy() - Private destroy function. + * + * @h: Model handle. + * + * This hook is specific to the model built in Kbase. + */ +void midgard_model_destroy(void *h); + +/** + * midgard_model_write_reg() - Private model write function. + * + * @h: Model handle. + * @addr: Address at which to write. + * @value: value to write. + * + * This hook is specific to the model built in Kbase. 
+ */ +void midgard_model_write_reg(void *h, u32 addr, u32 value); + +/** + * midgard_model_read_reg() - Private model read function. + * + * @h: Model handle. + * @addr: Address from which to read. + * @value: Pointer where to store the read value. + * + * This hook is specific to the model built in Kbase. + */ +void midgard_model_read_reg(void *h, u32 addr, u32 *const value); + +/** + * gpu_device_raise_irq() - Private IRQ raise function. + * + * @model: Model handle. + * @irq: IRQ type to raise. + * + * This hook is global to the model Linux framework. + */ +void gpu_device_raise_irq(void *model, u32 irq); + +/** + * gpu_device_set_data() - Private model set data function. + * + * @model: Model handle. + * @data: Data carried by model. + * + * This hook is global to the model Linux framework. + */ +void gpu_device_set_data(void *model, void *data); + +/** + * gpu_device_get_data() - Private model get data function. + * + * @model: Model handle. + * + * This hook is global to the model Linux framework. + * + * Return: Pointer to the data carried by model. + */ +void *gpu_device_get_data(void *model); +#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ + +#endif /* _KBASE_MODEL_LINUX_H_ */ diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c index f496ed5..abbb9c8 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c @@ -36,7 +36,7 @@ #include <linux/pm_runtime.h> #include <mali_kbase_reset_gpu.h> #endif /* !MALI_USE_CSF */ -#include <mali_kbase_hwcnt_context.h> +#include <hwcnt/mali_kbase_hwcnt_context.h> #include <backend/gpu/mali_kbase_pm_internal.h> #include <backend/gpu/mali_kbase_devfreq.h> #include <mali_kbase_dummy_job_wa.h> @@ -712,7 +712,7 @@ void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev) //callchains go through this function though holding that lock //so just print without locking. dev_err(kbdev->dev, "scheduler.state %d", kbdev->csf.scheduler.state); - dev_err(kbdev->dev, "Firmware ping %d", kbase_csf_firmware_ping_wait(kbdev)); + dev_err(kbdev->dev, "Firmware ping %d", kbase_csf_firmware_ping_wait(kbdev, 0)); #endif //Attempt another state machine transition prompt. 
dev_err(kbdev->dev, "Attempt to prompt state machine"); @@ -1030,7 +1030,7 @@ void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev) if (!kbdev->arb.arb_if) return; - mutex_lock(&kbdev->pm.lock); + rt_mutex_lock(&kbdev->pm.lock); mutex_lock(&arb_vm_state->vm_state_lock); if (kbdev->pm.backend.gpu_powered && !kbase_pm_is_gpu_lost(kbdev)) { @@ -1070,7 +1070,7 @@ void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev) spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); } mutex_unlock(&arb_vm_state->vm_state_lock); - mutex_unlock(&kbdev->pm.lock); + rt_mutex_unlock(&kbdev->pm.lock); } #endif /* CONFIG_MALI_ARBITER_SUPPORT */ @@ -1286,50 +1286,3 @@ out: return ret; } #endif - -#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS -void kbase_pm_turn_on_sc_power_rails_locked(struct kbase_device *kbdev) -{ - unsigned long flags; - - lockdep_assert_held(&kbdev->pm.lock); - WARN_ON(!kbdev->pm.backend.gpu_powered); - if (kbdev->pm.backend.sc_power_rails_off) { - if (kbdev->pm.backend.callback_power_on_sc_rails) { - kbdev->pm.backend.callback_power_on_sc_rails(kbdev); - KBASE_KTRACE_ADD(kbdev, PM_RAIL_ON, NULL, 0); - } - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbdev->pm.backend.sc_power_rails_off = false; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - } -} - -void kbase_pm_turn_on_sc_power_rails(struct kbase_device *kbdev) -{ - kbase_pm_lock(kbdev); - kbase_pm_turn_on_sc_power_rails_locked(kbdev); - kbase_pm_unlock(kbdev); -} - -void kbase_pm_turn_off_sc_power_rails(struct kbase_device *kbdev) -{ - unsigned long flags; - - kbase_pm_lock(kbdev); - WARN_ON(!kbdev->pm.backend.gpu_powered); - if (!kbdev->pm.backend.sc_power_rails_off) { - bool abort; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbdev->pm.backend.sc_power_rails_off = true; - /* Work around for b/234962632 */ - abort = WARN_ON(!kbdev->pm.backend.sc_pwroff_safe); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - if (kbdev->pm.backend.callback_power_off_sc_rails && !abort) { - kbdev->pm.backend.callback_power_off_sc_rails(kbdev); - KBASE_KTRACE_ADD(kbdev, PM_RAIL_OFF, NULL, 0); - } - } - kbase_pm_unlock(kbdev); -} -#endif diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c index a4d7168..b02f77f 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c @@ -26,9 +26,7 @@ #include <mali_kbase.h> #include <mali_kbase_pm.h> #include <backend/gpu/mali_kbase_pm_internal.h> -#if IS_ENABLED(CONFIG_MALI_NO_MALI) -#include <backend/gpu/mali_kbase_model_dummy.h> -#endif /* CONFIG_MALI_NO_MALI */ +#include <backend/gpu/mali_kbase_model_linux.h> #include <mali_kbase_dummy_job_wa.h> int kbase_pm_ca_init(struct kbase_device *kbdev) diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c index 83dd741..7f4f476 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c @@ -39,7 +39,7 @@ #include <mali_kbase_reset_gpu.h> #include <mali_kbase_ctx_sched.h> -#include <mali_kbase_hwcnt_context.h> +#include <hwcnt/mali_kbase_hwcnt_context.h> #include <mali_kbase_pbha.h> #include <backend/gpu/mali_kbase_cache_policy_backend.h> #include <device/mali_kbase_device.h> @@ -539,6 +539,14 @@ static void kbase_pm_l2_config_override(struct kbase_device *kbdev) if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) return; +#if MALI_USE_CSF + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU)) { + val = kbase_reg_read(kbdev, 
GPU_CONTROL_REG(L2_CONFIG)); + kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_CONFIG), + L2_CONFIG_PBHA_HWU_SET(val, kbdev->pbha_propagate_bits)); + } +#endif /* MALI_USE_CSF */ + /* * Skip if size and hash are not given explicitly, * which means default values are used. @@ -600,6 +608,21 @@ static const char *kbase_mcu_state_to_string(enum kbase_mcu_state state) return strings[state]; } +static +void kbase_ktrace_log_mcu_state(struct kbase_device *kbdev, enum kbase_mcu_state state) +{ +#if KBASE_KTRACE_ENABLE + switch (state) { +#define KBASEP_MCU_STATE(n) \ + case KBASE_MCU_ ## n: \ + KBASE_KTRACE_ADD(kbdev, PM_MCU_ ## n, NULL, state); \ + break; +#include "mali_kbase_pm_mcu_states.h" +#undef KBASEP_MCU_STATE + } +#endif +} + static inline bool kbase_pm_handle_mcu_core_attr_update(struct kbase_device *kbdev) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; @@ -794,6 +817,17 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) KBASE_MCU_HCTL_SHADERS_PEND_ON; } else backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + if (kbase_debug_coresight_csf_state_check( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED)) { + kbase_debug_coresight_csf_state_request( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED); + backend->mcu_state = KBASE_MCU_CORESIGHT_ENABLE; + } else if (kbase_debug_coresight_csf_state_check( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) { + backend->mcu_state = KBASE_MCU_CORESIGHT_ENABLE; + } +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ } break; @@ -822,8 +856,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) unsigned long flags; kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbase_hwcnt_context_enable( - kbdev->hwcnt_gpu_ctx); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); kbase_csf_scheduler_spin_unlock(kbdev, flags); backend->hwcnt_disabled = false; } @@ -844,9 +877,19 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) backend->mcu_state = KBASE_MCU_HCTL_MCU_ON_RECHECK; } - } else if (kbase_pm_handle_mcu_core_attr_update(kbdev)) { + } else if (kbase_pm_handle_mcu_core_attr_update(kbdev)) backend->mcu_state = KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND; +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + else if (kbdev->csf.coresight.disable_on_pmode_enter) { + kbase_debug_coresight_csf_state_request( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED); + backend->mcu_state = KBASE_MCU_ON_PMODE_ENTER_CORESIGHT_DISABLE; + } else if (kbdev->csf.coresight.enable_on_pmode_exit) { + kbase_debug_coresight_csf_state_request( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED); + backend->mcu_state = KBASE_MCU_ON_PMODE_EXIT_CORESIGHT_ENABLE; } +#endif break; case KBASE_MCU_HCTL_MCU_ON_RECHECK: @@ -937,12 +980,46 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) #ifdef KBASE_PM_RUNTIME if (backend->gpu_sleep_mode_active) backend->mcu_state = KBASE_MCU_ON_SLEEP_INITIATE; - else + else { #endif backend->mcu_state = KBASE_MCU_ON_HALT; +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + kbase_debug_coresight_csf_state_request( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED); + backend->mcu_state = KBASE_MCU_CORESIGHT_DISABLE; +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + } } break; +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + case KBASE_MCU_ON_PMODE_ENTER_CORESIGHT_DISABLE: + if (kbase_debug_coresight_csf_state_check( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED)) { + backend->mcu_state = KBASE_MCU_ON; + kbdev->csf.coresight.disable_on_pmode_enter = false; + } + break; + case KBASE_MCU_ON_PMODE_EXIT_CORESIGHT_ENABLE: + if 
(kbase_debug_coresight_csf_state_check( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) { + backend->mcu_state = KBASE_MCU_ON; + kbdev->csf.coresight.enable_on_pmode_exit = false; + } + break; + case KBASE_MCU_CORESIGHT_DISABLE: + if (kbase_debug_coresight_csf_state_check( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED)) + backend->mcu_state = KBASE_MCU_ON_HALT; + break; + + case KBASE_MCU_CORESIGHT_ENABLE: + if (kbase_debug_coresight_csf_state_check( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) + backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; + break; +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + case KBASE_MCU_ON_HALT: if (!kbase_pm_is_mcu_desired(kbdev)) { kbase_csf_firmware_trigger_mcu_halt(kbdev); @@ -1035,6 +1112,11 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) /* Reset complete */ if (!backend->in_reset) backend->mcu_state = KBASE_MCU_OFF; + +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + kbdev->csf.coresight.disable_on_pmode_enter = false; + kbdev->csf.coresight.enable_on_pmode_exit = false; +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ break; default: @@ -1052,6 +1134,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) dev_dbg(kbdev->dev, "MCU state transition: %s to %s\n", kbase_mcu_state_to_string(prev_state), kbase_mcu_state_to_string(backend->mcu_state)); + kbase_ktrace_log_mcu_state(kbdev, backend->mcu_state); } } while (backend->mcu_state != prev_state); @@ -1125,6 +1208,21 @@ static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state) return strings[state]; } +static +void kbase_ktrace_log_l2_core_state(struct kbase_device *kbdev, enum kbase_l2_core_state state) +{ +#if KBASE_KTRACE_ENABLE + switch (state) { +#define KBASEP_L2_STATE(n) \ + case KBASE_L2_ ## n: \ + KBASE_KTRACE_ADD(kbdev, PM_L2_ ## n, NULL, state); \ + break; +#include "mali_kbase_pm_l2_states.h" +#undef KBASEP_L2_STATE + } +#endif +} + #if !MALI_USE_CSF /* On powering on the L2, the tracked kctx becomes stale and can be cleared. * This enables the backend to spare the START_FLUSH.INV_SHADER_OTHER @@ -1195,13 +1293,22 @@ static bool can_power_down_l2(struct kbase_device *kbdev) #if MALI_USE_CSF /* Due to the HW issue GPU2019-3878, need to prevent L2 power off * whilst MMU command is in progress. + * Also defer the power-down if MMU is in process of page migration. 
*/ - return !kbdev->mmu_hw_operation_in_progress; + return !kbdev->mmu_hw_operation_in_progress && !kbdev->mmu_page_migrate_in_progress; #else - return true; + return !kbdev->mmu_page_migrate_in_progress; #endif } +static bool can_power_up_l2(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* Avoiding l2 transition if MMU is undergoing page migration */ + return !kbdev->mmu_page_migrate_in_progress; +} + static bool need_tiler_control(struct kbase_device *kbdev) { #if MALI_USE_CSF @@ -1230,18 +1337,13 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) KBASE_PM_CORE_L2); u64 l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2); -#ifdef CONFIG_MALI_ARBITER_SUPPORT - u64 tiler_trans = kbase_pm_get_trans_cores( - kbdev, KBASE_PM_CORE_TILER); - u64 tiler_ready = kbase_pm_get_ready_cores( - kbdev, KBASE_PM_CORE_TILER); +#ifdef CONFIG_MALI_ARBITER_SUPPORT /* * kbase_pm_get_ready_cores and kbase_pm_get_trans_cores * are vulnerable to corruption if gpu is lost */ - if (kbase_is_gpu_removed(kbdev) - || kbase_pm_is_gpu_lost(kbdev)) { + if (kbase_is_gpu_removed(kbdev) || kbase_pm_is_gpu_lost(kbdev)) { backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF; backend->hwcnt_desired = false; @@ -1255,16 +1357,19 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) */ backend->l2_state = KBASE_L2_ON_HWCNT_DISABLE; + KBASE_KTRACE_ADD(kbdev, PM_L2_ON_HWCNT_DISABLE, NULL, + backend->l2_state); kbase_pm_trigger_hwcnt_disable(kbdev); } if (backend->hwcnt_disabled) { backend->l2_state = KBASE_L2_OFF; + KBASE_KTRACE_ADD(kbdev, PM_L2_OFF, NULL, backend->l2_state); dev_dbg(kbdev->dev, "GPU lost has occurred - L2 off\n"); } break; } -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ +#endif /* mask off ready from trans in case transitions finished * between the register reads @@ -1275,7 +1380,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) switch (backend->l2_state) { case KBASE_L2_OFF: - if (kbase_pm_is_l2_desired(kbdev)) { + if (kbase_pm_is_l2_desired(kbdev) && can_power_up_l2(kbdev)) { #if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) // Workaround: give a short pause here before starting L2 transition. 
udelay(200); @@ -1323,14 +1428,12 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) l2_power_up_done = false; if (!l2_trans && l2_ready == l2_present) { if (need_tiler_control(kbdev)) { -#ifndef CONFIG_MALI_ARBITER_SUPPORT u64 tiler_trans = kbase_pm_get_trans_cores( kbdev, KBASE_PM_CORE_TILER); u64 tiler_ready = kbase_pm_get_ready_cores( kbdev, KBASE_PM_CORE_TILER); -#endif - tiler_trans &= ~tiler_ready; + if (!tiler_trans && tiler_ready == tiler_present) { KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, @@ -1591,6 +1694,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) dev_warn(kbdev->dev, "transition to l2 off without waking waiter"); } #endif + kbase_ktrace_log_l2_core_state(kbdev, backend->l2_state); } } while (backend->l2_state != prev_state); @@ -2282,6 +2386,7 @@ void kbase_pm_reset_start_locked(struct kbase_device *kbdev) backend->in_reset = true; backend->l2_state = KBASE_L2_RESET_WAIT; + KBASE_KTRACE_ADD(kbdev, PM_L2_RESET_WAIT, NULL, backend->l2_state); #if !MALI_USE_CSF backend->shaders_state = KBASE_SHADERS_RESET_WAIT; #else @@ -2290,6 +2395,7 @@ void kbase_pm_reset_start_locked(struct kbase_device *kbdev) */ if (likely(kbdev->csf.firmware_inited)) { backend->mcu_state = KBASE_MCU_RESET_WAIT; + KBASE_KTRACE_ADD(kbdev, PM_MCU_RESET_WAIT, NULL, backend->mcu_state); #ifdef KBASE_PM_RUNTIME backend->exit_gpu_sleep_mode = true; #endif @@ -2649,31 +2755,37 @@ void kbase_pm_disable_interrupts(struct kbase_device *kbdev) KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts); #if MALI_USE_CSF +/** + * update_user_reg_page_mapping - Update the mapping for USER Register page + * + * @kbdev: The kbase device structure for the device. + * + * This function must be called to unmap the dummy or real page from USER Register page + * mapping whenever GPU is powered up or down. The dummy or real page would get + * appropriately mapped in when Userspace reads the LATEST_FLUSH value. + */ static void update_user_reg_page_mapping(struct kbase_device *kbdev) { + struct kbase_context *kctx, *n; + lockdep_assert_held(&kbdev->pm.lock); mutex_lock(&kbdev->csf.reg_lock); - - /* Only if the mappings for USER page exist, update all PTEs associated to it */ - if (kbdev->csf.nr_user_page_mapped > 0) { - if (likely(kbdev->csf.mali_file_inode)) { - /* This would zap the pte corresponding to the mapping of User - * register page for all the Kbase contexts. - */ - unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping, - BASEP_MEM_CSF_USER_REG_PAGE_HANDLE, PAGE_SIZE, 1); - } else { - dev_err(kbdev->dev, - "Device file inode not exist even if USER page previously mapped"); - } + list_for_each_entry_safe(kctx, n, &kbdev->csf.user_reg.list, csf.user_reg.link) { + /* This would zap the PTE corresponding to the mapping of User + * Register page of the kbase context. The mapping will be reestablished + * when the context (user process) needs to access to the page. 
+ */ + unmap_mapping_range(kbdev->csf.user_reg.filp->f_inode->i_mapping, + kctx->csf.user_reg.file_offset << PAGE_SHIFT, PAGE_SIZE, 1); + list_del_init(&kctx->csf.user_reg.link); + dev_dbg(kbdev->dev, "Updated USER Reg page mapping of ctx %d_%d", kctx->tgid, + kctx->id); } - mutex_unlock(&kbdev->csf.reg_lock); } #endif - /* * pmu layout: * 0x0000: PMU TAG (RO) (0xCAFECAFE) @@ -2811,7 +2923,6 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) backend->gpu_idled = false; } #endif - } KBASE_EXPORT_TEST_API(kbase_pm_clock_on); diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h index d959f45..9e29236 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h @@ -995,4 +995,27 @@ static inline void kbase_pm_disable_db_mirror_interrupt(struct kbase_device *kbd } #endif +/** + * kbase_pm_l2_allow_mmu_page_migration - L2 state allows MMU page migration or not + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Check whether the L2 state is in power transition phase or not. If it is, the MMU + * page migration should be deferred. The caller must hold hwaccess_lock, and, if MMU + * page migration is intended, immediately start the MMU migration action without + * dropping the lock. When page migration begins, a flag is set in kbdev that would + * prevent the L2 state machine traversing into power transition phases, until + * the MMU migration action ends. + * + * Return: true if MMU page migration is allowed + */ +static inline bool kbase_pm_l2_allow_mmu_page_migration(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + return (backend->l2_state != KBASE_L2_PEND_ON && backend->l2_state != KBASE_L2_PEND_OFF); +} + #endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h b/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h index 5e57c9d..3b448e3 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -66,6 +66,13 @@ * is being put to sleep. * @ON_PEND_SLEEP: MCU sleep is in progress. * @IN_SLEEP: Sleep request is completed and MCU has halted. + * @ON_PMODE_ENTER_CORESIGHT_DISABLE: The MCU is on, protected mode enter is about to + * be requested, Coresight is being disabled. + * @ON_PMODE_EXIT_CORESIGHT_ENABLE : The MCU is on, protected mode exit has happened + * Coresight is being enabled. + * @CORESIGHT_DISABLE: The MCU is on and Coresight is being disabled. + * @CORESIGHT_ENABLE: The MCU is on, host does not have control and + * Coresight is being enabled. 
*/ KBASEP_MCU_STATE(OFF) KBASEP_MCU_STATE(PEND_ON_RELOAD) @@ -92,3 +99,10 @@ KBASEP_MCU_STATE(HCTL_SHADERS_CORE_OFF_PEND) KBASEP_MCU_STATE(ON_SLEEP_INITIATE) KBASEP_MCU_STATE(ON_PEND_SLEEP) KBASEP_MCU_STATE(IN_SLEEP) +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) +/* Additional MCU states for Coresight */ +KBASEP_MCU_STATE(ON_PMODE_ENTER_CORESIGHT_DISABLE) +KBASEP_MCU_STATE(ON_PMODE_EXIT_CORESIGHT_ENABLE) +KBASEP_MCU_STATE(CORESIGHT_DISABLE) +KBASEP_MCU_STATE(CORESIGHT_ENABLE) +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c index 2b3e4e4..5d98bd7 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c @@ -38,11 +38,13 @@ #include <backend/gpu/mali_kbase_pm_defs.h> #include <mali_linux_trace.h> +#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) || !MALI_USE_CSF /* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly * under 11s. Exceeding this will cause overflow */ #define KBASE_PM_TIME_SHIFT 8 +#endif #if MALI_USE_CSF /* To get the GPU_ACTIVE value in nano seconds unit */ @@ -480,7 +482,7 @@ void kbase_pm_metrics_stop(struct kbase_device *kbdev) */ static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev) { - int js; + unsigned int js; lockdep_assert_held(&kbdev->pm.backend.metrics.lock); diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c index 5110e3d..7a4d662 100644 --- a/mali_kbase/backend/gpu/mali_kbase_time.c +++ b/mali_kbase/backend/gpu/mali_kbase_time.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,6 +22,8 @@ #include <mali_kbase.h> #include <mali_kbase_hwaccess_time.h> #if MALI_USE_CSF +#include <asm/arch_timer.h> +#include <linux/gcd.h> #include <csf/mali_kbase_csf_timeout.h> #endif #include <device/mali_kbase_device.h> @@ -121,20 +123,29 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, /* Only for debug messages, safe default in case it's mis-maintained */ const char *selector_str = "(unknown)"; - if (WARN(!kbdev->lowest_gpu_freq_khz, - "Lowest frequency uninitialized! Using reference frequency for scaling")) { + if (!kbdev->lowest_gpu_freq_khz) { + dev_dbg(kbdev->dev, + "Lowest frequency uninitialized! Using reference frequency for scaling"); freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ; } else { freq_khz = kbdev->lowest_gpu_freq_khz; } switch (selector) { + case MMU_AS_INACTIVE_WAIT_TIMEOUT: + selector_str = "MMU_AS_INACTIVE_WAIT_TIMEOUT"; + nr_cycles = MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES; + break; case KBASE_TIMEOUT_SELECTOR_COUNT: default: #if !MALI_USE_CSF WARN(1, "Invalid timeout selector used! 
Using default value"); nr_cycles = JM_DEFAULT_TIMEOUT_CYCLES; break; + case JM_DEFAULT_JS_FREE_TIMEOUT: + selector_str = "JM_DEFAULT_JS_FREE_TIMEOUT"; + nr_cycles = JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES; + break; #else /* Use Firmware timeout if invalid selection */ WARN(1, @@ -204,3 +215,65 @@ u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev) return lo | (((u64) hi1) << 32); } + +#if MALI_USE_CSF +u64 __maybe_unused kbase_backend_time_convert_gpu_to_cpu(struct kbase_device *kbdev, u64 gpu_ts) +{ + if (WARN_ON(!kbdev)) + return 0; + + return div64_u64(gpu_ts * kbdev->backend_time.multiplier, kbdev->backend_time.divisor) + + kbdev->backend_time.offset; +} + +/** + * get_cpu_gpu_time() - Get current CPU and GPU timestamps. + * + * @kbdev: Kbase device. + * @cpu_ts: Output CPU timestamp. + * @gpu_ts: Output GPU timestamp. + * @gpu_cycle: Output GPU cycle counts. + */ +static void get_cpu_gpu_time(struct kbase_device *kbdev, u64 *cpu_ts, u64 *gpu_ts, u64 *gpu_cycle) +{ + struct timespec64 ts; + + kbase_backend_get_gpu_time(kbdev, gpu_cycle, gpu_ts, &ts); + + if (cpu_ts) + *cpu_ts = ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; +} +#endif + +int kbase_backend_time_init(struct kbase_device *kbdev) +{ +#if MALI_USE_CSF + u64 cpu_ts = 0; + u64 gpu_ts = 0; + u64 freq; + u64 common_factor; + + get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL); + freq = arch_timer_get_cntfrq(); + + if (!freq) { + dev_warn(kbdev->dev, "arch_timer_get_rate() is zero!"); + return -EINVAL; + } + + common_factor = gcd(NSEC_PER_SEC, freq); + + kbdev->backend_time.multiplier = div64_u64(NSEC_PER_SEC, common_factor); + kbdev->backend_time.divisor = div64_u64(freq, common_factor); + + if (!kbdev->backend_time.divisor) { + dev_warn(kbdev->dev, "CPU to GPU divisor is zero!"); + return -EINVAL; + } + + kbdev->backend_time.offset = cpu_ts - div64_u64(gpu_ts * kbdev->backend_time.multiplier, + kbdev->backend_time.divisor); +#endif + + return 0; +} |