author	Toby Sunrise <tobyrs@google.com>	2023-05-01 13:23:54 +0000
committer	Toby Sunrise <tobyrs@google.com>	2023-05-01 13:33:11 +0000
commit	f7a77046d77266482dedf54d134102e6031a7438 (patch)
tree	4d6813894d79edb7ad605005087b0bce11055c4c /mali_kbase
parent	25e383ffa36a9916065804029fbe3552c71329fe (diff)
download	gpu-f7a77046d77266482dedf54d134102e6031a7438.tar.gz
Mali Valhall Android DDK r42p0-01eac0 KMD
Provenance: 300534375857cb2963042df7b788b1ab5616c500 (ipdelivery/EAC/v_r42p0)
VX504X08X-BU-00000-r42p0-01eac0 - Valhall Android DDK
VX504X08X-BU-60000-r42p0-01eac0 - Valhall Android Document Bundle
VX504X08X-DC-11001-r42p0-01eac0 - Valhall Android DDK Software Errata
VX504X08X-SW-99006-r42p0-01eac0 - Valhall Android Renderscript AOSP parts
Change-Id: I3b15e01574f03706574a8edaf50dae4ba16e30c0
Diffstat (limited to 'mali_kbase')
-rw-r--r--  mali_kbase/Kbuild  2
-rw-r--r--  mali_kbase/Kconfig  40
-rw-r--r--  mali_kbase/Makefile  38
-rw-r--r--  mali_kbase/Mconfig  41
-rw-r--r--  mali_kbase/backend/gpu/Kbuild  8
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_irq_linux.c  10
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_as.c  8
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_hw.c  46
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_internal.h  41
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_rb.c  113
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_rb.h  14
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_js_backend.c  5
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_dummy.c  58
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_dummy.h  29
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_error_generator.c  2
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_linux.c  33
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_linux.h  125
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_ca.c  4
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_driver.c  67
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h  16
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_metrics.c  2
-rw-r--r--  mali_kbase/build.bp  5
-rw-r--r--  mali_kbase/context/backend/mali_kbase_context_csf.c  2
-rw-r--r--  mali_kbase/csf/Kbuild  12
-rw-r--r--  mali_kbase/csf/mali_kbase_csf.c  424
-rw-r--r--  mali_kbase/csf/mali_kbase_csf.h  5
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_csg_debugfs.c  24
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_csg_debugfs.h  9
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_defs.h  73
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware.c  129
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware.h  44
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c  6
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c  73
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_kcpu.c  236
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_kcpu.h  4
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c  815
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.h  139
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_registers.h  26
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_scheduler.c  156
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_sync_debugfs.c  788
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_sync_debugfs.h  37
-rw-r--r--  mali_kbase/debug/Kbuild  3
-rw-r--r--  mali_kbase/debug/backend/mali_kbase_debug_coresight_csf.c  851
-rw-r--r--  mali_kbase/debug/backend/mali_kbase_debug_coresight_internal_csf.h  182
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_csf.c  24
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_hw_csf.c  7
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_hw_jm.c  4
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_jm.c  20
-rw-r--r--  mali_kbase/device/mali_kbase_device_internal.h  12
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c  8
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c  5
-rw-r--r--  mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c  5
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa.c  30
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa_simple.c  6
-rw-r--r--  mali_kbase/jm/mali_kbase_jm_defs.h  2
-rw-r--r--  mali_kbase/jm/mali_kbase_jm_js.h  23
-rw-r--r--  mali_kbase/mali_base_hwconfig_features.h  10
-rw-r--r--  mali_kbase/mali_base_hwconfig_issues.h  135
-rw-r--r--  mali_kbase/mali_kbase.h  30
-rw-r--r--  mali_kbase/mali_kbase_as_fault_debugfs.c  10
-rw-r--r--  mali_kbase/mali_kbase_core_linux.c  18
-rw-r--r--  mali_kbase/mali_kbase_ctx_sched.c  27
-rw-r--r--  mali_kbase/mali_kbase_defs.h  4
-rw-r--r--  mali_kbase/mali_kbase_fence_ops.c  6
-rw-r--r--  mali_kbase/mali_kbase_gpuprops.c  53
-rw-r--r--  mali_kbase/mali_kbase_hw.c  11
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_jm.h  17
-rw-r--r--  mali_kbase/mali_kbase_jd.c  21
-rw-r--r--  mali_kbase/mali_kbase_jm.c  18
-rw-r--r--  mali_kbase/mali_kbase_js.c  205
-rw-r--r--  mali_kbase/mali_kbase_kinstr_prfcnt.c  343
-rw-r--r--  mali_kbase/mali_kbase_linux.h  4
-rw-r--r--  mali_kbase/mali_kbase_mem.c  146
-rw-r--r--  mali_kbase/mali_kbase_mem.h  2
-rw-r--r--  mali_kbase/mali_kbase_mem_linux.c  71
-rw-r--r--  mali_kbase/mali_kbase_mem_migrate.c  16
-rw-r--r--  mali_kbase/mali_kbase_mem_pool.c  24
-rw-r--r--  mali_kbase/mali_kbase_softjobs.c  41
-rw-r--r--  mali_kbase/mmu/backend/mali_kbase_mmu_csf.c  2
-rw-r--r--  mali_kbase/mmu/backend/mali_kbase_mmu_jm.c  6
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.c  152
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.h  62
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_hw.h  20
-rw-r--r--  mali_kbase/tl/mali_kbase_tlstream.h  12
-rw-r--r--  mali_kbase/tl/mali_kbase_tracepoints.c  32
-rw-r--r--  mali_kbase/tl/mali_kbase_tracepoints.h  37
86 files changed, 4923 insertions, 1503 deletions
diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild
index fc08158..32b4d37 100644
--- a/mali_kbase/Kbuild
+++ b/mali_kbase/Kbuild
@@ -69,7 +69,7 @@ endif
#
# Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= '"r41p0-01eac0"'
+MALI_RELEASE_NAME ?= '"r42p0-01eac0"'
# Set up defaults if not defined by build system
ifeq ($(CONFIG_MALI_DEBUG), y)
MALI_UNIT_TEST = 1
diff --git a/mali_kbase/Kconfig b/mali_kbase/Kconfig
index 701b68f..1c5e1f8 100644
--- a/mali_kbase/Kconfig
+++ b/mali_kbase/Kconfig
@@ -43,9 +43,30 @@ config MALI_PLATFORM_NAME
include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must
exist.
+choice
+ prompt "Mali HW backend"
+ depends on MALI_MIDGARD
+ default MALI_REAL_HW
+
config MALI_REAL_HW
+ bool "Enable build of Mali kernel driver for real HW"
depends on MALI_MIDGARD
- def_bool !MALI_NO_MALI
+ help
+ This is the default HW backend.
+
+config MALI_NO_MALI
+ bool "Enable build of Mali kernel driver for No Mali"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ help
+ This can be used to test the driver in a simulated environment
+ whereby the hardware is not physically present. If the hardware is physically
+ present it will not be used. This can be used to test the majority of the
+ driver without needing actual hardware or for software benchmarking.
+ All calls to the simulated hardware will complete immediately as if the hardware
+ completed the task.
+
+
+endchoice
menu "Platform specific options"
source "drivers/gpu/arm/midgard/platform/Kconfig"
@@ -145,6 +166,11 @@ config MALI_DMA_BUF_LEGACY_COMPAT
flushes in other drivers. This only has an effect for clients using
UK 11.18 or older. For later UK versions it is not possible.
+config MALI_CORESIGHT
+ depends on MALI_MIDGARD && MALI_CSF_SUPPORT && !MALI_NO_MALI
+ bool "Enable Kbase CoreSight tracing support"
+ default n
+
menuconfig MALI_EXPERT
depends on MALI_MIDGARD
bool "Enable Expert Settings"
@@ -192,18 +218,6 @@ config MALI_CORESTACK
comment "Platform options"
depends on MALI_MIDGARD && MALI_EXPERT
-config MALI_NO_MALI
- bool "Enable No Mali"
- depends on MALI_MIDGARD && MALI_EXPERT
- default n
- help
- This can be used to test the driver in a simulated environment
- whereby the hardware is not physically present. If the hardware is physically
- present it will not be used. This can be used to test the majority of the
- driver without needing actual hardware or for software benchmarking.
- All calls to the simulated hardware will complete immediately as if the hardware
- completed the task.
-
config MALI_ERROR_INJECT
bool "Enable No Mali error injection"
depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI
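With the backend selection turned into a Kconfig choice, exactly one of CONFIG_MALI_REAL_HW and CONFIG_MALI_NO_MALI is set, so source files can test the positive CONFIG_MALI_REAL_HW symbol instead of negating CONFIG_MALI_NO_MALI (as the mali_kbase_irq_linux.c hunk further down does). A minimal sketch of that guard pattern; the helper name is invented for illustration:

#include <linux/kconfig.h>

/* Hypothetical helper: true when register traffic is served by the
 * simulated No Mali backend rather than by physical hardware.
 */
static inline bool example_backend_is_simulated(void)
{
	return !IS_ENABLED(CONFIG_MALI_REAL_HW);
}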
diff --git a/mali_kbase/Makefile b/mali_kbase/Makefile
index e135d86..5d88b14 100644
--- a/mali_kbase/Makefile
+++ b/mali_kbase/Makefile
@@ -39,17 +39,10 @@ ifeq ($(CONFIG_MALI_MIDGARD),m)
CONFIG_MALI_ARBITRATION ?= n
CONFIG_MALI_PARTITION_MANAGER ?= n
- ifeq ($(origin CONFIG_MALI_ABITER_MODULES), undefined)
- CONFIG_MALI_ARBITER_MODULES := $(CONFIG_MALI_ARBITRATION)
- endif
-
- ifeq ($(origin CONFIG_MALI_GPU_POWER_MODULES), undefined)
- CONFIG_MALI_GPU_POWER_MODULES := $(CONFIG_MALI_ARBITRATION)
- endif
-
ifneq ($(CONFIG_MALI_NO_MALI),y)
# Prevent misuse when CONFIG_MALI_NO_MALI=y
CONFIG_MALI_REAL_HW ?= y
+ CONFIG_MALI_CORESIGHT = n
endif
ifeq ($(CONFIG_MALI_MIDGARD_DVFS),y)
@@ -64,14 +57,9 @@ ifeq ($(CONFIG_MALI_MIDGARD),m)
CONFIG_MALI_DMA_BUF_LEGACY_COMPAT = n
endif
- ifeq ($(CONFIG_XEN),y)
- ifneq ($(CONFIG_MALI_ARBITER_SUPPORT), n)
- CONFIG_MALI_XEN ?= m
- endif
- endif
-
ifeq ($(CONFIG_MALI_CSF_SUPPORT), y)
CONFIG_MALI_FW_CORE_DUMP ?= y
+ CONFIG_MALI_CORESIGHT ?= n
else
CONFIG_MALI_FW_CORE_DUMP ?= n
endif
@@ -82,12 +70,14 @@ ifeq ($(CONFIG_MALI_MIDGARD),m)
ifeq ($(CONFIG_MALI_EXPERT), y)
ifeq ($(CONFIG_MALI_NO_MALI), y)
CONFIG_MALI_REAL_HW = n
+
else
# Prevent misuse when CONFIG_MALI_NO_MALI=n
CONFIG_MALI_REAL_HW = y
CONFIG_MALI_ERROR_INJECT = n
endif
+
ifeq ($(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED), y)
# Prevent misuse when CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y
CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n
@@ -149,8 +139,6 @@ ifeq ($(CONFIG_MALI_MIDGARD),m)
else
# Prevent misuse when CONFIG_MALI_MIDGARD=n
CONFIG_MALI_ARBITRATION = n
- CONFIG_MALI_ARBITER_MODULES = n
- CONFIG_MALI_GPU_POWER_MODULES = n
CONFIG_MALI_KUTF = n
CONFIG_MALI_KUTF_IRQ_TEST = n
CONFIG_MALI_KUTF_CLK_RATE_TRACE = n
@@ -165,8 +153,6 @@ CONFIGS := \
CONFIG_MALI_GATOR_SUPPORT \
CONFIG_MALI_ARBITER_SUPPORT \
CONFIG_MALI_ARBITRATION \
- CONFIG_MALI_ARBITER_MODULES \
- CONFIG_MALI_GPU_POWER_MODULES \
CONFIG_MALI_PARTITION_MANAGER \
CONFIG_MALI_REAL_HW \
CONFIG_MALI_GEM5_BUILD \
@@ -197,10 +183,13 @@ CONFIGS := \
CONFIG_MALI_KUTF_CLK_RATE_TRACE \
CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \
CONFIG_MALI_XEN \
- CONFIG_MALI_FW_CORE_DUMP
+ CONFIG_MALI_FW_CORE_DUMP \
+ CONFIG_MALI_CORESIGHT
-#
+THIS_DIR := $(dir $(lastword $(MAKEFILE_LIST)))
+-include $(THIS_DIR)/../arbitration/Makefile
+
# MAKE_ARGS to pass the custom CONFIGs on out-of-tree build
#
# Generate the list of CONFIGs and values.
@@ -262,7 +251,8 @@ KBUILD_CFLAGS += -Wdisabled-optimization
# global variables.
KBUILD_CFLAGS += $(call cc-option, -Wlogical-op)
KBUILD_CFLAGS += -Wmissing-field-initializers
-KBUILD_CFLAGS += -Wtype-limits
+# -Wtype-limits must be disabled due to build failures on kernel 5.x
+KBUILD_CFLAGS += -Wno-type-limits
KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized)
KBUILD_CFLAGS += $(call cc-option, -Wunused-macros)
@@ -271,6 +261,12 @@ KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2
# This warning is disabled to avoid build failures in some kernel versions
KBUILD_CFLAGS += -Wno-ignored-qualifiers
+ifeq ($(CONFIG_GCOV_KERNEL),y)
+ KBUILD_CFLAGS += $(call cc-option, -ftest-coverage)
+ KBUILD_CFLAGS += $(call cc-option, -fprofile-arcs)
+ EXTRA_CFLAGS += -DGCOV_PROFILE=1
+endif
+
all:
$(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
diff --git a/mali_kbase/Mconfig b/mali_kbase/Mconfig
index d294543..d5b3067 100644
--- a/mali_kbase/Mconfig
+++ b/mali_kbase/Mconfig
@@ -41,11 +41,31 @@ config MALI_PLATFORM_NAME
When PLATFORM_CUSTOM is set, this needs to be set manually to
pick up the desired platform files.
+choice
+ prompt "Mali HW backend"
+ depends on MALI_MIDGARD
+ default MALI_NO_MALI if NO_MALI
+ default MALI_REAL_HW
+
config MALI_REAL_HW
- bool
+ bool "Enable build of Mali kernel driver for real HW"
depends on MALI_MIDGARD
- default y
- default n if NO_MALI
+ help
+ This is the default HW backend.
+
+config MALI_NO_MALI
+ bool "Enable build of Mali kernel driver for No Mali"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ help
+ This can be used to test the driver in a simulated environment
+ whereby the hardware is not physically present. If the hardware is physically
+ present it will not be used. This can be used to test the majority of the
+ driver without needing actual hardware or for software benchmarking.
+ All calls to the simulated hardware will complete immediately as if the hardware
+ completed the task.
+
+
+endchoice
config MALI_PLATFORM_DT_PIN_RST
bool "Enable Juno GPU Pin reset"
@@ -154,6 +174,12 @@ config MALI_DMA_BUF_LEGACY_COMPAT
flushes in other drivers. This only has an effect for clients using
UK 11.18 or older. For later UK versions it is not possible.
+config MALI_CORESIGHT
+ depends on MALI_MIDGARD && MALI_CSF_SUPPORT && !NO_MALI
+ select CSFFW_DEBUG_FW_AS_RW
+ bool "Enable Kbase CoreSight tracing support"
+ default n
+
menuconfig MALI_EXPERT
depends on MALI_MIDGARD
bool "Enable Expert Settings"
@@ -240,6 +266,15 @@ config MALI_DEBUG
help
Select this option for increased checking and reporting of errors.
+config MALI_GCOV_KERNEL
+ bool "Enable branch coverage via gcov"
+ depends on MALI_MIDGARD && MALI_DEBUG
+ default n
+ help
+ Choose this option to enable building kbase with branch
+ coverage information. When built against a supporting kernel,
+ the coverage information will be available via debugfs.
+
config MALI_FENCE_DEBUG
bool "Enable debug sync fence usage"
depends on MALI_MIDGARD && MALI_EXPERT
diff --git a/mali_kbase/backend/gpu/Kbuild b/mali_kbase/backend/gpu/Kbuild
index ad712af..f821a6f 100644
--- a/mali_kbase/backend/gpu/Kbuild
+++ b/mali_kbase/backend/gpu/Kbuild
@@ -47,8 +47,12 @@ endif
mali_kbase-$(CONFIG_MALI_DEVFREQ) += \
backend/gpu/mali_kbase_devfreq.o
-# Dummy model
+ifneq ($(CONFIG_MALI_REAL_HW),y)
+ mali_kbase-y += backend/gpu/mali_kbase_model_linux.o
+endif
+
+# NO_MALI Dummy model interface
mali_kbase-$(CONFIG_MALI_NO_MALI) += backend/gpu/mali_kbase_model_dummy.o
-mali_kbase-$(CONFIG_MALI_NO_MALI) += backend/gpu/mali_kbase_model_linux.o
# HW error simulation
mali_kbase-$(CONFIG_MALI_NO_MALI) += backend/gpu/mali_kbase_model_error_generator.o
+
diff --git a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c
index 00e050a..ef09c6b 100644
--- a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c
+++ b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c
@@ -25,12 +25,12 @@
#include <linux/interrupt.h>
-#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
+#if IS_ENABLED(CONFIG_MALI_REAL_HW)
/* GPU IRQ Tags */
-#define JOB_IRQ_TAG 0
-#define MMU_IRQ_TAG 1
-#define GPU_IRQ_TAG 2
+#define JOB_IRQ_TAG 0
+#define MMU_IRQ_TAG 1
+#define GPU_IRQ_TAG 2
static void *kbase_tag(void *ptr, u32 tag)
{
@@ -500,4 +500,4 @@ void kbase_synchronize_irqs(struct kbase_device *kbdev)
KBASE_EXPORT_TEST_API(kbase_synchronize_irqs);
-#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
+#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_as.c b/mali_kbase/backend/gpu/mali_kbase_jm_as.c
index 888aa59..258dc6d 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_as.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_as.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -67,9 +67,8 @@ static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev,
kbase_js_runpool_inc_context_count(kbdev, kctx);
}
-bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
- struct kbase_context *kctx,
- int js)
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, struct kbase_context *kctx,
+ unsigned int js)
{
int i;
@@ -240,4 +239,3 @@ bool kbase_backend_use_ctx(struct kbase_device *kbdev,
return true;
}
-
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
index 0a01288..9a17494 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
@@ -44,9 +44,8 @@ static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev);
static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev,
const u64 affinity, const u64 limited_core_mask);
-static u64 kbase_job_write_affinity(struct kbase_device *kbdev,
- base_jd_core_req core_req,
- int js, const u64 limited_core_mask)
+static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req core_req,
+ unsigned int js, const u64 limited_core_mask)
{
u64 affinity;
bool skip_affinity_check = false;
@@ -191,7 +190,7 @@ static u64 select_job_chain(struct kbase_jd_atom *katom)
return jc;
}
-int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js)
+int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, unsigned int js)
{
struct kbase_context *kctx;
u32 cfg;
@@ -344,10 +343,8 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
* work out the best estimate (which might still result in an over-estimate to
* the calculated time spent)
*/
-static void kbasep_job_slot_update_head_start_timestamp(
- struct kbase_device *kbdev,
- int js,
- ktime_t end_timestamp)
+static void kbasep_job_slot_update_head_start_timestamp(struct kbase_device *kbdev, unsigned int js,
+ ktime_t end_timestamp)
{
ktime_t timestamp_diff;
struct kbase_jd_atom *katom;
@@ -377,8 +374,7 @@ static void kbasep_job_slot_update_head_start_timestamp(
* Make a tracepoint call to the instrumentation module informing that
* softstop happened on given lpu (job slot).
*/
-static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev,
- int js)
+static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, unsigned int js)
{
KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(
kbdev,
@@ -387,7 +383,6 @@ static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev,
void kbase_job_done(struct kbase_device *kbdev, u32 done)
{
- int i;
u32 count = 0;
ktime_t end_timestamp;
@@ -398,6 +393,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
end_timestamp = ktime_get_raw();
while (done) {
+ unsigned int i;
u32 failed = done >> 16;
/* treat failed slots as finished slots */
@@ -407,8 +403,6 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
* numbered interrupts before the higher numbered ones.
*/
i = ffs(finished) - 1;
- if (WARN(i < 0, "%s: called without receiving any interrupts\n", __func__))
- break;
do {
int nr_done;
@@ -607,11 +601,9 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ_END, NULL, NULL, 0, count);
}
-void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
- int js,
- u32 action,
- base_jd_core_req core_reqs,
- struct kbase_jd_atom *target_katom)
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, unsigned int js,
+ u32 action, base_jd_core_req core_reqs,
+ struct kbase_jd_atom *target_katom)
{
#if KBASE_KTRACE_ENABLE
u32 status_reg_before;
@@ -669,6 +661,10 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
struct kbase_context *head_kctx;
head = kbase_gpu_inspect(kbdev, js, 0);
+ if (unlikely(!head)) {
+ dev_err(kbdev->dev, "Can't get a katom from js(%d)\n", js);
+ return;
+ }
head_kctx = head->kctx;
if (status_reg_before == BASE_JD_EVENT_ACTIVE)
@@ -737,7 +733,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx)
{
struct kbase_device *kbdev = kctx->kbdev;
- int i;
+ unsigned int i;
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -749,7 +745,7 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
struct kbase_jd_atom *target_katom)
{
struct kbase_device *kbdev;
- int target_js = target_katom->slot_nr;
+ unsigned int target_js = target_katom->slot_nr;
int i;
bool stop_sent = false;
@@ -927,8 +923,8 @@ KBASE_EXPORT_TEST_API(kbase_job_slot_term);
*
* Where possible any job in the next register is evicted before the soft-stop.
*/
-void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
- struct kbase_jd_atom *target_katom, u32 sw_flags)
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, unsigned int js,
+ struct kbase_jd_atom *target_katom, u32 sw_flags)
{
dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n",
target_katom, sw_flags, js);
@@ -948,8 +944,8 @@ void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u);
}
-void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
- struct kbase_jd_atom *target_katom)
+void kbase_job_slot_hardstop(struct kbase_context *kctx, unsigned int js,
+ struct kbase_jd_atom *target_katom)
{
struct kbase_device *kbdev = kctx->kbdev;
bool stopped;
@@ -1255,7 +1251,7 @@ static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer)
static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
{
- int i;
+ unsigned int i;
int pending_jobs = 0;
/* Count the number of jobs */
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
index 1ebb843..e4cff1f 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
@@ -34,21 +34,6 @@
#include <device/mali_kbase_device.h>
/**
- * kbase_job_submit_nolock() - Submit a job to a certain job-slot
- * @kbdev: Device pointer
- * @katom: Atom to submit
- * @js: Job slot to submit on
- *
- * The caller must check kbasep_jm_is_submit_slots_free() != false before
- * calling this.
- *
- * The following locking conditions are made on the caller:
- * - it must hold the hwaccess_lock
- */
-void kbase_job_submit_nolock(struct kbase_device *kbdev,
- struct kbase_jd_atom *katom, int js);
-
-/**
* kbase_job_done_slot() - Complete the head job on a particular job-slot
* @kbdev: Device pointer
* @s: Job slot
@@ -60,17 +45,16 @@ void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code,
u64 job_tail, ktime_t *end_timestamp);
#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS)
-static inline char *kbasep_make_job_slot_string(int js, char *js_string,
- size_t js_size)
+static inline char *kbasep_make_job_slot_string(unsigned int js, char *js_string, size_t js_size)
{
- snprintf(js_string, js_size, "job_slot_%i", js);
+ snprintf(js_string, js_size, "job_slot_%u", js);
return js_string;
}
#endif
#if !MALI_USE_CSF
-static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
- struct kbase_context *kctx)
+static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, unsigned int js,
+ struct kbase_context *kctx)
{
return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT));
}
@@ -90,7 +74,7 @@ static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
*
* Return: 0 if the job was successfully submitted to hardware, an error otherwise.
*/
-int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js);
+int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, unsigned int js);
#if !MALI_USE_CSF
/**
@@ -106,11 +90,9 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
* The following locking conditions are made on the caller:
* - it must hold the hwaccess_lock
*/
-void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
- int js,
- u32 action,
- base_jd_core_req core_reqs,
- struct kbase_jd_atom *target_katom);
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, unsigned int js,
+ u32 action, base_jd_core_req core_reqs,
+ struct kbase_jd_atom *target_katom);
#endif /* !MALI_USE_CSF */
/**
@@ -134,11 +116,8 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
*
* Return: true if an atom was stopped, false otherwise
*/
-bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
- struct kbase_context *kctx,
- int js,
- struct kbase_jd_atom *katom,
- u32 action);
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_context *kctx,
+ unsigned int js, struct kbase_jd_atom *katom, u32 action);
/**
* kbase_job_slot_init - Initialise job slot framework
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
index e5af4ca..388b37f 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
@@ -93,9 +93,8 @@ static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev,
*
* Return: Atom removed from ringbuffer
*/
-static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev,
- int js,
- ktime_t *end_timestamp)
+static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, unsigned int js,
+ ktime_t *end_timestamp)
{
struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
struct kbase_jd_atom *katom;
@@ -118,8 +117,7 @@ static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev,
return katom;
}
-struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
- int idx)
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, unsigned int js, int idx)
{
struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
@@ -131,8 +129,7 @@ struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom;
}
-struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
- int js)
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, unsigned int js)
{
struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
@@ -144,12 +141,13 @@ struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev)
{
- int js;
- int i;
+ unsigned int js;
lockdep_assert_held(&kbdev->hwaccess_lock);
for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+ int i;
+
for (i = 0; i < SLOT_RB_SIZE; i++) {
struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
@@ -160,7 +158,7 @@ bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev)
return false;
}
-int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js)
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, unsigned int js)
{
int nr = 0;
int i;
@@ -178,7 +176,7 @@ int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js)
return nr;
}
-int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js)
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, unsigned int js)
{
int nr = 0;
int i;
@@ -193,8 +191,8 @@ int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js)
return nr;
}
-static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js,
- enum kbase_atom_gpu_rb_state min_rb_state)
+static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, unsigned int js,
+ enum kbase_atom_gpu_rb_state min_rb_state)
{
int nr = 0;
int i;
@@ -244,9 +242,11 @@ static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure)
static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev,
bool secure)
{
- int js, i;
+ unsigned int js;
for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+ int i;
+
for (i = 0; i < SLOT_RB_SIZE; i++) {
struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
js, i);
@@ -261,7 +261,7 @@ static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev,
return false;
}
-int kbase_backend_slot_free(struct kbase_device *kbdev, int js)
+int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -430,9 +430,9 @@ static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev,
*
* Return: true if any slots other than @js are busy, false otherwise
*/
-static inline bool other_slots_busy(struct kbase_device *kbdev, int js)
+static inline bool other_slots_busy(struct kbase_device *kbdev, unsigned int js)
{
- int slot;
+ unsigned int slot;
for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) {
if (slot == js)
@@ -844,7 +844,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
void kbase_backend_slot_update(struct kbase_device *kbdev)
{
- int js;
+ unsigned int js;
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -1013,25 +1013,25 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
kbase_pm_request_gpu_cycle_counter_l2_is_on(
kbdev);
- if (!kbase_job_hw_submit(kbdev, katom[idx], js))
+ if (!kbase_job_hw_submit(kbdev, katom[idx], js)) {
katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_SUBMITTED;
+
+ /* Inform power management at start/finish of
+ * atom so it can update its GPU utilisation
+ * metrics.
+ */
+ kbase_pm_metrics_update(kbdev,
+ &katom[idx]->start_timestamp);
+
+ /* Inform platform at start/finish of atom */
+ kbasep_platform_event_atom_submit(katom[idx]);
+ }
else
break;
/* ***TRANSITION TO HIGHER STATE*** */
fallthrough;
case KBASE_ATOM_GPU_RB_SUBMITTED:
-
- /* Inform power management at start/finish of
- * atom so it can update its GPU utilisation
- * metrics.
- */
- kbase_pm_metrics_update(kbdev,
- &katom[idx]->start_timestamp);
-
- /* Inform platform at start/finish of atom */
- kbasep_platform_event_atom_submit(katom[idx]);
-
break;
case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
@@ -1111,8 +1111,7 @@ kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a,
*
* Return: true if an atom was evicted, false otherwise.
*/
-bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
- u32 completion_code)
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 completion_code)
{
struct kbase_jd_atom *katom;
struct kbase_jd_atom *next_katom;
@@ -1120,6 +1119,10 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
lockdep_assert_held(&kbdev->hwaccess_lock);
katom = kbase_gpu_inspect(kbdev, js, 0);
+ if (!katom) {
+ dev_err(kbdev->dev, "Can't get a katom from js(%u)\n", js);
+ return false;
+ }
next_katom = kbase_gpu_inspect(kbdev, js, 1);
if (next_katom &&
@@ -1184,13 +1187,18 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
* on the HW and returned to the JS.
*/
-void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
- u32 completion_code,
- u64 job_tail,
- ktime_t *end_timestamp)
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 completion_code,
+ u64 job_tail, ktime_t *end_timestamp)
{
struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
- struct kbase_context *kctx = katom->kctx;
+ struct kbase_context *kctx = NULL;
+
+ if (unlikely(!katom)) {
+ dev_err(kbdev->dev, "Can't get a katom from js(%d)\n", js);
+ return;
+ }
+
+ kctx = katom->kctx;
dev_dbg(kbdev->dev,
"Atom %pK completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n",
@@ -1243,7 +1251,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
}
} else if (completion_code != BASE_JD_EVENT_DONE) {
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
- int i;
+ unsigned int i;
if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) {
dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)",
@@ -1388,7 +1396,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
{
- int js;
+ unsigned int js;
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -1416,7 +1424,7 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
kbase_gpu_in_protected_mode(kbdev));
WARN(!(kbase_jd_katom_is_protected(katom) && js == 0) &&
kbase_jd_katom_is_protected(katom),
- "Protected atom on JS%d not supported", js);
+ "Protected atom on JS%u not supported", js);
}
if ((katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) &&
!kbase_ctx_flag(katom->kctx, KCTX_DYING))
@@ -1512,10 +1520,8 @@ static bool should_stop_next_atom(struct kbase_device *kbdev,
return ret;
}
-static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev,
- int js,
- struct kbase_jd_atom *katom,
- u32 action)
+static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, unsigned int js,
+ struct kbase_jd_atom *katom, u32 action)
{
struct kbase_context *kctx = katom->kctx;
u32 hw_action = action & JS_COMMAND_MASK;
@@ -1559,11 +1565,8 @@ static int should_stop_x_dep_slot(struct kbase_jd_atom *katom)
return -1;
}
-bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
- struct kbase_context *kctx,
- int js,
- struct kbase_jd_atom *katom,
- u32 action)
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_context *kctx,
+ unsigned int js, struct kbase_jd_atom *katom, u32 action)
{
struct kbase_jd_atom *katom_idx0;
struct kbase_context *kctx_idx0 = NULL;
@@ -1816,7 +1819,7 @@ void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
void kbase_gpu_dump_slots(struct kbase_device *kbdev)
{
unsigned long flags;
- int js;
+ unsigned int js;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -1831,12 +1834,10 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev)
idx);
if (katom)
- dev_info(kbdev->dev,
- " js%d idx%d : katom=%pK gpu_rb_state=%d\n",
- js, idx, katom, katom->gpu_rb_state);
+ dev_info(kbdev->dev, " js%u idx%d : katom=%pK gpu_rb_state=%d\n",
+ js, idx, katom, katom->gpu_rb_state);
else
- dev_info(kbdev->dev, " js%d idx%d : empty\n",
- js, idx);
+ dev_info(kbdev->dev, " js%u idx%d : empty\n", js, idx);
}
}
@@ -1845,7 +1846,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev)
void kbase_backend_slot_kctx_purge_locked(struct kbase_device *kbdev, struct kbase_context *kctx)
{
- int js;
+ unsigned int js;
bool tracked = false;
lockdep_assert_held(&kbdev->hwaccess_lock);
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.h b/mali_kbase/backend/gpu/mali_kbase_jm_rb.h
index d3ff203..32be0bf 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.h
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -40,8 +40,7 @@
*
* Return: true if job evicted from NEXT registers, false otherwise
*/
-bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
- u32 completion_code);
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 completion_code);
/**
* kbase_gpu_complete_hw - Complete an atom on job slot js
@@ -53,10 +52,8 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
* completed
* @end_timestamp: Time of completion
*/
-void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
- u32 completion_code,
- u64 job_tail,
- ktime_t *end_timestamp);
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 completion_code,
+ u64 job_tail, ktime_t *end_timestamp);
/**
* kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer
@@ -68,8 +65,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
* Return: The atom at that position in the ringbuffer
* or NULL if no atom present
*/
-struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
- int idx);
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, unsigned int js, int idx);
/**
* kbase_gpu_dump_slots - Print the contents of the slot ringbuffers
diff --git a/mali_kbase/backend/gpu/mali_kbase_js_backend.c b/mali_kbase/backend/gpu/mali_kbase_js_backend.c
index 02d7cdb..0ed04bb 100644
--- a/mali_kbase/backend/gpu/mali_kbase_js_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_js_backend.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -91,7 +91,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
struct kbase_device *kbdev;
struct kbasep_js_device_data *js_devdata;
struct kbase_backend_data *backend;
- int s;
+ unsigned int s;
bool reset_needed = false;
KBASE_DEBUG_ASSERT(timer != NULL);
@@ -365,4 +365,3 @@ void kbase_backend_timeouts_changed(struct kbase_device *kbdev)
backend->timeouts_updated = true;
}
-
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
index 19c5021..9d5f15e 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
+++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
@@ -62,8 +62,9 @@
* document
*/
#include <mali_kbase.h>
+#include <device/mali_kbase_device.h>
#include <gpu/mali_kbase_gpu_regmap.h>
-#include <backend/gpu/mali_kbase_model_dummy.h>
+#include <backend/gpu/mali_kbase_model_linux.h>
#include <mali_kbase_mem_linux.h>
#if MALI_USE_CSF
@@ -338,21 +339,6 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
- .name = "tDUx",
- .gpu_id = GPU_ID2_MAKE(10, 2, 0, 1, 0, 0, 0),
- .as_present = 0xFF,
- .thread_max_threads = 0x180,
- .thread_max_workgroup_size = 0x180,
- .thread_max_barrier_size = 0x180,
- .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
- .tiler_features = 0x809,
- .mmu_features = 0x2830,
- .gpu_features_lo = 0,
- .gpu_features_hi = 0,
- .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
- .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
- },
- {
.name = "tODx",
.gpu_id = GPU_ID2_MAKE(10, 8, 0, 2, 0, 0, 0),
.as_present = 0xFF,
@@ -737,7 +723,7 @@ void gpu_model_glb_request_job_irq(void *model)
spin_lock_irqsave(&hw_error_status.access_lock, flags);
hw_error_status.job_irq_status |= JOB_IRQ_GLOBAL_IF;
spin_unlock_irqrestore(&hw_error_status.access_lock, flags);
- gpu_device_raise_irq(model, GPU_DUMMY_JOB_IRQ);
+ gpu_device_raise_irq(model, MODEL_LINUX_JOB_IRQ);
}
#endif /* !MALI_USE_CSF */
@@ -769,7 +755,7 @@ static void init_register_statuses(struct dummy_model_t *dummy)
performance_counters.time = 0;
}
-static void update_register_statuses(struct dummy_model_t *dummy, int job_slot)
+static void update_register_statuses(struct dummy_model_t *dummy, unsigned int job_slot)
{
lockdep_assert_held(&hw_error_status.access_lock);
@@ -1102,7 +1088,7 @@ static const struct control_reg_values_t *find_control_reg_values(const char *gp
return ret;
}
-void *midgard_model_create(const void *config)
+void *midgard_model_create(struct kbase_device *kbdev)
{
struct dummy_model_t *dummy = NULL;
@@ -1119,7 +1105,12 @@ void *midgard_model_create(const void *config)
GPU_CONTROL_REG(L2_PRESENT_LO), dummy->control_reg_values);
performance_counters.shader_present = get_implementation_register(
GPU_CONTROL_REG(SHADER_PRESENT_LO), dummy->control_reg_values);
+
+ gpu_device_set_data(dummy, kbdev);
+
+ dev_info(kbdev->dev, "Using Dummy Model");
}
+
return dummy;
}
@@ -1135,7 +1126,7 @@ static void midgard_model_get_outputs(void *h)
lockdep_assert_held(&hw_error_status.access_lock);
if (hw_error_status.job_irq_status)
- gpu_device_raise_irq(dummy, GPU_DUMMY_JOB_IRQ);
+ gpu_device_raise_irq(dummy, MODEL_LINUX_JOB_IRQ);
if ((dummy->power_changed && dummy->power_changed_mask) ||
(dummy->reset_completed & dummy->reset_completed_mask) ||
@@ -1146,10 +1137,10 @@ static void midgard_model_get_outputs(void *h)
(dummy->flush_pa_range_completed && dummy->flush_pa_range_completed_irq_enabled) ||
#endif
(dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled))
- gpu_device_raise_irq(dummy, GPU_DUMMY_GPU_IRQ);
+ gpu_device_raise_irq(dummy, MODEL_LINUX_GPU_IRQ);
if (hw_error_status.mmu_irq_rawstat & hw_error_status.mmu_irq_mask)
- gpu_device_raise_irq(dummy, GPU_DUMMY_MMU_IRQ);
+ gpu_device_raise_irq(dummy, MODEL_LINUX_MMU_IRQ);
}
static void midgard_model_update(void *h)
@@ -1216,7 +1207,7 @@ static void invalidate_active_jobs(struct dummy_model_t *dummy)
}
}
-u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
+void midgard_model_write_reg(void *h, u32 addr, u32 value)
{
unsigned long flags;
struct dummy_model_t *dummy = (struct dummy_model_t *)h;
@@ -1226,7 +1217,7 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
#if !MALI_USE_CSF
if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) &&
(addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) {
- int slot_idx = (addr >> 7) & 0xf;
+ unsigned int slot_idx = (addr >> 7) & 0xf;
KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS);
if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_LO)) {
@@ -1608,11 +1599,9 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
midgard_model_update(dummy);
midgard_model_get_outputs(dummy);
spin_unlock_irqrestore(&hw_error_status.access_lock, flags);
-
- return 1;
}
-u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
+void midgard_model_read_reg(void *h, u32 addr, u32 *const value)
{
unsigned long flags;
struct dummy_model_t *dummy = (struct dummy_model_t *)h;
@@ -2052,8 +2041,6 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
spin_unlock_irqrestore(&hw_error_status.access_lock, flags);
CSTD_UNUSED(dummy);
-
- return 1;
}
static u32 set_user_sample_core_type(u64 *counters, u32 *usr_data_start, u32 usr_data_offset,
@@ -2229,3 +2216,16 @@ int gpu_model_control(void *model,
return 0;
}
+
+/**
+ * kbase_is_gpu_removed - Has the GPU been removed.
+ * @kbdev: Kbase device pointer
+ *
+ * This function would return true if the GPU has been removed.
+ * It is stubbed here
+ * Return: Always false
+ */
+bool kbase_is_gpu_removed(struct kbase_device *kbdev)
+{
+ return false;
+}
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.h b/mali_kbase/backend/gpu/mali_kbase_model_dummy.h
index 8eaf1b0..2a3351b 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.h
+++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.h
@@ -21,11 +21,24 @@
/*
* Dummy Model interface
+ *
+ * Support for NO_MALI dummy Model interface.
+ *
+ * +-----------------------------------+
+ * | Kbase read/write/IRQ |
+ * +-----------------------------------+
+ * | Model Linux Framework |
+ * +-----------------------------------+
+ * | Model Dummy interface definitions |
+ * +-----------------+-----------------+
+ * | Fake R/W | Fake IRQ |
+ * +-----------------+-----------------+
*/
#ifndef _KBASE_MODEL_DUMMY_H_
#define _KBASE_MODEL_DUMMY_H_
+#include <uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_linux.h>
#include <uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h>
#define model_error_log(module, ...) pr_err(__VA_ARGS__)
@@ -154,11 +167,6 @@ struct gpu_model_prfcnt_en {
u32 shader;
};
-void *midgard_model_create(const void *config);
-void midgard_model_destroy(void *h);
-u8 midgard_model_write_reg(void *h, u32 addr, u32 value);
-u8 midgard_model_read_reg(void *h, u32 addr,
- u32 * const value);
void midgard_set_error(int job_slot);
int job_atom_inject_error(struct kbase_error_params *params);
int gpu_model_control(void *h,
@@ -211,17 +219,6 @@ void gpu_model_prfcnt_dump_request(uint32_t *sample_buf, struct gpu_model_prfcnt
void gpu_model_glb_request_job_irq(void *model);
#endif /* MALI_USE_CSF */
-enum gpu_dummy_irq {
- GPU_DUMMY_JOB_IRQ,
- GPU_DUMMY_GPU_IRQ,
- GPU_DUMMY_MMU_IRQ
-};
-
-void gpu_device_raise_irq(void *model,
- enum gpu_dummy_irq irq);
-void gpu_device_set_data(void *model, void *data);
-void *gpu_device_get_data(void *model);
-
extern struct error_status_t hw_error_status;
#endif
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c b/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c
index 972d1c8..75b1e7e 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c
+++ b/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c
@@ -21,7 +21,7 @@
#include <mali_kbase.h>
#include <linux/random.h>
-#include "backend/gpu/mali_kbase_model_dummy.h"
+#include "backend/gpu/mali_kbase_model_linux.h"
static struct kbase_error_atom *error_track_list;
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.c b/mali_kbase/backend/gpu/mali_kbase_model_linux.c
index 7887cb2..b37680d 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_linux.c
+++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.c
@@ -20,12 +20,12 @@
*/
/*
- * Model interface
+ * Model Linux Framework interfaces.
*/
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
-#include <backend/gpu/mali_kbase_model_dummy.h>
+
#include "backend/gpu/mali_kbase_model_linux.h"
#include "device/mali_kbase_device.h"
#include "mali_kbase_irq_internal.h"
@@ -105,8 +105,7 @@ static void serve_mmu_irq(struct work_struct *work)
kmem_cache_free(kbdev->irq_slab, data);
}
-void gpu_device_raise_irq(void *model,
- enum gpu_dummy_irq irq)
+void gpu_device_raise_irq(void *model, enum model_linux_irqs irq)
{
struct model_irq_data *data;
struct kbase_device *kbdev = gpu_device_get_data(model);
@@ -120,15 +119,15 @@ void gpu_device_raise_irq(void *model,
data->kbdev = kbdev;
switch (irq) {
- case GPU_DUMMY_JOB_IRQ:
+ case MODEL_LINUX_JOB_IRQ:
INIT_WORK(&data->work, serve_job_irq);
atomic_set(&kbdev->serving_job_irq, 1);
break;
- case GPU_DUMMY_GPU_IRQ:
+ case MODEL_LINUX_GPU_IRQ:
INIT_WORK(&data->work, serve_gpu_irq);
atomic_set(&kbdev->serving_gpu_irq, 1);
break;
- case GPU_DUMMY_MMU_IRQ:
+ case MODEL_LINUX_MMU_IRQ:
INIT_WORK(&data->work, serve_mmu_irq);
atomic_set(&kbdev->serving_mmu_irq, 1);
break;
@@ -165,22 +164,8 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
return val;
}
-
KBASE_EXPORT_TEST_API(kbase_reg_read);
-/**
- * kbase_is_gpu_removed - Has the GPU been removed.
- * @kbdev: Kbase device pointer
- *
- * This function would return true if the GPU has been removed.
- * It is stubbed here
- * Return: Always false
- */
-bool kbase_is_gpu_removed(struct kbase_device *kbdev)
-{
- return false;
-}
-
int kbase_install_interrupts(struct kbase_device *kbdev)
{
KBASE_DEBUG_ASSERT(kbdev);
@@ -239,16 +224,12 @@ KBASE_EXPORT_TEST_API(kbase_gpu_irq_test_handler);
int kbase_gpu_device_create(struct kbase_device *kbdev)
{
- kbdev->model = midgard_model_create(NULL);
+ kbdev->model = midgard_model_create(kbdev);
if (kbdev->model == NULL)
return -ENOMEM;
- gpu_device_set_data(kbdev->model, kbdev);
-
spin_lock_init(&kbdev->reg_op_lock);
- dev_warn(kbdev->dev, "Using Dummy Model");
-
return 0;
}
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.h b/mali_kbase/backend/gpu/mali_kbase_model_linux.h
index dcb2e7c..a24db17 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_linux.h
+++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,13 +20,132 @@
*/
/*
- * Model interface
+ * Model Linux Framework interfaces.
+ *
+ * This framework is used to provide generic Kbase Models interfaces.
+ * Note: Backends cannot be used together; the selection is done at build time.
+ *
+ * - Without Model Linux Framework:
+ * +-----------------------------+
+ * | Kbase read/write/IRQ |
+ * +-----------------------------+
+ * | HW interface definitions |
+ * +-----------------------------+
+ *
+ * - With Model Linux Framework:
+ * +-----------------------------+
+ * | Kbase read/write/IRQ |
+ * +-----------------------------+
+ * | Model Linux Framework |
+ * +-----------------------------+
+ * | Model interface definitions |
+ * +-----------------------------+
*/
#ifndef _KBASE_MODEL_LINUX_H_
#define _KBASE_MODEL_LINUX_H_
+/*
+ * Include Model definitions
+ */
+
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#endif /* IS_ENABLED(CONFIG_MALI_NO_MALI) */
+
+#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
+/**
+ * kbase_gpu_device_create() - Generic create function.
+ *
+ * @kbdev: Kbase device.
+ *
+ * Specific model hook is implemented by midgard_model_create()
+ *
+ * Return: 0 on success, error code otherwise.
+ */
int kbase_gpu_device_create(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpu_device_destroy() - Generic destroy function.
+ *
+ * @kbdev: Kbase device.
+ *
+ * Specific model hook is implemented by midgard_model_destroy()
+ */
void kbase_gpu_device_destroy(struct kbase_device *kbdev);
-#endif /* _KBASE_MODEL_LINUX_H_ */
+/**
+ * midgard_model_create() - Private create function.
+ *
+ * @kbdev: Kbase device.
+ *
+ * This hook is specific to the model built in Kbase.
+ *
+ * Return: Model handle.
+ */
+void *midgard_model_create(struct kbase_device *kbdev);
+
+/**
+ * midgard_model_destroy() - Private destroy function.
+ *
+ * @h: Model handle.
+ *
+ * This hook is specific to the model built in Kbase.
+ */
+void midgard_model_destroy(void *h);
+
+/**
+ * midgard_model_write_reg() - Private model write function.
+ *
+ * @h: Model handle.
+ * @addr: Address at which to write.
+ * @value: value to write.
+ *
+ * This hook is specific to the model built in Kbase.
+ */
+void midgard_model_write_reg(void *h, u32 addr, u32 value);
+
+/**
+ * midgard_model_read_reg() - Private model read function.
+ *
+ * @h: Model handle.
+ * @addr: Address from which to read.
+ * @value: Pointer where to store the read value.
+ *
+ * This hook is specific to the model built in Kbase.
+ */
+void midgard_model_read_reg(void *h, u32 addr, u32 *const value);
+
+/**
+ * gpu_device_raise_irq() - Private IRQ raise function.
+ *
+ * @model: Model handle.
+ * @irq: IRQ type to raise.
+ *
+ * This hook is global to the model Linux framework.
+ */
+void gpu_device_raise_irq(void *model, enum model_linux_irqs irq);
+
+/**
+ * gpu_device_set_data() - Private model set data function.
+ *
+ * @model: Model handle.
+ * @data: Data carried by model.
+ *
+ * This hook is global to the model Linux framework.
+ */
+void gpu_device_set_data(void *model, void *data);
+
+/**
+ * gpu_device_get_data() - Private model get data function.
+ *
+ * @model: Model handle.
+ *
+ * This hook is global to the model Linux framework.
+ *
+ * Return: Pointer to the data carried by model.
+ */
+void *gpu_device_get_data(void *model);
+#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
+
+#endif /* _KBASE_MODEL_LINUX_H_ */
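The kernel-doc above enumerates every hook a model backend must provide when CONFIG_MALI_REAL_HW is not set. As a rough sketch only (not the NO_MALI dummy model added by this patch), a trivial backend could wire the hooks up as follows; struct null_model, its register window size and the immediate-IRQ behaviour are invented for the example:

#include <linux/slab.h>
#include <mali_kbase.h>
#include <backend/gpu/mali_kbase_model_linux.h>

struct null_model {
	u32 regs[0x4000 / sizeof(u32)];	/* hypothetical register window */
};

void *midgard_model_create(struct kbase_device *kbdev)
{
	struct null_model *m = kzalloc(sizeof(*m), GFP_KERNEL);

	if (!m)
		return NULL;

	/* Let the framework map the model handle back to its kbase device. */
	gpu_device_set_data(m, kbdev);
	return m;
}

void midgard_model_destroy(void *h)
{
	kfree(h);
}

void midgard_model_write_reg(void *h, u32 addr, u32 value)
{
	struct null_model *m = h;

	if (addr < sizeof(m->regs))
		m->regs[addr / sizeof(u32)] = value;

	/* As with NO_MALI, pretend the operation completed immediately. */
	gpu_device_raise_irq(m, MODEL_LINUX_GPU_IRQ);
}

void midgard_model_read_reg(void *h, u32 addr, u32 *const value)
{
	struct null_model *m = h;

	*value = (addr < sizeof(m->regs)) ? m->regs[addr / sizeof(u32)] : 0;
}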
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
index a4d7168..b02f77f 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
@@ -26,9 +26,7 @@
#include <mali_kbase.h>
#include <mali_kbase_pm.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
-#include <backend/gpu/mali_kbase_model_dummy.h>
-#endif /* CONFIG_MALI_NO_MALI */
+#include <backend/gpu/mali_kbase_model_linux.h>
#include <mali_kbase_dummy_job_wa.h>
int kbase_pm_ca_init(struct kbase_device *kbdev)
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
index f864661..c51b133 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
@@ -804,6 +804,17 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
KBASE_MCU_HCTL_SHADERS_PEND_ON;
} else
backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE;
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+ if (kbase_debug_coresight_csf_state_check(
+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED)) {
+ kbase_debug_coresight_csf_state_request(
+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED);
+ backend->mcu_state = KBASE_MCU_CORESIGHT_ENABLE;
+ } else if (kbase_debug_coresight_csf_state_check(
+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) {
+ backend->mcu_state = KBASE_MCU_CORESIGHT_ENABLE;
+ }
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
}
break;
@@ -832,8 +843,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
unsigned long flags;
kbase_csf_scheduler_spin_lock(kbdev, &flags);
- kbase_hwcnt_context_enable(
- kbdev->hwcnt_gpu_ctx);
+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
backend->hwcnt_disabled = false;
}
@@ -854,9 +864,19 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
backend->mcu_state =
KBASE_MCU_HCTL_MCU_ON_RECHECK;
}
- } else if (kbase_pm_handle_mcu_core_attr_update(kbdev)) {
+ } else if (kbase_pm_handle_mcu_core_attr_update(kbdev))
backend->mcu_state = KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND;
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+ else if (kbdev->csf.coresight.disable_on_pmode_enter) {
+ kbase_debug_coresight_csf_state_request(
+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED);
+ backend->mcu_state = KBASE_MCU_ON_PMODE_ENTER_CORESIGHT_DISABLE;
+ } else if (kbdev->csf.coresight.enable_on_pmode_exit) {
+ kbase_debug_coresight_csf_state_request(
+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED);
+ backend->mcu_state = KBASE_MCU_ON_PMODE_EXIT_CORESIGHT_ENABLE;
}
+#endif
break;
case KBASE_MCU_HCTL_MCU_ON_RECHECK:
@@ -947,11 +967,45 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
#ifdef KBASE_PM_RUNTIME
if (backend->gpu_sleep_mode_active)
backend->mcu_state = KBASE_MCU_ON_SLEEP_INITIATE;
- else
+ else {
#endif
backend->mcu_state = KBASE_MCU_ON_HALT;
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+ kbase_debug_coresight_csf_state_request(
+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED);
+ backend->mcu_state = KBASE_MCU_CORESIGHT_DISABLE;
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
+ }
+ }
+ break;
+
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+ case KBASE_MCU_ON_PMODE_ENTER_CORESIGHT_DISABLE:
+ if (kbase_debug_coresight_csf_state_check(
+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED)) {
+ backend->mcu_state = KBASE_MCU_ON;
+ kbdev->csf.coresight.disable_on_pmode_enter = false;
}
break;
+ case KBASE_MCU_ON_PMODE_EXIT_CORESIGHT_ENABLE:
+ if (kbase_debug_coresight_csf_state_check(
+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) {
+ backend->mcu_state = KBASE_MCU_ON;
+ kbdev->csf.coresight.enable_on_pmode_exit = false;
+ }
+ break;
+ case KBASE_MCU_CORESIGHT_DISABLE:
+ if (kbase_debug_coresight_csf_state_check(
+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED))
+ backend->mcu_state = KBASE_MCU_ON_HALT;
+ break;
+
+ case KBASE_MCU_CORESIGHT_ENABLE:
+ if (kbase_debug_coresight_csf_state_check(
+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED))
+ backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE;
+ break;
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
case KBASE_MCU_ON_HALT:
if (!kbase_pm_is_mcu_desired(kbdev)) {
@@ -1045,6 +1099,11 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
/* Reset complete */
if (!backend->in_reset)
backend->mcu_state = KBASE_MCU_OFF;
+
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+ kbdev->csf.coresight.disable_on_pmode_enter = false;
+ kbdev->csf.coresight.enable_on_pmode_exit = false;
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
break;
default:
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h b/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h
index 5e57c9d..3b448e3 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -66,6 +66,13 @@
* is being put to sleep.
* @ON_PEND_SLEEP: MCU sleep is in progress.
* @IN_SLEEP: Sleep request is completed and MCU has halted.
+ * @ON_PMODE_ENTER_CORESIGHT_DISABLE: The MCU is on, protected mode enter is about to
+ * be requested, Coresight is being disabled.
+ * @ON_PMODE_EXIT_CORESIGHT_ENABLE : The MCU is on, protected mode exit has happened
+ * Coresight is being enabled.
+ * @CORESIGHT_DISABLE: The MCU is on and Coresight is being disabled.
+ * @CORESIGHT_ENABLE: The MCU is on, host does not have control and
+ * Coresight is being enabled.
*/
KBASEP_MCU_STATE(OFF)
KBASEP_MCU_STATE(PEND_ON_RELOAD)
@@ -92,3 +99,10 @@ KBASEP_MCU_STATE(HCTL_SHADERS_CORE_OFF_PEND)
KBASEP_MCU_STATE(ON_SLEEP_INITIATE)
KBASEP_MCU_STATE(ON_PEND_SLEEP)
KBASEP_MCU_STATE(IN_SLEEP)
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+/* Additional MCU states for Coresight */
+KBASEP_MCU_STATE(ON_PMODE_ENTER_CORESIGHT_DISABLE)
+KBASEP_MCU_STATE(ON_PMODE_EXIT_CORESIGHT_ENABLE)
+KBASEP_MCU_STATE(CORESIGHT_DISABLE)
+KBASEP_MCU_STATE(CORESIGHT_ENABLE)
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
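
For context, this header is an x-macro list: each KBASEP_MCU_STATE(n) entry expands wherever the header is included under a suitable macro definition, so the four Coresight entries above extend both the state enum and the state-name table with no further changes. Roughly, the consuming side looks like the following sketch (illustrative, not the literal kbase definitions):

/* Sketch of how the x-macro list is typically consumed (illustrative only). */
enum kbase_mcu_state {
#define KBASEP_MCU_STATE(n) KBASE_MCU_##n,
#include "mali_kbase_pm_mcu_states.h"
#undef KBASEP_MCU_STATE
};

static const char *const kbase_mcu_state_names[] = {
#define KBASEP_MCU_STATE(n) #n,
#include "mali_kbase_pm_mcu_states.h"
#undef KBASEP_MCU_STATE
};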
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c
index 07eb8a3..88d12d9 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c
@@ -464,7 +464,7 @@ void kbase_pm_metrics_stop(struct kbase_device *kbdev)
*/
static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
{
- int js;
+ unsigned int js;
lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
diff --git a/mali_kbase/build.bp b/mali_kbase/build.bp
index c5a6bee..a563058 100644
--- a/mali_kbase/build.bp
+++ b/mali_kbase/build.bp
@@ -28,7 +28,7 @@ bob_defaults {
defaults: [
"kernel_defaults",
],
- no_mali: {
+ mali_no_mali: {
kbuild_options: [
"CONFIG_MALI_NO_MALI=y",
"CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}",
@@ -140,6 +140,9 @@ bob_defaults {
mali_fw_core_dump: {
kbuild_options: ["CONFIG_MALI_FW_CORE_DUMP=y"],
},
+ mali_coresight: {
+ kbuild_options: ["CONFIG_MALI_CORESIGHT=y"],
+ },
kbuild_options: [
"CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}",
"MALI_CUSTOMER_RELEASE={{.release}}",
diff --git a/mali_kbase/context/backend/mali_kbase_context_csf.c b/mali_kbase/context/backend/mali_kbase_context_csf.c
index 3abc7a2..07d277b 100644
--- a/mali_kbase/context/backend/mali_kbase_context_csf.c
+++ b/mali_kbase/context/backend/mali_kbase_context_csf.c
@@ -34,6 +34,7 @@
#if IS_ENABLED(CONFIG_DEBUG_FS)
#include <csf/mali_kbase_csf_csg_debugfs.h>
#include <csf/mali_kbase_csf_kcpu_debugfs.h>
+#include <csf/mali_kbase_csf_sync_debugfs.h>
#include <csf/mali_kbase_csf_tiler_heap_debugfs.h>
#include <csf/mali_kbase_csf_cpu_queue_debugfs.h>
#include <mali_kbase_debug_mem_view.h>
@@ -50,6 +51,7 @@ void kbase_context_debugfs_init(struct kbase_context *const kctx)
kbase_jit_debugfs_init(kctx);
kbase_csf_queue_group_debugfs_init(kctx);
kbase_csf_kcpu_debugfs_init(kctx);
+ kbase_csf_sync_debugfs_init(kctx);
kbase_csf_tiler_heap_debugfs_init(kctx);
kbase_csf_tiler_heap_total_debugfs_init(kctx);
kbase_csf_cpu_queue_debugfs_init(kctx);
diff --git a/mali_kbase/csf/Kbuild b/mali_kbase/csf/Kbuild
index 2b02279..c5438f0 100644
--- a/mali_kbase/csf/Kbuild
+++ b/mali_kbase/csf/Kbuild
@@ -31,17 +31,21 @@ mali_kbase-y += \
csf/mali_kbase_csf_reset_gpu.o \
csf/mali_kbase_csf_csg_debugfs.o \
csf/mali_kbase_csf_kcpu_debugfs.o \
+ csf/mali_kbase_csf_sync_debugfs.o \
csf/mali_kbase_csf_protected_memory.o \
csf/mali_kbase_csf_tiler_heap_debugfs.o \
csf/mali_kbase_csf_cpu_queue_debugfs.o \
csf/mali_kbase_csf_event.o \
csf/mali_kbase_csf_firmware_log.o \
csf/mali_kbase_csf_firmware_core_dump.o \
- csf/mali_kbase_csf_tiler_heap_reclaim.o
+ csf/mali_kbase_csf_tiler_heap_reclaim.o \
+ csf/mali_kbase_csf_mcu_shared_reg.o
-mali_kbase-$(CONFIG_MALI_REAL_HW) += csf/mali_kbase_csf_firmware.o
-
-mali_kbase-$(CONFIG_MALI_NO_MALI) += csf/mali_kbase_csf_firmware_no_mali.o
+ifeq ($(CONFIG_MALI_NO_MALI),y)
+mali_kbase-y += csf/mali_kbase_csf_firmware_no_mali.o
+else
+mali_kbase-y += csf/mali_kbase_csf_firmware.o
+endif
mali_kbase-$(CONFIG_DEBUG_FS) += csf/mali_kbase_debug_csf_fault.o
diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c
index b17c010..dbfcfde 100644
--- a/mali_kbase/csf/mali_kbase_csf.c
+++ b/mali_kbase/csf/mali_kbase_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -34,7 +34,8 @@
#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
#include <mali_kbase_hwaccess_time.h>
#include "mali_kbase_csf_event.h"
-#include <linux/protected_memory_allocator.h>
+#include <tl/mali_kbase_tracepoints.h>
+#include "mali_kbase_csf_mcu_shared_reg.h"
#define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK)
#define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK)
@@ -132,21 +133,6 @@ static int get_user_pages_mmap_handle(struct kbase_context *kctx,
return 0;
}
-static void gpu_munmap_user_io_pages(struct kbase_context *kctx, struct kbase_va_region *reg,
- struct tagged_addr *phys)
-{
- size_t num_pages = 2;
-
- kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, reg->start_pfn, phys,
- num_pages, MCU_AS_NR, true);
-
- WARN_ON(reg->flags & KBASE_REG_FREE);
-
- mutex_lock(&kctx->kbdev->csf.reg_lock);
- kbase_remove_va_region(kctx->kbdev, reg);
- mutex_unlock(&kctx->kbdev->csf.reg_lock);
-}
-
static void init_user_io_pages(struct kbase_queue *queue)
{
u32 *input_addr = (u32 *)(queue->user_io_addr);
@@ -164,76 +150,15 @@ static void init_user_io_pages(struct kbase_queue *queue)
output_addr[CS_ACTIVE/4] = 0;
}
-/* Map the input/output pages in the shared interface segment of MCU firmware
- * address space.
- */
-static int gpu_mmap_user_io_pages(struct kbase_device *kbdev,
- struct tagged_addr *phys, struct kbase_va_region *reg)
-{
- unsigned long mem_flags = KBASE_REG_GPU_RD;
- const size_t num_pages = 2;
- int ret;
-
- /* Calls to this function are inherently asynchronous, with respect to
- * MMU operations.
- */
- const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
-
- if (kbdev->system_coherency == COHERENCY_NONE) {
- mem_flags |=
- KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
- } else {
- mem_flags |= KBASE_REG_SHARE_BOTH |
- KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED);
- }
-
- mutex_lock(&kbdev->csf.reg_lock);
- ret = kbase_add_va_region_rbtree(kbdev, reg, 0, num_pages, 1);
- reg->flags &= ~KBASE_REG_FREE;
- mutex_unlock(&kbdev->csf.reg_lock);
-
- if (ret)
- return ret;
-
- /* Map input page */
- ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn, &phys[0], 1,
- mem_flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_IO, mmu_sync_info,
- NULL, false);
- if (ret)
- goto bad_insert;
-
- /* Map output page, it needs rw access */
- mem_flags |= KBASE_REG_GPU_WR;
- ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn + 1, &phys[1], 1,
- mem_flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_IO, mmu_sync_info,
- NULL, false);
- if (ret)
- goto bad_insert_output_page;
-
- return 0;
-
-bad_insert_output_page:
- kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn, phys, 1, MCU_AS_NR,
- true);
-bad_insert:
- mutex_lock(&kbdev->csf.reg_lock);
- kbase_remove_va_region(kbdev, reg);
- mutex_unlock(&kbdev->csf.reg_lock);
-
- return ret;
-}
-
static void kernel_unmap_user_io_pages(struct kbase_context *kctx,
struct kbase_queue *queue)
{
- const size_t num_pages = 2;
-
kbase_gpu_vm_lock(kctx);
vunmap(queue->user_io_addr);
- WARN_ON(num_pages > atomic_read(&kctx->permanent_mapped_pages));
- atomic_sub(num_pages, &kctx->permanent_mapped_pages);
+ WARN_ON(atomic_read(&kctx->permanent_mapped_pages) < KBASEP_NUM_CS_USER_IO_PAGES);
+ atomic_sub(KBASEP_NUM_CS_USER_IO_PAGES, &kctx->permanent_mapped_pages);
kbase_gpu_vm_unlock(kctx);
}
@@ -311,17 +236,15 @@ static void release_queue(struct kbase_queue *queue);
*/
void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue)
{
- const size_t num_pages = 2;
-
- gpu_munmap_user_io_pages(kctx, queue->reg, &queue->phys[0]);
kernel_unmap_user_io_pages(kctx, queue);
kbase_mem_pool_free_pages(
&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
- num_pages, queue->phys, true, false);
+ KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, true, false);
+ kbase_process_page_usage_dec(kctx, KBASEP_NUM_CS_USER_IO_PAGES);
- kfree(queue->reg);
- queue->reg = NULL;
+ /* The user_io_gpu_va should have been unmapped inside the scheduler */
+ WARN_ONCE(queue->user_io_gpu_va, "User IO pages appear to still have a mapping");
/* If the queue has already been terminated by userspace
* then the ref count for queue object will drop to 0 here.
@@ -330,41 +253,37 @@ void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, struct
}
KBASE_EXPORT_TEST_API(kbase_csf_free_command_stream_user_pages);
-int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx,
- struct kbase_queue *queue)
+int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue)
{
struct kbase_device *kbdev = kctx->kbdev;
- struct kbase_va_region *reg;
- const size_t num_pages = 2;
int ret;
lockdep_assert_held(&kctx->csf.lock);
- reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0,
- num_pages, KBASE_REG_ZONE_MCU_SHARED);
- if (!reg)
- return -ENOMEM;
-
- ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], num_pages,
+ ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
+ KBASEP_NUM_CS_USER_IO_PAGES,
queue->phys, false);
- if (ret != num_pages)
- goto phys_alloc_failed;
+ if (ret != KBASEP_NUM_CS_USER_IO_PAGES) {
+ /* Mark both phys entries as zero to indicate that no physical pages are allocated */
+ queue->phys[0].tagged_addr = 0;
+ queue->phys[1].tagged_addr = 0;
+ return -ENOMEM;
+ }
ret = kernel_map_user_io_pages(kctx, queue);
if (ret)
goto kernel_map_failed;
+ kbase_process_page_usage_inc(kctx, KBASEP_NUM_CS_USER_IO_PAGES);
init_user_io_pages(queue);
- ret = gpu_mmap_user_io_pages(kctx->kbdev, queue->phys, reg);
- if (ret)
- goto gpu_mmap_failed;
-
- queue->reg = reg;
+ /* user_io_gpu_va is only mapped when the scheduler decides to put the queue
+ * on slot at runtime. Initialize it to 0 to signal that there is no mapping.
+ */
+ queue->user_io_gpu_va = 0;
mutex_lock(&kbdev->csf.reg_lock);
- if (kbdev->csf.db_file_offsets >
- (U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1))
+ if (kbdev->csf.db_file_offsets > (U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1))
kbdev->csf.db_file_offsets = 0;
queue->db_file_offset = kbdev->csf.db_file_offsets;
@@ -384,18 +303,14 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx,
return 0;
-gpu_mmap_failed:
- kernel_unmap_user_io_pages(kctx, queue);
-
kernel_map_failed:
- kbase_mem_pool_free_pages(
- &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
- num_pages, queue->phys, false, false);
-
-phys_alloc_failed:
- kfree(reg);
+ kbase_mem_pool_free_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
+ KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, false, false);
+ /* Mark both phys entries as zero to indicate that no physical pages are allocated */
+ queue->phys[0].tagged_addr = 0;
+ queue->phys[1].tagged_addr = 0;
- return -ENOMEM;
+ return ret;
}
KBASE_EXPORT_TEST_API(kbase_csf_alloc_command_stream_user_pages);
@@ -928,6 +843,14 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev,
WARN_ON(csi_index >= ginfo->stream_num))
return;
+ /* The access to CSG_DB_REQ/ACK needs to be ordered with respect to
+ * CS_REQ/ACK to avoid a scenario where CSG_DB_REQ/ACK becomes visible to
+ * FW before CS_REQ/ACK is set.
+ *
+ * 'osh' is used as the CPU and GPU are in the same outer shareable domain.
+ */
+ dmb(osh);
+
value = kbase_csf_firmware_csg_output(ginfo, CSG_DB_ACK);
value ^= (1 << csi_index);
kbase_csf_firmware_csg_input_mask(ginfo, CSG_DB_REQ, value,
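
The dmb(osh) added above enforces a publish-then-notify ordering: the per-stream CS_REQ/ACK update must become visible to the firmware before the CSG doorbell request is toggled. Stripped of the driver specifics, the idiom is the one sketched below (illustrative names only, not driver API; kernel barrier headers assumed):

/* Illustrative only: generic "publish payload, then ring doorbell" ordering.
 * The outer-shareable barrier prevents the observer from seeing the doorbell
 * toggle before the request it refers to.
 */
static void publish_then_ring(u32 *payload, u32 *doorbell, u32 req, u32 db_bit)
{
	*payload = req;       /* 1. publish the request                */
	dmb(osh);             /* 2. order it against the notification  */
	*doorbell ^= db_bit;  /* 3. toggle the doorbell request bit    */
}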
@@ -945,6 +868,8 @@ int kbase_csf_queue_kick(struct kbase_context *kctx,
struct kbase_va_region *region;
int err = 0;
+ KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK(kbdev, kctx->id, kick->buffer_gpu_addr);
+
/* GPU work submission happening asynchronously to prevent the contention with
* scheduler lock and as the result blocking application thread. For this reason,
* the vm_lock is used here to get the reference to the queue based on its buffer_gpu_addr
@@ -1033,6 +958,15 @@ static void unbind_queue(struct kbase_context *kctx, struct kbase_queue *queue)
}
}
+static bool kbase_csf_queue_phys_allocated(struct kbase_queue *queue)
+{
+ /* The queue's phys are zeroed when allocation fails. Both entries being
+ * zero is impossible for a successfully allocated set of physical pages.
+ */
+
+ return (queue->phys[0].tagged_addr | queue->phys[1].tagged_addr);
+}
+
void kbase_csf_queue_unbind(struct kbase_queue *queue, bool process_exit)
{
struct kbase_context *kctx = queue->kctx;
@@ -1058,8 +992,8 @@ void kbase_csf_queue_unbind(struct kbase_queue *queue, bool process_exit)
unbind_queue(kctx, queue);
}
- /* Free the resources, if allocated for this queue. */
- if (queue->reg)
+ /* Free the resources if phys pages were allocated for this queue */
+ if (kbase_csf_queue_phys_allocated(queue))
kbase_csf_free_command_stream_user_pages(kctx, queue);
}
@@ -1072,8 +1006,8 @@ void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue)
WARN_ON(queue->bind_state == KBASE_CSF_QUEUE_BOUND);
unbind_stopped_queue(kctx, queue);
- /* Free the resources, if allocated for this queue. */
- if (queue->reg)
+ /* Free the resources if phys pages were allocated for this queue */
+ if (kbase_csf_queue_phys_allocated(queue))
kbase_csf_free_command_stream_user_pages(kctx, queue);
}
@@ -1136,130 +1070,39 @@ static bool iface_has_enough_streams(struct kbase_device *const kbdev,
* @kctx: Pointer to kbase context where the queue group is created at
* @s_buf: Pointer to suspend buffer that is attached to queue group
*
- * Return: 0 if suspend buffer is successfully allocated and reflected to GPU
- * MMU page table. Otherwise -ENOMEM.
+ * Return: 0 if the physical pages for the suspend buffer are successfully allocated.
+ * Otherwise -ENOMEM or another error code.
*/
static int create_normal_suspend_buffer(struct kbase_context *const kctx,
struct kbase_normal_suspend_buffer *s_buf)
{
- struct kbase_va_region *reg = NULL;
- const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR;
const size_t nr_pages =
PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size);
- int err = 0;
-
- /* Calls to this function are inherently asynchronous, with respect to
- * MMU operations.
- */
- const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+ int err;
lockdep_assert_held(&kctx->csf.lock);
- /* Allocate and initialize Region Object */
- reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0,
- nr_pages, KBASE_REG_ZONE_MCU_SHARED);
-
- if (!reg)
- return -ENOMEM;
+ /* The suspend buffer's mapping address is valid only when the CSG is to
+ * run on slot. Initialize it to 0 to signal that the buffer is not mapped.
+ */
+ s_buf->gpu_va = 0;
s_buf->phy = kcalloc(nr_pages, sizeof(*s_buf->phy), GFP_KERNEL);
- if (!s_buf->phy) {
- err = -ENOMEM;
- goto phy_alloc_failed;
- }
+ if (!s_buf->phy)
+ return -ENOMEM;
/* Get physical page for a normal suspend buffer */
err = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages,
&s_buf->phy[0], false);
- if (err < 0)
- goto phy_pages_alloc_failed;
-
- /* Insert Region Object into rbtree and make virtual address available
- * to map it to physical page
- */
- mutex_lock(&kctx->kbdev->csf.reg_lock);
- err = kbase_add_va_region_rbtree(kctx->kbdev, reg, 0, nr_pages, 1);
- reg->flags &= ~KBASE_REG_FREE;
- mutex_unlock(&kctx->kbdev->csf.reg_lock);
-
- if (err)
- goto add_va_region_failed;
-
- /* Update MMU table */
- err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, reg->start_pfn,
- &s_buf->phy[0], nr_pages, mem_flags, MCU_AS_NR,
- KBASE_MEM_GROUP_CSF_FW, mmu_sync_info, NULL, false);
- if (err)
- goto mmu_insert_failed;
-
- s_buf->reg = reg;
+ if (err < 0) {
+ kfree(s_buf->phy);
+ return err;
+ }
+ kbase_process_page_usage_inc(kctx, nr_pages);
return 0;
-
-mmu_insert_failed:
- mutex_lock(&kctx->kbdev->csf.reg_lock);
- kbase_remove_va_region(kctx->kbdev, reg);
- mutex_unlock(&kctx->kbdev->csf.reg_lock);
-
-add_va_region_failed:
- kbase_mem_pool_free_pages(
- &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages,
- &s_buf->phy[0], false, false);
-
-phy_pages_alloc_failed:
- kfree(s_buf->phy);
-phy_alloc_failed:
- kfree(reg);
-
- return err;
-}
-
-/**
- * init_protected_suspend_buffer() - Reserve the VA range for the protected-mode
- * suspend buffer of a queue group.
- * Allocation of physical pages will happen when
- * queue group enters protected mode.
- *
- * @kbdev: Instance of a GPU platform device that implements a CSF interface.
- * @s_buf: Pointer to suspend buffer that is attached to queue group
- *
- * Return: 0 if suspend buffer init is successful, Otherwise Negative error value.
- */
-static int init_protected_suspend_buffer(struct kbase_device *const kbdev,
- struct kbase_protected_suspend_buffer *s_buf)
-{
- struct kbase_va_region *reg = NULL;
- const size_t nr_pages =
- PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
- int err = 0;
-
- s_buf->reg = NULL;
- s_buf->pma = NULL;
- s_buf->alloc_retries = 0;
-
- /* Allocate and initialize Region Object */
- reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
- nr_pages, KBASE_REG_ZONE_MCU_SHARED);
-
- if (unlikely(!reg))
- return -ENOMEM;
-
- /* Insert Region Object into rbtree and make virtual address available
- * to map it to physical page.
- */
- mutex_lock(&kbdev->csf.reg_lock);
- err = kbase_add_va_region_rbtree(kbdev, reg, 0, nr_pages, 1);
- reg->flags &= ~KBASE_REG_FREE;
- mutex_unlock(&kbdev->csf.reg_lock);
-
- if (unlikely(err))
- kbase_free_alloced_region(reg);
- else
- s_buf->reg = reg;
-
- return err;
}
static void timer_event_worker(struct work_struct *data);
@@ -1280,24 +1123,17 @@ static void term_normal_suspend_buffer(struct kbase_context *const kctx,
static int create_suspend_buffers(struct kbase_context *const kctx,
struct kbase_queue_group * const group)
{
- int err = 0;
-
if (create_normal_suspend_buffer(kctx, &group->normal_suspend_buf)) {
dev_err(kctx->kbdev->dev, "Failed to create normal suspend buffer\n");
return -ENOMEM;
}
- if (kctx->kbdev->csf.pma_dev) {
- err = init_protected_suspend_buffer(kctx->kbdev, &group->protected_suspend_buf);
- if (err) {
- term_normal_suspend_buffer(kctx,
- &group->normal_suspend_buf);
- }
- } else {
- group->protected_suspend_buf.reg = NULL;
- }
+ /* The protected suspend buffer is bound at runtime, so just initialize it */
+ group->protected_suspend_buf.gpu_va = 0;
+ group->protected_suspend_buf.pma = NULL;
+ group->protected_suspend_buf.alloc_retries = 0;
- return err;
+ return 0;
}
/**
@@ -1363,6 +1199,9 @@ static int create_queue_group(struct kbase_context *const kctx,
group->cs_unrecoverable = false;
group->reevaluate_idle_status = false;
+ group->csg_reg = NULL;
+ group->csg_reg_bind_retries = 0;
+
group->dvs_buf = create->in.dvs_buf;
#if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -1494,31 +1333,21 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx,
* @s_buf: Pointer to queue group suspend buffer to be freed
*/
static void term_normal_suspend_buffer(struct kbase_context *const kctx,
- struct kbase_normal_suspend_buffer *s_buf)
+ struct kbase_normal_suspend_buffer *s_buf)
{
- const size_t nr_pages =
- PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size);
+ const size_t nr_pages = PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size);
lockdep_assert_held(&kctx->csf.lock);
- WARN_ON(kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
- s_buf->reg->start_pfn, s_buf->phy, nr_pages, MCU_AS_NR,
- true));
+ /* The group should not have a binding remaining on any suspend buffer region */
+ WARN_ONCE(s_buf->gpu_va, "Suspend buffer address should be 0 at termination");
- WARN_ON(s_buf->reg->flags & KBASE_REG_FREE);
-
- mutex_lock(&kctx->kbdev->csf.reg_lock);
- kbase_remove_va_region(kctx->kbdev, s_buf->reg);
- mutex_unlock(&kctx->kbdev->csf.reg_lock);
-
- kbase_mem_pool_free_pages(
- &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
- nr_pages, &s_buf->phy[0], false, false);
+ kbase_mem_pool_free_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages,
+ &s_buf->phy[0], false, false);
+ kbase_process_page_usage_dec(kctx, nr_pages);
kfree(s_buf->phy);
s_buf->phy = NULL;
- kfree(s_buf->reg);
- s_buf->reg = NULL;
}
/**
@@ -1531,32 +1360,12 @@ static void term_normal_suspend_buffer(struct kbase_context *const kctx,
static void term_protected_suspend_buffer(struct kbase_device *const kbdev,
struct kbase_protected_suspend_buffer *sbuf)
{
+ WARN_ONCE(sbuf->gpu_va, "Suspend buf should have been unmapped inside scheduler!");
if (sbuf->pma) {
const size_t nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
- size_t i = 0;
- struct tagged_addr *phys = kmalloc(sizeof(*phys) * nr_pages, GFP_KERNEL);
-
- for (i = 0; phys && i < nr_pages; i++)
- phys[i] = as_tagged(sbuf->pma[i]->pa);
-
- WARN_ON(kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, sbuf->reg->start_pfn,
- phys, nr_pages, MCU_AS_NR, true));
-
- kfree(phys);
kbase_csf_protected_memory_free(kbdev, sbuf->pma, nr_pages, true);
sbuf->pma = NULL;
}
-
- if (sbuf->reg) {
- WARN_ON(sbuf->reg->flags & KBASE_REG_FREE);
-
- mutex_lock(&kbdev->csf.reg_lock);
- kbase_remove_va_region(kbdev, sbuf->reg);
- mutex_unlock(&kbdev->csf.reg_lock);
-
- kbase_free_alloced_region(sbuf->reg);
- sbuf->reg = NULL;
- }
}
void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group)
@@ -2365,17 +2174,10 @@ static int alloc_grp_protected_suspend_buffer_pages(struct kbase_queue_group *co
struct kbase_device *const kbdev = group->kctx->kbdev;
struct kbase_context *kctx = group->kctx;
struct tagged_addr *phys = NULL;
- const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR;
- struct protected_memory_allocation **pma = NULL;
struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf;
size_t nr_pages;
int err = 0;
- /* Calls to this function are inherently asynchronous, with respect to
- * MMU operations.
- */
- const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
-
if (likely(sbuf->pma))
return 0;
@@ -2386,33 +2188,36 @@ static int alloc_grp_protected_suspend_buffer_pages(struct kbase_queue_group *co
goto phys_free;
}
- pma = kbase_csf_protected_memory_alloc(kbdev, phys, nr_pages, true);
- if (pma == NULL) {
- err = -ENOMEM;
- goto phys_free;
- }
-
mutex_lock(&kctx->csf.lock);
+ kbase_csf_scheduler_lock(kbdev);
- if (unlikely(!sbuf->reg)) {
- dev_err(kbdev->dev,
- "No VA region for the group %d of context %d_%d trying to enter protected mode",
- group->handle, group->kctx->tgid, group->kctx->id);
- err = -EINVAL;
- kbase_csf_protected_memory_free(kbdev, pma, nr_pages, true);
+ if (unlikely(!group->csg_reg)) {
+ /* The only way the bound csg_reg can have been removed from the group is
+ * that the scheduler has put the group off slot and the csg_reg resource
+ * is contended by other groups. In that case the pma mapping, which needs
+ * a bound csg_reg, must happen on a later occasion. Since the group is
+ * already off-slot, returning no error is harmless: when the scheduler
+ * places the group back on-slot it will do the required MMU map operation
+ * on the allocated and retained pma.
+ */
+ WARN_ON(group->csg_nr >= 0);
+ dev_dbg(kbdev->dev, "No bound csg_reg for group_%d_%d_%d to enter protected mode",
+ group->kctx->tgid, group->kctx->id, group->handle);
goto unlock;
}
- /* Update MMU table */
- err = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, sbuf->reg->start_pfn, phys,
- nr_pages, mem_flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW,
- mmu_sync_info, NULL, true);
- if (unlikely(err))
- kbase_csf_protected_memory_free(kbdev, pma, nr_pages, true);
- else
- sbuf->pma = pma;
+ /* Allocate the protected mode pages */
+ sbuf->pma = kbase_csf_protected_memory_alloc(kbdev, phys, nr_pages, true);
+ if (unlikely(!sbuf->pma)) {
+ err = -ENOMEM;
+ goto unlock;
+ }
+
+ /* Map the bound susp_reg to the just allocated pma pages */
+ err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group);
unlock:
+ kbase_csf_scheduler_unlock(kbdev);
mutex_unlock(&kctx->csf.lock);
phys_free:
kfree(phys);
@@ -2586,7 +2391,10 @@ static void cs_error_worker(struct work_struct *const data)
struct kbase_queue_group *group;
u8 group_handle;
bool reset_prevented = false;
- int err = kbase_reset_gpu_prevent_and_wait(kbdev);
+ int err;
+
+ kbase_debug_csf_fault_wait_completion(kbdev);
+ err = kbase_reset_gpu_prevent_and_wait(kbdev);
if (err)
dev_warn(
@@ -2595,7 +2403,6 @@ static void cs_error_worker(struct work_struct *const data)
else
reset_prevented = true;
- kbase_debug_csf_fault_wait_completion(kbdev);
mutex_lock(&kctx->csf.lock);
group = get_bound_queue_group(queue);
@@ -2793,12 +2600,17 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
get_queue(queue);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_TILER_OOM,
group, queue, cs_req ^ cs_ack);
- if (WARN_ON(!queue_work(wq, &queue->oom_event_work))) {
+ if (!queue_work(wq, &queue->oom_event_work)) {
/* The work item shall not have been
* already queued, there can be only
* one pending OoM event for a
* queue.
*/
+ dev_warn(
+ kbdev->dev,
+ "Tiler OOM work pending: queue %d group %d (ctx %d_%d)",
+ queue->csi_index, group->handle, queue->kctx->tgid,
+ queue->kctx->id);
release_queue(queue);
}
}
@@ -3128,6 +2940,10 @@ static inline void process_protm_exit(struct kbase_device *kbdev, u32 glb_ack)
kbase_ipa_control_protm_exited(kbdev);
kbase_hwcnt_backend_csf_protm_exited(&kbdev->hwcnt_gpu_iface);
}
+
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+ kbase_debug_coresight_csf_enable_pmode_exit(kbdev);
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
}
static inline void process_tracked_info_for_protm(struct kbase_device *kbdev,
diff --git a/mali_kbase/csf/mali_kbase_csf.h b/mali_kbase/csf/mali_kbase_csf.h
index fc3342e..9fbc932 100644
--- a/mali_kbase/csf/mali_kbase_csf.h
+++ b/mali_kbase/csf/mali_kbase_csf.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -40,6 +40,9 @@
*/
#define KBASEP_USER_DB_NR_INVALID ((s8)-1)
+/* Number of pages used for GPU command queue's User input & output data */
+#define KBASEP_NUM_CS_USER_IO_PAGES (2)
+
/* Indicates an invalid value for the scan out sequence number, used to
* signify there is no group that has protected mode execution pending.
*/
diff --git a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c
index f83251a..0daea10 100644
--- a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c
+++ b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c
@@ -101,7 +101,7 @@ static void wait_csg_slots_status_update_finish(struct kbase_device *kbdev,
}
}
-static void update_active_groups_status(struct kbase_device *kbdev, struct seq_file *file)
+void kbase_csf_debugfs_update_active_groups_status(struct kbase_device *kbdev)
{
u32 max_csg_slots = kbdev->csf.global_iface.group_num;
DECLARE_BITMAP(used_csgs, MAX_SUPPORTED_CSGS) = { 0 };
@@ -117,6 +117,8 @@ static void update_active_groups_status(struct kbase_device *kbdev, struct seq_f
* status of all on-slot groups when MCU sleep request is sent to it.
*/
if (kbdev->csf.scheduler.state == SCHED_SLEEPING) {
+ /* Wait for the MCU sleep request to complete. */
+ kbase_pm_wait_for_desired_state(kbdev);
bitmap_copy(csg_slots_status_updated,
kbdev->csf.scheduler.csg_inuse_bitmap, max_csg_slots);
return;
@@ -497,23 +499,19 @@ static int kbasep_csf_queue_group_debugfs_show(struct seq_file *file,
{
u32 gr;
struct kbase_context *const kctx = file->private;
- struct kbase_device *const kbdev = kctx->kbdev;
+ struct kbase_device *kbdev;
if (WARN_ON(!kctx))
return -EINVAL;
+ kbdev = kctx->kbdev;
+
seq_printf(file, "MALI_CSF_CSG_DEBUGFS_VERSION: v%u\n",
MALI_CSF_CSG_DEBUGFS_VERSION);
mutex_lock(&kctx->csf.lock);
kbase_csf_scheduler_lock(kbdev);
- if (kbdev->csf.scheduler.state == SCHED_SLEEPING) {
- /* Wait for the MCU sleep request to complete. Please refer the
- * update_active_groups_status() function for the explanation.
- */
- kbase_pm_wait_for_desired_state(kbdev);
- }
- update_active_groups_status(kbdev, file);
+ kbase_csf_debugfs_update_active_groups_status(kbdev);
for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) {
struct kbase_queue_group *const group =
kctx->csf.queue_groups[gr];
@@ -547,13 +545,7 @@ static int kbasep_csf_scheduler_dump_active_groups(struct seq_file *file,
MALI_CSF_CSG_DEBUGFS_VERSION);
kbase_csf_scheduler_lock(kbdev);
- if (kbdev->csf.scheduler.state == SCHED_SLEEPING) {
- /* Wait for the MCU sleep request to complete. Please refer the
- * update_active_groups_status() function for the explanation.
- */
- kbase_pm_wait_for_desired_state(kbdev);
- }
- update_active_groups_status(kbdev, file);
+ kbase_csf_debugfs_update_active_groups_status(kbdev);
for (csg_nr = 0; csg_nr < num_groups; csg_nr++) {
struct kbase_queue_group *const group =
kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
diff --git a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.h b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.h
index 397e657..16a548b 100644
--- a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.h
+++ b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -44,4 +44,11 @@ void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx);
*/
void kbase_csf_debugfs_init(struct kbase_device *kbdev);
+/**
+ * kbase_csf_debugfs_update_active_groups_status() - Update on-slot group statuses
+ *
+ * @kbdev: Pointer to the device
+ */
+void kbase_csf_debugfs_update_active_groups_status(struct kbase_device *kbdev);
+
#endif /* _KBASE_CSF_CSG_DEBUGFS_H_ */
diff --git a/mali_kbase/csf/mali_kbase_csf_defs.h b/mali_kbase/csf/mali_kbase_csf_defs.h
index b7ceebc..f1af1b9 100644
--- a/mali_kbase/csf/mali_kbase_csf_defs.h
+++ b/mali_kbase/csf/mali_kbase_csf_defs.h
@@ -33,6 +33,10 @@
#include "mali_kbase_csf_event.h"
#include <uapi/gpu/arm/midgard/csf/mali_kbase_csf_errors_dumpfault.h>
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+#include <debug/backend/mali_kbase_debug_coresight_internal_csf.h>
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
+
/* Maximum number of KCPU command queues to be created per GPU address space.
*/
#define KBASEP_MAX_KCPU_QUEUES ((size_t)256)
@@ -298,9 +302,9 @@ struct kbase_csf_notification {
*
* @kctx: Pointer to the base context with which this GPU command queue
* is associated.
- * @reg: Pointer to the region allocated from the shared
- * interface segment for mapping the User mode
- * input/output pages in MCU firmware address space.
+ * @user_io_gpu_va: The start GPU VA address of this queue's userio pages. Only
+ * valid (i.e. not 0) when the queue is enabled and its owner
+ * group has a runtime bound csg_reg (group region).
* @phys: Pointer to the physical pages allocated for the
* pair or User mode input/output page
* @user_io_addr: Pointer to the permanent kernel mapping of User mode
@@ -376,7 +380,7 @@ struct kbase_csf_notification {
*/
struct kbase_queue {
struct kbase_context *kctx;
- struct kbase_va_region *reg;
+ u64 user_io_gpu_va;
struct tagged_addr phys[2];
char *user_io_addr;
u64 handle;
@@ -421,19 +425,23 @@ struct kbase_queue {
/**
* struct kbase_normal_suspend_buffer - Object representing a normal
* suspend buffer for queue group.
- * @reg: Memory region allocated for the normal-mode suspend buffer.
+ * @gpu_va: The start GPU VA address of the bound suspend buffer. Note that this
+ * field is only valid when the owner group has a region bound at
+ * runtime.
* @phy: Array of physical memory pages allocated for the normal-
* mode suspend buffer.
*/
struct kbase_normal_suspend_buffer {
- struct kbase_va_region *reg;
+ u64 gpu_va;
struct tagged_addr *phy;
};
/**
* struct kbase_protected_suspend_buffer - Object representing a protected
* suspend buffer for queue group.
- * @reg: Memory region allocated for the protected-mode suspend buffer.
+ * @gpu_va: The start GPU VA address of the bound protected mode suspend buffer.
+ * Note that this field is only valid when the owner group has a region
+ * bound at runtime.
* @pma: Array of pointer to protected mode allocations containing
* information about memory pages allocated for protected mode
* suspend buffer.
@@ -441,7 +449,7 @@ struct kbase_normal_suspend_buffer {
* for protected suspend buffers.
*/
struct kbase_protected_suspend_buffer {
- struct kbase_va_region *reg;
+ u64 gpu_va;
struct protected_memory_allocation **pma;
u8 alloc_retries;
};
@@ -515,6 +523,13 @@ struct kbase_protected_suspend_buffer {
* @deschedule_deferred_cnt: Counter keeping a track of the number of threads
* that tried to deschedule the group and had to defer
* the descheduling due to the dump on fault.
+ * @csg_reg: An opaque pointer to the runtime bound shared regions. It is
+ * dynamically managed by the scheduler and can be NULL if the
+ * group is off-slot.
+ * @csg_reg_bind_retries: Count of runtime MCU shared region map attempts. It is
+ * accumulated across consecutive mapping failures. On reaching
+ * a preset limit, the group is regarded as having suffered
+ * a fatal error and a fatal error notification is triggered.
*/
struct kbase_queue_group {
struct kbase_context *kctx;
@@ -565,6 +580,8 @@ struct kbase_queue_group {
#if IS_ENABLED(CONFIG_DEBUG_FS)
u32 deschedule_deferred_cnt;
#endif
+ void *csg_reg;
+ u8 csg_reg_bind_retries;
};
/**
@@ -626,6 +643,8 @@ struct kbase_csf_cpu_queue_context {
* @lock: Lock preventing concurrent access to the @in_use bitmap.
* @in_use: Bitmap that indicates which heap context structures are currently
* allocated (in @region).
+ * @heap_context_size_aligned: Size of a heap context structure, in bytes,
+ * aligned to GPU cacheline size.
*
* Heap context structures are allocated by the kernel for use by the firmware.
* The current implementation subdivides a single GPU memory region for use as
@@ -637,6 +656,7 @@ struct kbase_csf_heap_context_allocator {
u64 gpu_va;
struct mutex lock;
DECLARE_BITMAP(in_use, MAX_TILER_HEAPS);
+ u32 heap_context_size_aligned;
};
/**
@@ -878,6 +898,33 @@ struct kbase_csf_sched_heap_reclaim_mgr {
};
/**
+ * struct kbase_csf_mcu_shared_regions - Control data for managing the MCU shared
+ * interface segment regions for scheduler
+ * operations
+ *
+ * @array_csg_regs: Base pointer of an internally created array_csg_regs[].
+ * @unused_csg_regs: List of unused csg_regs items. When an item is bound to a
+ * group that the scheduler places on slot, it is dropped from
+ * the list (i.e. busy/active). The scheduler puts an active item
+ * back on the list when the group goes off-slot (not in use).
+ * @dummy_phys: An array of dummy phys[nr_susp_pages] pages for use with normal
+ * and pmode suspend buffers, as a default replacement of a CSG's pages
+ * for the MMU mapping when the csg_reg is not bound to a group.
+ * @pma_phys: Pre-allocated array phy[nr_susp_pages] for transitional use with
+ * protected suspend buffer MMU map operations.
+ * @userio_mem_rd_flags: Userio input page's read access mapping configuration flags.
+ * @dummy_phys_allocated: True when the @dummy_phys pages have been allocated.
+ */
+struct kbase_csf_mcu_shared_regions {
+ void *array_csg_regs;
+ struct list_head unused_csg_regs;
+ struct tagged_addr *dummy_phys;
+ struct tagged_addr *pma_phys;
+ unsigned long userio_mem_rd_flags;
+ bool dummy_phys_allocated;
+};
+
+/**
* struct kbase_csf_scheduler - Object representing the scheduler used for
* CSF for an instance of GPU platform device.
* @lock: Lock to serialize the scheduler operations and
@@ -1011,6 +1058,9 @@ struct kbase_csf_sched_heap_reclaim_mgr {
* @interrupt_lock is used to serialize the access.
* @protm_enter_time: GPU protected mode enter time.
* @reclaim_mgr: CSGs tiler heap manager object.
+ * @mcu_regs_data: Scheduler MCU shared regions data for managing the
+ * shared interface mappings for on-slot queues and
+ * CSG suspend buffers.
*/
struct kbase_csf_scheduler {
struct mutex lock;
@@ -1054,6 +1104,7 @@ struct kbase_csf_scheduler {
u32 tick_protm_pending_seq;
ktime_t protm_enter_time;
struct kbase_csf_sched_heap_reclaim_mgr reclaim_mgr;
+ struct kbase_csf_mcu_shared_regions mcu_regs_data;
};
/*
@@ -1540,6 +1591,12 @@ struct kbase_csf_device {
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct kbase_csf_dump_on_fault dof;
#endif /* CONFIG_DEBUG_FS */
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+ /**
+ * @coresight: Coresight device structure.
+ */
+ struct kbase_debug_coresight_device coresight;
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
};
/**
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.c b/mali_kbase/csf/mali_kbase_csf_firmware.c
index 1e409ac..4dc9de4 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware.c
@@ -39,7 +39,6 @@
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
#include <csf/mali_kbase_csf_registers.h>
-
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/firmware.h>
@@ -287,6 +286,13 @@ static void boot_csf_firmware(struct kbase_device *kbdev)
{
kbase_csf_firmware_enable_mcu(kbdev);
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+ kbase_debug_coresight_csf_state_request(kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED);
+
+ if (!kbase_debug_coresight_csf_state_wait(kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED))
+ dev_err(kbdev->dev, "Timeout waiting for CoreSight to be enabled");
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
+
wait_for_firmware_boot(kbdev);
}
@@ -1818,6 +1824,14 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)
kbase_csf_firmware_global_input(global_iface,
GLB_ACK_IRQ_MASK, ack_irq_mask);
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+ /* Enable FW MCU read/write debug interfaces */
+ kbase_csf_firmware_global_input_mask(
+ global_iface, GLB_DEBUG_ACK_IRQ_MASK,
+ GLB_DEBUG_REQ_FW_AS_READ_MASK | GLB_DEBUG_REQ_FW_AS_WRITE_MASK,
+ GLB_DEBUG_REQ_FW_AS_READ_MASK | GLB_DEBUG_REQ_FW_AS_WRITE_MASK);
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
+
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
@@ -2552,6 +2566,119 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev)
kbdev->as_free |= MCU_AS_BITMASK;
}
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+int kbase_csf_firmware_mcu_register_write(struct kbase_device *const kbdev, u32 const reg_addr,
+ u32 const reg_val)
+{
+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+ unsigned long flags;
+ int err;
+ u32 glb_req;
+
+ mutex_lock(&kbdev->csf.reg_lock);
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+
+ /* Set the address and value to write */
+ kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN0, reg_addr);
+ kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN1, reg_val);
+
+ /* Set the Global Debug request for FW MCU write */
+ glb_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK);
+ glb_req ^= GLB_DEBUG_REQ_FW_AS_WRITE_MASK;
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_req,
+ GLB_DEBUG_REQ_FW_AS_WRITE_MASK);
+
+ set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK);
+
+ /* Notify FW about the Global Debug request */
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+ err = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK);
+
+ mutex_unlock(&kbdev->csf.reg_lock);
+
+ dev_dbg(kbdev->dev, "w: reg %08x val %08x", reg_addr, reg_val);
+
+ return err;
+}
+
+int kbase_csf_firmware_mcu_register_read(struct kbase_device *const kbdev, u32 const reg_addr,
+ u32 *reg_val)
+{
+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+ unsigned long flags;
+ int err;
+ u32 glb_req;
+
+ if (WARN_ON(reg_val == NULL))
+ return -EINVAL;
+
+ mutex_lock(&kbdev->csf.reg_lock);
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+
+ /* Set the address to read */
+ kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN0, reg_addr);
+
+ /* Set the Global Debug request for FW MCU read */
+ glb_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK);
+ glb_req ^= GLB_DEBUG_REQ_FW_AS_READ_MASK;
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_req,
+ GLB_DEBUG_REQ_FW_AS_READ_MASK);
+
+ set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK);
+
+ /* Notify FW about the Global Debug request */
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+ err = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK);
+
+ if (!err) {
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ *reg_val = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ARG_OUT0);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ }
+
+ mutex_unlock(&kbdev->csf.reg_lock);
+
+ dev_dbg(kbdev->dev, "r: reg %08x val %08x", reg_addr, *reg_val);
+
+ return err;
+}
+
+int kbase_csf_firmware_mcu_register_poll(struct kbase_device *const kbdev, u32 const reg_addr,
+ u32 const val_mask, u32 const reg_val)
+{
+ unsigned long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms) + jiffies;
+ u32 read_val;
+
+ dev_dbg(kbdev->dev, "p: reg %08x val %08x mask %08x", reg_addr, reg_val, val_mask);
+
+ while (time_before(jiffies, remaining)) {
+ int err = kbase_csf_firmware_mcu_register_read(kbdev, reg_addr, &read_val);
+
+ if (err) {
+ dev_err(kbdev->dev,
+ "Error reading MCU register value (read_val = %u, expect = %u)\n",
+ read_val, reg_val);
+ return err;
+ }
+
+ if ((read_val & val_mask) == reg_val)
+ return 0;
+ }
+
+ dev_err(kbdev->dev,
+ "Timeout waiting for MCU register value to be set (read_val = %u, expect = %u)\n",
+ read_val, reg_val);
+
+ return -ETIMEDOUT;
+}
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
+
void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev)
{
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.h b/mali_kbase/csf/mali_kbase_csf_firmware.h
index cc20f9a..5782ac3 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware.h
+++ b/mali_kbase/csf/mali_kbase_csf_firmware.h
@@ -449,6 +449,50 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev);
*/
void kbase_csf_firmware_unload_term(struct kbase_device *kbdev);
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+/**
+ * kbase_csf_firmware_mcu_register_write - Write to MCU register
+ *
+ * @kbdev: Instance of a gpu platform device that implements a csf interface.
+ * @reg_addr: Register address to write into
+ * @reg_val: Value to be written
+ *
+ * Write a desired value to a register in MCU address space.
+ *
+ * Return: 0 on success, or negative on failure.
+ */
+int kbase_csf_firmware_mcu_register_write(struct kbase_device *const kbdev, u32 const reg_addr,
+ u32 const reg_val);
+/**
+ * kbase_csf_firmware_mcu_register_read - Read from MCU register
+ *
+ * @kbdev: Instance of a gpu platform device that implements a csf interface.
+ * @reg_addr: Register address to read from
+ * @reg_val: Value as present in reg_addr register
+ *
+ * Read a value from MCU address space.
+ *
+ * Return: 0 on success, or negative on failure.
+ */
+int kbase_csf_firmware_mcu_register_read(struct kbase_device *const kbdev, u32 const reg_addr,
+ u32 *reg_val);
+
+/**
+ * kbase_csf_firmware_mcu_register_poll - Poll MCU register
+ *
+ * @kbdev: Instance of a gpu platform device that implements a csf interface.
+ * @reg_addr: Register address to read from
+ * @val_mask: Value to mask the read value for comparison
+ * @reg_val: Value to be compared against
+ *
+ * Continue to read a value from MCU address space until it matches the given mask and value.
+ *
+ * Return: 0 on success, or negative on failure.
+ */
+int kbase_csf_firmware_mcu_register_poll(struct kbase_device *const kbdev, u32 const reg_addr,
+ u32 const val_mask, u32 const reg_val);
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
+
/**
* kbase_csf_firmware_ping - Send the ping request to firmware.
*
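
A hedged usage sketch for the three helpers declared above: write an enable bit to a debug block, then poll its status register until a ready bit appears or the firmware timeout expires. The register offsets and masks below are hypothetical placeholders for illustration, not real MCU registers:

/* Sketch only: the SKETCH_DBG_* values are hypothetical. */
#define SKETCH_DBG_CTRL_REG   0x0F00u
#define SKETCH_DBG_STATUS_REG 0x0F04u
#define SKETCH_DBG_ENABLE_BIT 0x1u
#define SKETCH_DBG_READY_BIT  0x1u

static int sketch_enable_debug_block(struct kbase_device *kbdev)
{
	int err = kbase_csf_firmware_mcu_register_write(kbdev, SKETCH_DBG_CTRL_REG,
							SKETCH_DBG_ENABLE_BIT);
	if (err)
		return err;

	/* Poll until the ready bit is observed, or -ETIMEDOUT on expiry. */
	return kbase_csf_firmware_mcu_register_poll(kbdev, SKETCH_DBG_STATUS_REG,
						    SKETCH_DBG_READY_BIT,
						    SKETCH_DBG_READY_BIT);
}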
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
index 2e2b59f..7976d90 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
@@ -32,7 +32,7 @@
#include "mali_kbase_csf_scheduler.h"
#include "mmu/mali_kbase_mmu.h"
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
-#include <backend/gpu/mali_kbase_model_dummy.h>
+#include <backend/gpu/mali_kbase_model_linux.h>
#include <csf/mali_kbase_csf_registers.h>
#include <linux/list.h>
@@ -1230,8 +1230,6 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev)
/* NO_MALI: Don't stop firmware or unload MMU tables */
- kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu);
-
kbase_csf_scheduler_term(kbdev);
kbase_csf_free_dummy_user_reg_page(kbdev);
@@ -1261,6 +1259,8 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev)
* entry parsed from the firmware image.
*/
kbase_mcu_shared_interface_region_tracker_term(kbdev);
+
+ kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu);
}
void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev)
diff --git a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c
index f357e9e..42d19e1 100644
--- a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c
+++ b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c
@@ -23,10 +23,7 @@
#include "mali_kbase_csf_heap_context_alloc.h"
/* Size of one heap context structure, in bytes. */
-#define HEAP_CTX_SIZE ((size_t)32)
-
-/* Total size of the GPU memory region allocated for heap contexts, in bytes. */
-#define HEAP_CTX_REGION_SIZE (MAX_TILER_HEAPS * HEAP_CTX_SIZE)
+#define HEAP_CTX_SIZE ((u32)32)
/**
* sub_alloc - Sub-allocate a heap context from a GPU memory region
@@ -38,8 +35,8 @@
static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc)
{
struct kbase_context *const kctx = ctx_alloc->kctx;
- int heap_nr = 0;
- size_t ctx_offset = 0;
+ unsigned long heap_nr = 0;
+ u32 ctx_offset = 0;
u64 heap_gpu_va = 0;
struct kbase_vmap_struct mapping;
void *ctx_ptr = NULL;
@@ -55,30 +52,65 @@ static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc)
return 0;
}
- ctx_offset = heap_nr * HEAP_CTX_SIZE;
+ ctx_offset = heap_nr * ctx_alloc->heap_context_size_aligned;
heap_gpu_va = ctx_alloc->gpu_va + ctx_offset;
ctx_ptr = kbase_vmap_prot(kctx, heap_gpu_va,
- HEAP_CTX_SIZE, KBASE_REG_CPU_WR, &mapping);
+ ctx_alloc->heap_context_size_aligned, KBASE_REG_CPU_WR, &mapping);
if (unlikely(!ctx_ptr)) {
dev_err(kctx->kbdev->dev,
- "Failed to map tiler heap context %d (0x%llX)\n",
+ "Failed to map tiler heap context %lu (0x%llX)\n",
heap_nr, heap_gpu_va);
return 0;
}
- memset(ctx_ptr, 0, HEAP_CTX_SIZE);
+ memset(ctx_ptr, 0, ctx_alloc->heap_context_size_aligned);
kbase_vunmap(ctx_ptr, &mapping);
bitmap_set(ctx_alloc->in_use, heap_nr, 1);
- dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %d (0x%llX)\n",
+ dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %lu (0x%llX)\n",
heap_nr, heap_gpu_va);
return heap_gpu_va;
}
/**
+ * evict_heap_context - Evict the data of heap context from GPU's L2 cache.
+ *
+ * @ctx_alloc: Pointer to the heap context allocator.
+ * @heap_gpu_va: The GPU virtual address of a heap context structure to free.
+ *
+ * This function is called when the memory for a heap context is freed. It uses
+ * the FLUSH_PA_RANGE command to evict the heap context data, so nothing is done
+ * on older CSF GPUs that lack the command. On those GPUs the whole GPU cache is
+ * in any case expected to be flushed when the initial chunks of the heap are
+ * freed, just before the memory for the heap context is freed.
+ */
+static void evict_heap_context(struct kbase_csf_heap_context_allocator *const ctx_alloc,
+ u64 const heap_gpu_va)
+{
+ struct kbase_context *const kctx = ctx_alloc->kctx;
+ u32 offset_in_bytes = (u32)(heap_gpu_va - ctx_alloc->gpu_va);
+ u32 offset_within_page = offset_in_bytes & ~PAGE_MASK;
+ u32 page_index = offset_in_bytes >> PAGE_SHIFT;
+ struct tagged_addr page =
+ kbase_get_gpu_phy_pages(ctx_alloc->region)[page_index];
+ phys_addr_t heap_context_pa = as_phys_addr_t(page) + offset_within_page;
+
+ lockdep_assert_held(&ctx_alloc->lock);
+
+ /* There is no need to take vm_lock here as the ctx_alloc region is no_user_free
+ * refcounted. The region and the backing page can't disappear whilst this
+ * function is executing.
+ * The flush type is passed as FLUSH_PT so that only the L2 cache is cleaned and invalidated.
+ */
+ kbase_mmu_flush_pa_range(kctx->kbdev, kctx,
+ heap_context_pa, ctx_alloc->heap_context_size_aligned,
+ KBASE_MMU_OP_FLUSH_PT);
+}
+
+/**
* sub_free - Free a heap context sub-allocated from a GPU memory region
*
* @ctx_alloc: Pointer to the heap context allocator.
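
The evict_heap_context() helper above locates the physical address of a heap context by splitting its byte offset within the allocator region into a page index and an intra-page offset. A small worked sketch of that arithmetic, assuming 4 KiB pages and a 64-byte aligned context size (illustrative values only, usual kernel headers assumed):

/* Illustrative only: context #70 sits 70 * 64 = 4480 bytes into the region:
 *   page_index         = 4480 >> PAGE_SHIFT -> 1
 *   offset_within_page = 4480 & ~PAGE_MASK  -> 384
 * so its physical address is phys(region page 1) + 384.
 */
static phys_addr_t sketch_heap_ctx_pa(phys_addr_t page_pa, u32 offset_in_bytes)
{
	return page_pa + (offset_in_bytes & ~PAGE_MASK);
}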
@@ -88,7 +120,7 @@ static void sub_free(struct kbase_csf_heap_context_allocator *const ctx_alloc,
u64 const heap_gpu_va)
{
struct kbase_context *const kctx = ctx_alloc->kctx;
- u64 ctx_offset = 0;
+ u32 ctx_offset = 0;
unsigned int heap_nr = 0;
lockdep_assert_held(&ctx_alloc->lock);
@@ -99,13 +131,15 @@ static void sub_free(struct kbase_csf_heap_context_allocator *const ctx_alloc,
if (WARN_ON(heap_gpu_va < ctx_alloc->gpu_va))
return;
- ctx_offset = heap_gpu_va - ctx_alloc->gpu_va;
+ ctx_offset = (u32)(heap_gpu_va - ctx_alloc->gpu_va);
- if (WARN_ON(ctx_offset >= HEAP_CTX_REGION_SIZE) ||
- WARN_ON(ctx_offset % HEAP_CTX_SIZE))
+ if (WARN_ON(ctx_offset >= (ctx_alloc->region->nr_pages << PAGE_SHIFT)) ||
+ WARN_ON(ctx_offset % ctx_alloc->heap_context_size_aligned))
return;
- heap_nr = ctx_offset / HEAP_CTX_SIZE;
+ evict_heap_context(ctx_alloc, heap_gpu_va);
+
+ heap_nr = ctx_offset / ctx_alloc->heap_context_size_aligned;
dev_dbg(kctx->kbdev->dev,
"Freed tiler heap context %d (0x%llX)\n", heap_nr, heap_gpu_va);
@@ -116,12 +150,17 @@ int kbase_csf_heap_context_allocator_init(
struct kbase_csf_heap_context_allocator *const ctx_alloc,
struct kbase_context *const kctx)
{
+ const u32 gpu_cache_line_size =
+ (1U << kctx->kbdev->gpu_props.props.l2_props.log2_line_size);
+
/* We cannot pre-allocate GPU memory here because the
* custom VA zone may not have been created yet.
*/
ctx_alloc->kctx = kctx;
ctx_alloc->region = NULL;
ctx_alloc->gpu_va = 0;
+ ctx_alloc->heap_context_size_aligned =
+ (HEAP_CTX_SIZE + gpu_cache_line_size - 1) & ~(gpu_cache_line_size - 1);
mutex_init(&ctx_alloc->lock);
bitmap_zero(ctx_alloc->in_use, MAX_TILER_HEAPS);
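
The heap_context_size_aligned value computed above rounds the 32-byte heap context up to the GPU L2 cacheline size; the allocation path further down then sizes the backing region as MAX_TILER_HEAPS such slots. The round-up is the usual power-of-two trick, sketched here with illustrative numbers:

/* Illustrative only: with a 64-byte cacheline, 32 bytes rounds up to 64, so
 * e.g. 128 heap slots need PFN_UP(128 * 64) = 2 pages of 4 KiB.
 */
static u32 sketch_align_up_pow2(u32 size, u32 alignment)
{
	return (size + alignment - 1) & ~(alignment - 1);
}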
@@ -163,7 +202,7 @@ u64 kbase_csf_heap_context_allocator_alloc(
struct kbase_context *const kctx = ctx_alloc->kctx;
u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR |
BASEP_MEM_NO_USER_FREE | BASE_MEM_PROT_CPU_RD;
- u64 nr_pages = PFN_UP(HEAP_CTX_REGION_SIZE);
+ u64 nr_pages = PFN_UP(MAX_TILER_HEAPS * ctx_alloc->heap_context_size_aligned);
u64 heap_gpu_va = 0;
/* Calls to this function are inherently asynchronous, with respect to
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.c b/mali_kbase/csf/mali_kbase_csf_kcpu.c
index 06a6990..8c1fcdb 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu.c
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu.c
@@ -80,7 +80,14 @@ static int kbase_kcpu_map_import_prepare(
* on the physical pages tracking object. When the last
* reference to the tracking object is dropped the pages
* would be unpinned if they weren't unpinned before.
+ *
+ * Region should be CPU cached: abort if it isn't.
*/
+ if (WARN_ON(!(reg->flags & KBASE_REG_CPU_CACHED))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
ret = kbase_jd_user_buf_pin_pages(kctx, reg);
if (ret)
goto out;
@@ -783,13 +790,14 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
return -EINVAL;
}
- sig_set = evt[BASEP_EVENT_VAL_INDEX] > cqs_wait->objs[i].val;
+ sig_set =
+ evt[BASEP_EVENT32_VAL_OFFSET / sizeof(u32)] > cqs_wait->objs[i].val;
if (sig_set) {
bool error = false;
bitmap_set(cqs_wait->signaled, i, 1);
if ((cqs_wait->inherit_err_flags & (1U << i)) &&
- evt[BASEP_EVENT_ERR_INDEX] > 0) {
+ evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)] > 0) {
queue->has_error = true;
error = true;
}
@@ -799,7 +807,7 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
error);
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END(
- kbdev, queue, evt[BASEP_EVENT_ERR_INDEX]);
+ kbdev, queue, evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)]);
queue->command_started = false;
}
@@ -816,12 +824,34 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
return bitmap_full(cqs_wait->signaled, cqs_wait->nr_objs);
}
+static inline bool kbase_kcpu_cqs_is_data_type_valid(u8 data_type)
+{
+ return data_type == BASEP_CQS_DATA_TYPE_U32 || data_type == BASEP_CQS_DATA_TYPE_U64;
+}
+
+static inline bool kbase_kcpu_cqs_is_aligned(u64 addr, u8 data_type)
+{
+ BUILD_BUG_ON(BASEP_EVENT32_ALIGN_BYTES != BASEP_EVENT32_SIZE_BYTES);
+ BUILD_BUG_ON(BASEP_EVENT64_ALIGN_BYTES != BASEP_EVENT64_SIZE_BYTES);
+ WARN_ON(!kbase_kcpu_cqs_is_data_type_valid(data_type));
+
+ switch (data_type) {
+ default:
+ return false;
+ case BASEP_CQS_DATA_TYPE_U32:
+ return (addr & (BASEP_EVENT32_ALIGN_BYTES - 1)) == 0;
+ case BASEP_CQS_DATA_TYPE_U64:
+ return (addr & (BASEP_EVENT64_ALIGN_BYTES - 1)) == 0;
+ }
+}
+
static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue,
struct base_kcpu_command_cqs_wait_info *cqs_wait_info,
struct kbase_kcpu_command *current_command)
{
struct base_cqs_wait_info *objs;
unsigned int nr_objs = cqs_wait_info->nr_objs;
+ unsigned int i;
lockdep_assert_held(&queue->lock);
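
The alignment helper introduced above rejects sync objects whose GPU address is not naturally aligned for their data type; together with the equal size/alignment invariant asserted by the BUILD_BUG_ONs, this guarantees a Sync32/Sync64 access can never straddle, and therefore overrun, the supplied event page. The check itself is plain natural-alignment arithmetic, sketched with example values:

/* Illustrative only: natural-alignment test.
 *   0x1004 & (4 - 1) == 0  -> valid for a 32-bit sync object
 *   0x1004 & (8 - 1) == 4  -> rejected for a 64-bit sync object
 */
static bool sketch_cqs_addr_aligned(u64 addr, u64 obj_size)
{
	return (addr & (obj_size - 1)) == 0;
}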
@@ -841,6 +871,17 @@ static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue,
return -ENOMEM;
}
+ /* Check the CQS objects as early as possible. By checking their alignment
+ * (required alignment equals the size for Sync32 and Sync64 objects), we can
+ * prevent overrunning the supplied event page.
+ */
+ for (i = 0; i < nr_objs; i++) {
+ if (!kbase_kcpu_cqs_is_aligned(objs[i].addr, BASEP_CQS_DATA_TYPE_U32)) {
+ kfree(objs);
+ return -EINVAL;
+ }
+ }
+
if (++queue->cqs_wait_count == 1) {
if (kbase_csf_event_wait_add(queue->kctx,
event_cqs_callback, queue)) {
@@ -896,14 +937,13 @@ static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev,
"Sync memory %llx already freed", cqs_set->objs[i].addr);
queue->has_error = true;
} else {
- evt[BASEP_EVENT_ERR_INDEX] = queue->has_error;
+ evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)] = queue->has_error;
/* Set to signaled */
- evt[BASEP_EVENT_VAL_INDEX]++;
+ evt[BASEP_EVENT32_VAL_OFFSET / sizeof(u32)]++;
kbase_phy_alloc_mapping_put(queue->kctx, mapping);
- KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_SET,
- queue, cqs_set->objs[i].addr,
- evt[BASEP_EVENT_ERR_INDEX]);
+ KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_SET, queue, cqs_set->objs[i].addr,
+ evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)]);
}
}
@@ -920,6 +960,7 @@ static int kbase_kcpu_cqs_set_prepare(
{
struct base_cqs_set *objs;
unsigned int nr_objs = cqs_set_info->nr_objs;
+ unsigned int i;
lockdep_assert_held(&kcpu_queue->lock);
@@ -939,6 +980,17 @@ static int kbase_kcpu_cqs_set_prepare(
return -ENOMEM;
}
+ /* Check the CQS objects as early as possible. By checking their alignment
+ * (required alignment equals the size for Sync32 and Sync64 objects), we can
+ * prevent overrunning the supplied event page.
+ */
+ for (i = 0; i < nr_objs; i++) {
+ if (!kbase_kcpu_cqs_is_aligned(objs[i].addr, BASEP_CQS_DATA_TYPE_U32)) {
+ kfree(objs);
+ return -EINVAL;
+ }
+ }
+
current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET;
current_command->info.cqs_set.nr_objs = nr_objs;
current_command->info.cqs_set.objs = objs;
@@ -981,8 +1033,9 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev,
if (!test_bit(i, cqs_wait_operation->signaled)) {
struct kbase_vmap_struct *mapping;
bool sig_set;
- u64 *evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx,
- cqs_wait_operation->objs[i].addr, &mapping);
+ uintptr_t evt = (uintptr_t)kbase_phy_alloc_mapping_get(
+ queue->kctx, cqs_wait_operation->objs[i].addr, &mapping);
+ u64 val = 0;
/* GPUCORE-28172 RDT to review */
if (!queue->command_started)
@@ -995,12 +1048,29 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev,
return -EINVAL;
}
+ switch (cqs_wait_operation->objs[i].data_type) {
+ default:
+ WARN_ON(!kbase_kcpu_cqs_is_data_type_valid(
+ cqs_wait_operation->objs[i].data_type));
+ kbase_phy_alloc_mapping_put(queue->kctx, mapping);
+ queue->has_error = true;
+ return -EINVAL;
+ case BASEP_CQS_DATA_TYPE_U32:
+ val = *(u32 *)evt;
+ evt += BASEP_EVENT32_ERR_OFFSET - BASEP_EVENT32_VAL_OFFSET;
+ break;
+ case BASEP_CQS_DATA_TYPE_U64:
+ val = *(u64 *)evt;
+ evt += BASEP_EVENT64_ERR_OFFSET - BASEP_EVENT64_VAL_OFFSET;
+ break;
+ }
+
switch (cqs_wait_operation->objs[i].operation) {
case BASEP_CQS_WAIT_OPERATION_LE:
- sig_set = *evt <= cqs_wait_operation->objs[i].val;
+ sig_set = val <= cqs_wait_operation->objs[i].val;
break;
case BASEP_CQS_WAIT_OPERATION_GT:
- sig_set = *evt > cqs_wait_operation->objs[i].val;
+ sig_set = val > cqs_wait_operation->objs[i].val;
break;
default:
dev_dbg(kbdev->dev,
@@ -1012,24 +1082,10 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev,
return -EINVAL;
}
- /* Increment evt up to the error_state value depending on the CQS data type */
- switch (cqs_wait_operation->objs[i].data_type) {
- default:
- dev_dbg(kbdev->dev, "Unreachable data_type=%d", cqs_wait_operation->objs[i].data_type);
- /* Fallthrough - hint to compiler that there's really only 2 options at present */
- fallthrough;
- case BASEP_CQS_DATA_TYPE_U32:
- evt = (u64 *)((u8 *)evt + sizeof(u32));
- break;
- case BASEP_CQS_DATA_TYPE_U64:
- evt = (u64 *)((u8 *)evt + sizeof(u64));
- break;
- }
-
if (sig_set) {
bitmap_set(cqs_wait_operation->signaled, i, 1);
if ((cqs_wait_operation->inherit_err_flags & (1U << i)) &&
- *evt > 0) {
+ *(u32 *)evt > 0) {
queue->has_error = true;
}
@@ -1057,6 +1113,7 @@ static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue
{
struct base_cqs_wait_operation_info *objs;
unsigned int nr_objs = cqs_wait_operation_info->nr_objs;
+ unsigned int i;
lockdep_assert_held(&queue->lock);
@@ -1076,6 +1133,18 @@ static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue
return -ENOMEM;
}
+ /* Check the CQS objects as early as possible. By checking their alignment
+ * (the required alignment equals the size for Sync32 and Sync64 objects), we
+ * can prevent overrunning the supplied event page.
+ */
+ for (i = 0; i < nr_objs; i++) {
+ if (!kbase_kcpu_cqs_is_data_type_valid(objs[i].data_type) ||
+ !kbase_kcpu_cqs_is_aligned(objs[i].addr, objs[i].data_type)) {
+ kfree(objs);
+ return -EINVAL;
+ }
+ }
+
if (++queue->cqs_wait_count == 1) {
if (kbase_csf_event_wait_add(queue->kctx,
event_cqs_callback, queue)) {
@@ -1106,6 +1175,44 @@ static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue
return 0;
}
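The pointer arithmetic in the hunks above (evt += BASEP_EVENT32_ERR_OFFSET - BASEP_EVENT32_VAL_OFFSET, and the 64-bit equivalent) assumes a CQS event object layout in which the value field sits at the start of the object and the error field immediately follows it. A sketch of the assumed constants is given below; the names match the code, but the concrete values are illustrative rather than taken from this patch:

/* Sync32 object: 32-bit value followed by a 32-bit error field */
#define BASEP_EVENT32_VAL_OFFSET   0
#define BASEP_EVENT32_ERR_OFFSET   4
#define BASEP_EVENT32_ALIGN_BYTES  4

/* Sync64 object: 64-bit value followed by the error field */
#define BASEP_EVENT64_VAL_OFFSET   0
#define BASEP_EVENT64_ERR_OFFSET   8
#define BASEP_EVENT64_ALIGN_BYTES  8

With this layout, evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)] addresses the error word of a Sync32 object, and advancing a byte-granular pointer by ERR_OFFSET - VAL_OFFSET moves it from the value field to the error field.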
+static void kbasep_kcpu_cqs_do_set_operation_32(struct kbase_kcpu_command_queue *queue,
+ uintptr_t evt, u8 operation, u64 val)
+{
+ struct kbase_device *kbdev = queue->kctx->kbdev;
+
+ switch (operation) {
+ case BASEP_CQS_SET_OPERATION_ADD:
+ *(u32 *)evt += (u32)val;
+ break;
+ case BASEP_CQS_SET_OPERATION_SET:
+ *(u32 *)evt = val;
+ break;
+ default:
+ dev_dbg(kbdev->dev, "Unsupported CQS set operation %d", operation);
+ queue->has_error = true;
+ break;
+ }
+}
+
+static void kbasep_kcpu_cqs_do_set_operation_64(struct kbase_kcpu_command_queue *queue,
+ uintptr_t evt, u8 operation, u64 val)
+{
+ struct kbase_device *kbdev = queue->kctx->kbdev;
+
+ switch (operation) {
+ case BASEP_CQS_SET_OPERATION_ADD:
+ *(u64 *)evt += val;
+ break;
+ case BASEP_CQS_SET_OPERATION_SET:
+ *(u64 *)evt = val;
+ break;
+ default:
+ dev_dbg(kbdev->dev, "Unsupported CQS set operation %d", operation);
+ queue->has_error = true;
+ break;
+ }
+}
+
static void kbase_kcpu_cqs_set_operation_process(
struct kbase_device *kbdev,
struct kbase_kcpu_command_queue *queue,
@@ -1120,9 +1227,9 @@ static void kbase_kcpu_cqs_set_operation_process(
for (i = 0; i < cqs_set_operation->nr_objs; i++) {
struct kbase_vmap_struct *mapping;
- u64 *evt;
+ uintptr_t evt;
- evt = (u64 *)kbase_phy_alloc_mapping_get(
+ evt = (uintptr_t)kbase_phy_alloc_mapping_get(
queue->kctx, cqs_set_operation->objs[i].addr, &mapping);
/* GPUCORE-28172 RDT to review */
@@ -1132,39 +1239,31 @@ static void kbase_kcpu_cqs_set_operation_process(
"Sync memory %llx already freed", cqs_set_operation->objs[i].addr);
queue->has_error = true;
} else {
- switch (cqs_set_operation->objs[i].operation) {
- case BASEP_CQS_SET_OPERATION_ADD:
- *evt += cqs_set_operation->objs[i].val;
- break;
- case BASEP_CQS_SET_OPERATION_SET:
- *evt = cqs_set_operation->objs[i].val;
- break;
- default:
- dev_dbg(kbdev->dev,
- "Unsupported CQS set operation %d", cqs_set_operation->objs[i].operation);
- queue->has_error = true;
- break;
- }
+ struct base_cqs_set_operation_info *obj = &cqs_set_operation->objs[i];
- /* Increment evt up to the error_state value depending on the CQS data type */
- switch (cqs_set_operation->objs[i].data_type) {
+ switch (obj->data_type) {
default:
- dev_dbg(kbdev->dev, "Unreachable data_type=%d", cqs_set_operation->objs[i].data_type);
- /* Fallthrough - hint to compiler that there's really only 2 options at present */
- fallthrough;
+ WARN_ON(!kbase_kcpu_cqs_is_data_type_valid(obj->data_type));
+ queue->has_error = true;
+ goto skip_err_propagation;
case BASEP_CQS_DATA_TYPE_U32:
- evt = (u64 *)((u8 *)evt + sizeof(u32));
+ kbasep_kcpu_cqs_do_set_operation_32(queue, evt, obj->operation,
+ obj->val);
+ evt += BASEP_EVENT32_ERR_OFFSET - BASEP_EVENT32_VAL_OFFSET;
break;
case BASEP_CQS_DATA_TYPE_U64:
- evt = (u64 *)((u8 *)evt + sizeof(u64));
+ kbasep_kcpu_cqs_do_set_operation_64(queue, evt, obj->operation,
+ obj->val);
+ evt += BASEP_EVENT64_ERR_OFFSET - BASEP_EVENT64_VAL_OFFSET;
break;
}
/* GPUCORE-28172 RDT to review */
/* Always propagate errors */
- *evt = queue->has_error;
+ *(u32 *)evt = queue->has_error;
+skip_err_propagation:
kbase_phy_alloc_mapping_put(queue->kctx, mapping);
}
}
@@ -1182,6 +1281,7 @@ static int kbase_kcpu_cqs_set_operation_prepare(
{
struct base_cqs_set_operation_info *objs;
unsigned int nr_objs = cqs_set_operation_info->nr_objs;
+ unsigned int i;
lockdep_assert_held(&kcpu_queue->lock);
@@ -1201,6 +1301,18 @@ static int kbase_kcpu_cqs_set_operation_prepare(
return -ENOMEM;
}
+ /* Check the CQS objects as early as possible. By checking their alignment
+ * (the required alignment equals the size for Sync32 and Sync64 objects), we
+ * can prevent overrunning the supplied event page.
+ */
+ for (i = 0; i < nr_objs; i++) {
+ if (!kbase_kcpu_cqs_is_data_type_valid(objs[i].data_type) ||
+ !kbase_kcpu_cqs_is_aligned(objs[i].addr, objs[i].data_type)) {
+ kfree(objs);
+ return -EINVAL;
+ }
+ }
+
current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION;
current_command->info.cqs_set_operation.nr_objs = nr_objs;
current_command->info.cqs_set_operation.objs = objs;
@@ -2154,14 +2266,30 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
return -EINVAL;
}
+ /* There might be a race between one thread trying to enqueue commands to the queue
+ * and another thread trying to delete the same queue.
+ * This race could lead to a use-after-free problem for the enqueuing thread if
+ * the resources for the queue have already been freed by the deleting thread.
+ *
+ * To prevent the issue, two mutexes are acquired/released asymmetrically as follows.
+ *
+ * Lock A (kctx mutex)
+ * Lock B (queue mutex)
+ * Unlock A
+ * Unlock B
+ *
+ * While the kctx mutex is held, the enqueuing thread checks the queue and
+ * returns an error code if the queue has already been deleted.
+ */
mutex_lock(&kctx->csf.kcpu_queues.lock);
queue = kctx->csf.kcpu_queues.array[enq->id];
- mutex_unlock(&kctx->csf.kcpu_queues.lock);
-
- if (queue == NULL)
+ if (queue == NULL) {
+ dev_dbg(kctx->kbdev->dev, "Invalid KCPU queue (id:%u)", enq->id);
+ mutex_unlock(&kctx->csf.kcpu_queues.lock);
return -EINVAL;
-
+ }
mutex_lock(&queue->lock);
+ mutex_unlock(&kctx->csf.kcpu_queues.lock);
if (kcpu_queue_get_space(queue) < enq->nr_commands) {
ret = -EBUSY;
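The asymmetric lock/unlock order described in the comment above is the crux of this fix; below is a minimal sketch of the full enqueue-side pattern under the same kctx and queue mutexes, with unrelated error handling elided:

	/* Look the queue up under the kctx-wide lock (Lock A) */
	mutex_lock(&kctx->csf.kcpu_queues.lock);
	queue = kctx->csf.kcpu_queues.array[id];
	if (!queue) {
		mutex_unlock(&kctx->csf.kcpu_queues.lock);
		return -EINVAL;                     /* queue already deleted */
	}

	/* Take the per-queue lock (Lock B) before dropping the kctx lock,
	 * so a concurrent delete cannot free the queue in between.
	 */
	mutex_lock(&queue->lock);
	mutex_unlock(&kctx->csf.kcpu_queues.lock);  /* Unlock A */

	/* ... enqueue commands while holding only the queue lock ... */

	mutex_unlock(&queue->lock);                 /* Unlock B */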
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.h b/mali_kbase/csf/mali_kbase_csf_kcpu.h
index bc3cafa..b8099fd 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu.h
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu.h
@@ -277,8 +277,8 @@ struct kbase_kcpu_command {
* @jit_blocked: Used to keep track of command queues blocked
* by a pending JIT allocation command.
* @fence_timeout: Timer used to detect the fence wait timeout.
- * @metadata: Metadata structure containing basic information about this
- * queue for any fence objects associated with this queue.
+ * @metadata: Metadata structure containing basic information about
+ * this queue for any fence objects associated with this queue.
*/
struct kbase_kcpu_command_queue {
struct mutex lock;
diff --git a/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c b/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c
new file mode 100644
index 0000000..77e19db
--- /dev/null
+++ b/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c
@@ -0,0 +1,815 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include <linux/protected_memory_allocator.h>
+#include <mali_kbase.h>
+#include "mali_kbase_csf.h"
+#include "mali_kbase_csf_mcu_shared_reg.h"
+#include <mali_kbase_mem_migrate.h>
+
+/* Scaling factor for pre-allocating shared regions for suspend buffers and userio pages */
+#define MCU_SHARED_REGS_PREALLOCATE_SCALE (8)
+
+/* MCU shared region map attempt limit */
+#define MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT (4)
+
+/* Convert a VPFN to its start addr */
+#define GET_VPFN_VA(vpfn) ((vpfn) << PAGE_SHIFT)
+
+/* Macros for extracting the corresponding VPFNs from a CSG_REG */
+#define CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages) (reg->start_pfn)
+#define CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages) (reg->start_pfn + nr_susp_pages)
+#define CSG_REG_USERIO_VPFN(reg, csi, nr_susp_pages) (reg->start_pfn + 2 * (nr_susp_pages + csi))
+
+/* MCU shared segment dummy page mapping flags */
+#define DUMMY_PAGE_MAP_FLAGS (KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT) | KBASE_REG_GPU_NX)
+
+/* MCU shared segment suspend buffer mapping flags */
+#define SUSP_PAGE_MAP_FLAGS \
+ (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR | KBASE_REG_GPU_NX | \
+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT))
+
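The VPFN macros above encode a fixed page layout inside every pre-allocated CSG region: the normal-mode suspend buffer pages first, then the protected-mode suspend buffer pages, then one INPUT/OUTPUT userio page pair per CSI. A short sketch of that layout follows; the helper is only an illustration (nr_susp_pages and nr_csis come from the firmware global interface at runtime):

/*
 *  reg->start_pfn
 *    |-- nr_susp_pages pages : normal-mode suspend buffer
 *    |-- nr_susp_pages pages : protected-mode suspend buffer
 *    |-- 2 pages per CSI     : userio INPUT page, then OUTPUT page
 *
 * i.e. 2 * (nr_susp_pages + nr_csis) pages in total, matching the
 * allocation made later in shared_mcu_csg_reg_init().
 */
static inline size_t csg_reg_nr_pages(u32 nr_susp_pages, u32 nr_csis)
{
	return 2 * ((size_t)nr_susp_pages + nr_csis);
}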
+/**
+ * struct kbase_csg_shared_region - Wrapper object for use with a CSG on runtime
+ * resources for suspend buffer pages, userio pages
+ * and their corresponding mapping GPU VA addresses
+ * from the MCU shared interface segment
+ *
+ * @link: Link to the managing list for the wrapper object.
+ * @reg: Pointer to the region allocated from the shared interface segment, which
+ * covers the normal/P-mode suspend buffers and the userio pages of the queues.
+ * @grp: Pointer to the bound kbase_queue_group, or NULL if no binding (free).
+ * @pmode_mapped: Boolean indicating that the region is MMU-mapped with the bound group's
+ * protected mode suspend buffer pages.
+ */
+struct kbase_csg_shared_region {
+ struct list_head link;
+ struct kbase_va_region *reg;
+ struct kbase_queue_group *grp;
+ bool pmode_mapped;
+};
+
+static unsigned long get_userio_mmu_flags(struct kbase_device *kbdev)
+{
+ unsigned long userio_map_flags;
+
+ if (kbdev->system_coherency == COHERENCY_NONE)
+ userio_map_flags =
+ KBASE_REG_GPU_RD | KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
+ else
+ userio_map_flags = KBASE_REG_GPU_RD | KBASE_REG_SHARE_BOTH |
+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED);
+
+ return (userio_map_flags | KBASE_REG_GPU_NX);
+}
+
+static void set_page_meta_status_not_movable(struct tagged_addr phy)
+{
+ if (kbase_page_migration_enabled) {
+ struct kbase_page_metadata *page_md = kbase_page_private(as_page(phy));
+
+ if (page_md) {
+ spin_lock(&page_md->migrate_lock);
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
+ spin_unlock(&page_md->migrate_lock);
+ }
+ }
+}
+
+static struct kbase_csg_shared_region *get_group_bound_csg_reg(struct kbase_queue_group *group)
+{
+ return (struct kbase_csg_shared_region *)group->csg_reg;
+}
+
+static inline int update_mapping_with_dummy_pages(struct kbase_device *kbdev, u64 vpfn,
+ u32 nr_pages)
+{
+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
+ const unsigned long mem_flags = DUMMY_PAGE_MAP_FLAGS;
+
+ return kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, shared_regs->dummy_phys, nr_pages,
+ mem_flags, KBASE_MEM_GROUP_CSF_FW);
+}
+
+static inline int insert_dummy_pages(struct kbase_device *kbdev, u64 vpfn, u32 nr_pages)
+{
+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
+ const unsigned long mem_flags = DUMMY_PAGE_MAP_FLAGS;
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
+ return kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
+ nr_pages, mem_flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW,
+ mmu_sync_info, NULL, false);
+}
+
+/* Reset consecutive retry count to zero */
+static void notify_group_csg_reg_map_done(struct kbase_queue_group *group)
+{
+ lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
+
+ /* Just clear the internal map retry count */
+ group->csg_reg_bind_retries = 0;
+}
+
+/* Return true if a fatal group error has already been triggered */
+static bool notify_group_csg_reg_map_error(struct kbase_queue_group *group)
+{
+ struct kbase_device *kbdev = group->kctx->kbdev;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ if (group->csg_reg_bind_retries < U8_MAX)
+ group->csg_reg_bind_retries++;
+
+ /* Allow only one fatal error notification */
+ if (group->csg_reg_bind_retries == MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT) {
+ struct base_gpu_queue_group_error const err_payload = {
+ .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
+ .payload = { .fatal_group = { .status = GPU_EXCEPTION_TYPE_SW_FAULT_0 } }
+ };
+
+ dev_err(kbdev->dev, "Fatal: group_%d_%d_%d exceeded shared region map retry limit",
+ group->kctx->tgid, group->kctx->id, group->handle);
+ kbase_csf_add_group_fatal_error(group, &err_payload);
+ kbase_event_wakeup(group->kctx);
+ }
+
+ return group->csg_reg_bind_retries >= MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT;
+}
+
+/* Replace the mapping at vpfn (reflecting a queue's userio_pages) with the given phys.
+ * If phys is NULL, the internal dummy_phys is used instead, which effectively
+ * restores the given queue's userio_pages to their initialized state
+ * (i.e. mapped to the default dummy page).
+ * In case of a CSF MMU update error on a queue, the dummy phys is likewise used
+ * to restore the default 'unbound' (i.e. mapped to dummy) condition.
+ *
+ * It's the caller's responsibility to ensure that the given vpfn is extracted
+ * correctly from a CSG_REG object, for example, using CSG_REG_USERIO_VPFN().
+ */
+static int userio_pages_replace_phys(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys)
+{
+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
+ int err = 0, err1;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ if (phys) {
+ unsigned long mem_flags_input = shared_regs->userio_mem_rd_flags;
+ unsigned long mem_flags_output = mem_flags_input | KBASE_REG_GPU_WR;
+
+ /* Dealing with a queue's INPUT page */
+ err = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, &phys[0], 1, mem_flags_input,
+ KBASE_MEM_GROUP_CSF_IO);
+ /* Dealing with a queue's OUTPUT page */
+ err1 = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn + 1, &phys[1], 1,
+ mem_flags_output, KBASE_MEM_GROUP_CSF_IO);
+ if (unlikely(err1))
+ err = err1;
+ }
+
+ if (unlikely(err) || !phys) {
+ /* Restore back to dummy_userio_phy */
+ update_mapping_with_dummy_pages(kbdev, vpfn, KBASEP_NUM_CS_USER_IO_PAGES);
+ }
+
+ return err;
+}
+
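As a hedged usage sketch of the helper above, mirroring how the later add/drop functions in this file use it (error handling reduced to clearing the cached GPU VA):

	u64 vpfn = CSG_REG_USERIO_VPFN(csg_reg->reg, queue->csi_index, nr_susp_pages);

	/* Bind the queue's own INPUT/OUTPUT pages into the shared segment */
	if (userio_pages_replace_phys(kbdev, vpfn, queue->phys))
		queue->user_io_gpu_va = 0;          /* left on the dummy mapping */
	else
		queue->user_io_gpu_va = GET_VPFN_VA(vpfn);

	/* Later, when the queue is stopped, fall back to the dummy page */
	userio_pages_replace_phys(kbdev, vpfn, NULL);
	queue->user_io_gpu_va = 0;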
+/* Update the mappings of a group's queues against its runtime-bound csg_reg region */
+static int csg_reg_update_on_csis(struct kbase_device *kbdev, struct kbase_queue_group *group,
+ struct kbase_queue_group *prev_grp)
+{
+ struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group);
+ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+ const u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num;
+ struct tagged_addr *phy;
+ int err = 0, err1;
+ u32 i;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ if (WARN_ONCE(!csg_reg, "Update_userio pages: group has no bound csg_reg"))
+ return -EINVAL;
+
+ for (i = 0; i < nr_csis; i++) {
+ struct kbase_queue *queue = group->bound_queues[i];
+ struct kbase_queue *prev_queue = prev_grp ? prev_grp->bound_queues[i] : NULL;
+
+ /* Set the phy if the group's queue[i] needs mapping, otherwise NULL */
+ phy = (queue && queue->enabled && !queue->user_io_gpu_va) ? queue->phys : NULL;
+
+ /* Either phy is valid, or this update is for an ownership transition from
+ * prev_grp and the prev_queue was mapped, so an update is required.
+ */
+ if (phy || (prev_queue && prev_queue->user_io_gpu_va)) {
+ u64 vpfn = CSG_REG_USERIO_VPFN(csg_reg->reg, i, nr_susp_pages);
+
+ err1 = userio_pages_replace_phys(kbdev, vpfn, phy);
+
+ if (unlikely(err1)) {
+ dev_warn(kbdev->dev,
+ "%s: Error in update queue-%d mapping for csg_%d_%d_%d",
+ __func__, i, group->kctx->tgid, group->kctx->id,
+ group->handle);
+ err = err1;
+ } else if (phy)
+ queue->user_io_gpu_va = GET_VPFN_VA(vpfn);
+
+ /* Mark that prev_grp's queue has lost its mapping */
+ if (prev_queue)
+ prev_queue->user_io_gpu_va = 0;
+ }
+ }
+
+ return err;
+}
+
+/* Bind a group to a given csg_reg. Any previous mappings with the csg_reg are replaced
+ * with the given group's phy pages or, if there is no replacement, with the default
+ * dummy pages.
+ * Note, within this function the csg_reg's fields transition step-by-step from the
+ * prev_grp to its new binding owner. At the end, the prev_grp is completely detached
+ * from the previously bound csg_reg.
+ */
+static int group_bind_csg_reg(struct kbase_device *kbdev, struct kbase_queue_group *group,
+ struct kbase_csg_shared_region *csg_reg)
+{
+ const unsigned long mem_flags = SUSP_PAGE_MAP_FLAGS;
+ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+ struct kbase_queue_group *prev_grp = csg_reg->grp;
+ struct kbase_va_region *reg = csg_reg->reg;
+ struct tagged_addr *phy;
+ int err = 0, err1;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ /* The csg_reg is expected to still be on the unused list, so its link is not empty */
+ if (WARN_ON_ONCE(list_empty(&csg_reg->link))) {
+ dev_dbg(kbdev->dev, "csg_reg is marked in active use");
+ return -EINVAL;
+ }
+
+ if (WARN_ON_ONCE(prev_grp && prev_grp->csg_reg != csg_reg)) {
+ dev_dbg(kbdev->dev, "Unexpected bound lost on prev_group");
+ prev_grp->csg_reg = NULL;
+ return -EINVAL;
+ }
+
+ /* Replace the csg_reg's bound group with the newly given one */
+ csg_reg->grp = group;
+ group->csg_reg = csg_reg;
+
+ /* Resolving mappings, deal with protected mode first */
+ if (group->protected_suspend_buf.pma) {
+ /* We are binding a new group that has its P-mode pages ready, so the prev_grp's
+ * P-mode mapping status is stale during this transition of ownership. The new
+ * owner's mapping was torn down when it previously lost its binding, so the
+ * pma map needs an update. Clearing the mapped flag here ensures that it
+ * reflects the new owner's condition.
+ */
+ csg_reg->pmode_mapped = false;
+ err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group);
+ } else if (csg_reg->pmode_mapped) {
+ /* Need to unmap the previous one, use the dummy pages */
+ err = update_mapping_with_dummy_pages(
+ kbdev, CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages);
+
+ if (unlikely(err))
+ dev_warn(kbdev->dev, "%s: Failed to update P-mode dummy for csg_%d_%d_%d",
+ __func__, group->kctx->tgid, group->kctx->id, group->handle);
+
+ csg_reg->pmode_mapped = false;
+ }
+
+ /* Unlike the normal suspend buf, the mapping of the protected mode suspend buffer is
+ * actually reflected by a specific mapped flag (because its phys[] is only allocated
+ * on an as-needed basis). So the GPU_VA is always updated to the bound region's
+ * corresponding VA, as a reflection of the binding to the csg_reg.
+ */
+ group->protected_suspend_buf.gpu_va =
+ GET_VPFN_VA(CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages));
+
+ /* Deal with normal mode suspend buffer */
+ phy = group->normal_suspend_buf.phy;
+ err1 = kbase_mmu_update_csf_mcu_pages(kbdev, CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages), phy,
+ nr_susp_pages, mem_flags, KBASE_MEM_GROUP_CSF_FW);
+
+ if (unlikely(err1)) {
+ dev_warn(kbdev->dev, "%s: Failed to update suspend buffer for csg_%d_%d_%d",
+ __func__, group->kctx->tgid, group->kctx->id, group->handle);
+
+ /* Attempt a restore to default dummy for removing previous mapping */
+ if (prev_grp)
+ update_mapping_with_dummy_pages(
+ kbdev, CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages);
+ err = err1;
+ /* Mark the normal suspend buffer as not mapped (due to the error) */
+ group->normal_suspend_buf.gpu_va = 0;
+ } else {
+ /* Mark the normal suspend buffer as actually mapped */
+ group->normal_suspend_buf.gpu_va =
+ GET_VPFN_VA(CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages));
+ }
+
+ /* Deal with queue userio_pages */
+ err1 = csg_reg_update_on_csis(kbdev, group, prev_grp);
+ if (likely(!err))
+ err = err1;
+
+ /* Reset the previous group's suspend buffers' GPU_VAs as it has lost its binding */
+ if (prev_grp) {
+ prev_grp->normal_suspend_buf.gpu_va = 0;
+ prev_grp->protected_suspend_buf.gpu_va = 0;
+ prev_grp->csg_reg = NULL;
+ }
+
+ return err;
+}
+
+/* Notify that the group is placed on-slot, hence the bound csg_reg is in active use */
+void kbase_csf_mcu_shared_set_group_csg_reg_active(struct kbase_device *kbdev,
+ struct kbase_queue_group *group)
+{
+ struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group);
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ if (WARN_ONCE(!csg_reg || csg_reg->grp != group, "Group_%d_%d_%d has no csg_reg bound",
+ group->kctx->tgid, group->kctx->id, group->handle))
+ return;
+
+ /* By dropping the csg_reg from the unused list, it becomes active and is tracked
+ * by its bound group that is on-slot. The design is that, when this on-slot group is
+ * moved off-slot, the scheduler slot clean-up will add it back to the tail of the
+ * unused list.
+ */
+ if (!WARN_ON_ONCE(list_empty(&csg_reg->link)))
+ list_del_init(&csg_reg->link);
+}
+
+/* Notify that the group is placed off-slot, hence the bound csg_reg is no longer in
+ * active use. Existing bindings/mappings are left untouched. These would only be dealt with
+ * if the bound csg_reg is to be reused with another group.
+ */
+void kbase_csf_mcu_shared_set_group_csg_reg_unused(struct kbase_device *kbdev,
+ struct kbase_queue_group *group)
+{
+ struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group);
+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ if (WARN_ONCE(!csg_reg || csg_reg->grp != group, "Group_%d_%d_%d has no csg_reg bound",
+ group->kctx->tgid, group->kctx->id, group->handle))
+ return;
+
+ /* By adding back the csg_reg to the unused list, it becomes available for another
+ * group to break its existing binding and set up a new one.
+ */
+ if (!list_empty(&csg_reg->link)) {
+ WARN_ONCE(group->csg_nr >= 0, "Group is assumed vacated from slot");
+ list_move_tail(&csg_reg->link, &shared_regs->unused_csg_regs);
+ } else
+ list_add_tail(&csg_reg->link, &shared_regs->unused_csg_regs);
+}
+
+/* Adding a new queue to an existing on-slot group */
+int kbase_csf_mcu_shared_add_queue(struct kbase_device *kbdev, struct kbase_queue *queue)
+{
+ struct kbase_queue_group *group = queue->group;
+ struct kbase_csg_shared_region *csg_reg;
+ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+ u64 vpfn;
+ int err;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ if (WARN_ONCE(!group || group->csg_nr < 0, "No bound group, or group is not on-slot"))
+ return -EIO;
+
+ csg_reg = get_group_bound_csg_reg(group);
+ if (WARN_ONCE(!csg_reg || !list_empty(&csg_reg->link),
+ "No bound csg_reg, or in wrong state"))
+ return -EIO;
+
+ vpfn = CSG_REG_USERIO_VPFN(csg_reg->reg, queue->csi_index, nr_susp_pages);
+ err = userio_pages_replace_phys(kbdev, vpfn, queue->phys);
+ if (likely(!err)) {
+ /* Mark that the queue has been successfully mapped */
+ queue->user_io_gpu_va = GET_VPFN_VA(vpfn);
+ } else {
+ /* Mark that the queue has no mapping on its phys[] */
+ queue->user_io_gpu_va = 0;
+ dev_dbg(kbdev->dev,
+ "%s: Error in mapping userio pages for queue-%d of csg_%d_%d_%d", __func__,
+ queue->csi_index, group->kctx->tgid, group->kctx->id, group->handle);
+
+ /* notify the error for the bound group */
+ if (notify_group_csg_reg_map_error(group))
+ err = -EIO;
+ }
+
+ return err;
+}
+
+/* Unmap a given queue's userio pages, when the queue is deleted */
+void kbase_csf_mcu_shared_drop_stopped_queue(struct kbase_device *kbdev, struct kbase_queue *queue)
+{
+ struct kbase_queue_group *group;
+ struct kbase_csg_shared_region *csg_reg;
+ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+ u64 vpfn;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ /* The queue has no existing mapping, nothing to do */
+ if (!queue || !queue->user_io_gpu_va)
+ return;
+
+ group = queue->group;
+ if (WARN_ONCE(!group || !group->csg_reg, "Queue/Group has no bound region"))
+ return;
+
+ csg_reg = get_group_bound_csg_reg(group);
+
+ vpfn = CSG_REG_USERIO_VPFN(csg_reg->reg, queue->csi_index, nr_susp_pages);
+
+ WARN_ONCE(userio_pages_replace_phys(kbdev, vpfn, NULL),
+ "Unexpected restoring to dummy map update error");
+ queue->user_io_gpu_va = 0;
+}
+
+int kbase_csf_mcu_shared_group_update_pmode_map(struct kbase_device *kbdev,
+ struct kbase_queue_group *group)
+{
+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
+ struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group);
+ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+ int err = 0, err1;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ if (WARN_ONCE(!csg_reg, "Update_pmode_map: the bound csg_reg can't be NULL"))
+ return -EINVAL;
+
+ /* If pmode is already mapped, nothing to do */
+ if (csg_reg->pmode_mapped)
+ return 0;
+
+ /* The P-mode map is not in place and the group has allocated P-mode pages, so map it */
+ if (group->protected_suspend_buf.pma) {
+ unsigned long mem_flags = SUSP_PAGE_MAP_FLAGS;
+ struct tagged_addr *phy = shared_regs->pma_phys;
+ struct kbase_va_region *reg = csg_reg->reg;
+ u64 vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages);
+ u32 i;
+
+ /* Populate the protected phys from pma to phy[] */
+ for (i = 0; i < nr_susp_pages; i++)
+ phy[i] = as_tagged(group->protected_suspend_buf.pma[i]->pa);
+
+ /* Add the P-mode suspend buffer mapping */
+ err = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, phy, nr_susp_pages, mem_flags,
+ KBASE_MEM_GROUP_CSF_FW);
+
+ /* If error, restore to the default dummy */
+ if (unlikely(err)) {
+ err1 = update_mapping_with_dummy_pages(kbdev, vpfn, nr_susp_pages);
+ if (unlikely(err1))
+ dev_warn(
+ kbdev->dev,
+ "%s: Failed in recovering to P-mode dummy for csg_%d_%d_%d",
+ __func__, group->kctx->tgid, group->kctx->id,
+ group->handle);
+
+ csg_reg->pmode_mapped = false;
+ } else
+ csg_reg->pmode_mapped = true;
+ }
+
+ return err;
+}
+
+void kbase_csf_mcu_shared_clear_evicted_group_csg_reg(struct kbase_device *kbdev,
+ struct kbase_queue_group *group)
+{
+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
+ struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group);
+ struct kbase_va_region *reg;
+ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+ u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num;
+ int err = 0;
+ u32 i;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ /* Nothing to do for clearing up if no bound csg_reg */
+ if (!csg_reg)
+ return;
+
+ reg = csg_reg->reg;
+ /* Restore mappings to default dummy pages for any mapped pages */
+ if (csg_reg->pmode_mapped) {
+ err = update_mapping_with_dummy_pages(
+ kbdev, CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages);
+ WARN_ONCE(unlikely(err), "Restore dummy failed for clearing pmod buffer mapping");
+
+ csg_reg->pmode_mapped = false;
+ }
+
+ if (group->normal_suspend_buf.gpu_va) {
+ err = update_mapping_with_dummy_pages(
+ kbdev, CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages);
+ WARN_ONCE(err, "Restore dummy failed for clearing suspend buffer mapping");
+ }
+
+ /* Deal with queue userio pages */
+ for (i = 0; i < nr_csis; i++)
+ kbase_csf_mcu_shared_drop_stopped_queue(kbdev, group->bound_queues[i]);
+
+ group->normal_suspend_buf.gpu_va = 0;
+ group->protected_suspend_buf.gpu_va = 0;
+
+ /* Break the binding */
+ group->csg_reg = NULL;
+ csg_reg->grp = NULL;
+
+ /* Put the csg_reg to the front of the unused list */
+ if (WARN_ON_ONCE(list_empty(&csg_reg->link)))
+ list_add(&csg_reg->link, &shared_regs->unused_csg_regs);
+ else
+ list_move(&csg_reg->link, &shared_regs->unused_csg_regs);
+}
+
+int kbase_csf_mcu_shared_group_bind_csg_reg(struct kbase_device *kbdev,
+ struct kbase_queue_group *group)
+{
+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
+ struct kbase_csg_shared_region *csg_reg;
+ int err;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ csg_reg = get_group_bound_csg_reg(group);
+ if (!csg_reg)
+ csg_reg = list_first_entry_or_null(&shared_regs->unused_csg_regs,
+ struct kbase_csg_shared_region, link);
+
+ if (!WARN_ON_ONCE(!csg_reg)) {
+ struct kbase_queue_group *prev_grp = csg_reg->grp;
+
+ /* Deal with the previous binding and lazy unmap, i.e. if the previous mapping is
+ * not the required one, unmap it.
+ */
+ if (prev_grp == group) {
+ /* Update existing bindings, if there have been some changes */
+ err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group);
+ if (likely(!err))
+ err = csg_reg_update_on_csis(kbdev, group, NULL);
+ } else
+ err = group_bind_csg_reg(kbdev, group, csg_reg);
+ } else {
+ /* This should not be possible if the code operates correctly */
+ dev_err(kbdev->dev, "%s: Unexpected NULL csg_reg for group %d of context %d_%d",
+ __func__, group->handle, group->kctx->tgid, group->kctx->id);
+ return -EIO;
+ }
+
+ if (likely(!err))
+ notify_group_csg_reg_map_done(group);
+ else
+ notify_group_csg_reg_map_error(group);
+
+ return err;
+}
+
+static int shared_mcu_csg_reg_init(struct kbase_device *kbdev,
+ struct kbase_csg_shared_region *csg_reg)
+{
+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
+ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+ u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num;
+ const size_t nr_csg_reg_pages = 2 * (nr_susp_pages + nr_csis);
+ struct kbase_va_region *reg;
+ u64 vpfn;
+ int err, i;
+
+ INIT_LIST_HEAD(&csg_reg->link);
+ reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, nr_csg_reg_pages,
+ KBASE_REG_ZONE_MCU_SHARED);
+
+ if (!reg) {
+ dev_err(kbdev->dev, "%s: Failed to allocate a MCU shared region for %zu pages\n",
+ __func__, nr_csg_reg_pages);
+ return -ENOMEM;
+ }
+
+ /* Insert the region into rbtree, so it becomes ready to use */
+ mutex_lock(&kbdev->csf.reg_lock);
+ err = kbase_add_va_region_rbtree(kbdev, reg, 0, nr_csg_reg_pages, 1);
+ reg->flags &= ~KBASE_REG_FREE;
+ mutex_unlock(&kbdev->csf.reg_lock);
+ if (err) {
+ kfree(reg);
+ dev_err(kbdev->dev, "%s: Failed to add a region of %zu pages into rbtree", __func__,
+ nr_csg_reg_pages);
+ return err;
+ }
+
+ /* Initialize the mappings so the MMU only needs to update the corresponding
+ * mapped phy-pages at runtime.
+ * Map the normal suspend buffer pages to the prepared dummy phys[].
+ */
+ vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages);
+ err = insert_dummy_pages(kbdev, vpfn, nr_susp_pages);
+
+ if (unlikely(err))
+ goto fail_susp_map_fail;
+
+ /* Map the protected suspend buffer pages to the prepared dummy phys[] */
+ vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages);
+ err = insert_dummy_pages(kbdev, vpfn, nr_susp_pages);
+
+ if (unlikely(err))
+ goto fail_pmod_map_fail;
+
+ for (i = 0; i < nr_csis; i++) {
+ vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages);
+ err = insert_dummy_pages(kbdev, vpfn, KBASEP_NUM_CS_USER_IO_PAGES);
+
+ if (unlikely(err))
+ goto fail_userio_pages_map_fail;
+ }
+
+ /* Replace the previous NULL-valued field with the successfully initialized reg */
+ csg_reg->reg = reg;
+
+ return 0;
+
+fail_userio_pages_map_fail:
+ while (i-- > 0) {
+ vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages);
+ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
+ KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR, true);
+ }
+
+ vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages);
+ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
+ nr_susp_pages, MCU_AS_NR, true);
+fail_pmod_map_fail:
+ vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages);
+ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
+ nr_susp_pages, MCU_AS_NR, true);
+fail_susp_map_fail:
+ mutex_lock(&kbdev->csf.reg_lock);
+ kbase_remove_va_region(kbdev, reg);
+ mutex_unlock(&kbdev->csf.reg_lock);
+ kfree(reg);
+
+ return err;
+}
+
+/* Note, this helper can only be called on scheduler shutdown */
+static void shared_mcu_csg_reg_term(struct kbase_device *kbdev,
+ struct kbase_csg_shared_region *csg_reg)
+{
+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
+ struct kbase_va_region *reg = csg_reg->reg;
+ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+ const u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num;
+ u64 vpfn;
+ int i;
+
+ for (i = 0; i < nr_csis; i++) {
+ vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages);
+ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
+ KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR, true);
+ }
+
+ vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages);
+ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
+ nr_susp_pages, MCU_AS_NR, true);
+ vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages);
+ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
+ nr_susp_pages, MCU_AS_NR, true);
+
+ mutex_lock(&kbdev->csf.reg_lock);
+ kbase_remove_va_region(kbdev, reg);
+ mutex_unlock(&kbdev->csf.reg_lock);
+ kfree(reg);
+}
+
+int kbase_csf_mcu_shared_regs_data_init(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ struct kbase_csf_mcu_shared_regions *shared_regs = &scheduler->mcu_regs_data;
+ struct kbase_csg_shared_region *array_csg_regs;
+ const size_t nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+ const u32 nr_groups = kbdev->csf.global_iface.group_num;
+ const u32 nr_csg_regs = MCU_SHARED_REGS_PREALLOCATE_SCALE * nr_groups;
+ const u32 nr_dummy_phys = MAX(nr_susp_pages, KBASEP_NUM_CS_USER_IO_PAGES);
+ u32 i;
+ int err;
+
+ shared_regs->userio_mem_rd_flags = get_userio_mmu_flags(kbdev);
+ INIT_LIST_HEAD(&shared_regs->unused_csg_regs);
+
+ shared_regs->dummy_phys =
+ kcalloc(nr_dummy_phys, sizeof(*shared_regs->dummy_phys), GFP_KERNEL);
+ if (!shared_regs->dummy_phys)
+ return -ENOMEM;
+
+ if (kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1,
+ &shared_regs->dummy_phys[0], false) <= 0)
+ return -ENOMEM;
+
+ shared_regs->dummy_phys_allocated = true;
+ set_page_meta_status_not_movable(shared_regs->dummy_phys[0]);
+
+ /* Replicate the allocated single shared_regs->dummy_phys[0] to the full array */
+ for (i = 1; i < nr_dummy_phys; i++)
+ shared_regs->dummy_phys[i] = shared_regs->dummy_phys[0];
+
+ shared_regs->pma_phys = kcalloc(nr_susp_pages, sizeof(*shared_regs->pma_phys), GFP_KERNEL);
+ if (!shared_regs->pma_phys)
+ return -ENOMEM;
+
+ array_csg_regs = kcalloc(nr_csg_regs, sizeof(*array_csg_regs), GFP_KERNEL);
+ if (!array_csg_regs)
+ return -ENOMEM;
+ shared_regs->array_csg_regs = array_csg_regs;
+
+ /* All fields in scheduler->mcu_regs_data except the shared_regs->array_csg_regs
+ * are properly populated and ready to use. Now initialize the items in
+ * shared_regs->array_csg_regs[]
+ */
+ for (i = 0; i < nr_csg_regs; i++) {
+ err = shared_mcu_csg_reg_init(kbdev, &array_csg_regs[i]);
+ if (err)
+ return err;
+
+ list_add_tail(&array_csg_regs[i].link, &shared_regs->unused_csg_regs);
+ }
+
+ return 0;
+}
+
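As a worked example of the pre-allocation sizing done by this init function (the numbers are illustrative, not taken from any particular GPU):

/*
 * group_num      = 8       CSGs supported by the firmware interface
 * suspend_size   = 16 KiB  ->  nr_susp_pages = 4
 * stream_num     = 4       CSIs per group
 *
 * nr_csg_regs        = MCU_SHARED_REGS_PREALLOCATE_SCALE * group_num = 64
 * pages per csg_reg  = 2 * (nr_susp_pages + nr_csis)                 = 16
 * shared VA reserved = 64 regions * 16 pages * 4 KiB                 = 4 MiB
 */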
+void kbase_csf_mcu_shared_regs_data_term(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ struct kbase_csf_mcu_shared_regions *shared_regs = &scheduler->mcu_regs_data;
+ struct kbase_csg_shared_region *array_csg_regs =
+ (struct kbase_csg_shared_region *)shared_regs->array_csg_regs;
+ const u32 nr_groups = kbdev->csf.global_iface.group_num;
+ const u32 nr_csg_regs = MCU_SHARED_REGS_PREALLOCATE_SCALE * nr_groups;
+
+ if (array_csg_regs) {
+ struct kbase_csg_shared_region *csg_reg;
+ u32 i, cnt_csg_regs = 0;
+
+ for (i = 0; i < nr_csg_regs; i++) {
+ csg_reg = &array_csg_regs[i];
+ /* There should not be any group mapping bindings */
+ WARN_ONCE(csg_reg->grp, "csg_reg has a bound group");
+
+ if (csg_reg->reg) {
+ shared_mcu_csg_reg_term(kbdev, csg_reg);
+ cnt_csg_regs++;
+ }
+ }
+
+ /* The count of initialized csg_regs should match the number of entries on the unused list */
+ list_for_each_entry(csg_reg, &shared_regs->unused_csg_regs, link)
+ cnt_csg_regs--;
+
+ WARN_ONCE(cnt_csg_regs, "Unmatched counts of susp_regs");
+ kfree(shared_regs->array_csg_regs);
+ }
+
+ if (shared_regs->dummy_phys_allocated) {
+ struct page *page = as_page(shared_regs->dummy_phys[0]);
+
+ kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false);
+ }
+
+ kfree(shared_regs->dummy_phys);
+ kfree(shared_regs->pma_phys);
+}
diff --git a/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.h b/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.h
new file mode 100644
index 0000000..61943cb
--- /dev/null
+++ b/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.h
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_CSF_MCU_SHARED_REG_H_
+#define _KBASE_CSF_MCU_SHARED_REG_H_
+
+/**
+ * kbase_csf_mcu_shared_set_group_csg_reg_active - Notify that the group is active on-slot with
+ * scheduling action. Essential runtime resources
+ * are bound with the group for it to run
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @group: Pointer to the group that is placed into active on-slot running by the scheduler.
+ *
+ */
+void kbase_csf_mcu_shared_set_group_csg_reg_active(struct kbase_device *kbdev,
+ struct kbase_queue_group *group);
+
+/**
+ * kbase_csf_mcu_shared_set_group_csg_reg_unused - Notify that the group is placed off-slot with
+ * scheduling action. Some of the bound runtime
+ * resources can be reallocated for others to use
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @group: Pointer to the group that is placed off-slot by the scheduler.
+ *
+ */
+void kbase_csf_mcu_shared_set_group_csg_reg_unused(struct kbase_device *kbdev,
+ struct kbase_queue_group *group);
+
+/**
+ * kbase_csf_mcu_shared_group_update_pmode_map - Request to update the given group's protected
+ * suspend buffer pages to be mapped for supporting
+ * protected mode operations.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @group: Pointer to the group for attempting a protected mode suspend buffer binding/mapping.
+ *
+ * Return: 0 for success, the group has a protected suspend buffer region mapped. Otherwise an
+ * error code is returned.
+ */
+int kbase_csf_mcu_shared_group_update_pmode_map(struct kbase_device *kbdev,
+ struct kbase_queue_group *group);
+
+/**
+ * kbase_csf_mcu_shared_clear_evicted_group_csg_reg - Clear any bound regions/mappings as the
+ * given group is evicted out of the runtime
+ * operations.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @group: Pointer to the group that has been evicted out of set of operational groups.
+ *
+ * This function will take away any of the bindings/mappings immediately so the resources
+ * are not tied up with the given group, which has been evicted out of scheduling action for
+ * termination.
+ */
+void kbase_csf_mcu_shared_clear_evicted_group_csg_reg(struct kbase_device *kbdev,
+ struct kbase_queue_group *group);
+
+/**
+ * kbase_csf_mcu_shared_add_queue - Request to add a newly activated queue for a group to be
+ * run on slot.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @queue: Pointer to the queue that requires some runtime resource to be bound for joining
+ * others that are already running on-slot with their bound group.
+ *
+ * Return: 0 on success, or negative on failure.
+ */
+int kbase_csf_mcu_shared_add_queue(struct kbase_device *kbdev, struct kbase_queue *queue);
+
+/**
+ * kbase_csf_mcu_shared_drop_stopped_queue - Request to drop a queue after it has been stopped
+ * from its operational state within its group.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @queue: Pointer to the queue that has been stopped from operational state.
+ *
+ */
+void kbase_csf_mcu_shared_drop_stopped_queue(struct kbase_device *kbdev, struct kbase_queue *queue);
+
+/**
+ * kbase_csf_mcu_shared_group_bind_csg_reg - Bind some required runtime resources to the given
+ * group so that it is ready to run on-slot.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @group: Pointer to the queue group that requires the runtime resources.
+ *
+ * This function binds/maps the required suspend buffer pages and userio pages for the given
+ * group, readying it to run on-slot.
+ *
+ * Return: 0 on success, or negative on failure.
+ */
+int kbase_csf_mcu_shared_group_bind_csg_reg(struct kbase_device *kbdev,
+ struct kbase_queue_group *group);
+
+/**
+ * kbase_csf_mcu_shared_regs_data_init - Allocate and initialize the MCU shared regions data for
+ * the given device.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * This function allocates and initializes the MCU shared VA regions for runtime operations
+ * of the CSF scheduler.
+ *
+ * Return: 0 on success, or an error code.
+ */
+int kbase_csf_mcu_shared_regs_data_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_mcu_shared_regs_data_term - Terminate the allocated MCU shared regions data for
+ * the given device.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * This function terminates the MCU shared VA regions allocated for runtime operations
+ * of the CSF scheduler.
+ */
+void kbase_csf_mcu_shared_regs_data_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_CSF_MCU_SHARED_REG_H_ */
diff --git a/mali_kbase/csf/mali_kbase_csf_registers.h b/mali_kbase/csf/mali_kbase_csf_registers.h
index b133efd..82389e5 100644
--- a/mali_kbase/csf/mali_kbase_csf_registers.h
+++ b/mali_kbase/csf/mali_kbase_csf_registers.h
@@ -257,6 +257,16 @@
#define GLB_DEBUG_FWUTF_RESULT GLB_DEBUG_ARG_OUT0 /* () Firmware debug test result */
#define GLB_DEBUG_ACK 0x0FFC /* () Global debug acknowledge */
+#ifdef CONFIG_MALI_CORESIGHT
+#define GLB_DEBUG_REQ_FW_AS_WRITE_SHIFT 4
+#define GLB_DEBUG_REQ_FW_AS_WRITE_MASK (0x1 << GLB_DEBUG_REQ_FW_AS_WRITE_SHIFT)
+#define GLB_DEBUG_REQ_FW_AS_READ_SHIFT 5
+#define GLB_DEBUG_REQ_FW_AS_READ_MASK (0x1 << GLB_DEBUG_REQ_FW_AS_READ_SHIFT)
+#define GLB_DEBUG_ARG_IN0 0x0FE0
+#define GLB_DEBUG_ARG_IN1 0x0FE4
+#define GLB_DEBUG_ARG_OUT0 0x0FE0
+#endif /* CONFIG_MALI_CORESIGHT */
+
/* USER register offsets */
#define LATEST_FLUSH 0x0000 /* () Flush ID of latest clean-and-invalidate operation */
@@ -316,10 +326,17 @@
#define CS_REQ_IDLE_RESOURCE_REQ_SHIFT 11
#define CS_REQ_IDLE_RESOURCE_REQ_MASK (0x1 << CS_REQ_IDLE_RESOURCE_REQ_SHIFT)
#define CS_REQ_IDLE_RESOURCE_REQ_GET(reg_val) \
- (((reg_val)&CS_REQ_IDLE_RESOURCE_REQ_MASK) >> CS_REQ_IDLE_RESOURCE_REQ_SHIFT)
+ (((reg_val) & CS_REQ_IDLE_RESOURCE_REQ_MASK) >> CS_REQ_IDLE_RESOURCE_REQ_SHIFT)
#define CS_REQ_IDLE_RESOURCE_REQ_SET(reg_val, value) \
(((reg_val) & ~CS_REQ_IDLE_RESOURCE_REQ_MASK) | \
(((value) << CS_REQ_IDLE_RESOURCE_REQ_SHIFT) & CS_REQ_IDLE_RESOURCE_REQ_MASK))
+#define CS_REQ_IDLE_SHARED_SB_DEC_SHIFT 12
+#define CS_REQ_IDLE_SHARED_SB_DEC_MASK (0x1 << CS_REQ_IDLE_SHARED_SB_DEC_SHIFT)
+#define CS_REQ_IDLE_SHARED_SB_DEC_GET(reg_val) \
+ (((reg_val) & CS_REQ_IDLE_SHARED_SB_DEC_MASK) >> CS_REQ_IDLE_SHARED_SB_DEC_SHIFT)
+#define CS_REQ_IDLE_SHARED_SB_DEC_REQ_SET(reg_val, value) \
+ (((reg_val) & ~CS_REQ_IDLE_SHARED_SB_DEC_MASK) | \
+ (((value) << CS_REQ_IDLE_SHARED_SB_DEC_SHIFT) & CS_REQ_IDLE_SHARED_SB_DEC_MASK))
#define CS_REQ_TILER_OOM_SHIFT 26
#define CS_REQ_TILER_OOM_MASK (0x1 << CS_REQ_TILER_OOM_SHIFT)
#define CS_REQ_TILER_OOM_GET(reg_val) (((reg_val)&CS_REQ_TILER_OOM_MASK) >> CS_REQ_TILER_OOM_SHIFT)
@@ -594,6 +611,13 @@
#define CS_STATUS_WAIT_PROTM_PEND_SET(reg_val, value) \
(((reg_val) & ~CS_STATUS_WAIT_PROTM_PEND_MASK) | \
(((value) << CS_STATUS_WAIT_PROTM_PEND_SHIFT) & CS_STATUS_WAIT_PROTM_PEND_MASK))
+#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT 30
+#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK (0x1 << CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT)
+#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_GET(reg_val) \
+ (((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT)
+#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK) | \
+ (((value) << CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK))
#define CS_STATUS_WAIT_SYNC_WAIT_SHIFT 31
#define CS_STATUS_WAIT_SYNC_WAIT_MASK (0x1 << CS_STATUS_WAIT_SYNC_WAIT_SHIFT)
#define CS_STATUS_WAIT_SYNC_WAIT_GET(reg_val) \
diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c
index 282f7e2..755df75 100644
--- a/mali_kbase/csf/mali_kbase_csf_scheduler.c
+++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c
@@ -32,6 +32,7 @@
#include <uapi/gpu/arm/midgard/mali_base_kernel.h>
#include <mali_kbase_hwaccess_time.h>
#include "mali_kbase_csf_tiler_heap_reclaim.h"
+#include "mali_kbase_csf_mcu_shared_reg.h"
/* Value to indicate that a queue group is not groups_to_schedule list */
#define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX)
@@ -57,6 +58,9 @@
/* Time to wait for completion of PING req before considering MCU as hung */
#define FW_PING_AFTER_ERROR_TIMEOUT_MS (10)
+/* Explicitly defining this blocked_reason code as SB_WAIT for clarity */
+#define CS_STATUS_BLOCKED_ON_SB_WAIT CS_STATUS_BLOCKED_REASON_REASON_WAIT
+
static int scheduler_group_schedule(struct kbase_queue_group *group);
static void remove_group_from_idle_wait(struct kbase_queue_group *const group);
static
@@ -1450,6 +1454,7 @@ int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue)
err = sched_halt_stream(queue);
unassign_user_doorbell_from_queue(kbdev, queue);
+ kbase_csf_mcu_shared_drop_stopped_queue(kbdev, queue);
}
mutex_unlock(&kbdev->csf.scheduler.lock);
@@ -1567,17 +1572,15 @@ static void program_cs(struct kbase_device *kbdev,
kbase_csf_firmware_cs_input(stream, CS_SIZE,
queue->size);
- user_input = (queue->reg->start_pfn << PAGE_SHIFT);
- kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO,
- user_input & 0xFFFFFFFF);
- kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI,
- user_input >> 32);
+ user_input = queue->user_io_gpu_va;
+ WARN_ONCE(!user_input && queue->enabled, "Enabled queue should have a valid gpu_va");
+
+ kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO, user_input & 0xFFFFFFFF);
+ kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI, user_input >> 32);
- user_output = ((queue->reg->start_pfn + 1) << PAGE_SHIFT);
- kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO,
- user_output & 0xFFFFFFFF);
- kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI,
- user_output >> 32);
+ user_output = user_input + PAGE_SIZE;
+ kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO, user_output & 0xFFFFFFFF);
+ kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI, user_output >> 32);
kbase_csf_firmware_cs_input(stream, CS_CONFIG,
(queue->doorbell_nr << 8) | (queue->priority & 0xF));
@@ -1608,8 +1611,10 @@ static void program_cs(struct kbase_device *kbdev,
* or protected mode switch.
*/
kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
- CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK,
- CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK);
+ CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK |
+ CS_REQ_IDLE_SHARED_SB_DEC_MASK,
+ CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK |
+ CS_REQ_IDLE_SHARED_SB_DEC_MASK);
/* Set state to START/STOP */
kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
@@ -1624,6 +1629,20 @@ static void program_cs(struct kbase_device *kbdev,
update_hw_active(queue, true);
}
+static int onslot_csg_add_new_queue(struct kbase_queue *queue)
+{
+ struct kbase_device *kbdev = queue->kctx->kbdev;
+ int err;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ err = kbase_csf_mcu_shared_add_queue(kbdev, queue);
+ if (!err)
+ program_cs(kbdev, queue, true);
+
+ return err;
+}
+
int kbase_csf_scheduler_queue_start(struct kbase_queue *queue)
{
struct kbase_queue_group *group = queue->group;
@@ -1679,8 +1698,28 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue)
* user door-bell on such a case.
*/
kbase_csf_ring_cs_user_doorbell(kbdev, queue);
- } else
- program_cs(kbdev, queue, true);
+ } else {
+ err = onslot_csg_add_new_queue(queue);
+ /* For an on-slot CSG, the only error in adding a new
+ * queue to run is that the scheduler could not map
+ * the required userio pages, likely due to some
+ * resource issue. In such a case, and if the group is
+ * yet to enter its fatal error state, we return -EBUSY
+ * to the submitter for another kick. The queue itself
+ * has yet to be programmed, hence needs to remain in
+ * its previous (disabled) state. If the error persists,
+ * the group will eventually report a fatal error via
+ * the group's error reporting mechanism, when the MCU
+ * shared region map retry limit of the group is
+ * exceeded. For such a case, the expected error value
+ * is -EIO.
+ */
+ if (unlikely(err)) {
+ queue->enabled = cs_enabled;
+ mutex_unlock(&kbdev->csf.scheduler.lock);
+ return (err != -EIO) ? -EBUSY : err;
+ }
+ }
}
queue_delayed_work(system_long_wq, &kbdev->csf.scheduler.ping_work,
msecs_to_jiffies(kbase_get_timeout_ms(
@@ -1891,9 +1930,12 @@ static bool evaluate_sync_update(struct kbase_queue *queue)
struct kbase_vmap_struct *mapping;
bool updated = false;
u32 *sync_ptr;
+ u32 sync_wait_size;
+ u32 sync_wait_align_mask;
u32 sync_wait_cond;
u32 sync_current_val;
struct kbase_device *kbdev;
+ bool sync_wait_align_valid = false;
bool sync_wait_cond_valid = false;
if (WARN_ON(!queue))
@@ -1903,6 +1945,16 @@ static bool evaluate_sync_update(struct kbase_queue *queue)
lockdep_assert_held(&kbdev->csf.scheduler.lock);
+ sync_wait_size = CS_STATUS_WAIT_SYNC_WAIT_SIZE_GET(queue->status_wait);
+ sync_wait_align_mask =
+ (sync_wait_size == 0 ? BASEP_EVENT32_ALIGN_BYTES : BASEP_EVENT64_ALIGN_BYTES) - 1;
+ sync_wait_align_valid = ((uintptr_t)queue->sync_ptr & sync_wait_align_mask) == 0;
+ if (!sync_wait_align_valid) {
+ dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX is misaligned",
+ queue->sync_ptr);
+ goto out;
+ }
+
sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr,
&mapping);
@@ -1987,7 +2039,7 @@ bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo,
KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group,
queue, status);
- if (CS_STATUS_WAIT_SYNC_WAIT_GET(status)) {
+ if (CS_STATUS_WAIT_SYNC_WAIT_GET(status) || CS_STATUS_WAIT_SB_MASK_GET(status)) {
queue->status_wait = status;
queue->sync_ptr = kbase_csf_firmware_cs_output(stream,
CS_STATUS_WAIT_SYNC_POINTER_LO);
@@ -2003,7 +2055,8 @@ bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo,
kbase_csf_firmware_cs_output(stream,
CS_STATUS_BLOCKED_REASON));
- if (!evaluate_sync_update(queue)) {
+ if ((queue->blocked_reason == CS_STATUS_BLOCKED_ON_SB_WAIT) ||
+ !evaluate_sync_update(queue)) {
is_waiting = true;
} else {
/* Sync object already got updated & met the condition
@@ -2434,9 +2487,14 @@ static void save_csg_slot(struct kbase_queue_group *group)
if (!queue || !queue->enabled)
continue;
- if (save_slot_cs(ginfo, queue))
- sync_wait = true;
- else {
+ if (save_slot_cs(ginfo, queue)) {
+ /* sync_wait is only true if the queue is blocked on
+ * a CQS and not a scoreboard.
+ */
+ if (queue->blocked_reason !=
+ CS_STATUS_BLOCKED_ON_SB_WAIT)
+ sync_wait = true;
+ } else {
/* Need to confirm if ringbuffer of the GPU
* queue is empty or not. A race can arise
* between the flush of GPU queue and suspend
@@ -2550,6 +2608,11 @@ static bool cleanup_csg_slot(struct kbase_queue_group *group)
KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG(kbdev,
kbdev->gpu_props.props.raw_props.gpu_id, slot);
+ /* Notify that the group is off-slot and the csg_reg might be available for
+ * reuse with other groups in a 'lazy unbinding' style.
+ */
+ kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group);
+
return as_fault;
}
@@ -2633,8 +2696,8 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
u32 state;
int i;
unsigned long flags;
- const u64 normal_suspend_buf =
- group->normal_suspend_buf.reg->start_pfn << PAGE_SHIFT;
+ u64 normal_suspend_buf;
+ u64 protm_suspend_buf;
struct kbase_csf_csg_slot *csg_slot =
&kbdev->csf.scheduler.csg_slots[slot];
@@ -2646,6 +2709,19 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_READY);
+ if (unlikely(kbase_csf_mcu_shared_group_bind_csg_reg(kbdev, group))) {
+ dev_warn(kbdev->dev,
+ "Couldn't bind MCU shared csg_reg for group %d of context %d_%d, slot=%u",
+ group->handle, group->kctx->tgid, kctx->id, slot);
+ kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group);
+ return;
+ }
+
+ /* The suspend buf has already been mapped through binding to csg_reg */
+ normal_suspend_buf = group->normal_suspend_buf.gpu_va;
+ protm_suspend_buf = group->protected_suspend_buf.gpu_va;
+ WARN_ONCE(!normal_suspend_buf, "Normal suspend buffer not mapped");
+
ginfo = &global_iface->groups[slot];
/* Pick an available address space for this context */
@@ -2658,6 +2734,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
if (kctx->as_nr == KBASEP_AS_NR_INVALID) {
dev_warn(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n",
group->handle, kctx->tgid, kctx->id, slot);
+ kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group);
return;
}
@@ -2708,15 +2785,15 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_HI,
normal_suspend_buf >> 32);
- if (group->protected_suspend_buf.reg) {
- const u64 protm_suspend_buf =
- group->protected_suspend_buf.reg->start_pfn <<
- PAGE_SHIFT;
- kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO,
- protm_suspend_buf & U32_MAX);
- kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI,
- protm_suspend_buf >> 32);
- }
+ /* Note, we program the P-mode buffer pointer here, but actual entry into
+ * P-mode execution is only possible once the P-mode phy pages have been
+ * allocated and mapped with the bound csg_reg, which carries a specific flag
+ * indicating this P-mode runnable condition before a group is granted its
+ * P-mode section entry. Without a P-mode entry, the buffer pointed to is
+ * never accessed at all.
+ */
+ kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO, protm_suspend_buf & U32_MAX);
+ kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI, protm_suspend_buf >> 32);
if (group->dvs_buf) {
kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_LO,
@@ -2769,6 +2846,9 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
/* Programming a slot consumes a group from scanout */
update_offslot_non_idle_cnt_for_onslot_grp(group);
+
+ /* Notify the group's bound csg_reg is now in active use */
+ kbase_csf_mcu_shared_set_group_csg_reg_active(kbdev, group);
}
static void remove_scheduled_group(struct kbase_device *kbdev,
@@ -2846,6 +2926,9 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault,
}
kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(group);
+
+ /* Clear all the bound shared regions and unmap any in-place MMU maps */
+ kbase_csf_mcu_shared_clear_evicted_group_csg_reg(kbdev, group);
}
static int term_group_sync(struct kbase_queue_group *group)
@@ -3942,6 +4025,15 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER, input_grp,
0u);
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+
+ /* Coresight must be disabled before entering protected mode. */
+ kbase_debug_coresight_csf_disable_pmode_enter(kbdev);
+
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
+
kbase_csf_enter_protected_mode(kbdev);
/* Set the pending protm seq number to the next one */
protm_enter_set_next_pending_seq(kbdev);
@@ -6331,7 +6423,7 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev)
return -ENOMEM;
}
- return 0;
+ return kbase_csf_mcu_shared_regs_data_init(kbdev);
}
int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
@@ -6431,6 +6523,8 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev)
}
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_TERMINATED, NULL,
kbase_csf_scheduler_get_nr_active_csgs(kbdev));
+ /* Terminate the MCU shared regions, now that the slots have been released */
+ kbase_csf_mcu_shared_regs_data_term(kbdev);
}
void kbase_csf_scheduler_early_term(struct kbase_device *kbdev)
diff --git a/mali_kbase/csf/mali_kbase_csf_sync_debugfs.c b/mali_kbase/csf/mali_kbase_csf_sync_debugfs.c
new file mode 100644
index 0000000..a5e0ab5
--- /dev/null
+++ b/mali_kbase/csf/mali_kbase_csf_sync_debugfs.c
@@ -0,0 +1,788 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "mali_kbase_csf_sync_debugfs.h"
+#include "mali_kbase_csf_csg_debugfs.h"
+#include <mali_kbase.h>
+#include <linux/seq_file.h>
+
+#if IS_ENABLED(CONFIG_SYNC_FILE)
+#include "mali_kbase_sync.h"
+#endif
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+
+#define CQS_UNREADABLE_LIVE_VALUE "(unavailable)"
+
+/* GPU queue related values */
+#define GPU_CSF_MOVE_OPCODE ((u64)0x1)
+#define GPU_CSF_MOVE32_OPCODE ((u64)0x2)
+#define GPU_CSF_SYNC_ADD_OPCODE ((u64)0x25)
+#define GPU_CSF_SYNC_SET_OPCODE ((u64)0x26)
+#define GPU_CSF_SYNC_WAIT_OPCODE ((u64)0x27)
+#define GPU_CSF_SYNC_ADD64_OPCODE ((u64)0x33)
+#define GPU_CSF_SYNC_SET64_OPCODE ((u64)0x34)
+#define GPU_CSF_SYNC_WAIT64_OPCODE ((u64)0x35)
+#define GPU_CSF_CALL_OPCODE ((u64)0x20)
+
+#define MAX_NR_GPU_CALLS (5)
+#define INSTR_OPCODE_MASK ((u64)0xFF << 56)
+#define INSTR_OPCODE_GET(value) ((value & INSTR_OPCODE_MASK) >> 56)
+#define MOVE32_IMM_MASK ((u64)0xFFFFFFFFUL)
+#define MOVE_DEST_MASK ((u64)0xFF << 48)
+#define MOVE_DEST_GET(value) ((value & MOVE_DEST_MASK) >> 48)
+#define MOVE_IMM_MASK ((u64)0xFFFFFFFFFFFFUL)
+#define SYNC_SRC0_MASK ((u64)0xFF << 40)
+#define SYNC_SRC1_MASK ((u64)0xFF << 32)
+#define SYNC_SRC0_GET(value) (u8)((value & SYNC_SRC0_MASK) >> 40)
+#define SYNC_SRC1_GET(value) (u8)((value & SYNC_SRC1_MASK) >> 32)
+#define SYNC_WAIT_CONDITION_MASK ((u64)0xF << 28)
+#define SYNC_WAIT_CONDITION_GET(value) (u8)((value & SYNC_WAIT_CONDITION_MASK) >> 28)
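+
+/* The masks above imply the following instruction layout: opcode in bits
+ * [63:56], MOVE destination register in bits [55:48], SYNC source registers
+ * in bits [47:40] and [39:32], SYNC wait condition in bits [31:28], and the
+ * MOVE immediate in the low 48 (or, for MOVE32, 32) bits.
+ *
+ * Illustrative decode, with hypothetical register numbers:
+ *   instr = 0x2700101100000000
+ *   INSTR_OPCODE_GET(instr) == 0x27 (SYNC_WAIT)
+ *   SYNC_SRC0_GET(instr)    == 0x10 (register holding the CQS address)
+ *   SYNC_SRC1_GET(instr)    == 0x11 (register holding the compare value)
+ */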
+
+/* Enumeration for types of GPU queue sync events for
+ * the purpose of dumping them through debugfs.
+ */
+enum debugfs_gpu_sync_type {
+ DEBUGFS_GPU_SYNC_WAIT,
+ DEBUGFS_GPU_SYNC_SET,
+ DEBUGFS_GPU_SYNC_ADD,
+ NUM_DEBUGFS_GPU_SYNC_TYPES
+};
+
+/**
+ * kbasep_csf_debugfs_get_cqs_live_u32() - Obtain live (u32) value for a CQS object.
+ *
+ * @kctx: The context of the queue.
+ * @obj_addr: GPU address of the CQS object's live 32-bit value.
+ * @live_val: Pointer to the u32 that will be set to the CQS object's current, live
+ * value.
+ *
+ * Return: 0 if successful or a negative error code on failure.
+ */
+static int kbasep_csf_debugfs_get_cqs_live_u32(struct kbase_context *kctx, u64 obj_addr,
+ u32 *live_val)
+{
+ struct kbase_vmap_struct *mapping;
+ u32 *const cpu_ptr = (u32 *)kbase_phy_alloc_mapping_get(kctx, obj_addr, &mapping);
+
+ if (!cpu_ptr)
+ return -1;
+
+ *live_val = *cpu_ptr;
+ kbase_phy_alloc_mapping_put(kctx, mapping);
+ return 0;
+}
+
+/**
+ * kbasep_csf_debugfs_get_cqs_live_u64() - Obtain live (u64) value for a CQS object.
+ *
+ * @kctx: The context of the queue.
+ * @obj_addr: GPU address of the CQS object's live value (32 or 64-bit).
+ * @live_val: Pointer to the u64 that will be set to the CQS object's current, live
+ * value.
+ *
+ * Return: 0 if successful or a negative error code on failure.
+ */
+static int kbasep_csf_debugfs_get_cqs_live_u64(struct kbase_context *kctx, u64 obj_addr,
+ u64 *live_val)
+{
+ struct kbase_vmap_struct *mapping;
+ u64 *cpu_ptr = (u64 *)kbase_phy_alloc_mapping_get(kctx, obj_addr, &mapping);
+
+ if (!cpu_ptr)
+ return -1;
+
+ *live_val = *cpu_ptr;
+ kbase_phy_alloc_mapping_put(kctx, mapping);
+ return 0;
+}
+
+/**
+ * kbasep_csf_sync_print_kcpu_fence_wait_or_signal() - Print details of a CSF SYNC Fence Wait
+ * or Fence Signal command, contained in a
+ * KCPU queue.
+ *
+ * @file: The seq_file for printing to.
+ * @cmd: The KCPU Command to be printed.
+ * @cmd_name: The name of the command: indicates either a fence SIGNAL or WAIT.
+ */
+static void kbasep_csf_sync_print_kcpu_fence_wait_or_signal(struct seq_file *file,
+ struct kbase_kcpu_command *cmd,
+ const char *cmd_name)
+{
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ struct fence *fence = NULL;
+#else
+ struct dma_fence *fence = NULL;
+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */
+
+ struct kbase_sync_fence_info info;
+ const char *timeline_name = NULL;
+ bool is_signaled = false;
+
+ fence = cmd->info.fence.fence;
+ if (WARN_ON(!fence))
+ return;
+
+ kbase_sync_fence_info_get(cmd->info.fence.fence, &info);
+ timeline_name = fence->ops->get_timeline_name(fence);
+ is_signaled = info.status > 0;
+
+ seq_printf(file, "cmd:%s obj:0x%pK live_value:0x%.8x | ", cmd_name, cmd->info.fence.fence,
+ is_signaled);
+
+ /* Note: fence->seqno was u32 until 5.1 kernel, then u64 */
+ seq_printf(file, "timeline_name:%s timeline_context:0x%.16llx fence_seqno:0x%.16llx",
+ timeline_name, fence->context, (u64)fence->seqno);
+}
+
+/**
+ * kbasep_csf_sync_print_kcpu_cqs_wait() - Print details of a CSF SYNC CQS Wait command,
+ * contained in a KCPU queue.
+ *
+ * @file: The seq_file for printing to.
+ * @cmd: The KCPU Command to be printed.
+ */
+static void kbasep_csf_sync_print_kcpu_cqs_wait(struct seq_file *file,
+ struct kbase_kcpu_command *cmd)
+{
+ struct kbase_context *kctx = file->private;
+ size_t i;
+
+ for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) {
+ struct base_cqs_wait_info *cqs_obj = &cmd->info.cqs_wait.objs[i];
+
+ u32 live_val;
+ int ret = kbasep_csf_debugfs_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val);
+ bool live_val_valid = (ret >= 0);
+
+ seq_printf(file, "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr);
+
+ if (live_val_valid)
+ seq_printf(file, "0x%.16llx", (u64)live_val);
+ else
+ seq_puts(file, CQS_UNREADABLE_LIVE_VALUE);
+
+ seq_printf(file, " | op:gt arg_value:0x%.8x", cqs_obj->val);
+ }
+}
+
+/**
+ * kbasep_csf_sync_print_kcpu_cqs_set() - Print details of a CSF SYNC CQS
+ * Set command, contained in a KCPU queue.
+ *
+ * @file: The seq_file for printing to.
+ * @cmd: The KCPU Command to be printed.
+ */
+static void kbasep_csf_sync_print_kcpu_cqs_set(struct seq_file *file,
+ struct kbase_kcpu_command *cmd)
+{
+ struct kbase_context *kctx = file->private;
+ size_t i;
+
+ for (i = 0; i < cmd->info.cqs_set.nr_objs; i++) {
+ struct base_cqs_set *cqs_obj = &cmd->info.cqs_set.objs[i];
+
+ u32 live_val;
+ int ret = kbasep_csf_debugfs_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val);
+ bool live_val_valid = (ret >= 0);
+
+ seq_printf(file, "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr);
+
+ if (live_val_valid)
+ seq_printf(file, "0x%.16llx", (u64)live_val);
+ else
+ seq_puts(file, CQS_UNREADABLE_LIVE_VALUE);
+
+ seq_printf(file, " | op:add arg_value:0x%.8x", 1);
+ }
+}
+
+/**
+ * kbasep_csf_sync_get_wait_op_name() - Get the name of a CQS Wait Operation.
+ *
+ * @op: The numerical value of operation.
+ *
+ * Return: Pointer to a static string holding the operation name, or "??" if unknown.
+ */
+static const char *kbasep_csf_sync_get_wait_op_name(basep_cqs_wait_operation_op op)
+{
+ const char *string;
+
+ switch (op) {
+ case BASEP_CQS_WAIT_OPERATION_LE:
+ string = "le";
+ break;
+ case BASEP_CQS_WAIT_OPERATION_GT:
+ string = "gt";
+ break;
+ default:
+ string = "??";
+ break;
+ }
+ return string;
+}
+
+/**
+ * kbasep_csf_sync_get_set_op_name() - Get the name of a CQS Set Operation.
+ *
+ * @op: The numerical value of operation.
+ *
+ * Return: Pointer to a static string holding the operation name, or "???" if unknown.
+ */
+static const char *kbasep_csf_sync_get_set_op_name(basep_cqs_set_operation_op op)
+{
+ const char *string;
+
+ switch (op) {
+ case BASEP_CQS_SET_OPERATION_ADD:
+ string = "add";
+ break;
+ case BASEP_CQS_SET_OPERATION_SET:
+ string = "set";
+ break;
+ default:
+ string = "???";
+ break;
+ }
+ return string;
+}
+
+/**
+ * kbasep_csf_sync_print_kcpu_cqs_wait_op() - Print details of a CSF SYNC CQS
+ * Wait Operation command, contained
+ * in a KCPU queue.
+ *
+ * @file: The seq_file for printing to.
+ * @cmd: The KCPU Command to be printed.
+ */
+static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct seq_file *file,
+ struct kbase_kcpu_command *cmd)
+{
+ size_t i;
+ struct kbase_context *kctx = file->private;
+
+ for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) {
+ struct base_cqs_wait_operation_info *wait_op =
+ &cmd->info.cqs_wait_operation.objs[i];
+ const char *op_name = kbasep_csf_sync_get_wait_op_name(wait_op->operation);
+
+ u64 live_val;
+ int ret = kbasep_csf_debugfs_get_cqs_live_u64(kctx, wait_op->addr, &live_val);
+
+ bool live_val_valid = (ret >= 0);
+
+ seq_printf(file, "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", wait_op->addr);
+
+ if (live_val_valid)
+ seq_printf(file, "0x%.16llx", live_val);
+ else
+ seq_puts(file, CQS_UNREADABLE_LIVE_VALUE);
+
+ seq_printf(file, " | op:%s arg_value:0x%.16llx", op_name, wait_op->val);
+ }
+}
+
+/**
+ * kbasep_csf_sync_print_kcpu_cqs_set_op() - Print details of a CSF SYNC CQS
+ * Set Operation command, contained
+ * in a KCPU queue.
+ *
+ * @file: The seq_file for printing to.
+ * @cmd: The KCPU Command to be printed.
+ */
+static void kbasep_csf_sync_print_kcpu_cqs_set_op(struct seq_file *file,
+ struct kbase_kcpu_command *cmd)
+{
+ size_t i;
+ struct kbase_context *kctx = file->private;
+
+ for (i = 0; i < cmd->info.cqs_set_operation.nr_objs; i++) {
+ struct base_cqs_set_operation_info *set_op = &cmd->info.cqs_set_operation.objs[i];
+ const char *op_name = kbasep_csf_sync_get_set_op_name(
+ (basep_cqs_set_operation_op)set_op->operation);
+
+ u64 live_val;
+ int ret = kbasep_csf_debugfs_get_cqs_live_u64(kctx, set_op->addr, &live_val);
+
+ bool live_val_valid = (ret >= 0);
+
+ seq_printf(file, "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", set_op->addr);
+
+ if (live_val_valid)
+ seq_printf(file, "0x%.16llx", live_val);
+ else
+ seq_puts(file, CQS_UNREADABLE_LIVE_VALUE);
+
+ seq_printf(file, " | op:%s arg_value:0x%.16llx", op_name, set_op->val);
+ }
+}
+
+/**
+ * kbasep_csf_sync_kcpu_debugfs_print_queue() - Print debug data for a KCPU queue
+ *
+ * @file: The seq_file to print to.
+ * @queue: Pointer to the KCPU queue.
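+ *
+ * Each command is printed as a single line. An illustrative (not captured)
+ * example for a CQS wait operation, wrapped here for readability:
+ *   queue:KCPU-1-0 exec:P cmd:CQS_WAIT_OPERATION obj:0x0000007fab800000
+ *   live_value:0x0000000000000000 | op:gt arg_value:0x0000000000000001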
+ */
+static void kbasep_csf_sync_kcpu_debugfs_print_queue(struct seq_file *file,
+ struct kbase_kcpu_command_queue *queue)
+{
+ char started_or_pending;
+ struct kbase_kcpu_command *cmd;
+ struct kbase_context *kctx = file->private;
+ size_t i;
+
+ if (WARN_ON(!queue))
+ return;
+
+ lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ mutex_lock(&queue->lock);
+
+ for (i = 0; i != queue->num_pending_cmds; ++i) {
+ started_or_pending = ((i == 0) && queue->command_started) ? 'S' : 'P';
+ seq_printf(file, "queue:KCPU-%u-%u exec:%c ", kctx->id, queue->id,
+ started_or_pending);
+
+ cmd = &queue->commands[queue->start_offset + i];
+ switch (cmd->type) {
+#if IS_ENABLED(CONFIG_SYNC_FILE)
+ case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL:
+ kbasep_csf_sync_print_kcpu_fence_wait_or_signal(file, cmd, "FENCE_SIGNAL");
+ break;
+ case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:
+ kbasep_csf_sync_print_kcpu_fence_wait_or_signal(file, cmd, "FENCE_WAIT");
+ break;
+#endif
+ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT:
+ kbasep_csf_sync_print_kcpu_cqs_wait(file, cmd);
+ break;
+ case BASE_KCPU_COMMAND_TYPE_CQS_SET:
+ kbasep_csf_sync_print_kcpu_cqs_set(file, cmd);
+ break;
+ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION:
+ kbasep_csf_sync_print_kcpu_cqs_wait_op(file, cmd);
+ break;
+ case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION:
+ kbasep_csf_sync_print_kcpu_cqs_set_op(file, cmd);
+ break;
+ default:
+ seq_puts(file, ", U, Unknown blocking command");
+ break;
+ }
+
+ seq_puts(file, "\n");
+ }
+
+ mutex_unlock(&queue->lock);
+}
+
+/**
+ * kbasep_csf_sync_kcpu_debugfs_show() - Print CSF KCPU queue sync info
+ *
+ * @file: The seq_file for printing to.
+ *
+ * Return: Negative error code or 0 on success.
+ */
+static int kbasep_csf_sync_kcpu_debugfs_show(struct seq_file *file)
+{
+ struct kbase_context *kctx = file->private;
+ unsigned long queue_idx;
+
+ mutex_lock(&kctx->csf.kcpu_queues.lock);
+ seq_printf(file, "KCPU queues for ctx %u:\n", kctx->id);
+
+ queue_idx = find_first_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES);
+
+ while (queue_idx < KBASEP_MAX_KCPU_QUEUES) {
+ kbasep_csf_sync_kcpu_debugfs_print_queue(file,
+ kctx->csf.kcpu_queues.array[queue_idx]);
+
+ queue_idx = find_next_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES,
+ queue_idx + 1);
+ }
+
+ mutex_unlock(&kctx->csf.kcpu_queues.lock);
+ return 0;
+}
+
+/**
+ * kbasep_csf_get_move_immediate_value() - Get the immediate values for sync operations
+ * from a MOVE instruction.
+ *
+ * @move_cmd: Raw MOVE instruction.
+ * @sync_addr_reg: Register identifier from SYNC_* instruction.
+ * @compare_val_reg: Register identifier from SYNC_* instruction.
+ * @sync_val: Pointer to store CQS object address for sync operation.
+ * @compare_val: Pointer to store compare value for sync operation.
+ *
+ * Return: True if value is obtained by checking for correct register identifier,
+ * or false otherwise.
+ */
+static bool kbasep_csf_get_move_immediate_value(u64 move_cmd, u64 sync_addr_reg,
+ u64 compare_val_reg, u64 *sync_val,
+ u64 *compare_val)
+{
+ u64 imm_mask;
+
+ /* Verify MOVE instruction and get immediate mask */
+ if (INSTR_OPCODE_GET(move_cmd) == GPU_CSF_MOVE32_OPCODE)
+ imm_mask = MOVE32_IMM_MASK;
+ else if (INSTR_OPCODE_GET(move_cmd) == GPU_CSF_MOVE_OPCODE)
+ imm_mask = MOVE_IMM_MASK;
+ else
+ /* Error return */
+ return false;
+
+ /* Verify value from MOVE instruction and assign to variable */
+ if (sync_addr_reg == MOVE_DEST_GET(move_cmd))
+ *sync_val = move_cmd & imm_mask;
+ else if (compare_val_reg == MOVE_DEST_GET(move_cmd))
+ *compare_val = move_cmd & imm_mask;
+ else
+ /* Error return */
+ return false;
+
+ return true;
+}
+
+/**
+ * kbasep_csf_read_ringbuffer_value() - Reads a u64 from the ringbuffer at a provided
+ * offset.
+ *
+ * @queue: Pointer to the queue.
+ * @ringbuff_offset: Ringbuffer offset.
+ *
+ * Return: the u64 in the ringbuffer at the desired offset.
+ */
+static u64 kbasep_csf_read_ringbuffer_value(struct kbase_queue *queue, u32 ringbuff_offset)
+{
+ u64 page_off = ringbuff_offset >> PAGE_SHIFT;
+ u64 offset_within_page = ringbuff_offset & ~PAGE_MASK;
+ struct page *page = as_page(queue->queue_reg->gpu_alloc->pages[page_off]);
+ u64 *ringbuffer = kmap_atomic(page);
+ u64 value = ringbuffer[offset_within_page / sizeof(u64)];
+
+ kunmap_atomic(ringbuffer);
+ return value;
+}
+
+/**
+ * kbasep_csf_print_gpu_sync_op() - Print sync operation info for given sync command.
+ *
+ * @file: Pointer to debugfs seq_file file struct for writing output.
+ * @kctx: Pointer to kbase context.
+ * @queue: Pointer to the GPU command queue.
+ * @ringbuff_offset: Offset to index the ring buffer with, for the given sync command.
+ * (Useful for finding preceding MOVE commands)
+ * @sync_cmd: Entire u64 of the sync command, which has both sync address and
+ * comparison-value encoded in it.
+ * @type: Type of GPU sync command (e.g. SYNC_SET, SYNC_ADD, SYNC_WAIT).
+ * @is_64bit: Bool to indicate if operation is 64 bit (true) or 32 bit (false).
+ * @follows_wait: Bool to indicate if the operation follows at least one wait
+ * operation. Used to determine whether it's pending or started.
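+ *
+ * An illustrative (not captured) output line for a decoded SYNC_WAIT, wrapped
+ * here for readability (the function emits it as a single line):
+ *   queue:GPU-1-0-2 exec:S cmd:SYNC_WAIT slot:3 obj:0x0000007fabc01000
+ *   live_value:0x0000000000000000 | op:gt arg_value:0x0000000000000001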
+ */
+static void kbasep_csf_print_gpu_sync_op(struct seq_file *file, struct kbase_context *kctx,
+ struct kbase_queue *queue, u32 ringbuff_offset,
+ u64 sync_cmd, enum debugfs_gpu_sync_type type,
+ bool is_64bit, bool follows_wait)
+{
+ u64 sync_addr = 0, compare_val = 0, live_val = 0;
+ u64 move_cmd;
+ u8 sync_addr_reg, compare_val_reg, wait_condition = 0;
+ int err;
+
+ static const char *const gpu_sync_type_name[] = { "SYNC_WAIT", "SYNC_SET", "SYNC_ADD" };
+ static const char *const gpu_sync_type_op[] = {
+ "wait", /* This should never be printed, only included to simplify indexing */
+ "set", "add"
+ };
+
+ if (type >= NUM_DEBUGFS_GPU_SYNC_TYPES) {
+ dev_warn(kctx->kbdev->dev, "Expected GPU queue sync type is unknown!");
+ return;
+ }
+
+ /* We expect at least 2 MOVE instructions to immediately precede the
+ * SYNC_* instruction. Base always arranges for the 2 MOVE + SYNC
+ * instructions to be contiguous, so the sequence is never expected to
+ * wrap around the ringbuffer boundary.
+ */
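+ /* Expected layout in the ring buffer (one u64 per entry), with the
+ * register numbers taken from the SYNC_* instruction itself:
+ *   [offset - 16] MOVE/MOVE32 dest=<addr reg>,  imm=<CQS address>
+ *   [offset -  8] MOVE/MOVE32 dest=<value reg>, imm=<compare value>
+ *   [offset     ] SYNC_*      src0=<addr reg>,  src1=<value reg>
+ * The two MOVEs may appear in either order; steps 2 and 3 below match
+ * each one against the register identifiers decoded from the SYNC_*
+ * command.
+ */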
+ if (unlikely(ringbuff_offset < (2 * sizeof(u64)))) {
+ dev_warn(kctx->kbdev->dev,
+ "Unexpected wraparound detected between %s & MOVE instruction",
+ gpu_sync_type_name[type]);
+ return;
+ }
+
+ /* 1. Get Register identifiers from SYNC_* instruction */
+ sync_addr_reg = SYNC_SRC0_GET(sync_cmd);
+ compare_val_reg = SYNC_SRC1_GET(sync_cmd);
+
+ /* 2. Get values from first MOVE command */
+ ringbuff_offset -= sizeof(u64);
+ move_cmd = kbasep_csf_read_ringbuffer_value(queue, ringbuff_offset);
+ if (!kbasep_csf_get_move_immediate_value(move_cmd, sync_addr_reg, compare_val_reg,
+ &sync_addr, &compare_val))
+ return;
+
+ /* 3. Get values from next MOVE command */
+ ringbuff_offset -= sizeof(u64);
+ move_cmd = kbasep_csf_read_ringbuffer_value(queue, ringbuff_offset);
+ if (!kbasep_csf_get_move_immediate_value(move_cmd, sync_addr_reg, compare_val_reg,
+ &sync_addr, &compare_val))
+ return;
+
+ /* 4. Get CQS object value */
+ if (is_64bit)
+ err = kbasep_csf_debugfs_get_cqs_live_u64(kctx, sync_addr, &live_val);
+ else
+ err = kbasep_csf_debugfs_get_cqs_live_u32(kctx, sync_addr, (u32 *)(&live_val));
+
+ if (err)
+ return;
+
+ /* 5. Print info */
+ seq_printf(file, "queue:GPU-%u-%u-%u exec:%c cmd:%s ", kctx->id, queue->group->handle,
+ queue->csi_index, queue->enabled && !follows_wait ? 'S' : 'P',
+ gpu_sync_type_name[type]);
+
+ if (queue->group->csg_nr == KBASEP_CSG_NR_INVALID)
+ seq_puts(file, "slot:-");
+ else
+ seq_printf(file, "slot:%d", (int)queue->group->csg_nr);
+
+ seq_printf(file, " obj:0x%.16llx live_value:0x%.16llx | ", sync_addr, live_val);
+
+ if (type == DEBUGFS_GPU_SYNC_WAIT) {
+ wait_condition = SYNC_WAIT_CONDITION_GET(sync_cmd);
+ seq_printf(file, "op:%s ", kbasep_csf_sync_get_wait_op_name(wait_condition));
+ } else
+ seq_printf(file, "op:%s ", gpu_sync_type_op[type]);
+
+ seq_printf(file, "arg_value:0x%.16llx\n", compare_val);
+}
+
+/**
+ * kbasep_csf_dump_active_queue_sync_info() - Print GPU command queue sync information.
+ *
+ * @file: seq_file for printing to.
+ * @queue: Address of a GPU command queue to examine.
+ *
+ * This function will iterate through each command in the ring buffer of the given GPU queue from
+ * CS_EXTRACT, and if it is a SYNC_* instruction it will attempt to decode the sync operation and
+ * print relevant information to the debugfs file.
+ * This function will stop iterating once the cursor reaches the CS_INSERT address (i.e. when
+ * there are no more commands to view) or once MAX_NR_GPU_CALLS GPU CALL commands have been
+ * traversed.
+ */
+static void kbasep_csf_dump_active_queue_sync_info(struct seq_file *file, struct kbase_queue *queue)
+{
+ struct kbase_context *kctx;
+ u32 *addr;
+ u64 cs_extract, cs_insert, instr, cursor;
+ bool follows_wait = false;
+ int nr_calls = 0;
+
+ if (!queue)
+ return;
+
+ kctx = queue->kctx;
+
+ addr = (u32 *)queue->user_io_addr;
+ cs_insert = addr[CS_INSERT_LO / 4] | ((u64)addr[CS_INSERT_HI / 4] << 32);
+
+ addr = (u32 *)(queue->user_io_addr + PAGE_SIZE);
+ cs_extract = addr[CS_EXTRACT_LO / 4] | ((u64)addr[CS_EXTRACT_HI / 4] << 32);
+
+ cursor = cs_extract;
+
+ if (!is_power_of_2(queue->size)) {
+ dev_warn(kctx->kbdev->dev, "GPU queue %u size of %u not a power of 2",
+ queue->csi_index, queue->size);
+ return;
+ }
+
+ while ((cursor < cs_insert) && (nr_calls < MAX_NR_GPU_CALLS)) {
+ bool instr_is_64_bit = false;
+ /* Calculate offset into ringbuffer from the absolute cursor,
+ * by finding the remainder of the cursor divided by the
+ * ringbuffer size. The ringbuffer size is guaranteed to be
+ * a power of 2, so the remainder can be calculated without an
+ * explicit modulo. queue->size - 1 is the ringbuffer mask.
+ */
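+ /* For example, with queue->size = 0x1000 an absolute cursor of
+ * 0x2008 maps to ring buffer offset 0x008.
+ */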
+ u32 cursor_ringbuff_offset = (u32)(cursor & (queue->size - 1));
+
+ /* Find instruction that cursor is currently on */
+ instr = kbasep_csf_read_ringbuffer_value(queue, cursor_ringbuff_offset);
+
+ switch (INSTR_OPCODE_GET(instr)) {
+ case GPU_CSF_SYNC_ADD64_OPCODE:
+ case GPU_CSF_SYNC_SET64_OPCODE:
+ case GPU_CSF_SYNC_WAIT64_OPCODE:
+ instr_is_64_bit = true;
+ break;
+ default:
+ break;
+ }
+
+ switch (INSTR_OPCODE_GET(instr)) {
+ case GPU_CSF_SYNC_ADD_OPCODE:
+ case GPU_CSF_SYNC_ADD64_OPCODE:
+ kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset,
+ instr, DEBUGFS_GPU_SYNC_ADD, instr_is_64_bit,
+ follows_wait);
+ break;
+ case GPU_CSF_SYNC_SET_OPCODE:
+ case GPU_CSF_SYNC_SET64_OPCODE:
+ kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset,
+ instr, DEBUGFS_GPU_SYNC_SET, instr_is_64_bit,
+ follows_wait);
+ break;
+ case GPU_CSF_SYNC_WAIT_OPCODE:
+ case GPU_CSF_SYNC_WAIT64_OPCODE:
+ kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset,
+ instr, DEBUGFS_GPU_SYNC_WAIT, instr_is_64_bit,
+ follows_wait);
+ follows_wait = true; /* Future commands will follow at least one wait */
+ break;
+ case GPU_CSF_CALL_OPCODE:
+ nr_calls++;
+ fallthrough;
+ default:
+ /* Unrecognized command, skip past it */
+ break;
+ }
+
+ cursor += sizeof(u64);
+ }
+}
+
+/**
+ * kbasep_csf_dump_active_group_sync_state() - Prints SYNC commands in all GPU queues of
+ * the provided queue group.
+ *
+ * @file: seq_file for printing to.
+ * @group: Address of a GPU command group to iterate through.
+ *
+ * This function will iterate through each queue in the provided GPU queue group and
+ * print its SYNC related commands.
+ */
+static void kbasep_csf_dump_active_group_sync_state(struct seq_file *file,
+ struct kbase_queue_group *const group)
+{
+ struct kbase_context *kctx = file->private;
+ unsigned int i;
+
+ seq_printf(file, "GPU queues for group %u (slot %d) of ctx %d_%d\n", group->handle,
+ group->csg_nr, kctx->tgid, kctx->id);
+
+ for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++)
+ kbasep_csf_dump_active_queue_sync_info(file, group->bound_queues[i]);
+}
+
+/**
+ * kbasep_csf_sync_gpu_debugfs_show() - Print CSF GPU queue sync info
+ *
+ * @file: The seq_file for printing to.
+ *
+ * Return: Negative error code or 0 on success.
+ */
+static int kbasep_csf_sync_gpu_debugfs_show(struct seq_file *file)
+{
+ u32 gr;
+ struct kbase_context *kctx = file->private;
+ struct kbase_device *kbdev;
+
+ if (WARN_ON(!kctx))
+ return -EINVAL;
+
+ kbdev = kctx->kbdev;
+ kbase_csf_scheduler_lock(kbdev);
+ kbase_csf_debugfs_update_active_groups_status(kbdev);
+
+ for (gr = 0; gr < kbdev->csf.global_iface.group_num; gr++) {
+ struct kbase_queue_group *const group =
+ kbdev->csf.scheduler.csg_slots[gr].resident_group;
+ if (!group || group->kctx != kctx)
+ continue;
+ kbasep_csf_dump_active_group_sync_state(file, group);
+ }
+
+ kbase_csf_scheduler_unlock(kbdev);
+ return 0;
+}
+
+/**
+ * kbasep_csf_sync_debugfs_show() - Print CSF queue sync information
+ *
+ * @file: The seq_file for printing to.
+ * @data: The debugfs dentry private data, a pointer to kbase_context.
+ *
+ * Return: Negative error code or 0 on success.
+ */
+static int kbasep_csf_sync_debugfs_show(struct seq_file *file, void *data)
+{
+ seq_printf(file, "MALI_CSF_SYNC_DEBUGFS_VERSION: v%u\n", MALI_CSF_SYNC_DEBUGFS_VERSION);
+
+ kbasep_csf_sync_kcpu_debugfs_show(file);
+ kbasep_csf_sync_gpu_debugfs_show(file);
+ return 0;
+}
+
+static int kbasep_csf_sync_debugfs_open(struct inode *in, struct file *file)
+{
+ return single_open(file, kbasep_csf_sync_debugfs_show, in->i_private);
+}
+
+static const struct file_operations kbasep_csf_sync_debugfs_fops = {
+ .open = kbasep_csf_sync_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+/**
+ * kbase_csf_sync_debugfs_init() - Initialise debugfs file.
+ *
+ * @kctx: Kernel context pointer.
+ */
+void kbase_csf_sync_debugfs_init(struct kbase_context *kctx)
+{
+ struct dentry *file;
+ const mode_t mode = 0444;
+
+ if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry)))
+ return;
+
+ file = debugfs_create_file("csf_sync", mode, kctx->kctx_dentry, kctx,
+ &kbasep_csf_sync_debugfs_fops);
+
+ if (IS_ERR_OR_NULL(file))
+ dev_warn(kctx->kbdev->dev, "Unable to create CSF Sync debugfs entry");
+}
+
+#else
+/*
+ * Stub functions for when debugfs is disabled
+ */
+void kbase_csf_sync_debugfs_init(struct kbase_context *kctx)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/mali_kbase/csf/mali_kbase_csf_sync_debugfs.h b/mali_kbase/csf/mali_kbase_csf_sync_debugfs.h
new file mode 100644
index 0000000..177e15d
--- /dev/null
+++ b/mali_kbase/csf/mali_kbase_csf_sync_debugfs.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_CSF_SYNC_DEBUGFS_H_
+#define _KBASE_CSF_SYNC_DEBUGFS_H_
+
+/* Forward declaration */
+struct kbase_context;
+
+#define MALI_CSF_SYNC_DEBUGFS_VERSION 0
+
+/**
+ * kbase_csf_sync_debugfs_init() - Create a debugfs entry for CSF queue sync info
+ *
+ * @kctx: The kbase_context for which to create the debugfs entry
+ */
+void kbase_csf_sync_debugfs_init(struct kbase_context *kctx);
+
+#endif /* _KBASE_CSF_SYNC_DEBUGFS_H_ */
diff --git a/mali_kbase/debug/Kbuild b/mali_kbase/debug/Kbuild
index 1682c0f..8beee2d 100644
--- a/mali_kbase/debug/Kbuild
+++ b/mali_kbase/debug/Kbuild
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,7 @@ mali_kbase-y += debug/mali_kbase_debug_ktrace.o
ifeq ($(CONFIG_MALI_CSF_SUPPORT),y)
mali_kbase-y += debug/backend/mali_kbase_debug_ktrace_csf.o
+ mali_kbase-$(CONFIG_MALI_CORESIGHT) += debug/backend/mali_kbase_debug_coresight_csf.o
else
mali_kbase-y += debug/backend/mali_kbase_debug_ktrace_jm.o
endif
diff --git a/mali_kbase/debug/backend/mali_kbase_debug_coresight_csf.c b/mali_kbase/debug/backend/mali_kbase_debug_coresight_csf.c
new file mode 100644
index 0000000..ff5f947
--- /dev/null
+++ b/mali_kbase/debug/backend/mali_kbase_debug_coresight_csf.c
@@ -0,0 +1,851 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include <mali_kbase.h>
+#include <linux/slab.h>
+#include <csf/mali_kbase_csf_registers.h>
+#include <csf/mali_kbase_csf_firmware.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/mali_kbase_debug_coresight_csf.h>
+#include <debug/backend/mali_kbase_debug_coresight_internal_csf.h>
+
+static const char *coresight_state_to_string(enum kbase_debug_coresight_csf_state state)
+{
+ switch (state) {
+ case KBASE_DEBUG_CORESIGHT_CSF_DISABLED:
+ return "DISABLED";
+ case KBASE_DEBUG_CORESIGHT_CSF_ENABLED:
+ return "ENABLED";
+ default:
+ break;
+ }
+
+ return "UNKNOWN";
+}
+
+static bool validate_reg_addr(struct kbase_debug_coresight_csf_client *client,
+ struct kbase_device *kbdev, u32 reg_addr, u8 op_type)
+{
+ int i;
+
+ if (reg_addr & 0x3) {
+ dev_err(kbdev->dev, "Invalid operation %d: reg_addr (0x%x) not 32bit aligned",
+ op_type, reg_addr);
+ return false;
+ }
+
+ for (i = 0; i < client->nr_ranges; i++) {
+ struct kbase_debug_coresight_csf_address_range *range = &client->addr_ranges[i];
+
+ if ((range->start <= reg_addr) && (reg_addr <= range->end))
+ return true;
+ }
+
+ dev_err(kbdev->dev, "Invalid operation %d: reg_addr (0x%x) not in client range", op_type,
+ reg_addr);
+
+ return false;
+}
+
+static bool validate_op(struct kbase_debug_coresight_csf_client *client,
+ struct kbase_debug_coresight_csf_op *op)
+{
+ struct kbase_device *kbdev;
+ u32 reg;
+
+ if (!op)
+ return false;
+
+ if (!client)
+ return false;
+
+ kbdev = (struct kbase_device *)client->drv_data;
+
+ switch (op->type) {
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_NOP:
+ return true;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM:
+ if (validate_reg_addr(client, kbdev, op->op.write_imm.reg_addr, op->type))
+ return true;
+
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM_RANGE:
+ for (reg = op->op.write_imm_range.reg_start; reg <= op->op.write_imm_range.reg_end;
+ reg += sizeof(u32)) {
+ if (!validate_reg_addr(client, kbdev, reg, op->type))
+ return false;
+ }
+
+ return true;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE:
+ if (!op->op.write.ptr) {
+ dev_err(kbdev->dev, "Invalid operation %d: ptr not set", op->type);
+ break;
+ }
+
+ if (validate_reg_addr(client, kbdev, op->op.write.reg_addr, op->type))
+ return true;
+
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_READ:
+ if (!op->op.read.ptr) {
+ dev_err(kbdev->dev, "Invalid operation %d: ptr not set", op->type);
+ break;
+ }
+
+ if (validate_reg_addr(client, kbdev, op->op.read.reg_addr, op->type))
+ return true;
+
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_POLL:
+ if (validate_reg_addr(client, kbdev, op->op.poll.reg_addr, op->type))
+ return true;
+
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_AND:
+ fallthrough;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_OR:
+ fallthrough;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_XOR:
+ fallthrough;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_NOT:
+ if (op->op.bitw.ptr != NULL)
+ return true;
+
+ dev_err(kbdev->dev, "Invalid bitwise operation pointer");
+
+ break;
+ default:
+ dev_err(kbdev->dev, "Invalid operation %d", op->type);
+ break;
+ }
+
+ return false;
+}
+
+static bool validate_seq(struct kbase_debug_coresight_csf_client *client,
+ struct kbase_debug_coresight_csf_sequence *seq)
+{
+ struct kbase_debug_coresight_csf_op *ops = seq->ops;
+ int nr_ops = seq->nr_ops;
+ int i;
+
+ for (i = 0; i < nr_ops; i++) {
+ if (!validate_op(client, &ops[i]))
+ return false;
+ }
+
+ return true;
+}
+
+static int execute_op(struct kbase_device *kbdev, struct kbase_debug_coresight_csf_op *op)
+{
+ int result = -EINVAL;
+ u32 reg;
+
+ dev_dbg(kbdev->dev, "Execute operation %d", op->type);
+
+ switch (op->type) {
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_NOP:
+ result = 0;
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM:
+ result = kbase_csf_firmware_mcu_register_write(kbdev, op->op.write_imm.reg_addr,
+ op->op.write_imm.val);
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM_RANGE:
+ for (reg = op->op.write_imm_range.reg_start; reg <= op->op.write_imm_range.reg_end;
+ reg += sizeof(u32)) {
+ result = kbase_csf_firmware_mcu_register_write(kbdev, reg,
+ op->op.write_imm_range.val);
+ if (result)
+ break;
+ }
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE:
+ result = kbase_csf_firmware_mcu_register_write(kbdev, op->op.write.reg_addr,
+ *op->op.write.ptr);
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_READ:
+ result = kbase_csf_firmware_mcu_register_read(kbdev, op->op.read.reg_addr,
+ op->op.read.ptr);
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_POLL:
+ result = kbase_csf_firmware_mcu_register_poll(kbdev, op->op.poll.reg_addr,
+ op->op.poll.mask, op->op.poll.val);
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_AND:
+ *op->op.bitw.ptr &= op->op.bitw.val;
+ result = 0;
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_OR:
+ *op->op.bitw.ptr |= op->op.bitw.val;
+ result = 0;
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_XOR:
+ *op->op.bitw.ptr ^= op->op.bitw.val;
+ result = 0;
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_NOT:
+ *op->op.bitw.ptr = ~(*op->op.bitw.ptr);
+ result = 0;
+ break;
+ default:
+ dev_err(kbdev->dev, "Invalid operation %d", op->type);
+ break;
+ }
+
+ return result;
+}
+
+static int coresight_config_enable(struct kbase_device *kbdev,
+ struct kbase_debug_coresight_csf_config *config)
+{
+ int ret = 0;
+ int i;
+
+ if (!config)
+ return -EINVAL;
+
+ if (config->state == KBASE_DEBUG_CORESIGHT_CSF_ENABLED)
+ return ret;
+
+ for (i = 0; config->enable_seq && !ret && i < config->enable_seq->nr_ops; i++)
+ ret = execute_op(kbdev, &config->enable_seq->ops[i]);
+
+ if (!ret) {
+ dev_dbg(kbdev->dev, "Coresight config (0x%pK) state transition: %s to %s", config,
+ coresight_state_to_string(config->state),
+ coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_ENABLED));
+ config->state = KBASE_DEBUG_CORESIGHT_CSF_ENABLED;
+ }
+
+ /* Always assign the return code during config enable.
+ * It gets propagated when calling config disable.
+ */
+ config->error = ret;
+
+ return ret;
+}
+
+static int coresight_config_disable(struct kbase_device *kbdev,
+ struct kbase_debug_coresight_csf_config *config)
+{
+ int ret = 0;
+ int i;
+
+ if (!config)
+ return -EINVAL;
+
+ if (config->state == KBASE_DEBUG_CORESIGHT_CSF_DISABLED)
+ return ret;
+
+ for (i = 0; config->disable_seq && !ret && i < config->disable_seq->nr_ops; i++)
+ ret = execute_op(kbdev, &config->disable_seq->ops[i]);
+
+ if (!ret) {
+ dev_dbg(kbdev->dev, "Coresight config (0x%pK) state transition: %s to %s", config,
+ coresight_state_to_string(config->state),
+ coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_DISABLED));
+ config->state = KBASE_DEBUG_CORESIGHT_CSF_DISABLED;
+ } else {
+ /* Only assign the error if ret is not 0, as we don't want to
+ * overwrite an error from config enable.
+ */
+ if (!config->error)
+ config->error = ret;
+ }
+
+ return ret;
+}
+
+void *kbase_debug_coresight_csf_register(void *drv_data,
+ struct kbase_debug_coresight_csf_address_range *ranges,
+ int nr_ranges)
+{
+ struct kbase_debug_coresight_csf_client *client, *client_entry;
+ struct kbase_device *kbdev;
+ unsigned long flags;
+ int k;
+
+ if (unlikely(!drv_data)) {
+ pr_err("NULL drv_data");
+ return NULL;
+ }
+
+ kbdev = (struct kbase_device *)drv_data;
+
+ if (unlikely(!ranges)) {
+ dev_err(kbdev->dev, "NULL ranges");
+ return NULL;
+ }
+
+ if (unlikely(!nr_ranges)) {
+ dev_err(kbdev->dev, "nr_ranges is 0");
+ return NULL;
+ }
+
+ for (k = 0; k < nr_ranges; k++) {
+ if (ranges[k].end < ranges[k].start) {
+ dev_err(kbdev->dev, "Invalid address ranges 0x%08x - 0x%08x",
+ ranges[k].start, ranges[k].end);
+ return NULL;
+ }
+ }
+
+ client = kzalloc(sizeof(struct kbase_debug_coresight_csf_client), GFP_KERNEL);
+
+ if (!client)
+ return NULL;
+
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+ list_for_each_entry(client_entry, &kbdev->csf.coresight.clients, link) {
+ struct kbase_debug_coresight_csf_address_range *client_ranges =
+ client_entry->addr_ranges;
+ int i;
+
+ for (i = 0; i < client_entry->nr_ranges; i++) {
+ int j;
+
+ for (j = 0; j < nr_ranges; j++) {
+ if ((ranges[j].start < client_ranges[i].end) &&
+ (client_ranges[i].start < ranges[j].end)) {
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+ kfree(client);
+ dev_err(kbdev->dev,
+ "Client with range 0x%08x - 0x%08x already present at address range 0x%08x - 0x%08x",
+ client_ranges[i].start, client_ranges[i].end,
+ ranges[j].start, ranges[j].end);
+
+ return NULL;
+ }
+ }
+ }
+ }
+
+ client->drv_data = drv_data;
+ client->addr_ranges = ranges;
+ client->nr_ranges = nr_ranges;
+ list_add(&client->link, &kbdev->csf.coresight.clients);
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+
+ return client;
+}
+EXPORT_SYMBOL(kbase_debug_coresight_csf_register);
+
+void kbase_debug_coresight_csf_unregister(void *client_data)
+{
+ struct kbase_debug_coresight_csf_client *client;
+ struct kbase_debug_coresight_csf_config *config_entry;
+ struct kbase_device *kbdev;
+ unsigned long flags;
+ bool retry = true;
+
+ if (unlikely(!client_data)) {
+ pr_err("NULL client");
+ return;
+ }
+
+ client = (struct kbase_debug_coresight_csf_client *)client_data;
+
+ kbdev = (struct kbase_device *)client->drv_data;
+ if (unlikely(!kbdev)) {
+ pr_err("NULL drv_data in client");
+ return;
+ }
+
+ /* check for active config from client */
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+ list_del_init(&client->link);
+
+ while (retry && !list_empty(&kbdev->csf.coresight.configs)) {
+ retry = false;
+ list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) {
+ if (config_entry->client == client) {
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+ kbase_debug_coresight_csf_config_free(config_entry);
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+ retry = true;
+ break;
+ }
+ }
+ }
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+
+ kfree(client);
+}
+EXPORT_SYMBOL(kbase_debug_coresight_csf_unregister);
+
+void *
+kbase_debug_coresight_csf_config_create(void *client_data,
+ struct kbase_debug_coresight_csf_sequence *enable_seq,
+ struct kbase_debug_coresight_csf_sequence *disable_seq)
+{
+ struct kbase_debug_coresight_csf_client *client;
+ struct kbase_debug_coresight_csf_config *config;
+ struct kbase_device *kbdev;
+
+ if (unlikely(!client_data)) {
+ pr_err("NULL client");
+ return NULL;
+ }
+
+ client = (struct kbase_debug_coresight_csf_client *)client_data;
+
+ kbdev = (struct kbase_device *)client->drv_data;
+ if (unlikely(!kbdev)) {
+ pr_err("NULL drv_data in client");
+ return NULL;
+ }
+
+ if (enable_seq) {
+ if (!validate_seq(client, enable_seq)) {
+ dev_err(kbdev->dev, "Invalid enable_seq");
+ return NULL;
+ }
+ }
+
+ if (disable_seq) {
+ if (!validate_seq(client, disable_seq)) {
+ dev_err(kbdev->dev, "Invalid disable_seq");
+ return NULL;
+ }
+ }
+
+ config = kzalloc(sizeof(struct kbase_debug_coresight_csf_config), GFP_KERNEL);
+ if (WARN_ON(!config))
+ return NULL;
+
+ config->client = client;
+ config->enable_seq = enable_seq;
+ config->disable_seq = disable_seq;
+ config->error = 0;
+ config->state = KBASE_DEBUG_CORESIGHT_CSF_DISABLED;
+
+ INIT_LIST_HEAD(&config->link);
+
+ return config;
+}
+EXPORT_SYMBOL(kbase_debug_coresight_csf_config_create);
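+
+/* A minimal sketch of the intended client flow, using only symbols exported
+ * from this file. The address range, and the enable_seq/disable_seq objects
+ * (struct kbase_debug_coresight_csf_sequence), are hypothetical and must be
+ * provided by the client:
+ *
+ *   struct kbase_debug_coresight_csf_address_range ranges[] = {
+ *       { .start = 0xE0000000, .end = 0xE0000FFF },
+ *   };
+ *   void *client = kbase_debug_coresight_csf_register(kbdev, ranges, 1);
+ *   void *config = kbase_debug_coresight_csf_config_create(client,
+ *                                                          &enable_seq,
+ *                                                          &disable_seq);
+ *   kbase_debug_coresight_csf_config_enable(config);
+ *   ...
+ *   kbase_debug_coresight_csf_config_disable(config);
+ *   kbase_debug_coresight_csf_config_free(config);
+ *   kbase_debug_coresight_csf_unregister(client);
+ */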
+
+void kbase_debug_coresight_csf_config_free(void *config_data)
+{
+ struct kbase_debug_coresight_csf_config *config;
+
+ if (unlikely(!config_data)) {
+ pr_err("NULL config");
+ return;
+ }
+
+ config = (struct kbase_debug_coresight_csf_config *)config_data;
+
+ kbase_debug_coresight_csf_config_disable(config);
+
+ kfree(config);
+}
+EXPORT_SYMBOL(kbase_debug_coresight_csf_config_free);
+
+int kbase_debug_coresight_csf_config_enable(void *config_data)
+{
+ struct kbase_debug_coresight_csf_config *config;
+ struct kbase_debug_coresight_csf_client *client;
+ struct kbase_device *kbdev;
+ struct kbase_debug_coresight_csf_config *config_entry;
+ unsigned long flags;
+ int ret = 0;
+
+ if (unlikely(!config_data)) {
+ pr_err("NULL config");
+ return -EINVAL;
+ }
+
+ config = (struct kbase_debug_coresight_csf_config *)config_data;
+ client = (struct kbase_debug_coresight_csf_client *)config->client;
+
+ if (unlikely(!client)) {
+ pr_err("NULL client in config");
+ return -EINVAL;
+ }
+
+ kbdev = (struct kbase_device *)client->drv_data;
+ if (unlikely(!kbdev)) {
+ pr_err("NULL drv_data in client");
+ return -EINVAL;
+ }
+
+ /* Check to prevent double entry of config */
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+ list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) {
+ if (config_entry == config) {
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+ dev_err(kbdev->dev, "Config already enabled");
+ return -EINVAL;
+ }
+ }
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+
+ kbase_csf_scheduler_lock(kbdev);
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+
+ /* Check the state of Scheduler to confirm the desired state of MCU */
+ if (((kbdev->csf.scheduler.state != SCHED_SUSPENDED) &&
+ (kbdev->csf.scheduler.state != SCHED_SLEEPING) &&
+ !kbase_csf_scheduler_protected_mode_in_use(kbdev)) ||
+ kbase_pm_get_policy(kbdev) == &kbase_pm_always_on_policy_ops) {
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ /* Wait for MCU to reach the stable ON state */
+ ret = kbase_pm_wait_for_desired_state(kbdev);
+
+ if (ret)
+ dev_err(kbdev->dev,
+ "Wait for PM state failed when enabling coresight config");
+ else
+ ret = coresight_config_enable(kbdev, config);
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ }
+
+ /* Add config to next enable sequence */
+ if (!ret) {
+ spin_lock(&kbdev->csf.coresight.lock);
+ list_add(&config->link, &kbdev->csf.coresight.configs);
+ spin_unlock(&kbdev->csf.coresight.lock);
+ }
+
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ kbase_csf_scheduler_unlock(kbdev);
+
+ return ret;
+}
+EXPORT_SYMBOL(kbase_debug_coresight_csf_config_enable);
+
+int kbase_debug_coresight_csf_config_disable(void *config_data)
+{
+ struct kbase_debug_coresight_csf_config *config;
+ struct kbase_debug_coresight_csf_client *client;
+ struct kbase_device *kbdev;
+ struct kbase_debug_coresight_csf_config *config_entry;
+ bool found_in_list = false;
+ unsigned long flags;
+ int ret = 0;
+
+ if (unlikely(!config_data)) {
+ pr_err("NULL config");
+ return -EINVAL;
+ }
+
+ config = (struct kbase_debug_coresight_csf_config *)config_data;
+
+ /* Exit early if not enabled prior */
+ if (list_empty(&config->link))
+ return ret;
+
+ client = (struct kbase_debug_coresight_csf_client *)config->client;
+
+ if (unlikely(!client)) {
+ pr_err("NULL client in config");
+ return -EINVAL;
+ }
+
+ kbdev = (struct kbase_device *)client->drv_data;
+ if (unlikely(!kbdev)) {
+ pr_err("NULL drv_data in client");
+ return -EINVAL;
+ }
+
+ /* Check if the config is in the correct list */
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+ list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) {
+ if (config_entry == config) {
+ found_in_list = true;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+
+ if (!found_in_list) {
+ dev_err(kbdev->dev, "Config looks corrupted");
+ return -EINVAL;
+ }
+
+ kbase_csf_scheduler_lock(kbdev);
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+
+ /* Check the state of Scheduler to confirm the desired state of MCU */
+ if (((kbdev->csf.scheduler.state != SCHED_SUSPENDED) &&
+ (kbdev->csf.scheduler.state != SCHED_SLEEPING) &&
+ !kbase_csf_scheduler_protected_mode_in_use(kbdev)) ||
+ kbase_pm_get_policy(kbdev) == &kbase_pm_always_on_policy_ops) {
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ /* Wait for MCU to reach the stable ON state */
+ ret = kbase_pm_wait_for_desired_state(kbdev);
+
+ if (ret)
+ dev_err(kbdev->dev,
+ "Wait for PM state failed when disabling coresight config");
+ else
+ ret = coresight_config_disable(kbdev, config);
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ } else if (kbdev->pm.backend.mcu_state == KBASE_MCU_OFF) {
+ /* MCU is OFF, so the disable sequence was already executed.
+ *
+ * Propagate any error that would have occurred during the enable
+ * or disable sequence.
+ *
+ * This is done as part of the disable sequence, since the call from
+ * client is synchronous.
+ */
+ ret = config->error;
+ }
+
+ /* Remove config from next disable sequence */
+ spin_lock(&kbdev->csf.coresight.lock);
+ list_del_init(&config->link);
+ spin_unlock(&kbdev->csf.coresight.lock);
+
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ kbase_csf_scheduler_unlock(kbdev);
+
+ return ret;
+}
+EXPORT_SYMBOL(kbase_debug_coresight_csf_config_disable);
+
+static void coresight_config_enable_all(struct work_struct *data)
+{
+ struct kbase_device *kbdev =
+ container_of(data, struct kbase_device, csf.coresight.enable_work);
+ struct kbase_debug_coresight_csf_config *config_entry;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+
+ list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) {
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+ if (coresight_config_enable(kbdev, config_entry))
+ dev_err(kbdev->dev, "enable config (0x%pK) failed", config_entry);
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+ }
+
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ wake_up_all(&kbdev->csf.coresight.event_wait);
+}
+
+static void coresight_config_disable_all(struct work_struct *data)
+{
+ struct kbase_device *kbdev =
+ container_of(data, struct kbase_device, csf.coresight.disable_work);
+ struct kbase_debug_coresight_csf_config *config_entry;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+
+ list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) {
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+ if (coresight_config_disable(kbdev, config_entry))
+ dev_err(kbdev->dev, "disable config (0x%pK) failed", config_entry);
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+ }
+
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ wake_up_all(&kbdev->csf.coresight.event_wait);
+}
+
+void kbase_debug_coresight_csf_disable_pmode_enter(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ dev_dbg(kbdev->dev, "Coresight state %s before protected mode enter",
+ coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_ENABLED));
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ kbase_pm_lock(kbdev);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ kbdev->csf.coresight.disable_on_pmode_enter = true;
+ kbdev->csf.coresight.enable_on_pmode_exit = false;
+ kbase_pm_update_state(kbdev);
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ kbase_pm_wait_for_desired_state(kbdev);
+
+ kbase_pm_unlock(kbdev);
+}
+
+void kbase_debug_coresight_csf_enable_pmode_exit(struct kbase_device *kbdev)
+{
+ dev_dbg(kbdev->dev, "Coresight state %s after protected mode exit",
+ coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_DISABLED));
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ WARN_ON(kbdev->csf.coresight.disable_on_pmode_enter);
+
+ kbdev->csf.coresight.enable_on_pmode_exit = true;
+ kbase_pm_update_state(kbdev);
+}
+
+void kbase_debug_coresight_csf_state_request(struct kbase_device *kbdev,
+ enum kbase_debug_coresight_csf_state state)
+{
+ if (unlikely(!kbdev))
+ return;
+
+ if (unlikely(!kbdev->csf.coresight.workq))
+ return;
+
+ dev_dbg(kbdev->dev, "Coresight state %s requested", coresight_state_to_string(state));
+
+ switch (state) {
+ case KBASE_DEBUG_CORESIGHT_CSF_DISABLED:
+ queue_work(kbdev->csf.coresight.workq, &kbdev->csf.coresight.disable_work);
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_ENABLED:
+ queue_work(kbdev->csf.coresight.workq, &kbdev->csf.coresight.enable_work);
+ break;
+ default:
+ dev_err(kbdev->dev, "Invalid Coresight state %d", state);
+ break;
+ }
+}
+
+bool kbase_debug_coresight_csf_state_check(struct kbase_device *kbdev,
+ enum kbase_debug_coresight_csf_state state)
+{
+ struct kbase_debug_coresight_csf_config *config_entry;
+ unsigned long flags;
+ bool success = true;
+
+ dev_dbg(kbdev->dev, "Coresight check for state: %s", coresight_state_to_string(state));
+
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+
+ list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) {
+ if (state != config_entry->state) {
+ success = false;
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+
+ return success;
+}
+KBASE_EXPORT_TEST_API(kbase_debug_coresight_csf_state_check);
+
+bool kbase_debug_coresight_csf_state_wait(struct kbase_device *kbdev,
+ enum kbase_debug_coresight_csf_state state)
+{
+ const long wait_timeout = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
+ struct kbase_debug_coresight_csf_config *config_entry, *next_config_entry;
+ unsigned long flags;
+ bool success = true;
+
+ dev_dbg(kbdev->dev, "Coresight wait for state: %s", coresight_state_to_string(state));
+
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+
+ list_for_each_entry_safe(config_entry, next_config_entry, &kbdev->csf.coresight.configs,
+ link) {
+ const enum kbase_debug_coresight_csf_state prev_state = config_entry->state;
+ long remaining;
+
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+ remaining = wait_event_timeout(kbdev->csf.coresight.event_wait,
+ state == config_entry->state, wait_timeout);
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+
+ if (!remaining) {
+ success = false;
+ dev_err(kbdev->dev,
+ "Timeout waiting for Coresight state transition %s to %s",
+ coresight_state_to_string(prev_state),
+ coresight_state_to_string(state));
+ }
+ }
+
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+
+ return success;
+}
+KBASE_EXPORT_TEST_API(kbase_debug_coresight_csf_state_wait);
+
+int kbase_debug_coresight_csf_init(struct kbase_device *kbdev)
+{
+ kbdev->csf.coresight.workq = alloc_ordered_workqueue("Mali CoreSight workqueue", 0);
+ if (kbdev->csf.coresight.workq == NULL)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&kbdev->csf.coresight.clients);
+ INIT_LIST_HEAD(&kbdev->csf.coresight.configs);
+ INIT_WORK(&kbdev->csf.coresight.enable_work, coresight_config_enable_all);
+ INIT_WORK(&kbdev->csf.coresight.disable_work, coresight_config_disable_all);
+ init_waitqueue_head(&kbdev->csf.coresight.event_wait);
+ spin_lock_init(&kbdev->csf.coresight.lock);
+
+ kbdev->csf.coresight.disable_on_pmode_enter = false;
+ kbdev->csf.coresight.enable_on_pmode_exit = false;
+
+ return 0;
+}
+
+void kbase_debug_coresight_csf_term(struct kbase_device *kbdev)
+{
+ struct kbase_debug_coresight_csf_client *client_entry, *next_client_entry;
+ struct kbase_debug_coresight_csf_config *config_entry, *next_config_entry;
+ unsigned long flags;
+
+ kbdev->csf.coresight.disable_on_pmode_enter = false;
+ kbdev->csf.coresight.enable_on_pmode_exit = false;
+
+ cancel_work_sync(&kbdev->csf.coresight.enable_work);
+ cancel_work_sync(&kbdev->csf.coresight.disable_work);
+ destroy_workqueue(kbdev->csf.coresight.workq);
+ kbdev->csf.coresight.workq = NULL;
+
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+
+ list_for_each_entry_safe(config_entry, next_config_entry, &kbdev->csf.coresight.configs,
+ link) {
+ list_del_init(&config_entry->link);
+ kfree(config_entry);
+ }
+
+ list_for_each_entry_safe(client_entry, next_client_entry, &kbdev->csf.coresight.clients,
+ link) {
+ list_del_init(&client_entry->link);
+ kfree(client_entry);
+ }
+
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+}
diff --git a/mali_kbase/debug/backend/mali_kbase_debug_coresight_internal_csf.h b/mali_kbase/debug/backend/mali_kbase_debug_coresight_internal_csf.h
new file mode 100644
index 0000000..06d62dc
--- /dev/null
+++ b/mali_kbase/debug/backend/mali_kbase_debug_coresight_internal_csf.h
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_DEBUG_CORESIGHT_INTERNAL_CSF_H_
+#define _KBASE_DEBUG_CORESIGHT_INTERNAL_CSF_H_
+
+#include <mali_kbase.h>
+#include <linux/mali_kbase_debug_coresight_csf.h>
+
+/**
+ * struct kbase_debug_coresight_csf_client - Coresight client definition
+ *
+ * @drv_data: Pointer to driver device data.
+ * @addr_ranges: Arrays of address ranges used by the registered client.
+ * @nr_ranges: Size of @addr_ranges array.
+ * @link: Link item of a Coresight client.
+ * Linked to &struct_kbase_device.csf.coresight.clients.
+ */
+struct kbase_debug_coresight_csf_client {
+ void *drv_data;
+ struct kbase_debug_coresight_csf_address_range *addr_ranges;
+ u32 nr_ranges;
+ struct list_head link;
+};
+
+/**
+ * enum kbase_debug_coresight_csf_state - Coresight configuration states
+ *
+ * @KBASE_DEBUG_CORESIGHT_CSF_DISABLED: Coresight configuration is disabled.
+ * @KBASE_DEBUG_CORESIGHT_CSF_ENABLED: Coresight configuration is enabled.
+ */
+enum kbase_debug_coresight_csf_state {
+ KBASE_DEBUG_CORESIGHT_CSF_DISABLED = 0,
+ KBASE_DEBUG_CORESIGHT_CSF_ENABLED,
+};
+
+/**
+ * struct kbase_debug_coresight_csf_config - Coresight configuration definition
+ *
+ * @client: Pointer to the client for which the configuration is created.
+ * @enable_seq: Array of operations for Coresight client enable sequence. Can be NULL.
+ * @disable_seq: Array of operations for Coresight client disable sequence. Can be NULL.
+ * @state: Current Coresight configuration state.
+ * @error: Error code recording whether an error occurred while executing
+ * the enable or disable sequences.
+ * @link: Link item of a Coresight configuration.
+ * Linked to &struct_kbase_device.csf.coresight.configs.
+ */
+struct kbase_debug_coresight_csf_config {
+ void *client;
+ struct kbase_debug_coresight_csf_sequence *enable_seq;
+ struct kbase_debug_coresight_csf_sequence *disable_seq;
+ enum kbase_debug_coresight_csf_state state;
+ int error;
+ struct list_head link;
+};
+
+/**
+ * struct kbase_debug_coresight_device - Object representing the Coresight device
+ *
+ * @clients: List head to maintain Coresight clients.
+ * @configs: List head to maintain Coresight configs.
+ * @lock: A lock to protect client/config lists.
+ * Lists can be accessed concurrently by
+ * Coresight kernel modules and kernel threads.
+ * @workq: Work queue for Coresight enable/disable execution.
+ * @enable_work: Work item used to enable Coresight.
+ * @disable_work: Work item used to disable Coresight.
+ * @event_wait: Wait queue for Coresight events.
+ * @enable_on_pmode_exit: Flag used by the PM state machine to
+ * identify if Coresight enable is needed.
+ * @disable_on_pmode_enter: Flag used by the PM state machine to
+ * identify if Coresight disable is needed.
+ */
+struct kbase_debug_coresight_device {
+ struct list_head clients;
+ struct list_head configs;
+ spinlock_t lock;
+ struct workqueue_struct *workq;
+ struct work_struct enable_work;
+ struct work_struct disable_work;
+ wait_queue_head_t event_wait;
+ bool enable_on_pmode_exit;
+ bool disable_on_pmode_enter;
+};
+
+/**
+ * kbase_debug_coresight_csf_init - Initialize Coresight resources.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * This function should be called once at device initialization.
+ *
+ * Return: 0 on success.
+ */
+int kbase_debug_coresight_csf_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_coresight_csf_term - Terminate Coresight resources.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * This function should be called at device termination to prevent memory
+ * leaks in case the Coresight module was removed without calling
+ * kbasep_debug_coresight_csf_trace_disable().
+ */
+void kbase_debug_coresight_csf_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_coresight_csf_disable_pmode_enter - Disable Coresight on Protected
+ * mode enter.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * This function should be called just before requesting to enter protected mode.
+ * It will trigger a PM state machine transition from MCU_ON
+ * to ON_PMODE_ENTER_CORESIGHT_DISABLE.
+ */
+void kbase_debug_coresight_csf_disable_pmode_enter(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_coresight_csf_enable_pmode_exit - Enable Coresight on Protected
+ * mode exit.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * This function should be called after protected mode exit is acknowledged.
+ * It will trigger a PM state machine transition from MCU_ON
+ * to ON_PMODE_EXIT_CORESIGHT_ENABLE.
+ */
+void kbase_debug_coresight_csf_enable_pmode_exit(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_coresight_csf_state_request - Request Coresight state transition.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @state: Coresight state to request.
+ */
+void kbase_debug_coresight_csf_state_request(struct kbase_device *kbdev,
+ enum kbase_debug_coresight_csf_state state);
+
+/**
+ * kbase_debug_coresight_csf_state_check - Check Coresight state.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @state: Coresight state to check for.
+ *
+ * Return: true if all configs are in state @state.
+ */
+bool kbase_debug_coresight_csf_state_check(struct kbase_device *kbdev,
+ enum kbase_debug_coresight_csf_state state);
+
+/**
+ * kbase_debug_coresight_csf_state_wait - Wait for Coresight state transition to complete.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @state: Coresight state to wait for.
+ *
+ * Return: true if all configs reach @state within the pre-defined timeout.
+ */
+bool kbase_debug_coresight_csf_state_wait(struct kbase_device *kbdev,
+ enum kbase_debug_coresight_csf_state state);
+
+#endif /* _KBASE_DEBUG_CORESIGHT_INTERNAL_CSF_H_ */
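The header above only declares the kernel-internal entry points. As a rough usage sketch, the power-management code could drive the configuration state machine with nothing more than the functions and enum values declared here; the helper name and the error codes below are illustrative assumptions, not driver code.

#include "mali_kbase_debug_coresight_internal_csf.h"

/* Sketch only: push all registered Coresight configs to the disabled state
 * before a protected-mode entry, using the API declared above.
 */
static int example_quiesce_coresight_for_pmode(struct kbase_device *kbdev)
{
        /* Queue the disable sequences of every registered config on the workqueue. */
        kbase_debug_coresight_csf_state_request(kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED);

        /* Block until every config reports the disabled state, or give up. */
        if (!kbase_debug_coresight_csf_state_wait(kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED))
                return -ETIMEDOUT;

        /* Re-check the aggregate state before letting protected mode proceed. */
        if (!kbase_debug_coresight_csf_state_check(kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED))
                return -EBUSY;

        return 0;
}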
diff --git a/mali_kbase/device/backend/mali_kbase_device_csf.c b/mali_kbase/device/backend/mali_kbase_device_csf.c
index d9ee3fc..217a056 100644
--- a/mali_kbase/device/backend/mali_kbase_device_csf.c
+++ b/mali_kbase/device/backend/mali_kbase_device_csf.c
@@ -29,10 +29,7 @@
#include <mali_kbase_reset_gpu.h>
#include <csf/mali_kbase_csf.h>
#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
-
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
#include <backend/gpu/mali_kbase_model_linux.h>
-#endif
#include <mali_kbase.h>
#include <backend/gpu/mali_kbase_irq_internal.h>
@@ -92,13 +89,13 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
goto fail_timer;
#ifdef CONFIG_MALI_DEBUG
-#ifndef CONFIG_MALI_NO_MALI
+#if IS_ENABLED(CONFIG_MALI_REAL_HW)
if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
dev_err(kbdev->dev, "Interrupt assignment check failed.\n");
err = -EINVAL;
goto fail_interrupt_test;
}
-#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */
#endif /* CONFIG_MALI_DEBUG */
kbase_ipa_control_init(kbdev);
@@ -142,9 +139,9 @@ fail_pm_metrics_init:
kbase_ipa_control_term(kbdev);
#ifdef CONFIG_MALI_DEBUG
-#ifndef CONFIG_MALI_NO_MALI
+#if IS_ENABLED(CONFIG_MALI_REAL_HW)
fail_interrupt_test:
-#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */
#endif /* CONFIG_MALI_DEBUG */
kbase_backend_timer_term(kbdev);
@@ -283,12 +280,13 @@ static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev)
}
static const struct kbase_device_init dev_init[] = {
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
- { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" },
-#else
+#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
+ { kbase_gpu_device_create, kbase_gpu_device_destroy,
+ "Dummy model initialization failed" },
+#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
{ assign_irqs, NULL, "IRQ search failed" },
{ registers_map, registers_unmap, "Register map failed" },
-#endif
+#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
{ power_control_init, power_control_term, "Power control initialization failed" },
{ kbase_device_io_history_init, kbase_device_io_history_term,
"Register access history initialization failed" },
@@ -344,6 +342,10 @@ static const struct kbase_device_init dev_init[] = {
{ kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer,
"GPU property population failed" },
{ kbase_device_late_init, kbase_device_late_term, "Late device initialization failed" },
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+ { kbase_debug_coresight_csf_init, kbase_debug_coresight_csf_term,
+ "Coresight initialization failed" },
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
};
static void kbase_device_term_partial(struct kbase_device *kbdev,
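The dev_init[] table pairs each initialization step with its matching termination callback and a failure message. A hedged sketch of the kind of loop such a table is typically walked with follows; the field names (init, term, err_mes) and the walker itself are assumptions for illustration, not the driver's actual kbase_device_init() code.

/* Illustrative only: run an init/term table of the shape used by dev_init[]
 * and unwind the already-initialized entries in reverse order on failure.
 */
static int example_run_init_table(struct kbase_device *kbdev,
                                  const struct kbase_device_init *table,
                                  size_t count)
{
        size_t i;

        for (i = 0; i < count; i++) {
                int err = table[i].init ? table[i].init(kbdev) : 0;

                if (err) {
                        dev_err(kbdev->dev, "%s (error %d)\n", table[i].err_mes, err);
                        while (i--) {
                                if (table[i].term)
                                        table[i].term(kbdev);
                        }
                        return err;
                }
        }

        return 0;
}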
diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
index 6e7c64b..b74ed2c 100644
--- a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
+++ b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
@@ -24,6 +24,7 @@
#include <backend/gpu/mali_kbase_instr_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <device/mali_kbase_device.h>
+#include <device/mali_kbase_device_internal.h>
#include <mali_kbase_reset_gpu.h>
#include <mmu/mali_kbase_mmu.h>
#include <mali_kbase_ctx_sched.h>
@@ -186,7 +187,7 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
}
#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
-static bool kbase_is_register_accessible(u32 offset)
+bool kbase_is_register_accessible(u32 offset)
{
#ifdef CONFIG_MALI_DEBUG
if (((offset >= MCU_SUBSYSTEM_BASE) && (offset < IPA_CONTROL_BASE)) ||
@@ -198,7 +199,9 @@ static bool kbase_is_register_accessible(u32 offset)
return true;
}
+#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
+#if IS_ENABLED(CONFIG_MALI_REAL_HW)
void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value)
{
if (WARN_ON(!kbdev->pm.backend.gpu_powered))
@@ -246,4 +249,4 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
return val;
}
KBASE_EXPORT_TEST_API(kbase_reg_read);
-#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
+#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */
diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_jm.c b/mali_kbase/device/backend/mali_kbase_device_hw_jm.c
index e6f0197..38223af 100644
--- a/mali_kbase/device/backend/mali_kbase_device_hw_jm.c
+++ b/mali_kbase/device/backend/mali_kbase_device_hw_jm.c
@@ -106,7 +106,7 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val);
}
-#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
+#if IS_ENABLED(CONFIG_MALI_REAL_HW)
void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value)
{
WARN_ON(!kbdev->pm.backend.gpu_powered);
@@ -140,4 +140,4 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
return val;
}
KBASE_EXPORT_TEST_API(kbase_reg_read);
-#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
+#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */
diff --git a/mali_kbase/device/backend/mali_kbase_device_jm.c b/mali_kbase/device/backend/mali_kbase_device_jm.c
index 8673588..c104fa4 100644
--- a/mali_kbase/device/backend/mali_kbase_device_jm.c
+++ b/mali_kbase/device/backend/mali_kbase_device_jm.c
@@ -30,10 +30,7 @@
#include <hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h>
#include <hwcnt/backend/mali_kbase_hwcnt_backend_jm.h>
#include <hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h>
-
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
#include <backend/gpu/mali_kbase_model_linux.h>
-#endif /* CONFIG_MALI_NO_MALI */
#ifdef CONFIG_MALI_ARBITER_SUPPORT
#include <arbiter/mali_kbase_arbiter_pm.h>
@@ -74,13 +71,13 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
goto fail_timer;
#ifdef CONFIG_MALI_DEBUG
-#ifndef CONFIG_MALI_NO_MALI
+#if IS_ENABLED(CONFIG_MALI_REAL_HW)
if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
dev_err(kbdev->dev, "Interrupt assignment check failed.\n");
err = -EINVAL;
goto fail_interrupt_test;
}
-#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */
#endif /* CONFIG_MALI_DEBUG */
err = kbase_job_slot_init(kbdev);
@@ -119,9 +116,9 @@ fail_devfreq_init:
fail_job_slot:
#ifdef CONFIG_MALI_DEBUG
-#ifndef CONFIG_MALI_NO_MALI
+#if IS_ENABLED(CONFIG_MALI_REAL_HW)
fail_interrupt_test:
-#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */
#endif /* CONFIG_MALI_DEBUG */
kbase_backend_timer_term(kbdev);
@@ -213,12 +210,13 @@ static void kbase_device_hwcnt_backend_jm_watchdog_term(struct kbase_device *kbd
}
static const struct kbase_device_init dev_init[] = {
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
- { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" },
-#else
+#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
+ { kbase_gpu_device_create, kbase_gpu_device_destroy,
+ "Dummy model initialization failed" },
+#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
{ assign_irqs, NULL, "IRQ search failed" },
{ registers_map, registers_unmap, "Register map failed" },
-#endif
+#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
{ kbase_device_io_history_init, kbase_device_io_history_term,
"Register access history initialization failed" },
{ kbase_device_pm_init, kbase_device_pm_term, "Power management initialization failed" },
diff --git a/mali_kbase/device/mali_kbase_device_internal.h b/mali_kbase/device/mali_kbase_device_internal.h
index d4f6875..de54c83 100644
--- a/mali_kbase/device/mali_kbase_device_internal.h
+++ b/mali_kbase/device/mali_kbase_device_internal.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -89,3 +89,13 @@ int kbase_device_late_init(struct kbase_device *kbdev);
* @kbdev: Device pointer
*/
void kbase_device_late_term(struct kbase_device *kbdev);
+
+#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_NO_MALI)
+/**
+ * kbase_is_register_accessible - Checks if register is accessible
+ * @offset: Register offset
+ *
+ * Return: true if the register is accessible, false otherwise.
+ */
+bool kbase_is_register_accessible(u32 offset);
+#endif /* MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_NO_MALI) */
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
index a2ecd08..124fd4c 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
@@ -34,13 +34,11 @@
#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h"
#include "mali_kbase_hwaccess_time.h"
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
+#include <backend/gpu/mali_kbase_model_linux.h>
#include <linux/log2.h>
#include "mali_kbase_ccswe.h"
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
-#include <backend/gpu/mali_kbase_model_dummy.h>
-#endif /* CONFIG_MALI_NO_MALI */
/* Ring buffer virtual address start at 4GB */
#define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32)
@@ -103,6 +101,8 @@ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(struct kbase_hwcnt_backend_csf_i
static void kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
unsigned long *flags)
+ __acquires(&(struct kbase_hwcnt_backend_csf_if_fw_ctx)
+ ctx->kbdev->csf.scheduler.interrupt_lock)
{
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
struct kbase_device *kbdev;
@@ -117,6 +117,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_i
static void kbasep_hwcnt_backend_csf_if_fw_unlock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
unsigned long flags)
+ __releases(&(struct kbase_hwcnt_backend_csf_if_fw_ctx)
+ ctx->kbdev->csf.scheduler.interrupt_lock)
{
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
struct kbase_device *kbdev;
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c
index 6ddd7ba..8b3caac 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c
@@ -27,10 +27,7 @@
#include "mali_kbase_hwaccess_instr.h"
#include "mali_kbase_hwaccess_time.h"
#include "mali_kbase_ccswe.h"
-
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
-#include "backend/gpu/mali_kbase_model_dummy.h"
-#endif /* CONFIG_MALI_NO_MALI */
+#include "backend/gpu/mali_kbase_model_linux.h"
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
#include "backend/gpu/mali_kbase_pm_internal.h"
diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
index 2324c38..cd5a9bf 100644
--- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
+++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
@@ -23,10 +23,7 @@
#include "mali_kbase_ipa_counter_common_jm.h"
#include "mali_kbase.h"
-
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
-#include <backend/gpu/mali_kbase_model_dummy.h>
-#endif /* CONFIG_MALI_NO_MALI */
+#include <backend/gpu/mali_kbase_model_linux.h>
/* Performance counter blocks base offsets */
#define JM_BASE (0 * KBASE_IPA_NR_BYTES_PER_BLOCK)
diff --git a/mali_kbase/ipa/mali_kbase_ipa.c b/mali_kbase/ipa/mali_kbase_ipa.c
index 8b8bbd1..0e8abb1 100644
--- a/mali_kbase/ipa/mali_kbase_ipa.c
+++ b/mali_kbase/ipa/mali_kbase_ipa.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -84,11 +84,11 @@ KBASE_EXPORT_TEST_API(kbase_ipa_model_name_from_id);
static struct device_node *get_model_dt_node(struct kbase_ipa_model *model,
bool dt_required)
{
- struct device_node *model_dt_node;
+ struct device_node *model_dt_node = NULL;
char compat_string[64];
- snprintf(compat_string, sizeof(compat_string), "arm,%s",
- model->ops->name);
+ if (unlikely(!scnprintf(compat_string, sizeof(compat_string), "arm,%s", model->ops->name)))
+ return NULL;
/* of_find_compatible_node() will call of_node_put() on the root node,
* so take a reference on it first.
@@ -111,12 +111,12 @@ int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model,
const char *name, s32 *addr,
size_t num_elems, bool dt_required)
{
- int err, i;
+ int err = -EINVAL, i;
struct device_node *model_dt_node = get_model_dt_node(model,
dt_required);
char *origin;
- err = of_property_read_u32_array(model_dt_node, name, addr, num_elems);
+ err = of_property_read_u32_array(model_dt_node, name, (u32 *)addr, num_elems);
/* We're done with model_dt_node now, so drop the reference taken in
* get_model_dt_node()/of_find_compatible_node().
*/
@@ -138,11 +138,17 @@ int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model,
for (i = 0; i < num_elems; ++i) {
char elem_name[32];
- if (num_elems == 1)
- snprintf(elem_name, sizeof(elem_name), "%s", name);
- else
- snprintf(elem_name, sizeof(elem_name), "%s.%d",
- name, i);
+ if (num_elems == 1) {
+ if (unlikely(!scnprintf(elem_name, sizeof(elem_name), "%s", name))) {
+ err = -ENOMEM;
+ goto exit;
+ }
+ } else {
+ if (unlikely(!scnprintf(elem_name, sizeof(elem_name), "%s.%d", name, i))) {
+ err = -ENOMEM;
+ goto exit;
+ }
+ }
dev_dbg(model->kbdev->dev, "%s.%s = %d (%s)\n",
model->ops->name, elem_name, addr[i], origin);
@@ -164,7 +170,7 @@ int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model,
int err;
struct device_node *model_dt_node = get_model_dt_node(model,
dt_required);
- const char *string_prop_value;
+ const char *string_prop_value = "";
char *origin;
err = of_property_read_string(model_dt_node, name,
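The snprintf()-to-scnprintf() conversions above share one pattern: a zero return from scnprintf() (which reports the number of characters written, excluding the terminating NUL) is treated as a failure instead of silently using a possibly empty buffer. A standalone sketch of the pattern, with made-up names, is:

#include <linux/kernel.h>
#include <linux/errno.h>

/* Sketch of the scnprintf() return-value check used in the hunks above. */
static int example_build_compat_string(char *buf, size_t len, const char *model_name)
{
        if (unlikely(!scnprintf(buf, len, "arm,%s", model_name)))
                return -ENOMEM;

        return 0;
}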
diff --git a/mali_kbase/ipa/mali_kbase_ipa_simple.c b/mali_kbase/ipa/mali_kbase_ipa_simple.c
index aaea77a..0fd2136 100644
--- a/mali_kbase/ipa/mali_kbase_ipa_simple.c
+++ b/mali_kbase/ipa/mali_kbase_ipa_simple.c
@@ -231,14 +231,12 @@ static int add_params(struct kbase_ipa_model *model)
(struct kbase_ipa_model_simple_data *)model->model_data;
err = kbase_ipa_model_add_param_s32(model, "static-coefficient",
- &model_data->static_coefficient,
- 1, true);
+ (s32 *)&model_data->static_coefficient, 1, true);
if (err)
goto end;
err = kbase_ipa_model_add_param_s32(model, "dynamic-coefficient",
- &model_data->dynamic_coefficient,
- 1, true);
+ (s32 *)&model_data->dynamic_coefficient, 1, true);
if (err)
goto end;
diff --git a/mali_kbase/jm/mali_kbase_jm_defs.h b/mali_kbase/jm/mali_kbase_jm_defs.h
index e431698..fe8995a 100644
--- a/mali_kbase/jm/mali_kbase_jm_defs.h
+++ b/mali_kbase/jm/mali_kbase_jm_defs.h
@@ -578,7 +578,7 @@ struct kbase_jd_atom {
#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS)
int work_id;
#endif
- int slot_nr;
+ unsigned int slot_nr;
u32 atom_flags;
diff --git a/mali_kbase/jm/mali_kbase_jm_js.h b/mali_kbase/jm/mali_kbase_jm_js.h
index d03bcc0..53819ca 100644
--- a/mali_kbase/jm/mali_kbase_jm_js.h
+++ b/mali_kbase/jm/mali_kbase_jm_js.h
@@ -132,15 +132,15 @@ void kbasep_js_kctx_term(struct kbase_context *kctx);
* Atoms of higher priority might still be able to be pulled from the context
* on @js. This helps with starting a high priority atom as soon as possible.
*/
-static inline void kbase_jsctx_slot_prio_blocked_set(struct kbase_context *kctx,
- int js, int sched_prio)
+static inline void kbase_jsctx_slot_prio_blocked_set(struct kbase_context *kctx, unsigned int js,
+ int sched_prio)
{
struct kbase_jsctx_slot_tracking *slot_tracking =
&kctx->slot_tracking[js];
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
WARN(!slot_tracking->atoms_pulled_pri[sched_prio],
- "When marking slot %d as blocked for priority %d on a kctx, no atoms were pulled - the slot cannot become unblocked",
+ "When marking slot %u as blocked for priority %d on a kctx, no atoms were pulled - the slot cannot become unblocked",
js, sched_prio);
slot_tracking->blocked |= ((kbase_js_prio_bitmap_t)1) << sched_prio;
@@ -510,19 +510,6 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
struct kbase_jd_atom *katom);
/**
- * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer.
- * @kctx: Context Pointer
- * @prio: Priority (specifies the queue together with js).
- * @js: Job slot (specifies the queue together with prio).
- *
- * Pushes all possible atoms from the linked list to the ringbuffer.
- * Number of atoms are limited to free space in the ringbuffer and
- * number of available atoms in the linked list.
- *
- */
-void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js);
-
-/**
* kbase_js_pull - Pull an atom from a context in the job scheduler for
* execution.
*
@@ -536,7 +523,7 @@ void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js);
* Return: a pointer to an atom, or NULL if there are no atoms for this
* slot that can be currently run.
*/
-struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js);
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, unsigned int js);
/**
* kbase_js_unpull - Return an atom to the job scheduler ringbuffer.
@@ -617,7 +604,7 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *katom);
* been used.
*
*/
-void kbase_js_sched(struct kbase_device *kbdev, int js_mask);
+void kbase_js_sched(struct kbase_device *kbdev, unsigned int js_mask);
/**
* kbase_js_zap_context - Attempt to deschedule a context that is being
diff --git a/mali_kbase/mali_base_hwconfig_features.h b/mali_kbase/mali_base_hwconfig_features.h
index ea143ab..c6fea79 100644
--- a/mali_kbase/mali_base_hwconfig_features.h
+++ b/mali_kbase/mali_base_hwconfig_features.h
@@ -131,16 +131,6 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tBAx[
BASE_HW_FEATURE_END
};
-__attribute__((unused)) static const enum base_hw_feature base_hw_features_tDUx[] = {
- BASE_HW_FEATURE_FLUSH_REDUCTION,
- BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
- BASE_HW_FEATURE_IDVS_GROUP_SIZE,
- BASE_HW_FEATURE_L2_CONFIG,
- BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
- BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
- BASE_HW_FEATURE_END
-};
-
__attribute__((unused)) static const enum base_hw_feature base_hw_features_tODx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h
index 35c3828..2dc0402 100644
--- a/mali_kbase/mali_base_hwconfig_issues.h
+++ b/mali_kbase/mali_base_hwconfig_issues.h
@@ -64,6 +64,9 @@ enum base_hw_issue {
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_TITANHW_2679,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -88,6 +91,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -108,6 +113,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -128,6 +135,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p1
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -143,6 +152,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tMI
BASE_HW_ISSUE_TMIX_8343,
BASE_HW_ISSUE_TMIX_8456,
BASE_HW_ISSUE_TSIX_2033,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -156,6 +167,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p0
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -169,6 +182,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p1
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -182,6 +197,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p2
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -194,6 +211,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p3
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -204,6 +223,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tHE
BASE_HW_ISSUE_TMIX_8042,
BASE_HW_ISSUE_TMIX_8133,
BASE_HW_ISSUE_TSIX_2033,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -217,6 +238,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p0
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -230,6 +253,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p1
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -242,6 +267,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p0
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -253,6 +280,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p1
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -263,6 +292,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tSI
BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -274,6 +305,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tDVx_r0p0
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -284,6 +317,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tDV
BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -296,6 +331,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNOx_r0p0
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -306,6 +343,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNO
BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -318,6 +357,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r0p0
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -330,6 +371,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r1p0
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -340,6 +383,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGO
BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -356,6 +401,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p0
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TTRX_3485,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -372,6 +419,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p1
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TTRX_3485,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -387,6 +436,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p2
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -399,6 +450,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTR
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -415,6 +468,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p0
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TTRX_3485,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -430,6 +485,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p1
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -442,6 +499,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNA
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -456,6 +515,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p0
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TTRX_3485,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -469,6 +530,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p1
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -482,6 +545,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p0
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -495,6 +560,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p1
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -507,6 +574,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBE
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -521,6 +590,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p0
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TTRX_3485,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -534,6 +605,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p1
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -547,6 +620,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p0
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -560,6 +635,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r1p0
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -572,26 +649,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBA
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
- BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = {
- BASE_HW_ISSUE_9435,
- BASE_HW_ISSUE_TSIX_2033,
- BASE_HW_ISSUE_TTRX_1337,
- BASE_HW_ISSUE_TTRX_921,
- BASE_HW_ISSUE_TTRX_3414,
- BASE_HW_ISSUE_TTRX_3083,
- BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tDUx[] = {
- BASE_HW_ISSUE_5736,
- BASE_HW_ISSUE_9435,
- BASE_HW_ISSUE_TSIX_2033,
- BASE_HW_ISSUE_TTRX_1337,
- BASE_HW_ISSUE_TTRX_3414,
- BASE_HW_ISSUE_TTRX_3083,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -601,6 +660,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tODx_r0p0
BASE_HW_ISSUE_GPU2019_3212,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_GPU2019_3901,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -610,6 +671,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tOD
BASE_HW_ISSUE_GPU2019_3212,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_GPU2019_3901,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -618,6 +681,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_GPU2019_3901,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -626,6 +691,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGR
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_GPU2019_3901,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -634,6 +701,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_GPU2019_3901,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -642,6 +711,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVA
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_GPU2019_3901,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -653,6 +724,9 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_TITANHW_2679,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -664,6 +738,9 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p1
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_TITANHW_2679,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -674,6 +751,9 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTU
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_TITANHW_2679,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -684,6 +764,9 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p0
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_TITANHW_2679,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -694,6 +777,9 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p1
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_TITANHW_2679,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -704,6 +790,9 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p2
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_TITANHW_2679,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -712,6 +801,9 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTI
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2021PRO_290,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_TITANHW_2679,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -720,6 +812,9 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTIx_r0p0
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2021PRO_290,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_TITANHW_2679,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
diff --git a/mali_kbase/mali_kbase.h b/mali_kbase/mali_kbase.h
index 9e53c0a..0e63821 100644
--- a/mali_kbase/mali_kbase.h
+++ b/mali_kbase/mali_kbase.h
@@ -332,21 +332,8 @@ int kbase_job_slot_softstop_start_rp(struct kbase_context *kctx,
void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
struct kbase_jd_atom *target_katom);
-void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
- struct kbase_jd_atom *target_katom, u32 sw_flags);
-
-/**
- * kbase_job_slot_hardstop - Hard-stop the specified job slot
- * @kctx: The kbase context that contains the job(s) that should
- * be hard-stopped
- * @js: The job slot to hard-stop
- * @target_katom: The job that should be hard-stopped (or NULL for all
- * jobs from the context)
- * Context:
- * The job slot lock must be held when calling this function.
- */
-void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
- struct kbase_jd_atom *target_katom);
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, unsigned int js,
+ struct kbase_jd_atom *target_katom, u32 sw_flags);
/**
* kbase_job_check_enter_disjoint - potentially enter disjoint mode
@@ -441,19 +428,6 @@ static inline void kbase_free_user_buffer(
}
}
-/**
- * kbase_mem_copy_from_extres() - Copy from external resources.
- *
- * @kctx: kbase context within which the copying is to take place.
- * @buf_data: Pointer to the information about external resources:
- * pages pertaining to the external resource, number of
- * pages to copy.
- *
- * Return: 0 on success, error code otherwise.
- */
-int kbase_mem_copy_from_extres(struct kbase_context *kctx,
- struct kbase_debug_copy_buffer *buf_data);
-
#if !MALI_USE_CSF
int kbase_process_soft_job(struct kbase_jd_atom *katom);
int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
diff --git a/mali_kbase/mali_kbase_as_fault_debugfs.c b/mali_kbase/mali_kbase_as_fault_debugfs.c
index 77f450d..ad33691 100644
--- a/mali_kbase/mali_kbase_as_fault_debugfs.c
+++ b/mali_kbase/mali_kbase_as_fault_debugfs.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -98,11 +98,9 @@ void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
"unable to create address_spaces debugfs directory");
} else {
for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
- snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i);
- debugfs_create_file(as_name, 0444,
- debugfs_directory,
- (void *)(uintptr_t)i,
- &as_fault_fops);
+ if (likely(scnprintf(as_name, ARRAY_SIZE(as_name), "as%u", i)))
+ debugfs_create_file(as_name, 0444, debugfs_directory,
+ (void *)(uintptr_t)i, &as_fault_fops);
}
}
diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c
index 9c867d1..a8f8791 100644
--- a/mali_kbase/mali_kbase_core_linux.c
+++ b/mali_kbase/mali_kbase_core_linux.c
@@ -31,10 +31,7 @@
#include <ipa/mali_kbase_ipa_debugfs.h>
#endif /* CONFIG_DEVFREQ_THERMAL */
#endif /* CONFIG_MALI_DEVFREQ */
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
#include "backend/gpu/mali_kbase_model_linux.h"
-#include <backend/gpu/mali_kbase_model_dummy.h>
-#endif /* CONFIG_MALI_NO_MALI */
#include "uapi/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h"
#include "mali_kbase_mem.h"
#include "mali_kbase_mem_pool_debugfs.h"
@@ -624,7 +621,8 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile,
kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE);
#if IS_ENABLED(CONFIG_DEBUG_FS)
- snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id);
+ if (unlikely(!scnprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id)))
+ return -ENOMEM;
mutex_init(&kctx->mem_profile_lock);
@@ -1461,6 +1459,9 @@ static int kbasep_kcpu_queue_enqueue(struct kbase_context *kctx,
static int kbasep_cs_tiler_heap_init(struct kbase_context *kctx,
union kbase_ioctl_cs_tiler_heap_init *heap_init)
{
+ if (heap_init->in.group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)
+ return -EINVAL;
+
kctx->jit_group_id = heap_init->in.group_id;
return kbase_csf_tiler_heap_init(kctx, heap_init->in.chunk_size,
@@ -1473,6 +1474,9 @@ static int kbasep_cs_tiler_heap_init(struct kbase_context *kctx,
static int kbasep_cs_tiler_heap_init_1_13(struct kbase_context *kctx,
union kbase_ioctl_cs_tiler_heap_init_1_13 *heap_init)
{
+ if (heap_init->in.group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)
+ return -EINVAL;
+
kctx->jit_group_id = heap_init->in.group_id;
return kbase_csf_tiler_heap_init(kctx, heap_init->in.chunk_size,
@@ -4272,7 +4276,7 @@ void kbase_protected_mode_term(struct kbase_device *kbdev)
kfree(kbdev->protected_dev);
}
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
static int kbase_common_reg_map(struct kbase_device *kbdev)
{
return 0;
@@ -4280,7 +4284,7 @@ static int kbase_common_reg_map(struct kbase_device *kbdev)
static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
{
}
-#else /* CONFIG_MALI_NO_MALI */
+#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
static int kbase_common_reg_map(struct kbase_device *kbdev)
{
int err = 0;
@@ -4316,7 +4320,7 @@ static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
kbdev->reg_size = 0;
}
}
-#endif /* CONFIG_MALI_NO_MALI */
+#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
int registers_map(struct kbase_device * const kbdev)
{
diff --git a/mali_kbase/mali_kbase_ctx_sched.c b/mali_kbase/mali_kbase_ctx_sched.c
index 3e58500..beb2928 100644
--- a/mali_kbase/mali_kbase_ctx_sched.c
+++ b/mali_kbase/mali_kbase_ctx_sched.c
@@ -119,7 +119,7 @@ int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx)
if (atomic_inc_return(&kctx->refcount) == 1) {
int const free_as = kbasep_ctx_sched_find_as_for_ctx(kctx);
- if (free_as != KBASEP_AS_NR_INVALID) {
+ if (free_as >= 0) {
kbdev->as_free &= ~(1u << free_as);
/* Only program the MMU if the context has not been
* assigned the same address space before.
@@ -173,8 +173,10 @@ void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx)
*/
WARN_ON(!atomic_read(&kctx->refcount));
#endif
- WARN_ON(kctx->as_nr == KBASEP_AS_NR_INVALID);
- WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx);
+ if (likely((kctx->as_nr >= 0) && (kctx->as_nr < BASE_MAX_NR_AS)))
+ WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx);
+ else
+ WARN(true, "Invalid as_nr(%d)", kctx->as_nr);
atomic_inc(&kctx->refcount);
}
@@ -188,16 +190,17 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx)
new_ref_count = atomic_dec_return(&kctx->refcount);
if (new_ref_count == 0) {
- kbdev->as_free |= (1u << kctx->as_nr);
- if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) {
- KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS(
- kbdev, kctx->id);
- kbdev->as_to_kctx[kctx->as_nr] = NULL;
- kctx->as_nr = KBASEP_AS_NR_INVALID;
- kbase_ctx_flag_clear(kctx, KCTX_AS_DISABLED_ON_FAULT);
+ if (likely((kctx->as_nr >= 0) && (kctx->as_nr < BASE_MAX_NR_AS))) {
+ kbdev->as_free |= (1u << kctx->as_nr);
+ if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) {
+ KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS(kbdev, kctx->id);
+ kbdev->as_to_kctx[kctx->as_nr] = NULL;
+ kctx->as_nr = KBASEP_AS_NR_INVALID;
+ kbase_ctx_flag_clear(kctx, KCTX_AS_DISABLED_ON_FAULT);
#if !MALI_USE_CSF
- kbase_backend_slot_kctx_purge_locked(kbdev, kctx);
+ kbase_backend_slot_kctx_purge_locked(kbdev, kctx);
#endif
+ }
}
}
@@ -214,7 +217,7 @@ void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx)
WARN_ON(atomic_read(&kctx->refcount) != 0);
- if (kctx->as_nr != KBASEP_AS_NR_INVALID) {
+ if ((kctx->as_nr >= 0) && (kctx->as_nr < BASE_MAX_NR_AS)) {
if (kbdev->pm.backend.gpu_powered)
kbase_mmu_disable(kctx);
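The mali_kbase_ctx_sched.c hunks above replace comparisons against KBASEP_AS_NR_INVALID with explicit range checks, so kbdev->as_to_kctx[] is never indexed with a negative or out-of-range address-space number. Restated as a small predicate (illustrative only; no such helper exists in the driver):

/* Equivalent of the bounds check applied before indexing as_to_kctx[].
 * BASE_MAX_NR_AS is assumed to come from the kbase headers.
 */
static inline bool example_as_nr_is_valid(int as_nr)
{
        return (as_nr >= 0) && (as_nr < BASE_MAX_NR_AS);
}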
diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h
index 722ffc7..e98ab45 100644
--- a/mali_kbase/mali_kbase_defs.h
+++ b/mali_kbase/mali_kbase_defs.h
@@ -1026,7 +1026,7 @@ struct kbase_device {
char devname[DEVNAME_SIZE];
u32 id;
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
void *model;
struct kmem_cache *irq_slab;
struct workqueue_struct *irq_workq;
@@ -1034,7 +1034,7 @@ struct kbase_device {
atomic_t serving_gpu_irq;
atomic_t serving_mmu_irq;
spinlock_t reg_op_lock;
-#endif /* CONFIG_MALI_NO_MALI */
+#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
struct kbase_pm_device_data pm;
struct kbase_mem_pool_group mem_pools;
diff --git a/mali_kbase/mali_kbase_fence_ops.c b/mali_kbase/mali_kbase_fence_ops.c
index dd0b63e..25b4c9c 100644
--- a/mali_kbase/mali_kbase_fence_ops.c
+++ b/mali_kbase/mali_kbase_fence_ops.c
@@ -68,10 +68,12 @@ kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size)
#endif
{
#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE)
- snprintf(str, size, "%u", fence->seqno);
+ const char *format = "%u";
#else
- snprintf(str, size, "%llu", fence->seqno);
+ const char *format = "%llu";
#endif
+ if (unlikely(!scnprintf(str, size, format, fence->seqno)))
+ pr_err("Fail to encode fence seqno to string");
}
#if MALI_USE_CSF
diff --git a/mali_kbase/mali_kbase_gpuprops.c b/mali_kbase/mali_kbase_gpuprops.c
index ad72f06..afbba3d 100644
--- a/mali_kbase/mali_kbase_gpuprops.c
+++ b/mali_kbase/mali_kbase_gpuprops.c
@@ -311,7 +311,6 @@ static void kbase_gpuprops_calculate_props(
struct base_gpu_props * const gpu_props, struct kbase_device *kbdev)
{
int i;
- u32 gpu_id;
/* Populate the base_gpu_props structure */
kbase_gpuprops_update_core_props_gpu_id(gpu_props);
@@ -361,49 +360,23 @@ static void kbase_gpuprops_calculate_props(
gpu_props->thread_props.tls_alloc =
gpu_props->raw_props.thread_tls_alloc;
- /* MIDHARC-2364 was intended for tULx.
- * Workaround for the incorrectly applied THREAD_FEATURES to tDUx.
- */
- gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
-
#if MALI_USE_CSF
- CSTD_UNUSED(gpu_id);
gpu_props->thread_props.max_registers =
- KBASE_UBFX32(gpu_props->raw_props.thread_features,
- 0U, 22);
+ KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 22);
gpu_props->thread_props.impl_tech =
- KBASE_UBFX32(gpu_props->raw_props.thread_features,
- 22U, 2);
+ KBASE_UBFX32(gpu_props->raw_props.thread_features, 22U, 2);
gpu_props->thread_props.max_task_queue =
- KBASE_UBFX32(gpu_props->raw_props.thread_features,
- 24U, 8);
+ KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 8);
gpu_props->thread_props.max_thread_group_split = 0;
#else
- if ((gpu_id & GPU_ID2_PRODUCT_MODEL) == GPU_ID2_PRODUCT_TDUX) {
- gpu_props->thread_props.max_registers =
- KBASE_UBFX32(gpu_props->raw_props.thread_features,
- 0U, 22);
- gpu_props->thread_props.impl_tech =
- KBASE_UBFX32(gpu_props->raw_props.thread_features,
- 22U, 2);
- gpu_props->thread_props.max_task_queue =
- KBASE_UBFX32(gpu_props->raw_props.thread_features,
- 24U, 8);
- gpu_props->thread_props.max_thread_group_split = 0;
- } else {
- gpu_props->thread_props.max_registers =
- KBASE_UBFX32(gpu_props->raw_props.thread_features,
- 0U, 16);
- gpu_props->thread_props.max_task_queue =
- KBASE_UBFX32(gpu_props->raw_props.thread_features,
- 16U, 8);
- gpu_props->thread_props.max_thread_group_split =
- KBASE_UBFX32(gpu_props->raw_props.thread_features,
- 24U, 6);
- gpu_props->thread_props.impl_tech =
- KBASE_UBFX32(gpu_props->raw_props.thread_features,
- 30U, 2);
- }
+ gpu_props->thread_props.max_registers =
+ KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
+ gpu_props->thread_props.max_task_queue =
+ KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
+ gpu_props->thread_props.max_thread_group_split =
+ KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
+ gpu_props->thread_props.impl_tech =
+ KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2);
#endif
/* If values are not specified, then use defaults */
@@ -539,7 +512,7 @@ MODULE_PARM_DESC(override_l2_hash, "Override L2 hash config for testing");
static u32 l2_hash_values[ASN_HASH_COUNT] = {
0,
};
-static int num_override_l2_hash_values;
+static unsigned int num_override_l2_hash_values;
module_param_array(l2_hash_values, uint, &num_override_l2_hash_values, 0000);
MODULE_PARM_DESC(l2_hash_values, "Override L2 hash values config for testing");
@@ -593,7 +566,7 @@ kbase_read_l2_config_from_dt(struct kbase_device *const kbdev)
kbdev->l2_hash_values_override = false;
if (num_override_l2_hash_values) {
- int i;
+ unsigned int i;
kbdev->l2_hash_values_override = true;
for (i = 0; i < num_override_l2_hash_values; i++)
diff --git a/mali_kbase/mali_kbase_hw.c b/mali_kbase/mali_kbase_hw.c
index bb079c2..c658fb7 100644
--- a/mali_kbase/mali_kbase_hw.c
+++ b/mali_kbase/mali_kbase_hw.c
@@ -68,9 +68,6 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev)
case GPU_ID2_PRODUCT_TBAX:
features = base_hw_features_tBAx;
break;
- case GPU_ID2_PRODUCT_TDUX:
- features = base_hw_features_tDUx;
- break;
case GPU_ID2_PRODUCT_TODX:
case GPU_ID2_PRODUCT_LODX:
features = base_hw_features_tODx;
@@ -211,10 +208,6 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
{ GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tBAx_r0p0 },
{ U32_MAX, NULL } } },
- { GPU_ID2_PRODUCT_TDUX,
- { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0 },
- { U32_MAX, NULL } } },
-
{ GPU_ID2_PRODUCT_TODX,
{ { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 },
{ GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tODx_r0p0 },
@@ -394,9 +387,6 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
case GPU_ID2_PRODUCT_TBAX:
issues = base_hw_issues_model_tBAx;
break;
- case GPU_ID2_PRODUCT_TDUX:
- issues = base_hw_issues_model_tDUx;
- break;
case GPU_ID2_PRODUCT_TODX:
case GPU_ID2_PRODUCT_LODX:
issues = base_hw_issues_model_tODx;
@@ -415,7 +405,6 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
case GPU_ID2_PRODUCT_LTIX:
issues = base_hw_issues_model_tTIx;
break;
-
default:
dev_err(kbdev->dev,
"Unknown GPU ID %x", gpu_id);
diff --git a/mali_kbase/mali_kbase_hwaccess_jm.h b/mali_kbase/mali_kbase_hwaccess_jm.h
index 124a6d6..ca77c19 100644
--- a/mali_kbase/mali_kbase_hwaccess_jm.h
+++ b/mali_kbase/mali_kbase_hwaccess_jm.h
@@ -97,8 +97,8 @@ bool kbase_backend_use_ctx(struct kbase_device *kbdev,
* Return: true if context is now active, false otherwise (ie if context does
* not have an address space assigned)
*/
-bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
- struct kbase_context *kctx, int js);
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, struct kbase_context *kctx,
+ unsigned int js);
/**
* kbase_backend_release_ctx_irq - Release a context from the GPU. This will
@@ -183,8 +183,7 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp);
*
* Return: Atom currently at the head of slot @js, or NULL
*/
-struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
- int js);
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, unsigned int js);
/**
* kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a
@@ -194,7 +193,7 @@ struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
*
* Return: Number of atoms currently on slot
*/
-int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js);
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, unsigned int js);
/**
* kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot
@@ -204,7 +203,7 @@ int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js);
*
* Return: Number of atoms currently on slot @js that are currently on the GPU.
*/
-int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js);
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, unsigned int js);
/**
* kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs
@@ -233,7 +232,7 @@ void kbase_backend_timeouts_changed(struct kbase_device *kbdev);
*
* Return: Number of jobs that can be submitted.
*/
-int kbase_backend_slot_free(struct kbase_device *kbdev, int js);
+int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js);
/**
* kbase_job_check_leave_disjoint - potentially leave disjoint state
@@ -287,8 +286,8 @@ u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev);
* Context:
* The job slot lock must be held when calling this function.
*/
-void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
- struct kbase_jd_atom *target_katom);
+void kbase_job_slot_hardstop(struct kbase_context *kctx, unsigned int js,
+ struct kbase_jd_atom *target_katom);
/**
* kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms
diff --git a/mali_kbase/mali_kbase_jd.c b/mali_kbase/mali_kbase_jd.c
index 722cf1c..bb71caa 100644
--- a/mali_kbase/mali_kbase_jd.c
+++ b/mali_kbase/mali_kbase_jd.c
@@ -28,6 +28,11 @@
#include <linux/version.h>
#include <linux/ratelimit.h>
#include <linux/priority_control_manager.h>
+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
+#include <linux/sched/signal.h>
+#else
+#include <linux/signal.h>
+#endif
#include <mali_kbase_jm.h>
#include <mali_kbase_kinstr_jm.h>
@@ -1074,11 +1079,19 @@ int kbase_jd_submit(struct kbase_context *kctx,
return -EINVAL;
}
+ if (nr_atoms > BASE_JD_ATOM_COUNT) {
+ dev_dbg(kbdev->dev, "Invalid attempt to submit %u atoms at once for kctx %d_%d",
+ nr_atoms, kctx->tgid, kctx->id);
+ return -EINVAL;
+ }
+
/* All atoms submitted in this call have the same flush ID */
latest_flush = kbase_backend_get_current_flush_id(kbdev);
for (i = 0; i < nr_atoms; i++) {
- struct base_jd_atom user_atom;
+ struct base_jd_atom user_atom = {
+ .seq_nr = 0,
+ };
struct base_jd_fragment user_jc_incr;
struct kbase_jd_atom *katom;
@@ -1202,6 +1215,12 @@ while (false)
kbase_disjoint_event_potential(kbdev);
mutex_unlock(&jctx->lock);
+ if (fatal_signal_pending(current)) {
+ dev_dbg(kbdev->dev, "Fatal signal pending for kctx %d_%d",
+ kctx->tgid, kctx->id);
+ /* We're being killed so the result code doesn't really matter */
+ return 0;
+ }
}
if (need_to_try_schedule_context)
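The kbase_jd_submit() change also adds a fatal-signal check at the end of each loop iteration, so a task being killed stops submitting atoms promptly. The pattern in isolation (loop body and function name are placeholders) looks like:

#include <linux/sched/signal.h>

/* Sketch of the fatal-signal early-exit added to the submission loop above. */
static int example_submit_all(unsigned int nr_items)
{
        unsigned int i;

        for (i = 0; i < nr_items; i++) {
                /* ... submit item i ... */

                if (fatal_signal_pending(current)) {
                        /* The task is being killed; the result no longer matters. */
                        return 0;
                }
        }

        return 0;
}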
diff --git a/mali_kbase/mali_kbase_jm.c b/mali_kbase/mali_kbase_jm.c
index 6cbd6f1..1ac5cd3 100644
--- a/mali_kbase/mali_kbase_jm.c
+++ b/mali_kbase/mali_kbase_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -37,15 +37,13 @@
*
* Return: true if slot can still be submitted on, false if slot is now full.
*/
-static bool kbase_jm_next_job(struct kbase_device *kbdev, int js,
- int nr_jobs_to_submit)
+static bool kbase_jm_next_job(struct kbase_device *kbdev, unsigned int js, int nr_jobs_to_submit)
{
struct kbase_context *kctx;
int i;
kctx = kbdev->hwaccess.active_kctx[js];
- dev_dbg(kbdev->dev,
- "Trying to run the next %d jobs in kctx %pK (s:%d)\n",
+ dev_dbg(kbdev->dev, "Trying to run the next %d jobs in kctx %pK (s:%u)\n",
nr_jobs_to_submit, (void *)kctx, js);
if (!kctx)
@@ -60,7 +58,7 @@ static bool kbase_jm_next_job(struct kbase_device *kbdev, int js,
kbase_backend_run_atom(kbdev, katom);
}
- dev_dbg(kbdev->dev, "Slot ringbuffer should now be full (s:%d)\n", js);
+ dev_dbg(kbdev->dev, "Slot ringbuffer should now be full (s:%u)\n", js);
return false;
}
@@ -72,7 +70,7 @@ u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask)
dev_dbg(kbdev->dev, "JM kick slot mask 0x%x\n", js_mask);
while (js_mask) {
- int js = ffs(js_mask) - 1;
+ unsigned int js = ffs(js_mask) - 1;
int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js);
if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit))
@@ -111,14 +109,14 @@ void kbase_jm_try_kick_all(struct kbase_device *kbdev)
void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
{
- int js;
+ unsigned int js;
lockdep_assert_held(&kbdev->hwaccess_lock);
for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
if (kbdev->hwaccess.active_kctx[js] == kctx) {
- dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n",
- (void *)kctx, js);
+ dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%u)\n", (void *)kctx,
+ js);
kbdev->hwaccess.active_kctx[js] = NULL;
}
}
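For reference, the slot-mask walk in the kbase_jm_kick() hunk above can be pictured with a small standalone sketch (plain userspace C using POSIX ffs(); the mask value and the choice to clear every bit after a single pass are illustrative only):

#include <stdio.h>
#include <strings.h>	/* ffs() */

int main(void)
{
	/* Hypothetical kick mask with job slots 0 and 2 set. */
	unsigned int js_mask = 0x5;

	while (js_mask) {
		/* ffs() returns the 1-based position of the lowest set bit,
		 * so slots are visited lowest-first, as in kbase_jm_kick().
		 */
		unsigned int js = ffs(js_mask) - 1;

		printf("kick slot %u\n", js);

		/* The real loop decides per slot whether to keep submitting;
		 * this sketch simply clears the bit and moves on.
		 */
		js_mask &= ~(1u << js);
	}
	return 0;
}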
diff --git a/mali_kbase/mali_kbase_js.c b/mali_kbase/mali_kbase_js.c
index 491bc06..d623aca 100644
--- a/mali_kbase/mali_kbase_js.c
+++ b/mali_kbase/mali_kbase_js.c
@@ -77,8 +77,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
struct kbase_device *kbdev, struct kbase_context *kctx,
struct kbasep_js_atom_retained_state *katom_retained_state);
-static int kbase_js_get_slot(struct kbase_device *kbdev,
- struct kbase_jd_atom *katom);
+static unsigned int kbase_js_get_slot(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
kbasep_js_ctx_job_cb *callback);
@@ -151,8 +150,7 @@ static void kbase_js_sync_timers(struct kbase_device *kbdev)
*
* Return: true if there are no atoms to pull, false otherwise.
*/
-static inline bool
-jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
+static inline bool jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, unsigned int js, int prio)
{
bool none_to_pull;
struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
@@ -161,9 +159,8 @@ jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
none_to_pull = RB_EMPTY_ROOT(&rb->runnable_tree);
- dev_dbg(kctx->kbdev->dev,
- "Slot %d (prio %d) is %spullable in kctx %pK\n",
- js, prio, none_to_pull ? "not " : "", kctx);
+ dev_dbg(kctx->kbdev->dev, "Slot %u (prio %d) is %spullable in kctx %pK\n", js, prio,
+ none_to_pull ? "not " : "", kctx);
return none_to_pull;
}
@@ -179,8 +176,7 @@ jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
* Return: true if the ring buffers for all priorities have no pullable atoms,
* false otherwise.
*/
-static inline bool
-jsctx_rb_none_to_pull(struct kbase_context *kctx, int js)
+static inline bool jsctx_rb_none_to_pull(struct kbase_context *kctx, unsigned int js)
{
int prio;
@@ -212,8 +208,8 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js)
*
* The HW access lock must always be held when calling this function.
*/
-static void jsctx_queue_foreach_prio(struct kbase_context *kctx, int js,
- int prio, kbasep_js_ctx_job_cb *callback)
+static void jsctx_queue_foreach_prio(struct kbase_context *kctx, unsigned int js, int prio,
+ kbasep_js_ctx_job_cb *callback)
{
struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
@@ -272,7 +268,7 @@ static void jsctx_queue_foreach_prio(struct kbase_context *kctx, int js,
* jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback
* for each entry, and remove the entry from the queue.
*/
-static inline void jsctx_queue_foreach(struct kbase_context *kctx, int js,
+static inline void jsctx_queue_foreach(struct kbase_context *kctx, unsigned int js,
kbasep_js_ctx_job_cb *callback)
{
int prio;
@@ -293,15 +289,14 @@ static inline void jsctx_queue_foreach(struct kbase_context *kctx, int js,
*
* Return: Pointer to next atom in buffer, or NULL if there is no atom.
*/
-static inline struct kbase_jd_atom *
-jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
+static inline struct kbase_jd_atom *jsctx_rb_peek_prio(struct kbase_context *kctx, unsigned int js,
+ int prio)
{
struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
struct rb_node *node;
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
- dev_dbg(kctx->kbdev->dev,
- "Peeking runnable tree of kctx %pK for prio %d (s:%d)\n",
+ dev_dbg(kctx->kbdev->dev, "Peeking runnable tree of kctx %pK for prio %d (s:%u)\n",
(void *)kctx, prio, js);
node = rb_first(&rb->runnable_tree);
@@ -326,8 +321,7 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
*
* Return: Pointer to next atom in buffer, or NULL if there is no atom.
*/
-static inline struct kbase_jd_atom *
-jsctx_rb_peek(struct kbase_context *kctx, int js)
+static inline struct kbase_jd_atom *jsctx_rb_peek(struct kbase_context *kctx, unsigned int js)
{
int prio;
@@ -358,7 +352,7 @@ static inline void
jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
{
int prio = katom->sched_priority;
- int js = katom->slot_nr;
+ unsigned int js = katom->slot_nr;
struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
@@ -377,14 +371,14 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom)
{
struct kbase_device *kbdev = kctx->kbdev;
int prio = katom->sched_priority;
- int js = katom->slot_nr;
+ unsigned int js = katom->slot_nr;
struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL;
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
- dev_dbg(kbdev->dev, "Adding atom %pK to runnable tree of kctx %pK (s:%d)\n",
- (void *)katom, (void *)kctx, js);
+ dev_dbg(kbdev->dev, "Adding atom %pK to runnable tree of kctx %pK (s:%u)\n", (void *)katom,
+ (void *)kctx, js);
while (*new) {
struct kbase_jd_atom *entry = container_of(*new,
@@ -425,15 +419,11 @@ jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
jsctx_tree_add(kctx, katom);
}
-static bool kbase_js_ctx_pullable(struct kbase_context *kctx,
- int js,
- bool is_scheduled);
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx, unsigned int js, bool is_scheduled);
static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
- struct kbase_context *kctx,
- int js);
+ struct kbase_context *kctx, unsigned int js);
static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
- struct kbase_context *kctx,
- int js);
+ struct kbase_context *kctx, unsigned int js);
typedef bool(katom_ordering_func)(const struct kbase_jd_atom *,
const struct kbase_jd_atom *);
@@ -685,7 +675,7 @@ void kbasep_js_kctx_term(struct kbase_context *kctx)
{
struct kbase_device *kbdev;
struct kbasep_js_kctx_info *js_kctx_info;
- int js;
+ unsigned int js;
bool update_ctx_count = false;
unsigned long flags;
CSTD_UNUSED(js_kctx_info);
@@ -733,8 +723,8 @@ void kbasep_js_kctx_term(struct kbase_context *kctx)
*/
/* Should not normally use directly - use kbase_jsctx_slot_atom_pulled_dec() instead */
-static void kbase_jsctx_slot_prio_blocked_clear(struct kbase_context *kctx,
- int js, int sched_prio)
+static void kbase_jsctx_slot_prio_blocked_clear(struct kbase_context *kctx, unsigned int js,
+ int sched_prio)
{
struct kbase_jsctx_slot_tracking *slot_tracking =
&kctx->slot_tracking[js];
@@ -746,7 +736,7 @@ static void kbase_jsctx_slot_prio_blocked_clear(struct kbase_context *kctx,
NULL, 0, js, (unsigned int)sched_prio);
}
-static int kbase_jsctx_slot_atoms_pulled(struct kbase_context *kctx, int js)
+static int kbase_jsctx_slot_atoms_pulled(struct kbase_context *kctx, unsigned int js)
{
return atomic_read(&kctx->slot_tracking[js].atoms_pulled);
}
@@ -756,7 +746,7 @@ static int kbase_jsctx_slot_atoms_pulled(struct kbase_context *kctx, int js)
* - that priority level is blocked
* - or, any higher priority level is blocked
*/
-static bool kbase_jsctx_slot_prio_is_blocked(struct kbase_context *kctx, int js,
+static bool kbase_jsctx_slot_prio_is_blocked(struct kbase_context *kctx, unsigned int js,
int sched_prio)
{
struct kbase_jsctx_slot_tracking *slot_tracking =
@@ -796,7 +786,7 @@ static bool kbase_jsctx_slot_prio_is_blocked(struct kbase_context *kctx, int js,
static int kbase_jsctx_slot_atom_pulled_inc(struct kbase_context *kctx,
const struct kbase_jd_atom *katom)
{
- int js = katom->slot_nr;
+ unsigned int js = katom->slot_nr;
int sched_prio = katom->sched_priority;
struct kbase_jsctx_slot_tracking *slot_tracking =
&kctx->slot_tracking[js];
@@ -805,7 +795,7 @@ static int kbase_jsctx_slot_atom_pulled_inc(struct kbase_context *kctx,
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
WARN(kbase_jsctx_slot_prio_is_blocked(kctx, js, sched_prio),
- "Should not have pulled atoms for slot %d from a context that is blocked at priority %d or higher",
+ "Should not have pulled atoms for slot %u from a context that is blocked at priority %d or higher",
js, sched_prio);
nr_atoms_pulled = atomic_inc_return(&kctx->atoms_pulled_all_slots);
@@ -834,7 +824,7 @@ static int kbase_jsctx_slot_atom_pulled_inc(struct kbase_context *kctx,
static bool kbase_jsctx_slot_atom_pulled_dec(struct kbase_context *kctx,
const struct kbase_jd_atom *katom)
{
- int js = katom->slot_nr;
+ unsigned int js = katom->slot_nr;
int sched_prio = katom->sched_priority;
int atoms_pulled_pri;
struct kbase_jsctx_slot_tracking *slot_tracking =
@@ -883,14 +873,12 @@ static bool kbase_jsctx_slot_atom_pulled_dec(struct kbase_context *kctx,
* Return: true if caller should call kbase_backend_ctx_count_changed()
*/
static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
- struct kbase_context *kctx,
- int js)
+ struct kbase_context *kctx, unsigned int js)
{
bool ret = false;
lockdep_assert_held(&kbdev->hwaccess_lock);
- dev_dbg(kbdev->dev, "Add pullable tail kctx %pK (s:%d)\n",
- (void *)kctx, js);
+ dev_dbg(kbdev->dev, "Add pullable tail kctx %pK (s:%u)\n", (void *)kctx, js);
if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
@@ -925,14 +913,13 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
*
* Return: true if caller should call kbase_backend_ctx_count_changed()
*/
-static bool kbase_js_ctx_list_add_pullable_head_nolock(
- struct kbase_device *kbdev, struct kbase_context *kctx, int js)
+static bool kbase_js_ctx_list_add_pullable_head_nolock(struct kbase_device *kbdev,
+ struct kbase_context *kctx, unsigned int js)
{
bool ret = false;
lockdep_assert_held(&kbdev->hwaccess_lock);
- dev_dbg(kbdev->dev, "Add pullable head kctx %pK (s:%d)\n",
- (void *)kctx, js);
+ dev_dbg(kbdev->dev, "Add pullable head kctx %pK (s:%u)\n", (void *)kctx, js);
if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
@@ -970,8 +957,7 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock(
* Return: true if caller should call kbase_backend_ctx_count_changed()
*/
static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
- struct kbase_context *kctx,
- int js)
+ struct kbase_context *kctx, unsigned int js)
{
bool ret;
unsigned long flags;
@@ -1001,14 +987,12 @@ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
* Return: true if caller should call kbase_backend_ctx_count_changed()
*/
static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
- struct kbase_context *kctx,
- int js)
+ struct kbase_context *kctx, unsigned int js)
{
bool ret = false;
lockdep_assert_held(&kbdev->hwaccess_lock);
- dev_dbg(kbdev->dev, "Add unpullable tail kctx %pK (s:%d)\n",
- (void *)kctx, js);
+ dev_dbg(kbdev->dev, "Add unpullable tail kctx %pK (s:%u)\n", (void *)kctx, js);
list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
&kbdev->js_data.ctx_list_unpullable[js][kctx->priority]);
@@ -1043,9 +1027,8 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
*
* Return: true if caller should call kbase_backend_ctx_count_changed()
*/
-static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev,
- struct kbase_context *kctx,
- int js)
+static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, struct kbase_context *kctx,
+ unsigned int js)
{
bool ret = false;
@@ -1081,9 +1064,8 @@ static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev,
* Return: Context to use for specified slot.
* NULL if no contexts present for specified slot
*/
-static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(
- struct kbase_device *kbdev,
- int js)
+static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(struct kbase_device *kbdev,
+ unsigned int js)
{
struct kbase_context *kctx;
int i;
@@ -1099,9 +1081,8 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(
jctx.sched_info.ctx.ctx_list_entry[js]);
list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
- dev_dbg(kbdev->dev,
- "Popped %pK from the pullable queue (s:%d)\n",
- (void *)kctx, js);
+ dev_dbg(kbdev->dev, "Popped %pK from the pullable queue (s:%u)\n", (void *)kctx,
+ js);
return kctx;
}
return NULL;
@@ -1116,8 +1097,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(
* Return: Context to use for specified slot.
* NULL if no contexts present for specified slot
*/
-static struct kbase_context *kbase_js_ctx_list_pop_head(
- struct kbase_device *kbdev, int js)
+static struct kbase_context *kbase_js_ctx_list_pop_head(struct kbase_device *kbdev, unsigned int js)
{
struct kbase_context *kctx;
unsigned long flags;
@@ -1141,8 +1121,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head(
* Return: true if context can be pulled from on specified slot
* false otherwise
*/
-static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
- bool is_scheduled)
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx, unsigned int js, bool is_scheduled)
{
struct kbasep_js_device_data *js_devdata;
struct kbase_jd_atom *katom;
@@ -1161,8 +1140,7 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
}
katom = jsctx_rb_peek(kctx, js);
if (!katom) {
- dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%d)\n",
- (void *)kctx, js);
+ dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%u)\n", (void *)kctx, js);
return false; /* No pullable atoms */
}
if (kbase_jsctx_slot_prio_is_blocked(kctx, js, katom->sched_priority)) {
@@ -1170,7 +1148,7 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
kctx->kbdev, JS_SLOT_PRIO_IS_BLOCKED, kctx, katom,
katom->jc, js, (unsigned int)katom->sched_priority);
dev_dbg(kbdev->dev,
- "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%d)\n",
+ "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%u)\n",
(void *)kctx, katom->sched_priority, js);
return false;
}
@@ -1191,14 +1169,14 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) {
dev_dbg(kbdev->dev,
- "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%d)\n",
+ "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%u)\n",
(void *)katom, js);
return false;
}
}
- dev_dbg(kbdev->dev, "JS: Atom %pK is pullable in kctx %pK (s:%d)\n",
- (void *)katom, (void *)kctx, js);
+ dev_dbg(kbdev->dev, "JS: Atom %pK is pullable in kctx %pK (s:%u)\n", (void *)katom,
+ (void *)kctx, js);
return true;
}
@@ -1209,7 +1187,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
struct kbase_device *kbdev = kctx->kbdev;
bool ret = true;
bool has_dep = false, has_x_dep = false;
- int js = kbase_js_get_slot(kbdev, katom);
+ unsigned int js = kbase_js_get_slot(kbdev, katom);
int prio = katom->sched_priority;
int i;
@@ -1217,7 +1195,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
if (dep_atom) {
- int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+ unsigned int dep_js = kbase_js_get_slot(kbdev, dep_atom);
int dep_prio = dep_atom->sched_priority;
dev_dbg(kbdev->dev,
@@ -1372,7 +1350,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority)
{
struct kbase_device *kbdev = kctx->kbdev;
- int js;
+ unsigned int js;
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -2078,9 +2056,8 @@ void kbase_js_set_timeouts(struct kbase_device *kbdev)
kbase_backend_timeouts_changed(kbdev);
}
-static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
- struct kbase_context *kctx,
- int js)
+static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, struct kbase_context *kctx,
+ unsigned int js)
{
struct kbasep_js_device_data *js_devdata;
struct kbasep_js_kctx_info *js_kctx_info;
@@ -2088,7 +2065,7 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
bool kctx_suspended = false;
int as_nr;
- dev_dbg(kbdev->dev, "Scheduling kctx %pK (s:%d)\n", kctx, js);
+ dev_dbg(kbdev->dev, "Scheduling kctx %pK (s:%u)\n", kctx, js);
js_devdata = &kbdev->js_data;
js_kctx_info = &kctx->jctx.sched_info;
@@ -2115,8 +2092,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
WARN_ON(as_nr == KBASEP_AS_NR_INVALID);
}
}
- if (as_nr == KBASEP_AS_NR_INVALID)
- return false; /* No address spaces currently available */
+ if ((as_nr < 0) || (as_nr >= BASE_MAX_NR_AS))
+ return false; /* No address space currently available */
/*
* Atomic transaction on the Context and Run Pool begins
@@ -2223,9 +2200,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
return true;
}
-static bool kbase_js_use_ctx(struct kbase_device *kbdev,
- struct kbase_context *kctx,
- int js)
+static bool kbase_js_use_ctx(struct kbase_device *kbdev, struct kbase_context *kctx,
+ unsigned int js)
{
unsigned long flags;
@@ -2233,9 +2209,7 @@ static bool kbase_js_use_ctx(struct kbase_device *kbdev,
if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
kbase_backend_use_ctx_sched(kbdev, kctx, js)) {
-
- dev_dbg(kbdev->dev,
- "kctx %pK already has ASID - mark as active (s:%d)\n",
+ dev_dbg(kbdev->dev, "kctx %pK already has ASID - mark as active (s:%u)\n",
(void *)kctx, js);
if (kbdev->hwaccess.active_kctx[js] != kctx) {
@@ -2502,8 +2476,7 @@ bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
return true;
}
-static int kbase_js_get_slot(struct kbase_device *kbdev,
- struct kbase_jd_atom *katom)
+static unsigned int kbase_js_get_slot(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
{
if (katom->core_req & BASE_JD_REQ_JOB_SLOT)
return katom->jobslot;
@@ -2542,11 +2515,10 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
(katom->pre_dep && (katom->pre_dep->atom_flags &
KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) {
int prio = katom->sched_priority;
- int js = katom->slot_nr;
+ unsigned int js = katom->slot_nr;
struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
- dev_dbg(kctx->kbdev->dev, "Add atom %pK to X_DEP list (s:%d)\n",
- (void *)katom, js);
+ dev_dbg(kctx->kbdev->dev, "Add atom %pK to X_DEP list (s:%u)\n", (void *)katom, js);
list_add_tail(&katom->queue, &queue->x_dep_head);
katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
@@ -2637,8 +2609,8 @@ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom)
*
* Context: Caller must hold the HW access lock
*/
-static void kbase_js_evict_deps(struct kbase_context *kctx,
- struct kbase_jd_atom *katom, int js, int prio)
+static void kbase_js_evict_deps(struct kbase_context *kctx, struct kbase_jd_atom *katom,
+ unsigned int js, int prio)
{
struct kbase_jd_atom *x_dep = katom->x_post_dep;
struct kbase_jd_atom *next_katom = katom->post_dep;
@@ -2670,7 +2642,7 @@ static void kbase_js_evict_deps(struct kbase_context *kctx,
}
}
-struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, unsigned int js)
{
struct kbase_jd_atom *katom;
struct kbasep_js_device_data *js_devdata;
@@ -2680,8 +2652,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
KBASE_DEBUG_ASSERT(kctx);
kbdev = kctx->kbdev;
- dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %pK (s:%d)\n",
- (void *)kctx, js);
+ dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %pK (s:%u)\n", (void *)kctx, js);
js_devdata = &kbdev->js_data;
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -2700,13 +2671,12 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
katom = jsctx_rb_peek(kctx, js);
if (!katom) {
- dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%d)\n",
- (void *)kctx, js);
+ dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%u)\n", (void *)kctx, js);
return NULL;
}
if (kbase_jsctx_slot_prio_is_blocked(kctx, js, katom->sched_priority)) {
dev_dbg(kbdev->dev,
- "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%d)\n",
+ "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%u)\n",
(void *)kctx, katom->sched_priority, js);
return NULL;
}
@@ -2740,7 +2710,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
kbase_backend_nr_atoms_on_slot(kbdev, js)) {
dev_dbg(kbdev->dev,
- "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%d)\n",
+ "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%u)\n",
(void *)katom, js);
return NULL;
}
@@ -2763,7 +2733,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
katom->ticks = 0;
- dev_dbg(kbdev->dev, "JS: successfully pulled atom %pK from kctx %pK (s:%d)\n",
+ dev_dbg(kbdev->dev, "JS: successfully pulled atom %pK from kctx %pK (s:%u)\n",
(void *)katom, (void *)kctx, js);
return katom;
@@ -3366,7 +3336,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
if (!kbasep_js_is_submit_allowed(js_devdata, kctx) &&
!kbase_jsctx_atoms_pulled(kctx) &&
!kbase_ctx_flag(kctx, KCTX_DYING)) {
- int js;
+ unsigned int js;
kbasep_js_set_submit_allowed(js_devdata, kctx);
@@ -3378,7 +3348,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
}
} else if (katom->x_post_dep &&
kbasep_js_is_submit_allowed(js_devdata, kctx)) {
- int js;
+ unsigned int js;
for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
if (kbase_js_ctx_pullable(kctx, js, true))
@@ -3603,13 +3573,13 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom)
return false;
}
-void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
+void kbase_js_sched(struct kbase_device *kbdev, unsigned int js_mask)
{
struct kbasep_js_device_data *js_devdata;
struct kbase_context *last_active[BASE_JM_MAX_NR_SLOTS];
bool timer_sync = false;
bool ctx_waiting[BASE_JM_MAX_NR_SLOTS];
- int js;
+ unsigned int js;
KBASE_TLSTREAM_TL_JS_SCHED_START(kbdev, 0);
@@ -3638,24 +3608,20 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
if (!kctx) {
js_mask &= ~(1 << js);
- dev_dbg(kbdev->dev,
- "No kctx on pullable list (s:%d)\n",
- js);
+ dev_dbg(kbdev->dev, "No kctx on pullable list (s:%u)\n", js);
break;
}
if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) {
context_idle = true;
- dev_dbg(kbdev->dev,
- "kctx %pK is not active (s:%d)\n",
- (void *)kctx, js);
+ dev_dbg(kbdev->dev, "kctx %pK is not active (s:%u)\n", (void *)kctx,
+ js);
if (kbase_pm_context_active_handle_suspend(
kbdev,
KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
- dev_dbg(kbdev->dev,
- "Suspend pending (s:%d)\n", js);
+ dev_dbg(kbdev->dev, "Suspend pending (s:%u)\n", js);
/* Suspend pending - return context to
* queue and stop scheduling
*/
@@ -3713,16 +3679,13 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
kbase_ctx_flag_clear(kctx, KCTX_PULLED);
if (!kbase_jm_kick(kbdev, 1 << js)) {
- dev_dbg(kbdev->dev,
- "No more jobs can be submitted (s:%d)\n",
- js);
+ dev_dbg(kbdev->dev, "No more jobs can be submitted (s:%u)\n", js);
js_mask &= ~(1 << js);
}
if (!kbase_ctx_flag(kctx, KCTX_PULLED)) {
bool pullable;
- dev_dbg(kbdev->dev,
- "No atoms pulled from kctx %pK (s:%d)\n",
+ dev_dbg(kbdev->dev, "No atoms pulled from kctx %pK (s:%u)\n",
(void *)kctx, js);
pullable = kbase_js_ctx_pullable(kctx, js,
@@ -3806,8 +3769,8 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
if (kbdev->hwaccess.active_kctx[js] == last_active[js] &&
ctx_waiting[js]) {
- dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n",
- (void *)last_active[js], js);
+ dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%u)\n",
+ (void *)last_active[js], js);
kbdev->hwaccess.active_kctx[js] = NULL;
}
}
@@ -3878,7 +3841,7 @@ void kbase_js_zap_context(struct kbase_context *kctx)
*/
if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) {
unsigned long flags;
- int js;
+ unsigned int js;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
@@ -4002,7 +3965,7 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
{
struct kbase_device *kbdev;
unsigned long flags;
- u32 js;
+ unsigned int js;
kbdev = kctx->kbdev;
diff --git a/mali_kbase/mali_kbase_kinstr_prfcnt.c b/mali_kbase/mali_kbase_kinstr_prfcnt.c
index ef9d224..cdc32f9 100644
--- a/mali_kbase/mali_kbase_kinstr_prfcnt.c
+++ b/mali_kbase/mali_kbase_kinstr_prfcnt.c
@@ -36,6 +36,7 @@
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/slab.h>
+#include <linux/overflow.h>
#include <linux/version_compat_defs.h>
#include <linux/workqueue.h>
@@ -120,19 +121,6 @@ struct kbase_kinstr_prfcnt_client_config {
};
/**
- * struct kbase_kinstr_prfcnt_async - Asynchronous sampling operation to
- * carry out for a kinstr_prfcnt_client.
- * @dump_work: Worker for performing asynchronous counter dumps.
- * @user_data: User data for asynchronous dump in progress.
- * @ts_end_ns: End timestamp of most recent async dump.
- */
-struct kbase_kinstr_prfcnt_async {
- struct work_struct dump_work;
- u64 user_data;
- u64 ts_end_ns;
-};
-
-/**
* enum kbase_kinstr_prfcnt_client_init_state - A list of
* initialisation states that the
* kinstr_prfcnt client can be at
@@ -167,9 +155,7 @@ enum kbase_kinstr_prfcnt_client_init_state {
* @hvcli: Hardware counter virtualizer client.
* @node: Node used to attach this client to list in
* kinstr_prfcnt context.
- * @cmd_sync_lock: Lock coordinating the reader interface for commands
- * that need interacting with the async sample dump
- * worker thread.
+ * @cmd_sync_lock: Lock coordinating the reader interface for commands.
* @next_dump_time_ns: Time in ns when this client's next periodic dump must
* occur. If 0, not a periodic client.
* @dump_interval_ns: Interval between periodic dumps. If 0, not a periodic
@@ -190,15 +176,10 @@ enum kbase_kinstr_prfcnt_client_init_state {
* @waitq: Client's notification queue.
* @sample_size: Size of the data required for one sample, in bytes.
* @sample_count: Number of samples the client is able to capture.
- * @sync_sample_count: Number of available spaces for synchronous samples.
- * It can differ from sample_count if asynchronous
- * sample requests are reserving space in the buffer.
* @user_data: User data associated with the session.
* This is set when the session is started and stopped.
* This value is ignored for control commands that
* provide another value.
- * @async: Asynchronous sampling operations to carry out in this
- * client's session.
*/
struct kbase_kinstr_prfcnt_client {
struct kbase_kinstr_prfcnt_context *kinstr_ctx;
@@ -219,9 +200,7 @@ struct kbase_kinstr_prfcnt_client {
wait_queue_head_t waitq;
size_t sample_size;
size_t sample_count;
- atomic_t sync_sample_count;
u64 user_data;
- struct kbase_kinstr_prfcnt_async async;
};
static struct prfcnt_enum_item kinstr_prfcnt_supported_requests[] = {
@@ -456,6 +435,7 @@ int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *ena
size_t grp, blk, blk_inst;
struct prfcnt_metadata **ptr_md = block_meta_base;
const struct kbase_hwcnt_metadata *metadata;
+ uint8_t block_idx = 0;
if (!dst || !*block_meta_base)
return -EINVAL;
@@ -464,6 +444,10 @@ int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *ena
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
u8 *dst_blk;
+ /* Block indices must be reported with no gaps. */
+ if (blk_inst == 0)
+ block_idx = 0;
+
/* Skip unavailable or non-enabled blocks */
if (kbase_kinstr_is_block_type_reserved(metadata, grp, blk) ||
!kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst) ||
@@ -477,13 +461,14 @@ int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *ena
kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(
kbase_hwcnt_metadata_block_type(metadata, grp,
blk));
- (*ptr_md)->u.block_md.block_idx = (u8)blk_inst;
+ (*ptr_md)->u.block_md.block_idx = block_idx;
(*ptr_md)->u.block_md.set = counter_set;
(*ptr_md)->u.block_md.block_state = BLOCK_STATE_UNKNOWN;
(*ptr_md)->u.block_md.values_offset = (u32)(dst_blk - base_addr);
/* update the buf meta data block pointer to next item */
(*ptr_md)++;
+ block_idx++;
}
return 0;
@@ -536,33 +521,6 @@ static void kbasep_kinstr_prfcnt_set_sample_metadata(
}
/**
- * kbasep_kinstr_prfcnt_client_output_empty_sample() - Assemble an empty sample
- * for output.
- * @cli: Non-NULL pointer to a kinstr_prfcnt client.
- * @buf_idx: The index to the sample array for saving the sample.
- */
-static void kbasep_kinstr_prfcnt_client_output_empty_sample(
- struct kbase_kinstr_prfcnt_client *cli, unsigned int buf_idx)
-{
- struct kbase_hwcnt_dump_buffer *dump_buf;
- struct prfcnt_metadata *ptr_md;
-
- if (WARN_ON(buf_idx >= cli->sample_arr.sample_count))
- return;
-
- dump_buf = &cli->sample_arr.samples[buf_idx].dump_buf;
- ptr_md = cli->sample_arr.samples[buf_idx].sample_meta;
-
- kbase_hwcnt_dump_buffer_zero(dump_buf, &cli->enable_map);
-
- /* Use end timestamp from most recent async dump */
- ptr_md->u.sample_md.timestamp_start = cli->async.ts_end_ns;
- ptr_md->u.sample_md.timestamp_end = cli->async.ts_end_ns;
-
- kbasep_kinstr_prfcnt_set_sample_metadata(cli, dump_buf, ptr_md);
-}
-
-/**
* kbasep_kinstr_prfcnt_client_output_sample() - Assemble a sample for output.
* @cli: Non-NULL pointer to a kinstr_prfcnt client.
* @buf_idx: The index to the sample array for saving the sample.
@@ -611,16 +569,11 @@ static void kbasep_kinstr_prfcnt_client_output_sample(
* @cli: Non-NULL pointer to a kinstr_prfcnt client.
* @event_id: Event type that triggered the dump.
* @user_data: User data to return to the user.
- * @async_dump: Whether this is an asynchronous dump or not.
- * @empty_sample: Sample block data will be 0 if this is true.
*
* Return: 0 on success, else error code.
*/
-static int
-kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli,
- enum base_hwcnt_reader_event event_id,
- u64 user_data, bool async_dump,
- bool empty_sample)
+static int kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli,
+ enum base_hwcnt_reader_event event_id, u64 user_data)
{
int ret;
u64 ts_start_ns = 0;
@@ -638,17 +591,11 @@ kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli,
/* Check if there is a place to copy HWC block into. Calculate the
* number of available samples count, by taking into account the type
* of dump.
- * Asynchronous dumps have the ability to reserve space in the samples
- * array for future dumps, unlike synchronous dumps. Because of that,
- * the samples count for synchronous dumps is managed by a variable
- * called sync_sample_count, that originally is defined as equal to the
- * size of the whole array but later decreases every time an
- * asynchronous dump request is pending and then re-increased every
- * time an asynchronous dump request is completed.
*/
- available_samples_count = async_dump ?
- cli->sample_arr.sample_count :
- atomic_read(&cli->sync_sample_count);
+ available_samples_count = cli->sample_arr.sample_count;
+ WARN_ON(available_samples_count < 1);
+ /* Reserve one slot to store the implicit sample taken on CMD_STOP */
+ available_samples_count -= 1;
if (write_idx - read_idx == available_samples_count) {
/* For periodic sampling, the current active dump
* will be accumulated in the next sample, when
@@ -664,38 +611,19 @@ kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli,
*/
write_idx %= cli->sample_arr.sample_count;
- if (!empty_sample) {
- ret = kbase_hwcnt_virtualizer_client_dump(
- cli->hvcli, &ts_start_ns, &ts_end_ns, &cli->tmp_buf);
- /* HWC dump error, set the sample with error flag */
- if (ret)
- cli->sample_flags |= SAMPLE_FLAG_ERROR;
-
- /* Make the sample ready and copy it to the userspace mapped buffer */
- kbasep_kinstr_prfcnt_client_output_sample(
- cli, write_idx, user_data, ts_start_ns, ts_end_ns);
- } else {
- if (!async_dump) {
- struct prfcnt_metadata *ptr_md;
- /* User data will not be updated for empty samples. */
- ptr_md = cli->sample_arr.samples[write_idx].sample_meta;
- ptr_md->u.sample_md.user_data = user_data;
- }
+ ret = kbase_hwcnt_virtualizer_client_dump(cli->hvcli, &ts_start_ns, &ts_end_ns,
+ &cli->tmp_buf);
+ /* HWC dump error, set the sample with error flag */
+ if (ret)
+ cli->sample_flags |= SAMPLE_FLAG_ERROR;
- /* Make the sample ready and copy it to the userspace mapped buffer */
- kbasep_kinstr_prfcnt_client_output_empty_sample(cli, write_idx);
- }
+ /* Make the sample ready and copy it to the userspace mapped buffer */
+ kbasep_kinstr_prfcnt_client_output_sample(cli, write_idx, user_data, ts_start_ns,
+ ts_end_ns);
/* Notify client. Make sure all changes to memory are visible. */
wmb();
atomic_inc(&cli->write_idx);
- if (async_dump) {
- /* Remember the end timestamp of async dump for empty samples */
- if (!empty_sample)
- cli->async.ts_end_ns = ts_end_ns;
-
- atomic_inc(&cli->sync_sample_count);
- }
wake_up_interruptible(&cli->waitq);
/* Reset the flags for the next sample dump */
cli->sample_flags = 0;
@@ -709,6 +637,9 @@ kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli,
{
int ret;
u64 tm_start, tm_end;
+ unsigned int write_idx;
+ unsigned int read_idx;
+ size_t available_samples_count;
WARN_ON(!cli);
lockdep_assert_held(&cli->cmd_sync_lock);
@@ -717,6 +648,16 @@ kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli,
if (cli->active)
return 0;
+ write_idx = atomic_read(&cli->write_idx);
+ read_idx = atomic_read(&cli->read_idx);
+
+	/* Check whether there is space to store at least one implicit sample
+ * corresponding to CMD_STOP.
+ */
+ available_samples_count = cli->sample_count - (write_idx - read_idx);
+ if (!available_samples_count)
+ return -EBUSY;
+
kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map,
&cli->config.phys_em);
@@ -729,7 +670,6 @@ kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli,
cli->hvcli, &cli->enable_map, &tm_start, &tm_end, NULL);
if (!ret) {
- atomic_set(&cli->sync_sample_count, cli->sample_count);
cli->active = true;
cli->user_data = user_data;
cli->sample_flags = 0;
@@ -743,16 +683,6 @@ kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli,
return ret;
}
-static int kbasep_kinstr_prfcnt_client_wait_async_done(
- struct kbase_kinstr_prfcnt_client *cli)
-{
- lockdep_assert_held(&cli->cmd_sync_lock);
-
- return wait_event_interruptible(cli->waitq,
- atomic_read(&cli->sync_sample_count) ==
- cli->sample_count);
-}
-
static int
kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli,
u64 user_data)
@@ -761,7 +691,7 @@ kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli,
u64 tm_start = 0;
u64 tm_end = 0;
struct kbase_hwcnt_physical_enable_map phys_em;
- struct kbase_hwcnt_dump_buffer *tmp_buf = NULL;
+ size_t available_samples_count;
unsigned int write_idx;
unsigned int read_idx;
@@ -772,12 +702,11 @@ kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli,
if (!cli->active)
return -EINVAL;
- /* Wait until pending async sample operation done */
- ret = kbasep_kinstr_prfcnt_client_wait_async_done(cli);
-
- if (ret < 0)
- return -ERESTARTSYS;
+ mutex_lock(&cli->kinstr_ctx->lock);
+ /* Disable counters under the lock, so we do not race with the
+ * sampling thread.
+ */
phys_em.fe_bm = 0;
phys_em.tiler_bm = 0;
phys_em.mmu_l2_bm = 0;
@@ -785,15 +714,11 @@ kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli,
kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &phys_em);
- mutex_lock(&cli->kinstr_ctx->lock);
-
/* Check whether one has the buffer to hold the last sample */
write_idx = atomic_read(&cli->write_idx);
read_idx = atomic_read(&cli->read_idx);
- /* Check if there is a place to save the last stop produced sample */
- if (write_idx - read_idx < cli->sample_arr.sample_count)
- tmp_buf = &cli->tmp_buf;
+ available_samples_count = cli->sample_count - (write_idx - read_idx);
ret = kbase_hwcnt_virtualizer_client_set_counters(cli->hvcli,
&cli->enable_map,
@@ -803,7 +728,8 @@ kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli,
if (ret)
cli->sample_flags |= SAMPLE_FLAG_ERROR;
- if (tmp_buf) {
+ /* There must be a place to save the last stop produced sample */
+ if (!WARN_ON(!available_samples_count)) {
write_idx %= cli->sample_arr.sample_count;
/* Handle the last stop sample */
kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map,
@@ -833,50 +759,6 @@ kbasep_kinstr_prfcnt_client_sync_dump(struct kbase_kinstr_prfcnt_client *cli,
u64 user_data)
{
int ret;
- bool empty_sample = false;
-
- lockdep_assert_held(&cli->cmd_sync_lock);
-
- /* If the client is not started, or not manual, the command invalid */
- if (!cli->active || cli->dump_interval_ns)
- return -EINVAL;
-
- /* Wait until pending async sample operation done, this is required to
- * satisfy the stated sample sequence following their issuing order,
- * reflected by the sample start timestamp.
- */
- if (atomic_read(&cli->sync_sample_count) != cli->sample_count) {
- /* Return empty sample instead of performing real dump.
- * As there is an async dump currently in-flight which will
- * have the desired information.
- */
- empty_sample = true;
- ret = kbasep_kinstr_prfcnt_client_wait_async_done(cli);
-
- if (ret < 0)
- return -ERESTARTSYS;
- }
-
- mutex_lock(&cli->kinstr_ctx->lock);
-
- ret = kbasep_kinstr_prfcnt_client_dump(cli,
- BASE_HWCNT_READER_EVENT_MANUAL,
- user_data, false, empty_sample);
-
- mutex_unlock(&cli->kinstr_ctx->lock);
-
- return ret;
-}
-
-static int
-kbasep_kinstr_prfcnt_client_async_dump(struct kbase_kinstr_prfcnt_client *cli,
- u64 user_data)
-{
- unsigned int write_idx;
- unsigned int read_idx;
- unsigned int active_async_dumps;
- unsigned int new_async_buf_idx;
- int ret;
lockdep_assert_held(&cli->cmd_sync_lock);
@@ -886,45 +768,7 @@ kbasep_kinstr_prfcnt_client_async_dump(struct kbase_kinstr_prfcnt_client *cli,
mutex_lock(&cli->kinstr_ctx->lock);
- write_idx = atomic_read(&cli->write_idx);
- read_idx = atomic_read(&cli->read_idx);
- active_async_dumps =
- cli->sample_count - atomic_read(&cli->sync_sample_count);
- new_async_buf_idx = write_idx + active_async_dumps;
-
- /* Check if there is a place to copy HWC block into.
- * If successful, reserve space in the buffer for the asynchronous
- * operation to make sure that it can actually take place.
- * Because we reserve space for asynchronous dumps we need to take that
- * in consideration here.
- */
- ret = (new_async_buf_idx - read_idx == cli->sample_arr.sample_count) ?
- -EBUSY :
- 0;
-
- if (ret == -EBUSY) {
- mutex_unlock(&cli->kinstr_ctx->lock);
- return ret;
- }
-
- if (active_async_dumps > 0) {
- struct prfcnt_metadata *ptr_md;
- unsigned int buf_idx =
- new_async_buf_idx % cli->sample_arr.sample_count;
- /* Instead of storing user_data, write it directly to future
- * empty sample.
- */
- ptr_md = cli->sample_arr.samples[buf_idx].sample_meta;
- ptr_md->u.sample_md.user_data = user_data;
-
- atomic_dec(&cli->sync_sample_count);
- } else {
- cli->async.user_data = user_data;
- atomic_dec(&cli->sync_sample_count);
-
- kbase_hwcnt_virtualizer_queue_work(cli->kinstr_ctx->hvirt,
- &cli->async.dump_work);
- }
+ ret = kbasep_kinstr_prfcnt_client_dump(cli, BASE_HWCNT_READER_EVENT_MANUAL, user_data);
mutex_unlock(&cli->kinstr_ctx->lock);
@@ -981,10 +825,6 @@ int kbasep_kinstr_prfcnt_cmd(struct kbase_kinstr_prfcnt_client *cli,
ret = kbasep_kinstr_prfcnt_client_sync_dump(
cli, control_cmd->user_data);
break;
- case PRFCNT_CONTROL_CMD_SAMPLE_ASYNC:
- ret = kbasep_kinstr_prfcnt_client_async_dump(
- cli, control_cmd->user_data);
- break;
case PRFCNT_CONTROL_CMD_DISCARD:
ret = kbasep_kinstr_prfcnt_client_discard(cli);
break;
@@ -1039,17 +879,6 @@ kbasep_kinstr_prfcnt_get_sample(struct kbase_kinstr_prfcnt_client *cli,
sample_meta = cli->sample_arr.samples[read_idx].sample_meta;
sample_offset_bytes = (u8 *)sample_meta - cli->sample_arr.user_buf;
- /* Verify that a valid sample has been dumped in the read_idx.
- * There are situations where this may not be the case,
- * for instance if the client is trying to get an asynchronous
- * sample which has not been dumped yet.
- */
- if (sample_meta->hdr.item_type != PRFCNT_SAMPLE_META_TYPE_SAMPLE ||
- sample_meta->hdr.item_version != PRFCNT_READER_API_VERSION) {
- err = -EINVAL;
- goto error_out;
- }
-
sample_access->sequence = sample_meta->u.sample_md.seq;
sample_access->sample_offset_bytes = sample_offset_bytes;
@@ -1339,9 +1168,8 @@ static void kbasep_kinstr_prfcnt_dump_worker(struct work_struct *work)
list_for_each_entry(pos, &kinstr_ctx->clients, node) {
if (pos->active && (pos->next_dump_time_ns != 0) &&
(pos->next_dump_time_ns < cur_time_ns))
- kbasep_kinstr_prfcnt_client_dump(
- pos, BASE_HWCNT_READER_EVENT_PERIODIC,
- pos->user_data, false, false);
+ kbasep_kinstr_prfcnt_client_dump(pos, BASE_HWCNT_READER_EVENT_PERIODIC,
+ pos->user_data);
}
kbasep_kinstr_prfcnt_reschedule_worker(kinstr_ctx);
@@ -1350,48 +1178,6 @@ static void kbasep_kinstr_prfcnt_dump_worker(struct work_struct *work)
}
/**
- * kbasep_kinstr_prfcnt_async_dump_worker()- Dump worker for a manual client
- * to take a single asynchronous
- * sample.
- * @work: Work structure.
- */
-static void kbasep_kinstr_prfcnt_async_dump_worker(struct work_struct *work)
-{
- struct kbase_kinstr_prfcnt_async *cli_async =
- container_of(work, struct kbase_kinstr_prfcnt_async, dump_work);
- struct kbase_kinstr_prfcnt_client *cli = container_of(
- cli_async, struct kbase_kinstr_prfcnt_client, async);
-
- mutex_lock(&cli->kinstr_ctx->lock);
- /* While the async operation is in flight, a sync stop might have been
- * executed, for which the dump should be skipped. Further as we are
- * doing an async dump, we expect that there is reserved buffer for
- * this to happen. This is to avoid the rare corner case where the
- * user side has issued a stop/start pair before the async work item
- * get the chance to execute.
- */
- if (cli->active &&
- (atomic_read(&cli->sync_sample_count) < cli->sample_count))
- kbasep_kinstr_prfcnt_client_dump(cli,
- BASE_HWCNT_READER_EVENT_MANUAL,
- cli->async.user_data, true,
- false);
-
- /* While the async operation is in flight, more async dump requests
- * may have been submitted. In this case, no more async dumps work
- * will be queued. Instead space will be reserved for that dump and
- * an empty sample will be return after handling the current async
- * dump.
- */
- while (cli->active &&
- (atomic_read(&cli->sync_sample_count) < cli->sample_count)) {
- kbasep_kinstr_prfcnt_client_dump(
- cli, BASE_HWCNT_READER_EVENT_MANUAL, 0, true, true);
- }
- mutex_unlock(&cli->kinstr_ctx->lock);
-}
-
-/**
* kbasep_kinstr_prfcnt_dump_timer() - Dump timer that schedules the dump worker for
* execution as soon as possible.
* @timer: Timer structure.
@@ -1852,9 +1638,14 @@ int kbasep_kinstr_prfcnt_client_create(struct kbase_kinstr_prfcnt_context *kinst
struct kbase_kinstr_prfcnt_client *cli;
enum kbase_kinstr_prfcnt_client_init_state init_state;
- WARN_ON(!kinstr_ctx);
- WARN_ON(!setup);
- WARN_ON(!req_arr);
+ if (WARN_ON(!kinstr_ctx))
+ return -EINVAL;
+
+ if (WARN_ON(!setup))
+ return -EINVAL;
+
+ if (WARN_ON(!req_arr))
+ return -EINVAL;
cli = kzalloc(sizeof(*cli), GFP_KERNEL);
@@ -1889,7 +1680,6 @@ int kbasep_kinstr_prfcnt_client_create(struct kbase_kinstr_prfcnt_context *kinst
&cli->config.phys_em);
cli->sample_count = cli->config.buffer_count;
- atomic_set(&cli->sync_sample_count, cli->sample_count);
cli->sample_size =
kbasep_kinstr_prfcnt_get_sample_size(cli, kinstr_ctx->metadata);
@@ -1923,7 +1713,6 @@ int kbasep_kinstr_prfcnt_client_create(struct kbase_kinstr_prfcnt_context *kinst
case KINSTR_PRFCNT_WAITQ_MUTEX:
init_waitqueue_head(&cli->waitq);
- INIT_WORK(&cli->async.dump_work, kbasep_kinstr_prfcnt_async_dump_worker);
mutex_init(&cli->cmd_sync_lock);
break;
@@ -2159,17 +1948,18 @@ int kbase_kinstr_prfcnt_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx,
union kbase_ioctl_kinstr_prfcnt_setup *setup)
{
int err;
- unsigned int item_count;
- unsigned long bytes;
- struct prfcnt_request_item *req_arr;
+ size_t item_count;
+ size_t bytes;
+ struct prfcnt_request_item *req_arr = NULL;
struct kbase_kinstr_prfcnt_client *cli = NULL;
+ const size_t max_bytes = 32 * sizeof(*req_arr);
if (!kinstr_ctx || !setup)
return -EINVAL;
item_count = setup->in.request_item_count;
- /* Limiting the request items to 2x of the expected: acommodating
+ /* Limiting the request items to 2x of the expected: accommodating
* moderate duplications but rejecting excessive abuses.
*/
if (!setup->in.requests_ptr || (item_count < 2) || (setup->in.request_item_size == 0) ||
@@ -2177,7 +1967,18 @@ int kbase_kinstr_prfcnt_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx,
return -EINVAL;
}
- bytes = item_count * sizeof(*req_arr);
+ if (check_mul_overflow(item_count, sizeof(*req_arr), &bytes))
+ return -EINVAL;
+
+	/* Further limit the max bytes to copy from userspace by setting it in the following
+	 * fashion: a maximum of 1 mode item, plus 4 block types with 3 sets each for a total
+	 * of 12 enable items, each currently at the size of prfcnt_request_item.
+ *
+ * Note: if more request types get added, this max limit needs to be updated.
+ */
+ if (bytes > max_bytes)
+ return -EINVAL;
+
req_arr = memdup_user(u64_to_user_ptr(setup->in.requests_ptr), bytes);
if (IS_ERR(req_arr))
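The size guard added to kbase_kinstr_prfcnt_setup() above can be exercised in isolation with a short userspace sketch; the request_item type and helper name are invented stand-ins, the 32-item cap mirrors max_bytes in the patch, and __builtin_mul_overflow() is the compiler builtin that the kernel's check_mul_overflow() is built on:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Illustrative stand-in for struct prfcnt_request_item. */
struct request_item { unsigned long long data[2]; };

/* Returns true if item_count request items can be copied safely. */
static bool request_size_ok(size_t item_count, size_t *bytes_out)
{
	const size_t max_bytes = 32 * sizeof(struct request_item);
	size_t bytes;

	/* Reject a byte count that would overflow size_t... */
	if (__builtin_mul_overflow(item_count, sizeof(struct request_item), &bytes))
		return false;

	/* ...and anything beyond the fixed cap, before any allocation or copy. */
	if (bytes > max_bytes)
		return false;

	*bytes_out = bytes;
	return true;
}

int main(void)
{
	size_t bytes = 0;

	printf("13 items ok: %d\n", request_size_ok(13, &bytes));
	printf("huge count ok: %d\n", request_size_ok((size_t)-1, &bytes));
	return 0;
}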
diff --git a/mali_kbase/mali_kbase_linux.h b/mali_kbase/mali_kbase_linux.h
index 1d8d196..e5c6f7a 100644
--- a/mali_kbase/mali_kbase_linux.h
+++ b/mali_kbase/mali_kbase_linux.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2014, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2014, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -33,7 +33,7 @@
#include <linux/module.h>
#include <linux/atomic.h>
-#if (defined(MALI_KERNEL_TEST_API) && (1 == MALI_KERNEL_TEST_API))
+#if IS_ENABLED(MALI_KERNEL_TEST_API)
#define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func)
#else
#define KBASE_EXPORT_TEST_API(func)
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index abd01c1..b18b1e2 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -2062,6 +2062,7 @@ void kbase_sync_single(struct kbase_context *kctx,
src = ((unsigned char *)kmap(gpu_page)) + offset;
dst = ((unsigned char *)kmap(cpu_page)) + offset;
}
+
memcpy(dst, src, size);
kunmap(gpu_page);
kunmap(cpu_page);
@@ -4985,10 +4986,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
struct page **pages;
struct tagged_addr *pa;
long i, dma_mapped_pages;
- unsigned long address;
struct device *dev;
- unsigned long offset_within_page;
- unsigned long remaining_size;
unsigned long gwt_mask = ~0;
/* Calls to this function are inherently asynchronous, with respect to
* MMU operations.
@@ -5004,19 +5002,29 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
alloc = reg->gpu_alloc;
pa = kbase_get_gpu_phy_pages(reg);
- address = alloc->imported.user_buf.address;
pinned_pages = alloc->nents;
pages = alloc->imported.user_buf.pages;
dev = kctx->kbdev->dev;
- offset_within_page = address & ~PAGE_MASK;
- remaining_size = alloc->imported.user_buf.size;
+ /* Manual CPU cache synchronization.
+ *
+ * The driver disables automatic CPU cache synchronization because the
+ * memory pages that enclose the imported region may also contain
+ * sub-regions which are not imported and that are allocated and used
+ * by the user process. This may be the case of memory at the beginning
+ * of the first page and at the end of the last page. Automatic CPU cache
+ * synchronization would force some operations on those memory allocations,
+ * unbeknown to the user process: in particular, a CPU cache invalidate
+ * upon unmapping would destroy the content of dirty CPU caches and cause
+ * the user process to lose CPU writes to the non-imported sub-regions.
+ *
+ * When the GPU claims ownership of the imported memory buffer, it shall
+ * commit CPU writes for the whole of all pages that enclose the imported
+ * region, otherwise the initial content of memory would be wrong.
+ */
for (i = 0; i < pinned_pages; i++) {
- unsigned long map_size =
- MIN(PAGE_SIZE - offset_within_page, remaining_size);
- dma_addr_t dma_addr = dma_map_page(dev, pages[i],
- offset_within_page, map_size,
- DMA_BIDIRECTIONAL);
+ dma_addr_t dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
err = dma_mapping_error(dev, dma_addr);
if (err)
@@ -5025,8 +5033,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
alloc->imported.user_buf.dma_addrs[i] = dma_addr;
pa[i] = as_tagged(page_to_phys(pages[i]));
- remaining_size -= map_size;
- offset_within_page = 0;
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
}
#ifdef CONFIG_MALI_CINSTR_GWT
@@ -5043,19 +5050,22 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
/* fall down */
unwind:
alloc->nents = 0;
- offset_within_page = address & ~PAGE_MASK;
- remaining_size = alloc->imported.user_buf.size;
dma_mapped_pages = i;
- /* Run the unmap loop in the same order as map loop */
+	/* Run the unmap loop in the same order as the map loop, and perform
+	 * CPU cache synchronization again to re-write the content of dirty CPU caches
+	 * to memory. This is a precautionary measure in case a GPU job has taken
+ * advantage of a partially GPU-mapped range to write and corrupt the
+ * content of memory, either inside or outside the imported region.
+ *
+ * Notice that this error recovery path doesn't try to be optimal and just
+ * flushes the entire page range.
+ */
for (i = 0; i < dma_mapped_pages; i++) {
- unsigned long unmap_size =
- MIN(PAGE_SIZE - offset_within_page, remaining_size);
+ dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
- dma_unmap_page(kctx->kbdev->dev,
- alloc->imported.user_buf.dma_addrs[i],
- unmap_size, DMA_BIDIRECTIONAL);
- remaining_size -= unmap_size;
- offset_within_page = 0;
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
}
/* The user buffer could already have been previously pinned before
@@ -5096,12 +5106,85 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
#endif
for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
- unsigned long unmap_size =
- MIN(remaining_size, PAGE_SIZE - offset_within_page);
+ unsigned long imported_size = MIN(remaining_size, PAGE_SIZE - offset_within_page);
+ /* Notice: this is a temporary variable that is used for DMA sync
+ * operations, and that could be incremented by an offset if the
+ * current page contains both imported and non-imported memory
+ * sub-regions.
+ *
+ * It is valid to add an offset to this value, because the offset
+ * is always kept within the physically contiguous dma-mapped range
+ * and there's no need to translate to physical address to offset it.
+ *
+ * This variable is not going to be used for the actual DMA unmap
+ * operation, that shall always use the original DMA address of the
+ * whole memory page.
+ */
dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
- dma_unmap_page(kctx->kbdev->dev, dma_addr, unmap_size,
- DMA_BIDIRECTIONAL);
+ /* Manual CPU cache synchronization.
+ *
+ * When the GPU returns ownership of the buffer to the CPU, the driver
+ * needs to treat imported and non-imported memory differently.
+ *
+ * The first case to consider is non-imported sub-regions at the
+	 * beginning of the first page and at the end of the last page. For these
+ * sub-regions: CPU cache shall be committed with a clean+invalidate,
+ * in order to keep the last CPU write.
+ *
+	 * The imported region prefers the opposite treatment: this memory has been
+ * legitimately mapped and used by the GPU, hence GPU writes shall be
+ * committed to memory, while CPU cache shall be invalidated to make
+ * sure that CPU reads the correct memory content.
+ *
+	 * The following diagram shows the expected values of the variables
+	 * used in this loop in the corner case of an imported region enclosed
+ * by a single memory page:
+ *
+	 * page boundary ->|-----------| <- dma_addr (initial value)
+ * | |
+ * | - - - - - | <- offset_within_page
+ * |XXXXXXXXXXX|\
+ * |XXXXXXXXXXX| \
+ * |XXXXXXXXXXX| }- imported_size
+ * |XXXXXXXXXXX| /
+ * |XXXXXXXXXXX|/
+ * | - - - - - | <- offset_within_page + imported_size
+ * | |\
+ * | | }- PAGE_SIZE - imported_size - offset_within_page
+ * | |/
+ * page boundary ->|-----------|
+ *
+ * If the imported region is enclosed by more than one page, then
+ * offset_within_page = 0 for any page after the first.
+ */
+
+ /* Only for first page: handle non-imported range at the beginning. */
+ if (offset_within_page > 0) {
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page,
+ DMA_BIDIRECTIONAL);
+ dma_addr += offset_within_page;
+ }
+
+ /* For every page: handle imported range. */
+ if (imported_size > 0)
+ dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size,
+ DMA_BIDIRECTIONAL);
+
+ /* Only for last page (that may coincide with first page):
+ * handle non-imported range at the end.
+ */
+ if ((imported_size + offset_within_page) < PAGE_SIZE) {
+ dma_addr += imported_size;
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
+ PAGE_SIZE - imported_size - offset_within_page,
+ DMA_BIDIRECTIONAL);
+ }
+
+ /* Notice: use the original DMA address to unmap the whole memory page. */
+ dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i],
+ PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+
if (writeable)
set_page_dirty_lock(pages[i]);
#if !MALI_USE_CSF
@@ -5109,7 +5192,7 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
pages[i] = NULL;
#endif
- remaining_size -= unmap_size;
+ remaining_size -= imported_size;
offset_within_page = 0;
}
#if !MALI_USE_CSF
@@ -5190,8 +5273,9 @@ int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_regi
break;
}
default:
- WARN(1, "Invalid external resource GPU allocation type (%x) on mapping",
- alloc->type);
+ dev_dbg(kctx->kbdev->dev,
+ "Invalid external resource GPU allocation type (%x) on mapping",
+ alloc->type);
return -EINVAL;
}
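The per-page cache maintenance in the kbase_jd_user_buf_unmap() hunk above splits each page into up to three ranges; the split can be checked with a small standalone sketch (plain userspace C; the address, size and helper name are invented, and the printed sync_for_device / sync_for_cpu labels stand for the dma_sync_single_for_device() / dma_sync_single_for_cpu() calls in the patch):

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define MIN(a, b) ((a) < (b) ? (a) : (b))

/* Print, per page, which byte ranges would be cleaned for the device and
 * which would be invalidated for the CPU when an imported user buffer is
 * unmapped. Only the range arithmetic of the driver loop is reproduced here.
 */
static void show_sync_ranges(unsigned long address, unsigned long size)
{
	unsigned long offset_within_page = address & (PAGE_SIZE - 1);
	unsigned long remaining_size = size;
	unsigned long page = 0;

	while (remaining_size) {
		unsigned long imported_size =
			MIN(remaining_size, PAGE_SIZE - offset_within_page);

		/* Leading non-imported range: only possible on the first page. */
		if (offset_within_page > 0)
			printf("page %lu: sync_for_device [0, %lu)\n", page, offset_within_page);

		/* Imported range: present on every page. */
		printf("page %lu: sync_for_cpu    [%lu, %lu)\n", page, offset_within_page,
		       offset_within_page + imported_size);

		/* Trailing non-imported range: only possible on the last page. */
		if (offset_within_page + imported_size < PAGE_SIZE)
			printf("page %lu: sync_for_device [%lu, %lu)\n", page,
			       offset_within_page + imported_size, PAGE_SIZE);

		remaining_size -= imported_size;
		offset_within_page = 0;
		page++;
	}
}

int main(void)
{
	/* Invented example: an import starting 256 bytes into a page, 6000 bytes long. */
	show_sync_ranges(0x1000100UL, 6000UL);
	return 0;
}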
diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h
index f727538..83872a1 100644
--- a/mali_kbase/mali_kbase_mem.h
+++ b/mali_kbase/mali_kbase_mem.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c
index f815144..e577452 100644
--- a/mali_kbase/mali_kbase_mem_linux.c
+++ b/mali_kbase/mali_kbase_mem_linux.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1577,10 +1577,10 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
int zone = KBASE_REG_ZONE_CUSTOM_VA;
bool shared_zone = false;
u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev);
- unsigned long offset_within_page;
- unsigned long remaining_size;
struct kbase_alloc_import_user_buf *user_buf;
struct page **pages = NULL;
+ struct tagged_addr *pa;
+ struct device *dev;
int write;
/* Flag supported only for dma-buf imported memory */
@@ -1722,20 +1722,33 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
reg->gpu_alloc->nents = 0;
reg->extension = 0;
- if (pages) {
- struct device *dev = kctx->kbdev->dev;
- struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg);
+ pa = kbase_get_gpu_phy_pages(reg);
+ dev = kctx->kbdev->dev;
+ if (pages) {
/* Top bit signifies that this was pinned on import */
user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT;
- offset_within_page = user_buf->address & ~PAGE_MASK;
- remaining_size = user_buf->size;
+ /* Manual CPU cache synchronization.
+ *
+ * The driver disables automatic CPU cache synchronization because the
+ * memory pages that enclose the imported region may also contain
+ * sub-regions which are not imported and that are allocated and used
+ * by the user process. This may be the case for memory at the beginning
+ * of the first page and at the end of the last page. Automatic CPU cache
+ * synchronization would force some operations on those memory allocations,
+ * unbeknown to the user process: in particular, a CPU cache invalidate
+ * upon unmapping would destroy the content of dirty CPU caches and cause
+ * the user process to lose CPU writes to the non-imported sub-regions.
+ *
+ * When the GPU claims ownership of the imported memory buffer, it shall
+ * commit CPU writes for the entirety of all pages that enclose the imported
+ * region, otherwise the initial content of memory would be wrong. A minimal
+ * sketch of this pattern is given after the map loop below.
+ */
for (i = 0; i < faulted_pages; i++) {
- unsigned long map_size =
- MIN(PAGE_SIZE - offset_within_page, remaining_size);
- dma_addr_t dma_addr = dma_map_page(dev, pages[i],
- offset_within_page, map_size, DMA_BIDIRECTIONAL);
+ dma_addr_t dma_addr =
+ dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
if (dma_mapping_error(dev, dma_addr))
goto unwind_dma_map;
@@ -1743,8 +1756,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
user_buf->dma_addrs[i] = dma_addr;
pa[i] = as_tagged(page_to_phys(pages[i]));
- remaining_size -= map_size;
- offset_within_page = 0;
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
}
reg->gpu_alloc->nents = faulted_pages;
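/* Minimal sketch of the manual CPU cache maintenance pattern used in the map
 * loop above, under the assumption that the mapping always covers a whole
 * page. The helper name and error code are illustrative, not part of the
 * driver; the DMA API calls are the standard Linux ones from
 * <linux/dma-mapping.h>.
 */
#include <linux/dma-mapping.h>
#include <linux/mm.h>

static int example_map_page_for_gpu(struct device *dev, struct page *page,
				    dma_addr_t *dma_addr_out)
{
	/* Opt out of automatic cache maintenance: the driver will sync only
	 * the byte ranges it actually owns.
	 */
	dma_addr_t dma_addr = dma_map_page_attrs(dev, page, 0, PAGE_SIZE,
						 DMA_BIDIRECTIONAL,
						 DMA_ATTR_SKIP_CPU_SYNC);

	if (dma_mapping_error(dev, dma_addr))
		return -ENOMEM;

	/* Commit any dirty CPU cache lines so the GPU sees the latest data. */
	dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);

	*dma_addr_out = dma_addr;
	return 0;
}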
@@ -1753,19 +1765,19 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
return reg;
unwind_dma_map:
- offset_within_page = user_buf->address & ~PAGE_MASK;
- remaining_size = user_buf->size;
dma_mapped_pages = i;
- /* Run the unmap loop in the same order as map loop */
+ /* Run the unmap loop in the same order as the map loop, and perform
+ * CPU cache synchronization again to write the content of dirty CPU
+ * caches back to memory. This precautionary measure is kept here to
+ * keep this code aligned with kbase_jd_user_buf_map() and to allow
+ * for a potential refactor in the future.
+ */
for (i = 0; i < dma_mapped_pages; i++) {
- unsigned long unmap_size =
- MIN(PAGE_SIZE - offset_within_page, remaining_size);
+ dma_addr_t dma_addr = user_buf->dma_addrs[i];
- dma_unmap_page(kctx->kbdev->dev,
- user_buf->dma_addrs[i],
- unmap_size, DMA_BIDIRECTIONAL);
- remaining_size -= unmap_size;
- offset_within_page = 0;
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
}
fault_mismatch:
if (pages) {
@@ -1785,7 +1797,6 @@ no_alloc_obj:
no_region:
bad_size:
return NULL;
-
}
@@ -2068,7 +2079,10 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
*flags &= ~BASE_MEM_COHERENT_SYSTEM;
}
-
+ if (((*flags & BASE_MEM_CACHED_CPU) == 0) && (type == BASE_MEM_IMPORT_TYPE_USER_BUFFER)) {
+ dev_warn(kctx->kbdev->dev, "USER_BUFFER must be CPU cached");
+ goto bad_flags;
+ }
if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) {
dev_warn(kctx->kbdev->dev,
"padding is only supported for UMM");
@@ -2752,7 +2766,7 @@ static int kbasep_reg_mmap(struct kbase_context *kctx,
size_t *nr_pages, size_t *aligned_offset)
{
- int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+ unsigned int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
struct kbase_va_region *reg;
int err = 0;
@@ -3280,6 +3294,9 @@ void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
if (kbase_is_region_invalid_or_free(reg))
goto out_unlock;
+ if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
+ goto out_unlock;
+
addr = kbase_vmap_reg(kctx, reg, gpu_addr, size, prot_request, map, 0u);
out_unlock:
diff --git a/mali_kbase/mali_kbase_mem_migrate.c b/mali_kbase/mali_kbase_mem_migrate.c
index 8c62bd3..9c4b0d9 100644
--- a/mali_kbase/mali_kbase_mem_migrate.c
+++ b/mali_kbase/mali_kbase_mem_migrate.c
@@ -102,6 +102,7 @@ static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p,
dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
kfree(page_md);
+ set_page_private(p, 0);
ClearPagePrivate(p);
}
@@ -456,7 +457,6 @@ static int kbase_page_migrate(struct page *new_page, struct page *old_page, enum
kbase_free_page_later(kbdev, new_page);
queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work);
} else if (status_not_movable) {
- __ClearPageMovable(old_page);
err = -EINVAL;
} else if (status_mapped) {
err = kbasep_migrate_page_allocated_mapped(old_page, new_page);
@@ -464,6 +464,14 @@ static int kbase_page_migrate(struct page *new_page, struct page *old_page, enum
err = kbasep_migrate_page_pt_mapped(old_page, new_page);
}
+ /* While we want to preserve the movability of pages for which we return
+ * EAGAIN, the kernel docs state that putback is called on movable pages
+ * for which a critical error is returned, which may not be what we
+ * expect.
+ */
+ if (err < 0 && err != -EAGAIN)
+ __ClearPageMovable(old_page);
+
return err;
}
@@ -485,6 +493,12 @@ static void kbase_page_putback(struct page *p)
struct kbase_page_metadata *page_md = kbase_page_private(p);
struct kbase_device *kbdev = NULL;
+ /* If we don't have page metadata, the page may not belong to the
+ * driver or may already have been freed, and there is nothing we can do.
+ */
+ if (!page_md)
+ return;
+
spin_lock(&page_md->migrate_lock);
if (WARN_ON(!IS_PAGE_ISOLATED(page_md->status))) {
diff --git a/mali_kbase/mali_kbase_mem_pool.c b/mali_kbase/mali_kbase_mem_pool.c
index bede1f4..75569cc 100644
--- a/mali_kbase/mali_kbase_mem_pool.c
+++ b/mali_kbase/mali_kbase_mem_pool.c
@@ -284,7 +284,14 @@ static void enqueue_free_pool_pages_work(struct kbase_mem_pool *pool)
void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, struct page *p)
{
- struct kbase_device *kbdev = pool->kbdev;
+ struct kbase_device *kbdev;
+
+ if (WARN_ON(!pool))
+ return;
+ if (WARN_ON(!p))
+ return;
+
+ kbdev = pool->kbdev;
if (!pool->order && kbase_page_migration_enabled) {
kbase_free_page_later(kbdev, p);
@@ -536,14 +543,16 @@ void kbase_mem_pool_term(struct kbase_mem_pool *pool)
/* Zero pages first without holding the next_pool lock */
for (i = 0; i < nr_to_spill; i++) {
p = kbase_mem_pool_remove_locked(pool, SPILL_IN_PROGRESS);
- list_add(&p->lru, &spill_list);
+ if (p)
+ list_add(&p->lru, &spill_list);
}
}
while (!kbase_mem_pool_is_empty(pool)) {
/* Free remaining pages to kernel */
p = kbase_mem_pool_remove_locked(pool, FREE_IN_PROGRESS);
- list_add(&p->lru, &free_list);
+ if (p)
+ list_add(&p->lru, &free_list);
}
kbase_mem_pool_unlock(pool);
@@ -595,17 +604,10 @@ struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool)
struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool)
{
- struct page *p;
-
lockdep_assert_held(&pool->pool_lock);
pool_dbg(pool, "alloc_locked()\n");
- p = kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS);
-
- if (p)
- return p;
-
- return NULL;
+ return kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS);
}
void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p,
diff --git a/mali_kbase/mali_kbase_softjobs.c b/mali_kbase/mali_kbase_softjobs.c
index 95003c8..b64bbc1 100644
--- a/mali_kbase/mali_kbase_softjobs.c
+++ b/mali_kbase/mali_kbase_softjobs.c
@@ -502,6 +502,7 @@ static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom)
kbase_js_sched_all(katom->kctx->kbdev);
}
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
static void kbase_debug_copy_finish(struct kbase_jd_atom *katom)
{
struct kbase_debug_copy_buffer *buffers = katom->softjob_data;
@@ -673,8 +674,8 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom)
case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
{
struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
- unsigned long nr_pages =
- alloc->imported.user_buf.nr_pages;
+ const unsigned long nr_pages = alloc->imported.user_buf.nr_pages;
+ const unsigned long start = alloc->imported.user_buf.address;
if (alloc->imported.user_buf.mm != current->mm) {
ret = -EINVAL;
@@ -686,11 +687,9 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom)
ret = -ENOMEM;
goto out_unlock;
}
-
- ret = get_user_pages_fast(
- alloc->imported.user_buf.address,
- nr_pages, 0,
- buffers[i].extres_pages);
+ kbase_gpu_vm_unlock(katom->kctx);
+ ret = get_user_pages_fast(start, nr_pages, 0, buffers[i].extres_pages);
+ kbase_gpu_vm_lock(katom->kctx);
if (ret != nr_pages) {
/* Adjust number of pages, so that we only
* attempt to release pages in the array that we
@@ -728,7 +727,6 @@ out_cleanup:
return ret;
}
-#endif /* !MALI_USE_CSF */
#if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE
static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc,
@@ -760,8 +758,18 @@ static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc,
}
#endif
-int kbase_mem_copy_from_extres(struct kbase_context *kctx,
- struct kbase_debug_copy_buffer *buf_data)
+/**
+ * kbase_mem_copy_from_extres() - Copy from external resources.
+ *
+ * @kctx: kbase context within which the copying is to take place.
+ * @buf_data: Pointer to the information about external resources:
+ * pages pertaining to the external resource, number of
+ * pages to copy.
+ *
+ * Return: 0 on success, error code otherwise.
+ */
+static int kbase_mem_copy_from_extres(struct kbase_context *kctx,
+ struct kbase_debug_copy_buffer *buf_data)
{
unsigned int i;
unsigned int target_page_nr = 0;
@@ -848,7 +856,6 @@ out_unlock:
return ret;
}
-#if !MALI_USE_CSF
static int kbase_debug_copy(struct kbase_jd_atom *katom)
{
struct kbase_debug_copy_buffer *buffers = katom->softjob_data;
@@ -866,6 +873,7 @@ static int kbase_debug_copy(struct kbase_jd_atom *katom)
return 0;
}
+#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */
#endif /* !MALI_USE_CSF */
#define KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT ((u32)0x7)
@@ -963,11 +971,6 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
ret = -EINVAL;
goto free_info;
}
- /* Clear any remaining bytes when user struct is smaller than
- * kernel struct. For jit version 1, this also clears the
- * padding bytes
- */
- memset(((u8 *)info) + sizeof(*info), 0, sizeof(*info) - sizeof(*info));
ret = kbasep_jit_alloc_validate(kctx, info);
if (ret)
@@ -1541,6 +1544,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom)
case BASE_JD_REQ_SOFT_EVENT_RESET:
kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET);
break;
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
case BASE_JD_REQ_SOFT_DEBUG_COPY:
{
int res = kbase_debug_copy(katom);
@@ -1549,6 +1553,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom)
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
break;
}
+#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */
case BASE_JD_REQ_SOFT_JIT_ALLOC:
ret = kbase_jit_allocate_process(katom);
break;
@@ -1654,8 +1659,10 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom)
if (katom->jc == 0)
return -EINVAL;
break;
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
case BASE_JD_REQ_SOFT_DEBUG_COPY:
return kbase_debug_copy_prepare(katom);
+#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */
case BASE_JD_REQ_SOFT_EXT_RES_MAP:
return kbase_ext_res_prepare(katom);
case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
@@ -1687,9 +1694,11 @@ void kbase_finish_soft_job(struct kbase_jd_atom *katom)
kbase_sync_fence_in_remove(katom);
break;
#endif /* CONFIG_SYNC_FILE */
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
case BASE_JD_REQ_SOFT_DEBUG_COPY:
kbase_debug_copy_finish(katom);
break;
+#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */
case BASE_JD_REQ_SOFT_JIT_ALLOC:
kbase_jit_allocate_finish(katom);
break;
diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
index d1e4078..4a09265 100644
--- a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
+++ b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
@@ -551,7 +551,7 @@ void kbase_mmu_gpu_fault_interrupt(struct kbase_device *kbdev, u32 status,
}
KBASE_EXPORT_TEST_API(kbase_mmu_gpu_fault_interrupt);
-int kbase_mmu_as_init(struct kbase_device *kbdev, int i)
+int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i)
{
kbdev->as[i].number = i;
kbdev->as[i].bf_data.addr = 0ULL;
diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
index 22786f0..83605c3 100644
--- a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
+++ b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
@@ -328,7 +328,7 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
while (bf_bits | pf_bits) {
struct kbase_as *as;
- int as_no;
+ unsigned int as_no;
struct kbase_context *kctx;
struct kbase_fault *fault;
@@ -423,13 +423,13 @@ int kbase_mmu_switch_to_ir(struct kbase_context *const kctx,
return kbase_job_slot_softstop_start_rp(kctx, reg);
}
-int kbase_mmu_as_init(struct kbase_device *kbdev, int i)
+int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i)
{
kbdev->as[i].number = i;
kbdev->as[i].bf_data.addr = 0ULL;
kbdev->as[i].pf_data.addr = 0ULL;
- kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", 0, 1, i);
+ kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%u", 0, 1, i);
if (!kbdev->as[i].pf_wq)
return -ENOMEM;
diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c
index e39c8ad..41876ff 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.c
+++ b/mali_kbase/mmu/mali_kbase_mmu.c
@@ -340,15 +340,7 @@ static void kbase_mmu_sync_pgd_gpu(struct kbase_device *kbdev, struct kbase_cont
phys_addr_t phys, size_t size,
enum kbase_mmu_op_type flush_op)
{
-#if MALI_USE_CSF
- unsigned long irq_flags;
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
- if (mmu_flush_cache_on_gpu_ctrl(kbdev) && (flush_op != KBASE_MMU_OP_NONE) &&
- kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0))
- mmu_flush_pa_range(kbdev, phys, size, KBASE_MMU_OP_FLUSH_PT);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
-#endif
+ kbase_mmu_flush_pa_range(kbdev, kctx, phys, size, flush_op);
}
static void kbase_mmu_sync_pgd_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size)
@@ -398,9 +390,9 @@ static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, struct kbase_context
* a 4kB physical page.
*/
-static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
- struct tagged_addr *phys, size_t nr, unsigned long flags,
- int group_id, u64 *dirty_pgds);
+static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int group_id, u64 *dirty_pgds);
/**
* kbase_mmu_update_and_free_parent_pgds() - Update number of valid entries and
@@ -755,8 +747,8 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx,
}
/* Now make this faulting page writable to GPU. */
- kbase_mmu_update_pages_no_flush(kctx, fault_pfn, fault_phys_addr, 1, region->flags,
- region->gpu_alloc->group_id, &dirty_pgds);
+ kbase_mmu_update_pages_no_flush(kbdev, &kctx->mmu, fault_pfn, fault_phys_addr, 1,
+ region->flags, region->gpu_alloc->group_id, &dirty_pgds);
kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, fault_pfn, 1,
kctx->id, dirty_pgds);
@@ -1834,7 +1826,8 @@ next:
static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut, const u64 vpfn,
size_t nr, u64 dirty_pgds,
- enum kbase_caller_mmu_sync_info mmu_sync_info)
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
+ bool insert_pages_failed)
{
struct kbase_mmu_hw_op_param op_param;
int as_nr = 0;
@@ -1859,8 +1852,12 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev,
*
* Operations that affect the whole GPU cache shall only be done if it's
* impossible to update physical ranges.
+ *
+ * On GPUs where flushing by physical address range is supported,
+ * a full cache flush is done when an error occurs during
+ * insert_pages() to keep the error handling simpler.
*/
- if (mmu_flush_cache_on_gpu_ctrl(kbdev))
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev) && !insert_pages_failed)
mmu_invalidate(kbdev, mmut->kctx, as_nr, &op_param);
else
mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param);
@@ -2027,7 +2024,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
mutex_unlock(&kctx->mmu.mmu_lock);
mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds,
- mmu_sync_info);
+ mmu_sync_info, false);
return 0;
@@ -2035,7 +2032,7 @@ fail_unlock:
mutex_unlock(&kctx->mmu.mmu_lock);
mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds,
- mmu_sync_info);
+ mmu_sync_info, true);
kbase_mmu_free_pgds_list(kbdev, &kctx->mmu, &free_pgds_list);
return err;
@@ -2304,7 +2301,7 @@ fail_unlock:
mutex_unlock(&mmut->mmu_lock);
mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr,
- dirty_pgds ? *dirty_pgds : 0xF, CALLER_MMU_ASYNC);
+ dirty_pgds ? *dirty_pgds : 0xF, CALLER_MMU_ASYNC, true);
kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list);
return err;
@@ -2332,7 +2329,7 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *m
if (err)
return err;
- mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info);
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false);
return 0;
}
@@ -2781,9 +2778,11 @@ out:
KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
/**
- * kbase_mmu_update_pages_no_flush() - Update attributes data in GPU page table entries
+ * kbase_mmu_update_pages_no_flush() - Update phy pages and attributes data in GPU
+ * page table entries
*
- * @kctx: Kbase context
+ * @kbdev: Pointer to kbase device.
+ * @mmut: The involved MMU table
* @vpfn: Virtual PFN (Page Frame Number) of the first page to update
* @phys: Pointer to the array of tagged physical addresses of the physical
* pages that are pointed to by the page table entries (that need to
@@ -2796,26 +2795,22 @@ KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
* @dirty_pgds: Flags to track every level where a PGD has been updated.
*
* This will update page table entries that already exist on the GPU based on
- * the new flags that are passed (the physical pages pointed to by the page
- * table entries remain unchanged). It is used as a response to the changes of
- * the memory attributes.
+ * new flags and replace any existing phy pages that are passed (the PGD pages
+ * remain unchanged). It is used as a response to the changes of phys as well
+ * as the memory attributes.
*
* The caller is responsible for validating the memory attributes.
*
* Return: 0 if the attributes data in page table entries were updated
* successfully, otherwise an error code.
*/
-static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
- struct tagged_addr *phys, size_t nr, unsigned long flags,
- int const group_id, u64 *dirty_pgds)
+static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int const group_id, u64 *dirty_pgds)
{
phys_addr_t pgd;
u64 *pgd_page;
int err;
- struct kbase_device *kbdev;
-
- if (WARN_ON(kctx == NULL))
- return -EINVAL;
KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
@@ -2823,9 +2818,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
if (nr == 0)
return 0;
- mutex_lock(&kctx->mmu.mmu_lock);
-
- kbdev = kctx->kbdev;
+ mutex_lock(&mmut->mmu_lock);
while (nr) {
unsigned int i;
@@ -2841,8 +2834,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
if (is_huge(*phys) && (index == index_in_large_page(*phys)))
cur_level = MIDGARD_MMU_LEVEL(2);
- err = mmu_get_pgd_at_level(kbdev, &kctx->mmu, vpfn, cur_level, &pgd, NULL,
- dirty_pgds);
+ err = mmu_get_pgd_at_level(kbdev, mmut, vpfn, cur_level, &pgd, NULL, dirty_pgds);
if (WARN_ON(err))
goto fail_unlock;
@@ -2869,7 +2861,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
pgd_page[level_index] = kbase_mmu_create_ate(kbdev,
*target_phys, flags, MIDGARD_MMU_LEVEL(2),
group_id);
- kbase_mmu_sync_pgd(kbdev, kctx, pgd + (level_index * sizeof(u64)),
+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (level_index * sizeof(u64)),
kbase_dma_addr(p) + (level_index * sizeof(u64)),
sizeof(u64), KBASE_MMU_OP_NONE);
} else {
@@ -2887,7 +2879,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
/* MMU cache flush strategy is NONE because GPU cache maintenance
* will be done by the caller.
*/
- kbase_mmu_sync_pgd(kbdev, kctx, pgd + (index * sizeof(u64)),
+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)),
kbase_dma_addr(p) + (index * sizeof(u64)),
count * sizeof(u64), KBASE_MMU_OP_NONE);
}
@@ -2905,45 +2897,81 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
kunmap(p);
}
- mutex_unlock(&kctx->mmu.mmu_lock);
+ mutex_unlock(&mmut->mmu_lock);
return 0;
fail_unlock:
- mutex_unlock(&kctx->mmu.mmu_lock);
+ mutex_unlock(&mmut->mmu_lock);
return err;
}
-int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
- struct tagged_addr *phys, size_t nr,
- unsigned long flags, int const group_id)
+static int kbase_mmu_update_pages_common(struct kbase_device *kbdev, struct kbase_context *kctx,
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int const group_id)
{
int err;
struct kbase_mmu_hw_op_param op_param;
u64 dirty_pgds = 0;
-
+ struct kbase_mmu_table *mmut;
/* Calls to this function are inherently asynchronous, with respect to
* MMU operations.
*/
const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+ int as_nr;
+
+#if !MALI_USE_CSF
+ if (unlikely(kctx == NULL))
+ return -EINVAL;
+
+ as_nr = kctx->as_nr;
+ mmut = &kctx->mmu;
+#else
+ if (kctx) {
+ mmut = &kctx->mmu;
+ as_nr = kctx->as_nr;
+ } else {
+ mmut = &kbdev->csf.mcu_mmu;
+ as_nr = MCU_AS_NR;
+ }
+#endif
- err = kbase_mmu_update_pages_no_flush(kctx, vpfn, phys, nr, flags, group_id, &dirty_pgds);
+ err = kbase_mmu_update_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id,
+ &dirty_pgds);
op_param = (const struct kbase_mmu_hw_op_param){
.vpfn = vpfn,
.nr = nr,
.op = KBASE_MMU_OP_FLUSH_MEM,
- .kctx_id = kctx->id,
+ .kctx_id = kctx ? kctx->id : 0xFFFFFFFF,
.mmu_sync_info = mmu_sync_info,
.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds),
};
- if (mmu_flush_cache_on_gpu_ctrl(kctx->kbdev))
- mmu_flush_invalidate_on_gpu_ctrl(kctx->kbdev, kctx, kctx->as_nr, &op_param);
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev))
+ mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, as_nr, &op_param);
else
- mmu_flush_invalidate(kctx->kbdev, kctx, kctx->as_nr, &op_param);
+ mmu_flush_invalidate(kbdev, kctx, as_nr, &op_param);
+
return err;
}
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, struct tagged_addr *phys,
+ size_t nr, unsigned long flags, int const group_id)
+{
+ if (unlikely(kctx == NULL))
+ return -EINVAL;
+
+ return kbase_mmu_update_pages_common(kctx->kbdev, kctx, vpfn, phys, nr, flags, group_id);
+}
+
+#if MALI_USE_CSF
+int kbase_mmu_update_csf_mcu_pages(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys,
+ size_t nr, unsigned long flags, int const group_id)
+{
+ return kbase_mmu_update_pages_common(kbdev, NULL, vpfn, phys, nr, flags, group_id);
+}
+#endif /* MALI_USE_CSF */
+
static void mmu_page_migration_transaction_begin(struct kbase_device *kbdev)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -3137,6 +3165,8 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p
spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
if (ret < 0) {
+ mutex_unlock(&kbdev->mmu_hw_mutex);
+ mutex_unlock(&kbdev->pm.lock);
dev_err(kbdev->dev,
"%s: failed to lock MMU region or flush GPU cache. Issuing GPU soft-reset to recover.",
__func__);
@@ -3291,9 +3321,8 @@ gpu_reset:
return ret;
}
-static void mmu_teardown_level(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut, phys_addr_t pgd,
- int level)
+static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ phys_addr_t pgd, unsigned int level)
{
u64 *pgd_page;
int i;
@@ -3426,11 +3455,26 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
mutex_destroy(&mmut->mmu_lock);
}
-void kbase_mmu_as_term(struct kbase_device *kbdev, int i)
+void kbase_mmu_as_term(struct kbase_device *kbdev, unsigned int i)
{
destroy_workqueue(kbdev->as[i].pf_wq);
}
+void kbase_mmu_flush_pa_range(struct kbase_device *kbdev, struct kbase_context *kctx,
+ phys_addr_t phys, size_t size,
+ enum kbase_mmu_op_type flush_op)
+{
+#if MALI_USE_CSF
+ unsigned long irq_flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev) && (flush_op != KBASE_MMU_OP_NONE) &&
+ kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0))
+ mmu_flush_pa_range(kbdev, phys, size, KBASE_MMU_OP_FLUSH_PT);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+#endif
+}
+
#ifdef CONFIG_MALI_VECTOR_DUMP
static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd,
int level, char ** const buffer, size_t *size_left)
diff --git a/mali_kbase/mmu/mali_kbase_mmu.h b/mali_kbase/mmu/mali_kbase_mmu.h
index 602a3f9..2b3e6c0 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.h
+++ b/mali_kbase/mmu/mali_kbase_mmu.h
@@ -51,6 +51,26 @@ enum kbase_caller_mmu_sync_info {
};
/**
+ * enum kbase_mmu_op_type - enum for MMU operations
+ * @KBASE_MMU_OP_NONE: To help catch uninitialized struct
+ * @KBASE_MMU_OP_FIRST: The lower boundary of enum
+ * @KBASE_MMU_OP_LOCK: Lock memory region
+ * @KBASE_MMU_OP_UNLOCK: Unlock memory region
+ * @KBASE_MMU_OP_FLUSH_PT: Flush page table (CLN+INV L2 only)
+ * @KBASE_MMU_OP_FLUSH_MEM: Flush memory (CLN+INV L2+LSC)
+ * @KBASE_MMU_OP_COUNT: The upper boundary of enum
+ */
+enum kbase_mmu_op_type {
+ KBASE_MMU_OP_NONE = 0, /* Must be zero */
+ KBASE_MMU_OP_FIRST, /* Must be the first non-zero op */
+ KBASE_MMU_OP_LOCK = KBASE_MMU_OP_FIRST,
+ KBASE_MMU_OP_UNLOCK,
+ KBASE_MMU_OP_FLUSH_PT,
+ KBASE_MMU_OP_FLUSH_MEM,
+ KBASE_MMU_OP_COUNT /* Must be the last in enum */
+};
+
+/**
* kbase_mmu_as_init() - Initialising GPU address space object.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer).
@@ -61,7 +81,7 @@ enum kbase_caller_mmu_sync_info {
*
* Return: 0 on success and non-zero value on failure.
*/
-int kbase_mmu_as_init(struct kbase_device *kbdev, int i);
+int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i);
/**
* kbase_mmu_as_term() - Terminate address space object.
@@ -72,7 +92,7 @@ int kbase_mmu_as_init(struct kbase_device *kbdev, int i);
* This is called upon device termination to destroy
* the address space object of the device.
*/
-void kbase_mmu_as_term(struct kbase_device *kbdev, int i);
+void kbase_mmu_as_term(struct kbase_device *kbdev, unsigned int i);
/**
* kbase_mmu_init - Initialise an object representing GPU page tables
@@ -150,6 +170,25 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table
int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
struct tagged_addr *phys, size_t nr,
unsigned long flags, int const group_id);
+#if MALI_USE_CSF
+/**
+ * kbase_mmu_update_csf_mcu_pages - Update MCU mappings with changes of phys and flags
+ *
+ * @kbdev: Pointer to kbase device.
+ * @vpfn: Virtual PFN (Page Frame Number) of the first page to update
+ * @phys: Pointer to the array of tagged physical addresses of the physical
+ * pages that are pointed to by the page table entries (that need to
+ * be updated).
+ * @nr: Number of pages to update
+ * @flags: Flags
+ * @group_id: The physical memory group in which the page was allocated.
+ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ *
+ * Return: 0 on success, otherwise an error code.
+ */
+int kbase_mmu_update_csf_mcu_pages(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys,
+ size_t nr, unsigned long flags, int const group_id);
+#endif
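/* Hypothetical usage sketch (not from the patch): update the attributes of an
 * existing MCU mapping, assuming 'vpfn' and 'phys' describe pages already
 * mapped for the MCU, KBASE_REG_GPU_RD is the desired attribute flag and
 * memory group 0 is valid for the mapping. The helper name is illustrative.
 */
static int example_update_mcu_mapping(struct kbase_device *kbdev, u64 vpfn,
				      struct tagged_addr *phys, size_t nr)
{
	return kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, phys, nr,
					      KBASE_REG_GPU_RD, 0);
}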
/**
* kbase_mmu_migrate_page - Migrate GPU mappings and content between memory pages
@@ -183,6 +222,25 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p
dma_addr_t old_dma_addr, dma_addr_t new_dma_addr, int level);
/**
+ * kbase_mmu_flush_pa_range() - Flush physical address range from the GPU caches
+ *
+ * @kbdev: Instance of GPU platform device, allocated from the probe method.
+ * @kctx: Pointer to kbase context, it can be NULL if the physical address
+ * range is not associated with User created context.
+ * @phys: Starting address of the physical range to start the operation on.
+ * @size: Number of bytes to work on.
+ * @flush_op: Type of cache flush operation to perform.
+ *
+ * Issue a cache flush command for a physical address range. This function
+ * won't perform any flush if the GPU doesn't support the FLUSH_PA_RANGE
+ * command. The flush is performed only if the context has a JASID assigned
+ * to it.
+ * This function is basically a wrapper for kbase_gpu_cache_flush_pa_range_and_busy_wait().
+ */
+void kbase_mmu_flush_pa_range(struct kbase_device *kbdev, struct kbase_context *kctx,
+ phys_addr_t phys, size_t size,
+ enum kbase_mmu_op_type flush_op);
+
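/* Hypothetical usage sketch: flush a single page table page from the GPU
 * caches after modifying it, assuming 'pgd' holds the physical address of
 * that page. The helper name is illustrative only.
 */
static void example_flush_pgd_page(struct kbase_device *kbdev, struct kbase_context *kctx,
				   phys_addr_t pgd)
{
	kbase_mmu_flush_pa_range(kbdev, kctx, pgd, PAGE_SIZE, KBASE_MMU_OP_FLUSH_PT);
}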
+/**
* kbase_mmu_bus_fault_interrupt - Process a bus fault interrupt.
*
* @kbdev: Pointer to the kbase device for which bus fault was reported.
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw.h b/mali_kbase/mmu/mali_kbase_mmu_hw.h
index 63277bc..50d2ea5 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw.h
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw.h
@@ -55,26 +55,6 @@ enum kbase_mmu_fault_type {
};
/**
- * enum kbase_mmu_op_type - enum for MMU operations
- * @KBASE_MMU_OP_NONE: To help catch uninitialized struct
- * @KBASE_MMU_OP_FIRST: The lower boundary of enum
- * @KBASE_MMU_OP_LOCK: Lock memory region
- * @KBASE_MMU_OP_UNLOCK: Unlock memory region
- * @KBASE_MMU_OP_FLUSH_PT: Flush page table (CLN+INV L2 only)
- * @KBASE_MMU_OP_FLUSH_MEM: Flush memory (CLN+INV L2+LSC)
- * @KBASE_MMU_OP_COUNT: The upper boundary of enum
- */
-enum kbase_mmu_op_type {
- KBASE_MMU_OP_NONE = 0, /* Must be zero */
- KBASE_MMU_OP_FIRST, /* Must be the first non-zero op */
- KBASE_MMU_OP_LOCK = KBASE_MMU_OP_FIRST,
- KBASE_MMU_OP_UNLOCK,
- KBASE_MMU_OP_FLUSH_PT,
- KBASE_MMU_OP_FLUSH_MEM,
- KBASE_MMU_OP_COUNT /* Must be the last in enum */
-};
-
-/**
* struct kbase_mmu_hw_op_param - parameters for kbase_mmu_hw_do_* functions
* @vpfn: MMU Virtual Page Frame Number to start the operation on.
* @nr: Number of pages to work on.
diff --git a/mali_kbase/tl/mali_kbase_tlstream.h b/mali_kbase/tl/mali_kbase_tlstream.h
index 6660cf5..c142849 100644
--- a/mali_kbase/tl/mali_kbase_tlstream.h
+++ b/mali_kbase/tl/mali_kbase_tlstream.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,17 +27,13 @@
#include <linux/wait.h>
/* The maximum size of a single packet used by timeline. */
-#define PACKET_SIZE 4096 /* bytes */
+#define PACKET_SIZE 4096 /* bytes */
/* The number of packets used by one timeline stream. */
-#if defined(CONFIG_MALI_JOB_DUMP) || defined(CONFIG_MALI_VECTOR_DUMP)
- #define PACKET_COUNT 64
-#else
- #define PACKET_COUNT 32
-#endif
+#define PACKET_COUNT 128
/* The maximum expected length of string in tracepoint descriptor. */
-#define STRLEN_MAX 64 /* bytes */
+#define STRLEN_MAX 64 /* bytes */
/**
* struct kbase_tlstream - timeline stream structure
diff --git a/mali_kbase/tl/mali_kbase_tracepoints.c b/mali_kbase/tl/mali_kbase_tracepoints.c
index fd0d0c0..e8a74e9 100644
--- a/mali_kbase/tl/mali_kbase_tracepoints.c
+++ b/mali_kbase/tl/mali_kbase_tracepoints.c
@@ -84,6 +84,7 @@ enum tl_msg_id_obj {
KBASE_TL_ATTRIB_ATOM_PRIORITIZED,
KBASE_TL_ATTRIB_ATOM_JIT,
KBASE_TL_KBASE_NEW_DEVICE,
+ KBASE_TL_KBASE_GPUCMDQUEUE_KICK,
KBASE_TL_KBASE_DEVICE_PROGRAM_CSG,
KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG,
KBASE_TL_KBASE_DEVICE_HALT_CSG,
@@ -352,6 +353,10 @@ enum tl_msg_id_obj {
"New KBase Device", \
"@IIIIIII", \
"kbase_device_id,kbase_device_gpu_core_count,kbase_device_max_num_csgs,kbase_device_as_count,kbase_device_sb_entry_count,kbase_device_has_cross_stream_sync,kbase_device_supports_gpu_sleep") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_GPUCMDQUEUE_KICK, \
+ "Kernel receives a request to process new GPU queue instructions", \
+ "@IL", \
+ "kernel_ctx_id,buffer_gpu_addr") \
TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, \
"CSG is programmed to a slot", \
"@IIIII", \
@@ -2092,6 +2097,33 @@ void __kbase_tlstream_tl_kbase_new_device(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
+void __kbase_tlstream_tl_kbase_gpucmdqueue_kick(
+ struct kbase_tlstream *stream,
+ u32 kernel_ctx_id,
+ u64 buffer_gpu_addr
+)
+{
+ const u32 msg_id = KBASE_TL_KBASE_GPUCMDQUEUE_KICK;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kernel_ctx_id)
+ + sizeof(buffer_gpu_addr)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kernel_ctx_id, sizeof(kernel_ctx_id));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &buffer_gpu_addr, sizeof(buffer_gpu_addr));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
void __kbase_tlstream_tl_kbase_device_program_csg(
struct kbase_tlstream *stream,
u32 kbase_device_id,
diff --git a/mali_kbase/tl/mali_kbase_tracepoints.h b/mali_kbase/tl/mali_kbase_tracepoints.h
index 2c207cd..586fe67 100644
--- a/mali_kbase/tl/mali_kbase_tracepoints.h
+++ b/mali_kbase/tl/mali_kbase_tracepoints.h
@@ -396,6 +396,12 @@ void __kbase_tlstream_tl_kbase_new_device(
u32 kbase_device_supports_gpu_sleep
);
+void __kbase_tlstream_tl_kbase_gpucmdqueue_kick(
+ struct kbase_tlstream *stream,
+ u32 kernel_ctx_id,
+ u64 buffer_gpu_addr
+);
+
void __kbase_tlstream_tl_kbase_device_program_csg(
struct kbase_tlstream *stream,
u32 kbase_device_id,
@@ -1982,6 +1988,37 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
+ * KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK - Kernel receives a request to process new GPU queue instructions
+ *
+ * @kbdev: Kbase device
+ * @kernel_ctx_id: Unique ID for the KBase Context
+ * @buffer_gpu_addr: Address of the GPU queue's command buffer
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK( \
+ kbdev, \
+ kernel_ctx_id, \
+ buffer_gpu_addr \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_gpucmdqueue_kick( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kernel_ctx_id, \
+ buffer_gpu_addr \
+ ); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK( \
+ kbdev, \
+ kernel_ctx_id, \
+ buffer_gpu_addr \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
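/* Hypothetical usage sketch: emit the tracepoint when user space kicks a GPU
 * command queue. The wrapper and parameter names are illustrative; the macro
 * compiles to a no-op on non-CSF builds as shown above.
 */
static inline void example_trace_gpucmdqueue_kick(struct kbase_device *kbdev,
						  u32 kernel_ctx_id, u64 buffer_gpu_addr)
{
	KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK(kbdev, kernel_ctx_id, buffer_gpu_addr);
}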
+/**
* KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG - CSG is programmed to a slot
*
* @kbdev: Kbase device