Author:    Sidath Senanayake <sidaths@google.com>  2021-06-15 13:39:30 +0100
Committer: Sidath Senanayake <sidaths@google.com>  2021-06-15 14:11:16 +0100
Commit:    fca8613cfcf585bf9113dca96a05daea9fd89794
Tree:      f2baa14910f83edf00450bc30d3703eb255a0bba /mali_kbase
Parent:    8037b534570814775d79aeddd06b76e5ee941f59
Download:  gpu-fca8613cfcf585bf9113dca96a05daea9fd89794.tar.gz
Mali Valhall DDK r31p0 KMD
Provenance: 2ea0ef9bd (collaborate/EAC/v_r31p0)

VX504X08X-BU-00000-r31p0-01eac0 - Valhall Android DDK
VX504X08X-BU-60000-r31p0-01eac0 - Valhall Android Document Bundle
VX504X08X-DC-11001-r31p0-01eac0 - Valhall Android DDK Software Errata
VX504X08X-SW-99006-r31p0-01eac0 - Valhall Android Renderscript AOSP parts

Signed-off-by: Sidath Senanayake <sidaths@google.com>
Change-Id: Ide9d5fdc6d9c95fa66a3546b01f619b43c09496d
Diffstat (limited to 'mali_kbase')
-rw-r--r--  mali_kbase/Kbuild | 10
-rw-r--r--  mali_kbase/Makefile | 9
-rw-r--r--  mali_kbase/arbiter/mali_kbase_arbif.c | 123
-rw-r--r--  mali_kbase/arbiter/mali_kbase_arbif.h | 8
-rw-r--r--  mali_kbase/arbiter/mali_kbase_arbiter_defs.h | 8
-rw-r--r--  mali_kbase/arbiter/mali_kbase_arbiter_interface.h | 41
-rw-r--r--  mali_kbase/arbiter/mali_kbase_arbiter_pm.c | 266
-rw-r--r--  mali_kbase/arbiter/mali_kbase_arbiter_pm.h | 50
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h | 4
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c | 56
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_devfreq.c | 4
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c | 32
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_instr_backend.c | 12
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_instr_defs.h | 2
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_hw.c | 81
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_rb.c | 12
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_js_backend.c | 2
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_backend.c | 17
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_ca.c | 14
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_defs.h | 76
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_driver.c | 125
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_internal.h | 25
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_l2_states.h | 15
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h | 20
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_metrics.c | 6
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_policy.c | 2
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_shader_states.h | 37
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_time.c | 5
-rw-r--r--  mali_kbase/csf/mali_base_csf_kernel.h | 637
-rw-r--r--  mali_kbase/csf/mali_gpu_csf_control_registers.h | 32
-rw-r--r--  mali_kbase/csf/mali_gpu_csf_registers.h | 1401
-rw-r--r--  mali_kbase/csf/mali_kbase_csf.c | 150
-rw-r--r--  mali_kbase/csf/mali_kbase_csf.h | 15
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c | 5
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_defs.h | 10
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware.c | 55
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware.h | 32
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c | 39
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_ioctl.h | 382
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_kcpu.c | 314
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_kcpu.h | 35
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_kcpu_debugfs.c | 4
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_reset_gpu.c | 77
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_scheduler.c | 369
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_scheduler.h | 6
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tiler_heap.c | 16
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_trace_buffer.c | 5
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_csf.c | 78
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_hw_csf.c | 24
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_jm.c | 20
-rw-r--r--  mali_kbase/device/mali_kbase_device.c | 88
-rw-r--r--  mali_kbase/device/mali_kbase_device_internal.h | 12
-rw-r--r--  mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c | 4
-rw-r--r--  mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h | 334
-rw-r--r--  mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h | 287
-rw-r--r--  mali_kbase/gpu/mali_kbase_gpu.h | 30
-rw-r--r--  mali_kbase/gpu/mali_kbase_gpu_coherency.h | 30
-rw-r--r--  mali_kbase/gpu/mali_kbase_gpu_id.h | 118
-rw-r--r--  mali_kbase/gpu/mali_kbase_gpu_regmap.h | 414
-rw-r--r--  mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c | 11
-rw-r--r--  mali_kbase/jm/mali_base_jm_kernel.h | 1191
-rw-r--r--  mali_kbase/jm/mali_kbase_jm_ioctl.h | 220
-rw-r--r--  mali_kbase/jm/mali_kbase_jm_js.h | 6
-rw-r--r--  mali_kbase/jm/mali_kbase_js_defs.h | 3
-rw-r--r--  mali_kbase/mali_base_hwconfig_features.h | 15
-rw-r--r--  mali_kbase/mali_base_hwconfig_issues.h | 2
-rw-r--r--  mali_kbase/mali_base_kernel.h | 812
-rw-r--r--  mali_kbase/mali_base_mem_priv.h | 54
-rw-r--r--  mali_kbase/mali_kbase.h | 8
-rw-r--r--  mali_kbase/mali_kbase_cache_policy.h | 4
-rw-r--r--  mali_kbase/mali_kbase_core_linux.c | 137
-rw-r--r--  mali_kbase/mali_kbase_ctx_sched.c | 23
-rw-r--r--  mali_kbase/mali_kbase_ctx_sched.h | 21
-rw-r--r--  mali_kbase/mali_kbase_debug_job_fault.c | 10
-rw-r--r--  mali_kbase/mali_kbase_defs.h | 135
-rw-r--r--  mali_kbase/mali_kbase_event.c | 10
-rw-r--r--  mali_kbase/mali_kbase_gpu_memory_debugfs.c | 2
-rw-r--r--  mali_kbase/mali_kbase_gpuprops.c | 166
-rw-r--r--  mali_kbase/mali_kbase_gpuprops.h | 34
-rw-r--r--  mali_kbase/mali_kbase_gpuprops_types.h | 75
-rw-r--r--  mali_kbase/mali_kbase_gwt.h | 4
-rw-r--r--  mali_kbase/mali_kbase_hw.c | 170
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_gpuprops.h | 17
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_csf.c | 613
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_csf.h | 26
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_csf_if.h | 127
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c | 151
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.h | 2
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_jm.c | 76
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_gpu.c | 81
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_gpu.h | 57
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_legacy.c | 4
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_reader.h | 105
-rw-r--r--  mali_kbase/mali_kbase_ioctl.h | 841
-rw-r--r--  mali_kbase/mali_kbase_jd.c | 38
-rw-r--r--  mali_kbase/mali_kbase_jd_debugfs.c | 8
-rw-r--r--  mali_kbase/mali_kbase_jm.c | 8
-rw-r--r--  mali_kbase/mali_kbase_js.c | 202
-rw-r--r--  mali_kbase/mali_kbase_kinstr_jm.c | 2
-rw-r--r--  mali_kbase/mali_kbase_kinstr_jm.h | 4
-rw-r--r--  mali_kbase/mali_kbase_kinstr_jm_reader.h | 69
-rw-r--r--  mali_kbase/mali_kbase_mem.c | 14
-rw-r--r--  mali_kbase/mali_kbase_mem.h | 6
-rw-r--r--  mali_kbase/mali_kbase_mem_linux.c | 8
-rw-r--r--  mali_kbase/mali_kbase_mem_pool.c | 8
-rw-r--r--  mali_kbase/mali_kbase_mipe_gen_header.h | 14
-rw-r--r--  mali_kbase/mali_kbase_pm.c | 6
-rw-r--r--  mali_kbase/mali_kbase_reset_gpu.h | 18
-rw-r--r--  mali_kbase/mali_kbase_softjobs.c | 55
-rw-r--r--  mali_kbase/mali_kbase_sync_common.c | 4
-rw-r--r--  mali_kbase/mali_kbase_vinstr.c | 30
-rw-r--r--  mali_kbase/mali_uk.h | 70
-rw-r--r--  mali_kbase/mmu/backend/mali_kbase_mmu_csf.c | 23
-rw-r--r--  mali_kbase/mmu/backend/mali_kbase_mmu_jm.c | 31
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.c | 75
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.h | 17
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_hw_direct.c | 51
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_mode_lpae.c | 195
-rw-r--r--  mali_kbase/tests/Mconfig | 8
-rw-r--r--  mali_kbase/tl/backend/mali_kbase_timeline_csf.c | 4
-rw-r--r--  mali_kbase/tl/backend/mali_kbase_timeline_jm.c | 4
-rw-r--r--  mali_kbase/tl/mali_kbase_timeline.c | 5
-rw-r--r--  mali_kbase/tl/mali_kbase_timeline.h | 26
-rw-r--r--  mali_kbase/tl/mali_kbase_timeline_io.c | 46
-rw-r--r--  mali_kbase/tl/mali_kbase_timeline_priv.h | 7
-rw-r--r--  mali_kbase/tl/mali_kbase_tlstream.c | 7
-rw-r--r--  mali_kbase/tl/mali_kbase_tracepoints.c | 27
-rw-r--r--  mali_kbase/tl/mali_kbase_tracepoints.h | 22
128 files changed, 3404 insertions, 9006 deletions
diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild
index 1c9e109..5463a24 100644
--- a/mali_kbase/Kbuild
+++ b/mali_kbase/Kbuild
@@ -20,11 +20,11 @@
# Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= "r30p0-01eac0"
+MALI_RELEASE_NAME ?= '"r31p0-01eac0"'
# Paths required for build
-# make $(src) as absolute path if it isn't already, by prefixing $(srctree)
+# make $(src) as absolute path if it is not already, by prefixing $(srctree)
src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src))
KBASE_PATH = $(src)
KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy
@@ -64,7 +64,7 @@ DEFINES = \
-DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \
-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
-DMALI_COVERAGE=$(MALI_COVERAGE) \
- -DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \
+ -DMALI_RELEASE_NAME=$(MALI_RELEASE_NAME) \
-DMALI_JIT_PRESSURE_LIMIT_BASE=$(MALI_JIT_PRESSURE_LIMIT_BASE) \
-DMALI_INCREMENTAL_RENDERING=$(MALI_INCREMENTAL_RENDERING)
@@ -114,7 +114,6 @@ SRC := \
mali_kbase_mem_profile_debugfs.c \
mmu/mali_kbase_mmu.c \
mmu/mali_kbase_mmu_hw_direct.c \
- mmu/mali_kbase_mmu_mode_lpae.c \
mmu/mali_kbase_mmu_mode_aarch64.c \
mali_kbase_disjoint_events.c \
mali_kbase_debug_mem_view.c \
@@ -170,9 +169,6 @@ ifeq ($(CONFIG_MALI_CINSTR_GWT),y)
SRC += mali_kbase_gwt.c
endif
-ifeq ($(MALI_UNIT_TEST),1)
- SRC += tl/mali_kbase_timeline_test.c
-endif
ifeq ($(MALI_CUSTOMER_RELEASE),0)
SRC += mali_kbase_regs_dump_debugfs.c
diff --git a/mali_kbase/Makefile b/mali_kbase/Makefile
index 2ba2d77..84103af 100644
--- a/mali_kbase/Makefile
+++ b/mali_kbase/Makefile
@@ -27,7 +27,7 @@ ifeq ($(KBUILD_EXTMOD),)
export CONFIG_MALI_MIDGARD?=m
ifneq ($(CONFIG_MALI_MIDGARD),n)
-export CONFIF_MALI_CSF_SUPPORT?=n
+export CONFIG_MALI_CSF_SUPPORT?=n
export CONFIG_MALI_KUTF?=m
export CONFIG_MALI_REAL_HW?=y
@@ -39,7 +39,7 @@ export CONFIG_MALI_DEVFREQ?=y
endif
DEFINES += -DCONFIG_MALI_MIDGARD=$(CONFIG_MALI_MIDGARD) \
- -DCONFIF_MALI_CSF_SUPPORT=$(CONFIF_MALI_CSF_SUPPORT) \
+ -DCONFIG_MALI_CSF_SUPPORT=$(CONFIG_MALI_CSF_SUPPORT) \
-DCONFIG_MALI_KUTF=$(CONFIG_MALI_KUTF) \
-DCONFIG_MALI_REAL_HW=$(CONFIG_MALI_REAL_HW) \
-DCONFIG_MALI_GATOR_SUPPORT=$(CONFIG_MALI_GATOR_SUPPORT) \
@@ -50,13 +50,8 @@ export DEFINES
endif
endif
-BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../..
KBASE_PATH_RELATIVE = $(CURDIR)
-ifeq ($(CONFIG_MALI_BUSLOG),y)
-#Add bus logger symbols
-EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers
-endif
# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
all:
diff --git a/mali_kbase/arbiter/mali_kbase_arbif.c b/mali_kbase/arbiter/mali_kbase_arbif.c
index 5ed5f80..7d6ab0c 100644
--- a/mali_kbase/arbiter/mali_kbase_arbif.c
+++ b/mali_kbase/arbiter/mali_kbase_arbif.c
@@ -30,6 +30,66 @@
#include <linux/of_platform.h>
#include "mali_kbase_arbiter_interface.h"
+/* Arbiter interface version against which this module was implemented */
+#define MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION 5
+#if MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION != \
+ MALI_KBASE_ARBITER_INTERFACE_VERSION
+#error "Unsupported Mali Arbiter interface version."
+#endif
+
+static void on_max_config(struct device *dev, uint32_t max_l2_slices,
+ uint32_t max_core_mask)
+{
+ struct kbase_device *kbdev;
+
+ if (!dev) {
+ pr_err("%s(): dev is NULL", __func__);
+ return;
+ }
+
+ kbdev = dev_get_drvdata(dev);
+ if (!kbdev) {
+ dev_err(dev, "%s(): kbdev is NULL", __func__);
+ return;
+ }
+
+ if (!max_l2_slices || !max_core_mask) {
+ dev_dbg(dev,
+ "%s(): max_config ignored as one of the fields is zero",
+ __func__);
+ return;
+ }
+
+ /* set the max config info in the kbase device */
+ kbase_arbiter_set_max_config(kbdev, max_l2_slices, max_core_mask);
+}
+
+/**
+ * on_update_freq() - Updates GPU clock frequency
+ * @dev: arbiter interface device handle
+ * @freq: GPU clock frequency value reported from arbiter
+ *
+ * call back function to update GPU clock frequency with
+ * new value from arbiter
+ */
+static void on_update_freq(struct device *dev, uint32_t freq)
+{
+ struct kbase_device *kbdev;
+
+ if (!dev) {
+ pr_err("%s(): dev is NULL", __func__);
+ return;
+ }
+
+ kbdev = dev_get_drvdata(dev);
+ if (!kbdev) {
+ dev_err(dev, "%s(): kbdev is NULL", __func__);
+ return;
+ }
+
+ kbase_arbiter_pm_update_gpu_freq(&kbdev->arb.arb_freq, freq);
+}
+
/**
* on_gpu_stop() - sends KBASE_VM_GPU_STOP_EVT event on VM stop
* @dev: arbiter interface device handle
@@ -38,7 +98,18 @@
*/
static void on_gpu_stop(struct device *dev)
{
- struct kbase_device *kbdev = dev_get_drvdata(dev);
+ struct kbase_device *kbdev;
+
+ if (!dev) {
+ pr_err("%s(): dev is NULL", __func__);
+ return;
+ }
+
+ kbdev = dev_get_drvdata(dev);
+ if (!kbdev) {
+ dev_err(dev, "%s(): kbdev is NULL", __func__);
+ return;
+ }
KBASE_TLSTREAM_TL_ARBITER_STOP_REQUESTED(kbdev, kbdev);
kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_STOP_EVT);
@@ -52,7 +123,18 @@ static void on_gpu_stop(struct device *dev)
*/
static void on_gpu_granted(struct device *dev)
{
- struct kbase_device *kbdev = dev_get_drvdata(dev);
+ struct kbase_device *kbdev;
+
+ if (!dev) {
+ pr_err("%s(): dev is NULL", __func__);
+ return;
+ }
+
+ kbdev = dev_get_drvdata(dev);
+ if (!kbdev) {
+ dev_err(dev, "%s(): kbdev is NULL", __func__);
+ return;
+ }
KBASE_TLSTREAM_TL_ARBITER_GRANTED(kbdev, kbdev);
kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_GRANTED_EVT);
@@ -66,7 +148,18 @@ static void on_gpu_granted(struct device *dev)
*/
static void on_gpu_lost(struct device *dev)
{
- struct kbase_device *kbdev = dev_get_drvdata(dev);
+ struct kbase_device *kbdev;
+
+ if (!dev) {
+ pr_err("%s(): dev is NULL", __func__);
+ return;
+ }
+
+ kbdev = dev_get_drvdata(dev);
+ if (!kbdev) {
+ dev_err(dev, "%s(): kbdev is NULL", __func__);
+ return;
+ }
kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_LOST_EVT);
}
@@ -122,6 +215,12 @@ int kbase_arbif_init(struct kbase_device *kbdev)
ops.arb_vm_gpu_stop = on_gpu_stop;
ops.arb_vm_gpu_granted = on_gpu_granted;
ops.arb_vm_gpu_lost = on_gpu_lost;
+ ops.arb_vm_max_config = on_max_config;
+ ops.arb_vm_update_freq = on_update_freq;
+
+
+ kbdev->arb.arb_freq.arb_freq = 0;
+ mutex_init(&kbdev->arb.arb_freq.arb_freq_lock);
/* register kbase arbiter_if callbacks */
if (arb_if->vm_ops.vm_arb_register_dev) {
@@ -133,6 +232,7 @@ int kbase_arbif_init(struct kbase_device *kbdev)
return err;
}
}
+
#else /* CONFIG_OF */
dev_dbg(kbdev->dev, "No arbiter without Device Tree support\n");
kbdev->arb.arb_dev = NULL;
@@ -162,6 +262,22 @@ void kbase_arbif_destroy(struct kbase_device *kbdev)
}
/**
+ * kbase_arbif_get_max_config() - Request max config info
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * call back function from arb interface to arbiter requesting max config info
+ */
+void kbase_arbif_get_max_config(struct kbase_device *kbdev)
+{
+ struct arbiter_if_dev *arb_if = kbdev->arb.arb_if;
+
+ if (arb_if && arb_if->vm_ops.vm_arb_get_max_config) {
+ dev_dbg(kbdev->dev, "%s\n", __func__);
+ arb_if->vm_ops.vm_arb_get_max_config(arb_if);
+ }
+}
+
+/**
* kbase_arbif_gpu_request() - Request GPU from
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
@@ -173,6 +289,7 @@ void kbase_arbif_gpu_request(struct kbase_device *kbdev)
if (arb_if && arb_if->vm_ops.vm_arb_gpu_request) {
dev_dbg(kbdev->dev, "%s\n", __func__);
+ KBASE_TLSTREAM_TL_ARBITER_REQUESTED(kbdev, kbdev);
arb_if->vm_ops.vm_arb_gpu_request(arb_if);
}
}
diff --git a/mali_kbase/arbiter/mali_kbase_arbif.h b/mali_kbase/arbiter/mali_kbase_arbif.h
index c6a2031..710559c 100644
--- a/mali_kbase/arbiter/mali_kbase_arbif.h
+++ b/mali_kbase/arbiter/mali_kbase_arbif.h
@@ -72,6 +72,14 @@ int kbase_arbif_init(struct kbase_device *kbdev);
void kbase_arbif_destroy(struct kbase_device *kbdev);
/**
+ * kbase_arbif_get_max_config() - Request max config info
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * call back function from arb interface to arbiter requesting max config info
+ */
+void kbase_arbif_get_max_config(struct kbase_device *kbdev);
+
+/**
* kbase_arbif_gpu_request() - Send GPU request message to the arbiter
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_defs.h b/mali_kbase/arbiter/mali_kbase_arbiter_defs.h
index c754b6e..586c5d4 100644
--- a/mali_kbase/arbiter/mali_kbase_arbiter_defs.h
+++ b/mali_kbase/arbiter/mali_kbase_arbiter_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
*
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -44,6 +44,8 @@
* @vm_resume_work: Work item for vm_arb_wq to resume current work on GPU
* @vm_arb_starting: Work queue resume in progress
* @vm_arb_stopping: Work queue suspend in progress
+ * @interrupts_installed: Flag set when interrupts are installed
+ * @vm_request_timer: Timer to monitor GPU request
*/
struct kbase_arbiter_vm_state {
struct kbase_device *kbdev;
@@ -55,6 +57,8 @@ struct kbase_arbiter_vm_state {
struct work_struct vm_resume_work;
bool vm_arb_starting;
bool vm_arb_stopping;
+ bool interrupts_installed;
+ struct hrtimer vm_request_timer;
};
/**
@@ -62,10 +66,12 @@ struct kbase_arbiter_vm_state {
* allocated from the probe method of Mali driver
* @arb_if: Pointer to the arbiter interface device
* @arb_dev: Pointer to the arbiter device
+ * @arb_freq: GPU clock frequency retrieved from arbiter.
*/
struct kbase_arbiter_device {
struct arbiter_if_dev *arb_if;
struct device *arb_dev;
+ struct kbase_arbiter_freq arb_freq;
};
#endif /* _MALI_KBASE_ARBITER_DEFS_H_ */
diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_interface.h b/mali_kbase/arbiter/mali_kbase_arbiter_interface.h
index 958b0a1..84389e8 100644
--- a/mali_kbase/arbiter/mali_kbase_arbiter_interface.h
+++ b/mali_kbase/arbiter/mali_kbase_arbiter_interface.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
*
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,7 +28,7 @@
#define _MALI_KBASE_ARBITER_INTERFACE_H_
/**
- * @brief Mali arbiter interface version
+ * Mali arbiter interface version
*
* This specifies the current version of the configuration interface. Whenever
* the arbiter interface changes, so that integration effort is required, the
@@ -39,8 +39,15 @@
* 1 - Added the Mali arbiter configuration interface.
* 2 - Strip out reference code from header
* 3 - Removed DVFS utilization interface (DVFS moved to arbiter side)
+ * 4 - Added max_config support
+ * 5 - Added GPU clock frequency reporting support from arbiter
*/
-#define MALI_KBASE_ARBITER_INTERFACE_VERSION 3
+#define MALI_KBASE_ARBITER_INTERFACE_VERSION 5
+
+/**
+ * NO_FREQ is used in case platform doesn't support reporting frequency
+ */
+#define NO_FREQ 0
struct arbiter_if_dev;
@@ -86,6 +93,27 @@ struct arbiter_if_arb_vm_ops {
* If successful, will respond with a vm_arb_gpu_stopped message.
*/
void (*arb_vm_gpu_lost)(struct device *dev);
+
+ /**
+ * arb_vm_max_config() - Send max config info to the VM
+ * @dev: The arbif kernel module device.
+ * @max_l2_slices: The maximum number of L2 slices.
+ * @max_core_mask: The largest core mask.
+ *
+ * Informs KBase the maximum resources that can be allocated to the
+ * partition in use.
+ */
+ void (*arb_vm_max_config)(struct device *dev, uint32_t max_l2_slices,
+ uint32_t max_core_mask);
+
+ /**
+ * arb_vm_update_freq() - GPU clock frequency has been updated
+ * @dev: The arbif kernel module device.
+ * @freq: GPU clock frequency value reported from arbiter
+ *
+ * Informs KBase that the GPU clock frequency has been updated.
+ */
+ void (*arb_vm_update_freq)(struct device *dev, uint32_t freq);
};
/**
@@ -115,6 +143,13 @@ struct arbiter_if_vm_arb_ops {
void (*vm_arb_unregister_dev)(struct arbiter_if_dev *arbif_dev);
/**
+ * vm_arb_gpu_get_max_config() - Request the max config from the
+ * Arbiter.
+ * @arbif_dev: The arbiter interface we want to issue the request.
+ */
+ void (*vm_arb_get_max_config)(struct arbiter_if_dev *arbif_dev);
+
+ /**
* vm_arb_gpu_request() - Ask the arbiter interface for GPU access.
* @arbif_dev: The arbiter interface we want to issue the request.
*/
diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_pm.c b/mali_kbase/arbiter/mali_kbase_arbiter_pm.c
index 08a6872..456cc70 100644
--- a/mali_kbase/arbiter/mali_kbase_arbiter_pm.c
+++ b/mali_kbase/arbiter/mali_kbase_arbiter_pm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
*
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,14 +20,33 @@
*/
/**
- * @file mali_kbase_arbiter_pm.c
+ * @file
* Mali arbiter power manager state machine and APIs
*/
#include <mali_kbase.h>
#include <mali_kbase_pm.h>
+#include <mali_kbase_hwaccess_jm.h>
#include <mali_kbase_irq_internal.h>
+#include <mali_kbase_hwcnt_context.h>
+#include <mali_kbase_pm_internal.h>
#include <tl/mali_kbase_tracepoints.h>
+#include <mali_kbase_gpuprops.h>
+
+/* A dmesg warning will occur if the GPU is not granted
+ * after the following time (in milliseconds) has elapsed.
+ */
+#define GPU_REQUEST_TIMEOUT 1000
+
+#define MAX_L2_SLICES_MASK 0xFF
+
+/* Maximum time in ms, before deferring probe in case
+ * GPU_GRANTED message is not received
+ */
+static int gpu_req_timeout = 1;
+module_param(gpu_req_timeout, int, 0644);
+MODULE_PARM_DESC(gpu_req_timeout,
+ "On a virtualized platform, if the GPU is not granted within this time(ms) kbase will defer the probe");
static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev);
static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(
@@ -195,6 +214,60 @@ static void kbase_arbiter_pm_resume_wq(struct work_struct *data)
}
/**
+ * request_timer_callback() - Issue warning on request timer expiration
+ * @timer: Request hr timer data
+ *
+ * Called when the Arbiter takes too long to grant the GPU after a
+ * request has been made. Issues a warning in dmesg.
+ *
+ * Return: Always returns HRTIMER_NORESTART
+ */
+static enum hrtimer_restart request_timer_callback(struct hrtimer *timer)
+{
+ struct kbase_arbiter_vm_state *arb_vm_state = container_of(timer,
+ struct kbase_arbiter_vm_state, vm_request_timer);
+
+ KBASE_DEBUG_ASSERT(arb_vm_state);
+ KBASE_DEBUG_ASSERT(arb_vm_state->kbdev);
+
+ dev_warn(arb_vm_state->kbdev->dev,
+ "Still waiting for GPU to be granted from Arbiter after %d ms\n",
+ GPU_REQUEST_TIMEOUT);
+ return HRTIMER_NORESTART;
+}
+
+/**
+ * start_request_timer() - Start a timer after requesting GPU
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Start a timer to track when kbase is waiting for the GPU from the
+ * Arbiter. If the timer expires before GPU is granted, a warning in
+ * dmesg will be issued.
+ */
+static void start_request_timer(struct kbase_device *kbdev)
+{
+ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
+
+ hrtimer_start(&arb_vm_state->vm_request_timer,
+ HR_TIMER_DELAY_MSEC(GPU_REQUEST_TIMEOUT),
+ HRTIMER_MODE_REL);
+}
+
+/**
+ * cancel_request_timer() - Stop the request timer
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Stops the request timer once GPU has been granted. Safe to call
+ * even if timer is no longer running.
+ */
+static void cancel_request_timer(struct kbase_device *kbdev)
+{
+ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
+
+ hrtimer_cancel(&arb_vm_state->vm_request_timer);
+}
+
+/**
* kbase_arbiter_pm_early_init() - Initialize arbiter for VM
* Paravirtualized use.
* @kbdev: The kbase device structure for the device (must be a valid pointer)
@@ -230,6 +303,10 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev)
INIT_WORK(&arb_vm_state->vm_resume_work, kbase_arbiter_pm_resume_wq);
arb_vm_state->vm_arb_starting = false;
atomic_set(&kbdev->pm.gpu_users_waiting, 0);
+ hrtimer_init(&arb_vm_state->vm_request_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+ arb_vm_state->vm_request_timer.function =
+ request_timer_callback;
kbdev->pm.arb_vm_state = arb_vm_state;
err = kbase_arbif_init(kbdev);
@@ -237,17 +314,31 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev)
dev_err(kbdev->dev, "Failed to initialise arbif module\n");
goto arbif_init_fail;
}
+
if (kbdev->arb.arb_if) {
kbase_arbif_gpu_request(kbdev);
dev_dbg(kbdev->dev, "Waiting for initial GPU assignment...\n");
- wait_event(arb_vm_state->vm_state_wait,
+ err = wait_event_timeout(arb_vm_state->vm_state_wait,
arb_vm_state->vm_state ==
- KBASE_VM_STATE_INITIALIZING_WITH_GPU);
+ KBASE_VM_STATE_INITIALIZING_WITH_GPU,
+ msecs_to_jiffies(gpu_req_timeout));
+
+ if (!err) {
+ dev_dbg(kbdev->dev,
+ "Kbase probe Deferred after waiting %d ms to receive GPU_GRANT\n",
+ gpu_req_timeout);
+ err = -EPROBE_DEFER;
+ goto arbif_eprobe_defer;
+ }
+
dev_dbg(kbdev->dev,
"Waiting for initial GPU assignment - done\n");
}
return 0;
+arbif_eprobe_defer:
+ kbase_arbiter_pm_early_term(kbdev);
+ return err;
arbif_init_fail:
destroy_workqueue(arb_vm_state->vm_arb_wq);
kfree(arb_vm_state);
@@ -265,14 +356,15 @@ void kbase_arbiter_pm_early_term(struct kbase_device *kbdev)
{
struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
+ cancel_request_timer(kbdev);
mutex_lock(&arb_vm_state->vm_state_lock);
if (arb_vm_state->vm_state > KBASE_VM_STATE_STOPPED_GPU_REQUESTED) {
kbase_pm_set_gpu_lost(kbdev, false);
kbase_arbif_gpu_stopped(kbdev, false);
}
mutex_unlock(&arb_vm_state->vm_state_lock);
- kbase_arbif_destroy(kbdev);
destroy_workqueue(arb_vm_state->vm_arb_wq);
+ kbase_arbif_destroy(kbdev);
arb_vm_state->vm_arb_wq = NULL;
kfree(kbdev->pm.arb_vm_state);
kbdev->pm.arb_vm_state = NULL;
@@ -282,19 +374,36 @@ void kbase_arbiter_pm_early_term(struct kbase_device *kbdev)
* kbase_arbiter_pm_release_interrupts() - Release the GPU interrupts
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
- * Releases interrupts if needed (GPU is available) otherwise does nothing
+ * Releases interrupts and set the interrupt flag to false
*/
void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev)
{
struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
mutex_lock(&arb_vm_state->vm_state_lock);
- if (!kbdev->arb.arb_if ||
- arb_vm_state->vm_state >
- KBASE_VM_STATE_STOPPED_GPU_REQUESTED)
+ if (arb_vm_state->interrupts_installed == true) {
+ arb_vm_state->interrupts_installed = false;
kbase_release_interrupts(kbdev);
+ }
+ mutex_unlock(&arb_vm_state->vm_state_lock);
+}
+/**
+ * kbase_arbiter_pm_install_interrupts() - Install the GPU interrupts
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Install interrupts and set the interrupt_install flag to true.
+ */
+int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev)
+{
+ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
+ int err;
+
+ mutex_lock(&arb_vm_state->vm_state_lock);
+ arb_vm_state->interrupts_installed = true;
+ err = kbase_install_interrupts(kbdev);
mutex_unlock(&arb_vm_state->vm_state_lock);
+ return err;
}
/**
@@ -317,7 +426,12 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev)
dev_dbg(kbdev->dev, "%s %s\n", __func__,
kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state));
- kbase_release_interrupts(kbdev);
+
+ if (arb_vm_state->interrupts_installed) {
+ arb_vm_state->interrupts_installed = false;
+ kbase_release_interrupts(kbdev);
+ }
+
switch (arb_vm_state->vm_state) {
case KBASE_VM_STATE_STOPPING_ACTIVE:
request_gpu = true;
@@ -338,6 +452,71 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev)
kbase_pm_set_gpu_lost(kbdev, false);
kbase_arbif_gpu_stopped(kbdev, request_gpu);
+ if (request_gpu)
+ start_request_timer(kbdev);
+}
+
+void kbase_arbiter_set_max_config(struct kbase_device *kbdev,
+ uint32_t max_l2_slices,
+ uint32_t max_core_mask)
+{
+ struct kbase_arbiter_vm_state *arb_vm_state;
+ struct max_config_props max_config;
+
+ if (!kbdev)
+ return;
+
+ /* Mask the max_l2_slices as it is stored as 8 bits into kbase */
+ max_config.l2_slices = max_l2_slices & MAX_L2_SLICES_MASK;
+ max_config.core_mask = max_core_mask;
+ arb_vm_state = kbdev->pm.arb_vm_state;
+
+ mutex_lock(&arb_vm_state->vm_state_lock);
+ /* Just set the max_props in kbase during initialization. */
+ if (arb_vm_state->vm_state == KBASE_VM_STATE_INITIALIZING)
+ kbase_gpuprops_set_max_config(kbdev, &max_config);
+ else
+ dev_dbg(kbdev->dev, "Unexpected max_config on VM state %s",
+ kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state));
+
+ mutex_unlock(&arb_vm_state->vm_state_lock);
+}
+
+int kbase_arbiter_pm_gpu_assigned(struct kbase_device *kbdev)
+{
+ struct kbase_arbiter_vm_state *arb_vm_state;
+ int result = -EINVAL;
+
+ if (!kbdev)
+ return result;
+
+ /* First check the GPU_LOST state */
+ kbase_pm_lock(kbdev);
+ if (kbase_pm_is_gpu_lost(kbdev)) {
+ kbase_pm_unlock(kbdev);
+ return 0;
+ }
+ kbase_pm_unlock(kbdev);
+
+ /* Then the arbitration state machine */
+ arb_vm_state = kbdev->pm.arb_vm_state;
+
+ mutex_lock(&arb_vm_state->vm_state_lock);
+ switch (arb_vm_state->vm_state) {
+ case KBASE_VM_STATE_INITIALIZING:
+ case KBASE_VM_STATE_SUSPENDED:
+ case KBASE_VM_STATE_STOPPED:
+ case KBASE_VM_STATE_STOPPED_GPU_REQUESTED:
+ case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT:
+ result = 0;
+ break;
+ default:
+ result = 1;
+ break;
+ }
+ mutex_unlock(&arb_vm_state->vm_state_lock);
+
+ return result;
}
/**
@@ -351,6 +530,7 @@ static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev)
struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
lockdep_assert_held(&arb_vm_state->vm_state_lock);
+ cancel_request_timer(kbdev);
switch (arb_vm_state->vm_state) {
case KBASE_VM_STATE_INITIALIZING:
kbase_arbiter_pm_vm_set_state(kbdev,
@@ -358,7 +538,14 @@ static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev)
break;
case KBASE_VM_STATE_STOPPED_GPU_REQUESTED:
kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STARTING);
+ arb_vm_state->interrupts_installed = true;
kbase_install_interrupts(kbdev);
+ /*
+ * GPU GRANTED received while in stop can be a result of a
+ * repartitioning.
+ */
+ kbase_gpuprops_req_curr_config_update(kbdev);
+ /* curr_config will be updated while resuming the PM. */
queue_work(arb_vm_state->vm_arb_wq,
&arb_vm_state->vm_resume_work);
break;
@@ -591,6 +778,7 @@ static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev)
kbase_arbiter_pm_vm_set_state(kbdev,
KBASE_VM_STATE_STOPPED_GPU_REQUESTED);
kbase_arbif_gpu_request(kbdev);
+ start_request_timer(kbdev);
/* Release lock and block resume OS function until we have
* asynchronously received the GRANT message from the Arbiter and
@@ -764,6 +952,7 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev,
kbase_arbiter_pm_vm_set_state(kbdev,
KBASE_VM_STATE_STOPPED_GPU_REQUESTED);
kbase_arbif_gpu_request(kbdev);
+ start_request_timer(kbdev);
} else if (arb_vm_state->vm_state ==
KBASE_VM_STATE_INITIALIZING_WITH_GPU)
break;
@@ -811,3 +1000,60 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev,
}
return res;
}
+
+/**
+ * kbase_arbiter_pm_update_gpu_freq() - Updates GPU clock frequency received
+ * from arbiter.
+ * @arb_freq - Pointer to structure holding GPU clock frequency data
+ * @freq - New frequency value
+ */
+void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq,
+ uint32_t freq)
+{
+ mutex_lock(&arb_freq->arb_freq_lock);
+ arb_freq->arb_freq = freq;
+ mutex_unlock(&arb_freq->arb_freq_lock);
+}
+
+/**
+ * enumerate_arb_gpu_clk() - Enumerate a GPU clock on the given index
+ * @kbdev - kbase_device pointer
+ * @index - GPU clock index
+ *
+ * Returns pointer to structure holding GPU clock frequency data reported from
+ * arbiter, only index 0 is valid.
+ */
+static void *enumerate_arb_gpu_clk(struct kbase_device *kbdev,
+ unsigned int index)
+{
+ if (index == 0)
+ return &kbdev->arb.arb_freq;
+ return NULL;
+}
+
+/**
+ * get_arb_gpu_clk_rate() - Get the current rate of GPU clock frequency value
+ * @kbdev - kbase_device pointer
+ * @index - GPU clock index
+ *
+ * Returns the GPU clock frequency value saved when gpu is granted from arbiter
+ */
+static unsigned long get_arb_gpu_clk_rate(struct kbase_device *kbdev,
+ void *gpu_clk_handle)
+{
+ uint32_t freq;
+ struct kbase_arbiter_freq *arb_dev_freq =
+ (struct kbase_arbiter_freq *) gpu_clk_handle;
+
+ mutex_lock(&arb_dev_freq->arb_freq_lock);
+ freq = arb_dev_freq->arb_freq;
+ mutex_unlock(&arb_dev_freq->arb_freq_lock);
+ return freq;
+}
+
+struct kbase_clk_rate_trace_op_conf arb_clk_rate_trace_ops = {
+ .get_gpu_clk_rate = get_arb_gpu_clk_rate,
+ .enumerate_gpu_clk = enumerate_arb_gpu_clk,
+ .gpu_clk_notifier_register = NULL,
+ .gpu_clk_notifier_unregister = NULL
+};
diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_pm.h b/mali_kbase/arbiter/mali_kbase_arbiter_pm.h
index ef82271..0f74b63 100644
--- a/mali_kbase/arbiter/mali_kbase_arbiter_pm.h
+++ b/mali_kbase/arbiter/mali_kbase_arbiter_pm.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
*
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -93,11 +93,19 @@ void kbase_arbiter_pm_early_term(struct kbase_device *kbdev);
* kbase_arbiter_pm_release_interrupts() - Release the GPU interrupts
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
- * Releases interrupts if needed (GPU is available) otherwise does nothing
+ * Releases interrupts and set the interrupt flag to false
*/
void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev);
/**
+ * kbase_arbiter_pm_install_interrupts() - Install the GPU interrupts
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Install interrupts and set the interrupt_install flag to true.
+ */
+int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev);
+
+/**
* kbase_arbiter_pm_vm_event() - Dispatch VM event to the state machine
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
@@ -133,4 +141,42 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev,
*/
void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev);
+/**
+ * kbase_arbiter_set_max_config() - Set the max config data in kbase device.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer).
+ * @max_l2_slices: The maximum number of L2 slices.
+ * @max_core_mask: The largest core mask.
+ *
+ * This function handles a stop event for the VM.
+ * It will update the VM state and forward the stop event to the driver.
+ */
+void kbase_arbiter_set_max_config(struct kbase_device *kbdev,
+ uint32_t max_l2_slices,
+ uint32_t max_core_mask);
+
+/**
+ * kbase_arbiter_pm_gpu_assigned() - Determine if this VM has access to the GPU
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 if the VM does not have access, 1 if it does, and a negative number
+ * if an error occurred
+ */
+int kbase_arbiter_pm_gpu_assigned(struct kbase_device *kbdev);
+
+extern struct kbase_clk_rate_trace_op_conf arb_clk_rate_trace_ops;
+
+/**
+ * struct kbase_arbiter_freq - Holding the GPU clock frequency data retrieved
+ * from arbiter
+ * @arb_freq: GPU clock frequency value
+ * @arb_freq_lock: Mutex protecting access to arbfreq value
+ */
+struct kbase_arbiter_freq {
+ uint32_t arb_freq;
+ struct mutex arb_freq_lock;
+};
+
+void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq,
+ uint32_t freq);
+
#endif /*_MALI_KBASE_ARBITER_PM_H_ */
diff --git a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h
index 84fb1fc..fcf4e5b 100644
--- a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h
+++ b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
*
- * (C) COPYRIGHT 2014-2016, 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016, 2020-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -23,7 +23,7 @@
#define _KBASE_CACHE_POLICY_BACKEND_H_
#include "mali_kbase.h"
-#include "mali_base_kernel.h"
+#include <uapi/gpu/arm/midgard/mali_base_kernel.h>
/**
* kbase_cache_set_coherency_mode() - Sets the system coherency mode
diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
index dcd1b02..7076ab4 100644
--- a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
+++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
*
- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -39,6 +39,38 @@
#define CLK_RATE_TRACE_OPS (NULL)
#endif
+/**
+ * get_clk_rate_trace_callbacks() - Returns pointer to clk trace ops.
+ * @kbdev: Pointer to kbase device, used to check if arbitration is enabled
+ * when compiled with arbiter support.
+ * Return: Pointer to clk trace ops if supported or NULL.
+ */
+static struct kbase_clk_rate_trace_op_conf *
+get_clk_rate_trace_callbacks(struct kbase_device *kbdev __maybe_unused)
+{
+ /* base case */
+ struct kbase_clk_rate_trace_op_conf *callbacks =
+ (struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS;
+#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF)
+ const void *arbiter_if_node;
+
+ if (WARN_ON(!kbdev) || WARN_ON(!kbdev->dev))
+ return callbacks;
+
+ arbiter_if_node =
+ of_get_property(kbdev->dev->of_node, "arbiter_if", NULL);
+ /* Arbitration enabled, override the callback pointer.*/
+ if (arbiter_if_node)
+ callbacks = &arb_clk_rate_trace_ops;
+ else
+ dev_dbg(kbdev->dev,
+ "Arbitration supported but disabled by platform. Leaving clk rate callbacks as default.\n");
+
+#endif
+
+ return callbacks;
+}
+
static int gpu_clk_rate_change_notifier(struct notifier_block *nb,
unsigned long event, void *data)
{
@@ -69,12 +101,13 @@ static int gpu_clk_rate_change_notifier(struct notifier_block *nb,
static int gpu_clk_data_init(struct kbase_device *kbdev,
void *gpu_clk_handle, unsigned int index)
{
- struct kbase_clk_rate_trace_op_conf *callbacks =
- (struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS;
+ struct kbase_clk_rate_trace_op_conf *callbacks;
struct kbase_clk_data *clk_data;
struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
int ret = 0;
+ callbacks = get_clk_rate_trace_callbacks(kbdev);
+
if (WARN_ON(!callbacks) ||
WARN_ON(!gpu_clk_handle) ||
WARN_ON(index >= BASE_MAX_NR_CLOCKS_REGULATORS))
@@ -108,8 +141,9 @@ static int gpu_clk_data_init(struct kbase_device *kbdev,
clk_data->clk_rate_change_nb.notifier_call =
gpu_clk_rate_change_notifier;
- ret = callbacks->gpu_clk_notifier_register(kbdev, gpu_clk_handle,
- &clk_data->clk_rate_change_nb);
+ if (callbacks->gpu_clk_notifier_register)
+ ret = callbacks->gpu_clk_notifier_register(kbdev,
+ gpu_clk_handle, &clk_data->clk_rate_change_nb);
if (ret) {
dev_err(kbdev->dev, "Failed to register notifier for clock enumerated at index %u", index);
kfree(clk_data);
@@ -120,12 +154,13 @@ static int gpu_clk_data_init(struct kbase_device *kbdev,
int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev)
{
- struct kbase_clk_rate_trace_op_conf *callbacks =
- (struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS;
+ struct kbase_clk_rate_trace_op_conf *callbacks;
struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
unsigned int i;
int ret = 0;
+ callbacks = get_clk_rate_trace_callbacks(kbdev);
+
spin_lock_init(&clk_rtm->lock);
INIT_LIST_HEAD(&clk_rtm->listeners);
@@ -186,9 +221,10 @@ void kbase_clk_rate_trace_manager_term(struct kbase_device *kbdev)
if (!clk_rtm->clks[i])
break;
- clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister(
- kbdev, clk_rtm->clks[i]->gpu_clk_handle,
- &clk_rtm->clks[i]->clk_rate_change_nb);
+ if (clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister)
+ clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister
+ (kbdev, clk_rtm->clks[i]->gpu_clk_handle,
+ &clk_rtm->clks[i]->clk_rate_change_nb);
kfree(clk_rtm->clks[i]);
}
diff --git a/mali_kbase/backend/gpu/mali_kbase_devfreq.c b/mali_kbase/backend/gpu/mali_kbase_devfreq.c
index 07767c2..9b82184 100644
--- a/mali_kbase/backend/gpu/mali_kbase_devfreq.c
+++ b/mali_kbase/backend/gpu/mali_kbase_devfreq.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
*
- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -643,7 +643,7 @@ int kbase_devfreq_init(struct kbase_device *kbdev)
/* Record the maximum frequency possible */
kbdev->gpu_props.props.core_props.gpu_freq_khz_max =
dp->freq_table[0] / 1000;
- };
+ }
err = kbase_devfreq_init_core_mask_table(kbdev);
if (err) {
diff --git a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c
index 4254a64..7542209 100644
--- a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
*
- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -121,6 +121,32 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev,
return -EIO;
}
+int kbase_backend_gpuprops_get_curr_config(struct kbase_device *kbdev,
+ struct kbase_current_config_regdump *curr_config_regdump)
+{
+ if (WARN_ON(!kbdev) || WARN_ON(!curr_config_regdump))
+ return -EINVAL;
+
+ curr_config_regdump->mem_features = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(MEM_FEATURES));
+
+ curr_config_regdump->shader_present_lo = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(SHADER_PRESENT_LO));
+ curr_config_regdump->shader_present_hi = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(SHADER_PRESENT_HI));
+
+ curr_config_regdump->l2_present_lo = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(L2_PRESENT_LO));
+ curr_config_regdump->l2_present_hi = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(L2_PRESENT_HI));
+
+ if (WARN_ON(kbase_is_gpu_removed(kbdev)))
+ return -EIO;
+
+ return 0;
+
+}
+
int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
struct kbase_gpuprops_regdump *regdump)
{
@@ -156,11 +182,15 @@ int kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev,
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) {
u32 l2_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(L2_FEATURES));
+ u32 l2_config =
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG));
+
if (kbase_is_gpu_removed(kbdev))
return -EIO;
regdump->l2_features = l2_features;
+ regdump->l2_config = l2_config;
}
return 0;
diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
index 9cc425e..6868dc3 100644
--- a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
@@ -107,7 +107,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
err = 0;
- dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
+ dev_dbg(kbdev->dev, "HW counters dumping set-up for context %pK", kctx);
return err;
out_err:
return err;
@@ -167,7 +167,7 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
- dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
+ dev_dbg(kbdev->dev, "HW counters dumping disabled for context %pK",
kctx);
err = 0;
@@ -214,7 +214,7 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
GPU_COMMAND_PRFCNT_SAMPLE);
- dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);
+ dev_dbg(kbdev->dev, "HW counters dumping done for context %pK", kctx);
err = 0;
@@ -325,7 +325,7 @@ KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);
int kbase_instr_backend_init(struct kbase_device *kbdev)
{
- int ret = 0;
+ spin_lock_init(&kbdev->hwcnt.lock);
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
@@ -344,12 +344,12 @@ int kbase_instr_backend_init(struct kbase_device *kbdev)
kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_PRIMARY;
#endif
#endif
- return ret;
+ return 0;
}
void kbase_instr_backend_term(struct kbase_device *kbdev)
{
- (void)kbdev;
+ CSTD_UNUSED(kbdev);
}
#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_defs.h b/mali_kbase/backend/gpu/mali_kbase_instr_defs.h
index 39b009d..05d5193 100644
--- a/mali_kbase/backend/gpu/mali_kbase_instr_defs.h
+++ b/mali_kbase/backend/gpu/mali_kbase_instr_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
*
- * (C) COPYRIGHT 2014, 2016, 2018, 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2016, 2018-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
index 7cfca97..e84f3a9 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
@@ -40,10 +40,12 @@
#include <mali_kbase_regs_history_debugfs.h>
static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev);
+static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev,
+ const u64 affinity, const u64 limited_core_mask);
static u64 kbase_job_write_affinity(struct kbase_device *kbdev,
base_jd_core_req core_req,
- int js)
+ int js, const u64 limited_core_mask)
{
u64 affinity;
@@ -72,14 +74,21 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev,
*/
if (js == 2 && num_core_groups > 1)
affinity &= coherency_info->group[1].core_mask;
- else
+ else if (num_core_groups > 1)
affinity &= coherency_info->group[0].core_mask;
+ else
+ affinity &= kbdev->gpu_props.curr_config.shader_present;
} else {
/* Use all cores */
affinity = kbdev->pm.backend.shaders_avail &
kbdev->pm.debug_core_mask[js];
}
+ if (core_req & BASE_JD_REQ_LIMITED_CORE_MASK) {
+ /* Limiting affinity due to BASE_JD_REQ_LIMITED_CORE_MASK by applying the limited core mask. */
+ affinity = kbasep_apply_limited_core_mask(kbdev, affinity, limited_core_mask);
+ }
+
if (unlikely(!affinity)) {
#ifdef CONFIG_MALI_DEBUG
u64 shaders_ready =
@@ -89,6 +98,16 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev,
#endif
affinity = kbdev->pm.backend.shaders_avail;
+
+ if (core_req & BASE_JD_REQ_LIMITED_CORE_MASK) {
+ /* Limiting affinity again to make sure it only enables shader cores with backed TLS memory. */
+ affinity = kbasep_apply_limited_core_mask(kbdev, affinity, limited_core_mask);
+
+#ifdef CONFIG_MALI_DEBUG
+ /* affinity should never be 0 */
+ WARN_ON(!affinity);
+#endif
+ }
}
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
@@ -169,7 +188,7 @@ static u64 select_job_chain(struct kbase_jd_atom *katom)
}
dev_dbg(kctx->kbdev->dev,
- "Selected job chain 0x%llx for end atom %p in state %d\n",
+ "Selected job chain 0x%llx for end atom %pK in state %d\n",
jc, (void *)katom, (int)rp->state);
katom->jc = jc;
@@ -193,7 +212,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
/* Command register must be available */
KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx));
- dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %p\n",
+ dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n",
jc_head, (void *)katom);
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO),
@@ -201,7 +220,8 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI),
jc_head >> 32);
- affinity = kbase_job_write_affinity(kbdev, katom->core_req, js);
+ affinity = kbase_job_write_affinity(kbdev, katom->core_req, js,
+ kctx->limited_core_mask);
/* start MMU, medium priority, cache clean/flush on end, clean/flush on
* start
@@ -257,7 +277,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
katom->start_timestamp = ktime_get();
/* GO ! */
- dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx",
+ dev_dbg(kbdev->dev, "JS: Submitting atom %pK from ctx %pK to js[%d] with head=0x%llx",
katom, kctx, js, jc_head);
KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js,
@@ -431,7 +451,9 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
*/
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3076)) {
if (completion_code == BASE_JD_EVENT_JOB_BUS_FAULT) {
- if (kbase_prepare_to_reset_gpu_locked(kbdev))
+ if (kbase_prepare_to_reset_gpu_locked(
+ kbdev,
+ RESET_FLAGS_NONE))
kbase_reset_gpu_locked(kbdev);
}
}
@@ -789,7 +811,7 @@ static int softstop_start_rp_nolock(
if (!(katom->core_req & BASE_JD_REQ_START_RENDERPASS)) {
dev_dbg(kctx->kbdev->dev,
- "Atom %p on job slot is not start RP\n", (void *)katom);
+ "Atom %pK on job slot is not start RP\n", (void *)katom);
return -EPERM;
}
@@ -802,13 +824,13 @@ static int softstop_start_rp_nolock(
rp->state != KBASE_JD_RP_RETRY))
return -EINVAL;
- dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %p\n",
+ dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %pK\n",
(int)rp->state, (void *)reg);
if (WARN_ON(katom != rp->start_katom))
return -EINVAL;
- dev_dbg(kctx->kbdev->dev, "Adding region %p to list %p\n",
+ dev_dbg(kctx->kbdev->dev, "Adding region %pK to list %pK\n",
(void *)reg, (void *)&rp->oom_reg_list);
list_move_tail(&reg->link, &rp->oom_reg_list);
dev_dbg(kctx->kbdev->dev, "Added region to list\n");
@@ -853,7 +875,7 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
if (timeout != 0)
goto exit;
- if (kbase_prepare_to_reset_gpu(kbdev)) {
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) {
dev_err(kbdev->dev,
"Issuing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
ZAP_TIMEOUT);
@@ -863,7 +885,7 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
/* Wait for the reset to complete */
kbase_reset_gpu_wait(kbdev);
exit:
- dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx);
+ dev_dbg(kbdev->dev, "Zap: Finished Context %pK", kctx);
/* Ensure that the signallers of the waitqs have finished */
mutex_lock(&kctx->jctx.lock);
@@ -924,7 +946,7 @@ KBASE_EXPORT_TEST_API(kbase_job_slot_term);
void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
struct kbase_jd_atom *target_katom, u32 sw_flags)
{
- dev_dbg(kbdev->dev, "Soft-stop atom %p with flags 0x%x (s:%d)\n",
+ dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n",
target_katom, sw_flags, js);
KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK));
@@ -1337,6 +1359,7 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
/**
* kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU
* @kbdev: kbase device
+ * @flags: Bitfield indicating impact of reset (see flag defines)
*
* This function just soft-stops all the slots to ensure that as many jobs as
* possible are saved.
@@ -1347,10 +1370,12 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
* false - Another thread is performing a reset, kbase_reset_gpu should
* not be called.
*/
-bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev)
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev,
+ unsigned int flags)
{
int i;
+ CSTD_UNUSED(flags);
KBASE_DEBUG_ASSERT(kbdev);
#ifdef CONFIG_MALI_ARBITER_SUPPORT
@@ -1378,14 +1403,14 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev)
return true;
}
-bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev)
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags)
{
- unsigned long flags;
+ unsigned long lock_flags;
bool ret;
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- ret = kbase_prepare_to_reset_gpu_locked(kbdev);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, lock_flags);
+ ret = kbase_prepare_to_reset_gpu_locked(kbdev, flags);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, lock_flags);
return ret;
}
@@ -1506,3 +1531,21 @@ void kbase_reset_gpu_term(struct kbase_device *kbdev)
{
destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
}
+
+static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev,
+ const u64 affinity, const u64 limited_core_mask)
+{
+ const u64 result = affinity & limited_core_mask;
+
+#ifdef CONFIG_MALI_DEBUG
+ dev_dbg(kbdev->dev,
+ "Limiting affinity due to BASE_JD_REQ_LIMITED_CORE_MASK from 0x%lx to 0x%lx (mask is 0x%lx)\n",
+ (unsigned long int)affinity,
+ (unsigned long int)result,
+ (unsigned long int)limited_core_mask);
+#else
+ CSTD_UNUSED(kbdev);
+#endif
+
+ return result;
+}
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
index 7104658..5fdf9b6 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
*
- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1024,7 +1024,7 @@ void kbase_backend_run_atom(struct kbase_device *kbdev,
struct kbase_jd_atom *katom)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
- dev_dbg(kbdev->dev, "Backend running atom %p\n", (void *)katom);
+ dev_dbg(kbdev->dev, "Backend running atom %pK\n", (void *)katom);
kbase_gpu_enqueue_atom(kbdev, katom);
kbase_backend_slot_update(kbdev);
@@ -1085,7 +1085,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
struct kbase_context *kctx = katom->kctx;
dev_dbg(kbdev->dev,
- "Atom %p completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n",
+ "Atom %pK completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n",
(void *)katom, completion_code, job_tail, js);
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -1205,7 +1205,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
if (job_tail != 0 && job_tail != katom->jc) {
/* Some of the job has been executed */
dev_dbg(kbdev->dev,
- "Update job chain address of atom %p to resume from 0x%llx\n",
+ "Update job chain address of atom %pK to resume from 0x%llx\n",
(void *)katom, job_tail);
katom->jc = job_tail;
@@ -1266,7 +1266,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
if (katom) {
dev_dbg(kbdev->dev,
- "Cross-slot dependency %p has become runnable.\n",
+ "Cross-slot dependency %pK has become runnable.\n",
(void *)katom);
/* Check if there are lower priority jobs to soft stop */
@@ -1666,7 +1666,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev)
if (katom)
dev_info(kbdev->dev,
- " js%d idx%d : katom=%p gpu_rb_state=%d\n",
+ " js%d idx%d : katom=%pK gpu_rb_state=%d\n",
js, idx, katom, katom->gpu_rb_state);
else
dev_info(kbdev->dev, " js%d idx%d : empty\n",
diff --git a/mali_kbase/backend/gpu/mali_kbase_js_backend.c b/mali_kbase/backend/gpu/mali_kbase_js_backend.c
index d28e7b0..cab222d 100644
--- a/mali_kbase/backend/gpu/mali_kbase_js_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_js_backend.c
@@ -257,7 +257,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
if (reset_needed) {
dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issuing GPU soft-reset to resolve.");
- if (kbase_prepare_to_reset_gpu_locked(kbdev))
+ if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu_locked(kbdev);
}
/* the timer is re-issued if there is contexts in the run-pool */
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
index 921849b..0cfa93c 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
*
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -498,7 +498,15 @@ static void kbase_pm_hwcnt_disable_worker(struct work_struct *data)
/* PM state was updated while we were doing the disable,
* so we need to undo the disable we just performed.
*/
+#if MALI_USE_CSF
+ unsigned long lock_flags;
+
+ kbase_csf_scheduler_spin_lock(kbdev, &lock_flags);
+#endif
kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+#if MALI_USE_CSF
+ kbase_csf_scheduler_spin_unlock(kbdev, lock_flags);
+#endif
}
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -664,10 +672,15 @@ void kbase_hwaccess_pm_term(struct kbase_device *kbdev)
if (kbdev->pm.backend.hwcnt_disabled) {
unsigned long flags;
-
+#if MALI_USE_CSF
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+#else
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#endif
}
/* Free any resources the policy allocated */
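
The change above makes hwcnt re-enabling on CSF builds happen under the CSF scheduler spin lock instead of only the hwaccess lock. A minimal user-space sketch of that conditional-locking shape, with a pthread mutex and a hypothetical enable_counters() standing in for the kernel primitives:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define USE_CSF 1 /* stand-in for MALI_USE_CSF */

static pthread_mutex_t scheduler_lock = PTHREAD_MUTEX_INITIALIZER;
static bool counters_enabled;

/* Hypothetical stand-in for kbase_hwcnt_context_enable(). */
static void enable_counters(void)
{
	counters_enabled = true;
}

static void reenable_counters_if_needed(bool counters_disabled)
{
	if (!counters_disabled)
		return;
#if USE_CSF
	/* CSF builds: hold the scheduler lock around the enable. */
	pthread_mutex_lock(&scheduler_lock);
#endif
	enable_counters();
#if USE_CSF
	pthread_mutex_unlock(&scheduler_lock);
#endif
}

int main(void)
{
	reenable_counters_if_needed(true);
	printf("counters_enabled = %d\n", counters_enabled);
	return 0;
}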
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
index c546766..3cf7608 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
@@ -102,10 +102,18 @@ u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
lockdep_assert_held(&kbdev->hwaccess_lock);
#ifdef CONFIG_MALI_DEVFREQ
- return kbdev->pm.backend.ca_cores_enabled & debug_core_mask;
+ /*
+ * Although at init time we let pm_backend->ca_cores_enabled be the max
+ * config (it uses the base_gpu_props), in this function we need to
+ * limit it to a subset of the current config, otherwise the shaders
+ * state machine in the PM does not make progress.
+ */
+ return kbdev->gpu_props.curr_config.shader_present &
+ kbdev->pm.backend.ca_cores_enabled &
+ debug_core_mask;
#else
- return kbdev->gpu_props.props.raw_props.shader_present &
- debug_core_mask;
+ return kbdev->gpu_props.curr_config.shader_present &
+ debug_core_mask;
#endif
}
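
A standalone sketch of the three-way intersection described in the comment above, with made-up values standing in for curr_config.shader_present, ca_cores_enabled and debug_core_mask:

#include <stdint.h>
#include <stdio.h>

/* Sketch: the effective core mask is the intersection of what is
 * physically present in the current configuration, what the
 * core-availability policy allows, and what the debug mask allows.
 */
static uint64_t effective_core_mask(uint64_t shader_present,
				    uint64_t ca_cores_enabled,
				    uint64_t debug_core_mask)
{
	return shader_present & ca_cores_enabled & debug_core_mask;
}

int main(void)
{
	const uint64_t shader_present = 0x3Full; /* cores 0-5 in curr config */
	const uint64_t ca_enabled = 0xFFull;     /* policy allows cores 0-7 */
	const uint64_t debug_mask = 0x0Full;     /* debugfs limits to cores 0-3 */

	printf("effective mask = 0x%llx\n",
	       (unsigned long long)effective_core_mask(shader_present,
						       ca_enabled,
						       debug_mask));
	return 0;
}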
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
index 1b4e141..0687a43 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
@@ -61,24 +61,9 @@ enum kbase_pm_core_type {
KBASE_PM_CORE_STACK = STACK_PRESENT_LO
};
-/**
+/*
* enum kbase_l2_core_state - The states used for the L2 cache & tiler power
* state machine.
- *
- * @KBASE_L2_OFF: The L2 cache and tiler are off
- * @KBASE_L2_PEND_ON: The L2 cache and tiler are powering on
- * @KBASE_L2_RESTORE_CLOCKS: The GPU clock is restored. Conditionally used.
- * @KBASE_L2_ON_HWCNT_ENABLE: The L2 cache and tiler are on, and hwcnt is being
- * enabled
- * @KBASE_L2_ON: The L2 cache and tiler are on, and hwcnt is enabled
- * @KBASE_L2_ON_HWCNT_DISABLE: The L2 cache and tiler are on, and hwcnt is being
- * disabled
- * @KBASE_L2_SLOW_DOWN_CLOCKS: The GPU clock is set to appropriate or lowest
- * clock. Conditionally used.
- * @KBASE_L2_POWER_DOWN: The L2 cache and tiler are about to be powered off
- * @KBASE_L2_PEND_OFF: The L2 cache and tiler are powering off
- * @KBASE_L2_RESET_WAIT: The GPU is resetting, L2 cache and tiler power state
- * are unknown
*/
enum kbase_l2_core_state {
#define KBASEP_L2_STATE(n) KBASE_L2_ ## n,
@@ -87,26 +72,8 @@ enum kbase_l2_core_state {
};
#if MALI_USE_CSF
-/**
+/*
* enum kbase_mcu_state - The states used for the MCU state machine.
- *
- * @KBASE_MCU_OFF: The MCU is powered off.
- * @KBASE_MCU_PEND_ON_RELOAD: The warm boot of MCU or cold boot of MCU (with
- * firmware reloading) is in progress.
- * @KBASE_MCU_ON_GLB_REINIT_PEND: The MCU is enabled and Global configuration
- * requests have been sent to the firmware.
- * @KBASE_MCU_ON_HWCNT_ENABLE: The Global requests have completed and MCU is
- * now ready for use and hwcnt is being enabled.
- * @KBASE_MCU_ON: The MCU is active and hwcnt has been enabled.
- * @KBASE_MCU_ON_CORE_MASK_UPDATE_PEND: The MCU is active and mask of enabled
- * shader cores is being updated.
- * @KBASE_MCU_ON_HWCNT_DISABLE: The MCU is on and hwcnt is being disabled.
- * @KBASE_MCU_ON_HALT: The MCU is on and hwcnt has been disabled,
- * MCU halt would be triggered.
- * @KBASE_MCU_ON_PEND_HALT: MCU halt in progress, confirmation pending.
- * @KBASE_MCU_POWER_DOWN: MCU halted operations, pending being disabled.
- * @KBASE_MCU_PEND_OFF: MCU is being disabled, pending on powering off.
- * @KBASE_MCU_RESET_WAIT: The GPU is resetting, MCU state is unknown.
*/
enum kbase_mcu_state {
#define KBASEP_MCU_STATE(n) KBASE_MCU_ ## n,
@@ -115,45 +82,8 @@ enum kbase_mcu_state {
};
#endif
-/**
+/*
* enum kbase_shader_core_state - The states used for the shaders' state machine.
- *
- * @KBASE_SHADERS_OFF_CORESTACK_OFF: The shaders and core stacks are off
- * @KBASE_SHADERS_OFF_CORESTACK_PEND_ON: The shaders are off, core stacks have
- * been requested to power on and hwcnt
- * is being disabled
- * @KBASE_SHADERS_PEND_ON_CORESTACK_ON: Core stacks are on, shaders have been
- * requested to power on. Or after doing
- * partial shader on/off, checking whether
- * it's the desired state.
- * @KBASE_SHADERS_ON_CORESTACK_ON: The shaders and core stacks are on, and hwcnt
- * already enabled.
- * @KBASE_SHADERS_ON_CORESTACK_ON_RECHECK: The shaders and core stacks
- * are on, hwcnt disabled, and checks
- * to powering down or re-enabling
- * hwcnt.
- * @KBASE_SHADERS_WAIT_OFF_CORESTACK_ON: The shaders have been requested to
- * power off, but they remain on for the
- * duration of the hysteresis timer
- * @KBASE_SHADERS_WAIT_GPU_IDLE: The shaders partial poweroff needs to reach
- * a state where jobs on the GPU are finished
- * including jobs currently running and in the
- * GPU queue because of GPU2017-861
- * @KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON: The hysteresis timer has expired
- * @KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON: The core stacks are on and the
- * level 2 cache is being flushed.
- * @KBASE_SHADERS_READY_OFF_CORESTACK_ON: The core stacks are on and the shaders
- * are ready to be powered off.
- * @KBASE_SHADERS_PEND_OFF_CORESTACK_ON: The core stacks are on, and the shaders
- * have been requested to power off
- * @KBASE_SHADERS_OFF_CORESTACK_PEND_OFF: The shaders are off, and the core stacks
- * have been requested to power off
- * @KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF: Shaders and corestacks are
- * off, but the tick timer
- * cancellation is still
- * pending.
- * @KBASE_SHADERS_RESET_WAIT: The GPU is resetting, shader and core stack power
- * states are unknown
*/
enum kbase_shader_core_state {
#define KBASEP_SHADER_STATE(n) KBASE_SHADERS_ ## n,
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
index da32510..a2f96b5 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
@@ -407,9 +407,9 @@ u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
switch (type) {
case KBASE_PM_CORE_L2:
- return kbdev->gpu_props.props.raw_props.l2_present;
+ return kbdev->gpu_props.curr_config.l2_present;
case KBASE_PM_CORE_SHADER:
- return kbdev->gpu_props.props.raw_props.shader_present;
+ return kbdev->gpu_props.curr_config.shader_present;
case KBASE_PM_CORE_TILER:
return kbdev->gpu_props.props.raw_props.tiler_present;
case KBASE_PM_CORE_STACK:
@@ -695,8 +695,12 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
case KBASE_MCU_ON_HWCNT_ENABLE:
backend->hwcnt_desired = true;
if (backend->hwcnt_disabled) {
+ unsigned long flags;
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbase_hwcnt_context_enable(
kbdev->hwcnt_gpu_ctx);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
backend->hwcnt_disabled = false;
}
backend->mcu_state = KBASE_MCU_ON;
@@ -851,7 +855,7 @@ static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state)
static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
{
struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
- u64 l2_present = kbdev->gpu_props.props.raw_props.l2_present;
+ u64 l2_present = kbdev->gpu_props.curr_config.l2_present;
#if !MALI_USE_CSF
u64 tiler_present = kbdev->gpu_props.props.raw_props.tiler_present;
#endif
@@ -1255,7 +1259,6 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev)
&kbdev->pm.backend.shader_tick_timer;
enum kbase_shader_core_state prev_state;
u64 stacks_avail = 0;
- int err = 0;
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -1350,8 +1353,18 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev)
backend->pm_shaders_core_mask = shaders_ready;
backend->hwcnt_desired = true;
if (backend->hwcnt_disabled) {
+#if MALI_USE_CSF
+ unsigned long flags;
+
+ kbase_csf_scheduler_spin_lock(kbdev,
+ &flags);
+#endif
kbase_hwcnt_context_enable(
kbdev->hwcnt_gpu_ctx);
+#if MALI_USE_CSF
+ kbase_csf_scheduler_spin_unlock(kbdev,
+ flags);
+#endif
backend->hwcnt_disabled = false;
}
@@ -1531,8 +1544,18 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev)
backend->pm_shaders_core_mask = 0;
backend->hwcnt_desired = true;
if (backend->hwcnt_disabled) {
+#if MALI_USE_CSF
+ unsigned long flags;
+
+ kbase_csf_scheduler_spin_lock(kbdev,
+ &flags);
+#endif
kbase_hwcnt_context_enable(
kbdev->hwcnt_gpu_ctx);
+#if MALI_USE_CSF
+ kbase_csf_scheduler_spin_unlock(kbdev,
+ flags);
+#endif
backend->hwcnt_disabled = false;
}
backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF;
@@ -1559,7 +1582,7 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev)
} while (backend->shaders_state != prev_state);
- return err;
+ return 0;
}
#endif
@@ -1883,17 +1906,9 @@ static void kbase_pm_timed_out(struct kbase_device *kbdev)
kbase_reg_read(kbdev, GPU_CONTROL_REG(
L2_PWRTRANS_LO)));
-#if MALI_USE_CSF
- /* PM timeout probably means hardware counters will stop working.
- * Put the backend into the unrecoverable error state to cause
- * current and subsequent counter operations to immediately
- * fail, avoiding the risk of a hang.
- */
- kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface);
-#endif
-
dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n");
- if (kbase_prepare_to_reset_gpu(kbdev))
+ if (kbase_prepare_to_reset_gpu(kbdev,
+ RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
kbase_reset_gpu(kbdev);
}
@@ -2105,6 +2120,13 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
PM_NO_RESET);
}
}
+ /*
+ * At this point the GPU has transitioned to ON, so there is a chance
+ * that a repartitioning occurred. In that case the current config
+ * should be read again.
+ */
+ kbase_gpuprops_get_curr_config_props(kbdev,
+ &kbdev->gpu_props.curr_config);
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
mutex_lock(&kbdev->mmu_hw_mutex);
@@ -2253,7 +2275,7 @@ static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer)
struct kbasep_reset_timeout_data *rtdata =
container_of(timer, struct kbasep_reset_timeout_data, timer);
- rtdata->timed_out = 1;
+ rtdata->timed_out = true;
/* Set the wait queue to wake up kbase_pm_init_hw even though the reset
* hasn't completed
@@ -2263,14 +2285,13 @@ static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
-static int kbase_set_jm_quirks(struct kbase_device *kbdev, const u32 prod_id)
+static int kbase_set_gpu_quirks(struct kbase_device *kbdev, const u32 prod_id)
{
#if MALI_USE_CSF
- kbdev->hw_quirks_jm = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(CSF_CONFIG));
+ kbdev->hw_quirks_gpu =
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(CSF_CONFIG));
#else
- u32 hw_quirks_jm = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(JM_CONFIG));
+ u32 hw_quirks_gpu = kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG));
if (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == GPU_ID2_PRODUCT_TMIX) {
/* Only for tMIx */
@@ -2284,39 +2305,38 @@ static int kbase_set_jm_quirks(struct kbase_device *kbdev, const u32 prod_id)
*/
if (coherency_features ==
COHERENCY_FEATURE_BIT(COHERENCY_ACE)) {
- hw_quirks_jm |= (COHERENCY_ACE_LITE |
- COHERENCY_ACE) <<
- JM_FORCE_COHERENCY_FEATURES_SHIFT;
+ hw_quirks_gpu |= (COHERENCY_ACE_LITE | COHERENCY_ACE)
+ << JM_FORCE_COHERENCY_FEATURES_SHIFT;
}
}
if (kbase_is_gpu_removed(kbdev))
return -EIO;
- kbdev->hw_quirks_jm = hw_quirks_jm;
+ kbdev->hw_quirks_gpu = hw_quirks_gpu;
#endif /* !MALI_USE_CSF */
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_IDVS_GROUP_SIZE)) {
int default_idvs_group_size = 0xF;
- u32 tmp;
+ u32 group_size = 0;
- if (of_property_read_u32(kbdev->dev->of_node,
- "idvs-group-size", &tmp))
- tmp = default_idvs_group_size;
+ if (of_property_read_u32(kbdev->dev->of_node, "idvs-group-size",
+ &group_size))
+ group_size = default_idvs_group_size;
- if (tmp > IDVS_GROUP_MAX_SIZE) {
+ if (group_size > IDVS_GROUP_MAX_SIZE) {
dev_err(kbdev->dev,
"idvs-group-size of %d is too large. Maximum value is %d",
- tmp, IDVS_GROUP_MAX_SIZE);
- tmp = default_idvs_group_size;
+ group_size, IDVS_GROUP_MAX_SIZE);
+ group_size = default_idvs_group_size;
}
- kbdev->hw_quirks_jm |= tmp << IDVS_GROUP_SIZE_SHIFT;
+ kbdev->hw_quirks_gpu |= group_size << IDVS_GROUP_SIZE_SHIFT;
}
#define MANUAL_POWER_CONTROL ((u32)(1 << 8))
if (corestack_driver_control)
- kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL;
+ kbdev->hw_quirks_gpu |= MANUAL_POWER_CONTROL;
return 0;
}
@@ -2370,18 +2390,17 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
GPU_ID_VERSION_PRODUCT_ID_SHIFT;
int error = 0;
- kbdev->hw_quirks_jm = 0;
+ kbdev->hw_quirks_gpu = 0;
kbdev->hw_quirks_sc = 0;
kbdev->hw_quirks_tiler = 0;
kbdev->hw_quirks_mmu = 0;
- if (!of_property_read_u32(np, "quirks_jm",
- &kbdev->hw_quirks_jm)) {
+ if (!of_property_read_u32(np, "quirks_gpu", &kbdev->hw_quirks_gpu)) {
dev_info(kbdev->dev,
- "Found quirks_jm = [0x%x] in Devicetree\n",
- kbdev->hw_quirks_jm);
+ "Found quirks_gpu = [0x%x] in Devicetree\n",
+ kbdev->hw_quirks_gpu);
} else {
- error = kbase_set_jm_quirks(kbdev, prod_id);
+ error = kbase_set_gpu_quirks(kbdev, prod_id);
if (error)
return error;
}
@@ -2432,10 +2451,10 @@ static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev)
kbdev->hw_quirks_mmu);
#if MALI_USE_CSF
kbase_reg_write(kbdev, GPU_CONTROL_REG(CSF_CONFIG),
- kbdev->hw_quirks_jm);
+ kbdev->hw_quirks_gpu);
#else
kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG),
- kbdev->hw_quirks_jm);
+ kbdev->hw_quirks_gpu);
#endif
}
@@ -2466,6 +2485,7 @@ void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev)
}
}
+#if !MALI_USE_CSF
static void reenable_protected_mode_hwcnt(struct kbase_device *kbdev)
{
unsigned long irq_flags;
@@ -2478,6 +2498,7 @@ static void reenable_protected_mode_hwcnt(struct kbase_device *kbdev)
}
spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
}
+#endif
static int kbase_pm_do_reset(struct kbase_device *kbdev)
{
@@ -2504,7 +2525,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev)
/* Initialize a structure for tracking the status of the reset */
rtdata.kbdev = kbdev;
- rtdata.timed_out = 0;
+ rtdata.timed_out = false;
/* Create a timer to use as a timeout on the reset */
hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
@@ -2516,7 +2537,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev)
/* Wait for the RESET_COMPLETED interrupt to be raised */
kbase_pm_wait_for_reset(kbdev);
- if (rtdata.timed_out == 0) {
+ if (!rtdata.timed_out) {
/* GPU has been reset */
hrtimer_cancel(&rtdata.timer);
destroy_hrtimer_on_stack(&rtdata.timer);
@@ -2556,7 +2577,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev)
GPU_COMMAND_HARD_RESET);
/* Restart the timer to wait for the hard reset to complete */
- rtdata.timed_out = 0;
+ rtdata.timed_out = false;
hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
HRTIMER_MODE_REL);
@@ -2564,7 +2585,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev)
/* Wait for the RESET_COMPLETED interrupt to be raised */
kbase_pm_wait_for_reset(kbdev);
- if (rtdata.timed_out == 0) {
+ if (!rtdata.timed_out) {
/* GPU has been reset */
hrtimer_cancel(&rtdata.timer);
destroy_hrtimer_on_stack(&rtdata.timer);
@@ -2637,8 +2658,13 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
#if MALI_USE_CSF
if (kbdev->protected_mode) {
+ unsigned long flags;
+
kbase_ipa_control_protm_exited(kbdev);
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbase_hwcnt_backend_csf_protm_exited(&kbdev->hwcnt_gpu_iface);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
#endif
kbdev->protected_mode = false;
@@ -2685,12 +2711,14 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
kbase_pm_enable_interrupts(kbdev);
exit:
+#if !MALI_USE_CSF
if (!kbdev->pm.backend.protected_entry_transition_override) {
/* Re-enable GPU hardware counters if we're resetting from
* protected mode.
*/
reenable_protected_mode_hwcnt(kbdev);
}
+#endif
return err;
}
@@ -2726,8 +2754,9 @@ kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev)
/* This might happen after GPU reset.
* Then counter needs to be kicked.
*/
- if (!(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) &
- GPU_STATUS_CYCLE_COUNT_ACTIVE)) {
+ if (!IS_ENABLED(CONFIG_MALI_NO_MALI) &&
+ (!(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) &
+ GPU_STATUS_CYCLE_COUNT_ACTIVE))) {
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
GPU_COMMAND_CYCLE_COUNT_START);
}
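
The idvs-group-size handling above reads an optional value, falls back to a default, clamps it to a maximum and shifts it into the quirks word. A minimal standalone sketch of that validate-and-pack step, with illustrative shift and limit constants rather than the real register layout:

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for IDVS_GROUP_SIZE_SHIFT / IDVS_GROUP_MAX_SIZE. */
#define GROUP_SIZE_SHIFT   16
#define GROUP_MAX_SIZE     0x3F
#define DEFAULT_GROUP_SIZE 0xF

/* Sketch: validate an optional platform-provided group size and pack it
 * into a quirks value, falling back to a default when the value is
 * missing (have_value == 0) or out of range.
 */
static uint32_t pack_group_size(int have_value, uint32_t group_size)
{
	if (!have_value || group_size > GROUP_MAX_SIZE)
		group_size = DEFAULT_GROUP_SIZE;
	return group_size << GROUP_SIZE_SHIFT;
}

int main(void)
{
	printf("quirks bits = 0x%x\n", pack_group_size(1, 0x80)); /* too large */
	printf("quirks bits = 0x%x\n", pack_group_size(1, 0x10)); /* accepted */
	return 0;
}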
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
index f6b8485..500578f 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
*
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -224,6 +224,7 @@ void kbase_pm_reset_done(struct kbase_device *kbdev);
*
* Return: 0 on success, error code on error
*/
+int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
#else
/**
* kbase_pm_wait_for_desired_state - Wait for the desired power state to be
@@ -247,8 +248,8 @@ void kbase_pm_reset_done(struct kbase_device *kbdev);
*
* Return: 0 on success, error code on error
*/
-#endif
int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
+#endif
/**
* kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on
@@ -534,8 +535,22 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
#ifdef CONFIG_MALI_MIDGARD_DVFS
+#if MALI_USE_CSF
+/**
+ * kbase_platform_dvfs_event - Report utilisation to DVFS code for CSF GPU
+ *
+ * Function provided by platform specific code when DVFS is enabled to allow
+ * the power management metrics system to report utilisation.
+ *
+ * @kbdev: The kbase device structure for the device (must be a
+ * valid pointer)
+ * @utilisation: The current calculated utilisation by the metrics system.
+ * Return: Returns 0 on failure and non zero on success.
+ */
+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation);
+#else
/**
- * kbase_platform_dvfs_event - Report utilisation to DVFS code
+ * kbase_platform_dvfs_event - Report utilisation to DVFS code for JM GPU
*
* Function provided by platform specific code when DVFS is enabled to allow
* the power management metrics system to report utilisation.
@@ -548,10 +563,6 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
* group.
* Return: Returns 0 on failure and non zero on success.
*/
-
-#if MALI_USE_CSF
-int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation);
-#else
int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
u32 util_gl_share, u32 util_cl_share[2]);
#endif
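
The two kbase_platform_dvfs_event() prototypes above differ only in whether the GL/CL share arguments are passed. A minimal user-space sketch of how a platform callback might map the reported utilisation onto an operating point, using a made-up frequency table rather than any real platform's DVFS policy:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical OPP table, highest frequency last. */
static const uint32_t freq_table_khz[] = { 200000, 400000, 600000, 800000 };
#define NUM_OPPS (sizeof(freq_table_khz) / sizeof(freq_table_khz[0]))

/* Sketch: pick an operating point from a 0-100 utilisation figure. */
static uint32_t pick_frequency(uint32_t utilisation)
{
	size_t idx;

	if (utilisation > 100)
		utilisation = 100;
	idx = (utilisation * (NUM_OPPS - 1) + 50) / 100; /* round to nearest */
	return freq_table_khz[idx];
}

int main(void)
{
	printf("util 10%%  -> %u kHz\n", (unsigned)pick_frequency(10));
	printf("util 85%%  -> %u kHz\n", (unsigned)pick_frequency(85));
	return 0;
}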
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_l2_states.h b/mali_kbase/backend/gpu/mali_kbase_pm_l2_states.h
index b9bd364..d66b928 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_l2_states.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_l2_states.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
*
- * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -24,6 +24,19 @@
* The function-like macro KBASEP_L2_STATE() must be defined before including
* this header file. This header file can be included multiple times in the
* same compilation unit with different definitions of KBASEP_L2_STATE().
+ *
+ * @OFF: The L2 cache and tiler are off
+ * @PEND_ON: The L2 cache and tiler are powering on
+ * @RESTORE_CLOCKS: The GPU clock is restored. Conditionally used.
+ * @ON_HWCNT_ENABLE: The L2 cache and tiler are on, and hwcnt is being enabled
+ * @ON: The L2 cache and tiler are on, and hwcnt is enabled
+ * @ON_HWCNT_DISABLE: The L2 cache and tiler are on, and hwcnt is being disabled
+ * @SLOW_DOWN_CLOCKS: The GPU clock is set to appropriate or lowest clock.
+ * Conditionally used.
+ * @POWER_DOWN: The L2 cache and tiler are about to be powered off
+ * @PEND_OFF: The L2 cache and tiler are powering off
+ * @RESET_WAIT: The GPU is resetting, L2 cache and tiler power states
+ * are unknown
*/
KBASEP_L2_STATE(OFF)
KBASEP_L2_STATE(PEND_ON)
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h b/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h
index c03adf3..eab30eb 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
*
- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -24,6 +24,24 @@
* The function-like macro KBASEP_MCU_STATE() must be defined before including
* this header file. This header file can be included multiple times in the
* same compilation unit with different definitions of KBASEP_MCU_STATE().
+ *
+ * @OFF: The MCU is powered off.
+ * @PEND_ON_RELOAD: The warm boot of MCU or cold boot of MCU (with
+ * firmware reloading) is in progress.
+ * @ON_GLB_REINIT_PEND: The MCU is enabled and Global configuration
+ * requests have been sent to the firmware.
+ * @ON_HWCNT_ENABLE: The Global requests have completed and MCU is now
+ * ready for use and hwcnt is being enabled.
+ * @ON: The MCU is active and hwcnt has been enabled.
+ * @ON_CORE_ATTR_UPDATE_PEND: The MCU is active and the mask of enabled
+ * shader cores is being updated.
+ * @ON_HWCNT_DISABLE: The MCU is on and hwcnt is being disabled.
+ * @ON_HALT: The MCU is on and hwcnt has been disabled, MCU
+ * halt would be triggered.
+ * @ON_PEND_HALT: MCU halt in progress, confirmation pending.
+ * @POWER_DOWN: MCU halted operations, pending being disabled.
+ * @PEND_OFF: MCU is being disabled, pending on powering off.
+ * @RESET_WAIT: The GPU is resetting, MCU state is unknown.
*/
KBASEP_MCU_STATE(OFF)
KBASEP_MCU_STATE(PEND_ON_RELOAD)
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c
index e5c7c71..769888f 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
*
- * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -360,9 +360,9 @@ KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_metrics);
void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
{
int utilisation;
- int busy;
struct kbasep_pm_metrics *diff;
#if !MALI_USE_CSF
+ int busy;
int util_gl_share;
int util_cl_share[2];
#endif
@@ -377,9 +377,9 @@ void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
utilisation = (100 * diff->time_busy) /
max(diff->time_busy + diff->time_idle, 1u);
+#if !MALI_USE_CSF
busy = max(diff->busy_gl + diff->busy_cl[0] + diff->busy_cl[1], 1u);
-#if !MALI_USE_CSF
util_gl_share = (100 * diff->busy_gl) / busy;
util_cl_share[0] = (100 * diff->busy_cl[0]) / busy;
util_cl_share[1] = (100 * diff->busy_cl[1]) / busy;
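
A standalone sketch of the percentage calculations in the hunk above, including the max(..., 1) guards against division by zero, with made-up counter deltas:

#include <stdio.h>

static unsigned int max_u(unsigned int a, unsigned int b)
{
	return a > b ? a : b;
}

int main(void)
{
	/* Made-up busy/idle counter deltas. */
	const unsigned int time_busy = 750, time_idle = 250;
	const unsigned int busy_gl = 400, busy_cl0 = 200, busy_cl1 = 150;

	/* Guard the denominators with max(..., 1) as the driver does. */
	const unsigned int utilisation =
		(100 * time_busy) / max_u(time_busy + time_idle, 1u);
	const unsigned int busy = max_u(busy_gl + busy_cl0 + busy_cl1, 1u);

	printf("utilisation   = %u%%\n", utilisation);
	printf("util_gl_share = %u%%\n", (100 * busy_gl) / busy);
	printf("util_cl_share = %u%% / %u%%\n",
	       (100 * busy_cl0) / busy, (100 * busy_cl1) / busy);
	return 0;
}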
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
index 97bcb44..5c2aa0c 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
@@ -405,7 +405,7 @@ void kbase_pm_set_policy(struct kbase_device *kbdev,
/* Reverse the suspension done */
if (reset_gpu) {
dev_warn(kbdev->dev, "Resorting to GPU reset for policy change\n");
- if (kbase_prepare_to_reset_gpu(kbdev))
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
kbase_reset_gpu_wait(kbdev);
} else if (sched_suspend)
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_shader_states.h b/mali_kbase/backend/gpu/mali_kbase_pm_shader_states.h
index 766bf1d..2276713 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_shader_states.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_shader_states.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
*
- * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -25,6 +25,41 @@
* including this header file. This header file can be included multiple
* times in the same compilation unit with different definitions of
* KBASEP_SHADER_STATE().
+ *
+ * @OFF_CORESTACK_OFF: The shaders and core stacks are off
+ * @OFF_CORESTACK_PEND_ON: The shaders are off, core stacks have been
+ * requested to power on and hwcnt is being
+ * disabled
+ * @PEND_ON_CORESTACK_ON: Core stacks are on, shaders have been
+ * requested to power on. Or after doing
+ * partial shader on/off, checking whether
+ * it's the desired state.
+ * @ON_CORESTACK_ON: The shaders and core stacks are on, and
+ * hwcnt already enabled.
+ * @ON_CORESTACK_ON_RECHECK: The shaders and core stacks are on, hwcnt is
+ * disabled, and a check is made whether to
+ * power down or re-enable hwcnt.
+ * @WAIT_OFF_CORESTACK_ON: The shaders have been requested to power
+ * off, but they remain on for the duration
+ * of the hysteresis timer
+ * @WAIT_GPU_IDLE: The shaders partial poweroff needs to
+ * reach a state where jobs on the GPU are
+ * finished including jobs currently running
+ * and in the GPU queue because of
+ * GPU2017-861
+ * @WAIT_FINISHED_CORESTACK_ON: The hysteresis timer has expired
+ * @L2_FLUSHING_CORESTACK_ON: The core stacks are on and the level 2
+ * cache is being flushed.
+ * @READY_OFF_CORESTACK_ON: The core stacks are on and the shaders are
+ * ready to be powered off.
+ * @PEND_OFF_CORESTACK_ON: The core stacks are on, and the shaders
+ * have been requested to power off
+ * @OFF_CORESTACK_PEND_OFF: The shaders are off, and the core stacks
+ * have been requested to power off
+ * @OFF_CORESTACK_OFF_TIMER_PEND_OFF: Shaders and corestacks are off, but the
+ * tick timer cancellation is still pending.
+ * @RESET_WAIT: The GPU is resetting, shader and core
+ * stack power states are unknown
*/
KBASEP_SHADER_STATE(OFF_CORESTACK_OFF)
KBASEP_SHADER_STATE(OFF_CORESTACK_PEND_ON)
diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c
index f964af0..ea7b21a 100644
--- a/mali_kbase/backend/gpu/mali_kbase_time.c
+++ b/mali_kbase/backend/gpu/mali_kbase_time.c
@@ -76,6 +76,9 @@ void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev,
*/
static bool timedwait_cycle_count_active(struct kbase_device *kbdev)
{
+#ifdef CONFIG_MALI_NO_MALI
+ return true;
+#else
bool success = false;
const unsigned int timeout = 100;
const unsigned long remaining = jiffies + msecs_to_jiffies(timeout);
@@ -87,8 +90,8 @@ static bool timedwait_cycle_count_active(struct kbase_device *kbdev)
break;
}
}
-
return success;
+#endif
}
#endif
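
On real hardware the function above polls GPU_STATUS until the cycle-count-active bit is seen or a timeout expires; the new CONFIG_MALI_NO_MALI path simply short-circuits that wait. A minimal user-space sketch of the poll-with-deadline pattern, with a hypothetical cycle_count_active() standing in for the register read:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

/* Hypothetical stand-in for reading the GPU_STATUS cycle-count-active bit. */
static bool cycle_count_active(void)
{
	static int calls;
	return ++calls > 3; /* pretend the bit sets after a few polls */
}

/* Sketch: poll a status bit until it is set or a timeout expires. */
static bool timedwait_active(unsigned int timeout_ms)
{
	struct timespec start, now;

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (;;) {
		if (cycle_count_active())
			return true;
		clock_gettime(CLOCK_MONOTONIC, &now);
		if ((now.tv_sec - start.tv_sec) * 1000 +
		    (now.tv_nsec - start.tv_nsec) / 1000000 >= (long)timeout_ms)
			return false;
	}
}

int main(void)
{
	printf("active = %d\n", timedwait_active(100));
	return 0;
}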
diff --git a/mali_kbase/csf/mali_base_csf_kernel.h b/mali_kbase/csf/mali_base_csf_kernel.h
deleted file mode 100644
index 9a13760..0000000
--- a/mali_kbase/csf/mali_base_csf_kernel.h
+++ /dev/null
@@ -1,637 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-#ifndef _BASE_CSF_KERNEL_H_
-#define _BASE_CSF_KERNEL_H_
-
-/* Memory allocation, access/hint flags.
- *
- * See base_mem_alloc_flags.
- */
-
-/* IN */
-/* Read access CPU side
- */
-#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0)
-
-/* Write access CPU side
- */
-#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1)
-
-/* Read access GPU side
- */
-#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2)
-
-/* Write access GPU side
- */
-#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3)
-
-/* Execute allowed on the GPU side
- */
-#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4)
-
-/* Will be permanently mapped in kernel space.
- * Flag is only allowed on allocations originating from kbase.
- */
-#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5)
-
-/* The allocation will completely reside within the same 4GB chunk in the GPU
- * virtual space.
- * Since this flag is primarily required only for the TLS memory which will
- * not be used to contain executable code and also not used for Tiler heap,
- * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags.
- */
-#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6)
-
-/* Userspace is not allowed to free this memory.
- * Flag is only allowed on allocations originating from kbase.
- */
-#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7)
-
-#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8)
-
-/* Grow backing store on GPU Page Fault
- */
-#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9)
-
-/* Page coherence Outer shareable, if available
- */
-#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10)
-
-/* Page coherence Inner shareable
- */
-#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11)
-
-/* IN/OUT */
-/* Should be cached on the CPU, returned if actually cached
- */
-#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12)
-
-/* IN/OUT */
-/* Must have same VA on both the GPU and the CPU
- */
-#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13)
-
-/* OUT */
-/* Must call mmap to acquire a GPU address for the alloc
- */
-#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14)
-
-/* IN */
-/* Page coherence Outer shareable, required.
- */
-#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15)
-
-/* Protected memory
- */
-#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16)
-
-/* Not needed physical memory
- */
-#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17)
-
-/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the
- * addresses to be the same
- */
-#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18)
-
-/* CSF event memory
- *
- * If Outer shareable coherence is not specified or not available, then on
- * allocation kbase will automatically use the uncached GPU mapping.
- * There is no need for the client to specify BASE_MEM_UNCACHED_GPU
- * themselves when allocating memory with the BASE_MEM_CSF_EVENT flag.
- *
- * This memory requires a permanent mapping
- *
- * See also kbase_reg_needs_kernel_mapping()
- */
-#define BASE_MEM_CSF_EVENT ((base_mem_alloc_flags)1 << 19)
-
-#define BASE_MEM_RESERVED_BIT_20 ((base_mem_alloc_flags)1 << 20)
-
-/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu
- * mode. Some components within the GPU might only be able to access memory
- * that is GPU cacheable. Refer to the specific GPU implementation for more
- * details. The 3 shareability flags will be ignored for GPU uncached memory.
- * If used while importing USER_BUFFER type memory, then the import will fail
- * if the memory is not aligned to GPU and CPU cache line width.
- */
-#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21)
-
-/*
- * Bits [22:25] for group_id (0~15).
- *
- * base_mem_group_id_set() should be used to pack a memory group ID into a
- * base_mem_alloc_flags value instead of accessing the bits directly.
- * base_mem_group_id_get() should be used to extract the memory group ID from
- * a base_mem_alloc_flags value.
- */
-#define BASEP_MEM_GROUP_ID_SHIFT 22
-#define BASE_MEM_GROUP_ID_MASK \
- ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT)
-
-/* Must do CPU cache maintenance when imported memory is mapped/unmapped
- * on GPU. Currently applicable to dma-buf type only.
- */
-#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26)
-
-/* OUT */
-/* Kernel side cache sync ops required */
-#define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28)
-
-/* Number of bits used as flags for base memory management
- *
- * Must be kept in sync with the base_mem_alloc_flags flags
- */
-#define BASE_MEM_FLAGS_NR_BITS 29
-
-/* A mask of all the flags which are only valid for allocations within kbase,
- * and may not be passed from user space.
- */
-#define BASEP_MEM_FLAGS_KERNEL_ONLY \
- (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE)
-
-/* A mask for all output bits, excluding IN/OUT bits.
- */
-#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
-
-/* A mask for all input bits, including IN/OUT bits.
- */
-#define BASE_MEM_FLAGS_INPUT_MASK \
- (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
-
-/* A mask of all currently reserved flags
- */
-#define BASE_MEM_FLAGS_RESERVED \
- BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_20
-
-#define BASEP_MEM_INVALID_HANDLE (0ull << 12)
-#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12)
-#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12)
-#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12)
-#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12)
-/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */
-#define BASEP_MEM_CSF_USER_REG_PAGE_HANDLE (47ul << 12)
-#define BASEP_MEM_CSF_USER_IO_PAGES_HANDLE (48ul << 12)
-#define BASE_MEM_COOKIE_BASE (64ul << 12)
-#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \
- BASE_MEM_COOKIE_BASE)
-
-#define KBASE_CSF_NUM_USER_IO_PAGES_HANDLE \
- ((BASE_MEM_COOKIE_BASE - BASEP_MEM_CSF_USER_IO_PAGES_HANDLE) >> \
- LOCAL_PAGE_SHIFT)
-
-/**
- * Valid set of just-in-time memory allocation flags
- */
-#define BASE_JIT_ALLOC_VALID_FLAGS ((u8)0)
-
-/* Flags to pass to ::base_context_init.
- * Flags can be ORed together to enable multiple things.
- *
- * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
- * not collide with them.
- */
-typedef u32 base_context_create_flags;
-
-/* No flags set */
-#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0)
-
-/* Base context is embedded in a cctx object (flag used for CINSTR
- * software counter macros)
- */
-#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0)
-
-/* Base context is a 'System Monitor' context for Hardware counters.
- *
- * One important side effect of this is that job submission is disabled.
- */
-#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \
- ((base_context_create_flags)1 << 1)
-
-/* Create CSF event thread.
- *
- * The creation of a CSF event thread is conditional and only allowed in
- * unit tests for the moment, in order to avoid clashes with the existing
- * Base unit tests.
- */
-#define BASE_CONTEXT_CSF_EVENT_THREAD ((base_context_create_flags)1 << 2)
-
-/* Bit-shift used to encode a memory group ID in base_context_create_flags
- */
-#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3)
-
-/* Bitmask used to encode a memory group ID in base_context_create_flags
- */
-#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \
- ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)
-
-/* Bitpattern describing the base_context_create_flags that can be
- * passed to the kernel
- */
-#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \
- (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \
- BASEP_CONTEXT_MMU_GROUP_ID_MASK)
-
-/* Bitpattern describing the ::base_context_create_flags that can be
- * passed to base_context_init()
- */
-#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \
- (BASE_CONTEXT_CCTX_EMBEDDED | \
- BASE_CONTEXT_CSF_EVENT_THREAD | \
- BASEP_CONTEXT_CREATE_KERNEL_FLAGS)
-
-/* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
- * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST)
- */
-#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0)
-
-/* Indicate that job dumping is enabled. This could affect certain timers
- * to account for the performance impact.
- */
-#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1)
-
-/* Enable KBase tracepoints for CSF builds */
-#define BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS (1 << 2)
-
-/* Enable additional CSF Firmware side tracepoints */
-#define BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS (1 << 3)
-
-#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \
- BASE_TLSTREAM_JOB_DUMPING_ENABLED | \
- BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS | \
- BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS)
-
-/* Number of pages mapped into the process address space for a bound GPU
- * command queue. A pair of input/output pages and a Hw doorbell page
- * are mapped to enable direct submission of commands to Hw.
- */
-#define BASEP_QUEUE_NR_MMAP_USER_PAGES ((size_t)3)
-
-#define BASE_QUEUE_MAX_PRIORITY (15U)
-
-/* CQS Sync object is an array of u32 event_mem[2], error field index is 1 */
-#define BASEP_EVENT_VAL_INDEX (0U)
-#define BASEP_EVENT_ERR_INDEX (1U)
-
-/* The upper limit for number of objects that could be waited/set per command.
- * This limit is now enforced as internally the error inherit inputs are
- * converted to 32-bit flags in a u32 variable occupying a previously padding
- * field.
- */
-#define BASEP_KCPU_CQS_MAX_NUM_OBJS ((size_t)32)
-
-/**
- * enum base_kcpu_command_type - Kernel CPU queue command type.
- * @BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: fence_signal,
- * @BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: fence_wait,
- * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT: cqs_wait,
- * @BASE_KCPU_COMMAND_TYPE_CQS_SET: cqs_set,
- * @BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: map_import,
- * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: unmap_import,
- * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: unmap_import_force,
- * @BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: jit_alloc,
- * @BASE_KCPU_COMMAND_TYPE_JIT_FREE: jit_free,
- * @BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: group_suspend,
- * @BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: error_barrier,
- * @BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME: sample_time,
- */
-enum base_kcpu_command_type {
- BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL,
- BASE_KCPU_COMMAND_TYPE_FENCE_WAIT,
- BASE_KCPU_COMMAND_TYPE_CQS_WAIT,
- BASE_KCPU_COMMAND_TYPE_CQS_SET,
- BASE_KCPU_COMMAND_TYPE_MAP_IMPORT,
- BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT,
- BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE,
- BASE_KCPU_COMMAND_TYPE_JIT_ALLOC,
- BASE_KCPU_COMMAND_TYPE_JIT_FREE,
- BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND,
- BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER,
-#if MALI_UNIT_TEST
- BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME,
-#endif /* MALI_UNIT_TEST */
-};
-
-/**
- * enum base_queue_group_priority - Priority of a GPU Command Queue Group.
- * @BASE_QUEUE_GROUP_PRIORITY_HIGH: GPU Command Queue Group is of high
- * priority.
- * @BASE_QUEUE_GROUP_PRIORITY_MEDIUM: GPU Command Queue Group is of medium
- * priority.
- * @BASE_QUEUE_GROUP_PRIORITY_LOW: GPU Command Queue Group is of low
- * priority.
- * @BASE_QUEUE_GROUP_PRIORITY_REALTIME: GPU Command Queue Group is of real-time
- * priority.
- * @BASE_QUEUE_GROUP_PRIORITY_COUNT: Number of GPU Command Queue Group
- * priority levels.
- *
- * Currently this is in order of highest to lowest, but if new levels are added
- * then those new levels may be out of order to preserve the ABI compatibility
- * with previous releases. At that point, ensure assignment to
- * the 'priority' member in &kbase_queue_group is updated to ensure it remains
- * a linear ordering.
- *
- * There should be no gaps in the enum, otherwise use of
- * BASE_QUEUE_GROUP_PRIORITY_COUNT in kbase must be updated.
- */
-enum base_queue_group_priority {
- BASE_QUEUE_GROUP_PRIORITY_HIGH = 0,
- BASE_QUEUE_GROUP_PRIORITY_MEDIUM,
- BASE_QUEUE_GROUP_PRIORITY_LOW,
- BASE_QUEUE_GROUP_PRIORITY_REALTIME,
- BASE_QUEUE_GROUP_PRIORITY_COUNT
-};
-
-struct base_kcpu_command_fence_info {
- u64 fence;
-};
-
-struct base_cqs_wait_info {
- u64 addr;
- u32 val;
- u32 padding;
-};
-
-struct base_kcpu_command_cqs_wait_info {
- u64 objs;
- u32 nr_objs;
- u32 inherit_err_flags;
-};
-
-struct base_cqs_set {
- u64 addr;
-};
-
-struct base_kcpu_command_cqs_set_info {
- u64 objs;
- u32 nr_objs;
- u32 propagate_flags;
-};
-
-/**
- * struct base_kcpu_command_import_info - structure which contains information
- * about the imported buffer.
- *
- * @handle: Address of imported user buffer.
- */
-struct base_kcpu_command_import_info {
- u64 handle;
-};
-
-/**
- * struct base_kcpu_command_jit_alloc_info - structure which contains
- * information about jit memory allocation.
- *
- * @info: An array of elements of the
- * struct base_jit_alloc_info type.
- * @count: The number of elements in the info array.
- * @padding: Padding to a multiple of 64 bits.
- */
-struct base_kcpu_command_jit_alloc_info {
- u64 info;
- u8 count;
- u8 padding[7];
-};
-
-/**
- * struct base_kcpu_command_jit_free_info - structure which contains
- * information about jit memory which is to be freed.
- *
- * @ids: An array containing the JIT IDs to free.
- * @count: The number of elements in the ids array.
- * @padding: Padding to a multiple of 64 bits.
- */
-struct base_kcpu_command_jit_free_info {
- u64 ids;
- u8 count;
- u8 padding[7];
-};
-
-/**
- * struct base_kcpu_command_group_suspend_info - structure which contains
- * suspend buffer data captured for a suspended queue group.
- *
- * @buffer: Pointer to an array of elements of the type char.
- * @size: Number of elements in the @buffer array.
- * @group_handle: Handle to the mapping of CSG.
- * @padding: padding to a multiple of 64 bits.
- */
-struct base_kcpu_command_group_suspend_info {
- u64 buffer;
- u32 size;
- u8 group_handle;
- u8 padding[3];
-};
-
-#if MALI_UNIT_TEST
-struct base_kcpu_command_sample_time_info {
- u64 time;
-};
-#endif /* MALI_UNIT_TEST */
-
-/**
- * struct base_kcpu_command - kcpu command.
- * @type: type of the kcpu command, one enum base_kcpu_command_type
- * @padding: padding to a multiple of 64 bits
- * @info: structure which contains information about the kcpu command;
- * actual type is determined by @p type
- * @info.fence: Fence
- * @info.cqs_wait: CQS wait
- * @info.cqs_set: CQS set
- * @info.import: import
- * @info.jit_alloc: jit allocation
- * @info.jit_free: jit deallocation
- * @info.suspend_buf_copy: suspend buffer copy
- * @info.sample_time: sample time
- * @info.padding: padding
- */
-struct base_kcpu_command {
- u8 type;
- u8 padding[sizeof(u64) - sizeof(u8)];
- union {
- struct base_kcpu_command_fence_info fence;
- struct base_kcpu_command_cqs_wait_info cqs_wait;
- struct base_kcpu_command_cqs_set_info cqs_set;
- struct base_kcpu_command_import_info import;
- struct base_kcpu_command_jit_alloc_info jit_alloc;
- struct base_kcpu_command_jit_free_info jit_free;
- struct base_kcpu_command_group_suspend_info suspend_buf_copy;
-#if MALI_UNIT_TEST
- struct base_kcpu_command_sample_time_info sample_time;
-#endif /* MALI_UNIT_TEST */
- u64 padding[2]; /* No sub-struct should be larger */
- } info;
-};
-
-/**
- * struct basep_cs_stream_control - CSI capabilities.
- *
- * @features: Features of this stream
- * @padding: Padding to a multiple of 64 bits.
- */
-struct basep_cs_stream_control {
- u32 features;
- u32 padding;
-};
-
-/**
- * struct basep_cs_group_control - CSG interface capabilities.
- *
- * @features: Features of this group
- * @stream_num: Number of streams in this group
- * @suspend_size: Size in bytes of the suspend buffer for this group
- * @padding: Padding to a multiple of 64 bits.
- */
-struct basep_cs_group_control {
- u32 features;
- u32 stream_num;
- u32 suspend_size;
- u32 padding;
-};
-
-/**
- * struct base_gpu_queue_group_error_fatal_payload - Unrecoverable fault
- * error information associated with GPU command queue group.
- *
- * @sideband: Additional information of the unrecoverable fault.
- * @status: Unrecoverable fault information.
- * This consists of exception type (least significant byte) and
- * data (remaining bytes). One example of exception type is
- * CS_INVALID_INSTRUCTION (0x49).
- * @padding: Padding to make multiple of 64bits
- */
-struct base_gpu_queue_group_error_fatal_payload {
- u64 sideband;
- u32 status;
- u32 padding;
-};
-
-/**
- * struct base_gpu_queue_error_fatal_payload - Unrecoverable fault
- * error information related to GPU command queue.
- *
- * @sideband: Additional information about this unrecoverable fault.
- * @status: Unrecoverable fault information.
- * This consists of exception type (least significant byte) and
- * data (remaining bytes). One example of exception type is
- * CS_INVALID_INSTRUCTION (0x49).
- * @csi_index: Index of the CSF interface the queue is bound to.
- * @padding: Padding to make multiple of 64bits
- */
-struct base_gpu_queue_error_fatal_payload {
- u64 sideband;
- u32 status;
- u8 csi_index;
- u8 padding[3];
-};
-
-/**
- * enum base_gpu_queue_group_error_type - GPU Fatal error type.
- *
- * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL: Fatal error associated with GPU
- * command queue group.
- * @BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL: Fatal error associated with GPU
- * command queue.
- * @BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT: Fatal error associated with
- * progress timeout.
- * @BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM: Fatal error due to running out
- * of tiler heap memory.
- * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT: The number of fatal error types
- *
- * This type is used for &struct_base_gpu_queue_group_error.error_type.
- */
-enum base_gpu_queue_group_error_type {
- BASE_GPU_QUEUE_GROUP_ERROR_FATAL = 0,
- BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL,
- BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT,
- BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM,
- BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT
-};
-
-/**
- * struct base_gpu_queue_group_error - Unrecoverable fault information
- * @error_type: Error type of @base_gpu_queue_group_error_type
- * indicating which field in union payload is filled
- * @padding: Unused bytes for 64bit boundary
- * @payload: Input Payload
- * @payload.fatal_group: Unrecoverable fault error associated with
- * GPU command queue group
- * @payload.fatal_queue: Unrecoverable fault error associated with command queue
- */
-struct base_gpu_queue_group_error {
- u8 error_type;
- u8 padding[7];
- union {
- struct base_gpu_queue_group_error_fatal_payload fatal_group;
- struct base_gpu_queue_error_fatal_payload fatal_queue;
- } payload;
-};
-
-/**
- * enum base_csf_notification_type - Notification type
- *
- * @BASE_CSF_NOTIFICATION_EVENT: Notification with kernel event
- * @BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR: Notification with GPU fatal
- * error
- * @BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP: Notification with dumping cpu
- * queue
- * @BASE_CSF_NOTIFICATION_COUNT: The number of notification type
- *
- * This type is used for &struct_base_csf_notification.type.
- */
-enum base_csf_notification_type {
- BASE_CSF_NOTIFICATION_EVENT = 0,
- BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
- BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP,
- BASE_CSF_NOTIFICATION_COUNT
-};
-
-/**
- * struct base_csf_notification - Event or error notification
- *
- * @type: Notification type of @base_csf_notification_type
- * @padding: Padding for 64bit boundary
- * @payload: Input Payload
- * @payload.align: To fit the struct into a 64-byte cache line
- * @payload.csg_error: CSG error
- * @payload.csg_error.handle: Handle of GPU command queue group associated with
- * fatal error
- * @payload.csg_error.padding: Padding
- * @payload.csg_error.error: Unrecoverable fault error
- *
- */
-struct base_csf_notification {
- u8 type;
- u8 padding[7];
- union {
- struct {
- u8 handle;
- u8 padding[7];
- struct base_gpu_queue_group_error error;
- } csg_error;
-
- u8 align[56];
- } payload;
-};
-
-#endif /* _BASE_CSF_KERNEL_H_ */
diff --git a/mali_kbase/csf/mali_gpu_csf_control_registers.h b/mali_kbase/csf/mali_gpu_csf_control_registers.h
deleted file mode 100644
index 8c4fc82..0000000
--- a/mali_kbase/csf/mali_gpu_csf_control_registers.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-/*
- * This header was autogenerated, it should not be edited.
- */
-
-#ifndef _GPU_CSF_CONTROL_REGISTERS_H_
-#define _GPU_CSF_CONTROL_REGISTERS_H_
-
-/* GPU_REGISTERS register offsets */
-#define GPU_CONTROL_MCU 0x3000 /* () MCU control registers */
-
-#endif /* _GPU_CSF_CONTROL_REGISTERS_H_ */
diff --git a/mali_kbase/csf/mali_gpu_csf_registers.h b/mali_kbase/csf/mali_gpu_csf_registers.h
deleted file mode 100644
index d37b9cc..0000000
--- a/mali_kbase/csf/mali_gpu_csf_registers.h
+++ /dev/null
@@ -1,1401 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-/*
- * This header was autogenerated, it should not be edited.
- */
-
-#ifndef _GPU_CSF_REGISTERS_H_
-#define _GPU_CSF_REGISTERS_H_
-
-/*
- * Begin register sets
- */
-
-/* DOORBELLS base address */
-#define DOORBELLS_BASE 0x0080000
-#define DOORBELLS_REG(r) (DOORBELLS_BASE + (r))
-
-/* CS_KERNEL_INPUT_BLOCK base address */
-#define CS_KERNEL_INPUT_BLOCK_BASE 0x0000
-#define CS_KERNEL_INPUT_BLOCK_REG(r) (CS_KERNEL_INPUT_BLOCK_BASE + (r))
-
-/* CS_KERNEL_OUTPUT_BLOCK base address */
-#define CS_KERNEL_OUTPUT_BLOCK_BASE 0x0000
-#define CS_KERNEL_OUTPUT_BLOCK_REG(r) (CS_KERNEL_OUTPUT_BLOCK_BASE + (r))
-
-/* CS_USER_INPUT_BLOCK base address */
-#define CS_USER_INPUT_BLOCK_BASE 0x0000
-#define CS_USER_INPUT_BLOCK_REG(r) (CS_USER_INPUT_BLOCK_BASE + (r))
-
-/* CS_USER_OUTPUT_BLOCK base address */
-#define CS_USER_OUTPUT_BLOCK_BASE 0x0000
-#define CS_USER_OUTPUT_BLOCK_REG(r) (CS_USER_OUTPUT_BLOCK_BASE + (r))
-
-/* CSG_INPUT_BLOCK base address */
-#define CSG_INPUT_BLOCK_BASE 0x0000
-#define CSG_INPUT_BLOCK_REG(r) (CSG_INPUT_BLOCK_BASE + (r))
-
-/* CSG_OUTPUT_BLOCK base address */
-#define CSG_OUTPUT_BLOCK_BASE 0x0000
-#define CSG_OUTPUT_BLOCK_REG(r) (CSG_OUTPUT_BLOCK_BASE + (r))
-
-/* GLB_CONTROL_BLOCK base address */
-#define GLB_CONTROL_BLOCK_BASE 0x04000000
-#define GLB_CONTROL_BLOCK_REG(r) (GLB_CONTROL_BLOCK_BASE + (r))
-
-/* GLB_INPUT_BLOCK base address */
-#define GLB_INPUT_BLOCK_BASE 0x0000
-#define GLB_INPUT_BLOCK_REG(r) (GLB_INPUT_BLOCK_BASE + (r))
-
-/* GLB_OUTPUT_BLOCK base address */
-#define GLB_OUTPUT_BLOCK_BASE 0x0000
-#define GLB_OUTPUT_BLOCK_REG(r) (GLB_OUTPUT_BLOCK_BASE + (r))
-
-/* USER base address */
-#define USER_BASE 0x0010000
-#define USER_REG(r) (USER_BASE + (r))
-
-/* End register sets */
-
-/*
- * Begin register offsets
- */
-
-/* DOORBELLS register offsets */
-#define DOORBELL_0 0x0000 /* () Doorbell 0 register */
-#define DOORBELL(n) (DOORBELL_0 + (n)*65536)
-#define DOORBELL_REG(n, r) (DOORBELL(n) + DOORBELL_BLOCK_REG(r))
-#define DOORBELL_COUNT 1024
-
-/* DOORBELL_BLOCK register offsets */
-#define DB_BLK_DOORBELL 0x0000 /* (WO) Doorbell request */
-
-/* CS_KERNEL_INPUT_BLOCK register offsets */
-#define CS_REQ 0x0000 /* () CS request flags */
-#define CS_CONFIG 0x0004 /* () CS configuration */
-#define CS_ACK_IRQ_MASK 0x000C /* () Command steam interrupt mask */
-#define CS_BASE_LO 0x0010 /* () Base pointer for the ring buffer, low word */
-#define CS_BASE_HI 0x0014 /* () Base pointer for the ring buffer, high word */
-#define CS_SIZE 0x0018 /* () Size of the ring buffer */
-#define CS_TILER_HEAP_START_LO 0x0020 /* () Pointer to heap start, low word */
-#define CS_TILER_HEAP_START_HI 0x0024 /* () Pointer to heap start, high word */
-#define CS_TILER_HEAP_END_LO 0x0028 /* () Tiler heap descriptor address, low word */
-#define CS_TILER_HEAP_END_HI 0x002C /* () Tiler heap descriptor address, high word */
-#define CS_USER_INPUT_LO 0x0030 /* () CS user mode input page address, low word */
-#define CS_USER_INPUT_HI 0x0034 /* () CS user mode input page address, high word */
-#define CS_USER_OUTPUT_LO 0x0038 /* () CS user mode input page address, low word */
-#define CS_USER_OUTPUT_HI 0x003C /* () CS user mode input page address, high word */
-
-/* CS_KERNEL_OUTPUT_BLOCK register offsets */
-#define CS_ACK 0x0000 /* () CS acknowledge flags */
-#define CS_STATUS_CMD_PTR_LO 0x0040 /* () Program pointer current value, low word */
-#define CS_STATUS_CMD_PTR_HI 0x0044 /* () Program pointer current value, high word */
-#define CS_STATUS_WAIT 0x0048 /* () Wait condition status register */
-#define CS_STATUS_REQ_RESOURCE 0x004C /* () Indicates the resources requested by the CS */
-#define CS_STATUS_WAIT_SYNC_POINTER_LO 0x0050 /* () Sync object pointer, low word */
-#define CS_STATUS_WAIT_SYNC_POINTER_HI 0x0054 /* () Sync object pointer, high word */
-#define CS_STATUS_WAIT_SYNC_VALUE 0x0058 /* () Sync object test value */
-#define CS_STATUS_SCOREBOARDS 0x005C /* () Scoreboard status */
-#define CS_STATUS_BLOCKED_REASON 0x0060 /* () Blocked reason */
-#define CS_FAULT 0x0080 /* () Recoverable fault information */
-#define CS_FATAL 0x0084 /* () Unrecoverable fault information */
-#define CS_FAULT_INFO_LO 0x0088 /* () Additional information about a recoverable fault, low word */
-#define CS_FAULT_INFO_HI 0x008C /* () Additional information about a recoverable fault, high word */
-#define CS_FATAL_INFO_LO 0x0090 /* () Additional information about a non-recoverable fault, low word */
-#define CS_FATAL_INFO_HI 0x0094 /* () Additional information about a non-recoverable fault, high word */
-#define CS_HEAP_VT_START 0x00C0 /* () Number of vertex/tiling operations started */
-#define CS_HEAP_VT_END 0x00C4 /* () Number of vertex/tiling operations completed */
-#define CS_HEAP_FRAG_END 0x00CC /* () Number of fragment operations completed */
-#define CS_HEAP_ADDRESS_LO 0x00D0 /* () Heap address, low word */
-#define CS_HEAP_ADDRESS_HI 0x00D4 /* () Heap address, high word */
-
-/* CS_USER_INPUT_BLOCK register offsets */
-#define CS_INSERT_LO 0x0000 /* () Current insert offset for ring buffer, low word */
-#define CS_INSERT_HI 0x0004 /* () Current insert offset for ring buffer, high word */
-#define CS_EXTRACT_INIT_LO 0x0008 /* () Initial extract offset for ring buffer, low word */
-#define CS_EXTRACT_INIT_HI 0x000C /* () Initial extract offset for ring buffer, high word */
-
-/* CS_USER_OUTPUT_BLOCK register offsets */
-#define CS_EXTRACT_LO 0x0000 /* () Current extract offset for ring buffer, low word */
-#define CS_EXTRACT_HI 0x0004 /* () Current extract offset for ring buffer, high word */
-#define CS_ACTIVE 0x0008 /* () Whether the CS is currently active on hardware */
-
-/* CSG_INPUT_BLOCK register offsets */
-#define CSG_REQ 0x0000 /* () CSG request */
-#define CSG_ACK_IRQ_MASK 0x0004 /* () CSG acknowledge interrupt mask */
-#define CSG_DB_REQ 0x0008 /* () CS kernel doorbell request flags */
-#define CSG_IRQ_ACK 0x000C /* () CS IRQ acknowledge */
-#define CSG_ALLOW_COMPUTE_LO 0x0020 /* () Allowed compute endpoints, low word */
-#define CSG_ALLOW_COMPUTE_HI 0x0024 /* () Allowed compute endpoints, high word */
-#define CSG_ALLOW_FRAGMENT_LO 0x0028 /* () Allowed fragment endpoints, low word */
-#define CSG_ALLOW_FRAGMENT_HI 0x002C /* () Allowed fragment endpoints, high word */
-#define CSG_ALLOW_OTHER 0x0030 /* () Allowed other endpoints */
-#define CSG_EP_REQ 0x0034 /* () Maximum number of endpoints allowed */
-#define CSG_SUSPEND_BUF_LO 0x0040 /* () Normal mode suspend buffer, low word */
-#define CSG_SUSPEND_BUF_HI 0x0044 /* () Normal mode suspend buffer, high word */
-#define CSG_PROTM_SUSPEND_BUF_LO 0x0048 /* () Protected mode suspend buffer, low word */
-#define CSG_PROTM_SUSPEND_BUF_HI 0x004C /* () Protected mode suspend buffer, high word */
-#define CSG_CONFIG 0x0050 /* () CSG configuration options */
-
-/* CSG_OUTPUT_BLOCK register offsets */
-#define CSG_ACK 0x0000 /* () CSG acknowledge flags */
-#define CSG_DB_ACK 0x0008 /* () CS kernel doorbell acknowledge flags */
-#define CSG_IRQ_REQ 0x000C /* () CS interrupt request flags */
-#define CSG_STATUS_EP_CURRENT 0x0010 /* () Endpoint allocation status register */
-#define CSG_STATUS_EP_REQ 0x0014 /* () Endpoint request status register */
-#define CSG_RESOURCE_DEP 0x001C /* () Current resource dependencies */
-
-/* GLB_CONTROL_BLOCK register offsets */
-#define GLB_VERSION 0x0000 /* () Global interface version */
-#define GLB_FEATURES 0x0004 /* () Global interface features */
-#define GLB_INPUT_VA 0x0008 /* () Address of GLB_INPUT_BLOCK */
-#define GLB_OUTPUT_VA 0x000C /* () Address of GLB_OUTPUT_BLOCK */
-#define GLB_GROUP_NUM 0x0010 /* () Number of CSG interfaces */
-#define GLB_GROUP_STRIDE 0x0014 /* () Stride between CSG interfaces */
-#define GLB_PRFCNT_SIZE 0x0018 /* () Size of CSF performance counters */
-#define GROUP_CONTROL_0 0x1000 /* () CSG control and capabilities */
-#define GROUP_CONTROL(n) (GROUP_CONTROL_0 + (n)*256)
-#define GROUP_CONTROL_REG(n, r) (GROUP_CONTROL(n) + GROUP_CONTROL_BLOCK_REG(r))
-#define GROUP_CONTROL_COUNT 16
-
-/* STREAM_CONTROL_BLOCK register offsets */
-#define STREAM_FEATURES 0x0000 /* () CSI features */
-#define STREAM_INPUT_VA 0x0004 /* () Address of CS_KERNEL_INPUT_BLOCK */
-#define STREAM_OUTPUT_VA 0x0008 /* () Address of CS_KERNEL_OUTPUT_BLOCK */
-
-/* GROUP_CONTROL_BLOCK register offsets */
-#define GROUP_FEATURES 0x0000 /* () CSG interface features */
-#define GROUP_INPUT_VA 0x0004 /* () Address of CSG_INPUT_BLOCK */
-#define GROUP_OUTPUT_VA 0x0008 /* () Address of CSG_OUTPUT_BLOCK */
-#define GROUP_SUSPEND_SIZE 0x000C /* () Size of CSG suspend buffer */
-#define GROUP_PROTM_SUSPEND_SIZE 0x0010 /* () Size of CSG protected-mode suspend buffer */
-#define GROUP_STREAM_NUM 0x0014 /* () Number of CS interfaces */
-#define GROUP_STREAM_STRIDE 0x0018 /* () Stride between CS interfaces */
-#define STREAM_CONTROL_0 0x0040 /* () CS control and capabilities */
-#define STREAM_CONTROL(n) (STREAM_CONTROL_0 + (n)*12)
-#define STREAM_CONTROL_REG(n, r) (STREAM_CONTROL(n) + STREAM_CONTROL_BLOCK_REG(r))
-#define STREAM_CONTROL_COUNT 16
-
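/*
 * Illustrative sketch added by the editor, not part of the original patch:
 * it shows how the stride macros above index the per-CSG and per-CS control
 * blocks. The nesting assumes the *_CONTROL_BLOCK_BASE values defined earlier
 * in this header; the helpers only compute offsets and touch no registers.
 */
static inline u32 example_group_features_offset(unsigned int group)
{
	/* Offset of GROUP_FEATURES for CSG interface 'group' (group < GROUP_CONTROL_COUNT). */
	return GLB_CONTROL_BLOCK_REG(GROUP_CONTROL_REG(group, GROUP_FEATURES));
}

static inline u32 example_stream_features_offset(unsigned int group, unsigned int stream)
{
	/* Offset of STREAM_FEATURES for CS 'stream' of CSG 'group' (stream < STREAM_CONTROL_COUNT). */
	return GLB_CONTROL_BLOCK_REG(
		GROUP_CONTROL_REG(group, STREAM_CONTROL_REG(stream, STREAM_FEATURES)));
}
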
-/* GLB_INPUT_BLOCK register offsets */
-#define GLB_REQ 0x0000 /* () Global request */
-#define GLB_ACK_IRQ_MASK 0x0004 /* () Global acknowledge interrupt mask */
-#define GLB_DB_REQ 0x0008 /* () Global doorbell request */
-#define GLB_PROGRESS_TIMER 0x0010 /* () Global progress timeout */
-#define GLB_PWROFF_TIMER 0x0014 /* () Global shader core power off timer */
-#define GLB_ALLOC_EN_LO 0x0018 /* () Global shader core allocation enable mask, low word */
-#define GLB_ALLOC_EN_HI 0x001C /* () Global shader core allocation enable mask, high word */
-#define GLB_PROTM_COHERENCY 0x0020 /* () Configure COHERENCY_ENABLE register value to use in protected mode execution */
-
-#define GLB_PRFCNT_JASID 0x0024 /* () Performance counter address space */
-#define GLB_PRFCNT_BASE_LO 0x0028 /* () Performance counter buffer address, low word */
-#define GLB_PRFCNT_BASE_HI 0x002C /* () Performance counter buffer address, high word */
-#define GLB_PRFCNT_EXTRACT 0x0030 /* () Performance counter buffer extract index */
-#define GLB_PRFCNT_CONFIG 0x0040 /* () Performance counter configuration */
-#define GLB_PRFCNT_CSG_SELECT 0x0044 /* () CSG performance counting enable */
-#define GLB_PRFCNT_FW_EN 0x0048 /* () Performance counter enable for firmware */
-#define GLB_PRFCNT_CSG_EN 0x004C /* () Performance counter enable for CSG */
-#define GLB_PRFCNT_CSF_EN 0x0050 /* () Performance counter enable for CSF */
-#define GLB_PRFCNT_SHADER_EN 0x0054 /* () Performance counter enable for shader cores */
-#define GLB_PRFCNT_TILER_EN 0x0058 /* () Performance counter enable for tiler */
-#define GLB_PRFCNT_MMU_L2_EN 0x005C /* () Performance counter enable for MMU/L2 cache */
-
-#define GLB_DEBUG_FWUTF_DESTROY 0x0FE0 /* () Test fixture destroy function address */
-#define GLB_DEBUG_FWUTF_TEST 0x0FE4 /* () Test index */
-#define GLB_DEBUG_FWUTF_FIXTURE 0x0FE8 /* () Test fixture index */
-#define GLB_DEBUG_FWUTF_CREATE 0x0FEC /* () Test fixture create function address */
-#define GLB_DEBUG_ACK_IRQ_MASK 0x0FF8 /* () Global debug acknowledge interrupt mask */
-#define GLB_DEBUG_REQ 0x0FFC /* () Global debug request */
-
-/* GLB_OUTPUT_BLOCK register offsets */
-#define GLB_ACK 0x0000 /* () Global acknowledge */
-#define GLB_DB_ACK 0x0008 /* () Global doorbell acknowledge */
-#define GLB_HALT_STATUS 0x0010 /* () Global halt status */
-#define GLB_PRFCNT_STATUS 0x0014 /* () Performance counter status */
-#define GLB_PRFCNT_INSERT 0x0018 /* () Performance counter buffer insert index */
-#define GLB_DEBUG_FWUTF_RESULT 0x0FE0 /* () Firmware debug test result */
-#define GLB_DEBUG_ACK 0x0FFC /* () Global debug acknowledge */
-
-/* USER register offsets */
-#define LATEST_FLUSH 0x0000 /* () Flush ID of latest clean-and-invalidate operation */
-
-/* End register offsets */
-
-/* CS_KERNEL_INPUT_BLOCK register set definitions */
-/* GLB_VERSION register */
-#define GLB_VERSION_PATCH_SHIFT (0)
-#define GLB_VERSION_MINOR_SHIFT (16)
-#define GLB_VERSION_MAJOR_SHIFT (24)
-
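/*
 * Illustrative sketch added by the editor, not part of the original patch:
 * decoding GLB_VERSION. Only the shifts are defined above; the field widths
 * assumed here (16-bit patch, 8-bit minor, 8-bit major) are inferred from
 * those shifts.
 */
static inline void example_decode_glb_version(u32 glb_version, u32 *major, u32 *minor,
					      u32 *patch)
{
	*major = glb_version >> GLB_VERSION_MAJOR_SHIFT;
	*minor = (glb_version >> GLB_VERSION_MINOR_SHIFT) & 0xFF;
	*patch = (glb_version >> GLB_VERSION_PATCH_SHIFT) & 0xFFFF;
}
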
-/* CS_REQ register */
-#define CS_REQ_STATE_SHIFT 0
-#define CS_REQ_STATE_MASK (0x7 << CS_REQ_STATE_SHIFT)
-#define CS_REQ_STATE_GET(reg_val) (((reg_val)&CS_REQ_STATE_MASK) >> CS_REQ_STATE_SHIFT)
-#define CS_REQ_STATE_SET(reg_val, value) \
- (((reg_val) & ~CS_REQ_STATE_MASK) | (((value) << CS_REQ_STATE_SHIFT) & CS_REQ_STATE_MASK))
-/* CS_REQ_STATE values */
-#define CS_REQ_STATE_STOP 0x0
-#define CS_REQ_STATE_START 0x1
-/* End of CS_REQ_STATE values */
-#define CS_REQ_EXTRACT_EVENT_SHIFT 4
-#define CS_REQ_EXTRACT_EVENT_MASK (0x1 << CS_REQ_EXTRACT_EVENT_SHIFT)
-#define CS_REQ_EXTRACT_EVENT_GET(reg_val) (((reg_val)&CS_REQ_EXTRACT_EVENT_MASK) >> CS_REQ_EXTRACT_EVENT_SHIFT)
-#define CS_REQ_EXTRACT_EVENT_SET(reg_val, value) \
- (((reg_val) & ~CS_REQ_EXTRACT_EVENT_MASK) | (((value) << CS_REQ_EXTRACT_EVENT_SHIFT) & CS_REQ_EXTRACT_EVENT_MASK))
-
-#define CS_REQ_IDLE_SYNC_WAIT_SHIFT 8
-#define CS_REQ_IDLE_SYNC_WAIT_MASK (0x1 << CS_REQ_IDLE_SYNC_WAIT_SHIFT)
-#define CS_REQ_IDLE_SYNC_WAIT_GET(reg_val) (((reg_val)&CS_REQ_IDLE_SYNC_WAIT_MASK) >> CS_REQ_IDLE_SYNC_WAIT_SHIFT)
-#define CS_REQ_IDLE_SYNC_WAIT_SET(reg_val, value) \
- (((reg_val) & ~CS_REQ_IDLE_SYNC_WAIT_MASK) | \
- (((value) << CS_REQ_IDLE_SYNC_WAIT_SHIFT) & CS_REQ_IDLE_SYNC_WAIT_MASK))
-#define CS_REQ_IDLE_PROTM_PEND_SHIFT 9
-#define CS_REQ_IDLE_PROTM_PEND_MASK (0x1 << CS_REQ_IDLE_PROTM_PEND_SHIFT)
-#define CS_REQ_IDLE_PROTM_PEND_GET(reg_val) (((reg_val)&CS_REQ_IDLE_PROTM_PEND_MASK) >> CS_REQ_IDLE_PROTM_PEND_SHIFT)
-#define CS_REQ_IDLE_PROTM_PEND_SET(reg_val, value) \
- (((reg_val) & ~CS_REQ_IDLE_PROTM_PEND_MASK) | \
- (((value) << CS_REQ_IDLE_PROTM_PEND_SHIFT) & CS_REQ_IDLE_PROTM_PEND_MASK))
-#define CS_REQ_IDLE_EMPTY_SHIFT 10
-#define CS_REQ_IDLE_EMPTY_MASK (0x1 << CS_REQ_IDLE_EMPTY_SHIFT)
-#define CS_REQ_IDLE_EMPTY_GET(reg_val) (((reg_val)&CS_REQ_IDLE_EMPTY_MASK) >> CS_REQ_IDLE_EMPTY_SHIFT)
-#define CS_REQ_IDLE_EMPTY_SET(reg_val, value) \
- (((reg_val) & ~CS_REQ_IDLE_EMPTY_MASK) | (((value) << CS_REQ_IDLE_EMPTY_SHIFT) & CS_REQ_IDLE_EMPTY_MASK))
-#define CS_REQ_IDLE_RESOURCE_REQ_SHIFT 11
-#define CS_REQ_IDLE_RESOURCE_REQ_MASK (0x1 << CS_REQ_IDLE_RESOURCE_REQ_SHIFT)
-#define CS_REQ_IDLE_RESOURCE_REQ_GET(reg_val) \
- (((reg_val)&CS_REQ_IDLE_RESOURCE_REQ_MASK) >> CS_REQ_IDLE_RESOURCE_REQ_SHIFT)
-#define CS_REQ_IDLE_RESOURCE_REQ_SET(reg_val, value) \
- (((reg_val) & ~CS_REQ_IDLE_RESOURCE_REQ_MASK) | \
- (((value) << CS_REQ_IDLE_RESOURCE_REQ_SHIFT) & CS_REQ_IDLE_RESOURCE_REQ_MASK))
-#define CS_REQ_TILER_OOM_SHIFT 26
-#define CS_REQ_TILER_OOM_MASK (0x1 << CS_REQ_TILER_OOM_SHIFT)
-#define CS_REQ_TILER_OOM_GET(reg_val) (((reg_val)&CS_REQ_TILER_OOM_MASK) >> CS_REQ_TILER_OOM_SHIFT)
-#define CS_REQ_TILER_OOM_SET(reg_val, value) \
- (((reg_val) & ~CS_REQ_TILER_OOM_MASK) | (((value) << CS_REQ_TILER_OOM_SHIFT) & CS_REQ_TILER_OOM_MASK))
-#define CS_REQ_PROTM_PEND_SHIFT 27
-#define CS_REQ_PROTM_PEND_MASK (0x1 << CS_REQ_PROTM_PEND_SHIFT)
-#define CS_REQ_PROTM_PEND_GET(reg_val) (((reg_val)&CS_REQ_PROTM_PEND_MASK) >> CS_REQ_PROTM_PEND_SHIFT)
-#define CS_REQ_PROTM_PEND_SET(reg_val, value) \
- (((reg_val) & ~CS_REQ_PROTM_PEND_MASK) | (((value) << CS_REQ_PROTM_PEND_SHIFT) & CS_REQ_PROTM_PEND_MASK))
-#define CS_REQ_FATAL_SHIFT 30
-#define CS_REQ_FATAL_MASK (0x1 << CS_REQ_FATAL_SHIFT)
-#define CS_REQ_FATAL_GET(reg_val) (((reg_val)&CS_REQ_FATAL_MASK) >> CS_REQ_FATAL_SHIFT)
-#define CS_REQ_FATAL_SET(reg_val, value) \
- (((reg_val) & ~CS_REQ_FATAL_MASK) | (((value) << CS_REQ_FATAL_SHIFT) & CS_REQ_FATAL_MASK))
-#define CS_REQ_FAULT_SHIFT 31
-#define CS_REQ_FAULT_MASK (0x1 << CS_REQ_FAULT_SHIFT)
-#define CS_REQ_FAULT_GET(reg_val) (((reg_val)&CS_REQ_FAULT_MASK) >> CS_REQ_FAULT_SHIFT)
-#define CS_REQ_FAULT_SET(reg_val, value) \
- (((reg_val) & ~CS_REQ_FAULT_MASK) | (((value) << CS_REQ_FAULT_SHIFT) & CS_REQ_FAULT_MASK))
-
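/*
 * Illustrative sketch added by the editor, not part of the original patch:
 * the *_GET/*_SET macros above are pure value helpers; *_SET returns an
 * updated copy and writes nothing. A read-modify-write of a cached CS_REQ
 * value therefore looks like this (cs_req is a hypothetical shadow copy,
 * not a name used by the driver).
 */
static inline u32 example_request_cs_start(u32 cs_req)
{
	/* Update only the STATE field, leaving all other request bits intact. */
	cs_req = CS_REQ_STATE_SET(cs_req, CS_REQ_STATE_START);

	/* Fields are read back with the matching *_GET macro. */
	WARN_ON(CS_REQ_STATE_GET(cs_req) != CS_REQ_STATE_START);

	return cs_req;
}
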
-/* CS_CONFIG register */
-#define CS_CONFIG_PRIORITY_SHIFT 0
-#define CS_CONFIG_PRIORITY_MASK (0xF << CS_CONFIG_PRIORITY_SHIFT)
-#define CS_CONFIG_PRIORITY_GET(reg_val) (((reg_val)&CS_CONFIG_PRIORITY_MASK) >> CS_CONFIG_PRIORITY_SHIFT)
-#define CS_CONFIG_PRIORITY_SET(reg_val, value) \
- (((reg_val) & ~CS_CONFIG_PRIORITY_MASK) | (((value) << CS_CONFIG_PRIORITY_SHIFT) & CS_CONFIG_PRIORITY_MASK))
-#define CS_CONFIG_USER_DOORBELL_SHIFT 8
-#define CS_CONFIG_USER_DOORBELL_MASK (0xFF << CS_CONFIG_USER_DOORBELL_SHIFT)
-#define CS_CONFIG_USER_DOORBELL_GET(reg_val) (((reg_val)&CS_CONFIG_USER_DOORBELL_MASK) >> CS_CONFIG_USER_DOORBELL_SHIFT)
-#define CS_CONFIG_USER_DOORBELL_SET(reg_val, value) \
- (((reg_val) & ~CS_CONFIG_USER_DOORBELL_MASK) | \
- (((value) << CS_CONFIG_USER_DOORBELL_SHIFT) & CS_CONFIG_USER_DOORBELL_MASK))
-
-/* CS_ACK_IRQ_MASK register */
-#define CS_ACK_IRQ_MASK_STATE_SHIFT 0
-#define CS_ACK_IRQ_MASK_STATE_MASK (0x7 << CS_ACK_IRQ_MASK_STATE_SHIFT)
-#define CS_ACK_IRQ_MASK_STATE_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_STATE_MASK) >> CS_ACK_IRQ_MASK_STATE_SHIFT)
-#define CS_ACK_IRQ_MASK_STATE_SET(reg_val, value) \
- (((reg_val) & ~CS_ACK_IRQ_MASK_STATE_MASK) | \
- (((value) << CS_ACK_IRQ_MASK_STATE_SHIFT) & CS_ACK_IRQ_MASK_STATE_MASK))
-/* CS_ACK_IRQ_MASK_STATE values */
-#define CS_ACK_IRQ_MASK_STATE_DISABLED 0x0
-#define CS_ACK_IRQ_MASK_STATE_ENABLED 0x7
-/* End of CS_ACK_IRQ_MASK_STATE values */
-#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT 4
-#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK (0x1 << CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT)
-#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_GET(reg_val) \
- (((reg_val)&CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK) >> CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT)
-#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_SET(reg_val, value) \
- (((reg_val) & ~CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK) | \
- (((value) << CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT) & CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK))
-#define CS_ACK_IRQ_MASK_TILER_OOM_SHIFT 26
-#define CS_ACK_IRQ_MASK_TILER_OOM_MASK (0x1 << CS_ACK_IRQ_MASK_TILER_OOM_SHIFT)
-#define CS_ACK_IRQ_MASK_TILER_OOM_GET(reg_val) \
- (((reg_val)&CS_ACK_IRQ_MASK_TILER_OOM_MASK) >> CS_ACK_IRQ_MASK_TILER_OOM_SHIFT)
-#define CS_ACK_IRQ_MASK_TILER_OOM_SET(reg_val, value) \
- (((reg_val) & ~CS_ACK_IRQ_MASK_TILER_OOM_MASK) | \
- (((value) << CS_ACK_IRQ_MASK_TILER_OOM_SHIFT) & CS_ACK_IRQ_MASK_TILER_OOM_MASK))
-#define CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT 27
-#define CS_ACK_IRQ_MASK_PROTM_PEND_MASK (0x1 << CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT)
-#define CS_ACK_IRQ_MASK_PROTM_PEND_GET(reg_val) \
- (((reg_val)&CS_ACK_IRQ_MASK_PROTM_PEND_MASK) >> CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT)
-#define CS_ACK_IRQ_MASK_PROTM_PEND_SET(reg_val, value) \
- (((reg_val) & ~CS_ACK_IRQ_MASK_PROTM_PEND_MASK) | \
- (((value) << CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT) & CS_ACK_IRQ_MASK_PROTM_PEND_MASK))
-#define CS_ACK_IRQ_MASK_FATAL_SHIFT 30
-#define CS_ACK_IRQ_MASK_FATAL_MASK (0x1 << CS_ACK_IRQ_MASK_FATAL_SHIFT)
-#define CS_ACK_IRQ_MASK_FATAL_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_FATAL_MASK) >> CS_ACK_IRQ_MASK_FATAL_SHIFT)
-#define CS_ACK_IRQ_MASK_FATAL_SET(reg_val, value) \
- (((reg_val) & ~CS_ACK_IRQ_MASK_FATAL_MASK) | \
- (((value) << CS_ACK_IRQ_MASK_FATAL_SHIFT) & CS_ACK_IRQ_MASK_FATAL_MASK))
-#define CS_ACK_IRQ_MASK_FAULT_SHIFT 31
-#define CS_ACK_IRQ_MASK_FAULT_MASK (0x1 << CS_ACK_IRQ_MASK_FAULT_SHIFT)
-#define CS_ACK_IRQ_MASK_FAULT_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_FAULT_MASK) >> CS_ACK_IRQ_MASK_FAULT_SHIFT)
-#define CS_ACK_IRQ_MASK_FAULT_SET(reg_val, value) \
- (((reg_val) & ~CS_ACK_IRQ_MASK_FAULT_MASK) | \
- (((value) << CS_ACK_IRQ_MASK_FAULT_SHIFT) & CS_ACK_IRQ_MASK_FAULT_MASK))
-
-/* CS_BASE register */
-#define CS_BASE_POINTER_SHIFT 0
-#define CS_BASE_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_BASE_POINTER_SHIFT)
-#define CS_BASE_POINTER_GET(reg_val) (((reg_val)&CS_BASE_POINTER_MASK) >> CS_BASE_POINTER_SHIFT)
-#define CS_BASE_POINTER_SET(reg_val, value) \
- (((reg_val) & ~CS_BASE_POINTER_MASK) | (((value) << CS_BASE_POINTER_SHIFT) & CS_BASE_POINTER_MASK))
-
-/* CS_SIZE register */
-#define CS_SIZE_SIZE_SHIFT 0
-#define CS_SIZE_SIZE_MASK (0xFFFFFFFF << CS_SIZE_SIZE_SHIFT)
-#define CS_SIZE_SIZE_GET(reg_val) (((reg_val)&CS_SIZE_SIZE_MASK) >> CS_SIZE_SIZE_SHIFT)
-#define CS_SIZE_SIZE_SET(reg_val, value) \
- (((reg_val) & ~CS_SIZE_SIZE_MASK) | (((value) << CS_SIZE_SIZE_SHIFT) & CS_SIZE_SIZE_MASK))
-
-/* CS_TILER_HEAP_START register */
-#define CS_TILER_HEAP_START_POINTER_SHIFT 0
-#define CS_TILER_HEAP_START_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_TILER_HEAP_START_POINTER_SHIFT)
-#define CS_TILER_HEAP_START_POINTER_GET(reg_val) \
- (((reg_val)&CS_TILER_HEAP_START_POINTER_MASK) >> CS_TILER_HEAP_START_POINTER_SHIFT)
-#define CS_TILER_HEAP_START_POINTER_SET(reg_val, value) \
- (((reg_val) & ~CS_TILER_HEAP_START_POINTER_MASK) | \
- (((value) << CS_TILER_HEAP_START_POINTER_SHIFT) & CS_TILER_HEAP_START_POINTER_MASK))
-/* HeapChunkPointer nested in CS_TILER_HEAP_START_POINTER */
-/* End of HeapChunkPointer nested in CS_TILER_HEAP_START_POINTER */
-
-/* CS_TILER_HEAP_END register */
-#define CS_TILER_HEAP_END_POINTER_SHIFT 0
-#define CS_TILER_HEAP_END_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_TILER_HEAP_END_POINTER_SHIFT)
-#define CS_TILER_HEAP_END_POINTER_GET(reg_val) \
- (((reg_val)&CS_TILER_HEAP_END_POINTER_MASK) >> CS_TILER_HEAP_END_POINTER_SHIFT)
-#define CS_TILER_HEAP_END_POINTER_SET(reg_val, value) \
- (((reg_val) & ~CS_TILER_HEAP_END_POINTER_MASK) | \
- (((value) << CS_TILER_HEAP_END_POINTER_SHIFT) & CS_TILER_HEAP_END_POINTER_MASK))
-/* HeapChunkPointer nested in CS_TILER_HEAP_END_POINTER */
-/* End of HeapChunkPointer nested in CS_TILER_HEAP_END_POINTER */
-
-/* CS_USER_INPUT register */
-#define CS_USER_INPUT_POINTER_SHIFT 0
-#define CS_USER_INPUT_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_USER_INPUT_POINTER_SHIFT)
-#define CS_USER_INPUT_POINTER_GET(reg_val) (((reg_val)&CS_USER_INPUT_POINTER_MASK) >> CS_USER_INPUT_POINTER_SHIFT)
-#define CS_USER_INPUT_POINTER_SET(reg_val, value) \
- (((reg_val) & ~CS_USER_INPUT_POINTER_MASK) | \
- (((value) << CS_USER_INPUT_POINTER_SHIFT) & CS_USER_INPUT_POINTER_MASK))
-
-/* CS_USER_OUTPUT register */
-#define CS_USER_OUTPUT_POINTER_SHIFT 0
-#define CS_USER_OUTPUT_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_USER_OUTPUT_POINTER_SHIFT)
-#define CS_USER_OUTPUT_POINTER_GET(reg_val) (((reg_val)&CS_USER_OUTPUT_POINTER_MASK) >> CS_USER_OUTPUT_POINTER_SHIFT)
-#define CS_USER_OUTPUT_POINTER_SET(reg_val, value) \
- (((reg_val) & ~CS_USER_OUTPUT_POINTER_MASK) | \
- (((value) << CS_USER_OUTPUT_POINTER_SHIFT) & CS_USER_OUTPUT_POINTER_MASK))
-/* End of CS_KERNEL_INPUT_BLOCK register set definitions */
-
-/* CS_KERNEL_OUTPUT_BLOCK register set definitions */
-
-/* CS_ACK register */
-#define CS_ACK_STATE_SHIFT 0
-#define CS_ACK_STATE_MASK (0x7 << CS_ACK_STATE_SHIFT)
-#define CS_ACK_STATE_GET(reg_val) (((reg_val)&CS_ACK_STATE_MASK) >> CS_ACK_STATE_SHIFT)
-#define CS_ACK_STATE_SET(reg_val, value) \
- (((reg_val) & ~CS_ACK_STATE_MASK) | (((value) << CS_ACK_STATE_SHIFT) & CS_ACK_STATE_MASK))
-/* CS_ACK_STATE values */
-#define CS_ACK_STATE_STOP 0x0
-#define CS_ACK_STATE_START 0x1
-/* End of CS_ACK_STATE values */
-#define CS_ACK_EXTRACT_EVENT_SHIFT 4
-#define CS_ACK_EXTRACT_EVENT_MASK (0x1 << CS_ACK_EXTRACT_EVENT_SHIFT)
-#define CS_ACK_EXTRACT_EVENT_GET(reg_val) (((reg_val)&CS_ACK_EXTRACT_EVENT_MASK) >> CS_ACK_EXTRACT_EVENT_SHIFT)
-#define CS_ACK_EXTRACT_EVENT_SET(reg_val, value) \
- (((reg_val) & ~CS_ACK_EXTRACT_EVENT_MASK) | (((value) << CS_ACK_EXTRACT_EVENT_SHIFT) & CS_ACK_EXTRACT_EVENT_MASK))
-#define CS_ACK_TILER_OOM_SHIFT 26
-#define CS_ACK_TILER_OOM_MASK (0x1 << CS_ACK_TILER_OOM_SHIFT)
-#define CS_ACK_TILER_OOM_GET(reg_val) (((reg_val)&CS_ACK_TILER_OOM_MASK) >> CS_ACK_TILER_OOM_SHIFT)
-#define CS_ACK_TILER_OOM_SET(reg_val, value) \
- (((reg_val) & ~CS_ACK_TILER_OOM_MASK) | (((value) << CS_ACK_TILER_OOM_SHIFT) & CS_ACK_TILER_OOM_MASK))
-#define CS_ACK_PROTM_PEND_SHIFT 27
-#define CS_ACK_PROTM_PEND_MASK (0x1 << CS_ACK_PROTM_PEND_SHIFT)
-#define CS_ACK_PROTM_PEND_GET(reg_val) (((reg_val)&CS_ACK_PROTM_PEND_MASK) >> CS_ACK_PROTM_PEND_SHIFT)
-#define CS_ACK_PROTM_PEND_SET(reg_val, value) \
- (((reg_val) & ~CS_ACK_PROTM_PEND_MASK) | (((value) << CS_ACK_PROTM_PEND_SHIFT) & CS_ACK_PROTM_PEND_MASK))
-#define CS_ACK_FATAL_SHIFT 30
-#define CS_ACK_FATAL_MASK (0x1 << CS_ACK_FATAL_SHIFT)
-#define CS_ACK_FATAL_GET(reg_val) (((reg_val)&CS_ACK_FATAL_MASK) >> CS_ACK_FATAL_SHIFT)
-#define CS_ACK_FATAL_SET(reg_val, value) \
- (((reg_val) & ~CS_ACK_FATAL_MASK) | (((value) << CS_ACK_FATAL_SHIFT) & CS_ACK_FATAL_MASK))
-#define CS_ACK_FAULT_SHIFT 31
-#define CS_ACK_FAULT_MASK (0x1 << CS_ACK_FAULT_SHIFT)
-#define CS_ACK_FAULT_GET(reg_val) (((reg_val)&CS_ACK_FAULT_MASK) >> CS_ACK_FAULT_SHIFT)
-#define CS_ACK_FAULT_SET(reg_val, value) \
- (((reg_val) & ~CS_ACK_FAULT_MASK) | (((value) << CS_ACK_FAULT_SHIFT) & CS_ACK_FAULT_MASK))
-
-/* CS_STATUS_CMD_PTR register */
-#define CS_STATUS_CMD_PTR_POINTER_SHIFT 0
-#define CS_STATUS_CMD_PTR_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_STATUS_CMD_PTR_POINTER_SHIFT)
-#define CS_STATUS_CMD_PTR_POINTER_GET(reg_val) \
- (((reg_val)&CS_STATUS_CMD_PTR_POINTER_MASK) >> CS_STATUS_CMD_PTR_POINTER_SHIFT)
-#define CS_STATUS_CMD_PTR_POINTER_SET(reg_val, value) \
- (((reg_val) & ~CS_STATUS_CMD_PTR_POINTER_MASK) | \
- (((value) << CS_STATUS_CMD_PTR_POINTER_SHIFT) & CS_STATUS_CMD_PTR_POINTER_MASK))
-
-/* CS_STATUS_WAIT register */
-#define CS_STATUS_WAIT_SB_MASK_SHIFT 0
-#define CS_STATUS_WAIT_SB_MASK_MASK (0xFFFF << CS_STATUS_WAIT_SB_MASK_SHIFT)
-#define CS_STATUS_WAIT_SB_MASK_GET(reg_val) (((reg_val)&CS_STATUS_WAIT_SB_MASK_MASK) >> CS_STATUS_WAIT_SB_MASK_SHIFT)
-#define CS_STATUS_WAIT_SB_MASK_SET(reg_val, value) \
- (((reg_val) & ~CS_STATUS_WAIT_SB_MASK_MASK) | \
- (((value) << CS_STATUS_WAIT_SB_MASK_SHIFT) & CS_STATUS_WAIT_SB_MASK_MASK))
-#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT 24
-#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK (0xF << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT)
-#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(reg_val) \
- (((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT)
-#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SET(reg_val, value) \
- (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK) | \
- (((value) << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK))
-/* CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */
-#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE 0x0
-#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT 0x1
-/* End of CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */
-#define CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT 28
-#define CS_STATUS_WAIT_PROGRESS_WAIT_MASK (0x1 << CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT)
-#define CS_STATUS_WAIT_PROGRESS_WAIT_GET(reg_val) \
- (((reg_val)&CS_STATUS_WAIT_PROGRESS_WAIT_MASK) >> CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT)
-#define CS_STATUS_WAIT_PROGRESS_WAIT_SET(reg_val, value) \
- (((reg_val) & ~CS_STATUS_WAIT_PROGRESS_WAIT_MASK) | \
- (((value) << CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT) & CS_STATUS_WAIT_PROGRESS_WAIT_MASK))
-#define CS_STATUS_WAIT_PROTM_PEND_SHIFT 29
-#define CS_STATUS_WAIT_PROTM_PEND_MASK (0x1 << CS_STATUS_WAIT_PROTM_PEND_SHIFT)
-#define CS_STATUS_WAIT_PROTM_PEND_GET(reg_val) \
- (((reg_val)&CS_STATUS_WAIT_PROTM_PEND_MASK) >> CS_STATUS_WAIT_PROTM_PEND_SHIFT)
-#define CS_STATUS_WAIT_PROTM_PEND_SET(reg_val, value) \
- (((reg_val) & ~CS_STATUS_WAIT_PROTM_PEND_MASK) | \
- (((value) << CS_STATUS_WAIT_PROTM_PEND_SHIFT) & CS_STATUS_WAIT_PROTM_PEND_MASK))
-#define CS_STATUS_WAIT_SYNC_WAIT_SHIFT 31
-#define CS_STATUS_WAIT_SYNC_WAIT_MASK (0x1 << CS_STATUS_WAIT_SYNC_WAIT_SHIFT)
-#define CS_STATUS_WAIT_SYNC_WAIT_GET(reg_val) \
- (((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_SHIFT)
-#define CS_STATUS_WAIT_SYNC_WAIT_SET(reg_val, value) \
- (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_MASK) | \
- (((value) << CS_STATUS_WAIT_SYNC_WAIT_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_MASK))
-
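/*
 * Illustrative sketch added by the editor, not part of the original patch:
 * interpreting a CS_STATUS_WAIT snapshot with the fields above. sync_current
 * stands for a hypothetical value read from the object at
 * CS_STATUS_WAIT_SYNC_POINTER and sync_value for CS_STATUS_WAIT_SYNC_VALUE;
 * how those are fetched is outside this header.
 */
static inline bool example_sync_wait_satisfied(u32 status_wait, u32 sync_current, u32 sync_value)
{
	if (!CS_STATUS_WAIT_SYNC_WAIT_GET(status_wait))
		return true; /* The CS is not blocked on a sync wait. */

	if (CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(status_wait) ==
	    CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT)
		return sync_current > sync_value;

	/* Otherwise CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE */
	return sync_current <= sync_value;
}
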
-/* CS_STATUS_REQ_RESOURCE register */
-#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT 0
-#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT)
-#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_GET(reg_val) \
- (((reg_val)&CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT)
-#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SET(reg_val, value) \
- (((reg_val) & ~CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK) | \
- (((value) << CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK))
-#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT 1
-#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT)
-#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_GET(reg_val) \
- (((reg_val)&CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT)
-#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SET(reg_val, value) \
- (((reg_val) & ~CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK) | \
- (((value) << CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK))
-#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT 2
-#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT)
-#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_GET(reg_val) \
- (((reg_val)&CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT)
-#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SET(reg_val, value) \
- (((reg_val) & ~CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK) | \
- (((value) << CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK))
-#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT 3
-#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT)
-#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_GET(reg_val) \
- (((reg_val)&CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT)
-#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SET(reg_val, value) \
- (((reg_val) & ~CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK) | \
- (((value) << CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK))
-
-/* CS_STATUS_WAIT_SYNC_POINTER register */
-#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT 0
-#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT)
-#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_GET(reg_val) \
- (((reg_val)&CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK) >> CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT)
-#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SET(reg_val, value) \
- (((reg_val) & ~CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK) | \
- (((value) << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) & CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK))
-
-/* CS_STATUS_WAIT_SYNC_VALUE register */
-#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT 0
-#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK (0xFFFFFFFF << CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT)
-#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_GET(reg_val) \
- (((reg_val)&CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK) >> CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT)
-#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_SET(reg_val, value) \
- (((reg_val) & ~CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK) | \
- (((value) << CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT) & CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK))
-
-/* CS_STATUS_SCOREBOARDS register */
-#define CS_STATUS_SCOREBOARDS_NONZERO_SHIFT (0)
-#define CS_STATUS_SCOREBOARDS_NONZERO_MASK \
- ((0xFFFF) << CS_STATUS_SCOREBOARDS_NONZERO_SHIFT)
-#define CS_STATUS_SCOREBOARDS_NONZERO_GET(reg_val) \
- (((reg_val)&CS_STATUS_SCOREBOARDS_NONZERO_MASK) >> \
- CS_STATUS_SCOREBOARDS_NONZERO_SHIFT)
-#define CS_STATUS_SCOREBOARDS_NONZERO_SET(reg_val, value) \
- (((reg_val) & ~CS_STATUS_SCOREBOARDS_NONZERO_MASK) | \
- (((value) << CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) & \
- CS_STATUS_SCOREBOARDS_NONZERO_MASK))
-
-/* CS_STATUS_BLOCKED_REASON register */
-#define CS_STATUS_BLOCKED_REASON_REASON_SHIFT (0)
-#define CS_STATUS_BLOCKED_REASON_REASON_MASK \
- ((0xF) << CS_STATUS_BLOCKED_REASON_REASON_SHIFT)
-#define CS_STATUS_BLOCKED_REASON_REASON_GET(reg_val) \
- (((reg_val)&CS_STATUS_BLOCKED_REASON_REASON_MASK) >> \
- CS_STATUS_BLOCKED_REASON_REASON_SHIFT)
-#define CS_STATUS_BLOCKED_REASON_REASON_SET(reg_val, value) \
- (((reg_val) & ~CS_STATUS_BLOCKED_REASON_REASON_MASK) | \
- (((value) << CS_STATUS_BLOCKED_REASON_REASON_SHIFT) & \
- CS_STATUS_BLOCKED_REASON_REASON_MASK))
-/* CS_STATUS_BLOCKED_REASON_reason values */
-#define CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED 0x0
-#define CS_STATUS_BLOCKED_REASON_REASON_WAIT 0x1
-#define CS_STATUS_BLOCKED_REASON_REASON_PROGRESS_WAIT 0x2
-#define CS_STATUS_BLOCKED_REASON_REASON_SYNC_WAIT 0x3
-#define CS_STATUS_BLOCKED_REASON_REASON_DEFERRED 0x4
-#define CS_STATUS_BLOCKED_REASON_REASON_RESOURCE 0x5
-#define CS_STATUS_BLOCKED_REASON_REASON_FLUSH 0x6
-/* End of CS_STATUS_BLOCKED_REASON_reason values */
-
-/* CS_FAULT register */
-#define CS_FAULT_EXCEPTION_TYPE_SHIFT 0
-#define CS_FAULT_EXCEPTION_TYPE_MASK (0xFF << CS_FAULT_EXCEPTION_TYPE_SHIFT)
-#define CS_FAULT_EXCEPTION_TYPE_GET(reg_val) (((reg_val)&CS_FAULT_EXCEPTION_TYPE_MASK) >> CS_FAULT_EXCEPTION_TYPE_SHIFT)
-#define CS_FAULT_EXCEPTION_TYPE_SET(reg_val, value) \
- (((reg_val) & ~CS_FAULT_EXCEPTION_TYPE_MASK) | \
- (((value) << CS_FAULT_EXCEPTION_TYPE_SHIFT) & CS_FAULT_EXCEPTION_TYPE_MASK))
-/* CS_FAULT_EXCEPTION_TYPE values */
-#define CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED 0x0F
-#define CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT 0x4B
-#define CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_PC 0x50
-#define CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_ENC 0x51
-#define CS_FAULT_EXCEPTION_TYPE_INSTR_BARRIER_FAULT 0x55
-#define CS_FAULT_EXCEPTION_TYPE_DATA_INVALID_FAULT 0x58
-#define CS_FAULT_EXCEPTION_TYPE_TILE_RANGE_FAULT 0x59
-#define CS_FAULT_EXCEPTION_TYPE_ADDR_RANGE_FAULT 0x5A
-#define CS_FAULT_EXCEPTION_TYPE_IMPRECISE_FAULT 0x5B
-#define CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT 0x69
-/* End of CS_FAULT_EXCEPTION_TYPE values */
-#define CS_FAULT_EXCEPTION_DATA_SHIFT 8
-#define CS_FAULT_EXCEPTION_DATA_MASK (0xFFFFFF << CS_FAULT_EXCEPTION_DATA_SHIFT)
-#define CS_FAULT_EXCEPTION_DATA_GET(reg_val) (((reg_val)&CS_FAULT_EXCEPTION_DATA_MASK) >> CS_FAULT_EXCEPTION_DATA_SHIFT)
-#define CS_FAULT_EXCEPTION_DATA_SET(reg_val, value) \
- (((reg_val) & ~CS_FAULT_EXCEPTION_DATA_MASK) | \
- (((value) << CS_FAULT_EXCEPTION_DATA_SHIFT) & CS_FAULT_EXCEPTION_DATA_MASK))
-
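/*
 * Illustrative sketch added by the editor, not part of the original patch:
 * splitting a CS_FAULT value into its exception type and data fields for
 * logging. The dev_warn() call and the 'dev' argument are illustrative only.
 */
static inline void example_report_cs_fault(struct device *dev, u32 cs_fault)
{
	/* The type values are the CS_FAULT_EXCEPTION_TYPE_* constants above. */
	dev_warn(dev, "CS fault: exception type 0x%x, data 0x%x\n",
		 CS_FAULT_EXCEPTION_TYPE_GET(cs_fault),
		 CS_FAULT_EXCEPTION_DATA_GET(cs_fault));
}
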
-/* CS_FATAL register */
-#define CS_FATAL_EXCEPTION_TYPE_SHIFT 0
-#define CS_FATAL_EXCEPTION_TYPE_MASK (0xFF << CS_FATAL_EXCEPTION_TYPE_SHIFT)
-#define CS_FATAL_EXCEPTION_TYPE_GET(reg_val) (((reg_val)&CS_FATAL_EXCEPTION_TYPE_MASK) >> CS_FATAL_EXCEPTION_TYPE_SHIFT)
-#define CS_FATAL_EXCEPTION_TYPE_SET(reg_val, value) \
- (((reg_val) & ~CS_FATAL_EXCEPTION_TYPE_MASK) | \
- (((value) << CS_FATAL_EXCEPTION_TYPE_SHIFT) & CS_FATAL_EXCEPTION_TYPE_MASK))
-/* CS_FATAL_EXCEPTION_TYPE values */
-#define CS_FATAL_EXCEPTION_TYPE_CS_CONFIG_FAULT 0x40
-#define CS_FATAL_EXCEPTION_TYPE_CS_ENDPOINT_FAULT 0x44
-#define CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT 0x48
-#define CS_FATAL_EXCEPTION_TYPE_CS_INVALID_INSTRUCTION 0x49
-#define CS_FATAL_EXCEPTION_TYPE_CS_CALL_STACK_OVERFLOW 0x4A
-#define CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR 0x68
-/* End of CS_FATAL_EXCEPTION_TYPE values */
-#define CS_FATAL_EXCEPTION_DATA_SHIFT 8
-#define CS_FATAL_EXCEPTION_DATA_MASK (0xFFFFFF << CS_FATAL_EXCEPTION_DATA_SHIFT)
-#define CS_FATAL_EXCEPTION_DATA_GET(reg_val) (((reg_val)&CS_FATAL_EXCEPTION_DATA_MASK) >> CS_FATAL_EXCEPTION_DATA_SHIFT)
-#define CS_FATAL_EXCEPTION_DATA_SET(reg_val, value) \
- (((reg_val) & ~CS_FATAL_EXCEPTION_DATA_MASK) | \
- (((value) << CS_FATAL_EXCEPTION_DATA_SHIFT) & CS_FATAL_EXCEPTION_DATA_MASK))
-
-/* CS_FAULT_INFO register */
-#define CS_FAULT_INFO_EXCEPTION_DATA_SHIFT 0
-#define CS_FAULT_INFO_EXCEPTION_DATA_MASK (0xFFFFFFFFFFFFFFFF << CS_FAULT_INFO_EXCEPTION_DATA_SHIFT)
-#define CS_FAULT_INFO_EXCEPTION_DATA_GET(reg_val) \
- (((reg_val)&CS_FAULT_INFO_EXCEPTION_DATA_MASK) >> CS_FAULT_INFO_EXCEPTION_DATA_SHIFT)
-#define CS_FAULT_INFO_EXCEPTION_DATA_SET(reg_val, value) \
- (((reg_val) & ~CS_FAULT_INFO_EXCEPTION_DATA_MASK) | \
- (((value) << CS_FAULT_INFO_EXCEPTION_DATA_SHIFT) & CS_FAULT_INFO_EXCEPTION_DATA_MASK))
-
-/* CS_FATAL_INFO register */
-#define CS_FATAL_INFO_EXCEPTION_DATA_SHIFT 0
-#define CS_FATAL_INFO_EXCEPTION_DATA_MASK (0xFFFFFFFFFFFFFFFF << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT)
-#define CS_FATAL_INFO_EXCEPTION_DATA_GET(reg_val) \
- (((reg_val)&CS_FATAL_INFO_EXCEPTION_DATA_MASK) >> CS_FATAL_INFO_EXCEPTION_DATA_SHIFT)
-#define CS_FATAL_INFO_EXCEPTION_DATA_SET(reg_val, value) \
- (((reg_val) & ~CS_FATAL_INFO_EXCEPTION_DATA_MASK) | \
- (((value) << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT) & CS_FATAL_INFO_EXCEPTION_DATA_MASK))
-
-/* CS_HEAP_VT_START register */
-#define CS_HEAP_VT_START_VALUE_SHIFT 0
-#define CS_HEAP_VT_START_VALUE_MASK (0xFFFFFFFF << CS_HEAP_VT_START_VALUE_SHIFT)
-#define CS_HEAP_VT_START_VALUE_GET(reg_val) (((reg_val)&CS_HEAP_VT_START_VALUE_MASK) >> CS_HEAP_VT_START_VALUE_SHIFT)
-#define CS_HEAP_VT_START_VALUE_SET(reg_val, value) \
- (((reg_val) & ~CS_HEAP_VT_START_VALUE_MASK) | \
- (((value) << CS_HEAP_VT_START_VALUE_SHIFT) & CS_HEAP_VT_START_VALUE_MASK))
-
-/* CS_HEAP_VT_END register */
-#define CS_HEAP_VT_END_VALUE_SHIFT 0
-#define CS_HEAP_VT_END_VALUE_MASK (0xFFFFFFFF << CS_HEAP_VT_END_VALUE_SHIFT)
-#define CS_HEAP_VT_END_VALUE_GET(reg_val) (((reg_val)&CS_HEAP_VT_END_VALUE_MASK) >> CS_HEAP_VT_END_VALUE_SHIFT)
-#define CS_HEAP_VT_END_VALUE_SET(reg_val, value) \
- (((reg_val) & ~CS_HEAP_VT_END_VALUE_MASK) | (((value) << CS_HEAP_VT_END_VALUE_SHIFT) & CS_HEAP_VT_END_VALUE_MASK))
-
-/* CS_HEAP_FRAG_END register */
-#define CS_HEAP_FRAG_END_VALUE_SHIFT 0
-#define CS_HEAP_FRAG_END_VALUE_MASK (0xFFFFFFFF << CS_HEAP_FRAG_END_VALUE_SHIFT)
-#define CS_HEAP_FRAG_END_VALUE_GET(reg_val) (((reg_val)&CS_HEAP_FRAG_END_VALUE_MASK) >> CS_HEAP_FRAG_END_VALUE_SHIFT)
-#define CS_HEAP_FRAG_END_VALUE_SET(reg_val, value) \
- (((reg_val) & ~CS_HEAP_FRAG_END_VALUE_MASK) | \
- (((value) << CS_HEAP_FRAG_END_VALUE_SHIFT) & CS_HEAP_FRAG_END_VALUE_MASK))
-
-/* CS_HEAP_ADDRESS register */
-#define CS_HEAP_ADDRESS_POINTER_SHIFT 0
-#define CS_HEAP_ADDRESS_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_HEAP_ADDRESS_POINTER_SHIFT)
-#define CS_HEAP_ADDRESS_POINTER_GET(reg_val) (((reg_val)&CS_HEAP_ADDRESS_POINTER_MASK) >> CS_HEAP_ADDRESS_POINTER_SHIFT)
-#define CS_HEAP_ADDRESS_POINTER_SET(reg_val, value) \
- (((reg_val) & ~CS_HEAP_ADDRESS_POINTER_MASK) | \
- (((value) << CS_HEAP_ADDRESS_POINTER_SHIFT) & CS_HEAP_ADDRESS_POINTER_MASK))
-/* End of CS_KERNEL_OUTPUT_BLOCK register set definitions */
-
-/* CS_USER_INPUT_BLOCK register set definitions */
-
-/* CS_INSERT register */
-#define CS_INSERT_VALUE_SHIFT 0
-#define CS_INSERT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_INSERT_VALUE_SHIFT)
-#define CS_INSERT_VALUE_GET(reg_val) (((reg_val)&CS_INSERT_VALUE_MASK) >> CS_INSERT_VALUE_SHIFT)
-#define CS_INSERT_VALUE_SET(reg_val, value) \
- (((reg_val) & ~CS_INSERT_VALUE_MASK) | (((value) << CS_INSERT_VALUE_SHIFT) & CS_INSERT_VALUE_MASK))
-
-/* CS_EXTRACT_INIT register */
-#define CS_EXTRACT_INIT_VALUE_SHIFT 0
-#define CS_EXTRACT_INIT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_EXTRACT_INIT_VALUE_SHIFT)
-#define CS_EXTRACT_INIT_VALUE_GET(reg_val) (((reg_val)&CS_EXTRACT_INIT_VALUE_MASK) >> CS_EXTRACT_INIT_VALUE_SHIFT)
-#define CS_EXTRACT_INIT_VALUE_SET(reg_val, value) \
- (((reg_val) & ~CS_EXTRACT_INIT_VALUE_MASK) | \
- (((value) << CS_EXTRACT_INIT_VALUE_SHIFT) & CS_EXTRACT_INIT_VALUE_MASK))
-/* End of CS_USER_INPUT_BLOCK register set definitions */
-
-/* CS_USER_OUTPUT_BLOCK register set definitions */
-
-/* CS_EXTRACT register */
-#define CS_EXTRACT_VALUE_SHIFT 0
-#define CS_EXTRACT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_EXTRACT_VALUE_SHIFT)
-#define CS_EXTRACT_VALUE_GET(reg_val) (((reg_val)&CS_EXTRACT_VALUE_MASK) >> CS_EXTRACT_VALUE_SHIFT)
-#define CS_EXTRACT_VALUE_SET(reg_val, value) \
- (((reg_val) & ~CS_EXTRACT_VALUE_MASK) | (((value) << CS_EXTRACT_VALUE_SHIFT) & CS_EXTRACT_VALUE_MASK))
-
-/* CS_ACTIVE register */
-#define CS_ACTIVE_HW_ACTIVE_SHIFT 0
-#define CS_ACTIVE_HW_ACTIVE_MASK (0x1 << CS_ACTIVE_HW_ACTIVE_SHIFT)
-#define CS_ACTIVE_HW_ACTIVE_GET(reg_val) (((reg_val)&CS_ACTIVE_HW_ACTIVE_MASK) >> CS_ACTIVE_HW_ACTIVE_SHIFT)
-#define CS_ACTIVE_HW_ACTIVE_SET(reg_val, value) \
- (((reg_val) & ~CS_ACTIVE_HW_ACTIVE_MASK) | (((value) << CS_ACTIVE_HW_ACTIVE_SHIFT) & CS_ACTIVE_HW_ACTIVE_MASK))
-/* End of CS_USER_OUTPUT_BLOCK register set definitions */
-
-/* CSG_INPUT_BLOCK register set definitions */
-
-/* CSG_REQ register */
-#define CSG_REQ_STATE_SHIFT 0
-#define CSG_REQ_STATE_MASK (0x7 << CSG_REQ_STATE_SHIFT)
-#define CSG_REQ_STATE_GET(reg_val) (((reg_val)&CSG_REQ_STATE_MASK) >> CSG_REQ_STATE_SHIFT)
-#define CSG_REQ_STATE_SET(reg_val, value) \
- (((reg_val) & ~CSG_REQ_STATE_MASK) | (((value) << CSG_REQ_STATE_SHIFT) & CSG_REQ_STATE_MASK))
-/* CSG_REQ_STATE values */
-#define CSG_REQ_STATE_TERMINATE 0x0
-#define CSG_REQ_STATE_START 0x1
-#define CSG_REQ_STATE_SUSPEND 0x2
-#define CSG_REQ_STATE_RESUME 0x3
-/* End of CSG_REQ_STATE values */
-#define CSG_REQ_EP_CFG_SHIFT 4
-#define CSG_REQ_EP_CFG_MASK (0x1 << CSG_REQ_EP_CFG_SHIFT)
-#define CSG_REQ_EP_CFG_GET(reg_val) (((reg_val)&CSG_REQ_EP_CFG_MASK) >> CSG_REQ_EP_CFG_SHIFT)
-#define CSG_REQ_EP_CFG_SET(reg_val, value) \
- (((reg_val) & ~CSG_REQ_EP_CFG_MASK) | (((value) << CSG_REQ_EP_CFG_SHIFT) & CSG_REQ_EP_CFG_MASK))
-#define CSG_REQ_STATUS_UPDATE_SHIFT 5
-#define CSG_REQ_STATUS_UPDATE_MASK (0x1 << CSG_REQ_STATUS_UPDATE_SHIFT)
-#define CSG_REQ_STATUS_UPDATE_GET(reg_val) (((reg_val)&CSG_REQ_STATUS_UPDATE_MASK) >> CSG_REQ_STATUS_UPDATE_SHIFT)
-#define CSG_REQ_STATUS_UPDATE_SET(reg_val, value) \
- (((reg_val) & ~CSG_REQ_STATUS_UPDATE_MASK) | \
- (((value) << CSG_REQ_STATUS_UPDATE_SHIFT) & CSG_REQ_STATUS_UPDATE_MASK))
-#define CSG_REQ_SYNC_UPDATE_SHIFT 28
-#define CSG_REQ_SYNC_UPDATE_MASK (0x1 << CSG_REQ_SYNC_UPDATE_SHIFT)
-#define CSG_REQ_SYNC_UPDATE_GET(reg_val) (((reg_val)&CSG_REQ_SYNC_UPDATE_MASK) >> CSG_REQ_SYNC_UPDATE_SHIFT)
-#define CSG_REQ_SYNC_UPDATE_SET(reg_val, value) \
- (((reg_val) & ~CSG_REQ_SYNC_UPDATE_MASK) | (((value) << CSG_REQ_SYNC_UPDATE_SHIFT) & CSG_REQ_SYNC_UPDATE_MASK))
-#define CSG_REQ_IDLE_SHIFT 29
-#define CSG_REQ_IDLE_MASK (0x1 << CSG_REQ_IDLE_SHIFT)
-#define CSG_REQ_IDLE_GET(reg_val) (((reg_val)&CSG_REQ_IDLE_MASK) >> CSG_REQ_IDLE_SHIFT)
-#define CSG_REQ_IDLE_SET(reg_val, value) \
- (((reg_val) & ~CSG_REQ_IDLE_MASK) | (((value) << CSG_REQ_IDLE_SHIFT) & CSG_REQ_IDLE_MASK))
-#define CSG_REQ_DOORBELL_SHIFT 30
-#define CSG_REQ_DOORBELL_MASK (0x1 << CSG_REQ_DOORBELL_SHIFT)
-#define CSG_REQ_DOORBELL_GET(reg_val) (((reg_val)&CSG_REQ_DOORBELL_MASK) >> CSG_REQ_DOORBELL_SHIFT)
-#define CSG_REQ_DOORBELL_SET(reg_val, value) \
- (((reg_val) & ~CSG_REQ_DOORBELL_MASK) | (((value) << CSG_REQ_DOORBELL_SHIFT) & CSG_REQ_DOORBELL_MASK))
-#define CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT 31
-#define CSG_REQ_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT)
-#define CSG_REQ_PROGRESS_TIMER_EVENT_GET(reg_val) \
- (((reg_val)&CSG_REQ_PROGRESS_TIMER_EVENT_MASK) >> CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT)
-#define CSG_REQ_PROGRESS_TIMER_EVENT_SET(reg_val, value) \
- (((reg_val) & ~CSG_REQ_PROGRESS_TIMER_EVENT_MASK) | \
- (((value) << CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK))
-
-/* CSG_ACK_IRQ_MASK register */
-#define CSG_ACK_IRQ_MASK_STATE_SHIFT 0
-#define CSG_ACK_IRQ_MASK_STATE_MASK (0x7 << CSG_ACK_IRQ_MASK_STATE_SHIFT)
-#define CSG_ACK_IRQ_MASK_STATE_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_STATE_MASK) >> CSG_ACK_IRQ_MASK_STATE_SHIFT)
-#define CSG_ACK_IRQ_MASK_STATE_SET(reg_val, value) \
- (((reg_val) & ~CSG_ACK_IRQ_MASK_STATE_MASK) | \
- (((value) << CSG_ACK_IRQ_MASK_STATE_SHIFT) & CSG_ACK_IRQ_MASK_STATE_MASK))
-/* CSG_ACK_IRQ_MASK_STATE values */
-#define CSG_ACK_IRQ_MASK_STATE_DISABLED 0x0
-#define CSG_ACK_IRQ_MASK_STATE_ENABLED 0x7
-/* End of CSG_ACK_IRQ_MASK_STATE values */
-#define CSG_ACK_IRQ_MASK_EP_CFG_SHIFT 4
-#define CSG_ACK_IRQ_MASK_EP_CFG_MASK (0x1 << CSG_ACK_IRQ_MASK_EP_CFG_SHIFT)
-#define CSG_ACK_IRQ_MASK_EP_CFG_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_EP_CFG_MASK) >> CSG_ACK_IRQ_MASK_EP_CFG_SHIFT)
-#define CSG_ACK_IRQ_MASK_EP_CFG_SET(reg_val, value) \
- (((reg_val) & ~CSG_ACK_IRQ_MASK_EP_CFG_MASK) | \
- (((value) << CSG_ACK_IRQ_MASK_EP_CFG_SHIFT) & CSG_ACK_IRQ_MASK_EP_CFG_MASK))
-#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT 5
-#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK (0x1 << CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT)
-#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_GET(reg_val) \
- (((reg_val)&CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK) >> CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT)
-#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_SET(reg_val, value) \
- (((reg_val) & ~CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK) | \
- (((value) << CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT) & CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK))
-#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT 28
-#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK (0x1 << CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT)
-#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_GET(reg_val) \
- (((reg_val)&CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK) >> CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT)
-#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_SET(reg_val, value) \
- (((reg_val) & ~CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK) | \
- (((value) << CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT) & CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK))
-#define CSG_ACK_IRQ_MASK_IDLE_SHIFT 29
-#define CSG_ACK_IRQ_MASK_IDLE_MASK (0x1 << CSG_ACK_IRQ_MASK_IDLE_SHIFT)
-#define CSG_ACK_IRQ_MASK_IDLE_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_IDLE_MASK) >> CSG_ACK_IRQ_MASK_IDLE_SHIFT)
-#define CSG_ACK_IRQ_MASK_IDLE_SET(reg_val, value) \
- (((reg_val) & ~CSG_ACK_IRQ_MASK_IDLE_MASK) | \
- (((value) << CSG_ACK_IRQ_MASK_IDLE_SHIFT) & CSG_ACK_IRQ_MASK_IDLE_MASK))
-#define CSG_ACK_IRQ_MASK_DOORBELL_SHIFT 30
-#define CSG_ACK_IRQ_MASK_DOORBELL_MASK (0x1 << CSG_ACK_IRQ_MASK_DOORBELL_SHIFT)
-#define CSG_ACK_IRQ_MASK_DOORBELL_GET(reg_val) \
- (((reg_val)&CSG_ACK_IRQ_MASK_DOORBELL_MASK) >> CSG_ACK_IRQ_MASK_DOORBELL_SHIFT)
-#define CSG_ACK_IRQ_MASK_DOORBELL_SET(reg_val, value) \
- (((reg_val) & ~CSG_ACK_IRQ_MASK_DOORBELL_MASK) | \
- (((value) << CSG_ACK_IRQ_MASK_DOORBELL_SHIFT) & CSG_ACK_IRQ_MASK_DOORBELL_MASK))
-#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT 31
-#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT)
-#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_GET(reg_val) \
- (((reg_val)&CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK) >> CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT)
-#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SET(reg_val, value) \
- (((reg_val) & ~CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK) | \
- (((value) << CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT) & CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK))
-
-/* CSG_EP_REQ register */
-#define CSG_EP_REQ_COMPUTE_EP_SHIFT 0
-#define CSG_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_EP_REQ_COMPUTE_EP_SHIFT)
-#define CSG_EP_REQ_COMPUTE_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_COMPUTE_EP_MASK) >> CSG_EP_REQ_COMPUTE_EP_SHIFT)
-#define CSG_EP_REQ_COMPUTE_EP_SET(reg_val, value) \
- (((reg_val) & ~CSG_EP_REQ_COMPUTE_EP_MASK) | \
- (((value) << CSG_EP_REQ_COMPUTE_EP_SHIFT) & CSG_EP_REQ_COMPUTE_EP_MASK))
-#define CSG_EP_REQ_FRAGMENT_EP_SHIFT 8
-#define CSG_EP_REQ_FRAGMENT_EP_MASK (0xFF << CSG_EP_REQ_FRAGMENT_EP_SHIFT)
-#define CSG_EP_REQ_FRAGMENT_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_FRAGMENT_EP_MASK) >> CSG_EP_REQ_FRAGMENT_EP_SHIFT)
-#define CSG_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \
- (((reg_val) & ~CSG_EP_REQ_FRAGMENT_EP_MASK) | \
- (((value) << CSG_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_EP_REQ_FRAGMENT_EP_MASK))
-#define CSG_EP_REQ_TILER_EP_SHIFT 16
-#define CSG_EP_REQ_TILER_EP_MASK (0xF << CSG_EP_REQ_TILER_EP_SHIFT)
-#define CSG_EP_REQ_TILER_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_TILER_EP_MASK) >> CSG_EP_REQ_TILER_EP_SHIFT)
-#define CSG_EP_REQ_TILER_EP_SET(reg_val, value) \
- (((reg_val) & ~CSG_EP_REQ_TILER_EP_MASK) | (((value) << CSG_EP_REQ_TILER_EP_SHIFT) & CSG_EP_REQ_TILER_EP_MASK))
-#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT 20
-#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT)
-#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_GET(reg_val) \
- (((reg_val)&CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) >> CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT)
-#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \
- (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \
- (((value) << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK))
-#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT 21
-#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT)
-#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_GET(reg_val) \
- (((reg_val)&CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) >> CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT)
-#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \
- (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \
- (((value) << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK))
-#define CSG_EP_REQ_PRIORITY_SHIFT 28
-#define CSG_EP_REQ_PRIORITY_MASK (0xF << CSG_EP_REQ_PRIORITY_SHIFT)
-#define CSG_EP_REQ_PRIORITY_GET(reg_val) (((reg_val)&CSG_EP_REQ_PRIORITY_MASK) >> CSG_EP_REQ_PRIORITY_SHIFT)
-#define CSG_EP_REQ_PRIORITY_SET(reg_val, value) \
- (((reg_val) & ~CSG_EP_REQ_PRIORITY_MASK) | (((value) << CSG_EP_REQ_PRIORITY_SHIFT) & CSG_EP_REQ_PRIORITY_MASK))
-
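/*
 * Illustrative sketch added by the editor, not part of the original patch:
 * composing a CSG_EP_REQ value from zero with the *_SET helpers. The endpoint
 * counts and priority below are arbitrary example numbers, not values
 * mandated by the interface.
 */
static inline u32 example_build_csg_ep_req(void)
{
	u32 ep_req = 0;

	ep_req = CSG_EP_REQ_COMPUTE_EP_SET(ep_req, 8);  /* compute endpoints */
	ep_req = CSG_EP_REQ_FRAGMENT_EP_SET(ep_req, 8); /* fragment endpoints */
	ep_req = CSG_EP_REQ_TILER_EP_SET(ep_req, 1);    /* tiler endpoints */
	ep_req = CSG_EP_REQ_PRIORITY_SET(ep_req, 4);    /* scheduling priority */

	return ep_req;
}
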
-/* CSG_SUSPEND_BUF register */
-#define CSG_SUSPEND_BUF_POINTER_SHIFT 0
-#define CSG_SUSPEND_BUF_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CSG_SUSPEND_BUF_POINTER_SHIFT)
-#define CSG_SUSPEND_BUF_POINTER_GET(reg_val) (((reg_val)&CSG_SUSPEND_BUF_POINTER_MASK) >> CSG_SUSPEND_BUF_POINTER_SHIFT)
-#define CSG_SUSPEND_BUF_POINTER_SET(reg_val, value) \
- (((reg_val) & ~CSG_SUSPEND_BUF_POINTER_MASK) | \
- (((value) << CSG_SUSPEND_BUF_POINTER_SHIFT) & CSG_SUSPEND_BUF_POINTER_MASK))
-
-/* CSG_PROTM_SUSPEND_BUF register */
-#define CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT 0
-#define CSG_PROTM_SUSPEND_BUF_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT)
-#define CSG_PROTM_SUSPEND_BUF_POINTER_GET(reg_val) \
- (((reg_val)&CSG_PROTM_SUSPEND_BUF_POINTER_MASK) >> CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT)
-#define CSG_PROTM_SUSPEND_BUF_POINTER_SET(reg_val, value) \
- (((reg_val) & ~CSG_PROTM_SUSPEND_BUF_POINTER_MASK) | \
- (((value) << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) & CSG_PROTM_SUSPEND_BUF_POINTER_MASK))
-
-/* End of CSG_INPUT_BLOCK register set definitions */
-
-/* CSG_OUTPUT_BLOCK register set definitions */
-
-/* CSG_ACK register */
-#define CSG_ACK_STATE_SHIFT 0
-#define CSG_ACK_STATE_MASK (0x7 << CSG_ACK_STATE_SHIFT)
-#define CSG_ACK_STATE_GET(reg_val) (((reg_val)&CSG_ACK_STATE_MASK) >> CSG_ACK_STATE_SHIFT)
-#define CSG_ACK_STATE_SET(reg_val, value) \
- (((reg_val) & ~CSG_ACK_STATE_MASK) | (((value) << CSG_ACK_STATE_SHIFT) & CSG_ACK_STATE_MASK))
-/* CSG_ACK_STATE values */
-#define CSG_ACK_STATE_TERMINATE 0x0
-#define CSG_ACK_STATE_START 0x1
-#define CSG_ACK_STATE_SUSPEND 0x2
-#define CSG_ACK_STATE_RESUME 0x3
-/* End of CSG_ACK_STATE values */
-#define CSG_ACK_EP_CFG_SHIFT 4
-#define CSG_ACK_EP_CFG_MASK (0x1 << CSG_ACK_EP_CFG_SHIFT)
-#define CSG_ACK_EP_CFG_GET(reg_val) (((reg_val)&CSG_ACK_EP_CFG_MASK) >> CSG_ACK_EP_CFG_SHIFT)
-#define CSG_ACK_EP_CFG_SET(reg_val, value) \
- (((reg_val) & ~CSG_ACK_EP_CFG_MASK) | (((value) << CSG_ACK_EP_CFG_SHIFT) & CSG_ACK_EP_CFG_MASK))
-#define CSG_ACK_STATUS_UPDATE_SHIFT 5
-#define CSG_ACK_STATUS_UPDATE_MASK (0x1 << CSG_ACK_STATUS_UPDATE_SHIFT)
-#define CSG_ACK_STATUS_UPDATE_GET(reg_val) (((reg_val)&CSG_ACK_STATUS_UPDATE_MASK) >> CSG_ACK_STATUS_UPDATE_SHIFT)
-#define CSG_ACK_STATUS_UPDATE_SET(reg_val, value) \
- (((reg_val) & ~CSG_ACK_STATUS_UPDATE_MASK) | \
- (((value) << CSG_ACK_STATUS_UPDATE_SHIFT) & CSG_ACK_STATUS_UPDATE_MASK))
-#define CSG_ACK_SYNC_UPDATE_SHIFT 28
-#define CSG_ACK_SYNC_UPDATE_MASK (0x1 << CSG_ACK_SYNC_UPDATE_SHIFT)
-#define CSG_ACK_SYNC_UPDATE_GET(reg_val) (((reg_val)&CSG_ACK_SYNC_UPDATE_MASK) >> CSG_ACK_SYNC_UPDATE_SHIFT)
-#define CSG_ACK_SYNC_UPDATE_SET(reg_val, value) \
- (((reg_val) & ~CSG_ACK_SYNC_UPDATE_MASK) | (((value) << CSG_ACK_SYNC_UPDATE_SHIFT) & CSG_ACK_SYNC_UPDATE_MASK))
-#define CSG_ACK_IDLE_SHIFT 29
-#define CSG_ACK_IDLE_MASK (0x1 << CSG_ACK_IDLE_SHIFT)
-#define CSG_ACK_IDLE_GET(reg_val) (((reg_val)&CSG_ACK_IDLE_MASK) >> CSG_ACK_IDLE_SHIFT)
-#define CSG_ACK_IDLE_SET(reg_val, value) \
- (((reg_val) & ~CSG_ACK_IDLE_MASK) | (((value) << CSG_ACK_IDLE_SHIFT) & CSG_ACK_IDLE_MASK))
-#define CSG_ACK_DOORBELL_SHIFT 30
-#define CSG_ACK_DOORBELL_MASK (0x1 << CSG_ACK_DOORBELL_SHIFT)
-#define CSG_ACK_DOORBELL_GET(reg_val) (((reg_val)&CSG_ACK_DOORBELL_MASK) >> CSG_ACK_DOORBELL_SHIFT)
-#define CSG_ACK_DOORBELL_SET(reg_val, value) \
- (((reg_val) & ~CSG_ACK_DOORBELL_MASK) | (((value) << CSG_ACK_DOORBELL_SHIFT) & CSG_ACK_DOORBELL_MASK))
-#define CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT 31
-#define CSG_ACK_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT)
-#define CSG_ACK_PROGRESS_TIMER_EVENT_GET(reg_val) \
- (((reg_val)&CSG_ACK_PROGRESS_TIMER_EVENT_MASK) >> CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT)
-#define CSG_ACK_PROGRESS_TIMER_EVENT_SET(reg_val, value) \
- (((reg_val) & ~CSG_ACK_PROGRESS_TIMER_EVENT_MASK) | \
- (((value) << CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT) & CSG_ACK_PROGRESS_TIMER_EVENT_MASK))
-
-/* CSG_STATUS_EP_CURRENT register */
-#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT 0
-#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK (0xFF << CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT)
-#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(reg_val) \
- (((reg_val)&CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK) >> CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT)
-#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_SET(reg_val, value) \
- (((reg_val) & ~CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK) | \
- (((value) << CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) & CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK))
-#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT 8
-#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK (0xFF << CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT)
-#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(reg_val) \
- (((reg_val)&CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK) >> CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT)
-#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SET(reg_val, value) \
- (((reg_val) & ~CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK) | \
- (((value) << CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) & CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK))
-#define CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT 16
-#define CSG_STATUS_EP_CURRENT_TILER_EP_MASK (0xF << CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT)
-#define CSG_STATUS_EP_CURRENT_TILER_EP_GET(reg_val) \
- (((reg_val)&CSG_STATUS_EP_CURRENT_TILER_EP_MASK) >> CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT)
-#define CSG_STATUS_EP_CURRENT_TILER_EP_SET(reg_val, value) \
- (((reg_val) & ~CSG_STATUS_EP_CURRENT_TILER_EP_MASK) | \
- (((value) << CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) & CSG_STATUS_EP_CURRENT_TILER_EP_MASK))
-
-/* CSG_STATUS_EP_REQ register */
-#define CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT 0
-#define CSG_STATUS_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT)
-#define CSG_STATUS_EP_REQ_COMPUTE_EP_GET(reg_val) \
- (((reg_val)&CSG_STATUS_EP_REQ_COMPUTE_EP_MASK) >> CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT)
-#define CSG_STATUS_EP_REQ_COMPUTE_EP_SET(reg_val, value) \
- (((reg_val) & ~CSG_STATUS_EP_REQ_COMPUTE_EP_MASK) | \
- (((value) << CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) & CSG_STATUS_EP_REQ_COMPUTE_EP_MASK))
-#define CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT 8
-#define CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK (0xFF << CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT)
-#define CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(reg_val) \
- (((reg_val)&CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK) >> CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT)
-#define CSG_STATUS_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \
- (((reg_val) & ~CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK) | \
- (((value) << CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK))
-#define CSG_STATUS_EP_REQ_TILER_EP_SHIFT 16
-#define CSG_STATUS_EP_REQ_TILER_EP_MASK (0xF << CSG_STATUS_EP_REQ_TILER_EP_SHIFT)
-#define CSG_STATUS_EP_REQ_TILER_EP_GET(reg_val) \
- (((reg_val)&CSG_STATUS_EP_REQ_TILER_EP_MASK) >> CSG_STATUS_EP_REQ_TILER_EP_SHIFT)
-#define CSG_STATUS_EP_REQ_TILER_EP_SET(reg_val, value) \
- (((reg_val) & ~CSG_STATUS_EP_REQ_TILER_EP_MASK) | \
- (((value) << CSG_STATUS_EP_REQ_TILER_EP_SHIFT) & CSG_STATUS_EP_REQ_TILER_EP_MASK))
-#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT 20
-#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK (0x1 << CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT)
-#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_GET(reg_val) \
- (((reg_val)&CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK) >> CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT)
-#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \
- (((reg_val) & ~CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \
- (((value) << CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK))
-#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT 21
-#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK (0x1 << CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT)
-#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_GET(reg_val) \
- (((reg_val)&CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) >> CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT)
-#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \
- (((reg_val) & ~CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \
- (((value) << CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK))
-
-/* End of CSG_OUTPUT_BLOCK register set definitions */
-
-/* STREAM_CONTROL_BLOCK register set definitions */
-
-/* STREAM_FEATURES register */
-#define STREAM_FEATURES_WORK_REGISTERS_SHIFT 0
-#define STREAM_FEATURES_WORK_REGISTERS_MASK (0xFF << STREAM_FEATURES_WORK_REGISTERS_SHIFT)
-#define STREAM_FEATURES_WORK_REGISTERS_GET(reg_val) \
- (((reg_val)&STREAM_FEATURES_WORK_REGISTERS_MASK) >> STREAM_FEATURES_WORK_REGISTERS_SHIFT)
-#define STREAM_FEATURES_WORK_REGISTERS_SET(reg_val, value) \
- (((reg_val) & ~STREAM_FEATURES_WORK_REGISTERS_MASK) | \
- (((value) << STREAM_FEATURES_WORK_REGISTERS_SHIFT) & STREAM_FEATURES_WORK_REGISTERS_MASK))
-#define STREAM_FEATURES_SCOREBOARDS_SHIFT 8
-#define STREAM_FEATURES_SCOREBOARDS_MASK (0xFF << STREAM_FEATURES_SCOREBOARDS_SHIFT)
-#define STREAM_FEATURES_SCOREBOARDS_GET(reg_val) \
- (((reg_val)&STREAM_FEATURES_SCOREBOARDS_MASK) >> STREAM_FEATURES_SCOREBOARDS_SHIFT)
-#define STREAM_FEATURES_SCOREBOARDS_SET(reg_val, value) \
- (((reg_val) & ~STREAM_FEATURES_SCOREBOARDS_MASK) | \
- (((value) << STREAM_FEATURES_SCOREBOARDS_SHIFT) & STREAM_FEATURES_SCOREBOARDS_MASK))
-#define STREAM_FEATURES_COMPUTE_SHIFT 16
-#define STREAM_FEATURES_COMPUTE_MASK (0x1 << STREAM_FEATURES_COMPUTE_SHIFT)
-#define STREAM_FEATURES_COMPUTE_GET(reg_val) (((reg_val)&STREAM_FEATURES_COMPUTE_MASK) >> STREAM_FEATURES_COMPUTE_SHIFT)
-#define STREAM_FEATURES_COMPUTE_SET(reg_val, value) \
- (((reg_val) & ~STREAM_FEATURES_COMPUTE_MASK) | \
- (((value) << STREAM_FEATURES_COMPUTE_SHIFT) & STREAM_FEATURES_COMPUTE_MASK))
-#define STREAM_FEATURES_FRAGMENT_SHIFT 17
-#define STREAM_FEATURES_FRAGMENT_MASK (0x1 << STREAM_FEATURES_FRAGMENT_SHIFT)
-#define STREAM_FEATURES_FRAGMENT_GET(reg_val) \
- (((reg_val)&STREAM_FEATURES_FRAGMENT_MASK) >> STREAM_FEATURES_FRAGMENT_SHIFT)
-#define STREAM_FEATURES_FRAGMENT_SET(reg_val, value) \
- (((reg_val) & ~STREAM_FEATURES_FRAGMENT_MASK) | \
- (((value) << STREAM_FEATURES_FRAGMENT_SHIFT) & STREAM_FEATURES_FRAGMENT_MASK))
-#define STREAM_FEATURES_TILER_SHIFT 18
-#define STREAM_FEATURES_TILER_MASK (0x1 << STREAM_FEATURES_TILER_SHIFT)
-#define STREAM_FEATURES_TILER_GET(reg_val) (((reg_val)&STREAM_FEATURES_TILER_MASK) >> STREAM_FEATURES_TILER_SHIFT)
-#define STREAM_FEATURES_TILER_SET(reg_val, value) \
- (((reg_val) & ~STREAM_FEATURES_TILER_MASK) | \
- (((value) << STREAM_FEATURES_TILER_SHIFT) & STREAM_FEATURES_TILER_MASK))
-
-/* STREAM_INPUT_VA register */
-#define STREAM_INPUT_VA_VALUE_SHIFT 0
-#define STREAM_INPUT_VA_VALUE_MASK (0xFFFFFFFF << STREAM_INPUT_VA_VALUE_SHIFT)
-#define STREAM_INPUT_VA_VALUE_GET(reg_val) (((reg_val)&STREAM_INPUT_VA_VALUE_MASK) >> STREAM_INPUT_VA_VALUE_SHIFT)
-#define STREAM_INPUT_VA_VALUE_SET(reg_val, value) \
- (((reg_val) & ~STREAM_INPUT_VA_VALUE_MASK) | \
- (((value) << STREAM_INPUT_VA_VALUE_SHIFT) & STREAM_INPUT_VA_VALUE_MASK))
-
-/* STREAM_OUTPUT_VA register */
-#define STREAM_OUTPUT_VA_VALUE_SHIFT 0
-#define STREAM_OUTPUT_VA_VALUE_MASK (0xFFFFFFFF << STREAM_OUTPUT_VA_VALUE_SHIFT)
-#define STREAM_OUTPUT_VA_VALUE_GET(reg_val) (((reg_val)&STREAM_OUTPUT_VA_VALUE_MASK) >> STREAM_OUTPUT_VA_VALUE_SHIFT)
-#define STREAM_OUTPUT_VA_VALUE_SET(reg_val, value) \
- (((reg_val) & ~STREAM_OUTPUT_VA_VALUE_MASK) | \
- (((value) << STREAM_OUTPUT_VA_VALUE_SHIFT) & STREAM_OUTPUT_VA_VALUE_MASK))
-/* End of STREAM_CONTROL_BLOCK register set definitions */
-
-/* GLB_INPUT_BLOCK register set definitions */
-
-/* GLB_REQ register */
-#define GLB_REQ_HALT_SHIFT 0
-#define GLB_REQ_HALT_MASK (0x1 << GLB_REQ_HALT_SHIFT)
-#define GLB_REQ_HALT_GET(reg_val) (((reg_val)&GLB_REQ_HALT_MASK) >> GLB_REQ_HALT_SHIFT)
-#define GLB_REQ_HALT_SET(reg_val, value) \
- (((reg_val) & ~GLB_REQ_HALT_MASK) | (((value) << GLB_REQ_HALT_SHIFT) & GLB_REQ_HALT_MASK))
-#define GLB_REQ_CFG_PROGRESS_TIMER_SHIFT 1
-#define GLB_REQ_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_REQ_CFG_PROGRESS_TIMER_SHIFT)
-#define GLB_REQ_CFG_PROGRESS_TIMER_GET(reg_val) \
- (((reg_val)&GLB_REQ_CFG_PROGRESS_TIMER_MASK) >> GLB_REQ_CFG_PROGRESS_TIMER_SHIFT)
-#define GLB_REQ_CFG_PROGRESS_TIMER_SET(reg_val, value) \
- (((reg_val) & ~GLB_REQ_CFG_PROGRESS_TIMER_MASK) | \
- (((value) << GLB_REQ_CFG_PROGRESS_TIMER_SHIFT) & GLB_REQ_CFG_PROGRESS_TIMER_MASK))
-#define GLB_REQ_CFG_ALLOC_EN_SHIFT 2
-#define GLB_REQ_CFG_ALLOC_EN_MASK (0x1 << GLB_REQ_CFG_ALLOC_EN_SHIFT)
-#define GLB_REQ_CFG_ALLOC_EN_GET(reg_val) (((reg_val)&GLB_REQ_CFG_ALLOC_EN_MASK) >> GLB_REQ_CFG_ALLOC_EN_SHIFT)
-#define GLB_REQ_CFG_ALLOC_EN_SET(reg_val, value) \
- (((reg_val) & ~GLB_REQ_CFG_ALLOC_EN_MASK) | (((value) << GLB_REQ_CFG_ALLOC_EN_SHIFT) & GLB_REQ_CFG_ALLOC_EN_MASK))
-#define GLB_REQ_CFG_PWROFF_TIMER_SHIFT 3
-#define GLB_REQ_CFG_PWROFF_TIMER_MASK (0x1 << GLB_REQ_CFG_PWROFF_TIMER_SHIFT)
-#define GLB_REQ_CFG_PWROFF_TIMER_GET(reg_val) \
- (((reg_val)&GLB_REQ_CFG_PWROFF_TIMER_MASK) >> GLB_REQ_CFG_PWROFF_TIMER_SHIFT)
-#define GLB_REQ_CFG_PWROFF_TIMER_SET(reg_val, value) \
- (((reg_val) & ~GLB_REQ_CFG_PWROFF_TIMER_MASK) | \
- (((value) << GLB_REQ_CFG_PWROFF_TIMER_SHIFT) & GLB_REQ_CFG_PWROFF_TIMER_MASK))
-#define GLB_REQ_PROTM_ENTER_SHIFT 4
-#define GLB_REQ_PROTM_ENTER_MASK (0x1 << GLB_REQ_PROTM_ENTER_SHIFT)
-#define GLB_REQ_PROTM_ENTER_GET(reg_val) (((reg_val)&GLB_REQ_PROTM_ENTER_MASK) >> GLB_REQ_PROTM_ENTER_SHIFT)
-#define GLB_REQ_PROTM_ENTER_SET(reg_val, value) \
- (((reg_val) & ~GLB_REQ_PROTM_ENTER_MASK) | (((value) << GLB_REQ_PROTM_ENTER_SHIFT) & GLB_REQ_PROTM_ENTER_MASK))
-#define GLB_REQ_PRFCNT_ENABLE_SHIFT 5
-#define GLB_REQ_PRFCNT_ENABLE_MASK (0x1 << GLB_REQ_PRFCNT_ENABLE_SHIFT)
-#define GLB_REQ_PRFCNT_ENABLE_GET(reg_val) (((reg_val)&GLB_REQ_PRFCNT_ENABLE_MASK) >> GLB_REQ_PRFCNT_ENABLE_SHIFT)
-#define GLB_REQ_PRFCNT_ENABLE_SET(reg_val, value) \
- (((reg_val) & ~GLB_REQ_PRFCNT_ENABLE_MASK) | \
- (((value) << GLB_REQ_PRFCNT_ENABLE_SHIFT) & GLB_REQ_PRFCNT_ENABLE_MASK))
-#define GLB_REQ_PRFCNT_SAMPLE_SHIFT 6
-#define GLB_REQ_PRFCNT_SAMPLE_MASK (0x1 << GLB_REQ_PRFCNT_SAMPLE_SHIFT)
-#define GLB_REQ_PRFCNT_SAMPLE_GET(reg_val) (((reg_val)&GLB_REQ_PRFCNT_SAMPLE_MASK) >> GLB_REQ_PRFCNT_SAMPLE_SHIFT)
-#define GLB_REQ_PRFCNT_SAMPLE_SET(reg_val, value) \
- (((reg_val) & ~GLB_REQ_PRFCNT_SAMPLE_MASK) | \
- (((value) << GLB_REQ_PRFCNT_SAMPLE_SHIFT) & GLB_REQ_PRFCNT_SAMPLE_MASK))
-#define GLB_REQ_COUNTER_ENABLE_SHIFT 7
-#define GLB_REQ_COUNTER_ENABLE_MASK (0x1 << GLB_REQ_COUNTER_ENABLE_SHIFT)
-#define GLB_REQ_COUNTER_ENABLE_GET(reg_val) (((reg_val)&GLB_REQ_COUNTER_ENABLE_MASK) >> GLB_REQ_COUNTER_ENABLE_SHIFT)
-#define GLB_REQ_COUNTER_ENABLE_SET(reg_val, value) \
- (((reg_val) & ~GLB_REQ_COUNTER_ENABLE_MASK) | \
- (((value) << GLB_REQ_COUNTER_ENABLE_SHIFT) & GLB_REQ_COUNTER_ENABLE_MASK))
-#define GLB_REQ_PING_SHIFT 8
-#define GLB_REQ_PING_MASK (0x1 << GLB_REQ_PING_SHIFT)
-#define GLB_REQ_PING_GET(reg_val) (((reg_val)&GLB_REQ_PING_MASK) >> GLB_REQ_PING_SHIFT)
-#define GLB_REQ_PING_SET(reg_val, value) \
- (((reg_val) & ~GLB_REQ_PING_MASK) | (((value) << GLB_REQ_PING_SHIFT) & GLB_REQ_PING_MASK))
-#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT 9
-#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK \
- (0x1 << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT)
-#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_GET(reg_val) \
- (((reg_val)&GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) >> \
- GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT)
-#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_SET(reg_val, value) \
- (((reg_val) & ~GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) | \
- (((value) << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) & \
- GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK))
-#define GLB_REQ_INACTIVE_COMPUTE_SHIFT 20
-#define GLB_REQ_INACTIVE_COMPUTE_MASK (0x1 << GLB_REQ_INACTIVE_COMPUTE_SHIFT)
-#define GLB_REQ_INACTIVE_COMPUTE_GET(reg_val) \
- (((reg_val)&GLB_REQ_INACTIVE_COMPUTE_MASK) >> GLB_REQ_INACTIVE_COMPUTE_SHIFT)
-#define GLB_REQ_INACTIVE_COMPUTE_SET(reg_val, value) \
- (((reg_val) & ~GLB_REQ_INACTIVE_COMPUTE_MASK) | \
- (((value) << GLB_REQ_INACTIVE_COMPUTE_SHIFT) & GLB_REQ_INACTIVE_COMPUTE_MASK))
-#define GLB_REQ_INACTIVE_FRAGMENT_SHIFT 21
-#define GLB_REQ_INACTIVE_FRAGMENT_MASK (0x1 << GLB_REQ_INACTIVE_FRAGMENT_SHIFT)
-#define GLB_REQ_INACTIVE_FRAGMENT_GET(reg_val) \
- (((reg_val)&GLB_REQ_INACTIVE_FRAGMENT_MASK) >> GLB_REQ_INACTIVE_FRAGMENT_SHIFT)
-#define GLB_REQ_INACTIVE_FRAGMENT_SET(reg_val, value) \
- (((reg_val) & ~GLB_REQ_INACTIVE_FRAGMENT_MASK) | \
- (((value) << GLB_REQ_INACTIVE_FRAGMENT_SHIFT) & GLB_REQ_INACTIVE_FRAGMENT_MASK))
-#define GLB_REQ_INACTIVE_TILER_SHIFT 22
-#define GLB_REQ_INACTIVE_TILER_MASK (0x1 << GLB_REQ_INACTIVE_TILER_SHIFT)
-#define GLB_REQ_INACTIVE_TILER_GET(reg_val) (((reg_val)&GLB_REQ_INACTIVE_TILER_MASK) >> GLB_REQ_INACTIVE_TILER_SHIFT)
-#define GLB_REQ_INACTIVE_TILER_SET(reg_val, value) \
- (((reg_val) & ~GLB_REQ_INACTIVE_TILER_MASK) | \
- (((value) << GLB_REQ_INACTIVE_TILER_SHIFT) & GLB_REQ_INACTIVE_TILER_MASK))
-#define GLB_REQ_PROTM_EXIT_SHIFT 23
-#define GLB_REQ_PROTM_EXIT_MASK (0x1 << GLB_REQ_PROTM_EXIT_SHIFT)
-#define GLB_REQ_PROTM_EXIT_GET(reg_val) (((reg_val)&GLB_REQ_PROTM_EXIT_MASK) >> GLB_REQ_PROTM_EXIT_SHIFT)
-#define GLB_REQ_PROTM_EXIT_SET(reg_val, value) \
- (((reg_val) & ~GLB_REQ_PROTM_EXIT_MASK) | (((value) << GLB_REQ_PROTM_EXIT_SHIFT) & GLB_REQ_PROTM_EXIT_MASK))
-#define GLB_REQ_PRFCNT_THRESHOLD_SHIFT 24
-#define GLB_REQ_PRFCNT_THRESHOLD_MASK (0x1 << GLB_REQ_PRFCNT_THRESHOLD_SHIFT)
-#define GLB_REQ_PRFCNT_THRESHOLD_GET(reg_val) \
- (((reg_val)&GLB_REQ_PRFCNT_THRESHOLD_MASK) >> GLB_REQ_PRFCNT_THRESHOLD_SHIFT)
-#define GLB_REQ_PRFCNT_THRESHOLD_SET(reg_val, value) \
- (((reg_val) & ~GLB_REQ_PRFCNT_THRESHOLD_MASK) | \
- (((value) << GLB_REQ_PRFCNT_THRESHOLD_SHIFT) & GLB_REQ_PRFCNT_THRESHOLD_MASK))
-#define GLB_REQ_PRFCNT_OVERFLOW_SHIFT 25
-#define GLB_REQ_PRFCNT_OVERFLOW_MASK (0x1 << GLB_REQ_PRFCNT_OVERFLOW_SHIFT)
-#define GLB_REQ_PRFCNT_OVERFLOW_GET(reg_val) (((reg_val)&GLB_REQ_PRFCNT_OVERFLOW_MASK) >> GLB_REQ_PRFCNT_OVERFLOW_SHIFT)
-#define GLB_REQ_PRFCNT_OVERFLOW_SET(reg_val, value) \
- (((reg_val) & ~GLB_REQ_PRFCNT_OVERFLOW_MASK) | \
- (((value) << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) & GLB_REQ_PRFCNT_OVERFLOW_MASK))
-#define GLB_REQ_DEBUG_CSF_REQ_SHIFT 30
-#define GLB_REQ_DEBUG_CSF_REQ_MASK (0x1 << GLB_REQ_DEBUG_CSF_REQ_SHIFT)
-#define GLB_REQ_DEBUG_CSF_REQ_GET(reg_val) (((reg_val)&GLB_REQ_DEBUG_CSF_REQ_MASK) >> GLB_REQ_DEBUG_CSF_REQ_SHIFT)
-#define GLB_REQ_DEBUG_CSF_REQ_SET(reg_val, value) \
- (((reg_val) & ~GLB_REQ_DEBUG_CSF_REQ_MASK) | \
- (((value) << GLB_REQ_DEBUG_CSF_REQ_SHIFT) & GLB_REQ_DEBUG_CSF_REQ_MASK))
-#define GLB_REQ_DEBUG_HOST_REQ_SHIFT 31
-#define GLB_REQ_DEBUG_HOST_REQ_MASK (0x1 << GLB_REQ_DEBUG_HOST_REQ_SHIFT)
-#define GLB_REQ_DEBUG_HOST_REQ_GET(reg_val) (((reg_val)&GLB_REQ_DEBUG_HOST_REQ_MASK) >> GLB_REQ_DEBUG_HOST_REQ_SHIFT)
-#define GLB_REQ_DEBUG_HOST_REQ_SET(reg_val, value) \
- (((reg_val) & ~GLB_REQ_DEBUG_HOST_REQ_MASK) | \
- (((value) << GLB_REQ_DEBUG_HOST_REQ_SHIFT) & GLB_REQ_DEBUG_HOST_REQ_MASK))
-
-/* GLB_ACK_IRQ_MASK register */
-#define GLB_ACK_IRQ_MASK_HALT_SHIFT 0
-#define GLB_ACK_IRQ_MASK_HALT_MASK (0x1 << GLB_ACK_IRQ_MASK_HALT_SHIFT)
-#define GLB_ACK_IRQ_MASK_HALT_GET(reg_val) (((reg_val)&GLB_ACK_IRQ_MASK_HALT_MASK) >> GLB_ACK_IRQ_MASK_HALT_SHIFT)
-#define GLB_ACK_IRQ_MASK_HALT_SET(reg_val, value) \
- (((reg_val) & ~GLB_ACK_IRQ_MASK_HALT_MASK) | \
- (((value) << GLB_ACK_IRQ_MASK_HALT_SHIFT) & GLB_ACK_IRQ_MASK_HALT_MASK))
-#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT 1
-#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT)
-#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_GET(reg_val) \
- (((reg_val)&GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK) >> GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT)
-#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SET(reg_val, value) \
- (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK) | \
- (((value) << GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT) & GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK))
-#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT 2
-#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT)
-#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_GET(reg_val) \
- (((reg_val)&GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK) >> GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT)
-#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SET(reg_val, value) \
- (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK) | \
- (((value) << GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT) & GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK))
-#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT 3
-#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT)
-#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_GET(reg_val) \
- (((reg_val)&GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK) >> GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT)
-#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SET(reg_val, value) \
- (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK) | \
- (((value) << GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT) & GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK))
-#define GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT 4
-#define GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK (0x1 << GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT)
-#define GLB_ACK_IRQ_MASK_PROTM_ENTER_GET(reg_val) \
- (((reg_val)&GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK) >> GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT)
-#define GLB_ACK_IRQ_MASK_PROTM_ENTER_SET(reg_val, value) \
- (((reg_val) & ~GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK) | \
- (((value) << GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT) & GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK))
-#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT 5
-#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT)
-#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_GET(reg_val) \
- (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT)
-#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SET(reg_val, value) \
- (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK) | \
- (((value) << GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK))
-#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT 6
-#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT)
-#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_GET(reg_val) \
- (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT)
-#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SET(reg_val, value) \
- (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK) | \
- (((value) << GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK))
-#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT 7
-#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK (0x1 << GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT)
-#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_GET(reg_val) \
- (((reg_val)&GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK) >> GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT)
-#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SET(reg_val, value) \
- (((reg_val) & ~GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK) | \
- (((value) << GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT) & GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK))
-#define GLB_ACK_IRQ_MASK_PING_SHIFT 8
-#define GLB_ACK_IRQ_MASK_PING_MASK (0x1 << GLB_ACK_IRQ_MASK_PING_SHIFT)
-#define GLB_ACK_IRQ_MASK_PING_GET(reg_val) (((reg_val)&GLB_ACK_IRQ_MASK_PING_MASK) >> GLB_ACK_IRQ_MASK_PING_SHIFT)
-#define GLB_ACK_IRQ_MASK_PING_SET(reg_val, value) \
- (((reg_val) & ~GLB_ACK_IRQ_MASK_PING_MASK) | \
- (((value) << GLB_ACK_IRQ_MASK_PING_SHIFT) & GLB_ACK_IRQ_MASK_PING_MASK))
-#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT 9
-#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK \
- (0x1 << GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT)
-#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_GET(reg_val) \
- (((reg_val)&GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) >> \
- GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT)
-#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SET(reg_val, value) \
- (((reg_val) & ~GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) | \
- (((value) << GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) & \
- GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK))
-#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT 20
-#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT)
-#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_GET(reg_val) \
- (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT)
-#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SET(reg_val, value) \
- (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK) | \
- (((value) << GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK))
-#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT 21
-#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT)
-#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_GET(reg_val) \
- (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT)
-#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SET(reg_val, value) \
- (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK) | \
- (((value) << GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK))
-#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT 22
-#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT)
-#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_GET(reg_val) \
- (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT)
-#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_SET(reg_val, value) \
- (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK) | \
- (((value) << GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK))
-#define GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT 23
-#define GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK (0x1 << GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT)
-#define GLB_ACK_IRQ_MASK_PROTM_EXIT_GET(reg_val) \
- (((reg_val)&GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK) >> GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT)
-#define GLB_ACK_IRQ_MASK_PROTM_EXIT_SET(reg_val, value) \
- (((reg_val) & ~GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK) | \
- (((value) << GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT) & GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK))
-#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT 24
-#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT)
-#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_GET(reg_val) \
- (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT)
-#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SET(reg_val, value) \
- (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) | \
- (((value) << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK))
-#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT 25
-#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT)
-#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_GET(reg_val) \
- (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT)
-#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SET(reg_val, value) \
- (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) | \
- (((value) << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK))
-#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT 30
-#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK (0x1 << GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT)
-#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_GET(reg_val) \
- (((reg_val)&GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK) >> GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT)
-#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SET(reg_val, value) \
- (((reg_val) & ~GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK) | \
- (((value) << GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT) & GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK))
-#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT 31
-#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK (0x1 << GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT)
-#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_GET(reg_val) \
- (((reg_val)&GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK) >> GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT)
-#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SET(reg_val, value) \
- (((reg_val) & ~GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK) | \
- (((value) << GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT) & GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK))
-
-/* GLB_PROGRESS_TIMER register */
-#define GLB_PROGRESS_TIMER_TIMEOUT_SHIFT 0
-#define GLB_PROGRESS_TIMER_TIMEOUT_MASK (0xFFFFFFFF << GLB_PROGRESS_TIMER_TIMEOUT_SHIFT)
-#define GLB_PROGRESS_TIMER_TIMEOUT_GET(reg_val) \
- (((reg_val)&GLB_PROGRESS_TIMER_TIMEOUT_MASK) >> GLB_PROGRESS_TIMER_TIMEOUT_SHIFT)
-#define GLB_PROGRESS_TIMER_TIMEOUT_SET(reg_val, value) \
- (((reg_val) & ~GLB_PROGRESS_TIMER_TIMEOUT_MASK) | \
- (((value) << GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) & GLB_PROGRESS_TIMER_TIMEOUT_MASK))
-
-/* GLB_PWROFF_TIMER register */
-#define GLB_PWROFF_TIMER_TIMEOUT_SHIFT 0
-#define GLB_PWROFF_TIMER_TIMEOUT_MASK (0x7FFFFFFF << GLB_PWROFF_TIMER_TIMEOUT_SHIFT)
-#define GLB_PWROFF_TIMER_TIMEOUT_GET(reg_val) \
- (((reg_val)&GLB_PWROFF_TIMER_TIMEOUT_MASK) >> GLB_PWROFF_TIMER_TIMEOUT_SHIFT)
-#define GLB_PWROFF_TIMER_TIMEOUT_SET(reg_val, value) \
- (((reg_val) & ~GLB_PWROFF_TIMER_TIMEOUT_MASK) | \
- (((value) << GLB_PWROFF_TIMER_TIMEOUT_SHIFT) & GLB_PWROFF_TIMER_TIMEOUT_MASK))
-#define GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT 31
-#define GLB_PWROFF_TIMER_TIMER_SOURCE_MASK (0x1 << GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT)
-#define GLB_PWROFF_TIMER_TIMER_SOURCE_GET(reg_val) \
- (((reg_val)&GLB_PWROFF_TIMER_TIMER_SOURCE_MASK) >> GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT)
-#define GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val, value) \
- (((reg_val) & ~GLB_PWROFF_TIMER_TIMER_SOURCE_MASK) | \
- (((value) << GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT) & GLB_PWROFF_TIMER_TIMER_SOURCE_MASK))
-/* GLB_PWROFF_TIMER_TIMER_SOURCE values */
-#define GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0
-#define GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1
-/* End of GLB_PWROFF_TIMER_TIMER_SOURCE values */
-
-/* GLB_ALLOC_EN register */
-#define GLB_ALLOC_EN_MASK_SHIFT 0
-#define GLB_ALLOC_EN_MASK_MASK (0xFFFFFFFFFFFFFFFF << GLB_ALLOC_EN_MASK_SHIFT)
-#define GLB_ALLOC_EN_MASK_GET(reg_val) (((reg_val)&GLB_ALLOC_EN_MASK_MASK) >> GLB_ALLOC_EN_MASK_SHIFT)
-#define GLB_ALLOC_EN_MASK_SET(reg_val, value) \
- (((reg_val) & ~GLB_ALLOC_EN_MASK_MASK) | (((value) << GLB_ALLOC_EN_MASK_SHIFT) & GLB_ALLOC_EN_MASK_MASK))
-
-/* GLB_PROTM_COHERENCY register */
-#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT 0
-#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK \
- (0xFFFFFFFF << GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT)
-#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_GET(reg_val) \
- (((reg_val)&GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK) >> \
- GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT)
-#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SET(reg_val, value) \
- (((reg_val) & ~GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK) | \
- (((value) << GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT) & \
- GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK))
-/* End of GLB_INPUT_BLOCK register set definitions */
-
-/* GLB_OUTPUT_BLOCK register set definitions */
-
-/* GLB_ACK register */
-#define GLB_ACK_CFG_PROGRESS_TIMER_SHIFT 1
-#define GLB_ACK_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_ACK_CFG_PROGRESS_TIMER_SHIFT)
-#define GLB_ACK_CFG_PROGRESS_TIMER_GET(reg_val) \
- (((reg_val)&GLB_ACK_CFG_PROGRESS_TIMER_MASK) >> GLB_ACK_CFG_PROGRESS_TIMER_SHIFT)
-#define GLB_ACK_CFG_PROGRESS_TIMER_SET(reg_val, value) \
- (((reg_val) & ~GLB_ACK_CFG_PROGRESS_TIMER_MASK) | \
- (((value) << GLB_ACK_CFG_PROGRESS_TIMER_SHIFT) & GLB_ACK_CFG_PROGRESS_TIMER_MASK))
-#define GLB_ACK_CFG_ALLOC_EN_SHIFT 2
-#define GLB_ACK_CFG_ALLOC_EN_MASK (0x1 << GLB_ACK_CFG_ALLOC_EN_SHIFT)
-#define GLB_ACK_CFG_ALLOC_EN_GET(reg_val) (((reg_val)&GLB_ACK_CFG_ALLOC_EN_MASK) >> GLB_ACK_CFG_ALLOC_EN_SHIFT)
-#define GLB_ACK_CFG_ALLOC_EN_SET(reg_val, value) \
- (((reg_val) & ~GLB_ACK_CFG_ALLOC_EN_MASK) | (((value) << GLB_ACK_CFG_ALLOC_EN_SHIFT) & GLB_ACK_CFG_ALLOC_EN_MASK))
-/* End of GLB_OUTPUT_BLOCK register set definitions */
-
-/* The following registers and fields are for headers before 10.x.7/11.x.4 */
-#define GLB_REQ_IDLE_ENABLE_SHIFT (10)
-#define GLB_REQ_REQ_IDLE_ENABLE (1 << GLB_REQ_IDLE_ENABLE_SHIFT)
-#define GLB_REQ_REQ_IDLE_DISABLE (0 << GLB_REQ_IDLE_ENABLE_SHIFT)
-#define GLB_REQ_IDLE_ENABLE_MASK (0x1 << GLB_REQ_IDLE_ENABLE_SHIFT)
-#define GLB_REQ_IDLE_DISABLE_MASK (0x1 << GLB_REQ_IDLE_ENABLE_SHIFT)
-#define GLB_REQ_IDLE_EVENT_SHIFT (26)
-#define GLB_REQ_IDLE_EVENT_MASK (0x1 << GLB_REQ_IDLE_EVENT_SHIFT)
-#define GLB_ACK_IDLE_ENABLE_SHIFT (10)
-#define GLB_ACK_ACK_IDLE_ENABLE (1 << GLB_ACK_IDLE_ENABLE_SHIFT)
-#define GLB_ACK_ACK_IDLE_DISABLE (0 << GLB_ACK_IDLE_ENABLE_SHIFT)
-#define GLB_ACK_IDLE_ENABLE_MASK (0x1 << GLB_ACK_IDLE_ENABLE_SHIFT)
-#define GLB_ACK_IDLE_EVENT_SHIFT (26)
-#define GLB_ACK_IDLE_EVENT_MASK (0x1 << GLB_REQ_IDLE_EVENT_SHIFT)
-
-#define GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT (26)
-#define GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK (0x1 << GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT)
-
-#define GLB_IDLE_TIMER (0x0080)
-/* GLB_IDLE_TIMER register */
-#define GLB_IDLE_TIMER_TIMEOUT_SHIFT (0)
-#define GLB_IDLE_TIMER_TIMEOUT_MASK ((0x7FFFFFFF) << GLB_IDLE_TIMER_TIMEOUT_SHIFT)
-#define GLB_IDLE_TIMER_TIMEOUT_GET(reg_val) (((reg_val)&GLB_IDLE_TIMER_TIMEOUT_MASK) >> GLB_IDLE_TIMER_TIMEOUT_SHIFT)
-#define GLB_IDLE_TIMER_TIMEOUT_SET(reg_val, value) \
- (((reg_val) & ~GLB_IDLE_TIMER_TIMEOUT_MASK) | \
- (((value) << GLB_IDLE_TIMER_TIMEOUT_SHIFT) & GLB_IDLE_TIMER_TIMEOUT_MASK))
-#define GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT (31)
-#define GLB_IDLE_TIMER_TIMER_SOURCE_MASK ((0x1) << GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT)
-#define GLB_IDLE_TIMER_TIMER_SOURCE_GET(reg_val) \
- (((reg_val)&GLB_IDLE_TIMER_TIMER_SOURCE_MASK) >> GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT)
-#define GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val, value) \
- (((reg_val) & ~GLB_IDLE_TIMER_TIMER_SOURCE_MASK) | \
- (((value) << GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) & GLB_IDLE_TIMER_TIMER_SOURCE_MASK))
-/* GLB_IDLE_TIMER_TIMER_SOURCE values */
-#define GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0
-#define GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1
-/* End of GLB_IDLE_TIMER_TIMER_SOURCE values */
-
-#define CSG_STATUS_STATE (0x0018) /* CSG state status register */
-/* CSG_STATUS_STATE register */
-#define CSG_STATUS_STATE_IDLE_SHIFT (0)
-#define CSG_STATUS_STATE_IDLE_MASK ((0x1) << CSG_STATUS_STATE_IDLE_SHIFT)
-#define CSG_STATUS_STATE_IDLE_GET(reg_val) \
- (((reg_val)&CSG_STATUS_STATE_IDLE_MASK) >> CSG_STATUS_STATE_IDLE_SHIFT)
-#define CSG_STATUS_STATE_IDLE_SET(reg_val, value) \
- (((reg_val) & ~CSG_STATUS_STATE_IDLE_MASK) | \
- (((value) << CSG_STATUS_STATE_IDLE_SHIFT) & CSG_STATUS_STATE_IDLE_MASK))
-
-#endif /* _GPU_CSF_REGISTERS_H_ */
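All of the register fields above follow the same SHIFT/MASK/GET/SET accessor pattern, and requests in the GLB_REQ/GLB_ACK pair are signalled by toggling a request bit so that it differs from the corresponding acknowledge bit (the interrupt handlers later in this diff test glb_req ^ glb_ack). A minimal sketch of that usage, assuming kernel context; the helper name is illustrative and not part of the driver:

/* Illustrative only: request a new performance counter sample by making
 * the PRFCNT_SAMPLE bit of GLB_REQ differ from the one in GLB_ACK.
 */
static u32 example_request_prfcnt_sample(u32 glb_req, u32 glb_ack)
{
	u32 toggled = GLB_REQ_PRFCNT_SAMPLE_GET(glb_ack) ^ 1;

	return GLB_REQ_PRFCNT_SAMPLE_SET(glb_req, toggled);
}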
diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c
index e35c570..e3e046c 100644
--- a/mali_kbase/csf/mali_kbase_csf.c
+++ b/mali_kbase/csf/mali_kbase_csf.c
@@ -27,7 +27,7 @@
#include <linux/export.h>
#include <linux/priority_control_manager.h>
#include <linux/shmem_fs.h>
-#include "mali_gpu_csf_registers.h"
+#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h>
#include "mali_kbase_csf_tiler_heap.h"
#include <mmu/mali_kbase_mmu.h>
#include "mali_kbase_csf_timeout.h"
@@ -588,7 +588,7 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx,
spin_lock_irqsave(&kctx->csf.event_lock, flags);
dev_dbg(kctx->kbdev->dev,
- "Remove any pending command queue fatal from context %p\n",
+ "Remove any pending command queue fatal from context %pK\n",
(void *)kctx);
list_del_init(&queue->error.link);
spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
@@ -1132,6 +1132,26 @@ static int create_suspend_buffers(struct kbase_context *const kctx,
}
/**
+ * generate_group_uid() - Generate an ID that is unique across all kernel base
+ * devices and contexts, for a queue group and CSG.
+ *
+ * Return: A unique ID in the form of an unsigned 32-bit integer
+ */
+static u32 generate_group_uid(void)
+{
+ /* use first KBase device to store max UID */
+ struct kbase_device *kbdev = kbase_find_device(-1);
+ u32 uid = 1;
+
+ if (kbdev)
+ uid = (u32) atomic_inc_return(&kbdev->group_max_uid_in_devices);
+ else
+ WARN(1, "NULL kbase device pointer in group UID generation");
+
+ return uid;
+}
+
+/**
* create_queue_group() - Create a queue group
*
* @kctx: Address of the kbase context within which the queue group
@@ -1142,7 +1162,7 @@ static int create_suspend_buffers(struct kbase_context *const kctx,
* Return: a queue group handle on success, or a negative error code on failure.
*/
static int create_queue_group(struct kbase_context *const kctx,
- const union kbase_ioctl_cs_queue_group_create *const create)
+ union kbase_ioctl_cs_queue_group_create *const create)
{
int group_handle = find_free_group_handle(kctx);
@@ -1178,6 +1198,9 @@ static int create_queue_group(struct kbase_context *const kctx,
group->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
group->faulted = false;
+ group->group_uid = generate_group_uid();
+ create->out.group_uid = group->group_uid;
+
INIT_LIST_HEAD(&group->link);
INIT_LIST_HEAD(&group->link_to_schedule);
INIT_LIST_HEAD(&group->error_fatal.link);
@@ -1409,7 +1432,7 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
spin_lock_irqsave(&kctx->csf.event_lock, flags);
dev_dbg(kbdev->dev,
- "Remove any pending group fatal error from context %p\n",
+ "Remove any pending group fatal error from context %pK\n",
(void *)group->kctx);
list_del_init(&group->error_tiler_oom.link);
@@ -1503,7 +1526,7 @@ static void add_error(struct kbase_context *const kctx,
error->data = *data;
list_add_tail(&error->link, &kctx->csf.error_list);
dev_dbg(kctx->kbdev->dev,
- "Added error %p of type %d in context %p\n",
+ "Added error %pK of type %d in context %pK\n",
(void *)error, data->type, (void *)kctx);
}
@@ -1796,7 +1819,7 @@ int kbase_csf_event_wait_add(struct kbase_context *kctx,
spin_lock_irqsave(&kctx->csf.event_lock, flags);
list_add_tail(&event->link, &kctx->csf.event_callback_list);
dev_dbg(kctx->kbdev->dev,
- "Added event handler %p with param %p\n", event,
+ "Added event handler %pK with param %pK\n", event,
event->param);
spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
@@ -1818,7 +1841,7 @@ void kbase_csf_event_wait_remove(struct kbase_context *kctx,
if ((event->callback == callback) && (event->param == param)) {
list_del(&event->link);
dev_dbg(kctx->kbdev->dev,
- "Removed event handler %p with param %p\n",
+ "Removed event handler %pK with param %pK\n",
event, event->param);
kfree(event);
break;
@@ -1841,7 +1864,7 @@ bool kbase_csf_read_error(struct kbase_context *kctx,
struct kbase_csf_notification, link);
list_del_init(&error_data->link);
*event_data = error_data->data;
- dev_dbg(kctx->kbdev->dev, "Dequeued error %p in context %p\n",
+ dev_dbg(kctx->kbdev->dev, "Dequeued error %pK in context %pK\n",
(void *)error_data, (void *)kctx);
} else {
got_event = false;
@@ -1859,7 +1882,7 @@ bool kbase_csf_error_pending(struct kbase_context *kctx)
spin_lock_irqsave(&kctx->csf.event_lock, flags);
event_pended = !list_empty(&kctx->csf.error_list);
- dev_dbg(kctx->kbdev->dev, "%s error is pending in context %p\n",
+ dev_dbg(kctx->kbdev->dev, "%s error is pending in context %pK\n",
event_pended ? "An" : "No", (void *)kctx);
spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
@@ -1872,7 +1895,7 @@ void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu)
unsigned long flags;
dev_dbg(kctx->kbdev->dev,
- "Signal event (%s GPU notify) for context %p\n",
+ "Signal event (%s GPU notify) for context %pK\n",
notify_gpu ? "with" : "without", (void *)kctx);
/* First increment the signal count and wake up event thread.
@@ -1903,7 +1926,7 @@ void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu)
enum kbase_csf_event_callback_action action;
dev_dbg(kctx->kbdev->dev,
- "Calling event handler %p with param %p\n",
+ "Calling event handler %pK with param %pK\n",
(void *)event, event->param);
action = event->callback(event->param);
if (action == KBASE_CSF_EVENT_CALLBACK_REMOVE) {
@@ -1926,7 +1949,7 @@ void kbase_csf_event_wait_remove_all(struct kbase_context *kctx)
event, next_event, &kctx->csf.event_callback_list, link) {
list_del(&event->link);
dev_dbg(kctx->kbdev->dev,
- "Removed event handler %p with param %p\n",
+ "Removed event handler %pK with param %pK\n",
(void *)event, event->param);
kfree(event);
}
@@ -2231,6 +2254,31 @@ static void protm_event_worker(struct work_struct *data)
kbase_csf_scheduler_group_protm_enter(group);
}
+static void report_queue_fatal_error(struct kbase_queue *const queue,
+ u32 cs_fatal, u64 cs_fatal_info,
+ u8 group_handle)
+{
+ struct base_csf_notification error =
+ { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
+ .payload = {
+ .csg_error = {
+ .handle = group_handle,
+ .error = {
+ .error_type =
+ BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL,
+ .payload = {
+ .fatal_queue = {
+ .sideband =
+ cs_fatal_info,
+ .status = cs_fatal,
+ .csi_index =
+ queue->csi_index,
+ } } } } } };
+
+ add_error(queue->kctx, &queue->error, &error);
+ kbase_event_wakeup(queue->kctx);
+}
+
/**
* handle_fault_event - Handler for CS fault.
*
@@ -2268,51 +2316,10 @@ handle_fault_event(struct kbase_queue *const queue,
kbase_gpu_exception_name(cs_fault_exception_type),
cs_fault_exception_data, cs_fault_info_exception_data);
- /* TODO GPUCORE-26291: We've identified an issue with faulted CSIs not
- * making progress in some cases. Until the issue is resolved,
- * RESOURCE_EVICTION_TIMEOUT error shall be treated as a fatal error
- * to give userspace a chance to terminate the group. This is intended
- * to be a temporary workaround.
- */
if (cs_fault_exception_type ==
CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT)
- kbase_csf_add_queue_fatal_error(
- queue, GPU_EXCEPTION_TYPE_SW_FAULT_2, 0);
-}
-
-static void report_queue_fatal_error(struct kbase_queue *const queue,
- u32 cs_fatal, u64 cs_fatal_info,
- u8 group_handle)
-{
- struct base_csf_notification error = {
- .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
- .payload = {
- .csg_error = {
- .handle = group_handle,
- .error = {
- .error_type =
- BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL,
- .payload = {
- .fatal_queue = {
- .sideband = cs_fatal_info,
- .status = cs_fatal,
- .csi_index = queue->csi_index,
- }
- }
- }
- }
- }
- };
-
- add_error(queue->kctx, &queue->error, &error);
- kbase_event_wakeup(queue->kctx);
-}
-
-void kbase_csf_add_queue_fatal_error(struct kbase_queue *const queue,
- u32 cs_fatal, u64 cs_fatal_info)
-{
- report_queue_fatal_error(queue, cs_fatal, cs_fatal_info,
- queue->group->handle);
+ report_queue_fatal_error(queue, GPU_EXCEPTION_TYPE_SW_FAULT_2,
+ 0, queue->group->handle);
}
/**
@@ -2643,8 +2650,20 @@ static void process_csg_interrupts(struct kbase_device *const kbdev,
process_cs_interrupts(group, ginfo, irqreq, irqack);
}
+/**
+ * process_prfcnt_interrupts - Process performance counter interrupts.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @glb_req: Global request register value.
+ * @glb_ack: Global acknowledge register value.
+ *
+ * Handles interrupts issued by the firmware that relate to the performance
+ * counters, for example on completion of a performance counter sample. The
+ * scheduler spinlock is expected to be already held when this function is
+ * called.
+ */
static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req,
- u32 glb_ack, unsigned long *flags)
+ u32 glb_ack)
{
const struct kbase_csf_global_iface *const global_iface =
&kbdev->csf.global_iface;
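With the flags pointer removed from the parameter list, locking stays entirely with the caller. A minimal sketch of the documented calling contract, matching the interrupt-handler change later in this diff:

	unsigned long flags;

	kbase_csf_scheduler_spin_lock(kbdev, &flags);
	/* glb_req and glb_ack are read while the scheduler lock is held */
	process_prfcnt_interrupts(kbdev, glb_req, glb_ack);
	kbase_csf_scheduler_spin_unlock(kbdev, flags);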
@@ -2656,14 +2675,11 @@ static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req,
((glb_req & GLB_REQ_PRFCNT_SAMPLE_MASK) ==
(glb_ack & GLB_REQ_PRFCNT_SAMPLE_MASK))) {
kbdev->csf.hwcnt.request_pending = false;
- kbase_csf_scheduler_spin_unlock(kbdev, *flags);
dev_dbg(kbdev->dev, "PRFCNT_SAMPLE done interrupt received.");
kbase_hwcnt_backend_csf_on_prfcnt_sample(
&kbdev->hwcnt_gpu_iface);
-
- kbase_csf_scheduler_spin_lock(kbdev, flags);
}
/* Process PRFCNT_ENABLE interrupt. */
@@ -2671,32 +2687,25 @@ static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req,
((glb_req & GLB_REQ_PRFCNT_ENABLE_MASK) ==
(glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK))) {
kbdev->csf.hwcnt.enable_pending = false;
- kbase_csf_scheduler_spin_unlock(kbdev, *flags);
dev_dbg(kbdev->dev,
"PRFCNT_ENABLE status changed interrupt received.");
- if (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK) {
+ if (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK)
kbase_hwcnt_backend_csf_on_prfcnt_enable(
&kbdev->hwcnt_gpu_iface);
- } else {
+ else
kbase_hwcnt_backend_csf_on_prfcnt_disable(
&kbdev->hwcnt_gpu_iface);
- }
-
- kbase_csf_scheduler_spin_lock(kbdev, flags);
}
/* Process PRFCNT_THRESHOLD interrupt. */
if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_THRESHOLD_MASK) {
- kbase_csf_scheduler_spin_unlock(kbdev, *flags);
dev_dbg(kbdev->dev, "PRFCNT_THRESHOLD interrupt received.");
kbase_hwcnt_backend_csf_on_prfcnt_threshold(
&kbdev->hwcnt_gpu_iface);
- kbase_csf_scheduler_spin_lock(kbdev, flags);
-
/* Set the GLB_REQ.PRFCNT_THRESHOLD flag back to
* the same value as GLB_ACK.PRFCNT_THRESHOLD
* flag in order to enable reporting of another
@@ -2709,13 +2718,11 @@ static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req,
/* Process PRFCNT_OVERFLOW interrupt. */
if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_OVERFLOW_MASK) {
- kbase_csf_scheduler_spin_unlock(kbdev, *flags);
dev_dbg(kbdev->dev, "PRFCNT_OVERFLOW interrupt received.");
kbase_hwcnt_backend_csf_on_prfcnt_overflow(
&kbdev->hwcnt_gpu_iface);
- kbase_csf_scheduler_spin_lock(kbdev, flags);
/* Set the GLB_REQ.PRFCNT_OVERFLOW flag back to
* the same value as GLB_ACK.PRFCNT_OVERFLOW
* flag in order to enable reporting of another
@@ -2790,8 +2797,7 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
}
}
- process_prfcnt_interrupts(kbdev, glb_req, glb_ack,
- &flags);
+ process_prfcnt_interrupts(kbdev, glb_req, glb_ack);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
diff --git a/mali_kbase/csf/mali_kbase_csf.h b/mali_kbase/csf/mali_kbase_csf.h
index 6252515..effd468 100644
--- a/mali_kbase/csf/mali_kbase_csf.h
+++ b/mali_kbase/csf/mali_kbase_csf.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
*
- * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -366,19 +366,6 @@ void kbase_csf_add_group_fatal_error(
struct base_gpu_queue_group_error const *const err_payload);
/**
- * kbase_csf_add_queue_fatal_error - Report a fatal queue error to userspace
- *
- * @queue: Pointer to queue for which fatal event was received.
- * @cs_fatal: Fault information
- * @cs_fatal_info: Additional fault information
- *
- * If a queue has already been in fatal error status,
- * subsequent fatal error on the queue should never take place.
- */
-void kbase_csf_add_queue_fatal_error(struct kbase_queue *const queue,
- u32 cs_fatal, u64 cs_fatal_info);
-
-/**
* kbase_csf_interrupt - Handle interrupts issued by CSF firmware.
*
* @kbdev: The kbase device to handle an IRQ for
diff --git a/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c b/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c
index fb3a718..b54b2fc 100644
--- a/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c
+++ b/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
*
- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -154,8 +154,7 @@ int kbase_csf_cpu_queue_dump(struct kbase_context *kctx,
mutex_lock(&kctx->csf.lock);
- if (kctx->csf.cpu_queue.buffer)
- kfree(kctx->csf.cpu_queue.buffer);
+ kfree(kctx->csf.cpu_queue.buffer);
if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) ==
BASE_CSF_CPU_QUEUE_DUMP_PENDING) {
diff --git a/mali_kbase/csf/mali_kbase_csf_defs.h b/mali_kbase/csf/mali_kbase_csf_defs.h
index a6f1958..0517399 100644
--- a/mali_kbase/csf/mali_kbase_csf_defs.h
+++ b/mali_kbase/csf/mali_kbase_csf_defs.h
@@ -401,6 +401,8 @@ struct kbase_protected_suspend_buffer {
* @tiler_mask: Mask of tiler endpoints the group is allowed to use.
* @fragment_mask: Mask of fragment endpoints the group is allowed to use.
* @compute_mask: Mask of compute endpoints the group is allowed to use.
+ * @group_uid: 32-bit wide unsigned identifier for the group, unique
+ * across all kbase devices and contexts.
* @link: Link to this queue group in the 'runnable_groups' list of
* the corresponding kctx.
* @link_to_schedule: Link to this queue group in the list of prepared groups
@@ -449,6 +451,8 @@ struct kbase_queue_group {
u64 fragment_mask;
u64 compute_mask;
+ u32 group_uid;
+
struct list_head link;
struct list_head link_to_schedule;
enum kbase_csf_group_state run_state;
@@ -801,9 +805,6 @@ struct kbase_csf_csg_slot {
* other phases.
* @non_idle_scanout_grps: Count on the non-idle groups in the scan-out
* list at the scheduling prepare stage.
- * @apply_async_protm: Signalling the internal scheduling apply stage to
- * act with some special handling for entering the
- * protected mode asynchronously.
* @pm_active_count: Count indicating if the scheduler is owning a power
* management reference count. Reference is taken when
* the count becomes 1 and is dropped when the count
@@ -853,7 +854,6 @@ struct kbase_csf_scheduler {
struct work_struct gpu_idle_work;
atomic_t non_idle_offslot_grps;
u32 non_idle_scanout_grps;
- bool apply_async_protm;
u32 pm_active_count;
unsigned int csg_scheduling_period_ms;
bool tick_timer_active;
@@ -1055,7 +1055,7 @@ struct kbase_csf_firmware_interface {
struct protected_memory_allocation **pma;
};
-/**
+/*
* struct kbase_csf_hwcnt - Object containing members for handling the dump of
* HW counters.
*
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.c b/mali_kbase/csf/mali_kbase_csf_firmware.c
index ae039aa..73b8e03 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware.c
@@ -48,10 +48,17 @@
#define MALI_MAX_FIRMWARE_NAME_LEN ((size_t)20)
+
static char fw_name[MALI_MAX_FIRMWARE_NAME_LEN] = "mali_csffw.bin";
module_param_string(fw_name, fw_name, sizeof(fw_name), 0644);
MODULE_PARM_DESC(fw_name, "firmware image");
+/* Maximum waiting time, in milliseconds, for the firmware to boot */
+static unsigned int csf_firmware_boot_timeout_ms = 500;
+module_param(csf_firmware_boot_timeout_ms, uint, 0444);
+MODULE_PARM_DESC(csf_firmware_boot_timeout_ms,
+ "Maximum time to wait for firmware to boot.");
+
#ifdef CONFIG_MALI_DEBUG
/* Makes Driver wait indefinitely for an acknowledgment for the different
* requests it sends to firmware. Otherwise the timeouts interfere with the
@@ -93,7 +100,6 @@ MODULE_PARM_DESC(fw_debug,
#define TL_METADATA_ENTRY_NAME_OFFSET (0x8)
-#define CSF_FIRMWARE_BOOT_TIMEOUT_MS (500)
#define CSF_MAX_FW_STOP_LOOPS (100000)
#define CSF_GLB_REQ_CFG_MASK \
@@ -232,7 +238,7 @@ static void stop_csf_firmware(struct kbase_device *kbdev)
static void wait_for_firmware_boot(struct kbase_device *kbdev)
{
const long wait_timeout =
- kbase_csf_timeout_in_jiffies(CSF_FIRMWARE_BOOT_TIMEOUT_MS);
+ kbase_csf_timeout_in_jiffies(csf_firmware_boot_timeout_ms);
long remaining;
/* Firmware will generate a global interface interrupt once booting
@@ -987,6 +993,7 @@ static int parse_capabilities(struct kbase_device *kbdev)
iface->group_stride = shared_info[GLB_GROUP_STRIDE/4];
iface->prfcnt_size = shared_info[GLB_PRFCNT_SIZE/4];
+ iface->instr_features = shared_info[GLB_INSTR_FEATURES / 4];
if ((GROUP_CONTROL_0 +
(unsigned long)iface->group_num * iface->group_stride) >
@@ -1239,14 +1246,8 @@ static void handle_internal_firmware_fatal(struct kbase_device *const kbdev)
kbase_ctx_sched_release_ctx_lock(kctx);
}
- /* Internal FW error could mean hardware counters will stop working.
- * Put the backend into the unrecoverable error state to cause
- * current and subsequent counter operations to immediately
- * fail, avoiding the risk of a hang.
- */
- kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface);
-
- if (kbase_prepare_to_reset_gpu(kbdev))
+ if (kbase_prepare_to_reset_gpu(kbdev,
+ RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
kbase_reset_gpu(kbdev);
}
@@ -1669,6 +1670,7 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32
return pwroff;
}
+
int kbase_csf_firmware_init(struct kbase_device *kbdev)
{
const struct firmware *firmware;
@@ -1836,6 +1838,7 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
if (ret != 0)
goto error;
+
/* Firmware loaded successfully */
release_firmware(firmware);
KBASE_KTRACE_ADD(kbdev, FIRMWARE_BOOT, NULL,
@@ -1987,7 +1990,7 @@ void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev)
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
}
-int kbase_csf_firmware_ping(struct kbase_device *const kbdev)
+void kbase_csf_firmware_ping(struct kbase_device *const kbdev)
{
const struct kbase_csf_global_iface *const global_iface =
&kbdev->csf.global_iface;
@@ -1997,7 +2000,11 @@ int kbase_csf_firmware_ping(struct kbase_device *const kbdev)
set_global_request(global_iface, GLB_REQ_PING_MASK);
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
+int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev)
+{
+ kbase_csf_firmware_ping(kbdev);
return wait_for_global_request(kbdev, GLB_REQ_PING_MASK);
}
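Splitting the old blocking ping into kbase_csf_firmware_ping() (fire the request) and kbase_csf_firmware_ping_wait() (fire and block on the acknowledgement) lets callers choose whether to wait. A minimal liveness-check sketch built on the blocking variant; the error handling shown is illustrative:

	if (kbase_csf_firmware_ping_wait(kbdev)) {
		dev_err(kbdev->dev, "Firmware did not acknowledge ping\n");
		/* e.g. escalate to a GPU reset from here */
	}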
@@ -2040,11 +2047,17 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev)
err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK);
if (!err) {
+ unsigned long irq_flags;
+
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbdev->protected_mode = true;
kbase_ipa_protection_mode_switch_event(kbdev);
kbase_ipa_control_protm_entered(kbdev);
+
+ kbase_csf_scheduler_spin_lock(kbdev, &irq_flags);
kbase_hwcnt_backend_csf_protm_entered(&kbdev->hwcnt_gpu_iface);
+ kbase_csf_scheduler_spin_unlock(kbdev, irq_flags);
+
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
}
@@ -2139,26 +2152,28 @@ static u32 copy_grp_and_stm(
return total_stream_num;
}
-u32 kbase_csf_firmware_get_glb_iface(struct kbase_device *kbdev,
+u32 kbase_csf_firmware_get_glb_iface(
+ struct kbase_device *kbdev,
struct basep_cs_group_control *const group_data,
u32 const max_group_num,
struct basep_cs_stream_control *const stream_data,
u32 const max_total_stream_num, u32 *const glb_version,
- u32 *const features, u32 *const group_num, u32 *const prfcnt_size)
+ u32 *const features, u32 *const group_num, u32 *const prfcnt_size,
+ u32 *instr_features)
{
const struct kbase_csf_global_iface * const iface =
&kbdev->csf.global_iface;
- if (WARN_ON(!glb_version) ||
- WARN_ON(!features) ||
- WARN_ON(!group_num) ||
- WARN_ON(!prfcnt_size))
+ if (WARN_ON(!glb_version) || WARN_ON(!features) ||
+ WARN_ON(!group_num) || WARN_ON(!prfcnt_size) ||
+ WARN_ON(!instr_features))
return 0;
*glb_version = iface->version;
*features = iface->features;
*group_num = iface->group_num;
*prfcnt_size = iface->prfcnt_size;
+ *instr_features = iface->instr_features;
return copy_grp_and_stm(iface, group_data, max_group_num,
stream_data, max_total_stream_num);
@@ -2237,9 +2252,9 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
mutex_lock(&kbdev->csf.reg_lock);
ret = kbase_add_va_region_rbtree(kbdev, va_reg, 0, num_pages, 1);
va_reg->flags &= ~KBASE_REG_FREE;
- mutex_unlock(&kbdev->csf.reg_lock);
if (ret)
goto va_region_add_error;
+ mutex_unlock(&kbdev->csf.reg_lock);
gpu_map_properties &= (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR);
gpu_map_properties |= gpu_map_prot;
@@ -2261,9 +2276,9 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
mmu_insert_pages_error:
mutex_lock(&kbdev->csf.reg_lock);
kbase_remove_va_region(va_reg);
- mutex_unlock(&kbdev->csf.reg_lock);
va_region_add_error:
kbase_free_alloced_region(va_reg);
+ mutex_unlock(&kbdev->csf.reg_lock);
va_region_alloc_error:
vunmap(cpu_addr);
vmap_error:
@@ -2293,8 +2308,8 @@ void kbase_csf_firmware_mcu_shared_mapping_term(
if (csf_mapping->va_reg) {
mutex_lock(&kbdev->csf.reg_lock);
kbase_remove_va_region(csf_mapping->va_reg);
- mutex_unlock(&kbdev->csf.reg_lock);
kbase_free_alloced_region(csf_mapping->va_reg);
+ mutex_unlock(&kbdev->csf.reg_lock);
}
if (csf_mapping->phys) {
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.h b/mali_kbase/csf/mali_kbase_csf_firmware.h
index a2dc4fd..13ff701 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware.h
+++ b/mali_kbase/csf/mali_kbase_csf_firmware.h
@@ -23,7 +23,7 @@
#define _KBASE_CSF_FIRMWARE_H_
#include "device/mali_kbase_device.h"
-#include "mali_gpu_csf_registers.h"
+#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h>
/*
* PAGE_KERNEL_RO was only defined on 32bit ARM in 4.19 in:
@@ -266,6 +266,7 @@ u32 kbase_csf_firmware_csg_output(
* @group_stride: Stride in bytes in JASID0 virtual address between
* CSG capability structures.
* @prfcnt_size: Performance counters size.
+ * @instr_features: Instrumentation features.
* @groups: Address of an array of CSG capability structures.
*/
struct kbase_csf_global_iface {
@@ -277,6 +278,7 @@ struct kbase_csf_global_iface {
u32 group_num;
u32 group_stride;
u32 prfcnt_size;
+ u32 instr_features;
struct kbase_csf_cmd_stream_group_info *groups;
};
@@ -397,13 +399,23 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev);
/**
* kbase_csf_firmware_ping - Send the ping request to firmware.
*
- * The function sends the ping request to firmware to confirm it is alive.
+ * The function sends the ping request to firmware.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ */
+void kbase_csf_firmware_ping(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_firmware_ping_wait - Send the ping request to firmware and wait.
+ *
+ * The function sends the ping request to firmware and waits to confirm it is
+ * alive.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* Return: 0 on success, or negative on failure.
*/
-int kbase_csf_firmware_ping(struct kbase_device *kbdev);
+int kbase_csf_firmware_ping_wait(struct kbase_device *kbdev);
/**
* kbase_csf_firmware_set_timeout - Set a hardware endpoint progress timeout.
@@ -570,12 +582,14 @@ bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev);
* in bytes. Bits 31:16 hold the size of firmware
* performance counter data and 15:0 hold the size of
* hardware performance counter data.
- */
-u32 kbase_csf_firmware_get_glb_iface(struct kbase_device *kbdev,
- struct basep_cs_group_control *group_data, u32 max_group_num,
- struct basep_cs_stream_control *stream_data, u32 max_total_stream_num,
- u32 *glb_version, u32 *features, u32 *group_num, u32 *prfcnt_size);
-
+ * @instr_features: Instrumentation features. Bits 7:4 hold the max size
+ * of events. Bits 3:0 hold the offset update rate.
+ */
+u32 kbase_csf_firmware_get_glb_iface(
+ struct kbase_device *kbdev, struct basep_cs_group_control *group_data,
+ u32 max_group_num, struct basep_cs_stream_control *stream_data,
+ u32 max_total_stream_num, u32 *glb_version, u32 *features,
+ u32 *group_num, u32 *prfcnt_size, u32 *instr_features);
/**
* Get CSF firmware header timeline metadata content
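For reference, a sketch of how a caller passes the widened out-parameter list after this change; all local names here are illustrative:

	u32 glb_version, features, group_num, prfcnt_size, instr_features;
	u32 total_streams;

	total_streams = kbase_csf_firmware_get_glb_iface(kbdev,
		group_data, max_group_num, stream_data, max_total_stream_num,
		&glb_version, &features, &group_num, &prfcnt_size,
		&instr_features);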
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
index 6349917..a3901cd 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
@@ -237,6 +237,9 @@ static int invent_capabilities(struct kbase_device *kbdev)
iface->kbdev = kbdev;
iface->features = 0;
iface->prfcnt_size = 64;
+ iface->instr_features =
+ 0x81; /* update rate=1, max event size = 1<<8 = 256 */
+
iface->group_num = ARRAY_SIZE(interface->csg);
iface->group_stride = 0;
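Under the field layout documented for @instr_features in mali_kbase_csf_firmware.h (bits 3:0 hold the offset update rate, bits 7:4 hold the maximum event size, which the comment above reads as a power-of-two exponent), the invented 0x81 value decodes as below; the variable names are illustrative:

	u32 instr_features = 0x81;
	u32 update_rate = instr_features & 0xF;                   /* 1 */
	u32 max_event_size = 1u << ((instr_features >> 4) & 0xF); /* 1 << 8 = 256 */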
@@ -463,14 +466,8 @@ static void handle_internal_firmware_fatal(struct kbase_device *const kbdev)
kbase_ctx_sched_release_ctx_lock(kctx);
}
- /* Internal FW error could mean hardware counters will stop working.
- * Put the backend into the unrecoverable error state to cause
- * current and subsequent counter operations to immediately
- * fail, avoiding the risk of a hang.
- */
- kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface);
-
- if (kbase_prepare_to_reset_gpu(kbdev))
+ if (kbase_prepare_to_reset_gpu(kbdev,
+ RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
kbase_reset_gpu(kbdev);
}
@@ -1032,7 +1029,7 @@ void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev)
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
}
-int kbase_csf_firmware_ping(struct kbase_device *const kbdev)
+void kbase_csf_firmware_ping(struct kbase_device *const kbdev)
{
const struct kbase_csf_global_iface *const global_iface =
&kbdev->csf.global_iface;
@@ -1042,7 +1039,11 @@ int kbase_csf_firmware_ping(struct kbase_device *const kbdev)
set_global_request(global_iface, GLB_REQ_PING_MASK);
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
+int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev)
+{
+ kbase_csf_firmware_ping(kbdev);
return wait_for_global_request(kbdev, GLB_REQ_PING_MASK);
}
@@ -1170,26 +1171,28 @@ static u32 copy_grp_and_stm(
return total_stream_num;
}
-u32 kbase_csf_firmware_get_glb_iface(struct kbase_device *kbdev,
+u32 kbase_csf_firmware_get_glb_iface(
+ struct kbase_device *kbdev,
struct basep_cs_group_control *const group_data,
u32 const max_group_num,
struct basep_cs_stream_control *const stream_data,
u32 const max_total_stream_num, u32 *const glb_version,
- u32 *const features, u32 *const group_num, u32 *const prfcnt_size)
+ u32 *const features, u32 *const group_num, u32 *const prfcnt_size,
+ u32 *const instr_features)
{
const struct kbase_csf_global_iface * const iface =
&kbdev->csf.global_iface;
- if (WARN_ON(!glb_version) ||
- WARN_ON(!features) ||
- WARN_ON(!group_num) ||
- WARN_ON(!prfcnt_size))
+ if (WARN_ON(!glb_version) || WARN_ON(!features) ||
+ WARN_ON(!group_num) || WARN_ON(!prfcnt_size) ||
+ WARN_ON(!instr_features))
return 0;
*glb_version = iface->version;
*features = iface->features;
*group_num = iface->group_num;
*prfcnt_size = iface->prfcnt_size;
+ *instr_features = iface->instr_features;
return copy_grp_and_stm(iface, group_data, max_group_num,
stream_data, max_total_stream_num);
@@ -1269,9 +1272,9 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
mutex_lock(&kbdev->csf.reg_lock);
ret = kbase_add_va_region_rbtree(kbdev, va_reg, 0, num_pages, 1);
va_reg->flags &= ~KBASE_REG_FREE;
- mutex_unlock(&kbdev->csf.reg_lock);
if (ret)
goto va_region_add_error;
+ mutex_unlock(&kbdev->csf.reg_lock);
gpu_map_properties &= (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR);
gpu_map_properties |= gpu_map_prot;
@@ -1293,9 +1296,9 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
mmu_insert_pages_error:
mutex_lock(&kbdev->csf.reg_lock);
kbase_remove_va_region(va_reg);
- mutex_unlock(&kbdev->csf.reg_lock);
va_region_add_error:
kbase_free_alloced_region(va_reg);
+ mutex_unlock(&kbdev->csf.reg_lock);
va_region_alloc_error:
vunmap(cpu_addr);
vmap_error:
@@ -1325,8 +1328,8 @@ void kbase_csf_firmware_mcu_shared_mapping_term(
if (csf_mapping->va_reg) {
mutex_lock(&kbdev->csf.reg_lock);
kbase_remove_va_region(csf_mapping->va_reg);
- mutex_unlock(&kbdev->csf.reg_lock);
kbase_free_alloced_region(csf_mapping->va_reg);
+ mutex_unlock(&kbdev->csf.reg_lock);
}
if (csf_mapping->phys) {
diff --git a/mali_kbase/csf/mali_kbase_csf_ioctl.h b/mali_kbase/csf/mali_kbase_csf_ioctl.h
deleted file mode 100644
index 8c63e1c..0000000
--- a/mali_kbase/csf/mali_kbase_csf_ioctl.h
+++ /dev/null
@@ -1,382 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-#ifndef _KBASE_CSF_IOCTL_H_
-#define _KBASE_CSF_IOCTL_H_
-
-#include <asm-generic/ioctl.h>
-#include <linux/types.h>
-
-/*
- * 1.0:
- * - CSF IOCTL header separated from JM
- * 1.1:
- * - Add a new priority level BASE_QUEUE_GROUP_PRIORITY_REALTIME
- * - Add ioctl 54: This controls the priority setting.
- * 1.2:
- * - Add new CSF GPU_FEATURES register into the property structure
- * returned by KBASE_IOCTL_GET_GPUPROPS
- */
-
-#define BASE_UK_VERSION_MAJOR 1
-#define BASE_UK_VERSION_MINOR 2
-
-/**
- * struct kbase_ioctl_version_check - Check version compatibility between
- * kernel and userspace
- *
- * @major: Major version number
- * @minor: Minor version number
- */
-struct kbase_ioctl_version_check {
- __u16 major;
- __u16 minor;
-};
-
-#define KBASE_IOCTL_VERSION_CHECK_RESERVED \
- _IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check)
-
-
-/**
- * struct kbase_ioctl_cs_queue_register - Register a GPU command queue with the
- * base back-end
- *
- * @buffer_gpu_addr: GPU address of the buffer backing the queue
- * @buffer_size: Size of the buffer in bytes
- * @priority: Priority of the queue within a group when run within a process
- * @padding: Currently unused, must be zero
- */
-struct kbase_ioctl_cs_queue_register {
- __u64 buffer_gpu_addr;
- __u32 buffer_size;
- __u8 priority;
- __u8 padding[3];
-};
-
-#define KBASE_IOCTL_CS_QUEUE_REGISTER \
- _IOW(KBASE_IOCTL_TYPE, 36, struct kbase_ioctl_cs_queue_register)
-
-/**
- * struct kbase_ioctl_cs_queue_kick - Kick the GPU command queue group scheduler
- * to notify that a queue has been updated
- *
- * @buffer_gpu_addr: GPU address of the buffer backing the queue
- */
-struct kbase_ioctl_cs_queue_kick {
- __u64 buffer_gpu_addr;
-};
-
-#define KBASE_IOCTL_CS_QUEUE_KICK \
- _IOW(KBASE_IOCTL_TYPE, 37, struct kbase_ioctl_cs_queue_kick)
-
-/**
- * union kbase_ioctl_cs_queue_bind - Bind a GPU command queue to a group
- *
- * @in: Input parameters
- * @in.buffer_gpu_addr: GPU address of the buffer backing the queue
- * @in.group_handle: Handle of the group to which the queue should be bound
- * @in.csi_index: Index of the CSF interface the queue should be bound to
- * @in.padding: Currently unused, must be zero
- * @out: Output parameters
- * @out.mmap_handle: Handle to be used for creating the mapping of CS
- * input/output pages
- */
-union kbase_ioctl_cs_queue_bind {
- struct {
- __u64 buffer_gpu_addr;
- __u8 group_handle;
- __u8 csi_index;
- __u8 padding[6];
- } in;
- struct {
- __u64 mmap_handle;
- } out;
-};
-
-#define KBASE_IOCTL_CS_QUEUE_BIND \
- _IOWR(KBASE_IOCTL_TYPE, 39, union kbase_ioctl_cs_queue_bind)
-
-/* ioctl 40 is free to use */
-
-/**
- * struct kbase_ioctl_cs_queue_terminate - Terminate a GPU command queue
- *
- * @buffer_gpu_addr: GPU address of the buffer backing the queue
- */
-struct kbase_ioctl_cs_queue_terminate {
- __u64 buffer_gpu_addr;
-};
-
-#define KBASE_IOCTL_CS_QUEUE_TERMINATE \
- _IOW(KBASE_IOCTL_TYPE, 41, struct kbase_ioctl_cs_queue_terminate)
-
-/**
- * union kbase_ioctl_cs_queue_group_create - Create a GPU command queue group
- * @in: Input parameters
- * @in.tiler_mask: Mask of tiler endpoints the group is allowed to use.
- * @in.fragment_mask: Mask of fragment endpoints the group is allowed to use.
- * @in.compute_mask: Mask of compute endpoints the group is allowed to use.
- * @in.cs_min: Minimum number of CSs required.
- * @in.priority: Queue group's priority within a process.
- * @in.tiler_max: Maximum number of tiler endpoints the group is allowed
- * to use.
- * @in.fragment_max: Maximum number of fragment endpoints the group is
- * allowed to use.
- * @in.compute_max: Maximum number of compute endpoints the group is allowed
- * to use.
- * @in.padding: Currently unused, must be zero
- * @out: Output parameters
- * @out.group_handle: Handle of a newly created queue group.
- * @out.padding: Currently unused, must be zero
- */
-union kbase_ioctl_cs_queue_group_create {
- struct {
- __u64 tiler_mask;
- __u64 fragment_mask;
- __u64 compute_mask;
- __u8 cs_min;
- __u8 priority;
- __u8 tiler_max;
- __u8 fragment_max;
- __u8 compute_max;
- __u8 padding[3];
-
- } in;
- struct {
- __u8 group_handle;
- __u8 padding[7];
- } out;
-};
-
-#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE \
- _IOWR(KBASE_IOCTL_TYPE, 42, union kbase_ioctl_cs_queue_group_create)
-
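Although this header is deleted here (the ioctl definitions move under uapi in this release), the calling convention is unchanged, and the create_queue_group() change earlier in this diff additionally returns a group UID through the new uapi copy of this union. A hedged userspace sketch of issuing the create ioctl; the endpoint mask values are illustrative and KBASE_IOCTL_TYPE comes from the main kbase ioctl header:

#include <string.h>
#include <stdio.h>
#include <sys/ioctl.h>

/* Illustrative only: kbase_fd is an open, already initialised kbase context */
static int example_create_group(int kbase_fd)
{
	union kbase_ioctl_cs_queue_group_create create;

	memset(&create, 0, sizeof(create));
	create.in.tiler_mask = 1;
	create.in.fragment_mask = 1;
	create.in.compute_mask = 1;
	create.in.cs_min = 1;
	create.in.tiler_max = 1;
	create.in.fragment_max = 1;
	create.in.compute_max = 1;

	if (ioctl(kbase_fd, KBASE_IOCTL_CS_QUEUE_GROUP_CREATE, &create) < 0) {
		perror("KBASE_IOCTL_CS_QUEUE_GROUP_CREATE");
		return -1;
	}

	return create.out.group_handle;
}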
-/**
- * struct kbase_ioctl_cs_queue_group_term - Terminate a GPU command queue group
- *
- * @group_handle: Handle of the queue group to be terminated
- * @padding: Padding to round up to a multiple of 8 bytes, must be zero
- */
-struct kbase_ioctl_cs_queue_group_term {
- __u8 group_handle;
- __u8 padding[7];
-};
-
-#define KBASE_IOCTL_CS_QUEUE_GROUP_TERMINATE \
- _IOW(KBASE_IOCTL_TYPE, 43, struct kbase_ioctl_cs_queue_group_term)
-
-#define KBASE_IOCTL_CS_EVENT_SIGNAL \
- _IO(KBASE_IOCTL_TYPE, 44)
-
-typedef __u8 base_kcpu_queue_id; /* We support up to 256 active KCPU queues */
-
-/**
- * struct kbase_ioctl_kcpu_queue_new - Create a KCPU command queue
- *
- * @id: ID of the new command queue returned by the kernel
- * @padding: Padding to round up to a multiple of 8 bytes, must be zero
- */
-struct kbase_ioctl_kcpu_queue_new {
- base_kcpu_queue_id id;
- __u8 padding[7];
-};
-
-#define KBASE_IOCTL_KCPU_QUEUE_CREATE \
- _IOR(KBASE_IOCTL_TYPE, 45, struct kbase_ioctl_kcpu_queue_new)
-
-/**
- * struct kbase_ioctl_kcpu_queue_delete - Destroy a KCPU command queue
- *
- * @id: ID of the command queue to be destroyed
- * @padding: Padding to round up to a multiple of 8 bytes, must be zero
- */
-struct kbase_ioctl_kcpu_queue_delete {
- base_kcpu_queue_id id;
- __u8 padding[7];
-};
-
-#define KBASE_IOCTL_KCPU_QUEUE_DELETE \
- _IOW(KBASE_IOCTL_TYPE, 46, struct kbase_ioctl_kcpu_queue_delete)
-
-/**
- * struct kbase_ioctl_kcpu_queue_enqueue - Enqueue commands into the KCPU queue
- *
- * @addr: Memory address of an array of struct base_kcpu_queue_command
- * @nr_commands: Number of commands in the array
- * @id: kcpu queue identifier, returned by KBASE_IOCTL_KCPU_QUEUE_CREATE ioctl
- * @padding: Padding to round up to a multiple of 8 bytes, must be zero
- */
-struct kbase_ioctl_kcpu_queue_enqueue {
- __u64 addr;
- __u32 nr_commands;
- base_kcpu_queue_id id;
- __u8 padding[3];
-};
-
-#define KBASE_IOCTL_KCPU_QUEUE_ENQUEUE \
- _IOW(KBASE_IOCTL_TYPE, 47, struct kbase_ioctl_kcpu_queue_enqueue)
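/*
 * Illustrative sketch (not part of this patch): creating a KCPU queue and
 * enqueuing a batch of commands on it. The layout of
 * struct base_kcpu_queue_command is defined in the base CSF header and is
 * not shown here, so the command array is treated as opaque and assumed to
 * be filled in elsewhere. <stdint.h> is assumed for uintptr_t.
 */
static int kcpu_enqueue(int fd, const struct base_kcpu_queue_command *cmds,
			__u32 nr_commands)
{
	struct kbase_ioctl_kcpu_queue_new new_queue = { 0 };
	struct kbase_ioctl_kcpu_queue_enqueue enq = { 0 };

	if (ioctl(fd, KBASE_IOCTL_KCPU_QUEUE_CREATE, &new_queue) < 0)
		return -1;

	enq.addr = (__u64)(uintptr_t)cmds;
	enq.nr_commands = nr_commands;
	enq.id = new_queue.id;	/* id returned by the create ioctl */

	return ioctl(fd, KBASE_IOCTL_KCPU_QUEUE_ENQUEUE, &enq);
}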
-
-/**
- * union kbase_ioctl_cs_tiler_heap_init - Initialize chunked tiler memory heap
- * @in: Input parameters
- * @in.chunk_size: Size of each chunk.
- * @in.initial_chunks: Initial number of chunks that heap will be created with.
- * @in.max_chunks: Maximum number of chunks that the heap is allowed to use.
- * @in.target_in_flight: Number of render-passes that the driver should attempt to
- * keep in flight for which allocation of new chunks is
- * allowed.
- * @in.group_id: Group ID to be used for physical allocations.
- * @in.padding: Padding
- * @out: Output parameters
- * @out.gpu_heap_va: GPU VA (virtual address) of Heap context that was set up
- * for the heap.
- * @out.first_chunk_va: GPU VA of the first chunk allocated for the heap,
- * actually points to the header of heap chunk and not to
- * the low address of free memory in the chunk.
- */
-union kbase_ioctl_cs_tiler_heap_init {
- struct {
- __u32 chunk_size;
- __u32 initial_chunks;
- __u32 max_chunks;
- __u16 target_in_flight;
- __u8 group_id;
- __u8 padding;
- } in;
- struct {
- __u64 gpu_heap_va;
- __u64 first_chunk_va;
- } out;
-};
-
-#define KBASE_IOCTL_CS_TILER_HEAP_INIT \
- _IOWR(KBASE_IOCTL_TYPE, 48, union kbase_ioctl_cs_tiler_heap_init)
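/*
 * Illustrative sketch (not part of this patch): creating and later tearing
 * down a chunked tiler heap. The chunk size, chunk counts and
 * target_in_flight values are placeholders only; the driver enforces the
 * real constraints on them.
 */
static int tiler_heap_example(int fd)
{
	union kbase_ioctl_cs_tiler_heap_init heap = {
		.in = {
			.chunk_size = 1u << 20,	/* placeholder: 1 MiB chunks */
			.initial_chunks = 2,
			.max_chunks = 32,
			.target_in_flight = 8,
			.group_id = 0,
		},
	};
	struct kbase_ioctl_cs_tiler_heap_term term;

	if (ioctl(fd, KBASE_IOCTL_CS_TILER_HEAP_INIT, &heap) < 0)
		return -1;

	/* heap.out.gpu_heap_va and heap.out.first_chunk_va are now valid */

	term.gpu_heap_va = heap.out.gpu_heap_va;
	return ioctl(fd, KBASE_IOCTL_CS_TILER_HEAP_TERM, &term);
}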
-
-/**
- * struct kbase_ioctl_cs_tiler_heap_term - Terminate a chunked tiler heap
- * instance
- *
- * @gpu_heap_va: GPU VA of Heap context that was set up for the heap.
- */
-struct kbase_ioctl_cs_tiler_heap_term {
- __u64 gpu_heap_va;
-};
-
-#define KBASE_IOCTL_CS_TILER_HEAP_TERM \
- _IOW(KBASE_IOCTL_TYPE, 49, struct kbase_ioctl_cs_tiler_heap_term)
-
-/**
- * union kbase_ioctl_cs_get_glb_iface - Request the global control block
- * of CSF interface capabilities
- *
- * @in: Input parameters
- * @in.max_group_num: The maximum number of groups to be read. Can be 0, in
- * which case groups_ptr is unused.
- * @in.max_total_stream_num: The maximum number of CSs to be read. Can be 0, in
- * which case streams_ptr is unused.
- * @in.groups_ptr: Pointer where to store all the group data (sequentially).
- * @in.streams_ptr: Pointer where to store all the CS data (sequentially).
- * @out: Output parameters
- * @out.glb_version: Global interface version.
- * @out.features: Bit mask of features (e.g. whether certain types of job
- * can be suspended).
- * @out.group_num: Number of CSGs supported.
- * @out.prfcnt_size: Size of CSF performance counters, in bytes. Bits 31:16
- * hold the size of firmware performance counter data
- * and 15:0 hold the size of hardware performance counter
- * data.
- * @out.total_stream_num: Total number of CSs, summed across all groups.
- * @out.padding: Will be zeroed.
- *
- *
- */
-union kbase_ioctl_cs_get_glb_iface {
- struct {
- __u32 max_group_num;
- __u32 max_total_stream_num;
- __u64 groups_ptr;
- __u64 streams_ptr;
- } in;
- struct {
- __u32 glb_version;
- __u32 features;
- __u32 group_num;
- __u32 prfcnt_size;
- __u32 total_stream_num;
- __u32 padding;
- } out;
-};
-
-#define KBASE_IOCTL_CS_GET_GLB_IFACE \
- _IOWR(KBASE_IOCTL_TYPE, 51, union kbase_ioctl_cs_get_glb_iface)
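/*
 * Illustrative sketch (not part of this patch): the two-pass pattern this
 * ioctl appears to support -- first call with zero max counts (so the group
 * and stream pointers are unused) to learn how many records exist, then
 * call again with user buffers. The per-record formats live elsewhere in
 * the uapi headers, so only the first pass is shown here.
 */
static int query_glb_iface_counts(int fd)
{
	union kbase_ioctl_cs_get_glb_iface iface = {
		.in = { .max_group_num = 0, .max_total_stream_num = 0 },
	};

	if (ioctl(fd, KBASE_IOCTL_CS_GET_GLB_IFACE, &iface) < 0)
		return -1;

	/* iface.out.group_num and iface.out.total_stream_num now hold the
	 * counts needed to size the buffers passed via in.groups_ptr and
	 * in.streams_ptr on a second, full query.
	 */
	return 0;
}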
-
-struct kbase_ioctl_cs_cpu_queue_info {
- __u64 buffer;
- __u64 size;
-};
-
-#define KBASE_IOCTL_VERSION_CHECK \
- _IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check)
-
-#define KBASE_IOCTL_CS_CPU_QUEUE_DUMP \
- _IOW(KBASE_IOCTL_TYPE, 53, struct kbase_ioctl_cs_cpu_queue_info)
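/*
 * Illustrative sketch (not part of this patch): requesting a dump of the
 * context's CPU-queue information into a caller-supplied buffer. Reading
 * @size as the capacity of @buffer is an assumption based on the field
 * names; <stdint.h> is assumed for uintptr_t.
 */
static int dump_cpu_queues(int fd, void *buf, __u64 buf_size)
{
	struct kbase_ioctl_cs_cpu_queue_info info = {
		.buffer = (__u64)(uintptr_t)buf,
		.size = buf_size,
	};

	return ioctl(fd, KBASE_IOCTL_CS_CPU_QUEUE_DUMP, &info);
}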
-
-/***************
- * test ioctls *
- ***************/
-#if MALI_UNIT_TEST
-/* These ioctls are purely for test purposes and are not used in the production
- * driver, they therefore may change without notice
- */
-
-/**
- * struct kbase_ioctl_cs_event_memory_write - Write an event memory address
- * @cpu_addr: Memory address to write
- * @value: Value to write
- * @padding: Currently unused, must be zero
- */
-struct kbase_ioctl_cs_event_memory_write {
- __u64 cpu_addr;
- __u8 value;
- __u8 padding[7];
-};
-
-/**
- * union kbase_ioctl_cs_event_memory_read - Read an event memory address
- * @in: Input parameters
- * @in.cpu_addr: Memory address to read
- * @out: Output parameters
- * @out.value: Value read
- * @out.padding: Currently unused, must be zero
- */
-union kbase_ioctl_cs_event_memory_read {
- struct {
- __u64 cpu_addr;
- } in;
- struct {
- __u8 value;
- __u8 padding[7];
- } out;
-};
-
-#endif /* MALI_UNIT_TEST */
-
-#endif /* _KBASE_CSF_IOCTL_H_ */
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.c b/mali_kbase/csf/mali_kbase_csf_kcpu.c
index e5aee61..1203d2c 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu.c
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu.c
@@ -257,7 +257,7 @@ static int kbase_kcpu_jit_allocate_process(
* No prior JIT_FREE command is active. Roll
* back previous allocations and fail.
*/
- dev_warn_ratelimited(kctx->kbdev->dev, "JIT alloc command failed: %p\n", cmd);
+ dev_warn_ratelimited(kctx->kbdev->dev, "JIT alloc command failed: %pK\n", cmd);
ret = -ENOMEM;
goto fail;
}
@@ -858,10 +858,7 @@ static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev,
"Sync memory %llx already freed", cqs_set->objs[i].addr);
queue->has_error = true;
} else {
- if (cqs_set->propagate_flags & (1 << i))
- evt[BASEP_EVENT_ERR_INDEX] = queue->has_error;
- else
- evt[BASEP_EVENT_ERR_INDEX] = false;
+ evt[BASEP_EVENT_ERR_INDEX] = queue->has_error;
/* Set to signaled */
evt[BASEP_EVENT_VAL_INDEX]++;
kbase_phy_alloc_mapping_put(queue->kctx, mapping);
@@ -908,8 +905,267 @@ static int kbase_kcpu_cqs_set_prepare(
current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET;
current_command->info.cqs_set.nr_objs = nr_objs;
current_command->info.cqs_set.objs = objs;
- current_command->info.cqs_set.propagate_flags =
- cqs_set_info->propagate_flags;
+
+ return 0;
+}
+
+static void cleanup_cqs_wait_operation(struct kbase_kcpu_command_queue *queue,
+ struct kbase_kcpu_command_cqs_wait_operation_info *cqs_wait_operation)
+{
+ WARN_ON(!cqs_wait_operation->nr_objs);
+ WARN_ON(!cqs_wait_operation->objs);
+ WARN_ON(!cqs_wait_operation->signaled);
+ WARN_ON(!queue->cqs_wait_count);
+
+ if (--queue->cqs_wait_count == 0) {
+ kbase_csf_event_wait_remove(queue->kctx,
+ event_cqs_callback, queue);
+ }
+
+ kfree(cqs_wait_operation->signaled);
+ kfree(cqs_wait_operation->objs);
+ cqs_wait_operation->signaled = NULL;
+ cqs_wait_operation->objs = NULL;
+}
+
+static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev,
+ struct kbase_kcpu_command_queue *queue,
+ struct kbase_kcpu_command_cqs_wait_operation_info *cqs_wait_operation)
+{
+ u32 i;
+
+ lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
+
+ if (WARN_ON(!cqs_wait_operation->objs))
+ return -EINVAL;
+
+ /* Skip the CQS waits that have already been signaled when processing */
+ for (i = find_first_zero_bit(cqs_wait_operation->signaled,
+ cqs_wait_operation->nr_objs);
+ i < cqs_wait_operation->nr_objs; i++) {
+ if (!test_bit(i, cqs_wait_operation->signaled)) {
+ struct kbase_vmap_struct *mapping;
+ bool sig_set;
+ u64 *evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx,
+ cqs_wait_operation->objs[i].addr, &mapping);
+
+ /* GPUCORE-28172 RDT to review */
+ if (!queue->command_started)
+ queue->command_started = true;
+
+ if (!evt) {
+ dev_warn(kbdev->dev,
+ "Sync memory %llx already freed", cqs_wait_operation->objs[i].addr);
+ queue->has_error = true;
+ return -EINVAL;
+ }
+
+ switch (cqs_wait_operation->objs[i].operation) {
+ case BASEP_CQS_WAIT_OPERATION_LE:
+ sig_set = *evt <= cqs_wait_operation->objs[i].val;
+ break;
+ case BASEP_CQS_WAIT_OPERATION_GT:
+ sig_set = *evt > cqs_wait_operation->objs[i].val;
+ break;
+ default:
+ dev_warn(kbdev->dev,
+ "Unsupported CQS wait operation %d", cqs_wait_operation->objs[i].operation);
+
+ kbase_phy_alloc_mapping_put(queue->kctx, mapping);
+ queue->has_error = true;
+
+ return -EINVAL;
+ }
+
+ /* Advance evt past the value field (u32 or u64 depending on the CQS data type) so that it points at the error-state word */
+ switch (cqs_wait_operation->objs[i].data_type) {
+ default:
+ dev_warn(kbdev->dev, "Unreachable data_type=%d", cqs_wait_operation->objs[i].data_type);
+ /* Fallthrough - hint to compiler that there's really only 2 options at present */
+ case BASEP_CQS_DATA_TYPE_U32:
+ evt = (u64 *)((u8 *)evt + sizeof(u32));
+ break;
+ case BASEP_CQS_DATA_TYPE_U64:
+ evt = (u64 *)((u8 *)evt + sizeof(u64));
+ break;
+ }
+
+ if (sig_set) {
+ bitmap_set(cqs_wait_operation->signaled, i, 1);
+ if ((cqs_wait_operation->inherit_err_flags & (1U << i)) &&
+ *evt > 0) {
+ queue->has_error = true;
+ }
+
+ /* GPUCORE-28172 RDT to review */
+
+ queue->command_started = false;
+ }
+
+ kbase_phy_alloc_mapping_put(queue->kctx, mapping);
+
+ if (!sig_set)
+ break;
+ }
+ }
+
+ /* For the queue to progress further, all cqs objects should get
+ * signaled.
+ */
+ return bitmap_full(cqs_wait_operation->signaled, cqs_wait_operation->nr_objs);
+}
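/*
 * Restatement for illustration only (not part of this patch): the wait
 * predicate implemented by the switch above, in isolation. `operation` and
 * `val` come from the user-supplied base_cqs_wait_operation_info entry and
 * `current_value` is the 64-bit value read from the CQS object.
 */
static bool cqs_wait_operation_is_signaled(u32 operation, u64 current_value,
					   u64 val)
{
	switch (operation) {
	case BASEP_CQS_WAIT_OPERATION_LE:
		return current_value <= val;
	case BASEP_CQS_WAIT_OPERATION_GT:
		return current_value > val;
	default:
		return false;	/* unsupported operation, reported as an error */
	}
}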
+
+static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue *queue,
+ struct base_kcpu_command_cqs_wait_operation_info *cqs_wait_operation_info,
+ struct kbase_kcpu_command *current_command)
+{
+ struct base_cqs_wait_operation_info *objs;
+ unsigned int nr_objs = cqs_wait_operation_info->nr_objs;
+
+ lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
+
+ if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
+ return -EINVAL;
+
+ if (!nr_objs)
+ return -EINVAL;
+
+ objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL);
+ if (!objs)
+ return -ENOMEM;
+
+ if (copy_from_user(objs, u64_to_user_ptr(cqs_wait_operation_info->objs),
+ nr_objs * sizeof(*objs))) {
+ kfree(objs);
+ return -ENOMEM;
+ }
+
+ if (++queue->cqs_wait_count == 1) {
+ if (kbase_csf_event_wait_add(queue->kctx,
+ event_cqs_callback, queue)) {
+ kfree(objs);
+ queue->cqs_wait_count--;
+ return -ENOMEM;
+ }
+ }
+
+ current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION;
+ current_command->info.cqs_wait_operation.nr_objs = nr_objs;
+ current_command->info.cqs_wait_operation.objs = objs;
+ current_command->info.cqs_wait_operation.inherit_err_flags =
+ cqs_wait_operation_info->inherit_err_flags;
+
+ current_command->info.cqs_wait_operation.signaled = kcalloc(BITS_TO_LONGS(nr_objs),
+ sizeof(*current_command->info.cqs_wait_operation.signaled), GFP_KERNEL);
+ if (!current_command->info.cqs_wait_operation.signaled) {
+ if (--queue->cqs_wait_count == 0) {
+ kbase_csf_event_wait_remove(queue->kctx,
+ event_cqs_callback, queue);
+ }
+
+ kfree(objs);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void kbase_kcpu_cqs_set_operation_process(
+ struct kbase_device *kbdev,
+ struct kbase_kcpu_command_queue *queue,
+ struct kbase_kcpu_command_cqs_set_operation_info *cqs_set_operation)
+{
+ unsigned int i;
+
+ lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
+
+ if (WARN_ON(!cqs_set_operation->objs))
+ return;
+
+ for (i = 0; i < cqs_set_operation->nr_objs; i++) {
+ struct kbase_vmap_struct *mapping;
+ u64 *evt;
+
+ evt = (u64 *)kbase_phy_alloc_mapping_get(
+ queue->kctx, cqs_set_operation->objs[i].addr, &mapping);
+
+ /* GPUCORE-28172 RDT to review */
+
+ if (!evt) {
+ dev_warn(kbdev->dev,
+ "Sync memory %llx already freed", cqs_set_operation->objs[i].addr);
+ queue->has_error = true;
+ } else {
+ switch (cqs_set_operation->objs[i].operation) {
+ case BASEP_CQS_SET_OPERATION_ADD:
+ *evt += cqs_set_operation->objs[i].val;
+ break;
+ case BASEP_CQS_SET_OPERATION_SET:
+ *evt = cqs_set_operation->objs[i].val;
+ break;
+ default:
+ dev_warn(kbdev->dev,
+ "Unsupported CQS set operation %d", cqs_set_operation->objs[i].operation);
+ queue->has_error = true;
+ break;
+ }
+
+ /* Advance evt past the value field (u32 or u64 depending on the CQS data type) so that it points at the error-state word */
+ switch (cqs_set_operation->objs[i].data_type) {
+ default:
+ dev_warn(kbdev->dev, "Unreachable data_type=%d", cqs_set_operation->objs[i].data_type);
+ /* Fallthrough - hint to compiler that there's really only 2 options at present */
+ case BASEP_CQS_DATA_TYPE_U32:
+ evt = (u64 *)((u8 *)evt + sizeof(u32));
+ break;
+ case BASEP_CQS_DATA_TYPE_U64:
+ evt = (u64 *)((u8 *)evt + sizeof(u64));
+ break;
+ }
+
+ /* GPUCORE-28172 RDT to review */
+
+ /* Always propagate errors */
+ *evt = queue->has_error;
+
+ kbase_phy_alloc_mapping_put(queue->kctx, mapping);
+ }
+ }
+
+ kbase_csf_event_signal_notify_gpu(queue->kctx);
+
+ kfree(cqs_set_operation->objs);
+ cqs_set_operation->objs = NULL;
+}
+
+static int kbase_kcpu_cqs_set_operation_prepare(
+ struct kbase_kcpu_command_queue *kcpu_queue,
+ struct base_kcpu_command_cqs_set_operation_info *cqs_set_operation_info,
+ struct kbase_kcpu_command *current_command)
+{
+ struct kbase_context *const kctx = kcpu_queue->kctx;
+ struct base_cqs_set_operation_info *objs;
+ unsigned int nr_objs = cqs_set_operation_info->nr_objs;
+
+ lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+
+ if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
+ return -EINVAL;
+
+ if (!nr_objs)
+ return -EINVAL;
+
+ objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL);
+ if (!objs)
+ return -ENOMEM;
+
+ if (copy_from_user(objs, u64_to_user_ptr(cqs_set_operation_info->objs),
+ nr_objs * sizeof(*objs))) {
+ kfree(objs);
+ return -ENOMEM;
+ }
+
+ current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION;
+ current_command->info.cqs_set_operation.nr_objs = nr_objs;
+ current_command->info.cqs_set_operation.objs = objs;
return 0;
}
@@ -1365,6 +1621,28 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
&cmd->info.cqs_set);
break;
+ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION:
+ status = kbase_kcpu_cqs_wait_operation_process(kbdev, queue,
+ &cmd->info.cqs_wait_operation);
+
+ if (!status && !ignore_waits) {
+ process_next = false;
+ } else {
+ /* Either all CQS objects were signaled or
+ * there was an error or the queue itself is
+ * being deleted.
+ * In all cases we can move on to the next command.
+ * TBD: handle the error
+ */
+ cleanup_cqs_wait_operation(queue, &cmd->info.cqs_wait_operation);
+ }
+
+ break;
+ case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION:
+ kbase_kcpu_cqs_set_operation_process(kbdev, queue,
+ &cmd->info.cqs_set_operation);
+
+ break;
case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER:
/* Clear the queue's error state */
queue->has_error = false;
@@ -1404,7 +1682,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
queue->kctx, NULL, cmd->info.import.gpu_va);
kbase_gpu_vm_unlock(queue->kctx);
- if (ret == false) {
+ if (!ret) {
queue->has_error = true;
dev_warn(kbdev->dev,
"failed to release the reference. resource not found\n");
@@ -1425,7 +1703,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
queue->kctx, NULL, cmd->info.import.gpu_va);
kbase_gpu_vm_unlock(queue->kctx);
- if (ret == false) {
+ if (!ret) {
queue->has_error = true;
dev_warn(kbdev->dev,
"failed to release the reference. resource not found\n");
@@ -1591,6 +1869,16 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
}
break;
}
+ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION:
+ {
+ /* GPUCORE-28172 RDT to review */
+ break;
+ }
+ case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION:
+ {
+ /* GPUCORE-28172 RDT to review */
+ break;
+ }
case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER:
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER(kbdev,
queue);
@@ -1758,6 +2046,14 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
ret = kbase_kcpu_cqs_set_prepare(queue,
&command.info.cqs_set, kcpu_cmd);
break;
+ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION:
+ ret = kbase_kcpu_cqs_wait_operation_prepare(queue,
+ &command.info.cqs_wait_operation, kcpu_cmd);
+ break;
+ case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION:
+ ret = kbase_kcpu_cqs_set_operation_prepare(queue,
+ &command.info.cqs_set_operation, kcpu_cmd);
+ break;
case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER:
kcpu_cmd->type = BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER;
ret = 0;
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.h b/mali_kbase/csf/mali_kbase_csf_kcpu.h
index a528572..86aa7dc 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu.h
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu.h
@@ -69,13 +69,10 @@ struct kbase_kcpu_command_fence_info {
* @objs: Array of structures which define CQS objects to be used by
* the kcpu command.
* @nr_objs: Number of CQS objects in the array.
- * @propagate_flags: Bit-pattern for the CQSs in the array that are set
- * to propagate queue error-state to the flagged CQSs.
*/
struct kbase_kcpu_command_cqs_set_info {
struct base_cqs_set *objs;
unsigned int nr_objs;
- u32 propagate_flags;
};
/**
@@ -99,6 +96,36 @@ struct kbase_kcpu_command_cqs_wait_info {
};
/**
+ * struct kbase_kcpu_command_cqs_set_operation_info - Structure which holds information
+ * about CQS objects for the kcpu CQS timeline set command
+ *
+ * @objs: Array of structures which define CQS timeline objects to be used by
+ * the kcpu command.
+ * @nr_objs: Number of CQS objects in the array.
+ */
+struct kbase_kcpu_command_cqs_set_operation_info {
+ struct base_cqs_set_operation_info *objs;
+ unsigned int nr_objs;
+};
+
+/**
+ * struct kbase_kcpu_command_cqs_wait_operation_info - Structure which holds information
+ * about CQS objects for the kcpu CQS timeline wait command
+ *
+ * @objs: Array of structures which define CQS timeline objects to be used by
+ * the kcpu command.
+ * @signaled: Bit array used to report the status of the CQS wait objects.
+ * 1 is signaled, 0 otherwise.
+ * @nr_objs: Number of CQS objects in the array.
+ * @inherit_err_flags: Bit-mask selecting the CQS objects whose error state is
+ * inherited by the queue when they signal with a non-zero
+ * error value.
+ */
+struct kbase_kcpu_command_cqs_wait_operation_info {
+ struct base_cqs_wait_operation_info *objs;
+ unsigned long *signaled;
+ unsigned int nr_objs;
+ u32 inherit_err_flags;
+};
+
+/**
* struct kbase_kcpu_command_jit_alloc_info - Structure which holds information
* needed for the kcpu command for jit allocations
*
@@ -200,6 +227,8 @@ struct kbase_kcpu_command {
struct kbase_kcpu_command_fence_info fence;
struct kbase_kcpu_command_cqs_wait_info cqs_wait;
struct kbase_kcpu_command_cqs_set_info cqs_set;
+ struct kbase_kcpu_command_cqs_wait_operation_info cqs_wait_operation;
+ struct kbase_kcpu_command_cqs_set_operation_info cqs_set_operation;
struct kbase_kcpu_command_import_info import;
struct kbase_kcpu_command_jit_alloc_info jit_alloc;
struct kbase_kcpu_command_jit_free_info jit_free;
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu_debugfs.c b/mali_kbase/csf/mali_kbase_csf_kcpu_debugfs.c
index 5c2e8e3..d59e77c 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu_debugfs.c
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu_debugfs.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
*
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -95,7 +95,7 @@ static void kbasep_csf_kcpu_debugfs_print_queue(struct seq_file *file,
struct kbase_sync_fence_info info;
kbase_sync_fence_info_get(cmd->info.fence.fence, &info);
- seq_printf(file, ", Fence %p %s %s",
+ seq_printf(file, ", Fence %pK %s %s",
info.fence, info.name,
kbase_sync_status_string(info.status));
break;
diff --git a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
index b59ffd4..e8da0f3 100644
--- a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
+++ b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
@@ -307,6 +307,31 @@ static void kbase_csf_dump_firmware_trace_buffer(struct kbase_device *kbdev)
kfree(buf);
}
+/**
+ * kbase_csf_hwcnt_on_reset_error() - Sets HWCNT to appropriate state in the
+ * event of an error during GPU reset.
+ * @kbdev: Pointer to KBase device
+ */
+static void kbase_csf_hwcnt_on_reset_error(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ /* Treat this as an unrecoverable error for HWCNT */
+ kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface);
+
+ /* Re-enable counters to ensure matching enable/disable pair.
+ * This might reduce the hwcnt disable count to 0, and therefore
+ * trigger actual re-enabling of hwcnt.
+ * However, as the backend is now in the unrecoverable error state,
+ * re-enabling will immediately fail and put the context into the error
+ * state, preventing the hardware from being touched (which could have
+ * risked a hang).
+ */
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
+
static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
bool firmware_inited, bool silent)
{
@@ -396,8 +421,10 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
mutex_unlock(&kbdev->pm.lock);
- if (WARN_ON(err))
- goto error;
+ if (WARN_ON(err)) {
+ kbase_csf_hwcnt_on_reset_error(kbdev);
+ return err;
+ }
mutex_lock(&kbdev->mmu_hw_mutex);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -414,40 +441,20 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
err = kbase_pm_wait_for_desired_state(kbdev);
mutex_unlock(&kbdev->pm.lock);
- if (err)
- goto error;
+ if (WARN_ON(err)) {
+ kbase_csf_hwcnt_on_reset_error(kbdev);
+ return err;
+ }
/* Re-enable GPU hardware counters */
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
if (!silent)
dev_err(kbdev->dev, "Reset complete");
return 0;
-error:
- WARN_ON(!err);
-
- /* If hardware init failed, we assume hardware counters will
- * not work and put the backend into the unrecoverable error
- * state.
- */
- kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface);
-
- /* Re-enable counters to ensure matching enable/disable pair.
- * This might reduce the hwcnt disable count to 0, and therefore
- * trigger actual re-enabling of hwcnt.
- * However, as the backend is now in the unrecoverable error state,
- * re-enabling will immediately fail and put the context into the error
- * state, preventing the hardware from being touched (which could have
- * risked a hang).
- */
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
- return err;
}
static void kbase_csf_reset_gpu_worker(struct work_struct *data)
@@ -484,25 +491,29 @@ static void kbase_csf_reset_gpu_worker(struct work_struct *data)
kbase_csf_reset_end_hw_access(kbdev, err, firmware_inited);
}
-bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev)
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags)
{
+ if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)
+ kbase_hwcnt_backend_csf_on_unrecoverable_error(
+ &kbdev->hwcnt_gpu_iface);
+
if (atomic_cmpxchg(&kbdev->csf.reset.state,
KBASE_CSF_RESET_GPU_NOT_PENDING,
KBASE_CSF_RESET_GPU_PREPARED) !=
- KBASE_CSF_RESET_GPU_NOT_PENDING) {
+ KBASE_CSF_RESET_GPU_NOT_PENDING)
/* Some other thread is already resetting the GPU */
return false;
- }
return true;
}
KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);
-bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev)
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev,
+ unsigned int flags)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
- return kbase_prepare_to_reset_gpu(kbdev);
+ return kbase_prepare_to_reset_gpu(kbdev, flags);
}
void kbase_reset_gpu(struct kbase_device *kbdev)
diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c
index b9dc59c..84d6f81 100644
--- a/mali_kbase/csf/mali_kbase_csf_scheduler.c
+++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c
@@ -28,8 +28,8 @@
#include "../tl/mali_kbase_tracepoints.h"
#include "backend/gpu/mali_kbase_pm_internal.h"
#include <linux/export.h>
-#include "mali_gpu_csf_registers.h"
-#include <mali_base_kernel.h>
+#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h>
+#include <uapi/gpu/arm/midgard/mali_base_kernel.h>
/* Value to indicate that a queue group is not groups_to_schedule list */
#define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX)
@@ -373,6 +373,45 @@ static bool queue_group_scheduled_locked(struct kbase_queue_group *group)
}
/**
+ * scheduler_wait_protm_quit() - Wait for GPU to exit protected mode.
+ *
+ * @kbdev: Pointer to the GPU device
+ *
+ * This function waits for the GPU to exit protected mode, which is confirmed
+ * when active_protm_grp is set to NULL.
+ */
+static void scheduler_wait_protm_quit(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ long wt = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
+ long remaining;
+
+ lockdep_assert_held(&scheduler->lock);
+
+ remaining = wait_event_timeout(kbdev->csf.event_wait,
+ !kbase_csf_scheduler_protected_mode_in_use(kbdev), wt);
+
+ if (!remaining)
+ dev_warn(kbdev->dev, "Timeout, protm_quit wait skipped");
+}
+
+/**
+ * scheduler_force_protm_exit() - Force GPU to exit protected mode.
+ *
+ * @kbdev: Pointer to the GPU device
+ *
+ * This function sends a ping request to the firmware and waits for the GPU
+ * to exit protected mode.
+ */
+static void scheduler_force_protm_exit(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ kbase_csf_firmware_ping(kbdev);
+ scheduler_wait_protm_quit(kbdev);
+}
+
+/**
* scheduler_timer_is_enabled_nolock() - Check if the scheduler wakes up
* automatically for periodic tasks.
*
@@ -607,7 +646,7 @@ static int halt_stream_sync(struct kbase_queue *queue)
if (!remaining) {
dev_warn(kbdev->dev, "Timed out waiting for queue to start on csi %d bound to group %d on slot %d",
csi_index, group->handle, group->csg_nr);
- if (kbase_prepare_to_reset_gpu(kbdev))
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
return -ETIMEDOUT;
@@ -629,26 +668,14 @@ static int halt_stream_sync(struct kbase_queue *queue)
(CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK))
== CS_ACK_STATE_STOP), remaining);
- /* Queues that have failed to stop in time shall raise a fatal error
- * as their group would fail to suspend which could no longer be safely
- * resumed.
- */
if (!remaining) {
- unsigned long flags;
-
dev_warn(kbdev->dev, "Timed out waiting for queue to stop on csi %d bound to group %d on slot %d",
queue->csi_index, group->handle, group->csg_nr);
- spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
- kbase_csf_add_queue_fatal_error(
- queue, GPU_EXCEPTION_TYPE_SW_FAULT_2, 0);
- spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock,
- flags);
-
/* TODO GPUCORE-25328: The CSG can't be terminated, the GPU
* will be reset as a work-around.
*/
- if (kbase_prepare_to_reset_gpu(kbdev))
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
}
return (remaining) ? 0 : -ETIMEDOUT;
@@ -722,27 +749,6 @@ static int sched_halt_stream(struct kbase_queue *queue)
}
}
retry:
- /* First wait for the group to reach a stable state. IDLE state is
- * an intermediate state that is only set by Scheduler at the start
- * of a tick (prior to scanout) for groups that received idle
- * notification, then later the idle group is moved to one of the
- * suspended states or the runnable state.
- */
- while (group->run_state == KBASE_CSF_GROUP_IDLE) {
- mutex_unlock(&scheduler->lock);
- remaining = wait_event_timeout(kbdev->csf.event_wait,
- group->run_state !=
- KBASE_CSF_GROUP_IDLE,
- kbdev->csf.fw_timeout_ms);
- mutex_lock(&scheduler->lock);
- if (!remaining) {
- dev_warn(kbdev->dev,
- "Timed out waiting for state change of Group-%d when stopping a queue on csi %d",
- group->handle, queue->csi_index);
- }
- }
-
- WARN_ON(group->run_state == KBASE_CSF_GROUP_IDLE);
/* Update the group state so that it can get scheduled soon */
update_idle_suspended_group_state(group);
@@ -1559,7 +1565,7 @@ static void update_offslot_non_idle_cnt_on_grp_suspend(
lockdep_assert_held(&scheduler->lock);
- if (scheduler->state == SCHED_BUSY || scheduler->apply_async_protm) {
+ if (scheduler->state == SCHED_BUSY) {
/* active phase or, async entering the protected mode */
if (group->prepared_seq_num >=
scheduler->non_idle_scanout_grps) {
@@ -1731,7 +1737,6 @@ static bool cleanup_csg_slot(struct kbase_queue_group *group)
/* The csg does not need cleanup other than drop its AS */
spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
as_fault = kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT);
- WARN_ON(kctx->mmu_flush_pend_state != KCTX_MMU_FLUSH_NOT_PEND);
kbase_ctx_sched_release_ctx(kctx);
if (unlikely(group->faulted))
as_fault = true;
@@ -1779,11 +1784,12 @@ static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio)
csg_slot = &kbdev->csf.scheduler.csg_slots[slot];
ginfo = &kbdev->csf.global_iface.groups[slot];
+ /* CSGs remaining on-slot can be either idle or runnable.
+ * This also applies in protected mode.
+ */
WARN_ON(!((group->run_state == KBASE_CSF_GROUP_RUNNABLE) ||
(group->run_state == KBASE_CSF_GROUP_IDLE)));
- group->run_state = KBASE_CSF_GROUP_RUNNABLE;
-
/* Update consumes a group from scanout */
update_offslot_non_idle_cnt_for_onslot_grp(group);
@@ -1858,12 +1864,11 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbase_ctx_sched_retain_ctx(kctx);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
- kbase_mmu_deferred_flush_invalidate(kctx);
mutex_unlock(&kbdev->mmu_hw_mutex);
if (kctx->as_nr == KBASEP_AS_NR_INVALID) {
- dev_dbg(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n",
- group->handle, kctx->tgid, kctx->id, slot);
+ dev_warn(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n",
+ group->handle, kctx->tgid, kctx->id, slot);
return;
}
@@ -1896,6 +1901,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_OTHER,
tiler_mask & U32_MAX);
+
ep_cfg = CSG_EP_REQ_COMPUTE_EP_SET(ep_cfg, compute_max);
ep_cfg = CSG_EP_REQ_FRAGMENT_EP_SET(ep_cfg, fragment_max);
ep_cfg = CSG_EP_REQ_TILER_EP_SET(ep_cfg, tiler_max);
@@ -2043,7 +2049,7 @@ static int term_group_sync(struct kbase_queue_group *group)
if (!remaining) {
dev_warn(kbdev->dev, "term request timed out for group %d on slot %d",
group->handle, group->csg_nr);
- if (kbase_prepare_to_reset_gpu(kbdev))
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
err = -ETIMEDOUT;
}
@@ -2112,9 +2118,10 @@ static int scheduler_group_schedule(struct kbase_queue_group *group)
{
struct kbase_context *kctx = group->kctx;
struct kbase_device *kbdev = kctx->kbdev;
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
lockdep_assert_held(&kctx->csf.lock);
- lockdep_assert_held(&kbdev->csf.scheduler.lock);
+ lockdep_assert_held(&scheduler->lock);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SCHEDULE, group, group->run_state);
if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)
@@ -2125,8 +2132,39 @@ static int scheduler_group_schedule(struct kbase_queue_group *group)
if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE)
update_idle_suspended_group_state(group);
- else
+ else {
+ struct kbase_queue_group *protm_grp;
+ unsigned long flags;
+
+ WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(
+ group));
+
group->run_state = KBASE_CSF_GROUP_RUNNABLE;
+
+ /* A normal mode CSG could be idle onslot during
+ * protected mode. In this case clear the
+ * appropriate bit in csg_slots_idle_mask.
+ */
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+ protm_grp = scheduler->active_protm_grp;
+ if (protm_grp && protm_grp != group)
+ clear_bit((unsigned int)group->csg_nr,
+ scheduler->csg_slots_idle_mask);
+ spin_unlock_irqrestore(&scheduler->interrupt_lock,
+ flags);
+
+ /* If the GPU is in protected mode then ringing doorbells
+ * would have no effect. Check whether the GPU is in
+ * protected mode and whether this group has a higher
+ * priority than the active protected mode group. If so,
+ * prompt the FW to exit protected mode.
+ */
+ if (protm_grp &&
+ group->scan_seq_num < protm_grp->scan_seq_num) {
+ /* Prompt the FW to exit protected mode */
+ scheduler_force_protm_exit(kbdev);
+ }
+ }
} else if (!queue_group_scheduled_locked(group)) {
insert_group_to_runnable(&kbdev->csf.scheduler, group,
KBASE_CSF_GROUP_RUNNABLE);
@@ -2511,7 +2549,7 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
*/
dev_warn(
kbdev->dev,
- "Group %p on slot %u failed to suspend\n",
+ "Group %pK on slot %u failed to suspend\n",
(void *)group, i);
/* The group has failed suspension, stop
@@ -2541,11 +2579,13 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
if (WARN_ON(i == num_groups))
break;
program_vacant_csg_slot(kbdev, (s8)i);
- if (WARN_ON(!csg_slot_in_use(kbdev, (int)i)))
+ if (!csg_slot_in_use(kbdev, (int)i)) {
+ dev_warn(kbdev->dev, "Couldn't use CSG slot %d despite being vacant", i);
break;
+ }
}
} else {
- if (kbase_prepare_to_reset_gpu(kbdev))
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
}
}
@@ -2611,7 +2651,7 @@ static void wait_csg_slots_start(struct kbase_device *kbdev)
dev_warn(kbdev->dev, "Timed out waiting for CSG slots to start, slots: 0x%*pb\n",
num_groups, slot_mask);
- if (kbase_prepare_to_reset_gpu(kbdev))
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
break;
}
@@ -3287,7 +3327,8 @@ static void scheduler_handle_idle_slots(struct kbase_device *kbdev)
continue;
if (WARN_ON(!group))
continue;
- if (WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE))
+ if (WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE &&
+ group->run_state != KBASE_CSF_GROUP_IDLE))
continue;
if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT))
continue;
@@ -3295,7 +3336,8 @@ static void scheduler_handle_idle_slots(struct kbase_device *kbdev)
if (group_on_slot_is_idle(kbdev, i)) {
group->run_state = KBASE_CSF_GROUP_IDLE;
set_bit(i, scheduler->csg_slots_idle_mask);
- }
+ } else {
+ group->run_state = KBASE_CSF_GROUP_RUNNABLE;
+ }
}
bitmap_or(scheduler->csg_slots_idle_mask,
@@ -3381,7 +3423,7 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
dev_warn(kbdev->dev, "Timed out waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n",
kbdev->csf.global_iface.group_num, slot_mask);
- if (kbase_prepare_to_reset_gpu(kbdev))
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
if (is_suspend) {
@@ -3526,21 +3568,6 @@ static int scheduler_prepare(struct kbase_device *kbdev)
return 0;
}
-static void scheduler_wait_protm_quit(struct kbase_device *kbdev)
-{
- struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
- long wt = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
- long remaining;
-
- lockdep_assert_held(&scheduler->lock);
-
- remaining = wait_event_timeout(kbdev->csf.event_wait,
- !kbase_csf_scheduler_protected_mode_in_use(kbdev), wt);
-
- if (!remaining)
- dev_warn(kbdev->dev, "Timeout, protm_quit wait skipped");
-}
-
static void scheduler_handle_idle_timer_onoff(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
@@ -3572,6 +3599,8 @@ static void schedule_actions(struct kbase_device *kbdev)
unsigned long flags;
struct kbase_queue_group *protm_grp;
int ret;
+ bool skip_idle_slots_update;
+ bool new_protm_top_grp = false;
kbase_reset_gpu_assert_prevented(kbdev);
lockdep_assert_held(&scheduler->lock);
@@ -3582,7 +3611,14 @@ static void schedule_actions(struct kbase_device *kbdev)
return;
}
- scheduler_handle_idle_slots(kbdev);
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+ skip_idle_slots_update = kbase_csf_scheduler_protected_mode_in_use(kbdev);
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+
+ /* Skip updating on-slot idle CSGs if GPU is in protected mode. */
+ if (!skip_idle_slots_update)
+ scheduler_handle_idle_slots(kbdev);
+
scheduler_prepare(kbdev);
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
protm_grp = scheduler->active_protm_grp;
@@ -3613,12 +3649,12 @@ static void schedule_actions(struct kbase_device *kbdev)
scheduler->top_grp->kctx->tgid,
scheduler->top_grp->kctx->id);
- /* Due to GPUCORE-24491 only the top-group is allowed
- * to be on slot and all other on slot groups have to
- * be suspended before entering protected mode.
- * This would change in GPUCORE-24492.
+ /* When entering protected mode all CSG slots can be occupied
+ * but only the protected mode CSG will be running. Any event
+ * that would trigger the execution of an on-slot idle CSG will
+ * need to be handled by the host during protected mode.
*/
- scheduler->num_csg_slots_for_tick = 1;
+ new_protm_top_grp = true;
}
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
@@ -3635,12 +3671,12 @@ static void schedule_actions(struct kbase_device *kbdev)
* locked in the secure mode.
*/
if (protm_grp)
- scheduler_wait_protm_quit(kbdev);
+ scheduler_force_protm_exit(kbdev);
wait_csg_slots_start(kbdev);
wait_csg_slots_finish_prio_update(kbdev);
- if (scheduler->num_csg_slots_for_tick == 1) {
+ if (new_protm_top_grp) {
scheduler_group_check_protm_enter(kbdev,
scheduler->top_grp);
}
@@ -3913,8 +3949,7 @@ void kbase_csf_scheduler_reset(struct kbase_device *kbdev)
WARN_ON(!kbase_reset_gpu_is_active(kbdev));
KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET, NULL, 0u);
- if (!kbase_csf_scheduler_protected_mode_in_use(kbdev) &&
- !suspend_active_queue_groups_on_reset(kbdev)) {
+ if (!suspend_active_queue_groups_on_reset(kbdev)) {
/* As all groups have been successfully evicted from the CSG
* slots, clear out the scheduler data fields and return
*/
@@ -4002,21 +4037,14 @@ static void firmware_aliveness_monitor(struct work_struct *work)
kbase_pm_wait_for_desired_state(kbdev);
- err = kbase_csf_firmware_ping(kbdev);
+ err = kbase_csf_firmware_ping_wait(kbdev);
if (err) {
- /* FW not responding means hardware counters will stop working.
- * Put the backend into the unrecoverable error state to cause
- * current and subsequent counter operations to immediately
- * fail, avoiding the risk of a hang.
- */
- kbase_hwcnt_backend_csf_on_unrecoverable_error(
- &kbdev->hwcnt_gpu_iface);
-
/* It is acceptable to enqueue a reset whilst we've prevented
* them, it will happen after we've allowed them again
*/
- if (kbase_prepare_to_reset_gpu(kbdev))
+ if (kbase_prepare_to_reset_gpu(
+ kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
kbase_reset_gpu(kbdev);
} else if (get_nr_active_csgs(kbdev) == 1) {
queue_delayed_work(system_long_wq,
@@ -4132,7 +4160,9 @@ static bool group_sync_updated(struct kbase_queue_group *group)
bool updated = false;
int stream;
- WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC);
+ /* Groups can also be blocked on-slot during protected mode. */
+ WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC &&
+ group->run_state != KBASE_CSF_GROUP_IDLE);
for (stream = 0; stream < MAX_SUPPORTED_STREAMS_PER_GROUP; ++stream) {
struct kbase_queue *const queue = group->bound_queues[stream];
@@ -4233,40 +4263,159 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group)
mutex_lock(&scheduler->lock);
- /* Check if the group is now eligible for execution in protected mode
- * and accordingly undertake full scheduling actions as due to
- * GPUCORE-24491 the on slot groups other than the top group have to
- * be suspended first before entering protected mode.
- */
- if (scheduler_get_protm_enter_async_group(kbdev, group)) {
- scheduler->apply_async_protm = true;
- schedule_actions(kbdev);
- scheduler->apply_async_protm = false;
- }
+ /* Check if the group is now eligible for execution in protected mode. */
+ if (scheduler_get_protm_enter_async_group(kbdev, group))
+ scheduler_group_check_protm_enter(kbdev, group);
mutex_unlock(&scheduler->lock);
kbase_reset_gpu_allow(kbdev);
}
/**
+ * check_sync_update_for_idle_group_protm() - Check the sync wait condition
+ * for all the queues bound to
+ * the given group.
+ *
+ * @group: Pointer to the group that requires evaluation.
+ *
+ * This function is called if the GPU is in protected mode and there are on
+ * slot idle groups with higher priority than the active protected mode group.
+ * This function will evaluate the sync condition, if any, of all the queues
+ * bound to the given group.
+ *
+ * Return true if the sync condition of at least one queue has been satisfied.
+ */
+static bool check_sync_update_for_idle_group_protm(
+ struct kbase_queue_group *group)
+{
+ struct kbase_device *const kbdev = group->kctx->kbdev;
+ struct kbase_csf_scheduler *const scheduler =
+ &kbdev->csf.scheduler;
+ bool sync_update_done = false;
+ int i;
+
+ lockdep_assert_held(&scheduler->lock);
+
+ for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
+ struct kbase_queue *queue = group->bound_queues[i];
+
+ if (queue && queue->enabled && !sync_update_done) {
+ struct kbase_csf_cmd_stream_group_info *const ginfo =
+ &kbdev->csf.global_iface.groups[group->csg_nr];
+ struct kbase_csf_cmd_stream_info *const stream =
+ &ginfo->streams[queue->csi_index];
+ u32 status = kbase_csf_firmware_cs_output(
+ stream, CS_STATUS_WAIT);
+ unsigned long flags;
+
+ if (!CS_STATUS_WAIT_SYNC_WAIT_GET(status))
+ continue;
+
+ /* Save the information of sync object of the command
+ * queue so the callback function, 'group_sync_updated'
+ * can evaluate the sync object when it gets updated
+ * later.
+ */
+ queue->status_wait = status;
+ queue->sync_ptr = kbase_csf_firmware_cs_output(
+ stream, CS_STATUS_WAIT_SYNC_POINTER_LO);
+ queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output(
+ stream, CS_STATUS_WAIT_SYNC_POINTER_HI) << 32;
+ queue->sync_value = kbase_csf_firmware_cs_output(
+ stream, CS_STATUS_WAIT_SYNC_VALUE);
+
+ if (!evaluate_sync_update(queue))
+ continue;
+
+ /* Update csg_slots_idle_mask and group's run_state */
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+ clear_bit((unsigned int)group->csg_nr,
+ scheduler->csg_slots_idle_mask);
+ spin_unlock_irqrestore(&scheduler->interrupt_lock,
+ flags);
+ group->run_state = KBASE_CSF_GROUP_RUNNABLE;
+
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u);
+ sync_update_done = true;
+ }
+ }
+
+ return sync_update_done;
+}
+
+/**
+ * check_sync_update_for_idle_groups_protm() - Check the sync wait condition
+ * for the idle groups on slot
+ * during protected mode.
+ *
+ * @kbdev: Pointer to the GPU device
+ *
+ * This function checks the GPU queues of all the on-slot idle groups that
+ * have a higher priority than the active protected mode group, while the
+ * GPU is in protected mode.
+ *
+ * Return true if the sync condition of at least one queue in a group has been
+ * satisfied.
+ */
+static bool check_sync_update_for_idle_groups_protm(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ struct kbase_queue_group *protm_grp;
+ bool exit_protm = false;
+ unsigned long flags;
+ u32 num_groups;
+ u32 i;
+
+ lockdep_assert_held(&scheduler->lock);
+
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+ protm_grp = scheduler->active_protm_grp;
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+
+ if (!protm_grp)
+ return exit_protm;
+
+ num_groups = kbdev->csf.global_iface.group_num;
+
+ for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) {
+ struct kbase_csf_csg_slot *csg_slot =
+ &scheduler->csg_slots[i];
+ struct kbase_queue_group *group = csg_slot->resident_group;
+
+ if (group->scan_seq_num < protm_grp->scan_seq_num) {
+ /* If sync update has been performed for the group that
+ * has a higher priority than the protm group, then we
+ * need to exit protected mode.
+ */
+ if (check_sync_update_for_idle_group_protm(group))
+ exit_protm = true;
+ }
+ }
+
+ return exit_protm;
+}
+
+/**
* check_group_sync_update_worker() - Check the sync wait condition for all the
* blocked queue groups
*
* @work: Pointer to the context-specific work item for evaluating the wait
* condition for all the queue groups in idle_wait_groups list.
*
- * This function checks the gpu queues of all the groups present in
- * idle_wait_groups list of a context. If the sync wait condition
- * for at least one queue bound to the group has been satisfied then
- * the group is moved to the per context list of runnable groups so
- * that Scheduler can consider scheduling the group in next tick.
+ * This function checks the GPU queues of all the groups present in the
+ * idle_wait_groups list of a context, as well as all the on-slot idle
+ * groups when the GPU is in protected mode.
+ * If the sync wait condition for at least one queue bound to a group has
+ * been satisfied, the group is moved to the per-context list of runnable
+ * groups so that the Scheduler can consider scheduling it in the next tick,
+ * or exit protected mode.
*/
static void check_group_sync_update_worker(struct work_struct *work)
{
struct kbase_context *const kctx = container_of(work,
struct kbase_context, csf.sched.sync_update_work);
- struct kbase_csf_scheduler *const scheduler =
- &kctx->kbdev->csf.scheduler;
+ struct kbase_device *const kbdev = kctx->kbdev;
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
mutex_lock(&scheduler->lock);
@@ -4280,13 +4429,16 @@ static void check_group_sync_update_worker(struct work_struct *work)
* groups list of the context.
*/
update_idle_suspended_group_state(group);
- KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u);
}
}
} else {
WARN_ON(!list_empty(&kctx->csf.sched.idle_wait_groups));
}
+ if (check_sync_update_for_idle_groups_protm(kbdev))
+ scheduler_force_protm_exit(kbdev);
+
mutex_unlock(&scheduler->lock);
}
@@ -4402,7 +4554,6 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev)
scheduler->tock_pending_request = false;
scheduler->active_protm_grp = NULL;
scheduler->gpu_idle_fw_timer_enabled = false;
- scheduler->apply_async_protm = false;
scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS;
scheduler_doorbell_init(kbdev);
diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.h b/mali_kbase/csf/mali_kbase_csf_scheduler.h
index 20d1bc9..1607ff6 100644
--- a/mali_kbase/csf/mali_kbase_csf_scheduler.h
+++ b/mali_kbase/csf/mali_kbase_csf_scheduler.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
*
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -125,7 +125,7 @@ struct kbase_queue_group *kbase_csf_scheduler_get_group_on_slot(
* kbase_csf_scheduler_group_deschedule() - Deschedule a GPU command queue
* group from the firmware.
*
- * @group: Pointer to the queue group to be scheduled.
+ * @group: Pointer to the queue group to be descheduled.
*
* This function would disable the scheduling of GPU command queue group on
* firmware.
@@ -174,7 +174,7 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx);
int kbase_csf_scheduler_init(struct kbase_device *kbdev);
/**
- * kbase_csf_scheduler_context_init() - Terminate the context-specific part
+ * kbase_csf_scheduler_context_term() - Terminate the context-specific part
* for CSF scheduler.
*
* @kctx: Pointer to kbase context that is being terminated.
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
index 0b4fb5a..9e4ed17 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
*
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -596,14 +596,14 @@ int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
if (likely(heap)) {
err = alloc_new_chunk(heap, nr_in_flight, pending_frag_count,
new_chunk_ptr);
- }
- KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(
- kctx->kbdev, kctx->id, heap->heap_id,
- PFN_UP(heap->chunk_size * heap->max_chunks),
- PFN_UP(heap->chunk_size * heap->chunk_count), heap->max_chunks,
- heap->chunk_size, heap->chunk_count, heap->target_in_flight,
- nr_in_flight);
+ KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(
+ kctx->kbdev, kctx->id, heap->heap_id,
+ PFN_UP(heap->chunk_size * heap->max_chunks),
+ PFN_UP(heap->chunk_size * heap->chunk_count),
+ heap->max_chunks, heap->chunk_size, heap->chunk_count,
+ heap->target_in_flight, nr_in_flight);
+ }
mutex_unlock(&kctx->csf.tiler_heaps.lock);
diff --git a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
index 7e9eb75..afcc90b 100644
--- a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
+++ b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
@@ -289,10 +289,6 @@ int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev,
trace_buffer->trace_enable_entry_count = entry[6];
trace_buffer->num_pages = trace_buffer_data[i].size;
- /* Temporary workaround until handled by GPUCORE-27330 */
- if (!strcmp(trace_buffer_data[i].name, "timeline"))
- trace_buffer->updatable = 0;
-
for (j = 0; j < CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX; j++) {
trace_buffer->trace_enable_init_mask[j] =
trace_buffer_data[i].trace_enable_init_mask[j];
@@ -456,6 +452,7 @@ int kbase_csf_firmware_trace_buffer_update_trace_enable_bit(
dev_warn(
kbdev->dev,
"GPU reset already in progress when enabling firmware timeline.");
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return -EAGAIN;
}
}
diff --git a/mali_kbase/device/backend/mali_kbase_device_csf.c b/mali_kbase/device/backend/mali_kbase_device_csf.c
index f657bcb..cb2c2e2 100644
--- a/mali_kbase/device/backend/mali_kbase_device_csf.c
+++ b/mali_kbase/device/backend/mali_kbase_device_csf.c
@@ -23,6 +23,7 @@
#include "../mali_kbase_device.h"
#include <mali_kbase_hwaccess_backend.h>
+#include <mali_kbase_hwcnt_backend_csf_if_fw.h>
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_reset_gpu.h>
#include <csf/mali_kbase_csf.h>
@@ -170,6 +171,77 @@ static void kbase_backend_late_term(struct kbase_device *kbdev)
kbase_hwaccess_pm_term(kbdev);
}
+/**
+ * kbase_device_hwcnt_backend_csf_if_init - Create hardware counter backend
+ * firmware interface.
+ * @kbdev: Device pointer
+ */
+static int kbase_device_hwcnt_backend_csf_if_init(struct kbase_device *kbdev)
+{
+ return kbase_hwcnt_backend_csf_if_fw_create(
+ kbdev, &kbdev->hwcnt_backend_csf_if_fw);
+}
+
+/**
+ * kbase_device_hwcnt_backend_csf_if_term - Terminate hardware counter backend
+ * firmware interface.
+ * @kbdev: Device pointer
+ */
+static void kbase_device_hwcnt_backend_csf_if_term(struct kbase_device *kbdev)
+{
+ kbase_hwcnt_backend_csf_if_fw_destroy(&kbdev->hwcnt_backend_csf_if_fw);
+}
+
+/**
+ * kbase_device_hwcnt_backend_csf_init - Create hardware counter backend.
+ * @kbdev: Device pointer
+ */
+
+static int kbase_device_hwcnt_backend_csf_init(struct kbase_device *kbdev)
+{
+ return kbase_hwcnt_backend_csf_create(
+ &kbdev->hwcnt_backend_csf_if_fw,
+ KBASE_HWCNT_BACKEND_CSF_RING_BUFFER_COUNT,
+ &kbdev->hwcnt_gpu_iface);
+}
+
+/**
+ * kbase_device_hwcnt_backend_csf_term - Terminate hardware counter backend.
+ * @kbdev: Device pointer
+ */
+static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev)
+{
+ kbase_hwcnt_backend_csf_destroy(&kbdev->hwcnt_gpu_iface);
+}
+
+/**
+ * kbase_device_hwcnt_backend_csf_metadata_init - Initialize hardware counter
+ * metadata.
+ * @kbdev: Device pointer
+ */
+static int
+kbase_device_hwcnt_backend_csf_metadata_init(struct kbase_device *kbdev)
+{
+ /* For CSF GPUs, the HWC metadata needs to query information from the CSF
+ * firmware, so it can only be initialized after the firmware has been
+ * initialized. Firmware initialization in turn depends on the HWC backend
+ * being initialized, which is why HWC metadata initialization is kept
+ * separate from HWC backend initialization.
+ */
+ return kbase_hwcnt_backend_csf_metadata_init(&kbdev->hwcnt_gpu_iface);
+}
+
+/**
+ * kbase_device_hwcnt_backend_csf_metadata_term - Terminate hardware counter
+ * metadata.
+ * @kbdev: Device pointer
+ */
+static void
+kbase_device_hwcnt_backend_csf_metadata_term(struct kbase_device *kbdev)
+{
+ kbase_hwcnt_backend_csf_metadata_term(&kbdev->hwcnt_gpu_iface);
+}
+
static const struct kbase_device_init dev_init[] = {
#ifdef CONFIG_MALI_NO_MALI
{kbase_gpu_device_create, kbase_gpu_device_destroy,
@@ -244,12 +316,10 @@ static const struct kbase_device_init dev_init[] = {
* paragraph that starts with "Word of warning", currently the
* second-last paragraph.
*/
- {kbase_sysfs_init, kbase_sysfs_term, "SysFS group creation failed"},
+ {kbase_sysfs_init, kbase_sysfs_term,
+ "SysFS group creation failed"},
{kbase_device_misc_register, kbase_device_misc_deregister,
"Misc device registration failed"},
-#ifdef CONFIG_MALI_BUSLOG
- {buslog_init, buslog_term, "Bus log client registration failed"},
-#endif
{kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer,
"GPU property population failed"},
#endif
diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
index 4d11a82..259e42a 100644
--- a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
+++ b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
@@ -69,17 +69,9 @@ static bool kbase_gpu_fault_interrupt(struct kbase_device *kbdev)
if (!as_valid || (as_nr == MCU_AS_NR)) {
kbase_report_gpu_fault(kbdev, status, as_nr, as_valid);
- /* MCU bus fault could mean hardware counters will stop
- * working.
- * Put the backend into the unrecoverable error state to
- * cause current and subsequent counter operations to
- * immediately fail, avoiding the risk of a hang.
- */
- kbase_hwcnt_backend_csf_on_unrecoverable_error(
- &kbdev->hwcnt_gpu_iface);
-
dev_err(kbdev->dev, "GPU bus fault triggering gpu-reset ...\n");
- if (kbase_prepare_to_reset_gpu(kbdev))
+ if (kbase_prepare_to_reset_gpu(
+ kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
kbase_reset_gpu(kbdev);
} else {
/* Handle Bus fault */
@@ -133,16 +125,8 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
}
kbase_csf_scheduler_spin_unlock(kbdev, flags);
- /* Protected fault means we're unlikely to have the counter
- * operations we might do during reset acknowledged.
- * Put the backend into the unrecoverable error state to cause
- * current and subsequent counter operations to immediately
- * fail, avoiding the risk of a hang.
- */
- kbase_hwcnt_backend_csf_on_unrecoverable_error(
- &kbdev->hwcnt_gpu_iface);
-
- if (kbase_prepare_to_reset_gpu(kbdev))
+ if (kbase_prepare_to_reset_gpu(
+ kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
kbase_reset_gpu(kbdev);
}
diff --git a/mali_kbase/device/backend/mali_kbase_device_jm.c b/mali_kbase/device/backend/mali_kbase_device_jm.c
index 8052fba..9301310 100644
--- a/mali_kbase/device/backend/mali_kbase_device_jm.c
+++ b/mali_kbase/device/backend/mali_kbase_device_jm.c
@@ -21,6 +21,7 @@
#include "../mali_kbase_device_internal.h"
#include "../mali_kbase_device.h"
+#include "../mali_kbase_hwaccess_instr.h"
#include <mali_kbase_config_defaults.h>
#include <mali_kbase_hwaccess_backend.h>
@@ -107,6 +108,7 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
return 0;
fail_update_l2_features:
+ kbase_backend_devfreq_term(kbdev);
fail_devfreq_init:
kbase_job_slot_term(kbdev);
fail_job_slot:
@@ -144,6 +146,16 @@ static void kbase_backend_late_term(struct kbase_device *kbdev)
kbase_hwaccess_pm_term(kbdev);
}
+static int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev)
+{
+ return kbase_hwcnt_backend_jm_create(kbdev, &kbdev->hwcnt_gpu_iface);
+}
+
+static void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev)
+{
+ kbase_hwcnt_backend_jm_destroy(&kbdev->hwcnt_gpu_iface);
+}
+
static const struct kbase_device_init dev_init[] = {
#ifdef CONFIG_MALI_NO_MALI
{kbase_gpu_device_create, kbase_gpu_device_destroy,
@@ -183,6 +195,8 @@ static const struct kbase_device_init dev_init[] = {
{kbase_clk_rate_trace_manager_init,
kbase_clk_rate_trace_manager_term,
"Clock rate trace manager initialization failed"},
+ {kbase_instr_backend_init, kbase_instr_backend_term,
+ "Instrumentation backend initialization failed"},
{kbase_device_hwcnt_backend_jm_init,
kbase_device_hwcnt_backend_jm_term,
"GPU hwcnt backend creation failed"},
@@ -215,9 +229,6 @@ static const struct kbase_device_init dev_init[] = {
{kbase_sysfs_init, kbase_sysfs_term, "SysFS group creation failed"},
{kbase_device_misc_register, kbase_device_misc_deregister,
"Misc device registration failed"},
-#ifdef CONFIG_MALI_BUSLOG
- {buslog_init, buslog_term, "Bus log client registration failed"},
-#endif
{kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer,
"GPU property population failed"},
#endif
@@ -254,7 +265,8 @@ int kbase_device_init(struct kbase_device *kbdev)
for (i = 0; i < ARRAY_SIZE(dev_init); i++) {
err = dev_init[i].init(kbdev);
if (err) {
- dev_err(kbdev->dev, "%s error = %d\n",
+ if (err != -EPROBE_DEFER)
+ dev_err(kbdev->dev, "%s error = %d\n",
dev_init[i].err_mes, err);
kbase_device_term_partial(kbdev, i);
break;
diff --git a/mali_kbase/device/mali_kbase_device.c b/mali_kbase/device/mali_kbase_device.c
index a90c8cd..5e900d0 100644
--- a/mali_kbase/device/mali_kbase_device.c
+++ b/mali_kbase/device/mali_kbase_device.c
@@ -40,9 +40,6 @@
#include <tl/mali_kbase_timeline.h>
#include "mali_kbase_vinstr.h"
-#if MALI_USE_CSF
-#include <mali_kbase_hwcnt_backend_csf_if_fw.h>
-#endif
#include "mali_kbase_hwcnt_context.h"
#include "mali_kbase_hwcnt_virtualizer.h"
@@ -227,10 +224,6 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
if (err)
goto dma_set_mask_failed;
-#if !MALI_USE_CSF
- spin_lock_init(&kbdev->hwcnt.lock);
-#endif
-
err = kbase_ktrace_init(kbdev);
if (err)
goto term_as;
@@ -241,20 +234,11 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
atomic_set(&kbdev->ctx_num, 0);
-#if !MALI_USE_CSF
- err = kbase_instr_backend_init(kbdev);
- if (err)
- goto term_trace;
-#endif
-
kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD;
kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS;
- if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
- kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
- else
- kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
+ kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
mutex_init(&kbdev->kctx_list_lock);
INIT_LIST_HEAD(&kbdev->kctx_list);
@@ -263,11 +247,6 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
return 0;
-#if !MALI_USE_CSF
-term_trace:
- kbase_ktrace_term(kbdev);
-#endif
-
term_as:
kbase_device_all_as_term(kbdev);
dma_set_mask_failed:
@@ -285,10 +264,6 @@ void kbase_device_misc_term(struct kbase_device *kbdev)
kbase_debug_assert_register_hook(NULL, NULL);
#endif
-#if !MALI_USE_CSF
- kbase_instr_backend_term(kbdev);
-#endif
-
kbase_ktrace_term(kbdev);
kbase_device_all_as_term(kbdev);
@@ -311,60 +286,6 @@ void kbase_increment_device_id(void)
kbase_dev_nr++;
}
-#if MALI_USE_CSF
-
-int kbase_device_hwcnt_backend_csf_if_init(struct kbase_device *kbdev)
-{
- return kbase_hwcnt_backend_csf_if_fw_create(
- kbdev, &kbdev->hwcnt_backend_csf_if_fw);
-}
-
-void kbase_device_hwcnt_backend_csf_if_term(struct kbase_device *kbdev)
-{
- kbase_hwcnt_backend_csf_if_fw_destroy(&kbdev->hwcnt_backend_csf_if_fw);
-}
-
-int kbase_device_hwcnt_backend_csf_init(struct kbase_device *kbdev)
-{
- return kbase_hwcnt_backend_csf_create(
- &kbdev->hwcnt_backend_csf_if_fw,
- KBASE_HWCNT_BACKEND_CSF_RING_BUFFER_COUNT,
- &kbdev->hwcnt_gpu_iface);
-}
-
-void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev)
-{
- kbase_hwcnt_backend_csf_destroy(&kbdev->hwcnt_gpu_iface);
-}
-
-int kbase_device_hwcnt_backend_csf_metadata_init(struct kbase_device *kbdev)
-{
- /* For CSF GPUs, HWC metadata needs to query informatoin from CSF
- * firmware, so the initialization of HWC metadata only can be called
- * after firmware initialised, but firmware initialization depends on
- * HWC backend initialization, so we need to separate HWC backend
- * metadata initialization from HWC backend initialization.
- */
- return kbase_hwcnt_backend_csf_metadata_init(&kbdev->hwcnt_gpu_iface);
-}
-
-void kbase_device_hwcnt_backend_csf_metadata_term(struct kbase_device *kbdev)
-{
- kbase_hwcnt_backend_csf_metadata_term(&kbdev->hwcnt_gpu_iface);
-}
-#else
-
-int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev)
-{
- return kbase_hwcnt_backend_jm_create(kbdev, &kbdev->hwcnt_gpu_iface);
-}
-
-void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev)
-{
- kbase_hwcnt_backend_jm_destroy(&kbdev->hwcnt_gpu_iface);
-}
-#endif /* MALI_USE_CSF */
-
int kbase_device_hwcnt_context_init(struct kbase_device *kbdev)
{
return kbase_hwcnt_context_init(&kbdev->hwcnt_gpu_iface,
@@ -484,7 +405,14 @@ int kbase_device_early_init(struct kbase_device *kbdev)
/* We're done accessing the GPU registers for now. */
kbase_pm_register_access_disable(kbdev);
+#ifdef CONFIG_MALI_ARBITER_SUPPORT
+ if (kbdev->arb.arb_if)
+ err = kbase_arbiter_pm_install_interrupts(kbdev);
+ else
+ err = kbase_install_interrupts(kbdev);
+#else
err = kbase_install_interrupts(kbdev);
+#endif
if (err)
goto fail_interrupts;
diff --git a/mali_kbase/device/mali_kbase_device_internal.h b/mali_kbase/device/mali_kbase_device_internal.h
index 2705e67..067f33c 100644
--- a/mali_kbase/device/mali_kbase_device_internal.h
+++ b/mali_kbase/device/mali_kbase_device_internal.h
@@ -42,18 +42,6 @@ void kbase_device_vinstr_term(struct kbase_device *kbdev);
int kbase_device_timeline_init(struct kbase_device *kbdev);
void kbase_device_timeline_term(struct kbase_device *kbdev);
-#if MALI_USE_CSF
-int kbase_device_hwcnt_backend_csf_init(struct kbase_device *kbdev);
-void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev);
-int kbase_device_hwcnt_backend_csf_if_init(struct kbase_device *kbdev);
-void kbase_device_hwcnt_backend_csf_if_term(struct kbase_device *kbdev);
-int kbase_device_hwcnt_backend_csf_metadata_init(struct kbase_device *kbdev);
-void kbase_device_hwcnt_backend_csf_metadata_term(struct kbase_device *kbdev);
-#else
-int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev);
-void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev);
-#endif
-
int kbase_device_hwcnt_context_init(struct kbase_device *kbdev);
void kbase_device_hwcnt_context_term(struct kbase_device *kbdev);
diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c
index fa70afc..16eae0a 100644
--- a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c
+++ b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
*
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
*/
#include <mali_kbase.h>
-#include "csf/mali_gpu_csf_registers.h"
+#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h>
#include "../mali_kbase_gpu_fault.h"
const char *kbase_gpu_exception_name(u32 const exception_code)
diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h
deleted file mode 100644
index 65a06d2..0000000
--- a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h
+++ /dev/null
@@ -1,334 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-#ifndef _KBASE_GPU_REGMAP_CSF_H_
-#define _KBASE_GPU_REGMAP_CSF_H_
-
-#if !MALI_USE_CSF
-#error "Cannot be compiled with JM"
-#endif
-
-/* IPA control registers */
-
-#define IPA_CONTROL_BASE 0x40000
-#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE+(r))
-#define COMMAND 0x000 /* (WO) Command register */
-#define STATUS 0x004 /* (RO) Status register */
-#define TIMER 0x008 /* (RW) Timer control register */
-
-#define SELECT_CSHW_LO 0x010 /* (RW) Counter select for CS hardware, low word */
-#define SELECT_CSHW_HI 0x014 /* (RW) Counter select for CS hardware, high word */
-#define SELECT_MEMSYS_LO 0x018 /* (RW) Counter select for Memory system, low word */
-#define SELECT_MEMSYS_HI 0x01C /* (RW) Counter select for Memory system, high word */
-#define SELECT_TILER_LO 0x020 /* (RW) Counter select for Tiler cores, low word */
-#define SELECT_TILER_HI 0x024 /* (RW) Counter select for Tiler cores, high word */
-#define SELECT_SHADER_LO 0x028 /* (RW) Counter select for Shader cores, low word */
-#define SELECT_SHADER_HI 0x02C /* (RW) Counter select for Shader cores, high word */
-
-/* Accumulated counter values for CS hardware */
-#define VALUE_CSHW_BASE 0x100
-#define VALUE_CSHW_REG_LO(n) (VALUE_CSHW_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
-#define VALUE_CSHW_REG_HI(n) (VALUE_CSHW_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
-
-/* Accumulated counter values for memory system */
-#define VALUE_MEMSYS_BASE 0x140
-#define VALUE_MEMSYS_REG_LO(n) (VALUE_MEMSYS_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
-#define VALUE_MEMSYS_REG_HI(n) (VALUE_MEMSYS_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
-
-#define VALUE_TILER_BASE 0x180
-#define VALUE_TILER_REG_LO(n) (VALUE_TILER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
-#define VALUE_TILER_REG_HI(n) (VALUE_TILER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
-
-#define VALUE_SHADER_BASE 0x1C0
-#define VALUE_SHADER_REG_LO(n) (VALUE_SHADER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
-#define VALUE_SHADER_REG_HI(n) (VALUE_SHADER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
-
-
-#include "csf/mali_gpu_csf_control_registers.h"
-
-/* Set to implementation defined, outer caching */
-#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
-/* Set to write back memory, outer caching */
-#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull
-/* Set to inner non-cacheable, outer-non-cacheable
- * Setting defined by the alloc bits is ignored, but set to a valid encoding:
- * - no-alloc on read
- * - no alloc on write
- */
-#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull
-/* Set to shared memory, that is inner cacheable on ACE and inner or outer
- * shared, otherwise inner non-cacheable.
- * Outer cacheable if inner or outer shared, otherwise outer non-cacheable.
- */
-#define AS_MEMATTR_AARCH64_SHARED 0x8ull
-
-/* Symbols for default MEMATTR to use
- * Default is - HW implementation defined caching
- */
-#define AS_MEMATTR_INDEX_DEFAULT 0
-#define AS_MEMATTR_INDEX_DEFAULT_ACE 3
-
-/* HW implementation defined caching */
-#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0
-/* Force cache on */
-#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1
-/* Write-alloc */
-#define AS_MEMATTR_INDEX_WRITE_ALLOC 2
-/* Outer coherent, inner implementation defined policy */
-#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3
-/* Outer coherent, write alloc inner */
-#define AS_MEMATTR_INDEX_OUTER_WA 4
-/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */
-#define AS_MEMATTR_INDEX_NON_CACHEABLE 5
-/* Normal memory, shared between MCU and Host */
-#define AS_MEMATTR_INDEX_SHARED 6
-
-/* Configuration bits for the CSF. */
-#define CSF_CONFIG 0xF00
-
-/* CSF_CONFIG register */
-#define CSF_CONFIG_FORCE_COHERENCY_FEATURES_SHIFT 2
-
-/* GPU control registers */
-#define CORE_FEATURES 0x008 /* () Shader Core Features */
-#define MCU_CONTROL 0x700
-#define MCU_STATUS 0x704
-
-#define MCU_CNTRL_ENABLE (1 << 0)
-#define MCU_CNTRL_AUTO (1 << 1)
-#define MCU_CNTRL_DISABLE (0)
-
-#define MCU_STATUS_HALTED (1 << 1)
-
-#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory
- * region base address, low word
- */
-#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory
- * region base address, high word
- */
-#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter
- * configuration
- */
-
-#define PRFCNT_CSHW_EN 0x06C /* (RW) Performance counter
- * enable for CS Hardware
- */
-
-#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable
- * flags for shader cores
- */
-#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable
- * flags for tiler
- */
-#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable
- * flags for MMU/L2 cache
- */
-
-/* JOB IRQ flags */
-#define JOB_IRQ_GLOBAL_IF (1 << 31) /* Global interface interrupt received */
-
-/* GPU_COMMAND codes */
-#define GPU_COMMAND_CODE_NOP 0x00 /* No operation, nothing happens */
-#define GPU_COMMAND_CODE_RESET 0x01 /* Reset the GPU */
-#define GPU_COMMAND_CODE_PRFCNT 0x02 /* Clear or sample performance counters */
-#define GPU_COMMAND_CODE_TIME 0x03 /* Configure time sources */
-#define GPU_COMMAND_CODE_FLUSH_CACHES 0x04 /* Flush caches */
-#define GPU_COMMAND_CODE_SET_PROTECTED_MODE 0x05 /* Places the GPU in protected mode */
-#define GPU_COMMAND_CODE_FINISH_HALT 0x06 /* Halt CSF */
-#define GPU_COMMAND_CODE_CLEAR_FAULT 0x07 /* Clear GPU_FAULTSTATUS and GPU_FAULTADDRESS, TODX */
-
-/* GPU_COMMAND_RESET payloads */
-
-/* This will leave the state of active jobs UNDEFINED, but will leave the external bus in a defined and idle state.
- * Power domains will remain powered on.
- */
-#define GPU_COMMAND_RESET_PAYLOAD_FAST_RESET 0x00
-
-/* This will leave the state of active CSs UNDEFINED, but will leave the external bus in a defined and
- * idle state.
- */
-#define GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET 0x01
-
-/* This reset will leave the state of currently active streams UNDEFINED, will likely lose data, and may leave
- * the system bus in an inconsistent state. Use only as a last resort when nothing else works.
- */
-#define GPU_COMMAND_RESET_PAYLOAD_HARD_RESET 0x02
-
-/* GPU_COMMAND_PRFCNT payloads */
-#define GPU_COMMAND_PRFCNT_PAYLOAD_SAMPLE 0x01 /* Sample performance counters */
-#define GPU_COMMAND_PRFCNT_PAYLOAD_CLEAR 0x02 /* Clear performance counters */
-
-/* GPU_COMMAND_TIME payloads */
-#define GPU_COMMAND_TIME_DISABLE 0x00 /* Disable cycle counter */
-#define GPU_COMMAND_TIME_ENABLE 0x01 /* Enable cycle counter */
-
-/* GPU_COMMAND_FLUSH_CACHES payloads */
-#define GPU_COMMAND_FLUSH_PAYLOAD_NONE 0x00 /* No flush */
-#define GPU_COMMAND_FLUSH_PAYLOAD_CLEAN 0x01 /* Clean the caches */
-#define GPU_COMMAND_FLUSH_PAYLOAD_INVALIDATE 0x02 /* Invalidate the caches */
-#define GPU_COMMAND_FLUSH_PAYLOAD_CLEAN_INVALIDATE 0x03 /* Clean and invalidate the caches */
-
-/* GPU_COMMAND command + payload */
-#define GPU_COMMAND_CODE_PAYLOAD(opcode, payload) \
- ((u32)opcode | ((u32)payload << 8))
-
-/* Final GPU_COMMAND form */
-/* No operation, nothing happens */
-#define GPU_COMMAND_NOP \
- GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_NOP, 0)
-
-/* Stop all external bus interfaces, and then reset the entire GPU. */
-#define GPU_COMMAND_SOFT_RESET \
- GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET)
-
-/* Immediately reset the entire GPU. */
-#define GPU_COMMAND_HARD_RESET \
- GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_HARD_RESET)
-
-/* Clear all performance counters, setting them all to zero. */
-#define GPU_COMMAND_PRFCNT_CLEAR \
- GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_PRFCNT, GPU_COMMAND_PRFCNT_PAYLOAD_CLEAR)
-
-/* Sample all performance counters, writing them out to memory */
-#define GPU_COMMAND_PRFCNT_SAMPLE \
- GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_PRFCNT, GPU_COMMAND_PRFCNT_PAYLOAD_SAMPLE)
-
-/* Starts the cycle counter, and system timestamp propagation */
-#define GPU_COMMAND_CYCLE_COUNT_START \
- GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_ENABLE)
-
-/* Stops the cycle counter, and system timestamp propagation */
-#define GPU_COMMAND_CYCLE_COUNT_STOP \
- GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_DISABLE)
-
-/* Clean all caches */
-#define GPU_COMMAND_CLEAN_CACHES \
- GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, GPU_COMMAND_FLUSH_PAYLOAD_CLEAN)
-
-/* Clean and invalidate all caches */
-#define GPU_COMMAND_CLEAN_INV_CACHES \
- GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, GPU_COMMAND_FLUSH_PAYLOAD_CLEAN_INVALIDATE)
-
-/* Places the GPU in protected mode */
-#define GPU_COMMAND_SET_PROTECTED_MODE \
- GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_SET_PROTECTED_MODE, 0)
-
-/* Halt CSF */
-#define GPU_COMMAND_FINISH_HALT \
- GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FINISH_HALT, 0)
-
-/* Clear GPU faults */
-#define GPU_COMMAND_CLEAR_FAULT \
- GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_CLEAR_FAULT, 0)
-
-/* End Command Values */
-
-/* GPU_FAULTSTATUS register */
-#define GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0
-#define GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFFul)
-#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \
- (((reg_val)&GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK) \
- >> GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT)
-#define GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT 8
-#define GPU_FAULTSTATUS_ACCESS_TYPE_MASK \
- (0x3ul << GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT)
-
-#define GPU_FAULTSTATUS_ADDR_VALID_SHIFT 10
-#define GPU_FAULTSTATUS_ADDR_VALID_FLAG \
- (1ul << GPU_FAULTSTATUS_ADDR_VALID_SHIFT)
-
-#define GPU_FAULTSTATUS_JASID_VALID_SHIFT 11
-#define GPU_FAULTSTATUS_JASID_VALID_FLAG \
- (1ul << GPU_FAULTSTATUS_JASID_VALID_SHIFT)
-
-#define GPU_FAULTSTATUS_JASID_SHIFT 12
-#define GPU_FAULTSTATUS_JASID_MASK (0xF << GPU_FAULTSTATUS_JASID_SHIFT)
-#define GPU_FAULTSTATUS_JASID_GET(reg_val) \
- (((reg_val)&GPU_FAULTSTATUS_JASID_MASK) >> GPU_FAULTSTATUS_JASID_SHIFT)
-#define GPU_FAULTSTATUS_JASID_SET(reg_val, value) \
- (((reg_val) & ~GPU_FAULTSTATUS_JASID_MASK) | \
- (((value) << GPU_FAULTSTATUS_JASID_SHIFT) & GPU_FAULTSTATUS_JASID_MASK))
-
-#define GPU_FAULTSTATUS_SOURCE_ID_SHIFT 16
-#define GPU_FAULTSTATUS_SOURCE_ID_MASK \
- (0xFFFFul << GPU_FAULTSTATUS_SOURCE_ID_SHIFT)
-/* End GPU_FAULTSTATUS register */
-
-/* GPU_FAULTSTATUS_ACCESS_TYPE values */
-#define GPU_FAULTSTATUS_ACCESS_TYPE_ATOMIC 0x0
-#define GPU_FAULTSTATUS_ACCESS_TYPE_EXECUTE 0x1
-#define GPU_FAULTSTATUS_ACCESS_TYPE_READ 0x2
-#define GPU_FAULTSTATUS_ACCESS_TYPE_WRITE 0x3
-/* End of GPU_FAULTSTATUS_ACCESS_TYPE values */
-
-/* Implementation-dependent exception codes used to indicate CSG
- * and CS errors that are not specified in the specs.
- */
-#define GPU_EXCEPTION_TYPE_SW_FAULT_0 ((u8)0x70)
-#define GPU_EXCEPTION_TYPE_SW_FAULT_1 ((u8)0x71)
-#define GPU_EXCEPTION_TYPE_SW_FAULT_2 ((u8)0x72)
-
-/* GPU_FAULTSTATUS_EXCEPTION_TYPE values */
-#define GPU_FAULTSTATUS_EXCEPTION_TYPE_OK 0x00
-#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_BUS_FAULT 0x80
-#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_SHAREABILITY_FAULT 0x88
-#define GPU_FAULTSTATUS_EXCEPTION_TYPE_SYSTEM_SHAREABILITY_FAULT 0x89
-#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_CACHEABILITY_FAULT 0x8A
-/* End of GPU_FAULTSTATUS_EXCEPTION_TYPE values */
-
-#define GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT GPU_U(10)
-#define GPU_FAULTSTATUS_ADDRESS_VALID_MASK (GPU_U(0x1) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT)
-#define GPU_FAULTSTATUS_ADDRESS_VALID_GET(reg_val) \
- (((reg_val)&GPU_FAULTSTATUS_ADDRESS_VALID_MASK) >> GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT)
-#define GPU_FAULTSTATUS_ADDRESS_VALID_SET(reg_val, value) \
- (((reg_val) & ~GPU_FAULTSTATUS_ADDRESS_VALID_MASK) | \
- (((value) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) & GPU_FAULTSTATUS_ADDRESS_VALID_MASK))
-
-/* IRQ flags */
-#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */
-#define GPU_PROTECTED_FAULT (1 << 1) /* A GPU fault has occurred in protected mode */
-#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */
-#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */
-#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */
-#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */
-#define DOORBELL_MIRROR (1 << 18) /* Mirrors the doorbell interrupt line to the CPU */
-#define MCU_STATUS_GPU_IRQ (1 << 19) /* MCU requires attention */
-
-/*
- * In Debug build,
- * GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE is used to clear and unmask interupts sources of GPU_IRQ
- * by writing it onto GPU_IRQ_CLEAR/MASK registers.
- *
- * In Release build,
- * GPU_IRQ_REG_COMMON is used.
- *
- * Note:
- * CLEAN_CACHES_COMPLETED - Used separately for cache operation.
- * DOORBELL_MIRROR - Do not have it included for GPU_IRQ_REG_COMMON
- * as it can't be cleared by GPU_IRQ_CLEAR, thus interrupt storm might happen
- */
-#define GPU_IRQ_REG_COMMON (GPU_FAULT | GPU_PROTECTED_FAULT | RESET_COMPLETED \
- | POWER_CHANGED_ALL | MCU_STATUS_GPU_IRQ)
-
-/* GPU_CONTROL_MCU.GPU_IRQ_RAWSTAT */
-#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when performance count sample has completed */
-
-#endif /* _KBASE_GPU_REGMAP_CSF_H_ */
diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h
deleted file mode 100644
index 1669d5a..0000000
--- a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h
+++ /dev/null
@@ -1,287 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-#ifndef _KBASE_GPU_REGMAP_JM_H_
-#define _KBASE_GPU_REGMAP_JM_H_
-
-#if MALI_USE_CSF
-#error "Cannot be compiled with CSF"
-#endif
-
-/* Set to implementation defined, outer caching */
-#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
-/* Set to write back memory, outer caching */
-#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull
-/* Set to inner non-cacheable, outer-non-cacheable
- * Setting defined by the alloc bits is ignored, but set to a valid encoding:
- * - no-alloc on read
- * - no alloc on write
- */
-#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull
-
-/* Symbols for default MEMATTR to use
- * Default is - HW implementation defined caching
- */
-#define AS_MEMATTR_INDEX_DEFAULT 0
-#define AS_MEMATTR_INDEX_DEFAULT_ACE 3
-
-/* HW implementation defined caching */
-#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0
-/* Force cache on */
-#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1
-/* Write-alloc */
-#define AS_MEMATTR_INDEX_WRITE_ALLOC 2
-/* Outer coherent, inner implementation defined policy */
-#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3
-/* Outer coherent, write alloc inner */
-#define AS_MEMATTR_INDEX_OUTER_WA 4
-/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */
-#define AS_MEMATTR_INDEX_NON_CACHEABLE 5
-
-/* GPU control registers */
-
-#define CORE_FEATURES 0x008 /* (RO) Shader Core Features */
-#define JS_PRESENT 0x01C /* (RO) Job slots present */
-#define LATEST_FLUSH 0x038 /* (RO) Flush ID of latest
- * clean-and-invalidate operation
- */
-
-#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory
- * region base address, low word
- */
-#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory
- * region base address, high word
- */
-#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter
- * configuration
- */
-#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable
- * flags for Job Manager
- */
-#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable
- * flags for shader cores
- */
-#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable
- * flags for tiler
- */
-#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable
- * flags for MMU/L2 cache
- */
-
-#define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */
-#define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */
-#define JS2_FEATURES 0x0C8 /* (RO) Features of job slot 2 */
-#define JS3_FEATURES 0x0CC /* (RO) Features of job slot 3 */
-#define JS4_FEATURES 0x0D0 /* (RO) Features of job slot 4 */
-#define JS5_FEATURES 0x0D4 /* (RO) Features of job slot 5 */
-#define JS6_FEATURES 0x0D8 /* (RO) Features of job slot 6 */
-#define JS7_FEATURES 0x0DC /* (RO) Features of job slot 7 */
-#define JS8_FEATURES 0x0E0 /* (RO) Features of job slot 8 */
-#define JS9_FEATURES 0x0E4 /* (RO) Features of job slot 9 */
-#define JS10_FEATURES 0x0E8 /* (RO) Features of job slot 10 */
-#define JS11_FEATURES 0x0EC /* (RO) Features of job slot 11 */
-#define JS12_FEATURES 0x0F0 /* (RO) Features of job slot 12 */
-#define JS13_FEATURES 0x0F4 /* (RO) Features of job slot 13 */
-#define JS14_FEATURES 0x0F8 /* (RO) Features of job slot 14 */
-#define JS15_FEATURES 0x0FC /* (RO) Features of job slot 15 */
-
-#define JS_FEATURES_REG(n) GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2))
-
-#define JM_CONFIG 0xF00 /* (RW) Job manager configuration (implementation-specific) */
-
-/* Job control registers */
-
-#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
-#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. */
-
-#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */
-#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */
-#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */
-#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */
-#define JOB_SLOT4 0xA00 /* Configuration registers for job slot 4 */
-#define JOB_SLOT5 0xA80 /* Configuration registers for job slot 5 */
-#define JOB_SLOT6 0xB00 /* Configuration registers for job slot 6 */
-#define JOB_SLOT7 0xB80 /* Configuration registers for job slot 7 */
-#define JOB_SLOT8 0xC00 /* Configuration registers for job slot 8 */
-#define JOB_SLOT9 0xC80 /* Configuration registers for job slot 9 */
-#define JOB_SLOT10 0xD00 /* Configuration registers for job slot 10 */
-#define JOB_SLOT11 0xD80 /* Configuration registers for job slot 11 */
-#define JOB_SLOT12 0xE00 /* Configuration registers for job slot 12 */
-#define JOB_SLOT13 0xE80 /* Configuration registers for job slot 13 */
-#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */
-#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */
-
-#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
-
-#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */
-#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */
-#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */
-#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */
-#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */
-#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */
-#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */
-/* (RO) Extended affinity mask for job slot n*/
-#define JS_XAFFINITY 0x1C
-
-#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */
-#define JS_STATUS 0x24 /* (RO) Status register for job slot n */
-
-#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */
-#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */
-
-#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */
-#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */
-#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */
-/* (RW) Next extended affinity mask for job slot n */
-#define JS_XAFFINITY_NEXT 0x5C
-
-#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */
-
-#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */
-
-/* No JM-specific MMU control registers */
-/* No JM-specific MMU address space control registers */
-
-/* JS_COMMAND register commands */
-#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */
-#define JS_COMMAND_START 0x01 /* Start processing a job chain. Writing this value is ignored */
-#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */
-#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */
-#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
-#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
-#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
-#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
-
-#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */
-
-/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
-#define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0)
-#define JS_CONFIG_START_FLUSH_CLEAN (1u << 8)
-#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8)
-#define JS_CONFIG_START_MMU (1u << 10)
-#define JS_CONFIG_JOB_CHAIN_FLAG (1u << 11)
-#define JS_CONFIG_END_FLUSH_NO_ACTION JS_CONFIG_START_FLUSH_NO_ACTION
-#define JS_CONFIG_END_FLUSH_CLEAN (1u << 12)
-#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12)
-#define JS_CONFIG_ENABLE_FLUSH_REDUCTION (1u << 14)
-#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK (1u << 15)
-#define JS_CONFIG_THREAD_PRI(n) ((n) << 16)
-
-/* JS_XAFFINITY register values */
-#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0)
-#define JS_XAFFINITY_TILER_ENABLE (1u << 8)
-#define JS_XAFFINITY_CACHE_ENABLE (1u << 16)
-
-/* JS_STATUS register values */
-
-/* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h.
- * The values are separated to avoid dependency of userspace and kernel code.
- */
-
-/* Group of values representing the job status instead of a particular fault */
-#define JS_STATUS_NO_EXCEPTION_BASE 0x00
-#define JS_STATUS_INTERRUPTED (JS_STATUS_NO_EXCEPTION_BASE + 0x02) /* 0x02 means INTERRUPTED */
-#define JS_STATUS_STOPPED (JS_STATUS_NO_EXCEPTION_BASE + 0x03) /* 0x03 means STOPPED */
-#define JS_STATUS_TERMINATED (JS_STATUS_NO_EXCEPTION_BASE + 0x04) /* 0x04 means TERMINATED */
-
-/* General fault values */
-#define JS_STATUS_FAULT_BASE 0x40
-#define JS_STATUS_CONFIG_FAULT (JS_STATUS_FAULT_BASE) /* 0x40 means CONFIG FAULT */
-#define JS_STATUS_POWER_FAULT (JS_STATUS_FAULT_BASE + 0x01) /* 0x41 means POWER FAULT */
-#define JS_STATUS_READ_FAULT (JS_STATUS_FAULT_BASE + 0x02) /* 0x42 means READ FAULT */
-#define JS_STATUS_WRITE_FAULT (JS_STATUS_FAULT_BASE + 0x03) /* 0x43 means WRITE FAULT */
-#define JS_STATUS_AFFINITY_FAULT (JS_STATUS_FAULT_BASE + 0x04) /* 0x44 means AFFINITY FAULT */
-#define JS_STATUS_BUS_FAULT (JS_STATUS_FAULT_BASE + 0x08) /* 0x48 means BUS FAULT */
-
-/* Instruction or data faults */
-#define JS_STATUS_INSTRUCTION_FAULT_BASE 0x50
-#define JS_STATUS_INSTR_INVALID_PC (JS_STATUS_INSTRUCTION_FAULT_BASE) /* 0x50 means INSTR INVALID PC */
-#define JS_STATUS_INSTR_INVALID_ENC (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01) /* 0x51 means INSTR INVALID ENC */
-#define JS_STATUS_INSTR_TYPE_MISMATCH (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02) /* 0x52 means INSTR TYPE MISMATCH */
-#define JS_STATUS_INSTR_OPERAND_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03) /* 0x53 means INSTR OPERAND FAULT */
-#define JS_STATUS_INSTR_TLS_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04) /* 0x54 means INSTR TLS FAULT */
-#define JS_STATUS_INSTR_BARRIER_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05) /* 0x55 means INSTR BARRIER FAULT */
-#define JS_STATUS_INSTR_ALIGN_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06) /* 0x56 means INSTR ALIGN FAULT */
-/* NOTE: No fault with 0x57 code defined in spec. */
-#define JS_STATUS_DATA_INVALID_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08) /* 0x58 means DATA INVALID FAULT */
-#define JS_STATUS_TILE_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09) /* 0x59 means TILE RANGE FAULT */
-#define JS_STATUS_ADDRESS_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A) /* 0x5A means ADDRESS RANGE FAULT */
-
-/* Other faults */
-#define JS_STATUS_MEMORY_FAULT_BASE 0x60
-#define JS_STATUS_OUT_OF_MEMORY (JS_STATUS_MEMORY_FAULT_BASE) /* 0x60 means OUT OF MEMORY */
-#define JS_STATUS_UNKNOWN 0x7F /* 0x7F means UNKNOWN */
-
-/* JS<n>_FEATURES register */
-#define JS_FEATURE_NULL_JOB (1u << 1)
-#define JS_FEATURE_SET_VALUE_JOB (1u << 2)
-#define JS_FEATURE_CACHE_FLUSH_JOB (1u << 3)
-#define JS_FEATURE_COMPUTE_JOB (1u << 4)
-#define JS_FEATURE_VERTEX_JOB (1u << 5)
-#define JS_FEATURE_GEOMETRY_JOB (1u << 6)
-#define JS_FEATURE_TILER_JOB (1u << 7)
-#define JS_FEATURE_FUSED_JOB (1u << 8)
-#define JS_FEATURE_FRAGMENT_JOB (1u << 9)
-
-/* JM_CONFIG register */
-#define JM_TIMESTAMP_OVERRIDE (1ul << 0)
-#define JM_CLOCK_GATE_OVERRIDE (1ul << 1)
-#define JM_JOB_THROTTLE_ENABLE (1ul << 2)
-#define JM_JOB_THROTTLE_LIMIT_SHIFT (3)
-#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F)
-#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2)
-
-/* GPU_COMMAND values */
-#define GPU_COMMAND_NOP 0x00 /* No operation, nothing happens */
-#define GPU_COMMAND_SOFT_RESET 0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */
-#define GPU_COMMAND_HARD_RESET 0x02 /* Immediately reset the entire GPU. */
-#define GPU_COMMAND_PRFCNT_CLEAR 0x03 /* Clear all performance counters, setting them all to zero. */
-#define GPU_COMMAND_PRFCNT_SAMPLE 0x04 /* Sample all performance counters, writing them out to memory */
-#define GPU_COMMAND_CYCLE_COUNT_START 0x05 /* Starts the cycle counter, and system timestamp propagation */
-#define GPU_COMMAND_CYCLE_COUNT_STOP 0x06 /* Stops the cycle counter, and system timestamp propagation */
-#define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */
-#define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */
-#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */
-
-/* IRQ flags */
-#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */
-#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */
-#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */
-#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */
-#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */
-#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */
-#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */
-
-/*
- * In Debug build,
- * GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE is used to clear and enable interupts sources of GPU_IRQ
- * by writing it onto GPU_IRQ_CLEAR/MASK registers.
- *
- * In Release build,
- * GPU_IRQ_REG_COMMON is used.
- *
- * Note:
- * CLEAN_CACHES_COMPLETED - Used separately for cache operation.
- */
-#define GPU_IRQ_REG_COMMON (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \
- | POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED)
-
-#endif /* _KBASE_GPU_REGMAP_JM_H_ */
diff --git a/mali_kbase/gpu/mali_kbase_gpu.h b/mali_kbase/gpu/mali_kbase_gpu.h
deleted file mode 100644
index dba0e28..0000000
--- a/mali_kbase/gpu/mali_kbase_gpu.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-#ifndef _KBASE_GPU_H_
-#define _KBASE_GPU_H_
-
-#include "mali_kbase_gpu_regmap.h"
-#include "mali_kbase_gpu_fault.h"
-#include "mali_kbase_gpu_coherency.h"
-#include "mali_kbase_gpu_id.h"
-
-#endif /* _KBASE_GPU_H_ */
diff --git a/mali_kbase/gpu/mali_kbase_gpu_coherency.h b/mali_kbase/gpu/mali_kbase_gpu_coherency.h
deleted file mode 100644
index a075ed0..0000000
--- a/mali_kbase/gpu/mali_kbase_gpu_coherency.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *
- * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-#ifndef _KBASE_GPU_COHERENCY_H_
-#define _KBASE_GPU_COHERENCY_H_
-
-#define COHERENCY_ACE_LITE 0
-#define COHERENCY_ACE 1
-#define COHERENCY_NONE 31
-#define COHERENCY_FEATURE_BIT(x) (1 << (x))
-
-#endif /* _KBASE_GPU_COHERENCY_H_ */
diff --git a/mali_kbase/gpu/mali_kbase_gpu_id.h b/mali_kbase/gpu/mali_kbase_gpu_id.h
deleted file mode 100644
index 8d687c4..0000000
--- a/mali_kbase/gpu/mali_kbase_gpu_id.h
+++ /dev/null
@@ -1,118 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *
- * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-#ifndef _KBASE_GPU_ID_H_
-#define _KBASE_GPU_ID_H_
-
-/* GPU_ID register */
-#define GPU_ID_VERSION_STATUS_SHIFT 0
-#define GPU_ID_VERSION_MINOR_SHIFT 4
-#define GPU_ID_VERSION_MAJOR_SHIFT 12
-#define GPU_ID_VERSION_PRODUCT_ID_SHIFT 16
-#define GPU_ID_VERSION_STATUS (0xFu << GPU_ID_VERSION_STATUS_SHIFT)
-#define GPU_ID_VERSION_MINOR (0xFFu << GPU_ID_VERSION_MINOR_SHIFT)
-#define GPU_ID_VERSION_MAJOR (0xFu << GPU_ID_VERSION_MAJOR_SHIFT)
-#define GPU_ID_VERSION_PRODUCT_ID (0xFFFFu << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
-
-#define GPU_ID2_VERSION_STATUS_SHIFT 0
-#define GPU_ID2_VERSION_MINOR_SHIFT 4
-#define GPU_ID2_VERSION_MAJOR_SHIFT 12
-#define GPU_ID2_PRODUCT_MAJOR_SHIFT 16
-#define GPU_ID2_ARCH_REV_SHIFT 20
-#define GPU_ID2_ARCH_MINOR_SHIFT 24
-#define GPU_ID2_ARCH_MAJOR_SHIFT 28
-#define GPU_ID2_VERSION_STATUS (0xFu << GPU_ID2_VERSION_STATUS_SHIFT)
-#define GPU_ID2_VERSION_MINOR (0xFFu << GPU_ID2_VERSION_MINOR_SHIFT)
-#define GPU_ID2_VERSION_MAJOR (0xFu << GPU_ID2_VERSION_MAJOR_SHIFT)
-#define GPU_ID2_PRODUCT_MAJOR (0xFu << GPU_ID2_PRODUCT_MAJOR_SHIFT)
-#define GPU_ID2_ARCH_REV (0xFu << GPU_ID2_ARCH_REV_SHIFT)
-#define GPU_ID2_ARCH_MINOR (0xFu << GPU_ID2_ARCH_MINOR_SHIFT)
-#define GPU_ID2_ARCH_MAJOR (0xFu << GPU_ID2_ARCH_MAJOR_SHIFT)
-#define GPU_ID2_PRODUCT_MODEL (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR)
-#define GPU_ID2_VERSION (GPU_ID2_VERSION_MAJOR | \
- GPU_ID2_VERSION_MINOR | \
- GPU_ID2_VERSION_STATUS)
-
-/* Helper macro to create a partial GPU_ID (new format) that defines
- * a product ignoring its version.
- */
-#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \
- ((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \
- (((u32)arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT) | \
- (((u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT) | \
- (((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
-
-/* Helper macro to create a partial GPU_ID (new format) that specifies the
- * revision (major, minor, status) of a product
- */
-#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \
- ((((u32)version_major) << GPU_ID2_VERSION_MAJOR_SHIFT) | \
- (((u32)version_minor) << GPU_ID2_VERSION_MINOR_SHIFT) | \
- (((u32)version_status) << GPU_ID2_VERSION_STATUS_SHIFT))
-
-/* Helper macro to create a complete GPU_ID (new format) */
-#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \
- version_major, version_minor, version_status) \
- (GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \
- product_major) | \
- GPU_ID2_VERSION_MAKE(version_major, version_minor, \
- version_status))
-
-/* Helper macro to create a partial GPU_ID (new format) that identifies
- * a particular GPU model by its arch_major and product_major.
- */
-#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \
- ((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \
- (((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
-
-/* Strip off the non-relevant bits from a product_id value and make it suitable
- * for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU
- * model.
- */
-#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \
- ((((u32)product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \
- GPU_ID2_PRODUCT_MODEL)
-
-#define GPU_ID2_PRODUCT_TMIX GPU_ID2_MODEL_MAKE(6, 0)
-#define GPU_ID2_PRODUCT_THEX GPU_ID2_MODEL_MAKE(6, 1)
-#define GPU_ID2_PRODUCT_TSIX GPU_ID2_MODEL_MAKE(7, 0)
-#define GPU_ID2_PRODUCT_TDVX GPU_ID2_MODEL_MAKE(7, 3)
-#define GPU_ID2_PRODUCT_TNOX GPU_ID2_MODEL_MAKE(7, 1)
-#define GPU_ID2_PRODUCT_TGOX GPU_ID2_MODEL_MAKE(7, 2)
-#define GPU_ID2_PRODUCT_TTRX GPU_ID2_MODEL_MAKE(9, 0)
-#define GPU_ID2_PRODUCT_TNAX GPU_ID2_MODEL_MAKE(9, 1)
-#define GPU_ID2_PRODUCT_TBEX GPU_ID2_MODEL_MAKE(9, 2)
-#define GPU_ID2_PRODUCT_LBEX GPU_ID2_MODEL_MAKE(9, 4)
-#define GPU_ID2_PRODUCT_TBAX GPU_ID2_MODEL_MAKE(9, 5)
-#define GPU_ID2_PRODUCT_TDUX GPU_ID2_MODEL_MAKE(10, 1)
-#define GPU_ID2_PRODUCT_TODX GPU_ID2_MODEL_MAKE(10, 2)
-#define GPU_ID2_PRODUCT_LODX GPU_ID2_MODEL_MAKE(10, 7)
-
-/* Helper macro to create a GPU_ID assuming valid values for id, major,
- * minor, status
- */
-#define GPU_ID_MAKE(id, major, minor, status) \
- ((((u32)id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
- (((u32)major) << GPU_ID_VERSION_MAJOR_SHIFT) | \
- (((u32)minor) << GPU_ID_VERSION_MINOR_SHIFT) | \
- (((u32)status) << GPU_ID_VERSION_STATUS_SHIFT))
-
-#endif /* _KBASE_GPU_ID_H_ */
diff --git a/mali_kbase/gpu/mali_kbase_gpu_regmap.h b/mali_kbase/gpu/mali_kbase_gpu_regmap.h
index b7a566f..05a229d 100644
--- a/mali_kbase/gpu/mali_kbase_gpu_regmap.h
+++ b/mali_kbase/gpu/mali_kbase_gpu_regmap.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
*
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,420 +22,12 @@
#ifndef _KBASE_GPU_REGMAP_H_
#define _KBASE_GPU_REGMAP_H_
-#include "mali_kbase_gpu_coherency.h"
-#include "mali_kbase_gpu_id.h"
-#if MALI_USE_CSF
-#include "backend/mali_kbase_gpu_regmap_csf.h"
-#else
-#include "backend/mali_kbase_gpu_regmap_jm.h"
-#endif
-
-/* Begin Register Offsets */
-/* GPU control registers */
-
-#define GPU_CONTROL_BASE 0x0000
-#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r))
-#define GPU_ID 0x000 /* (RO) GPU and revision identifier */
-#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */
-#define TILER_FEATURES 0x00C /* (RO) Tiler Features */
-#define MEM_FEATURES 0x010 /* (RO) Memory system features */
-#define MMU_FEATURES 0x014 /* (RO) MMU features */
-#define AS_PRESENT 0x018 /* (RO) Address space slots present */
-#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */
-#define GPU_IRQ_CLEAR 0x024 /* (WO) */
-#define GPU_IRQ_MASK 0x028 /* (RW) */
-#define GPU_IRQ_STATUS 0x02C /* (RO) */
-
-#define GPU_COMMAND 0x030 /* (WO) */
-#define GPU_STATUS 0x034 /* (RO) */
-
-#define GPU_DBGEN (1 << 8) /* DBGEN wire status */
-
-#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */
-#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */
-#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */
-
-#define L2_CONFIG 0x048 /* (RW) Level 2 cache configuration */
-
-#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */
-#define SUPER_L2_COHERENT (1 << 1) /* Shader cores within a core
- * supergroup are l2 coherent
- */
-
-#define PWR_KEY 0x050 /* (WO) Power manager key register */
-#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */
-#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */
-#define GPU_FEATURES_LO 0x060 /* (RO) GPU features, low word */
-#define GPU_FEATURES_HI 0x064 /* (RO) GPU features, high word */
-#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */
-#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */
-#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */
-#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */
-
-#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */
-#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */
-#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */
-#define THREAD_FEATURES 0x0AC /* (RO) Thread features */
-#define THREAD_TLS_ALLOC 0x310 /* (RO) Number of threads per core that TLS must be allocated for */
-
-#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */
-#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */
-#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */
-#define TEXTURE_FEATURES_3 0x0BC /* (RO) Support flags for texture order */
-
-#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
-
-#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */
-#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */
-
-#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */
-#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */
-
-#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */
-#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */
-
-#define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */
-#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */
-
-#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */
-#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */
-
-#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */
-#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */
-
-#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */
-#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */
-
-#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */
-#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */
-
-#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */
-#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */
-
-#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */
-#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */
-
-#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */
-#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */
-
-#define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */
-#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */
-
-#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */
-#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */
-
-#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */
-#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */
-
-#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */
-#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */
-
-#define STACK_PWROFF_LO 0xE30 /* (RO) Core stack power off bitmap, low word */
-#define STACK_PWROFF_HI 0xE34 /* (RO) Core stack power off bitmap, high word */
-
-#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */
-#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */
-
-#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */
-#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */
-
-#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */
-#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */
-
-#define ASN_HASH_0 0x02C0 /* (RW) ASN hash function argument 0 */
-#define ASN_HASH(n) (ASN_HASH_0 + (n)*4)
-#define ASN_HASH_COUNT 3
-
-#define STACK_PWRTRANS_LO 0xE40 /* (RO) Core stack power transition bitmap, low word */
-#define STACK_PWRTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */
-
-#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */
-#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */
-
-#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */
-#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */
-
-#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */
-#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */
-
-#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */
-#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */
-
-#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */
-#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */
-#define L2_MMU_CONFIG 0xF0C /* (RW) L2 cache and MMU configuration (implementation-specific) */
-
-/* Job control registers */
-
-#define JOB_CONTROL_BASE 0x1000
-
-#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r))
-
-#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */
-#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */
-#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */
-#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */
-
-/* MMU control registers */
-
-#define MEMORY_MANAGEMENT_BASE 0x2000
-#define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r))
-
-#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */
-#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */
-#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */
-#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */
-
-#define MMU_AS0 0x400 /* Configuration registers for address space 0 */
-#define MMU_AS1 0x440 /* Configuration registers for address space 1 */
-#define MMU_AS2 0x480 /* Configuration registers for address space 2 */
-#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */
-#define MMU_AS4 0x500 /* Configuration registers for address space 4 */
-#define MMU_AS5 0x540 /* Configuration registers for address space 5 */
-#define MMU_AS6 0x580 /* Configuration registers for address space 6 */
-#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */
-#define MMU_AS8 0x600 /* Configuration registers for address space 8 */
-#define MMU_AS9 0x640 /* Configuration registers for address space 9 */
-#define MMU_AS10 0x680 /* Configuration registers for address space 10 */
-#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */
-#define MMU_AS12 0x700 /* Configuration registers for address space 12 */
-#define MMU_AS13 0x740 /* Configuration registers for address space 13 */
-#define MMU_AS14 0x780 /* Configuration registers for address space 14 */
-#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */
-
-/* MMU address space control registers */
-
-#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
-
-#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */
-#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */
-#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */
-#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. */
-#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */
-#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */
-#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */
-#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */
-#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */
-#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */
-#define AS_STATUS 0x28 /* (RO) Status flags for address space n */
-
-/* (RW) Translation table configuration for address space n, low word */
-#define AS_TRANSCFG_LO 0x30
-/* (RW) Translation table configuration for address space n, high word */
-#define AS_TRANSCFG_HI 0x34
-/* (RO) Secondary fault address for address space n, low word */
-#define AS_FAULTEXTRA_LO 0x38
-/* (RO) Secondary fault address for address space n, high word */
-#define AS_FAULTEXTRA_HI 0x3C
-
-/* End Register Offsets */
+#include <uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h>
/* Include POWER_CHANGED_SINGLE in debug builds for use in irq latency test. */
#ifdef CONFIG_MALI_DEBUG
+#undef GPU_IRQ_REG_ALL
#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE)
-#else /* CONFIG_MALI_DEBUG */
-#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON)
#endif /* CONFIG_MALI_DEBUG */
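/*
 * Editorial sketch (not part of this patch): the register map is now pulled
 * in from the uapi header, which is assumed to provide the release-build
 * definition of GPU_IRQ_REG_ALL as GPU_IRQ_REG_COMMON. The #undef above lets
 * debug builds widen the mask for the IRQ latency test, so the effective
 * definition becomes:
 *
 *   #if defined(CONFIG_MALI_DEBUG)
 *   #define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE)
 *   #else
 *   #define GPU_IRQ_REG_ALL GPU_IRQ_REG_COMMON  (value assumed to come from the uapi header)
 *   #endif
 */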
-/*
- * MMU_IRQ_RAWSTAT register values. Values are valid also for
- * MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers.
- */
-
-#define MMU_PAGE_FAULT_FLAGS 16
-
-/* Macros returning a bitmask to retrieve page fault or bus error flags from
- * MMU registers
- */
-#define MMU_PAGE_FAULT(n) (1UL << (n))
-#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))
-
-/*
- * Begin LPAE MMU TRANSTAB register values
- */
-#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK 0xfffff000
-#define AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED (0u << 0)
-#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY (1u << 1)
-#define AS_TRANSTAB_LPAE_ADRMODE_TABLE (3u << 0)
-#define AS_TRANSTAB_LPAE_READ_INNER (1u << 2)
-#define AS_TRANSTAB_LPAE_SHARE_OUTER (1u << 4)
-
-#define AS_TRANSTAB_LPAE_ADRMODE_MASK 0x00000003
-
-/*
- * Begin AARCH64 MMU TRANSTAB register values
- */
-#define MMU_HW_OUTA_BITS 40
-#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4))
-
-/*
- * Begin MMU STATUS register values
- */
-#define AS_STATUS_AS_ACTIVE 0x01
-
-#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3)
-#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3)
-#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3)
-#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3)
-#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3)
-#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3)
-#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3)
-
-#define AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0
-#define AS_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFF << AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT)
-#define AS_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \
- (((reg_val)&AS_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT)
-#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_0 0xC0
-
-#define AS_FAULTSTATUS_ACCESS_TYPE_SHIFT 8
-#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << AS_FAULTSTATUS_ACCESS_TYPE_SHIFT)
-#define AS_FAULTSTATUS_ACCESS_TYPE_GET(reg_val) \
- (((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT)
-
-#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0)
-#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1)
-#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2)
-#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3)
-
-#define AS_FAULTSTATUS_SOURCE_ID_SHIFT 16
-#define AS_FAULTSTATUS_SOURCE_ID_MASK (0xFFFF << AS_FAULTSTATUS_SOURCE_ID_SHIFT)
-#define AS_FAULTSTATUS_SOURCE_ID_GET(reg_val) \
- (((reg_val)&AS_FAULTSTATUS_SOURCE_ID_MASK) >> AS_FAULTSTATUS_SOURCE_ID_SHIFT)
-
-/*
- * Begin MMU TRANSCFG register values
- */
-#define AS_TRANSCFG_ADRMODE_LEGACY 0
-#define AS_TRANSCFG_ADRMODE_UNMAPPED 1
-#define AS_TRANSCFG_ADRMODE_IDENTITY 2
-#define AS_TRANSCFG_ADRMODE_AARCH64_4K 6
-#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8
-
-#define AS_TRANSCFG_ADRMODE_MASK 0xF
-
-/*
- * Begin TRANSCFG register values
- */
-#define AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24)
-#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24)
-#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24)
-
-#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28))
-#define AS_TRANSCFG_PTW_SH_OS (2ull << 28)
-#define AS_TRANSCFG_PTW_SH_IS (3ull << 28)
-#define AS_TRANSCFG_R_ALLOCATE (1ull << 30)
-
-/*
- * Begin Command Values
- */
-
-/* AS_COMMAND register commands */
-#define AS_COMMAND_NOP 0x00 /* NOP Operation */
-#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */
-#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */
-#define AS_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */
-/* Flush all L2 caches then issue a flush region command to all MMUs
- * (deprecated - only for use with T60x)
- */
-#define AS_COMMAND_FLUSH 0x04
-/* Flush all L2 caches then issue a flush region command to all MMUs */
-#define AS_COMMAND_FLUSH_PT 0x04
-/* Wait for memory accesses to complete, flush all the L1s cache then flush all
- * L2 caches then issue a flush region command to all MMUs
- */
-#define AS_COMMAND_FLUSH_MEM 0x05
-
-/* GPU_STATUS values */
-#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */
-#define GPU_STATUS_CYCLE_COUNT_ACTIVE (1 << 6) /* Set if the cycle counter is active. */
-#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */
-
-/* PRFCNT_CONFIG register values */
-#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */
-#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */
-#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */
-
-/* The performance counters are disabled. */
-#define PRFCNT_CONFIG_MODE_OFF 0
-/* The performance counters are enabled, but are only written out when a
- * PRFCNT_SAMPLE command is issued using the GPU_COMMAND register.
- */
-#define PRFCNT_CONFIG_MODE_MANUAL 1
-/* The performance counters are enabled, and are written out each time a tile
- * finishes rendering.
- */
-#define PRFCNT_CONFIG_MODE_TILE 2
-
-/* AS<n>_MEMATTR values from MMU_MEMATTR_STAGE1: */
-/* Use GPU implementation-defined caching policy. */
-#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull
-/* The attribute set to force all resources to be cached. */
-#define AS_MEMATTR_FORCE_TO_CACHE_ALL 0x8Full
-/* Inner write-alloc cache setup, no outer caching */
-#define AS_MEMATTR_WRITE_ALLOC 0x8Dull
-
-/* Use GPU implementation-defined caching policy. */
-#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull
-/* The attribute set to force all resources to be cached. */
-#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL 0x4Full
-/* Inner write-alloc cache setup, no outer caching */
-#define AS_MEMATTR_LPAE_WRITE_ALLOC 0x4Dull
-/* Set to implementation defined, outer caching */
-#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF 0x88ull
-/* Set to write back memory, outer caching */
-#define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull
-/* There is no LPAE support for non-cacheable, since the memory type is always
- * write-back.
- * This setting is therefore marked as reserved for LPAE.
- */
-#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED
-
-/* L2_MMU_CONFIG register */
-#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23)
-#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT)
-
-/* End L2_MMU_CONFIG register */
-
-/* THREAD_* registers */
-
-/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */
-#define IMPLEMENTATION_UNSPECIFIED 0
-#define IMPLEMENTATION_SILICON 1
-#define IMPLEMENTATION_FPGA 2
-#define IMPLEMENTATION_MODEL 3
-
-/* Default values when registers are not supported by the implemented hardware */
-#define THREAD_MT_DEFAULT 256
-#define THREAD_MWS_DEFAULT 256
-#define THREAD_MBS_DEFAULT 256
-#define THREAD_MR_DEFAULT 1024
-#define THREAD_MTQ_DEFAULT 4
-#define THREAD_MTGS_DEFAULT 10
-
-/* End THREAD_* registers */
-
-/* SHADER_CONFIG register */
-#define SC_LS_ALLOW_ATTR_TYPES (1ul << 16)
-#define SC_TLS_HASH_ENABLE (1ul << 17)
-#define SC_LS_ATTR_CHECK_DISABLE (1ul << 18)
-#define SC_VAR_ALGORITHM (1ul << 29)
-/* End SHADER_CONFIG register */
-
-/* TILER_CONFIG register */
-#define TC_CLOCK_GATE_OVERRIDE (1ul << 0)
-/* End TILER_CONFIG register */
-
-/* L2_CONFIG register */
-#define L2_CONFIG_SIZE_SHIFT 16
-#define L2_CONFIG_SIZE_MASK (0xFFul << L2_CONFIG_SIZE_SHIFT)
-#define L2_CONFIG_HASH_SHIFT 24
-#define L2_CONFIG_HASH_MASK (0xFFul << L2_CONFIG_HASH_SHIFT)
-#define L2_CONFIG_ASN_HASH_ENABLE_SHIFT 24
-#define L2_CONFIG_ASN_HASH_ENABLE_MASK (1ul << L2_CONFIG_ASN_HASH_ENABLE_SHIFT)
-/* End L2_CONFIG register */
-
-/* IDVS_GROUP register */
-#define IDVS_GROUP_SIZE_SHIFT (16)
-#define IDVS_GROUP_MAX_SIZE (0x3F)
-
#endif /* _KBASE_GPU_REGMAP_H_ */
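The register offsets and bitfield macros deleted above (the MMU_AS* blocks, the AS_FAULTSTATUS decoding helpers, the PRFCNT_CONFIG modes, and so on) are expected to come from the UAPI regmap header now included in their place, so fault decoding continues to work unchanged. As a minimal sketch of how the AS_FAULTSTATUS accessors that moved are typically used (the fault_status variable and the handler name are illustrative, not part of this patch):

    /* Assumes 'fault_status' was read from the AS_FAULTSTATUS register of the
     * faulting address space; only the macro names come from the regmap header.
     */
    u32 exception_type = AS_FAULTSTATUS_EXCEPTION_TYPE_GET(fault_status);
    u32 access_type    = AS_FAULTSTATUS_ACCESS_TYPE_GET(fault_status);
    u32 source_id      = AS_FAULTSTATUS_SOURCE_ID_GET(fault_status);

    if (access_type == AS_FAULTSTATUS_ACCESS_TYPE_WRITE)
        report_gpu_write_fault(exception_type, source_id); /* illustrative helper */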
diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
index d7648cd..00c0f60 100644
--- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
+++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
@@ -23,7 +23,9 @@
#include "mali_kbase_ipa_counter_common_jm.h"
#include "mali_kbase.h"
-
+#ifdef CONFIG_MALI_NO_MALI
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#endif
/* Performance counter blocks base offsets */
#define JM_BASE (0 * KBASE_IPA_NR_BYTES_PER_BLOCK)
@@ -94,10 +96,15 @@ static u32 kbase_g7x_power_model_get_memsys_counter(struct kbase_ipa_model_vinst
static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data,
u32 counter_block_offset)
{
+#ifdef CONFIG_MALI_NO_MALI
+ const u32 sc_base = MEMSYS_BASE +
+ (KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS *
+ KBASE_IPA_NR_BYTES_PER_BLOCK);
+#else
const u32 sc_base = MEMSYS_BASE +
(model_data->kbdev->gpu_props.props.l2_props.num_l2_slices *
KBASE_IPA_NR_BYTES_PER_BLOCK);
-
+#endif
return sc_base + counter_block_offset;
}
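The new CONFIG_MALI_NO_MALI branch exists because the dummy (no-MALI) model lays out its counter dump with a fixed number of memory-system blocks, KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS, whereas on real hardware the shader-core blocks start after one MEMSYS block per L2 slice. A rough sketch of the two offset calculations, where only the macros come from the driver and the variable names are illustrative:

    /* Shader-core counter block base: hardware layout vs. dummy-model layout. */
    u32 sc_base_hw    = MEMSYS_BASE +
                        num_l2_slices * KBASE_IPA_NR_BYTES_PER_BLOCK;
    u32 sc_base_dummy = MEMSYS_BASE +
                        KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS *
                        KBASE_IPA_NR_BYTES_PER_BLOCK;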
diff --git a/mali_kbase/jm/mali_base_jm_kernel.h b/mali_kbase/jm/mali_base_jm_kernel.h
deleted file mode 100644
index a72819e..0000000
--- a/mali_kbase/jm/mali_base_jm_kernel.h
+++ /dev/null
@@ -1,1191 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-#ifndef _BASE_JM_KERNEL_H_
-#define _BASE_JM_KERNEL_H_
-
-/* Memory allocation, access/hint flags.
- *
- * See base_mem_alloc_flags.
- */
-
-/* IN */
-/* Read access CPU side
- */
-#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0)
-
-/* Write access CPU side
- */
-#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1)
-
-/* Read access GPU side
- */
-#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2)
-
-/* Write access GPU side
- */
-#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3)
-
-/* Execute allowed on the GPU side
- */
-#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4)
-
-/* Will be permanently mapped in kernel space.
- * Flag is only allowed on allocations originating from kbase.
- */
-#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5)
-
-/* The allocation will completely reside within the same 4GB chunk in the GPU
- * virtual space.
- * Since this flag is primarily required only for TLS memory, which will not
- * contain executable code and is not used for the Tiler heap, it can't be
- * used along with the BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags.
- */
-#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6)
-
-/* Userspace is not allowed to free this memory.
- * Flag is only allowed on allocations originating from kbase.
- */
-#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7)
-
-#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8)
-
-/* Grow backing store on GPU Page Fault
- */
-#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9)
-
-/* Page coherence Outer shareable, if available
- */
-#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10)
-
-/* Page coherence Inner shareable
- */
-#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11)
-
-/* IN/OUT */
-/* Should be cached on the CPU, returned if actually cached
- */
-#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12)
-
-/* IN/OUT */
-/* Must have same VA on both the GPU and the CPU
- */
-#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13)
-
-/* OUT */
-/* Must call mmap to acquire a GPU address for the allocation
- */
-#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14)
-
-/* IN */
-/* Page coherence Outer shareable, required.
- */
-#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15)
-
-/* Protected memory
- */
-#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16)
-
-/* Not needed physical memory
- */
-#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17)
-
-/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the
- * addresses to be the same
- */
-#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18)
-
-/**
- * Bit 19 is reserved.
- *
- * Do not remove, use the next unreserved bit for new flags
- */
-#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19)
-
-/**
- * Memory starting from the end of the initial commit is aligned to 'extension'
- * pages, where 'extension' must be a power of 2 and no more than
- * BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES
- */
-#define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20)
-
-/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu
- * mode. Some components within the GPU might only be able to access memory
- * that is GPU cacheable. Refer to the specific GPU implementation for more
- * details. The 3 shareability flags will be ignored for GPU uncached memory.
- * If used while importing USER_BUFFER type memory, then the import will fail
- * if the memory is not aligned to GPU and CPU cache line width.
- */
-#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21)
-
-/*
- * Bits [22:25] for group_id (0~15).
- *
- * base_mem_group_id_set() should be used to pack a memory group ID into a
- * base_mem_alloc_flags value instead of accessing the bits directly.
- * base_mem_group_id_get() should be used to extract the memory group ID from
- * a base_mem_alloc_flags value.
- */
-#define BASEP_MEM_GROUP_ID_SHIFT 22
-#define BASE_MEM_GROUP_ID_MASK \
- ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT)
-
-/* Must do CPU cache maintenance when imported memory is mapped/unmapped
- * on GPU. Currently applicable to dma-buf type only.
- */
-#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26)
-
-/* Use the GPU VA chosen by the kernel client */
-#define BASE_MEM_FLAG_MAP_FIXED ((base_mem_alloc_flags)1 << 27)
-
-/* OUT */
-/* Kernel side cache sync ops required */
-#define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28)
-
-/* Force trimming of JIT allocations when creating a new allocation */
-#define BASEP_MEM_PERFORM_JIT_TRIM ((base_mem_alloc_flags)1 << 29)
-
-/* Number of bits used as flags for base memory management
- *
- * Must be kept in sync with the base_mem_alloc_flags flags
- */
-#define BASE_MEM_FLAGS_NR_BITS 30
-
-/* A mask of all the flags which are only valid for allocations within kbase,
- * and may not be passed from user space.
- */
-#define BASEP_MEM_FLAGS_KERNEL_ONLY \
- (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | \
- BASE_MEM_FLAG_MAP_FIXED | BASEP_MEM_PERFORM_JIT_TRIM)
-
-/* A mask for all output bits, excluding IN/OUT bits.
- */
-#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
-
-/* A mask for all input bits, including IN/OUT bits.
- */
-#define BASE_MEM_FLAGS_INPUT_MASK \
- (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
-
-/* A mask of all currently reserved flags
- */
-#define BASE_MEM_FLAGS_RESERVED \
- (BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_19)
-
-#define BASEP_MEM_INVALID_HANDLE (0ull << 12)
-#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12)
-#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12)
-#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12)
-#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12)
-/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */
-#define BASE_MEM_COOKIE_BASE (64ul << 12)
-#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \
- BASE_MEM_COOKIE_BASE)
-
-/* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the
- * initial commit is aligned to 'extension' pages, where 'extension' must be a power
- * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES
- */
-#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0)
-
-/**
- * If set, the heap info address points to a u32 holding the used size in bytes;
- * otherwise it points to a u64 holding the lowest address of unused memory.
- */
-#define BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE (1 << 1)
-
-/**
- * Valid set of just-in-time memory allocation flags
- *
- * Note: BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE cannot be set if heap_info_gpu_addr
- * in %base_jit_alloc_info is 0 (atom with BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE set
- * and heap_info_gpu_addr being 0 will be rejected).
- */
-#define BASE_JIT_ALLOC_VALID_FLAGS \
- (BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP | BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE)
-
-/**
- * typedef base_context_create_flags - Flags to pass to ::base_context_init.
- *
- * Flags can be ORed together to enable multiple things.
- *
- * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
- * not collide with them.
- */
-typedef u32 base_context_create_flags;
-
-/* No flags set */
-#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0)
-
-/* Base context is embedded in a cctx object (flag used for CINSTR
- * software counter macros)
- */
-#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0)
-
-/* Base context is a 'System Monitor' context for Hardware counters.
- *
- * One important side effect of this is that job submission is disabled.
- */
-#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \
- ((base_context_create_flags)1 << 1)
-
-/* Bit-shift used to encode a memory group ID in base_context_create_flags
- */
-#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3)
-
-/* Bitmask used to encode a memory group ID in base_context_create_flags
- */
-#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \
- ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)
-
-/* Bitpattern describing the base_context_create_flags that can be
- * passed to the kernel
- */
-#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \
- (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \
- BASEP_CONTEXT_MMU_GROUP_ID_MASK)
-
-/* Bitpattern describing the ::base_context_create_flags that can be
- * passed to base_context_init()
- */
-#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \
- (BASE_CONTEXT_CCTX_EMBEDDED | BASEP_CONTEXT_CREATE_KERNEL_FLAGS)
-
-/*
- * Private flags used on the base context
- *
- * These start at bit 31, and run down to zero.
- *
- * They share the same space as base_context_create_flags, and so must
- * not collide with them.
- */
-
-/* Private flag tracking whether job descriptor dumping is disabled */
-#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED \
- ((base_context_create_flags)(1 << 31))
-
-/* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
- * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST)
- */
-#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0)
-
-/* Indicate that job dumping is enabled. This could affect certain timers
- * to account for the performance impact.
- */
-#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1)
-
-#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \
- BASE_TLSTREAM_JOB_DUMPING_ENABLED)
-/*
- * Dependency stuff, keep it private for now. May want to expose it if
- * we decide to make the number of semaphores a configurable
- * option.
- */
-#define BASE_JD_ATOM_COUNT 256
-
-/* Maximum number of concurrent render passes.
- */
-#define BASE_JD_RP_COUNT (256)
-
-/* Set/reset values for a software event */
-#define BASE_JD_SOFT_EVENT_SET ((unsigned char)1)
-#define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0)
-
-/**
- * struct base_jd_udata - Per-job data
- *
- * This structure is used to store per-job data, and is completely unused
- * by the Base driver. It can be used to store things such as a callback
- * function pointer or data to handle job completion. It is guaranteed to be
- * untouched by the Base driver.
- *
- * @blob: per-job data array
- */
-struct base_jd_udata {
- u64 blob[2];
-};
-
-/**
- * typedef base_jd_dep_type - Job dependency type.
- *
- * A flags field will be inserted into the atom structure to specify whether a
- * dependency is a data or ordering dependency (by putting it before/after
- * 'core_req' in the structure it should be possible to add without changing
- * the structure size).
- * When the flag is set for a particular dependency to signal that it is an
- * ordering only dependency then errors will not be propagated.
- */
-typedef u8 base_jd_dep_type;
-
-#define BASE_JD_DEP_TYPE_INVALID (0) /**< Invalid dependency */
-#define BASE_JD_DEP_TYPE_DATA (1U << 0) /**< Data dependency */
-#define BASE_JD_DEP_TYPE_ORDER (1U << 1) /**< Order dependency */
-
-/**
- * typedef base_jd_core_req - Job chain hardware requirements.
- *
- * A job chain must specify what GPU features it needs to allow the
- * driver to schedule the job correctly. Not specifying the
- * correct settings can/will cause an early job termination. Multiple
- * values can be ORed together to specify multiple requirements.
- * Special case is ::BASE_JD_REQ_DEP, which is used to express complex
- * dependencies, and that doesn't execute anything on the hardware.
- */
-typedef u32 base_jd_core_req;
-
-/* Requirements that come from the HW */
-
-/* No requirement, dependency only
- */
-#define BASE_JD_REQ_DEP ((base_jd_core_req)0)
-
-/* Requires fragment shaders
- */
-#define BASE_JD_REQ_FS ((base_jd_core_req)1 << 0)
-
-/* Requires compute shaders
- *
- * This covers any of the following GPU job types:
- * - Vertex Shader Job
- * - Geometry Shader Job
- * - An actual Compute Shader Job
- *
- * Compare this with BASE_JD_REQ_ONLY_COMPUTE, which specifies that the
- * job is specifically just the "Compute Shader" job type, and not the "Vertex
- * Shader" nor the "Geometry Shader" job type.
- */
-#define BASE_JD_REQ_CS ((base_jd_core_req)1 << 1)
-
-/* Requires tiling */
-#define BASE_JD_REQ_T ((base_jd_core_req)1 << 2)
-
-/* Requires cache flushes */
-#define BASE_JD_REQ_CF ((base_jd_core_req)1 << 3)
-
-/* Requires value writeback */
-#define BASE_JD_REQ_V ((base_jd_core_req)1 << 4)
-
-/* SW-only requirements - the HW does not expose these as part of the job slot
- * capabilities
- */
-
-/* Requires fragment job with AFBC encoding */
-#define BASE_JD_REQ_FS_AFBC ((base_jd_core_req)1 << 13)
-
-/* SW-only requirement: coalesce completion events.
- * If this bit is set then completion of this atom will not cause an event to
- * be sent to userspace, whether successful or not; completion events will be
- * deferred until an atom completes which does not have this bit set.
- *
- * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES.
- */
-#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5)
-
-/* SW Only requirement: the job chain requires a coherent core group. We don't
- * mind which coherent core group is used.
- */
-#define BASE_JD_REQ_COHERENT_GROUP ((base_jd_core_req)1 << 6)
-
-/* SW Only requirement: The performance counters should be enabled only when
- * they are needed, to reduce power consumption.
- */
-#define BASE_JD_REQ_PERMON ((base_jd_core_req)1 << 7)
-
-/* SW Only requirement: External resources are referenced by this atom.
- *
- * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE and
- * BASE_JD_REQ_SOFT_EVENT_WAIT.
- */
-#define BASE_JD_REQ_EXTERNAL_RESOURCES ((base_jd_core_req)1 << 8)
-
-/* SW Only requirement: Software defined job. Jobs with this bit set will not be
- * submitted to the hardware but will cause some action to happen within the
- * driver
- */
-#define BASE_JD_REQ_SOFT_JOB ((base_jd_core_req)1 << 9)
-
-#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1)
-#define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2)
-#define BASE_JD_REQ_SOFT_FENCE_WAIT (BASE_JD_REQ_SOFT_JOB | 0x3)
-
-/* 0x4 RESERVED for now */
-
-/* SW only requirement: event wait/trigger job.
- *
- * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set.
- * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the
- * other waiting jobs. It completes immediately.
- * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it
- * possible for other jobs to wait upon. It completes immediately.
- */
-#define BASE_JD_REQ_SOFT_EVENT_WAIT (BASE_JD_REQ_SOFT_JOB | 0x5)
-#define BASE_JD_REQ_SOFT_EVENT_SET (BASE_JD_REQ_SOFT_JOB | 0x6)
-#define BASE_JD_REQ_SOFT_EVENT_RESET (BASE_JD_REQ_SOFT_JOB | 0x7)
-
-#define BASE_JD_REQ_SOFT_DEBUG_COPY (BASE_JD_REQ_SOFT_JOB | 0x8)
-
-/* SW only requirement: Just In Time allocation
- *
- * This job requests a single or multiple just-in-time allocations through a
- * list of base_jit_alloc_info structure which is passed via the jc element of
- * the atom. The number of base_jit_alloc_info structures present in the
- * list is passed via the nr_extres element of the atom
- *
- * It should be noted that the id entry in base_jit_alloc_info must not
- * be reused until it has been released via BASE_JD_REQ_SOFT_JIT_FREE.
- *
- * Should this soft job fail it is expected that a BASE_JD_REQ_SOFT_JIT_FREE
- * soft job to free the JIT allocation is still made.
- *
- * The job will complete immediately.
- */
-#define BASE_JD_REQ_SOFT_JIT_ALLOC (BASE_JD_REQ_SOFT_JOB | 0x9)
-
-/* SW only requirement: Just In Time free
- *
- * This job requests a single or multiple just-in-time allocations created by
- * BASE_JD_REQ_SOFT_JIT_ALLOC to be freed. The ID list of the just-in-time
- * allocations is passed via the jc element of the atom.
- *
- * The job will complete immediately.
- */
-#define BASE_JD_REQ_SOFT_JIT_FREE (BASE_JD_REQ_SOFT_JOB | 0xa)
-
-/* SW only requirement: Map external resource
- *
- * This job requests external resource(s) are mapped once the dependencies
- * of the job have been satisfied. The list of external resources is
- * passed via the jc element of the atom, which is a pointer to a
- * base_external_resource_list.
- */
-#define BASE_JD_REQ_SOFT_EXT_RES_MAP (BASE_JD_REQ_SOFT_JOB | 0xb)
-
-/* SW only requirement: Unmap external resource
- *
- * This job requests external resource(s) are unmapped once the dependencies
- * of the job have been satisfied. The list of external resources is
- * passed via the jc element of the atom, which is a pointer to a
- * base_external_resource_list.
- */
-#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP (BASE_JD_REQ_SOFT_JOB | 0xc)
-
-/* HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders)
- *
- * This indicates that the Job Chain contains GPU jobs of the 'Compute
- * Shaders' type.
- *
- * In contrast to BASE_JD_REQ_CS, this does not indicate that the Job
- * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs.
- */
-#define BASE_JD_REQ_ONLY_COMPUTE ((base_jd_core_req)1 << 10)
-
-/* HW Requirement: Use the base_jd_atom::device_nr field to specify a
- * particular core group
- *
- * If both BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag
- * takes priority
- *
- * This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms.
- *
- * If the core availability policy is keeping the required core group turned
- * off, then the job will fail with a BASE_JD_EVENT_PM_EVENT error code.
- */
-#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11)
-
-/* SW Flag: If this bit is set then the successful completion of this atom
- * will not cause an event to be sent to userspace
- */
-#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE ((base_jd_core_req)1 << 12)
-
-/* SW Flag: If this bit is set then completion of this atom will not cause an
- * event to be sent to userspace, whether successful or not.
- */
-#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14)
-
-/* SW Flag: Skip GPU cache clean and invalidation before starting a GPU job.
- *
- * If this bit is set then the GPU's cache will not be cleaned and invalidated
- * until a GPU job starts which does not have this bit set or a job completes
- * which does not have the BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use
- * if the CPU may have written to memory addressed by the job since the last job
- * without this bit set was submitted.
- */
-#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15)
-
-/* SW Flag: Skip GPU cache clean and invalidation after a GPU job completes.
- *
- * If this bit is set then the GPU's cache will not be cleaned and invalidated
- * until a GPU job completes which does not have this bit set or a job starts
- * which does not have the BASE_JD_REQ_SKIP_CACHE_START bit set. Do not use
- * if the CPU may read from or partially overwrite memory addressed by the job
- * before the next job without this bit set completes.
- */
-#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16)
-
-/* Request the atom be executed on a specific job slot.
- *
- * When this flag is specified, it takes precedence over any existing job slot
- * selection logic.
- */
-#define BASE_JD_REQ_JOB_SLOT ((base_jd_core_req)1 << 17)
-
-/* SW-only requirement: The atom is the start of a renderpass.
- *
- * If this bit is set then the job chain will be soft-stopped if it causes the
- * GPU to write beyond the end of the physical pages backing the tiler heap, and
- * committing more memory to the heap would exceed an internal threshold. It may
- * be resumed after running one of the job chains attached to an atom with
- * BASE_JD_REQ_END_RENDERPASS set and the same renderpass ID. It may be
- * resumed multiple times until it completes without memory usage exceeding the
- * threshold.
- *
- * Usually used with BASE_JD_REQ_T.
- */
-#define BASE_JD_REQ_START_RENDERPASS ((base_jd_core_req)1 << 18)
-
-/* SW-only requirement: The atom is the end of a renderpass.
- *
- * If this bit is set then the atom incorporates the CPU address of a
- * base_jd_fragment object instead of the GPU address of a job chain.
- *
- * Which job chain is run depends upon whether the atom with the same renderpass
- * ID and the BASE_JD_REQ_START_RENDERPASS bit set completed normally or
- * was soft-stopped when it exceeded an upper threshold for tiler heap memory
- * usage.
- *
- * It also depends upon whether one of the job chains attached to the atom has
- * already been run as part of the same renderpass (in which case it would have
- * written unresolved multisampled and otherwise-discarded output to temporary
- * buffers that need to be read back). The job chain for doing a forced read and
- * forced write (from/to temporary buffers) is run as many times as necessary.
- *
- * Usually used with BASE_JD_REQ_FS.
- */
-#define BASE_JD_REQ_END_RENDERPASS ((base_jd_core_req)1 << 19)
-
-/* These requirement bits are currently unused in base_jd_core_req
- */
-#define BASEP_JD_REQ_RESERVED \
- (~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \
- BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \
- BASE_JD_REQ_EVENT_COALESCE | \
- BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \
- BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \
- BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END | \
- BASE_JD_REQ_JOB_SLOT | BASE_JD_REQ_START_RENDERPASS | \
- BASE_JD_REQ_END_RENDERPASS))
-
-/* Mask of all bits in base_jd_core_req that control the type of the atom.
- *
- * This allows dependency only atoms to have flags set
- */
-#define BASE_JD_REQ_ATOM_TYPE \
- (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \
- BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE)
-
-/**
- * Mask of all bits in base_jd_core_req that control the type of a soft job.
- */
-#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f)
-
-/* Returns non-zero value if core requirements passed define a soft job or
- * a dependency only job.
- */
-#define BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req) \
- (((core_req) & BASE_JD_REQ_SOFT_JOB) || \
- ((core_req) & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP)
-
-/**
- * enum kbase_jd_atom_state
- *
- * @KBASE_JD_ATOM_STATE_UNUSED: Atom is not used.
- * @KBASE_JD_ATOM_STATE_QUEUED: Atom is queued in JD.
- * @KBASE_JD_ATOM_STATE_IN_JS: Atom has been given to JS (is runnable/running).
- * @KBASE_JD_ATOM_STATE_HW_COMPLETED: Atom has been completed, but not yet
- * handed back to job dispatcher for
- * dependency resolution.
- * @KBASE_JD_ATOM_STATE_COMPLETED: Atom has been completed, but not yet handed
- * back to userspace.
- */
-enum kbase_jd_atom_state {
- KBASE_JD_ATOM_STATE_UNUSED,
- KBASE_JD_ATOM_STATE_QUEUED,
- KBASE_JD_ATOM_STATE_IN_JS,
- KBASE_JD_ATOM_STATE_HW_COMPLETED,
- KBASE_JD_ATOM_STATE_COMPLETED
-};
-
-/**
- * typedef base_atom_id - Type big enough to store an atom number in.
- */
-typedef u8 base_atom_id;
-
-/**
- * struct base_dependency -
- *
- * @atom_id: An atom number
- * @dependency_type: Dependency type
- */
-struct base_dependency {
- base_atom_id atom_id;
- base_jd_dep_type dependency_type;
-};
-
-/**
- * struct base_jd_fragment - Set of GPU fragment job chains used for rendering.
- *
- * @norm_read_norm_write: Job chain for full rendering.
- * GPU address of a fragment job chain to render in the
- * circumstance where the tiler job chain did not exceed
- * its memory usage threshold and no fragment job chain
- * was previously run for the same renderpass.
- * It is used no more than once per renderpass.
- * @norm_read_forced_write: Job chain for starting incremental
- * rendering.
- * GPU address of a fragment job chain to render in
- * the circumstance where the tiler job chain exceeded
- * its memory usage threshold for the first time and
- * no fragment job chain was previously run for the
- * same renderpass.
- * Writes unresolved multisampled and normally-
- * discarded output to temporary buffers that must be
- * read back by a subsequent forced_read job chain
- * before the renderpass is complete.
- * It is used no more than once per renderpass.
- * @forced_read_forced_write: Job chain for continuing incremental
- * rendering.
- * GPU address of a fragment job chain to render in
- * the circumstance where the tiler job chain
- * exceeded its memory usage threshold again
- * and a fragment job chain was previously run for
- * the same renderpass.
- * Reads unresolved multisampled and
- * normally-discarded output from temporary buffers
- * written by a previous forced_write job chain and
- * writes the same to temporary buffers again.
- * It is used as many times as required until
- * rendering completes.
- * @forced_read_norm_write: Job chain for ending incremental rendering.
- * GPU address of a fragment job chain to render in the
- * circumstance where the tiler job chain did not
- * exceed its memory usage threshold this time and a
- * fragment job chain was previously run for the same
- * renderpass.
- * Reads unresolved multisampled and normally-discarded
- * output from temporary buffers written by a previous
- * forced_write job chain in order to complete a
- * renderpass.
- * It is used no more than once per renderpass.
- *
- * This structure is referenced by the main atom structure if
- * BASE_JD_REQ_END_RENDERPASS is set in the base_jd_core_req.
- */
-struct base_jd_fragment {
- u64 norm_read_norm_write;
- u64 norm_read_forced_write;
- u64 forced_read_forced_write;
- u64 forced_read_norm_write;
-};
-
-/**
- * typedef base_jd_prio - Base Atom priority.
- *
- * Only certain priority levels are actually implemented, as specified by the
- * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority
- * level that is not one of those defined below.
- *
- * Priority levels only affect scheduling after the atoms have had dependencies
- * resolved. For example, a low priority atom that has had its dependencies
- * resolved might run before a higher priority atom that has not had its
- * dependencies resolved.
- *
- * In general, fragment atoms do not affect non-fragment atoms with
- * lower priorities, and vice versa. One exception is that there is only one
- * priority value for each context. So a high-priority (e.g.) fragment atom
- * could increase its context priority, causing its non-fragment atoms to also
- * be scheduled sooner.
- *
- * The atoms are scheduled as follows with respect to their priorities:
- * * Let atoms 'X' and 'Y' be for the same job slot who have dependencies
- * resolved, and atom 'X' has a higher priority than atom 'Y'
- * * If atom 'Y' is currently running on the HW, then it is interrupted to
- * allow atom 'X' to run soon after
- * * If instead neither atom 'Y' nor atom 'X' are running, then when choosing
- * the next atom to run, atom 'X' will always be chosen instead of atom 'Y'
- * * Any two atoms that have the same priority could run in any order with
- * respect to each other. That is, there is no ordering constraint between
- * atoms of the same priority.
- *
- * The sysfs file 'js_ctx_scheduling_mode' is used to control how atoms are
- * scheduled between contexts. The default value, 0, will cause higher-priority
- * atoms to be scheduled first, regardless of their context. The value 1 will
- * use a round-robin algorithm when deciding which context's atoms to schedule
- * next, so higher-priority atoms can only preempt lower priority atoms within
- * the same context. See KBASE_JS_SYSTEM_PRIORITY_MODE and
- * KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE for more details.
- */
-typedef u8 base_jd_prio;
-
-/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */
-#define BASE_JD_PRIO_MEDIUM ((base_jd_prio)0)
-/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and
- * BASE_JD_PRIO_LOW
- */
-#define BASE_JD_PRIO_HIGH ((base_jd_prio)1)
-/* Low atom priority. */
-#define BASE_JD_PRIO_LOW ((base_jd_prio)2)
-/* Real-Time atom priority. This is a priority higher than BASE_JD_PRIO_HIGH,
- * BASE_JD_PRIO_MEDIUM, and BASE_JD_PRIO_LOW
- */
-#define BASE_JD_PRIO_REALTIME ((base_jd_prio)3)
-
-/* Count of the number of priority levels. This itself is not a valid
- * base_jd_prio setting
- */
-#define BASE_JD_NR_PRIO_LEVELS 4
-
-/**
- * struct base_jd_atom_v2 - Node of a dependency graph used to submit a
- * GPU job chain or soft-job to the kernel driver.
- *
- * @jc: GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS
- * is set in the base_jd_core_req) the CPU address of a
- * base_jd_fragment object.
- * @udata: User data.
- * @extres_list: List of external resources.
- * @nr_extres: Number of external resources or JIT allocations.
- * @jit_id: Zero-terminated array of IDs of just-in-time memory
- * allocations written to by the atom. When the atom
- * completes, the value stored at the
- * &struct_base_jit_alloc_info.heap_info_gpu_addr of
- * each allocation is read in order to enforce an
- * overall physical memory usage limit.
- * @pre_dep: Pre-dependencies. A setter function must be used to assign
- * this field; this is done in order to reduce the possibility of
- * improper assignment of a dependency field.
- * @atom_number: Unique number to identify the atom.
- * @prio: Atom priority. Refer to base_jd_prio for more details.
- * @device_nr: Core group when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP
- * specified.
- * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified.
- * @core_req: Core requirements.
- * @renderpass_id: Renderpass identifier used to associate an atom that has
- * BASE_JD_REQ_START_RENDERPASS set in its core requirements
- * with an atom that has BASE_JD_REQ_END_RENDERPASS set.
- * @padding: Unused. Must be zero.
- *
- * This structure has changed since UK 10.2 for which base_jd_core_req was a
- * u16 value.
- *
- * In UK 10.3 a core_req field of a u32 type was added to the end of the
- * structure, and the place in the structure previously occupied by u16
- * core_req was kept but renamed to compat_core_req.
- *
- * From UK 11.20 - compat_core_req is now occupied by u8 jit_id[2].
- * Compatibility with UK 10.x from UK 11.y is not handled because
- * the major version increase prevents this.
- *
- * For UK 11.20 jit_id[2] must be initialized to zero.
- */
-struct base_jd_atom_v2 {
- u64 jc;
- struct base_jd_udata udata;
- u64 extres_list;
- u16 nr_extres;
- u8 jit_id[2];
- struct base_dependency pre_dep[2];
- base_atom_id atom_number;
- base_jd_prio prio;
- u8 device_nr;
- u8 jobslot;
- base_jd_core_req core_req;
- u8 renderpass_id;
- u8 padding[7];
-};
-
-/**
- * struct base_jd_atom - Same as base_jd_atom_v2, but has an extra seq_nr
- * at the beginning.
- *
- * @seq_nr: Sequence number of logical grouping of atoms.
- * @jc: GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS
- * is set in the base_jd_core_req) the CPU address of a
- * base_jd_fragment object.
- * @udata: User data.
- * @extres_list: List of external resources.
- * @nr_extres: Number of external resources or JIT allocations.
- * @jit_id: Zero-terminated array of IDs of just-in-time memory
- * allocations written to by the atom. When the atom
- * completes, the value stored at the
- * &struct_base_jit_alloc_info.heap_info_gpu_addr of
- * each allocation is read in order to enforce an
- * overall physical memory usage limit.
- * @pre_dep: Pre-dependencies. A setter function must be used to assign
- * this field; this is done in order to reduce the possibility of
- * improper assignment of a dependency field.
- * @atom_number: Unique number to identify the atom.
- * @prio: Atom priority. Refer to base_jd_prio for more details.
- * @device_nr: Core group when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP
- * specified.
- * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified.
- * @core_req: Core requirements.
- * @renderpass_id: Renderpass identifier used to associate an atom that has
- * BASE_JD_REQ_START_RENDERPASS set in its core requirements
- * with an atom that has BASE_JD_REQ_END_RENDERPASS set.
- * @padding: Unused. Must be zero.
- */
-typedef struct base_jd_atom {
- u64 seq_nr;
- u64 jc;
- struct base_jd_udata udata;
- u64 extres_list;
- u16 nr_extres;
- u8 jit_id[2];
- struct base_dependency pre_dep[2];
- base_atom_id atom_number;
- base_jd_prio prio;
- u8 device_nr;
- u8 jobslot;
- base_jd_core_req core_req;
- u8 renderpass_id;
- u8 padding[7];
-} base_jd_atom;
-
-/* Job chain event code bits
- * Defines the bits used to create ::base_jd_event_code
- */
-enum {
- BASE_JD_SW_EVENT_KERNEL = (1u << 15), /* Kernel side event */
- BASE_JD_SW_EVENT = (1u << 14), /* SW defined event */
- /* Event indicates success (SW events only) */
- BASE_JD_SW_EVENT_SUCCESS = (1u << 13),
- BASE_JD_SW_EVENT_JOB = (0u << 11), /* Job related event */
- BASE_JD_SW_EVENT_BAG = (1u << 11), /* Bag related event */
- BASE_JD_SW_EVENT_INFO = (2u << 11), /* Misc/info event */
- BASE_JD_SW_EVENT_RESERVED = (3u << 11), /* Reserved event type */
- /* Mask to extract the type from an event code */
- BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11)
-};
-
-/**
- * enum base_jd_event_code - Job chain event codes
- *
- * @BASE_JD_EVENT_RANGE_HW_NONFAULT_START: Start of hardware non-fault status
- * codes.
- * Obscurely, BASE_JD_EVENT_TERMINATED
- * indicates a real fault, because the
- * job was hard-stopped.
- * @BASE_JD_EVENT_NOT_STARTED: Can't be seen by userspace, treated as
- * 'previous job done'.
- * @BASE_JD_EVENT_STOPPED: Can't be seen by userspace, becomes
- * TERMINATED, DONE or JOB_CANCELLED.
- * @BASE_JD_EVENT_TERMINATED: This is actually a fault status code - the job
- * was hard stopped.
- * @BASE_JD_EVENT_ACTIVE: Can't be seen by userspace, jobs only returned on
- * complete/fail/cancel.
- * @BASE_JD_EVENT_RANGE_HW_NONFAULT_END: End of hardware non-fault status codes.
- * Obscurely, BASE_JD_EVENT_TERMINATED
- * indicates a real fault,
- * because the job was hard-stopped.
- * @BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START: Start of hardware fault and
- * software error status codes.
- * @BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END: End of hardware fault and
- * software error status codes.
- * @BASE_JD_EVENT_RANGE_SW_SUCCESS_START: Start of software success status
- * codes.
- * @BASE_JD_EVENT_RANGE_SW_SUCCESS_END: End of software success status codes.
- * @BASE_JD_EVENT_RANGE_KERNEL_ONLY_START: Start of kernel-only status codes.
- * Such codes are never returned to
- * user-space.
- * @BASE_JD_EVENT_RANGE_KERNEL_ONLY_END: End of kernel-only status codes.
- * @BASE_JD_EVENT_DONE: atom has completed successfully
- * @BASE_JD_EVENT_JOB_CONFIG_FAULT: Atom dependencies configuration error which
- * shall result in a failed atom
- * @BASE_JD_EVENT_JOB_POWER_FAULT: The job could not be executed because the
- * part of the memory system required to access
- * job descriptors was not powered on
- * @BASE_JD_EVENT_JOB_READ_FAULT: Reading a job descriptor into the Job
- * manager failed
- * @BASE_JD_EVENT_JOB_WRITE_FAULT: Writing a job descriptor from the Job
- * manager failed
- * @BASE_JD_EVENT_JOB_AFFINITY_FAULT: The job could not be executed because the
- * specified affinity mask does not intersect
- * any available cores
- * @BASE_JD_EVENT_JOB_BUS_FAULT: A bus access failed while executing a job
- * @BASE_JD_EVENT_INSTR_INVALID_PC: A shader instruction with an illegal program
- * counter was executed.
- * @BASE_JD_EVENT_INSTR_INVALID_ENC: A shader instruction with an illegal
- * encoding was executed.
- * @BASE_JD_EVENT_INSTR_TYPE_MISMATCH: A shader instruction was executed where
- * the instruction encoding did not match the
- * instruction type encoded in the program
- * counter.
- * @BASE_JD_EVENT_INSTR_OPERAND_FAULT: A shader instruction was executed that
- * contained invalid combinations of operands.
- * @BASE_JD_EVENT_INSTR_TLS_FAULT: A shader instruction was executed that tried
- * to access the thread local storage section
- * of another thread.
- * @BASE_JD_EVENT_INSTR_ALIGN_FAULT: A shader instruction was executed that
- * tried to do an unsupported unaligned memory
- * access.
- * @BASE_JD_EVENT_INSTR_BARRIER_FAULT: A shader instruction was executed that
- * failed to complete an instruction barrier.
- * @BASE_JD_EVENT_DATA_INVALID_FAULT: Any data structure read as part of the job
- * contains invalid combinations of data.
- * @BASE_JD_EVENT_TILE_RANGE_FAULT: Tile or fragment shading was asked to
- * process a tile that is entirely outside the
- * bounding box of the frame.
- * @BASE_JD_EVENT_STATE_FAULT: Matches ADDR_RANGE_FAULT. A virtual address
- * has been found that exceeds the virtual
- * address range.
- * @BASE_JD_EVENT_OUT_OF_MEMORY: The tiler ran out of memory when executing a job.
- * @BASE_JD_EVENT_UNKNOWN: If multiple jobs in a job chain fail, only
- * the first one that reports an error will set
- * and return full error information.
- * Subsequent failing jobs will not update the
- * error status registers, and may write an
- * error status of UNKNOWN.
- * @BASE_JD_EVENT_DELAYED_BUS_FAULT: The GPU received a bus fault for access to
- * physical memory where the original virtual
- * address is no longer available.
- * @BASE_JD_EVENT_SHAREABILITY_FAULT: Matches GPU_SHAREABILITY_FAULT. A cache
- * has detected that the same line has been
- * accessed as both shareable and non-shareable
- * memory from inside the GPU.
- * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1: A memory access hit an invalid table
- * entry at level 1 of the translation table.
- * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2: A memory access hit an invalid table
- * entry at level 2 of the translation table.
- * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3: A memory access hit an invalid table
- * entry at level 3 of the translation table.
- * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4: A memory access hit an invalid table
- * entry at level 4 of the translation table.
- * @BASE_JD_EVENT_PERMISSION_FAULT: A memory access could not be allowed due to
- * the permission flags set in translation
- * table
- * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1: A bus fault occurred while reading
- * level 0 of the translation tables.
- * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2: A bus fault occurred while reading
- * level 1 of the translation tables.
- * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3: A bus fault occurred while reading
- * level 2 of the translation tables.
- * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4: A bus fault occurred while reading
- * level 3 of the translation tables.
- * @BASE_JD_EVENT_ACCESS_FLAG: Matches ACCESS_FLAG_0. A memory access hit a
- * translation table entry with the ACCESS_FLAG
- * bit set to zero in level 0 of the
- * page table, and the DISABLE_AF_FAULT flag
- * was not set.
- * @BASE_JD_EVENT_MEM_GROWTH_FAILED: raised for JIT_ALLOC atoms that failed to
- * grow memory on demand
- * @BASE_JD_EVENT_JOB_CANCELLED: raised when this atom was hard-stopped or its
- * dependencies failed
- * @BASE_JD_EVENT_JOB_INVALID: raised for many reasons, including invalid data
- * in the atom which overlaps with
- * BASE_JD_EVENT_JOB_CONFIG_FAULT, or if the
- * platform doesn't support the feature specified in
- * the atom.
- * @BASE_JD_EVENT_PM_EVENT: TODO: remove as it's not used
- * @BASE_JD_EVENT_TIMED_OUT: TODO: remove as it's not used
- * @BASE_JD_EVENT_BAG_INVALID: TODO: remove as it's not used
- * @BASE_JD_EVENT_PROGRESS_REPORT: TODO: remove as it's not used
- * @BASE_JD_EVENT_BAG_DONE: TODO: remove as it's not used
- * @BASE_JD_EVENT_DRV_TERMINATED: this is a special event generated to indicate
- * to userspace that the KBase context has been
- * destroyed and Base should stop listening for
- * further events
- * @BASE_JD_EVENT_REMOVED_FROM_NEXT: raised when an atom that was configured in
- * the GPU has to be retried (but it has not
- * started) due to e.g., GPU reset
- * @BASE_JD_EVENT_END_RP_DONE: this is used for incremental rendering to signal
- * the completion of a renderpass. This value
- * shouldn't be returned to userspace, but it is not
- * clear where it is reset back to JD_EVENT_DONE.
- *
- * HW and low-level SW events are represented by event codes.
- * The status of jobs which succeeded are also represented by
- * an event code (see @BASE_JD_EVENT_DONE).
- * Events are usually reported as part of a &struct base_jd_event.
- *
- * The event codes are encoded in the following way:
- * * 10:0 - subtype
- * * 12:11 - type
- * * 13 - SW success (only valid if the SW bit is set)
- * * 14 - SW event (HW event if not set)
- * * 15 - Kernel event (should never be seen in userspace)
- *
- * Events are split up into ranges as follows:
- * * BASE_JD_EVENT_RANGE_<description>_START
- * * BASE_JD_EVENT_RANGE_<description>_END
- *
- * code is in <description>'s range when:
- * BASE_JD_EVENT_RANGE_<description>_START <= code <
- * BASE_JD_EVENT_RANGE_<description>_END
- *
- * Ranges can be asserted for adjacency by testing that the END of the previous
- * is equal to the START of the next. This is useful for optimizing some tests
- * for range.
- *
- * A limitation is that the last member of this enum must explicitly be handled
- * (with an assert-unreachable statement) in switch statements that use
- * variables of this type. Otherwise, the compiler warns that we have not
- * handled that enum value.
- */
-enum base_jd_event_code {
- /* HW defined exceptions */
- BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0,
-
- /* non-fatal exceptions */
- BASE_JD_EVENT_NOT_STARTED = 0x00,
- BASE_JD_EVENT_DONE = 0x01,
- BASE_JD_EVENT_STOPPED = 0x03,
- BASE_JD_EVENT_TERMINATED = 0x04,
- BASE_JD_EVENT_ACTIVE = 0x08,
-
- BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40,
- BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40,
-
- /* job exceptions */
- BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40,
- BASE_JD_EVENT_JOB_POWER_FAULT = 0x41,
- BASE_JD_EVENT_JOB_READ_FAULT = 0x42,
- BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43,
- BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44,
- BASE_JD_EVENT_JOB_BUS_FAULT = 0x48,
- BASE_JD_EVENT_INSTR_INVALID_PC = 0x50,
- BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51,
- BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52,
- BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53,
- BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54,
- BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55,
- BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56,
- BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58,
- BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59,
- BASE_JD_EVENT_STATE_FAULT = 0x5A,
- BASE_JD_EVENT_OUT_OF_MEMORY = 0x60,
- BASE_JD_EVENT_UNKNOWN = 0x7F,
-
- /* GPU exceptions */
- BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80,
- BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88,
-
- /* MMU exceptions */
- BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1,
- BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2,
- BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3,
- BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4,
- BASE_JD_EVENT_PERMISSION_FAULT = 0xC8,
- BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1,
- BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2,
- BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3,
- BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4,
- BASE_JD_EVENT_ACCESS_FLAG = 0xD8,
-
- /* SW defined exceptions */
- BASE_JD_EVENT_MEM_GROWTH_FAILED =
- BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000,
- BASE_JD_EVENT_TIMED_OUT =
- BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001,
- BASE_JD_EVENT_JOB_CANCELLED =
- BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002,
- BASE_JD_EVENT_JOB_INVALID =
- BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003,
- BASE_JD_EVENT_PM_EVENT =
- BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004,
-
- BASE_JD_EVENT_BAG_INVALID =
- BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003,
-
- BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT |
- BASE_JD_SW_EVENT_RESERVED | 0x3FF,
-
- BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT |
- BASE_JD_SW_EVENT_SUCCESS | 0x000,
-
- BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT |
- BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000,
- BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS |
- BASE_JD_SW_EVENT_BAG | 0x000,
- BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT |
- BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000,
-
- BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT |
- BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
-
- BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT |
- BASE_JD_SW_EVENT_KERNEL | 0x000,
- BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT |
- BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000,
- BASE_JD_EVENT_END_RP_DONE = BASE_JD_SW_EVENT |
- BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x001,
-
- BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT |
- BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF
-};
-
-/**
- * struct base_jd_event_v2 - Event reporting structure
- *
- * @event_code: event code.
- * @atom_number: the atom number that has completed.
- * @udata: user data.
- *
- * This structure is used by the kernel driver to report information
- * about GPU events. They can either be HW-specific events or low-level
- * SW events, such as job-chain completion.
- *
- * The event code contains an event type field which can be extracted
- * by ANDing with BASE_JD_SW_EVENT_TYPE_MASK.
- */
-struct base_jd_event_v2 {
- enum base_jd_event_code event_code;
- base_atom_id atom_number;
- struct base_jd_udata udata;
-};
-
-/**
- * struct base_dump_cpu_gpu_counters - Structure for
- * BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS
- * jobs.
- * @system_time: gpu timestamp
- * @cycle_counter: gpu cycle count
- * @sec: cpu time(sec)
- * @usec: cpu time(usec)
- * @padding: padding
- *
- * This structure is stored into the memory pointed to by the @jc field
- * of &struct base_jd_atom.
- *
- * It must not occupy the same CPU cache line(s) as any neighboring data.
- * This is to avoid cases where access to pages containing the structure
- * is shared between cached and un-cached memory regions, which would
- * cause memory corruption.
- */
-
-struct base_dump_cpu_gpu_counters {
- u64 system_time;
- u64 cycle_counter;
- u64 sec;
- u32 usec;
- u8 padding[36];
-};
-
-#endif /* _BASE_JM_KERNEL_H_ */
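The deleted header's documentation spells out the event-code bit layout (subtype in bits 10:0, type in bits 12:11, SW-success in bit 13, SW in bit 14, kernel in bit 15) and the convention that a code falls in a range when RANGE_START <= code < RANGE_END. Assuming the same enumerators are now exported from the UAPI copy of this header, a minimal sketch of those two checks (the helper names are illustrative):

    /* True if 'code' is a software-success event, using the half-open range
     * convention documented in the removed enum.
     */
    static inline bool jd_event_is_sw_success(enum base_jd_event_code code)
    {
        return code >= BASE_JD_EVENT_RANGE_SW_SUCCESS_START &&
               code < BASE_JD_EVENT_RANGE_SW_SUCCESS_END;
    }

    /* Extract the 2-bit event type field (JOB, BAG, INFO or RESERVED). */
    static inline unsigned int jd_event_type(enum base_jd_event_code code)
    {
        return code & BASE_JD_SW_EVENT_TYPE_MASK;
    }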
diff --git a/mali_kbase/jm/mali_kbase_jm_ioctl.h b/mali_kbase/jm/mali_kbase_jm_ioctl.h
deleted file mode 100644
index 93c9c44..0000000
--- a/mali_kbase/jm/mali_kbase_jm_ioctl.h
+++ /dev/null
@@ -1,220 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *
- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-#ifndef _KBASE_JM_IOCTL_H_
-#define _KBASE_JM_IOCTL_H_
-
-#include <asm-generic/ioctl.h>
-#include <linux/types.h>
-
-/*
- * 11.1:
- * - Add BASE_MEM_TILER_ALIGN_TOP under base_mem_alloc_flags
- * 11.2:
- * - KBASE_MEM_QUERY_FLAGS can return KBASE_REG_PF_GROW and KBASE_REG_PROTECTED,
- * which some user-side clients prior to 11.2 might fault if they received
- * them
- * 11.3:
- * - New ioctls KBASE_IOCTL_STICKY_RESOURCE_MAP and
- * KBASE_IOCTL_STICKY_RESOURCE_UNMAP
- * 11.4:
- * - New ioctl KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET
- * 11.5:
- * - New ioctl: KBASE_IOCTL_MEM_JIT_INIT (old ioctl renamed to _OLD)
- * 11.6:
- * - Added flags field to base_jit_alloc_info structure, which can be used to
- * specify pseudo chunked tiler alignment for JIT allocations.
- * 11.7:
- * - Removed UMP support
- * 11.8:
- * - Added BASE_MEM_UNCACHED_GPU under base_mem_alloc_flags
- * 11.9:
- * - Added BASE_MEM_PERMANENT_KERNEL_MAPPING and BASE_MEM_FLAGS_KERNEL_ONLY
- * under base_mem_alloc_flags
- * 11.10:
- * - Enabled the use of nr_extres field of base_jd_atom_v2 structure for
- * JIT_ALLOC and JIT_FREE type softjobs to enable multiple JIT allocations
- * with one softjob.
- * 11.11:
- * - Added BASE_MEM_GPU_VA_SAME_4GB_PAGE under base_mem_alloc_flags
- * 11.12:
- * - Removed ioctl: KBASE_IOCTL_GET_PROFILING_CONTROLS
- * 11.13:
- * - New ioctl: KBASE_IOCTL_MEM_EXEC_INIT
- * 11.14:
- * - Add BASE_MEM_GROUP_ID_MASK, base_mem_group_id_get, base_mem_group_id_set
- * under base_mem_alloc_flags
- * 11.15:
- * - Added BASEP_CONTEXT_MMU_GROUP_ID_MASK under base_context_create_flags.
- * - Require KBASE_IOCTL_SET_FLAGS before BASE_MEM_MAP_TRACKING_HANDLE can be
- * passed to mmap().
- * 11.16:
- * - Extended ioctl KBASE_IOCTL_MEM_SYNC to accept imported dma-buf.
- * - Modified (backwards compatible) ioctl KBASE_IOCTL_MEM_IMPORT behavior for
- * dma-buf. Now, buffers are mapped on GPU when first imported, no longer
- * requiring external resource or sticky resource tracking. UNLESS,
- * CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND is enabled.
- * 11.17:
- * - Added BASE_JD_REQ_JOB_SLOT.
- * - Reused padding field in base_jd_atom_v2 to pass job slot number.
- * - New ioctl: KBASE_IOCTL_GET_CPU_GPU_TIMEINFO
- * 11.18:
- * - Added BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP under base_mem_alloc_flags
- * 11.19:
- * - Extended base_jd_atom_v2 to allow a renderpass ID to be specified.
- * 11.20:
- * - Added new phys_pages member to kbase_ioctl_mem_jit_init for
- * KBASE_IOCTL_MEM_JIT_INIT, previous variants of this renamed to use _10_2
- * (replacing '_OLD') and _11_5 suffixes
- * - Replaced compat_core_req (deprecated in 10.3) with jit_id[2] in
- * base_jd_atom_v2. It must currently be initialized to zero.
- * - Added heap_info_gpu_addr to base_jit_alloc_info, and
- * BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE allowable in base_jit_alloc_info's
- * flags member. Previous variants of this structure are kept and given _10_2
- * and _11_5 suffixes.
- * - The above changes are checked for safe values in usual builds
- * 11.21:
- * - v2.0 of mali_trace debugfs file, which now versions the file separately
- * 11.22:
- * - Added base_jd_atom (v3), which is seq_nr + base_jd_atom_v2.
- * KBASE_IOCTL_JOB_SUBMIT supports both in parallel.
- * 11.23:
- * - Modified KBASE_IOCTL_MEM_COMMIT behavior to reject requests to modify
- * the physical memory backing of JIT allocations. This was not supposed
- * to be a valid use case, but it was allowed by the previous implementation.
- * 11.24:
- * - Added a sysfs file 'serialize_jobs' inside a new sub-directory
- * 'scheduling'.
- * 11.25:
- * - Enabled JIT pressure limit in base/kbase by default
- * 11.26:
- * - Added kinstr_jm API
- * 11.27:
- * - Backwards compatible extension to HWC ioctl.
- * 11.28:
- * - Added kernel side cache ops needed hint
- * 11.29:
- * - Reserve ioctl 52
- * 11.30:
- * - Add a new priority level BASE_JD_PRIO_REALTIME
- * - Add ioctl 54: This controls the priority setting.
- */
-#define BASE_UK_VERSION_MAJOR 11
-#define BASE_UK_VERSION_MINOR 30
-
-/**
- * struct kbase_ioctl_version_check - Check version compatibility between
- * kernel and userspace
- *
- * @major: Major version number
- * @minor: Minor version number
- */
-struct kbase_ioctl_version_check {
- __u16 major;
- __u16 minor;
-};
-
-#define KBASE_IOCTL_VERSION_CHECK \
- _IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check)
-
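For context, a hedged sketch of how user space might negotiate this version before issuing any other ioctl (the "/dev/mali0" node name and the KBASE_IOCTL_TYPE value come from other headers and are assumptions here):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>

/* Illustrative sketch only: open the device and agree on a UK version. */
static int kbase_open_and_check_version(void)
{
        struct kbase_ioctl_version_check vc = {
                .major = BASE_UK_VERSION_MAJOR,
                .minor = BASE_UK_VERSION_MINOR,
        };
        int fd = open("/dev/mali0", O_RDWR);

        if (fd < 0)
                return -1;
        if (ioctl(fd, KBASE_IOCTL_VERSION_CHECK, &vc) == 0)
                printf("driver negotiated UK %u.%u\n", vc.major, vc.minor);
        return fd;
}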
-
-/**
- * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel
- *
- * @addr: Memory address of an array of struct base_jd_atom_v2 or v3
- * @nr_atoms: Number of entries in the array
- * @stride: sizeof(struct base_jd_atom_v2) or sizeof(struct base_jd_atom)
- */
-struct kbase_ioctl_job_submit {
- __u64 addr;
- __u32 nr_atoms;
- __u32 stride;
-};
-
-#define KBASE_IOCTL_JOB_SUBMIT \
- _IOW(KBASE_IOCTL_TYPE, 2, struct kbase_ioctl_job_submit)
-
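A hedged sketch of filling this in from user space follows; the stride is what lets the kernel tell a base_jd_atom_v2 array from a base_jd_atom (v3) array. The atom type prepared by the caller is an assumption here, and the helper reuses the includes from the version-check sketch above, plus <stdint.h> for uintptr_t.

/* Illustrative sketch only: submit 'count' atoms laid out as
 * base_jd_atom_v2.
 */
static int kbase_submit_atoms(int fd, struct base_jd_atom_v2 *atoms,
                              __u32 count)
{
        struct kbase_ioctl_job_submit js = {
                .addr = (__u64)(uintptr_t)atoms,
                .nr_atoms = count,
                .stride = sizeof(*atoms),
        };

        return ioctl(fd, KBASE_IOCTL_JOB_SUBMIT, &js);
}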
-#define KBASE_IOCTL_POST_TERM \
- _IO(KBASE_IOCTL_TYPE, 4)
-
-/**
- * struct kbase_ioctl_soft_event_update - Update the status of a soft-event
- * @event: GPU address of the event which has been updated
- * @new_status: The new status to set
- * @flags: Flags for future expansion
- */
-struct kbase_ioctl_soft_event_update {
- __u64 event;
- __u32 new_status;
- __u32 flags;
-};
-
-#define KBASE_IOCTL_SOFT_EVENT_UPDATE \
- _IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update)
-
-/**
- * struct kbase_kinstr_jm_fd_out - Explains the compatibility information for
- * the `struct kbase_kinstr_jm_atom_state_change` structure returned from the
- * kernel
- *
- * @size: The size of the `struct kbase_kinstr_jm_atom_state_change`
- * @version: Represents a breaking change in the
- * `struct kbase_kinstr_jm_atom_state_change`
- * @padding: Explicit padding to get the structure up to 64bits. See
- * https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst
- *
- * The `struct kbase_kinstr_jm_atom_state_change` may have extra members at the
- * end of the structure that older user space might not understand. If the
- * `version` is the same, the structure is still compatible with newer kernels.
- * The `size` can be used to cast the opaque memory returned from the kernel.
- */
-struct kbase_kinstr_jm_fd_out {
- __u16 size;
- __u8 version;
- __u8 padding[5];
-};
-
-/**
- * struct kbase_kinstr_jm_fd_in - Options when creating the file descriptor
- *
- * @count: Number of atom states that can be stored in the kernel circular
- * buffer. Must be a power of two
- * @padding: Explicit padding to get the structure up to 64bits. See
- * https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst
- */
-struct kbase_kinstr_jm_fd_in {
- __u16 count;
- __u8 padding[6];
-};
-
-union kbase_kinstr_jm_fd {
- struct kbase_kinstr_jm_fd_in in;
- struct kbase_kinstr_jm_fd_out out;
-};
-
-#define KBASE_IOCTL_KINSTR_JM_FD \
- _IOWR(KBASE_IOCTL_TYPE, 51, union kbase_kinstr_jm_fd)
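A hedged sketch of driving this ioctl: the union is written through .in and read back through .out, and it is assumed here that the new kinstr_jm file descriptor is returned as the ioctl result.

/* Illustrative sketch only: request a kinstr_jm fd with room for 256 atom
 * state changes (a power of two, as required), then read back the record
 * size and version the kernel will emit.
 */
static int kbase_open_kinstr_jm(int mali_fd)
{
        union kbase_kinstr_jm_fd arg = {
                .in = { .count = 256 },
        };
        int jm_fd = ioctl(mali_fd, KBASE_IOCTL_KINSTR_JM_FD, &arg);

        if (jm_fd >= 0)
                printf("kinstr_jm record size %u, version %u\n",
                       arg.out.size, arg.out.version);
        return jm_fd;
}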
-
-
-#define KBASE_IOCTL_VERSION_CHECK_RESERVED \
- _IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check)
-
-#endif /* _KBASE_JM_IOCTL_H_ */
diff --git a/mali_kbase/jm/mali_kbase_jm_js.h b/mali_kbase/jm/mali_kbase_jm_js.h
index 06adb36..e327536 100644
--- a/mali_kbase/jm/mali_kbase_jm_js.h
+++ b/mali_kbase/jm/mali_kbase_jm_js.h
@@ -657,7 +657,7 @@ static inline bool kbasep_js_is_submit_allowed(
test_bit = (u16) (1u << kctx->as_nr);
is_allowed = (bool) (js_devdata->runpool_irq.submit_allowed & test_bit);
- dev_dbg(kctx->kbdev->dev, "JS: submit %s allowed on %p (as=%d)",
+ dev_dbg(kctx->kbdev->dev, "JS: submit %s allowed on %pK (as=%d)",
is_allowed ? "is" : "isn't", (void *)kctx, kctx->as_nr);
return is_allowed;
}
@@ -684,7 +684,7 @@ static inline void kbasep_js_set_submit_allowed(
set_bit = (u16) (1u << kctx->as_nr);
- dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)",
+ dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %pK (as=%d)",
kctx, kctx->as_nr);
js_devdata->runpool_irq.submit_allowed |= set_bit;
@@ -715,7 +715,7 @@ static inline void kbasep_js_clear_submit_allowed(
clear_bit = (u16) (1u << kctx->as_nr);
clear_mask = ~clear_bit;
- dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)",
+ dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %pK (as=%d)",
kctx, kctx->as_nr);
js_devdata->runpool_irq.submit_allowed &= clear_mask;
diff --git a/mali_kbase/jm/mali_kbase_js_defs.h b/mali_kbase/jm/mali_kbase_js_defs.h
index 997cd49..183f0b0 100644
--- a/mali_kbase/jm/mali_kbase_js_defs.h
+++ b/mali_kbase/jm/mali_kbase_js_defs.h
@@ -171,7 +171,8 @@ enum {
* Internal atom priority defines for kbase_jd_atom::sched_prio
*/
enum {
- KBASE_JS_ATOM_SCHED_PRIO_REALTIME = 0,
+ KBASE_JS_ATOM_SCHED_PRIO_FIRST = 0,
+ KBASE_JS_ATOM_SCHED_PRIO_REALTIME = KBASE_JS_ATOM_SCHED_PRIO_FIRST,
KBASE_JS_ATOM_SCHED_PRIO_HIGH,
KBASE_JS_ATOM_SCHED_PRIO_MED,
KBASE_JS_ATOM_SCHED_PRIO_LOW,
diff --git a/mali_kbase/mali_base_hwconfig_features.h b/mali_kbase/mali_base_hwconfig_features.h
index d6f31cf..bdc769f 100644
--- a/mali_kbase/mali_base_hwconfig_features.h
+++ b/mali_kbase/mali_base_hwconfig_features.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
*
- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -49,7 +49,6 @@ enum base_hw_feature {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
- BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_TLS_HASHING,
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
@@ -85,7 +84,6 @@ static const enum base_hw_feature base_hw_features_tMIx[] = {
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_COHERENCY_REG,
- BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_END
};
@@ -112,7 +110,6 @@ static const enum base_hw_feature base_hw_features_tHEx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
- BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_END
};
@@ -139,7 +136,6 @@ static const enum base_hw_feature base_hw_features_tSIx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
- BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_END
};
@@ -166,7 +162,6 @@ static const enum base_hw_feature base_hw_features_tDVx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
- BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_END
};
@@ -193,7 +188,6 @@ static const enum base_hw_feature base_hw_features_tNOx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
- BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_TLS_HASHING,
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_END
@@ -222,7 +216,6 @@ static const enum base_hw_feature base_hw_features_tGOx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
- BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_TLS_HASHING,
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_END
@@ -250,7 +243,6 @@ static const enum base_hw_feature base_hw_features_tTRx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
- BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
BASE_HW_FEATURE_END
@@ -278,7 +270,6 @@ static const enum base_hw_feature base_hw_features_tNAx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
- BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
BASE_HW_FEATURE_END
@@ -306,7 +297,6 @@ static const enum base_hw_feature base_hw_features_tBEx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
- BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_L2_CONFIG,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
@@ -335,7 +325,6 @@ static const enum base_hw_feature base_hw_features_tBAx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
- BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_L2_CONFIG,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
@@ -364,7 +353,6 @@ static const enum base_hw_feature base_hw_features_tDUx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
- BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_L2_CONFIG,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
@@ -393,7 +381,6 @@ static const enum base_hw_feature base_hw_features_tODx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
- BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_L2_CONFIG,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
BASE_HW_FEATURE_END
diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h
index 0afabb1..a61eeb2 100644
--- a/mali_kbase/mali_base_hwconfig_issues.h
+++ b/mali_kbase/mali_base_hwconfig_issues.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
*
- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
diff --git a/mali_kbase/mali_base_kernel.h b/mali_kbase/mali_base_kernel.h
deleted file mode 100644
index 5c173eb..0000000
--- a/mali_kbase/mali_base_kernel.h
+++ /dev/null
@@ -1,812 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-/*
- * Base structures shared with the kernel.
- */
-
-#ifndef _BASE_KERNEL_H_
-#define _BASE_KERNEL_H_
-
-struct base_mem_handle {
- struct {
- u64 handle;
- } basep;
-};
-
-#include "mali_base_mem_priv.h"
-#include "gpu/mali_kbase_gpu_coherency.h"
-#include "gpu/mali_kbase_gpu_id.h"
-
-#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
-
-#define BASE_MAX_COHERENT_GROUPS 16
-
-#if defined CDBG_ASSERT
-#define LOCAL_ASSERT CDBG_ASSERT
-#elif defined KBASE_DEBUG_ASSERT
-#define LOCAL_ASSERT KBASE_DEBUG_ASSERT
-#else
-#error assert macro not defined!
-#endif
-
-#if defined(PAGE_MASK) && defined(PAGE_SHIFT)
-#define LOCAL_PAGE_SHIFT PAGE_SHIFT
-#define LOCAL_PAGE_LSB ~PAGE_MASK
-#else
-#include <osu/mali_osu.h>
-
-#if defined OSU_CONFIG_CPU_PAGE_SIZE_LOG2
-#define LOCAL_PAGE_SHIFT OSU_CONFIG_CPU_PAGE_SIZE_LOG2
-#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1)
-#else
-#error Failed to find page size
-#endif
-#endif
-
-/* Physical memory group ID for normal usage.
- */
-#define BASE_MEM_GROUP_DEFAULT (0)
-
-/* Number of physical memory groups.
- */
-#define BASE_MEM_GROUP_COUNT (16)
-
-/**
- * typedef base_mem_alloc_flags - Memory allocation, access/hint flags.
- *
- * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator
- * in order to determine the best cache policy. Some combinations are
- * of course invalid (e.g. MEM_PROT_CPU_WR | MEM_HINT_CPU_RD), which would
- * define a write-only region on the CPU side that is nevertheless heavily
- * read by the CPU.
- * Other flags are only meaningful to a particular allocator.
- * More flags can be added to this list, as long as they don't clash
- * (see BASE_MEM_FLAGS_NR_BITS for the number of the first free bit).
- */
-typedef u32 base_mem_alloc_flags;
-
-/* A mask for all the flags which are modifiable via the base_mem_set_flags
- * interface.
- */
-#define BASE_MEM_FLAGS_MODIFIABLE \
- (BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \
- BASE_MEM_COHERENT_LOCAL)
-
-/* A mask of all the flags that can be returned via the base_mem_get_flags()
- * interface.
- */
-#define BASE_MEM_FLAGS_QUERYABLE \
- (BASE_MEM_FLAGS_INPUT_MASK & ~(BASE_MEM_SAME_VA | \
- BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_DONT_NEED | \
- BASE_MEM_IMPORT_SHARED | BASE_MEM_FLAGS_RESERVED | \
- BASEP_MEM_FLAGS_KERNEL_ONLY))
-
-/**
- * enum base_mem_import_type - Memory types supported by @a base_mem_import
- *
- * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type
- * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int)
- * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a
- * base_mem_import_user_buffer
- *
- * Each type defines what the supported handle type is.
- *
- * If any new type is added here ARM must be contacted
- * to allocate a numeric value for it.
- * Do not just add a new type without synchronizing with ARM
- * as future releases from ARM might include other new types
- * which could clash with your custom types.
- */
-enum base_mem_import_type {
- BASE_MEM_IMPORT_TYPE_INVALID = 0,
- /*
- * Import type with value 1 is deprecated.
- */
- BASE_MEM_IMPORT_TYPE_UMM = 2,
- BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3
-};
-
-/**
- * struct base_mem_import_user_buffer - Handle of an imported user buffer
- *
- * @ptr: address of imported user buffer
- * @length: length of imported user buffer in bytes
- *
- * This structure is used to represent a handle of an imported user buffer.
- */
-
-struct base_mem_import_user_buffer {
- u64 ptr;
- u64 length;
-};
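A minimal sketch of describing a CPU buffer with this handle type (how the handle is then wrapped into the import ioctl is defined in other headers and is not shown; uintptr_t is assumed to be available in the build):

/* Illustrative sketch only: describe 'len' bytes at 'ptr' for import as
 * BASE_MEM_IMPORT_TYPE_USER_BUFFER.
 */
static inline struct base_mem_import_user_buffer
base_user_buffer_handle(void *ptr, u64 len)
{
        struct base_mem_import_user_buffer handle = {
                .ptr = (u64)(uintptr_t)ptr,
                .length = len,
        };

        return handle;
}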
-
-/* Mask to detect 4GB boundary alignment */
-#define BASE_MEM_MASK_4GB 0xfffff000UL
-/* Mask to detect 4GB boundary (in page units) alignment */
-#define BASE_MEM_PFN_MASK_4GB (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT)
-
-/* Limit on the 'extension' parameter for an allocation with the
- * BASE_MEM_TILER_ALIGN_TOP flag set
- *
- * This is the same as the maximum limit for a Buffer Descriptor's chunk size
- */
-#define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2 \
- (21u - (LOCAL_PAGE_SHIFT))
-#define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES \
- (1ull << (BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2))
-
-/* Bit mask of cookies used for memory allocation setup */
-#define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */
-
-/* Maximum size allowed in a single KBASE_IOCTL_MEM_ALLOC call */
-#define KBASE_MEM_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */
-
-/*
- * struct base_fence - Cross-device synchronisation fence.
- *
- * A fence is used to signal when the GPU has finished accessing a resource that
- * may be shared with other devices, and also to delay work done asynchronously
- * by the GPU until other devices have finished accessing a shared resource.
- */
-struct base_fence {
- struct {
- int fd;
- int stream_fd;
- } basep;
-};
-
-/**
- * struct base_mem_aliasing_info - Memory aliasing info
- *
- * Describes a memory handle to be aliased.
- * A subset of the handle can be chosen for aliasing, given an offset and a
- * length.
- * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a
- * region where a special page is mapped with a write-alloc cache setup,
- * typically used when the write result of the GPU isn't needed, but the GPU
- * must write anyway.
- *
- * Offset and length are specified in pages.
- * Offset must be within the size of the handle.
- * Offset+length must not overrun the size of the handle.
- *
- * @handle: Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
- * @offset: Offset within the handle to start aliasing from, in pages.
- * Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE.
- * @length: Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
- * this specifies the number of times the special page is needed.
- */
-struct base_mem_aliasing_info {
- struct base_mem_handle handle;
- u64 offset;
- u64 length;
-};
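A brief sketch of the two usages described above, purely as illustration:

/* Illustrative sketch only: alias an ordinary window of an existing
 * handle. For the BASE_MEM_WRITE_ALLOC_PAGES_HANDLE case, offset is
 * ignored and length is how many times the special page is repeated.
 */
static inline struct base_mem_aliasing_info
base_alias_window(struct base_mem_handle handle, u64 offset_pages,
                  u64 length_pages)
{
        struct base_mem_aliasing_info ai = {
                .handle = handle,
                .offset = offset_pages,
                .length = length_pages,
        };

        return ai;
}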
-
-/* Maximum percentage of just-in-time memory allocation trimming to perform
- * on free.
- */
-#define BASE_JIT_MAX_TRIM_LEVEL (100)
-
-/* Maximum number of concurrent just-in-time memory allocations.
- */
-#define BASE_JIT_ALLOC_COUNT (255)
-
-/* base_jit_alloc_info in use for kernel driver versions 10.2 to early 11.5
- *
- * jit_version is 1
- *
- * Due to the lack of explicit padding, 32-bit and 64-bit user clients may
- * have assumed different sizes for the struct
- *
- * An array of structures was not supported
- */
-struct base_jit_alloc_info_10_2 {
- u64 gpu_alloc_addr;
- u64 va_pages;
- u64 commit_pages;
- u64 extension;
- u8 id;
-};
-
-/* base_jit_alloc_info introduced by kernel driver version 11.5, and in use up
- * to 11.19
- *
- * This structure had a number of modifications during and after kernel driver
- * version 11.5, but remains size-compatible throughout its version history;
- * earlier variants stay compatible with later ones by requiring the unused
- * space in the structure to be zero-initialized.
- *
- * jit_version is 2
- *
- * Kernel driver version history:
- * 11.5: Initial introduction with 'usage_id' and padding[5]. All padding bytes
- * must be zero. Kbase minor version was not incremented, so some
- * versions of 11.5 do not have this change.
- * 11.5: Added 'bin_id' and 'max_allocations', replacing 2 padding bytes (Kbase
- * minor version not incremented)
- * 11.6: Added 'flags', replacing 1 padding byte
- * 11.10: Arrays of this structure are supported
- */
-struct base_jit_alloc_info_11_5 {
- u64 gpu_alloc_addr;
- u64 va_pages;
- u64 commit_pages;
- u64 extension;
- u8 id;
- u8 bin_id;
- u8 max_allocations;
- u8 flags;
- u8 padding[2];
- u16 usage_id;
-};
-
-/**
- * struct base_jit_alloc_info - Structure which describes a JIT allocation
- * request.
- * @gpu_alloc_addr: The GPU virtual address to write the JIT
- * allocated GPU virtual address to.
- * @va_pages: The minimum number of virtual pages required.
- * @commit_pages: The minimum number of physical pages which
- * should back the allocation.
- * @extension: Granularity of physical pages to grow the
- * allocation by during a fault.
- * @id: Unique ID provided by the caller, this is used
- * to pair allocation and free requests.
- * Zero is not a valid value.
- * @bin_id: The JIT allocation bin, used in conjunction with
- * @max_allocations to limit the number of each
- * type of JIT allocation.
- * @max_allocations: The maximum number of allocations allowed within
- * the bin specified by @bin_id. Should be the same
- * for all allocations within the same bin.
- * @flags: flags specifying the special requirements for
- * the JIT allocation, see
- * %BASE_JIT_ALLOC_VALID_FLAGS
- * @padding: Expansion space - should be initialised to zero
- * @usage_id: A hint about which allocation should be reused.
- * The kernel should attempt to use a previous
- * allocation with the same usage_id
- * @heap_info_gpu_addr: Pointer to an object in GPU memory describing
- * the actual usage of the region.
- *
- * jit_version is 3.
- *
- * When modifications are made to this structure, it is still compatible with
- * jit_version 3 when: a) the size is unchanged, and b) new members only
- * replace the padding bytes.
- *
- * Previous jit_version history:
- * jit_version == 1, refer to &base_jit_alloc_info_10_2
- * jit_version == 2, refer to &base_jit_alloc_info_11_5
- *
- * Kbase version history:
- * 11.20: added @heap_info_gpu_addr
- */
-struct base_jit_alloc_info {
- u64 gpu_alloc_addr;
- u64 va_pages;
- u64 commit_pages;
- u64 extension;
- u8 id;
- u8 bin_id;
- u8 max_allocations;
- u8 flags;
- u8 padding[2];
- u16 usage_id;
- u64 heap_info_gpu_addr;
-};
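The version history above leans on zero-initialised padding for forward compatibility; here is a hedged sketch of preparing a jit_version 3 request (all sizes and addresses are placeholders):

/* Illustrative sketch only: a compound-literal assignment zeroes every
 * member that is not named, which keeps the padding bytes zero as the
 * compatibility rules above require.
 */
static inline void base_prepare_jit_request(struct base_jit_alloc_info *info,
                                            u64 result_gpu_va,
                                            u64 heap_info_gpu_va)
{
        *info = (struct base_jit_alloc_info){
                .gpu_alloc_addr = result_gpu_va, /* where the VA is written */
                .va_pages = 64,
                .commit_pages = 16,
                .extension = 4,
                .id = 1, /* non-zero; pairs the alloc with a later free */
                .heap_info_gpu_addr = heap_info_gpu_va,
        };
}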
-
-enum base_external_resource_access {
- BASE_EXT_RES_ACCESS_SHARED,
- BASE_EXT_RES_ACCESS_EXCLUSIVE
-};
-
-struct base_external_resource {
- u64 ext_resource;
-};
-
-
-/**
- * The maximum number of external resources which can be mapped/unmapped
- * in a single request.
- */
-#define BASE_EXT_RES_COUNT_MAX 10
-
-/**
- * struct base_external_resource_list - Structure which describes a list of
- * external resources.
- * @count: The number of resources.
- * @ext_res: Array of external resources which is
- * sized at allocation time.
- */
-struct base_external_resource_list {
- u64 count;
- struct base_external_resource ext_res[1];
-};
-
-struct base_jd_debug_copy_buffer {
- u64 address;
- u64 size;
- struct base_external_resource extres;
-};
-
-#define GPU_MAX_JOB_SLOTS 16
-
-/**
- * User-side Base GPU Property Queries
- *
- * The User-side Base GPU Property Query interface encapsulates two
- * sub-modules:
- *
- * - "Dynamic GPU Properties"
- * - "Base Platform Config GPU Properties"
- *
- * Base only deals with properties that vary between different GPU
- * implementations - the Dynamic GPU properties and the Platform Config
- * properties.
- *
- * For properties that are constant for the GPU Architecture, refer to the
- * GPU module. However, we will discuss their relevance here just to
- * provide background information.
- *
- * About the GPU Properties in Base and GPU modules
- *
- * The compile-time properties (Platform Config, GPU Compile-time
- * properties) are exposed as pre-processor macros.
- *
- * Complementing the compile-time properties are the Dynamic GPU
- * Properties, which act as a conduit for the GPU Configuration
- * Discovery.
- *
- * In general, the dynamic properties are present to verify that the platform
- * has been configured correctly with the right set of Platform Config
- * Compile-time Properties.
- *
- * As a consistent guide across the entire DDK, the choice for dynamic or
- * compile-time should consider the following, in order:
- * 1. Can the code be written so that it doesn't need to know the
- * implementation limits at all?
- * 2. If you need the limits, get the information from the Dynamic Property
- * lookup. This should be done once as you fetch the context, and then cached
- * as part of the context data structure, so it's cheap to access.
- * 3. If there's a clear and arguable inefficiency in using Dynamic Properties,
- * then use a Compile-Time Property (Platform Config, or GPU Compile-time
- * property). Examples of where this might be sensible follow:
- * - Part of a critical inner-loop
- * - Frequent re-use throughout the driver, causing significant extra load
- * instructions or control flow that would be worthwhile optimizing out.
- *
- * We cannot provide an exhaustive set of examples, neither can we provide a
- * rule for every possible situation. Use common sense, and think about: what
- * the rest of the driver will be doing; how the compiler might represent the
- * value if it is a compile-time constant; whether an OEM shipping multiple
- * devices would benefit much more from a single DDK binary, instead of
- * insignificant micro-optimizations.
- *
- * Dynamic GPU Properties
- *
- * Dynamic GPU properties are presented in two sets:
- * 1. the commonly used properties in @ref base_gpu_props, which have been
- * unpacked from GPU register bitfields.
- * 2. The full set of raw, unprocessed properties in gpu_raw_gpu_props
- * (also a member of base_gpu_props). All of these are presented in
- * the packed form, as presented by the GPU registers themselves.
- *
- * The raw properties in gpu_raw_gpu_props are necessary to
- * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
- * behaving differently?". In this case, all information about the
- * configuration is potentially useful, but it does not need to be processed
- * by the driver. Instead, the raw registers can be processed by the Mali
- * Tools software on the host PC.
- *
- * The properties returned extend the GPU Configuration Discovery
- * registers. For example, GPU clock speed is not specified in the GPU
- * Architecture, but is necessary for OpenCL's clGetDeviceInfo() function.
- *
- * The GPU properties are obtained by a call to
- * base_get_gpu_props(). This simply returns a pointer to a const
- * base_gpu_props structure. It is constant for the life of a base
- * context. Multiple calls to base_get_gpu_props() to a base context
- * return the same pointer to a constant structure. This avoids cache pollution
- * of the common data.
- *
- * This pointer must not be freed, because it does not point to the start of a
- * region allocated by the memory allocator; instead, just close the @ref
- * base_context.
- *
- *
- * Kernel Operation
- *
- * During Base Context Create time, user-side makes a single kernel call:
- * - A call to fill user memory with GPU information structures
- *
- * The kernel side fills the provided memory with the entire processed
- * base_gpu_props structure, because this information is required on both the
- * user and kernel sides; it does not make sense to decode it twice.
- *
- * Coherency groups must be derived from the bitmasks, but this can be done
- * kernel side, and just once at kernel startup: Coherency groups must already
- * be known kernel-side, to support chains that specify a 'Only Coherent Group'
- * SW requirement, or 'Only Coherent Group with Tiler' SW requirement.
- *
- * Coherency Group calculation
- *
- * Creation of the coherent group data is done at device-driver startup, and so
- * is one-time. This will most likely involve a loop with CLZ, shifting, and
- * bit clearing on the L2_PRESENT mask, depending on whether the
- * system is L2 Coherent. The number of shader cores is done by a
- * population count, since faulty cores may be disabled during production,
- * producing a non-contiguous mask.
- *
- * The memory requirements for this algorithm can be determined either by a u64
- * population count on the L2_PRESENT mask (a LUT helper already is
- * required for the above), or simple assumption that there can be no more than
- * 16 coherent groups, since core groups are typically 4 cores.
- */
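The coherency-group derivation described above boils down to population counts over the present masks; the following is a minimal sketch, assuming the kernel's hweight64() helper is available in this context:

/* Illustrative sketch only: the popcount steps of the derivation. The
 * shader mask may be sparse because faulty cores can be fused off.
 */
static inline u32 example_num_core_groups(u64 l2_present)
{
        return (u32)hweight64(l2_present);
}

static inline u32 example_num_shader_cores(u64 shader_present)
{
        return (u32)hweight64(shader_present);
}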
-
-#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
-
-#define BASE_MAX_COHERENT_GROUPS 16
-/**
- * struct mali_base_gpu_core_props - GPU core props info
- * @product_id: Product specific value.
- * @version_status: Status of the GPU release. No defined values, but starts at
- * 0 and increases by one for each release status (alpha, beta, EAC, etc.).
- * 4 bit values (0-15).
- * @minor_revision: Minor release number of the GPU. "P" part of an "RnPn"
- * release number.
- * 8 bit values (0-255).
- * @major_revision: Major release number of the GPU. "R" part of an "RnPn"
- * release number.
- * 4 bit values (0-15).
- * @padding: padding to align to 8 bytes
- * @gpu_freq_khz_max: The maximum GPU frequency. Reported to applications by
- * clGetDeviceInfo()
- * @log2_program_counter_size: Size of the shader program counter, in bits.
- * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU. This
- * is a bitpattern where a set bit indicates that the format is supported.
- * Before using a texture format, it is recommended that the corresponding
- * bit be checked.
- * @gpu_available_memory_size: Theoretical maximum memory available to the GPU.
- * It is unlikely that a client will be able to allocate all of this memory
- * for their own purposes, but this at least provides an upper bound on the
- * memory available to the GPU.
- * This is required for OpenCL's clGetDeviceInfo() call when
- * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
- * client will not be expecting to allocate anywhere near this value.
- * @num_exec_engines: The number of execution engines.
- */
-struct mali_base_gpu_core_props {
- u32 product_id;
- u16 version_status;
- u16 minor_revision;
- u16 major_revision;
- u16 padding;
- u32 gpu_freq_khz_max;
- u32 log2_program_counter_size;
- u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
- u64 gpu_available_memory_size;
- u8 num_exec_engines;
-};
-
-/*
- * More information is possible - but associativity and bus width are not
- * required by upper-level apis.
- */
-struct mali_base_gpu_l2_cache_props {
- u8 log2_line_size;
- u8 log2_cache_size;
- u8 num_l2_slices; /* Number of L2C slices. 1 or higher */
- u8 padding[5];
-};
-
-struct mali_base_gpu_tiler_props {
- u32 bin_size_bytes; /* Max is 4*2^15 */
- u32 max_active_levels; /* Max is 2^15 */
-};
-
-/**
- * struct mali_base_gpu_thread_props - GPU threading system details.
- * @max_threads: Max. number of threads per core
- * @max_workgroup_size: Max. number of threads per workgroup
- * @max_barrier_size: Max. number of threads that can synchronize on a
- * simple barrier
- * @max_registers: Total size [1..65535] of the register file available
- * per core.
- * @max_task_queue: Max. tasks [1..255] which may be sent to a core
- * before it becomes blocked.
- * @max_thread_group_split: Max. allowed value [1..15] of the Thread Group Split
- * field.
- * @impl_tech: 0 = Not specified, 1 = Silicon, 2 = FPGA,
- * 3 = SW Model/Emulation
- * @padding: padding to align to 8 bytes
- * @tls_alloc: Number of threads per core that TLS must be
- * allocated for
- */
-struct mali_base_gpu_thread_props {
- u32 max_threads;
- u32 max_workgroup_size;
- u32 max_barrier_size;
- u16 max_registers;
- u8 max_task_queue;
- u8 max_thread_group_split;
- u8 impl_tech;
- u8 padding[3];
- u32 tls_alloc;
-};
-
-/**
- * struct mali_base_gpu_coherent_group - descriptor for a coherent group
- * @core_mask: Core restriction mask required for the group
- * @num_cores: Number of cores in the group
- * @padding: padding to align to 8 bytes
- *
- * \c core_mask exposes all cores in that coherent group, and \c num_cores
- * provides a cached population-count for that mask.
- *
- * @note Whilst all cores are exposed in the mask, not all may be available to
- * the application, depending on the Kernel Power policy.
- *
- * @note If u64s must be 8-byte aligned, then this structure has 32 bits of
- * wastage.
- */
-struct mali_base_gpu_coherent_group {
- u64 core_mask;
- u16 num_cores;
- u16 padding[3];
-};
-
-/**
- * struct mali_base_gpu_coherent_group_info - Coherency group information
- * @num_groups: Number of coherent groups in the GPU.
- * @num_core_groups: Number of core groups (coherent or not) in the GPU.
- * Equivalent to the number of L2 Caches.
- * The GPU Counter dumping writes 2048 bytes per core group, regardless
- * of whether the core groups are coherent or not. Hence this member is
- * needed to calculate how much memory is required for dumping.
- * @note Do not use it to work out how many valid elements are in the
- * group[] member. Use num_groups instead.
- * @coherency: Coherency features of the memory, accessed by gpu_mem_features
- * methods
- * @padding: padding to align to 8 bytes
- * @group: Descriptors of coherent groups
- *
- * Note that the sizes of the members could be reduced. However, the \c group
- * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte
- * aligned, thus leading to wastage if the other members sizes were reduced.
- *
- * The groups are sorted by core mask. The core masks are non-repeating and do
- * not intersect.
- */
-struct mali_base_gpu_coherent_group_info {
- u32 num_groups;
- u32 num_core_groups;
- u32 coherency;
- u32 padding;
- struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
-};
-
-/**
- * struct gpu_raw_gpu_props - A complete description of the GPU's Hardware
- * Configuration Discovery registers.
- * @shader_present: Shader core present bitmap
- * @tiler_present: Tiler core present bitmap
- * @l2_present: Level 2 cache present bitmap
- * @stack_present: Core stack present bitmap
- * @l2_features: L2 features
- * @core_features: Core features
- * @mem_features: Mem features
- * @mmu_features: Mmu features
- * @as_present: Bitmap of address spaces present
- * @js_present: Job slots present
- * @js_features: Array of job slot features.
- * @tiler_features: Tiler features
- * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU
- * @gpu_id: GPU and revision identifier
- * @thread_max_threads: Maximum number of threads per core
- * @thread_max_workgroup_size: Maximum number of threads per workgroup
- * @thread_max_barrier_size: Maximum number of threads per barrier
- * @thread_features: Thread features
- * @coherency_mode: Note: This is the _selected_ coherency mode rather than the
- * available modes as exposed in the coherency_features register
- * @thread_tls_alloc: Number of threads per core that TLS must be allocated for
- * @gpu_features: GPU features
- *
- * The information is presented inefficiently for access. For frequent access,
- * the values should be better expressed in an unpacked form in the
- * base_gpu_props structure.
- *
- * The raw properties in gpu_raw_gpu_props are necessary to
- * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
- * behaving differently?". In this case, all information about the
- * configuration is potentially useful, but it does not need to be processed
- * by the driver. Instead, the raw registers can be processed by the Mali
- * Tools software on the host PC.
- *
- */
-struct gpu_raw_gpu_props {
- u64 shader_present;
- u64 tiler_present;
- u64 l2_present;
- u64 stack_present;
- u32 l2_features;
- u32 core_features;
- u32 mem_features;
- u32 mmu_features;
-
- u32 as_present;
-
- u32 js_present;
- u32 js_features[GPU_MAX_JOB_SLOTS];
- u32 tiler_features;
- u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
-
- u32 gpu_id;
-
- u32 thread_max_threads;
- u32 thread_max_workgroup_size;
- u32 thread_max_barrier_size;
- u32 thread_features;
-
- /*
- * Note: This is the _selected_ coherency mode rather than the
- * available modes as exposed in the coherency_features register.
- */
- u32 coherency_mode;
-
- u32 thread_tls_alloc;
- u64 gpu_features;
-};
-
-/**
- * struct base_gpu_props - Return structure for base_get_gpu_props().
- * @core_props: Core props.
- * @l2_props: L2 props.
- * @unused_1: Keep for backwards compatibility.
- * @tiler_props: Tiler props.
- * @thread_props: Thread props.
- * @raw_props: This member is large, likely to be 128 bytes.
- * @coherency_info: This must be last member of the structure.
- *
- * NOTE: the raw_props member in this data structure contains the register
- * values from which the value of the other members are derived. The derived
- * members exist to allow for efficient access and/or shielding the details
- * of the layout of the registers.
- */
-struct base_gpu_props {
- struct mali_base_gpu_core_props core_props;
- struct mali_base_gpu_l2_cache_props l2_props;
- u64 unused_1;
- struct mali_base_gpu_tiler_props tiler_props;
- struct mali_base_gpu_thread_props thread_props;
- struct gpu_raw_gpu_props raw_props;
- struct mali_base_gpu_coherent_group_info coherency_info;
-};
-
-#if MALI_USE_CSF
-#include "csf/mali_base_csf_kernel.h"
-#else
-#include "jm/mali_base_jm_kernel.h"
-#endif
-
-/**
- * base_mem_group_id_get() - Get group ID from flags
- * @flags: Flags to pass to base_mem_alloc
- *
- * This inline function extracts the encoded group ID from flags
- * and converts it into a numeric value (0-15).
- *
- * Return: group ID (0-15) extracted from the parameter
- */
-static inline int base_mem_group_id_get(base_mem_alloc_flags flags)
-{
- LOCAL_ASSERT((flags & ~BASE_MEM_FLAGS_INPUT_MASK) == 0);
- return (int)((flags & BASE_MEM_GROUP_ID_MASK) >>
- BASEP_MEM_GROUP_ID_SHIFT);
-}
-
-/**
- * base_mem_group_id_set() - Set group ID into base_mem_alloc_flags
- * @id: group ID (0-15) you want to encode
- *
- * This inline function encodes a specific group ID into base_mem_alloc_flags.
- * Parameter 'id' should lie between 0 and 15.
- *
- * Return: base_mem_alloc_flags with the group ID (id) encoded
- *
- * The return value can be combined with other flags against base_mem_alloc
- * to identify a specific memory group.
- */
-static inline base_mem_alloc_flags base_mem_group_id_set(int id)
-{
- if ((id < 0) || (id >= BASE_MEM_GROUP_COUNT)) {
- /* Set to default value when id is out of range. */
- id = BASE_MEM_GROUP_DEFAULT;
- }
-
- return ((base_mem_alloc_flags)id << BASEP_MEM_GROUP_ID_SHIFT) &
- BASE_MEM_GROUP_ID_MASK;
-}
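As the kernel-doc above says, the encoded group ID is meant to be combined with the other allocation flags; a short sketch follows (BASE_MEM_PROT_CPU_RD/WR are assumed to be among the flags defined elsewhere in this header family):

/* Illustrative sketch only: build allocation flags that request physical
 * memory group 3 with CPU read/write access.
 */
static inline base_mem_alloc_flags example_group3_flags(void)
{
        return base_mem_group_id_set(3) |
               BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR;
}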
-
-/**
- * base_context_mmu_group_id_set - Encode a memory group ID in
- * base_context_create_flags
- *
- * Memory allocated for GPU page tables will come from the specified group.
- *
- * @group_id: Physical memory group ID. Range is 0..(BASE_MEM_GROUP_COUNT-1).
- *
- * Return: Bitmask of flags to pass to base_context_init.
- */
-static inline base_context_create_flags base_context_mmu_group_id_set(
- int const group_id)
-{
- LOCAL_ASSERT(group_id >= 0);
- LOCAL_ASSERT(group_id < BASE_MEM_GROUP_COUNT);
- return BASEP_CONTEXT_MMU_GROUP_ID_MASK &
- ((base_context_create_flags)group_id <<
- BASEP_CONTEXT_MMU_GROUP_ID_SHIFT);
-}
-
-/**
- * base_context_mmu_group_id_get - Decode a memory group ID from
- * base_context_create_flags
- *
- * Memory allocated for GPU page tables will come from the returned group.
- *
- * @flags: Bitmask of flags to pass to base_context_init.
- *
- * Return: Physical memory group ID. Valid range is 0..(BASE_MEM_GROUP_COUNT-1).
- */
-static inline int base_context_mmu_group_id_get(
- base_context_create_flags const flags)
-{
- LOCAL_ASSERT(flags == (flags & BASEP_CONTEXT_CREATE_ALLOWED_FLAGS));
- return (int)((flags & BASEP_CONTEXT_MMU_GROUP_ID_MASK) >>
- BASEP_CONTEXT_MMU_GROUP_ID_SHIFT);
-}
-
-/*
- * A number of bit flags are defined for requesting cpu_gpu_timeinfo. These
- * flags are also used, where applicable, for specifying which fields
- * are valid following the request operation.
- */
-
-/* For monotonic (counter) timefield */
-#define BASE_TIMEINFO_MONOTONIC_FLAG (1UL << 0)
-/* For system wide timestamp */
-#define BASE_TIMEINFO_TIMESTAMP_FLAG (1UL << 1)
-/* For GPU cycle counter */
-#define BASE_TIMEINFO_CYCLE_COUNTER_FLAG (1UL << 2)
-/* Specify kernel GPU register timestamp */
-#define BASE_TIMEINFO_KERNEL_SOURCE_FLAG (1UL << 30)
-/* Specify userspace cntvct_el0 timestamp source */
-#define BASE_TIMEINFO_USER_SOURCE_FLAG (1UL << 31)
-
-#define BASE_TIMEREQUEST_ALLOWED_FLAGS (\
- BASE_TIMEINFO_MONOTONIC_FLAG | \
- BASE_TIMEINFO_TIMESTAMP_FLAG | \
- BASE_TIMEINFO_CYCLE_COUNTER_FLAG | \
- BASE_TIMEINFO_KERNEL_SOURCE_FLAG | \
- BASE_TIMEINFO_USER_SOURCE_FLAG)
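A one-line validity check built from the mask above, as a sketch of how a request might be filtered:

/* Illustrative sketch only: a timeinfo request is valid when it asks for
 * nothing outside the allowed set. Returns non-zero when valid.
 */
static inline int base_timeinfo_request_valid(u32 requested)
{
        return (requested & ~BASE_TIMEREQUEST_ALLOWED_FLAGS) == 0;
}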
-
-#endif /* _BASE_KERNEL_H_ */
diff --git a/mali_kbase/mali_base_mem_priv.h b/mali_kbase/mali_base_mem_priv.h
deleted file mode 100644
index 9f59a4f..0000000
--- a/mali_kbase/mali_base_mem_priv.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *
- * (C) COPYRIGHT 2010-2015, 2020 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-#ifndef _BASE_MEM_PRIV_H_
-#define _BASE_MEM_PRIV_H_
-
-#define BASE_SYNCSET_OP_MSYNC (1U << 0)
-#define BASE_SYNCSET_OP_CSYNC (1U << 1)
-
-/*
- * This structure describes a basic memory coherency operation.
- * It can either be:
- * @li a sync from CPU to Memory:
- * - type = ::BASE_SYNCSET_OP_MSYNC
- * - mem_handle = a handle to the memory object on which the operation
- * is taking place
- * - user_addr = the address of the range to be synced
- * - size = the amount of data to be synced, in bytes
- * - offset is ignored.
- * @li a sync from Memory to CPU:
- * - type = ::BASE_SYNCSET_OP_CSYNC
- * - mem_handle = a handle to the memory object on which the operation
- * is taking place
- * - user_addr = the address of the range to be synced
- * - size = the amount of data to be synced, in bytes.
- * - offset is ignored.
- */
-struct basep_syncset {
- struct base_mem_handle mem_handle;
- u64 user_addr;
- u64 size;
- u8 type;
- u8 padding[7];
-};
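A minimal sketch of filling in a CPU-to-memory sync as the comment above describes (zero-initialising first keeps the padding bytes zero, which is assumed to be required):

/* Illustrative sketch only: describe an MSYNC over [addr, addr + size). */
static inline void basep_fill_msync(struct basep_syncset *sset,
                                    struct base_mem_handle handle,
                                    u64 addr, u64 size)
{
        *sset = (struct basep_syncset){
                .mem_handle = handle,
                .user_addr = addr,
                .size = size,
                .type = BASE_SYNCSET_OP_MSYNC,
        };
}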
-
-#endif
diff --git a/mali_kbase/mali_kbase.h b/mali_kbase/mali_kbase.h
index a78ff43..b6683b9 100644
--- a/mali_kbase/mali_kbase.h
+++ b/mali_kbase/mali_kbase.h
@@ -45,7 +45,7 @@
#include <linux/workqueue.h>
#include <linux/interrupt.h>
-#include "mali_base_kernel.h"
+#include <uapi/gpu/arm/midgard/mali_base_kernel.h>
#include <mali_kbase_linux.h>
/*
@@ -64,7 +64,7 @@
#include "mali_kbase_gpu_memory_debugfs.h"
#include "mali_kbase_mem_profile_debugfs.h"
#include "mali_kbase_gpuprops.h"
-#include "mali_kbase_ioctl.h"
+#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h>
#if !MALI_USE_CSF
#include "mali_kbase_debug_job_fault.h"
#include "mali_kbase_jd_debugfs.h"
@@ -213,10 +213,6 @@ void registers_unmap(struct kbase_device *kbdev);
int kbase_device_coherency_init(struct kbase_device *kbdev);
-#ifdef CONFIG_MALI_BUSLOG
-int buslog_init(struct kbase_device *kbdev);
-void buslog_term(struct kbase_device *kbdev);
-#endif
#if !MALI_USE_CSF
int kbase_jd_init(struct kbase_context *kctx);
diff --git a/mali_kbase/mali_kbase_cache_policy.h b/mali_kbase/mali_kbase_cache_policy.h
index 817710a..2cd3079 100644
--- a/mali_kbase/mali_kbase_cache_policy.h
+++ b/mali_kbase/mali_kbase_cache_policy.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
*
- * (C) COPYRIGHT 2012-2013, 2015, 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2013, 2015, 2020-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,7 +27,7 @@
#define _KBASE_CACHE_POLICY_H_
#include "mali_kbase.h"
-#include "mali_base_kernel.h"
+#include <uapi/gpu/arm/midgard/mali_base_kernel.h>
/**
* kbase_cache_enabled - Choose the cache policy for a specific region
diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c
index 4e5155a..96fcbcd 100644
--- a/mali_kbase/mali_kbase_core_linux.c
+++ b/mali_kbase/mali_kbase_core_linux.c
@@ -53,7 +53,7 @@
#include <mali_kbase_hwaccess_instr.h>
#endif
#include <mali_kbase_reset_gpu.h>
-#include "mali_kbase_ioctl.h"
+#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h>
#if !MALI_USE_CSF
#include "mali_kbase_kinstr_jm.h"
#endif
@@ -1150,10 +1150,7 @@ static int kbase_api_mem_alias(struct kbase_context *kctx,
u64 flags;
int err;
- if (alias->in.nents == 0 || alias->in.nents > 2048)
- return -EINVAL;
-
- if (alias->in.stride > (U64_MAX / 2048))
+ if (alias->in.nents == 0 || alias->in.nents > BASE_MEM_ALIAS_MAX_ENTS)
return -EINVAL;
ai = vmalloc(sizeof(*ai) * alias->in.nents);
@@ -1357,18 +1354,6 @@ static int kbase_api_sticky_resource_unmap(struct kbase_context *kctx,
}
#if MALI_UNIT_TEST
-static int kbase_api_tlstream_test(struct kbase_context *kctx,
- struct kbase_ioctl_tlstream_test *test)
-{
- kbase_timeline_test(
- kctx->kbdev,
- test->tpw_count,
- test->msg_delay,
- test->msg_count,
- test->aux_msg);
-
- return 0;
-}
static int kbase_api_tlstream_stats(struct kbase_context *kctx,
struct kbase_ioctl_tlstream_stats *stats)
@@ -1508,14 +1493,11 @@ static int kbase_ioctl_cs_get_glb_iface(struct kbase_context *kctx,
}
if (!err) {
- param->out.total_stream_num =
- kbase_csf_firmware_get_glb_iface(kctx->kbdev,
- group_data, max_group_num,
- stream_data, max_total_stream_num,
- &param->out.glb_version, &param->out.features,
- &param->out.group_num, &param->out.prfcnt_size);
-
- param->out.padding = 0;
+ param->out.total_stream_num = kbase_csf_firmware_get_glb_iface(
+ kctx->kbdev, group_data, max_group_num, stream_data,
+ max_total_stream_num, &param->out.glb_version,
+ &param->out.features, &param->out.group_num,
+ &param->out.prfcnt_size, &param->out.instr_features);
if (copy_to_user(user_groups, group_data,
MIN(max_group_num, param->out.group_num) *
@@ -1619,6 +1601,23 @@ static int kbasep_ioctl_context_priority_check(struct kbase_context *kctx,
return ret; \
} while (0)
+static int kbasep_ioctl_set_limited_core_count(struct kbase_context *kctx,
+ struct kbase_ioctl_set_limited_core_count *set_limited_core_count)
+{
+ const u64 shader_core_mask =
+ kbase_pm_get_present_cores(kctx->kbdev, KBASE_PM_CORE_SHADER);
+ const u64 limited_core_mask =
+ ((u64)1 << (set_limited_core_count->max_core_count)) - 1;
+
+ if ((shader_core_mask & limited_core_mask) == 0) {
+ /* At least one shader core must be available after applying the mask */
+ return -EINVAL;
+ }
+
+ kctx->limited_core_mask = limited_core_mask;
+ return 0;
+}
+
static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct kbase_file *const kfile = filp->private_data;
@@ -1980,12 +1979,6 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
break;
#endif /* MALI_USE_CSF */
#if MALI_UNIT_TEST
- case KBASE_IOCTL_TLSTREAM_TEST:
- KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_TEST,
- kbase_api_tlstream_test,
- struct kbase_ioctl_tlstream_test,
- kctx);
- break;
case KBASE_IOCTL_TLSTREAM_STATS:
KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_TLSTREAM_STATS,
kbase_api_tlstream_stats,
@@ -1999,6 +1992,12 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
struct kbase_ioctl_context_priority_check,
kctx);
break;
+ case KBASE_IOCTL_SET_LIMITED_CORE_COUNT:
+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_LIMITED_CORE_COUNT,
+ kbasep_ioctl_set_limited_core_count,
+ struct kbase_ioctl_set_limited_core_count,
+ kctx);
+ break;
}
dev_warn(kbdev->dev, "Unknown ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd));
@@ -2115,7 +2114,7 @@ static unsigned int kbase_poll(struct file *filp, poll_table *wait)
void kbase_event_wakeup(struct kbase_context *kctx)
{
KBASE_DEBUG_ASSERT(kctx);
- dev_dbg(kctx->kbdev->dev, "Waking event queue for context %p\n",
+ dev_dbg(kctx->kbdev->dev, "Waking event queue for context %pK\n",
(void *)kctx);
wake_up_interruptible(&kctx->event_queue);
}
@@ -3086,7 +3085,7 @@ static ssize_t kbase_show_gpuinfo(struct device *dev,
{ .id = GPU_ID2_PRODUCT_TBEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-G78" },
{ .id = GPU_ID2_PRODUCT_TBAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
- .name = "Mali-TBAX" },
+ .name = "Mali-G78AE" },
{ .id = GPU_ID2_PRODUCT_LBEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-G68" },
{ .id = GPU_ID2_PRODUCT_TNAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
@@ -4094,21 +4093,28 @@ static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data)
{
struct kbase_device *kbdev = container_of(data, struct kbase_device,
protected_mode_hwcnt_disable_work);
+ spinlock_t *backend_lock;
unsigned long flags;
bool do_disable;
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+#if MALI_USE_CSF
+ backend_lock = &kbdev->csf.scheduler.interrupt_lock;
+#else
+ backend_lock = &kbdev->hwaccess_lock;
+#endif
+
+ spin_lock_irqsave(backend_lock, flags);
do_disable = !kbdev->protected_mode_hwcnt_desired &&
!kbdev->protected_mode_hwcnt_disabled;
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ spin_unlock_irqrestore(backend_lock, flags);
if (!do_disable)
return;
kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ spin_lock_irqsave(backend_lock, flags);
do_disable = !kbdev->protected_mode_hwcnt_desired &&
!kbdev->protected_mode_hwcnt_disabled;
@@ -4128,9 +4134,10 @@ static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data)
kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
}
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ spin_unlock_irqrestore(backend_lock, flags);
}
+#ifndef PLATFORM_PROTECTED_CALLBACKS
static int kbasep_protected_mode_enable(struct protected_mode_device *pdev)
{
struct kbase_device *kbdev = pdev->data;
@@ -4150,7 +4157,6 @@ static const struct protected_mode_ops kbasep_native_protected_ops = {
.protected_mode_disable = kbasep_protected_mode_disable
};
-#ifndef PLATFORM_PROTECTED_CALLBACKS
#define PLATFORM_PROTECTED_CALLBACKS (&kbasep_native_protected_ops)
#endif /* PLATFORM_PROTECTED_CALLBACKS */
@@ -4330,6 +4336,7 @@ int kbase_device_pm_init(struct kbase_device *kbdev)
u32 gpu_model_id;
if (kbase_is_pv_enabled(kbdev->dev->of_node)) {
+ dev_info(kbdev->dev, "Arbitration interface enabled\n");
if (kbase_is_pm_enabled(kbdev->dev->of_node)) {
/* Arbitration AND power management invalid */
dev_err(kbdev->dev, "Invalid combination of arbitration AND power management\n");
@@ -4353,7 +4360,8 @@ int kbase_device_pm_init(struct kbase_device *kbdev)
gpu_model_id = GPU_ID2_MODEL_MATCH_VALUE(product_id);
if (gpu_model_id != GPU_ID2_PRODUCT_TGOX
- && gpu_model_id != GPU_ID2_PRODUCT_TNOX) {
+ && gpu_model_id != GPU_ID2_PRODUCT_TNOX
+ && gpu_model_id != GPU_ID2_PRODUCT_TBAX) {
kbase_arbiter_pm_early_term(kbdev);
dev_err(kbdev->dev, "GPU platform not suitable for arbitration\n");
return -EPERM;
@@ -4542,7 +4550,7 @@ void power_control_term(struct kbase_device *kbdev)
static void trigger_reset(struct kbase_device *kbdev)
{
kbase_pm_context_active(kbdev);
- if (kbase_prepare_to_reset_gpu(kbdev))
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
kbase_pm_context_idle(kbdev);
}
@@ -4570,7 +4578,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\
MAKE_QUIRK_ACCESSORS(sc);
MAKE_QUIRK_ACCESSORS(tiler);
MAKE_QUIRK_ACCESSORS(mmu);
-MAKE_QUIRK_ACCESSORS(jm);
+MAKE_QUIRK_ACCESSORS(gpu);
static ssize_t kbase_device_debugfs_reset_write(struct file *file,
const char __user *ubuf, size_t count, loff_t *ppos)
@@ -4691,7 +4699,9 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev)
kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname,
NULL);
if (!kbdev->mali_debugfs_directory) {
- dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n");
+ dev_err(kbdev->dev,
+ "Couldn't create mali debugfs directory: %s\n",
+ kbdev->devname);
err = -ENOMEM;
goto out;
}
@@ -4746,9 +4756,8 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev)
debugfs_create_file("quirks_mmu", 0644,
kbdev->mali_debugfs_directory, kbdev,
&fops_mmu_quirks);
- debugfs_create_file("quirks_jm", 0644,
- kbdev->mali_debugfs_directory, kbdev,
- &fops_jm_quirks);
+ debugfs_create_file("quirks_gpu", 0644, kbdev->mali_debugfs_directory,
+ kbdev, &fops_gpu_quirks);
debugfs_create_bool("infinite_cache", mode,
debugfs_ctx_defaults_directory,
@@ -4878,40 +4887,6 @@ int kbase_device_coherency_init(struct kbase_device *kbdev)
return 0;
}
-#ifdef CONFIG_MALI_BUSLOG
-
-/* Callback used by the kbase bus logger client, to initiate a GPU reset
- * when the bus log is restarted. GPU reset is used as reference point
- * in HW bus log analyses.
- */
-static void kbase_logging_started_cb(void *data)
-{
- struct kbase_device *kbdev = (struct kbase_device *)data;
-
- if (kbase_prepare_to_reset_gpu(kbdev))
- kbase_reset_gpu(kbdev);
- dev_info(kbdev->dev, "KBASE - Bus logger restarted\n");
-}
-
-int buslog_init(struct kbase_device *kbdev)
-{
- int err = 0;
-
- err = bl_core_client_register(kbdev->devname,
- kbase_logging_started_cb,
- kbdev, &kbdev->buslogger,
- THIS_MODULE, NULL);
- if (err == 0)
- bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024);
-
- return err;
-}
-
-void buslog_term(struct kbase_device *kbdev)
-{
- bl_core_client_unregister(kbdev->buslogger);
-}
-#endif
#if MALI_USE_CSF
/**
@@ -5222,7 +5197,8 @@ static int kbase_platform_device_probe(struct platform_device *pdev)
if (err) {
if (err == -EPROBE_DEFER)
- dev_err(kbdev->dev, "Device initialization Deferred\n");
+ dev_info(kbdev->dev,
+ "Device initialization Deferred\n");
else
dev_err(kbdev->dev, "Device initialization failed\n");
@@ -5448,7 +5424,6 @@ static struct platform_driver kbase_platform_driver = {
.remove = kbase_platform_device_remove,
.driver = {
.name = kbase_drv_name,
- .owner = THIS_MODULE,
.pm = &kbase_pm_ops,
.of_match_table = of_match_ptr(kbase_dt_ids),
},
diff --git a/mali_kbase/mali_kbase_ctx_sched.c b/mali_kbase/mali_kbase_ctx_sched.c
index f59a2d7..c63bc8d 100644
--- a/mali_kbase/mali_kbase_ctx_sched.c
+++ b/mali_kbase/mali_kbase_ctx_sched.c
@@ -365,8 +365,7 @@ void kbase_ctx_sched_release_ctx_lock(struct kbase_context *kctx)
}
#if MALI_USE_CSF
-bool kbase_ctx_sched_refcount_mmu_flush(struct kbase_context *kctx,
- bool sync)
+bool kbase_ctx_sched_inc_refcount_if_as_valid(struct kbase_context *kctx)
{
struct kbase_device *kbdev;
bool added_ref = false;
@@ -383,20 +382,16 @@ bool kbase_ctx_sched_refcount_mmu_flush(struct kbase_context *kctx,
mutex_lock(&kbdev->mmu_hw_mutex);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- added_ref = kbase_ctx_sched_inc_refcount_nolock(kctx);
-
- WARN_ON(added_ref &&
- (kctx->mmu_flush_pend_state != KCTX_MMU_FLUSH_NOT_PEND));
-
- if (!added_ref && (kctx->as_nr != KBASEP_AS_NR_INVALID)) {
- enum kbase_ctx_mmu_flush_pending_state new_state =
- sync ? KCTX_MMU_FLUSH_PEND_SYNC :
- KCTX_MMU_FLUSH_PEND_NO_SYNC;
+ if ((kctx->as_nr != KBASEP_AS_NR_INVALID) &&
+ (kctx == kbdev->as_to_kctx[kctx->as_nr])) {
+ atomic_inc(&kctx->refcount);
- WARN_ON(kctx != kbdev->as_to_kctx[kctx->as_nr]);
+ if (kbdev->as_free & (1u << kctx->as_nr))
+ kbdev->as_free &= ~(1u << kctx->as_nr);
- if (kctx->mmu_flush_pend_state != KCTX_MMU_FLUSH_PEND_SYNC)
- kctx->mmu_flush_pend_state = new_state;
+ KBASE_KTRACE_ADD(kbdev, SCHED_RETAIN_CTX_NOLOCK, kctx,
+ kbase_ktrace_get_ctx_refcnt(kctx));
+ added_ref = true;
}
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
diff --git a/mali_kbase/mali_kbase_ctx_sched.h b/mali_kbase/mali_kbase_ctx_sched.h
index 1aa3762..cadb735 100644
--- a/mali_kbase/mali_kbase_ctx_sched.h
+++ b/mali_kbase/mali_kbase_ctx_sched.h
@@ -222,23 +222,20 @@ void kbase_ctx_sched_release_ctx_lock(struct kbase_context *kctx);
#if MALI_USE_CSF
/**
- * kbase_ctx_sched_refcount_mmu_flush - Refcount the context for the MMU flush
- * operation.
+ * kbase_ctx_sched_inc_refcount_if_as_valid - Refcount the context if it has a
+ * GPU address space slot assigned to it.
*
- * @kctx: Context to be refcounted.
- * @sync: Flag passed to the caller function kbase_mmu_flush_invalidate().
+ * @kctx: Context to be refcounted
*
- * This function takes a reference on the context for the MMU flush operation.
- * The refcount is taken only if the context is busy/active.
- * If the context isn't active but has a GPU address space slot assigned to it
- * then a flag is set to indicate that MMU flush operation is pending, which
- * will be performed when the context becomes active.
+ * This function takes a reference on the context if it has a GPU address space
+ * slot assigned to it. The address space slot will not be available for
+ * re-assignment until the reference is released.
*
* Return: true if refcount succeeded and the address space slot will not be
- * reassigned, false if the refcount failed (because the context was inactive)
+ * reassigned, false if the refcount failed (because the address space slot
+ * was not assigned).
*/
-bool kbase_ctx_sched_refcount_mmu_flush(struct kbase_context *kctx,
- bool sync);
+bool kbase_ctx_sched_inc_refcount_if_as_valid(struct kbase_context *kctx);
#endif
#endif /* _KBASE_CTX_SCHED_H_ */
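As a hedged illustration of how a caller might use the new helper (hypothetical caller code; it assumes the reference taken here is later dropped with kbase_ctx_sched_release_ctx_lock(), declared in this same header):

	/* Hypothetical caller: retain the context only while its address
	 * space slot is known to stay assigned.
	 */
	if (kbase_ctx_sched_inc_refcount_if_as_valid(kctx)) {
		/* kctx->as_nr cannot be re-assigned here, so MMU commands
		 * can safely target that address space.
		 */
		issue_mmu_command(kbdev, kctx->as_nr);	/* placeholder for the real work */

		/* Drop the reference so the slot can be re-assigned again. */
		kbase_ctx_sched_release_ctx_lock(kctx);
	}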
diff --git a/mali_kbase/mali_kbase_debug_job_fault.c b/mali_kbase/mali_kbase_debug_job_fault.c
index 6902ded..7dfdff1 100644
--- a/mali_kbase/mali_kbase_debug_job_fault.c
+++ b/mali_kbase/mali_kbase_debug_job_fault.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
*
- * (C) COPYRIGHT 2012-2016, 2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -549,6 +549,14 @@ void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx)
{
WARN_ON(!kbase_ctx_flag(kctx, KCTX_DYING));
+ /* Return early if the job fault part of the kbase_device is not
+ * initialized yet. An error can happen during the device probe after
+ * the privileged Kbase context was created for the HW counter dumping
+ * but before the job fault part is initialized.
+ */
+ if (!kctx->kbdev->job_fault_resume_workq)
+ return;
+
kbase_ctx_remove_pending_event(kctx);
}
diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h
index d813f2f..5b7591c 100644
--- a/mali_kbase/mali_kbase_defs.h
+++ b/mali_kbase/mali_kbase_defs.h
@@ -48,9 +48,6 @@
#include <linux/file.h>
#include <linux/sizes.h>
-#ifdef CONFIG_MALI_BUSLOG
-#include <linux/bus_logger.h>
-#endif
#if defined(CONFIG_SYNC)
#include <sync.h>
@@ -554,7 +551,6 @@ struct kbase_mmu_mode {
unsigned long flags;
};
-struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void);
struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void);
#define DEVNAME_SIZE 16
@@ -624,8 +620,8 @@ struct kbase_process {
* issues present in the GPU.
* @hw_quirks_mmu: Configuration to be used for the MMU as per the HW
* issues present in the GPU.
- * @hw_quirks_jm: Configuration to be used for the Job Manager as per
- * the HW issues present in the GPU.
+ * @hw_quirks_gpu: Configuration to be used for the Job Manager or CSF/MCU
+ * subsystems as per the HW issues present in the GPU.
* @entry: Links the device instance to the global list of GPU
* devices. The list would have as many entries as there
* are GPU device instances.
@@ -710,6 +706,8 @@ struct kbase_process {
* @nr_hw_address_spaces: Number of address spaces actually available in the
* GPU, remains constant after driver initialisation.
* @nr_user_address_spaces: Number of address spaces available to user contexts
+ * @hwcnt_backend_csf_if_fw: Firmware interface to access CSF GPU performance
+ * counters.
* @hwcnt: Structure used for instrumentation and HW counters
* dumping
* @hwcnt.lock: The lock should be used when accessing any of the
@@ -754,6 +752,8 @@ struct kbase_process {
* including any contexts that might be created for
* hardware counters.
* @kctx_list_lock: Lock protecting concurrent accesses to @kctx_list.
+ * @group_max_uid_in_devices: Max value of any queue group UID in any kernel
+ * context in the kbase device.
* @devfreq_profile: Describes devfreq profile for the Mali GPU device, passed
* to devfreq_add_device() to add devfreq feature to Mali
* GPU device.
@@ -918,7 +918,7 @@ struct kbase_device {
u32 hw_quirks_sc;
u32 hw_quirks_tiler;
u32 hw_quirks_mmu;
- u32 hw_quirks_jm;
+ u32 hw_quirks_gpu;
struct list_head entry;
struct device *dev;
@@ -1016,6 +1016,7 @@ struct kbase_device {
struct list_head kctx_list;
struct mutex kctx_list_lock;
+ atomic_t group_max_uid_in_devices;
#ifdef CONFIG_MALI_DEVFREQ
struct devfreq_dev_profile devfreq_profile;
@@ -1120,9 +1121,6 @@ struct kbase_device {
struct work_struct protected_mode_hwcnt_disable_work;
-#ifdef CONFIG_MALI_BUSLOG
- struct bus_logger_client *buslogger;
-#endif
bool irq_reset_flush;
@@ -1225,7 +1223,7 @@ struct kbase_file {
unsigned long api_version;
atomic_t setup_state;
};
-
+#if MALI_JIT_PRESSURE_LIMIT_BASE
/**
* enum kbase_context_flags - Flags for kbase contexts
*
@@ -1285,6 +1283,9 @@ struct kbase_file {
* refcount for the context drops to 0 or on when the address spaces are
* re-enabled on GPU reset or power cycle.
*
+ * @KCTX_JPL_ENABLED: Set when JIT physical page limit is less than JIT virtual
+ * address page limit, so we must take care to not exceed the physical limit
+ *
* All members need to be separate bits. This enum is intended for use in a
* bitmask where multiple values get OR-ed together.
*/
@@ -1305,38 +1306,90 @@ enum kbase_context_flags {
KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13,
KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14,
KCTX_AS_DISABLED_ON_FAULT = 1U << 15,
-#if MALI_JIT_PRESSURE_LIMIT_BASE
- /*
- * Set when JIT physical page limit is less than JIT virtual address
- * page limit, so we must take care to not exceed the physical limit
- */
KCTX_JPL_ENABLED = 1U << 16,
-#endif /* !MALI_JIT_PRESSURE_LIMIT_BASE */
};
-
-#if MALI_USE_CSF
+#else
/**
- * enum kbase_ctx_mmu_flush_pending_state - State for the pending mmu flush
- * operation for a kbase context.
+ * enum kbase_context_flags - Flags for kbase contexts
+ *
+ * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit
+ * process on a 64-bit kernel.
+ *
+ * @KCTX_RUNNABLE_REF: Set when context is counted in
+ * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing.
+ *
+ * @KCTX_ACTIVE: Set when the context is active.
+ *
+ * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this
+ * context.
+ *
+ * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been
+ * initialized.
+ *
+ * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new
+ * allocations. Existing allocations will not change.
+ *
+ * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs.
+ *
+ * @KCTX_PRIVILEGED: Set if the context uses an address space and should be kept
+ * scheduled in.
+ *
+ * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool.
+ * This is only ever updated whilst the jsctx_mutex is held.
+ *
+ * @KCTX_DYING: Set when the context process is in the process of being evicted.
+ *
+ * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this
+ * context, to disable use of implicit dma-buf fences. This is used to avoid
+ * potential synchronization deadlocks.
+ *
+ * @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory
+ * allocations. For 64-bit clients it is enabled by default, and disabled by
+ * default on 32-bit clients. Being able to clear this flag is only used for
+ * testing purposes of the custom zone allocation on 64-bit user-space builds,
+ * where we also require more control than is available through e.g. the JIT
+ * allocation mechanism. However, the 64-bit user-space client must still
+ * reserve a JIT region using KBASE_IOCTL_MEM_JIT_INIT
+ *
+ * @KCTX_PULLED_SINCE_ACTIVE_JS0: Set when the context has had an atom pulled
+ * from it for job slot 0. This is reset when the context first goes active or
+ * is re-activated on that slot.
+ *
+ * @KCTX_PULLED_SINCE_ACTIVE_JS1: Set when the context has had an atom pulled
+ * from it for job slot 1. This is reset when the context first goes active or
+ * is re-activated on that slot.
*
- * @KCTX_MMU_FLUSH_NOT_PEND: Set when there is no MMU flush operation pending
- * for a kbase context or deferred flush operation
- * is performed.
+ * @KCTX_PULLED_SINCE_ACTIVE_JS2: Set when the context has had an atom pulled
+ * from it for job slot 2. This is reset when the context first goes active or
+ * is re-activated on that slot.
*
- * @KCTX_MMU_FLUSH_PEND_NO_SYNC: Set when the MMU flush operation is deferred
- * for a kbase context when it is inactive and
- * the sync flag passed is 0.
+ * @KCTX_AS_DISABLED_ON_FAULT: Set when the GPU address space is disabled for
+ * the context due to an unhandled page (or bus) fault. It is cleared when the
+ * refcount for the context drops to 0 or when the address spaces are
+ * re-enabled on GPU reset or power cycle.
*
- * @KCTX_MMU_FLUSH_PEND_SYNC: Set when the MMU flush operation is deferred
- * for a kbase context when it is inactive and
- * the sync flag passed is 1.
+ * All members need to be separate bits. This enum is intended for use in a
+ * bitmask where multiple values get OR-ed together.
*/
-enum kbase_ctx_mmu_flush_pending_state {
- KCTX_MMU_FLUSH_NOT_PEND,
- KCTX_MMU_FLUSH_PEND_NO_SYNC,
- KCTX_MMU_FLUSH_PEND_SYNC,
+enum kbase_context_flags {
+ KCTX_COMPAT = 1U << 0,
+ KCTX_RUNNABLE_REF = 1U << 1,
+ KCTX_ACTIVE = 1U << 2,
+ KCTX_PULLED = 1U << 3,
+ KCTX_MEM_PROFILE_INITIALIZED = 1U << 4,
+ KCTX_INFINITE_CACHE = 1U << 5,
+ KCTX_SUBMIT_DISABLED = 1U << 6,
+ KCTX_PRIVILEGED = 1U << 7,
+ KCTX_SCHEDULED = 1U << 8,
+ KCTX_DYING = 1U << 9,
+ KCTX_NO_IMPLICIT_SYNC = 1U << 10,
+ KCTX_FORCE_SAME_VA = 1U << 11,
+ KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12,
+ KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13,
+ KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14,
+ KCTX_AS_DISABLED_ON_FAULT = 1U << 15,
};
-#endif
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
struct kbase_sub_alloc {
struct list_head link;
@@ -1616,12 +1669,8 @@ struct kbase_reg_zone {
* @kinstr_jm: Kernel job manager instrumentation context handle
* @tl_kctx_list_node: List item into the device timeline's list of
* contexts, for timeline summarization.
- * @mmu_flush_pend_state: Tracks if the MMU flush operations are pending for the
- * context. The flush required due to unmap is also
- * tracked. It is supposed to be in
- * KCTX_MMU_FLUSH_NOT_PEND state whilst a context is
- * active and shall be updated with mmu_hw_mutex lock
- * held.
+ * @limited_core_mask: The mask that is applied to the affinity in case of atoms
+ * marked with BASE_JD_REQ_LIMITED_CORE_MASK.
*
* A kernel base context is an entity among which the GPU is scheduled.
* Each context has its own GPU address space.
@@ -1769,9 +1818,7 @@ struct kbase_context {
#endif
struct list_head tl_kctx_list_node;
-#if MALI_USE_CSF
- enum kbase_ctx_mmu_flush_pending_state mmu_flush_pend_state;
-#endif
+ u64 limited_core_mask;
};
#ifdef CONFIG_MALI_CINSTR_GWT
diff --git a/mali_kbase/mali_kbase_event.c b/mali_kbase/mali_kbase_event.c
index 04687ee..25a379d 100644
--- a/mali_kbase/mali_kbase_event.c
+++ b/mali_kbase/mali_kbase_event.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
*
- * (C) COPYRIGHT 2010-2016,2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016,2018-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -42,7 +42,7 @@ static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, stru
KBASE_TLSTREAM_TL_DEL_ATOM(kbdev, katom);
katom->status = KBASE_JD_ATOM_STATE_UNUSED;
- dev_dbg(kbdev->dev, "Atom %p status to unused\n", (void *)katom);
+ dev_dbg(kbdev->dev, "Atom %pK status to unused\n", (void *)katom);
wake_up(&katom->completed);
return data;
@@ -79,7 +79,7 @@ int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *ueve
mutex_unlock(&ctx->event_mutex);
- dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom);
+ dev_dbg(ctx->kbdev->dev, "event dequeuing %pK\n", (void *)atom);
uevent->event_code = atom->event_code;
uevent->atom_number = (atom - ctx->jctx.atoms);
@@ -164,11 +164,11 @@ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
{
struct kbase_device *kbdev = ctx->kbdev;
- dev_dbg(kbdev->dev, "Posting event for atom %p\n", (void *)atom);
+ dev_dbg(kbdev->dev, "Posting event for atom %pK\n", (void *)atom);
if (WARN_ON(atom->status != KBASE_JD_ATOM_STATE_COMPLETED)) {
dev_warn(kbdev->dev,
- "%s: Atom %d (%p) not completed (status %d)\n",
+ "%s: Atom %d (%pK) not completed (status %d)\n",
__func__,
kbase_jd_atom_id(atom->kctx, atom),
atom->kctx,
diff --git a/mali_kbase/mali_kbase_gpu_memory_debugfs.c b/mali_kbase/mali_kbase_gpu_memory_debugfs.c
index 45ce740..a10b2bb 100644
--- a/mali_kbase/mali_kbase_gpu_memory_debugfs.c
+++ b/mali_kbase/mali_kbase_gpu_memory_debugfs.c
@@ -56,7 +56,7 @@ static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
/* output the memory usage and cap for each kctx
* opened on this device
*/
- seq_printf(sfile, " %s-0x%p %10u\n",
+ seq_printf(sfile, " %s-0x%pK %10u\n",
"kctx",
kctx,
atomic_read(&(kctx->used_pages)));
diff --git a/mali_kbase/mali_kbase_gpuprops.c b/mali_kbase/mali_kbase_gpuprops.c
index 9da0b00..49f96f6 100644
--- a/mali_kbase/mali_kbase_gpuprops.c
+++ b/mali_kbase/mali_kbase_gpuprops.c
@@ -28,7 +28,7 @@
#include <mali_kbase_gpuprops.h>
#include <mali_kbase_hwaccess_gpuprops.h>
#include <mali_kbase_config_defaults.h>
-#include "mali_kbase_ioctl.h"
+#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h>
#include <linux/clk.h>
#include <mali_kbase_pm_internal.h>
#include <linux/of_platform.h>
@@ -104,6 +104,71 @@ static void kbase_gpuprops_construct_coherent_groups(
}
/**
+ * kbase_gpuprops_get_curr_config_props - Get the current allocated resources
+ * @kbdev: The &struct kbase_device structure for the device
+ * @curr_config: The &struct curr_config_props structure to receive the result
+ *
+ * Fill the &struct curr_config_props structure with values from the GPU
+ * configuration registers.
+ *
+ * Return: Zero on success, Linux error code on failure
+ */
+int kbase_gpuprops_get_curr_config_props(struct kbase_device *kbdev,
+ struct curr_config_props * const curr_config)
+{
+ struct kbase_current_config_regdump curr_config_regdump;
+ int err;
+
+ if (WARN_ON(!kbdev) || WARN_ON(!curr_config))
+ return -EINVAL;
+
+ /* If update not needed just return. */
+ if (!curr_config->update_needed)
+ return 0;
+
+ /* Dump relevant registers */
+ err = kbase_backend_gpuprops_get_curr_config(kbdev,
+ &curr_config_regdump);
+ if (err)
+ return err;
+
+ curr_config->l2_slices =
+ KBASE_UBFX32(curr_config_regdump.mem_features, 8U, 4) + 1;
+
+ curr_config->l2_present =
+ ((u64) curr_config_regdump.l2_present_hi << 32) +
+ curr_config_regdump.l2_present_lo;
+
+ curr_config->shader_present =
+ ((u64) curr_config_regdump.shader_present_hi << 32) +
+ curr_config_regdump.shader_present_lo;
+
+ curr_config->num_cores = hweight64(curr_config->shader_present);
+
+ curr_config->update_needed = false;
+
+ return 0;
+}
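For reference, the L2 slice count above comes from a 4-bit field of MEM_FEATURES; a minimal sketch of the same decode, assuming KBASE_UBFX32(value, offset, size) extracts size bits of value starting at bit offset:

	/* Illustrative only: bits [11:8] of MEM_FEATURES hold (L2 slice count - 1),
	 * so KBASE_UBFX32(mem_features, 8U, 4) + 1 is equivalent to:
	 */
	u32 mem_features = curr_config_regdump.mem_features;
	u8 l2_slices = ((mem_features >> 8) & 0xFu) + 1;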
+
+/**
+ * kbase_gpuprops_req_curr_config_update - Request Current Config Update
+ * @kbdev: The &struct kbase_device structure for the device
+ *
+ * Requests the current configuration to be updated next time the
+ * kbase_gpuprops_get_curr_config_props() is called.
+ *
+ * Return: Zero on success, Linux error code on failure
+ */
+int kbase_gpuprops_req_curr_config_update(struct kbase_device *kbdev)
+{
+ if (WARN_ON(!kbdev))
+ return -EINVAL;
+
+ kbdev->gpu_props.curr_config.update_needed = true;
+ return 0;
+}
+
+/**
* kbase_gpuprops_get_props - Get the GPU configuration
* @gpu_props: The &struct base_gpu_props structure
* @kbdev: The &struct kbase_device structure for the device
@@ -183,6 +248,59 @@ void kbase_gpuprops_update_core_props_gpu_id(
}
/**
+ * kbase_gpuprops_update_max_config_props - Updates the max config properties in
+ * the base_gpu_props.
+ * @base_props: The &struct base_gpu_props structure
+ * @kbdev: The &struct kbase_device structure for the device
+ *
+ * Updates the &struct base_gpu_props structure with the max config properties.
+ */
+static void kbase_gpuprops_update_max_config_props(
+ struct base_gpu_props * const base_props, struct kbase_device *kbdev)
+{
+ int l2_n = 0;
+
+ if (WARN_ON(!kbdev) || WARN_ON(!base_props))
+ return;
+
+ /* return if the max_config is not set during arbif initialization */
+ if (kbdev->gpu_props.max_config.core_mask == 0)
+ return;
+
+ /*
+ * Set base_props to the maximum config values so that user space always
+ * sees the maximum resources available.
+ */
+ base_props->l2_props.num_l2_slices =
+ kbdev->gpu_props.max_config.l2_slices;
+ base_props->raw_props.shader_present =
+ kbdev->gpu_props.max_config.core_mask;
+ /*
+ * Update l2_present in the raw data to be consistent with the
+ * max_config.l2_slices number.
+ */
+ base_props->raw_props.l2_present = 0;
+ for (l2_n = 0; l2_n < base_props->l2_props.num_l2_slices; l2_n++) {
+ base_props->raw_props.l2_present <<= 1;
+ base_props->raw_props.l2_present |= 0x1;
+ }
+ /*
+ * Update the coherency_info data using just one core group. For
+ * architectures where the max_config is provided by the arbiter it is
+ * not necessary to split the shader core groups into different coherent
+ * groups.
+ */
+ base_props->coherency_info.coherency =
+ base_props->raw_props.mem_features;
+ base_props->coherency_info.num_core_groups = 1;
+ base_props->coherency_info.num_groups = 1;
+ base_props->coherency_info.group[0].core_mask =
+ kbdev->gpu_props.max_config.core_mask;
+ base_props->coherency_info.group[0].num_cores =
+ hweight32(kbdev->gpu_props.max_config.core_mask);
+}
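The shift-and-or loop above builds a contiguous low-order mask with one bit per L2 slice; an equivalent one-line form (illustrative only, assuming num_l2_slices is below 64):

	/* Same result as the loop: set the lowest num_l2_slices bits. */
	base_props->raw_props.l2_present =
		(1ULL << base_props->l2_props.num_l2_slices) - 1;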
+
+/**
* kbase_gpuprops_calculate_props - Calculate the derived properties
* @gpu_props: The &struct base_gpu_props structure
* @kbdev: The &struct kbase_device structure for the device
@@ -297,8 +415,30 @@ static void kbase_gpuprops_calculate_props(
gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT;
gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
}
- /* Initialize the coherent_group structure for each group */
- kbase_gpuprops_construct_coherent_groups(gpu_props);
+
+ /*
+ * If information about the maximum allocatable resources is available,
+ * the base_gpu_props reported to userspace must be updated with the
+ * max_config values. This applies to systems that receive this
+ * information from the arbiter.
+ */
+ if (kbdev->gpu_props.max_config.core_mask)
+ /* Update the max config properties in the base_gpu_props */
+ kbase_gpuprops_update_max_config_props(gpu_props,
+ kbdev);
+ else
+ /* Initialize the coherent_group structure for each group */
+ kbase_gpuprops_construct_coherent_groups(gpu_props);
+}
+
+void kbase_gpuprops_set_max_config(struct kbase_device *kbdev,
+ const struct max_config_props *max_config)
+{
+ if (WARN_ON(!kbdev) || WARN_ON(!max_config))
+ return;
+
+ kbdev->gpu_props.max_config.l2_slices = max_config->l2_slices;
+ kbdev->gpu_props.max_config.core_mask = max_config->core_mask;
}
void kbase_gpuprops_set(struct kbase_device *kbdev)
@@ -306,7 +446,8 @@ void kbase_gpuprops_set(struct kbase_device *kbdev)
struct kbase_gpu_props *gpu_props;
struct gpu_raw_gpu_props *raw;
- KBASE_DEBUG_ASSERT(kbdev != NULL);
+ if (WARN_ON(!kbdev))
+ return;
gpu_props = &kbdev->gpu_props;
raw = &gpu_props->props.raw_props;
@@ -326,9 +467,19 @@ void kbase_gpuprops_set(struct kbase_device *kbdev)
gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8);
gpu_props->num_cores = hweight64(raw->shader_present);
- gpu_props->num_core_groups = hweight64(raw->l2_present);
+ gpu_props->num_core_groups =
+ gpu_props->props.coherency_info.num_core_groups;
gpu_props->num_address_spaces = hweight32(raw->as_present);
gpu_props->num_job_slots = hweight32(raw->js_present);
+
+ /*
+ * The current configuration is used for HW interactions, while the
+ * maximum configuration is only exposed to user space. This avoids
+ * touching parts of the hardware that might not be allocated to this
+ * kbase instance at that moment.
+ */
+ kbase_gpuprops_req_curr_config_update(kbdev);
+ kbase_gpuprops_get_curr_config_props(kbdev, &gpu_props->curr_config);
}
int kbase_gpuprops_set_features(struct kbase_device *kbdev)
@@ -494,7 +645,10 @@ int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev)
goto exit;
dev_info(kbdev->dev, "Reflected L2_FEATURES is 0x%x\n",
- regdump.l2_features);
+ regdump.l2_features);
+ dev_info(kbdev->dev, "Reflected L2_CONFIG is 0x%08x\n",
+ regdump.l2_config);
+
/* Update gpuprops with reflected L2_FEATURES */
gpu_props->raw_props.l2_features = regdump.l2_features;
diff --git a/mali_kbase/mali_kbase_gpuprops.h b/mali_kbase/mali_kbase_gpuprops.h
index 7c7b123..72f76c3 100644
--- a/mali_kbase/mali_kbase_gpuprops.h
+++ b/mali_kbase/mali_kbase_gpuprops.h
@@ -115,4 +115,38 @@ int kbase_device_populate_max_freq(struct kbase_device *kbdev);
void kbase_gpuprops_update_core_props_gpu_id(
struct base_gpu_props * const gpu_props);
+/**
+ * kbase_gpuprops_set_max_config - Set the max config information
+ * @kbdev: Device pointer
+ * @max_config: Maximum configuration data to be updated
+ *
+ * This function sets max_config in the kbase_gpu_props.
+ */
+void kbase_gpuprops_set_max_config(struct kbase_device *kbdev,
+ const struct max_config_props *max_config);
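A hedged sketch of how an integration might publish the maximum configuration (hypothetical call site and example values only; in practice the data is expected to come from the arbiter during arbif initialisation):

	/* Hypothetical values for illustration. */
	struct max_config_props max_config = {
		.l2_slices = 1,     /* at most one L2 slice */
		.core_mask = 0xFu,  /* up to four shader cores */
	};

	kbase_gpuprops_set_max_config(kbdev, &max_config);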
+
+/**
+ * kbase_gpuprops_get_curr_config_props - Get the current allocated resources
+ * @kbdev: The &struct kbase_device structure for the device
+ * @curr_config: The &struct curr_config_props structure to receive the result
+ *
+ * Fill the &struct curr_config_props structure with values from the GPU
+ * configuration registers.
+ *
+ * Return: Zero on success, Linux error code on failure
+ */
+int kbase_gpuprops_get_curr_config_props(struct kbase_device *kbdev,
+ struct curr_config_props * const curr_config);
+
+/**
+ * kbase_gpuprops_req_curr_config_update - Request Current Config Update
+ * @kbdev: The &struct kbase_device structure for the device
+ *
+ * Requests the current configuration to be updated next time the
+ * kbase_gpuprops_get_curr_config_props() is called.
+ *
+ * Return: Zero on success, Linux error code on failure
+ */
+int kbase_gpuprops_req_curr_config_update(struct kbase_device *kbdev);
+
#endif /* _KBASE_GPUPROPS_H_ */
diff --git a/mali_kbase/mali_kbase_gpuprops_types.h b/mali_kbase/mali_kbase_gpuprops_types.h
index 8ecb54f..8b37b88 100644
--- a/mali_kbase/mali_kbase_gpuprops_types.h
+++ b/mali_kbase/mali_kbase_gpuprops_types.h
@@ -26,7 +26,7 @@
#ifndef _KBASE_GPUPROPS_TYPES_H_
#define _KBASE_GPUPROPS_TYPES_H_
-#include "mali_base_kernel.h"
+#include <uapi/gpu/arm/midgard/mali_base_kernel.h>
#define KBASE_GPU_SPEED_MHZ 123
#define KBASE_GPU_PC_SIZE_LOG2 24U
@@ -34,6 +34,7 @@
struct kbase_gpuprops_regdump {
u32 gpu_id;
u32 l2_features;
+ u32 l2_config;
u32 core_features;
u32 tiler_features;
u32 mem_features;
@@ -60,6 +61,28 @@ struct kbase_gpuprops_regdump {
u32 gpu_features_hi;
};
+/**
+ * struct kbase_current_config_regdump - Register dump for current resources
+ * allocated to the GPU.
+ * @mem_features: Memory system features. Contains information about the
+ * features of the memory system. Used here to get the L2 slice
+ * count.
+ * @shader_present_lo: Shader core present bitmap. Low word.
+ * @shader_present_hi: Shader core present bitmap. High word.
+ * @l2_present_lo: L2 cache present bitmap. Low word.
+ * @l2_present_hi: L2 cache present bitmap. High word.
+ *
+ * Register dump structure used to store the register data related to the
+ * current resources allocated to the GPU.
+ */
+struct kbase_current_config_regdump {
+ u32 mem_features;
+ u32 shader_present_lo;
+ u32 shader_present_hi;
+ u32 l2_present_lo;
+ u32 l2_present_hi;
+};
+
struct kbase_gpu_cache_props {
u8 associativity;
u8 external_bus_width;
@@ -74,6 +97,50 @@ struct kbase_gpu_mmu_props {
u8 pa_bits;
};
+/**
+ * struct max_config_props - Properties based on the maximum resources
+ * available.
+ * @l2_slices: Maximum number of L2 slices that can be assigned to the GPU
+ * during runtime.
+ * @padding: Padding to a multiple of 64 bits.
+ * @core_mask: Largest core mask bitmap that can be assigned to the GPU during
+ * runtime.
+ *
+ * Properties based on the maximum resources available (not necessarily
+ * allocated at that moment). Used to provide the maximum configuration to
+ * userspace, allowing applications to allocate enough resources in case the
+ * actually allocated resources change.
+ */
+struct max_config_props {
+ u8 l2_slices;
+ u8 padding[3];
+ u32 core_mask;
+};
+
+/**
+ * struct curr_config_props - Properties based on the current resources
+ * allocated to the GPU.
+ * @l2_present: Current L2 present bitmap that is allocated to the GPU.
+ * @shader_present: Current shader present bitmap that is allocated to the GPU.
+ * @num_cores: Current number of shader cores allocated to the GPU.
+ * @l2_slices: Current number of L2 slices allocated to the GPU.
+ * @update_needed: Defines if it is necessary to re-read the registers to
+ * update the currently allocated resources.
+ * @padding: Padding to a multiple of 64 bits.
+ *
+ * Properties based on the currently allocated resources. Used for operations
+ * that interact with the hardware, to avoid relying on userspace data that may
+ * be based on the maximum resources available.
+ */
+struct curr_config_props {
+ u64 l2_present;
+ u64 shader_present;
+ u16 num_cores;
+ u8 l2_slices;
+ bool update_needed;
+ u8 padding[4];
+};
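Both structures are deliberately padded to a multiple of 64 bits; as an illustrative compile-time check only (not part of the driver source):

	/* Illustrative: 8 and 24 bytes respectively on typical kernel builds. */
	_Static_assert(sizeof(struct max_config_props) % 8 == 0,
		       "max_config_props must be a multiple of 64 bits");
	_Static_assert(sizeof(struct curr_config_props) % 8 == 0,
		       "curr_config_props must be a multiple of 64 bits");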
+
struct kbase_gpu_props {
/* kernel-only properties */
u8 num_cores;
@@ -86,6 +153,12 @@ struct kbase_gpu_props {
struct kbase_gpu_mem_props mem;
struct kbase_gpu_mmu_props mmu;
+ /* Properties based on the currently allocated resources */
+ struct curr_config_props curr_config;
+
+ /* Properties based on the maximum resources available */
+ struct max_config_props max_config;
+
/* Properties shared with userspace */
struct base_gpu_props props;
diff --git a/mali_kbase/mali_kbase_gwt.h b/mali_kbase/mali_kbase_gwt.h
index f349d8f..32b0f5f 100644
--- a/mali_kbase/mali_kbase_gwt.h
+++ b/mali_kbase/mali_kbase_gwt.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
*
- * (C) COPYRIGHT 2010-2017, 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2017, 2020-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -23,7 +23,7 @@
#define _KBASE_GWT_H
#include <mali_kbase.h>
-#include <mali_kbase_ioctl.h>
+#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h>
/**
* kbase_gpu_gwt_start - Start the GPU write tracking
diff --git a/mali_kbase/mali_kbase_hw.c b/mali_kbase/mali_kbase_hw.c
index d2063bb..b1758d7 100644
--- a/mali_kbase/mali_kbase_hw.c
+++ b/mali_kbase/mali_kbase_hw.c
@@ -126,91 +126,91 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
};
static const struct base_hw_product base_hw_products[] = {
- {GPU_ID2_PRODUCT_TMIX,
- {{GPU_ID2_VERSION_MAKE(0, 0, 1),
- base_hw_issues_tMIx_r0p0_05dev0},
- {GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0},
- {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1},
- {U32_MAX /* sentinel value */, NULL} } },
-
- {GPU_ID2_PRODUCT_THEX,
- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0},
- {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0},
- {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1},
- {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tHEx_r0p1},
- {GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tHEx_r0p2},
- {GPU_ID2_VERSION_MAKE(0, 3, 0), base_hw_issues_tHEx_r0p3},
- {U32_MAX, NULL} } },
-
- {GPU_ID2_PRODUCT_TSIX,
- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0},
- {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0},
- {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1},
- {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tSIx_r1p0},
- {GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tSIx_r1p1},
- {U32_MAX, NULL} } },
-
- {GPU_ID2_PRODUCT_TDVX,
- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDVx_r0p0},
- {U32_MAX, NULL} } },
-
- {GPU_ID2_PRODUCT_TNOX,
- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNOx_r0p0},
- {U32_MAX, NULL} } },
-
- {GPU_ID2_PRODUCT_TGOX,
- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0},
- {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0},
- {U32_MAX, NULL} } },
-
- {GPU_ID2_PRODUCT_TTRX,
- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0},
- {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tTRx_r0p0},
- {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTRx_r0p1},
- {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tTRx_r0p1},
- {GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tTRx_r0p2},
- {U32_MAX, NULL} } },
-
- {GPU_ID2_PRODUCT_TNAX,
- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNAx_r0p0},
- {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tNAx_r0p0},
- {GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tNAx_r0p0},
- {GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tNAx_r0p0},
- {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tNAx_r0p1},
- {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tNAx_r0p1},
- {U32_MAX, NULL} } },
-
- {GPU_ID2_PRODUCT_LBEX,
- {{GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_lBEx_r1p0},
- {GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_lBEx_r1p1},
- {U32_MAX, NULL} } },
-
- {GPU_ID2_PRODUCT_TBEX,
- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0},
- {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tBEx_r0p0},
- {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tBEx_r0p1},
- {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0},
- {U32_MAX, NULL} } },
-
- {GPU_ID2_PRODUCT_TBAX,
- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBAx_r0p0},
- {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tBAx_r0p0},
- {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBAx_r1p0},
- {U32_MAX, NULL} } },
-
- {GPU_ID2_PRODUCT_TDUX,
- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0},
- {U32_MAX, NULL} } },
-
- {GPU_ID2_PRODUCT_TODX,
- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0},
- {GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tODx_r0p0},
- {GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tODx_r0p0},
- {U32_MAX, NULL} } },
-
- {GPU_ID2_PRODUCT_LODX,
- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0},
- {U32_MAX, NULL} } },
+ { GPU_ID2_PRODUCT_TMIX,
+ { { GPU_ID2_VERSION_MAKE(0, 0, 1),
+ base_hw_issues_tMIx_r0p0_05dev0 },
+ { GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0 },
+ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1 },
+ { U32_MAX /* sentinel value */, NULL } } },
+
+ { GPU_ID2_PRODUCT_THEX,
+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0 },
+ { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0 },
+ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1 },
+ { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tHEx_r0p1 },
+ { GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tHEx_r0p2 },
+ { GPU_ID2_VERSION_MAKE(0, 3, 0), base_hw_issues_tHEx_r0p3 },
+ { U32_MAX, NULL } } },
+
+ { GPU_ID2_PRODUCT_TSIX,
+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0 },
+ { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0 },
+ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1 },
+ { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tSIx_r1p0 },
+ { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tSIx_r1p1 },
+ { U32_MAX, NULL } } },
+
+ { GPU_ID2_PRODUCT_TDVX,
+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDVx_r0p0 },
+ { U32_MAX, NULL } } },
+
+ { GPU_ID2_PRODUCT_TNOX,
+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNOx_r0p0 },
+ { U32_MAX, NULL } } },
+
+ { GPU_ID2_PRODUCT_TGOX,
+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0 },
+ { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0 },
+ { U32_MAX, NULL } } },
+
+ { GPU_ID2_PRODUCT_TTRX,
+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0 },
+ { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tTRx_r0p0 },
+ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTRx_r0p1 },
+ { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tTRx_r0p1 },
+ { GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tTRx_r0p2 },
+ { U32_MAX, NULL } } },
+
+ { GPU_ID2_PRODUCT_TNAX,
+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNAx_r0p0 },
+ { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tNAx_r0p0 },
+ { GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tNAx_r0p0 },
+ { GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tNAx_r0p0 },
+ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tNAx_r0p1 },
+ { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tNAx_r0p1 },
+ { U32_MAX, NULL } } },
+
+ { GPU_ID2_PRODUCT_LBEX,
+ { { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_lBEx_r1p0 },
+ { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_lBEx_r1p1 },
+ { U32_MAX, NULL } } },
+
+ { GPU_ID2_PRODUCT_TBEX,
+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0 },
+ { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tBEx_r0p0 },
+ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tBEx_r0p1 },
+ { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0 },
+ { U32_MAX, NULL } } },
+
+ { GPU_ID2_PRODUCT_TBAX,
+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBAx_r0p0 },
+ { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tBAx_r0p0 },
+ { GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tBAx_r0p0 },
+ { U32_MAX, NULL } } },
+
+ { GPU_ID2_PRODUCT_TDUX,
+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0 },
+ { U32_MAX, NULL } } },
+
+ { GPU_ID2_PRODUCT_TODX,
+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 },
+ { GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tODx_r0p0 },
+ { GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tODx_r0p0 },
+ { U32_MAX, NULL } } },
+
+ { GPU_ID2_PRODUCT_LODX,
+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 },
+ { U32_MAX, NULL } } },
};
u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
diff --git a/mali_kbase/mali_kbase_hwaccess_gpuprops.h b/mali_kbase/mali_kbase_hwaccess_gpuprops.h
index 5e5f9dc..0fca83e 100644
--- a/mali_kbase/mali_kbase_hwaccess_gpuprops.h
+++ b/mali_kbase/mali_kbase_hwaccess_gpuprops.h
@@ -40,6 +40,23 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev,
struct kbase_gpuprops_regdump *regdump);
/**
+ * kbase_backend_gpuprops_get_curr_config() - Fill @curr_config_regdump with
+ * relevant GPU properties read from
+ * the GPU registers.
+ * @kbdev: Device pointer.
+ * @curr_config_regdump: Pointer to struct kbase_current_config_regdump
+ * structure.
+ *
+ * The caller should ensure that the GPU remains powered on for the duration of
+ * this function, and must check that it returns success before using the values
+ * returned in curr_config_regdump anywhere in the kernel.
+ *
+ * Return: Zero for success or a Linux error code
+ */
+int kbase_backend_gpuprops_get_curr_config(struct kbase_device *kbdev,
+ struct kbase_current_config_regdump *curr_config_regdump);
+
+/**
* kbase_backend_gpuprops_get_features - Fill @regdump with GPU properties read
* from GPU
* @kbdev: Device pointer
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf.c b/mali_kbase/mali_kbase_hwcnt_backend_csf.c
index c1bc7fc..4bc62c1 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_csf.c
+++ b/mali_kbase/mali_kbase_hwcnt_backend_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -127,41 +127,31 @@ enum kbase_hwcnt_backend_csf_enable_state {
* struct kbase_hwcnt_backend_csf_info - Information used to create an instance
* of a CSF hardware counter backend.
* @backend: Pointer to access CSF backend.
- * @lock: Spinlock protecting backend and its internal
- * states.
* @fw_in_protected_mode: True if FW is running in protected mode, else
* false.
* @unrecoverable_error_happened: True if an unrecoverable error happened, else
* false.
- * @csf_if: CSF interface object pointer. Functions inside
- * this interface MUST never be called while
- * holding the spin lock, as that could cause
- * deadlocks.
+ * @csf_if: CSF interface object pointer.
* @ring_buf_cnt: Dump buffer count in the ring buffer.
* @counter_set: The performance counter set to use.
* @metadata: Hardware counter metadata.
- * @dump_bytes: Bytes of GPU memory required to perform a
- * hardware counter dump.
- * @gpu_info: GPU information to initialise HWC dump memory
- * layout.
+ * @prfcnt_info: Performance counter information.
*/
struct kbase_hwcnt_backend_csf_info {
struct kbase_hwcnt_backend_csf *backend;
- spinlock_t lock;
bool fw_in_protected_mode;
bool unrecoverable_error_happened;
struct kbase_hwcnt_backend_csf_if *csf_if;
u32 ring_buf_cnt;
enum kbase_hwcnt_set counter_set;
const struct kbase_hwcnt_metadata *metadata;
- size_t dump_bytes;
- struct kbase_hwcnt_gpu_info gpu_info;
+ struct kbase_hwcnt_backend_csf_if_prfcnt_info prfcnt_info;
};
/**
* struct kbase_hwcnt_csf_physical_layout - HWC sample memory physical layout
* information.
- * @fe_cnt: FroneEnd block count.
+ * @fe_cnt: Front end block count.
* @tiler_cnt: Tiler block count.
* @mmu_l2_cnt: Memory system(MMU and L2 cache) block count.
* @shader_cnt: Shader Core block count.
@@ -207,7 +197,7 @@ struct kbase_hwcnt_csf_physical_layout {
* count for sample period.
* @phys_layout: Physical memory layout information of HWC
* sample buffer.
- * @dump_completed: Completion signalled by the dump worker when
+ * @dump_completed: Completion signaled by the dump worker when
* it is completed accumulating up to the
* insert_index_to_accumulate.
* Should be initialized to the "complete" state.
@@ -242,7 +232,7 @@ bool kbasep_hwcnt_backend_csf_backend_exists(
struct kbase_hwcnt_backend_csf_info *csf_info)
{
WARN_ON(!csf_info);
- lockdep_assert_held(&csf_info->lock);
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
return (csf_info->backend != NULL);
}
@@ -280,6 +270,9 @@ kbasep_hwcnt_backend_csf_cc_update(struct kbase_hwcnt_backend_csf *backend_csf)
u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS];
size_t clk;
+ backend_csf->info->csf_if->assert_lock_held(
+ backend_csf->info->csf_if->ctx);
+
backend_csf->info->csf_if->get_gpu_cycle_count(
backend_csf->info->csf_if->ctx, cycle_counts,
backend_csf->clk_enable_map);
@@ -310,10 +303,9 @@ kbasep_hwcnt_backend_csf_timestamp_ns(struct kbase_hwcnt_backend *backend)
}
/** kbasep_hwcnt_backend_csf_process_enable_map() - Process the enable_map to
- * guarantee the header is
- * enabled, the header will be
- * used when do the samples
- * delta calculation.
+ * guarantee headers are
+ * enabled if any counter is
+ * required.
*@phys_enable_map: HWC physical enable map to be processed.
*/
static void kbasep_hwcnt_backend_csf_process_enable_map(
@@ -338,21 +330,21 @@ static void kbasep_hwcnt_backend_csf_process_enable_map(
}
static void kbasep_hwcnt_backend_csf_init_layout(
- const struct kbase_hwcnt_gpu_info *gpu_info,
+ const struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info,
struct kbase_hwcnt_csf_physical_layout *phys_layout)
{
- WARN_ON(!gpu_info);
+ WARN_ON(!prfcnt_info);
WARN_ON(!phys_layout);
phys_layout->fe_cnt = 1;
phys_layout->tiler_cnt = 1;
- phys_layout->mmu_l2_cnt = gpu_info->l2_count;
- phys_layout->shader_cnt = fls64(gpu_info->core_mask);
+ phys_layout->mmu_l2_cnt = prfcnt_info->l2_count;
+ phys_layout->shader_cnt = fls64(prfcnt_info->core_mask);
phys_layout->block_cnt = phys_layout->fe_cnt + phys_layout->tiler_cnt +
phys_layout->mmu_l2_cnt +
phys_layout->shader_cnt;
- phys_layout->shader_avail_mask = gpu_info->core_mask;
+ phys_layout->shader_avail_mask = prfcnt_info->core_mask;
phys_layout->headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
phys_layout->counters_per_block = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK;
@@ -363,9 +355,12 @@ static void kbasep_hwcnt_backend_csf_init_layout(
static void kbasep_hwcnt_backend_csf_reset_internal_buffers(
struct kbase_hwcnt_backend_csf *backend_csf)
{
- memset(backend_csf->to_user_buf, 0, backend_csf->info->dump_bytes);
- memset(backend_csf->accum_buf, 0, backend_csf->info->dump_bytes);
- memset(backend_csf->old_sample_buf, 0, backend_csf->info->dump_bytes);
+ memset(backend_csf->to_user_buf, 0,
+ backend_csf->info->prfcnt_info.dump_bytes);
+ memset(backend_csf->accum_buf, 0,
+ backend_csf->info->prfcnt_info.dump_bytes);
+ memset(backend_csf->old_sample_buf, 0,
+ backend_csf->info->prfcnt_info.dump_bytes);
}
static void kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(
@@ -389,12 +384,12 @@ static void kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(
u32 idx;
u32 *sample;
char *cpu_dump_base;
+ size_t dump_bytes = backend_csf->info->prfcnt_info.dump_bytes;
cpu_dump_base = (char *)backend_csf->ring_buf_cpu_base;
for (idx = 0; idx < backend_csf->info->ring_buf_cnt; idx++) {
- sample = (u32 *)&cpu_dump_base[idx *
- backend_csf->info->dump_bytes];
+ sample = (u32 *)&cpu_dump_base[idx * dump_bytes];
kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(
backend_csf, sample);
}
@@ -405,19 +400,20 @@ static void kbasep_hwcnt_backend_csf_update_user_sample(
{
/* Copy the data into the sample and wait for the user to get it. */
memcpy(backend_csf->to_user_buf, backend_csf->accum_buf,
- backend_csf->info->dump_bytes);
+ backend_csf->info->prfcnt_info.dump_bytes);
/* After copied data into user sample, clear the accumulator values to
* prepare for the next accumulator, such as the next request or
* threshold.
*/
- memset(backend_csf->accum_buf, 0, backend_csf->info->dump_bytes);
+ memset(backend_csf->accum_buf, 0,
+ backend_csf->info->prfcnt_info.dump_bytes);
}
static void kbasep_hwcnt_backend_csf_accumulate_sample(
const struct kbase_hwcnt_csf_physical_layout *phys_layout,
size_t dump_bytes, u32 *accum_buf, const u32 *old_sample_buf,
- const u32 *new_sample_buf)
+ const u32 *new_sample_buf, bool clearing_samples)
{
size_t block_idx, ctr_idx;
const u32 *old_block = old_sample_buf;
@@ -425,6 +421,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample(
u32 *acc_block = accum_buf;
for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) {
+ const u32 old_enable_mask =
+ old_block[phys_layout->offset_enable_mask];
const u32 new_enable_mask =
new_block[phys_layout->offset_enable_mask];
@@ -442,11 +440,63 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample(
phys_layout->headers_per_block *
KBASE_HWCNT_VALUE_BYTES);
- /* Accumulate the counters. */
- for (ctr_idx = phys_layout->headers_per_block;
- ctr_idx < phys_layout->values_per_block;
- ctr_idx++) {
- acc_block[ctr_idx] += new_block[ctr_idx];
+ /* Accumulate counter samples
+ *
+ * When accumulating samples we need to take into
+ * account whether the counter sampling method involves
+ * clearing counters back to zero after each sample is
+ * taken.
+ *
+ * The intention for CSF was that all HW should use
+ * counters which wrap to zero when their maximum value
+ * is reached. This, combined with non-clearing
+ * sampling, enables multiple concurrent users to
+ * request samples without interfering with each other.
+ *
+ * However, some early HW may not support wrapping
+ * counters; for these GPUs, counters must be cleared on
+ * each sample to avoid loss of data due to counters
+ * saturating at their maximum value.
+ */
+ if (!clearing_samples) {
+ if (old_enable_mask == 0) {
+ /* Hardware block was previously
+ * unavailable. Accumulate the new
+ * counters only, as we know previous
+ * values are zeroes.
+ */
+ for (ctr_idx =
+ phys_layout
+ ->headers_per_block;
+ ctr_idx <
+ phys_layout->values_per_block;
+ ctr_idx++) {
+ acc_block[ctr_idx] +=
+ new_block[ctr_idx];
+ }
+ } else {
+ /* Hardware block was previously
+ * available. Accumulate the delta
+ * between old and new counter values.
+ */
+ for (ctr_idx =
+ phys_layout
+ ->headers_per_block;
+ ctr_idx <
+ phys_layout->values_per_block;
+ ctr_idx++) {
+ acc_block[ctr_idx] +=
+ new_block[ctr_idx] -
+ old_block[ctr_idx];
+ }
+ }
+ } else {
+ for (ctr_idx = phys_layout->headers_per_block;
+ ctr_idx < phys_layout->values_per_block;
+ ctr_idx++) {
+ acc_block[ctr_idx] +=
+ new_block[ctr_idx];
+ }
}
}
old_block += phys_layout->values_per_block;
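The new-minus-old delta used above is safe with wrapping counters because u32 subtraction is modular; a small worked example with illustrative values:

	/* Counter wrapped between samples: modular u32 subtraction still
	 * yields the number of events since the previous sample.
	 */
	u32 old_val = 0xFFFFFFF0u; /* previous sample, near the wrap point */
	u32 new_val = 0x00000010u; /* new sample, after the counter wrapped */
	u32 delta   = new_val - old_val; /* == 0x20, i.e. 32 events */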
@@ -467,9 +517,11 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(
u32 insert_index_to_stop)
{
u32 raw_idx;
+ unsigned long flags;
u8 *cpu_dump_base = (u8 *)backend_csf->ring_buf_cpu_base;
const size_t ring_buf_cnt = backend_csf->info->ring_buf_cnt;
- const size_t buf_dump_bytes = backend_csf->info->dump_bytes;
+ const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes;
+ bool clearing_samples = backend_csf->info->prfcnt_info.clearing_samples;
u32 *old_sample_buf = backend_csf->old_sample_buf;
u32 *new_sample_buf;
@@ -478,9 +530,10 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(
return;
/* Sync all the buffers to CPU side before read the data. */
- backend_csf->info->csf_if->ring_buf_sync(
- backend_csf->info->csf_if->ctx, backend_csf->ring_buf,
- extract_index_to_start, (insert_index_to_stop - 1), true);
+ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx,
+ backend_csf->ring_buf,
+ extract_index_to_start,
+ insert_index_to_stop, true);
/* Consider u32 wrap case, '!=' is used here instead of '<' operator */
for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop;
@@ -495,7 +548,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(
kbasep_hwcnt_backend_csf_accumulate_sample(
&backend_csf->phys_layout, buf_dump_bytes,
- backend_csf->accum_buf, old_sample_buf, new_sample_buf);
+ backend_csf->accum_buf, old_sample_buf, new_sample_buf,
+ clearing_samples);
old_sample_buf = new_sample_buf;
}
@@ -514,23 +568,28 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(
}
/* Sync zeroed buffers to avoid coherency issues on future use. */
- backend_csf->info->csf_if->ring_buf_sync(
- backend_csf->info->csf_if->ctx, backend_csf->ring_buf,
- extract_index_to_start, (insert_index_to_stop - 1), false);
+ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx,
+ backend_csf->ring_buf,
+ extract_index_to_start,
+ insert_index_to_stop, false);
/* After consuming all samples between extract_idx and insert_idx,
* set the raw extract index to insert_idx so that the sample buffers
* can be released back to the ring buffer pool.
*/
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
backend_csf->info->csf_if->set_extract_index(
backend_csf->info->csf_if->ctx, insert_index_to_stop);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
+ flags);
}
static void kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
struct kbase_hwcnt_backend_csf *backend_csf,
enum kbase_hwcnt_backend_csf_enable_state new_state)
{
- lockdep_assert_held(&backend_csf->info->lock);
+ backend_csf->info->csf_if->assert_lock_held(
+ backend_csf->info->csf_if->ctx);
if (backend_csf->enable_state != new_state) {
backend_csf->enable_state = new_state;
@@ -558,21 +617,19 @@ static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work)
WARN_ON(!work);
backend_csf = container_of(work, struct kbase_hwcnt_backend_csf,
hwc_dump_work);
-
- spin_lock_irqsave(&backend_csf->info->lock, flags);
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
/* Assert the backend is not destroyed. */
WARN_ON(backend_csf != backend_csf->info->backend);
/* The backend was disabled or had an error while the worker was being
* launched.
*/
- if (backend_csf->enable_state !=
- KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED &&
- backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
+ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
WARN_ON(backend_csf->dump_state !=
KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE);
WARN_ON(!completion_done(&backend_csf->dump_completed));
- spin_unlock_irqrestore(&backend_csf->info->lock, flags);
+ backend_csf->info->csf_if->unlock(
+ backend_csf->info->csf_if->ctx, flags);
return;
}
@@ -581,12 +638,14 @@ static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work)
backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING;
insert_index_to_acc = backend_csf->insert_index_to_accumulate;
- spin_unlock_irqrestore(&backend_csf->info->lock, flags);
/* Read the raw extract and insert indexes from the CSF interface. */
backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx,
&extract_index, &insert_index);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
+ flags);
+
/* Accumulate up to the insert we grabbed at the prfcnt request
* interrupt.
*/
@@ -599,19 +658,18 @@ static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work)
kbasep_hwcnt_backend_csf_update_user_sample(backend_csf);
/* Dump done, set state back to COMPLETED for next request. */
- spin_lock_irqsave(&backend_csf->info->lock, flags);
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
/* Assert the backend is not destroyed. */
WARN_ON(backend_csf != backend_csf->info->backend);
/* The backend was disabled or had an error while we were accumulating.
*/
- if (backend_csf->enable_state !=
- KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED &&
- backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
+ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
WARN_ON(backend_csf->dump_state !=
KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE);
WARN_ON(!completion_done(&backend_csf->dump_completed));
- spin_unlock_irqrestore(&backend_csf->info->lock, flags);
+ backend_csf->info->csf_if->unlock(
+ backend_csf->info->csf_if->ctx, flags);
return;
}
@@ -621,7 +679,8 @@ static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work)
/* Our work here is done - set the wait object and unblock waiters. */
backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED;
complete_all(&backend_csf->dump_completed);
- spin_unlock_irqrestore(&backend_csf->info->lock, flags);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
+ flags);
}
/**
@@ -643,20 +702,21 @@ static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work)
backend_csf = container_of(work, struct kbase_hwcnt_backend_csf,
hwc_threshold_work);
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+
+ /* Assert the backend is not destroyed. */
+ WARN_ON(backend_csf != backend_csf->info->backend);
/* Read the raw extract and insert indexes from the CSF interface. */
backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx,
&extract_index, &insert_index);
- spin_lock_irqsave(&backend_csf->info->lock, flags);
- /* Assert the backend is not destroyed. */
- WARN_ON(backend_csf != backend_csf->info->backend);
-
/* The backend was disabled or had an error while the worker was being
* launched.
*/
if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
- spin_unlock_irqrestore(&backend_csf->info->lock, flags);
+ backend_csf->info->csf_if->unlock(
+ backend_csf->info->csf_if->ctx, flags);
return;
}
@@ -667,14 +727,19 @@ static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work)
if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) &&
(backend_csf->dump_state !=
KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)) {
- spin_unlock_irqrestore(&backend_csf->info->lock, flags);
+ backend_csf->info->csf_if->unlock(
+ backend_csf->info->csf_if->ctx, flags);
return;
}
- spin_unlock_irqrestore(&backend_csf->info->lock, flags);
-
- /* Accumulate everything we possibly can. We grabbed offsets before the
- * spin lock, so we know it is not possible for a concurrent dump's
- * insert_to_accumulate to exceed the insert we grabbed.
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
+ flags);
+
+ /* Accumulate everything we possibly can. We grabbed the insert index
+ * immediately after we acquired the lock but before we checked whether
+ * a concurrent dump was triggered. This ensures that if a concurrent
+ * dump was triggered between releasing the lock and now, we know for a
+ * fact that our insert will not exceed the concurrent dump's
+ * insert_to_accumulate, so we don't risk accumulating too much data.
*/
kbasep_hwcnt_backend_csf_accumulate_samples(backend_csf, extract_index,
insert_index);
@@ -685,45 +750,31 @@ static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work)
static void kbase_hwcnt_backend_csf_submit_dump_worker(
struct kbase_hwcnt_backend_csf_info *csf_info)
{
- unsigned long flags;
u32 extract_index;
- u32 insert_index;
WARN_ON(!csf_info);
-
- csf_info->csf_if->get_indexes(csf_info->csf_if->ctx, &extract_index,
- &insert_index);
-
- spin_lock_irqsave(&csf_info->lock, flags);
-
- /* Make sure the backend exists and is in the correct state.
- * A lot of things could have happened to it in the period before we
- * acquired the lock.
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+
+ WARN_ON(!kbasep_hwcnt_backend_csf_backend_exists(csf_info));
+ WARN_ON(csf_info->backend->enable_state !=
+ KBASE_HWCNT_BACKEND_CSF_ENABLED);
+ WARN_ON(csf_info->backend->dump_state !=
+ KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT);
+
+ /* Save insert index now so that the dump worker only accumulates the
+ * HWC data associated with this request. Extract index is not stored
+ * as that needs to be checked when accumulating to prevent re-reading
+ * buffers that have already been read and returned to the GPU.
*/
- if (kbasep_hwcnt_backend_csf_backend_exists(csf_info) &&
- (csf_info->backend->enable_state ==
- KBASE_HWCNT_BACKEND_CSF_ENABLED ||
- csf_info->backend->enable_state ==
- KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) &&
- csf_info->backend->dump_state ==
- KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT) {
- csf_info->backend->insert_index_to_accumulate = insert_index;
- csf_info->backend->dump_state =
- KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED;
-
- /* Submit the accumulator task into the work queue. */
- while (true != queue_work(csf_info->backend->hwc_dump_workq,
- &csf_info->backend->hwc_dump_work)) {
- /* Spin until we have guaranteed the work has been
- * submitted.
- * Without this there is a potential race where a prior
- * submission of the work may still technically be on
- * the queue, even though all of its work is complete.
- */
- }
- }
-
- spin_unlock_irqrestore(&csf_info->lock, flags);
+ csf_info->csf_if->get_indexes(
+ csf_info->csf_if->ctx, &extract_index,
+ &csf_info->backend->insert_index_to_accumulate);
+ csf_info->backend->dump_state =
+ KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED;
+
+ /* Submit the accumulator task into the work queue. */
+ queue_work(csf_info->backend->hwc_dump_workq,
+ &csf_info->backend->hwc_dump_work);
}
static void kbasep_hwcnt_backend_csf_get_physical_enable(
@@ -753,59 +804,36 @@ static void kbasep_hwcnt_backend_csf_get_physical_enable(
enable->clk_enable_map = enable_map->clk_enable_map;
}
-static int kbasep_hwcnt_backend_csf_dump_enable_impl(
+/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */
+static int kbasep_hwcnt_backend_csf_dump_enable_nolock(
struct kbase_hwcnt_backend *backend,
- const struct kbase_hwcnt_enable_map *enable_map,
- struct kbase_hwcnt_backend_csf_if_enable *out_enable)
+ const struct kbase_hwcnt_enable_map *enable_map)
{
- unsigned long flags;
struct kbase_hwcnt_backend_csf *backend_csf =
(struct kbase_hwcnt_backend_csf *)backend;
struct kbase_hwcnt_backend_csf_if_enable enable;
- WARN_ON(!out_enable);
-
if (!backend_csf || !enable_map ||
(enable_map->metadata != backend_csf->info->metadata))
return -EINVAL;
+ backend_csf->info->csf_if->assert_lock_held(
+ backend_csf->info->csf_if->ctx);
+
kbasep_hwcnt_backend_csf_get_physical_enable(backend_csf, enable_map,
&enable);
- spin_lock_irqsave(&backend_csf->info->lock, flags);
/* enable_state should be DISABLED before we transfer it to enabled */
- if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED) {
- spin_unlock_irqrestore(&backend_csf->info->lock, flags);
+ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED)
return -EIO;
- }
backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
WARN_ON(!completion_done(&backend_csf->dump_completed));
kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED);
- spin_unlock_irqrestore(&backend_csf->info->lock, flags);
- *out_enable = enable;
- return 0;
-}
-
-/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */
-static int kbasep_hwcnt_backend_csf_dump_enable_nolock(
- struct kbase_hwcnt_backend *backend,
- const struct kbase_hwcnt_enable_map *enable_map)
-{
- int errcode;
- struct kbase_hwcnt_backend_csf *backend_csf =
- (struct kbase_hwcnt_backend_csf *)backend;
- struct kbase_hwcnt_backend_csf_if_enable enable;
-
- errcode = kbasep_hwcnt_backend_csf_dump_enable_impl(backend, enable_map,
- &enable);
- if (errcode)
- return errcode;
-
- backend_csf->info->csf_if->dump_enable_nolock(
- backend_csf->info->csf_if->ctx, backend_csf->ring_buf, &enable);
+ backend_csf->info->csf_if->dump_enable(backend_csf->info->csf_if->ctx,
+ backend_csf->ring_buf, &enable);
kbasep_hwcnt_backend_csf_cc_initial_sample(backend_csf, enable_map);
@@ -818,33 +846,33 @@ static int kbasep_hwcnt_backend_csf_dump_enable(
const struct kbase_hwcnt_enable_map *enable_map)
{
int errcode;
+ unsigned long flags;
struct kbase_hwcnt_backend_csf *backend_csf =
(struct kbase_hwcnt_backend_csf *)backend;
- struct kbase_hwcnt_backend_csf_if_enable enable;
-
- errcode = kbasep_hwcnt_backend_csf_dump_enable_impl(backend, enable_map,
- &enable);
- if (errcode)
- return errcode;
-
- backend_csf->info->csf_if->dump_enable(backend_csf->info->csf_if->ctx,
- backend_csf->ring_buf, &enable);
- kbasep_hwcnt_backend_csf_cc_initial_sample(backend_csf, enable_map);
+ if (!backend_csf)
+ return -EINVAL;
- return 0;
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+ errcode = kbasep_hwcnt_backend_csf_dump_enable_nolock(backend,
+ enable_map);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
+ flags);
+ return errcode;
}
static void kbasep_hwcnt_backend_csf_wait_enable_transition_complete(
struct kbase_hwcnt_backend_csf *backend_csf, unsigned long *lock_flags)
{
- lockdep_assert_held(&backend_csf->info->lock);
+ backend_csf->info->csf_if->assert_lock_held(
+ backend_csf->info->csf_if->ctx);
while ((backend_csf->enable_state ==
KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) ||
(backend_csf->enable_state ==
KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED)) {
- spin_unlock_irqrestore(&backend_csf->info->lock, *lock_flags);
+ backend_csf->info->csf_if->unlock(
+ backend_csf->info->csf_if->ctx, *lock_flags);
wait_event(
backend_csf->enable_state_waitq,
@@ -853,7 +881,8 @@ static void kbasep_hwcnt_backend_csf_wait_enable_transition_complete(
(backend_csf->enable_state !=
KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED));
- spin_lock_irqsave(&backend_csf->info->lock, *lock_flags);
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx,
+ lock_flags);
}
}
@@ -868,7 +897,7 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
WARN_ON(!backend_csf);
- spin_lock_irqsave(&backend_csf->info->lock, flags);
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
/* Make sure we wait until any previous enable or disable have completed
* before doing anything.
@@ -882,7 +911,8 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
/* If we are already disabled or in an unrecoverable error
* state, there is nothing for us to do.
*/
- spin_unlock_irqrestore(&backend_csf->info->lock, flags);
+ backend_csf->info->csf_if->unlock(
+ backend_csf->info->csf_if->ctx, flags);
return;
}
@@ -901,7 +931,8 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE);
WARN_ON(!completion_done(&backend_csf->dump_completed));
- spin_unlock_irqrestore(&backend_csf->info->lock, flags);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
+ flags);
/* Block until any async work has completed. We have transitioned out of
* the ENABLED state so we can guarantee no new work will concurrently
@@ -909,23 +940,16 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
*/
flush_workqueue(backend_csf->hwc_dump_workq);
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+
if (do_disable)
backend_csf->info->csf_if->dump_disable(
backend_csf->info->csf_if->ctx);
- spin_lock_irqsave(&backend_csf->info->lock, flags);
-
kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf,
&flags);
switch (backend_csf->enable_state) {
- case KBASE_HWCNT_BACKEND_CSF_DISABLED:
- case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED:
- case KBASE_HWCNT_BACKEND_CSF_ENABLED:
- case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED:
- case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR:
- WARN_ON(true);
- break;
case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER:
kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED);
@@ -935,9 +959,13 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
backend_csf,
KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR);
break;
+ default:
+ WARN_ON(true);
+ break;
}
- spin_unlock_irqrestore(&backend_csf->info->lock, flags);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
+ flags);
/* After disable, zero the header of all buffers in the ring buffer back
* to 0 to prepare for the next enable.
@@ -947,7 +975,7 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
/* Sync zeroed buffers to avoid coherency issues on future use. */
backend_csf->info->csf_if->ring_buf_sync(
backend_csf->info->csf_if->ctx, backend_csf->ring_buf, 0,
- (backend_csf->info->ring_buf_cnt - 1), false);
+ backend_csf->info->ring_buf_cnt, false);
/* Reset accumulator, old_sample_buf and user_sample to all-0 to prepare
* for next enable.
@@ -968,12 +996,27 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
if (!backend_csf)
return -EINVAL;
- spin_lock_irqsave(&backend_csf->info->lock, flags);
- /* Make sure we are enabled or becoming enabled. */
- if ((backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) &&
- (backend_csf->enable_state !=
- KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED)) {
- spin_unlock_irqrestore(&backend_csf->info->lock, flags);
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+
+ /* If we're transitioning to enabled there's nothing to accumulate, and
+ * the user dump buffer is already zeroed. We can just short circuit to
+ * the DUMP_COMPLETED state.
+ */
+ if (backend_csf->enable_state ==
+ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) {
+ backend_csf->dump_state =
+ KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED;
+ *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend);
+ kbasep_hwcnt_backend_csf_cc_update(backend_csf);
+ backend_csf->info->csf_if->unlock(
+ backend_csf->info->csf_if->ctx, flags);
+ return 0;
+ }
+
+ /* Otherwise, make sure we're already enabled. */
+ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
+ backend_csf->info->csf_if->unlock(
+ backend_csf->info->csf_if->ctx, flags);
return -EIO;
}
@@ -983,27 +1026,14 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) &&
(backend_csf->dump_state !=
KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)) {
- spin_unlock_irqrestore(&backend_csf->info->lock, flags);
+ backend_csf->info->csf_if->unlock(
+ backend_csf->info->csf_if->ctx, flags);
/* HWC is disabled or another dump is ongoing, or we are on
* fault.
*/
return -EIO;
}
- /* If we are transitioning to enabled there is nothing to accumulate,
- * and the user dump buffer is already zeroed.
- * We can just short circuit to the DUMP_COMPLETED state.
- */
- if (backend_csf->enable_state ==
- KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) {
- backend_csf->dump_state =
- KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED;
- spin_unlock_irqrestore(&backend_csf->info->lock, flags);
- *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend);
- kbasep_hwcnt_backend_csf_cc_update(backend_csf);
- return 0;
- }
-
/* Reset the completion so dump_wait() has something to wait on. */
reinit_completion(&backend_csf->dump_completed);
@@ -1022,7 +1052,6 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
backend_csf->dump_state =
KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT;
}
- spin_unlock_irqrestore(&backend_csf->info->lock, flags);
/* CSF firmware might enter protected mode now, but still call request.
* That is fine, as we changed state while holding the lock, so the
@@ -1036,13 +1065,14 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
*dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend);
kbasep_hwcnt_backend_csf_cc_update(backend_csf);
- if (do_request) {
+ if (do_request)
backend_csf->info->csf_if->dump_request(
backend_csf->info->csf_if->ctx);
- } else {
+ else
kbase_hwcnt_backend_csf_submit_dump_worker(backend_csf->info);
- }
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
+ flags);
return 0;
}
@@ -1060,13 +1090,14 @@ kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend)
wait_for_completion(&backend_csf->dump_completed);
- spin_lock_irqsave(&backend_csf->info->lock, flags);
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
/* Make sure the last dump actually succeeded. */
errcode = (backend_csf->dump_state ==
KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) ?
0 :
-EIO;
- spin_unlock_irqrestore(&backend_csf->info->lock, flags);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
+ flags);
return errcode;
}
@@ -1144,10 +1175,8 @@ kbasep_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_csf *backend_csf)
destroy_workqueue(backend_csf->hwc_dump_workq);
- if (backend_csf->info->csf_if->ring_buf_free) {
- backend_csf->info->csf_if->ring_buf_free(
- backend_csf->info->csf_if->ctx, backend_csf->ring_buf);
- }
+ backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx,
+ backend_csf->ring_buf);
kfree(backend_csf->accum_buf);
backend_csf->accum_buf = NULL;
@@ -1183,18 +1212,21 @@ kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info,
goto alloc_error;
backend_csf->info = csf_info;
- kbasep_hwcnt_backend_csf_init_layout(&csf_info->gpu_info,
+ kbasep_hwcnt_backend_csf_init_layout(&csf_info->prfcnt_info,
&backend_csf->phys_layout);
- backend_csf->accum_buf = kzalloc(csf_info->dump_bytes, GFP_KERNEL);
+ backend_csf->accum_buf =
+ kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL);
if (!backend_csf->accum_buf)
goto err_alloc_acc_buf;
- backend_csf->old_sample_buf = kzalloc(csf_info->dump_bytes, GFP_KERNEL);
+ backend_csf->old_sample_buf =
+ kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL);
if (!backend_csf->old_sample_buf)
goto err_alloc_pre_sample_buf;
- backend_csf->to_user_buf = kzalloc(csf_info->dump_bytes, GFP_KERNEL);
+ backend_csf->to_user_buf =
+ kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL);
if (!backend_csf->to_user_buf)
goto err_alloc_user_sample_buf;
@@ -1210,7 +1242,7 @@ kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info,
/* Sync zeroed buffers to avoid coherency issues on use. */
backend_csf->info->csf_if->ring_buf_sync(
backend_csf->info->csf_if->ctx, backend_csf->ring_buf, 0,
- (backend_csf->info->ring_buf_cnt - 1), false);
+ backend_csf->info->ring_buf_cnt, false);
init_completion(&backend_csf->dump_completed);
@@ -1278,17 +1310,17 @@ kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *info,
/* If it was not created before, attach it to csf_info.
* Use spin lock to avoid concurrent initialization.
*/
- spin_lock_irqsave(&csf_info->lock, flags);
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
if (csf_info->backend == NULL) {
csf_info->backend = backend_csf;
*out_backend = (struct kbase_hwcnt_backend *)backend_csf;
success = true;
- if (csf_info->unrecoverable_error_happened) {
+ if (csf_info->unrecoverable_error_happened)
backend_csf->enable_state =
KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR;
- }
}
- spin_unlock_irqrestore(&csf_info->lock, flags);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
+ flags);
/* Destroy the newly created backend if a backend has already been created
* before. In the normal case, this won't happen if the client calls init()
@@ -1317,9 +1349,10 @@ static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend)
/* Set the backend in csf_info to NULL so we won't handle any external
* notification anymore since we are terminating.
*/
- spin_lock_irqsave(&backend_csf->info->lock, flags);
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
backend_csf->info->backend = NULL;
- spin_unlock_irqrestore(&backend_csf->info->lock, flags);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
+ flags);
kbasep_hwcnt_backend_csf_destroy(backend_csf);
}
@@ -1370,8 +1403,6 @@ static int kbasep_hwcnt_backend_csf_info_create(
if (!info)
return -ENOMEM;
- spin_lock_init(&info->lock);
-
#if defined(CONFIG_MALI_PRFCNT_SET_SECONDARY)
info->counter_set = KBASE_HWCNT_SET_SECONDARY;
#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY)
@@ -1405,11 +1436,12 @@ kbasep_hwcnt_backend_csf_metadata(const struct kbase_hwcnt_backend_info *info)
}
static void kbasep_hwcnt_backend_csf_handle_unrecoverable_error(
- struct kbase_hwcnt_backend_csf *backend_csf, unsigned long *lock_flags)
+ struct kbase_hwcnt_backend_csf *backend_csf)
{
bool do_disable = false;
- lockdep_assert_held(&backend_csf->info->lock);
+ backend_csf->info->csf_if->assert_lock_held(
+ backend_csf->info->csf_if->ctx);
/* We are already in or transitioning to the unrecoverable error state.
* Early out.
@@ -1451,18 +1483,16 @@ static void kbasep_hwcnt_backend_csf_handle_unrecoverable_error(
* disabled, - we don't want to disable twice if an unrecoverable error
* happens while we are disabling.
*/
- if (do_disable) {
- spin_unlock_irqrestore(&backend_csf->info->lock, *lock_flags);
+ if (do_disable)
backend_csf->info->csf_if->dump_disable(
backend_csf->info->csf_if->ctx);
- spin_lock_irqsave(&backend_csf->info->lock, *lock_flags);
- }
}
static void kbasep_hwcnt_backend_csf_handle_recoverable_error(
- struct kbase_hwcnt_backend_csf *backend_csf, unsigned long *lock_flags)
+ struct kbase_hwcnt_backend_csf *backend_csf)
{
- lockdep_assert_held(&backend_csf->info->lock);
+ backend_csf->info->csf_if->assert_lock_held(
+ backend_csf->info->csf_if->ctx);
switch (backend_csf->enable_state) {
case KBASE_HWCNT_BACKEND_CSF_DISABLED:
@@ -1478,8 +1508,8 @@ static void kbasep_hwcnt_backend_csf_handle_recoverable_error(
/* A seemingly recoverable error that occurs while we are
* transitioning to enabled is probably unrecoverable.
*/
- kbasep_hwcnt_backend_csf_handle_unrecoverable_error(backend_csf,
- lock_flags);
+ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(
+ backend_csf);
return;
case KBASE_HWCNT_BACKEND_CSF_ENABLED:
/* Start transitioning to the disabled state. We can't wait for
@@ -1496,14 +1526,8 @@ static void kbasep_hwcnt_backend_csf_handle_recoverable_error(
backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
complete_all(&backend_csf->dump_completed);
- /* Unlock spin lock before we call csf_if disable(). */
- spin_unlock_irqrestore(&backend_csf->info->lock, *lock_flags);
-
backend_csf->info->csf_if->dump_disable(
backend_csf->info->csf_if->ctx);
-
- /* Lock spin lock again to match the spin lock pairs. */
- spin_lock_irqsave(&backend_csf->info->lock, *lock_flags);
return;
}
}
@@ -1511,44 +1535,27 @@ static void kbasep_hwcnt_backend_csf_handle_recoverable_error(
void kbase_hwcnt_backend_csf_protm_entered(
struct kbase_hwcnt_backend_interface *iface)
{
- unsigned long flags;
- struct kbase_hwcnt_backend_csf_info *csf_info;
- struct kbase_hwcnt_backend_csf *backend_csf;
-
- csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+ struct kbase_hwcnt_backend_csf_info *csf_info =
+ (struct kbase_hwcnt_backend_csf_info *)iface->info;
- spin_lock_irqsave(&csf_info->lock, flags);
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
csf_info->fw_in_protected_mode = true;
- /* Early out if the backend does not exist. */
- if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) {
- spin_unlock_irqrestore(&csf_info->lock, flags);
- return;
- }
-
- backend_csf = csf_info->backend;
- /* If we are not in REQUESTED state, we don't need to do the dumping. */
- if (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED) {
- spin_unlock_irqrestore(&csf_info->lock, flags);
- return;
- }
- backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT;
-
- spin_unlock_irqrestore(&csf_info->lock, flags);
- kbase_hwcnt_backend_csf_submit_dump_worker(csf_info);
+ /* Call on_prfcnt_sample() to trigger collection of the protected mode
+ * entry auto-sample if there is currently a pending dump request.
+ */
+ kbase_hwcnt_backend_csf_on_prfcnt_sample(iface);
}
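Since the function now only asserts the backend spinlock rather than taking it, its call site (outside this patch) is assumed to already hold the CSF scheduler interrupt spinlock, roughly:

	/* Hypothetical call site; the iface field name is an assumption. */
	unsigned long flags;

	kbase_csf_scheduler_spin_lock(kbdev, &flags);
	kbase_hwcnt_backend_csf_protm_entered(&kbdev->hwcnt_gpu_iface);
	kbase_csf_scheduler_spin_unlock(kbdev, flags);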
void kbase_hwcnt_backend_csf_protm_exited(
struct kbase_hwcnt_backend_interface *iface)
{
- unsigned long flags;
struct kbase_hwcnt_backend_csf_info *csf_info;
csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
- spin_lock_irqsave(&csf_info->lock, flags);
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
csf_info->fw_in_protected_mode = false;
- spin_unlock_irqrestore(&csf_info->lock, flags);
}
void kbase_hwcnt_backend_csf_on_unrecoverable_error(
@@ -1559,18 +1566,17 @@ void kbase_hwcnt_backend_csf_on_unrecoverable_error(
csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
- spin_lock_irqsave(&csf_info->lock, flags);
+ csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags);
csf_info->unrecoverable_error_happened = true;
/* Early out if the backend does not exist. */
if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) {
- spin_unlock_irqrestore(&csf_info->lock, flags);
+ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
return;
}
- kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend,
- &flags);
+ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend);
- spin_unlock_irqrestore(&csf_info->lock, flags);
+ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
}
void kbase_hwcnt_backend_csf_on_before_reset(
@@ -1582,11 +1588,11 @@ void kbase_hwcnt_backend_csf_on_before_reset(
csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
- spin_lock_irqsave(&csf_info->lock, flags);
+ csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags);
csf_info->unrecoverable_error_happened = false;
/* Early out if the backend does not exist. */
if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) {
- spin_unlock_irqrestore(&csf_info->lock, flags);
+ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
return;
}
backend_csf = csf_info->backend;
@@ -1605,7 +1611,7 @@ void kbase_hwcnt_backend_csf_on_before_reset(
* really matter, the power is being pulled.
*/
kbasep_hwcnt_backend_csf_handle_unrecoverable_error(
- csf_info->backend, &flags);
+ csf_info->backend);
}
/* A reset is the only way to exit the unrecoverable error state */
@@ -1615,81 +1621,66 @@ void kbase_hwcnt_backend_csf_on_before_reset(
backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED);
}
- spin_unlock_irqrestore(&csf_info->lock, flags);
+ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
}
void kbase_hwcnt_backend_csf_on_prfcnt_sample(
struct kbase_hwcnt_backend_interface *iface)
{
- unsigned long flags;
struct kbase_hwcnt_backend_csf_info *csf_info;
struct kbase_hwcnt_backend_csf *backend_csf;
csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
- spin_lock_irqsave(&csf_info->lock, flags);
/* Early out if the backend does not exist. */
- if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) {
- spin_unlock_irqrestore(&csf_info->lock, flags);
+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
return;
- }
backend_csf = csf_info->backend;
/* If the current state is not REQUESTED, this HWC sample will be
* skipped and processed in next dump_request.
*/
- if (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED) {
- spin_unlock_irqrestore(&csf_info->lock, flags);
+ if (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED)
return;
- }
backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT;
- spin_unlock_irqrestore(&csf_info->lock, flags);
kbase_hwcnt_backend_csf_submit_dump_worker(csf_info);
}
void kbase_hwcnt_backend_csf_on_prfcnt_threshold(
struct kbase_hwcnt_backend_interface *iface)
{
- unsigned long flags;
struct kbase_hwcnt_backend_csf_info *csf_info;
struct kbase_hwcnt_backend_csf *backend_csf;
csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
- spin_lock_irqsave(&csf_info->lock, flags);
/* Early out if the backend does not exist. */
- if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) {
- spin_unlock_irqrestore(&csf_info->lock, flags);
+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
return;
- }
backend_csf = csf_info->backend;
- if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) {
+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED)
/* Submit the threshold work into the work queue to consume the
* available samples.
*/
queue_work(backend_csf->hwc_dump_workq,
&backend_csf->hwc_threshold_work);
- }
-
- spin_unlock_irqrestore(&csf_info->lock, flags);
}
void kbase_hwcnt_backend_csf_on_prfcnt_overflow(
struct kbase_hwcnt_backend_interface *iface)
{
- unsigned long flags;
struct kbase_hwcnt_backend_csf_info *csf_info;
csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
- spin_lock_irqsave(&csf_info->lock, flags);
/* Early out if the backend does not exist. */
- if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) {
- spin_unlock_irqrestore(&csf_info->lock, flags);
+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
return;
- }
/* Called when an overflow occurs. We treat this as a recoverable error,
* so we start transitioning to the disabled state.
@@ -1698,27 +1689,21 @@ void kbase_hwcnt_backend_csf_on_prfcnt_overflow(
* complex recovery code when we can just turn ourselves off instead for
* a while.
*/
- kbasep_hwcnt_backend_csf_handle_recoverable_error(csf_info->backend,
- &flags);
-
- spin_unlock_irqrestore(&csf_info->lock, flags);
+ kbasep_hwcnt_backend_csf_handle_recoverable_error(csf_info->backend);
}
void kbase_hwcnt_backend_csf_on_prfcnt_enable(
struct kbase_hwcnt_backend_interface *iface)
{
- unsigned long flags;
struct kbase_hwcnt_backend_csf_info *csf_info;
struct kbase_hwcnt_backend_csf *backend_csf;
csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
- spin_lock_irqsave(&csf_info->lock, flags);
/* Early out if the backend does not exist. */
- if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) {
- spin_unlock_irqrestore(&csf_info->lock, flags);
+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
return;
- }
backend_csf = csf_info->backend;
if (backend_csf->enable_state ==
@@ -1735,27 +1720,22 @@ void kbase_hwcnt_backend_csf_on_prfcnt_enable(
* we reset.
*/
kbasep_hwcnt_backend_csf_handle_unrecoverable_error(
- csf_info->backend, &flags);
+ csf_info->backend);
}
-
- spin_unlock_irqrestore(&csf_info->lock, flags);
}
void kbase_hwcnt_backend_csf_on_prfcnt_disable(
struct kbase_hwcnt_backend_interface *iface)
{
- unsigned long flags;
struct kbase_hwcnt_backend_csf_info *csf_info;
struct kbase_hwcnt_backend_csf *backend_csf;
csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
- spin_lock_irqsave(&csf_info->lock, flags);
/* Early out if the backend does not exist. */
- if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) {
- spin_unlock_irqrestore(&csf_info->lock, flags);
+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
return;
- }
backend_csf = csf_info->backend;
if (backend_csf->enable_state ==
@@ -1773,10 +1753,8 @@ void kbase_hwcnt_backend_csf_on_prfcnt_disable(
* we reset.
*/
kbasep_hwcnt_backend_csf_handle_unrecoverable_error(
- csf_info->backend, &flags);
+ csf_info->backend);
}
-
- spin_unlock_irqrestore(&csf_info->lock, flags);
}
int kbase_hwcnt_backend_csf_metadata_init(
@@ -1784,28 +1762,29 @@ int kbase_hwcnt_backend_csf_metadata_init(
{
int errcode;
struct kbase_hwcnt_backend_csf_info *csf_info;
+ struct kbase_hwcnt_gpu_info gpu_info;
if (!iface)
return -EINVAL;
csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
- WARN_ON(!csf_info->csf_if->get_gpu_info);
- csf_info->csf_if->get_gpu_info(csf_info->csf_if->ctx,
- &csf_info->dump_bytes,
- &csf_info->gpu_info.l2_count,
- &csf_info->gpu_info.core_mask,
- &csf_info->gpu_info.clk_cnt);
+ WARN_ON(!csf_info->csf_if->get_prfcnt_info);
+
+ csf_info->csf_if->get_prfcnt_info(csf_info->csf_if->ctx,
+ &csf_info->prfcnt_info);
/* The clock domain count should not exceed the maximum number of
* clock regulators.
*/
- if (csf_info->gpu_info.clk_cnt > BASE_MAX_NR_CLOCKS_REGULATORS)
+ if (csf_info->prfcnt_info.clk_cnt > BASE_MAX_NR_CLOCKS_REGULATORS)
return -EIO;
- errcode = kbase_hwcnt_csf_metadata_create(&csf_info->gpu_info,
- csf_info->counter_set,
- &csf_info->metadata);
+ gpu_info.l2_count = csf_info->prfcnt_info.l2_count;
+ gpu_info.core_mask = csf_info->prfcnt_info.core_mask;
+ gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt;
+ errcode = kbase_hwcnt_csf_metadata_create(
+ &gpu_info, csf_info->counter_set, &csf_info->metadata);
if (errcode)
return errcode;
@@ -1813,7 +1792,8 @@ int kbase_hwcnt_backend_csf_metadata_init(
* Dump abstraction size should be exactly the same size and layout as
* the physical dump size, for backwards compatibility.
*/
- WARN_ON(csf_info->dump_bytes != csf_info->metadata->dump_buf_bytes);
+ WARN_ON(csf_info->prfcnt_info.dump_bytes !=
+ csf_info->metadata->dump_buf_bytes);
return 0;
}
@@ -1868,8 +1848,7 @@ int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if,
return 0;
}
-void kbase_hwcnt_backend_csf_destroy(
- struct kbase_hwcnt_backend_interface *iface)
+void kbase_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_interface *iface)
{
if (!iface)
return;
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf.h b/mali_kbase/mali_kbase_hwcnt_backend_csf.h
index 93938f0..7506274 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_csf.h
+++ b/mali_kbase/mali_kbase_hwcnt_backend_csf.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -40,7 +40,7 @@
* @iface: Non-NULL pointer to backend interface structure that is filled
* in on creation success.
*
- * Calls to iface->dump_enable_nolock() require kbdev->hwaccess_lock held.
+ * Calls to iface->dump_enable_nolock() require the CSF Scheduler IRQ lock.
*
* Return: 0 on success, else error code.
*/
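With this change, dump_enable() takes the backend spinlock itself, while dump_enable_nolock() expects the CSF Scheduler IRQ lock to be held already. A minimal usage sketch for the nolock variant, assuming a backend created against the csf_if introduced by this patch (csf_if, iface, backend and enable_map are assumed to be in scope):

	/* Sketch only: enable counters while the backend spinlock is held. */
	unsigned long flags;
	int err;

	csf_if->lock(csf_if->ctx, &flags);
	err = iface->dump_enable_nolock(backend, enable_map);
	csf_if->unlock(csf_if->ctx, flags);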
@@ -77,7 +77,7 @@ void kbase_hwcnt_backend_csf_destroy(
struct kbase_hwcnt_backend_interface *iface);
/**
- * kbase_hwcnt_backend_csf_protm_entered() - CSf HWC backend function to receive
+ * kbase_hwcnt_backend_csf_protm_entered() - CSF HWC backend function to receive
* notification that protected mode
* has been entered.
* @iface: Non-NULL pointer to HWC backend interface.
@@ -86,7 +86,7 @@ void kbase_hwcnt_backend_csf_protm_entered(
struct kbase_hwcnt_backend_interface *iface);
/**
- * kbase_hwcnt_backend_csf_protm_exited() - CSf HWC backend function to receive
+ * kbase_hwcnt_backend_csf_protm_exited() - CSF HWC backend function to receive
* notification that protected mode has
* been exited.
* @iface: Non-NULL pointer to HWC backend interface.
@@ -95,22 +95,20 @@ void kbase_hwcnt_backend_csf_protm_exited(
struct kbase_hwcnt_backend_interface *iface);
/**
- * kbase_hwcnt_backend_csf_on_unrecoverable_error() - CSf HWC backend function
- * to be called when an
- * unrecoverable error
- * occurs, such as the
- * firmware has died or bus
- * error, this puts us into
- * the unrecoverable error
- * state, which we can only
- * get out of by a reset.
+ * kbase_hwcnt_backend_csf_on_unrecoverable_error() - CSF HWC backend function
+ * called when unrecoverable
+ * errors are detected.
* @iface: Non-NULL pointer to HWC backend interface.
+ *
+ * This should be called on encountering errors that can only be recovered by a
+ * reset, or that may put the HWC logic into a state that could result in a
+ * hang, for example a bus error or the FW becoming unresponsive.
*/
void kbase_hwcnt_backend_csf_on_unrecoverable_error(
struct kbase_hwcnt_backend_interface *iface);
/**
- * kbase_hwcnt_backend_csf_on_before_reset() - CSf HWC backend function to be
+ * kbase_hwcnt_backend_csf_on_before_reset() - CSF HWC backend function to be
* called immediately before a
* reset. Takes us out of the
* unrecoverable error state, if we
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf_if.h b/mali_kbase/mali_kbase_hwcnt_backend_csf_if.h
index e86d240..b4ddd31 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_csf_if.h
+++ b/mali_kbase/mali_kbase_hwcnt_backend_csf_if.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -61,19 +61,63 @@ struct kbase_hwcnt_backend_csf_if_enable {
};
/**
- * typedef kbase_hwcnt_backend_csf_if_get_gpu_info_fn - Get GPU information
- * @ctx: Non-NULL pointer to a CSF context.
- * @dump_size: Non-NULL pointer to where the dump size of performance counter
- * sample is stored.
- * @l2_count: Non-NULL pointer to where the MMU L2 cache count is stored.
- * @core_mask: Non-NULL pointer to where shader core mask is stored.
+ * struct kbase_hwcnt_backend_csf_if_prfcnt_info - Performance counter
+ * information.
+ * @dump_bytes: Bytes of GPU memory required to perform a performance
+ * counter dump.
+ * @l2_count: The MMU L2 cache count.
+ * @core_mask: Shader core mask.
+ * @clk_cnt: Clock domain count in the system.
+ * @clearing_samples: Indicates whether counters are cleared after each sample
+ * is taken.
+ */
+struct kbase_hwcnt_backend_csf_if_prfcnt_info {
+ size_t dump_bytes;
+ size_t l2_count;
+ u64 core_mask;
+ u8 clk_cnt;
+ bool clearing_samples;
+};
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_assert_lock_held_fn - Assert that the
+ * backend spinlock is
+ * held.
+ * @ctx: Non-NULL pointer to a CSF context.
+ */
+typedef void (*kbase_hwcnt_backend_csf_if_assert_lock_held_fn)(
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_lock_fn - Acquire backend spinlock.
+ *
+ * @ctx: Non-NULL pointer to a CSF context.
+ * @flags: Pointer to the memory location that would store the previous
+ * interrupt state.
+ */
+typedef void (*kbase_hwcnt_backend_csf_if_lock_fn)(
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long *flags);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_unlock_fn - Release backend spinlock.
*
- * @clk_cnt: Non-NULL pointer to where clock domain count in the system is
- * stored.
+ * @ctx: Non-NULL pointer to a CSF context.
+ * @flags: Previously stored interrupt state when Scheduler interrupt
+ * spinlock was acquired.
*/
-typedef void (*kbase_hwcnt_backend_csf_if_get_gpu_info_fn)(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx, size_t *dump_size,
- size_t *l2_count, u64 *core_mask, u8 *clk_cnt);
+typedef void (*kbase_hwcnt_backend_csf_if_unlock_fn)(
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long flags);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn - Get performance
+ * counter information.
+ * @ctx: Non-NULL pointer to a CSF context.
+ * @prfcnt_info: Non-NULL pointer to struct where performance counter
+ * information should be stored.
+ */
+typedef void (*kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn)(
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info);
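For illustration, a consumer of this query is expected to size its dump buffers from the returned structure, much as the backend creation path elsewhere in this patch does (the helper name below is hypothetical):

	/* Sketch: allocate an accumulation buffer sized from the counter info. */
	static int example_alloc_accum_buf(struct kbase_hwcnt_backend_csf_if *csf_if,
					   u8 **out_buf)
	{
		struct kbase_hwcnt_backend_csf_if_prfcnt_info prfcnt_info;

		csf_if->get_prfcnt_info(csf_if->ctx, &prfcnt_info);

		if (prfcnt_info.clk_cnt > BASE_MAX_NR_CLOCKS_REGULATORS)
			return -EIO;

		*out_buf = kzalloc(prfcnt_info.dump_bytes, GFP_KERNEL);
		return *out_buf ? 0 : -ENOMEM;
	}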
/**
* typedef kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn - Allocate a ring buffer
@@ -105,14 +149,13 @@ typedef int (*kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn)(
* inclusive.
* @buf_index_last: The last buffer index in the ring buffer to be synced,
* exclusive.
- * @for_cpu: The direction of sync to be applied.
- * It is set to true when CPU cache needs to be invalidated
- * before reading the ring buffer contents. And set to false
- * when CPU cache needs to be flushed after writing to the
- * ring buffer.
+ * @for_cpu: The direction of sync to be applied, set to true when the
+ * CPU cache needs invalidating before reading the buffer, and
+ * set to false after CPU writes, to flush them before this
+ * memory is overwritten by the GPU.
*
- * After HWC sample request is done in GPU side, we need to sync the dump memory
- * to CPU side to access the HWC data.
+ * Flush cached HWC dump buffer data to ensure that all writes from GPU and CPU
+ * are correctly observed.
*/
typedef void (*kbase_hwcnt_backend_csf_if_ring_buf_sync_fn)(
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
@@ -147,25 +190,10 @@ typedef u64 (*kbase_hwcnt_backend_csf_if_timestamp_ns_fn)(
* @ctx: Non-NULL pointer to a CSF interface context.
* @ring_buf: Non-NULL pointer to the ring buffer which used to setup the HWC.
* @enable: Non-NULL pointer to the enable map of HWC.
- */
-typedef void (*kbase_hwcnt_backend_csf_if_dump_enable_fn)(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx,
- struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
- struct kbase_hwcnt_backend_csf_if_enable *enable);
-
-/**
- * typedef kbase_hwcnt_backend_csf_if_dump_enable_nolock_fn - Setup and enable
- * hardware counter
- * in CSF interface.
- * @ctx: Non-NULL pointer to a CSF interface context.
- * @ring_buf: Non-NULL pointer to the ring buffer which used to setup the HWC.
- * @enable: Non-NULL pointer to the enable map of HWC.
*
- * Exactly the same as kbase_hwcnt_backend_csf_if_dump_enable_fn(), except must
- * be called in an atomic context with the spinlock documented by the specific
- * backend interface held.
+ * Requires lock to be taken before calling.
*/
-typedef void (*kbase_hwcnt_backend_csf_if_dump_enable_nolock_fn)(
+typedef void (*kbase_hwcnt_backend_csf_if_dump_enable_fn)(
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
struct kbase_hwcnt_backend_csf_if_enable *enable);
@@ -174,13 +202,18 @@ typedef void (*kbase_hwcnt_backend_csf_if_dump_enable_nolock_fn)(
* typedef kbase_hwcnt_backend_csf_if_dump_disable_fn - Disable hardware counter
* in CSF interface.
* @ctx: Non-NULL pointer to a CSF interface context.
+ *
+ * Requires lock to be taken before calling.
*/
typedef void (*kbase_hwcnt_backend_csf_if_dump_disable_fn)(
struct kbase_hwcnt_backend_csf_if_ctx *ctx);
/**
* typedef kbase_hwcnt_backend_csf_if_dump_request_fn - Request a HWC dump.
+ *
* @ctx: Non-NULL pointer to the interface context.
+ *
+ * Requires lock to be taken before calling.
*/
typedef void (*kbase_hwcnt_backend_csf_if_dump_request_fn)(
struct kbase_hwcnt_backend_csf_if_ctx *ctx);
@@ -189,9 +222,12 @@ typedef void (*kbase_hwcnt_backend_csf_if_dump_request_fn)(
* typedef kbase_hwcnt_backend_csf_if_get_indexes_fn - Get current extract and
* insert indexes of the
* ring buffer.
+ *
* @ctx: Non-NULL pointer to a CSF interface context.
* @extract_index: Non-NULL pointer where current extract index to be saved.
* @insert_index: Non-NULL pointer where current insert index to be saved.
+ *
+ * Requires lock to be taken before calling.
*/
typedef void (*kbase_hwcnt_backend_csf_if_get_indexes_fn)(
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index,
@@ -201,8 +237,11 @@ typedef void (*kbase_hwcnt_backend_csf_if_get_indexes_fn)(
* typedef kbase_hwcnt_backend_csf_if_set_extract_index_fn - Update the extract
* index of the ring
* buffer.
+ *
* @ctx: Non-NULL pointer to a CSF interface context.
* @extract_index: New extract index to be set.
+ *
+ * Requires lock to be taken before calling.
*/
typedef void (*kbase_hwcnt_backend_csf_if_set_extract_index_fn)(
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_index);
@@ -213,9 +252,11 @@ typedef void (*kbase_hwcnt_backend_csf_if_set_extract_index_fn)(
* @ctx: Non-NULL pointer to a CSF interface context.
* @cycle_counts: Non-NULL pointer to an array where cycle counts to be saved,
* the array size should be at least as big as the number of
- * clock domains returned by get_gpu_info interface.
+ * clock domains returned by get_prfcnt_info interface.
* @clk_enable_map: An array of bitfields, each bit specifies an enabled clock
* domain.
+ *
+ * Requires lock to be taken before calling.
*/
typedef void (*kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn)(
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u64 *cycle_counts,
@@ -225,7 +266,11 @@ typedef void (*kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn)(
* struct kbase_hwcnt_backend_csf_if - Hardware counter backend CSF virtual
* interface.
* @ctx: CSF interface context.
- * @get_gpu_info: Function ptr to get HWC related information.
+ * @assert_lock_held: Function ptr to assert backend spinlock is held.
+ * @lock: Function ptr to acquire backend spinlock.
+ * @unlock: Function ptr to release backend spinlock.
+ * @get_prfcnt_info: Function ptr to get performance counter related
+ * information.
* @ring_buf_alloc: Function ptr to allocate ring buffer for CSF HWC.
* @ring_buf_sync: Function ptr to sync ring buffer to CPU.
* @ring_buf_free: Function ptr to free ring buffer for CSF HWC.
@@ -243,13 +288,15 @@ typedef void (*kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn)(
*/
struct kbase_hwcnt_backend_csf_if {
struct kbase_hwcnt_backend_csf_if_ctx *ctx;
- kbase_hwcnt_backend_csf_if_get_gpu_info_fn get_gpu_info;
+ kbase_hwcnt_backend_csf_if_assert_lock_held_fn assert_lock_held;
+ kbase_hwcnt_backend_csf_if_lock_fn lock;
+ kbase_hwcnt_backend_csf_if_unlock_fn unlock;
+ kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn get_prfcnt_info;
kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn ring_buf_alloc;
kbase_hwcnt_backend_csf_if_ring_buf_sync_fn ring_buf_sync;
kbase_hwcnt_backend_csf_if_ring_buf_free_fn ring_buf_free;
kbase_hwcnt_backend_csf_if_timestamp_ns_fn timestamp_ns;
kbase_hwcnt_backend_csf_if_dump_enable_fn dump_enable;
- kbase_hwcnt_backend_csf_if_dump_enable_nolock_fn dump_enable_nolock;
kbase_hwcnt_backend_csf_if_dump_disable_fn dump_disable;
kbase_hwcnt_backend_csf_if_dump_request_fn dump_request;
kbase_hwcnt_backend_csf_if_get_indexes_fn get_indexes;
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c
index 7a3b239..67ca4cb 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c
+++ b/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,7 +28,7 @@
#include <device/mali_kbase_device.h>
#include "mali_kbase_hwcnt_gpu.h"
#include "mali_kbase_hwcnt_types.h"
-#include "csf/mali_gpu_csf_registers.h"
+#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h>
#include "csf/mali_kbase_csf_firmware.h"
#include "mali_kbase_hwcnt_backend_csf_if_fw.h"
@@ -88,6 +88,50 @@ struct kbase_hwcnt_backend_csf_if_fw_ctx {
struct kbase_ccswe ccswe_shader_cores;
};
+static void kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx)
+{
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
+ struct kbase_device *kbdev;
+
+ WARN_ON(!ctx);
+
+ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+ kbdev = fw_ctx->kbdev;
+
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+}
+
+static void
+kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ unsigned long *flags)
+{
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
+ struct kbase_device *kbdev;
+
+ WARN_ON(!ctx);
+
+ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+ kbdev = fw_ctx->kbdev;
+
+ kbase_csf_scheduler_spin_lock(kbdev, flags);
+}
+
+static void kbasep_hwcnt_backend_csf_if_fw_unlock(
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long flags)
+{
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
+ struct kbase_device *kbdev;
+
+ WARN_ON(!ctx);
+
+ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+ kbdev = fw_ctx->kbdev;
+
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
+
/**
* kbasep_hwcnt_backend_csf_if_fw_on_freq_change() - On freq change callback
*
@@ -170,16 +214,18 @@ static void kbasep_hwcnt_backend_csf_if_fw_cc_disable(
rtm, &fw_ctx->rate_listener);
}
-static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_info(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx, size_t *dump_size,
- size_t *l2_count, u64 *core_mask, u8 *clk_cnt)
+static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info)
{
#ifdef CONFIG_MALI_NO_MALI
- *l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
- *core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
- *dump_size = KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS *
- KBASE_DUMMY_MODEL_BLOCK_SIZE;
- *clk_cnt = 1;
+ prfcnt_info->l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
+ prfcnt_info->core_mask =
+ (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
+ prfcnt_info->dump_bytes = KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS *
+ KBASE_DUMMY_MODEL_BLOCK_SIZE;
+ prfcnt_info->clk_cnt = 1;
+ prfcnt_info->clearing_samples = false;
#else
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
struct kbase_device *kbdev;
@@ -188,10 +234,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_info(
u32 prfcnt_fw_size = 0;
WARN_ON(!ctx);
- WARN_ON(!dump_size);
- WARN_ON(!l2_count);
- WARN_ON(!core_mask);
- WARN_ON(!clk_cnt);
+ WARN_ON(!prfcnt_info);
fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
kbdev = fw_ctx->kbdev;
@@ -199,12 +242,14 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_info(
prfcnt_hw_size = (prfcnt_size & 0xFF) << 8;
prfcnt_fw_size = (prfcnt_size >> 16) << 8;
fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size;
- *dump_size = fw_ctx->buf_bytes;
+ prfcnt_info->dump_bytes = fw_ctx->buf_bytes;
- *l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices;
- *core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+ prfcnt_info->l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices;
+ prfcnt_info->core_mask =
+ kbdev->gpu_props.props.coherency_info.group[0].core_mask;
- *clk_cnt = fw_ctx->clk_cnt;
+ prfcnt_info->clk_cnt = fw_ctx->clk_cnt;
+ prfcnt_info->clearing_samples = true;
#endif
}
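A worked example of the size decoding above (the register value is illustrative only):

	/* If the GPU reported prfcnt_size == 0x00010040, the shifts above give:
	 *   prfcnt_hw_size = (0x00010040 & 0xFF) << 8 = 0x4000 (16384 bytes)
	 *   prfcnt_fw_size = (0x00010040 >> 16) << 8  = 0x0100 (256 bytes)
	 *   prfcnt_info->dump_bytes = 16384 + 256     = 16640 bytes per sample
	 */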
@@ -331,9 +376,14 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(
WARN_ON(!ctx);
WARN_ON(!ring_buf);
- /* Get the buffer indexes in the ring buffer. */
+ /* The index arguments for this function form an inclusive, exclusive
+ * range.
+ * However, when masking back to the available buffers we will make this
+ * inclusive at both ends so full flushes are not 0 -> 0.
+ */
ring_buf_index_first = buf_index_first & (fw_ring_buf->buf_count - 1);
- ring_buf_index_last = buf_index_last & (fw_ring_buf->buf_count - 1);
+ ring_buf_index_last =
+ (buf_index_last - 1) & (fw_ring_buf->buf_count - 1);
/* The start address is the offset of the first buffer. */
start_address = fw_ctx->buf_bytes * ring_buf_index_first;
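A worked example of the new masking (values illustrative; buf_count is a power of two):

	/* With buf_count == 8:
	 *   full sync: buf_index_first == 0, buf_index_last == 8 (exclusive)
	 *     -> ring_buf_index_first = 0 & 7 = 0
	 *     -> ring_buf_index_last  = (8 - 1) & 7 = 7, i.e. buffers 0..7;
	 *   wrapping sync of buffers 6..9: first == 6, last == 10
	 *     -> masked to 6 and 1, handled below as 6..7 then 0..1.
	 * Call sites in this patch accordingly now pass ring_buf_cnt rather than
	 * (ring_buf_cnt - 1) as the exclusive end index.
	 */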
@@ -348,6 +398,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(
/* sync the first part to the end of ring buffer. */
for (i = pg_first; i < fw_ring_buf->num_pages; i++) {
struct page *pg = as_page(fw_ring_buf->phys[i]);
+
if (for_cpu) {
kbase_sync_single_for_cpu(fw_ctx->kbdev,
kbase_dma_addr(pg),
@@ -367,6 +418,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(
for (i = pg_first; i <= pg_last; i++) {
struct page *pg = as_page(fw_ring_buf->phys[i]);
+
if (for_cpu) {
kbase_sync_single_for_cpu(fw_ctx->kbdev,
kbase_dma_addr(pg), PAGE_SIZE,
@@ -420,12 +472,11 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(
}
}
-static void kbasep_hwcnt_backend_csf_if_fw_dump_enable_nolock(
+static void kbasep_hwcnt_backend_csf_if_fw_dump_enable(
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
struct kbase_hwcnt_backend_csf_if_enable *enable)
{
- unsigned long flags;
u32 prfcnt_config;
struct kbase_device *kbdev;
struct kbase_csf_global_iface *global_iface;
@@ -437,18 +488,15 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_enable_nolock(
WARN_ON(!ctx);
WARN_ON(!ring_buf);
WARN_ON(!enable);
+ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
kbdev = fw_ctx->kbdev;
global_iface = &kbdev->csf.global_iface;
- lockdep_assert_held(&kbdev->hwaccess_lock);
-
/* Configure */
prfcnt_config = fw_ring_buf->buf_count;
prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT;
- kbase_csf_scheduler_spin_lock(kbdev, &flags);
-
/* Configure the ring buffer base address */
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID,
fw_ring_buf->as_nr);
@@ -503,52 +551,25 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_enable_nolock(
prfcnt_config = kbase_csf_firmware_global_input_read(global_iface,
GLB_PRFCNT_CONFIG);
- kbase_csf_scheduler_spin_unlock(kbdev, flags);
-
kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx,
enable->clk_enable_map);
}
-static void kbasep_hwcnt_backend_csf_if_fw_dump_enable(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx,
- struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
- struct kbase_hwcnt_backend_csf_if_enable *enable)
-{
- unsigned long flags;
- struct kbase_device *kbdev;
- struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
- (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
-
- WARN_ON(!ctx);
- WARN_ON(!ring_buf);
- WARN_ON(!enable);
-
- kbdev = fw_ctx->kbdev;
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
- kbasep_hwcnt_backend_csf_if_fw_dump_enable_nolock(ctx, ring_buf,
- enable);
-
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-}
-
static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(
struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
- unsigned long flags;
struct kbase_device *kbdev;
struct kbase_csf_global_iface *global_iface;
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
WARN_ON(!ctx);
+ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
kbdev = fw_ctx->kbdev;
global_iface = &kbdev->csf.global_iface;
/* Disable the HWC */
- kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbdev->csf.hwcnt.enable_pending = true;
kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, 0,
GLB_REQ_PRFCNT_ENABLE_MASK);
@@ -569,7 +590,6 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(
* happens.
*/
kbdev->csf.hwcnt.request_pending = false;
- kbase_csf_scheduler_spin_unlock(kbdev, flags);
kbasep_hwcnt_backend_csf_if_fw_cc_disable(fw_ctx);
}
@@ -577,7 +597,6 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(
static void kbasep_hwcnt_backend_csf_if_fw_dump_request(
struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
- unsigned long flags;
u32 glb_req;
struct kbase_device *kbdev;
struct kbase_csf_global_iface *global_iface;
@@ -585,57 +604,52 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_request(
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
WARN_ON(!ctx);
+ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
kbdev = fw_ctx->kbdev;
global_iface = &kbdev->csf.global_iface;
/* Trigger dumping */
- kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbdev->csf.hwcnt.request_pending = true;
glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
glb_req ^= GLB_REQ_PRFCNT_SAMPLE_MASK;
kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req,
GLB_REQ_PRFCNT_SAMPLE_MASK);
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
- kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
static void kbasep_hwcnt_backend_csf_if_fw_get_indexes(
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index,
u32 *insert_index)
{
- unsigned long flags;
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
WARN_ON(!ctx);
WARN_ON(!extract_index);
WARN_ON(!insert_index);
+ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
- kbase_csf_scheduler_spin_lock(fw_ctx->kbdev, &flags);
*extract_index = kbase_csf_firmware_global_input_read(
&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT);
*insert_index = kbase_csf_firmware_global_output(
&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_INSERT);
- kbase_csf_scheduler_spin_unlock(fw_ctx->kbdev, flags);
}
static void kbasep_hwcnt_backend_csf_if_fw_set_extract_index(
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_idx)
{
- unsigned long flags;
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
WARN_ON(!ctx);
+ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
/* Set the raw extract index to release the buffer back to the ring
* buffer.
*/
- kbase_csf_scheduler_spin_lock(fw_ctx->kbdev, &flags);
kbase_csf_firmware_global_input(&fw_ctx->kbdev->csf.global_iface,
GLB_PRFCNT_EXTRACT, extract_idx);
- kbase_csf_scheduler_spin_unlock(fw_ctx->kbdev, flags);
}
static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(
@@ -649,6 +663,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(
WARN_ON(!ctx);
WARN_ON(!cycle_counts);
+ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
for (clk = 0; clk < fw_ctx->clk_cnt; clk++) {
if (!(clk_enable_map & (1ull << clk)))
@@ -749,14 +764,16 @@ int kbase_hwcnt_backend_csf_if_fw_create(
return errcode;
if_fw->ctx = (struct kbase_hwcnt_backend_csf_if_ctx *)ctx;
- if_fw->get_gpu_info = kbasep_hwcnt_backend_csf_if_fw_get_gpu_info;
+ if_fw->assert_lock_held =
+ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held;
+ if_fw->lock = kbasep_hwcnt_backend_csf_if_fw_lock;
+ if_fw->unlock = kbasep_hwcnt_backend_csf_if_fw_unlock;
+ if_fw->get_prfcnt_info = kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info;
if_fw->ring_buf_alloc = kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc;
if_fw->ring_buf_sync = kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync;
if_fw->ring_buf_free = kbasep_hwcnt_backend_csf_if_fw_ring_buf_free;
if_fw->timestamp_ns = kbasep_hwcnt_backend_csf_if_fw_timestamp_ns;
if_fw->dump_enable = kbasep_hwcnt_backend_csf_if_fw_dump_enable;
- if_fw->dump_enable_nolock =
- kbasep_hwcnt_backend_csf_if_fw_dump_enable_nolock;
if_fw->dump_disable = kbasep_hwcnt_backend_csf_if_fw_dump_disable;
if_fw->dump_request = kbasep_hwcnt_backend_csf_if_fw_dump_request;
if_fw->get_gpu_cycle_count =
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.h b/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.h
index d72851e..f55efb6 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.h
+++ b/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_jm.c b/mali_kbase/mali_kbase_hwcnt_backend_jm.c
index c6c672c..4168472 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_jm.c
+++ b/mali_kbase/mali_kbase_hwcnt_backend_jm.c
@@ -62,6 +62,8 @@ struct kbase_hwcnt_backend_jm_info {
* @enabled: True if dumping has been enabled, else false.
* @pm_core_mask: PM state sync-ed shaders core mask for the enabled
* dumping.
+ * @curr_config: Current allocated hardware resources to correctly map the src
+ * raw dump buffer to the dst dump buffer.
* @clk_enable_map: The enable map specifying enabled clock domains.
* @cycle_count_elapsed:
* Cycle count elapsed for a given sample period.
@@ -81,6 +83,7 @@ struct kbase_hwcnt_backend_jm {
struct kbase_vmap_struct *vmap;
bool enabled;
u64 pm_core_mask;
+ struct kbase_hwcnt_curr_config curr_config;
u64 clk_enable_map;
u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS];
u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS];
@@ -89,15 +92,16 @@ struct kbase_hwcnt_backend_jm {
};
/**
- * kbase_hwcnt_gpu_info_init() - Initialise an info structure used to create the
- * hwcnt metadata.
+ * kbasep_hwcnt_backend_jm_gpu_info_init() - Initialise an info structure used
+ * to create the hwcnt metadata.
* @kbdev: Non-NULL pointer to kbase device.
* @info: Non-NULL pointer to data structure to be filled in.
*
* The initialised info struct will only be valid for use while kbdev is valid.
*/
-static int kbase_hwcnt_gpu_info_init(struct kbase_device *kbdev,
- struct kbase_hwcnt_gpu_info *info)
+static int
+kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
+ struct kbase_hwcnt_gpu_info *info)
{
size_t clk;
@@ -240,6 +244,37 @@ static void kbasep_hwcnt_backend_jm_cc_disable(
}
+/**
+ * kbasep_hwcnt_gpu_update_curr_config() - Update the destination buffer with
+ * current config information.
+ * @kbdev: Non-NULL pointer to kbase device.
+ * @curr_config: Non-NULL pointer to return the current configuration of
+ * hardware allocated to the GPU.
+ *
+ * The current configuration information is used for architectures where the
+ * max_config interface is available from the Arbiter. In this case the current
+ * allocated hardware is not always the same, so the current config information
+ * is used to correctly map the currently allocated resources to the memory layout
+ * that is copied to the user space.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_gpu_update_curr_config(
+ struct kbase_device *kbdev,
+ struct kbase_hwcnt_curr_config *curr_config)
+{
+ if (WARN_ON(!kbdev) || WARN_ON(!curr_config))
+ return -EINVAL;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ curr_config->num_l2_slices =
+ kbdev->gpu_props.curr_config.l2_slices;
+ curr_config->shader_present =
+ kbdev->gpu_props.curr_config.shader_present;
+ return 0;
+}
+
/* JM backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */
static u64 kbasep_hwcnt_backend_jm_timestamp_ns(
struct kbase_hwcnt_backend *backend)
@@ -287,11 +322,18 @@ static int kbasep_hwcnt_backend_jm_dump_enable_nolock(
timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend);
+ /* Update the current configuration information. */
+ errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev,
+ &backend_jm->curr_config);
+ if (errcode)
+ goto error;
+
errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable);
if (errcode)
goto error;
backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev);
+
backend_jm->enabled = true;
kbasep_hwcnt_backend_jm_cc_enable(backend_jm, enable_map, timestamp_ns);
@@ -372,7 +414,7 @@ static int kbasep_hwcnt_backend_jm_dump_request(
size_t clk;
int ret;
- if (!backend_jm || !backend_jm->enabled)
+ if (!backend_jm || !backend_jm->enabled || !dump_time_ns)
return -EINVAL;
kbdev = backend_jm->kctx->kbdev;
@@ -441,6 +483,11 @@ static int kbasep_hwcnt_backend_jm_dump_get(
struct kbase_hwcnt_backend_jm *backend_jm =
(struct kbase_hwcnt_backend_jm *)backend;
size_t clk;
+#ifdef CONFIG_MALI_NO_MALI
+ struct kbase_device *kbdev;
+ unsigned long flags;
+ int errcode;
+#endif
if (!backend_jm || !dst || !dst_enable_map ||
(backend_jm->info->metadata != dst->metadata) ||
@@ -460,9 +507,24 @@ static int kbasep_hwcnt_backend_jm_dump_get(
dst->clk_cnt_buf[clk] = backend_jm->cycle_count_elapsed[clk];
}
+#ifdef CONFIG_MALI_NO_MALI
+ kbdev = backend_jm->kctx->kbdev;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ /* Update the current configuration information. */
+ errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev,
+ &backend_jm->curr_config);
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ if (errcode)
+ return errcode;
+#endif
+
return kbase_hwcnt_jm_dump_get(dst, backend_jm->cpu_dump_va,
dst_enable_map, backend_jm->pm_core_mask,
- accumulate);
+ &backend_jm->curr_config, accumulate);
}
/**
@@ -684,7 +746,7 @@ static int kbasep_hwcnt_backend_jm_info_create(
WARN_ON(!kbdev);
WARN_ON(!out_info);
- errcode = kbase_hwcnt_gpu_info_init(kbdev, &hwcnt_gpu_info);
+ errcode = kbasep_hwcnt_backend_jm_gpu_info_init(kbdev, &hwcnt_gpu_info);
if (errcode)
return errcode;
diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.c b/mali_kbase/mali_kbase_hwcnt_gpu.c
index 91d1f8c..4fba6b6 100644
--- a/mali_kbase/mali_kbase_hwcnt_gpu.c
+++ b/mali_kbase/mali_kbase_hwcnt_gpu.c
@@ -242,6 +242,13 @@ int kbase_hwcnt_jm_metadata_create(
if (!gpu_info || !out_metadata || !out_dump_bytes)
return -EINVAL;
+ /*
+	 * For architectures where a max_config interface is available
+	 * from the arbiter, the v5 dump bytes and the v5 metadata are
+	 * based on the maximum possible HW allocation in the GPU,
+	 * because they need to cover the worst case where all the
+	 * available L2 cache slices and shader cores are allocated.
+ */
dump_bytes = kbasep_hwcnt_backend_jm_dump_bytes(gpu_info);
errcode = kbasep_hwcnt_backend_gpu_metadata_create(
gpu_info, false, counter_set, &metadata);
@@ -260,8 +267,7 @@ int kbase_hwcnt_jm_metadata_create(
return 0;
}
-void kbase_hwcnt_jm_metadata_destroy(
- const struct kbase_hwcnt_metadata *metadata)
+void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
{
if (!metadata)
return;
@@ -318,15 +324,41 @@ static bool is_block_type_shader(
return is_shader;
}
+static bool is_block_type_l2_cache(
+ const u64 grp_type,
+ const u64 blk_type)
+{
+ bool is_l2_cache = false;
+
+ switch (grp_type) {
+ case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
+ if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2)
+ is_l2_cache = true;
+ break;
+ default:
+ /* Warn on unknown group type */
+ WARN_ON(true);
+ }
+
+ return is_l2_cache;
+}
+
int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src,
const struct kbase_hwcnt_enable_map *dst_enable_map,
- u64 pm_core_mask, bool accumulate)
+ u64 pm_core_mask,
+ const struct kbase_hwcnt_curr_config *curr_config,
+ bool accumulate)
{
const struct kbase_hwcnt_metadata *metadata;
const u32 *dump_src;
size_t src_offset, grp, blk, blk_inst;
u64 core_mask = pm_core_mask;
+ /* Variables to deal with the current configuration */
+ int l2_count = 0;
+ bool hw_res_available = true;
+
if (!dst || !src || !dst_enable_map ||
(dst_enable_map->metadata != dst->metadata))
return -EINVAL;
@@ -348,15 +380,43 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src,
const bool is_shader_core = is_block_type_shader(
kbase_hwcnt_metadata_group_type(metadata, grp),
blk_type, blk);
+ const bool is_l2_cache = is_block_type_l2_cache(
+ kbase_hwcnt_metadata_group_type(metadata, grp),
+ blk_type);
+
+ /*
+	 * If the number of L2 blocks seen so far exceeds the currently
+	 * allocated number of L2 slices, no HW is allocated to that block.
+ */
+ if (is_l2_cache) {
+ l2_count++;
+ if (l2_count > curr_config->num_l2_slices)
+ hw_res_available = false;
+ else
+ hw_res_available = true;
+ }
+ /*
+	 * For the shader cores, the currently allocated shader_mask is
+	 * always a subset of the maximum shader_mask, so after skipping
+	 * any unavailable L2 cache blocks the available shader cores
+	 * will always have a matching set of blk instances available to
+	 * accumulate into.
+ */
+ else {
+ hw_res_available = true;
+ }
- /* Early out if no values in the dest block are enabled */
+ /*
+ * Early out if no values in the dest block are enabled or if
+ * the resource target of the block is not available in the HW.
+ */
if (kbase_hwcnt_enable_map_block_enabled(
dst_enable_map, grp, blk, blk_inst)) {
u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
dst, grp, blk, blk_inst);
const u32 *src_blk = dump_src + src_offset;
- if (!is_shader_core || (core_mask & 1)) {
+ if ((!is_shader_core || (core_mask & 1)) && hw_res_available) {
if (accumulate) {
kbase_hwcnt_dump_buffer_block_accumulate(
dst_blk, src_blk, hdr_cnt,
@@ -372,7 +432,9 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src,
}
}
- src_offset += (hdr_cnt + ctr_cnt);
+	/* Only advance src_offset when the HW backing this block is available */
+ if (hw_res_available)
+ src_offset += (hdr_cnt + ctr_cnt);
if (is_shader_core)
core_mask = core_mask >> 1;
}
@@ -380,10 +442,9 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src,
return 0;
}
-int kbase_hwcnt_csf_dump_get(
- struct kbase_hwcnt_dump_buffer *dst, void *src,
- const struct kbase_hwcnt_enable_map *dst_enable_map,
- bool accumulate)
+int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map,
+ bool accumulate)
{
const struct kbase_hwcnt_metadata *metadata;
const u32 *dump_src;
diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.h b/mali_kbase/mali_kbase_hwcnt_gpu.h
index 4ebff2d..9b846a9 100644
--- a/mali_kbase/mali_kbase_hwcnt_gpu.h
+++ b/mali_kbase/mali_kbase_hwcnt_gpu.h
@@ -128,6 +128,50 @@ struct kbase_hwcnt_gpu_info {
};
/**
+ * struct kbase_hwcnt_curr_config - Current Configuration of HW allocated to the
+ * GPU.
+ * @num_l2_slices: Current number of L2 slices allocated to the GPU.
+ * @shader_present: Current shader present bitmap that is allocated to the GPU.
+ *
+ * For architectures with the max_config interface available from the Arbiter,
+ * the current resources allocated may change during runtime due to a
+ * re-partitioning (possible with partition manager). Thus, the HWC needs to be
+ * prepared to report any possible set of counters. For this reason the memory
+ * layout in the userspace is based on the maximum possible allocation. On the
+ * other hand, each partition has just the view of its currently allocated
+ * resources. Therefore, the HWC values dumped from the registers must be
+ * correctly mapped into this maximum memory layout before they are exposed
+ * to user space.
+ *
+ * For the L2 cache just the slice count is enough, since the allocated slices
+ * are accumulated into the first available L2 slots of the destination buffer.
+ *
+ * To map the shader cores correctly it is necessary to skip the L2 cache slots
+ * in the destination buffer that are not allocated. However, no extra logic is
+ * needed to map the shader core bitmap into the memory layout, because the
+ * allocated shader_present is always a subset of the maximum shader_present.
+ * This holds because:
+ * 1 - Partitions are made of slices and they are always ordered from the ones
+ * with more shader cores to the ones with less.
+ * 2 - The shader cores in a slice are always contiguous.
+ * 3 - A partition can only have a contiguous set of slices allocated to it.
+ * For example, suppose 4 slices are available in total: one with 4 cores, two
+ * with 3 cores and one with 2 cores. The maximum possible shader_present is:
+ * 0x0011|0111|0111|1111 -> note the order and that the shader cores are
+ * contiguous in any slice.
+ * Supposing that a partition takes the two slices in the middle, the current
+ * config shader_present for this partition would be:
+ * 0x0111|0111 -> note that this is a subset of the maximum above and the slices
+ * are contiguous.
+ * Therefore, by directly copying any subset of the maximum possible
+ * shader_present the mapping is already achieved.
+ */
+struct kbase_hwcnt_curr_config {
+ size_t num_l2_slices;
+ u64 shader_present;
+};
+
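As a side note on the mapping rule above: the only property the shader-core
path relies on is that the current shader_present is a bit-subset of the
maximum shader_present, so a direct copy already lands each core in the right
position of the maximum layout. A minimal user-side sketch (hypothetical
helper, not driver code) of that check, reading the nibble groups in the
example above as binary (maximum = 0x377F, current partition = 0x77):

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical illustration: true when every core bit set in the current
 * mask is also set in the maximum mask, i.e. curr is a subset of max.
 */
static bool shader_present_is_subset(uint64_t curr_shader_present,
				     uint64_t max_shader_present)
{
	return (curr_shader_present & ~max_shader_present) == 0;
}

/* With the example above, shader_present_is_subset(0x77, 0x377F) is true,
 * so the partition's dump copies straight into the maximum layout.
 */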
+/**
* kbase_hwcnt_jm_metadata_create() - Create hardware counter metadata for the
* JM GPUs.
* @info: Non-NULL pointer to info struct.
@@ -186,6 +230,8 @@ void kbase_hwcnt_csf_metadata_destroy(
* kbase_hwcnt_jm_metadata_create.
* @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
* @pm_core_mask: PM state synchronized shaders core mask with the dump.
+ * @curr_config: Current allocated hardware resources to correctly map the
+ * src raw dump buffer to the dst dump buffer.
* @accumulate: True if counters in src should be accumulated into dst,
* rather than copied.
*
@@ -197,7 +243,9 @@ void kbase_hwcnt_csf_metadata_destroy(
*/
int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src,
const struct kbase_hwcnt_enable_map *dst_enable_map,
- const u64 pm_core_mask, bool accumulate);
+ const u64 pm_core_mask,
+ const struct kbase_hwcnt_curr_config *curr_config,
+ bool accumulate);
/**
* kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw
@@ -217,10 +265,9 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src,
*
* Return: 0 on success, else error code.
*/
-int kbase_hwcnt_csf_dump_get(
- struct kbase_hwcnt_dump_buffer *dst, void *src,
- const struct kbase_hwcnt_enable_map *dst_enable_map,
- bool accumulate);
+int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map,
+ bool accumulate);
/**
* kbase_hwcnt_gpu_enable_map_to_physical() - Convert an enable map abstraction
diff --git a/mali_kbase/mali_kbase_hwcnt_legacy.c b/mali_kbase/mali_kbase_hwcnt_legacy.c
index bd523dd..e87dbbf 100644
--- a/mali_kbase/mali_kbase_hwcnt_legacy.c
+++ b/mali_kbase/mali_kbase_hwcnt_legacy.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
*
- * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -23,7 +23,7 @@
#include "mali_kbase_hwcnt_virtualizer.h"
#include "mali_kbase_hwcnt_types.h"
#include "mali_kbase_hwcnt_gpu.h"
-#include "mali_kbase_ioctl.h"
+#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
diff --git a/mali_kbase/mali_kbase_hwcnt_reader.h b/mali_kbase/mali_kbase_hwcnt_reader.h
deleted file mode 100644
index 9f2172b..0000000
--- a/mali_kbase/mali_kbase_hwcnt_reader.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *
- * (C) COPYRIGHT 2015, 2020-2021 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-#ifndef _KBASE_HWCNT_READER_H_
-#define _KBASE_HWCNT_READER_H_
-
-#include <stddef.h>
-
-/* The ids of ioctl commands. */
-#define KBASE_HWCNT_READER 0xBE
-#define KBASE_HWCNT_READER_GET_HWVER _IOR(KBASE_HWCNT_READER, 0x00, u32)
-#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, u32)
-#define KBASE_HWCNT_READER_DUMP _IOW(KBASE_HWCNT_READER, 0x10, u32)
-#define KBASE_HWCNT_READER_CLEAR _IOW(KBASE_HWCNT_READER, 0x11, u32)
-#define KBASE_HWCNT_READER_GET_BUFFER _IOC(_IOC_READ, KBASE_HWCNT_READER, 0x20,\
- offsetof(struct kbase_hwcnt_reader_metadata, cycles))
-#define KBASE_HWCNT_READER_GET_BUFFER_WITH_CYCLES _IOR(KBASE_HWCNT_READER, 0x20,\
- struct kbase_hwcnt_reader_metadata)
-#define KBASE_HWCNT_READER_PUT_BUFFER _IOC(_IOC_WRITE, KBASE_HWCNT_READER, 0x21,\
- offsetof(struct kbase_hwcnt_reader_metadata, cycles))
-#define KBASE_HWCNT_READER_PUT_BUFFER_WITH_CYCLES _IOW(KBASE_HWCNT_READER, 0x21,\
- struct kbase_hwcnt_reader_metadata)
-#define KBASE_HWCNT_READER_SET_INTERVAL _IOW(KBASE_HWCNT_READER, 0x30, u32)
-#define KBASE_HWCNT_READER_ENABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x40, u32)
-#define KBASE_HWCNT_READER_DISABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x41, u32)
-#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, u32)
-#define KBASE_HWCNT_READER_GET_API_VERSION_WITH_FEATURES \
- _IOW(KBASE_HWCNT_READER, 0xFF, \
- struct kbase_hwcnt_reader_api_version)
-
-/**
- * struct kbase_hwcnt_reader_metadata_cycles - GPU clock cycles
- * @top: the number of cycles associated with the main clock for the
- * GPU
- * @shader_cores: the cycles that have elapsed on the GPU shader cores
- */
-struct kbase_hwcnt_reader_metadata_cycles {
- u64 top;
- u64 shader_cores;
-};
-
-/**
- * struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata
- * @timestamp: time when sample was collected
- * @event_id: id of an event that triggered sample collection
- * @buffer_idx: position in sampling area where sample buffer was stored
- * @cycles: the GPU cycles that occurred since the last sample
- */
-struct kbase_hwcnt_reader_metadata {
- u64 timestamp;
- u32 event_id;
- u32 buffer_idx;
- struct kbase_hwcnt_reader_metadata_cycles cycles;
-};
-
-/**
- * enum base_hwcnt_reader_event - hwcnt dumping events
- * @BASE_HWCNT_READER_EVENT_MANUAL: manual request for dump
- * @BASE_HWCNT_READER_EVENT_PERIODIC: periodic dump
- * @BASE_HWCNT_READER_EVENT_PREJOB: prejob dump request
- * @BASE_HWCNT_READER_EVENT_POSTJOB: postjob dump request
- * @BASE_HWCNT_READER_EVENT_COUNT: number of supported events
- */
-enum base_hwcnt_reader_event {
- BASE_HWCNT_READER_EVENT_MANUAL,
- BASE_HWCNT_READER_EVENT_PERIODIC,
- BASE_HWCNT_READER_EVENT_PREJOB,
- BASE_HWCNT_READER_EVENT_POSTJOB,
-
- BASE_HWCNT_READER_EVENT_COUNT
-};
-
-#define KBASE_HWCNT_READER_API_VERSION_NO_FEATURE (0)
-#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP (1 << 0)
-#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES (1 << 1)
-/**
- * struct kbase_hwcnt_reader_api_version - hwcnt reader API version
- * @version: API version
- * @features: available features in this API version
- */
-struct kbase_hwcnt_reader_api_version {
- u32 version;
- u32 features;
-};
-
-#endif /* _KBASE_HWCNT_READER_H_ */
-
diff --git a/mali_kbase/mali_kbase_ioctl.h b/mali_kbase/mali_kbase_ioctl.h
deleted file mode 100644
index 36dfc34..0000000
--- a/mali_kbase/mali_kbase_ioctl.h
+++ /dev/null
@@ -1,841 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *
- * (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-#ifndef _KBASE_IOCTL_H_
-#define _KBASE_IOCTL_H_
-
-#ifdef __cpluscplus
-extern "C" {
-#endif
-
-#include <asm-generic/ioctl.h>
-#include <linux/types.h>
-
-#if MALI_USE_CSF
-#include "csf/mali_kbase_csf_ioctl.h"
-#else
-#include "jm/mali_kbase_jm_ioctl.h"
-#endif /* MALI_USE_CSF */
-
-#define KBASE_IOCTL_TYPE 0x80
-
-/**
- * struct kbase_ioctl_set_flags - Set kernel context creation flags
- *
- * @create_flags: Flags - see base_context_create_flags
- */
-struct kbase_ioctl_set_flags {
- __u32 create_flags;
-};
-
-#define KBASE_IOCTL_SET_FLAGS \
- _IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags)
-
-/**
- * struct kbase_ioctl_get_gpuprops - Read GPU properties from the kernel
- *
- * @buffer: Pointer to the buffer to store properties into
- * @size: Size of the buffer
- * @flags: Flags - must be zero for now
- *
- * The ioctl will return the number of bytes stored into @buffer or an error
- * on failure (e.g. @size is too small). If @size is specified as 0 then no
- * data will be written but the return value will be the number of bytes needed
- * for all the properties.
- *
- * @flags may be used in the future to request a different format for the
- * buffer. With @flags == 0 the following format is used.
- *
- * The buffer will be filled with pairs of values, a u32 key identifying the
- * property followed by the value. The size of the value is identified using
- * the bottom bits of the key. The value then immediately followed the key and
- * is tightly packed (there is no padding). All keys and values are
- * little-endian.
- *
- * 00 = u8
- * 01 = u16
- * 10 = u32
- * 11 = u64
- */
-struct kbase_ioctl_get_gpuprops {
- __u64 buffer;
- __u32 size;
- __u32 flags;
-};
-
-#define KBASE_IOCTL_GET_GPUPROPS \
- _IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops)
-
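As an aside, a hypothetical user-space sketch (not part of this header) of
walking a buffer returned in the @flags == 0 layout described above. Only the
size encoding in the bottom two bits of each key is taken from the text;
treating the remaining upper bits of the key as the property id is an
assumption of this sketch.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical walker for the flags == 0 gpuprops buffer layout. */
static void walk_gpuprops(const uint8_t *buf, size_t len)
{
	size_t off = 0;

	while (off + sizeof(uint32_t) <= len) {
		uint32_t key;
		uint64_t value = 0;
		size_t vsize;

		memcpy(&key, buf + off, sizeof(key));
		off += sizeof(key);

		/* Bottom two bits of the key give the value size:
		 * 00 = u8, 01 = u16, 10 = u32, 11 = u64.
		 */
		switch (key & 0x3) {
		case 0: vsize = 1; break;
		case 1: vsize = 2; break;
		case 2: vsize = 4; break;
		default: vsize = 8; break;
		}

		if (off + vsize > len)
			break;
		/* Values are tightly packed and little-endian; this copy
		 * assumes a little-endian host.
		 */
		memcpy(&value, buf + off, vsize);
		off += vsize;

		/* (key >> 2) would then be the property id (assumption),
		 * e.g. KBASE_GPUPROP_PRODUCT_ID.
		 */
	}
}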
-/**
- * union kbase_ioctl_mem_alloc - Allocate memory on the GPU
- * @in: Input parameters
- * @in.va_pages: The number of pages of virtual address space to reserve
- * @in.commit_pages: The number of physical pages to allocate
- * @in.extension: The number of extra pages to allocate on each GPU fault which grows the region
- * @in.flags: Flags
- * @out: Output parameters
- * @out.flags: Flags
- * @out.gpu_va: The GPU virtual address which is allocated
- */
-union kbase_ioctl_mem_alloc {
- struct {
- __u64 va_pages;
- __u64 commit_pages;
- __u64 extension;
- __u64 flags;
- } in;
- struct {
- __u64 flags;
- __u64 gpu_va;
- } out;
-};
-
-#define KBASE_IOCTL_MEM_ALLOC \
- _IOWR(KBASE_IOCTL_TYPE, 5, union kbase_ioctl_mem_alloc)
-
-/**
- * struct kbase_ioctl_mem_query - Query properties of a GPU memory region
- * @in: Input parameters
- * @in.gpu_addr: A GPU address contained within the region
- * @in.query: The type of query
- * @out: Output parameters
- * @out.value: The result of the query
- *
- * Use a %KBASE_MEM_QUERY_xxx flag as input for @query.
- */
-union kbase_ioctl_mem_query {
- struct {
- __u64 gpu_addr;
- __u64 query;
- } in;
- struct {
- __u64 value;
- } out;
-};
-
-#define KBASE_IOCTL_MEM_QUERY \
- _IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query)
-
-#define KBASE_MEM_QUERY_COMMIT_SIZE ((u64)1)
-#define KBASE_MEM_QUERY_VA_SIZE ((u64)2)
-#define KBASE_MEM_QUERY_FLAGS ((u64)3)
-
-/**
- * struct kbase_ioctl_mem_free - Free a memory region
- * @gpu_addr: Handle to the region to free
- */
-struct kbase_ioctl_mem_free {
- __u64 gpu_addr;
-};
-
-#define KBASE_IOCTL_MEM_FREE \
- _IOW(KBASE_IOCTL_TYPE, 7, struct kbase_ioctl_mem_free)
-
-/**
- * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader
- * @buffer_count: requested number of dumping buffers
- * @fe_bm: counters selection bitmask (Front end)
- * @shader_bm: counters selection bitmask (Shader)
- * @tiler_bm: counters selection bitmask (Tiler)
- * @mmu_l2_bm: counters selection bitmask (MMU_L2)
- *
- * A fd is returned from the ioctl if successful, or a negative value on error
- */
-struct kbase_ioctl_hwcnt_reader_setup {
- __u32 buffer_count;
- __u32 fe_bm;
- __u32 shader_bm;
- __u32 tiler_bm;
- __u32 mmu_l2_bm;
-};
-
-#define KBASE_IOCTL_HWCNT_READER_SETUP \
- _IOW(KBASE_IOCTL_TYPE, 8, struct kbase_ioctl_hwcnt_reader_setup)
-
-/**
- * struct kbase_ioctl_hwcnt_enable - Enable hardware counter collection
- * @dump_buffer: GPU address to write counters to
- * @fe_bm: counters selection bitmask (Front end)
- * @shader_bm: counters selection bitmask (Shader)
- * @tiler_bm: counters selection bitmask (Tiler)
- * @mmu_l2_bm: counters selection bitmask (MMU_L2)
- */
-struct kbase_ioctl_hwcnt_enable {
- __u64 dump_buffer;
- __u32 fe_bm;
- __u32 shader_bm;
- __u32 tiler_bm;
- __u32 mmu_l2_bm;
-};
-
-#define KBASE_IOCTL_HWCNT_ENABLE \
- _IOW(KBASE_IOCTL_TYPE, 9, struct kbase_ioctl_hwcnt_enable)
-
-#define KBASE_IOCTL_HWCNT_DUMP \
- _IO(KBASE_IOCTL_TYPE, 10)
-
-#define KBASE_IOCTL_HWCNT_CLEAR \
- _IO(KBASE_IOCTL_TYPE, 11)
-
-/**
- * struct kbase_ioctl_hwcnt_values - Values to set dummy the dummy counters to.
- * @data: Counter samples for the dummy model.
- * @size: Size of the counter sample data.
- * @padding: Padding.
- */
-struct kbase_ioctl_hwcnt_values {
- __u64 data;
- __u32 size;
- __u32 padding;
-};
-
-#define KBASE_IOCTL_HWCNT_SET \
- _IOW(KBASE_IOCTL_TYPE, 32, struct kbase_ioctl_hwcnt_values)
-
-/**
- * struct kbase_ioctl_disjoint_query - Query the disjoint counter
- * @counter: A counter of disjoint events in the kernel
- */
-struct kbase_ioctl_disjoint_query {
- __u32 counter;
-};
-
-#define KBASE_IOCTL_DISJOINT_QUERY \
- _IOR(KBASE_IOCTL_TYPE, 12, struct kbase_ioctl_disjoint_query)
-
-/**
- * struct kbase_ioctl_get_ddk_version - Query the kernel version
- * @version_buffer: Buffer to receive the kernel version string
- * @size: Size of the buffer
- * @padding: Padding
- *
- * The ioctl will return the number of bytes written into version_buffer
- * (which includes a NULL byte) or a negative error code
- *
- * The ioctl request code has to be _IOW because the data in ioctl struct is
- * being copied to the kernel, even though the kernel then writes out the
- * version info to the buffer specified in the ioctl.
- */
-struct kbase_ioctl_get_ddk_version {
- __u64 version_buffer;
- __u32 size;
- __u32 padding;
-};
-
-#define KBASE_IOCTL_GET_DDK_VERSION \
- _IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version)
-
-/**
- * struct kbase_ioctl_mem_jit_init_10_2 - Initialize the just-in-time memory
- * allocator (between kernel driver
- * version 10.2--11.4)
- * @va_pages: Number of VA pages to reserve for JIT
- *
- * Note that depending on the VA size of the application and GPU, the value
- * specified in @va_pages may be ignored.
- *
- * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for
- * backwards compatibility.
- */
-struct kbase_ioctl_mem_jit_init_10_2 {
- __u64 va_pages;
-};
-
-#define KBASE_IOCTL_MEM_JIT_INIT_10_2 \
- _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_10_2)
-
-/**
- * struct kbase_ioctl_mem_jit_init_11_5 - Initialize the just-in-time memory
- * allocator (between kernel driver
- * version 11.5--11.19)
- * @va_pages: Number of VA pages to reserve for JIT
- * @max_allocations: Maximum number of concurrent allocations
- * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%)
- * @group_id: Group ID to be used for physical allocations
- * @padding: Currently unused, must be zero
- *
- * Note that depending on the VA size of the application and GPU, the value
- * specified in @va_pages may be ignored.
- *
- * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for
- * backwards compatibility.
- */
-struct kbase_ioctl_mem_jit_init_11_5 {
- __u64 va_pages;
- __u8 max_allocations;
- __u8 trim_level;
- __u8 group_id;
- __u8 padding[5];
-};
-
-#define KBASE_IOCTL_MEM_JIT_INIT_11_5 \
- _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_11_5)
-
-/**
- * struct kbase_ioctl_mem_jit_init - Initialize the just-in-time memory
- * allocator
- * @va_pages: Number of GPU virtual address pages to reserve for just-in-time
- * memory allocations
- * @max_allocations: Maximum number of concurrent allocations
- * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%)
- * @group_id: Group ID to be used for physical allocations
- * @padding: Currently unused, must be zero
- * @phys_pages: Maximum number of physical pages to allocate just-in-time
- *
- * Note that depending on the VA size of the application and GPU, the value
- * specified in @va_pages may be ignored.
- */
-struct kbase_ioctl_mem_jit_init {
- __u64 va_pages;
- __u8 max_allocations;
- __u8 trim_level;
- __u8 group_id;
- __u8 padding[5];
- __u64 phys_pages;
-};
-
-#define KBASE_IOCTL_MEM_JIT_INIT \
- _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init)
-
-/**
- * struct kbase_ioctl_mem_sync - Perform cache maintenance on memory
- *
- * @handle: GPU memory handle (GPU VA)
- * @user_addr: The address where it is mapped in user space
- * @size: The number of bytes to synchronise
- * @type: The direction to synchronise: 0 is sync to memory (clean),
- * 1 is sync from memory (invalidate). Use the BASE_SYNCSET_OP_xxx constants.
- * @padding: Padding to round up to a multiple of 8 bytes, must be zero
- */
-struct kbase_ioctl_mem_sync {
- __u64 handle;
- __u64 user_addr;
- __u64 size;
- __u8 type;
- __u8 padding[7];
-};
-
-#define KBASE_IOCTL_MEM_SYNC \
- _IOW(KBASE_IOCTL_TYPE, 15, struct kbase_ioctl_mem_sync)
-
-/**
- * union kbase_ioctl_mem_find_cpu_offset - Find the offset of a CPU pointer
- *
- * @in: Input parameters
- * @in.gpu_addr: The GPU address of the memory region
- * @in.cpu_addr: The CPU address to locate
- * @in.size: A size in bytes to validate is contained within the region
- * @out: Output parameters
- * @out.offset: The offset from the start of the memory region to @cpu_addr
- */
-union kbase_ioctl_mem_find_cpu_offset {
- struct {
- __u64 gpu_addr;
- __u64 cpu_addr;
- __u64 size;
- } in;
- struct {
- __u64 offset;
- } out;
-};
-
-#define KBASE_IOCTL_MEM_FIND_CPU_OFFSET \
- _IOWR(KBASE_IOCTL_TYPE, 16, union kbase_ioctl_mem_find_cpu_offset)
-
-/**
- * struct kbase_ioctl_get_context_id - Get the kernel context ID
- *
- * @id: The kernel context ID
- */
-struct kbase_ioctl_get_context_id {
- __u32 id;
-};
-
-#define KBASE_IOCTL_GET_CONTEXT_ID \
- _IOR(KBASE_IOCTL_TYPE, 17, struct kbase_ioctl_get_context_id)
-
-/**
- * struct kbase_ioctl_tlstream_acquire - Acquire a tlstream fd
- *
- * @flags: Flags
- *
- * The ioctl returns a file descriptor when successful
- */
-struct kbase_ioctl_tlstream_acquire {
- __u32 flags;
-};
-
-#define KBASE_IOCTL_TLSTREAM_ACQUIRE \
- _IOW(KBASE_IOCTL_TYPE, 18, struct kbase_ioctl_tlstream_acquire)
-
-#define KBASE_IOCTL_TLSTREAM_FLUSH \
- _IO(KBASE_IOCTL_TYPE, 19)
-
-/**
- * struct kbase_ioctl_mem_commit - Change the amount of memory backing a region
- *
- * @gpu_addr: The memory region to modify
- * @pages: The number of physical pages that should be present
- *
- * The ioctl may return on the following error codes or 0 for success:
- * -ENOMEM: Out of memory
- * -EINVAL: Invalid arguments
- */
-struct kbase_ioctl_mem_commit {
- __u64 gpu_addr;
- __u64 pages;
-};
-
-#define KBASE_IOCTL_MEM_COMMIT \
- _IOW(KBASE_IOCTL_TYPE, 20, struct kbase_ioctl_mem_commit)
-
-/**
- * union kbase_ioctl_mem_alias - Create an alias of memory regions
- * @in: Input parameters
- * @in.flags: Flags, see BASE_MEM_xxx
- * @in.stride: Bytes between start of each memory region
- * @in.nents: The number of regions to pack together into the alias
- * @in.aliasing_info: Pointer to an array of struct base_mem_aliasing_info
- * @out: Output parameters
- * @out.flags: Flags, see BASE_MEM_xxx
- * @out.gpu_va: Address of the new alias
- * @out.va_pages: Size of the new alias
- */
-union kbase_ioctl_mem_alias {
- struct {
- __u64 flags;
- __u64 stride;
- __u64 nents;
- __u64 aliasing_info;
- } in;
- struct {
- __u64 flags;
- __u64 gpu_va;
- __u64 va_pages;
- } out;
-};
-
-#define KBASE_IOCTL_MEM_ALIAS \
- _IOWR(KBASE_IOCTL_TYPE, 21, union kbase_ioctl_mem_alias)
-
-/**
- * union kbase_ioctl_mem_import - Import memory for use by the GPU
- * @in: Input parameters
- * @in.flags: Flags, see BASE_MEM_xxx
- * @in.phandle: Handle to the external memory
- * @in.type: Type of external memory, see base_mem_import_type
- * @in.padding: Amount of extra VA pages to append to the imported buffer
- * @out: Output parameters
- * @out.flags: Flags, see BASE_MEM_xxx
- * @out.gpu_va: Address of the new alias
- * @out.va_pages: Size of the new alias
- */
-union kbase_ioctl_mem_import {
- struct {
- __u64 flags;
- __u64 phandle;
- __u32 type;
- __u32 padding;
- } in;
- struct {
- __u64 flags;
- __u64 gpu_va;
- __u64 va_pages;
- } out;
-};
-
-#define KBASE_IOCTL_MEM_IMPORT \
- _IOWR(KBASE_IOCTL_TYPE, 22, union kbase_ioctl_mem_import)
-
-/**
- * struct kbase_ioctl_mem_flags_change - Change the flags for a memory region
- * @gpu_va: The GPU region to modify
- * @flags: The new flags to set
- * @mask: Mask of the flags to modify
- */
-struct kbase_ioctl_mem_flags_change {
- __u64 gpu_va;
- __u64 flags;
- __u64 mask;
-};
-
-#define KBASE_IOCTL_MEM_FLAGS_CHANGE \
- _IOW(KBASE_IOCTL_TYPE, 23, struct kbase_ioctl_mem_flags_change)
-
-/**
- * struct kbase_ioctl_stream_create - Create a synchronisation stream
- * @name: A name to identify this stream. Must be NULL-terminated.
- *
- * Note that this is also called a "timeline", but is named stream to avoid
- * confusion with other uses of the word.
- *
- * Unused bytes in @name (after the first NULL byte) must be also be NULL bytes.
- *
- * The ioctl returns a file descriptor.
- */
-struct kbase_ioctl_stream_create {
- char name[32];
-};
-
-#define KBASE_IOCTL_STREAM_CREATE \
- _IOW(KBASE_IOCTL_TYPE, 24, struct kbase_ioctl_stream_create)
-
-/**
- * struct kbase_ioctl_fence_validate - Validate a fd refers to a fence
- * @fd: The file descriptor to validate
- */
-struct kbase_ioctl_fence_validate {
- int fd;
-};
-
-#define KBASE_IOCTL_FENCE_VALIDATE \
- _IOW(KBASE_IOCTL_TYPE, 25, struct kbase_ioctl_fence_validate)
-
-/**
- * struct kbase_ioctl_mem_profile_add - Provide profiling information to kernel
- * @buffer: Pointer to the information
- * @len: Length
- * @padding: Padding
- *
- * The data provided is accessible through a debugfs file
- */
-struct kbase_ioctl_mem_profile_add {
- __u64 buffer;
- __u32 len;
- __u32 padding;
-};
-
-#define KBASE_IOCTL_MEM_PROFILE_ADD \
- _IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add)
-
-/**
- * struct kbase_ioctl_sticky_resource_map - Permanently map an external resource
- * @count: Number of resources
- * @address: Array of u64 GPU addresses of the external resources to map
- */
-struct kbase_ioctl_sticky_resource_map {
- __u64 count;
- __u64 address;
-};
-
-#define KBASE_IOCTL_STICKY_RESOURCE_MAP \
- _IOW(KBASE_IOCTL_TYPE, 29, struct kbase_ioctl_sticky_resource_map)
-
-/**
- * struct kbase_ioctl_sticky_resource_map - Unmap a resource mapped which was
- * previously permanently mapped
- * @count: Number of resources
- * @address: Array of u64 GPU addresses of the external resources to unmap
- */
-struct kbase_ioctl_sticky_resource_unmap {
- __u64 count;
- __u64 address;
-};
-
-#define KBASE_IOCTL_STICKY_RESOURCE_UNMAP \
- _IOW(KBASE_IOCTL_TYPE, 30, struct kbase_ioctl_sticky_resource_unmap)
-
-/**
- * union kbase_ioctl_mem_find_gpu_start_and_offset - Find the start address of
- * the GPU memory region for
- * the given gpu address and
- * the offset of that address
- * into the region
- * @in: Input parameters
- * @in.gpu_addr: GPU virtual address
- * @in.size: Size in bytes within the region
- * @out: Output parameters
- * @out.start: Address of the beginning of the memory region enclosing @gpu_addr
- * for the length of @offset bytes
- * @out.offset: The offset from the start of the memory region to @gpu_addr
- */
-union kbase_ioctl_mem_find_gpu_start_and_offset {
- struct {
- __u64 gpu_addr;
- __u64 size;
- } in;
- struct {
- __u64 start;
- __u64 offset;
- } out;
-};
-
-#define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \
- _IOWR(KBASE_IOCTL_TYPE, 31, union kbase_ioctl_mem_find_gpu_start_and_offset)
-
-
-#define KBASE_IOCTL_CINSTR_GWT_START \
- _IO(KBASE_IOCTL_TYPE, 33)
-
-#define KBASE_IOCTL_CINSTR_GWT_STOP \
- _IO(KBASE_IOCTL_TYPE, 34)
-
-/**
- * union kbase_ioctl_gwt_dump - Used to collect all GPU write fault addresses.
- * @in: Input parameters
- * @in.addr_buffer: Address of buffer to hold addresses of gpu modified areas.
- * @in.size_buffer: Address of buffer to hold size of modified areas (in pages)
- * @in.len: Number of addresses the buffers can hold.
- * @in.padding: padding
- * @out: Output parameters
- * @out.no_of_addr_collected: Number of addresses collected into addr_buffer.
- * @out.more_data_available: Status indicating if more addresses are available.
- * @out.padding: padding
- *
- * This structure is used when performing a call to dump GPU write fault
- * addresses.
- */
-union kbase_ioctl_cinstr_gwt_dump {
- struct {
- __u64 addr_buffer;
- __u64 size_buffer;
- __u32 len;
- __u32 padding;
-
- } in;
- struct {
- __u32 no_of_addr_collected;
- __u8 more_data_available;
- __u8 padding[27];
- } out;
-};
-
-#define KBASE_IOCTL_CINSTR_GWT_DUMP \
- _IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump)
-
-/**
- * struct kbase_ioctl_mem_exec_init - Initialise the EXEC_VA memory zone
- *
- * @va_pages: Number of VA pages to reserve for EXEC_VA
- */
-struct kbase_ioctl_mem_exec_init {
- __u64 va_pages;
-};
-
-#define KBASE_IOCTL_MEM_EXEC_INIT \
- _IOW(KBASE_IOCTL_TYPE, 38, struct kbase_ioctl_mem_exec_init)
-
-/**
- * union kbase_ioctl_get_cpu_gpu_timeinfo - Request zero or more types of
- * cpu/gpu time (counter values)
- * @in: Input parameters
- * @in.request_flags: Bit-flags indicating the requested types.
- * @in.paddings: Unused, size alignment matching the out.
- * @out: Output parameters
- * @out.sec: Integer field of the monotonic time, unit in seconds.
- * @out.nsec: Fractional sec of the monotonic time, in nano-seconds.
- * @out.padding: Unused, for u64 alignment
- * @out.timestamp: System wide timestamp (counter) value.
- * @out.cycle_counter: GPU cycle counter value.
- */
-union kbase_ioctl_get_cpu_gpu_timeinfo {
- struct {
- __u32 request_flags;
- __u32 paddings[7];
- } in;
- struct {
- __u64 sec;
- __u32 nsec;
- __u32 padding;
- __u64 timestamp;
- __u64 cycle_counter;
- } out;
-};
-
-#define KBASE_IOCTL_GET_CPU_GPU_TIMEINFO \
- _IOWR(KBASE_IOCTL_TYPE, 50, union kbase_ioctl_get_cpu_gpu_timeinfo)
-
-/**
- * struct kbase_ioctl_context_priority_check - Check the max possible priority
- * @priority: Input priority & output priority
- */
-
-struct kbase_ioctl_context_priority_check {
- __u8 priority;
-};
-
-#define KBASE_IOCTL_CONTEXT_PRIORITY_CHECK \
- _IOWR(KBASE_IOCTL_TYPE, 54, struct kbase_ioctl_context_priority_check)
-
-/***************
- * test ioctls *
- ***************/
-#if MALI_UNIT_TEST
-/* These ioctls are purely for test purposes and are not used in the production
- * driver, they therefore may change without notice
- */
-
-#define KBASE_IOCTL_TEST_TYPE (KBASE_IOCTL_TYPE + 1)
-
-/**
- * struct kbase_ioctl_tlstream_test - Start a timeline stream test
- *
- * @tpw_count: number of trace point writers in each context
- * @msg_delay: time delay between tracepoints from one writer in milliseconds
- * @msg_count: number of trace points written by one writer
- * @aux_msg: if non-zero aux messages will be included
- */
-struct kbase_ioctl_tlstream_test {
- __u32 tpw_count;
- __u32 msg_delay;
- __u32 msg_count;
- __u32 aux_msg;
-};
-
-#define KBASE_IOCTL_TLSTREAM_TEST \
- _IOW(KBASE_IOCTL_TEST_TYPE, 1, struct kbase_ioctl_tlstream_test)
-
-/**
- * struct kbase_ioctl_tlstream_stats - Read tlstream stats for test purposes
- * @bytes_collected: number of bytes read by user
- * @bytes_generated: number of bytes generated by tracepoints
- */
-struct kbase_ioctl_tlstream_stats {
- __u32 bytes_collected;
- __u32 bytes_generated;
-};
-
-#define KBASE_IOCTL_TLSTREAM_STATS \
- _IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats)
-
-#endif /* MALI_UNIT_TEST */
-
-/* Customer extension range */
-#define KBASE_IOCTL_EXTRA_TYPE (KBASE_IOCTL_TYPE + 2)
-
-/* If the integration needs extra ioctl add them there
- * like this:
- *
- * struct my_ioctl_args {
- * ....
- * }
- *
- * #define KBASE_IOCTL_MY_IOCTL \
- * _IOWR(KBASE_IOCTL_EXTRA_TYPE, 0, struct my_ioctl_args)
- */
-
-
-/**********************************
- * Definitions for GPU properties *
- **********************************/
-#define KBASE_GPUPROP_VALUE_SIZE_U8 (0x0)
-#define KBASE_GPUPROP_VALUE_SIZE_U16 (0x1)
-#define KBASE_GPUPROP_VALUE_SIZE_U32 (0x2)
-#define KBASE_GPUPROP_VALUE_SIZE_U64 (0x3)
-
-#define KBASE_GPUPROP_PRODUCT_ID 1
-#define KBASE_GPUPROP_VERSION_STATUS 2
-#define KBASE_GPUPROP_MINOR_REVISION 3
-#define KBASE_GPUPROP_MAJOR_REVISION 4
-/* 5 previously used for GPU speed */
-#define KBASE_GPUPROP_GPU_FREQ_KHZ_MAX 6
-/* 7 previously used for minimum GPU speed */
-#define KBASE_GPUPROP_LOG2_PROGRAM_COUNTER_SIZE 8
-#define KBASE_GPUPROP_TEXTURE_FEATURES_0 9
-#define KBASE_GPUPROP_TEXTURE_FEATURES_1 10
-#define KBASE_GPUPROP_TEXTURE_FEATURES_2 11
-#define KBASE_GPUPROP_GPU_AVAILABLE_MEMORY_SIZE 12
-
-#define KBASE_GPUPROP_L2_LOG2_LINE_SIZE 13
-#define KBASE_GPUPROP_L2_LOG2_CACHE_SIZE 14
-#define KBASE_GPUPROP_L2_NUM_L2_SLICES 15
-
-#define KBASE_GPUPROP_TILER_BIN_SIZE_BYTES 16
-#define KBASE_GPUPROP_TILER_MAX_ACTIVE_LEVELS 17
-
-#define KBASE_GPUPROP_MAX_THREADS 18
-#define KBASE_GPUPROP_MAX_WORKGROUP_SIZE 19
-#define KBASE_GPUPROP_MAX_BARRIER_SIZE 20
-#define KBASE_GPUPROP_MAX_REGISTERS 21
-#define KBASE_GPUPROP_MAX_TASK_QUEUE 22
-#define KBASE_GPUPROP_MAX_THREAD_GROUP_SPLIT 23
-#define KBASE_GPUPROP_IMPL_TECH 24
-
-#define KBASE_GPUPROP_RAW_SHADER_PRESENT 25
-#define KBASE_GPUPROP_RAW_TILER_PRESENT 26
-#define KBASE_GPUPROP_RAW_L2_PRESENT 27
-#define KBASE_GPUPROP_RAW_STACK_PRESENT 28
-#define KBASE_GPUPROP_RAW_L2_FEATURES 29
-#define KBASE_GPUPROP_RAW_CORE_FEATURES 30
-#define KBASE_GPUPROP_RAW_MEM_FEATURES 31
-#define KBASE_GPUPROP_RAW_MMU_FEATURES 32
-#define KBASE_GPUPROP_RAW_AS_PRESENT 33
-#define KBASE_GPUPROP_RAW_JS_PRESENT 34
-#define KBASE_GPUPROP_RAW_JS_FEATURES_0 35
-#define KBASE_GPUPROP_RAW_JS_FEATURES_1 36
-#define KBASE_GPUPROP_RAW_JS_FEATURES_2 37
-#define KBASE_GPUPROP_RAW_JS_FEATURES_3 38
-#define KBASE_GPUPROP_RAW_JS_FEATURES_4 39
-#define KBASE_GPUPROP_RAW_JS_FEATURES_5 40
-#define KBASE_GPUPROP_RAW_JS_FEATURES_6 41
-#define KBASE_GPUPROP_RAW_JS_FEATURES_7 42
-#define KBASE_GPUPROP_RAW_JS_FEATURES_8 43
-#define KBASE_GPUPROP_RAW_JS_FEATURES_9 44
-#define KBASE_GPUPROP_RAW_JS_FEATURES_10 45
-#define KBASE_GPUPROP_RAW_JS_FEATURES_11 46
-#define KBASE_GPUPROP_RAW_JS_FEATURES_12 47
-#define KBASE_GPUPROP_RAW_JS_FEATURES_13 48
-#define KBASE_GPUPROP_RAW_JS_FEATURES_14 49
-#define KBASE_GPUPROP_RAW_JS_FEATURES_15 50
-#define KBASE_GPUPROP_RAW_TILER_FEATURES 51
-#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_0 52
-#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_1 53
-#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_2 54
-#define KBASE_GPUPROP_RAW_GPU_ID 55
-#define KBASE_GPUPROP_RAW_THREAD_MAX_THREADS 56
-#define KBASE_GPUPROP_RAW_THREAD_MAX_WORKGROUP_SIZE 57
-#define KBASE_GPUPROP_RAW_THREAD_MAX_BARRIER_SIZE 58
-#define KBASE_GPUPROP_RAW_THREAD_FEATURES 59
-#define KBASE_GPUPROP_RAW_COHERENCY_MODE 60
-
-#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS 61
-#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS 62
-#define KBASE_GPUPROP_COHERENCY_COHERENCY 63
-#define KBASE_GPUPROP_COHERENCY_GROUP_0 64
-#define KBASE_GPUPROP_COHERENCY_GROUP_1 65
-#define KBASE_GPUPROP_COHERENCY_GROUP_2 66
-#define KBASE_GPUPROP_COHERENCY_GROUP_3 67
-#define KBASE_GPUPROP_COHERENCY_GROUP_4 68
-#define KBASE_GPUPROP_COHERENCY_GROUP_5 69
-#define KBASE_GPUPROP_COHERENCY_GROUP_6 70
-#define KBASE_GPUPROP_COHERENCY_GROUP_7 71
-#define KBASE_GPUPROP_COHERENCY_GROUP_8 72
-#define KBASE_GPUPROP_COHERENCY_GROUP_9 73
-#define KBASE_GPUPROP_COHERENCY_GROUP_10 74
-#define KBASE_GPUPROP_COHERENCY_GROUP_11 75
-#define KBASE_GPUPROP_COHERENCY_GROUP_12 76
-#define KBASE_GPUPROP_COHERENCY_GROUP_13 77
-#define KBASE_GPUPROP_COHERENCY_GROUP_14 78
-#define KBASE_GPUPROP_COHERENCY_GROUP_15 79
-
-#define KBASE_GPUPROP_TEXTURE_FEATURES_3 80
-#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3 81
-
-#define KBASE_GPUPROP_NUM_EXEC_ENGINES 82
-
-#define KBASE_GPUPROP_RAW_THREAD_TLS_ALLOC 83
-#define KBASE_GPUPROP_TLS_ALLOC 84
-#define KBASE_GPUPROP_RAW_GPU_FEATURES 85
-#ifdef __cpluscplus
-}
-#endif
-
-#endif
diff --git a/mali_kbase/mali_kbase_jd.c b/mali_kbase/mali_kbase_jd.c
index 1cf24a2..949c041 100644
--- a/mali_kbase/mali_kbase_jd.c
+++ b/mali_kbase/mali_kbase_jd.c
@@ -74,7 +74,7 @@ static void jd_mark_atom_complete(struct kbase_jd_atom *katom)
{
katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
kbase_kinstr_jm_atom_complete(katom);
- dev_dbg(katom->kctx->kbdev->dev, "Atom %p status to completed\n",
+ dev_dbg(katom->kctx->kbdev->dev, "Atom %pK status to completed\n",
(void *)katom);
}
@@ -89,7 +89,7 @@ static bool jd_run_atom(struct kbase_jd_atom *katom)
{
struct kbase_context *kctx = katom->kctx;
- dev_dbg(kctx->kbdev->dev, "JD run atom %p in kctx %p\n",
+ dev_dbg(kctx->kbdev->dev, "JD run atom %pK in kctx %pK\n",
(void *)katom, (void *)kctx);
KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
@@ -99,23 +99,23 @@ static bool jd_run_atom(struct kbase_jd_atom *katom)
trace_sysgraph(SGR_SUBMIT, kctx->id,
kbase_jd_atom_id(katom->kctx, katom));
jd_mark_atom_complete(katom);
- return 0;
+ return false;
} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
/* Soft-job */
if (katom->will_fail_event_code) {
kbase_finish_soft_job(katom);
jd_mark_atom_complete(katom);
- return 0;
+ return false;
}
if (kbase_process_soft_job(katom) == 0) {
kbase_finish_soft_job(katom);
jd_mark_atom_complete(katom);
}
- return 0;
+ return false;
}
katom->status = KBASE_JD_ATOM_STATE_IN_JS;
- dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", (void *)katom);
+ dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n", (void *)katom);
/* Queue an action about whether we should try scheduling a context */
return kbasep_js_add_job(kctx, katom);
}
@@ -758,7 +758,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
list_del(runnable_jobs.next);
node->in_jd_list = false;
- dev_dbg(kctx->kbdev->dev, "List node %p has status %d\n",
+ dev_dbg(kctx->kbdev->dev, "List node %pK has status %d\n",
node, node->status);
KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED);
@@ -901,7 +901,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
unsigned long flags;
enum kbase_jd_atom_state status;
- dev_dbg(kbdev->dev, "User did JD submit atom %p\n", (void *)katom);
+ dev_dbg(kbdev->dev, "User did JD submit atom %pK\n", (void *)katom);
/* Update the TOTAL number of jobs. This includes those not tracked by
* the scheduler: 'not ready to run' and 'dependency-only' jobs.
@@ -976,7 +976,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT;
katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
dev_dbg(kbdev->dev,
- "Atom %p status to completed\n",
+ "Atom %pK status to completed\n",
(void *)katom);
/* Wrong dependency setup. Atom will be sent
@@ -1019,7 +1019,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
/* Atom has completed, propagate the error code if any */
katom->event_code = dep_atom->event_code;
katom->status = KBASE_JD_ATOM_STATE_QUEUED;
- dev_dbg(kbdev->dev, "Atom %p status to queued\n",
+ dev_dbg(kbdev->dev, "Atom %pK status to queued\n",
(void *)katom);
/* This atom will be sent back to user space.
@@ -1062,7 +1062,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
*/
katom->event_code = BASE_JD_EVENT_DONE;
katom->status = KBASE_JD_ATOM_STATE_QUEUED;
- dev_dbg(kbdev->dev, "Atom %p status to queued\n", (void *)katom);
+ dev_dbg(kbdev->dev, "Atom %pK status to queued\n", (void *)katom);
/* For invalid priority, be most lenient and choose the default */
sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio);
@@ -1199,7 +1199,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
bool need_to_try_schedule_context;
katom->status = KBASE_JD_ATOM_STATE_IN_JS;
- dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n",
+ dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n",
(void *)katom);
need_to_try_schedule_context = kbasep_js_add_job(kctx, katom);
@@ -1270,7 +1270,7 @@ int kbase_jd_submit(struct kbase_context *kctx,
if (unlikely(jd_atom_is_v2)) {
if (copy_from_user(&user_atom.jc, user_addr, sizeof(struct base_jd_atom_v2)) != 0) {
- dev_err(kbdev->dev,
+ dev_dbg(kbdev->dev,
"Invalid atom address %p passed to job_submit\n",
user_addr);
err = -EFAULT;
@@ -1281,7 +1281,7 @@ int kbase_jd_submit(struct kbase_context *kctx,
user_atom.seq_nr = 0;
} else {
if (copy_from_user(&user_atom, user_addr, stride) != 0) {
- dev_err(kbdev->dev,
+ dev_dbg(kbdev->dev,
"Invalid atom address %p passed to job_submit\n",
user_addr);
err = -EFAULT;
@@ -1420,7 +1420,7 @@ void kbase_jd_done_worker(struct work_struct *data)
js_kctx_info = &kctx->jctx.sched_info;
js_devdata = &kbdev->js_data;
- dev_dbg(kbdev->dev, "Enter atom %p done worker for kctx %p\n",
+ dev_dbg(kbdev->dev, "Enter atom %pK done worker for kctx %pK\n",
(void *)katom, (void *)kctx);
KBASE_KTRACE_ADD_JM(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0);
@@ -1444,7 +1444,7 @@ void kbase_jd_done_worker(struct work_struct *data)
if (katom->event_code == BASE_JD_EVENT_STOPPED) {
unsigned long flags;
- dev_dbg(kbdev->dev, "Atom %p has been promoted to stopped\n",
+ dev_dbg(kbdev->dev, "Atom %pK has been promoted to stopped\n",
(void *)katom);
mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
mutex_unlock(&js_devdata->queue_mutex);
@@ -1452,7 +1452,7 @@ void kbase_jd_done_worker(struct work_struct *data)
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
katom->status = KBASE_JD_ATOM_STATE_IN_JS;
- dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n",
+ dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n",
(void *)katom);
kbase_js_unpull(kctx, katom);
@@ -1568,7 +1568,7 @@ void kbase_jd_done_worker(struct work_struct *data)
KBASE_KTRACE_ADD_JM(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0);
- dev_dbg(kbdev->dev, "Leave atom %p done worker for kctx %p\n",
+ dev_dbg(kbdev->dev, "Leave atom %pK done worker for kctx %pK\n",
(void *)katom, (void *)kctx);
}
@@ -1698,7 +1698,7 @@ void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
kctx = katom->kctx;
KBASE_DEBUG_ASSERT(kctx != NULL);
- dev_dbg(kbdev->dev, "JD: cancelling atom %p\n", (void *)katom);
+ dev_dbg(kbdev->dev, "JD: cancelling atom %pK\n", (void *)katom);
KBASE_KTRACE_ADD_JM(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0);
/* This should only be done from a context that is not scheduled */
diff --git a/mali_kbase/mali_kbase_jd_debugfs.c b/mali_kbase/mali_kbase_jd_debugfs.c
index 940b920..f423758 100644
--- a/mali_kbase/mali_kbase_jd_debugfs.c
+++ b/mali_kbase/mali_kbase_jd_debugfs.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
*
- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,7 +28,7 @@
#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
#include <mali_kbase_sync.h>
#endif
-#include <mali_kbase_ioctl.h>
+#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h>
struct kbase_jd_debugfs_depinfo {
u8 id;
@@ -46,13 +46,13 @@ static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom,
case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
res = kbase_sync_fence_out_info_get(atom, &info);
if (res == 0)
- seq_printf(sfile, "Sa([%p]%d) ",
+ seq_printf(sfile, "Sa([%pK]%d) ",
info.fence, info.status);
break;
case BASE_JD_REQ_SOFT_FENCE_WAIT:
res = kbase_sync_fence_in_info_get(atom, &info);
if (res == 0)
- seq_printf(sfile, "Wa([%p]%d) ",
+ seq_printf(sfile, "Wa([%pK]%d) ",
info.fence, info.status);
break;
default:
diff --git a/mali_kbase/mali_kbase_jm.c b/mali_kbase/mali_kbase_jm.c
index be14b45..73e9905 100644
--- a/mali_kbase/mali_kbase_jm.c
+++ b/mali_kbase/mali_kbase_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
*
- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -45,7 +45,7 @@ static bool kbase_jm_next_job(struct kbase_device *kbdev, int js,
kctx = kbdev->hwaccess.active_kctx[js];
dev_dbg(kbdev->dev,
- "Trying to run the next %d jobs in kctx %p (s:%d)\n",
+ "Trying to run the next %d jobs in kctx %pK (s:%d)\n",
nr_jobs_to_submit, (void *)kctx, js);
if (!kctx)
@@ -117,7 +117,7 @@ void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
if (kbdev->hwaccess.active_kctx[js] == kctx) {
- dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n",
+ dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n",
(void *)kctx, js);
kbdev->hwaccess.active_kctx[js] = NULL;
}
@@ -129,7 +129,7 @@ struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
{
lockdep_assert_held(&kbdev->hwaccess_lock);
- dev_dbg(kbdev->dev, "Atom %p is returning with event code 0x%x\n",
+ dev_dbg(kbdev->dev, "Atom %pK is returning with event code 0x%x\n",
(void *)katom, katom->event_code);
if (katom->event_code != BASE_JD_EVENT_STOPPED &&
diff --git a/mali_kbase/mali_kbase_js.c b/mali_kbase/mali_kbase_js.c
index ea317b2..6bb57e6 100644
--- a/mali_kbase/mali_kbase_js.c
+++ b/mali_kbase/mali_kbase_js.c
@@ -162,7 +162,7 @@ jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
none_to_pull = RB_EMPTY_ROOT(&rb->runnable_tree);
dev_dbg(kctx->kbdev->dev,
- "Slot %d (prio %d) is %spullable in kctx %p\n",
+ "Slot %d (prio %d) is %spullable in kctx %pK\n",
js, prio, none_to_pull ? "not " : "", kctx);
return none_to_pull;
@@ -186,7 +186,7 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js)
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
- for (prio = KBASE_JS_ATOM_SCHED_PRIO_REALTIME;
+ for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST;
prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
if (!jsctx_rb_none_to_pull_prio(kctx, js, prio))
return false;
@@ -236,7 +236,7 @@ jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio,
WARN_ON(!(entry->core_req &
BASE_JD_REQ_END_RENDERPASS));
dev_dbg(kctx->kbdev->dev,
- "Del runnable atom %p from X_DEP list\n",
+ "Del runnable atom %pK from X_DEP list\n",
(void *)entry);
list_del(&entry->queue);
@@ -252,7 +252,7 @@ jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio,
WARN_ON(!(entry->atom_flags &
KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST));
dev_dbg(kctx->kbdev->dev,
- "Del blocked atom %p from X_DEP list\n",
+ "Del blocked atom %pK from X_DEP list\n",
(void *)entry);
list_del(queue->x_dep_head.next);
@@ -279,7 +279,7 @@ jsctx_queue_foreach(struct kbase_context *kctx, int js,
{
int prio;
- for (prio = KBASE_JS_ATOM_SCHED_PRIO_REALTIME;
+ for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST;
prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++)
jsctx_queue_foreach_prio(kctx, js, prio, callback);
}
@@ -303,7 +303,7 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
dev_dbg(kctx->kbdev->dev,
- "Peeking runnable tree of kctx %p for prio %d (s:%d)\n",
+ "Peeking runnable tree of kctx %pK for prio %d (s:%d)\n",
(void *)kctx, prio, js);
node = rb_first(&rb->runnable_tree);
@@ -335,7 +335,7 @@ jsctx_rb_peek(struct kbase_context *kctx, int js)
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
- for (prio = KBASE_JS_ATOM_SCHED_PRIO_REALTIME;
+ for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST;
prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
struct kbase_jd_atom *katom;
@@ -365,7 +365,7 @@ jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
- dev_dbg(kctx->kbdev->dev, "Erasing atom %p from runnable tree of kctx %p\n",
+ dev_dbg(kctx->kbdev->dev, "Erasing atom %pK from runnable tree of kctx %pK\n",
(void *)katom, (void *)kctx);
/* Atoms must be pulled in the correct order. */
@@ -387,7 +387,7 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom)
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
- dev_dbg(kbdev->dev, "Adding atom %p to runnable tree of kctx %p (s:%d)\n",
+ dev_dbg(kbdev->dev, "Adding atom %pK to runnable tree of kctx %pK (s:%d)\n",
(void *)katom, (void *)kctx, js);
while (*new) {
@@ -542,7 +542,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev)
sema_init(&jsdd->schedule_sem, 1);
for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) {
- for (j = 0; j < KBASE_JS_ATOM_SCHED_PRIO_COUNT; ++j) {
+ for (j = KBASE_JS_ATOM_SCHED_PRIO_FIRST; j < KBASE_JS_ATOM_SCHED_PRIO_COUNT; ++j) {
INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i][j]);
INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i][j]);
}
@@ -610,7 +610,7 @@ int kbasep_js_kctx_init(struct kbase_context *const kctx)
init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait);
- for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
+ for (i = KBASE_JS_ATOM_SCHED_PRIO_FIRST; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) {
INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head);
kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT;
@@ -684,7 +684,7 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
bool ret = false;
lockdep_assert_held(&kbdev->hwaccess_lock);
- dev_dbg(kbdev->dev, "Add pullable tail kctx %p (s:%d)\n",
+ dev_dbg(kbdev->dev, "Add pullable tail kctx %pK (s:%d)\n",
(void *)kctx, js);
if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
@@ -726,7 +726,7 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock(
bool ret = false;
lockdep_assert_held(&kbdev->hwaccess_lock);
- dev_dbg(kbdev->dev, "Add pullable head kctx %p (s:%d)\n",
+ dev_dbg(kbdev->dev, "Add pullable head kctx %pK (s:%d)\n",
(void *)kctx, js);
if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
@@ -802,7 +802,7 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
bool ret = false;
lockdep_assert_held(&kbdev->hwaccess_lock);
- dev_dbg(kbdev->dev, "Add unpullable tail kctx %p (s:%d)\n",
+ dev_dbg(kbdev->dev, "Add unpullable tail kctx %pK (s:%d)\n",
(void *)kctx, js);
list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
@@ -885,7 +885,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(
lockdep_assert_held(&kbdev->hwaccess_lock);
- for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
+ for (i = KBASE_JS_ATOM_SCHED_PRIO_FIRST; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
if (list_empty(&kbdev->js_data.ctx_list_pullable[js][i]))
continue;
@@ -895,7 +895,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(
list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
dev_dbg(kbdev->dev,
- "Popped %p from the pullable queue (s:%d)\n",
+ "Popped %pK from the pullable queue (s:%d)\n",
(void *)kctx, js);
return kctx;
}
@@ -949,25 +949,25 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
if (is_scheduled) {
if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) {
- dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %p\n",
+ dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %pK\n",
(void *)kctx);
return false;
}
}
katom = jsctx_rb_peek(kctx, js);
if (!katom) {
- dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %p (s:%d)\n",
+ dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%d)\n",
(void *)kctx, js);
return false; /* No pullable atoms */
}
if (kctx->blocked_js[js][katom->sched_priority]) {
dev_dbg(kbdev->dev,
- "JS: kctx %p is blocked from submitting atoms at priority %d (s:%d)\n",
+ "JS: kctx %pK is blocked from submitting atoms at priority %d (s:%d)\n",
(void *)kctx, katom->sched_priority, js);
return false;
}
if (atomic_read(&katom->blocked)) {
- dev_dbg(kbdev->dev, "JS: Atom %p is blocked in js_ctx_pullable\n",
+ dev_dbg(kbdev->dev, "JS: Atom %pK is blocked in js_ctx_pullable\n",
(void *)katom);
return false; /* next atom blocked */
}
@@ -976,20 +976,20 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
katom->x_pre_dep->will_fail_event_code) {
dev_dbg(kbdev->dev,
- "JS: X pre-dep %p is not present in slot FIFO or will fail\n",
+ "JS: X pre-dep %pK is not present in slot FIFO or will fail\n",
(void *)katom->x_pre_dep);
return false;
}
if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) {
dev_dbg(kbdev->dev,
- "JS: Atom %p has cross-slot fail dependency and atoms on slot (s:%d)\n",
+ "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%d)\n",
(void *)katom, js);
return false;
}
}
- dev_dbg(kbdev->dev, "JS: Atom %p is pullable in kctx %p (s:%d)\n",
+ dev_dbg(kbdev->dev, "JS: Atom %pK is pullable in kctx %pK (s:%d)\n",
(void *)katom, (void *)kctx, js);
return true;
@@ -1013,7 +1013,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
int dep_prio = dep_atom->sched_priority;
dev_dbg(kbdev->dev,
- "Checking dep %d of atom %p (s:%d) on %p (s:%d)\n",
+ "Checking dep %d of atom %pK (s:%d) on %pK (s:%d)\n",
i, (void *)katom, js, (void *)dep_atom, dep_js);
/* Dependent atom must already have been submitted */
@@ -1115,7 +1115,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
int dep_js = kbase_js_get_slot(kbdev, dep_atom);
dev_dbg(kbdev->dev,
- "Clearing dep %d of atom %p (s:%d) on %p (s:%d)\n",
+ "Clearing dep %d of atom %pK (s:%d) on %pK (s:%d)\n",
i, (void *)katom, js, (void *)dep_atom,
dep_js);
@@ -1130,7 +1130,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
katom->atom_flags |=
KBASE_KATOM_FLAG_X_DEP_BLOCKED;
- dev_dbg(kbdev->dev, "Set X_DEP flag on atom %p\n",
+ dev_dbg(kbdev->dev, "Set X_DEP flag on atom %pK\n",
(void *)katom);
katom->x_pre_dep = dep_atom;
@@ -1154,7 +1154,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
}
} else {
dev_dbg(kbdev->dev,
- "Deps of atom %p (s:%d) could not be represented\n",
+ "Deps of atom %pK (s:%d) could not be represented\n",
(void *)katom, js);
}
@@ -1195,7 +1195,7 @@ void kbase_js_update_ctx_priority(struct kbase_context *kctx)
/* Determine the new priority for context, as per the priority
* of currently in-use atoms.
*/
- for (prio = KBASE_JS_ATOM_SCHED_PRIO_REALTIME;
+ for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST;
prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
if (kctx->atoms_count[prio]) {
new_priority = prio;
@@ -1237,7 +1237,7 @@ static int js_add_start_rp(struct kbase_jd_atom *const start_katom)
if (rp->state != KBASE_JD_RP_COMPLETE)
return -EINVAL;
- dev_dbg(kctx->kbdev->dev, "JS add start atom %p of RP %d\n",
+ dev_dbg(kctx->kbdev->dev, "JS add start atom %pK of RP %d\n",
(void *)start_katom, start_katom->renderpass_id);
/* The following members are read when updating the job slot
@@ -1280,7 +1280,7 @@ static int js_add_end_rp(struct kbase_jd_atom *const end_katom)
rp = &kctx->jctx.renderpasses[end_katom->renderpass_id];
- dev_dbg(kbdev->dev, "JS add end atom %p in state %d of RP %d\n",
+ dev_dbg(kbdev->dev, "JS add end atom %pK in state %d of RP %d\n",
(void *)end_katom, (int)rp->state, end_katom->renderpass_id);
if (rp->state == KBASE_JD_RP_COMPLETE)
@@ -1347,7 +1347,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx,
/* Refcount ctx.nr_jobs */
KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX);
++(js_kctx_info->ctx.nr_jobs);
- dev_dbg(kbdev->dev, "Add atom %p to kctx %p; now %d in ctx\n",
+ dev_dbg(kbdev->dev, "Add atom %pK to kctx %pK; now %d in ctx\n",
(void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs);
/* Lock for state available during IRQ */
@@ -1360,14 +1360,14 @@ bool kbasep_js_add_job(struct kbase_context *kctx,
/* Dependencies could not be represented */
--(js_kctx_info->ctx.nr_jobs);
dev_dbg(kbdev->dev,
- "Remove atom %p from kctx %p; now %d in ctx\n",
+ "Remove atom %pK from kctx %pK; now %d in ctx\n",
(void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs);
/* Setting atom status back to queued as it still has unresolved
* dependencies
*/
atom->status = KBASE_JD_ATOM_STATE_QUEUED;
- dev_dbg(kbdev->dev, "Atom %p status to queued\n", (void *)atom);
+ dev_dbg(kbdev->dev, "Atom %pK status to queued\n", (void *)atom);
/* Undo the count, as the atom will get added again later but
* leave the context priority adjusted or boosted, in case if
@@ -1430,7 +1430,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx,
* context on the Queue
*/
KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
- dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx);
+ dev_dbg(kbdev->dev, "JS: Enqueue Context %pK", kctx);
/* Queue was updated - caller must try to schedule the
* head context
@@ -1439,7 +1439,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx,
}
}
out_unlock:
- dev_dbg(kbdev->dev, "Enqueue of kctx %p is %srequired\n",
+ dev_dbg(kbdev->dev, "Enqueue of kctx %pK is %srequired\n",
kctx, enqueue_required ? "" : "not ");
mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
@@ -1468,7 +1468,7 @@ void kbasep_js_remove_job(struct kbase_device *kbdev,
KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0);
--(js_kctx_info->ctx.nr_jobs);
dev_dbg(kbdev->dev,
- "Remove atom %p from kctx %p; now %d in ctx\n",
+ "Remove atom %pK from kctx %pK; now %d in ctx\n",
(void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -1660,7 +1660,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
/* Last reference, and we've been told to remove this context
* from the Run Pool
*/
- dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because refcount=%d, jobs=%d, allowed=%d",
+ dev_dbg(kbdev->dev, "JS: RunPool Remove Context %pK because refcount=%d, jobs=%d, allowed=%d",
kctx, new_ref_count, js_kctx_info->ctx.nr_jobs,
kbasep_js_is_submit_allowed(js_devdata, kctx));
@@ -1670,7 +1670,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
for (slot = 0; slot < num_slots; slot++) {
if (kbdev->hwaccess.active_kctx[slot] == kctx) {
- dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n",
+ dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n",
(void *)kctx, slot);
kbdev->hwaccess.active_kctx[slot] = NULL;
}
@@ -1773,7 +1773,7 @@ void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev,
* happens asynchronously
*/
dev_dbg(kbdev->dev,
- "JS: ** Killing Context %p on RunPool Remove **", kctx);
+ "JS: ** Killing Context %pK on RunPool Remove **", kctx);
kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel);
}
}
@@ -1879,7 +1879,7 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
bool kctx_suspended = false;
int as_nr;
- dev_dbg(kbdev->dev, "Scheduling kctx %p (s:%d)\n", kctx, js);
+ dev_dbg(kbdev->dev, "Scheduling kctx %pK (s:%d)\n", kctx, js);
js_devdata = &kbdev->js_data;
js_kctx_info = &kctx->jctx.sched_info;
@@ -2025,7 +2025,7 @@ static bool kbase_js_use_ctx(struct kbase_device *kbdev,
kbase_backend_use_ctx_sched(kbdev, kctx, js)) {
dev_dbg(kbdev->dev,
- "kctx %p already has ASID - mark as active (s:%d)\n",
+ "kctx %pK already has ASID - mark as active (s:%d)\n",
(void *)kctx, js);
if (kbdev->hwaccess.active_kctx[js] != kctx) {
@@ -2200,7 +2200,7 @@ void kbasep_js_resume(struct kbase_device *kbdev)
mutex_lock(&js_devdata->queue_mutex);
for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
- for (prio = KBASE_JS_ATOM_SCHED_PRIO_REALTIME;
+ for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST;
prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
struct kbase_context *kctx, *n;
unsigned long flags;
@@ -2336,7 +2336,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
int js = katom->slot_nr;
struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
- dev_dbg(kctx->kbdev->dev, "Add atom %p to X_DEP list (s:%d)\n",
+ dev_dbg(kctx->kbdev->dev, "Add atom %pK to X_DEP list (s:%d)\n",
(void *)katom, js);
list_add_tail(&katom->queue, &queue->x_dep_head);
@@ -2346,7 +2346,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
add_required = false;
}
} else {
- dev_dbg(kctx->kbdev->dev, "Atom %p not added to X_DEP list\n",
+ dev_dbg(kctx->kbdev->dev, "Atom %pK not added to X_DEP list\n",
(void *)katom);
}
@@ -2360,7 +2360,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
}
dev_dbg(kctx->kbdev->dev,
- "Enqueue of kctx %p is %srequired to submit atom %p\n",
+ "Enqueue of kctx %pK is %srequired to submit atom %pK\n",
kctx, enqueue_required ? "" : "not ", katom);
return enqueue_required;
@@ -2387,7 +2387,7 @@ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom)
if (!kbase_js_atom_blocked_on_x_dep(katom)) {
dev_dbg(kctx->kbdev->dev,
- "Del atom %p from X_DEP list in js_move_to_tree\n",
+ "Del atom %pK from X_DEP list in js_move_to_tree\n",
(void *)katom);
list_del(&katom->queue);
@@ -2405,7 +2405,7 @@ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom)
}
} else {
dev_dbg(kctx->kbdev->dev,
- "Atom %p blocked on x-dep in js_move_to_tree\n",
+ "Atom %pK blocked on x-dep in js_move_to_tree\n",
(void *)katom);
break;
}
@@ -2449,7 +2449,7 @@ static void kbase_js_evict_deps(struct kbase_context *kctx,
/* Remove dependency.*/
x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
- dev_dbg(kctx->kbdev->dev, "Cleared X_DEP flag on atom %p\n",
+ dev_dbg(kctx->kbdev->dev, "Cleared X_DEP flag on atom %pK\n",
(void *)x_dep);
/* Fail if it had a data dependency. */
@@ -2471,14 +2471,14 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
KBASE_DEBUG_ASSERT(kctx);
kbdev = kctx->kbdev;
- dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %p (s:%d)\n",
+ dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %pK (s:%d)\n",
(void *)kctx, js);
js_devdata = &kbdev->js_data;
lockdep_assert_held(&kbdev->hwaccess_lock);
if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) {
- dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %p\n",
+ dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %pK\n",
(void *)kctx);
return NULL;
}
@@ -2491,18 +2491,18 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
katom = jsctx_rb_peek(kctx, js);
if (!katom) {
- dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %p (s:%d)\n",
+ dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%d)\n",
(void *)kctx, js);
return NULL;
}
if (kctx->blocked_js[js][katom->sched_priority]) {
dev_dbg(kbdev->dev,
- "JS: kctx %p is blocked from submitting atoms at priority %d (s:%d)\n",
+ "JS: kctx %pK is blocked from submitting atoms at priority %d (s:%d)\n",
(void *)kctx, katom->sched_priority, js);
return NULL;
}
if (atomic_read(&katom->blocked)) {
- dev_dbg(kbdev->dev, "JS: Atom %p is blocked in js_pull\n",
+ dev_dbg(kbdev->dev, "JS: Atom %pK is blocked in js_pull\n",
(void *)katom);
return NULL;
}
@@ -2524,14 +2524,14 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
katom->x_pre_dep->will_fail_event_code) {
dev_dbg(kbdev->dev,
- "JS: X pre-dep %p is not present in slot FIFO or will fail\n",
+ "JS: X pre-dep %pK is not present in slot FIFO or will fail\n",
(void *)katom->x_pre_dep);
return NULL;
}
if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
kbase_backend_nr_atoms_on_slot(kbdev, js)) {
dev_dbg(kbdev->dev,
- "JS: Atom %p has cross-slot fail dependency and atoms on slot (s:%d)\n",
+ "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%d)\n",
(void *)katom, js);
return NULL;
}
@@ -2556,7 +2556,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
katom->ticks = 0;
- dev_dbg(kbdev->dev, "JS: successfully pulled atom %p from kctx %p (s:%d)\n",
+ dev_dbg(kbdev->dev, "JS: successfully pulled atom %pK from kctx %pK (s:%d)\n",
(void *)katom, (void *)kctx, js);
return katom;
@@ -2599,7 +2599,7 @@ static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom)
return;
dev_dbg(kctx->kbdev->dev,
- "JS return start atom %p in state %d of RP %d\n",
+ "JS return start atom %pK in state %d of RP %d\n",
(void *)start_katom, (int)rp->state,
start_katom->renderpass_id);
@@ -2627,7 +2627,7 @@ static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom)
/* Prevent the tiler job being pulled for execution in the
* job scheduler again.
*/
- dev_dbg(kbdev->dev, "Blocking start atom %p\n",
+ dev_dbg(kbdev->dev, "Blocking start atom %pK\n",
(void *)start_katom);
atomic_inc(&start_katom->blocked);
@@ -2639,14 +2639,14 @@ static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom)
/* Was the fragment job chain submitted to kbase yet? */
end_katom = rp->end_katom;
if (end_katom) {
- dev_dbg(kctx->kbdev->dev, "JS return add end atom %p\n",
+ dev_dbg(kctx->kbdev->dev, "JS return add end atom %pK\n",
(void *)end_katom);
if (rp->state == KBASE_JD_RP_RETRY_OOM) {
/* Allow the end of the renderpass to be pulled for
* execution again to continue incremental rendering.
*/
- dev_dbg(kbdev->dev, "Unblocking end atom %p\n",
+ dev_dbg(kbdev->dev, "Unblocking end atom %pK\n",
(void *)end_katom);
atomic_dec(&end_katom->blocked);
WARN_ON(!(end_katom->atom_flags &
@@ -2708,7 +2708,7 @@ static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom)
return;
dev_dbg(kctx->kbdev->dev,
- "JS return end atom %p in state %d of RP %d\n",
+ "JS return end atom %pK in state %d of RP %d\n",
(void *)end_katom, (int)rp->state, end_katom->renderpass_id);
if (WARN_ON(rp->state != KBASE_JD_RP_OOM &&
@@ -2730,14 +2730,14 @@ static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom)
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
dev_dbg(kbdev->dev,
- "Reset backing to %zu pages for region %p\n",
+ "Reset backing to %zu pages for region %pK\n",
reg->threshold_pages, (void *)reg);
if (!WARN_ON(reg->flags & KBASE_REG_VA_FREED))
kbase_mem_shrink(kctx, reg, reg->threshold_pages);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- dev_dbg(kbdev->dev, "Deleting region %p from list\n",
+ dev_dbg(kbdev->dev, "Deleting region %pK from list\n",
(void *)reg);
list_del_init(&reg->link);
kbase_va_region_alloc_put(kctx, reg);
@@ -2755,7 +2755,7 @@ static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom)
*/
start_katom = rp->start_katom;
if (!WARN_ON(!start_katom)) {
- dev_dbg(kbdev->dev, "Unblocking start atom %p\n",
+ dev_dbg(kbdev->dev, "Unblocking start atom %pK\n",
(void *)start_katom);
atomic_dec(&start_katom->blocked);
(void)kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx,
@@ -2781,7 +2781,7 @@ static void js_return_worker(struct work_struct *data)
unsigned long flags;
base_jd_core_req core_req = katom->core_req;
- dev_dbg(kbdev->dev, "%s for atom %p with event code 0x%x\n",
+ dev_dbg(kbdev->dev, "%s for atom %pK with event code 0x%x\n",
__func__, (void *)katom, katom->event_code);
if (katom->event_code != BASE_JD_EVENT_END_RP_DONE)
@@ -2826,12 +2826,12 @@ static void js_return_worker(struct work_struct *data)
if (!atomic_read(&kctx->atoms_pulled)) {
dev_dbg(kbdev->dev,
- "No atoms currently pulled from context %p\n",
+ "No atoms currently pulled from context %pK\n",
(void *)kctx);
if (!kctx->slots_pullable) {
dev_dbg(kbdev->dev,
- "Context %p %s counted as runnable\n",
+ "Context %pK %s counted as runnable\n",
(void *)kctx,
kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF) ?
"is" : "isn't");
@@ -2867,7 +2867,7 @@ static void js_return_worker(struct work_struct *data)
if (context_idle) {
dev_dbg(kbdev->dev,
- "Context %p %s counted as active\n",
+ "Context %pK %s counted as active\n",
(void *)kctx,
kbase_ctx_flag(kctx, KCTX_ACTIVE) ?
"is" : "isn't");
@@ -2906,13 +2906,13 @@ static void js_return_worker(struct work_struct *data)
kbase_backend_complete_wq_post_sched(kbdev, core_req);
- dev_dbg(kbdev->dev, "Leaving %s for atom %p\n",
+ dev_dbg(kbdev->dev, "Leaving %s for atom %pK\n",
__func__, (void *)katom);
}
void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
{
- dev_dbg(kctx->kbdev->dev, "Unpulling atom %p in kctx %p\n",
+ dev_dbg(kctx->kbdev->dev, "Unpulling atom %pK in kctx %pK\n",
(void *)katom, (void *)kctx);
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
@@ -2967,7 +2967,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx,
return false;
dev_dbg(kctx->kbdev->dev,
- "Start atom %p is done in state %d of RP %d\n",
+ "Start atom %pK is done in state %d of RP %d\n",
(void *)start_katom, (int)rp->state,
start_katom->renderpass_id);
@@ -2979,7 +2979,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx,
unsigned long flags;
dev_dbg(kctx->kbdev->dev,
- "Start atom %p completed before soft-stop\n",
+ "Start atom %pK completed before soft-stop\n",
(void *)start_katom);
kbase_gpu_vm_lock(kctx);
@@ -2991,7 +2991,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx,
struct kbase_va_region, link);
WARN_ON(reg->flags & KBASE_REG_VA_FREED);
- dev_dbg(kctx->kbdev->dev, "Deleting region %p from list\n",
+ dev_dbg(kctx->kbdev->dev, "Deleting region %pK from list\n",
(void *)reg);
list_del_init(&reg->link);
kbase_va_region_alloc_put(kctx, reg);
@@ -3001,7 +3001,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx,
kbase_gpu_vm_unlock(kctx);
} else {
dev_dbg(kctx->kbdev->dev,
- "Start atom %p did not exceed memory threshold\n",
+ "Start atom %pK did not exceed memory threshold\n",
(void *)start_katom);
WARN_ON(rp->state != KBASE_JD_RP_START &&
@@ -3018,7 +3018,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx,
/* Allow the end of the renderpass to be pulled for
* execution again to continue incremental rendering.
*/
- dev_dbg(kbdev->dev, "Unblocking end atom %p!\n",
+ dev_dbg(kbdev->dev, "Unblocking end atom %pK!\n",
(void *)end_katom);
atomic_dec(&end_katom->blocked);
@@ -3062,7 +3062,7 @@ static void js_complete_end_rp(struct kbase_context *kctx,
if (WARN_ON(rp->end_katom != end_katom))
return;
- dev_dbg(kbdev->dev, "End atom %p is done in state %d of RP %d\n",
+ dev_dbg(kbdev->dev, "End atom %pK is done in state %d of RP %d\n",
(void *)end_katom, (int)rp->state, end_katom->renderpass_id);
if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE) ||
@@ -3096,7 +3096,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
kbdev = kctx->kbdev;
atom_slot = katom->slot_nr;
- dev_dbg(kbdev->dev, "%s for atom %p (s:%d)\n",
+ dev_dbg(kbdev->dev, "%s for atom %pK (s:%d)\n",
__func__, (void *)katom, atom_slot);
/* Update the incremental rendering state machine.
@@ -3115,7 +3115,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) {
- dev_dbg(kbdev->dev, "Atom %p is in runnable_tree\n",
+ dev_dbg(kbdev->dev, "Atom %pK is in runnable_tree\n",
(void *)katom);
context_idle = !atomic_dec_return(&kctx->atoms_pulled);
@@ -3136,7 +3136,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
if (!kctx->atoms_pulled_slot_pri[atom_slot][prio]
&& kctx->blocked_js[atom_slot][prio]) {
dev_dbg(kbdev->dev,
- "kctx %p is no longer blocked from submitting on slot %d at priority %d\n",
+ "kctx %pK is no longer blocked from submitting on slot %d at priority %d\n",
(void *)kctx, atom_slot, prio);
kctx->blocked_js[atom_slot][prio] = false;
@@ -3190,7 +3190,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
* jd_done_worker().
*/
if (context_idle) {
- dev_dbg(kbdev->dev, "kctx %p is no longer active\n",
+ dev_dbg(kbdev->dev, "kctx %pK is no longer active\n",
(void *)kctx);
kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
}
@@ -3241,7 +3241,7 @@ static bool js_end_rp_is_complete(struct kbase_jd_atom *const end_katom)
return true;
dev_dbg(kbdev->dev,
- "JS complete end atom %p in state %d of RP %d\n",
+ "JS complete end atom %pK in state %d of RP %d\n",
(void *)end_katom, (int)rp->state,
end_katom->renderpass_id);
@@ -3270,7 +3270,7 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
struct kbase_jd_atom *x_dep = katom->x_post_dep;
kbdev = kctx->kbdev;
- dev_dbg(kbdev->dev, "Atom %p complete in kctx %p (post-dep %p)\n",
+ dev_dbg(kbdev->dev, "Atom %pK complete in kctx %pK (post-dep %pK)\n",
(void *)katom, (void *)kctx, (void *)x_dep);
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
@@ -3286,7 +3286,7 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
katom->event_code = katom->will_fail_event_code;
katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED;
- dev_dbg(kbdev->dev, "Atom %p status to HW completed\n", (void *)katom);
+ dev_dbg(kbdev->dev, "Atom %pK status to HW completed\n", (void *)katom);
if (katom->event_code != BASE_JD_EVENT_DONE) {
kbase_js_evict_deps(kctx, katom, katom->slot_nr,
@@ -3308,7 +3308,7 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr,
false);
x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
- dev_dbg(kbdev->dev, "Cleared X_DEP flag on atom %p\n",
+ dev_dbg(kbdev->dev, "Cleared X_DEP flag on atom %pK\n",
(void *)x_dep);
kbase_js_move_to_tree(x_dep);
@@ -3319,13 +3319,13 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
x_dep->slot_nr);
if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) {
- dev_dbg(kbdev->dev, "Atom %p is in runnable tree\n",
+ dev_dbg(kbdev->dev, "Atom %pK is in runnable tree\n",
(void *)x_dep);
return x_dep;
}
} else {
dev_dbg(kbdev->dev,
- "No cross-slot dep to unblock for atom %p\n",
+ "No cross-slot dep to unblock for atom %pK\n",
(void *)katom);
}
@@ -3356,13 +3356,13 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom)
if (!(katom->atom_flags &
KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
- dev_dbg(kbdev->dev, "Atom %p is not blocked on a cross-slot dependency",
+ dev_dbg(kbdev->dev, "Atom %pK is not blocked on a cross-slot dependency",
(void *)katom);
return false;
}
if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS)) {
- dev_dbg(kbdev->dev, "Atom %p is blocked on a cross-slot dependency",
+ dev_dbg(kbdev->dev, "Atom %pK is blocked on a cross-slot dependency",
(void *)katom);
return true;
}
@@ -3388,12 +3388,12 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom)
* if it only depends on the tiler job chain.
*/
if (katom->x_pre_dep != rp->start_katom) {
- dev_dbg(kbdev->dev, "Dependency is on %p not start atom %p\n",
+ dev_dbg(kbdev->dev, "Dependency is on %pK not start atom %pK\n",
(void *)katom->x_pre_dep, (void *)rp->start_katom);
return true;
}
- dev_dbg(kbdev->dev, "Ignoring cross-slot dep on atom %p\n",
+ dev_dbg(kbdev->dev, "Ignoring cross-slot dep on atom %pK\n",
(void *)katom->x_pre_dep);
return false;
@@ -3407,7 +3407,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
bool ctx_waiting[BASE_JM_MAX_NR_SLOTS];
int js;
- dev_dbg(kbdev->dev, "%s kbdev %p mask 0x%x\n",
+ dev_dbg(kbdev->dev, "%s kbdev %pK mask 0x%x\n",
__func__, (void *)kbdev, (unsigned int)js_mask);
js_devdata = &kbdev->js_data;
@@ -3442,7 +3442,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
context_idle = true;
dev_dbg(kbdev->dev,
- "kctx %p is not active (s:%d)\n",
+ "kctx %pK is not active (s:%d)\n",
(void *)kctx, js);
if (kbase_pm_context_active_handle_suspend(
@@ -3472,7 +3472,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
&kctx->jctx.sched_info.ctx.jsctx_mutex);
dev_dbg(kbdev->dev,
- "kctx %p cannot be used at this time\n",
+ "kctx %pK cannot be used at this time\n",
kctx);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -3514,7 +3514,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
bool pullable;
dev_dbg(kbdev->dev,
- "No atoms pulled from kctx %p (s:%d)\n",
+ "No atoms pulled from kctx %pK (s:%d)\n",
(void *)kctx, js);
pullable = kbase_js_ctx_pullable(kctx, js,
@@ -3576,7 +3576,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
break; /* Could not run atoms on this slot */
}
- dev_dbg(kbdev->dev, "Push kctx %p to back of list\n",
+ dev_dbg(kbdev->dev, "Push kctx %pK to back of list\n",
(void *)kctx);
if (kbase_js_ctx_pullable(kctx, js, true))
timer_sync |=
@@ -3598,7 +3598,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
if (kbdev->hwaccess.active_kctx[js] == last_active[js] &&
ctx_waiting[js]) {
- dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n",
+ dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n",
(void *)last_active[js], js);
kbdev->hwaccess.active_kctx[js] = NULL;
}
@@ -3629,7 +3629,7 @@ void kbase_js_zap_context(struct kbase_context *kctx)
mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
kbase_ctx_flag_set(kctx, KCTX_DYING);
- dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx);
+ dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %pK", kctx);
/*
* At this point we know:
@@ -3693,7 +3693,7 @@ void kbase_js_zap_context(struct kbase_context *kctx)
KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u, kbase_ctx_flag(kctx, KCTX_SCHEDULED));
- dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx);
+ dev_dbg(kbdev->dev, "Zap: Ctx %pK scheduled=0", kctx);
/* Only cancel jobs when we evicted from the
* queue. No Power Manager active reference was held.
@@ -3714,7 +3714,7 @@ void kbase_js_zap_context(struct kbase_context *kctx)
* Pool
*/
KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u, kbase_ctx_flag(kctx, KCTX_SCHEDULED));
- dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx);
+ dev_dbg(kbdev->dev, "Zap: Ctx %pK is in RunPool", kctx);
/* Disable the ctx from submitting any more jobs */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -3732,7 +3732,7 @@ void kbase_js_zap_context(struct kbase_context *kctx)
*/
KBASE_DEBUG_ASSERT(was_retained);
- dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx);
+ dev_dbg(kbdev->dev, "Zap: Ctx %pK Kill Any Running jobs", kctx);
/* Cancel any remaining running jobs for this kctx - if any.
* Submit is disallowed which takes effect immediately, so no
@@ -3745,7 +3745,7 @@ void kbase_js_zap_context(struct kbase_context *kctx)
mutex_unlock(&js_devdata->queue_mutex);
mutex_unlock(&kctx->jctx.lock);
- dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)",
+ dev_dbg(kbdev->dev, "Zap: Ctx %pK Release (may or may not schedule out immediately)",
kctx);
kbasep_js_runpool_release_ctx(kbdev, kctx);
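The hunks above convert dev_dbg() pointer output from %p to %pK, which lets the kernel hash or suppress the address according to kptr_restrict instead of printing raw kernel pointers into the log. A minimal stand-alone sketch of the same pattern (the device pointer and context value here are placeholders, not taken from the patch):

#include <linux/device.h>

/* %pK honours kptr_restrict: the pointer is hashed or zeroed rather
 * than leaking the raw kernel address into dmesg.
 */
static void demo_log_ctx(struct device *dev, void *kctx, int js)
{
        dev_dbg(dev, "Add pullable tail kctx %pK (s:%d)\n", kctx, js);
}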
diff --git a/mali_kbase/mali_kbase_kinstr_jm.c b/mali_kbase/mali_kbase_kinstr_jm.c
index 76cff41..cc8dd86 100644
--- a/mali_kbase/mali_kbase_kinstr_jm.c
+++ b/mali_kbase/mali_kbase_kinstr_jm.c
@@ -25,7 +25,7 @@
*/
#include "mali_kbase_kinstr_jm.h"
-#include "mali_kbase_kinstr_jm_reader.h"
+#include <uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h>
#include "mali_kbase.h"
#include "mali_kbase_linux.h"
diff --git a/mali_kbase/mali_kbase_kinstr_jm.h b/mali_kbase/mali_kbase_kinstr_jm.h
index 74fe5cf..2b81636 100644
--- a/mali_kbase/mali_kbase_kinstr_jm.h
+++ b/mali_kbase/mali_kbase_kinstr_jm.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
*
- * (C) COPYRIGHT 2019, 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -63,7 +63,7 @@
#ifndef _KBASE_KINSTR_JM_H_
#define _KBASE_KINSTR_JM_H_
-#include "mali_kbase_kinstr_jm_reader.h"
+#include <uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h>
#ifdef __KERNEL__
#include <linux/version.h>
diff --git a/mali_kbase/mali_kbase_kinstr_jm_reader.h b/mali_kbase/mali_kbase_kinstr_jm_reader.h
deleted file mode 100644
index cbd495f..0000000
--- a/mali_kbase/mali_kbase_kinstr_jm_reader.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *
- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-/*
- * mali_kbase_kinstr_jm_reader.h
- * Provides an ioctl API to read kernel atom state changes. The flow of the
- * API is:
- * 1. Obtain the file descriptor with ``KBASE_IOCTL_KINSTR_JM_FD``
- * 2. Determine the buffer structure layout via the above ioctl's returned
- * size and version fields in ``struct kbase_kinstr_jm_fd_out``
- * 4. Poll the file descriptor for ``POLLIN``
- * 5. Get data with read() on the fd
- * 6. Use the structure version to understand how to read the data from the
- * buffer
- * 7. Repeat 4-6
- * 8. Close the file descriptor
- */
-
-#ifndef _KBASE_KINSTR_JM_READER_H_
-#define _KBASE_KINSTR_JM_READER_H_
-
-/**
- * enum kbase_kinstr_jm_reader_atom_state - Determines the work state of an atom
- * @KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE: Signifies that an atom has
- * entered a hardware queue
- * @KBASE_KINSTR_JM_READER_ATOM_STATE_START: Signifies that work has started
- * on an atom
- * @KBASE_KINSTR_JM_READER_ATOM_STATE_STOP: Signifies that work has stopped
- * on an atom
- * @KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE: Signifies that work has
- * completed on an atom
- * @KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT: The number of state enumerations
- *
- * We can add new states to the end of this if they do not break the existing
- * state machine. Old user mode code can gracefully ignore states they do not
- * understand.
- *
- * If we need to make a breaking change to the state machine, we can do that by
- * changing the version reported by KBASE_IOCTL_KINSTR_JM_FD. This will
- * mean that old user mode code will fail to understand the new state field in
- * the structure and gracefully not use the state change API.
- */
-enum kbase_kinstr_jm_reader_atom_state {
- KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE,
- KBASE_KINSTR_JM_READER_ATOM_STATE_START,
- KBASE_KINSTR_JM_READER_ATOM_STATE_STOP,
- KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE,
- KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT
-};
-
-#endif /* _KBASE_KINSTR_JM_READER_H_ */
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index fd992e2..326917c 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -849,7 +849,7 @@ bool kbase_has_exec_va_zone(struct kbase_context *kctx)
*
* Return: true if any allocs exist on any zone, false otherwise
*/
-bool kbase_region_tracker_has_allocs(struct kbase_context *kctx)
+static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx)
{
unsigned int zone_idx;
@@ -1393,7 +1393,7 @@ void kbase_free_alloced_region(struct kbase_va_region *reg)
if (WARN_ON(kbase_is_region_invalid(reg)))
return;
- dev_dbg(kctx->kbdev->dev, "Freeing memory region %p\n",
+ dev_dbg(kctx->kbdev->dev, "Freeing memory region %pK\n",
(void *)reg);
#if MALI_USE_CSF
if (reg->flags & KBASE_REG_CSF_EVENT)
@@ -1916,7 +1916,7 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re
KBASE_DEBUG_ASSERT(kctx != NULL);
KBASE_DEBUG_ASSERT(reg != NULL);
- dev_dbg(kctx->kbdev->dev, "%s %p in kctx %p\n",
+ dev_dbg(kctx->kbdev->dev, "%s %pK in kctx %pK\n",
__func__, (void *)reg, (void *)kctx);
lockdep_assert_held(&kctx->reg_lock);
@@ -1975,7 +1975,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
struct kbase_va_region *reg;
KBASE_DEBUG_ASSERT(kctx != NULL);
- dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %p\n",
+ dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %pK\n",
__func__, gpu_addr, (void *)kctx);
if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) {
@@ -2772,6 +2772,7 @@ void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc,
kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed);
}
}
+KBASE_EXPORT_TEST_API(kbase_free_phy_pages_helper_locked);
#if MALI_USE_CSF
/**
@@ -4233,8 +4234,11 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
div_u64(old_pages * (100 - kctx->trim_level), 100));
u64 delta = old_pages - new_size;
- if (delta)
+ if (delta) {
+ mutex_lock(&kctx->reg_lock);
kbase_mem_shrink(kctx, reg, old_pages - delta);
+ mutex_unlock(&kctx->reg_lock);
+ }
}
#if MALI_JIT_PRESSURE_LIMIT_BASE
diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h
index cda6b57..d12ec31 100644
--- a/mali_kbase/mali_kbase_mem.h
+++ b/mali_kbase/mali_kbase_mem.h
@@ -31,7 +31,7 @@
#endif
#include <linux/kref.h>
-#include "mali_base_kernel.h"
+#include <uapi/gpu/arm/midgard/mali_base_kernel.h>
#include <mali_kbase_hw.h>
#include "mali_kbase_pm.h"
#include "mali_kbase_defs.h"
@@ -549,7 +549,7 @@ static inline struct kbase_va_region *kbase_va_region_alloc_get(
WARN_ON(!region->va_refcnt);
/* non-atomic as kctx->reg_lock is held */
- dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %p\n",
+ dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %pK\n",
region->va_refcnt, (void *)region);
region->va_refcnt++;
@@ -566,7 +566,7 @@ static inline struct kbase_va_region *kbase_va_region_alloc_put(
/* non-atomic as kctx->reg_lock is held */
region->va_refcnt--;
- dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %p\n",
+ dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %pK\n",
region->va_refcnt, (void *)region);
if (!region->va_refcnt)
kbase_region_refcnt_free(region);
diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c
index 7c9c08e..cc80927 100644
--- a/mali_kbase/mali_kbase_mem_linux.c
+++ b/mali_kbase/mali_kbase_mem_linux.c
@@ -42,7 +42,7 @@
#include <mali_kbase.h>
#include <mali_kbase_mem_linux.h>
#include <tl/mali_kbase_tracepoints.h>
-#include <mali_kbase_ioctl.h>
+#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h>
#include <mmu/mali_kbase_mmu.h>
#include <mali_kbase_caps.h>
#include <mali_kbase_trace_gpu_mem.h>
@@ -1104,7 +1104,7 @@ int kbase_mem_do_sync_imported(struct kbase_context *kctx,
dir);
#endif /* KBASE_MEM_ION_SYNC_WORKAROUND */
break;
- };
+ }
if (unlikely(ret))
dev_warn(kctx->kbdev->dev,
@@ -2718,7 +2718,7 @@ int kbase_context_mmap(struct kbase_context *const kctx,
{
struct kbase_va_region *reg = NULL;
void *kaddr = NULL;
- size_t nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ size_t nr_pages = vma_pages(vma);
int err = 0;
int free_on_close = 0;
struct device *dev = kctx->kbdev->dev;
@@ -3333,7 +3333,7 @@ static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx,
{
unsigned long cookie =
vma->vm_pgoff - PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE);
- size_t nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ size_t nr_pages = vma_pages(vma);
struct kbase_queue *queue;
int err = 0;
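Both mmap hunks above drop the open-coded shift in favour of vma_pages(). For reference, the generic helper from <linux/mm.h> is equivalent to the expression it replaces; a hedged local copy purely to show the arithmetic:

#include <linux/mm.h>

/* Number of pages spanned by a VMA; mirrors the kernel's vma_pages(). */
static unsigned long demo_vma_pages(struct vm_area_struct *vma)
{
        return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
}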
diff --git a/mali_kbase/mali_kbase_mem_pool.c b/mali_kbase/mali_kbase_mem_pool.c
index 9b5854a..1874a6f 100644
--- a/mali_kbase/mali_kbase_mem_pool.c
+++ b/mali_kbase/mali_kbase_mem_pool.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
*
- * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -309,7 +309,7 @@ void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size)
kbase_mem_pool_unlock(pool);
}
-
+KBASE_EXPORT_TEST_API(kbase_mem_pool_set_max_size);
static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s,
struct shrink_control *sc)
@@ -804,8 +804,8 @@ void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool,
nr_to_pool = kbase_mem_pool_capacity(pool);
nr_to_pool = min(nr_pages, nr_to_pool);
- kbase_mem_pool_add_array_locked(pool, nr_pages, pages, false,
- dirty);
+ kbase_mem_pool_add_array_locked(pool, nr_to_pool, pages, false,
+ dirty);
i += nr_to_pool;
}
diff --git a/mali_kbase/mali_kbase_mipe_gen_header.h b/mali_kbase/mali_kbase_mipe_gen_header.h
index 87eb65b..d1ea7ad 100644
--- a/mali_kbase/mali_kbase_mipe_gen_header.h
+++ b/mali_kbase/mali_kbase_mipe_gen_header.h
@@ -39,14 +39,14 @@
* defined. See documentation below:
*/
-/**
+/*
* The name of the variable where the result BLOB will be stored.
*/
#if !defined(MIPE_HEADER_BLOB_VAR_NAME)
#error "MIPE_HEADER_BLOB_VAR_NAME must be defined!"
#endif
-/**
+/*
* A compiler attribute for the BLOB variable.
*
* e.g. __attribute__((section("my_section")))
@@ -77,7 +77,7 @@
#error "MIPE_HEADER_STREAM_ID must be defined!"
#endif
-/**
+/*
* MIPE packet class.
*
* See enum tl_packet_class.
@@ -86,7 +86,7 @@
#error "MIPE_HEADER_PKT_CLASS must be defined!"
#endif
-/**
+/*
* The list of tracepoints to process.
*
* It should be defined as follows:
@@ -105,14 +105,14 @@
#error "MIPE_HEADER_TRACEPOINT_LIST must be defined!"
#endif
-/**
+/*
* The number of entries in MIPE_HEADER_TRACEPOINT_LIST.
*/
#if !defined(MIPE_HEADER_TRACEPOINT_LIST_SIZE)
#error "MIPE_HEADER_TRACEPOINT_LIST_SIZE must be defined!"
#endif
-/**
+/*
* The list of enums to process.
*
* It should be defined as follows:
@@ -129,7 +129,7 @@
*/
#if defined(MIPE_HEADER_ENUM_LIST)
-/**
+/*
* Tracepoint message ID used for enums declaration.
*/
#if !defined(MIPE_HEADER_ENUM_MSG_ID)
diff --git a/mali_kbase/mali_kbase_pm.c b/mali_kbase/mali_kbase_pm.c
index da09a97..3ded47b 100644
--- a/mali_kbase/mali_kbase_pm.c
+++ b/mali_kbase/mali_kbase_pm.c
@@ -256,9 +256,15 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start)
kbase_pm_context_idle(kbdev);
/* Re-enable GPU hardware counters */
+#if MALI_USE_CSF
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+#else
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#endif
/* Resume vinstr */
kbase_vinstr_resume(kbdev->vinstr_ctx);
diff --git a/mali_kbase/mali_kbase_reset_gpu.h b/mali_kbase/mali_kbase_reset_gpu.h
index 4f66972..cb8a082 100644
--- a/mali_kbase/mali_kbase_reset_gpu.h
+++ b/mali_kbase/mali_kbase_reset_gpu.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
*
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -143,8 +143,16 @@ void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev);
void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev);
/**
+ * Flags for kbase_prepare_to_reset_gpu
+ */
+#define RESET_FLAGS_NONE ((unsigned int)0)
+/* This reset should be treated as an unrecoverable error by HW counter logic */
+#define RESET_FLAGS_HWC_UNRECOVERABLE_ERROR ((unsigned int)(1 << 0))
+
+/**
* kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU.
* @kbdev: Device pointer
+ * @flags: Bitfield indicating impact of reset (see flag defines)
*
* Caller is expected to hold the kbdev->hwaccess_lock.
*
@@ -153,18 +161,20 @@ void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev);
* - false - Another thread is performing a reset, kbase_reset_gpu should
* not be called.
*/
-bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev);
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev,
+ unsigned int flags);
/**
* kbase_prepare_to_reset_gpu - Prepare for resetting the GPU.
* @kbdev: Device pointer
- *
+ * @flags: Bitfield indicating impact of reset (see flag defines)
+ *
* Return: a boolean which should be interpreted as follows:
* - true - Prepared for reset, kbase_reset_gpu should be called.
* - false - Another thread is performing a reset, kbase_reset_gpu should
* not be called.
*/
-bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev);
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags);
/**
* kbase_reset_gpu - Reset the GPU
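The reset API above now carries a flags bitfield so callers can tell the HW-counter logic whether the reset is fatal to it. A hedged caller sketch built only from the names introduced in this hunk (RESET_FLAGS_NONE for an ordinary reset, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR when the counters cannot survive the reset); the call pattern matches the mmu_csf.c hunk further down:

/* Sketch of the intended call pattern; kbase_device is the driver's
 * existing device structure.
 */
static void demo_trigger_reset(struct kbase_device *kbdev, bool hwc_broken)
{
        unsigned int flags = hwc_broken ?
                RESET_FLAGS_HWC_UNRECOVERABLE_ERROR : RESET_FLAGS_NONE;

        if (kbase_prepare_to_reset_gpu(kbdev, flags))
                kbase_reset_gpu(kbdev);
}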
diff --git a/mali_kbase/mali_kbase_softjobs.c b/mali_kbase/mali_kbase_softjobs.c
index 654c029..e14a4be 100644
--- a/mali_kbase/mali_kbase_softjobs.c
+++ b/mali_kbase/mali_kbase_softjobs.c
@@ -27,7 +27,7 @@
#include <mali_kbase_sync.h>
#endif
#include <linux/dma-mapping.h>
-#include <mali_base_kernel.h>
+#include <uapi/gpu/arm/midgard/mali_base_kernel.h>
#include <mali_kbase_hwaccess_time.h>
#include <mali_kbase_kinstr_jm.h>
#include <mali_kbase_mem_linux.h>
@@ -145,6 +145,9 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
* delay suspend until we process the atom (which may be at the end of a
* long chain of dependencies
*/
+#ifdef CONFIG_MALI_ARBITER_SUPPORT
+ atomic_inc(&kctx->kbdev->pm.gpu_users_waiting);
+#endif /* CONFIG_MALI_ARBITER_SUPPORT */
pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
if (pm_active_err) {
struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data;
@@ -162,6 +165,10 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
return pm_active_err;
}
+#ifdef CONFIG_MALI_ARBITER_SUPPORT
+ else
+ atomic_dec(&kctx->kbdev->pm.gpu_users_waiting);
+#endif /* CONFIG_MALI_ARBITER_SUPPORT */
kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time,
&ts);
@@ -291,7 +298,7 @@ static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom)
if (!kbase_sync_fence_in_info_get(dep, &info)) {
dev_warn(dev,
- "\tVictim trigger atom %d fence [%p] %s: %s\n",
+ "\tVictim trigger atom %d fence [%pK] %s: %s\n",
kbase_jd_atom_id(kctx, dep),
info.fence,
info.name,
@@ -320,11 +327,11 @@ static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom)
return;
}
- dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%p] after %dms\n",
+ dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%pK] after %dms\n",
kctx->tgid, kctx->id,
kbase_jd_atom_id(kctx, katom),
info.fence, timeout_ms);
- dev_warn(dev, "\tGuilty fence [%p] %s: %s\n",
+ dev_warn(dev, "\tGuilty fence [%pK] %s: %s\n",
info.fence, info.name,
kbase_sync_status_string(info.status));
@@ -1422,41 +1429,27 @@ static int kbase_ext_res_prepare(struct kbase_jd_atom *katom)
struct base_external_resource_list *ext_res;
u64 count = 0;
size_t copy_size;
- int ret;
user_ext_res = (__user struct base_external_resource_list *)
(uintptr_t) katom->jc;
/* Fail the job if there is no info structure */
- if (!user_ext_res) {
- ret = -EINVAL;
- goto fail;
- }
+ if (!user_ext_res)
+ return -EINVAL;
- if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) {
- ret = -EINVAL;
- goto fail;
- }
+ if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0)
+ return -EINVAL;
/* Is the number of external resources in range? */
- if (!count || count > BASE_EXT_RES_COUNT_MAX) {
- ret = -EINVAL;
- goto fail;
- }
+ if (!count || count > BASE_EXT_RES_COUNT_MAX)
+ return -EINVAL;
/* Copy the information for safe access and future storage */
copy_size = sizeof(*ext_res);
copy_size += sizeof(struct base_external_resource) * (count - 1);
- ext_res = kzalloc(copy_size, GFP_KERNEL);
- if (!ext_res) {
- ret = -ENOMEM;
- goto fail;
- }
-
- if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) {
- ret = -EINVAL;
- goto free_info;
- }
+ ext_res = memdup_user(user_ext_res, copy_size);
+ if (IS_ERR(ext_res))
+ return PTR_ERR(ext_res);
/*
* Overwrite the count with the first value in case it was changed
@@ -1467,11 +1460,6 @@ static int kbase_ext_res_prepare(struct kbase_jd_atom *katom)
katom->softjob_data = ext_res;
return 0;
-
-free_info:
- kfree(ext_res);
-fail:
- return ret;
}
static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map)
@@ -1793,6 +1781,9 @@ void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev)
if (kbase_process_soft_job(katom_iter) == 0) {
kbase_finish_soft_job(katom_iter);
resched |= jd_done_nolock(katom_iter, NULL);
+#ifdef CONFIG_MALI_ARBITER_SUPPORT
+ atomic_dec(&kbdev->pm.gpu_users_waiting);
+#endif /* CONFIG_MALI_ARBITER_SUPPORT */
}
mutex_unlock(&kctx->jctx.lock);
}
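The kbase_ext_res_prepare() cleanup above leans on memdup_user(), which collapses the kzalloc()+copy_from_user() pair into a single call that reports failure through ERR_PTR(). A generic, driver-independent sketch of the idiom (function and parameter names are illustrative only):

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/uaccess.h>

/* Copies @len bytes from user space into a freshly allocated buffer;
 * the caller owns the result and must kfree() it when finished.
 */
static int demo_copy_request(const void __user *uptr, size_t len, void **out)
{
        void *buf = memdup_user(uptr, len);

        if (IS_ERR(buf))
                return PTR_ERR(buf);

        *out = buf;
        return 0;
}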
diff --git a/mali_kbase/mali_kbase_sync_common.c b/mali_kbase/mali_kbase_sync_common.c
index 2061f53..39a68c2 100644
--- a/mali_kbase/mali_kbase_sync_common.c
+++ b/mali_kbase/mali_kbase_sync_common.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
*
- * (C) COPYRIGHT 2012-2016, 2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
*/
/*
- * @file mali_kbase_sync_common.c
+ * @file
*
* Common code for our explicit fence functionality
*/
diff --git a/mali_kbase/mali_kbase_vinstr.c b/mali_kbase/mali_kbase_vinstr.c
index bc985cb..4ac0d0e 100644
--- a/mali_kbase/mali_kbase_vinstr.c
+++ b/mali_kbase/mali_kbase_vinstr.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
*
- * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,9 +22,9 @@
#include "mali_kbase_vinstr.h"
#include "mali_kbase_hwcnt_virtualizer.h"
#include "mali_kbase_hwcnt_types.h"
-#include "mali_kbase_hwcnt_reader.h"
+#include <uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h>
#include "mali_kbase_hwcnt_gpu.h"
-#include "mali_kbase_ioctl.h"
+#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h>
#include "mali_malisw.h"
#include "mali_kbase_debug.h"
@@ -898,11 +898,12 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_get_api_version(
struct kbase_vinstr_client *cli, unsigned long arg, size_t size)
{
long ret = -EINVAL;
- u8 clk_cnt = cli->vctx->metadata->clk_cnt;
if (size == sizeof(u32)) {
ret = put_user(HWCNT_READER_API, (u32 __user *)arg);
} else if (size == sizeof(struct kbase_hwcnt_reader_api_version)) {
+ u8 clk_cnt = cli->vctx->metadata->clk_cnt;
+ unsigned long bytes = 0;
struct kbase_hwcnt_reader_api_version api_version = {
.version = HWCNT_READER_API,
.features = KBASE_HWCNT_READER_API_VERSION_NO_FEATURE,
@@ -915,8 +916,16 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_get_api_version(
api_version.features |=
KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES;
- ret = copy_to_user(
+ bytes = copy_to_user(
(void __user *)arg, &api_version, sizeof(api_version));
+
+ /* copy_to_user returns zero in case of success.
+ * If it fails, it returns the number of bytes that could NOT be copied
+ */
+ if (bytes == 0)
+ ret = 0;
+ else
+ ret = -EFAULT;
}
return ret;
}
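The rework above is needed because copy_to_user() never returns a negative errno: it returns the number of bytes it could not copy, with zero meaning success. The conventional shape of that check, as a stand-alone sketch:

#include <linux/errno.h>
#include <linux/uaccess.h>

/* Any non-zero result from copy_to_user() is mapped to -EFAULT rather
 * than being handed back to the ioctl caller directly.
 */
static long demo_put_struct(void __user *uarg, const void *data, size_t size)
{
        if (copy_to_user(uarg, data, size))
                return -EFAULT;
        return 0;
}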
@@ -1042,7 +1051,16 @@ static int kbasep_vinstr_hwcnt_reader_mmap(
return -EINVAL;
vm_size = vma->vm_end - vma->vm_start;
- size = cli->dump_bufs.buf_cnt * cli->vctx->metadata->dump_buf_bytes;
+
+ /* The mapping is allowed to span the entirety of the page allocation,
+ * not just the chunk where the dump buffers are allocated.
+ * This accommodates the corner case where the combined size of the
+ * dump buffers is smaller than a single page.
+ * This does not pose a security risk as the pages are zeroed on
+ * allocation, and anything out of bounds of the dump buffers is never
+ * written to.
+ */
+ size = (1ull << cli->dump_bufs.page_order) * PAGE_SIZE;
if (vma->vm_pgoff > (size >> PAGE_SHIFT))
return -EINVAL;
diff --git a/mali_kbase/mali_uk.h b/mali_kbase/mali_uk.h
deleted file mode 100644
index a499e02..0000000
--- a/mali_kbase/mali_uk.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *
- * (C) COPYRIGHT 2010, 2012-2015, 2018, 2020-2021 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-/**
- * Types and definitions that are common across OSs for both the user
- * and kernel side of the User-Kernel interface.
- */
-
-#ifndef _UK_H_
-#define _UK_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/**
- * DOC: uk_api User-Kernel Interface API
- *
- * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device
- * drivers developed as part of the Midgard DDK. Currently that includes the Base driver.
- *
- * It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent
- * kernel-side API (UKK) via an OS-specific communication mechanism.
- *
- * This API is internal to the Midgard DDK and is not exposed to any applications.
- *
- */
-
-/**
- * enum uk_client_id - These are identifiers for kernel-side drivers
- * implementing a UK interface, aka UKK clients.
- * @UK_CLIENT_MALI_T600_BASE: Value used to identify the Base driver UK client.
- * @UK_CLIENT_COUNT: The number of uk clients supported. This must be
- * the last member of the enum
- *
- * The UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this
- * identifier to select a UKK client to the uku_open() function.
- *
- * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id
- * enumeration and the uku_open() implemenation for the various OS ports need to be updated to
- * provide a mapping of the identifier to the OS specific device name.
- *
- */
-enum uk_client_id {
- UK_CLIENT_MALI_T600_BASE,
- UK_CLIENT_COUNT
-};
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-#endif /* _UK_H_ */
diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
index 6b7cb42..8240817 100644
--- a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
+++ b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
@@ -83,10 +83,19 @@ static void submit_work_pagefault(struct kbase_device *kbdev, u32 as_nr,
.addr = fault->addr,
};
- if (WARN_ON(!queue_work(as->pf_wq, &as->work_pagefault)))
+ /*
+ * A page fault work item could already be pending for the
+ * context's address space when the page fault occurs for the
+ * MCU's address space.
+ */
+ if (!queue_work(as->pf_wq, &as->work_pagefault))
kbase_ctx_sched_release_ctx(kctx);
- else
+ else {
+ dev_dbg(kbdev->dev,
+ "Page fault is already pending for as %u\n",
+ as_nr);
atomic_inc(&kbdev->faults_pending);
+ }
}
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
@@ -117,15 +126,9 @@ void kbase_mmu_report_mcu_as_fault_and_reset(struct kbase_device *kbdev,
for (as_no = 1; as_no < kbdev->nr_hw_address_spaces; as_no++)
submit_work_pagefault(kbdev, as_no, fault);
- /* MCU AS fault could mean hardware counters will stop working.
- * Put the backend into the unrecoverable error state to cause
- * current and subsequent counter operations to immediately
- * fail, avoiding the risk of a hang.
- */
- kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface);
-
/* GPU reset is required to recover */
- if (kbase_prepare_to_reset_gpu(kbdev))
+ if (kbase_prepare_to_reset_gpu(kbdev,
+ RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
kbase_reset_gpu(kbdev);
}
KBASE_EXPORT_TEST_API(kbase_mmu_report_mcu_as_fault_and_reset);
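The submit_work_pagefault() change above relies on the queue_work() contract: it returns true only when the work item was newly queued and false when one is already pending, which is now an expected outcome when an MCU fault fans out across every address space, so the old WARN_ON is dropped. A generic sketch of that contract (workqueue, work item and counter are placeholders):

#include <linux/atomic.h>
#include <linux/workqueue.h>

/* Per-submission bookkeeping must only happen when the work item was
 * actually queued; a false return means a duplicate was dropped.
 */
static bool demo_submit_once(struct workqueue_struct *wq,
                             struct work_struct *work, atomic_t *pending)
{
        if (!queue_work(wq, work))
                return false;

        atomic_inc(pending);
        return true;
}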
diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
index 18a74ab..ae334c1 100644
--- a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
+++ b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
@@ -206,7 +206,7 @@ static void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
lockdep_assert_held(&kbdev->hwaccess_lock);
dev_dbg(kbdev->dev,
- "Entering %s kctx %p, as %p\n",
+ "Entering %s kctx %pK, as %pK\n",
__func__, (void *)kctx, (void *)as);
if (!kctx) {
@@ -255,14 +255,10 @@ static void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
*/
kbasep_js_clear_submit_allowed(js_devdata, kctx);
- if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
- dev_warn(kbdev->dev,
- "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n",
- as->number, fault->addr,
- fault->extra_addr);
- else
- dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n",
- as->number, fault->addr);
+ dev_warn(kbdev->dev,
+ "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n",
+ as->number, fault->addr,
+ fault->extra_addr);
/*
* We need to switch to UNMAPPED mode - but we do this in a
@@ -276,7 +272,7 @@ static void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
}
dev_dbg(kbdev->dev,
- "Leaving %s kctx %p, as %p\n",
+ "Leaving %s kctx %pK, as %pK\n",
__func__, (void *)kctx, (void *)as);
}
@@ -375,14 +371,11 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
/* record the fault status */
fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no,
AS_FAULTSTATUS));
-
- if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) {
- fault->extra_addr = kbase_reg_read(kbdev,
- MMU_AS_REG(as_no, AS_FAULTEXTRA_HI));
- fault->extra_addr <<= 32;
- fault->extra_addr |= kbase_reg_read(kbdev,
- MMU_AS_REG(as_no, AS_FAULTEXTRA_LO));
- }
+ fault->extra_addr = kbase_reg_read(kbdev,
+ MMU_AS_REG(as_no, AS_FAULTEXTRA_HI));
+ fault->extra_addr <<= 32;
+ fault->extra_addr |= kbase_reg_read(kbdev,
+ MMU_AS_REG(as_no, AS_FAULTEXTRA_LO));
if (kbase_as_has_bus_fault(as, fault)) {
/* Mark bus fault as handled.
@@ -423,7 +416,7 @@ int kbase_mmu_switch_to_ir(struct kbase_context *const kctx,
struct kbase_va_region *const reg)
{
dev_dbg(kctx->kbdev->dev,
- "Switching to incremental rendering for region %p\n",
+ "Switching to incremental rendering for region %pK\n",
(void *)reg);
return kbase_job_slot_softstop_start_rp(kctx, reg);
}
diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c
index 51bee43..0761f68 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.c
+++ b/mali_kbase/mmu/mali_kbase_mmu.c
@@ -561,7 +561,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
dev_dbg(kbdev->dev,
- "Entering %s %p, fault_pfn %lld, as_no %d\n",
+ "Entering %s %pK, fault_pfn %lld, as_no %d\n",
__func__, (void *)data, fault_pfn, as_no);
/* Grab the context that was already refcounted in kbase_mmu_interrupt()
@@ -634,21 +634,13 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
goto fault_done;
case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT:
- if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
- kbase_mmu_report_fault_and_kill(kctx, faulting_as,
- "Address size fault", fault);
- else
- kbase_mmu_report_fault_and_kill(kctx, faulting_as,
- "Unknown fault code", fault);
+ kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+ "Address size fault", fault);
goto fault_done;
case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT:
- if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
- kbase_mmu_report_fault_and_kill(kctx, faulting_as,
- "Memory attributes fault", fault);
- else
- kbase_mmu_report_fault_and_kill(kctx, faulting_as,
- "Unknown fault code", fault);
+ kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+ "Memory attributes fault", fault);
goto fault_done;
default:
@@ -852,7 +844,7 @@ page_fault_retry:
if (kbase_mmu_switch_to_ir(kctx, region) >= 0) {
dev_dbg(kctx->kbdev->dev,
- "Get region %p for IR\n",
+ "Get region %pK for IR\n",
(void *)region);
kbase_va_region_alloc_get(kctx, region);
}
@@ -980,7 +972,7 @@ fault_done:
release_ctx(kbdev, kctx);
atomic_dec(&kbdev->faults_pending);
- dev_dbg(kbdev->dev, "Leaving page_fault_worker %p\n", (void *)data);
+ dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK\n", (void *)data);
}
static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev,
@@ -1557,7 +1549,7 @@ static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx,
*/
dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
- if (kbase_prepare_to_reset_gpu_locked(kbdev))
+ if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu_locked(kbdev);
}
}
@@ -1613,17 +1605,8 @@ static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev,
*/
dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
-#if MALI_USE_CSF
- /* A GPU hang could mean hardware counters will stop working.
- * Put the backend into the unrecoverable error state to cause
- * current and subsequent counter operations to immediately
- * fail, avoiding the risk of a hang.
- */
- kbase_hwcnt_backend_csf_on_unrecoverable_error(
- &kbdev->hwcnt_gpu_iface);
-#endif /* MALI_USE_CSF */
-
- if (kbase_prepare_to_reset_gpu(kbdev))
+ if (kbase_prepare_to_reset_gpu(
+ kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
kbase_reset_gpu(kbdev);
}
@@ -1659,7 +1642,7 @@ static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
ctx_is_in_runpool = kbase_ctx_sched_inc_refcount(kctx);
mutex_unlock(&kbdev->js_data.queue_mutex);
#else
- ctx_is_in_runpool = kbase_ctx_sched_refcount_mmu_flush(kctx, sync);
+ ctx_is_in_runpool = kbase_ctx_sched_inc_refcount_if_as_valid(kctx);
#endif /* !MALI_USE_CSF */
if (ctx_is_in_runpool) {
@@ -1681,11 +1664,6 @@ void kbase_mmu_update(struct kbase_device *kbdev,
KBASE_DEBUG_ASSERT(as_nr != KBASEP_AS_NR_INVALID);
kbdev->mmu_mode->update(kbdev, mmut, as_nr);
-
-#if MALI_USE_CSF
- if (mmut->kctx)
- mmut->kctx->mmu_flush_pend_state = KCTX_MMU_FLUSH_NOT_PEND;
-#endif
}
KBASE_EXPORT_TEST_API(kbase_mmu_update);
@@ -1719,10 +1697,6 @@ void kbase_mmu_disable(struct kbase_context *kctx)
kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true);
kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
-
-#if MALI_USE_CSF
- kctx->mmu_flush_pend_state = KCTX_MMU_FLUSH_NOT_PEND;
-#endif
}
KBASE_EXPORT_TEST_API(kbase_mmu_disable);
@@ -2312,30 +2286,3 @@ void kbase_flush_mmu_wqs(struct kbase_device *kbdev)
flush_workqueue(as->pf_wq);
}
}
-
-#if MALI_USE_CSF
-void kbase_mmu_deferred_flush_invalidate(struct kbase_context *kctx)
-{
- struct kbase_device *kbdev = kctx->kbdev;
-
- lockdep_assert_held(&kbdev->mmu_hw_mutex);
-
- if (kctx->as_nr == KBASEP_AS_NR_INVALID)
- return;
-
- if (kctx->mmu_flush_pend_state == KCTX_MMU_FLUSH_NOT_PEND)
- return;
-
- WARN_ON(!atomic_read(&kctx->refcount));
-
- /* Specify the entire address space as the locked region.
- * The flush of entire L2 cache and complete TLB invalidation will
- * anyways happen for the exisiting CSF GPUs, regardless of the locked
- * range. This may have to be revised later on.
- */
- kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0,
- kctx->mmu_flush_pend_state == KCTX_MMU_FLUSH_PEND_SYNC);
-
- kctx->mmu_flush_pend_state = KCTX_MMU_FLUSH_NOT_PEND;
-}
-#endif
diff --git a/mali_kbase/mmu/mali_kbase_mmu.h b/mali_kbase/mmu/mali_kbase_mmu.h
index 1d877ac..bf4fd91 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.h
+++ b/mali_kbase/mmu/mali_kbase_mmu.h
@@ -152,21 +152,4 @@ int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev, u32 status,
void kbase_mmu_gpu_fault_interrupt(struct kbase_device *kbdev, u32 status,
u32 as_nr, u64 address, bool as_valid);
-#if MALI_USE_CSF
-/**
- * kbase_mmu_deferred_flush_invalidate() - Perform deferred MMU flush
- * operations for a Kbase context.
- * @kctx: Pointer to the Kbase context for which MMU flush operations
- * are pending.
- *
- * This function performs the MMU flush operations that are pending for a Kbase
- * context. The flush operations will be deferred if the context is inactive,
- * i.e. kctx->refcount is zero which happens when all the queue groups of a
- * context have gone off CSG slots.
- * This needs to be called when first queue group of the context is put back
- * on the CSG slot.
- */
-void kbase_mmu_deferred_flush_invalidate(struct kbase_context *kctx);
-#endif
-
#endif /* _KBASE_MMU_H_ */
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
index b0596af..88fd9cf 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
@@ -124,38 +124,33 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as)
struct kbase_mmu_setup *current_setup = &as->current_setup;
u64 transcfg = 0;
- if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) {
- transcfg = current_setup->transcfg;
+ transcfg = current_setup->transcfg;
- /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK
- * Clear PTW_MEMATTR bits
- */
- transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK;
- /* Enable correct PTW_MEMATTR bits */
- transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK;
- /* Ensure page-tables reads use read-allocate cache-policy in
- * the L2
- */
- transcfg |= AS_TRANSCFG_R_ALLOCATE;
-
- if (kbdev->system_coherency != COHERENCY_NONE) {
- /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable)
- * Clear PTW_SH bits
- */
- transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK);
- /* Enable correct PTW_SH bits */
- transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS);
- }
+ /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK
+ * Clear PTW_MEMATTR bits
+ */
+ transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK;
+ /* Enable correct PTW_MEMATTR bits */
+ transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK;
+ /* Ensure page-tables reads use read-allocate cache-policy in
+ * the L2
+ */
+ transcfg |= AS_TRANSCFG_R_ALLOCATE;
- kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
- transcfg);
- kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
- (transcfg >> 32) & 0xFFFFFFFFUL);
- } else {
- if (kbdev->system_coherency != COHERENCY_NONE)
- current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER;
+ if (kbdev->system_coherency != COHERENCY_NONE) {
+ /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable)
+ * Clear PTW_SH bits
+ */
+ transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK);
+ /* Enable correct PTW_SH bits */
+ transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS);
}
+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
+ transcfg);
+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
+ (transcfg >> 32) & 0xFFFFFFFFUL);
+
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
current_setup->transtab & 0xFFFFFFFFUL);
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
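The rework above unconditionally takes the AS_TRANSCFG path: clear a field with its mask, OR in the desired value, then program the 64-bit result as two 32-bit LO/HI register writes. Below is a standalone C sketch of that clear-then-set and split-write pattern; the field masks and shifts are placeholders, not the real AS_TRANSCFG_* encodings from the register map headers.

#include <stdint.h>
#include <stdio.h>

/* Placeholder field encodings for illustration only. */
#define PTW_MEMATTR_MASK        (0x3ull << 24)
#define PTW_MEMATTR_WRITE_BACK  (0x2ull << 24)
#define PTW_SH_MASK             (0x3ull << 28)
#define PTW_SH_OS               (0x2ull << 28)
#define R_ALLOCATE              (0x1ull << 30)

int main(void)
{
        uint64_t transcfg = 0x1ull; /* pretend current_setup->transcfg */
        int coherent = 1;           /* pretend system_coherency != COHERENCY_NONE */

        /* Clear-then-set: page-table walks use write-back, read-allocate caching. */
        transcfg &= ~PTW_MEMATTR_MASK;
        transcfg |= PTW_MEMATTR_WRITE_BACK;
        transcfg |= R_ALLOCATE;

        if (coherent) {
                transcfg &= ~PTW_SH_MASK;
                transcfg |= PTW_SH_OS; /* outer-shareable walks */
        }

        /* The 64-bit value is programmed as two 32-bit register writes. */
        printf("AS_TRANSCFG_LO = 0x%08x\n", (unsigned)(transcfg & 0xFFFFFFFFull));
        printf("AS_TRANSCFG_HI = 0x%08x\n", (unsigned)((transcfg >> 32) & 0xFFFFFFFFull));
        return 0;
}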
diff --git a/mali_kbase/mmu/mali_kbase_mmu_mode_lpae.c b/mali_kbase/mmu/mali_kbase_mmu_mode_lpae.c
deleted file mode 100644
index 09793e1..0000000
--- a/mali_kbase/mmu/mali_kbase_mmu_mode_lpae.c
+++ /dev/null
@@ -1,195 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- *
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-#include "mali_kbase.h"
-#include <gpu/mali_kbase_gpu_regmap.h>
-#include "mali_kbase_defs.h"
-
-#define ENTRY_TYPE_MASK 3ULL
-#define ENTRY_IS_ATE 1ULL
-#define ENTRY_IS_INVAL 2ULL
-#define ENTRY_IS_PTE 3ULL
-
-#define ENTRY_ATTR_BITS (7ULL << 2) /* bits 4:2 */
-#define ENTRY_RD_BIT (1ULL << 6)
-#define ENTRY_WR_BIT (1ULL << 7)
-#define ENTRY_SHARE_BITS (3ULL << 8) /* bits 9:8 */
-#define ENTRY_ACCESS_BIT (1ULL << 10)
-#define ENTRY_NX_BIT (1ULL << 54)
-
-#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \
- ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT)
-
-/* Helper Function to perform assignment of page table entries, to
- * ensure the use of strd, which is required on LPAE systems.
- */
-static inline void page_table_entry_set(u64 *pte, u64 phy)
-{
- WRITE_ONCE(*pte, phy);
-}
-
-static void mmu_get_as_setup(struct kbase_mmu_table *mmut,
- struct kbase_mmu_setup * const setup)
-{
- /* Set up the required caching policies at the correct indices
- * in the memattr register.
- */
- setup->memattr =
- (AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY <<
- (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
- (AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL <<
- (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) |
- (AS_MEMATTR_LPAE_WRITE_ALLOC <<
- (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) |
- (AS_MEMATTR_LPAE_OUTER_IMPL_DEF <<
- (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) |
- (AS_MEMATTR_LPAE_OUTER_WA <<
- (AS_MEMATTR_INDEX_OUTER_WA * 8)) |
- 0; /* The other indices are unused for now */
-
- setup->transtab = ((u64)mmut->pgd &
- ((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) |
- AS_TRANSTAB_LPAE_ADRMODE_TABLE |
- AS_TRANSTAB_LPAE_READ_INNER;
-
- setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY;
-}
-
-static void mmu_update(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut,
- int as_nr)
-{
- struct kbase_as *as;
- struct kbase_mmu_setup *current_setup;
-
- if (WARN_ON(as_nr == KBASEP_AS_NR_INVALID))
- return;
-
- as = &kbdev->as[as_nr];
- current_setup = &as->current_setup;
-
- mmu_get_as_setup(mmut, current_setup);
-
- /* Apply the address space setting */
- kbase_mmu_hw_configure(kbdev, as);
-}
-
-static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
-{
- struct kbase_as * const as = &kbdev->as[as_nr];
- struct kbase_mmu_setup * const current_setup = &as->current_setup;
-
- current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED;
-
- /* Apply the address space setting */
- kbase_mmu_hw_configure(kbdev, as);
-}
-
-static phys_addr_t pte_to_phy_addr(u64 entry)
-{
- if (!(entry & 1))
- return 0;
-
- return entry & ~0xFFF;
-}
-
-static int ate_is_valid(u64 ate, int const level)
-{
- return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE);
-}
-
-static int pte_is_valid(u64 pte, int const level)
-{
- return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE);
-}
-
-/*
- * Map KBASE_REG flags to MMU flags
- */
-static u64 get_mmu_flags(unsigned long flags)
-{
- u64 mmu_flags;
- unsigned long memattr_idx;
-
- memattr_idx = KBASE_REG_MEMATTR_VALUE(flags);
- if (WARN(memattr_idx == AS_MEMATTR_INDEX_NON_CACHEABLE,
- "Legacy Mode MMU cannot honor GPU non-cachable memory, will use default instead\n"))
- memattr_idx = AS_MEMATTR_INDEX_DEFAULT;
- /* store mem_attr index as 4:2, noting that:
- * - macro called above ensures 3 bits already
- * - all AS_MEMATTR_INDEX_<...> macros only use 3 bits
- */
- mmu_flags = memattr_idx << 2;
-
- /* write perm if requested */
- mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0;
- /* read perm if requested */
- mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0;
- /* nx if requested */
- mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
-
- if (flags & KBASE_REG_SHARE_BOTH) {
- /* inner and outer shareable */
- mmu_flags |= SHARE_BOTH_BITS;
- } else if (flags & KBASE_REG_SHARE_IN) {
- /* inner shareable coherency */
- mmu_flags |= SHARE_INNER_BITS;
- }
-
- return mmu_flags;
-}
-
-static void entry_set_ate(u64 *entry,
- struct tagged_addr phy,
- unsigned long flags,
- int const level)
-{
- page_table_entry_set(entry, as_phys_addr_t(phy) | get_mmu_flags(flags) |
- ENTRY_IS_ATE);
-}
-
-static void entry_set_pte(u64 *entry, phys_addr_t phy)
-{
- page_table_entry_set(entry, (phy & ~0xFFF) | ENTRY_IS_PTE);
-}
-
-static void entry_invalidate(u64 *entry)
-{
- page_table_entry_set(entry, ENTRY_IS_INVAL);
-}
-
-static struct kbase_mmu_mode const lpae_mode = {
- .update = mmu_update,
- .get_as_setup = mmu_get_as_setup,
- .disable_as = mmu_disable_as,
- .pte_to_phy_addr = pte_to_phy_addr,
- .ate_is_valid = ate_is_valid,
- .pte_is_valid = pte_is_valid,
- .entry_set_ate = entry_set_ate,
- .entry_set_pte = entry_set_pte,
- .entry_invalidate = entry_invalidate,
- .flags = 0
-};
-
-struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void)
-{
- return &lpae_mode;
-}
diff --git a/mali_kbase/tests/Mconfig b/mali_kbase/tests/Mconfig
index 2630736..a21810b 100644
--- a/mali_kbase/tests/Mconfig
+++ b/mali_kbase/tests/Mconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
#
-# (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -40,6 +40,10 @@ config BUILD_CSF_TESTS
config BUILD_ARBIF_TESTS
bool
- default y if UNIT_TEST_KERNEL_MODULES && MALI_ARBITER_SUPPORT
+ default y if UNIT_TEST_CODE && MALI_ARBITER_SUPPORT
default n
+config BUILD_ARBIF_KERNEL_TESTS
+ bool
+ default y if BUILD_KERNEL_MODULES && BUILD_ARBIF_TESTS
+ default n
diff --git a/mali_kbase/tl/backend/mali_kbase_timeline_csf.c b/mali_kbase/tl/backend/mali_kbase_timeline_csf.c
index 42f1e2d..7455ce2 100644
--- a/mali_kbase/tl/backend/mali_kbase_timeline_csf.c
+++ b/mali_kbase/tl/backend/mali_kbase_timeline_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
*
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -159,7 +159,7 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev)
* this iteration of the loop, so will start to correctly update
* the object model state.
*/
- };
+ }
mutex_unlock(&timeline->tl_kctx_list_lock);
diff --git a/mali_kbase/tl/backend/mali_kbase_timeline_jm.c b/mali_kbase/tl/backend/mali_kbase_timeline_jm.c
index f016e8b..6659d2d 100644
--- a/mali_kbase/tl/backend/mali_kbase_timeline_jm.c
+++ b/mali_kbase/tl/backend/mali_kbase_timeline_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
*
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -74,7 +74,7 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev)
kctx,
kctx->id,
(u32)(kctx->tgid));
- };
+ }
/* Reset body stream buffers while holding the kctx lock.
* This ensures we can't fire both summary and normal tracepoints for
diff --git a/mali_kbase/tl/mali_kbase_timeline.c b/mali_kbase/tl/mali_kbase_timeline.c
index 4f955a1..20d7b16 100644
--- a/mali_kbase/tl/mali_kbase_timeline.c
+++ b/mali_kbase/tl/mali_kbase_timeline.c
@@ -186,7 +186,7 @@ static void kbase_tlstream_current_devfreq_target(struct kbase_device *kbdev)
int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags)
{
- int ret;
+ int ret = 0;
u32 timeline_flags = TLSTREAM_ENABLED | flags;
struct kbase_timeline *timeline = kbdev->timeline;
@@ -262,6 +262,9 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags)
ret = -EBUSY;
}
+ if (ret >= 0)
+ timeline->last_acquire_time = ktime_get();
+
return ret;
}
diff --git a/mali_kbase/tl/mali_kbase_timeline.h b/mali_kbase/tl/mali_kbase_timeline.h
index 9315fcc..0465352 100644
--- a/mali_kbase/tl/mali_kbase_timeline.h
+++ b/mali_kbase/tl/mali_kbase_timeline.h
@@ -107,32 +107,6 @@ void kbase_timeline_pre_kbase_context_destroy(struct kbase_context *kctx);
void kbase_timeline_post_kbase_context_destroy(struct kbase_context *kctx);
#if MALI_UNIT_TEST
-/**
- * kbase_timeline_test - start timeline stream data generator
- * @kbdev: Kernel common context
- * @tpw_count: Number of trace point writers in each context
- * @msg_delay: Time delay in milliseconds between trace points written by one
- * writer
- * @msg_count: Number of trace points written by one writer
- * @aux_msg: If non-zero aux messages will be included
- *
- * This test starts a requested number of asynchronous writers in both IRQ and
- * thread context. Each writer will generate required number of test
- * tracepoints (tracepoints with embedded information about writer that
- * should be verified by user space reader). Tracepoints will be emitted in
- * all timeline body streams. If aux_msg is non-zero writer will also
- * generate not testable tracepoints (tracepoints without information about
- * writer). These tracepoints are used to check correctness of remaining
- * timeline message generating functions. Writer will wait requested time
- * between generating another set of messages. This call blocks until all
- * writers finish.
- */
-void kbase_timeline_test(
- struct kbase_device *kbdev,
- unsigned int tpw_count,
- unsigned int msg_delay,
- unsigned int msg_count,
- int aux_msg);
/**
* kbase_timeline_stats - read timeline stream statistics
diff --git a/mali_kbase/tl/mali_kbase_timeline_io.c b/mali_kbase/tl/mali_kbase_timeline_io.c
index 8587ba0..e3b6fbc 100644
--- a/mali_kbase/tl/mali_kbase_timeline_io.c
+++ b/mali_kbase/tl/mali_kbase_timeline_io.c
@@ -24,6 +24,7 @@
#include "mali_kbase_tracepoints.h"
#include "mali_kbase_timeline.h"
+#include <linux/delay.h>
#include <linux/poll.h>
/* The timeline stream file operations functions. */
@@ -46,7 +47,8 @@ const struct file_operations kbasep_tlstream_fops = {
/**
* kbasep_timeline_io_packet_pending - check timeline streams for pending
- *packets
+ * packets
+ *
* @timeline: Timeline instance
* @ready_stream: Pointer to variable where stream will be placed
* @rb_idx_raw: Pointer to variable where read buffer index will be placed
@@ -86,8 +88,8 @@ kbasep_timeline_io_packet_pending(struct kbase_timeline *timeline,
}
/**
- * kbasep_timeline_has_header_data() -
- * check timeline headers for pending packets
+ * kbasep_timeline_has_header_data() - check timeline headers for pending
+ * packets
*
* @timeline: Timeline instance
*
@@ -139,6 +141,7 @@ static inline int copy_stream_header(char __user *buffer, size_t size,
/**
* kbasep_timeline_copy_header - copy timeline headers to the user
+ *
* @timeline: Timeline instance
* @buffer: Pointer to the buffer provided by user
* @size: Maximum amount of data that can be stored in the buffer
@@ -174,6 +177,7 @@ static inline int kbasep_timeline_copy_headers(struct kbase_timeline *timeline,
/**
* kbasep_timeline_io_read - copy data from streams to buffer provided by user
+ *
* @filp: Pointer to file structure
* @buffer: Pointer to the buffer provided by user
* @size: Maximum amount of data that can be stored in the buffer
@@ -198,7 +202,7 @@ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer,
if (!buffer)
return -EINVAL;
- if ((*f_pos < 0) || (size < PACKET_SIZE))
+ if (*f_pos < 0)
return -EINVAL;
mutex_lock(&timeline->reader_lock);
@@ -217,10 +221,10 @@ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer,
}
/* If we already read some packets and there is no
- * packet pending then return back to user.
- * If we don't have any data yet, wait for packet to be
- * submitted.
- */
+ * packet pending then return back to user.
+ * If we don't have any data yet, wait for packet to be
+ * submitted.
+ */
if (copy_len > 0) {
if (!kbasep_timeline_io_packet_pending(
timeline, &stream, &rb_idx_raw))
@@ -241,8 +245,8 @@ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer,
}
/* Check if this packet fits into the user buffer.
- * If so copy its content.
- */
+ * If so copy its content.
+ */
rb_idx = rb_idx_raw % PACKET_COUNT;
rb_size = atomic_read(&stream->buffer[rb_idx].size);
if (rb_size > size - copy_len)
@@ -254,10 +258,10 @@ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer,
}
/* If the distance between read buffer index and write
- * buffer index became more than PACKET_COUNT, then overflow
- * happened and we need to ignore the last portion of bytes
- * that we have just sent to user.
- */
+ * buffer index became more than PACKET_COUNT, then overflow
+ * happened and we need to ignore the last portion of bytes
+ * that we have just sent to user.
+ */
smp_rmb();
wb_idx_raw = atomic_read(&stream->wbi);
@@ -321,6 +325,8 @@ static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait)
static int kbasep_timeline_io_release(struct inode *inode, struct file *filp)
{
struct kbase_timeline *timeline;
+ ktime_t elapsed_time;
+ s64 elapsed_time_ms, time_to_sleep;
KBASE_DEBUG_ASSERT(inode);
KBASE_DEBUG_ASSERT(filp);
@@ -330,6 +336,18 @@ static int kbasep_timeline_io_release(struct inode *inode, struct file *filp)
timeline = (struct kbase_timeline *)filp->private_data;
+	/* Get the time that has passed since the timeline was acquired and sleep
+	 * for long enough that at least TIMELINE_HYSTERESIS_TIMEOUT_MS elapses
+	 * between acquire and release. This prevents userspace from spamming
+	 * acquire and release too quickly.
+	 */
+ elapsed_time = ktime_sub(ktime_get(), timeline->last_acquire_time);
+ elapsed_time_ms = ktime_to_ms(elapsed_time);
+ time_to_sleep = MIN(TIMELINE_HYSTERESIS_TIMEOUT_MS,
+ TIMELINE_HYSTERESIS_TIMEOUT_MS - elapsed_time_ms);
+ if (time_to_sleep > 0)
+ msleep(time_to_sleep);
+
#if MALI_USE_CSF
kbase_csf_tl_reader_stop(&timeline->csf_tl_reader);
#endif
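The release-side hysteresis added above reduces to a small calculation: sleep for whatever remains of TIMELINE_HYSTERESIS_TIMEOUT_MS since the acquire, and one reading of the MIN() is that it also caps the sleep at the full timeout should the elapsed time ever come out negative. A self-contained userspace sketch of that arithmetic (not kernel code):

#include <stdio.h>

#define TIMELINE_HYSTERESIS_TIMEOUT_MS 500LL
#define MIN(a, b) ((a) < (b) ? (a) : (b))

/* How long release should block, given how many ms have passed since acquire.
 * MIN() caps the sleep at the full timeout if elapsed_ms is negative.
 */
static long long release_delay_ms(long long elapsed_ms)
{
        long long t = MIN(TIMELINE_HYSTERESIS_TIMEOUT_MS,
                          TIMELINE_HYSTERESIS_TIMEOUT_MS - elapsed_ms);
        return t > 0 ? t : 0;
}

int main(void)
{
        printf("%lld\n", release_delay_ms(100)); /* 400: sleep out the remainder */
        printf("%lld\n", release_delay_ms(700)); /* 0: already held long enough  */
        printf("%lld\n", release_delay_ms(-50)); /* 500: clamped by MIN()        */
        return 0;
}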
diff --git a/mali_kbase/tl/mali_kbase_timeline_priv.h b/mali_kbase/tl/mali_kbase_timeline_priv.h
index 2825f77..8a58a13 100644
--- a/mali_kbase/tl/mali_kbase_timeline_priv.h
+++ b/mali_kbase/tl/mali_kbase_timeline_priv.h
@@ -34,6 +34,11 @@
#include <linux/atomic.h>
#include <linux/mutex.h>
+/* The minimum amount of time the timeline must be held before release is
+ * allowed, to prevent DoS attacks.
+ */
+#define TIMELINE_HYSTERESIS_TIMEOUT_MS ((s64)500)
+
/**
* struct kbase_timeline - timeline state structure
* @streams: The timeline streams generated by kernel
@@ -49,6 +54,7 @@
* otherwise. See kbase_timeline_io_acquire().
* @obj_header_btc: Remaining bytes to copy for the object stream header
* @aux_header_btc: Remaining bytes to copy for the aux stream header
+ * @last_acquire_time: The time at which timeline was last acquired.
* @csf_tl_reader: CSFFW timeline reader
*/
struct kbase_timeline {
@@ -65,6 +71,7 @@ struct kbase_timeline {
atomic_t *timeline_flags;
size_t obj_header_btc;
size_t aux_header_btc;
+ ktime_t last_acquire_time;
#if MALI_USE_CSF
struct kbase_csf_tl_reader csf_tl_reader;
#endif
diff --git a/mali_kbase/tl/mali_kbase_tlstream.c b/mali_kbase/tl/mali_kbase_tlstream.c
index c6eb3c8..202c12f 100644
--- a/mali_kbase/tl/mali_kbase_tlstream.c
+++ b/mali_kbase/tl/mali_kbase_tlstream.c
@@ -56,20 +56,19 @@ static void kbasep_packet_header_setup(
* @numbered: non-zero if the stream is numbered
*
* Function updates mutable part of packet header in the given buffer.
- * Note that value of data_size must not including size of the header.
+ * Note that value of data_size must not include size of the header.
*/
static void kbasep_packet_header_update(
char *buffer,
size_t data_size,
int numbered)
{
- u32 word0;
u32 word1 = MIPE_PACKET_HEADER_W1((u32)data_size, !!numbered);
KBASE_DEBUG_ASSERT(buffer);
- CSTD_UNUSED(word0);
- memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+	/* Copy word1 to its position in the buffer, just after word0. */
+ memcpy(&buffer[sizeof(u32)], &word1, sizeof(word1));
}
/**
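The fix above drops the unused word0 local and writes word1 at a fixed byte offset, directly after word0. A standalone sketch of that layout; pack_header_w1() is a stand-in for MIPE_PACKET_HEADER_W1, whose real encoding lives in the MIPE headers.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Stand-in packing: the real MIPE_PACKET_HEADER_W1 encodes more than this. */
static uint32_t pack_header_w1(uint32_t data_size, int numbered)
{
        return (data_size & 0x7FFFFFFFu) | ((uint32_t)!!numbered << 31);
}

/* Update the mutable second word of a packet header in place.
 * data_size must not include the size of the header itself.
 */
static void packet_header_update(char *buffer, size_t data_size, int numbered)
{
        uint32_t word1 = pack_header_w1((uint32_t)data_size, numbered);

        /* word0 (the immutable first word) is left untouched; word1 sits
         * immediately after it, hence the sizeof(uint32_t) offset.
         */
        memcpy(&buffer[sizeof(uint32_t)], &word1, sizeof(word1));
}

int main(void)
{
        char header[8] = { 0 };
        uint32_t readback;

        packet_header_update(header, 128, 1);
        memcpy(&readback, &header[4], sizeof(readback));
        printf("word1 = 0x%08x\n", readback);
        return 0;
}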
diff --git a/mali_kbase/tl/mali_kbase_tracepoints.c b/mali_kbase/tl/mali_kbase_tracepoints.c
index 479f0f4..ece23b3 100644
--- a/mali_kbase/tl/mali_kbase_tracepoints.c
+++ b/mali_kbase/tl/mali_kbase_tracepoints.c
@@ -69,6 +69,7 @@ enum tl_msg_id_obj {
KBASE_TL_ARBITER_STARTED,
KBASE_TL_ARBITER_STOP_REQUESTED,
KBASE_TL_ARBITER_STOPPED,
+ KBASE_TL_ARBITER_REQUESTED,
KBASE_JD_GPU_SOFT_RESET,
KBASE_TL_KBASE_NEW_DEVICE,
KBASE_TL_KBASE_DEVICE_PROGRAM_CSG,
@@ -288,6 +289,10 @@ enum tl_msg_id_aux {
"Driver has stopped using gpu", \
"@p", \
"gpu") \
+ TRACEPOINT_DESC(KBASE_TL_ARBITER_REQUESTED, \
+ "Driver has requested the arbiter for gpu access", \
+ "@p", \
+ "gpu") \
TRACEPOINT_DESC(KBASE_JD_GPU_SOFT_RESET, \
"gpu soft reset", \
"@p", \
@@ -1565,6 +1570,28 @@ void __kbase_tlstream_tl_arbiter_stopped(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
+void __kbase_tlstream_tl_arbiter_requested(
+ struct kbase_tlstream *stream,
+ const void *gpu)
+{
+ const u32 msg_id = KBASE_TL_ARBITER_REQUESTED;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(gpu)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &gpu, sizeof(gpu));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
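For reference, the new writer follows the same serialization pattern as the other tracepoints in this file: message id, then timestamp, then each argument in order. Below is a minimal userspace sketch of that byte layout; serialize_bytes() is a stand-in for kbasep_serialize_bytes() and the timestamp here is a placeholder for kbasep_serialize_timestamp().

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <time.h>

/* Stand-in for kbasep_serialize_bytes(): append raw bytes, return new pos. */
static size_t serialize_bytes(char *buf, size_t pos, const void *data, size_t len)
{
        memcpy(&buf[pos], data, len);
        return pos + len;
}

int main(void)
{
        char buffer[64];
        const uint32_t msg_id = 42;               /* e.g. KBASE_TL_ARBITER_REQUESTED */
        const uint64_t timestamp = (uint64_t)time(NULL);
        const void *gpu = buffer;                 /* any pointer-sized argument */
        size_t pos = 0;

        /* msg_size in the driver is sizeof(msg_id) + sizeof(u64) + sizeof(gpu). */
        pos = serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
        pos = serialize_bytes(buffer, pos, &timestamp, sizeof(timestamp));
        pos = serialize_bytes(buffer, pos, &gpu, sizeof(gpu));

        printf("serialized %zu bytes\n", pos);
        return 0;
}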
void __kbase_tlstream_jd_gpu_soft_reset(
struct kbase_tlstream *stream,
const void *gpu)
diff --git a/mali_kbase/tl/mali_kbase_tracepoints.h b/mali_kbase/tl/mali_kbase_tracepoints.h
index a3fd7c1..f3f554a 100644
--- a/mali_kbase/tl/mali_kbase_tracepoints.h
+++ b/mali_kbase/tl/mali_kbase_tracepoints.h
@@ -237,6 +237,9 @@ void __kbase_tlstream_tl_arbiter_stop_requested(
void __kbase_tlstream_tl_arbiter_stopped(
struct kbase_tlstream *stream,
const void *gpu);
+void __kbase_tlstream_tl_arbiter_requested(
+ struct kbase_tlstream *stream,
+ const void *gpu);
void __kbase_tlstream_jd_gpu_soft_reset(
struct kbase_tlstream *stream,
const void *gpu);
@@ -1301,6 +1304,25 @@ struct kbase_tlstream;
} while (0)
/**
+ * KBASE_TLSTREAM_TL_ARBITER_REQUESTED -
+ * Driver has requested the arbiter for gpu access
+ *
+ * @kbdev: Kbase device
+ * @gpu: Name of the GPU object
+ */
+#define KBASE_TLSTREAM_TL_ARBITER_REQUESTED( \
+ kbdev, \
+ gpu \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & TLSTREAM_ENABLED) \
+ __kbase_tlstream_tl_arbiter_requested( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ gpu); \
+ } while (0)
+
+/**
* KBASE_TLSTREAM_JD_GPU_SOFT_RESET -
* gpu soft reset
*