-rw-r--r--  common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h  57
-rw-r--r--  common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h  13
-rw-r--r--  common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h  56
-rw-r--r--  common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h  11
-rw-r--r--  common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h  6
-rw-r--r--  common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h  4
-rw-r--r--  common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h  154
-rw-r--r--  common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h  28
-rw-r--r--  mali_kbase/Kbuild  7
-rw-r--r--  mali_kbase/Makefile  17
-rw-r--r--  mali_kbase/arbiter/mali_kbase_arbiter_pm.c  4
-rw-r--r--  mali_kbase/arbitration/Kconfig  18
-rw-r--r--  mali_kbase/backend/gpu/Kbuild  5
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h  3
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_devfreq.c  14
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_devfreq.h  1
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c  8
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_instr_backend.c  117
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_instr_defs.h  8
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_defs.h  25
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_hw.c  50
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_rb.c  81
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c  4
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_dummy.c  2008
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_dummy.h  177
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_error_generator.c  174
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_linux.c  254
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_linux.h  32
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_backend.c  51
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_ca.c  7
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_ca.h  4
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_ca_devfreq.h  6
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_coarse_demand.h  4
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_defs.h  27
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_driver.c  168
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_internal.h  128
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h  75
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_policy.c  7
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_time.c  4
-rw-r--r--  mali_kbase/context/backend/mali_kbase_context_csf.c  1
-rw-r--r--  mali_kbase/context/mali_kbase_context.c  2
-rw-r--r--  mali_kbase/csf/Kbuild  4
-rw-r--r--  mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c  24
-rw-r--r--  mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.h  2
-rw-r--r--  mali_kbase/csf/mali_kbase_csf.c  432
-rw-r--r--  mali_kbase/csf/mali_kbase_csf.h  204
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_defs.h  86
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_event.c  253
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_event.h  171
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware.c  78
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware.h  116
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware_cfg.c  8
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware_cfg.h  14
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c  26
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_heap_context_alloc.h  12
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_kcpu.c  19
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_kcpu.h  8
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_reset_gpu.c  1
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_scheduler.c  226
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_scheduler.h  11
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tiler_heap.c  86
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tiler_heap.h  24
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tiler_heap_debugfs.c  58
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tiler_heap_debugfs.h  7
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tl_reader.c  20
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tl_reader.h  15
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_trace_buffer.c  33
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_trace_buffer.h  53
-rw-r--r--  mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_csf.h  7
-rw-r--r--  mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_csf.h  3
-rw-r--r--  mali_kbase/debug/mali_kbase_debug_ktrace.h  1
-rw-r--r--  mali_kbase/debug/mali_kbase_debug_ktrace_internal.h  2
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_csf.c  39
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_jm.c  10
-rw-r--r--  mali_kbase/device/mali_kbase_device.c  2
-rw-r--r--  mali_kbase/device/mali_kbase_device.h  32
-rw-r--r--  mali_kbase/device/mali_kbase_device_hw.c  120
-rw-r--r--  mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c  9
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa.c  16
-rw-r--r--  mali_kbase/jm/mali_kbase_jm_defs.h  4
-rw-r--r--  mali_kbase/mali_base_hwconfig_features.h  38
-rw-r--r--  mali_kbase/mali_base_hwconfig_issues.h  98
-rw-r--r--  mali_kbase/mali_kbase_core_linux.c  165
-rw-r--r--  mali_kbase/mali_kbase_ctx_sched.c  7
-rw-r--r--  mali_kbase/mali_kbase_defs.h  38
-rw-r--r--  mali_kbase/mali_kbase_gpuprops.c  1
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_instr.h  23
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_jm.h  17
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_pm.h  4
-rw-r--r--  mali_kbase/mali_kbase_hwcnt.c  1
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_csf.c  238
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_csf.h  10
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c  29
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_jm.c  45
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_legacy.c  179
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_legacy.h  93
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_watchdog_if.h  90
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_watchdog_if_timer.c  159
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_watchdog_if_timer.h  50
-rw-r--r--  mali_kbase/mali_kbase_jd.c  4
-rw-r--r--  mali_kbase/mali_kbase_kinstr_prfcnt.c  1254
-rw-r--r--  mali_kbase/mali_kbase_kinstr_prfcnt.h  45
-rw-r--r--  mali_kbase/mali_kbase_mem.c  4
-rw-r--r--  mali_kbase/mali_kbase_mem.h  4
-rw-r--r--  mali_kbase/mali_kbase_mem_linux.c  10
-rw-r--r--  mali_kbase/mali_kbase_pm.c  29
-rw-r--r--  mali_kbase/mali_kbase_pm.h  8
-rw-r--r--  mali_kbase/mali_kbase_regs_history_debugfs.h  2
-rw-r--r--  mali_kbase/mali_kbase_softjobs.c  17
-rw-r--r--  mali_kbase/mali_kbase_sync.h  15
-rw-r--r--  mali_kbase/mali_malisw.h  11
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.c  148
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.h  5
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_hw.h  2
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_hw_direct.c  21
-rw-r--r--  mali_kbase/tests/include/kutf/kutf_utils.h  3
-rw-r--r--  mali_kbase/tl/backend/mali_kbase_timeline_csf.c  18
-rw-r--r--  mali_kbase/tl/mali_kbase_tracepoints.c  206
-rw-r--r--  mali_kbase/tl/mali_kbase_tracepoints.h  238
119 files changed, 7469 insertions, 1916 deletions
diff --git a/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h
new file mode 100644
index 0000000..9d677ca
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Dummy Model interface
+ */
+
+#ifndef _UAPI_KBASE_MODEL_DUMMY_H_
+#define _UAPI_KBASE_MODEL_DUMMY_H_
+
+#include <linux/types.h>
+
+#define KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS (4)
+#define KBASE_DUMMY_MODEL_COUNTER_PER_CORE (60)
+#define KBASE_DUMMY_MODEL_COUNTERS_PER_BIT (4)
+#define KBASE_DUMMY_MODEL_COUNTER_ENABLED(enable_mask, ctr_idx) \
+ (enable_mask & (1 << (ctr_idx / KBASE_DUMMY_MODEL_COUNTERS_PER_BIT)))
+
+#define KBASE_DUMMY_MODEL_HEADERS_PER_BLOCK 4
+#define KBASE_DUMMY_MODEL_COUNTERS_PER_BLOCK 60
+#define KBASE_DUMMY_MODEL_VALUES_PER_BLOCK \
+ (KBASE_DUMMY_MODEL_COUNTERS_PER_BLOCK + \
+ KBASE_DUMMY_MODEL_HEADERS_PER_BLOCK)
+#define KBASE_DUMMY_MODEL_BLOCK_SIZE \
+ (KBASE_DUMMY_MODEL_VALUES_PER_BLOCK * sizeof(__u32))
+#define KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS 8
+#define KBASE_DUMMY_MODEL_MAX_SHADER_CORES 32
+#define KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS \
+ (1 + 1 + KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS + KBASE_DUMMY_MODEL_MAX_SHADER_CORES)
+#define KBASE_DUMMY_MODEL_COUNTER_TOTAL \
+ (KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * \
+ KBASE_DUMMY_MODEL_COUNTER_PER_CORE)
+
+#define DUMMY_IMPLEMENTATION_SHADER_PRESENT (0xFull)
+#define DUMMY_IMPLEMENTATION_TILER_PRESENT (0x1ull)
+#define DUMMY_IMPLEMENTATION_L2_PRESENT (0x1ull)
+#define DUMMY_IMPLEMENTATION_STACK_PRESENT (0xFull)
+
+#endif /* _UAPI_KBASE_MODEL_DUMMY_H_ */
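[Editor's note] A minimal user-space sketch (not part of the patch) of how the dummy-model sizing and enable-mask macros above combine; the enable mask and counter index are hypothetical, and the constants are copied by hand rather than included from the header:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* 60 counters + 4 header words per block, each value is a u32 */
        const size_t block_size = (60 + 4) * sizeof(uint32_t);
        /* At most 1 FE + 1 tiler + 8 memsys + 32 shader-core blocks */
        const unsigned max_blocks = 1 + 1 + 8 + 32;

        /* One enable bit covers 4 consecutive counters in a block */
        const uint32_t enable_mask = 0x3;  /* hypothetical: counters 0..7 */
        const unsigned ctr_idx = 5;
        const int enabled = !!(enable_mask & (1u << (ctr_idx / 4)));

        printf("max dump size: %zu bytes, counter %u enabled: %d\n",
               max_blocks * block_size, ctr_idx, enabled);
        return 0;
    }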
diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h b/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h
index a5dc745..1d15f57 100644
--- a/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h
+++ b/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h
@@ -251,8 +251,20 @@
/* CS_KERNEL_INPUT_BLOCK register set definitions */
/* GLB_VERSION register */
#define GLB_VERSION_PATCH_SHIFT (0)
+#define GLB_VERSION_PATCH_MASK ((0xFFFF) << GLB_VERSION_PATCH_SHIFT)
+#define GLB_VERSION_PATCH_GET(reg_val) (((reg_val)&GLB_VERSION_PATCH_MASK) >> GLB_VERSION_PATCH_SHIFT)
+#define GLB_VERSION_PATCH_SET(reg_val, value) \
+ (((reg_val) & ~GLB_VERSION_PATCH_MASK) | (((value) << GLB_VERSION_PATCH_SHIFT) & GLB_VERSION_PATCH_MASK))
#define GLB_VERSION_MINOR_SHIFT (16)
+#define GLB_VERSION_MINOR_MASK ((0xFF) << GLB_VERSION_MINOR_SHIFT)
+#define GLB_VERSION_MINOR_GET(reg_val) (((reg_val)&GLB_VERSION_MINOR_MASK) >> GLB_VERSION_MINOR_SHIFT)
+#define GLB_VERSION_MINOR_SET(reg_val, value) \
+ (((reg_val) & ~GLB_VERSION_MINOR_MASK) | (((value) << GLB_VERSION_MINOR_SHIFT) & GLB_VERSION_MINOR_MASK))
#define GLB_VERSION_MAJOR_SHIFT (24)
+#define GLB_VERSION_MAJOR_MASK ((0xFF) << GLB_VERSION_MAJOR_SHIFT)
+#define GLB_VERSION_MAJOR_GET(reg_val) (((reg_val)&GLB_VERSION_MAJOR_MASK) >> GLB_VERSION_MAJOR_SHIFT)
+#define GLB_VERSION_MAJOR_SET(reg_val, value) \
+ (((reg_val) & ~GLB_VERSION_MAJOR_MASK) | (((value) << GLB_VERSION_MAJOR_SHIFT) & GLB_VERSION_MAJOR_MASK))
/* CS_REQ register */
#define CS_REQ_STATE_SHIFT 0
@@ -935,6 +947,7 @@
(((reg_val) & ~CSG_PROTM_SUSPEND_BUF_POINTER_MASK) | \
(((value) << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) & CSG_PROTM_SUSPEND_BUF_POINTER_MASK))
+
/* End of CSG_INPUT_BLOCK register set definitions */
/* CSG_OUTPUT_BLOCK register set definitions */
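[Editor's note] As a hedged aside (not part of the patch), the new GLB_VERSION accessor macros added above decode a firmware interface version as follows; the register value here is invented for illustration and the header is assumed to be included:

    /* Hypothetical GLB_VERSION content: major 3, minor 1, patch 7 */
    u32 glb_version = (3u << GLB_VERSION_MAJOR_SHIFT) |
                      (1u << GLB_VERSION_MINOR_SHIFT) |
                      (7u << GLB_VERSION_PATCH_SHIFT);

    u32 major = GLB_VERSION_MAJOR_GET(glb_version); /* == 3 */
    u32 minor = GLB_VERSION_MINOR_GET(glb_version); /* == 1 */
    u32 patch = GLB_VERSION_PATCH_GET(glb_version); /* == 7 */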
diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h
index ec4870c..3df8a01 100644
--- a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h
+++ b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h
@@ -46,10 +46,14 @@
* trace configurations with CSF trace_command.
* 1.6:
* - Added new HW performance counters interface to all GPUs.
+ * 1.7:
+ * - Added reserved field to QUEUE_GROUP_CREATE ioctl for future use
+ * 1.8:
+ * - Removed Kernel legacy HWC interface
*/
#define BASE_UK_VERSION_MAJOR 1
-#define BASE_UK_VERSION_MINOR 5
+#define BASE_UK_VERSION_MINOR 8
/**
* struct kbase_ioctl_version_check - Check version compatibility between
@@ -179,6 +183,50 @@ struct kbase_ioctl_cs_queue_terminate {
_IOW(KBASE_IOCTL_TYPE, 41, struct kbase_ioctl_cs_queue_terminate)
/**
+ * union kbase_ioctl_cs_queue_group_create_1_6 - Create a GPU command queue
+ * group
+ * @in: Input parameters
+ * @in.tiler_mask: Mask of tiler endpoints the group is allowed to use.
+ * @in.fragment_mask: Mask of fragment endpoints the group is allowed to use.
+ * @in.compute_mask: Mask of compute endpoints the group is allowed to use.
+ * @in.cs_min: Minimum number of CSs required.
+ * @in.priority: Queue group's priority within a process.
+ * @in.tiler_max: Maximum number of tiler endpoints the group is allowed
+ * to use.
+ * @in.fragment_max: Maximum number of fragment endpoints the group is
+ * allowed to use.
+ * @in.compute_max: Maximum number of compute endpoints the group is allowed
+ * to use.
+ * @in.padding: Currently unused, must be zero
+ * @out: Output parameters
+ * @out.group_handle: Handle of a newly created queue group.
+ * @out.padding: Currently unused, must be zero
+ * @out.group_uid: UID of the queue group available to base.
+ */
+union kbase_ioctl_cs_queue_group_create_1_6 {
+ struct {
+ __u64 tiler_mask;
+ __u64 fragment_mask;
+ __u64 compute_mask;
+ __u8 cs_min;
+ __u8 priority;
+ __u8 tiler_max;
+ __u8 fragment_max;
+ __u8 compute_max;
+ __u8 padding[3];
+
+ } in;
+ struct {
+ __u8 group_handle;
+ __u8 padding[3];
+ __u32 group_uid;
+ } out;
+};
+
+#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_6 \
+ _IOWR(KBASE_IOCTL_TYPE, 42, union kbase_ioctl_cs_queue_group_create_1_6)
+
+/**
* union kbase_ioctl_cs_queue_group_create - Create a GPU command queue group
* @in: Input parameters
* @in.tiler_mask: Mask of tiler endpoints the group is allowed to use.
@@ -209,7 +257,7 @@ union kbase_ioctl_cs_queue_group_create {
__u8 fragment_max;
__u8 compute_max;
__u8 padding[3];
-
+ __u64 reserved;
} in;
struct {
__u8 group_handle;
@@ -218,8 +266,8 @@ union kbase_ioctl_cs_queue_group_create {
} out;
};
-#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE \
- _IOWR(KBASE_IOCTL_TYPE, 42, union kbase_ioctl_cs_queue_group_create)
+#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE \
+ _IOWR(KBASE_IOCTL_TYPE, 58, union kbase_ioctl_cs_queue_group_create)
/**
* struct kbase_ioctl_cs_queue_group_term - Terminate a GPU command queue group
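[Editor's note] A hedged user-space sketch (not part of the patch) of calling the renumbered KBASE_IOCTL_CS_QUEUE_GROUP_CREATE defined above, with the reserved field introduced in interface version 1.7; the device file descriptor and endpoint masks are hypothetical and error handling is reduced to a single check:

    #include <string.h>
    #include <sys/ioctl.h>

    /* 'fd' is assumed to be an already set-up kbase device fd */
    static int create_queue_group(int fd, __u8 *handle)
    {
        union kbase_ioctl_cs_queue_group_create create;

        memset(&create, 0, sizeof(create));
        create.in.tiler_mask    = 1;   /* hypothetical endpoint masks */
        create.in.fragment_mask = 1;
        create.in.compute_mask  = 1;
        create.in.cs_min        = 1;
        create.in.priority      = 0;
        create.in.tiler_max     = 1;
        create.in.fragment_max  = 1;
        create.in.compute_max   = 1;
        create.in.reserved      = 0;   /* new in 1.7, must be zero */

        if (ioctl(fd, KBASE_IOCTL_CS_QUEUE_GROUP_CREATE, &create) < 0)
            return -1;

        *handle = create.out.group_handle;
        return 0;
    }

Older clients built against interface 1.6 keep using the _1_6 variant of the union at ioctl number 42, which is why the new layout moves to ioctl number 58.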
diff --git a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h
index 4001a4c..b1720ed 100644
--- a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h
+++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h
@@ -250,6 +250,17 @@
GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN_INVALIDATE | \
GPU_COMMAND_FLUSH_PAYLOAD_OTHER_NONE))
+/* Clean and invalidate L2, LSC, and Other caches */
+#define GPU_COMMAND_CACHE_CLN_INV_FULL \
+ GPU_COMMAND_CODE_PAYLOAD( \
+ GPU_COMMAND_CODE_FLUSH_CACHES, \
+ (GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE | \
+ GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN_INVALIDATE | \
+ GPU_COMMAND_FLUSH_PAYLOAD_OTHER_INVALIDATE))
+
+/* Merge cache flush commands */
+#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) ((cmd1) | (cmd2))
+
/* Places the GPU in protected mode */
#define GPU_COMMAND_SET_PROTECTED_MODE \
GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_SET_PROTECTED_MODE, 0)
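[Editor's note] A hedged fragment (not part of the patch) illustrating the CSF merge semantics: each cache's flush mode sits in its own payload field of a FLUSH_CACHES command, so a bitwise OR produces a command at least as strong as either input, assuming (as the OR-based merge implies) that the *_NONE payload encodings are zero:

    u32 flush_cmd = GPU_COMMAND_FLUSH_CACHE_MERGE(
            GPU_COMMAND_CACHE_CLN_INV_L2_LSC,  /* clean+inv L2 and LSC */
            GPU_COMMAND_CACHE_CLN_INV_FULL);   /* also invalidates Other */

    /* flush_cmd requests the union of both operations, i.e. the same
     * effect as GPU_COMMAND_CACHE_CLN_INV_FULL in this case.
     */

The JM counterpart later in this patch instead merges by picking the numerically larger command, since JM flush commands are single opcodes rather than payload bitmasks.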
diff --git a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h
index dcadcc7..ecf812c 100644
--- a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h
+++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h
@@ -175,6 +175,7 @@
/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
#define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0)
#define JS_CONFIG_START_FLUSH_CLEAN (1u << 8)
+#define JS_CONFIG_START_FLUSH_INV_SHADER_OTHER (2u << 8)
#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8)
#define JS_CONFIG_START_MMU (1u << 10)
#define JS_CONFIG_JOB_CHAIN_FLAG (1u << 11)
@@ -264,6 +265,11 @@
/* GPU_COMMAND cache flush alias to CSF command payload */
#define GPU_COMMAND_CACHE_CLN_INV_L2 GPU_COMMAND_CLEAN_INV_CACHES
#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC GPU_COMMAND_CLEAN_INV_CACHES
+#define GPU_COMMAND_CACHE_CLN_INV_FULL GPU_COMMAND_CLEAN_INV_CACHES
+
+/* Merge cache flush commands */
+#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) \
+ ((cmd1) > (cmd2) ? (cmd1) : (cmd2))
/* IRQ flags */
#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */
diff --git a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h
index 2598e20..d957dea 100644
--- a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h
+++ b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h
@@ -121,9 +121,11 @@
* - Added ioctl 55: set_limited_core_count.
* 11.32:
* - Added new HW performance counters interface to all GPUs.
+ * 11.33:
+ * - Removed Kernel legacy HWC interface
*/
#define BASE_UK_VERSION_MAJOR 11
-#define BASE_UK_VERSION_MINOR 31
+#define BASE_UK_VERSION_MINOR 33
/**
* struct kbase_ioctl_version_check - Check version compatibility between
diff --git a/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h b/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
index 15843ee..2cdd29c 100644
--- a/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
+++ b/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
@@ -126,6 +126,7 @@ enum prfcnt_list_type {
#define PRFCNT_REQUEST_TYPE_MODE FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_REQUEST, 0)
#define PRFCNT_REQUEST_TYPE_ENABLE FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_REQUEST, 1)
+#define PRFCNT_REQUEST_TYPE_SCOPE FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_REQUEST, 2)
#define PRFCNT_SAMPLE_META_TYPE_SAMPLE \
FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_SAMPLE_META, 0)
@@ -150,6 +151,7 @@ struct prfcnt_item_header {
* @PRFCNT_BLOCK_TYPE_TILER: Tiler.
* @PRFCNT_BLOCK_TYPE_MEMORY: Memory System.
* @PRFCNT_BLOCK_TYPE_SHADER_CORE: Shader Core.
+ * @PRFCNT_BLOCK_TYPE_RESERVED: Reserved.
*/
enum prfcnt_block_type {
PRFCNT_BLOCK_TYPE_FE,
@@ -160,10 +162,11 @@ enum prfcnt_block_type {
};
/**
- * enum prfcnt_block_set - Type of performance counter block set.
+ * enum prfcnt_set - Type of performance counter block set.
* @PRFCNT_SET_PRIMARY: Primary.
* @PRFCNT_SET_SECONDARY: Secondary.
* @PRFCNT_SET_TERTIARY: Tertiary.
+ * @PRFCNT_SET_RESERVED: Reserved.
*/
enum prfcnt_set {
PRFCNT_SET_PRIMARY,
@@ -176,19 +179,19 @@ enum prfcnt_set {
* struct prfcnt_enum_block_counter - Performance counter block descriptor.
* @block_type: Type of performance counter block.
* @set: Which SET this represents: primary, secondary or tertiary.
+ * @pad: Padding bytes.
* @num_instances: How many instances of this block type exist in the hardware.
* @num_values: How many entries in the values array there are for samples
* from this block.
- * @pad: Padding bytes.
* @counter_mask: Bitmask that indicates the availability of counters in this
* block.
*/
struct prfcnt_enum_block_counter {
__u8 block_type;
__u8 set;
- __u8 num_instances;
- __u8 num_values;
- __u8 pad[4];
+ __u8 pad[2];
+ __u16 num_instances;
+ __u16 num_values;
__u64 counter_mask[2];
};
@@ -206,12 +209,14 @@ struct prfcnt_enum_request {
/**
* struct prfcnt_enum_item - Performance counter enumeration item.
- * @hdr: Header describing the type of item in the list.
- * @block_counter: Performance counter block descriptor.
- * @request: Request descriptor.
+ * @hdr: Header describing the type of item in the list.
+ * @u: Structure containing descriptor for enumeration item type.
+ * @u.block_counter: Performance counter block descriptor.
+ * @u.request: Request descriptor.
*/
struct prfcnt_enum_item {
struct prfcnt_item_header hdr;
+ /** union u - union of block_counter and request */
union {
struct prfcnt_enum_block_counter block_counter;
struct prfcnt_enum_request request;
@@ -222,6 +227,7 @@ struct prfcnt_enum_item {
* enum prfcnt_mode - Capture mode for counter sampling.
* @PRFCNT_MODE_MANUAL: Manual sampling mode.
* @PRFCNT_MODE_PERIODIC: Periodic sampling mode.
+ * @PRFCNT_MODE_RESERVED: Reserved.
*/
enum prfcnt_mode {
PRFCNT_MODE_MANUAL,
@@ -231,16 +237,19 @@ enum prfcnt_mode {
/**
* struct prfcnt_request_mode - Mode request descriptor.
- * @mode: Capture mode for the session, either manual or periodic.
- * @pad: Padding bytes.
- * @period_us: Period in microseconds, for periodic mode.
+ * @mode: Capture mode for the session, either manual or periodic.
+ * @pad: Padding bytes.
+ * @mode_config: Structure containing configuration for periodic mode.
+ * @mode_config.period: Periodic config.
+ * @mode_config.period.period_ns: Period in nanoseconds, for periodic mode.
*/
struct prfcnt_request_mode {
__u8 mode;
__u8 pad[7];
+ /** union mode_config - request mode configuration*/
union {
struct {
- __u64 period_us;
+ __u64 period_ns;
} periodic;
} mode_config;
};
@@ -261,16 +270,40 @@ struct prfcnt_request_enable {
};
/**
+ * enum prfcnt_scope - Scope of performance counters.
+ * @PRFCNT_SCOPE_GLOBAL: Global scope.
+ * @PRFCNT_SCOPE_RESERVED: Reserved.
+ */
+enum prfcnt_scope {
+ PRFCNT_SCOPE_GLOBAL,
+ PRFCNT_SCOPE_RESERVED = 255,
+};
+
+/**
+ * struct prfcnt_request_scope - Scope request descriptor.
+ * @scope: Scope of the performance counters to capture.
+ * @pad: Padding bytes.
+ */
+struct prfcnt_request_scope {
+ __u8 scope;
+ __u8 pad[7];
+};
+
+/**
* struct prfcnt_request_item - Performance counter request item.
- * @hdr: Header describing the type of item in the list.
- * @req_mode: Mode request descriptor.
- * @req_enable: Enable request descriptor.
+ * @hdr: Header describing the type of item in the list.
+ * @u: Structure containing descriptor for request type.
+ * @u.req_mode: Mode request descriptor.
+ * @u.req_enable: Enable request descriptor.
+ * @u.req_scope: Scope request descriptor.
*/
struct prfcnt_request_item {
struct prfcnt_item_header hdr;
+ /** union u - union of req_mode, req_enable and req_scope */
union {
struct prfcnt_request_mode req_mode;
struct prfcnt_request_enable req_enable;
+ struct prfcnt_request_scope req_scope;
} u;
};
@@ -278,12 +311,19 @@ struct prfcnt_request_item {
* enum prfcnt_request_type - Type of request descriptor.
* @PRFCNT_REQUEST_MODE: Specify the capture mode to be used for the session.
* @PRFCNT_REQUEST_ENABLE: Specify which performance counters to capture.
+ * @PRFCNT_REQUEST_SCOPE: Specify the scope of the performance counters.
*/
enum prfcnt_request_type {
PRFCNT_REQUEST_MODE,
PRFCNT_REQUEST_ENABLE,
+ PRFCNT_REQUEST_SCOPE,
};
+/* This sample contains overflows from dump duration stretch because the sample buffer was full */
+#define SAMPLE_FLAG_OVERFLOW (1u << 0)
+/* This sample has had an error condition for sample duration */
+#define SAMPLE_FLAG_ERROR (1u << 30)
+
/**
* struct prfcnt_sample_metadata - Metadata for counter sample data.
* @timestamp_start: Earliest timestamp that values in this sample represent.
@@ -292,6 +332,7 @@ enum prfcnt_request_type {
* GET_SAMPLE.
* @user_data: User data provided to HWC_CMD_START or HWC_CMD_SAMPLE_*
* @flags: Property flags.
+ * @pad: Padding bytes.
*/
struct prfcnt_sample_metadata {
__u64 timestamp_start;
@@ -302,18 +343,25 @@ struct prfcnt_sample_metadata {
__u32 pad;
};
+/* Maximum number of domains a metadata for clock cycles can refer to */
+#define MAX_REPORTED_DOMAINS (4)
+
/**
* struct prfcnt_clock_metadata - Metadata for clock cycles.
* @num_domains: Number of domains this metadata refers to.
+ * @pad: Padding bytes.
* @cycles: Number of cycles elapsed in each counter domain between
- * timestamp_start and timestamp_end.
+ * timestamp_start and timestamp_end. Valid only for the
+ * first @p num_domains.
*/
struct prfcnt_clock_metadata {
__u32 num_domains;
__u32 pad;
- __u64 *cycles;
+ __u64 cycles[MAX_REPORTED_DOMAINS];
};
+/* This block state is unknown */
+#define BLOCK_STATE_UNKNOWN (0)
/* This block was powered on for at least some portion of the sample */
#define BLOCK_STATE_ON (1 << 0)
/* This block was powered off for at least some portion of the sample */
@@ -336,10 +384,12 @@ struct prfcnt_clock_metadata {
* @block_type: Type of performance counter block.
* @block_idx: Index of performance counter block.
* @set: Set of performance counter block.
+ * @pad_u8: Padding bytes.
* @block_state: Bits set indicate the states which the block is known
* to have operated in during this sample.
* @values_offset: Offset from the start of the mmapped region, to the values
* for this block. The values themselves are an array of __u64.
+ * @pad_u32: Padding bytes.
*/
struct prfcnt_block_metadata {
__u8 block_type;
@@ -351,6 +401,14 @@ struct prfcnt_block_metadata {
__u32 pad_u32;
};
+/**
+ * struct prfcnt_metadata - Performance counter metadata item.
+ * @hdr: Header describing the type of item in the list.
+ * @u: Structure containing descriptor for metadata type.
+ * @u.sample_md: Counter sample data metadata descriptor.
+ * @u.clock_md: Clock cycles metadata descriptor.
+ * @u.block_md: Counter block metadata descriptor.
+ */
struct prfcnt_metadata {
struct prfcnt_item_header hdr;
union {
@@ -360,5 +418,67 @@ struct prfcnt_metadata {
} u;
};
+/**
+ * enum prfcnt_control_cmd_code - Control command code for client session.
+ * @PRFCNT_CONTROL_CMD_START: Start the counter data dump run for
+ * the calling client session.
+ * @PRFCNT_CONTROL_CMD_STOP: Stop the counter data dump run for the
+ * calling client session.
+ * @PRFCNT_CONTROL_CMD_SAMPLE_SYNC: Trigger a synchronous manual sample.
+ * @PRFCNT_CONTROL_CMD_SAMPLE_ASYNC: Trigger an asynchronous manual sample.
+ * @PRFCNT_CONTROL_CMD_DISCARD: Discard all samples which have not yet
+ * been consumed by userspace. Note that
+ * this can race with new samples if
+ * HWC_CMD_STOP is not called first.
+ */
+enum prfcnt_control_cmd_code {
+ PRFCNT_CONTROL_CMD_START = 1,
+ PRFCNT_CONTROL_CMD_STOP,
+ PRFCNT_CONTROL_CMD_SAMPLE_SYNC,
+ PRFCNT_CONTROL_CMD_SAMPLE_ASYNC,
+ PRFCNT_CONTROL_CMD_DISCARD,
+};
+
+/** struct prfcnt_control_cmd - Control command
+ * @cmd: Control command for the session.
+ * @pad: Padding bytes.
+ * @user_data: Pointer to user data, which will be returned as part of
+ * sample metadata. It only affects a single sample if used
+ * with CMD_SAMPLE_SYNC or CMD_SAMPLE_ASYNC. It affects all
+ * samples between CMD_START and CMD_STOP if used with the
+ * periodic sampling.
+ */
+struct prfcnt_control_cmd {
+ __u16 cmd;
+ __u16 pad[3];
+ __u64 user_data;
+};
+
+/** struct prfcnt_sample_access - Metadata to access a sample.
+ * @sequence: Sequence number for the sample.
+ * For GET_SAMPLE, it will be set by the kernel.
+ * For PUT_SAMPLE, it shall be equal to the same value
+ * provided by the kernel for GET_SAMPLE.
+ * @sample_offset_bytes: Offset from the start of the mapped area to the first
+ * entry in the metadata list (sample_metadata) for this
+ * sample.
+ */
+struct prfcnt_sample_access {
+ __u64 sequence;
+ __u64 sample_offset_bytes;
+};
+
+/* The ids of ioctl commands, on a reader file descriptor, magic number */
+#define KBASE_KINSTR_PRFCNT_READER 0xBF
+/* Ioctl ID for issuing a session operational command */
+#define KBASE_IOCTL_KINSTR_PRFCNT_CMD \
+ _IOW(KBASE_KINSTR_PRFCNT_READER, 0x00, struct prfcnt_control_cmd)
+/* Ioctl ID for fetching a dumped sample */
+#define KBASE_IOCTL_KINSTR_PRFCNT_GET_SAMPLE \
+ _IOR(KBASE_KINSTR_PRFCNT_READER, 0x01, struct prfcnt_sample_access)
+/* Ioctl ID for releasing the internal buffer of the previously fetched sample */
+#define KBASE_IOCTL_KINSTR_PRFCNT_PUT_SAMPLE \
+ _IOW(KBASE_KINSTR_PRFCNT_READER, 0x10, struct prfcnt_sample_access)
+
#endif /* _UAPI_KBASE_HWCNT_READER_H_ */
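[Editor's note] A hedged user-space sketch (not part of the patch) of the new reader-fd command flow added above; 'fd' (the kinstr_prfcnt reader file descriptor) and 'mapping' (its mmapped sample area) are assumed to have been set up elsewhere, manual sampling mode is assumed, and error handling is minimal:

    #include <sys/ioctl.h>

    static int take_one_sample(int fd, void *mapping)
    {
        struct prfcnt_control_cmd cmd = { .cmd = PRFCNT_CONTROL_CMD_START };
        struct prfcnt_sample_access sample = { 0 };
        struct prfcnt_metadata *md;

        if (ioctl(fd, KBASE_IOCTL_KINSTR_PRFCNT_CMD, &cmd) < 0)
            return -1;

        /* Trigger a manual sample and wait for it to complete */
        cmd.cmd = PRFCNT_CONTROL_CMD_SAMPLE_SYNC;
        if (ioctl(fd, KBASE_IOCTL_KINSTR_PRFCNT_CMD, &cmd) < 0)
            return -1;

        if (ioctl(fd, KBASE_IOCTL_KINSTR_PRFCNT_GET_SAMPLE, &sample) < 0)
            return -1;

        /* Metadata items for this sample start at the returned offset */
        md = (struct prfcnt_metadata *)((char *)mapping +
                                        sample.sample_offset_bytes);
        (void)md; /* ... walk the item list via md->hdr ... */

        /* Hand the sample buffer back once its values are consumed */
        if (ioctl(fd, KBASE_IOCTL_KINSTR_PRFCNT_PUT_SAMPLE, &sample) < 0)
            return -1;

        cmd.cmd = PRFCNT_CONTROL_CMD_STOP;
        return ioctl(fd, KBASE_IOCTL_KINSTR_PRFCNT_CMD, &cmd) < 0 ? -1 : 0;
    }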
diff --git a/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h b/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h
index 8e1ed55..63dd3c8 100644
--- a/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h
+++ b/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h
@@ -171,34 +171,6 @@ struct kbase_ioctl_hwcnt_reader_setup {
_IOW(KBASE_IOCTL_TYPE, 8, struct kbase_ioctl_hwcnt_reader_setup)
/**
- * struct kbase_ioctl_hwcnt_enable - Enable hardware counter collection
- * @dump_buffer: GPU address to write counters to
- * @fe_bm: counters selection bitmask (Front end)
- * @shader_bm: counters selection bitmask (Shader)
- * @tiler_bm: counters selection bitmask (Tiler)
- * @mmu_l2_bm: counters selection bitmask (MMU_L2)
- */
-struct kbase_ioctl_hwcnt_enable {
- __u64 dump_buffer;
- __u32 fe_bm;
- __u32 shader_bm;
- __u32 tiler_bm;
- __u32 mmu_l2_bm;
-};
-
-/* This IOCTL is deprecated as of R33, and will be removed in R35. */
-#define KBASE_IOCTL_HWCNT_ENABLE \
- _IOW(KBASE_IOCTL_TYPE, 9, struct kbase_ioctl_hwcnt_enable)
-
-/* This IOCTL is deprecated as of R33, and will be removed in R35. */
-#define KBASE_IOCTL_HWCNT_DUMP \
- _IO(KBASE_IOCTL_TYPE, 10)
-
-/* This IOCTL is deprecated as of R33, and will be removed in R35. */
-#define KBASE_IOCTL_HWCNT_CLEAR \
- _IO(KBASE_IOCTL_TYPE, 11)
-
-/**
* struct kbase_ioctl_hwcnt_values - Values to set dummy the dummy counters to.
* @data: Counter samples for the dummy model.
* @size: Size of the counter sample data.
diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild
index e253f1c..afc0f83 100644
--- a/mali_kbase/Kbuild
+++ b/mali_kbase/Kbuild
@@ -71,7 +71,7 @@ endif
#
# Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= '"r34p0-00dev1"'
+MALI_RELEASE_NAME ?= '"r35p0-01eac0"'
# Set up defaults if not defined by build system
ifeq ($(CONFIG_MALI_DEBUG), y)
MALI_UNIT_TEST = 1
@@ -82,8 +82,6 @@ else
endif
MALI_COVERAGE ?= 0
-CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
-
# Kconfig passes in the name with quotes for in-tree builds - remove them.
MALI_PLATFORM_DIR := $(shell echo $(CONFIG_MALI_PLATFORM_NAME))
@@ -122,7 +120,6 @@ ccflags-y = \
-DMALI_RELEASE_NAME=$(MALI_RELEASE_NAME) \
-DMALI_JIT_PRESSURE_LIMIT_BASE=$(MALI_JIT_PRESSURE_LIMIT_BASE) \
-DMALI_INCREMENTAL_RENDERING=$(MALI_INCREMENTAL_RENDERING) \
- -DMALI_KBASE_BUILD \
-DMALI_PLATFORM_DIR=$(MALI_PLATFORM_DIR)
@@ -166,7 +163,6 @@ mali_kbase-y := \
mali_kbase_hwcnt.o \
mali_kbase_hwcnt_gpu.o \
mali_kbase_hwcnt_gpu_narrow.o \
- mali_kbase_hwcnt_legacy.o \
mali_kbase_hwcnt_types.o \
mali_kbase_hwcnt_virtualizer.o \
mali_kbase_softjobs.o \
@@ -206,6 +202,7 @@ mali_kbase-$(CONFIG_SYNC_FILE) += \
ifeq ($(CONFIG_MALI_CSF_SUPPORT),y)
mali_kbase-y += \
mali_kbase_hwcnt_backend_csf.o \
+ mali_kbase_hwcnt_watchdog_if_timer.o \
mali_kbase_hwcnt_backend_csf_if_fw.o
else
mali_kbase-y += \
diff --git a/mali_kbase/Makefile b/mali_kbase/Makefile
index 099da33..850b257 100644
--- a/mali_kbase/Makefile
+++ b/mali_kbase/Makefile
@@ -34,10 +34,19 @@ endif
CONFIG_MALI_MIDGARD ?= m
ifeq ($(CONFIG_MALI_MIDGARD),m)
+ CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
CONFIG_MALI_GATOR_SUPPORT ?= y
CONFIG_MALI_ARBITRATION ?= n
CONFIG_MALI_PARTITION_MANAGER ?= n
+ ifeq ($(origin CONFIG_MALI_ARBITER_MODULES), undefined)
+ CONFIG_MALI_ARBITER_MODULES := $(CONFIG_MALI_ARBITRATION)
+ endif
+
+ ifeq ($(origin CONFIG_MALI_GPU_POWER_MODULES), undefined)
+ CONFIG_MALI_GPU_POWER_MODULES := $(CONFIG_MALI_ARBITRATION)
+ endif
+
ifneq ($(CONFIG_MALI_NO_MALI),y)
# Prevent misuse when CONFIG_MALI_NO_MALI=y
CONFIG_MALI_REAL_HW ?= y
@@ -135,6 +144,8 @@ ifeq ($(CONFIG_MALI_MIDGARD),m)
else
# Prevent misuse when CONFIG_MALI_MIDGARD=n
CONFIG_MALI_ARBITRATION = n
+ CONFIG_MALI_ARBITER_MODULES = n
+ CONFIG_MALI_GPU_POWER_MODULES = n
CONFIG_MALI_KUTF = n
CONFIG_MALI_KUTF_IRQ_TEST = n
CONFIG_MALI_KUTF_CLK_RATE_TRACE = n
@@ -148,6 +159,8 @@ CONFIGS := \
CONFIG_MALI_DMA_FENCE \
CONFIG_MALI_ARBITER_SUPPORT \
CONFIG_MALI_ARBITRATION \
+ CONFIG_MALI_ARBITER_MODULES \
+ CONFIG_MALI_GPU_POWER_MODULES \
CONFIG_MALI_PARTITION_MANAGER \
CONFIG_MALI_REAL_HW \
CONFIG_MALI_GEM5_BUILD \
@@ -191,6 +204,8 @@ MAKE_ARGS := $(foreach config,$(CONFIGS), \
$(value config)=$(value $(value config)), \
$(value config)=n))
+MAKE_ARGS += CONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME)
+
#
# EXTRA_CFLAGS to define the custom CONFIGs on out-of-tree build
#
@@ -201,6 +216,8 @@ EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \
$(if $(filter y m,$(value $(value config))), \
-D$(value config)=1))
+EXTRA_CFLAGS += -DCONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME)
+
#
# KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
#
diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_pm.c b/mali_kbase/arbiter/mali_kbase_arbiter_pm.c
index 62ff4fd..5425f2b 100644
--- a/mali_kbase/arbiter/mali_kbase_arbiter_pm.c
+++ b/mali_kbase/arbiter/mali_kbase_arbiter_pm.c
@@ -1053,8 +1053,8 @@ void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq,
mutex_lock(&arb_freq->arb_freq_lock);
if (arb_freq->arb_freq != freq) {
- ndata.new_rate = freq * KHZ_TO_HZ;
- ndata.old_rate = arb_freq->arb_freq * KHZ_TO_HZ;
+ ndata.new_rate = (unsigned long)freq * KHZ_TO_HZ;
+ ndata.old_rate = (unsigned long)arb_freq->arb_freq * KHZ_TO_HZ;
ndata.gpu_clk_handle = arb_freq;
arb_freq->arb_freq = freq;
arb_freq->freq_updated = true;
diff --git a/mali_kbase/arbitration/Kconfig b/mali_kbase/arbitration/Kconfig
index b4d6202..1935c81 100644
--- a/mali_kbase/arbitration/Kconfig
+++ b/mali_kbase/arbitration/Kconfig
@@ -27,5 +27,23 @@ config MALI_XEN
virtualization setup for Mali
If unsure, say N.
+config MALI_ARBITER_MODULES
+ tristate "Enable mali arbiter modules"
+ depends on MALI_ARBITRATION
+ default y
+ help
+ Enables the build of the arbiter modules used in the reference
+ virtualization setup for Mali
+ If unsure, say N
+
+config MALI_GPU_POWER_MODULES
+ tristate "Enable gpu power modules"
+ depends on MALI_ARBITRATION
+ default y
+ help
+ Enables the build of the gpu power modules used in the reference
+ virtualization setup for Mali
+ If unsure, say N
+
source "drivers/gpu/arm/midgard/arbitration/ptm/Kconfig"
diff --git a/mali_kbase/backend/gpu/Kbuild b/mali_kbase/backend/gpu/Kbuild
index 5dbcff3..90bf6cd 100644
--- a/mali_kbase/backend/gpu/Kbuild
+++ b/mali_kbase/backend/gpu/Kbuild
@@ -47,3 +47,8 @@ endif
mali_kbase-$(CONFIG_MALI_DEVFREQ) += \
backend/gpu/mali_kbase_devfreq.o
+# Dummy model
+mali_kbase-$(CONFIG_MALI_NO_MALI) += backend/gpu/mali_kbase_model_dummy.o
+mali_kbase-$(CONFIG_MALI_NO_MALI) += backend/gpu/mali_kbase_model_linux.o
+# HW error simulation
+mali_kbase-$(CONFIG_MALI_NO_MALI) += backend/gpu/mali_kbase_model_error_generator.o
diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h
index df30b63..a6ee959 100644
--- a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h
+++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h
@@ -64,13 +64,12 @@ int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev);
* kbase_init_lowest_gpu_freq() - Find the lowest frequency that the GPU can
* run as using the device tree, and save this
* within kbdev.
+ * @kbdev: Pointer to kbase device.
*
* This function could be called from kbase_clk_rate_trace_manager_init,
* but is left separate as it can be called as soon as
* dev_pm_opp_of_add_table() has been called to initialize the OPP table.
*
- * @kbdev: Pointer to kbase device.
- *
* Return: 0 in any case.
*/
int kbase_lowest_gpu_freq_init(struct kbase_device *kbdev);
diff --git a/mali_kbase/backend/gpu/mali_kbase_devfreq.c b/mali_kbase/backend/gpu/mali_kbase_devfreq.c
index b117e57..a7110b3 100644
--- a/mali_kbase/backend/gpu/mali_kbase_devfreq.c
+++ b/mali_kbase/backend/gpu/mali_kbase_devfreq.c
@@ -43,7 +43,7 @@
* This function will be called only when the opp table which is compatible with
* "operating-points-v2-mali", is not present in the devicetree for GPU device.
*
- * Return: Voltage value in milli volts, 0 in case of error.
+ * Return: Voltage value in micro volts, 0 in case of error.
*/
static unsigned long get_voltage(struct kbase_device *kbdev, unsigned long freq)
{
@@ -69,8 +69,8 @@ static unsigned long get_voltage(struct kbase_device *kbdev, unsigned long freq)
rcu_read_unlock();
#endif
- /* Return the voltage in milli volts */
- return voltage / 1000;
+ /* Return the voltage in micro volts */
+ return voltage;
}
void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq,
@@ -116,6 +116,9 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
struct dev_pm_opp *opp;
unsigned long nominal_freq;
unsigned long freqs[BASE_MAX_NR_CLOCKS_REGULATORS] = {0};
+#if IS_ENABLED(CONFIG_REGULATOR)
+ unsigned long original_freqs[BASE_MAX_NR_CLOCKS_REGULATORS] = {0};
+#endif
unsigned long volts[BASE_MAX_NR_CLOCKS_REGULATORS] = {0};
unsigned int i;
u64 core_mask;
@@ -187,6 +190,9 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
err = clk_set_rate(kbdev->clocks[i], freqs[i]);
if (!err) {
+#if IS_ENABLED(CONFIG_REGULATOR)
+ original_freqs[i] = kbdev->current_freqs[i];
+#endif
kbdev->current_freqs[i] = freqs[i];
} else {
dev_err(dev, "Failed to set clock %lu (target %lu)\n",
@@ -200,7 +206,7 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
for (i = 0; i < kbdev->nr_clocks; i++) {
if (kbdev->regulators[i] &&
kbdev->current_voltages[i] != volts[i] &&
- kbdev->current_freqs[i] > freqs[i]) {
+ original_freqs[i] > freqs[i]) {
int err;
err = regulator_set_voltage(kbdev->regulators[i],
diff --git a/mali_kbase/backend/gpu/mali_kbase_devfreq.h b/mali_kbase/backend/gpu/mali_kbase_devfreq.h
index 901827e..ac88b02 100644
--- a/mali_kbase/backend/gpu/mali_kbase_devfreq.h
+++ b/mali_kbase/backend/gpu/mali_kbase_devfreq.h
@@ -55,6 +55,7 @@ void kbase_devfreq_enqueue_work(struct kbase_device *kbdev,
* This function will only perform translation if an operating-points-v2-mali
* table is present in devicetree. If one is not present then it will return an
* untranslated frequency (and corresponding voltage) and all cores enabled.
+ * The voltages returned are in micro Volts (uV).
*/
void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq,
u64 *core_mask, unsigned long *freqs, unsigned long *volts);
diff --git a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c
index 7b04286..268a888 100644
--- a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -46,10 +46,10 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev,
registers.core_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(CORE_FEATURES));
#else /* !MALI_USE_CSF */
- if (((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) ==
- GPU_ID2_PRODUCT_TGRX) ||
- ((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) ==
- GPU_ID2_PRODUCT_TVAX))
+ if (!(((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) ==
+ GPU_ID2_PRODUCT_TDUX) ||
+ ((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) ==
+ GPU_ID2_PRODUCT_TODX)))
registers.core_features =
kbase_reg_read(kbdev, GPU_CONTROL_REG(CORE_FEATURES));
#endif /* MALI_USE_CSF */
diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
index 90cc537..1691a87 100644
--- a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
@@ -119,29 +119,62 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
return err;
}
+static void kbasep_instr_hwc_disable_hw_prfcnt(struct kbase_device *kbdev)
+{
+ u32 irq_mask;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+ lockdep_assert_held(&kbdev->hwcnt.lock);
+
+ if (kbase_is_gpu_removed(kbdev))
+ /* GPU has been removed by Arbiter */
+ return;
+
+ /* Disable interrupt */
+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
+
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~PRFCNT_SAMPLE_COMPLETED);
+
+ /* Disable the counters */
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0);
+
+ kbdev->hwcnt.kctx = NULL;
+ kbdev->hwcnt.addr = 0ULL;
+ kbdev->hwcnt.addr_bytes = 0ULL;
+}
+
int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
{
unsigned long flags, pm_flags;
int err = -EINVAL;
- u32 irq_mask;
struct kbase_device *kbdev = kctx->kbdev;
while (1) {
spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_UNRECOVERABLE_ERROR) {
+ /* Instrumentation is in unrecoverable error state,
+ * there is nothing for us to do.
+ */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+ /* Already disabled, return no error. */
+ return 0;
+ }
+
if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
/* Instrumentation is not enabled */
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
- goto out;
+ return err;
}
if (kbdev->hwcnt.kctx != kctx) {
/* Instrumentation has been setup for another context */
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
- goto out;
+ return err;
}
if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
@@ -158,25 +191,7 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
kbdev->hwcnt.backend.triggered = 0;
- if (kbase_is_gpu_removed(kbdev)) {
- /* GPU has been removed by Arbiter */
- spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
- err = 0;
- goto out;
- }
-
- /* Disable interrupt */
- irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
- irq_mask & ~PRFCNT_SAMPLE_COMPLETED);
-
- /* Disable the counters */
- kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0);
-
- kbdev->hwcnt.kctx = NULL;
- kbdev->hwcnt.addr = 0ULL;
- kbdev->hwcnt.addr_bytes = 0ULL;
+ kbasep_instr_hwc_disable_hw_prfcnt(kbdev);
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
@@ -184,9 +199,7 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
dev_dbg(kbdev->dev, "HW counters dumping disabled for context %pK",
kctx);
- err = 0;
- out:
- return err;
+ return 0;
}
int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
@@ -204,7 +217,7 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
/* HW counters are disabled or another dump is ongoing, or we're
- * resetting
+ * resetting, or we are in unrecoverable error state.
*/
goto unlock;
}
@@ -274,6 +287,10 @@ void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ /* If the state is in unrecoverable error, we already wake_up the waiter
+ * and don't need to do any action when sample is done.
+ */
+
if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
kbdev->hwcnt.backend.triggered = 1;
wake_up(&kbdev->hwcnt.backend.wait);
@@ -302,6 +319,8 @@ int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
err = -EINVAL;
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+ } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_UNRECOVERABLE_ERROR) {
+ err = -EIO;
} else {
/* Dump done */
KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
@@ -322,8 +341,8 @@ int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
- /* Check it's the context previously set up and we're not already
- * dumping
+ /* Check it's the context previously set up and we're not in IDLE
+ * state.
*/
if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
KBASE_INSTR_STATE_IDLE)
@@ -347,6 +366,48 @@ out:
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);
+void kbase_instr_hwcnt_on_unrecoverable_error(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ /* If we are already in the unrecoverable error state, return early. */
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_UNRECOVERABLE_ERROR) {
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ return;
+ }
+
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_UNRECOVERABLE_ERROR;
+
+ /* Need to disable HW if it's not disabled yet. */
+ if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED)
+ kbasep_instr_hwc_disable_hw_prfcnt(kbdev);
+
+ /* Wake up any waiters. */
+ kbdev->hwcnt.backend.triggered = 1;
+ wake_up(&kbdev->hwcnt.backend.wait);
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_on_unrecoverable_error);
+
+void kbase_instr_hwcnt_on_before_reset(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ /* A reset is the only way to exit the unrecoverable error state */
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_UNRECOVERABLE_ERROR)
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_on_before_reset);
+
int kbase_instr_backend_init(struct kbase_device *kbdev)
{
spin_lock_init(&kbdev->hwcnt.lock);
diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_defs.h b/mali_kbase/backend/gpu/mali_kbase_instr_defs.h
index e356348..7190f42 100644
--- a/mali_kbase/backend/gpu/mali_kbase_instr_defs.h
+++ b/mali_kbase/backend/gpu/mali_kbase_instr_defs.h
@@ -38,8 +38,12 @@ enum kbase_instr_state {
KBASE_INSTR_STATE_IDLE,
/* Hardware is currently dumping a frame. */
KBASE_INSTR_STATE_DUMPING,
- /* An error has occured during DUMPING (page fault). */
- KBASE_INSTR_STATE_FAULT
+ /* An error has occurred during DUMPING (page fault). */
+ KBASE_INSTR_STATE_FAULT,
+ /* An unrecoverable error has occurred, a reset is the only way to exit
+ * from unrecoverable error state.
+ */
+ KBASE_INSTR_STATE_UNRECOVERABLE_ERROR,
};
/* Structure used for instrumentation and HW counters dumping */
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_defs.h b/mali_kbase/backend/gpu/mali_kbase_jm_defs.h
index e29ace7..3ce3903 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_defs.h
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_defs.h
@@ -38,10 +38,31 @@ struct rb_entry {
struct kbase_jd_atom *katom;
};
+/* SLOT_RB_TAG_PURGED assumes a value that is different from
+ * NULL (SLOT_RB_NULL_TAG_VAL) and will not be the result of
+ * any valid pointer via macro translation: SLOT_RB_TAG_KCTX(x).
+ */
+#define SLOT_RB_TAG_PURGED ((u64)(1 << 1))
+#define SLOT_RB_NULL_TAG_VAL ((u64)0)
+
+/**
+ * SLOT_RB_TAG_KCTX() - a function-like macro for converting a pointer to a
+ * u64 for serving as tagged value.
+ */
+#define SLOT_RB_TAG_KCTX(kctx) (u64)((uintptr_t)(kctx))
/**
* struct slot_rb - Slot ringbuffer
* @entries: Ringbuffer entries
- * @last_context: The last context to submit a job on this slot
+ * @last_kctx_tagged: The last context that submitted a job to the slot's
+ * HEAD_NEXT register. The value is a tagged variant so
+ * must not be dereferenced. It is used in operation to
+ * track when shader core L1 caches might contain a
+ * previous context's data, and so must only be set to
+ * SLOT_RB_NULL_TAG_VAL after reset/powerdown of the
+ * cores. In slot job submission, if there is a kctx
+ * change, and the relevant katom is configured with
+ * BASE_JD_REQ_SKIP_CACHE_START, an L1 read-only cache
+ * maintenance operation is enforced.
* @read_idx: Current read index of buffer
* @write_idx: Current write index of buffer
* @job_chain_flag: Flag used to implement jobchain disambiguation
@@ -49,7 +70,7 @@ struct rb_entry {
struct slot_rb {
struct rb_entry entries[SLOT_RB_SIZE];
- struct kbase_context *last_context;
+ u64 last_kctx_tagged;
u8 read_idx;
u8 write_idx;
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
index 001efd9..ec3b906 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
@@ -33,6 +33,7 @@
#include <mali_kbase_reset_gpu.h>
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_kinstr_jm.h>
+#include <mali_kbase_hwaccess_instr.h>
#include <mali_kbase_hwcnt_context.h>
#include <device/mali_kbase_device.h>
#include <backend/gpu/mali_kbase_irq_internal.h>
@@ -198,7 +199,9 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
u32 cfg;
u64 const jc_head = select_job_chain(katom);
u64 affinity;
+ struct slot_rb *ptr_slot_rb = &kbdev->hwaccess.backend.slot_rb[js];
+ lockdep_assert_held(&kbdev->hwaccess_lock);
KBASE_DEBUG_ASSERT(kbdev);
KBASE_DEBUG_ASSERT(katom);
@@ -227,9 +230,23 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
!(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET))
cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;
- if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START))
- cfg |= JS_CONFIG_START_FLUSH_NO_ACTION;
- else
+ if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START)) {
+ /* Force a cache maintenance operation if the newly submitted
+ * katom to the slot is from a different kctx. For a JM GPU
+ * that has the feature BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
+ * applies a FLUSH_INV_SHADER_OTHER. Otherwise, do a
+ * FLUSH_CLEAN_INVALIDATE.
+ */
+ u64 tagged_kctx = ptr_slot_rb->last_kctx_tagged;
+
+ if (tagged_kctx != SLOT_RB_NULL_TAG_VAL && tagged_kctx != SLOT_RB_TAG_KCTX(kctx)) {
+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER))
+ cfg |= JS_CONFIG_START_FLUSH_INV_SHADER_OTHER;
+ else
+ cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE;
+ } else
+ cfg |= JS_CONFIG_START_FLUSH_NO_ACTION;
+ } else
cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE;
if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) &&
@@ -246,13 +263,13 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
(katom->core_req & BASE_JD_REQ_END_RENDERPASS))
cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK;
- if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) {
+ if (!ptr_slot_rb->job_chain_flag) {
cfg |= JS_CONFIG_JOB_CHAIN_FLAG;
katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN;
- kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = true;
+ ptr_slot_rb->job_chain_flag = true;
} else {
katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN;
- kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = false;
+ ptr_slot_rb->job_chain_flag = false;
}
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg);
@@ -290,6 +307,10 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
&kbdev->gpu_props.props.raw_props.js_features[js],
"ctx_nr,atom_nr");
kbase_kinstr_jm_atom_hw_submit(katom);
+
+ /* Update the slot's last katom submission kctx */
+ ptr_slot_rb->last_kctx_tagged = SLOT_RB_TAG_KCTX(kctx);
+
#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS)
if (!kbase_backend_nr_atoms_submitted(kbdev, js)) {
/* If this is the only job on the slot, trace it as starting */
@@ -300,7 +321,6 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
sizeof(js_string)),
ktime_to_ns(katom->start_timestamp),
(u32)katom->kctx->id, 0, katom->work_id);
- kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx;
}
#endif
@@ -823,7 +843,7 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
if (timeout != 0)
goto exit;
- if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) {
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) {
dev_err(kbdev->dev,
"Issuing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
ZAP_TIMEOUT);
@@ -938,6 +958,7 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js,
target_katom,
JS_COMMAND_HARD_STOP);
+ CSTD_UNUSED(stopped);
}
/**
@@ -1177,6 +1198,13 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
kbase_pm_metrics_update(kbdev, NULL);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ /* Tell hardware counters a reset is about to occur.
+ * If the instr backend is in an unrecoverable error state (e.g. due to
+ * HW being unresponsive), this will transition the backend out of
+ * it, on the assumption a reset will fix whatever problem there was.
+ */
+ kbase_instr_hwcnt_on_before_reset(kbdev);
+
/* Reset the GPU */
kbase_pm_init_hw(kbdev, 0);
@@ -1309,7 +1337,7 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
* @kbdev: kbase device
* @flags: Bitfield indicating impact of reset (see flag defines)
*
- * This function just soft-stops all the slots to ensure that as many jobs as
+ * This function soft-stops all the slots to ensure that as many jobs as
* possible are saved.
*
* Return:
@@ -1323,7 +1351,6 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev,
{
int i;
- CSTD_UNUSED(flags);
KBASE_DEBUG_ASSERT(kbdev);
#ifdef CONFIG_MALI_ARBITER_SUPPORT
@@ -1335,6 +1362,9 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev,
}
#endif
+ if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)
+ kbase_instr_hwcnt_on_unrecoverable_error(kbdev);
+
if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
KBASE_RESET_GPU_NOT_PENDING,
KBASE_RESET_GPU_PREPARED) !=
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
index 1906286..0f2f296 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
@@ -760,6 +760,13 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
/* ***TRANSITION TO HIGHER STATE*** */
fallthrough;
case KBASE_ATOM_EXIT_PROTECTED_RESET:
+ /* L2 cache has been turned off (which is needed prior to the reset of GPU
+ * to exit the protected mode), so the override flag can be safely cleared.
+ * Even if L2 cache is powered up again before the actual reset, it should
+ * not be an issue (there are no jobs running on the GPU).
+ */
+ kbase_pm_protected_override_disable(kbdev);
+
/* Issue the reset to the GPU */
err = kbase_gpu_protected_mode_reset(kbdev);
@@ -768,7 +775,6 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
if (err) {
kbdev->protected_mode_transition = false;
- kbase_pm_protected_override_disable(kbdev);
/* Failed to exit protected mode, fail atom */
katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
@@ -1069,9 +1075,9 @@ kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a,
/**
* kbase_gpu_irq_evict - evict a slot's JSn_HEAD_NEXT atom from the HW if it is
* related to a failed JSn_HEAD atom
- * @kbdev kbase device
- * @js job slot to check
- * @completion_code completion code of the failed atom
+ * @kbdev: kbase device
+ * @js: job slot to check
+ * @completion_code: completion code of the failed atom
*
* Note: 'STOPPED' atoms are considered 'failed', as they are in the HW, but
* unlike other failure codes we _can_ re-run them.
@@ -1129,6 +1135,14 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
if (next_katom->core_req & BASE_JD_REQ_PERMON)
kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+ /* On evicting the next_katom, the last submission kctx on the
+ * given job slot then reverts back to the one that owns katom.
+ * The aim is to enable the next submission that can determine
+ * if the read only shader core L1 cache should be invalidated.
+ */
+ kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged =
+ SLOT_RB_TAG_KCTX(katom->kctx);
+
return true;
}
@@ -1137,11 +1151,11 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
/**
* kbase_gpu_complete_hw - complete the atom in a slot's JSn_HEAD
- * @kbdev kbase device
- * @js job slot to check
- * @completion_code completion code of the completed atom
- * @job_tail value read from JSn_TAIL, for STOPPED atoms
- * @end_timestamp pointer to approximate ktime value when the katom completed
+ * @kbdev: kbase device
+ * @js: job slot to check
+ * @completion_code: completion code of the completed atom
+ * @job_tail: value read from JSn_TAIL, for STOPPED atoms
+ * @end_timestamp: pointer to approximate ktime value when the katom completed
*
* Among other operations, this also executes step 2 of a 2-step process of
* removing any related atoms from a slot's JSn_HEAD_NEXT (ringbuffer index 1),
@@ -1323,8 +1337,6 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
ktime_to_ns(*end_timestamp),
(u32)next_katom->kctx->id, 0,
next_katom->work_id);
- kbdev->hwaccess.backend.slot_rb[js].last_context =
- next_katom->kctx;
} else {
char js_string[16];
@@ -1333,7 +1345,6 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
sizeof(js_string)),
ktime_to_ns(ktime_get()), 0, 0,
0);
- kbdev->hwaccess.backend.slot_rb[js].last_context = 0;
}
}
#endif
@@ -1427,6 +1438,9 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
kbase_jm_complete(kbdev, katom, end_timestamp);
}
+
+ /* Clear the slot's last katom submission kctx on reset */
+ kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = SLOT_RB_NULL_TAG_VAL;
}
/* Re-enable GPU hardware counters if we're resetting from protected
@@ -1649,6 +1663,11 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
kbase_gpu_remove_atom(kbdev,
katom_idx1,
action, true);
+ /* Revert the last_kctx_tagged. */
+ kbdev->hwaccess.backend.slot_rb[js]
+ .last_kctx_tagged =
+ SLOT_RB_TAG_KCTX(katom_idx0->kctx);
+
stop_x_dep_idx1 =
should_stop_x_dep_slot(katom_idx1);
@@ -1724,6 +1743,10 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
kbase_gpu_remove_atom(kbdev, katom_idx1,
action,
false);
+ /* Revert the last_kctx_tagged, or mark it as purged */
+ kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged =
+ kctx_idx0 ? SLOT_RB_TAG_KCTX(katom_idx0->kctx) :
+ SLOT_RB_TAG_PURGED;
} else {
/* idx0 has already completed - stop
* idx1
@@ -1753,7 +1776,8 @@ void kbase_backend_cache_clean(struct kbase_device *kbdev,
struct kbase_jd_atom *katom)
{
if (katom->need_cache_flush_cores_retained) {
- kbase_gpu_start_cache_clean(kbdev);
+ kbase_gpu_start_cache_clean(kbdev,
+ GPU_COMMAND_CACHE_CLN_INV_FULL);
kbase_gpu_wait_cache_clean(kbdev);
katom->need_cache_flush_cores_retained = false;
@@ -1811,3 +1835,34 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev)
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
+
+void kbase_backend_slot_kctx_purge_locked(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ int js;
+ bool tracked = false;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+ u64 tagged_kctx = kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged;
+
+ if (tagged_kctx == SLOT_RB_TAG_KCTX(kctx)) {
+ /* Mark the slot's kctx tracking field as purged */
+ kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = SLOT_RB_TAG_PURGED;
+ tracked = true;
+ }
+ }
+
+ if (tracked) {
+ /* The context had run some jobs before the purge, so other
+ * slots still in the SLOT_RB_NULL_TAG_VAL condition need to
+ * be marked as purged as well.
+ */
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+ if (kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged ==
+ SLOT_RB_NULL_TAG_VAL)
+ kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged =
+ SLOT_RB_TAG_PURGED;
+ }
+ }
+}
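+
+/* Illustrative usage sketch (not part of this patch): the _locked suffix
+ * means the hwaccess_lock must already be held by the caller, e.g.
+ *
+ *	unsigned long flags;
+ *
+ *	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ *	kbase_backend_slot_kctx_purge_locked(kbdev, kctx);
+ *	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ */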
diff --git a/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c b/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c
index 7131546..c2d7a26 100644
--- a/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c
+++ b/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c
@@ -26,7 +26,7 @@
#include "mali_kbase_l2_mmu_config.h"
/**
- * struct l2_mmu_config_limit_region
+ * struct l2_mmu_config_limit_region - L2 MMU limit field
*
* @value: The default value to load into the L2_MMU_CONFIG register
* @mask: The shifted mask of the field in the L2_MMU_CONFIG register
@@ -39,7 +39,7 @@ struct l2_mmu_config_limit_region {
};
/**
- * struct l2_mmu_config_limit
+ * struct l2_mmu_config_limit - L2 MMU read and write limit
*
* @product_model: The GPU for which this entry applies
* @read: Values for the read limit field
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
new file mode 100644
index 0000000..ccf0e7c
--- /dev/null
+++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
@@ -0,0 +1,2008 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/* NOTES:
+ * - A default GPU can be compiled in during the build, by defining
+ * CONFIG_MALI_NO_MALI_DEFAULT_GPU. SCons sets this, which means that
+ * insmod'ing mali_kbase.ko with no arguments after a build with "scons
+ * gpu=tXYZ" will yield the expected GPU ID for tXYZ. This can always be
+ * overridden by passing the 'no_mali_gpu' argument to insmod.
+ *
+ * - if CONFIG_MALI_ERROR_INJECT is defined the error injection system is
+ * activated.
+ */
+
+/* Implementation of failure injection system:
+ *
+ * Error conditions are generated by gpu_generate_error().
+ * Depending on the configuration, gpu_generate_error() either generates a HW
+ * error condition at random (CONFIG_MALI_ERROR_INJECT_RANDOM defined) or
+ * checks error_track_list for an error configuration to be applied to the
+ * current job chain (CONFIG_MALI_ERROR_INJECT_RANDOM not defined).
+ * Each error condition will trigger a specific "state" for a certain set of
+ * registers as per Midgard Architecture Specifications doc.
+ *
+ * According to Midgard Architecture Specifications doc the following registers
+ * are always affected by error conditions:
+ *
+ * JOB Exception:
+ * JOB_IRQ_RAWSTAT
+ * JOB<n> STATUS AREA
+ *
+ * MMU Exception:
+ * MMU_IRQ_RAWSTAT
+ * AS<n>_FAULTSTATUS
+ * AS<n>_FAULTADDRESS
+ *
+ * GPU Exception:
+ * GPU_IRQ_RAWSTAT
+ * GPU_FAULTSTATUS
+ * GPU_FAULTADDRESS
+ *
+ * For further clarification on the model behaviour upon specific error
+ * conditions the user may refer to the Midgard Architecture Specification
+ * document
+ */
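+
+/* For example (see update_register_statuses() further below): an injected
+ * KBASE_JOB_CONFIG_FAULT is reported as JS_STATUS_CONFIG_FAULT in the
+ * affected JS<n>_STATUS register, with the corresponding JOB_FAIL bit
+ * (bit 16 + n) also raised in JOB_IRQ_RAWSTAT.
+ */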
+#include <mali_kbase.h>
+#include <gpu/mali_kbase_gpu_regmap.h>
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#include <mali_kbase_mem_linux.h>
+
+#if MALI_USE_CSF
+#include <csf/mali_kbase_csf_firmware.h>
+
+/* Index of the last value register for each type of core, with the 1st value
+ * register being at index 0.
+ */
+#define IPA_CTL_MAX_VAL_CNT_IDX (KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS - 1)
+
+/* Array for storing the value of SELECT register for each type of core */
+static u64 ipa_ctl_select_config[KBASE_IPA_CORE_TYPE_NUM];
+static bool ipa_control_timer_enabled;
+#endif
+
+#define LO_MASK(M) ((M) & 0xFFFFFFFF)
+
+static u32 get_implementation_register(u32 reg)
+{
+ switch (reg) {
+ case GPU_CONTROL_REG(SHADER_PRESENT_LO):
+ return LO_MASK(DUMMY_IMPLEMENTATION_SHADER_PRESENT);
+ case GPU_CONTROL_REG(TILER_PRESENT_LO):
+ return LO_MASK(DUMMY_IMPLEMENTATION_TILER_PRESENT);
+ case GPU_CONTROL_REG(L2_PRESENT_LO):
+ return LO_MASK(DUMMY_IMPLEMENTATION_L2_PRESENT);
+ case GPU_CONTROL_REG(STACK_PRESENT_LO):
+ return LO_MASK(DUMMY_IMPLEMENTATION_STACK_PRESENT);
+
+ case GPU_CONTROL_REG(SHADER_PRESENT_HI):
+ case GPU_CONTROL_REG(TILER_PRESENT_HI):
+ case GPU_CONTROL_REG(L2_PRESENT_HI):
+ case GPU_CONTROL_REG(STACK_PRESENT_HI):
+ /* *** FALLTHROUGH *** */
+ default:
+ return 0;
+ }
+}
+
+struct {
+ unsigned long prfcnt_base;
+ u32 *prfcnt_base_cpu;
+ struct kbase_device *kbdev;
+ struct tagged_addr *pages;
+ size_t page_count;
+
+ u32 time;
+
+ struct {
+ u32 jm;
+ u32 tiler;
+ u32 l2;
+ u32 shader;
+ } prfcnt_en;
+
+ u64 l2_present;
+ u64 shader_present;
+
+#if !MALI_USE_CSF
+ u64 jm_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
+#else
+ u64 cshw_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
+#endif /* !MALI_USE_CSF */
+ u64 tiler_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
+ u64 l2_counters[KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS *
+ KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
+ u64 shader_counters[KBASE_DUMMY_MODEL_MAX_SHADER_CORES *
+ KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
+
+} performance_counters = {
+ .l2_present = DUMMY_IMPLEMENTATION_L2_PRESENT,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
+};
+
+struct job_slot {
+ int job_active;
+ int job_queued;
+ int job_complete_irq_asserted;
+ int job_irq_mask;
+ int job_disabled;
+};
+
+/**
+ * struct control_reg_values_t - control register values specific to the GPU being 'emulated'
+ * @name: GPU name
+ * @gpu_id: GPU ID to report
+ * @as_present: Bitmap of address spaces present
+ * @thread_max_threads: Maximum number of threads per core
+ * @thread_max_workgroup_size: Maximum number of threads per workgroup
+ * @thread_max_barrier_size: Maximum number of threads per barrier
+ * @thread_features: Thread features, NOT INCLUDING the 2
+ * most-significant bits, which are always set to
+ * IMPLEMENTATION_MODEL.
+ * @core_features: Core features
+ * @tiler_features: Tiler features
+ * @mmu_features: MMU features
+ * @gpu_features_lo: GPU features (low)
+ * @gpu_features_hi: GPU features (high)
+ */
+struct control_reg_values_t {
+ const char *name;
+ u32 gpu_id;
+ u32 as_present;
+ u32 thread_max_threads;
+ u32 thread_max_workgroup_size;
+ u32 thread_max_barrier_size;
+ u32 thread_features;
+ u32 core_features;
+ u32 tiler_features;
+ u32 mmu_features;
+ u32 gpu_features_lo;
+ u32 gpu_features_hi;
+};
+
+struct dummy_model_t {
+ int reset_completed;
+ int reset_completed_mask;
+ int prfcnt_sample_completed;
+ int power_changed_mask; /* 2bits: _ALL,_SINGLE */
+ int power_changed; /* 1bit */
+ bool clean_caches_completed;
+ bool clean_caches_completed_irq_enabled;
+ int power_on; /* 6bits: SHADER[4],TILER,L2 */
+ u32 stack_power_on_lo;
+ u32 coherency_enable;
+ unsigned int job_irq_js_state;
+ struct job_slot slots[NUM_SLOTS];
+ const struct control_reg_values_t *control_reg_values;
+ u32 l2_config;
+ void *data;
+};
+
+void gpu_device_set_data(void *model, void *data)
+{
+ struct dummy_model_t *dummy = (struct dummy_model_t *)model;
+
+ dummy->data = data;
+}
+
+void *gpu_device_get_data(void *model)
+{
+ struct dummy_model_t *dummy = (struct dummy_model_t *)model;
+
+ return dummy->data;
+}
+
+#define signal_int(m, s) m->slots[(s)].job_complete_irq_asserted = 1
+
+/* SCons should pass in a default GPU, but other ways of building (e.g.
+ * in-tree) won't, so define one here in case.
+ */
+#ifndef CONFIG_MALI_NO_MALI_DEFAULT_GPU
+#define CONFIG_MALI_NO_MALI_DEFAULT_GPU "tMIx"
+#endif
+
+static char *no_mali_gpu = CONFIG_MALI_NO_MALI_DEFAULT_GPU;
+module_param(no_mali_gpu, charp, 0000);
+MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as");
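+/* For example (illustrative only), the default can be overridden at module
+ * load time with any GPU name listed in all_control_reg_values[] below:
+ *
+ *	insmod mali_kbase.ko no_mali_gpu="tGOx_r1p0"
+ */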
+
+/* Construct a value for the THREAD_FEATURES register, *except* the two most
+ * significant bits, which are set to IMPLEMENTATION_MODEL in
+ * midgard_model_read_reg().
+ */
+#if MALI_USE_CSF
+#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
+ ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 24))
+#else
+#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
+ ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 16) | ((MAX_TG_SPLIT) << 24))
+#endif
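+
+/* Worked example (JM build, using the "tMIx" values from the table below):
+ *   THREAD_FEATURES_PARTIAL(0x6000, 4, 10)
+ *     = 0x6000 | (4 << 16) | (10 << 24) = 0x0A046000
+ * midgard_model_read_reg() then ORs in (IMPLEMENTATION_MODEL << 30) to set
+ * the two most-significant bits.
+ */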
+
+/* Array associating GPU names with control register values. The first
+ * one is used in the case of no match.
+ */
+static const struct control_reg_values_t all_control_reg_values[] = {
+ {
+ .name = "tMIx",
+ .gpu_id = GPU_ID2_MAKE(6, 0, 10, 0, 0, 1, 0),
+ .as_present = 0xFF,
+ .thread_max_threads = 0x180,
+ .thread_max_workgroup_size = 0x180,
+ .thread_max_barrier_size = 0x180,
+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
+ .tiler_features = 0x809,
+ .mmu_features = 0x2830,
+ .gpu_features_lo = 0,
+ .gpu_features_hi = 0,
+ },
+ {
+ .name = "tHEx",
+ .gpu_id = GPU_ID2_MAKE(6, 2, 0, 1, 0, 3, 0),
+ .as_present = 0xFF,
+ .thread_max_threads = 0x180,
+ .thread_max_workgroup_size = 0x180,
+ .thread_max_barrier_size = 0x180,
+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
+ .tiler_features = 0x809,
+ .mmu_features = 0x2830,
+ .gpu_features_lo = 0,
+ .gpu_features_hi = 0,
+ },
+ {
+ .name = "tSIx",
+ .gpu_id = GPU_ID2_MAKE(7, 0, 0, 0, 1, 1, 0),
+ .as_present = 0xFF,
+ .thread_max_threads = 0x300,
+ .thread_max_workgroup_size = 0x180,
+ .thread_max_barrier_size = 0x180,
+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
+ .tiler_features = 0x209,
+ .mmu_features = 0x2821,
+ .gpu_features_lo = 0,
+ .gpu_features_hi = 0,
+ },
+ {
+ .name = "tDVx",
+ .gpu_id = GPU_ID2_MAKE(7, 0, 0, 3, 0, 0, 0),
+ .as_present = 0xFF,
+ .thread_max_threads = 0x300,
+ .thread_max_workgroup_size = 0x180,
+ .thread_max_barrier_size = 0x180,
+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
+ .tiler_features = 0x209,
+ .mmu_features = 0x2821,
+ .gpu_features_lo = 0,
+ .gpu_features_hi = 0,
+ },
+ {
+ .name = "tNOx",
+ .gpu_id = GPU_ID2_MAKE(7, 2, 1, 1, 0, 0, 0),
+ .as_present = 0xFF,
+ .thread_max_threads = 0x180,
+ .thread_max_workgroup_size = 0x180,
+ .thread_max_barrier_size = 0x180,
+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
+ .tiler_features = 0x809,
+ .mmu_features = 0x2830,
+ .gpu_features_lo = 0,
+ .gpu_features_hi = 0,
+ },
+ {
+ .name = "tGOx_r0p0",
+ .gpu_id = GPU_ID2_MAKE(7, 2, 2, 2, 0, 0, 0),
+ .as_present = 0xFF,
+ .thread_max_threads = 0x180,
+ .thread_max_workgroup_size = 0x180,
+ .thread_max_barrier_size = 0x180,
+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
+ .tiler_features = 0x809,
+ .mmu_features = 0x2830,
+ .gpu_features_lo = 0,
+ .gpu_features_hi = 0,
+ },
+ {
+ .name = "tGOx_r1p0",
+ .gpu_id = GPU_ID2_MAKE(7, 4, 0, 2, 1, 0, 0),
+ .as_present = 0xFF,
+ .thread_max_threads = 0x180,
+ .thread_max_workgroup_size = 0x180,
+ .thread_max_barrier_size = 0x180,
+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
+ .core_features = 0x2,
+ .tiler_features = 0x209,
+ .mmu_features = 0x2823,
+ .gpu_features_lo = 0,
+ .gpu_features_hi = 0,
+ },
+ {
+ .name = "tTRx",
+ .gpu_id = GPU_ID2_MAKE(9, 0, 8, 0, 0, 0, 0),
+ .as_present = 0xFF,
+ .thread_max_threads = 0x180,
+ .thread_max_workgroup_size = 0x180,
+ .thread_max_barrier_size = 0x180,
+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
+ .tiler_features = 0x809,
+ .mmu_features = 0x2830,
+ .gpu_features_lo = 0,
+ .gpu_features_hi = 0,
+ },
+ {
+ .name = "tNAx",
+ .gpu_id = GPU_ID2_MAKE(9, 0, 8, 1, 0, 0, 0),
+ .as_present = 0xFF,
+ .thread_max_threads = 0x180,
+ .thread_max_workgroup_size = 0x180,
+ .thread_max_barrier_size = 0x180,
+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
+ .tiler_features = 0x809,
+ .mmu_features = 0x2830,
+ .gpu_features_lo = 0,
+ .gpu_features_hi = 0,
+ },
+ {
+ .name = "tBEx",
+ .gpu_id = GPU_ID2_MAKE(9, 2, 0, 2, 0, 0, 0),
+ .as_present = 0xFF,
+ .thread_max_threads = 0x180,
+ .thread_max_workgroup_size = 0x180,
+ .thread_max_barrier_size = 0x180,
+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
+ .tiler_features = 0x809,
+ .mmu_features = 0x2830,
+ .gpu_features_lo = 0,
+ .gpu_features_hi = 0,
+ },
+ {
+ .name = "tBAx",
+ .gpu_id = GPU_ID2_MAKE(9, 14, 4, 5, 0, 0, 0),
+ .as_present = 0xFF,
+ .thread_max_threads = 0x180,
+ .thread_max_workgroup_size = 0x180,
+ .thread_max_barrier_size = 0x180,
+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
+ .tiler_features = 0x809,
+ .mmu_features = 0x2830,
+ .gpu_features_lo = 0,
+ .gpu_features_hi = 0,
+ },
+ {
+ .name = "tDUx",
+ .gpu_id = GPU_ID2_MAKE(10, 2, 0, 1, 0, 0, 0),
+ .as_present = 0xFF,
+ .thread_max_threads = 0x180,
+ .thread_max_workgroup_size = 0x180,
+ .thread_max_barrier_size = 0x180,
+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
+ .tiler_features = 0x809,
+ .mmu_features = 0x2830,
+ .gpu_features_lo = 0,
+ .gpu_features_hi = 0,
+ },
+ {
+ .name = "tODx",
+ .gpu_id = GPU_ID2_MAKE(10, 8, 0, 2, 0, 0, 0),
+ .as_present = 0xFF,
+ .thread_max_threads = 0x180,
+ .thread_max_workgroup_size = 0x180,
+ .thread_max_barrier_size = 0x180,
+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
+ .tiler_features = 0x809,
+ .mmu_features = 0x2830,
+ .gpu_features_lo = 0,
+ .gpu_features_hi = 0,
+ },
+ {
+ .name = "tGRx",
+ .gpu_id = GPU_ID2_MAKE(10, 10, 0, 3, 0, 0, 0),
+ .as_present = 0xFF,
+ .thread_max_threads = 0x180,
+ .thread_max_workgroup_size = 0x180,
+ .thread_max_barrier_size = 0x180,
+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
+ .core_features = 0x0, /* core_1e16fma2tex */
+ .tiler_features = 0x809,
+ .mmu_features = 0x2830,
+ .gpu_features_lo = 0,
+ .gpu_features_hi = 0,
+ },
+ {
+ .name = "tVAx",
+ .gpu_id = GPU_ID2_MAKE(10, 12, 0, 4, 0, 0, 0),
+ .as_present = 0xFF,
+ .thread_max_threads = 0x180,
+ .thread_max_workgroup_size = 0x180,
+ .thread_max_barrier_size = 0x180,
+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
+ .core_features = 0x0, /* core_1e16fma2tex */
+ .tiler_features = 0x809,
+ .mmu_features = 0x2830,
+ .gpu_features_lo = 0,
+ .gpu_features_hi = 0,
+ },
+ {
+ .name = "tTUx",
+ .gpu_id = GPU_ID2_MAKE(11, 8, 5, 2, 0, 0, 0),
+ .as_present = 0xFF,
+ .thread_max_threads = 0x800,
+ .thread_max_workgroup_size = 0x400,
+ .thread_max_barrier_size = 0x400,
+ .thread_features = THREAD_FEATURES_PARTIAL(0x10000, 4, 0),
+ .core_features = 0x0, /* core_1e32fma2tex */
+ .tiler_features = 0x809,
+ .mmu_features = 0x2830,
+ .gpu_features_lo = 0xf,
+ .gpu_features_hi = 0,
+ },
+};
+
+struct error_status_t hw_error_status;
+
+#if MALI_USE_CSF
+static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type,
+ u32 cnt_idx, bool is_low_word)
+{
+ u64 *counters_data;
+ u32 core_count = 0;
+ u32 event_index;
+ u64 value = 0;
+ u32 core;
+
+ if (WARN_ON(core_type >= KBASE_IPA_CORE_TYPE_NUM))
+ return 0;
+
+ if (WARN_ON(cnt_idx >= KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS))
+ return 0;
+
+ event_index =
+ (ipa_ctl_select_config[core_type] >> (cnt_idx * 8)) & 0xFF;
+
+ /* Currently only primary counter blocks are supported */
+ if (WARN_ON(event_index >= 64))
+ return 0;
+
+ /* The actual events start from index 4 onwards. Spec also says PRFCNT_EN,
+ * TIMESTAMP_LO or TIMESTAMP_HI pseudo-counters do not make sense for
+ * IPA counters. If selected, the value returned for them will be zero.
+ */
+ if (WARN_ON(event_index <= 3))
+ return 0;
+
+ event_index -= 4;
+
+ switch (core_type) {
+ case KBASE_IPA_CORE_TYPE_CSHW:
+ core_count = 1;
+ counters_data = performance_counters.cshw_counters;
+ break;
+ case KBASE_IPA_CORE_TYPE_MEMSYS:
+ core_count = hweight64(performance_counters.l2_present);
+ counters_data = performance_counters.l2_counters;
+ break;
+ case KBASE_IPA_CORE_TYPE_TILER:
+ core_count = 1;
+ counters_data = performance_counters.tiler_counters;
+ break;
+ case KBASE_IPA_CORE_TYPE_SHADER:
+ core_count = hweight64(performance_counters.shader_present);
+ counters_data = performance_counters.shader_counters;
+ break;
+ default:
+ WARN(1, "Invalid core_type %d\n", core_type);
+ break;
+ }
+
+ for (core = 0; core < core_count; core++) {
+ value += counters_data[event_index];
+ event_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE;
+ }
+
+ if (is_low_word)
+ return (value & U32_MAX);
+ else
+ return (value >> 32);
+}
+
+void gpu_model_clear_prfcnt_values(void)
+{
+ memset(performance_counters.cshw_counters, 0,
+ sizeof(performance_counters.cshw_counters));
+
+ memset(performance_counters.tiler_counters, 0,
+ sizeof(performance_counters.tiler_counters));
+
+ memset(performance_counters.l2_counters, 0,
+ sizeof(performance_counters.l2_counters));
+
+ memset(performance_counters.shader_counters, 0,
+ sizeof(performance_counters.shader_counters));
+}
+KBASE_EXPORT_TEST_API(gpu_model_clear_prfcnt_values);
+#endif
+
+/**
+ * gpu_model_dump_prfcnt_blocks() - Dump performance counter values to buffer
+ *
+ * @values: Array of values to be written out
+ * @out_index: Index into performance counter buffer
+ * @block_count: Number of blocks to dump
+ * @prfcnt_enable_mask: Counter enable mask
+ * @blocks_present: Available blocks bit mask
+ */
+static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index,
+ u32 block_count,
+ u32 prfcnt_enable_mask,
+ u64 blocks_present)
+{
+ u32 block_idx, counter;
+ u32 counter_value = 0;
+ u32 *prfcnt_base;
+ u32 index = 0;
+
+ prfcnt_base = performance_counters.prfcnt_base_cpu;
+
+ for (block_idx = 0; block_idx < block_count; block_idx++) {
+ /* only dump values if core is present */
+ if (!(blocks_present & (1 << block_idx))) {
+#if MALI_USE_CSF
+ /* on CSF, dump a zeroed-out block */
+ memset(&prfcnt_base[*out_index], 0,
+ KBASE_DUMMY_MODEL_BLOCK_SIZE);
+ *out_index += KBASE_DUMMY_MODEL_VALUES_PER_BLOCK;
+#endif /* MALI_USE_CSF */
+ continue;
+ }
+
+ /* write the header */
+ prfcnt_base[*out_index] = performance_counters.time++;
+ prfcnt_base[*out_index+2] = prfcnt_enable_mask;
+ *out_index += KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS;
+
+ /* write the counters */
+ for (counter = 0;
+ counter < KBASE_DUMMY_MODEL_COUNTER_PER_CORE;
+ counter++) {
+ /* HW counter values retrieved through
+ * PRFCNT_SAMPLE request are of 32 bits only.
+ */
+ counter_value = (u32)values[index++];
+ if (KBASE_DUMMY_MODEL_COUNTER_ENABLED(
+ prfcnt_enable_mask, (counter +
+ KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS))) {
+ prfcnt_base[*out_index + counter] =
+ counter_value;
+ }
+ }
+ *out_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE;
+ }
+}
+
+/**
+ * gpu_model_sync_dummy_prfcnt() - Synchronize dumped performance counter values
+ *
+ * Used to ensure counter values are not lost if cache invalidation is performed
+ * prior to reading.
+ */
+static void gpu_model_sync_dummy_prfcnt(void)
+{
+ int i;
+ struct page *pg;
+
+ for (i = 0; i < performance_counters.page_count; i++) {
+ pg = as_page(performance_counters.pages[i]);
+ kbase_sync_single_for_device(performance_counters.kbdev,
+ kbase_dma_addr(pg), PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ }
+}
+
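+/* The dummy dump buffer written by midgard_model_dump_prfcnt() below is a
+ * sequence of blocks in a fixed order: one JM (or CSHW on CSF) block, one
+ * tiler block, KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS memsys blocks and
+ * KBASE_DUMMY_MODEL_MAX_SHADER_CORES shader blocks. Each block that is
+ * written out starts with KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS header
+ * words followed by KBASE_DUMMY_MODEL_COUNTER_PER_CORE counter values (see
+ * gpu_model_dump_prfcnt_blocks() above).
+ */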
+static void midgard_model_dump_prfcnt(void)
+{
+ u32 index = 0;
+
+#if !MALI_USE_CSF
+ gpu_model_dump_prfcnt_blocks(performance_counters.jm_counters, &index,
+ 1, 0xffffffff, 0x1);
+#else
+ gpu_model_dump_prfcnt_blocks(performance_counters.cshw_counters, &index,
+ 1, 0xffffffff, 0x1);
+#endif /* !MALI_USE_CSF */
+ gpu_model_dump_prfcnt_blocks(performance_counters.tiler_counters,
+ &index, 1,
+ performance_counters.prfcnt_en.tiler,
+ DUMMY_IMPLEMENTATION_TILER_PRESENT);
+ gpu_model_dump_prfcnt_blocks(performance_counters.l2_counters, &index,
+ KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS,
+ performance_counters.prfcnt_en.l2,
+ performance_counters.l2_present);
+ gpu_model_dump_prfcnt_blocks(performance_counters.shader_counters,
+ &index, KBASE_DUMMY_MODEL_MAX_SHADER_CORES,
+ performance_counters.prfcnt_en.shader,
+ performance_counters.shader_present);
+
+ gpu_model_sync_dummy_prfcnt();
+
+ /* simulate a 'long' time between samples */
+ performance_counters.time += 10;
+}
+
+static void init_register_statuses(struct dummy_model_t *dummy)
+{
+ int i;
+
+ hw_error_status.errors_mask = 0;
+ hw_error_status.gpu_error_irq = 0;
+ hw_error_status.gpu_fault_status = 0;
+ hw_error_status.job_irq_rawstat = 0;
+ hw_error_status.job_irq_status = 0;
+ hw_error_status.mmu_irq_rawstat = 0;
+ hw_error_status.mmu_irq_mask = 0;
+
+ for (i = 0; i < NUM_SLOTS; i++) {
+ hw_error_status.js_status[i] = 0;
+ hw_error_status.job_irq_rawstat |=
+ (dummy->slots[i].job_complete_irq_asserted) << i;
+ hw_error_status.job_irq_status |=
+ (dummy->slots[i].job_complete_irq_asserted) << i;
+ }
+ for (i = 0; i < NUM_MMU_AS; i++) {
+ hw_error_status.as_command[i] = 0;
+ hw_error_status.as_faultstatus[i] = 0;
+ hw_error_status.mmu_irq_mask |= 1 << i;
+ }
+
+ performance_counters.time = 0;
+}
+
+static void update_register_statuses(struct dummy_model_t *dummy, int job_slot)
+{
+ if (hw_error_status.errors_mask & IS_A_JOB_ERROR) {
+ if (job_slot == hw_error_status.current_job_slot) {
+#if !MALI_USE_CSF
+ if (hw_error_status.js_status[job_slot] == 0) {
+ /* status reg is clean; it can be written */
+
+ switch (hw_error_status.errors_mask &
+ IS_A_JOB_ERROR) {
+ case KBASE_JOB_INTERRUPTED:
+ hw_error_status.js_status[job_slot] =
+ JS_STATUS_INTERRUPTED;
+ break;
+
+ case KBASE_JOB_STOPPED:
+ hw_error_status.js_status[job_slot] =
+ JS_STATUS_STOPPED;
+ break;
+
+ case KBASE_JOB_TERMINATED:
+ hw_error_status.js_status[job_slot] =
+ JS_STATUS_TERMINATED;
+ break;
+
+ case KBASE_JOB_CONFIG_FAULT:
+ hw_error_status.js_status[job_slot] =
+ JS_STATUS_CONFIG_FAULT;
+ break;
+
+ case KBASE_JOB_POWER_FAULT:
+ hw_error_status.js_status[job_slot] =
+ JS_STATUS_POWER_FAULT;
+ break;
+
+ case KBASE_JOB_READ_FAULT:
+ hw_error_status.js_status[job_slot] =
+ JS_STATUS_READ_FAULT;
+ break;
+
+ case KBASE_JOB_WRITE_FAULT:
+ hw_error_status.js_status[job_slot] =
+ JS_STATUS_WRITE_FAULT;
+ break;
+
+ case KBASE_JOB_AFFINITY_FAULT:
+ hw_error_status.js_status[job_slot] =
+ JS_STATUS_AFFINITY_FAULT;
+ break;
+
+ case KBASE_JOB_BUS_FAULT:
+ hw_error_status.js_status[job_slot] =
+ JS_STATUS_BUS_FAULT;
+ break;
+
+ case KBASE_INSTR_INVALID_PC:
+ hw_error_status.js_status[job_slot] =
+ JS_STATUS_INSTR_INVALID_PC;
+ break;
+
+ case KBASE_INSTR_INVALID_ENC:
+ hw_error_status.js_status[job_slot] =
+ JS_STATUS_INSTR_INVALID_ENC;
+ break;
+
+ case KBASE_INSTR_TYPE_MISMATCH:
+ hw_error_status.js_status[job_slot] =
+ JS_STATUS_INSTR_TYPE_MISMATCH;
+ break;
+
+ case KBASE_INSTR_OPERAND_FAULT:
+ hw_error_status.js_status[job_slot] =
+ JS_STATUS_INSTR_OPERAND_FAULT;
+ break;
+
+ case KBASE_INSTR_TLS_FAULT:
+ hw_error_status.js_status[job_slot] =
+ JS_STATUS_INSTR_TLS_FAULT;
+ break;
+
+ case KBASE_INSTR_BARRIER_FAULT:
+ hw_error_status.js_status[job_slot] =
+ JS_STATUS_INSTR_BARRIER_FAULT;
+ break;
+
+ case KBASE_INSTR_ALIGN_FAULT:
+ hw_error_status.js_status[job_slot] =
+ JS_STATUS_INSTR_ALIGN_FAULT;
+ break;
+
+ case KBASE_DATA_INVALID_FAULT:
+ hw_error_status.js_status[job_slot] =
+ JS_STATUS_DATA_INVALID_FAULT;
+ break;
+
+ case KBASE_TILE_RANGE_FAULT:
+ hw_error_status.js_status[job_slot] =
+ JS_STATUS_TILE_RANGE_FAULT;
+ break;
+
+ case KBASE_ADDR_RANGE_FAULT:
+ hw_error_status.js_status[job_slot] =
+ JS_STATUS_ADDRESS_RANGE_FAULT;
+ break;
+
+ case KBASE_OUT_OF_MEMORY:
+ hw_error_status.js_status[job_slot] =
+ JS_STATUS_OUT_OF_MEMORY;
+ break;
+
+ case KBASE_UNKNOWN:
+ hw_error_status.js_status[job_slot] =
+ JS_STATUS_UNKNOWN;
+ break;
+
+ default:
+ model_error_log(KBASE_CORE,
+ "\nAtom Chain 0x%llx: Invalid Error Mask!",
+ hw_error_status.current_jc);
+ break;
+ }
+ }
+#endif /* !MALI_USE_CSF */
+
+ /* we set JOB_FAIL_<n> */
+ hw_error_status.job_irq_rawstat |=
+ (dummy->slots[job_slot].job_complete_irq_asserted) <<
+ (job_slot + 16);
+ hw_error_status.job_irq_status |=
+ (((dummy->slots[job_slot].job_complete_irq_asserted) <<
+ (job_slot)) &
+ (dummy->slots[job_slot].job_irq_mask <<
+ job_slot)) << 16;
+ } else {
+ hw_error_status.job_irq_rawstat |=
+ (dummy->slots[job_slot].job_complete_irq_asserted) <<
+ job_slot;
+ hw_error_status.job_irq_status |=
+ ((dummy->slots[job_slot].job_complete_irq_asserted) <<
+ (job_slot)) &
+ (dummy->slots[job_slot].job_irq_mask <<
+ job_slot);
+ }
+ } else {
+ hw_error_status.job_irq_rawstat |=
+ (dummy->slots[job_slot].job_complete_irq_asserted) <<
+ job_slot;
+ hw_error_status.job_irq_status |=
+ ((dummy->slots[job_slot].job_complete_irq_asserted) <<
+ (job_slot)) &
+ (dummy->slots[job_slot].job_irq_mask << job_slot);
+ } /* end of job register statuses */
+
+ if (hw_error_status.errors_mask & IS_A_MMU_ERROR) {
+ int i;
+
+ for (i = 0; i < NUM_MMU_AS; i++) {
+ if (i == hw_error_status.faulty_mmu_as) {
+ if (hw_error_status.as_faultstatus[i] == 0) {
+ u32 status =
+ hw_error_status.as_faultstatus[i];
+ /* status reg is clean; it can be
+ * written
+ */
+ switch (hw_error_status.errors_mask &
+ IS_A_MMU_ERROR) {
+ case KBASE_TRANSLATION_FAULT:
+ /* 0xCm means TRANSLATION FAULT
+ * (m is mmu_table_level)
+ */
+ status =
+ ((1 << 7) | (1 << 6) |
+ hw_error_status.mmu_table_level
+ );
+ break;
+
+ case KBASE_PERMISSION_FAULT:
+ /*0xC8 means PERMISSION FAULT */
+ status = ((1 << 7) | (1 << 6) |
+ (1 << 3));
+ break;
+
+ case KBASE_TRANSTAB_BUS_FAULT:
+ /* 0xDm means TRANSLATION TABLE
+ * BUS FAULT (m is
+ * mmu_table_level)
+ */
+ status = ((1 << 7) | (1 << 6) |
+ (1 << 4) |
+ hw_error_status.mmu_table_level
+ );
+ break;
+
+ case KBASE_ACCESS_FLAG:
+ /* 0xD8 means ACCESS FLAG */
+ status = ((1 << 7) | (1 << 6) |
+ (1 << 4) | (1 << 3));
+ break;
+
+ default:
+ model_error_log(KBASE_CORE,
+ "\nAtom Chain 0x%llx: Invalid Error Mask!",
+ hw_error_status.current_jc);
+ break;
+ }
+ hw_error_status.as_faultstatus[i] =
+ status;
+ }
+
+ if (hw_error_status.errors_mask &
+ KBASE_TRANSTAB_BUS_FAULT)
+ hw_error_status.mmu_irq_rawstat |=
+ 1 << (16 + i); /* bus error */
+ else
+ hw_error_status.mmu_irq_rawstat |=
+ 1 << i; /* page fault */
+ }
+ }
+ } /*end of mmu register statuses */
+ if (hw_error_status.errors_mask & IS_A_GPU_ERROR) {
+ if (hw_error_status.gpu_fault_status) {
+ /* not the first GPU error reported */
+ hw_error_status.gpu_error_irq |= (1 << 7);
+ } else {
+ hw_error_status.gpu_error_irq |= 1;
+ switch (hw_error_status.errors_mask & IS_A_GPU_ERROR) {
+ case KBASE_DELAYED_BUS_FAULT:
+ hw_error_status.gpu_fault_status = (1 << 7);
+ break;
+
+ case KBASE_SHAREABILITY_FAULT:
+ hw_error_status.gpu_fault_status = (1 << 7) |
+ (1 << 3);
+ break;
+
+ default:
+ model_error_log(KBASE_CORE,
+ "\nAtom Chain 0x%llx: Invalid Error Mask!",
+ hw_error_status.current_jc);
+ break;
+ }
+ }
+ }
+ hw_error_status.errors_mask = 0; /*clear error mask */
+}
+
+#if !MALI_USE_CSF
+static void update_job_irq_js_state(struct dummy_model_t *dummy, int mask)
+{
+ int i;
+
+ pr_debug("%s", "Updating the JS_ACTIVE register");
+
+ for (i = 0; i < NUM_SLOTS; i++) {
+ int slot_active = dummy->slots[i].job_active;
+ int next_busy = dummy->slots[i].job_queued;
+
+ if ((mask & (1 << i)) || (mask & (1 << (i + 16)))) {
+ /* clear the bits we're updating */
+ dummy->job_irq_js_state &= ~((1 << (16 + i)) |
+ (1 << i));
+ if (hw_error_status.js_status[i]) {
+ dummy->job_irq_js_state |= next_busy <<
+ (i + 16);
+ if (mask & (1 << (i + 16))) {
+ /* clear job slot status */
+ hw_error_status.js_status[i] = 0;
+ /* continue execution of jobchain */
+ dummy->slots[i].job_active =
+ dummy->slots[i].job_queued;
+ }
+ } else {
+ /* set bits if needed */
+ dummy->job_irq_js_state |= ((slot_active << i) |
+ (next_busy << (i + 16)));
+ }
+ }
+ }
+ pr_debug("The new snapshot is 0x%08X\n", dummy->job_irq_js_state);
+}
+#endif /* !MALI_USE_CSF */
+
+/**
+ * find_control_reg_values() - Look up constant control register values.
+ * @gpu: GPU name
+ *
+ * Look up the GPU name to find the correct set of control register values for
+ * that GPU. If not found, warn and use the first values in the array.
+ *
+ * Return: Pointer to control register values for that GPU.
+ */
+static const struct control_reg_values_t *find_control_reg_values(const char *gpu)
+{
+ size_t i;
+ const struct control_reg_values_t *ret = NULL;
+
+ for (i = 0; i < ARRAY_SIZE(all_control_reg_values); ++i) {
+ const struct control_reg_values_t * const fcrv = &all_control_reg_values[i];
+
+ if (!strcmp(fcrv->name, gpu)) {
+ ret = fcrv;
+ pr_debug("Found control register values for %s\n", gpu);
+ break;
+ }
+ }
+
+ if (!ret) {
+ ret = &all_control_reg_values[0];
+ pr_warn("Couldn't find control register values for GPU %s; using default %s\n",
+ gpu, ret->name);
+ }
+
+ return ret;
+}
+
+void *midgard_model_create(const void *config)
+{
+ struct dummy_model_t *dummy = NULL;
+
+ dummy = kzalloc(sizeof(*dummy), GFP_KERNEL);
+
+ if (dummy) {
+ dummy->job_irq_js_state = 0;
+ init_register_statuses(dummy);
+ dummy->control_reg_values = find_control_reg_values(no_mali_gpu);
+ }
+ return dummy;
+}
+
+void midgard_model_destroy(void *h)
+{
+ kfree((void *)h);
+}
+
+static void midgard_model_get_outputs(void *h)
+{
+ struct dummy_model_t *dummy = (struct dummy_model_t *)h;
+
+ if (hw_error_status.job_irq_status)
+ gpu_device_raise_irq(dummy, GPU_DUMMY_JOB_IRQ);
+
+ if ((dummy->power_changed && dummy->power_changed_mask) ||
+ (dummy->reset_completed & dummy->reset_completed_mask) ||
+ hw_error_status.gpu_error_irq ||
+ (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled) ||
+ dummy->prfcnt_sample_completed)
+ gpu_device_raise_irq(dummy, GPU_DUMMY_GPU_IRQ);
+
+ if (hw_error_status.mmu_irq_rawstat & hw_error_status.mmu_irq_mask)
+ gpu_device_raise_irq(dummy, GPU_DUMMY_MMU_IRQ);
+}
+
+static void midgard_model_update(void *h)
+{
+ struct dummy_model_t *dummy = (struct dummy_model_t *)h;
+ int i;
+
+ for (i = 0; i < NUM_SLOTS; i++) {
+ if (!dummy->slots[i].job_active)
+ continue;
+
+ if (dummy->slots[i].job_disabled) {
+ update_register_statuses(dummy, i);
+ continue;
+ }
+
+ /* If there are any pending interrupts that have not
+ * been cleared we cannot run the job in the next register
+ * as we will overwrite the register status of the job in
+ * the head registers - which has not yet been read
+ */
+ if ((hw_error_status.job_irq_rawstat & (1 << (i + 16))) ||
+ (hw_error_status.job_irq_rawstat & (1 << i))) {
+ continue;
+ }
+
+ /* this job is done, assert IRQ lines */
+ signal_int(dummy, i);
+#ifdef CONFIG_MALI_ERROR_INJECT
+ midgard_set_error(i);
+#endif /* CONFIG_MALI_ERROR_INJECT */
+ update_register_statuses(dummy, i);
+ /*if this job slot returned failures we cannot use it */
+ if (hw_error_status.job_irq_rawstat & (1 << (i + 16))) {
+ dummy->slots[i].job_active = 0;
+ continue;
+ }
+ /*process next job */
+ dummy->slots[i].job_active = dummy->slots[i].job_queued;
+ dummy->slots[i].job_queued = 0;
+ if (dummy->slots[i].job_active) {
+ if (hw_error_status.job_irq_rawstat & (1 << (i + 16)))
+ model_error_log(KBASE_CORE,
+ "\natom %lld running a job on a dirty slot",
+ hw_error_status.current_jc);
+ }
+ }
+}
+
+static void invalidate_active_jobs(struct dummy_model_t *dummy)
+{
+ int i;
+
+ for (i = 0; i < NUM_SLOTS; i++) {
+ if (dummy->slots[i].job_active) {
+ hw_error_status.job_irq_rawstat |= (1 << (16 + i));
+
+ hw_error_status.js_status[i] = 0x7f; /*UNKNOWN*/
+ }
+ }
+}
+
+u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
+{
+ struct dummy_model_t *dummy = (struct dummy_model_t *)h;
+#if !MALI_USE_CSF
+ if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) &&
+ (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) {
+ int slot_idx = (addr >> 7) & 0xf;
+
+ KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS);
+ if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_LO)) {
+ hw_error_status.current_jc &=
+ ~((u64) (0xFFFFFFFF));
+ hw_error_status.current_jc |= (u64) value;
+ }
+ if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_HI)) {
+ hw_error_status.current_jc &= (u64) 0xFFFFFFFF;
+ hw_error_status.current_jc |=
+ ((u64) value) << 32;
+ }
+ if (addr == JOB_SLOT_REG(slot_idx, JS_COMMAND_NEXT) &&
+ value == 1) {
+ pr_debug("%s", "start detected");
+ KBASE_DEBUG_ASSERT(!dummy->slots[slot_idx].job_active ||
+ !dummy->slots[slot_idx].job_queued);
+ if ((dummy->slots[slot_idx].job_active) ||
+ (hw_error_status.job_irq_rawstat &
+ (1 << (slot_idx + 16)))) {
+ pr_debug("~~~~~~~~~~~ Start: job slot is already active or there are IRQ pending ~~~~~~~~~"
+ );
+ dummy->slots[slot_idx].job_queued = 1;
+ } else {
+ dummy->slots[slot_idx].job_active = 1;
+ }
+ }
+
+ if (addr == JOB_SLOT_REG(slot_idx, JS_COMMAND_NEXT) && value ==
+ 0)
+ dummy->slots[slot_idx].job_queued = 0;
+
+ if ((addr == JOB_SLOT_REG(slot_idx, JS_COMMAND)) &&
+ (value == JS_COMMAND_SOFT_STOP ||
+ value == JS_COMMAND_HARD_STOP)) {
+ /*dummy->slots[slot_idx].job_active = 0; */
+ hw_error_status.current_job_slot = slot_idx;
+ if (value == JS_COMMAND_SOFT_STOP) {
+ hw_error_status.errors_mask = KBASE_JOB_STOPPED;
+ } else { /*value == 3 */
+
+ if (dummy->slots[slot_idx].job_disabled != 0) {
+ pr_debug("enabling slot after HARD_STOP"
+ );
+ dummy->slots[slot_idx].job_disabled = 0;
+ }
+ hw_error_status.errors_mask =
+ KBASE_JOB_TERMINATED;
+ }
+ }
+ } else if (addr == JOB_CONTROL_REG(JOB_IRQ_CLEAR)) {
+ int i;
+
+ for (i = 0; i < NUM_SLOTS; i++) {
+ if (value & ((1 << i) | (1 << (i + 16))))
+ dummy->slots[i].job_complete_irq_asserted = 0;
+ /* hw_error_status.js_status[i] is cleared in
+ * update_job_irq_js_state
+ */
+ }
+ pr_debug("%s", "job irq cleared");
+ update_job_irq_js_state(dummy, value);
+ /*remove error condition for JOB */
+ hw_error_status.job_irq_rawstat &= ~(value);
+ hw_error_status.job_irq_status &= ~(value);
+ } else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) {
+ int i;
+
+ for (i = 0; i < NUM_SLOTS; i++)
+ dummy->slots[i].job_irq_mask = (value >> i) & 0x01;
+ pr_debug("job irq mask to value %x", value);
+ } else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) {
+#else /* !MALI_USE_CSF */
+ if (addr == JOB_CONTROL_REG(JOB_IRQ_CLEAR)) {
+ pr_debug("%s", "job irq cleared");
+
+ hw_error_status.job_irq_rawstat &= ~(value);
+ hw_error_status.job_irq_status &= ~(value);
+ } else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) {
+ /* ignore JOB_IRQ_MASK as it is handled by CSFFW */
+ } else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) {
+#endif /* !MALI_USE_CSF */
+ pr_debug("GPU_IRQ_MASK set to 0x%x", value);
+ dummy->reset_completed_mask = (value >> 8) & 0x01;
+ dummy->power_changed_mask = (value >> 9) & 0x03;
+ dummy->clean_caches_completed_irq_enabled = (value & (1u << 17)) != 0u;
+ } else if (addr == GPU_CONTROL_REG(COHERENCY_ENABLE)) {
+ dummy->coherency_enable = value;
+ } else if (addr == GPU_CONTROL_REG(GPU_IRQ_CLEAR)) {
+ if (value & (1 << 8)) {
+ pr_debug("%s", "gpu RESET_COMPLETED irq cleared");
+ dummy->reset_completed = 0;
+ }
+ if (value & (3 << 9))
+ dummy->power_changed = 0;
+
+ if (value & (1 << 17))
+ dummy->clean_caches_completed = false;
+ if (value & (1 << 16))
+ dummy->prfcnt_sample_completed = 0;
+
+ /*update error status */
+ hw_error_status.gpu_error_irq &= ~(value);
+ } else if (addr == GPU_CONTROL_REG(GPU_COMMAND)) {
+ switch (value) {
+ case GPU_COMMAND_SOFT_RESET:
+ case GPU_COMMAND_HARD_RESET:
+ pr_debug("gpu reset (%d) requested", value);
+ /* no more fault status */
+ hw_error_status.gpu_fault_status = 0;
+ /* completed reset instantly */
+ dummy->reset_completed = 1;
+ break;
+#if MALI_USE_CSF
+ case GPU_COMMAND_CACHE_CLN_INV_L2:
+ case GPU_COMMAND_CACHE_CLN_INV_L2_LSC:
+ case GPU_COMMAND_CACHE_CLN_INV_FULL:
+#else
+ case GPU_COMMAND_CLEAN_CACHES:
+ case GPU_COMMAND_CLEAN_INV_CACHES:
+#endif
+ pr_debug("clean caches requested");
+ dummy->clean_caches_completed = true;
+ break;
+ case GPU_COMMAND_PRFCNT_SAMPLE:
+ midgard_model_dump_prfcnt();
+ dummy->prfcnt_sample_completed = 1;
+ break;
+ default:
+ break;
+ }
+ } else if (addr == GPU_CONTROL_REG(L2_CONFIG)) {
+ dummy->l2_config = value;
+ }
+#if MALI_USE_CSF
+ else if (addr >= GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET) &&
+ addr < GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET +
+ (CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE))) {
+ if (addr == GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET))
+ hw_error_status.job_irq_status = JOB_IRQ_GLOBAL_IF;
+ } else if (addr == IPA_CONTROL_REG(COMMAND)) {
+ pr_debug("Received IPA_CONTROL command");
+ } else if (addr == IPA_CONTROL_REG(TIMER)) {
+ ipa_control_timer_enabled = value ? true : false;
+ } else if ((addr >= IPA_CONTROL_REG(SELECT_CSHW_LO)) &&
+ (addr <= IPA_CONTROL_REG(SELECT_SHADER_HI))) {
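+ /* Each core type has a single 64-bit SELECT register exposed as a
+ * LO/HI pair 8 bytes apart, so the byte offset from SELECT_CSHW_LO
+ * divided by 8 yields the core type and the low 3 bits of the offset
+ * tell a LO access from a HI access.
+ */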
+ enum kbase_ipa_core_type core_type = (enum kbase_ipa_core_type)(
+ (addr - IPA_CONTROL_REG(SELECT_CSHW_LO)) >> 3);
+ bool is_low_word =
+ !((addr - IPA_CONTROL_REG(SELECT_CSHW_LO)) & 7);
+
+ if (is_low_word) {
+ ipa_ctl_select_config[core_type] &= ~(u64)U32_MAX;
+ ipa_ctl_select_config[core_type] |= value;
+ } else {
+ ipa_ctl_select_config[core_type] &= U32_MAX;
+ ipa_ctl_select_config[core_type] |= ((u64)value << 32);
+ }
+ }
+#endif
+ else if (addr == MMU_REG(MMU_IRQ_MASK)) {
+ hw_error_status.mmu_irq_mask = value;
+ } else if (addr == MMU_REG(MMU_IRQ_CLEAR)) {
+ hw_error_status.mmu_irq_rawstat &= (~value);
+ } else if ((addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)) &&
+ (addr <= MMU_AS_REG(15, AS_STATUS))) {
+ int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO))
+ >> 6;
+
+ switch (addr & 0x3F) {
+ case AS_COMMAND:
+ switch (value) {
+ case AS_COMMAND_NOP:
+ hw_error_status.as_command[mem_addr_space] =
+ value;
+ break;
+
+ case AS_COMMAND_UPDATE:
+ hw_error_status.as_command[mem_addr_space] =
+ value;
+ if ((hw_error_status.as_faultstatus[
+ mem_addr_space])
+ && ((hw_error_status.as_transtab[
+ mem_addr_space] & 0x3) != 0)) {
+ model_error_log(KBASE_CORE,
+ "\n ERROR: AS_COMMAND issued UPDATE on error condition before AS_TRANSTAB been set to unmapped\n"
+ );
+ } else if ((hw_error_status.as_faultstatus[
+ mem_addr_space])
+ && ((hw_error_status.as_transtab[
+ mem_addr_space] & 0x3) == 0)) {
+
+ /*invalidate all active jobs */
+ invalidate_active_jobs(dummy);
+ /* error handled */
+ hw_error_status.as_faultstatus[
+ mem_addr_space] = 0;
+ }
+ break;
+
+ case AS_COMMAND_LOCK:
+ case AS_COMMAND_UNLOCK:
+ hw_error_status.as_command[mem_addr_space] =
+ value;
+ break;
+
+ case AS_COMMAND_FLUSH_PT:
+ case AS_COMMAND_FLUSH_MEM:
+ if (hw_error_status.as_command[mem_addr_space]
+ != AS_COMMAND_LOCK)
+ model_error_log(KBASE_CORE,
+ "\n ERROR: AS_COMMAND issued FLUSH without LOCKING before\n"
+ );
+ else /* error handled if any */
+ hw_error_status.as_faultstatus[
+ mem_addr_space] = 0;
+ hw_error_status.as_command[mem_addr_space] =
+ value;
+ break;
+
+ default:
+ model_error_log(KBASE_CORE,
+ "\n WARNING: UNRECOGNIZED AS_COMMAND 0x%x\n",
+ value);
+ break;
+ }
+ break;
+
+ case AS_TRANSTAB_LO:
+ hw_error_status.as_transtab[mem_addr_space] &=
+ ~((u64) (0xffffffff));
+ hw_error_status.as_transtab[mem_addr_space] |=
+ (u64) value;
+ break;
+
+ case AS_TRANSTAB_HI:
+ hw_error_status.as_transtab[mem_addr_space] &=
+ (u64) 0xffffffff;
+ hw_error_status.as_transtab[mem_addr_space] |=
+ ((u64) value) << 32;
+ break;
+
+ case AS_LOCKADDR_LO:
+ case AS_LOCKADDR_HI:
+ case AS_MEMATTR_LO:
+ case AS_MEMATTR_HI:
+ case AS_TRANSCFG_LO:
+ case AS_TRANSCFG_HI:
+ /* Writes ignored */
+ break;
+
+ default:
+ model_error_log(KBASE_CORE,
+ "Dummy model register access: Writing unsupported MMU #%d register 0x%x value 0x%x\n",
+ mem_addr_space, addr, value);
+ break;
+ }
+ } else if (addr >= GPU_CONTROL_REG(PRFCNT_BASE_LO) &&
+ addr <= GPU_CONTROL_REG(PRFCNT_MMU_L2_EN)) {
+ switch (addr) {
+ case PRFCNT_BASE_LO:
+ performance_counters.prfcnt_base |= value;
+ break;
+ case PRFCNT_BASE_HI:
+ performance_counters.prfcnt_base |= ((u64) value) << 32;
+ break;
+#if !MALI_USE_CSF
+ case PRFCNT_JM_EN:
+ performance_counters.prfcnt_en.jm = value;
+ break;
+#endif /* !MALI_USE_CSF */
+ case PRFCNT_SHADER_EN:
+ performance_counters.prfcnt_en.shader = value;
+ break;
+ case PRFCNT_TILER_EN:
+ performance_counters.prfcnt_en.tiler = value;
+ break;
+ case PRFCNT_MMU_L2_EN:
+ performance_counters.prfcnt_en.l2 = value;
+ break;
+ }
+ } else {
+ switch (addr) {
+ case TILER_PWRON_LO:
+ dummy->power_on |= (value & 1) << 1;
+ /* Also ensure L2 is powered on */
+ dummy->power_on |= value & 1;
+ dummy->power_changed = 1;
+ break;
+ case SHADER_PWRON_LO:
+ dummy->power_on |= (value & 0xF) << 2;
+ dummy->power_changed = 1;
+ break;
+ case L2_PWRON_LO:
+ dummy->power_on |= value & 1;
+ dummy->power_changed = 1;
+ break;
+ case STACK_PWRON_LO:
+ dummy->stack_power_on_lo |= value;
+ dummy->power_changed = 1;
+ break;
+ case TILER_PWROFF_LO:
+ dummy->power_on &= ~((value & 1) << 1);
+ dummy->power_changed = 1;
+ break;
+ case SHADER_PWROFF_LO:
+ dummy->power_on &= ~((value & 0xF) << 2);
+ dummy->power_changed = 1;
+ break;
+ case L2_PWROFF_LO:
+ dummy->power_on &= ~(value & 1);
+ /* Also ensure tiler is powered off */
+ dummy->power_on &= ~((value & 1) << 1);
+ dummy->power_changed = 1;
+ break;
+ case STACK_PWROFF_LO:
+ dummy->stack_power_on_lo &= ~value;
+ dummy->power_changed = 1;
+ break;
+
+ case TILER_PWROFF_HI:
+ case SHADER_PWROFF_HI:
+ case L2_PWROFF_HI:
+ case PWR_KEY:
+ case PWR_OVERRIDE0:
+#if !MALI_USE_CSF
+ case JM_CONFIG:
+#else /* !MALI_USE_CSF */
+ case CSF_CONFIG:
+#endif /* !MALI_USE_CSF */
+ case SHADER_CONFIG:
+ case TILER_CONFIG:
+ case L2_MMU_CONFIG:
+ /* Writes ignored */
+ break;
+ default:
+ model_error_log(KBASE_CORE,
+ "Dummy model register access: Writing unsupported register 0x%x value 0x%x\n",
+ addr, value);
+ break;
+ }
+ }
+
+ midgard_model_update(dummy);
+ midgard_model_get_outputs(dummy);
+
+ return 1;
+}
+
+u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
+{
+ struct dummy_model_t *dummy = (struct dummy_model_t *)h;
+ *value = 0; /* 0 by default */
+#if !MALI_USE_CSF
+ if (addr == JOB_CONTROL_REG(JOB_IRQ_JS_STATE)) {
+ pr_debug("%s", "JS_ACTIVE being read");
+
+ *value = dummy->job_irq_js_state;
+ } else if (addr == GPU_CONTROL_REG(GPU_ID)) {
+#else /* !MALI_USE_CSF */
+ if (addr == GPU_CONTROL_REG(GPU_ID)) {
+#endif /* !MALI_USE_CSF */
+
+ *value = dummy->control_reg_values->gpu_id;
+ } else if (addr == JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)) {
+ *value = hw_error_status.job_irq_rawstat;
+ pr_debug("%s", "JS_IRQ_RAWSTAT being read");
+ } else if (addr == JOB_CONTROL_REG(JOB_IRQ_STATUS)) {
+ *value = hw_error_status.job_irq_status;
+ pr_debug("JS_IRQ_STATUS being read %x", *value);
+ }
+#if !MALI_USE_CSF
+ else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) {
+ int i;
+
+ *value = 0;
+ for (i = 0; i < NUM_SLOTS; i++)
+ *value |= dummy->slots[i].job_irq_mask << i;
+ pr_debug("JS_IRQ_MASK being read %x", *value);
+ }
+#else /* !MALI_USE_CSF */
+ else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) {
+ /* ignore JOB_IRQ_MASK as it is handled by CSFFW */
+ }
+#endif /* !MALI_USE_CSF */
+ else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) {
+ *value = (dummy->reset_completed_mask << 8) |
+ (dummy->power_changed_mask << 9) | (1 << 7) | 1;
+ pr_debug("GPU_IRQ_MASK read %x", *value);
+ } else if (addr == GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) {
+ *value = (dummy->power_changed << 9) | (dummy->power_changed << 10) |
+ (dummy->reset_completed << 8) |
+ ((dummy->clean_caches_completed ? 1u : 0u) << 17) |
+ (dummy->prfcnt_sample_completed << 16) | hw_error_status.gpu_error_irq;
+ pr_debug("GPU_IRQ_RAWSTAT read %x", *value);
+ } else if (addr == GPU_CONTROL_REG(GPU_IRQ_STATUS)) {
+ *value = ((dummy->power_changed && (dummy->power_changed_mask & 0x1)) << 9) |
+ ((dummy->power_changed && (dummy->power_changed_mask & 0x2)) << 10) |
+ ((dummy->reset_completed & dummy->reset_completed_mask) << 8) |
+ (((dummy->clean_caches_completed &&
+ dummy->clean_caches_completed_irq_enabled) ?
+ 1u :
+ 0u)
+ << 17) |
+ (dummy->prfcnt_sample_completed << 16) | hw_error_status.gpu_error_irq;
+ pr_debug("GPU_IRQ_STAT read %x", *value);
+ } else if (addr == GPU_CONTROL_REG(GPU_STATUS)) {
+ *value = 0;
+#if !MALI_USE_CSF
+ } else if (addr == GPU_CONTROL_REG(LATEST_FLUSH)) {
+ *value = 0;
+#endif
+ } else if (addr == GPU_CONTROL_REG(GPU_FAULTSTATUS)) {
+ *value = hw_error_status.gpu_fault_status;
+ } else if (addr == GPU_CONTROL_REG(L2_CONFIG)) {
+ *value = dummy->l2_config;
+ } else if ((addr >= GPU_CONTROL_REG(SHADER_PRESENT_LO)) &&
+ (addr <= GPU_CONTROL_REG(L2_MMU_CONFIG))) {
+ switch (addr) {
+ case GPU_CONTROL_REG(SHADER_PRESENT_LO):
+ case GPU_CONTROL_REG(SHADER_PRESENT_HI):
+ case GPU_CONTROL_REG(TILER_PRESENT_LO):
+ case GPU_CONTROL_REG(TILER_PRESENT_HI):
+ case GPU_CONTROL_REG(L2_PRESENT_LO):
+ case GPU_CONTROL_REG(L2_PRESENT_HI):
+ case GPU_CONTROL_REG(STACK_PRESENT_LO):
+ case GPU_CONTROL_REG(STACK_PRESENT_HI):
+ *value = get_implementation_register(addr);
+ break;
+ case GPU_CONTROL_REG(SHADER_READY_LO):
+ *value = (dummy->power_on >> 0x02) &
+ get_implementation_register(
+ GPU_CONTROL_REG(SHADER_PRESENT_LO));
+ break;
+ case GPU_CONTROL_REG(TILER_READY_LO):
+ *value = (dummy->power_on >> 0x01) &
+ get_implementation_register(
+ GPU_CONTROL_REG(TILER_PRESENT_LO));
+ break;
+ case GPU_CONTROL_REG(L2_READY_LO):
+ *value = dummy->power_on &
+ get_implementation_register(
+ GPU_CONTROL_REG(L2_PRESENT_LO));
+ break;
+ case GPU_CONTROL_REG(STACK_READY_LO):
+ *value = dummy->stack_power_on_lo &
+ get_implementation_register(
+ GPU_CONTROL_REG(STACK_PRESENT_LO));
+ break;
+
+ case GPU_CONTROL_REG(SHADER_READY_HI):
+ case GPU_CONTROL_REG(TILER_READY_HI):
+ case GPU_CONTROL_REG(L2_READY_HI):
+ case GPU_CONTROL_REG(STACK_READY_HI):
+ *value = 0;
+ break;
+
+ case GPU_CONTROL_REG(SHADER_PWRTRANS_LO):
+ case GPU_CONTROL_REG(SHADER_PWRTRANS_HI):
+ case GPU_CONTROL_REG(TILER_PWRTRANS_LO):
+ case GPU_CONTROL_REG(TILER_PWRTRANS_HI):
+ case GPU_CONTROL_REG(L2_PWRTRANS_LO):
+ case GPU_CONTROL_REG(L2_PWRTRANS_HI):
+ case GPU_CONTROL_REG(STACK_PWRTRANS_LO):
+ case GPU_CONTROL_REG(STACK_PWRTRANS_HI):
+ *value = 0;
+ break;
+
+ case GPU_CONTROL_REG(SHADER_PWRACTIVE_LO):
+ case GPU_CONTROL_REG(SHADER_PWRACTIVE_HI):
+ case GPU_CONTROL_REG(TILER_PWRACTIVE_LO):
+ case GPU_CONTROL_REG(TILER_PWRACTIVE_HI):
+ case GPU_CONTROL_REG(L2_PWRACTIVE_LO):
+ case GPU_CONTROL_REG(L2_PWRACTIVE_HI):
+ *value = 0;
+ break;
+
+#if !MALI_USE_CSF
+ case GPU_CONTROL_REG(JM_CONFIG):
+#else /* !MALI_USE_CSF */
+ case GPU_CONTROL_REG(CSF_CONFIG):
+#endif /* !MALI_USE_CSF */
+
+ case GPU_CONTROL_REG(SHADER_CONFIG):
+ case GPU_CONTROL_REG(TILER_CONFIG):
+ case GPU_CONTROL_REG(L2_MMU_CONFIG):
+ *value = 0;
+ break;
+
+ case GPU_CONTROL_REG(COHERENCY_FEATURES):
+ *value = BIT(0) | BIT(1); /* ace_lite and ace, respectively. */
+ break;
+ case GPU_CONTROL_REG(COHERENCY_ENABLE):
+ *value = dummy->coherency_enable;
+ break;
+
+ case GPU_CONTROL_REG(THREAD_TLS_ALLOC):
+ *value = 0;
+ break;
+
+ default:
+ model_error_log(KBASE_CORE,
+ "Dummy model register access: Reading unknown control reg 0x%x\n",
+ addr);
+ break;
+ }
+#if !MALI_USE_CSF
+ } else if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) &&
+ (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) {
+ int slot_idx = (addr >> 7) & 0xf;
+ int sub_reg = addr & 0x7F;
+
+ KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS);
+ switch (sub_reg) {
+ case JS_HEAD_NEXT_LO:
+ *value = (u32) ((hw_error_status.current_jc) &
+ 0xFFFFFFFF);
+ break;
+ case JS_HEAD_NEXT_HI:
+ *value = (u32) (hw_error_status.current_jc >> 32);
+ break;
+ case JS_STATUS:
+ if (hw_error_status.js_status[slot_idx])
+ *value = hw_error_status.js_status[slot_idx];
+ else /* 0x08 means active, 0x00 idle */
+ *value = (dummy->slots[slot_idx].job_active)
+ << 3;
+ break;
+ case JS_COMMAND_NEXT:
+ *value = dummy->slots[slot_idx].job_queued;
+ break;
+
+ /* The dummy model does not implement these registers;
+ * avoid printing error messages
+ */
+ case JS_HEAD_HI:
+ case JS_HEAD_LO:
+ case JS_TAIL_HI:
+ case JS_TAIL_LO:
+ case JS_FLUSH_ID_NEXT:
+ break;
+
+ default:
+ model_error_log(KBASE_CORE,
+ "Dummy model register access: unknown job slot reg 0x%02X being read\n",
+ sub_reg);
+ break;
+ }
+#endif /* !MALI_USE_CSF */
+ } else if (addr == GPU_CONTROL_REG(AS_PRESENT)) {
+ *value = dummy->control_reg_values->as_present;
+#if !MALI_USE_CSF
+ } else if (addr == GPU_CONTROL_REG(JS_PRESENT)) {
+ *value = 0x7;
+#endif /* !MALI_USE_CSF */
+ } else if (addr >= GPU_CONTROL_REG(TEXTURE_FEATURES_0) &&
+ addr <= GPU_CONTROL_REG(TEXTURE_FEATURES_3)) {
+ switch (addr) {
+ case GPU_CONTROL_REG(TEXTURE_FEATURES_0):
+ *value = 0xfffff;
+ break;
+
+ case GPU_CONTROL_REG(TEXTURE_FEATURES_1):
+ *value = 0xffff;
+ break;
+
+ case GPU_CONTROL_REG(TEXTURE_FEATURES_2):
+ *value = 0x9f81ffff;
+ break;
+
+ case GPU_CONTROL_REG(TEXTURE_FEATURES_3):
+ *value = 0;
+ break;
+ }
+#if !MALI_USE_CSF
+ } else if (addr >= GPU_CONTROL_REG(JS0_FEATURES) &&
+ addr <= GPU_CONTROL_REG(JS15_FEATURES)) {
+ switch (addr) {
+ case GPU_CONTROL_REG(JS0_FEATURES):
+ *value = 0x20e;
+ break;
+
+ case GPU_CONTROL_REG(JS1_FEATURES):
+ *value = 0x1fe;
+ break;
+
+ case GPU_CONTROL_REG(JS2_FEATURES):
+ *value = 0x7e;
+ break;
+
+ default:
+ *value = 0;
+ break;
+ }
+#endif /* !MALI_USE_CSF */
+ } else if (addr >= GPU_CONTROL_REG(L2_FEATURES)
+ && addr <= GPU_CONTROL_REG(MMU_FEATURES)) {
+ switch (addr) {
+ case GPU_CONTROL_REG(L2_FEATURES):
+ *value = 0x6100206;
+ break;
+
+ case GPU_CONTROL_REG(CORE_FEATURES):
+ *value = dummy->control_reg_values->core_features;
+ break;
+
+ case GPU_CONTROL_REG(TILER_FEATURES):
+ *value = dummy->control_reg_values->tiler_features;
+ break;
+
+ case GPU_CONTROL_REG(MEM_FEATURES):
+ /* Bit 0: Core group is coherent */
+ *value = 0x01;
+ /* Bits 11:8: L2 slice count - 1 */
+ *value |= (hweight64(DUMMY_IMPLEMENTATION_L2_PRESENT) - 1) << 8;
+ break;
+
+ case GPU_CONTROL_REG(MMU_FEATURES):
+ *value = dummy->control_reg_values->mmu_features;
+ break;
+ }
+ } else if (addr >= GPU_CONTROL_REG(THREAD_MAX_THREADS)
+ && addr <= GPU_CONTROL_REG(THREAD_FEATURES)) {
+ switch (addr) {
+ case GPU_CONTROL_REG(THREAD_FEATURES):
+ *value = dummy->control_reg_values->thread_features
+ | (IMPLEMENTATION_MODEL << 30);
+ break;
+ case GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE):
+ *value = dummy->control_reg_values->thread_max_barrier_size;
+ break;
+ case GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE):
+ *value = dummy->control_reg_values->thread_max_workgroup_size;
+ break;
+ case GPU_CONTROL_REG(THREAD_MAX_THREADS):
+ *value = dummy->control_reg_values->thread_max_threads;
+ break;
+ }
+ } else if (addr >= GPU_CONTROL_REG(CYCLE_COUNT_LO)
+ && addr <= GPU_CONTROL_REG(TIMESTAMP_HI)) {
+ *value = 0;
+ } else if (addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)
+ && addr <= MMU_AS_REG(15, AS_STATUS)) {
+ int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO))
+ >> 6;
+
+ switch (addr & 0x3F) {
+ case AS_TRANSTAB_LO:
+ *value = (u32)
+ (hw_error_status.as_transtab[mem_addr_space] &
+ 0xffffffff);
+ break;
+
+ case AS_TRANSTAB_HI:
+ *value = (u32)
+ (hw_error_status.as_transtab[mem_addr_space] >>
+ 32);
+ break;
+
+ case AS_STATUS:
+ *value = 0;
+ break;
+
+ case AS_FAULTSTATUS:
+ if (mem_addr_space == hw_error_status.faulty_mmu_as)
+ *value = hw_error_status.as_faultstatus[
+ hw_error_status.faulty_mmu_as];
+ else
+ *value = 0;
+ break;
+
+ case AS_LOCKADDR_LO:
+ case AS_LOCKADDR_HI:
+ case AS_MEMATTR_LO:
+ case AS_MEMATTR_HI:
+ case AS_TRANSCFG_LO:
+ case AS_TRANSCFG_HI:
+ /* Read ignored */
+ *value = 0;
+ break;
+
+ default:
+ model_error_log(KBASE_CORE,
+ "Dummy model register access: Reading unsupported MMU #%d register 0x%x. Returning 0\n",
+ mem_addr_space, addr);
+ *value = 0;
+ break;
+ }
+ } else if (addr == MMU_REG(MMU_IRQ_MASK)) {
+ *value = hw_error_status.mmu_irq_mask;
+ } else if (addr == MMU_REG(MMU_IRQ_RAWSTAT)) {
+ *value = hw_error_status.mmu_irq_rawstat;
+ } else if (addr == MMU_REG(MMU_IRQ_STATUS)) {
+ *value = hw_error_status.mmu_irq_mask &
+ hw_error_status.mmu_irq_rawstat;
+ }
+#if MALI_USE_CSF
+ else if (addr == IPA_CONTROL_REG(STATUS)) {
+ *value = (ipa_control_timer_enabled << 31);
+ } else if ((addr >= IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) &&
+ (addr <= IPA_CONTROL_REG(VALUE_CSHW_REG_HI(
+ IPA_CTL_MAX_VAL_CNT_IDX)))) {
+ u32 counter_index =
+ (addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) >> 3;
+ bool is_low_word =
+ !((addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) & 7);
+
+ *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_CSHW,
+ counter_index, is_low_word);
+ } else if ((addr >= IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) &&
+ (addr <= IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI(
+ IPA_CTL_MAX_VAL_CNT_IDX)))) {
+ u32 counter_index =
+ (addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) >> 3;
+ bool is_low_word =
+ !((addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) & 7);
+
+ *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_MEMSYS,
+ counter_index, is_low_word);
+ } else if ((addr >= IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) &&
+ (addr <= IPA_CONTROL_REG(VALUE_TILER_REG_HI(
+ IPA_CTL_MAX_VAL_CNT_IDX)))) {
+ u32 counter_index =
+ (addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) >> 3;
+ bool is_low_word =
+ !((addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) & 7);
+
+ *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_TILER,
+ counter_index, is_low_word);
+ } else if ((addr >= IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) &&
+ (addr <= IPA_CONTROL_REG(VALUE_SHADER_REG_HI(
+ IPA_CTL_MAX_VAL_CNT_IDX)))) {
+ u32 counter_index =
+ (addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) >> 3;
+ bool is_low_word =
+ !((addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) & 7);
+
+ *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER,
+ counter_index, is_low_word);
+ }
+#endif
+ else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) {
+ *value = dummy->control_reg_values->gpu_features_lo;
+ } else if (addr == GPU_CONTROL_REG(GPU_FEATURES_HI)) {
+ *value = dummy->control_reg_values->gpu_features_hi;
+ } else {
+ model_error_log(KBASE_CORE,
+ "Dummy model register access: Reading unsupported register 0x%x. Returning 0\n",
+ addr);
+ *value = 0;
+ }
+
+ CSTD_UNUSED(dummy);
+
+ return 1;
+}
+
+static u32 set_user_sample_core_type(u64 *counters,
+ u32 *usr_data_start, u32 usr_data_offset,
+ u32 usr_data_size, u32 core_count)
+{
+ u32 sample_size;
+ u32 *usr_data = NULL;
+
+ sample_size =
+ core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u32);
+
+ if ((usr_data_size >= usr_data_offset) &&
+ (sample_size <= usr_data_size - usr_data_offset))
+ usr_data = usr_data_start + (usr_data_offset / sizeof(u32));
+
+ if (!usr_data)
+ model_error_log(KBASE_CORE, "Unable to set counter sample 1");
+ else {
+ u32 loop_cnt = core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE;
+ u32 i;
+
+ for (i = 0; i < loop_cnt; i++) {
+ if (copy_from_user(&counters[i], &usr_data[i],
+ sizeof(u32))) {
+ model_error_log(KBASE_CORE, "Unable to set counter sample 2");
+ break;
+ }
+ }
+ }
+
+ return usr_data_offset + sample_size;
+}
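+
+/* The returned offset is fed back into the next call so that successive core
+ * types are unpacked from one contiguous user buffer (see
+ * gpu_model_set_dummy_prfcnt_sample() below).
+ */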
+
+static u32 set_kernel_sample_core_type(u64 *counters,
+ u64 *usr_data_start, u32 usr_data_offset,
+ u32 usr_data_size, u32 core_count)
+{
+ u32 sample_size;
+ u64 *usr_data = NULL;
+
+ sample_size =
+ core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u64);
+
+ if ((usr_data_size >= usr_data_offset) &&
+ (sample_size <= usr_data_size - usr_data_offset))
+ usr_data = usr_data_start + (usr_data_offset / sizeof(u64));
+
+ if (!usr_data)
+ model_error_log(KBASE_CORE, "Unable to set kernel counter sample 1");
+ else
+ memcpy(counters, usr_data, sample_size);
+
+ return usr_data_offset + sample_size;
+}
+
+/* Counter values injected through the ioctl are 32 bits wide */
+void gpu_model_set_dummy_prfcnt_sample(u32 *usr_data, u32 usr_data_size)
+{
+ u32 offset = 0;
+
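+ /* Blocks are consumed in dump-buffer order: front-end (JM or CSHW),
+ * then tiler, memory system (L2) and shader cores. The kernel-sample
+ * variant below uses the same order.
+ */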
+#if !MALI_USE_CSF
+ offset = set_user_sample_core_type(performance_counters.jm_counters,
+ usr_data, offset, usr_data_size, 1);
+#else
+ offset = set_user_sample_core_type(performance_counters.cshw_counters,
+ usr_data, offset, usr_data_size, 1);
+#endif /* !MALI_USE_CSF */
+ offset = set_user_sample_core_type(performance_counters.tiler_counters,
+ usr_data, offset, usr_data_size,
+ hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT));
+ offset = set_user_sample_core_type(performance_counters.l2_counters,
+ usr_data, offset, usr_data_size,
+ KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS);
+ offset = set_user_sample_core_type(performance_counters.shader_counters,
+ usr_data, offset, usr_data_size,
+ KBASE_DUMMY_MODEL_MAX_SHADER_CORES);
+}
+
+/* Counter values injected through kutf are 64 bits wide */
+void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *usr_data, u32 usr_data_size)
+{
+ u32 offset = 0;
+
+#if !MALI_USE_CSF
+ offset = set_kernel_sample_core_type(performance_counters.jm_counters,
+ usr_data, offset, usr_data_size, 1);
+#else
+ offset = set_kernel_sample_core_type(performance_counters.cshw_counters,
+ usr_data, offset, usr_data_size, 1);
+#endif /* !MALI_USE_CSF */
+ offset = set_kernel_sample_core_type(performance_counters.tiler_counters,
+ usr_data, offset, usr_data_size,
+ hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT));
+ offset = set_kernel_sample_core_type(performance_counters.l2_counters,
+ usr_data, offset, usr_data_size,
+ hweight64(performance_counters.l2_present));
+ offset = set_kernel_sample_core_type(performance_counters.shader_counters,
+ usr_data, offset, usr_data_size,
+ hweight64(performance_counters.shader_present));
+}
+KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_kernel_sample);
+
+void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev,
+ u64 *l2_present, u64 *shader_present)
+{
+ if (shader_present)
+ *shader_present = performance_counters.shader_present;
+ if (l2_present)
+ *l2_present = performance_counters.l2_present;
+}
+KBASE_EXPORT_TEST_API(gpu_model_get_dummy_prfcnt_cores);
+
+void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev,
+ u64 l2_present, u64 shader_present)
+{
+ if (WARN_ON(!l2_present || !shader_present
+ || hweight64(l2_present) > KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS
+ || hweight64(shader_present) > KBASE_DUMMY_MODEL_MAX_SHADER_CORES))
+ return;
+
+ performance_counters.l2_present = l2_present;
+ performance_counters.shader_present = shader_present;
+
+ /* Update the GPU properties used by vinstr to calculate the counter
+ * dump buffer size.
+ */
+ kbdev->gpu_props.props.l2_props.num_l2_slices = hweight64(l2_present);
+ kbdev->gpu_props.props.coherency_info.group[0].core_mask = shader_present;
+ kbdev->gpu_props.curr_config.l2_slices = hweight64(l2_present);
+ kbdev->gpu_props.curr_config.shader_present = shader_present;
+}
+KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_cores);
+
+void gpu_model_set_dummy_prfcnt_base_cpu(u32 *base, struct kbase_device *kbdev,
+ struct tagged_addr *pages,
+ size_t page_count)
+{
+ performance_counters.prfcnt_base_cpu = base;
+ performance_counters.kbdev = kbdev;
+ performance_counters.pages = pages;
+ performance_counters.page_count = page_count;
+}
+
+int gpu_model_control(void *model,
+ struct kbase_model_control_params *params)
+{
+ struct dummy_model_t *dummy = (struct dummy_model_t *)model;
+ int i;
+
+ if (params->command == KBASE_MC_DISABLE_JOBS) {
+ for (i = 0; i < NUM_SLOTS; i++)
+ dummy->slots[i].job_disabled = params->value;
+ } else {
+ return -EINVAL;
+ }
+
+ midgard_model_update(dummy);
+ midgard_model_get_outputs(dummy);
+
+ return 0;
+}
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.h b/mali_kbase/backend/gpu/mali_kbase_model_dummy.h
new file mode 100644
index 0000000..e092134
--- /dev/null
+++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.h
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Dummy Model interface
+ */
+
+#ifndef _KBASE_MODEL_DUMMY_H_
+#define _KBASE_MODEL_DUMMY_H_
+
+#include <uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h>
+
+#define model_error_log(module, ...) pr_err(__VA_ARGS__)
+
+#define NUM_SLOTS 4 /* number of job slots */
+
+/* Error mask codes */
+/* Each bit of errors_mask is associated with a specific error:
+ * NON-FAULT STATUS CODES: only the following are implemented, since the others
+ * represent normal working statuses
+ */
+#define KBASE_JOB_INTERRUPTED (1<<0)
+#define KBASE_JOB_STOPPED (1<<1)
+#define KBASE_JOB_TERMINATED (1<<2)
+
+/* JOB EXCEPTIONS: */
+#define KBASE_JOB_CONFIG_FAULT (1<<3)
+#define KBASE_JOB_POWER_FAULT (1<<4)
+#define KBASE_JOB_READ_FAULT (1<<5)
+#define KBASE_JOB_WRITE_FAULT (1<<6)
+#define KBASE_JOB_AFFINITY_FAULT (1<<7)
+#define KBASE_JOB_BUS_FAULT (1<<8)
+#define KBASE_INSTR_INVALID_PC (1<<9)
+#define KBASE_INSTR_INVALID_ENC (1<<10)
+#define KBASE_INSTR_TYPE_MISMATCH (1<<11)
+#define KBASE_INSTR_OPERAND_FAULT (1<<12)
+#define KBASE_INSTR_TLS_FAULT (1<<13)
+#define KBASE_INSTR_BARRIER_FAULT (1<<14)
+#define KBASE_INSTR_ALIGN_FAULT (1<<15)
+#define KBASE_DATA_INVALID_FAULT (1<<16)
+#define KBASE_TILE_RANGE_FAULT (1<<17)
+#define KBASE_ADDR_RANGE_FAULT (1<<18)
+#define KBASE_OUT_OF_MEMORY (1<<19)
+#define KBASE_UNKNOWN (1<<20)
+
+/* GPU EXCEPTIONS:*/
+#define KBASE_DELAYED_BUS_FAULT (1<<21)
+#define KBASE_SHAREABILITY_FAULT (1<<22)
+
+/* MMU EXCEPTIONS:*/
+#define KBASE_TRANSLATION_FAULT (1<<23)
+#define KBASE_PERMISSION_FAULT (1<<24)
+#define KBASE_TRANSTAB_BUS_FAULT (1<<25)
+#define KBASE_ACCESS_FLAG (1<<26)
+
+/* generic useful bitmasks */
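+/* (X << 1) - Y, where X and Y are single bits and Y <= X, yields a mask of
+ * every bit from Y up to X inclusive, so each macro below covers the
+ * contiguous bit range of one error class.
+ */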
+#define IS_A_JOB_ERROR ((KBASE_UNKNOWN << 1) - KBASE_JOB_INTERRUPTED)
+#define IS_A_MMU_ERROR ((KBASE_ACCESS_FLAG << 1) - KBASE_TRANSLATION_FAULT)
+#define IS_A_GPU_ERROR (KBASE_DELAYED_BUS_FAULT|KBASE_SHAREABILITY_FAULT)
+
+/* number of possible MMU address spaces */
+#define NUM_MMU_AS 16 /* total number of MMU address spaces as in
+ * MMU_IRQ_RAWSTAT register
+ */
+
+/* Forward declaration */
+struct kbase_device;
+
+/*
+ * The structures and functions below are used to trigger the simulation
+ * of a faulty HW condition for a specific job chain atom
+ */
+
+struct kbase_error_params {
+ u64 jc;
+ u32 errors_mask;
+ u32 mmu_table_level;
+ u16 faulty_mmu_as;
+ u16 padding[3];
+};
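+
+/* Illustrative sketch: to inject, for example, a translation fault on MMU
+ * address space 3 at table level 2 for the atom whose job chain address is
+ * held in jc, one would fill the parameters and pass them to
+ * job_atom_inject_error() (declared below):
+ *
+ *	struct kbase_error_params params = {
+ *		.jc = jc,
+ *		.errors_mask = KBASE_TRANSLATION_FAULT,
+ *		.mmu_table_level = 2,
+ *		.faulty_mmu_as = 3,
+ *	};
+ *	job_atom_inject_error(&params);
+ */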
+
+enum kbase_model_control_command {
+ /* Disable/Enable job completion in the dummy model */
+ KBASE_MC_DISABLE_JOBS
+};
+
+/* struct to control dummy model behavior */
+struct kbase_model_control_params {
+ s32 command;
+ s32 value;
+};
+
+/* struct to track faulty atoms */
+struct kbase_error_atom {
+ struct kbase_error_params params;
+ struct kbase_error_atom *next;
+};
+
+/* struct to track the system error state */
+struct error_status_t {
+ u32 errors_mask;
+ u32 mmu_table_level;
+ int faulty_mmu_as;
+
+ u64 current_jc;
+ int current_job_slot;
+
+ u32 job_irq_rawstat;
+ u32 job_irq_status;
+ u32 js_status[NUM_SLOTS];
+
+ u32 mmu_irq_mask;
+ u32 mmu_irq_rawstat;
+
+ u32 gpu_error_irq;
+ u32 gpu_fault_status;
+
+ u32 as_faultstatus[NUM_MMU_AS];
+ u32 as_command[NUM_MMU_AS];
+ u64 as_transtab[NUM_MMU_AS];
+};
+
+void *midgard_model_create(const void *config);
+void midgard_model_destroy(void *h);
+u8 midgard_model_write_reg(void *h, u32 addr, u32 value);
+u8 midgard_model_read_reg(void *h, u32 addr,
+ u32 * const value);
+void gpu_generate_error(void);
+void midgard_set_error(int job_slot);
+int job_atom_inject_error(struct kbase_error_params *params);
+int gpu_model_control(void *h,
+ struct kbase_model_control_params *params);
+
+void gpu_model_set_dummy_prfcnt_sample(u32 *usr_data, u32 usr_data_size);
+void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *usr_data, u32 usr_data_size);
+void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev,
+ u64 *l2_present, u64 *shader_present);
+void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev,
+ u64 l2_present, u64 shader_present);
+void gpu_model_set_dummy_prfcnt_base_cpu(u32 *base, struct kbase_device *kbdev,
+ struct tagged_addr *pages,
+ size_t page_count);
+/* Clear the counter values array maintained by the dummy model */
+void gpu_model_clear_prfcnt_values(void);
+
+enum gpu_dummy_irq {
+ GPU_DUMMY_JOB_IRQ,
+ GPU_DUMMY_GPU_IRQ,
+ GPU_DUMMY_MMU_IRQ
+};
+
+void gpu_device_raise_irq(void *model,
+ enum gpu_dummy_irq irq);
+void gpu_device_set_data(void *model, void *data);
+void *gpu_device_get_data(void *model);
+
+extern struct error_status_t hw_error_status;
+
+#endif
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c b/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c
new file mode 100644
index 0000000..dfa7f62
--- /dev/null
+++ b/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include <mali_kbase.h>
+#include <linux/random.h>
+#include "backend/gpu/mali_kbase_model_dummy.h"
+
+/* all the error conditions supported by the model */
+#define TOTAL_FAULTS 27
+/* maximum number of levels in the MMU translation table tree */
+#define MAX_MMU_TABLE_LEVEL 4
+/* worst case scenario is <1 MMU fault + 1 job fault + 2 GPU faults> */
+#define MAX_CONCURRENT_FAULTS 3
+
+static struct kbase_error_atom *error_track_list;
+
+unsigned int rand_seed;
+
+/* The following error probabilities are set quite high in order to stress the driver */
+unsigned int error_probability = 50; /* to be set between 0 and 100 */
+/* probability of having multiple errors, given that there is an error */
+unsigned int multiple_error_probability = 50;
+
+void gpu_generate_error(void)
+{
+ unsigned int errors_num = 0;
+
+ /* is there at least one error? */
+ if ((prandom_u32() % 100) < error_probability) {
+ /* pick up a faulty mmu address space */
+ hw_error_status.faulty_mmu_as = prandom_u32() % NUM_MMU_AS;
+ /* pick up an mmu table level */
+ hw_error_status.mmu_table_level =
+ 1 + (prandom_u32() % MAX_MMU_TABLE_LEVEL);
+ hw_error_status.errors_mask =
+ (u32)(1 << (prandom_u32() % TOTAL_FAULTS));
+
+ /* are there one or more additional errors? */
+ if ((prandom_u32() % 100) < multiple_error_probability) {
+ errors_num = 1 + (prandom_u32() %
+ (MAX_CONCURRENT_FAULTS - 1));
+ while (errors_num-- > 0) {
+ u32 temp_mask;
+
+ temp_mask = (u32)(
+ 1 << (prandom_u32() % TOTAL_FAULTS));
+ /* below we check that no bit of the same error
+ * type is set again in the error mask
+ */
+ if ((temp_mask & IS_A_JOB_ERROR) &&
+ (hw_error_status.errors_mask &
+ IS_A_JOB_ERROR)) {
+ errors_num++;
+ continue;
+ }
+ if ((temp_mask & IS_A_MMU_ERROR) &&
+ (hw_error_status.errors_mask &
+ IS_A_MMU_ERROR)) {
+ errors_num++;
+ continue;
+ }
+ if ((temp_mask & IS_A_GPU_ERROR) &&
+ (hw_error_status.errors_mask &
+ IS_A_GPU_ERROR)) {
+ errors_num++;
+ continue;
+ }
+ /* this error mask is already set */
+ if ((hw_error_status.errors_mask | temp_mask) ==
+ hw_error_status.errors_mask) {
+ errors_num++;
+ continue;
+ }
+ hw_error_status.errors_mask |= temp_mask;
+ }
+ }
+ }
+}
+
+int job_atom_inject_error(struct kbase_error_params *params)
+{
+ struct kbase_error_atom *new_elem;
+
+ KBASE_DEBUG_ASSERT(params);
+
+ new_elem = kzalloc(sizeof(*new_elem), GFP_KERNEL);
+
+ if (!new_elem) {
+ model_error_log(KBASE_CORE,
+ "\njob_atom_inject_error: kzalloc failed for new_elem\n"
+ );
+ return -ENOMEM;
+ }
+ new_elem->params.jc = params->jc;
+ new_elem->params.errors_mask = params->errors_mask;
+ new_elem->params.mmu_table_level = params->mmu_table_level;
+ new_elem->params.faulty_mmu_as = params->faulty_mmu_as;
+
+ /* append the new atom to the circular error list below */
+ if (error_track_list == NULL) { /* no elements yet */
+ error_track_list = new_elem;
+ new_elem->next = error_track_list;
+ } else {
+ struct kbase_error_atom *walker = error_track_list;
+
+ while (walker->next != error_track_list)
+ walker = walker->next;
+
+ new_elem->next = error_track_list;
+ walker->next = new_elem;
+ }
+ return 0;
+}
+
+void midgard_set_error(int job_slot)
+{
+#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM
+ gpu_generate_error();
+#else
+ struct kbase_error_atom *walker, *auxiliar;
+
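+ /* Search the circular list for an injected error whose job chain
+ * address matches the atom currently being run, apply its parameters
+ * to hw_error_status and remove the entry from the list.
+ */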
+ if (error_track_list != NULL) {
+ walker = error_track_list->next;
+ auxiliar = error_track_list;
+ do {
+ if (walker->params.jc == hw_error_status.current_jc) {
+ /* found a faulty atom matching with the
+ * current one
+ */
+ hw_error_status.errors_mask =
+ walker->params.errors_mask;
+ hw_error_status.mmu_table_level =
+ walker->params.mmu_table_level;
+ hw_error_status.faulty_mmu_as =
+ walker->params.faulty_mmu_as;
+ hw_error_status.current_job_slot = job_slot;
+
+ if (walker->next == walker) {
+ /* only one element */
+ kfree(error_track_list);
+ error_track_list = NULL;
+ } else {
+ auxiliar->next = walker->next;
+ if (walker == error_track_list)
+ error_track_list = walker->next;
+
+ kfree(walker);
+ }
+ break;
+ }
+ auxiliar = walker;
+ walker = walker->next;
+ } while (auxiliar->next != error_track_list);
+ }
+#endif /* CONFIG_MALI_ERROR_INJECT_RANDOM */
+}
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.c b/mali_kbase/backend/gpu/mali_kbase_model_linux.c
new file mode 100644
index 0000000..ed5d4ce
--- /dev/null
+++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2015, 2017-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Model interface
+ */
+
+#include <mali_kbase.h>
+#include <gpu/mali_kbase_gpu_regmap.h>
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#include "backend/gpu/mali_kbase_model_linux.h"
+#include "device/mali_kbase_device.h"
+#include "mali_kbase_irq_internal.h"
+
+#include <linux/kthread.h>
+
+struct model_irq_data {
+ struct kbase_device *kbdev;
+ struct work_struct work;
+};
+
+static void serve_job_irq(struct work_struct *work)
+{
+ struct model_irq_data *data = container_of(work, struct model_irq_data,
+ work);
+ struct kbase_device *kbdev = data->kbdev;
+
+ /* Make sure no worker is already serving this IRQ */
+ while (atomic_cmpxchg(&kbdev->serving_job_irq, 1, 0) == 1) {
+ u32 val;
+
+ while ((val = kbase_reg_read(kbdev,
+ JOB_CONTROL_REG(JOB_IRQ_STATUS)))) {
+ unsigned long flags;
+
+ /* Handle the IRQ */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+#if MALI_USE_CSF
+ kbase_csf_interrupt(kbdev, val);
+#else
+ kbase_job_done(kbdev, val);
+#endif
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ }
+ }
+
+ kmem_cache_free(kbdev->irq_slab, data);
+}
+
+static void serve_gpu_irq(struct work_struct *work)
+{
+ struct model_irq_data *data = container_of(work, struct model_irq_data,
+ work);
+ struct kbase_device *kbdev = data->kbdev;
+
+ /* Make sure no worker is already serving this IRQ */
+ while (atomic_cmpxchg(&kbdev->serving_gpu_irq, 1, 0) == 1) {
+ u32 val;
+
+ while ((val = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(GPU_IRQ_STATUS)))) {
+ /* Handle the IRQ */
+ kbase_gpu_interrupt(kbdev, val);
+ }
+ }
+
+ kmem_cache_free(kbdev->irq_slab, data);
+}
+
+static void serve_mmu_irq(struct work_struct *work)
+{
+ struct model_irq_data *data = container_of(work, struct model_irq_data,
+ work);
+ struct kbase_device *kbdev = data->kbdev;
+
+ /* Make sure no worker is already serving this IRQ */
+ if (atomic_cmpxchg(&kbdev->serving_mmu_irq, 1, 0) == 1) {
+ u32 val;
+
+ while ((val = kbase_reg_read(kbdev,
+ MMU_REG(MMU_IRQ_STATUS)))) {
+ /* Handle the IRQ */
+ kbase_mmu_interrupt(kbdev, val);
+ }
+ }
+
+ kmem_cache_free(kbdev->irq_slab, data);
+}
+
+void gpu_device_raise_irq(void *model,
+ enum gpu_dummy_irq irq)
+{
+ struct model_irq_data *data;
+ struct kbase_device *kbdev = gpu_device_get_data(model);
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ data = kmem_cache_alloc(kbdev->irq_slab, GFP_ATOMIC);
+ if (data == NULL)
+ return;
+
+ data->kbdev = kbdev;
+
+ switch (irq) {
+ case GPU_DUMMY_JOB_IRQ:
+ INIT_WORK(&data->work, serve_job_irq);
+ atomic_set(&kbdev->serving_job_irq, 1);
+ break;
+ case GPU_DUMMY_GPU_IRQ:
+ INIT_WORK(&data->work, serve_gpu_irq);
+ atomic_set(&kbdev->serving_gpu_irq, 1);
+ break;
+ case GPU_DUMMY_MMU_IRQ:
+ INIT_WORK(&data->work, serve_mmu_irq);
+ atomic_set(&kbdev->serving_mmu_irq, 1);
+ break;
+ default:
+ dev_warn(kbdev->dev, "Unknown IRQ");
+ kmem_cache_free(kbdev->irq_slab, data);
+ /* data has been freed, so it must not be queued below */
+ return;
+ }
+ queue_work(kbdev->irq_workq, &data->work);
+}
+
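+/* On this backend, register accesses are routed to the dummy model rather
+ * than to real hardware, serialised by reg_op_lock.
+ */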
+void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->reg_op_lock, flags);
+ midgard_model_write_reg(kbdev->model, offset, value);
+ spin_unlock_irqrestore(&kbdev->reg_op_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_write);
+
+u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
+{
+ unsigned long flags;
+ u32 val;
+
+ spin_lock_irqsave(&kbdev->reg_op_lock, flags);
+ midgard_model_read_reg(kbdev->model, offset, &val);
+ spin_unlock_irqrestore(&kbdev->reg_op_lock, flags);
+
+ return val;
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_read);
+
+/**
+ * kbase_is_gpu_removed - Has the GPU been removed.
+ * @kbdev: Kbase device pointer
+ *
+ * This function would return true if the GPU has been removed.
+ * It is stubbed here.
+ *
+ * Return: Always false
+ */
+bool kbase_is_gpu_removed(struct kbase_device *kbdev)
+{
+ return false;
+}
+
+int kbase_install_interrupts(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ atomic_set(&kbdev->serving_job_irq, 0);
+ atomic_set(&kbdev->serving_gpu_irq, 0);
+ atomic_set(&kbdev->serving_mmu_irq, 0);
+
+ kbdev->irq_workq = alloc_ordered_workqueue("dummy irq queue", 0);
+ if (kbdev->irq_workq == NULL)
+ return -ENOMEM;
+
+ kbdev->irq_slab = kmem_cache_create("dummy_irq_slab",
+ sizeof(struct model_irq_data), 0, 0, NULL);
+ if (kbdev->irq_slab == NULL) {
+ destroy_workqueue(kbdev->irq_workq);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void kbase_release_interrupts(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev);
+ destroy_workqueue(kbdev->irq_workq);
+ kmem_cache_destroy(kbdev->irq_slab);
+}
+
+void kbase_synchronize_irqs(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev);
+ flush_workqueue(kbdev->irq_workq);
+}
+
+KBASE_EXPORT_TEST_API(kbase_synchronize_irqs);
+
+int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
+ irq_handler_t custom_handler,
+ int irq_type)
+{
+ return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler);
+
+irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val)
+{
+ if (!val)
+ return IRQ_NONE;
+
+ return IRQ_HANDLED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_irq_test_handler);
+
+int kbase_gpu_device_create(struct kbase_device *kbdev)
+{
+ kbdev->model = midgard_model_create(NULL);
+ if (kbdev->model == NULL)
+ return -ENOMEM;
+
+ gpu_device_set_data(kbdev->model, kbdev);
+
+ spin_lock_init(&kbdev->reg_op_lock);
+
+ dev_warn(kbdev->dev, "Using Dummy Model");
+
+ return 0;
+}
+
+void kbase_gpu_device_destroy(struct kbase_device *kbdev)
+{
+ midgard_model_destroy(kbdev->model);
+}
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.h b/mali_kbase/backend/gpu/mali_kbase_model_linux.h
new file mode 100644
index 0000000..dcb2e7c
--- /dev/null
+++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Model interface
+ */
+
+#ifndef _KBASE_MODEL_LINUX_H_
+#define _KBASE_MODEL_LINUX_H_
+
+int kbase_gpu_device_create(struct kbase_device *kbdev);
+void kbase_gpu_device_destroy(struct kbase_device *kbdev);
+
+#endif /* _KBASE_MODEL_LINUX_H_ */
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
index 5df7f67..8711a6c 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
@@ -568,11 +568,14 @@ static void kbase_pm_hwcnt_disable_worker(struct work_struct *data)
* when system suspend takes place.
* The function first waits for the @gpu_poweroff_wait_work to complete, which
* could have been enqueued after the last PM reference was released.
+ *
+ * Return: 0 on success, negative value otherwise.
*/
-static void kbase_pm_do_poweroff_sync(struct kbase_device *kbdev)
+static int kbase_pm_do_poweroff_sync(struct kbase_device *kbdev)
{
struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
unsigned long flags;
+ int ret = 0;
WARN_ON(kbdev->pm.active_count);
@@ -581,8 +584,8 @@ static void kbase_pm_do_poweroff_sync(struct kbase_device *kbdev)
kbase_pm_lock(kbdev);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
WARN_ON(backend->poweroff_wait_in_progress);
+ WARN_ON(backend->gpu_sleep_mode_active);
if (backend->gpu_powered) {
- int ret;
backend->mcu_desired = false;
backend->l2_desired = false;
@@ -591,17 +594,11 @@ static void kbase_pm_do_poweroff_sync(struct kbase_device *kbdev)
ret = kbase_pm_wait_for_desired_state(kbdev);
if (ret) {
- dev_warn(kbdev->dev, "Wait failed on synchronous power off");
- kbase_pm_unlock(kbdev);
- /* Wait for the completion of reset, triggered due to
- * the previous failure.
- */
- kbase_reset_gpu_wait(kbdev);
- /* Wait again for the poweroff work which could have
- * been enqueued by the GPU reset worker.
- */
- kbase_pm_wait_for_poweroff_work_complete(kbdev);
- kbase_pm_lock(kbdev);
+ dev_warn(
+ kbdev->dev,
+ "Wait for pm state change failed on synchronous power off");
+ ret = -EBUSY;
+ goto out;
}
/* Due to the power policy, GPU could have been kept active
@@ -614,12 +611,19 @@ static void kbase_pm_do_poweroff_sync(struct kbase_device *kbdev)
backend->gpu_idled = true;
}
- kbase_pm_clock_off(kbdev);
+ if (!kbase_pm_clock_off(kbdev)) {
+ dev_warn(
+ kbdev->dev,
+ "Failed to turn off GPU clocks on synchronous power off, MMU faults pending");
+ ret = -EBUSY;
+ }
} else {
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
+out:
kbase_pm_unlock(kbdev);
+ return ret;
}
#endif
@@ -793,7 +797,7 @@ void kbase_hwaccess_pm_halt(struct kbase_device *kbdev)
KBASE_DEBUG_ASSERT(kbdev != NULL);
#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
- kbase_pm_do_poweroff_sync(kbdev);
+ WARN_ON(kbase_pm_do_poweroff_sync(kbdev));
#else
mutex_lock(&kbdev->pm.lock);
kbase_pm_do_poweroff(kbdev);
@@ -902,10 +906,14 @@ void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev)
kbase_pm_update_active(kbdev);
}
-void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
+int kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
{
+ int ret = 0;
+
#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
- kbase_pm_do_poweroff_sync(kbdev);
+ ret = kbase_pm_do_poweroff_sync(kbdev);
+ if (ret)
+ return ret;
#else
/* Force power off the GPU and all cores (regardless of policy), only
* after the PM active count reaches zero (otherwise, we risk turning it
@@ -929,6 +937,8 @@ void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
if (kbdev->pm.backend.callback_power_suspend)
kbdev->pm.backend.callback_power_suspend(kbdev);
+
+ return ret;
}
void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
@@ -1044,7 +1054,12 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev)
ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev);
if (ret) {
- dev_warn(kbdev->dev, "Wait for MCU wake up failed on runtime suspend");
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ dev_warn(
+ kbdev->dev,
+ "Waiting for MCU to wake up failed on runtime suspend");
+ kbdev->pm.backend.gpu_wakeup_override = false;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return ret;
}
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
index efc620f..803ba4d 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
@@ -26,6 +26,9 @@
#include <mali_kbase.h>
#include <mali_kbase_pm.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#endif /* CONFIG_MALI_NO_MALI */
#include <mali_kbase_dummy_job_wa.h>
int kbase_pm_ca_init(struct kbase_device *kbdev)
@@ -120,7 +123,9 @@ u64 kbase_pm_ca_get_instr_core_mask(struct kbase_device *kbdev)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
-#if MALI_USE_CSF
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ return (((1ull) << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1);
+#elif MALI_USE_CSF
return kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
#else
return kbdev->pm.backend.pm_shaders_core_mask;
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.h b/mali_kbase/backend/gpu/mali_kbase_pm_ca.h
index 8d169c3..90dcaf5 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.h
@@ -29,10 +29,10 @@
/**
* kbase_pm_ca_init - Initialize core availability framework
*
- * Must be called before calling any other core availability function
- *
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
+ * Must be called before calling any other core availability function
+ *
* Return: 0 if the core availability framework was successfully initialized,
* -errno otherwise
*/
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca_devfreq.h b/mali_kbase/backend/gpu/mali_kbase_pm_ca_devfreq.h
index 41f3c14..d1e4b53 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_ca_devfreq.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca_devfreq.h
@@ -30,12 +30,12 @@
/**
* struct kbasep_pm_ca_policy_devfreq - Private structure for devfreq ca policy
*
- * This contains data that is private to the devfreq core availability
- * policy.
- *
* @cores_desired: Cores that the policy wants to be available
* @cores_enabled: Cores that the policy is currently returning as available
* @cores_used: Cores currently powered or transitioning
+ *
+ * This contains data that is private to the devfreq core availability
+ * policy.
*/
struct kbasep_pm_ca_policy_devfreq {
u64 cores_desired;
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_coarse_demand.h b/mali_kbase/backend/gpu/mali_kbase_pm_coarse_demand.h
index 5e3f17e..a947e8f 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_coarse_demand.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_coarse_demand.h
@@ -52,10 +52,8 @@
/**
* struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand
* policy
- *
- * This contains data that is private to the coarse demand power policy.
- *
* @dummy: Dummy member - no state needed
+ * This contains data that is private to the coarse demand power policy.
*/
struct kbasep_pm_policy_coarse_demand {
int dummy;
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
index 52877f5..c7efe23 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
@@ -40,6 +40,11 @@ struct kbase_jd_atom;
/**
* enum kbase_pm_core_type - The types of core in a GPU.
*
+ * @KBASE_PM_CORE_L2: The L2 cache
+ * @KBASE_PM_CORE_SHADER: Shader cores
+ * @KBASE_PM_CORE_TILER: Tiler cores
+ * @KBASE_PM_CORE_STACK: Core stacks
+ *
* These enumerated values are used in calls to
* - kbase_pm_get_present_cores()
* - kbase_pm_get_active_cores()
@@ -49,11 +54,6 @@ struct kbase_jd_atom;
* They specify which type of core should be acted on. These values are set in
* a manner that allows core_type_to_reg() function to be simpler and more
* efficient.
- *
- * @KBASE_PM_CORE_L2: The L2 cache
- * @KBASE_PM_CORE_SHADER: Shader cores
- * @KBASE_PM_CORE_TILER: Tiler cores
- * @KBASE_PM_CORE_STACK: Core stacks
*/
enum kbase_pm_core_type {
KBASE_PM_CORE_L2 = L2_PRESENT_LO,
@@ -215,9 +215,6 @@ union kbase_pm_policy_data {
/**
* struct kbase_pm_backend_data - Data stored per device for power management.
*
- * This structure contains data for the power management framework. There is one
- * instance of this structure per device in the system.
- *
* @pm_current_policy: The policy that is currently actively controlling the
* power state.
* @pm_policy_data: Private data for current PM policy. This is automatically
@@ -324,6 +321,10 @@ union kbase_pm_policy_data {
* @policy_change_lock: Used to serialize the policy change calls. In CSF case,
* the change of policy may involve the scheduler to
* suspend running CSGs and then reconfigure the MCU.
+ * @core_idle_wq: Workqueue for executing the @core_idle_work.
+ * @core_idle_work: Work item used to wait for undesired cores to become inactive.
+ * The work item is enqueued when Host controls the power for
+ * shader cores and down scaling of cores is performed.
* @gpu_sleep_supported: Flag to indicate that if GPU sleep feature can be
* supported by the kernel driver or not. If this
* flag is not set, then HW state is directly saved
@@ -389,6 +390,9 @@ union kbase_pm_policy_data {
* @gpu_clock_control_work: work item to set GPU clock during L2 power cycle
* using gpu_clock_control
*
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ *
* Note:
* During an IRQ, @pm_current_policy can be NULL when the policy is being
* changed with kbase_pm_set_policy(). The change is protected under
@@ -455,6 +459,8 @@ struct kbase_pm_backend_data {
bool policy_change_clamp_state_to_off;
unsigned int csf_pm_sched_flags;
struct mutex policy_change_lock;
+ struct workqueue_struct *core_idle_wq;
+ struct work_struct core_idle_work;
#ifdef KBASE_PM_RUNTIME
bool gpu_sleep_supported;
@@ -547,9 +553,6 @@ enum kbase_pm_policy_event {
/**
* struct kbase_pm_policy - Power policy structure.
*
- * Each power policy exposes a (static) instance of this structure which
- * contains function pointers to the policy's methods.
- *
* @name: The name of this policy
* @init: Function called when the policy is selected
* @term: Function called when the policy is unselected
@@ -567,6 +570,8 @@ enum kbase_pm_policy_event {
* Pre-defined required flags exist for each of the
* ARM released policies, such as 'always_on', 'coarse_demand'
* and etc.
+ * Each power policy exposes a (static) instance of this structure which
+ * contains function pointers to the policy's methods.
*/
struct kbase_pm_policy {
char *name;
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
index d65c684..81c922f 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
@@ -54,6 +54,10 @@
#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
#endif
+#if MALI_USE_CSF
+#include <linux/delay.h>
+#endif
+
#include <linux/of.h>
#ifdef CONFIG_MALI_CORESTACK
@@ -72,16 +76,16 @@ KBASE_EXPORT_TEST_API(corestack_driver_control);
/**
* enum kbasep_pm_action - Actions that can be performed on a core.
*
- * This enumeration is private to the file. Its values are set to allow
- * core_type_to_reg() function, which decodes this enumeration, to be simpler
- * and more efficient.
- *
* @ACTION_PRESENT: The cores that are present
* @ACTION_READY: The cores that are ready
* @ACTION_PWRON: Power on the cores specified
* @ACTION_PWROFF: Power off the cores specified
* @ACTION_PWRTRANS: The cores that are transitioning
* @ACTION_PWRACTIVE: The cores that are active
+ *
+ * This enumeration is private to the file. Its values are set to allow
+ * core_type_to_reg() function, which decodes this enumeration, to be simpler
+ * and more efficient.
*/
enum kbasep_pm_action {
ACTION_PRESENT = 0,
@@ -221,14 +225,14 @@ void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override)
/**
* core_type_to_reg - Decode a core type and action to a register.
*
+ * @core_type: The type of core
+ * @action: The type of action
+ *
* Given a core type (defined by kbase_pm_core_type) and an action (defined
* by kbasep_pm_action) this function will return the register offset that
* will perform the action on the core type. The register returned is the _LO
* register and an offset must be applied to use the _HI register.
*
- * @core_type: The type of core
- * @action: The type of action
- *
* Return: The register offset of the _LO register that performs an action of
* type @action on a core of type @core_type.
*/
@@ -291,14 +295,14 @@ static void mali_cci_flush_l2(struct kbase_device *kbdev)
/**
* kbase_pm_invoke - Invokes an action on a core set
*
- * This function performs the action given by @action on a set of cores of a
- * type given by @core_type. It is a static function used by
- * kbase_pm_transition_core_type()
- *
* @kbdev: The kbase device structure of the device
* @core_type: The type of core that the action should be performed on
* @cores: A bit mask of cores to perform the action on (low 32 bits)
* @action: The action to perform on the cores
+ *
+ * This function performs the action given by @action on a set of cores of a
+ * type given by @core_type. It is a static function used by
+ * kbase_pm_transition_core_type()
*/
static void kbase_pm_invoke(struct kbase_device *kbdev,
enum kbase_pm_core_type core_type,
@@ -376,15 +380,15 @@ static void kbase_pm_invoke(struct kbase_device *kbdev,
/**
* kbase_pm_get_state - Get information about a core set
*
+ * @kbdev: The kbase device structure of the device
+ * @core_type: The type of core that the should be queried
+ * @action: The property of the cores to query
+ *
* This function gets information (chosen by @action) about a set of cores of
* a type given by @core_type. It is a static function used by
* kbase_pm_get_active_cores(), kbase_pm_get_trans_cores() and
* kbase_pm_get_ready_cores().
*
- * @kbdev: The kbase device structure of the device
- * @core_type: The type of core that the should be queried
- * @action: The property of the cores to query
- *
* Return: A bit mask specifying the state of the cores
*/
static u64 kbase_pm_get_state(struct kbase_device *kbdev,
@@ -753,17 +757,17 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
if (!kbase_pm_is_mcu_desired(kbdev))
backend->mcu_state = KBASE_MCU_ON_HWCNT_DISABLE;
else if (kbdev->csf.firmware_hctl_core_pwr) {
- /* Host control add additional Cores to be active */
- if (backend->shaders_desired_mask & ~shaders_ready) {
+ /* Host control: scale cores up/down as needed */
+ if (backend->shaders_desired_mask != shaders_ready) {
backend->hwcnt_desired = false;
if (!backend->hwcnt_disabled)
kbase_pm_trigger_hwcnt_disable(kbdev);
backend->mcu_state =
KBASE_MCU_HCTL_MCU_ON_RECHECK;
}
- } else if (kbase_pm_handle_mcu_core_attr_update(kbdev))
- kbdev->pm.backend.mcu_state =
- KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND;
+ } else if (kbase_pm_handle_mcu_core_attr_update(kbdev)) {
+ backend->mcu_state = KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND;
+ }
break;
case KBASE_MCU_HCTL_MCU_ON_RECHECK:
@@ -787,16 +791,54 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
ACTION_PWRON);
backend->mcu_state =
KBASE_MCU_HCTL_SHADERS_PEND_ON;
+
+ } else if (~backend->shaders_desired_mask & shaders_ready) {
+ kbase_csf_firmware_update_core_attr(kbdev, false, true,
+ backend->shaders_desired_mask);
+ backend->mcu_state = KBASE_MCU_HCTL_CORES_DOWN_SCALE_NOTIFY_PEND;
} else {
backend->mcu_state =
KBASE_MCU_HCTL_SHADERS_PEND_ON;
}
break;
+ case KBASE_MCU_HCTL_CORES_DOWN_SCALE_NOTIFY_PEND:
+ if (kbase_csf_firmware_core_attr_updated(kbdev)) {
+ /* queue the work item that waits for the cores to go idle */
+ queue_work(backend->core_idle_wq, &backend->core_idle_work);
+ backend->mcu_state = KBASE_MCU_HCTL_CORE_INACTIVE_PEND;
+ }
+ break;
+
+ case KBASE_MCU_HCTL_CORE_INACTIVE_PEND:
+ {
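+ /* The undesired cores are powered off only once none of
+ * them is still executing work.
+ */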
+ u64 active_cores = kbase_pm_get_active_cores(
+ kbdev,
+ KBASE_PM_CORE_SHADER);
+ u64 cores_to_disable = shaders_ready &
+ ~backend->shaders_desired_mask;
+
+ if (!(cores_to_disable & active_cores)) {
+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER,
+ cores_to_disable,
+ ACTION_PWROFF);
+ backend->shaders_avail = backend->shaders_desired_mask;
+ backend->mcu_state = KBASE_MCU_HCTL_SHADERS_CORE_OFF_PEND;
+ }
+ }
+ break;
+
+ case KBASE_MCU_HCTL_SHADERS_CORE_OFF_PEND:
+ if (!shaders_trans && shaders_ready == backend->shaders_avail) {
+ /* Cores now stable */
+ backend->pm_shaders_core_mask = shaders_ready;
+ backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE;
+ }
+ break;
+
case KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND:
if (kbase_csf_firmware_core_attr_updated(kbdev)) {
- backend->shaders_avail =
- backend->shaders_desired_mask;
+ backend->shaders_avail = backend->shaders_desired_mask;
backend->mcu_state = KBASE_MCU_ON;
}
break;
@@ -832,6 +874,8 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
case KBASE_MCU_ON_PEND_HALT:
if (kbase_csf_firmware_mcu_halted(kbdev)) {
+ KBASE_KTRACE_ADD(kbdev, MCU_HALTED, NULL,
+ kbase_csf_ktrace_gpu_cycle_cnt(kbdev));
if (kbdev->csf.firmware_hctl_core_pwr)
backend->mcu_state =
KBASE_MCU_HCTL_SHADERS_READY_OFF;
@@ -875,6 +919,8 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
case KBASE_MCU_ON_PEND_SLEEP:
if (kbase_csf_firmware_is_mcu_in_sleep(kbdev)) {
+ KBASE_KTRACE_ADD(kbdev, MCU_IN_SLEEP, NULL,
+ kbase_csf_ktrace_gpu_cycle_cnt(kbdev));
backend->mcu_state = KBASE_MCU_IN_SLEEP;
kbase_pm_enable_db_mirror_interrupt(kbdev);
kbase_csf_scheduler_reval_idleness_post_sleep(kbdev);
@@ -884,6 +930,8 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
case KBASE_MCU_IN_SLEEP:
if (kbase_pm_is_mcu_desired(kbdev) &&
backend->l2_state == KBASE_L2_ON) {
+ KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP(
+ kbdev, kbase_backend_get_cycle_cnt(kbdev));
kbase_pm_enable_mcu_db_notification(kbdev);
kbase_pm_disable_db_mirror_interrupt(kbdev);
backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE;
@@ -910,6 +958,33 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
return 0;
}
+
+static void core_idle_worker(struct work_struct *work)
+{
+ struct kbase_device *kbdev =
+ container_of(work, struct kbase_device, pm.backend.core_idle_work);
+ struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+ unsigned long flags;
+
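+ /* Poll, sleeping 1 ms between checks, until the cores selected for
+ * power-off are no longer active, then kick the PM state machine so
+ * that they can actually be powered off.
+ */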
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ while (backend->gpu_powered && (backend->mcu_state == KBASE_MCU_HCTL_CORE_INACTIVE_PEND)) {
+ const unsigned int core_inactive_wait_ms = 1;
+ u64 active_cores = kbase_pm_get_active_cores(kbdev, KBASE_PM_CORE_SHADER);
+ u64 shaders_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+ u64 cores_to_disable = shaders_ready & ~backend->shaders_desired_mask;
+
+ if (!(cores_to_disable & active_cores)) {
+ kbase_pm_update_state(kbdev);
+ break;
+ }
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ msleep(core_inactive_wait_ms);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ }
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
#endif
static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state)
@@ -925,6 +1000,23 @@ static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state)
return strings[state];
}
+#if !MALI_USE_CSF
+/* On powering on the L2, the tracked kctx becomes stale and can be cleared.
+ * This enables the backend to spare the START_FLUSH.INV_SHADER_OTHER
+ * operation on the first submitted katom after the L2 powering on.
+ */
+static void kbase_pm_l2_clear_backend_slot_submit_kctx(struct kbase_device *kbdev)
+{
+ int js;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ /* Clear the slots' last katom submission kctx */
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+ kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = SLOT_RB_NULL_TAG_VAL;
+}
+#endif
+
static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
{
struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
@@ -1015,6 +1107,8 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2,
l2_present & ~1,
ACTION_PWRON);
+ /* Clear backend slot submission kctx */
+ kbase_pm_l2_clear_backend_slot_submit_kctx(kbdev);
#else
/* With CSF firmware, Host driver doesn't need to
* handle power management with both shader and tiler cores.
@@ -1217,7 +1311,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
* powered off.
*/
kbase_gpu_start_cache_clean_nolock(
- kbdev);
+ kbdev, GPU_COMMAND_CACHE_CLN_INV_L2);
#if !MALI_USE_CSF
KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, 0u);
#else
@@ -1594,10 +1688,12 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev)
break;
case KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON:
- shader_poweroff_timer_queue_cancel(kbdev);
+ if (!backend->partial_shaderoff)
+ shader_poweroff_timer_queue_cancel(kbdev);
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) {
- kbase_gpu_start_cache_clean_nolock(kbdev);
+ kbase_gpu_start_cache_clean_nolock(
+ kbdev, GPU_COMMAND_CACHE_CLN_INV_L2);
backend->shaders_state =
KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON;
} else {
@@ -1895,11 +1991,24 @@ int kbase_pm_state_machine_init(struct kbase_device *kbdev)
stt->default_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER;
stt->configured_ticks = stt->default_ticks;
+#if MALI_USE_CSF
+ kbdev->pm.backend.core_idle_wq = alloc_workqueue("coreoff_wq", WQ_HIGHPRI | WQ_UNBOUND, 1);
+ if (!kbdev->pm.backend.core_idle_wq) {
+ destroy_workqueue(stt->wq);
+ return -ENOMEM;
+ }
+
+ INIT_WORK(&kbdev->pm.backend.core_idle_work, core_idle_worker);
+#endif
+
return 0;
}
void kbase_pm_state_machine_term(struct kbase_device *kbdev)
{
+#if MALI_USE_CSF
+ destroy_workqueue(kbdev->pm.backend.core_idle_wq);
+#endif
hrtimer_cancel(&kbdev->pm.backend.shader_tick_timer.timer);
destroy_workqueue(kbdev->pm.backend.shader_tick_timer.wq);
}
@@ -2419,9 +2528,9 @@ void kbase_pm_reset_done(struct kbase_device *kbdev)
/**
* kbase_pm_wait_for_reset - Wait for a reset to happen
*
- * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state.
- *
* @kbdev: Kbase device
+ *
+ * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state.
*/
static void kbase_pm_wait_for_reset(struct kbase_device *kbdev)
{
@@ -2889,6 +2998,7 @@ exit:
/**
* kbase_pm_request_gpu_cycle_counter_do_request - Request cycle counters
+ * @kbdev: The kbase device structure of the device
*
* Increase the count of cycle counter users and turn the cycle counters on if
* they were previously off
@@ -2899,8 +3009,6 @@ exit:
*
* When this function is called the l2 cache must be on - i.e., the GPU must be
* on.
- *
- * @kbdev: The kbase device structure of the device
*/
static void
kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev)
@@ -2918,11 +3026,13 @@ kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev)
/* This might happen after GPU reset.
* Then counter needs to be kicked.
*/
+#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
if (!(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) &
GPU_STATUS_CYCLE_COUNT_ACTIVE)) {
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
GPU_COMMAND_CYCLE_COUNT_START);
}
+#endif
}
spin_unlock_irqrestore(
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
index ef26c16..97e8607 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
@@ -35,18 +35,18 @@
/**
* kbase_pm_dev_idle - The GPU is idle.
*
- * The OS may choose to turn off idle devices
- *
* @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * The OS may choose to turn off idle devices
*/
void kbase_pm_dev_idle(struct kbase_device *kbdev);
/**
* kbase_pm_dev_activate - The GPU is active.
*
- * The OS should avoid opportunistically turning off the GPU while it is active
- *
* @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * The OS should avoid opportunistically turning off the GPU while it is active
*/
void kbase_pm_dev_activate(struct kbase_device *kbdev);
@@ -54,14 +54,14 @@ void kbase_pm_dev_activate(struct kbase_device *kbdev);
* kbase_pm_get_present_cores - Get details of the cores that are present in
* the device.
*
- * This function can be called by the active power policy to return a bitmask of
- * the cores (of a specified type) present in the GPU device and also a count of
- * the number of cores.
- *
* @kbdev: The kbase device structure for the device (must be a valid
* pointer)
* @type: The type of core (see the enum kbase_pm_core_type enumeration)
*
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) present in the GPU device and also a count of
+ * the number of cores.
+ *
* Return: The bit mask of cores present
*/
u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
@@ -71,13 +71,13 @@ u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
* kbase_pm_get_active_cores - Get details of the cores that are currently
* active in the device.
*
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type: The type of core (see the enum kbase_pm_core_type enumeration)
+ *
* This function can be called by the active power policy to return a bitmask of
* the cores (of a specified type) that are actively processing work (i.e.
* turned on *and* busy).
*
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- * @type: The type of core (see the enum kbase_pm_core_type enumeration)
- *
* Return: The bit mask of active cores
*/
u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
@@ -87,13 +87,13 @@ u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
* kbase_pm_get_trans_cores - Get details of the cores that are currently
* transitioning between power states.
*
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type: The type of core (see the enum kbase_pm_core_type enumeration)
+ *
* This function can be called by the active power policy to return a bitmask of
* the cores (of a specified type) that are currently transitioning between
* power states.
*
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- * @type: The type of core (see the enum kbase_pm_core_type enumeration)
- *
* Return: The bit mask of transitioning cores
*/
u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
@@ -103,13 +103,13 @@ u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
* kbase_pm_get_ready_cores - Get details of the cores that are currently
* powered and ready for jobs.
*
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type: The type of core (see the enum kbase_pm_core_type enumeration)
+ *
* This function can be called by the active power policy to return a bitmask of
* the cores (of a specified type) that are powered and ready for jobs (they may
* or may not be currently executing jobs).
*
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- * @type: The type of core (see the enum kbase_pm_core_type enumeration)
- *
* Return: The bit mask of ready cores
*/
u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
@@ -119,13 +119,13 @@ u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
* kbase_pm_clock_on - Turn the clock for the device on, and enable device
* interrupts.
*
- * This function can be used by a power policy to turn the clock for the GPU on.
- * It should be modified during integration to perform the necessary actions to
- * ensure that the GPU is fully powered and clocked.
- *
* @kbdev: The kbase device structure for the device (must be a valid
* pointer)
* @is_resume: true if clock on due to resume after suspend, false otherwise
+ *
+ * This function can be used by a power policy to turn the clock for the GPU on.
+ * It should be modified during integration to perform the necessary actions to
+ * ensure that the GPU is fully powered and clocked.
*/
void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume);
@@ -133,6 +133,9 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume);
* kbase_pm_clock_off - Disable device interrupts, and turn the clock for the
* device off.
*
+ * @kbdev: The kbase device structure for the device (must be a valid
+ * pointer)
+ *
* This function can be used by a power policy to turn the clock for the GPU
* off. It should be modified during integration to perform the necessary
* actions to turn the clock off (if this is possible in the integration).
@@ -141,9 +144,6 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume);
* then this function would usually be invoked from the runtime suspend
* callback function.
*
- * @kbdev: The kbase device structure for the device (must be a valid
- * pointer)
- *
* Return: true if clock was turned off, or
* false if clock can not be turned off due to pending page/bus fault
* workers. Caller must flush MMU workqueues and retry
@@ -153,22 +153,22 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev);
/**
* kbase_pm_enable_interrupts - Enable interrupts on the device.
*
- * Interrupts are also enabled after a call to kbase_pm_clock_on().
- *
* @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Interrupts are also enabled after a call to kbase_pm_clock_on().
*/
void kbase_pm_enable_interrupts(struct kbase_device *kbdev);
/**
* kbase_pm_disable_interrupts - Disable interrupts on the device.
*
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
* This prevents delivery of Power Management interrupts to the CPU so that
* kbase_pm_update_state() will not be called from the IRQ handler
* until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called.
*
* Interrupts are also disabled after a call to kbase_pm_clock_off().
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_disable_interrupts(struct kbase_device *kbdev);
@@ -176,9 +176,9 @@ void kbase_pm_disable_interrupts(struct kbase_device *kbdev);
* kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts()
* that does not take the hwaccess_lock
*
- * Caller must hold the hwaccess_lock.
- *
* @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Caller must hold the hwaccess_lock.
*/
void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev);
@@ -197,12 +197,11 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags);
/**
* kbase_pm_reset_done - The GPU has been reset successfully.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* This function must be called by the GPU interrupt handler when the
* RESET_COMPLETED bit is set. It signals to the power management initialization
* code that the GPU has been successfully reset.
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_reset_done(struct kbase_device *kbdev);
@@ -210,6 +209,7 @@ void kbase_pm_reset_done(struct kbase_device *kbdev);
/**
* kbase_pm_wait_for_desired_state - Wait for the desired power state to be
* reached
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Wait for the L2 and MCU state machines to reach the states corresponding
* to the values of 'kbase_pm_is_l2_desired' and 'kbase_pm_is_mcu_desired'.
@@ -224,8 +224,6 @@ void kbase_pm_reset_done(struct kbase_device *kbdev);
* power off in progress and kbase_pm_context_active() was called instead of
* kbase_csf_scheduler_pm_active().
*
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- *
* Return: 0 on success, error code on error
*/
int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
@@ -233,6 +231,7 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
/**
* kbase_pm_wait_for_desired_state - Wait for the desired power state to be
* reached
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Wait for the L2 and shader power state machines to reach the states
* corresponding to the values of 'l2_desired' and 'shaders_desired'.
@@ -248,8 +247,6 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
* must ensure that this is not the case by, for example, calling
* kbase_pm_wait_for_poweroff_work_complete()
*
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- *
* Return: 0 on success, error code on error
*/
int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
@@ -258,6 +255,8 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
/**
* kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on
*
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
* Wait for the L2 to be powered on, and for the L2 and the state machines of
* its dependent stack components to stabilise.
*
@@ -266,8 +265,6 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
* Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock,
* because this function will take that lock itself.
*
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- *
* Return: 0 on success, error code on error
*/
int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev);
@@ -276,13 +273,12 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev);
* kbase_pm_update_dynamic_cores_onoff - Update the L2 and shader power state
* machines after changing shader core
* availability
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* It can be called in any status, so need to check the l2 and shader core
* power status in this function or it will break shader/l2 state machine
*
* Caller must hold hwaccess_lock
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_update_dynamic_cores_onoff(struct kbase_device *kbdev);
@@ -318,22 +314,21 @@ void kbase_pm_state_machine_term(struct kbase_device *kbdev);
* kbase_pm_update_cores_state - Update the desired state of shader cores from
* the Power Policy, and begin any power
* transitions.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* This function will update the desired_xx_state members of
* struct kbase_pm_device_data by calling into the current Power Policy. It will
 * then begin power transitions to make the hardware achieve the desired shader
* core state.
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_update_cores_state(struct kbase_device *kbdev);
/**
* kbasep_pm_metrics_init - Initialize the metrics gathering framework.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* This must be called before other metric gathering APIs are called.
*
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Return: 0 on success, error code on error
*/
@@ -341,29 +336,27 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev);
/**
* kbasep_pm_metrics_term - Terminate the metrics gathering framework.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* This must be called when metric gathering is no longer required. It is an
* error to call any metrics gathering function (other than
* kbasep_pm_metrics_init()) after calling this function.
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbasep_pm_metrics_term(struct kbase_device *kbdev);
/**
* kbase_pm_report_vsync - Function to be called by the frame buffer driver to
* update the vsync metric.
+ * @kbdev: The kbase device structure for the device (must be a
+ * valid pointer)
+ * @buffer_updated: True if the buffer has been updated on this VSync,
+ * false otherwise
*
* This function should be called by the frame buffer driver to update whether
* the system is hitting the vsync target or not. buffer_updated should be true
* if the vsync corresponded with a new frame being displayed, otherwise it
* should be false. This function does not need to be called every vsync, but
* only when the value of @buffer_updated differs from a previous call.
- *
- * @kbdev: The kbase device structure for the device (must be a
- * valid pointer)
- * @buffer_updated: True if the buffer has been updated on this VSync,
- * false otherwise
*/
void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated);
@@ -381,6 +374,7 @@ void kbase_pm_get_dvfs_action(struct kbase_device *kbdev);
/**
* kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is
* needed
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* If the caller is the first caller then the GPU cycle counters will be enabled
* along with the l2 cache
@@ -388,13 +382,13 @@ void kbase_pm_get_dvfs_action(struct kbase_device *kbdev);
* The GPU must be powered when calling this function (i.e.
* kbase_pm_context_active() must have been called).
*
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev);
/**
* kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is
* needed (l2 cache already on)
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* This is a version of the above function
* (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the
@@ -405,14 +399,13 @@ void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev);
* The GPU must be powered when calling this function (i.e.
* kbase_pm_context_active() must have been called) and the l2 cache must be
* powered on.
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);
/**
* kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no
* longer in use
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* If the caller is the last caller then the GPU cycle counters will be
* disabled. A request must have been made before a call to this.
@@ -420,18 +413,15 @@ void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);
* Caller must not hold the hwaccess_lock, as it will be taken in this function.
* If the caller is already holding this lock then
* kbase_pm_release_gpu_cycle_counter_nolock() must be used instead.
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);
/**
* kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter()
* that does not take hwaccess_lock
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Caller must hold the hwaccess_lock.
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev);
@@ -458,12 +448,11 @@ void kbase_pm_wait_for_gpu_power_down(struct kbase_device *kbdev);
/**
* kbase_pm_runtime_init - Initialize runtime-pm for Mali GPU platform device
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Setup the power management callbacks and initialize/enable the runtime-pm
* for the Mali GPU platform device, using the callback function. This must be
* called before the kbase_pm_register_access_enable() function.
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
int kbase_pm_runtime_init(struct kbase_device *kbdev);
@@ -476,6 +465,7 @@ void kbase_pm_runtime_term(struct kbase_device *kbdev);
/**
* kbase_pm_register_access_enable - Enable access to GPU registers
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Enables access to the GPU registers before power management has powered up
* the GPU with kbase_pm_powerup().
@@ -486,13 +476,12 @@ void kbase_pm_runtime_term(struct kbase_device *kbdev);
*
* This should only be used before power management is powered up with
* kbase_pm_powerup()
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_register_access_enable(struct kbase_device *kbdev);
/**
* kbase_pm_register_access_disable - Disable early register access
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Disables access to the GPU registers enabled earlier by a call to
* kbase_pm_register_access_enable().
@@ -503,8 +492,6 @@ void kbase_pm_register_access_enable(struct kbase_device *kbdev);
*
* This should only be used before power management is powered up with
* kbase_pm_powerup()
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_register_access_disable(struct kbase_device *kbdev);
@@ -515,6 +502,7 @@ void kbase_pm_register_access_disable(struct kbase_device *kbdev);
/**
* kbase_pm_metrics_is_active - Check if the power management metrics
* collection is active.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
*
 * Note that this returns whether the power management metrics collection was
 * active at the time of calling; it is possible that after the call the metrics
@@ -522,7 +510,6 @@ void kbase_pm_register_access_disable(struct kbase_device *kbdev);
*
* The caller must handle the consequence that the state may have changed.
*
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
* Return: true if metrics collection was active else false.
*/
bool kbase_pm_metrics_is_active(struct kbase_device *kbdev);
@@ -558,12 +545,13 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
/**
* kbase_platform_dvfs_event - Report utilisation to DVFS code for CSF GPU
*
- * Function provided by platform specific code when DVFS is enabled to allow
- * the power management metrics system to report utilisation.
- *
* @kbdev: The kbase device structure for the device (must be a
* valid pointer)
* @utilisation: The current calculated utilisation by the metrics system.
+ *
+ * Function provided by platform specific code when DVFS is enabled to allow
+ * the power management metrics system to report utilisation.
+ *
 * Return: 0 on failure and non-zero on success.
*/
int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation);
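As a hedged illustration of the contract documented above: platform integration code supplies this function when DVFS is enabled. The sketch below assumes a hypothetical my_platform_set_freq() helper; only the prototype and the return convention come from this header.

int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation)
{
	/* Hypothetical platform hook: feed the utilisation figure computed by
	 * the metrics system into the platform's own DVFS governor.
	 */
	my_platform_set_freq(kbdev->dev, utilisation);

	/* Non-zero reports success, per the Return description above. */
	return 1;
}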
@@ -571,15 +559,15 @@ int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation);
/**
* kbase_platform_dvfs_event - Report utilisation to DVFS code for JM GPU
*
- * Function provided by platform specific code when DVFS is enabled to allow
- * the power management metrics system to report utilisation.
- *
* @kbdev: The kbase device structure for the device (must be a
* valid pointer)
* @utilisation: The current calculated utilisation by the metrics system.
* @util_gl_share: The current calculated gl share of utilisation.
* @util_cl_share: The current calculated cl share of utilisation per core
* group.
+ *
+ * Function provided by platform specific code when DVFS is enabled to allow
+ * the power management metrics system to report utilisation.
+ *
 * Return: 0 on failure and non-zero on success.
*/
int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h b/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h
index 96f196f..5e57c9d 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h
@@ -25,37 +25,47 @@
* this header file. This header file can be included multiple times in the
* same compilation unit with different definitions of KBASEP_MCU_STATE().
*
- * @OFF: The MCU is powered off.
- * @PEND_ON_RELOAD: The warm boot of MCU or cold boot of MCU (with
- * firmware reloading) is in progress.
- * @ON_GLB_REINIT_PEND: The MCU is enabled and Global configuration
- * requests have been sent to the firmware.
- * @ON_HWCNT_ENABLE: The Global requests have completed and MCU is now
- * ready for use and hwcnt is being enabled.
- * @ON: The MCU is active and hwcnt has been enabled.
- * @ON_CORE_ATTR_UPDATE_PEND: The MCU is active and mask of enabled shader cores
- * is being updated.
- * @ON_HWCNT_DISABLE: The MCU is on and hwcnt is being disabled.
- * @ON_HALT: The MCU is on and hwcnt has been disabled, MCU
- * halt would be triggered.
- * @ON_PEND_HALT: MCU halt in progress, confirmation pending.
- * @POWER_DOWN: MCU halted operations, pending being disabled.
- * @PEND_OFF: MCU is being disabled, pending on powering off.
- * @RESET_WAIT: The GPU is resetting, MCU state is unknown.
- * @HCTL_SHADERS_PEND_ON: Global configuration requests sent to the firmware
- * have completed and shaders have been requested to
- * power on.
- * @HCTL_CORES_NOTIFY_PEND: Shader cores have powered up and firmware is being
- * notified of the mask of enabled shader cores.
- * @HCTL_MCU_ON_RECHECK: MCU is on and hwcnt disabling is triggered
- * and checks are done to increase the number of
- * enabled cores.
- * @HCTL_SHADERS_READY_OFF: MCU has halted and cores need to be powered down
- * @HCTL_SHADERS_PEND_OFF: Cores are transitioning to power down.
- * @ON_SLEEP_INITIATE: MCU is on and hwcnt has been disabled and MCU
- * is being put to sleep.
- * @ON_PEND_SLEEP: MCU sleep is in progress.
- * @IN_SLEEP: Sleep request is completed and MCU has halted.
+ * @OFF: The MCU is powered off.
+ * @PEND_ON_RELOAD: The warm boot of MCU or cold boot of MCU (with
+ * firmware reloading) is in progress.
+ * @ON_GLB_REINIT_PEND: The MCU is enabled and Global configuration
+ * requests have been sent to the firmware.
+ * @ON_HWCNT_ENABLE: The Global requests have completed and MCU is now
+ * ready for use and hwcnt is being enabled.
+ * @ON: The MCU is active and hwcnt has been enabled.
+ * @ON_CORE_ATTR_UPDATE_PEND: The MCU is active and mask of enabled shader cores
+ * is being updated.
+ * @ON_HWCNT_DISABLE: The MCU is on and hwcnt is being disabled.
+ * @ON_HALT: The MCU is on and hwcnt has been disabled, MCU
+ * halt would be triggered.
+ * @ON_PEND_HALT: MCU halt in progress, confirmation pending.
+ * @POWER_DOWN: MCU halted operations, pending being disabled.
+ * @PEND_OFF: MCU is being disabled, pending on powering off.
+ * @RESET_WAIT: The GPU is resetting, MCU state is unknown.
+ * @HCTL_SHADERS_PEND_ON: Global configuration requests sent to the firmware
+ * have completed and shaders have been requested to
+ * power on.
+ * @HCTL_CORES_NOTIFY_PEND: Shader cores have powered up and firmware is being
+ * notified of the mask of enabled shader cores.
+ * @HCTL_MCU_ON_RECHECK: MCU is on and hwcnt disabling is triggered
+ * and checks are done to update the number of
+ * enabled cores.
+ * @HCTL_SHADERS_READY_OFF: MCU has halted and cores need to be powered down
+ * @HCTL_SHADERS_PEND_OFF: Cores are transitioning to power down.
+ * @HCTL_CORES_DOWN_SCALE_NOTIFY_PEND: Firmware has been informed to stop using
+ *                                    specific cores due to a core_mask change
+ *                                    request. After the ACK from FW, the driver
+ *                                    waits for the undesired cores to become
+ *                                    inactive.
+ * @HCTL_CORE_INACTIVE_PEND: Waiting for specific cores to become inactive.
+ * Once the cores become inactive their power down
+ * will be initiated.
+ * @HCTL_SHADERS_CORE_OFF_PEND: Waiting for specific cores to complete the
+ * transition to power down. Once powered down,
+ * HW counters will be re-enabled.
+ * @ON_SLEEP_INITIATE: MCU is on and hwcnt has been disabled and MCU
+ * is being put to sleep.
+ * @ON_PEND_SLEEP: MCU sleep is in progress.
+ * @IN_SLEEP: Sleep request is completed and MCU has halted.
*/
KBASEP_MCU_STATE(OFF)
KBASEP_MCU_STATE(PEND_ON_RELOAD)
@@ -75,6 +85,9 @@ KBASEP_MCU_STATE(HCTL_CORES_NOTIFY_PEND)
KBASEP_MCU_STATE(HCTL_MCU_ON_RECHECK)
KBASEP_MCU_STATE(HCTL_SHADERS_READY_OFF)
KBASEP_MCU_STATE(HCTL_SHADERS_PEND_OFF)
+KBASEP_MCU_STATE(HCTL_CORES_DOWN_SCALE_NOTIFY_PEND)
+KBASEP_MCU_STATE(HCTL_CORE_INACTIVE_PEND)
+KBASEP_MCU_STATE(HCTL_SHADERS_CORE_OFF_PEND)
/* Additional MCU states to support GPU sleep feature */
KBASEP_MCU_STATE(ON_SLEEP_INITIATE)
KBASEP_MCU_STATE(ON_PEND_SLEEP)
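As the comment at the top of this header notes, KBASEP_MCU_STATE() is deliberately left undefined so that each includer can expand the list differently. A minimal sketch of that X-macro pattern is shown below; the names kbase_mcu_state and mcu_state_names are illustrative, not necessarily the driver's own.

/* Expand the state list once as an enum ... */
enum kbase_mcu_state {
#define KBASEP_MCU_STATE(n) KBASE_MCU_##n,
#include "mali_kbase_pm_mcu_states.h"
#undef KBASEP_MCU_STATE
};

/* ... and once more as a matching table of names for debug/trace output. */
static const char *const mcu_state_names[] = {
#define KBASEP_MCU_STATE(n) #n,
#include "mali_kbase_pm_mcu_states.h"
#undef KBASEP_MCU_STATE
};

With this pattern, adding the three HCTL_*_PEND states above updates every such expansion in one place.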
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
index 7b126a1..bc05bd7 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
@@ -36,8 +36,13 @@
#include <linux/of.h>
static const struct kbase_pm_policy *const all_policy_list[] = {
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ &kbase_pm_always_on_policy_ops,
&kbase_pm_coarse_demand_policy_ops,
- &kbase_pm_always_on_policy_ops
+#else /* CONFIG_MALI_NO_MALI */
+ &kbase_pm_coarse_demand_policy_ops,
+ &kbase_pm_always_on_policy_ops,
+#endif /* CONFIG_MALI_NO_MALI */
};
void kbase_pm_policy_init(struct kbase_device *kbdev)
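A hedged sketch of why the ordering matters, assuming (as is common in kbase) that the first entry of all_policy_list[] is taken as the default policy during kbase_pm_policy_init(); the helper name below is illustrative only.

/* With the #if above, CONFIG_MALI_NO_MALI builds default to always_on,
 * while builds for real hardware keep coarse_demand as the default.
 */
static const struct kbase_pm_policy *default_pm_policy(void)
{
	return all_policy_list[0];
}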
diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c
index 92a366b..51812ee 100644
--- a/mali_kbase/backend/gpu/mali_kbase_time.c
+++ b/mali_kbase/backend/gpu/mali_kbase_time.c
@@ -67,6 +67,9 @@ void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev,
*/
static bool timedwait_cycle_count_active(struct kbase_device *kbdev)
{
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ return true;
+#else
bool success = false;
const unsigned int timeout = 100;
const unsigned long remaining = jiffies + msecs_to_jiffies(timeout);
@@ -79,6 +82,7 @@ static bool timedwait_cycle_count_active(struct kbase_device *kbdev)
}
}
return success;
+#endif
}
#endif
diff --git a/mali_kbase/context/backend/mali_kbase_context_csf.c b/mali_kbase/context/backend/mali_kbase_context_csf.c
index 1ce806f..7d45a08 100644
--- a/mali_kbase/context/backend/mali_kbase_context_csf.c
+++ b/mali_kbase/context/backend/mali_kbase_context_csf.c
@@ -48,6 +48,7 @@ void kbase_context_debugfs_init(struct kbase_context *const kctx)
kbase_csf_queue_group_debugfs_init(kctx);
kbase_csf_kcpu_debugfs_init(kctx);
kbase_csf_tiler_heap_debugfs_init(kctx);
+ kbase_csf_tiler_heap_total_debugfs_init(kctx);
kbase_csf_cpu_queue_debugfs_init(kctx);
}
KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init);
diff --git a/mali_kbase/context/mali_kbase_context.c b/mali_kbase/context/mali_kbase_context.c
index 85f4c0a..9eaf69a 100644
--- a/mali_kbase/context/mali_kbase_context.c
+++ b/mali_kbase/context/mali_kbase_context.c
@@ -163,8 +163,6 @@ int kbase_context_common_init(struct kbase_context *kctx)
kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1;
- mutex_init(&kctx->legacy_hwcnt_lock);
-
mutex_lock(&kctx->kbdev->kctx_list_lock);
err = kbase_insert_kctx_to_process(kctx);
diff --git a/mali_kbase/csf/Kbuild b/mali_kbase/csf/Kbuild
index 765e419..29983fb 100644
--- a/mali_kbase/csf/Kbuild
+++ b/mali_kbase/csf/Kbuild
@@ -33,10 +33,12 @@ mali_kbase-y += \
csf/mali_kbase_csf_kcpu_debugfs.o \
csf/mali_kbase_csf_protected_memory.o \
csf/mali_kbase_csf_tiler_heap_debugfs.o \
- csf/mali_kbase_csf_cpu_queue_debugfs.o
+ csf/mali_kbase_csf_cpu_queue_debugfs.o \
+ csf/mali_kbase_csf_event.o
mali_kbase-$(CONFIG_MALI_REAL_HW) += csf/mali_kbase_csf_firmware.o
+mali_kbase-$(CONFIG_MALI_NO_MALI) += csf/mali_kbase_csf_firmware_no_mali.o
ifeq ($(KBUILD_EXTMOD),)
# in-tree
diff --git a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c
index ce6d546..546e18d 100644
--- a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c
+++ b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c
@@ -43,7 +43,7 @@
#define COMMAND_PROTECTED_ACK ((u32)4)
#define COMMAND_RESET_ACK ((u32)5)
-/**
+/*
* Default value for the TIMER register of the IPA Control interface,
* expressed in milliseconds.
*
@@ -53,22 +53,22 @@
*/
#define TIMER_DEFAULT_VALUE_MS ((u32)10) /* 10 milliseconds */
-/**
+/*
* Number of timer events per second.
*/
#define TIMER_EVENTS_PER_SECOND ((u32)1000 / TIMER_DEFAULT_VALUE_MS)
-/**
+/*
* Maximum number of loops polling the GPU before we assume the GPU has hung.
*/
#define IPA_INACTIVE_MAX_LOOPS ((unsigned int)8000000)
-/**
+/*
* Number of bits used to configure a performance counter in SELECT registers.
*/
#define IPA_CONTROL_SELECT_BITS_PER_CNT ((u64)8)
-/**
+/*
* Maximum value of a performance counter.
*/
#define MAX_PRFCNT_VALUE (((u64)1 << 48) - 1)
@@ -251,9 +251,15 @@ static inline void calc_prfcnt_delta(struct kbase_device *kbdev,
delta_value *= prfcnt->scaling_factor;
- if (!WARN_ON_ONCE(kbdev->csf.ipa_control.cur_gpu_rate == 0))
- if (prfcnt->gpu_norm)
- delta_value = div_u64(delta_value, kbdev->csf.ipa_control.cur_gpu_rate);
+ if (kbdev->csf.ipa_control.cur_gpu_rate == 0) {
+ static bool warned;
+
+ if (!warned) {
+ dev_warn(kbdev->dev, "%s: GPU freq is unexpectedly 0", __func__);
+ warned = true;
+ }
+ } else if (prfcnt->gpu_norm)
+ delta_value = div_u64(delta_value, kbdev->csf.ipa_control.cur_gpu_rate);
prfcnt->latest_raw_value = raw_value;
@@ -791,7 +797,7 @@ int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client,
ipa_ctrl = &kbdev->csf.ipa_control;
session = (struct kbase_ipa_control_session *)client;
- if (WARN_ON(!session->active)) {
+ if (!session->active) {
dev_err(kbdev->dev,
"%s: attempt to query inactive session", __func__);
return -EINVAL;
diff --git a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.h b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.h
index 348a52f..0469c48 100644
--- a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.h
+++ b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.h
@@ -24,7 +24,7 @@
#include <mali_kbase.h>
-/**
+/*
* Maximum index accepted to configure an IPA Control performance counter.
*/
#define KBASE_IPA_CONTROL_CNT_MAX_IDX ((u8)64 * 3)
diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c
index 142e5a8..8b70349 100644
--- a/mali_kbase/csf/mali_kbase_csf.c
+++ b/mali_kbase/csf/mali_kbase_csf.c
@@ -33,30 +33,12 @@
#include "mali_kbase_csf_timeout.h"
#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
#include <mali_kbase_hwaccess_time.h>
+#include "mali_kbase_csf_event.h"
#define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK)
#define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK)
#define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1)
-/**
- * struct kbase_csf_event - CSF event callback.
- *
- * This structure belongs to the list of events which is part of a Kbase
- * context, and describes a callback function with a custom parameter to pass
- * to it when a CSF event is signalled.
- *
- * @link: Link to the rest of the list.
- * @kctx: Pointer to the Kbase context this event belongs to.
- * @callback: Callback function to call when a CSF event is signalled.
- * @param: Parameter to pass to the callback function.
- */
-struct kbase_csf_event {
- struct list_head link;
- struct kbase_context *kctx;
- kbase_csf_event_callback *callback;
- void *param;
-};
-
const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT] = {
KBASE_QUEUE_GROUP_PRIORITY_HIGH,
KBASE_QUEUE_GROUP_PRIORITY_MEDIUM,
@@ -530,24 +512,24 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
if (reg_ex && reg_ex->ex_buffer_size) {
int buf_pages = (reg_ex->ex_buffer_size +
(1 << PAGE_SHIFT) - 1) >> PAGE_SHIFT;
+ struct kbase_va_region *region_ex =
+ kbase_region_tracker_find_region_enclosing_address(kctx,
+ reg_ex->ex_buffer_base);
- region = kbase_region_tracker_find_region_enclosing_address(
- kctx, reg_ex->ex_buffer_base);
- if (kbase_is_region_invalid_or_free(region)) {
+ if (kbase_is_region_invalid_or_free(region_ex)) {
ret = -ENOENT;
goto out_unlock_vm;
}
- if (buf_pages > (region->nr_pages -
- ((reg_ex->ex_buffer_base >> PAGE_SHIFT) -
- region->start_pfn))) {
+ if (buf_pages > (region_ex->nr_pages -
+ ((reg_ex->ex_buffer_base >> PAGE_SHIFT) - region_ex->start_pfn))) {
ret = -EINVAL;
goto out_unlock_vm;
}
- region = kbase_region_tracker_find_region_enclosing_address(
- kctx, reg_ex->ex_offset_var_addr);
- if (kbase_is_region_invalid_or_free(region)) {
+ region_ex = kbase_region_tracker_find_region_enclosing_address(
+ kctx, reg_ex->ex_offset_var_addr);
+ if (kbase_is_region_invalid_or_free(region_ex)) {
ret = -ENOENT;
goto out_unlock_vm;
}
@@ -582,6 +564,8 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
queue->sb_status = 0;
queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED;
+ atomic_set(&queue->pending, 0);
+
INIT_LIST_HEAD(&queue->link);
INIT_LIST_HEAD(&queue->error.link);
INIT_WORK(&queue->oom_event_work, oom_event_worker);
@@ -589,6 +573,7 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
list_add(&queue->link, &kctx->csf.queue_list);
region->flags |= KBASE_REG_NO_USER_FREE;
+ region->user_data = queue;
/* Initialize the cs_trace configuration parameters, When buffer_size
* is 0, trace is disabled. Here we only update the fields when
@@ -669,8 +654,6 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx,
queue = find_queue(kctx, term->buffer_gpu_addr);
if (queue) {
- unsigned long flags;
-
/* As the GPU queue has been terminated by the
* user space, undo the actions that were performed when the
* queue was registered i.e. remove the queue from the per
@@ -687,19 +670,18 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx,
/* After this the Userspace would be able to free the
* memory for GPU queue. In case the Userspace missed
* terminating the queue, the cleanup will happen on
- * context termination where teardown of region tracker
+ * context termination where tear down of region tracker
* would free up the GPU queue memory.
*/
queue->queue_reg->flags &= ~KBASE_REG_NO_USER_FREE;
+ queue->queue_reg->user_data = NULL;
}
kbase_gpu_vm_unlock(kctx);
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
dev_dbg(kctx->kbdev->dev,
"Remove any pending command queue fatal from context %pK\n",
(void *)kctx);
- list_del_init(&queue->error.link);
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
+ kbase_csf_event_remove_error(kctx, &queue->error);
release_queue(queue);
}
@@ -781,6 +763,48 @@ static struct kbase_queue_group *get_bound_queue_group(
return group;
}
+/**
+ * pending_submission_worker() - Work item to process pending kicked GPU command queues.
+ *
+ * @work: Pointer to pending_submission_work.
+ *
+ * This function starts all pending queues, for which the work
+ * was previously submitted via ioctl call from application thread.
+ * If the queue is already scheduled and resident, it will be started
+ * right away, otherwise once the group is made resident.
+ */
+static void pending_submission_worker(struct work_struct *work)
+{
+ struct kbase_context *kctx =
+ container_of(work, struct kbase_context, csf.pending_submission_work);
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct kbase_queue *queue;
+ int err = kbase_reset_gpu_prevent_and_wait(kbdev);
+
+ if (err) {
+ dev_err(kbdev->dev, "Unsuccessful GPU reset detected when kicking queue ");
+ return;
+ }
+
+ mutex_lock(&kctx->csf.lock);
+
+ /* Iterate through the queue list and schedule the pending ones for submission. */
+ list_for_each_entry(queue, &kctx->csf.queue_list, link) {
+ if (atomic_cmpxchg(&queue->pending, 1, 0) == 1) {
+ struct kbase_queue_group *group = get_bound_queue_group(queue);
+
+ if (!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND)
+ dev_dbg(kbdev->dev, "queue is not bound to a group");
+ else
+ WARN_ON(kbase_csf_scheduler_queue_start(queue));
+ }
+ }
+
+ mutex_unlock(&kctx->csf.lock);
+
+ kbase_reset_gpu_allow(kbdev);
+}
+
void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot)
{
if (WARN_ON(slot < 0))
@@ -846,40 +870,44 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev,
kbase_csf_ring_csg_doorbell(kbdev, csg_nr);
}
+static void enqueue_gpu_submission_work(struct kbase_context *const kctx)
+{
+ queue_work(system_highpri_wq, &kctx->csf.pending_submission_work);
+}
+
int kbase_csf_queue_kick(struct kbase_context *kctx,
struct kbase_ioctl_cs_queue_kick *kick)
{
struct kbase_device *kbdev = kctx->kbdev;
- struct kbase_queue_group *group;
- struct kbase_queue *queue;
+ bool trigger_submission = false;
+ struct kbase_va_region *region;
int err = 0;
- err = kbase_reset_gpu_prevent_and_wait(kbdev);
- if (err) {
- dev_warn(
- kbdev->dev,
- "Unsuccessful GPU reset detected when kicking queue (buffer_addr=0x%.16llx)",
- kick->buffer_gpu_addr);
- return err;
- }
-
- mutex_lock(&kctx->csf.lock);
- queue = find_queue(kctx, kick->buffer_gpu_addr);
- if (!queue)
- err = -EINVAL;
+	/* GPU work submission happens asynchronously to avoid contention on the
+	 * scheduler lock, which would otherwise block the application thread. For
+	 * this reason the vm_lock is used here to look up the queue, based on its
+	 * buffer_gpu_addr, from the context's list of active va_regions.
+	 * Once the target queue is found, its pending flag is set to one atomically,
+	 * avoiding a race between the submission ioctl thread and the work item.
+	 */
+ kbase_gpu_vm_lock(kctx);
+ region = kbase_region_tracker_find_region_enclosing_address(kctx, kick->buffer_gpu_addr);
+ if (!kbase_is_region_invalid_or_free(region)) {
+ struct kbase_queue *queue = region->user_data;
- if (!err) {
- group = get_bound_queue_group(queue);
- if (!group) {
- dev_err(kctx->kbdev->dev, "queue not bound\n");
- err = -EINVAL;
+ if (queue) {
+ atomic_cmpxchg(&queue->pending, 0, 1);
+ trigger_submission = true;
}
+ } else {
+ dev_dbg(kbdev->dev,
+ "Attempt to kick GPU queue without a valid command buffer region");
+ err = -EFAULT;
}
+ kbase_gpu_vm_unlock(kctx);
- if (!err)
- err = kbase_csf_scheduler_queue_start(queue);
- mutex_unlock(&kctx->csf.lock);
- kbase_reset_gpu_allow(kbdev);
+ if (likely(trigger_submission))
+ enqueue_gpu_submission_work(kctx);
return err;
}
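The reworked kick path above and pending_submission_worker() hand work over through the per-queue pending flag with a claim/consume handshake. The self-contained userspace C11 snippet below illustrates that handshake; it is not driver code and the names are illustrative.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int pending;

/* ioctl path: publish work by flipping pending 0 -> 1. */
static void kick(void)
{
	int expected = 0;

	atomic_compare_exchange_strong(&pending, &expected, 1);
}

/* work item: consume work by flipping pending 1 -> 0. */
static bool worker_claim(void)
{
	int expected = 1;

	return atomic_compare_exchange_strong(&pending, &expected, 0);
}

int main(void)
{
	kick();
	kick();                                  /* coalesced with the first kick */
	printf("claimed: %d\n", worker_claim()); /* prints 1 */
	printf("claimed: %d\n", worker_claim()); /* prints 0: nothing pending */
	return 0;
}

Because a second kick before the worker runs fails the 0 -> 1 exchange, repeated kicks coalesce, and a kick racing with the worker is neither lost nor handled twice.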
@@ -1310,6 +1338,7 @@ static int create_queue_group(struct kbase_context *const kctx,
group->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
group->faulted = false;
+
group->group_uid = generate_group_uid();
create->out.group_uid = group->group_uid;
@@ -1343,6 +1372,7 @@ static int create_queue_group(struct kbase_context *const kctx,
return group_handle;
}
+
int kbase_csf_queue_group_create(struct kbase_context *const kctx,
union kbase_ioctl_cs_queue_group_create *const create)
{
@@ -1368,6 +1398,9 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx,
"No CSG has at least %d CSs",
create->in.cs_min);
err = -EINVAL;
+ } else if (create->in.reserved) {
+ dev_warn(kctx->kbdev->dev, "Reserved field was set to non-0");
+ err = -EINVAL;
} else {
/* For the CSG which satisfies the condition for having
* the needed number of CSs, check whether it also conforms
@@ -1517,6 +1550,19 @@ static void cancel_queue_group_events(struct kbase_queue_group *group)
cancel_work_sync(&group->protm_event_work);
}
+static void remove_pending_group_fatal_error(struct kbase_queue_group *group)
+{
+ struct kbase_context *kctx = group->kctx;
+
+ dev_dbg(kctx->kbdev->dev,
+ "Remove any pending group fatal error from context %pK\n",
+ (void *)group->kctx);
+
+ kbase_csf_event_remove_error(kctx, &group->error_tiler_oom);
+ kbase_csf_event_remove_error(kctx, &group->error_timeout);
+ kbase_csf_event_remove_error(kctx, &group->error_fatal);
+}
+
void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
u8 group_handle)
{
@@ -1539,19 +1585,7 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
group = find_queue_group(kctx, group_handle);
if (group) {
- unsigned long flags;
-
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
-
- dev_dbg(kbdev->dev,
- "Remove any pending group fatal error from context %pK\n",
- (void *)group->kctx);
-
- list_del_init(&group->error_tiler_oom.link);
- list_del_init(&group->error_timeout.link);
- list_del_init(&group->error_fatal.link);
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
-
+ remove_pending_group_fatal_error(group);
term_queue_group(group);
kctx->csf.queue_groups[group_handle] = NULL;
}
@@ -1603,48 +1637,6 @@ int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
return err;
}
-/**
- * add_error() - Add an error to the list of errors to report to user space
- *
- * @kctx: Address of a base context associated with a GPU address space.
- * @error: Address of the item to be added to the context's pending error list.
- * @data: Error data to be returned to userspace.
- *
- * Does not wake up the event queue blocking a user thread in kbase_poll. This
- * is to make it more efficient to add multiple errors.
- *
- * The added error must not already be on the context's list of errors waiting
- * to be reported (e.g. because a previous error concerning the same object has
- * not yet been reported).
- */
-static void add_error(struct kbase_context *const kctx,
- struct kbase_csf_notification *const error,
- struct base_csf_notification const *const data)
-{
- unsigned long flags;
-
- if (WARN_ON(!kctx))
- return;
-
- if (WARN_ON(!error))
- return;
-
- if (WARN_ON(!data))
- return;
-
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
-
- if (!WARN_ON(!list_empty(&error->link))) {
- error->data = *data;
- list_add_tail(&error->link, &kctx->csf.error_list);
- dev_dbg(kctx->kbdev->dev,
- "Added error %pK of type %d in context %pK\n",
- (void *)error, data->type, (void *)kctx);
- }
-
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
-}
-
void kbase_csf_add_group_fatal_error(
struct kbase_queue_group *const group,
struct base_gpu_queue_group_error const *const err_payload)
@@ -1667,7 +1659,7 @@ void kbase_csf_add_group_fatal_error(
}
};
- add_error(group->kctx, &group->error_fatal, &error);
+ kbase_csf_event_add_error(group->kctx, &group->error_fatal, &error);
}
void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev,
@@ -1708,12 +1700,11 @@ int kbase_csf_ctx_init(struct kbase_context *kctx)
struct kbase_device *kbdev = kctx->kbdev;
int err = -ENOMEM;
- INIT_LIST_HEAD(&kctx->csf.event_callback_list);
INIT_LIST_HEAD(&kctx->csf.queue_list);
INIT_LIST_HEAD(&kctx->csf.link);
- INIT_LIST_HEAD(&kctx->csf.error_list);
- spin_lock_init(&kctx->csf.event_lock);
+ kbase_csf_event_init(kctx);
+
kctx->csf.user_reg_vma = NULL;
mutex_lock(&kbdev->pm.lock);
/* The inode information for /dev/malixx file is not available at the
@@ -1744,9 +1735,11 @@ int kbase_csf_ctx_init(struct kbase_context *kctx)
if (likely(!err)) {
err = kbase_csf_tiler_heap_context_init(kctx);
- if (likely(!err))
+ if (likely(!err)) {
mutex_init(&kctx->csf.lock);
- else
+ INIT_WORK(&kctx->csf.pending_submission_work,
+ pending_submission_worker);
+ } else
kbase_csf_kcpu_queue_context_term(kctx);
}
@@ -1829,7 +1822,6 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
* for queue groups & kcpu queues, hence no need to explicitly remove
* those debugfs files.
*/
- kbase_csf_event_wait_remove_all(kctx);
/* Wait for a GPU reset if it is happening, prevent it if not happening */
err = kbase_reset_gpu_prevent_and_wait(kbdev);
@@ -1841,13 +1833,20 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
else
reset_prevented = true;
+ cancel_work_sync(&kctx->csf.pending_submission_work);
+
mutex_lock(&kctx->csf.lock);
+
/* Iterate through the queue groups that were not terminated by
* userspace and issue the term request to firmware for them.
*/
for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) {
- if (kctx->csf.queue_groups[i])
- term_queue_group(kctx->csf.queue_groups[i]);
+ struct kbase_queue_group *group = kctx->csf.queue_groups[i];
+
+ if (group) {
+ remove_pending_group_fatal_error(group);
+ term_queue_group(group);
+ }
}
mutex_unlock(&kctx->csf.lock);
@@ -1910,185 +1909,19 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
kbase_csf_tiler_heap_context_term(kctx);
kbase_csf_kcpu_queue_context_term(kctx);
kbase_csf_scheduler_context_term(kctx);
+ kbase_csf_event_term(kctx);
mutex_destroy(&kctx->csf.lock);
}
-int kbase_csf_event_wait_add(struct kbase_context *kctx,
- kbase_csf_event_callback *callback, void *param)
-{
- int err = -ENOMEM;
- struct kbase_csf_event *event =
- kzalloc(sizeof(struct kbase_csf_event), GFP_KERNEL);
-
- if (event) {
- unsigned long flags;
-
- event->kctx = kctx;
- event->callback = callback;
- event->param = param;
-
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
- list_add_tail(&event->link, &kctx->csf.event_callback_list);
- dev_dbg(kctx->kbdev->dev,
- "Added event handler %pK with param %pK\n", event,
- event->param);
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
-
- err = 0;
- }
-
- return err;
-}
-
-void kbase_csf_event_wait_remove(struct kbase_context *kctx,
- kbase_csf_event_callback *callback, void *param)
-{
- struct kbase_csf_event *event;
- unsigned long flags;
-
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
-
- list_for_each_entry(event, &kctx->csf.event_callback_list, link) {
- if ((event->callback == callback) && (event->param == param)) {
- list_del(&event->link);
- dev_dbg(kctx->kbdev->dev,
- "Removed event handler %pK with param %pK\n",
- event, event->param);
- kfree(event);
- break;
- }
- }
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
-}
-
-bool kbase_csf_read_error(struct kbase_context *kctx,
- struct base_csf_notification *event_data)
-{
- bool got_event = true;
- struct kbase_csf_notification *error_data = NULL;
- unsigned long flags;
-
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
-
- if (likely(!list_empty(&kctx->csf.error_list))) {
- error_data = list_first_entry(&kctx->csf.error_list,
- struct kbase_csf_notification, link);
- list_del_init(&error_data->link);
- *event_data = error_data->data;
- dev_dbg(kctx->kbdev->dev, "Dequeued error %pK in context %pK\n",
- (void *)error_data, (void *)kctx);
- } else {
- got_event = false;
- }
-
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
-
- return got_event;
-}
-
-bool kbase_csf_error_pending(struct kbase_context *kctx)
-{
- bool event_pended = false;
- unsigned long flags;
-
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
- event_pended = !list_empty(&kctx->csf.error_list);
- dev_dbg(kctx->kbdev->dev, "%s error is pending in context %pK\n",
- event_pended ? "An" : "No", (void *)kctx);
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
-
- return event_pended;
-}
-
-static void sync_update_notify_gpu(struct kbase_context *kctx)
-{
- bool can_notify_gpu;
- unsigned long flags;
-
- spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
- can_notify_gpu = kctx->kbdev->pm.backend.gpu_powered;
-#ifdef KBASE_PM_RUNTIME
- if (kctx->kbdev->pm.backend.gpu_sleep_mode_active)
- can_notify_gpu = false;
-#endif
-
- if (can_notify_gpu) {
- kbase_csf_ring_doorbell(kctx->kbdev, CSF_KERNEL_DOORBELL_NR);
- KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT_NOTIFY_GPU, kctx, 0u);
- }
-
- spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
-}
-
-void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu)
-{
- struct kbase_csf_event *event, *next_event;
- unsigned long flags;
-
- dev_dbg(kctx->kbdev->dev,
- "Signal event (%s GPU notify) for context %pK\n",
- notify_gpu ? "with" : "without", (void *)kctx);
-
- /* First increment the signal count and wake up event thread.
- */
- atomic_set(&kctx->event_count, 1);
- kbase_event_wakeup(kctx);
-
- /* Signal the CSF firmware. This is to ensure that pending command
- * stream synch object wait operations are re-evaluated.
- * Write to GLB_DOORBELL would suffice as spec says that all pending
- * synch object wait operations are re-evaluated on a write to any
- * CS_DOORBELL/GLB_DOORBELL register.
- */
- if (notify_gpu)
- sync_update_notify_gpu(kctx);
-
- /* Now invoke the callbacks registered on backend side.
- * Allow item removal inside the loop, if requested by the callback.
- */
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
-
- list_for_each_entry_safe(
- event, next_event, &kctx->csf.event_callback_list, link) {
- enum kbase_csf_event_callback_action action;
-
- dev_dbg(kctx->kbdev->dev,
- "Calling event handler %pK with param %pK\n",
- (void *)event, event->param);
- action = event->callback(event->param);
- if (action == KBASE_CSF_EVENT_CALLBACK_REMOVE) {
- list_del(&event->link);
- kfree(event);
- }
- }
-
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
-}
-
-void kbase_csf_event_wait_remove_all(struct kbase_context *kctx)
-{
- struct kbase_csf_event *event, *next_event;
- unsigned long flags;
-
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
-
- list_for_each_entry_safe(
- event, next_event, &kctx->csf.event_callback_list, link) {
- list_del(&event->link);
- dev_dbg(kctx->kbdev->dev,
- "Removed event handler %pK with param %pK\n",
- (void *)event, event->param);
- kfree(event);
- }
-
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
-}
-
/**
* handle_oom_event - Handle the OoM event generated by the firmware for the
* CSI.
*
+ * @kctx: Pointer to the kbase context in which the tiler heap was initialized.
+ * @stream: Pointer to the structure containing info provided by the firmware
+ * about the CSI.
+ *
* This function will handle the OoM event request from the firmware for the
* CS. It will retrieve the address of heap context and heap's
* statistics (like number of render passes in-flight) from the CS's kernel
@@ -2097,10 +1930,6 @@ void kbase_csf_event_wait_remove_all(struct kbase_context *kctx)
* It will also update the CS's kernel input page with the address
* of a new chunk that was allocated.
*
- * @kctx: Pointer to the kbase context in which the tiler heap was initialized.
- * @stream: Pointer to the structure containing info provided by the firmware
- * about the CSI.
- *
* Return: 0 if successfully handled the request, otherwise a negative error
* code on failure.
*/
@@ -2171,7 +2000,9 @@ static void report_tiler_oom_error(struct kbase_queue_group *group)
BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM,
} } } };
- add_error(group->kctx, &group->error_tiler_oom, &error);
+ kbase_csf_event_add_error(group->kctx,
+ &group->error_tiler_oom,
+ &error);
kbase_event_wakeup(group->kctx);
}
@@ -2316,7 +2147,7 @@ static void report_group_timeout_error(struct kbase_queue_group *const group)
"Notify the event notification thread, forward progress timeout (%llu cycles)\n",
kbase_csf_timeout_get(group->kctx->kbdev));
- add_error(group->kctx, &group->error_timeout, &error);
+ kbase_csf_event_add_error(group->kctx, &group->error_timeout, &error);
kbase_event_wakeup(group->kctx);
}
@@ -2452,7 +2283,7 @@ static void report_queue_fatal_error(struct kbase_queue *const queue,
}
};
- add_error(queue->kctx, &queue->error, &error);
+ kbase_csf_event_add_error(queue->kctx, &queue->error, &error);
kbase_event_wakeup(queue->kctx);
}
@@ -3008,6 +2839,7 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) {
int non_idle_offslot_grps;
bool can_suspend_on_idle;
+
dev_dbg(kbdev->dev, "Idle-hysteresis event flagged");
kbase_csf_firmware_global_input_mask(
global_iface, GLB_REQ, glb_ack,
diff --git a/mali_kbase/csf/mali_kbase_csf.h b/mali_kbase/csf/mali_kbase_csf.h
index 640d2ed..e3db81d 100644
--- a/mali_kbase/csf/mali_kbase_csf.h
+++ b/mali_kbase/csf/mali_kbase_csf.h
@@ -26,6 +26,7 @@
#include "mali_kbase_csf_scheduler.h"
#include "mali_kbase_csf_firmware.h"
#include "mali_kbase_csf_protected_memory.h"
+#include "mali_kbase_hwaccess_time.h"
/* Indicate invalid CS h/w interface
*/
@@ -47,129 +48,6 @@
#define FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER (5)
/**
- * enum kbase_csf_event_callback_action - return type for CSF event callbacks.
- *
- * @KBASE_CSF_EVENT_CALLBACK_FIRST: Never set explicitly.
- * It doesn't correspond to any action or type of event callback.
- *
- * @KBASE_CSF_EVENT_CALLBACK_KEEP: The callback will remain registered.
- *
- * @KBASE_CSF_EVENT_CALLBACK_REMOVE: The callback will be removed
- * immediately upon return.
- *
- * @KBASE_CSF_EVENT_CALLBACK_LAST: Never set explicitly.
- * It doesn't correspond to any action or type of event callback.
- */
-enum kbase_csf_event_callback_action {
- KBASE_CSF_EVENT_CALLBACK_FIRST = 0,
- KBASE_CSF_EVENT_CALLBACK_KEEP,
- KBASE_CSF_EVENT_CALLBACK_REMOVE,
- KBASE_CSF_EVENT_CALLBACK_LAST,
-};
-
-/**
- * kbase_csf_event_callback_action - type for callback functions to be
- * called upon CSF events.
- *
- * This is the type of callback functions that can be registered
- * for CSF events. These function calls shall be triggered by any call
- * to kbase_csf_event_signal.
- *
- * @param: Generic parameter to pass to the callback function.
- *
- * Return: KBASE_CSF_EVENT_CALLBACK_KEEP if the callback should remain
- * registered, or KBASE_CSF_EVENT_CALLBACK_REMOVE if it should be removed.
- */
-typedef enum kbase_csf_event_callback_action kbase_csf_event_callback(void *param);
-
-/**
- * kbase_csf_event_wait_add - Add a CSF event callback
- *
- * This function adds an event callback to the list of CSF event callbacks
- * belonging to a given Kbase context, to be triggered when a CSF event is
- * signalled by kbase_csf_event_signal.
- *
- * @kctx: The Kbase context the @callback should be registered to.
- * @callback: The callback function to register.
- * @param: Custom parameter to be passed to the @callback function.
- *
- * Return: 0 on success, or negative on failure.
- */
-int kbase_csf_event_wait_add(struct kbase_context *kctx,
- kbase_csf_event_callback *callback, void *param);
-
-/**
- * kbase_csf_event_wait_remove - Remove a CSF event callback
- *
- * This function removes an event callback from the list of CSF event callbacks
- * belonging to a given Kbase context.
- *
- * @kctx: The kbase context the @callback should be removed from.
- * @callback: The callback function to remove.
- * @param: Custom parameter that would have been passed to the @p callback
- * function.
- */
-void kbase_csf_event_wait_remove(struct kbase_context *kctx,
- kbase_csf_event_callback *callback, void *param);
-
-/**
- * kbase_csf_event_wait_remove_all - Removes all CSF event callbacks
- *
- * This function empties the list of CSF event callbacks belonging to a given
- * Kbase context.
- *
- * @kctx: The kbase context for which CSF event callbacks have to be removed.
- */
-void kbase_csf_event_wait_remove_all(struct kbase_context *kctx);
-
-/**
- * kbase_csf_read_error - Read CS fatal error
- *
- * This function takes the CS fatal error from context's ordered
- * error_list, copies its contents to @event_data.
- *
- * @kctx: The kbase context to read fatal error from
- * @event_data: Caller-provided buffer to copy the fatal error to
- *
- * Return: true if fatal error is read successfully.
- */
-bool kbase_csf_read_error(struct kbase_context *kctx,
- struct base_csf_notification *event_data);
-
-/**
- * kbase_csf_error_pending - Check whether fatal error is pending
- *
- * @kctx: The kbase context to check fatal error upon.
- *
- * Return: true if fatal error is pending.
- */
-bool kbase_csf_error_pending(struct kbase_context *kctx);
-
-/**
- * kbase_csf_event_signal - Signal a CSF event
- *
- * This function triggers all the CSF event callbacks that are registered to
- * a given Kbase context, and also signals the event handling thread of
- * userspace driver waiting for the CSF event.
- *
- * @kctx: The kbase context whose CSF event callbacks shall be triggered.
- * @notify_gpu: Flag to indicate if CSF firmware should be notified of the
- * signaling of event that happened on the Driver side, either
- * the signal came from userspace or from kcpu queues.
- */
-void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu);
-
-static inline void kbase_csf_event_signal_notify_gpu(struct kbase_context *kctx)
-{
- kbase_csf_event_signal(kctx, true);
-}
-
-static inline void kbase_csf_event_signal_cpu_only(struct kbase_context *kctx)
-{
- kbase_csf_event_signal(kctx, false);
-}
-
-/**
* kbase_csf_ctx_init - Initialize the CSF interface for a GPU address space.
*
* @kctx: Pointer to the kbase context which is being initialized.
@@ -182,11 +60,11 @@ int kbase_csf_ctx_init(struct kbase_context *kctx);
* kbase_csf_ctx_handle_fault - Terminate queue groups & notify fault upon
* GPU bus fault, MMU page fault or similar.
*
- * This function terminates all GPU command queue groups in the context and
- * notifies the event notification thread of the fault.
- *
* @kctx: Pointer to faulty kbase context.
* @fault: Pointer to the fault.
+ *
+ * This function terminates all GPU command queue groups in the context and
+ * notifies the event notification thread of the fault.
*/
void kbase_csf_ctx_handle_fault(struct kbase_context *kctx,
struct kbase_fault *fault);
@@ -194,10 +72,10 @@ void kbase_csf_ctx_handle_fault(struct kbase_context *kctx,
/**
* kbase_csf_ctx_term - Terminate the CSF interface for a GPU address space.
*
+ * @kctx: Pointer to the kbase context which is being terminated.
+ *
* This function terminates any remaining CSGs and CSs which weren't destroyed
* before context termination.
- *
- * @kctx: Pointer to the kbase context which is being terminated.
*/
void kbase_csf_ctx_term(struct kbase_context *kctx);
@@ -246,14 +124,14 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx,
* kbase_csf_alloc_command_stream_user_pages - Allocate resources for a
* GPU command queue.
*
- * This function allocates a pair of User mode input/output pages for a
- * GPU command queue and maps them in the shared interface segment of MCU
- * firmware address space. Also reserves a hardware doorbell page for the queue.
- *
* @kctx: Pointer to the kbase context within which the resources
* for the queue are being allocated.
* @queue: Pointer to the queue for which to allocate resources.
*
+ * This function allocates a pair of User mode input/output pages for a
+ * GPU command queue and maps them in the shared interface segment of MCU
+ * firmware address space. Also reserves a hardware doorbell page for the queue.
+ *
* Return: 0 on success, or negative on failure.
*/
int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx,
@@ -294,9 +172,9 @@ void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue);
/**
* kbase_csf_queue_kick - Schedule a GPU command queue on the firmware
*
- * @kctx: The kbase context.
- * @kick: Pointer to the struct which specifies the queue
- * that needs to be scheduled.
+ * @kctx: The kbase context.
+ * @kick: Pointer to the struct which specifies the queue
+ * that needs to be scheduled.
*
* Return: 0 on success, or negative on failure.
*/
@@ -307,12 +185,12 @@ int kbase_csf_queue_kick(struct kbase_context *kctx,
* kbase_csf_queue_group_handle_is_valid - Find if the given queue group handle
* is valid.
*
- * This function is used to determine if the queue group handle is valid.
- *
* @kctx: The kbase context under which the queue group exists.
* @group_handle: Handle for the group which uniquely identifies it within
* the context with which it was created.
*
+ * This function is used to determine if the queue group handle is valid.
+ *
* Return: 0 on success, or negative on failure.
*/
int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx,
@@ -359,8 +237,6 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group);
/**
* kbase_csf_queue_group_suspend - Suspend a GPU command queue group
*
- * This function is used to suspend a queue group and copy the suspend buffer.
- *
* @kctx: The kbase context for which the queue group is to be
* suspended.
* @sus_buf: Pointer to the structure which contains details of the
@@ -368,6 +244,8 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group);
* @group_handle: Handle for the group which uniquely identifies it within
* the context within which it was created.
*
+ * This function is used to suspend a queue group and copy the suspend buffer.
+ *
* Return: 0 on success or negative value if failed to suspend
* queue group and copy suspend buffer contents.
*/
@@ -397,12 +275,12 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val);
* the update of userspace mapping of HW
* doorbell page.
*
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
* The function creates a file and allocates a dummy page to facilitate the
* update of userspace mapping to point to the dummy page instead of the real
* HW doorbell page after the suspend of queue group.
*
- * @kbdev: Instance of a GPU platform device that implements a CSF interface.
- *
* Return: 0 on success, or negative on failure.
*/
int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev);
@@ -420,14 +298,14 @@ void kbase_csf_doorbell_mapping_term(struct kbase_device *kbdev);
* instead of the User register page after
* the GPU power down.
*
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
* The function allocates a dummy page which is used to replace the User
* register page in the userspace mapping after the power down of GPU.
* On the power up of GPU, the mapping is updated to point to the real
* User register page. The mapping is used to allow access to LATEST_FLUSH
* register from userspace.
*
- * @kbdev: Instance of a GPU platform device that implements a CSF interface.
- *
* Return: 0 on success, or negative on failure.
*/
int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev);
@@ -443,10 +321,10 @@ void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev);
/**
* kbase_csf_ring_csg_doorbell - ring the doorbell for a CSG interface.
*
- * The function kicks a notification on the CSG interface to firmware.
- *
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @slot: Index of CSG interface for ringing the door-bell.
+ *
+ * The function kicks a notification on the CSG interface to firmware.
*/
void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot);
@@ -454,10 +332,10 @@ void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot);
* kbase_csf_ring_csg_slots_doorbell - ring the doorbell for a set of CSG
* interfaces.
*
- * The function kicks a notification on a set of CSG interfaces to firmware.
- *
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @slot_bitmap: bitmap for the given slots, slot-0 on bit-0, etc.
+ *
+ * The function kicks a notification on a set of CSG interfaces to firmware.
*/
void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev,
u32 slot_bitmap);
@@ -466,9 +344,6 @@ void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev,
* kbase_csf_ring_cs_kernel_doorbell - ring the kernel doorbell for a CSI
* assigned to a GPU queue
*
- * The function sends a doorbell interrupt notification to the firmware for
- * a CSI assigned to a GPU queue.
- *
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @csi_index: ID of the CSI assigned to the GPU queue.
* @csg_nr: Index of the CSG slot assigned to the queue
@@ -479,6 +354,9 @@ void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev,
* The flag is supposed be false only when the input page
* for bound GPU queues is programmed at the time of
* starting/resuming the group on a CSG slot.
+ *
+ * The function sends a doorbell interrupt notification to the firmware for
+ * a CSI assigned to a GPU queue.
*/
void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev,
int csi_index, int csg_nr,
@@ -488,11 +366,11 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev,
* kbase_csf_ring_cs_user_doorbell - ring the user doorbell allocated for a
* queue.
*
- * The function kicks a notification to the firmware on the doorbell assigned
- * to the queue.
- *
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @queue: Pointer to the queue for ringing the door-bell.
+ *
+ * The function kicks a notification to the firmware on the doorbell assigned
+ * to the queue.
*/
void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev,
struct kbase_queue *queue);
@@ -563,5 +441,23 @@ static inline u8 kbase_csf_priority_queue_group_priority_to_relative(u8 priority
return kbasep_csf_queue_group_priority_to_relative[priority];
}
-
+/**
+ * kbase_csf_ktrace_gpu_cycle_cnt - Wrapper to retrieve the GPU cycle counter
+ * value for Ktrace purpose.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * This function is just a wrapper to retrieve the GPU cycle counter value, to
+ * avoid any overhead on Release builds where Ktrace is disabled by default.
+ *
+ * Return: Snapshot of the GPU cycle count register.
+ */
+static inline u64 kbase_csf_ktrace_gpu_cycle_cnt(struct kbase_device *kbdev)
+{
+#if KBASE_KTRACE_ENABLE
+ return kbase_backend_get_cycle_cnt(kbdev);
+#else
+ return 0;
+#endif
+}
#endif /* _KBASE_CSF_H_ */
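The callback API removed from this header above is relocated to mali_kbase_csf_event.h, which this patch adds but which is not shown in this hunk. Assuming the functions kept their names across the move, a hedged usage sketch of registering a callback that stays armed until explicitly removed:

/* Illustrative callback: runs on every kbase_csf_event_signal() for the
 * context it was registered against.
 */
static enum kbase_csf_event_callback_action my_event_cb(void *param)
{
	struct kbase_context *kctx = param;

	dev_dbg(kctx->kbdev->dev, "CSF event signalled\n");
	return KBASE_CSF_EVENT_CALLBACK_KEEP;
}

static int register_my_event_cb(struct kbase_context *kctx)
{
	/* Returns 0 on success, negative on failure, per the (moved) kernel-doc. */
	return kbase_csf_event_wait_add(kctx, my_event_cb, kctx);
}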
diff --git a/mali_kbase/csf/mali_kbase_csf_defs.h b/mali_kbase/csf/mali_kbase_csf_defs.h
index de471eb..0712648 100644
--- a/mali_kbase/csf/mali_kbase_csf_defs.h
+++ b/mali_kbase/csf/mali_kbase_csf_defs.h
@@ -30,6 +30,7 @@
#include <linux/wait.h>
#include "mali_kbase_csf_firmware.h"
+#include "mali_kbase_csf_event.h"
/* Maximum number of KCPU command queues to be created per GPU address space.
*/
@@ -331,6 +332,7 @@ struct kbase_csf_notification {
* queue.
* @cs_fatal_info: Records additional information about the CS fatal event.
* @cs_fatal: Records information about the CS fatal event.
+ * @pending:        Indicates whether the queue has newly submitted work.
*/
struct kbase_queue {
struct kbase_context *kctx;
@@ -364,6 +366,7 @@ struct kbase_queue {
struct work_struct fatal_event_work;
u64 cs_fatal_info;
u32 cs_fatal;
+ atomic_t pending;
};
/**
@@ -487,6 +490,7 @@ struct kbase_queue_group {
struct kbase_csf_notification error_tiler_oom;
struct work_struct timer_event_work;
+
};
/**
@@ -538,10 +542,6 @@ struct kbase_csf_cpu_queue_context {
/**
* struct kbase_csf_heap_context_allocator - Allocator of heap contexts
*
- * Heap context structures are allocated by the kernel for use by the firmware.
- * The current implementation subdivides a single GPU memory region for use as
- * a sparse array.
- *
* @kctx: Pointer to the kbase context with which this allocator is
* associated.
* @region: Pointer to a GPU memory region from which heap context structures
@@ -552,6 +552,10 @@ struct kbase_csf_cpu_queue_context {
* @lock: Lock preventing concurrent access to the @in_use bitmap.
* @in_use: Bitmap that indicates which heap context structures are currently
* allocated (in @region).
+ *
+ * Heap context structures are allocated by the kernel for use by the firmware.
+ * The current implementation subdivides a single GPU memory region for use as
+ * a sparse array.
*/
struct kbase_csf_heap_context_allocator {
struct kbase_context *kctx;
@@ -565,10 +569,6 @@ struct kbase_csf_heap_context_allocator {
* struct kbase_csf_tiler_heap_context - Object representing the tiler heaps
* context for a GPU address space.
*
- * This contains all of the CSF state relating to chunked tiler heaps for one
- * @kbase_context. It is not the same as a heap context structure allocated by
- * the kernel for use by the firmware.
- *
* @lock: Lock to prevent the concurrent access to tiler heaps (after the
* initialization), a tiler heap can be terminated whilst an OoM
* event is being handled for it.
@@ -576,6 +576,10 @@ struct kbase_csf_heap_context_allocator {
* @ctx_alloc: Allocator for heap context structures.
* @nr_of_heaps: Total number of tiler heaps that were added during the
* life time of the context.
+ *
+ * This contains all of the CSF state relating to chunked tiler heaps for one
+ * @kbase_context. It is not the same as a heap context structure allocated by
+ * the kernel for use by the firmware.
*/
struct kbase_csf_tiler_heap_context {
struct mutex lock;
@@ -617,6 +621,43 @@ struct kbase_csf_scheduler_context {
};
/**
+ * enum kbase_csf_event_callback_action - return type for CSF event callbacks.
+ *
+ * @KBASE_CSF_EVENT_CALLBACK_FIRST: Never set explicitly.
+ * It doesn't correspond to any action or type of event callback.
+ *
+ * @KBASE_CSF_EVENT_CALLBACK_KEEP: The callback will remain registered.
+ *
+ * @KBASE_CSF_EVENT_CALLBACK_REMOVE: The callback will be removed
+ * immediately upon return.
+ *
+ * @KBASE_CSF_EVENT_CALLBACK_LAST: Never set explicitly.
+ * It doesn't correspond to any action or type of event callback.
+ */
+enum kbase_csf_event_callback_action {
+ KBASE_CSF_EVENT_CALLBACK_FIRST = 0,
+ KBASE_CSF_EVENT_CALLBACK_KEEP,
+ KBASE_CSF_EVENT_CALLBACK_REMOVE,
+ KBASE_CSF_EVENT_CALLBACK_LAST,
+};
+
+/**
+ * struct kbase_csf_event - Object representing CSF event and error
+ *
+ * @callback_list: List of callbacks which are registered to serve CSF
+ * events.
+ * @error_list: List for CS fatal errors in CSF context.
+ * Link of fatal error is &struct_kbase_csf_notification.link.
+ * @lock: Lock protecting access to @callback_list and
+ * @error_list.
+ */
+struct kbase_csf_event {
+ struct list_head callback_list;
+ struct list_head error_list;
+ spinlock_t lock;
+};
+
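The new struct kbase_csf_event above groups the callback list, the error list and the lock that guards them into one object, replacing three separate members of kbase_csf_context. A minimal user-space sketch of that grouping, using a pthread mutex and bare pointers as stand-ins for the kernel's spinlock and list heads (all names hypothetical):

#include <pthread.h>
#include <stdio.h>

/* Bare singly linked node standing in for the kernel's list_head. */
struct node {
    struct node *next;
};

/*
 * Grouping the callback list, the error list and their lock into one
 * object mirrors struct kbase_csf_event: related state lives together
 * and the owning context only needs a single member.
 */
struct event_state {
    struct node *callback_list;
    struct node *error_list;
    pthread_mutex_t lock;   /* protects both lists */
};

static void event_state_init(struct event_state *ev)
{
    ev->callback_list = NULL;
    ev->error_list = NULL;
    pthread_mutex_init(&ev->lock, NULL);
}

int main(void)
{
    struct event_state ev;

    event_state_init(&ev);
    printf("callbacks empty: %d, errors empty: %d\n",
           ev.callback_list == NULL, ev.error_list == NULL);
    return 0;
}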
+/**
* struct kbase_csf_context - Object representing CSF for a GPU address space.
*
* @event_pages_head: A list of pages allocated for the event memory used by
@@ -647,10 +688,7 @@ struct kbase_csf_scheduler_context {
* userspace mapping created for them on bind operation
* hasn't been removed.
* @kcpu_queues: Kernel CPU command queues.
- * @event_lock: Lock protecting access to @event_callback_list and
- * @error_list.
- * @event_callback_list: List of callbacks which are registered to serve CSF
- * events.
+ * @event: CSF event object.
* @tiler_heaps: Chunked tiler memory heaps.
* @wq: Dedicated workqueue to process work items corresponding
* to the OoM events raised for chunked tiler heaps being
@@ -661,10 +699,7 @@ struct kbase_csf_scheduler_context {
* of the USER register page. Currently used only for sanity
* checking.
* @sched: Object representing the scheduler's context
- * @error_list: List for CS fatal errors in this context.
- * Link of fatal error is
- * &struct_kbase_csf_notification.link.
- * @event_lock needs to be held to access this list.
+ * @pending_submission_work: Work item to process pending kicked GPU command queues.
* @cpu_queue: CPU queue information. Only be available when DEBUG_FS
* is enabled.
*/
@@ -677,14 +712,13 @@ struct kbase_csf_context {
struct kbase_queue_group *queue_groups[MAX_QUEUE_GROUP_NUM];
struct list_head queue_list;
struct kbase_csf_kcpu_queue_context kcpu_queues;
- spinlock_t event_lock;
- struct list_head event_callback_list;
+ struct kbase_csf_event event;
struct kbase_csf_tiler_heap_context tiler_heaps;
struct workqueue_struct *wq;
struct list_head link;
struct vm_area_struct *user_reg_vma;
struct kbase_csf_scheduler_context sched;
- struct list_head error_list;
+ struct work_struct pending_submission_work;
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct kbase_csf_cpu_queue_context cpu_queue;
#endif
@@ -882,12 +916,12 @@ struct kbase_csf_scheduler {
bool tick_timer_active;
};
-/**
+/*
* Number of GPU cycles per unit of the global progress timeout.
*/
#define GLB_PROGRESS_TIMER_TIMEOUT_SCALE ((u64)1024)
-/**
+/*
* Maximum value of the global progress timeout.
*/
#define GLB_PROGRESS_TIMER_TIMEOUT_MAX \
@@ -895,12 +929,12 @@ struct kbase_csf_scheduler {
GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) * \
GLB_PROGRESS_TIMER_TIMEOUT_SCALE)
-/**
+/*
* Default GLB_PWROFF_TIMER_TIMEOUT value in unit of micro-seconds.
*/
#define DEFAULT_GLB_PWROFF_TIMEOUT_US (800)
-/**
+/*
* In typical operations, the management of the shader core power transitions
* is delegated to the MCU/firmware. However, if the host driver is configured
* to take direct control, one needs to disable the MCU firmware GLB_PWROFF
@@ -911,7 +945,7 @@ struct kbase_csf_scheduler {
/* Index of the GPU_ACTIVE counter within the CSHW counter block */
#define GPU_ACTIVE_CNT_IDX (4)
-/**
+/*
* Maximum number of sessions that can be managed by the IPA Control component.
*/
#if MALI_UNIT_TEST
@@ -937,13 +971,13 @@ enum kbase_ipa_core_type {
KBASE_IPA_CORE_TYPE_NUM
};
-/**
+/*
* Number of configurable counters per type of block on the IPA Control
* interface.
*/
#define KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS ((size_t)8)
-/**
+/*
* Total number of configurable counters existing on the IPA Control interface.
*/
#define KBASE_IPA_CONTROL_MAX_COUNTERS \
diff --git a/mali_kbase/csf/mali_kbase_csf_event.c b/mali_kbase/csf/mali_kbase_csf_event.c
new file mode 100644
index 0000000..5c86688
--- /dev/null
+++ b/mali_kbase/csf/mali_kbase_csf_event.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+#include <mali_kbase.h>
+#include "mali_kbase_csf_event.h"
+
+/**
+ * struct kbase_csf_event_cb - CSF event callback.
+ *
+ * @link: Link to the rest of the list.
+ * @kctx: Pointer to the Kbase context this event belongs to.
+ * @callback: Callback function to call when a CSF event is signalled.
+ * @param: Parameter to pass to the callback function.
+ *
+ * This structure belongs to the list of events which is part of a Kbase
+ * context, and describes a callback function with a custom parameter to pass
+ * to it when a CSF event is signalled.
+ */
+struct kbase_csf_event_cb {
+ struct list_head link;
+ struct kbase_context *kctx;
+ kbase_csf_event_callback *callback;
+ void *param;
+};
+
+int kbase_csf_event_wait_add(struct kbase_context *kctx,
+ kbase_csf_event_callback *callback, void *param)
+{
+ int err = -ENOMEM;
+ struct kbase_csf_event_cb *event_cb =
+ kzalloc(sizeof(struct kbase_csf_event_cb), GFP_KERNEL);
+
+ if (event_cb) {
+ unsigned long flags;
+
+ event_cb->kctx = kctx;
+ event_cb->callback = callback;
+ event_cb->param = param;
+
+ spin_lock_irqsave(&kctx->csf.event.lock, flags);
+ list_add_tail(&event_cb->link, &kctx->csf.event.callback_list);
+ dev_dbg(kctx->kbdev->dev,
+ "Added event handler %pK with param %pK\n", event_cb,
+ event_cb->param);
+ spin_unlock_irqrestore(&kctx->csf.event.lock, flags);
+
+ err = 0;
+ }
+
+ return err;
+}
+
+void kbase_csf_event_wait_remove(struct kbase_context *kctx,
+ kbase_csf_event_callback *callback, void *param)
+{
+ struct kbase_csf_event_cb *event_cb;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kctx->csf.event.lock, flags);
+
+ list_for_each_entry(event_cb, &kctx->csf.event.callback_list, link) {
+ if ((event_cb->callback == callback) && (event_cb->param == param)) {
+ list_del(&event_cb->link);
+ dev_dbg(kctx->kbdev->dev,
+ "Removed event handler %pK with param %pK\n",
+ event_cb, event_cb->param);
+ kfree(event_cb);
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&kctx->csf.event.lock, flags);
+}
+
+static void sync_update_notify_gpu(struct kbase_context *kctx)
+{
+ bool can_notify_gpu;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
+ can_notify_gpu = kctx->kbdev->pm.backend.gpu_powered;
+#ifdef KBASE_PM_RUNTIME
+ if (kctx->kbdev->pm.backend.gpu_sleep_mode_active)
+ can_notify_gpu = false;
+#endif
+
+ if (can_notify_gpu) {
+ kbase_csf_ring_doorbell(kctx->kbdev, CSF_KERNEL_DOORBELL_NR);
+ KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT_NOTIFY_GPU, kctx, 0u);
+ }
+
+ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
+}
+
+void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu)
+{
+ struct kbase_csf_event_cb *event_cb, *next_event_cb;
+ unsigned long flags;
+
+ dev_dbg(kctx->kbdev->dev,
+ "Signal event (%s GPU notify) for context %pK\n",
+ notify_gpu ? "with" : "without", (void *)kctx);
+
+ /* First increment the signal count and wake up event thread.
+ */
+ atomic_set(&kctx->event_count, 1);
+ kbase_event_wakeup(kctx);
+
+ /* Signal the CSF firmware. This is to ensure that pending command
+ * stream synch object wait operations are re-evaluated.
+ * Write to GLB_DOORBELL would suffice as spec says that all pending
+ * synch object wait operations are re-evaluated on a write to any
+ * CS_DOORBELL/GLB_DOORBELL register.
+ */
+ if (notify_gpu)
+ sync_update_notify_gpu(kctx);
+
+ /* Now invoke the callbacks registered on backend side.
+ * Allow item removal inside the loop, if requested by the callback.
+ */
+ spin_lock_irqsave(&kctx->csf.event.lock, flags);
+
+ list_for_each_entry_safe(
+ event_cb, next_event_cb, &kctx->csf.event.callback_list, link) {
+ enum kbase_csf_event_callback_action action;
+
+ dev_dbg(kctx->kbdev->dev,
+ "Calling event handler %pK with param %pK\n",
+ (void *)event_cb, event_cb->param);
+ action = event_cb->callback(event_cb->param);
+ if (action == KBASE_CSF_EVENT_CALLBACK_REMOVE) {
+ list_del(&event_cb->link);
+ kfree(event_cb);
+ }
+ }
+
+ spin_unlock_irqrestore(&kctx->csf.event.lock, flags);
+}
+
+void kbase_csf_event_term(struct kbase_context *kctx)
+{
+ struct kbase_csf_event_cb *event_cb, *next_event_cb;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kctx->csf.event.lock, flags);
+
+ list_for_each_entry_safe(
+ event_cb, next_event_cb, &kctx->csf.event.callback_list, link) {
+ list_del(&event_cb->link);
+ dev_warn(kctx->kbdev->dev,
+ "Removed event handler %pK with param %pK\n",
+ (void *)event_cb, event_cb->param);
+ kfree(event_cb);
+ }
+
+ WARN_ON(!list_empty(&kctx->csf.event.error_list));
+
+ spin_unlock_irqrestore(&kctx->csf.event.lock, flags);
+}
+
+void kbase_csf_event_init(struct kbase_context *const kctx)
+{
+ INIT_LIST_HEAD(&kctx->csf.event.callback_list);
+ INIT_LIST_HEAD(&kctx->csf.event.error_list);
+ spin_lock_init(&kctx->csf.event.lock);
+}
+
+void kbase_csf_event_remove_error(struct kbase_context *kctx,
+ struct kbase_csf_notification *error)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kctx->csf.event.lock, flags);
+ list_del_init(&error->link);
+ spin_unlock_irqrestore(&kctx->csf.event.lock, flags);
+}
+
+bool kbase_csf_event_read_error(struct kbase_context *kctx,
+ struct base_csf_notification *event_data)
+{
+ struct kbase_csf_notification *error_data = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kctx->csf.event.lock, flags);
+ if (likely(!list_empty(&kctx->csf.event.error_list))) {
+ error_data = list_first_entry(&kctx->csf.event.error_list,
+ struct kbase_csf_notification, link);
+ list_del_init(&error_data->link);
+ *event_data = error_data->data;
+ dev_dbg(kctx->kbdev->dev, "Dequeued error %pK in context %pK\n",
+ (void *)error_data, (void *)kctx);
+ }
+ spin_unlock_irqrestore(&kctx->csf.event.lock, flags);
+ return !!error_data;
+}
+
+void kbase_csf_event_add_error(struct kbase_context *const kctx,
+ struct kbase_csf_notification *const error,
+ struct base_csf_notification const *const data)
+{
+ unsigned long flags;
+
+ if (WARN_ON(!kctx))
+ return;
+
+ if (WARN_ON(!error))
+ return;
+
+ if (WARN_ON(!data))
+ return;
+
+ spin_lock_irqsave(&kctx->csf.event.lock, flags);
+ if (!WARN_ON(!list_empty(&error->link))) {
+ error->data = *data;
+ list_add_tail(&error->link, &kctx->csf.event.error_list);
+ dev_dbg(kctx->kbdev->dev,
+ "Added error %pK of type %d in context %pK\n",
+ (void *)error, data->type, (void *)kctx);
+ }
+ spin_unlock_irqrestore(&kctx->csf.event.lock, flags);
+}
+
+bool kbase_csf_event_error_pending(struct kbase_context *kctx)
+{
+ bool error_pending = false;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kctx->csf.event.lock, flags);
+ error_pending = !list_empty(&kctx->csf.event.error_list);
+
+ dev_dbg(kctx->kbdev->dev, "%s error is pending in context %pK\n",
+ error_pending ? "An" : "No", (void *)kctx);
+
+ spin_unlock_irqrestore(&kctx->csf.event.lock, flags);
+
+ return error_pending;
+}
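kbase_csf_event_signal() above walks the callback list with list_for_each_entry_safe() so that a callback can request its own removal mid-walk. A small user-space sketch of the same keep-or-remove callback pattern follows; it omits the spinlock the driver takes and uses invented names.

#include <stdio.h>
#include <stdlib.h>

enum cb_action { CB_KEEP, CB_REMOVE };

struct cb_node {
    struct cb_node *next;
    enum cb_action (*callback)(void *param);
    void *param;
};

/*
 * Walk the callback list the way kbase_csf_event_signal() does: the
 * "next" pointer is sampled before the callback runs, so a node that
 * asks to be removed can be freed without breaking the walk.
 */
static void signal_all(struct cb_node **head)
{
    struct cb_node **link = head;
    struct cb_node *node = *head;

    while (node) {
        struct cb_node *next = node->next;

        if (node->callback(node->param) == CB_REMOVE) {
            *link = next;   /* unlink */
            free(node);
        } else {
            link = &node->next;
        }
        node = next;
    }
}

static enum cb_action say_and_keep(void *param)
{
    printf("keep: %s\n", (const char *)param);
    return CB_KEEP;
}

static enum cb_action say_and_remove(void *param)
{
    printf("remove: %s\n", (const char *)param);
    return CB_REMOVE;
}

static struct cb_node *push(struct cb_node **head,
                            enum cb_action (*cb)(void *), void *param)
{
    struct cb_node *node = malloc(sizeof(*node));

    if (!node)
        return NULL;
    node->callback = cb;
    node->param = param;
    node->next = *head;
    *head = node;
    return node;
}

int main(void)
{
    struct cb_node *head = NULL;

    push(&head, say_and_keep, "first");
    push(&head, say_and_remove, "one-shot");
    signal_all(&head);  /* "one-shot" removes itself here */
    signal_all(&head);  /* only "first" fires again */
    return 0;
}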
diff --git a/mali_kbase/csf/mali_kbase_csf_event.h b/mali_kbase/csf/mali_kbase_csf_event.h
new file mode 100644
index 0000000..1270ef6
--- /dev/null
+++ b/mali_kbase/csf/mali_kbase_csf_event.h
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_CSF_EVENT_H_
+#define _KBASE_CSF_EVENT_H_
+
+#include <linux/types.h>
+#include <linux/wait.h>
+
+struct kbase_context;
+struct kbase_csf_event;
+enum kbase_csf_event_callback_action;
+
+/**
+ * kbase_csf_event_callback - type for callback functions to be
+ * called upon CSF events.
+ * @param: Generic parameter to pass to the callback function.
+ *
+ * This is the type of callback functions that can be registered
+ * for CSF events. These function calls shall be triggered by any call
+ * to kbase_csf_event_signal.
+ *
+ * Return: KBASE_CSF_EVENT_CALLBACK_KEEP if the callback should remain
+ * registered, or KBASE_CSF_EVENT_CALLBACK_REMOVE if it should be removed.
+ */
+typedef enum kbase_csf_event_callback_action kbase_csf_event_callback(void *param);
+
+/**
+ * kbase_csf_event_wait_add - Add a CSF event callback
+ *
+ * @kctx: The Kbase context the @callback should be registered to.
+ * @callback: The callback function to register.
+ * @param: Custom parameter to be passed to the @callback function.
+ *
+ * This function adds an event callback to the list of CSF event callbacks
+ * belonging to a given Kbase context, to be triggered when a CSF event is
+ * signalled by kbase_csf_event_signal.
+ *
+ * Return: 0 on success, or negative on failure.
+ */
+int kbase_csf_event_wait_add(struct kbase_context *kctx,
+ kbase_csf_event_callback *callback, void *param);
+
+/**
+ * kbase_csf_event_wait_remove - Remove a CSF event callback
+ *
+ * @kctx: The kbase context the @callback should be removed from.
+ * @callback: The callback function to remove.
+ * @param: Custom parameter that would have been passed to the @p callback
+ * function.
+ *
+ * This function removes an event callback from the list of CSF event callbacks
+ * belonging to a given Kbase context.
+ */
+void kbase_csf_event_wait_remove(struct kbase_context *kctx,
+ kbase_csf_event_callback *callback, void *param);
+
+/**
+ * kbase_csf_event_term - Removes all CSF event callbacks
+ *
+ * @kctx: The kbase context for which CSF event callbacks have to be removed.
+ *
+ * This function empties the list of CSF event callbacks belonging to a given
+ * Kbase context.
+ */
+void kbase_csf_event_term(struct kbase_context *kctx);
+
+/**
+ * kbase_csf_event_signal - Signal a CSF event
+ *
+ * @kctx: The kbase context whose CSF event callbacks shall be triggered.
+ * @notify_gpu: Flag to indicate if CSF firmware should be notified of the
+ *              signalling of an event that happened on the Driver side,
+ *              whether the signal came from userspace or from KCPU queues.
+ *
+ * This function triggers all the CSF event callbacks that are registered to
+ * a given Kbase context, and also signals the event handling thread of
+ * userspace driver waiting for the CSF event.
+ */
+void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu);
+
+static inline void kbase_csf_event_signal_notify_gpu(struct kbase_context *kctx)
+{
+ kbase_csf_event_signal(kctx, true);
+}
+
+static inline void kbase_csf_event_signal_cpu_only(struct kbase_context *kctx)
+{
+ kbase_csf_event_signal(kctx, false);
+}
+
+/**
+ * kbase_csf_event_init - Initialize event object
+ *
+ * @kctx: The kbase context whose event object will be initialized.
+ *
+ * This function initializes the event object.
+ */
+void kbase_csf_event_init(struct kbase_context *const kctx);
+
+struct kbase_csf_notification;
+struct base_csf_notification;
+/**
+ * kbase_csf_event_read_error - Read and remove an error from the event error list
+ *
+ * @kctx: The kbase context.
+ * @event_data: Caller-provided buffer to copy the fatal error to
+ *
+ * This function takes the first CS fatal error from the context's ordered
+ * error_list and copies its contents to @event_data.
+ *
+ * Return: true if an error was read out, or false if the error list is empty.
+ */
+bool kbase_csf_event_read_error(struct kbase_context *kctx,
+ struct base_csf_notification *event_data);
+
+/**
+ * kbase_csf_event_add_error - Add an error into event error list
+ *
+ * @kctx: Address of a base context associated with a GPU address space.
+ * @error: Address of the item to be added to the context's pending error list.
+ * @data: Error data to be returned to userspace.
+ *
+ * Does not wake up the event queue blocking a user thread in kbase_poll. This
+ * is to make it more efficient to add multiple errors.
+ *
+ * The added error must not already be on the context's list of errors waiting
+ * to be reported (e.g. because a previous error concerning the same object has
+ * not yet been reported).
+ *
+ */
+void kbase_csf_event_add_error(struct kbase_context *const kctx,
+ struct kbase_csf_notification *const error,
+ struct base_csf_notification const *const data);
+
+/**
+ * kbase_csf_event_remove_error - Remove an error from event error list
+ *
+ * @kctx: Address of a base context associated with a GPU address space.
+ * @error: Address of the item to be removed from the context's event error list.
+ */
+void kbase_csf_event_remove_error(struct kbase_context *kctx,
+ struct kbase_csf_notification *error);
+
+/**
+ * kbase_csf_event_error_pending - Check the error pending status
+ *
+ * @kctx: The kbase context whose fatal error status is to be checked.
+ *
+ * Return: true if there is an error in the list.
+ */
+bool kbase_csf_event_error_pending(struct kbase_context *kctx);
+#endif /* _KBASE_CSF_EVENT_H_ */
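The error-list half of this API is a guarded FIFO: add_error() queues a notification only if it is not already pending (the driver WARNs otherwise), and read_error() pops the oldest entry into a caller buffer. A self-contained user-space analogue follows, with a pthread mutex in place of the driver's spinlock, invented type names, and the already-queued case skipped silently; the event-queue wakeup is omitted.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for struct kbase_csf_notification. */
struct error_entry {
    struct error_entry *next;
    bool queued;        /* analogue of !list_empty(&error->link) */
    char data[32];      /* analogue of struct base_csf_notification */
};

struct error_queue {
    struct error_entry *head, *tail;
    pthread_mutex_t lock;
};

/* Append an error unless it is already pending, as add_error does. */
static void error_add(struct error_queue *q, struct error_entry *e,
                      const char *data)
{
    pthread_mutex_lock(&q->lock);
    if (!e->queued) {
        snprintf(e->data, sizeof(e->data), "%s", data);
        e->queued = true;
        e->next = NULL;
        if (q->tail)
            q->tail->next = e;
        else
            q->head = e;
        q->tail = e;
    }
    pthread_mutex_unlock(&q->lock);
}

/* Pop the oldest error into the caller's buffer; false if none pending. */
static bool error_read(struct error_queue *q, char *out, size_t out_size)
{
    struct error_entry *e;

    pthread_mutex_lock(&q->lock);
    e = q->head;
    if (e) {
        q->head = e->next;
        if (!q->head)
            q->tail = NULL;
        e->queued = false;
        snprintf(out, out_size, "%s", e->data);
    }
    pthread_mutex_unlock(&q->lock);
    return e != NULL;
}

int main(void)
{
    struct error_queue q = { .lock = PTHREAD_MUTEX_INITIALIZER };
    struct error_entry fatal = { 0 };
    char buf[32];

    error_add(&q, &fatal, "CS_FATAL");
    error_add(&q, &fatal, "CS_FATAL"); /* ignored: already queued */
    while (error_read(&q, buf, sizeof(buf)))
        printf("dequeued: %s\n", buf);
    return 0;
}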
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.c b/mali_kbase/csf/mali_kbase_csf_firmware.c
index 785555c..202c677 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware.c
@@ -31,6 +31,7 @@
#include "device/mali_kbase_device.h"
#include "backend/gpu/mali_kbase_pm_internal.h"
#include "tl/mali_kbase_timeline_priv.h"
+#include "tl/mali_kbase_tracepoints.h"
#include "mali_kbase_csf_tl_reader.h"
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
@@ -157,8 +158,7 @@ static bool entry_optional(u32 header)
}
/**
- * struct firmware_timeline_metadata -
- * Timeline metadata item within the MCU firmware
+ * struct firmware_timeline_metadata - Timeline metadata item within the MCU firmware
*
* @node: List head linking all timeline metadata to
* kbase_device:csf.firmware_timeline_metadata.
@@ -217,10 +217,11 @@ static int wait_mcu_status_value(struct kbase_device *kbdev, u32 val)
return (max_loops == 0) ? -1 : 0;
}
-void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev)
+void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev)
{
- if (wait_mcu_status_value(kbdev, MCU_CNTRL_DISABLE) < 0)
- dev_err(kbdev->dev, "MCU failed to get disabled");
+ KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_DISABLING(kbdev, kbase_backend_get_cycle_cnt(kbdev));
+
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_DISABLE);
}
static void wait_for_firmware_stop(struct kbase_device *kbdev)
@@ -229,6 +230,13 @@ static void wait_for_firmware_stop(struct kbase_device *kbdev)
/* This error shall go away once MIDJM-2371 is closed */
dev_err(kbdev->dev, "Firmware failed to stop");
}
+
+ KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_OFF(kbdev, kbase_backend_get_cycle_cnt(kbdev));
+}
+
+void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev)
+{
+ wait_for_firmware_stop(kbdev);
}
static void stop_csf_firmware(struct kbase_device *kbdev)
@@ -463,16 +471,16 @@ out:
/**
* parse_memory_setup_entry() - Process an "interface memory setup" section
*
+ * @kbdev: Kbase device structure
+ * @fw: The firmware image containing the section
+ * @entry: Pointer to the start of the section
+ * @size: Size (in bytes) of the section
+ *
* Read an "interface memory setup" section from the firmware image and create
* the necessary memory region including the MMU page tables. If successful
* the interface will be added to the kbase_device:csf.firmware_interfaces list.
*
* Return: 0 if successful, negative error code on failure
- *
- * @kbdev: Kbase device structure
- * @fw: The firmware image containing the section
- * @entry: Pointer to the start of the section
- * @size: Size (in bytes) of the section
*/
static int parse_memory_setup_entry(struct kbase_device *kbdev,
const struct firmware *fw,
@@ -724,6 +732,11 @@ static int parse_timeline_metadata_entry(struct kbase_device *kbdev,
/**
* load_firmware_entry() - Process an entry from a firmware image
*
+ * @kbdev: Kbase device
+ * @fw: Firmware image containing the entry
+ * @offset: Byte offset within the image of the entry to load
+ * @header: Header word of the entry
+ *
* Read an entry from a firmware image and do any necessary work (e.g. loading
* the data into page accessible to the MCU).
*
@@ -731,11 +744,6 @@ static int parse_timeline_metadata_entry(struct kbase_device *kbdev,
* otherwise the function will fail with -EINVAL
*
* Return: 0 if successful, negative error code on failure
- *
- * @kbdev: Kbase device
- * @fw: Firmware image containing the entry
- * @offset: Byte offset within the image of the entry to load
- * @header: Header word of the entry
*/
static int load_firmware_entry(struct kbase_device *kbdev,
const struct firmware *fw,
@@ -784,18 +792,6 @@ static int load_firmware_entry(struct kbase_device *kbdev,
}
return kbase_csf_firmware_cfg_option_entry_parse(
kbdev, fw, entry, size, updatable);
- case CSF_FIRMWARE_ENTRY_TYPE_FUTF_TEST:
-#ifndef MALI_KBASE_BUILD
- /* FW UTF option */
- if (size < 2*sizeof(*entry)) {
- dev_err(kbdev->dev, "FW UTF entry too short (size=%u)\n",
- size);
- return -EINVAL;
- }
- return mali_kutf_process_fw_utf_entry(kbdev, fw->data,
- fw->size, entry);
-#endif
- break;
case CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER:
/* Trace buffer */
if (size < TRACE_BUFFER_ENTRY_NAME_OFFSET + sizeof(*entry)) {
@@ -1170,6 +1166,7 @@ u32 kbase_csf_firmware_csg_output(
dev_dbg(kbdev->dev, "csg output r: reg %08x val %08x\n", offset, val);
return val;
}
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_csg_output);
void kbase_csf_firmware_global_input(
const struct kbase_csf_global_iface *const iface, const u32 offset,
@@ -1180,6 +1177,7 @@ void kbase_csf_firmware_global_input(
dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x\n", offset, value);
input_page_write(iface->input, offset, value);
}
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input);
void kbase_csf_firmware_global_input_mask(
const struct kbase_csf_global_iface *const iface, const u32 offset,
@@ -1191,6 +1189,7 @@ void kbase_csf_firmware_global_input_mask(
offset, value, mask);
input_page_partial_write(iface->input, offset, value, mask);
}
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input_mask);
u32 kbase_csf_firmware_global_input_read(
const struct kbase_csf_global_iface *const iface, const u32 offset)
@@ -1211,6 +1210,7 @@ u32 kbase_csf_firmware_global_output(
dev_dbg(kbdev->dev, "glob output r: reg %08x val %08x\n", offset, val);
return val;
}
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_output);
/**
* handle_internal_firmware_fatal - Handler for CS internal firmware fault.
@@ -1484,8 +1484,7 @@ bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev)
}
/**
- * kbase_csf_firmware_reload_worker() -
- * reload the fw image and re-enable the MCU
+ * kbase_csf_firmware_reload_worker() - reload the fw image and re-enable the MCU
* @work: CSF Work item for reloading the firmware.
*
* This helper function will reload the firmware image and re-enable the MCU.
@@ -1505,6 +1504,8 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work)
dev_info(kbdev->dev, "reloading firmware");
+ KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING(kbdev, kbase_backend_get_cycle_cnt(kbdev));
+
/* Reload just the data sections from firmware binary image */
err = reload_fw_data_sections(kbdev);
if (err)
@@ -2017,10 +2018,6 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
kfree(metadata);
}
-#ifndef MALI_KBASE_BUILD
- mali_kutf_fw_utf_entry_cleanup(kbdev);
-#endif
-
/* This will also free up the region allocated for the shared interface
* entry parsed from the firmware image.
*/
@@ -2144,6 +2141,8 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
unsigned long flags;
+ KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_HALT(kbdev, kbase_backend_get_cycle_cnt(kbdev));
+
kbase_csf_scheduler_spin_lock(kbdev, &flags);
/* Validate there are no on-slot groups when sending the
* halt request to firmware.
@@ -2155,12 +2154,25 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
+void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev)
+{
+ KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_ENABLING(kbdev, kbase_backend_get_cycle_cnt(kbdev));
+
+ /* Trigger the boot of MCU firmware, Use the AUTO mode as
+ * otherwise on fast reset, to exit protected mode, MCU will
+ * not reboot by itself to enter normal mode.
+ */
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_AUTO);
+}
+
#ifdef KBASE_PM_RUNTIME
void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev)
{
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
unsigned long flags;
+ KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_SLEEP(kbdev, kbase_backend_get_cycle_cnt(kbdev));
+
kbase_csf_scheduler_spin_lock(kbdev, &flags);
set_global_request(global_iface, GLB_REQ_SLEEP_MASK);
dev_dbg(kbdev->dev, "Sending sleep request to MCU");
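Several hunks above bracket MCU state transitions with timeline events (FW_ENABLING, FW_DISABLING, FW_RELOADING, FW_REQUEST_HALT, FW_REQUEST_SLEEP), each stamped with the current GPU cycle count before the controlling register write or request is issued. A minimal sketch of that trace-then-write ordering, using hypothetical register and helper names rather than the driver's API:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the MCU control register and its values. */
static uint32_t mcu_control_reg;
#define MCU_CNTRL_DISABLE 0u
#define MCU_CNTRL_AUTO    1u

static uint64_t cycle_counter;

/* Trace hook analogue: record the transition with a cycle-count stamp. */
static void trace_fw_event(const char *what)
{
    cycle_counter += 123;   /* pretend the GPU counter advanced */
    printf("[%llu] %s\n", (unsigned long long)cycle_counter, what);
}

/*
 * Mirrors the shape of the enable/disable helpers after this patch:
 * emit the timeline event first, then perform the register write that
 * actually triggers the transition.
 */
static void fw_enable_mcu(void)
{
    trace_fw_event("FW_ENABLING");
    mcu_control_reg = MCU_CNTRL_AUTO;
}

static void fw_disable_mcu(void)
{
    trace_fw_event("FW_DISABLING");
    mcu_control_reg = MCU_CNTRL_DISABLE;
}

int main(void)
{
    fw_enable_mcu();
    fw_disable_mcu();
    printf("MCU_CONTROL = %u\n", (unsigned)mcu_control_reg);
    return 0;
}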
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.h b/mali_kbase/csf/mali_kbase_csf_firmware.h
index 0edcc30..f4ce33c 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware.h
+++ b/mali_kbase/csf/mali_kbase_csf_firmware.h
@@ -346,14 +346,14 @@ static inline void kbase_csf_ring_doorbell(struct kbase_device *kbdev,
/**
* kbase_csf_read_firmware_memory - Read a value in a GPU address
*
+ * @kbdev: Device pointer
+ * @gpu_addr: GPU address to read
+ * @value: output pointer to which the read value will be written.
+ *
* This function read a value in a GPU address that belongs to
* a private firmware memory region. The function assumes that the location
* is not permanently mapped on the CPU address space, therefore it maps it
* and then unmaps it to access it independently.
- *
- * @kbdev: Device pointer
- * @gpu_addr: GPU address to read
- * @value: output pointer to which the read value will be written.
*/
void kbase_csf_read_firmware_memory(struct kbase_device *kbdev,
u32 gpu_addr, u32 *value);
@@ -361,14 +361,14 @@ void kbase_csf_read_firmware_memory(struct kbase_device *kbdev,
/**
* kbase_csf_update_firmware_memory - Write a value in a GPU address
*
+ * @kbdev: Device pointer
+ * @gpu_addr: GPU address to write
+ * @value: Value to write
+ *
* This function writes a given value in a GPU address that belongs to
* a private firmware memory region. The function assumes that the destination
* is not permanently mapped on the CPU address space, therefore it maps it
* and then unmaps it to access it independently.
- *
- * @kbdev: Device pointer
- * @gpu_addr: GPU address to write
- * @value: Value to write
*/
void kbase_csf_update_firmware_memory(struct kbase_device *kbdev,
u32 gpu_addr, u32 value);
@@ -404,20 +404,20 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev);
/**
* kbase_csf_firmware_ping - Send the ping request to firmware.
*
- * The function sends the ping request to firmware.
- *
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * The function sends the ping request to firmware.
*/
void kbase_csf_firmware_ping(struct kbase_device *kbdev);
/**
* kbase_csf_firmware_ping_wait - Send the ping request to firmware and waits.
*
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
* The function sends the ping request to firmware and waits to confirm it is
* alive.
*
- * @kbdev: Instance of a GPU platform device that implements a CSF interface.
- *
* Return: 0 on success, or negative on failure.
*/
int kbase_csf_firmware_ping_wait(struct kbase_device *kbdev);
@@ -462,8 +462,12 @@ void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev);
static inline bool kbase_csf_firmware_mcu_halted(struct kbase_device *kbdev)
{
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ return true;
+#else
return (kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS)) ==
MCU_STATUS_HALTED);
+#endif /* CONFIG_MALI_NO_MALI */
}
/**
@@ -481,24 +485,14 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev);
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
-static inline void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev)
-{
- /* Trigger the boot of MCU firmware, Use the AUTO mode as
- * otherwise on fast reset, to exit protected mode, MCU will
- * not reboot by itself to enter normal mode.
- */
- kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_AUTO);
-}
+void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev);
/**
* kbase_csf_firmware_disable_mcu - Send the command to disable MCU
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
-static inline void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev)
-{
- kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_DISABLE);
-}
+void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev);
/**
* kbase_csf_firmware_disable_mcu_wait - Wait for the MCU to reach disabled
@@ -560,9 +554,9 @@ void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev,
* requests, sent after the reboot of MCU firmware, have
* completed or not.
*
- * Return: true if the Global configuration requests completed otherwise false.
- *
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * Return: true if the Global configuration requests completed otherwise false.
*/
bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev);
@@ -587,17 +581,16 @@ void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev,
* request has completed or not, that was sent to update
* the core attributes.
*
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
* Return: true if the Global configuration request to update the core
* attributes has completed, otherwise false.
- *
- * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev);
/**
- * Request the global control block of CSF interface capabilities
- *
- * Return: Total number of CSs, summed across all groups.
+ * kbase_csf_firmware_get_glb_iface - Request the global control block of CSF
+ * interface capabilities
*
* @kbdev: Kbase device.
* @group_data: Pointer where to store all the group data
@@ -620,6 +613,8 @@ bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev);
* @instr_features: Instrumentation features. Bits 7:4 hold the max size
* of events. Bits 3:0 hold the offset update rate.
* (csf >= 1,1,0)
+ *
+ * Return: Total number of CSs, summed across all groups.
*/
u32 kbase_csf_firmware_get_glb_iface(
struct kbase_device *kbdev, struct basep_cs_group_control *group_data,
@@ -628,20 +623,26 @@ u32 kbase_csf_firmware_get_glb_iface(
u32 *group_num, u32 *prfcnt_size, u32 *instr_features);
/**
- * Get CSF firmware header timeline metadata content
- *
- * Return: The firmware timeline metadata content which match @p name.
+ * kbase_csf_firmware_get_timeline_metadata - Get CSF firmware header timeline
+ * metadata content
*
* @kbdev: Kbase device.
* @name: Name of the metadata which metadata content to be returned.
* @size: Metadata size if specified metadata found.
+ *
+ * Return: The firmware timeline metadata content which match @p name.
*/
const char *kbase_csf_firmware_get_timeline_metadata(struct kbase_device *kbdev,
const char *name, size_t *size);
/**
- * kbase_csf_firmware_mcu_shared_mapping_init -
- * Allocate and map MCU shared memory.
+ * kbase_csf_firmware_mcu_shared_mapping_init - Allocate and map MCU shared memory.
+ *
+ * @kbdev: Kbase device the memory mapping shall belong to.
+ * @num_pages: Number of memory pages to map.
+ * @cpu_map_properties: Either PROT_READ or PROT_WRITE.
+ * @gpu_map_properties: Either KBASE_REG_GPU_RD or KBASE_REG_GPU_WR.
+ * @csf_mapping: Object where to write metadata for the memory mapping.
*
* This helper function allocates memory and maps it on both the CPU
* and the GPU address spaces. Most of the properties of the mapping
@@ -653,12 +654,6 @@ const char *kbase_csf_firmware_get_timeline_metadata(struct kbase_device *kbdev,
* will be ignored by the function.
*
* Return: 0 if success, or an error code on failure.
- *
- * @kbdev: Kbase device the memory mapping shall belong to.
- * @num_pages: Number of memory pages to map.
- * @cpu_map_properties: Either PROT_READ or PROT_WRITE.
- * @gpu_map_properties: Either KBASE_REG_GPU_RD or KBASE_REG_GPU_WR.
- * @csf_mapping: Object where to write metadata for the memory mapping.
*/
int kbase_csf_firmware_mcu_shared_mapping_init(
struct kbase_device *kbdev,
@@ -676,35 +671,6 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
void kbase_csf_firmware_mcu_shared_mapping_term(
struct kbase_device *kbdev, struct kbase_csf_mapping *csf_mapping);
-#ifndef MALI_KBASE_BUILD
-/**
- * mali_kutf_process_fw_utf_entry() - Process the "Firmware UTF tests" section
- *
- * Read "Firmware UTF tests" section from the firmware image and create
- * necessary kutf app+suite+tests.
- *
- * Return: 0 if successful, negative error code on failure. In both cases
- * caller will have to invoke mali_kutf_fw_utf_entry_cleanup for the cleanup
- *
- * @kbdev: Kbase device structure
- * @fw_data: Pointer to the start of firmware binary image loaded from disk
- * @fw_size: Size (in bytes) of the firmware image
- * @entry: Pointer to the start of the section
- */
-int mali_kutf_process_fw_utf_entry(struct kbase_device *kbdev,
- const void *fw_data, size_t fw_size, const u32 *entry);
-
-/**
- * mali_kutf_fw_utf_entry_cleanup() - Remove the Fw UTF tests debugfs entries
- *
- * Destroy the kutf apps+suites+tests created on parsing "Firmware UTF tests"
- * section from the firmware image.
- *
- * @kbdev: Kbase device structure
- */
-void mali_kutf_fw_utf_entry_cleanup(struct kbase_device *kbdev);
-#endif
-
#ifdef CONFIG_MALI_DEBUG
extern bool fw_debug;
#endif
@@ -722,11 +688,11 @@ static inline long kbase_csf_timeout_in_jiffies(const unsigned int msecs)
* kbase_csf_firmware_enable_gpu_idle_timer() - Activate the idle hysteresis
* monitoring operation
*
+ * @kbdev: Kbase device structure
+ *
* Program the firmware interface with its configured hysteresis count value
* and enable the firmware to act on it. The Caller is
* assumed to hold the kbdev->csf.scheduler.interrupt_lock.
- *
- * @kbdev: Kbase device structure
*/
void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev);
@@ -734,10 +700,10 @@ void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev);
* kbase_csf_firmware_disable_gpu_idle_timer() - Disable the idle time
* hysteresis monitoring operation
*
+ * @kbdev: Kbase device structure
+ *
* Program the firmware interface to disable the idle hysteresis timer. The
* Caller is assumed to hold the kbdev->csf.scheduler.interrupt_lock.
- *
- * @kbdev: Kbase device structure
*/
void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev);
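Many of the hunks in this header (and in the files that follow) only reorder kernel-doc so that the parameter list directly follows the one-line summary and the Return: section comes last. For reference, a comment laid out in that expected order might look like the illustrative example below; the function and parameter names are invented.

/**
 * example_function() - One-line summary of what the function does
 * @dev:   First parameter, documented right after the summary line.
 * @value: Second parameter.
 *
 * The free-form description follows the parameter list, and the
 * Return: section comes last.
 *
 * Return: 0 on success, or a negative error code on failure.
 */
static int example_function(void *dev, unsigned int value)
{
    (void)dev;
    (void)value;
    return 0;
}

int main(void)
{
    return example_function(0, 0);
}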
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c
index f00acb1..70bf26a 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c
@@ -29,10 +29,6 @@
/**
* struct firmware_config - Configuration item within the MCU firmware
*
- * The firmware may expose configuration options. Each option has a name, the
- * address where the option is controlled and the minimum and maximum values
- * that the option can take.
- *
* @node: List head linking all options to
* kbase_device:csf.firmware_config
* @kbdev: Pointer to the Kbase device
@@ -47,6 +43,10 @@
* @min: The lowest legal value of the configuration option
* @max: The maximum legal value of the configuration option
* @cur_val: The current value of the configuration option
+ *
+ * The firmware may expose configuration options. Each option has a name, the
+ * address where the option is controlled and the minimum and maximum values
+ * that the option can take.
*/
struct firmware_config {
struct list_head node;
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h
index 080c154..c2d2fc5 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h
@@ -32,12 +32,12 @@
* kbase_csf_firmware_cfg_init - Create the sysfs directory for configuration
* options present in firmware image.
*
+ * @kbdev: Pointer to the Kbase device
+ *
* This function would create a sysfs directory and populate it with a
* sub-directory, that would contain a file per attribute, for every
* configuration option parsed from firmware image.
*
- * @kbdev: Pointer to the Kbase device
- *
* Return: The initialization error code.
*/
int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev);
@@ -55,16 +55,16 @@ void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev);
* kbase_csf_firmware_cfg_option_entry_parse() - Process a
* "configuration option" section.
*
- * Read a "configuration option" section adding it to the
- * kbase_device:csf.firmware_config list.
- *
- * Return: 0 if successful, negative error code on failure
- *
* @kbdev: Kbase device structure
* @fw: Firmware image containing the section
* @entry: Pointer to the section
* @size: Size (in bytes) of the section
* @updatable: Indicates if entry can be updated with FIRMWARE_CONFIG_UPDATE
+ *
+ * Read a "configuration option" section adding it to the
+ * kbase_device:csf.firmware_config list.
+ *
+ * Return: 0 if successful, negative error code on failure
*/
int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev,
const struct firmware *fw,
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
index e99c968..6f61631 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
@@ -136,13 +136,13 @@ static inline void output_page_write(u32 *const output, const u32 offset,
/**
* invent_memory_setup_entry() - Invent an "interface memory setup" section
*
+ * @kbdev: Kbase device structure
+ *
* Invent an "interface memory setup" section similar to one from a firmware
* image. If successful the interface will be added to the
* kbase_device:csf.firmware_interfaces list.
*
* Return: 0 if successful, negative error code on failure
- *
- * @kbdev: Kbase device structure
*/
static int invent_memory_setup_entry(struct kbase_device *kbdev)
{
@@ -371,6 +371,7 @@ u32 kbase_csf_firmware_csg_output(
dev_dbg(kbdev->dev, "csg output r: reg %08x val %08x\n", offset, val);
return val;
}
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_csg_output);
static void
csf_firmware_prfcnt_process(const struct kbase_csf_global_iface *const iface,
@@ -418,6 +419,7 @@ void kbase_csf_firmware_global_input(
output_page_write(iface->output, GLB_ACK, value);
}
}
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input);
void kbase_csf_firmware_global_input_mask(
const struct kbase_csf_global_iface *const iface, const u32 offset,
@@ -431,6 +433,7 @@ void kbase_csf_firmware_global_input_mask(
/* NO_MALI: Go through kbase_csf_firmware_global_input to capture writes */
kbase_csf_firmware_global_input(iface, offset, (input_page_read(iface->input, offset) & ~mask) | (value & mask));
}
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input_mask);
u32 kbase_csf_firmware_global_input_read(
const struct kbase_csf_global_iface *const iface, const u32 offset)
@@ -451,6 +454,7 @@ u32 kbase_csf_firmware_global_output(
dev_dbg(kbdev->dev, "glob output r: reg %08x val %08x\n", offset, val);
return val;
}
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_output);
/**
* handle_internal_firmware_fatal - Handler for CS internal firmware fault.
@@ -1020,10 +1024,6 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
/* NO_MALI: No trace buffers to terminate */
-#ifndef MALI_KBASE_BUILD
- mali_kutf_fw_utf_entry_cleanup(kbdev);
-#endif
-
mutex_destroy(&kbdev->csf.reg_lock);
/* This will also free up the region allocated for the shared interface
@@ -1154,6 +1154,15 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
+void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev)
+{
+ /* Trigger the boot of MCU firmware, Use the AUTO mode as
+ * otherwise on fast reset, to exit protected mode, MCU will
+ * not reboot by itself to enter normal mode.
+ */
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_AUTO);
+}
+
#ifdef KBASE_PM_RUNTIME
void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev)
{
@@ -1290,6 +1299,11 @@ const char *kbase_csf_firmware_get_timeline_metadata(
return NULL;
}
+void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev)
+{
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_DISABLE);
+}
+
void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev)
{
/* NO_MALI: Nothing to do here */
diff --git a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.h b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.h
index 993db63..9aab7ab 100644
--- a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.h
+++ b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.h
@@ -47,11 +47,11 @@ void kbase_csf_heap_context_allocator_term(
/**
* kbase_csf_heap_context_allocator_alloc - Allocate a heap context structure
*
+ * @ctx_alloc: Pointer to the heap context allocator.
+ *
* If this function is successful then it returns the address of a
* zero-initialized heap context structure for use by the firmware.
*
- * @ctx_alloc: Pointer to the heap context allocator.
- *
* Return: GPU virtual address of the allocated heap context or 0 on failure.
*/
u64 kbase_csf_heap_context_allocator_alloc(
@@ -60,13 +60,13 @@ u64 kbase_csf_heap_context_allocator_alloc(
/**
* kbase_csf_heap_context_allocator_free - Free a heap context structure
*
- * This function returns a heap context structure to the free pool of unused
- * contexts for possible reuse by a future call to
- * @kbase_csf_heap_context_allocator_alloc.
- *
* @ctx_alloc: Pointer to the heap context allocator.
* @heap_gpu_va: The GPU virtual address of a heap context structure that
* was allocated for the firmware.
+ *
+ * This function returns a heap context structure to the free pool of unused
+ * contexts for possible reuse by a future call to
+ * @kbase_csf_heap_context_allocator_alloc.
*/
void kbase_csf_heap_context_allocator_free(
struct kbase_csf_heap_context_allocator *const ctx_alloc,
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.c b/mali_kbase/csf/mali_kbase_csf_kcpu.c
index 8729307..05a4fa0 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu.c
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu.c
@@ -190,6 +190,12 @@ static void kbase_jit_add_to_pending_alloc_list(
*
* @queue: The queue containing this JIT allocation
* @cmd: The JIT allocation command
+ *
+ * Return:
+ * * 0 - allocation OK
+ * * -EINVAL - missing info or JIT ID still in use
+ * * -EAGAIN - Retry
+ * * -ENOMEM - out of memory, unable to allocate
*/
static int kbase_kcpu_jit_allocate_process(
struct kbase_kcpu_command_queue *queue,
@@ -289,8 +295,8 @@ static int kbase_kcpu_jit_allocate_process(
* Write the address of the JIT allocation to the user provided
* GPU allocation.
*/
- ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr),
- &mapping);
+ ptr = kbase_vmap_prot(kctx, info->gpu_alloc_addr, sizeof(*ptr),
+ KBASE_REG_CPU_WR, &mapping);
if (!ptr) {
ret = -ENOMEM;
goto fail;
@@ -570,9 +576,11 @@ static int kbase_csf_queue_group_suspend_prepare(
{
struct kbase_context *const kctx = kcpu_queue->kctx;
struct kbase_suspend_copy_buffer *sus_buf = NULL;
+ const u32 csg_suspend_buf_size =
+ kctx->kbdev->csf.global_iface.groups[0].suspend_size;
u64 addr = suspend_buf->buffer;
u64 page_addr = addr & PAGE_MASK;
- u64 end_addr = addr + suspend_buf->size - 1;
+ u64 end_addr = addr + csg_suspend_buf_size - 1;
u64 last_page_addr = end_addr & PAGE_MASK;
int nr_pages = (last_page_addr - page_addr) / PAGE_SIZE + 1;
int pinned_pages = 0, ret = 0;
@@ -580,8 +588,7 @@ static int kbase_csf_queue_group_suspend_prepare(
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
- if (suspend_buf->size <
- kctx->kbdev->csf.global_iface.groups[0].suspend_size)
+ if (suspend_buf->size < csg_suspend_buf_size)
return -EINVAL;
ret = kbase_csf_queue_group_handle_is_valid(kctx,
@@ -593,7 +600,7 @@ static int kbase_csf_queue_group_suspend_prepare(
if (!sus_buf)
return -ENOMEM;
- sus_buf->size = suspend_buf->size;
+ sus_buf->size = csg_suspend_buf_size;
sus_buf->nr_pages = nr_pages;
sus_buf->offset = addr & ~PAGE_MASK;
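The suspend-buffer hunk above validates the user buffer against the firmware-reported suspend size and then pins pages for exactly that size, including partial first and last pages. The page-count arithmetic can be checked in isolation; a small sketch with hypothetical names:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096ULL
#define PAGE_MASK (~(PAGE_SIZE - 1))

/*
 * Same arithmetic as the suspend-buffer preparation above: given a user
 * virtual address and the firmware-defined suspend buffer size, work out
 * how many pages must be pinned, including partial first/last pages.
 */
static unsigned int pages_to_pin(uint64_t addr, uint64_t size)
{
    uint64_t page_addr = addr & PAGE_MASK;
    uint64_t end_addr = addr + size - 1;
    uint64_t last_page_addr = end_addr & PAGE_MASK;

    return (unsigned int)((last_page_addr - page_addr) / PAGE_SIZE + 1);
}

int main(void)
{
    /* An 8200-byte buffer starting 100 bytes into a page spans 3 pages. */
    printf("%u\n", pages_to_pin(0x1000 + 100, 8200));
    return 0;
}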
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.h b/mali_kbase/csf/mali_kbase_csf_kcpu.h
index 6300569..3edb4de 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu.h
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu.h
@@ -294,6 +294,8 @@ struct kbase_kcpu_command_queue {
* queue will be created.
* @newq: Pointer to the structure which contains information about
* the new KCPU command queue to be created.
+ *
+ * Return: 0 if successful or a negative error code on failure.
*/
int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
struct kbase_ioctl_kcpu_queue_new *newq);
@@ -307,6 +309,8 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
* queue is to be deleted.
* @del: Pointer to the structure which specifies the KCPU command
* queue to be deleted.
+ *
+ * Return: 0 if successful or a negative error code on failure.
*/
int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx,
struct kbase_ioctl_kcpu_queue_delete *del);
@@ -320,6 +324,8 @@ int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx,
* @enq: Pointer to the structure which specifies the KCPU command
* as well as the KCPU command queue into which the command
* is to be enqueued.
+ *
+ * Return: 0 if successful or a negative error code on failure.
*/
int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
struct kbase_ioctl_kcpu_queue_enqueue *enq);
@@ -337,11 +343,11 @@ int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx);
/**
* kbase_csf_kcpu_queue_context_term - Terminate the kernel CPU queues context
* for a GPU address space
+ * @kctx: Pointer to the kbase context being terminated.
*
* This function deletes any kernel CPU queues that weren't deleted before
* context termination.
*
- * @kctx: Pointer to the kbase context being terminated.
*/
void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx);
diff --git a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
index 7b63132..d5d8318 100644
--- a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
+++ b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
@@ -576,6 +576,7 @@ int kbase_reset_gpu_silent(struct kbase_device *kbdev)
return 0;
}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu_silent);
bool kbase_reset_gpu_is_active(struct kbase_device *kbdev)
{
diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c
index f22a5d7..cd87027 100644
--- a/mali_kbase/csf/mali_kbase_csf_scheduler.c
+++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c
@@ -164,12 +164,14 @@ static int wait_for_scheduler_to_exit_sleep(struct kbase_device *kbdev)
* This function will force the Scheduler to exit the sleep state by doing the
* wake up of MCU and suspension of on-slot groups. It is called at the time of
* system suspend.
+ *
+ * Return: 0 on success.
*/
-static void force_scheduler_to_exit_sleep(struct kbase_device *kbdev)
+static int force_scheduler_to_exit_sleep(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
unsigned long flags;
- int ret;
+ int ret = 0;
lockdep_assert_held(&scheduler->lock);
WARN_ON(scheduler->state != SCHED_SLEEPING);
@@ -177,12 +179,16 @@ static void force_scheduler_to_exit_sleep(struct kbase_device *kbdev)
kbase_pm_lock(kbdev);
ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev);
- if (ret)
- dev_warn(kbdev->dev, "[%llu] Wait for MCU wake up failed on forced scheduler suspend",
- kbase_backend_get_cycle_cnt(kbdev));
kbase_pm_unlock(kbdev);
+ if (ret) {
+ dev_warn(kbdev->dev,
+ "[%llu] Wait for MCU wake up failed on forced scheduler suspend",
+ kbase_backend_get_cycle_cnt(kbdev));
+ goto out;
+ }
- suspend_active_groups_on_powerdown(kbdev, true);
+ if (suspend_active_groups_on_powerdown(kbdev, true))
+ goto out;
kbase_pm_lock(kbdev);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -191,12 +197,26 @@ static void force_scheduler_to_exit_sleep(struct kbase_device *kbdev)
kbase_pm_update_state(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
ret = kbase_pm_wait_for_desired_state(kbdev);
- if (ret)
- dev_warn(kbdev->dev, "[%llu] Wait for pm state change failed on forced scheduler suspend",
- kbase_backend_get_cycle_cnt(kbdev));
kbase_pm_unlock(kbdev);
+ if (ret) {
+ dev_warn(kbdev->dev,
+ "[%llu] Wait for pm state change failed on forced scheduler suspend",
+ kbase_backend_get_cycle_cnt(kbdev));
+ goto out;
+ }
scheduler->state = SCHED_SUSPENDED;
+
+ return 0;
+
+out:
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbdev->pm.backend.exit_gpu_sleep_mode = true;
+ kbdev->pm.backend.gpu_wakeup_override = false;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ kbase_csf_scheduler_invoke_tick(kbdev);
+
+ return ret;
}
#endif
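The reworked force_scheduler_to_exit_sleep() above now reports failure and funnels every error through a single exit label that re-arms the scheduler tick before propagating the error. A generic user-space sketch of that single-exit error-handling shape follows; the step names are invented and the recovery work is reduced to a print.

#include <stdio.h>

/* Hypothetical steps standing in for the MCU wake-up / suspend calls. */
static int wake_mcu(void)       { return 0; }
static int suspend_groups(void) { return -1; }  /* pretend this fails */
static void rearm_tick(void)    { puts("re-armed scheduler tick"); }

/*
 * Single-exit error handling: every failure jumps to one label that
 * performs the common recovery (here, re-arming the tick) and then
 * propagates the error code to the caller.
 */
static int exit_sleep(void)
{
    int ret;

    ret = wake_mcu();
    if (ret)
        goto out;

    ret = suspend_groups();
    if (ret)
        goto out;

    puts("scheduler suspended");
    return 0;

out:
    rearm_tick();
    return ret;
}

int main(void)
{
    printf("exit_sleep() = %d\n", exit_sleep());
    return 0;
}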
@@ -445,6 +465,13 @@ static bool queue_group_idle_locked(struct kbase_queue_group *group)
group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE);
}
+static bool on_slot_group_idle_locked(struct kbase_queue_group *group)
+{
+ lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
+
+ return (group->run_state == KBASE_CSF_GROUP_IDLE);
+}
+
static bool queue_group_scheduled(struct kbase_queue_group *group)
{
return (group->run_state != KBASE_CSF_GROUP_INACTIVE &&
@@ -582,6 +609,8 @@ static void disable_gpu_idle_fw_timer(struct kbase_device *kbdev)
* This function is usually called when Scheduler needs to be activated.
* The PM reference count is acquired for the Scheduler and the power on
* of GPU is initiated.
+ *
+ * Return: 0 if successful or a negative error code on failure.
*/
static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev,
enum kbase_pm_suspend_handler suspend_handler)
@@ -1243,8 +1272,16 @@ int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue)
static void update_hw_active(struct kbase_queue *queue, bool active)
{
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ if (queue && queue->enabled) {
+ u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE);
+
+ output_addr[CS_ACTIVE / sizeof(u32)] = active;
+ }
+#else
CSTD_UNUSED(queue);
CSTD_UNUSED(active);
+#endif
}
static void program_cs_extract_init(struct kbase_queue *queue)
@@ -2099,6 +2136,10 @@ static void save_csg_slot(struct kbase_queue_group *group)
bool sync_wait = false;
bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
CSG_STATUS_STATE_IDLE_MASK;
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ for (i = 0; i < max_streams; i++)
+ update_hw_active(group->bound_queues[i], false);
+#endif /* CONFIG_MALI_NO_MALI */
for (i = 0; idle && i < max_streams; i++) {
struct kbase_queue *const queue =
group->bound_queues[i];
@@ -2385,6 +2426,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
protm_suspend_buf >> 32);
}
+
/* Enable all interrupts for now */
kbase_csf_firmware_csg_input(ginfo, CSG_ACK_IRQ_MASK, ~((u32)0));
@@ -2414,7 +2456,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
/* Trace the programming of the CSG on the slot */
KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG(
kbdev, kbdev->gpu_props.props.raw_props.gpu_id, group->kctx->id,
- group->handle, slot);
+ group->handle, slot, (state == CSG_REQ_STATE_RESUME) ? 1 : 0);
dev_dbg(kbdev->dev, "Starting group %d of context %d_%d on slot %d with priority %u\n",
group->handle, kctx->tgid, kctx->id, slot, prio);
@@ -3166,15 +3208,15 @@ static void wait_csg_slots_start(struct kbase_device *kbdev)
* flagged after the completion of a CSG status
* update command
*
+ * @kbdev: Pointer to the GPU device.
+ * @slot: The given slot for checking an occupying resident group's idle
+ * state.
+ *
* This function is called at the start of scheduling tick to check the
* idle status of a queue group resident on a CSG slot.
* The caller must make sure the corresponding status update command has
* been called and completed before checking this status.
*
- * @kbdev: Pointer to the GPU device.
- * @slot: The given slot for checking an occupying resident group's idle
- * state.
- *
* Return: true if the group resident on slot is idle, otherwise false.
*/
static bool group_on_slot_is_idle(struct kbase_device *kbdev,
@@ -3194,16 +3236,16 @@ static bool group_on_slot_is_idle(struct kbase_device *kbdev,
* slots_update_state_changed() - Check the handshake state of a subset of
* command group slots.
*
- * Checks the state of a subset of slots selected through the slots_mask
- * bit_map. Records which slots' handshake completed and send it back in the
- * slots_done bit_map.
- *
* @kbdev: The GPU device.
* @field_mask: The field mask for checking the state in the csg_req/ack.
* @slots_mask: A bit_map specifying the slots to check.
* @slots_done: A cleared bit_map for returning the slots that
* have finished update.
*
+ * Checks the state of a subset of slots selected through the slots_mask
+ * bit_map. Records which slots' handshake completed and sends it back in the
+ * slots_done bit_map.
+ *
* Return: true if the slots_done is set for at least one slot.
* Otherwise false.
*/
@@ -3237,10 +3279,6 @@ bool slots_update_state_changed(struct kbase_device *kbdev, u32 field_mask,
* wait_csg_slots_handshake_ack - Wait the req/ack handshakes to complete on
* the specified groups.
*
- * This function waits for the acknowledgement of the request that have
- * already been placed for the CSG slots by the caller. Currently used for
- * the CSG priority update and status update requests.
- *
* @kbdev: Pointer to the GPU device.
* @field_mask: The field mask for checking the state in the csg_req/ack.
* @slot_mask: Bitmap reflecting the slots, the function will modify
@@ -3248,6 +3286,10 @@ bool slots_update_state_changed(struct kbase_device *kbdev, u32 field_mask,
* bits.
* @wait_in_jiffies: Wait duration in jiffies, controlling the time-out.
*
+ * This function waits for the acknowledgment of the requests that have
+ * already been placed for the CSG slots by the caller. Currently used for
+ * the CSG priority update and status update requests.
+ *
* Return: 0 on all specified slots acknowledged; otherwise -ETIMEDOUT. For
* timed out condition with unacknowledged slots, their bits remain
* set in the slot_mask.
@@ -3349,14 +3391,14 @@ void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev,
* scheduler_slot_protm_ack - Acknowledging the protected region requests
* from the resident group on a given slot.
*
- * The function assumes that the given slot is in stable running state and
- * has already been judged by the caller on that any pending protected region
- * requests of the resident group should be acknowledged.
- *
* @kbdev: Pointer to the GPU device.
* @group: Pointer to the resident group on the given slot.
* @slot: The slot that the given group is actively operating on.
*
+ * The function assumes that the given slot is in a stable running state and
+ * that the caller has already determined that any pending protected region
+ * requests of the resident group should be acknowledged.
+ *
* Return: true if the group has pending protm request(s) and is acknowledged.
* The caller should arrange to enter the protected mode for servicing
* it. Otherwise return false, indicating the group has no pending protm
@@ -3426,15 +3468,15 @@ static bool scheduler_slot_protm_ack(struct kbase_device *const kbdev,
* scheduler_group_check_protm_enter - Request the given group to be evaluated
* for triggering the protected mode.
*
+ * @kbdev: Pointer to the GPU device.
+ * @input_grp: Pointer to the GPU queue group.
+ *
* The function assumes the given group is either an active running group or
* the scheduler internally maintained field scheduler->top_grp.
*
* If the GPU is not already running in protected mode and the input group
* has protected region requests from its bound queues, the requests are
* acknowledged and the GPU is instructed to enter the protected mode.
- *
- * @kbdev: Pointer to the GPU device.
- * @input_grp: Pointer to the GPU queue group.
*/
static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
struct kbase_queue_group *const input_grp)
@@ -3538,7 +3580,7 @@ static void scheduler_apply(struct kbase_device *kbdev)
}
}
- /* Initialize the remaining avialable csg slots for the tick/tock */
+ /* Initialize the remaining available csg slots for the tick/tock */
scheduler->remaining_tick_slots = available_csg_slots;
/* If there are spare slots, apply heads in the list */
@@ -3615,8 +3657,9 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev,
group->scan_seq_num = scheduler->csg_scan_count_for_tick++;
if (queue_group_idle_locked(group)) {
- list_add_tail(&group->link_to_schedule,
- &scheduler->idle_groups_to_schedule);
+ if (on_slot_group_idle_locked(group))
+ list_add_tail(&group->link_to_schedule,
+ &scheduler->idle_groups_to_schedule);
continue;
}
@@ -3640,6 +3683,8 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev,
* fairness of scheduling within a single
* kbase_context.
*
+ * @kbdev: Pointer to the GPU device.
+ *
* Since only kbase_csf_scheduler's top_grp (i.e. the queue group assigned
* the highest slot priority) is guaranteed to get the resources that it
* needs we only rotate the kbase_context corresponding to it -
@@ -3678,8 +3723,6 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev,
* the kbase_csf_scheduler's groups_to_schedule list. In this example, it will
* be for a group in the next lowest priority level or in absence of those the
* next kbase_context's queue groups.
- *
- * @kbdev: Pointer to the GPU device.
*/
static void scheduler_rotate_groups(struct kbase_device *kbdev)
{
@@ -3750,17 +3793,17 @@ static void scheduler_rotate_ctxs(struct kbase_device *kbdev)
* slots for which the IDLE notification was received
* previously.
*
- * This function sends a CSG status update request for all the CSG slots
- * present in the bitmap scheduler->csg_slots_idle_mask and wait for the
- * request to complete.
- * The bits set in the scheduler->csg_slots_idle_mask bitmap are cleared by
- * this function.
- *
* @kbdev: Pointer to the GPU device.
* @csg_bitmap: Bitmap of the CSG slots for which
* the status update request completed successfully.
* @failed_csg_bitmap: Bitmap of the CSG slots for which
+ * the status update request timed out.
+ *
+ * This function sends a CSG status update request for all the CSG slots
+ * present in the bitmap scheduler->csg_slots_idle_mask and waits for the
+ * request to complete.
+ * The bits set in the scheduler->csg_slots_idle_mask bitmap are cleared by
+ * this function.
*/
static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
unsigned long *csg_bitmap, unsigned long *failed_csg_bitmap)
@@ -3832,6 +3875,8 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
* resident on CSG slots for which the
* IDLE notification was received previously.
*
+ * @kbdev: Pointer to the GPU device.
+ *
* This function is called at the start of scheduling tick/tock to reconfirm
* the idle status of queue groups resident on CSG slots for
* which idle notification was received previously, i.e. all the CSG slots
@@ -3845,8 +3890,6 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
* updated accordingly.
 * The bits corresponding to slots for which the status update request timed out
* remain set in scheduler->csg_slots_idle_mask.
- *
- * @kbdev: Pointer to the GPU device.
*/
static void scheduler_handle_idle_slots(struct kbase_device *kbdev)
{
@@ -3901,7 +3944,7 @@ static void scheduler_scan_idle_groups(struct kbase_device *kbdev)
list_for_each_entry_safe(group, n, &scheduler->idle_groups_to_schedule,
link_to_schedule) {
- WARN_ON(!queue_group_idle_locked(group));
+ WARN_ON(!on_slot_group_idle_locked(group));
if (!scheduler->ngrp_to_schedule) {
/* keep the top csg's origin */
@@ -3955,6 +3998,18 @@ static struct kbase_queue_group *get_tock_top_group(
return NULL;
}
+/**
+ * suspend_active_groups_on_powerdown() - Suspend active CSG groups upon
+ * suspend or GPU IDLE.
+ *
+ * @kbdev: Pointer to the device
+ * @system_suspend: Flag to indicate it's for system suspend.
+ *
+ * This function will suspend all active CSG groups upon either
+ * system suspend, runtime suspend or GPU IDLE.
+ *
+ * Return: 0 on success, -1 otherwise.
+ */
static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
bool system_suspend)
{
@@ -3964,8 +4019,8 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
int ret = suspend_active_queue_groups(kbdev, slot_mask);
if (ret) {
- /* The suspend of CSGs failed, trigger the GPU reset and wait
- * for it to complete to be in a deterministic state.
+ /* The suspend of CSGs failed; trigger the GPU reset to return
+ * to a deterministic state.
*/
dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n",
kbase_backend_get_cycle_cnt(kbdev),
@@ -3975,13 +4030,6 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
- if (system_suspend) {
- mutex_unlock(&scheduler->lock);
- kbase_reset_gpu_allow(kbdev);
- kbase_reset_gpu_wait(kbdev);
- kbase_reset_gpu_prevent_and_wait(kbdev);
- mutex_lock(&scheduler->lock);
- }
return -1;
}
@@ -4059,6 +4107,8 @@ static void scheduler_sleep_on_idle(struct kbase_device *kbdev)
* This function is called on GPU idle notification to trigger the power down of
* GPU. Scheduler's state is changed to suspended and all the active queue
* groups are suspended before halting the MCU firmware.
+ *
+ * Return: true if scheduler will be suspended or false if suspend is aborted.
*/
static bool scheduler_suspend_on_idle(struct kbase_device *kbdev)
{
@@ -4104,6 +4154,8 @@ static void gpu_idle_worker(struct work_struct *work)
disable_gpu_idle_fw_timer(kbdev);
scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev);
if (scheduler_is_idle_suspendable) {
+ KBASE_KTRACE_ADD(kbdev, GPU_IDLE_HANDLING_START, NULL,
+ kbase_csf_ktrace_gpu_cycle_cnt(kbdev));
#ifdef KBASE_PM_RUNTIME
if (kbase_pm_gpu_sleep_allowed(kbdev) &&
scheduler->total_runnable_grps)
@@ -4174,8 +4226,7 @@ static int scheduler_prepare(struct kbase_device *kbdev)
/* Adds those idle but runnable groups to the scanout list */
scheduler_scan_idle_groups(kbdev);
- /* After adding the idle CSGs, the two counts should be the same */
- WARN_ON(scheduler->csg_scan_count_for_tick != scheduler->ngrp_to_schedule);
+ WARN_ON(scheduler->csg_scan_count_for_tick < scheduler->ngrp_to_schedule);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
scheduler->num_active_address_spaces |
@@ -4705,8 +4756,11 @@ static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev)
* due to the extra context ref-count, which prevents the
* L2 powering down cache clean operation in the non racing
* case.
+ * The LSC is also flushed to cover the bus logging use case,
+ * where a GPU reset is done regularly to avoid log buffer
+ * overflow.
*/
- kbase_gpu_start_cache_clean(kbdev);
+ kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev,
kbdev->reset_timeout_ms);
if (ret2) {
@@ -5055,13 +5109,18 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
unsigned int target_page_nr = 0, i = 0;
u64 offset = sus_buf->offset;
size_t to_copy = sus_buf->size;
+ const u32 csg_suspend_buf_nr_pages =
+ PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
if (scheduler->state != SCHED_SUSPENDED) {
/* Similar to the case of HW counters, need to flush
- * the GPU cache before reading from the suspend buffer
+ * the GPU L2 cache before reading from the suspend buffer
* pages as they are mapped and cached on GPU side.
+ * The LSC is not flushed here, since only the CSG suspend
+ * buffer contents need to be flushed from the L2 cache.
*/
- kbase_gpu_start_cache_clean(kbdev);
+ kbase_gpu_start_cache_clean(
+ kbdev, GPU_COMMAND_CACHE_CLN_INV_L2);
kbase_gpu_wait_cache_clean(kbdev);
} else {
/* Make sure power down transitions have completed,
@@ -5073,7 +5132,7 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
kbase_pm_wait_for_desired_state(kbdev);
}
- for (i = 0; i < PFN_UP(sus_buf->size) &&
+ for (i = 0; i < csg_suspend_buf_nr_pages &&
target_page_nr < sus_buf->nr_pages; i++) {
struct page *pg =
as_page(group->normal_suspend_buf.phy[i]);
@@ -5252,7 +5311,7 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group)
* This function will evaluate the sync condition, if any, of all the queues
* bound to the given group.
*
- * Return true if the sync condition of at least one queue has been satisfied.
+ * Return: true if the sync condition of at least one queue has been satisfied.
*/
static bool check_sync_update_for_on_slot_group(
struct kbase_queue_group *group)
@@ -5341,7 +5400,7 @@ static bool check_sync_update_for_on_slot_group(
* protected mode that has a higher priority than the active protected mode
* group.
*
- * Return true if the sync condition of at least one queue in a group has been
+ * Return: true if the sync condition of at least one queue in a group has been
* satisfied.
*/
static bool check_sync_update_for_idle_groups_protm(struct kbase_device *kbdev)
@@ -5604,8 +5663,14 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev)
flush_work(&kbdev->csf.scheduler.gpu_idle_work);
mutex_lock(&kbdev->csf.scheduler.lock);
- if (WARN_ON(kbdev->csf.scheduler.state != SCHED_SUSPENDED))
+ if (kbdev->csf.scheduler.state != SCHED_SUSPENDED) {
+ /* The power policy could prevent the Scheduler from
+ * getting suspended when GPU becomes idle.
+ */
+ WARN_ON(kbase_pm_idle_groups_sched_suspendable(kbdev));
scheduler_suspend(kbdev);
+ }
+
mutex_unlock(&kbdev->csf.scheduler.lock);
cancel_delayed_work_sync(&kbdev->csf.scheduler.ping_work);
cancel_tick_timer(kbdev);
@@ -5692,12 +5757,16 @@ void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev,
* available, so need to drop the lock before cancellation.
*/
cancel_work_sync(&scheduler->tick_work);
- } else if (!currently_enabled && enable) {
+ return;
+ }
+
+ if (!currently_enabled && enable) {
scheduler->timer_enabled = true;
scheduler_enable_tick_timer_nolock(kbdev);
- mutex_unlock(&scheduler->lock);
}
+
+ mutex_unlock(&scheduler->lock);
}
void kbase_csf_scheduler_kick(struct kbase_device *kbdev)
@@ -5718,18 +5787,20 @@ out:
mutex_unlock(&scheduler->lock);
}
-void kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
+int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
{
+ int result = 0;
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
/* Cancel any potential queued delayed work(s) */
cancel_work_sync(&scheduler->tick_work);
cancel_tock_work(scheduler);
- if (kbase_reset_gpu_prevent_and_wait(kbdev)) {
+ result = kbase_reset_gpu_prevent_and_wait(kbdev);
+ if (result) {
dev_warn(kbdev->dev,
"Stop PM suspending for failing to prevent gpu reset.\n");
- return;
+ return result;
}
mutex_lock(&scheduler->lock);
@@ -5742,18 +5813,31 @@ void kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
*/
if (scheduler->state == SCHED_SLEEPING) {
dev_info(kbdev->dev, "Activating MCU out of sleep on system suspend");
- force_scheduler_to_exit_sleep(kbdev);
+ result = force_scheduler_to_exit_sleep(kbdev);
+ if (result) {
+ dev_warn(kbdev->dev, "Scheduler failed to exit from sleep");
+ goto exit;
+ }
}
#endif
if (scheduler->state != SCHED_SUSPENDED) {
- suspend_active_groups_on_powerdown(kbdev, true);
- dev_info(kbdev->dev, "Scheduler PM suspend");
- scheduler_suspend(kbdev);
- cancel_tick_timer(kbdev);
+ result = suspend_active_groups_on_powerdown(kbdev, true);
+ if (result) {
+ dev_warn(kbdev->dev, "failed to suspend active groups");
+ goto exit;
+ } else {
+ dev_info(kbdev->dev, "Scheduler PM suspend");
+ scheduler_suspend(kbdev);
+ cancel_tick_timer(kbdev);
+ }
}
+
+exit:
mutex_unlock(&scheduler->lock);
kbase_reset_gpu_allow(kbdev);
+
+ return result;
}
KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_suspend);
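Since kbase_csf_scheduler_pm_suspend() now returns an error code instead of void, callers on the suspend path are expected to propagate the failure rather than continuing with the power-down. A minimal caller sketch follows; the wrapper and the surrounding PM plumbing are assumptions, only kbase_csf_scheduler_pm_suspend() itself comes from this patch.

/* Hedged sketch of a PM-suspend caller; only the call to
 * kbase_csf_scheduler_pm_suspend() is taken from this patch, the
 * wrapper itself is hypothetical.
 */
static int example_backend_pm_suspend(struct kbase_device *kbdev)
{
	int err = kbase_csf_scheduler_pm_suspend(kbdev);

	if (err) {
		/* Abort the suspend so the system does not power down the
		 * GPU with CSGs still active.
		 */
		dev_warn(kbdev->dev, "Scheduler PM suspend failed: %d", err);
		return err;
	}

	return 0;
}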
diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.h b/mali_kbase/csf/mali_kbase_csf_scheduler.h
index 73ebb66..068a45b 100644
--- a/mali_kbase/csf/mali_kbase_csf_scheduler.h
+++ b/mali_kbase/csf/mali_kbase_csf_scheduler.h
@@ -23,6 +23,7 @@
#define _KBASE_CSF_SCHEDULER_H_
#include "mali_kbase_csf.h"
+#include "mali_kbase_csf_event.h"
/**
* kbase_csf_scheduler_queue_start() - Enable the running of GPU command queue
@@ -250,14 +251,14 @@ void kbase_csf_scheduler_enable_tick_timer(struct kbase_device *kbdev);
* kbase_csf_scheduler_group_copy_suspend_buf - Suspend a queue
* group and copy suspend buffer.
*
- * This function is called to suspend a queue group and copy the suspend_buffer
- * contents to the input buffer provided.
- *
* @group: Pointer to the queue group to be suspended.
* @sus_buf: Pointer to the structure which contains details of the
* user buffer and its kernel pinned pages to which we need to copy
* the group suspend buffer.
*
+ * This function is called to suspend a queue group and copy the suspend_buffer
+ * contents to the input buffer provided.
+ *
* Return: 0 on success, or negative on failure.
*/
int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
@@ -425,8 +426,10 @@ void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev);
*
* This function will make the scheduler suspend all the running queue groups
 * and drop its power management reference.
+ *
+ * Return: 0 on success.
*/
-void kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev);
+int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev);
/**
* kbase_csf_scheduler_all_csgs_idle() - Check if the scheduler internal
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
index 06a7824..62fb241 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
@@ -28,13 +28,13 @@
/**
* encode_chunk_ptr - Encode the address and size of a chunk as an integer.
*
+ * @chunk_size: Size of a tiler heap chunk, in bytes.
+ * @chunk_addr: GPU virtual address of the same tiler heap chunk.
+ *
* The size and address of the next chunk in a list are packed into a single
* 64-bit value for storage in a chunk's header. This function returns that
* value.
*
- * @chunk_size: Size of a tiler heap chunk, in bytes.
- * @chunk_addr: GPU virtual address of the same tiler heap chunk.
- *
* Return: Next chunk pointer suitable for writing into a chunk header.
*/
static u64 encode_chunk_ptr(u32 const chunk_size, u64 const chunk_addr)
@@ -76,14 +76,14 @@ static struct kbase_csf_tiler_heap_chunk *get_last_chunk(
/**
* link_chunk - Link a chunk into a tiler heap
*
+ * @heap: Pointer to the tiler heap.
+ * @chunk: Pointer to the heap chunk to be linked.
+ *
* Unless the @chunk is the first in the kernel's list of chunks belonging to
* a given tiler heap, this function stores the size and address of the @chunk
* in the header of the preceding chunk. This requires the GPU memory region
 * containing the header to be mapped temporarily, which can fail.
*
- * @heap: Pointer to the tiler heap.
- * @chunk: Pointer to the heap chunk to be linked.
- *
* Return: 0 if successful or a negative error code on failure.
*/
static int link_chunk(struct kbase_csf_tiler_heap *const heap,
@@ -118,15 +118,15 @@ static int link_chunk(struct kbase_csf_tiler_heap *const heap,
/**
* init_chunk - Initialize and link a tiler heap chunk
*
- * Zero-initialize a new chunk's header (including its pointer to the next
- * chunk, which doesn't exist yet) and then update the previous chunk's
- * header to link the new chunk into the chunk list.
- *
* @heap: Pointer to the tiler heap.
* @chunk: Pointer to the heap chunk to be initialized and linked.
* @link_with_prev: Flag to indicate if the new chunk needs to be linked with
* the previously allocated chunk.
*
+ * Zero-initialize a new chunk's header (including its pointer to the next
+ * chunk, which doesn't exist yet) and then update the previous chunk's
+ * header to link the new chunk into the chunk list.
+ *
* Return: 0 if successful or a negative error code on failure.
*/
static int init_chunk(struct kbase_csf_tiler_heap *const heap,
@@ -163,14 +163,14 @@ static int init_chunk(struct kbase_csf_tiler_heap *const heap,
/**
* create_chunk - Create a tiler heap chunk
*
- * This function allocates a chunk of memory for a tiler heap and adds it to
- * the end of the list of chunks associated with that heap. The size of the
- * chunk is not a parameter because it is configured per-heap not per-chunk.
- *
* @heap: Pointer to the tiler heap for which to allocate memory.
* @link_with_prev: Flag to indicate if the chunk to be allocated needs to be
* linked with the previously allocated chunk.
*
+ * This function allocates a chunk of memory for a tiler heap and adds it to
+ * the end of the list of chunks associated with that heap. The size of the
+ * chunk is not a parameter because it is configured per-heap not per-chunk.
+ *
* Return: 0 if successful or a negative error code on failure.
*/
static int create_chunk(struct kbase_csf_tiler_heap *const heap,
@@ -237,15 +237,15 @@ static int create_chunk(struct kbase_csf_tiler_heap *const heap,
/**
* delete_chunk - Delete a tiler heap chunk
*
+ * @heap: Pointer to the tiler heap for which @chunk was allocated.
+ * @chunk: Pointer to a chunk to be deleted.
+ *
* This function frees a tiler heap chunk previously allocated by @create_chunk
* and removes it from the list of chunks associated with the heap.
*
* WARNING: The deleted chunk is not unlinked from the list of chunks used by
* the GPU, therefore it is only safe to use this function when
* deleting a heap.
- *
- * @heap: Pointer to the tiler heap for which @chunk was allocated.
- * @chunk: Pointer to a chunk to be deleted.
*/
static void delete_chunk(struct kbase_csf_tiler_heap *const heap,
struct kbase_csf_tiler_heap_chunk *const chunk)
@@ -264,10 +264,10 @@ static void delete_chunk(struct kbase_csf_tiler_heap *const heap,
/**
* delete_all_chunks - Delete all chunks belonging to a tiler heap
*
+ * @heap: Pointer to a tiler heap.
+ *
* This function empties the list of chunks associated with a tiler heap by
* freeing all chunks previously allocated by @create_chunk.
- *
- * @heap: Pointer to a tiler heap.
*/
static void delete_all_chunks(struct kbase_csf_tiler_heap *heap)
{
@@ -284,12 +284,12 @@ static void delete_all_chunks(struct kbase_csf_tiler_heap *heap)
/**
* create_initial_chunks - Create the initial list of chunks for a tiler heap
*
- * This function allocates a given number of chunks for a tiler heap and
- * adds them to the list of chunks associated with that heap.
- *
* @heap: Pointer to the tiler heap for which to allocate memory.
* @nchunks: Number of chunks to create.
*
+ * This function allocates a given number of chunks for a tiler heap and
+ * adds them to the list of chunks associated with that heap.
+ *
* Return: 0 if successful or a negative error code on failure.
*/
static int create_initial_chunks(struct kbase_csf_tiler_heap *const heap,
@@ -310,12 +310,12 @@ static int create_initial_chunks(struct kbase_csf_tiler_heap *const heap,
/**
* delete_heap - Delete a tiler heap
*
+ * @heap: Pointer to a tiler heap to be deleted.
+ *
* This function frees any chunks allocated for a tiler heap previously
* initialized by @kbase_csf_tiler_heap_init and removes it from the list of
* heaps associated with the kbase context. The heap context structure used by
* the firmware is also freed.
- *
- * @heap: Pointer to a tiler heap to be deleted.
*/
static void delete_heap(struct kbase_csf_tiler_heap *heap)
{
@@ -346,15 +346,15 @@ static void delete_heap(struct kbase_csf_tiler_heap *heap)
/**
* find_tiler_heap - Find a tiler heap from the address of its heap context
*
+ * @kctx: Pointer to the kbase context to search for a tiler heap.
+ * @heap_gpu_va: GPU virtual address of a heap context structure.
+ *
* Each tiler heap managed by the kernel has an associated heap context
* structure used by the firmware. This function finds a tiler heap object from
* the GPU virtual address of its associated heap context. The heap context
* should have been allocated by @kbase_csf_heap_context_allocator_alloc in the
* same @kctx.
*
- * @kctx: Pointer to the kbase context to search for a tiler heap.
- * @heap_gpu_va: GPU virtual address of a heap context structure.
- *
* Return: pointer to the tiler heap object, or NULL if not found.
*/
static struct kbase_csf_tiler_heap *find_tiler_heap(
@@ -495,8 +495,11 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx,
dev_dbg(kctx->kbdev->dev, "Created tiler heap 0x%llX\n",
heap->gpu_va);
mutex_unlock(&kctx->csf.tiler_heaps.lock);
+ kctx->running_total_tiler_heap_nr_chunks += heap->chunk_count;
+ kctx->running_total_tiler_heap_memory += heap->chunk_size * heap->chunk_count;
+ if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory)
+ kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory;
}
-
return err;
}
@@ -505,27 +508,36 @@ int kbase_csf_tiler_heap_term(struct kbase_context *const kctx,
{
int err = 0;
struct kbase_csf_tiler_heap *heap = NULL;
+ u32 chunk_count = 0;
+ u64 heap_size = 0;
mutex_lock(&kctx->csf.tiler_heaps.lock);
heap = find_tiler_heap(kctx, heap_gpu_va);
- if (likely(heap))
+ if (likely(heap)) {
+ chunk_count = heap->chunk_count;
+ heap_size = heap->chunk_size * chunk_count;
delete_heap(heap);
- else
+ } else
err = -EINVAL;
mutex_unlock(&kctx->csf.tiler_heaps.lock);
-
+ if (likely(kctx->running_total_tiler_heap_memory >= heap_size))
+ kctx->running_total_tiler_heap_memory -= heap_size;
+ else
+ dev_warn(kctx->kbdev->dev,
+ "Running total tiler heap memory lower than expected!");
+ if (likely(kctx->running_total_tiler_heap_nr_chunks >= chunk_count))
+ kctx->running_total_tiler_heap_nr_chunks -= chunk_count;
+ else
+ dev_warn(kctx->kbdev->dev,
+ "Running total tiler chunk count lower than expected!");
return err;
}
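The new per-context counters track tiler heap usage as heaps are created and terminated: each heap contributes chunk_size * chunk_count bytes while it exists, and the peak value only ever grows. Below is a small sketch of the allocation-side accounting mirroring the code added above; the helper wrapper is hypothetical, while the kctx fields are the ones introduced by this patch.

/* Hypothetical helper mirroring the accounting performed in
 * kbase_csf_tiler_heap_init() above; the termination path subtracts
 * the same quantities again.
 */
static void example_account_tiler_heap(struct kbase_context *kctx,
				       u64 chunk_size, u32 chunk_count)
{
	kctx->running_total_tiler_heap_nr_chunks += chunk_count;
	kctx->running_total_tiler_heap_memory += chunk_size * chunk_count;

	/* The peak is monotonic: it records the high-water mark only. */
	if (kctx->running_total_tiler_heap_memory >
	    kctx->peak_total_tiler_heap_memory)
		kctx->peak_total_tiler_heap_memory =
			kctx->running_total_tiler_heap_memory;
}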
/**
* alloc_new_chunk - Allocate a new chunk for the tiler heap.
*
- * This function will allocate a new chunk for the chunked tiler heap depending
- * on the settings provided by userspace when the heap was created and the
- * heap's statistics (like number of render passes in-flight).
- *
* @heap: Pointer to the tiler heap.
* @nr_in_flight: Number of render passes that are in-flight, must not be zero.
* @pending_frag_count: Number of render passes in-flight with completed vertex/tiler stage.
@@ -534,6 +546,10 @@ int kbase_csf_tiler_heap_term(struct kbase_context *const kctx,
* @new_chunk_ptr: Where to store the GPU virtual address & size of the new
* chunk allocated for the heap.
*
+ * This function will allocate a new chunk for the chunked tiler heap depending
+ * on the settings provided by userspace when the heap was created and the
+ * heap's statistics (like number of render passes in-flight).
+ *
* Return: 0 if a new chunk was allocated otherwise an appropriate negative
* error code.
*/
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.h b/mali_kbase/csf/mali_kbase_csf_tiler_heap.h
index 04c27f7..4031ad4 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.h
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.h
@@ -38,10 +38,10 @@ int kbase_csf_tiler_heap_context_init(struct kbase_context *kctx);
* kbase_csf_tiler_heap_context_term - Terminate the tiler heaps context for a
* GPU address space
*
+ * @kctx: Pointer to the kbase context being terminated.
+ *
* This function deletes any chunked tiler heaps that weren't deleted before
* context termination.
- *
- * @kctx: Pointer to the kbase context being terminated.
*/
void kbase_csf_tiler_heap_context_term(struct kbase_context *kctx);
@@ -74,15 +74,15 @@ int kbase_csf_tiler_heap_init(struct kbase_context *kctx,
/**
* kbasep_cs_tiler_heap_term - Terminate a chunked tiler memory heap.
*
+ * @kctx: Pointer to the kbase context in which the tiler heap was initialized.
+ * @gpu_heap_va: The GPU virtual address of the context that was set up for the
+ * tiler heap.
+ *
* This function will terminate a chunked tiler heap and cause all the chunks
* (initial and those added during out-of-memory processing) to be freed.
* It is the caller's responsibility to ensure no further operations on this
* heap will happen before calling this function.
*
- * @kctx: Pointer to the kbase context in which the tiler heap was initialized.
- * @gpu_heap_va: The GPU virtual address of the context that was set up for the
- * tiler heap.
- *
* Return: 0 if successful or a negative error code on failure.
*/
int kbase_csf_tiler_heap_term(struct kbase_context *kctx, u64 gpu_heap_va);
@@ -90,12 +90,6 @@ int kbase_csf_tiler_heap_term(struct kbase_context *kctx, u64 gpu_heap_va);
/**
* kbase_csf_tiler_heap_alloc_new_chunk - Allocate a new chunk for tiler heap.
*
- * This function will allocate a new chunk for the chunked tiler heap depending
- * on the settings provided by userspace when the heap was created and the
- * heap's statistics (like number of render passes in-flight).
- * It would return an appropriate error code if a new chunk couldn't be
- * allocated.
- *
* @kctx: Pointer to the kbase context in which the tiler heap was initialized.
* @gpu_heap_va: GPU virtual address of the heap context.
* @nr_in_flight: Number of render passes that are in-flight, must not be zero.
@@ -105,6 +99,12 @@ int kbase_csf_tiler_heap_term(struct kbase_context *kctx, u64 gpu_heap_va);
* @new_chunk_ptr: Where to store the GPU virtual address & size of the new
* chunk allocated for the heap.
*
+ * This function will allocate a new chunk for the chunked tiler heap depending
+ * on the settings provided by userspace when the heap was created and the
+ * heap's statistics (like number of render passes in-flight).
+ * It returns an appropriate error code if a new chunk couldn't be
+ * allocated.
+ *
* Return: 0 if a new chunk was allocated otherwise an appropriate negative
* error code (like -EBUSY when a free chunk is expected to be
* available upon completion of a render pass and -EINVAL when
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap_debugfs.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap_debugfs.c
index f46beed..96e0f28 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap_debugfs.c
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap_debugfs.c
@@ -32,7 +32,7 @@
* @file: The seq_file for printing to
* @data: The debugfs dentry private data, a pointer to kbase_context
*
- * Return: Negative error code or 0 on success.
+ * Return: 0 in any case.
*/
static int kbasep_csf_tiler_heap_debugfs_show(struct seq_file *file, void *data)
{
@@ -65,11 +65,41 @@ static int kbasep_csf_tiler_heap_debugfs_show(struct seq_file *file, void *data)
return 0;
}
+/**
+ * kbasep_csf_tiler_heap_total_debugfs_show() - Print the total memory allocated
+ * for all tiler heaps in a context.
+ *
+ * @file: The seq_file for printing to
+ * @data: The debugfs dentry private data, a pointer to kbase_context
+ *
+ * Return: 0 in any case.
+ */
+static int kbasep_csf_tiler_heap_total_debugfs_show(struct seq_file *file, void *data)
+{
+ struct kbase_context *kctx = file->private;
+
+ seq_printf(file, "MALI_CSF_TILER_HEAP_DEBUGFS_VERSION: v%u\n",
+ MALI_CSF_TILER_HEAP_DEBUGFS_VERSION);
+ seq_printf(file, "Total number of chunks of all heaps in the context: %lu\n",
+ (unsigned long)kctx->running_total_tiler_heap_nr_chunks);
+ seq_printf(file, "Total allocated memory of all heaps in the context: %llu\n",
+ (unsigned long long)kctx->running_total_tiler_heap_memory);
+ seq_printf(file, "Peak allocated tiler heap memory in the context: %llu\n",
+ (unsigned long long)kctx->peak_total_tiler_heap_memory);
+
+ return 0;
+}
+
static int kbasep_csf_tiler_heap_debugfs_open(struct inode *in, struct file *file)
{
return single_open(file, kbasep_csf_tiler_heap_debugfs_show, in->i_private);
}
+static int kbasep_csf_tiler_heap_total_debugfs_open(struct inode *in, struct file *file)
+{
+ return single_open(file, kbasep_csf_tiler_heap_total_debugfs_show, in->i_private);
+}
+
static const struct file_operations kbasep_csf_tiler_heap_debugfs_fops = {
.open = kbasep_csf_tiler_heap_debugfs_open,
.read = seq_read,
@@ -77,6 +107,13 @@ static const struct file_operations kbasep_csf_tiler_heap_debugfs_fops = {
.release = single_release,
};
+static const struct file_operations kbasep_csf_tiler_heap_total_debugfs_fops = {
+ .open = kbasep_csf_tiler_heap_total_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx)
{
struct dentry *file;
@@ -93,6 +130,21 @@ void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx)
}
}
+void kbase_csf_tiler_heap_total_debugfs_init(struct kbase_context *kctx)
+{
+ struct dentry *file;
+
+ if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry)))
+ return;
+
+ file = debugfs_create_file("tiler_heaps_total", 0444, kctx->kctx_dentry,
+ kctx, &kbasep_csf_tiler_heap_total_debugfs_fops);
+
+ if (IS_ERR_OR_NULL(file)) {
+ dev_warn(kctx->kbdev->dev,
+ "Unable to create total tiler heap allocated memory debugfs entry");
+ }
+}
#else
/*
@@ -102,5 +154,9 @@ void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx)
{
}
+void kbase_csf_tiler_heap_total_debugfs_init(struct kbase_context *kctx)
+{
+}
+
#endif /* CONFIG_DEBUG_FS */
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap_debugfs.h b/mali_kbase/csf/mali_kbase_csf_tiler_heap_debugfs.h
index 92ae91a..27a9074 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap_debugfs.h
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap_debugfs.h
@@ -34,4 +34,11 @@ struct kbase_context;
*/
void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx);
+/**
+ * kbase_csf_tiler_heap_total_debugfs_init() - Create a debugfs entry for per context tiler heap
+ *
+ * @kctx: The kbase_context for which to create the debugfs entry
+ */
+void kbase_csf_tiler_heap_total_debugfs_init(struct kbase_context *kctx);
+
#endif /* _KBASE_CSF_TILER_HEAP_DEBUGFS_H_ */
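The call site for the new entry is not visible in this excerpt; presumably it is invoked next to the existing per-context tiler heap debugfs entry during context debugfs setup, roughly as sketched below. The wrapper name is hypothetical.

/* Assumed call site, shown for illustration only. */
static void example_ctx_csf_debugfs_init(struct kbase_context *kctx)
{
	kbase_csf_tiler_heap_debugfs_init(kctx);
	kbase_csf_tiler_heap_total_debugfs_init(kctx);
}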
diff --git a/mali_kbase/csf/mali_kbase_csf_tl_reader.c b/mali_kbase/csf/mali_kbase_csf_tl_reader.c
index 563faec..b01ac29 100644
--- a/mali_kbase/csf/mali_kbase_csf_tl_reader.c
+++ b/mali_kbase/csf/mali_kbase_csf_tl_reader.c
@@ -171,8 +171,8 @@ static int kbase_ts_converter_init(
*
* Return: The CPU timestamp.
*/
-void kbase_ts_converter_convert(const struct kbase_ts_converter *self,
- u64 *gpu_ts)
+static void __maybe_unused
+kbase_ts_converter_convert(const struct kbase_ts_converter *self, u64 *gpu_ts)
{
u64 old_gpu_ts = *gpu_ts;
*gpu_ts = div64_u64(old_gpu_ts * self->multiplier, self->divisor) +
@@ -477,7 +477,14 @@ int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self,
return 0;
if (tl_reader_init_late(self, kbdev)) {
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ dev_warn(
+ kbdev->dev,
+ "CSFFW timeline is not available for MALI_NO_MALI builds!");
+ return 0;
+#else
return -EINVAL;
+#endif
}
tl_reader_reset(self);
@@ -521,14 +528,5 @@ void kbase_csf_tl_reader_stop(struct kbase_csf_tl_reader *self)
void kbase_csf_tl_reader_reset(struct kbase_csf_tl_reader *self)
{
- u64 gpu_cycle = 0;
- struct kbase_device *kbdev = self->kbdev;
-
- if (!kbdev)
- return;
-
kbase_csf_tl_reader_flush_buffer(self);
-
- get_cpu_gpu_time(kbdev, NULL, NULL, &gpu_cycle);
- KBASE_TLSTREAM_TL_KBASE_CSFFW_RESET(kbdev, gpu_cycle);
}
diff --git a/mali_kbase/csf/mali_kbase_csf_tl_reader.h b/mali_kbase/csf/mali_kbase_csf_tl_reader.h
index 891a8f3..4523ba2 100644
--- a/mali_kbase/csf/mali_kbase_csf_tl_reader.h
+++ b/mali_kbase/csf/mali_kbase_csf_tl_reader.h
@@ -40,8 +40,7 @@ struct kbase_tlstream;
struct kbase_device;
/**
- * struct kbase_ts_converter -
- * System timestamp to CPU timestamp converter state.
+ * struct kbase_ts_converter - System timestamp to CPU timestamp converter state.
*
* @multiplier: Numerator of the converter's fraction.
* @divisor: Denominator of the converter's fraction.
@@ -145,8 +144,7 @@ void kbase_csf_tl_reader_term(struct kbase_csf_tl_reader *self);
int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self);
/**
- * kbase_csf_tl_reader_start() -
- * Start asynchronous copying of CSFFW timeline stream.
+ * kbase_csf_tl_reader_start() - Start asynchronous copying of CSFFW timeline stream.
*
* @self: CSFFW TL Reader instance.
* @kbdev: Kbase device.
@@ -157,8 +155,7 @@ int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self,
struct kbase_device *kbdev);
/**
- * kbase_csf_tl_reader_stop() -
- * Stop asynchronous copying of CSFFW timeline stream.
+ * kbase_csf_tl_reader_stop() - Stop asynchronous copying of CSFFW timeline stream.
*
* @self: CSFFW TL Reader instance.
*/
@@ -166,8 +163,7 @@ void kbase_csf_tl_reader_stop(struct kbase_csf_tl_reader *self);
#if IS_ENABLED(CONFIG_DEBUG_FS)
/**
- * kbase_csf_tl_reader_debugfs_init() -
- * Initialize debugfs for CSFFW Timelime Stream Reader.
+ * kbase_csf_tl_reader_debugfs_init() - Initialize debugfs for CSFFW Timeline Stream Reader.
*
* @kbdev: Kbase device.
*/
@@ -175,8 +171,7 @@ void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev);
#endif
/**
- * kbase_csf_tl_reader_reset() -
- * Reset CSFFW timeline reader, it should be called before reset CSFFW.
+ * kbase_csf_tl_reader_reset() - Reset the CSFFW timeline reader; it should be called before resetting the CSFFW.
*
* @self: CSFFW TL Reader instance.
*/
diff --git a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
index a6343c8..0c72f00 100644
--- a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
+++ b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
@@ -38,12 +38,6 @@
/**
* struct firmware_trace_buffer - Trace Buffer within the MCU firmware
*
- * The firmware relays information to the host by writing on memory buffers
- * which are allocated and partially configured by the host. These buffers
- * are called Trace Buffers: each of them has a specific purpose and is
- * identified by a name and a set of memory addresses where the host can
- * set pointers to host-allocated structures.
- *
* @kbdev: Pointer to the Kbase device.
* @node: List head linking all trace buffers to
* kbase_device:csf.firmware_trace_buffers
@@ -73,6 +67,12 @@
* @num_pages: Size of the data buffer, in pages.
* @trace_enable_init_mask: Initial value for the trace enable bit mask.
* @name: NULL terminated string which contains the name of the trace buffer.
+ *
+ * The firmware relays information to the host by writing on memory buffers
+ * which are allocated and partially configured by the host. These buffers
+ * are called Trace Buffers: each of them has a specific purpose and is
+ * identified by a name and a set of memory addresses where the host can
+ * set pointers to host-allocated structures.
*/
struct firmware_trace_buffer {
struct kbase_device *kbdev;
@@ -100,14 +100,14 @@ struct firmware_trace_buffer {
/**
* struct firmware_trace_buffer_data - Configuration data for trace buffers
*
- * Describe how to set up a trace buffer interface.
- * Trace buffers are identified by name and they require a data buffer and
- * an initial mask of values for the trace enable bits.
- *
* @name: Name identifier of the trace buffer
* @trace_enable_init_mask: Initial value to assign to the trace enable bits
* @size: Size of the data buffer to allocate for the trace buffer, in pages.
* The size of a data buffer must always be a power of 2.
+ *
+ * Describes how to set up a trace buffer interface.
+ * Trace buffers are identified by name and they require a data buffer and
+ * an initial mask of values for the trace enable bits.
*/
struct firmware_trace_buffer_data {
char name[64];
@@ -121,14 +121,13 @@ struct firmware_trace_buffer_data {
* This table contains the configuration data for the trace buffers that are
* expected to be parsed from the firmware.
*/
-static const struct firmware_trace_buffer_data
-trace_buffer_data[] = {
-#ifndef MALI_KBASE_BUILD
- { "fwutf", {0}, 1 },
+static const struct firmware_trace_buffer_data trace_buffer_data[] = {
+#if MALI_UNIT_TEST
+ { "fwutf", { 0 }, 1 },
#endif
- { FW_TRACE_BUF_NAME, {0}, 4 },
- { "benchmark", {0}, 2 },
- { "timeline", {0}, KBASE_CSF_TL_BUFFER_NR_PAGES },
+ { FW_TRACE_BUF_NAME, { 0 }, 4 },
+ { "benchmark", { 0 }, 2 },
+ { "timeline", { 0 }, KBASE_CSF_TL_BUFFER_NR_PAGES },
};
int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev)
diff --git a/mali_kbase/csf/mali_kbase_csf_trace_buffer.h b/mali_kbase/csf/mali_kbase_csf_trace_buffer.h
index b9f481d..823ace7 100644
--- a/mali_kbase/csf/mali_kbase_csf_trace_buffer.h
+++ b/mali_kbase/csf/mali_kbase_csf_trace_buffer.h
@@ -34,6 +34,8 @@ struct kbase_device;
/**
* kbase_csf_firmware_trace_buffers_init - Initialize trace buffers
*
+ * @kbdev: Device pointer
+ *
* Allocate resources for trace buffers. In particular:
* - One memory page of GPU-readable, CPU-writable memory is used for
* the Extract variables of all trace buffers.
@@ -52,8 +54,6 @@ struct kbase_device;
* populated with data from the firmware image parsing.
*
* Return: 0 if success, or an error code on failure.
- *
- * @kbdev: Device pointer
*/
int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev);
@@ -67,6 +67,11 @@ void kbase_csf_firmware_trace_buffers_term(struct kbase_device *kbdev);
/**
* kbase_csf_firmware_parse_trace_buffer_entry - Process a "trace buffer" section
*
+ * @kbdev: Kbase device structure
+ * @entry: Pointer to the section
+ * @size: Size (in bytes) of the section
+ * @updatable: Indicates whether config items can be updated with FIRMWARE_CONFIG_UPDATE
+ *
* Read a "trace buffer" section adding metadata for the related trace buffer
* to the kbase_device:csf.firmware_trace_buffers list.
*
@@ -74,11 +79,6 @@ void kbase_csf_firmware_trace_buffers_term(struct kbase_device *kbdev);
* will not be initialized.
*
* Return: 0 if successful, negative error code on failure.
- *
- * @kbdev: Kbase device structure
- * @entry: Pointer to the section
- * @size: Size (in bytes) of the section
- * @updatable: Indicates whether config items can be updated with FIRMWARE_CONFIG_UPDATE
*/
int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev,
const u32 *entry,
@@ -86,8 +86,9 @@ int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev,
bool updatable);
/**
- * kbase_csf_firmware_reload_trace_buffers_data -
- * Reload trace buffers data for firmware reboot
+ * kbase_csf_firmware_reload_trace_buffers_data - Reload trace buffers data for firmware reboot
+ *
+ * @kbdev: Device pointer
*
* Helper function used when rebooting the firmware to reload the initial setup
* for all the trace buffers which have been previously parsed and initialized.
@@ -99,44 +100,40 @@ int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev,
*
* In other words, the re-initialization done by this function will be
* equivalent but not necessarily identical to the original initialization.
- *
- * @kbdev: Device pointer
*/
void kbase_csf_firmware_reload_trace_buffers_data(struct kbase_device *kbdev);
/**
* kbase_csf_firmware_get_trace_buffer - Get a trace buffer
*
- * Return: handle to a trace buffer, given the name, or NULL if a trace buffer
- * with that name couldn't be found.
- *
* @kbdev: Device pointer
* @name: Name of the trace buffer to find
+ *
+ * Return: handle to a trace buffer, given the name, or NULL if a trace buffer
+ * with that name couldn't be found.
*/
struct firmware_trace_buffer *kbase_csf_firmware_get_trace_buffer(
struct kbase_device *kbdev, const char *name);
/**
- * kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count -
- * Get number of trace enable bits for a trace buffer
- *
- * Return: Number of trace enable bits in a trace buffer.
+ * kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count - Get number of trace enable bits for a trace buffer
*
* @trace_buffer: Trace buffer handle
+ *
+ * Return: Number of trace enable bits in a trace buffer.
*/
unsigned int kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count(
const struct firmware_trace_buffer *trace_buffer);
/**
- * kbase_csf_firmware_trace_buffer_update_trace_enable_bit -
- * Update a trace enable bit
- *
- * Update the value of a given trace enable bit.
+ * kbase_csf_firmware_trace_buffer_update_trace_enable_bit - Update a trace enable bit
*
* @trace_buffer: Trace buffer handle
* @bit: Bit to update
* @value: New value for the given bit
*
+ * Update the value of a given trace enable bit.
+ *
* Return: 0 if successful, negative error code on failure.
*/
int kbase_csf_firmware_trace_buffer_update_trace_enable_bit(
@@ -146,9 +143,9 @@ int kbase_csf_firmware_trace_buffer_update_trace_enable_bit(
/**
* kbase_csf_firmware_trace_buffer_is_empty - Empty trace buffer predicate
*
- * Return: True if the trace buffer is empty, or false otherwise.
- *
* @trace_buffer: Trace buffer handle
+ *
+ * Return: True if the trace buffer is empty, or false otherwise.
*/
bool kbase_csf_firmware_trace_buffer_is_empty(
const struct firmware_trace_buffer *trace_buffer);
@@ -156,14 +153,14 @@ bool kbase_csf_firmware_trace_buffer_is_empty(
/**
* kbase_csf_firmware_trace_buffer_read_data - Read data from a trace buffer
*
+ * @trace_buffer: Trace buffer handle
+ * @data: Pointer to a client-allocated buffer where data shall be written.
+ * @num_bytes: Maximum number of bytes to read from the trace buffer.
+ *
* Read available data from a trace buffer. The client provides a data buffer
* of a given size and the maximum number of bytes to read.
*
* Return: Number of bytes read from the trace buffer.
- *
- * @trace_buffer: Trace buffer handle
- * @data: Pointer to a client-allocated where data shall be written.
- * @num_bytes: Maximum number of bytes to read from the trace buffer.
*/
unsigned int kbase_csf_firmware_trace_buffer_read_data(
struct firmware_trace_buffer *trace_buffer, u8 *data, unsigned int num_bytes);
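As a hedged illustration of how the trace buffer accessors above fit together, a consumer would typically look a buffer up by name, check whether it holds data, and then drain it in chunks. The three accessors and FW_TRACE_BUF_NAME appear elsewhere in this patch; the draining loop, buffer sizing and the consume step are assumptions.

/* Illustrative (hypothetical) consumer of the trace buffer API. */
static void example_drain_trace_buffer(struct kbase_device *kbdev,
				       u8 *buf, unsigned int buf_size)
{
	struct firmware_trace_buffer *tb =
		kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME);

	if (!tb)
		return;

	while (!kbase_csf_firmware_trace_buffer_is_empty(tb)) {
		unsigned int nread =
			kbase_csf_firmware_trace_buffer_read_data(tb, buf,
								  buf_size);
		if (!nread)
			break;

		/* Hand the nread bytes in buf to the consumer here. */
	}
}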
diff --git a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_csf.h b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_csf.h
index d05f802..2506ce1 100644
--- a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_csf.h
+++ b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_csf.h
@@ -97,6 +97,13 @@ int dummy_array[] = {
/* info_val = bitmask of slots that gave an ACK for STATUS_UPDATE */
KBASE_KTRACE_CODE_MAKE_CODE(SLOTS_STATUS_UPDATE_ACK),
+ /* info_val[63:0] = GPU cycle counter, used mainly for benchmarking
+ * purpose.
+ */
+ KBASE_KTRACE_CODE_MAKE_CODE(GPU_IDLE_HANDLING_START),
+ KBASE_KTRACE_CODE_MAKE_CODE(MCU_HALTED),
+ KBASE_KTRACE_CODE_MAKE_CODE(MCU_IN_SLEEP),
+
/*
* Group events
*/
diff --git a/mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_csf.h b/mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_csf.h
index 4b23fc9..9ee7f81 100644
--- a/mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_csf.h
+++ b/mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_csf.h
@@ -58,6 +58,9 @@ DEFINE_MALI_ADD_EVENT(IDLE_WORKER_END);
DEFINE_MALI_ADD_EVENT(GROUP_SYNC_UPDATE_WORKER_BEGIN);
DEFINE_MALI_ADD_EVENT(GROUP_SYNC_UPDATE_WORKER_END);
DEFINE_MALI_ADD_EVENT(SLOTS_STATUS_UPDATE_ACK);
+DEFINE_MALI_ADD_EVENT(GPU_IDLE_HANDLING_START);
+DEFINE_MALI_ADD_EVENT(MCU_HALTED);
+DEFINE_MALI_ADD_EVENT(MCU_IN_SLEEP);
DECLARE_EVENT_CLASS(mali_csf_grp_q_template,
TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group,
diff --git a/mali_kbase/debug/mali_kbase_debug_ktrace.h b/mali_kbase/debug/mali_kbase_debug_ktrace.h
index f943696..f1e6d3d 100644
--- a/mali_kbase/debug/mali_kbase_debug_ktrace.h
+++ b/mali_kbase/debug/mali_kbase_debug_ktrace.h
@@ -49,6 +49,7 @@
/**
* kbase_ktrace_init - initialize kbase ktrace.
* @kbdev: kbase device
+ * Return: 0 if successful or a negative error code on failure.
*/
int kbase_ktrace_init(struct kbase_device *kbdev);
diff --git a/mali_kbase/debug/mali_kbase_debug_ktrace_internal.h b/mali_kbase/debug/mali_kbase_debug_ktrace_internal.h
index d9bd351..ba93f29 100644
--- a/mali_kbase/debug/mali_kbase_debug_ktrace_internal.h
+++ b/mali_kbase/debug/mali_kbase_debug_ktrace_internal.h
@@ -63,6 +63,8 @@ void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg,
* @ktrace: kbase device's ktrace
*
* This may also empty the oldest entry in the ringbuffer to make space.
+ *
+ * Return: ktrace message
*/
struct kbase_ktrace_msg *kbasep_ktrace_reserve(struct kbase_ktrace *ktrace);
diff --git a/mali_kbase/device/backend/mali_kbase_device_csf.c b/mali_kbase/device/backend/mali_kbase_device_csf.c
index 7b37a96..8a4d2e2 100644
--- a/mali_kbase/device/backend/mali_kbase_device_csf.c
+++ b/mali_kbase/device/backend/mali_kbase_device_csf.c
@@ -24,11 +24,15 @@
#include <mali_kbase_hwaccess_backend.h>
#include <mali_kbase_hwcnt_backend_csf_if_fw.h>
+#include <mali_kbase_hwcnt_watchdog_if_timer.h>
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_reset_gpu.h>
#include <csf/mali_kbase_csf.h>
#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+#include <backend/gpu/mali_kbase_model_linux.h>
+#endif
#include <mali_kbase.h>
#include <backend/gpu/mali_kbase_irq_internal.h>
@@ -196,9 +200,31 @@ static void kbase_csf_early_term(struct kbase_device *kbdev)
}
/**
+ * kbase_device_hwcnt_watchdog_if_init - Create hardware counter watchdog
+ * interface.
+ * @kbdev: Device pointer
+ */
+static int kbase_device_hwcnt_watchdog_if_init(struct kbase_device *kbdev)
+{
+ return kbase_hwcnt_watchdog_if_timer_create(
+ &kbdev->hwcnt_watchdog_timer);
+}
+
+/**
+ * kbase_device_hwcnt_watchdog_if_term - Terminate hardware counter watchdog
+ * interface.
+ * @kbdev: Device pointer
+ */
+static void kbase_device_hwcnt_watchdog_if_term(struct kbase_device *kbdev)
+{
+ kbase_hwcnt_watchdog_if_timer_destroy(&kbdev->hwcnt_watchdog_timer);
+}
+
+/**
* kbase_device_hwcnt_backend_csf_if_init - Create hardware counter backend
* firmware interface.
* @kbdev: Device pointer
+ * Return: 0 if successful or a negative error code on failure.
*/
static int kbase_device_hwcnt_backend_csf_if_init(struct kbase_device *kbdev)
{
@@ -226,7 +252,7 @@ static int kbase_device_hwcnt_backend_csf_init(struct kbase_device *kbdev)
return kbase_hwcnt_backend_csf_create(
&kbdev->hwcnt_backend_csf_if_fw,
KBASE_HWCNT_BACKEND_CSF_RING_BUFFER_COUNT,
- &kbdev->hwcnt_gpu_iface);
+ &kbdev->hwcnt_watchdog_timer, &kbdev->hwcnt_gpu_iface);
}
/**
@@ -239,8 +265,13 @@ static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev)
}
static const struct kbase_device_init dev_init[] = {
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ { kbase_gpu_device_create, kbase_gpu_device_destroy,
+ "Dummy model initialization failed" },
+#else
{ assign_irqs, NULL, "IRQ search failed" },
{ registers_map, registers_unmap, "Register map failed" },
+#endif
{ power_control_init, power_control_term,
"Power control initialization failed" },
{ kbase_device_io_history_init, kbase_device_io_history_term,
@@ -270,6 +301,9 @@ static const struct kbase_device_init dev_init[] = {
"Clock rate trace manager initialization failed" },
{ kbase_lowest_gpu_freq_init, NULL,
"Lowest freq initialization failed" },
+ { kbase_device_hwcnt_watchdog_if_init,
+ kbase_device_hwcnt_watchdog_if_term,
+ "GPU hwcnt backend watchdog interface creation failed" },
{ kbase_device_hwcnt_backend_csf_if_init,
kbase_device_hwcnt_backend_csf_if_term,
"GPU hwcnt backend CSF interface creation failed" },
@@ -283,7 +317,6 @@ static const struct kbase_device_init dev_init[] = {
{ kbase_csf_early_init, kbase_csf_early_term,
"Early CSF initialization failed" },
{ NULL, kbase_device_firmware_hwcnt_term, NULL },
-#ifdef MALI_KBASE_BUILD
{ kbase_device_debugfs_init, kbase_device_debugfs_term,
"DebugFS initialization failed" },
/* Sysfs init needs to happen before registering the device with
@@ -305,7 +338,6 @@ static const struct kbase_device_init dev_init[] = {
"GPU property population failed" },
{ kbase_device_late_init, kbase_device_late_term,
"Late device initialization failed" },
-#endif
};
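The dev_init[] table above pairs each init step with its matching term step and an error message. A generic, hedged sketch of how such a table is typically driven is given below: run the inits in declaration order and unwind the already-completed steps in reverse on failure. The loop and the exact field names of struct kbase_device_init are assumptions, not taken from this patch.

/* Generic sketch only; field names (init, term, err_mes) are assumed. */
static int example_run_device_init_table(struct kbase_device *kbdev,
					 const struct kbase_device_init *tbl,
					 unsigned int count)
{
	unsigned int i;

	for (i = 0; i < count; i++) {
		if (tbl[i].init && tbl[i].init(kbdev)) {
			if (tbl[i].err_mes)
				dev_err(kbdev->dev, "%s", tbl[i].err_mes);
			/* Unwind the steps that already succeeded. */
			while (i--)
				if (tbl[i].term)
					tbl[i].term(kbdev);
			return -EINVAL;
		}
	}

	return 0;
}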
static void kbase_device_term_partial(struct kbase_device *kbdev,
@@ -476,3 +508,4 @@ out:
return ret;
}
+KBASE_EXPORT_TEST_API(kbase_device_firmware_init_once);
diff --git a/mali_kbase/device/backend/mali_kbase_device_jm.c b/mali_kbase/device/backend/mali_kbase_device_jm.c
index 7288e8e..2e022eb 100644
--- a/mali_kbase/device/backend/mali_kbase_device_jm.c
+++ b/mali_kbase/device/backend/mali_kbase_device_jm.c
@@ -28,6 +28,9 @@
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_reset_gpu.h>
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+#include <backend/gpu/mali_kbase_model_linux.h>
+#endif /* CONFIG_MALI_NO_MALI */
#ifdef CONFIG_MALI_ARBITER_SUPPORT
#include <arbiter/mali_kbase_arbiter_pm.h>
@@ -156,8 +159,13 @@ static void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev)
}
static const struct kbase_device_init dev_init[] = {
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ { kbase_gpu_device_create, kbase_gpu_device_destroy,
+ "Dummy model initialization failed" },
+#else
{ assign_irqs, NULL, "IRQ search failed" },
{ registers_map, registers_unmap, "Register map failed" },
+#endif
{ kbase_device_io_history_init, kbase_device_io_history_term,
"Register access history initialization failed" },
{ kbase_device_pm_init, kbase_device_pm_term,
@@ -203,7 +211,6 @@ static const struct kbase_device_init dev_init[] = {
"Performance counter instrumentation initialization failed" },
{ kbase_backend_late_init, kbase_backend_late_term,
"Late backend initialization failed" },
-#ifdef MALI_KBASE_BUILD
{ kbase_debug_job_fault_dev_init, kbase_debug_job_fault_dev_term,
"Job fault debug initialization failed" },
{ kbase_device_debugfs_init, kbase_device_debugfs_term,
@@ -225,7 +232,6 @@ static const struct kbase_device_init dev_init[] = {
"Misc device registration failed" },
{ kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer,
"GPU property population failed" },
-#endif
{ NULL, kbase_dummy_job_wa_cleanup, NULL },
{ kbase_device_late_init, kbase_device_late_term,
"Late device initialization failed" },
diff --git a/mali_kbase/device/mali_kbase_device.c b/mali_kbase/device/mali_kbase_device.c
index 518aaf9..dc53c43 100644
--- a/mali_kbase/device/mali_kbase_device.c
+++ b/mali_kbase/device/mali_kbase_device.c
@@ -275,6 +275,7 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
if (err)
goto dma_set_mask_failed;
+
/* There is no limit for Mali, so set to max. We only do this if dma_parms
* is already allocated by the platform.
*/
@@ -345,6 +346,7 @@ void kbase_device_misc_term(struct kbase_device *kbdev)
kbase_device_all_as_term(kbdev);
+
if (kbdev->oom_notifier_block.notifier_call)
unregister_oom_notifier(&kbdev->oom_notifier_block);
}
diff --git a/mali_kbase/device/mali_kbase_device.h b/mali_kbase/device/mali_kbase_device.h
index 517c16b..22ceca0 100644
--- a/mali_kbase/device/mali_kbase_device.h
+++ b/mali_kbase/device/mali_kbase_device.h
@@ -118,22 +118,42 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset);
bool kbase_is_gpu_removed(struct kbase_device *kbdev);
/**
+ * kbase_gpu_cache_flush_and_busy_wait - Start a cache flush and busy wait
+ * @kbdev: Kbase device
+ * @flush_op: Flush command register value to be sent to HW
+ *
+ * Issue a cache flush command to the hardware, then busy-wait for it to
+ * complete by polling the IRQ status.
+ * This function will clear the CLEAN_CACHES_COMPLETED irq mask bit set by other
+ * threads through kbase_gpu_start_cache_clean(), and wake them up manually
+ * after the busy-wait is done. Any pending cache flush commands raised by
+ * other threads are handled in this function.
+ * hwaccess_lock must be held by the caller.
+ *
+ * Return: 0 if successful or a negative error code on failure.
+ */
+int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
+ u32 flush_op);
+
+/**
* kbase_gpu_start_cache_clean - Start a cache clean
* @kbdev: Kbase device
+ * @flush_op: Flush command register value to be sent to HW
*
- * Issue a cache clean and invalidate command to hardware. This function will
- * take hwaccess_lock.
+ * Issue a given cache flush command to hardware.
+ * This function will take hwaccess_lock.
*/
-void kbase_gpu_start_cache_clean(struct kbase_device *kbdev);
+void kbase_gpu_start_cache_clean(struct kbase_device *kbdev, u32 flush_op);
/**
* kbase_gpu_start_cache_clean_nolock - Start a cache clean
* @kbdev: Kbase device
+ * @flush_op: Flush command register value to be sent to HW
*
- * Issue a cache clean and invalidate command to hardware. hwaccess_lock
- * must be held by the caller.
+ * Issue a given cache flush command to hardware.
+ * hwaccess_lock must be held by the caller.
*/
-void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev);
+void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev,
+ u32 flush_op);
/**
* kbase_gpu_wait_cache_clean - Wait for cache cleaning to finish
diff --git a/mali_kbase/device/mali_kbase_device_hw.c b/mali_kbase/device/mali_kbase_device_hw.c
index 4c98ae1..beacc7c 100644
--- a/mali_kbase/device/mali_kbase_device_hw.c
+++ b/mali_kbase/device/mali_kbase_device_hw.c
@@ -38,7 +38,98 @@ bool kbase_is_gpu_removed(struct kbase_device *kbdev)
}
#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
-void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev)
+static int busy_wait_cache_clean_irq(struct kbase_device *kbdev)
+{
+ /* Previously the MMU-AS command was used for the L2 cache flush on
+ * page-table updates. The same max-loops count is used for the GPU
+ * command, because the L2 cache flush overhead is similar in both cases.
+ */
+ unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+
+ /* Wait for the GPU cache clean operation to complete */
+ while (--max_loops &&
+ !(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) &
+ CLEAN_CACHES_COMPLETED)) {
+ ;
+ }
+
+ /* reset gpu if time-out occurred */
+ if (max_loops == 0) {
+ dev_err(kbdev->dev,
+ "CLEAN_CACHES_COMPLETED bit stuck, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n");
+ if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE))
+ kbase_reset_gpu_locked(kbdev);
+ return -EBUSY;
+ }
+
+ /* Clear the interrupt CLEAN_CACHES_COMPLETED bit. */
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
+ CLEAN_CACHES_COMPLETED);
+
+ return 0;
+}
+
+int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
+ u32 flush_op)
+{
+ u32 irq_mask;
+ int need_to_wake_up = 0;
+ int ret = 0;
+
+ /* hwaccess_lock must be held to avoid any sync issue with
+ * kbase_gpu_start_cache_clean() / kbase_clean_caches_done()
+ */
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ /* 1. Check if CLEAN_CACHES_COMPLETED irq mask bit is set.
+ * If it is set, it means there are threads waiting for
+ * CLEAN_CACHES_COMPLETED irq to be raised.
+ * We'll clear the irq mask bit and busy-wait for the cache
+ * clean operation to complete before submitting the cache
+ * clean command required after the GPU page table update.
+ * Pending flush commands will be merged into the requested command.
+ */
+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
+ if (irq_mask & CLEAN_CACHES_COMPLETED) {
+ /* Disable the IRQ first */
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+ irq_mask & ~CLEAN_CACHES_COMPLETED);
+
+ /* Busy-wait for the IRQ status bit to be raised */
+ ret = busy_wait_cache_clean_irq(kbdev);
+ if (ret)
+ return ret;
+
+ /* Merge any pending flush commands */
+ flush_op = GPU_COMMAND_FLUSH_CACHE_MERGE(
+ kbdev->cache_clean_queued, flush_op);
+
+ /* Set the wake-up notification flag */
+ need_to_wake_up = 1;
+ } else {
+ /* Clear the interrupt CLEAN_CACHES_COMPLETED bit. */
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
+ CLEAN_CACHES_COMPLETED);
+ }
+
+ /* 2. Issue GPU_CONTROL.COMMAND.FLUSH_CACHE operation. */
+ KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, flush_op);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op);
+
+ /* 3. Busy-wait for the IRQ status bit to be raised. */
+ ret = busy_wait_cache_clean_irq(kbdev);
+ if (ret)
+ return ret;
+
+ /* 4. Wake up any blocked threads. */
+ if (need_to_wake_up)
+ kbase_gpu_cache_clean_wait_complete(kbdev);
+
+ return ret;
+}
+
+void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev,
+ u32 flush_op)
{
u32 irq_mask;
@@ -47,10 +138,11 @@ void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev)
if (kbdev->cache_clean_in_progress) {
/* If this is called while another clean is in progress, we
* can't rely on the current one to flush any new changes in
- * the cache. Instead, trigger another cache clean immediately
- * after this one finishes.
+ * the cache. Instead, accumulate all cache clean operations
+ * and trigger them immediately after this one finishes.
*/
- kbdev->cache_clean_queued = true;
+ kbdev->cache_clean_queued = GPU_COMMAND_FLUSH_CACHE_MERGE(
+ kbdev->cache_clean_queued, flush_op);
return;
}
@@ -59,19 +151,18 @@ void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev)
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
irq_mask | CLEAN_CACHES_COMPLETED);
- KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, 0);
- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
- GPU_COMMAND_CACHE_CLN_INV_L2);
+ KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, flush_op);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op);
kbdev->cache_clean_in_progress = true;
}
-void kbase_gpu_start_cache_clean(struct kbase_device *kbdev)
+void kbase_gpu_start_cache_clean(struct kbase_device *kbdev, u32 flush_op)
{
unsigned long flags;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- kbase_gpu_start_cache_clean_nolock(kbdev);
+ kbase_gpu_start_cache_clean_nolock(kbdev, flush_op);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
@@ -79,7 +170,7 @@ void kbase_gpu_cache_clean_wait_complete(struct kbase_device *kbdev)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
- kbdev->cache_clean_queued = false;
+ kbdev->cache_clean_queued = 0;
kbdev->cache_clean_in_progress = false;
wake_up(&kbdev->cache_clean_wait);
}
@@ -92,11 +183,14 @@ void kbase_clean_caches_done(struct kbase_device *kbdev)
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
if (kbdev->cache_clean_queued) {
- kbdev->cache_clean_queued = false;
+ u32 pended_flush_op = kbdev->cache_clean_queued;
+
+ kbdev->cache_clean_queued = 0;
- KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, 0);
+ KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL,
+ pended_flush_op);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
- GPU_COMMAND_CACHE_CLN_INV_L2);
+ pended_flush_op);
} else {
/* Disable interrupt */
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
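The key idea behind turning cache_clean_queued into a u32 is that flush requests arriving while a flush is already in flight are accumulated and later issued as a single merged command. GPU_COMMAND_FLUSH_CACHE_MERGE() itself is not shown in this patch; the standalone C sketch below assumes "keep the stronger operation" merge semantics purely to illustrate the accumulate-then-issue flow, not the driver's actual command encoding.

#include <stdio.h>

/* Hypothetical flush command encodings, ordered by strength. */
enum flush_op {
	FLUSH_NONE      = 0,
	FLUSH_CLEAN     = 1,
	FLUSH_CLEAN_INV = 2,
};

/* Assumed semantics of GPU_COMMAND_FLUSH_CACHE_MERGE(): the merged command
 * must be at least as strong as both pending operations.
 */
static enum flush_op flush_cache_merge(enum flush_op a, enum flush_op b)
{
	return a > b ? a : b;
}

int main(void)
{
	enum flush_op queued = FLUSH_NONE;

	/* Two flushes requested while another one is still in progress:
	 * they are accumulated rather than issued immediately ...
	 */
	queued = flush_cache_merge(queued, FLUSH_CLEAN);
	queued = flush_cache_merge(queued, FLUSH_CLEAN_INV);

	/* ... and a single merged command is issued once the current flush
	 * completes, as kbase_clean_caches_done() does in the patch.
	 */
	printf("merged flush op: %d\n", queued);
	return 0;
}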
diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
index 2f4c9d9..e095986 100644
--- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
+++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
@@ -24,6 +24,9 @@
#include "mali_kbase_ipa_counter_common_jm.h"
#include "mali_kbase.h"
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#endif /* CONFIG_MALI_NO_MALI */
/* Performance counter blocks base offsets */
#define JM_BASE (0 * KBASE_IPA_NR_BYTES_PER_BLOCK)
@@ -94,9 +97,15 @@ static u32 kbase_g7x_power_model_get_memsys_counter(struct kbase_ipa_model_vinst
static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data,
u32 counter_block_offset)
{
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ const u32 sc_base = MEMSYS_BASE +
+ (KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS *
+ KBASE_IPA_NR_BYTES_PER_BLOCK);
+#else
const u32 sc_base = MEMSYS_BASE +
(model_data->kbdev->gpu_props.props.l2_props.num_l2_slices *
KBASE_IPA_NR_BYTES_PER_BLOCK);
+#endif
return sc_base + counter_block_offset;
}
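The hunk above changes how the shader-core counter block base is computed under CONFIG_MALI_NO_MALI: the dummy model exposes a fixed number of memsys blocks instead of the real num_l2_slices. The standalone sketch below only illustrates that offset arithmetic; the per-block byte count, the MEMSYS_BASE placement and the block counts are made-up placeholders, not the values from the real headers.

#include <stdint.h>
#include <stdio.h>

/* Placeholder sizes - the real values come from the IPA counter headers. */
#define NR_CNT_PER_BLOCK   64u
#define NR_BYTES_PER_CNT    4u
#define NR_BYTES_PER_BLOCK (NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT)

/* Assume memsys blocks start after two front-end blocks. */
#define MEMSYS_BASE (2u * NR_BYTES_PER_BLOCK)

/* Shader-core blocks follow the memsys blocks, so their base depends on
 * how many memsys blocks the dump layout contains.
 */
static uint32_t sc_counter_offset(uint32_t nr_memsys_blocks,
				  uint32_t counter_block_offset)
{
	const uint32_t sc_base =
		MEMSYS_BASE + nr_memsys_blocks * NR_BYTES_PER_BLOCK;

	return sc_base + counter_block_offset;
}

int main(void)
{
	/* Real HW with 2 L2 slices vs. a dummy model with a fixed maximum of,
	 * say, 8 memsys blocks: the shader-core base lands at different offsets.
	 */
	printf("real HW offset: %u\n", sc_counter_offset(2, 0));
	printf("no-mali offset: %u\n", sc_counter_offset(8, 0));
	return 0;
}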
diff --git a/mali_kbase/ipa/mali_kbase_ipa.c b/mali_kbase/ipa/mali_kbase_ipa.c
index 8b05e68..c0c0cbb 100644
--- a/mali_kbase/ipa/mali_kbase_ipa.c
+++ b/mali_kbase/ipa/mali_kbase_ipa.c
@@ -537,18 +537,34 @@ static void opp_translate_freq_voltage(struct kbase_device *kbdev,
unsigned long *freqs,
unsigned long *volts)
{
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ /* An arbitrary voltage and frequency value can be chosen for testing
+ * in no mali configuration which may not match with any OPP level.
+ */
+ freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL] = nominal_freq;
+ volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL] = nominal_voltage;
+
+ freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = nominal_freq;
+ volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = nominal_voltage;
+#else
u64 core_mask;
+ unsigned int i;
kbase_devfreq_opp_translate(kbdev, nominal_freq, &core_mask,
freqs, volts);
CSTD_UNUSED(core_mask);
+ /* Convert micro volts to milli volts */
+ for (i = 0; i < kbdev->nr_clocks; i++)
+ volts[i] /= 1000;
+
if (kbdev->nr_clocks == 1) {
freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] =
freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL];
volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] =
volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL];
}
+#endif
}
#if KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE
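A compact standalone sketch of the fix-up now performed by opp_translate_freq_voltage(): voltages returned by the devfreq OPP translation are in microvolts and get scaled to millivolts, and with a single clock domain the shader-core entry simply mirrors the top-level entry. The index macros are placeholders standing in for KBASE_IPA_BLOCK_TYPE_TOP_LEVEL / _SHADER_CORES.

#include <stdio.h>

#define IDX_TOP_LEVEL    0
#define IDX_SHADER_CORES 1

static void fixup_freq_volt(unsigned long *freqs, unsigned long *volts,
			    unsigned int nr_clocks)
{
	for (unsigned int i = 0; i < nr_clocks; i++)
		volts[i] /= 1000;	/* microvolts -> millivolts */

	if (nr_clocks == 1) {
		freqs[IDX_SHADER_CORES] = freqs[IDX_TOP_LEVEL];
		volts[IDX_SHADER_CORES] = volts[IDX_TOP_LEVEL];
	}
}

int main(void)
{
	unsigned long freqs[2] = { 800000000, 0 };
	unsigned long volts[2] = { 850000, 0 };	/* 850000 uV == 850 mV */

	fixup_freq_volt(freqs, volts, 1);
	printf("shaders: %lu Hz @ %lu mV\n",
	       freqs[IDX_SHADER_CORES], volts[IDX_SHADER_CORES]);
	return 0;
}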
diff --git a/mali_kbase/jm/mali_kbase_jm_defs.h b/mali_kbase/jm/mali_kbase_jm_defs.h
index cb1c276..ac8f89b 100644
--- a/mali_kbase/jm/mali_kbase_jm_defs.h
+++ b/mali_kbase/jm/mali_kbase_jm_defs.h
@@ -653,8 +653,8 @@ static inline bool kbase_jd_katom_is_protected(
/**
* kbase_atom_is_younger - query if one atom is younger by age than another
- * @katom_a the first atom
- * @katom_a the second atom
+ * @katom_a: the first atom
+ * @katom_b: the second atom
*
* Return: true if the first atom is strictly younger than the second, false
* otherwise.
diff --git a/mali_kbase/mali_base_hwconfig_features.h b/mali_kbase/mali_base_hwconfig_features.h
index 2e81cb1..0f2b106 100644
--- a/mali_kbase/mali_base_hwconfig_features.h
+++ b/mali_kbase/mali_base_hwconfig_features.h
@@ -37,41 +37,42 @@ enum base_hw_feature {
BASE_HW_FEATURE_L2_CONFIG,
BASE_HW_FEATURE_ASN_HASH,
BASE_HW_FEATURE_GPU_SLEEP,
+ BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
BASE_HW_FEATURE_END
};
-static const enum base_hw_feature base_hw_features_generic[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_generic[] = {
BASE_HW_FEATURE_END
};
-static const enum base_hw_feature base_hw_features_tMIx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tMIx[] = {
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_END
};
-static const enum base_hw_feature base_hw_features_tHEx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tHEx[] = {
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_END
};
-static const enum base_hw_feature base_hw_features_tSIx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tSIx[] = {
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_END
};
-static const enum base_hw_feature base_hw_features_tDVx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tDVx[] = {
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_END
};
-static const enum base_hw_feature base_hw_features_tNOx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tNOx[] = {
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
@@ -80,7 +81,7 @@ static const enum base_hw_feature base_hw_features_tNOx[] = {
BASE_HW_FEATURE_END
};
-static const enum base_hw_feature base_hw_features_tGOx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tGOx[] = {
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
@@ -89,50 +90,55 @@ static const enum base_hw_feature base_hw_features_tGOx[] = {
BASE_HW_FEATURE_END
};
-static const enum base_hw_feature base_hw_features_tTRx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tTRx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+ BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
BASE_HW_FEATURE_END
};
-static const enum base_hw_feature base_hw_features_tNAx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tNAx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+ BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
BASE_HW_FEATURE_END
};
-static const enum base_hw_feature base_hw_features_tBEx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tBEx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_L2_CONFIG,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+ BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
BASE_HW_FEATURE_END
};
-static const enum base_hw_feature base_hw_features_tBAx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tBAx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_L2_CONFIG,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+ BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
BASE_HW_FEATURE_END
};
-static const enum base_hw_feature base_hw_features_tDUx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tDUx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_L2_CONFIG,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+ BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
BASE_HW_FEATURE_END
};
-static const enum base_hw_feature base_hw_features_tODx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tODx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_L2_CONFIG,
@@ -140,7 +146,7 @@ static const enum base_hw_feature base_hw_features_tODx[] = {
BASE_HW_FEATURE_END
};
-static const enum base_hw_feature base_hw_features_tGRx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tGRx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_L2_CONFIG,
@@ -148,7 +154,7 @@ static const enum base_hw_feature base_hw_features_tGRx[] = {
BASE_HW_FEATURE_END
};
-static const enum base_hw_feature base_hw_features_tVAx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tVAx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_L2_CONFIG,
@@ -156,7 +162,7 @@ static const enum base_hw_feature base_hw_features_tVAx[] = {
BASE_HW_FEATURE_END
};
-static const enum base_hw_feature base_hw_features_tTUx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tTUx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_L2_CONFIG,
diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h
index d188120..ad45325 100644
--- a/mali_kbase/mali_base_hwconfig_issues.h
+++ b/mali_kbase/mali_base_hwconfig_issues.h
@@ -63,11 +63,11 @@ enum base_hw_issue {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_generic[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_generic[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_10682,
BASE_HW_ISSUE_11054,
@@ -87,7 +87,7 @@ static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_10682,
BASE_HW_ISSUE_11054,
@@ -107,7 +107,7 @@ static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_10682,
BASE_HW_ISSUE_11054,
@@ -127,7 +127,7 @@ static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_model_tMIx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tMIx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TMIX_7891,
@@ -142,7 +142,7 @@ static const enum base_hw_issue base_hw_issues_model_tMIx[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_10682,
BASE_HW_ISSUE_11054,
@@ -155,7 +155,7 @@ static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_10682,
BASE_HW_ISSUE_11054,
@@ -168,7 +168,7 @@ static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_10682,
BASE_HW_ISSUE_11054,
@@ -181,7 +181,7 @@ static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_10682,
BASE_HW_ISSUE_TMIX_7891,
@@ -193,7 +193,7 @@ static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_model_tHEx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tHEx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TMIX_7891,
@@ -203,7 +203,7 @@ static const enum base_hw_issue base_hw_issues_model_tHEx[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_11054,
BASE_HW_ISSUE_TMIX_8133,
@@ -216,7 +216,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_11054,
BASE_HW_ISSUE_TMIX_8133,
@@ -229,7 +229,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_11054,
BASE_HW_ISSUE_TMIX_8133,
@@ -241,7 +241,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TMIX_8133,
BASE_HW_ISSUE_TSIX_1116,
@@ -252,7 +252,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_model_tSIx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tSIx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TMIX_8133,
@@ -262,7 +262,7 @@ static const enum base_hw_issue base_hw_issues_model_tSIx[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TMIX_8133,
BASE_HW_ISSUE_TSIX_1116,
@@ -273,7 +273,7 @@ static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_model_tDVx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tDVx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TMIX_8133,
@@ -283,7 +283,7 @@ static const enum base_hw_issue base_hw_issues_model_tDVx[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TMIX_8133,
BASE_HW_ISSUE_TSIX_1116,
@@ -295,7 +295,7 @@ static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_model_tNOx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNOx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TMIX_8133,
@@ -305,7 +305,7 @@ static const enum base_hw_issue base_hw_issues_model_tNOx[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TMIX_8133,
BASE_HW_ISSUE_TSIX_1116,
@@ -317,7 +317,7 @@ static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TMIX_8133,
BASE_HW_ISSUE_TSIX_1116,
@@ -329,7 +329,7 @@ static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_model_tGOx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGOx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TMIX_8133,
@@ -339,7 +339,7 @@ static const enum base_hw_issue base_hw_issues_model_tGOx[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
@@ -355,7 +355,7 @@ static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tTRx_r0p1[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p1[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
@@ -371,7 +371,7 @@ static const enum base_hw_issue base_hw_issues_tTRx_r0p1[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tTRx_r0p2[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p2[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
@@ -386,7 +386,7 @@ static const enum base_hw_issue base_hw_issues_tTRx_r0p2[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_model_tTRx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTRx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
@@ -398,7 +398,7 @@ static const enum base_hw_issue base_hw_issues_model_tTRx[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
@@ -414,7 +414,7 @@ static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tNAx_r0p1[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p1[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
@@ -429,7 +429,7 @@ static const enum base_hw_issue base_hw_issues_tNAx_r0p1[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_model_tNAx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNAx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
@@ -441,7 +441,7 @@ static const enum base_hw_issue base_hw_issues_model_tNAx[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
@@ -455,7 +455,7 @@ static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tBEx_r0p1[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p1[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
@@ -468,7 +468,7 @@ static const enum base_hw_issue base_hw_issues_tBEx_r0p1[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tBEx_r1p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
@@ -481,7 +481,7 @@ static const enum base_hw_issue base_hw_issues_tBEx_r1p0[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tBEx_r1p1[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p1[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
@@ -494,7 +494,7 @@ static const enum base_hw_issue base_hw_issues_tBEx_r1p1[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_model_tBEx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBEx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
@@ -506,7 +506,7 @@ static const enum base_hw_issue base_hw_issues_model_tBEx[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_lBEx_r1p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
@@ -520,7 +520,7 @@ static const enum base_hw_issue base_hw_issues_lBEx_r1p0[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_lBEx_r1p1[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p1[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
@@ -533,7 +533,7 @@ static const enum base_hw_issue base_hw_issues_lBEx_r1p1[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tBAx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
@@ -546,7 +546,7 @@ static const enum base_hw_issue base_hw_issues_tBAx_r0p0[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tBAx_r1p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r1p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
@@ -559,7 +559,7 @@ static const enum base_hw_issue base_hw_issues_tBAx_r1p0[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_model_tBAx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBAx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
@@ -571,7 +571,7 @@ static const enum base_hw_issue base_hw_issues_model_tBAx[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
@@ -581,7 +581,7 @@ static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_model_tDUx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tDUx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
@@ -591,7 +591,7 @@ static const enum base_hw_issue base_hw_issues_model_tDUx[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
@@ -599,7 +599,7 @@ static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_model_tODx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tODx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
@@ -608,14 +608,14 @@ static const enum base_hw_issue base_hw_issues_model_tODx[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_model_tGRx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGRx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
@@ -623,14 +623,14 @@ static const enum base_hw_issue base_hw_issues_model_tGRx[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_model_tVAx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVAx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
@@ -638,7 +638,7 @@ static const enum base_hw_issue base_hw_issues_model_tVAx[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_model_tTUx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTUx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
@@ -646,7 +646,7 @@ static const enum base_hw_issue base_hw_issues_model_tTUx[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c
index 2472c7c..0cbbf44 100644
--- a/mali_kbase/mali_kbase_core_linux.c
+++ b/mali_kbase/mali_kbase_core_linux.c
@@ -31,6 +31,10 @@
#include <ipa/mali_kbase_ipa_debugfs.h>
#endif /* CONFIG_DEVFREQ_THERMAL */
#endif /* CONFIG_MALI_DEVFREQ */
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+#include "backend/gpu/mali_kbase_model_linux.h"
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#endif /* CONFIG_MALI_NO_MALI */
#include "mali_kbase_mem_profile_debugfs_buf_size.h"
#include "mali_kbase_mem.h"
#include "mali_kbase_mem_pool_debugfs.h"
@@ -52,7 +56,6 @@
#endif
#include "mali_kbase_hwcnt_context.h"
#include "mali_kbase_hwcnt_virtualizer.h"
-#include "mali_kbase_hwcnt_legacy.h"
#include "mali_kbase_kinstr_prfcnt.h"
#include "mali_kbase_vinstr.h"
#if MALI_USE_CSF
@@ -60,6 +63,7 @@
#include "csf/mali_kbase_csf_tiler_heap.h"
#include "csf/mali_kbase_csf_csg_debugfs.h"
#include "csf/mali_kbase_csf_cpu_queue_debugfs.h"
+#include "csf/mali_kbase_csf_event.h"
#endif
#ifdef CONFIG_MALI_ARBITER_SUPPORT
#include "arbiter/mali_kbase_arbiter_pm.h"
@@ -342,15 +346,6 @@ static void kbase_file_delete(struct kbase_file *const kfile)
#if IS_ENABLED(CONFIG_DEBUG_FS)
kbasep_mem_profile_debugfs_remove(kctx);
#endif
-
- mutex_lock(&kctx->legacy_hwcnt_lock);
- /* If this client was performing hardware counter dumping and
- * did not explicitly detach itself, destroy it now
- */
- kbase_hwcnt_legacy_client_destroy(kctx->legacy_hwcnt_cli);
- kctx->legacy_hwcnt_cli = NULL;
- mutex_unlock(&kctx->legacy_hwcnt_lock);
-
kbase_context_debugfs_term(kctx);
kbase_destroy_context(kctx);
@@ -905,62 +900,6 @@ static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx,
return kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, setup);
}
-static int kbase_api_hwcnt_enable(struct kbase_context *kctx,
- struct kbase_ioctl_hwcnt_enable *enable)
-{
- int ret;
-
- mutex_lock(&kctx->legacy_hwcnt_lock);
- if (enable->dump_buffer != 0) {
- /* Non-zero dump buffer, so user wants to create the client */
- if (kctx->legacy_hwcnt_cli == NULL) {
- ret = kbase_hwcnt_legacy_client_create(
- kctx->kbdev->hwcnt_gpu_virt,
- enable,
- &kctx->legacy_hwcnt_cli);
- } else {
- /* This context already has a client */
- ret = -EBUSY;
- }
- } else {
- /* Zero dump buffer, so user wants to destroy the client */
- if (kctx->legacy_hwcnt_cli != NULL) {
- kbase_hwcnt_legacy_client_destroy(
- kctx->legacy_hwcnt_cli);
- kctx->legacy_hwcnt_cli = NULL;
- ret = 0;
- } else {
- /* This context has no client to destroy */
- ret = -EINVAL;
- }
- }
- mutex_unlock(&kctx->legacy_hwcnt_lock);
-
- return ret;
-}
-
-static int kbase_api_hwcnt_dump(struct kbase_context *kctx)
-{
- int ret;
-
- mutex_lock(&kctx->legacy_hwcnt_lock);
- ret = kbase_hwcnt_legacy_client_dump(kctx->legacy_hwcnt_cli);
- mutex_unlock(&kctx->legacy_hwcnt_lock);
-
- return ret;
-}
-
-static int kbase_api_hwcnt_clear(struct kbase_context *kctx)
-{
- int ret;
-
- mutex_lock(&kctx->legacy_hwcnt_lock);
- ret = kbase_hwcnt_legacy_client_clear(kctx->legacy_hwcnt_cli);
- mutex_unlock(&kctx->legacy_hwcnt_lock);
-
- return ret;
-}
-
static int kbase_api_get_cpu_gpu_timeinfo(struct kbase_context *kctx,
union kbase_ioctl_get_cpu_gpu_timeinfo *timeinfo)
{
@@ -992,6 +931,17 @@ static int kbase_api_get_cpu_gpu_timeinfo(struct kbase_context *kctx,
return 0;
}
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+static int kbase_api_hwcnt_set(struct kbase_context *kctx,
+ struct kbase_ioctl_hwcnt_values *values)
+{
+ gpu_model_set_dummy_prfcnt_sample(
+ (u32 __user *)(uintptr_t)values->data,
+ values->size);
+
+ return 0;
+}
+#endif /* CONFIG_MALI_NO_MALI */
static int kbase_api_disjoint_query(struct kbase_context *kctx,
struct kbase_ioctl_disjoint_query *query)
@@ -1415,6 +1365,30 @@ static int kbasep_cs_queue_kick(struct kbase_context *kctx,
return kbase_csf_queue_kick(kctx, kick);
}
+static int kbasep_cs_queue_group_create_1_6(
+ struct kbase_context *kctx,
+ union kbase_ioctl_cs_queue_group_create_1_6 *create)
+{
+ union kbase_ioctl_cs_queue_group_create
+ new_create = { .in = {
+ .tiler_mask = create->in.tiler_mask,
+ .fragment_mask =
+ create->in.fragment_mask,
+ .compute_mask = create->in.compute_mask,
+ .cs_min = create->in.cs_min,
+ .priority = create->in.priority,
+ .tiler_max = create->in.tiler_max,
+ .fragment_max = create->in.fragment_max,
+ .compute_max = create->in.compute_max,
+ } };
+
+ int ret = kbase_csf_queue_group_create(kctx, &new_create);
+
+ create->out.group_handle = new_create.out.group_handle;
+ create->out.group_uid = new_create.out.group_uid;
+
+ return ret;
+}
static int kbasep_cs_queue_group_create(struct kbase_context *kctx,
union kbase_ioctl_cs_queue_group_create *create)
{
@@ -1873,28 +1847,20 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
struct kbase_ioctl_hwcnt_reader_setup,
kctx);
break;
- case KBASE_IOCTL_HWCNT_ENABLE:
- KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_ENABLE,
- kbase_api_hwcnt_enable,
- struct kbase_ioctl_hwcnt_enable,
- kctx);
- break;
- case KBASE_IOCTL_HWCNT_DUMP:
- KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_DUMP,
- kbase_api_hwcnt_dump,
- kctx);
- break;
- case KBASE_IOCTL_HWCNT_CLEAR:
- KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_CLEAR,
- kbase_api_hwcnt_clear,
- kctx);
- break;
case KBASE_IOCTL_GET_CPU_GPU_TIMEINFO:
KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_GET_CPU_GPU_TIMEINFO,
kbase_api_get_cpu_gpu_timeinfo,
union kbase_ioctl_get_cpu_gpu_timeinfo,
kctx);
break;
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ case KBASE_IOCTL_HWCNT_SET:
+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_SET,
+ kbase_api_hwcnt_set,
+ struct kbase_ioctl_hwcnt_values,
+ kctx);
+ break;
+#endif /* CONFIG_MALI_NO_MALI */
#ifdef CONFIG_MALI_CINSTR_GWT
case KBASE_IOCTL_CINSTR_GWT_START:
KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_START,
@@ -1949,6 +1915,12 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
struct kbase_ioctl_cs_queue_kick,
kctx);
break;
+ case KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_6:
+ KBASE_HANDLE_IOCTL_INOUT(
+ KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_6,
+ kbasep_cs_queue_group_create_1_6,
+ union kbase_ioctl_cs_queue_group_create_1_6, kctx);
+ break;
case KBASE_IOCTL_CS_QUEUE_GROUP_CREATE:
KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_QUEUE_GROUP_CREATE,
kbasep_cs_queue_group_create,
@@ -2048,7 +2020,7 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof
if (atomic_read(&kctx->event_count))
read_event = true;
else
- read_error = kbase_csf_read_error(kctx, &event_data);
+ read_error = kbase_csf_event_read_error(kctx, &event_data);
if (!read_event && !read_error) {
bool dump = kbase_csf_cpu_queue_read_dump_req(kctx,
@@ -2153,7 +2125,7 @@ int kbase_event_pending(struct kbase_context *ctx)
WARN_ON_ONCE(!ctx);
return (atomic_read(&ctx->event_count) != 0) ||
- kbase_csf_error_pending(ctx) ||
+ kbase_csf_event_error_pending(ctx) ||
kbase_csf_cpu_queue_dump_needed(ctx);
}
#else
@@ -3910,8 +3882,6 @@ static DEVICE_ATTR(js_ctx_scheduling_mode, S_IRUGO | S_IWUSR,
show_js_ctx_scheduling_mode,
set_js_ctx_scheduling_mode);
-#ifdef MALI_KBASE_BUILD
-
/* Number of entries in serialize_jobs_settings[] */
#define NR_SERIALIZE_JOBS_SETTINGS 5
/* Maximum string length in serialize_jobs_settings[].name */
@@ -4126,7 +4096,6 @@ static ssize_t store_serialize_jobs_sysfs(struct device *dev,
static DEVICE_ATTR(serialize_jobs, 0600, show_serialize_jobs_sysfs,
store_serialize_jobs_sysfs);
-#endif /* MALI_KBASE_BUILD */
#endif /* !MALI_USE_CSF */
static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data)
@@ -4222,6 +4191,15 @@ void kbase_protected_mode_term(struct kbase_device *kbdev)
kfree(kbdev->protected_dev);
}
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+ return 0;
+}
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+}
+#else /* CONFIG_MALI_NO_MALI */
static int kbase_common_reg_map(struct kbase_device *kbdev)
{
int err = 0;
@@ -4257,6 +4235,7 @@ static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
kbdev->reg_size = 0;
}
}
+#endif /* CONFIG_MALI_NO_MALI */
int registers_map(struct kbase_device * const kbdev)
{
@@ -4574,7 +4553,6 @@ void power_control_term(struct kbase_device *kbdev)
#endif
}
-#ifdef MALI_KBASE_BUILD
#if IS_ENABLED(CONFIG_DEBUG_FS)
static void trigger_reset(struct kbase_device *kbdev)
@@ -4847,7 +4825,6 @@ void kbase_device_debugfs_term(struct kbase_device *kbdev)
debugfs_remove_recursive(kbdev->mali_debugfs_directory);
}
#endif /* CONFIG_DEBUG_FS */
-#endif /* MALI_KBASE_BUILD */
int kbase_device_coherency_init(struct kbase_device *kbdev)
{
@@ -5238,10 +5215,8 @@ static int kbase_platform_device_probe(struct platform_device *pdev)
dev_set_drvdata(kbdev->dev, NULL);
kbase_device_free(kbdev);
} else {
-#ifdef MALI_KBASE_BUILD
dev_info(kbdev->dev,
"Probed as %s\n", dev_name(kbdev->mdev.this_device));
-#endif /* MALI_KBASE_BUILD */
kbase_increment_device_id();
#ifdef CONFIG_MALI_ARBITER_SUPPORT
mutex_lock(&kbdev->pm.lock);
@@ -5262,7 +5237,7 @@ static int kbase_platform_device_probe(struct platform_device *pdev)
*
* @dev: The device to suspend
*
- * Return: A standard Linux error code
+ * Return: A standard Linux error code on failure, 0 otherwise.
*/
static int kbase_device_suspend(struct device *dev)
{
@@ -5271,7 +5246,10 @@ static int kbase_device_suspend(struct device *dev)
if (!kbdev)
return -ENODEV;
- kbase_pm_suspend(kbdev);
+ if (kbase_pm_suspend(kbdev)) {
+ dev_warn(kbdev->dev, "Abort suspend as GPU suspension failed");
+ return -EBUSY;
+ }
#ifdef CONFIG_MALI_MIDGARD_DVFS
kbase_pm_metrics_stop(kbdev);
@@ -5512,6 +5490,7 @@ MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \
__stringify(BASE_UK_VERSION_MAJOR) "." \
__stringify(BASE_UK_VERSION_MINOR) ")");
MODULE_SOFTDEP("pre: memory_group_manager");
+MODULE_INFO(import_ns, "DMA_BUF");
#define CREATE_TRACE_POINTS
/* Create the trace points (otherwise we just get code to call a tracepoint) */
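The new KBASE_IOCTL_HWCNT_SET handler added above (NO_MALI builds only) lets user space inject a fabricated counter sample into the dummy GPU model. Below is a hedged user-space sketch of that call; the struct layout and ioctl encoding are assumptions that must be replaced by the kernel's own mali_kbase_ioctl.h, the device node path may differ, and a real client must first complete the usual version-check/set-flags handshake before other ioctls are accepted.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

/* Assumed uapi definitions - in a real client these come verbatim from
 * mali_kbase_ioctl.h; do not trust the command number used here.
 */
struct kbase_ioctl_hwcnt_values {
	uint64_t data;    /* user pointer to the counter values */
	uint32_t size;    /* size of the buffer pointed to by data */
	uint32_t padding;
};
#define KBASE_IOCTL_TYPE 0x80
#define KBASE_IOCTL_HWCNT_SET \
	_IOW(KBASE_IOCTL_TYPE, 32, struct kbase_ioctl_hwcnt_values)

int main(void)
{
	uint32_t sample[64];
	struct kbase_ioctl_hwcnt_values values;
	int fd = open("/dev/mali0", O_RDWR);	/* device node may differ */

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Arbitrary ramp pattern for the dummy counters. */
	for (unsigned int i = 0; i < 64; i++)
		sample[i] = i;

	memset(&values, 0, sizeof(values));
	values.data = (uint64_t)(uintptr_t)sample;
	values.size = sizeof(sample);	/* check units against the uapi header */

	if (ioctl(fd, KBASE_IOCTL_HWCNT_SET, &values))
		perror("KBASE_IOCTL_HWCNT_SET");

	close(fd);
	return 0;
}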
diff --git a/mali_kbase/mali_kbase_ctx_sched.c b/mali_kbase/mali_kbase_ctx_sched.c
index d06380d..8026e7f 100644
--- a/mali_kbase/mali_kbase_ctx_sched.c
+++ b/mali_kbase/mali_kbase_ctx_sched.c
@@ -23,6 +23,9 @@
#include <mali_kbase_defs.h>
#include "mali_kbase_ctx_sched.h"
#include "tl/mali_kbase_tracepoints.h"
+#if !MALI_USE_CSF
+#include <mali_kbase_hwaccess_jm.h>
+#endif
/* Helper for ktrace */
#if KBASE_KTRACE_ENABLE
@@ -124,7 +127,6 @@ int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx)
kbdev, prev_kctx->id);
prev_kctx->as_nr = KBASEP_AS_NR_INVALID;
}
-
kctx->as_nr = free_as;
kbdev->as_to_kctx[free_as] = kctx;
KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS(
@@ -173,6 +175,9 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx)
kbdev->as_to_kctx[kctx->as_nr] = NULL;
kctx->as_nr = KBASEP_AS_NR_INVALID;
kbase_ctx_flag_clear(kctx, KCTX_AS_DISABLED_ON_FAULT);
+#if !MALI_USE_CSF
+ kbase_backend_slot_kctx_purge_locked(kbdev, kctx);
+#endif
}
}
diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h
index 5b1fdd3..86e4042 100644
--- a/mali_kbase/mali_kbase_defs.h
+++ b/mali_kbase/mali_kbase_defs.h
@@ -742,6 +742,7 @@ struct kbase_process {
* @hwcnt.addr: HW counter address
* @hwcnt.addr_bytes: HW counter size in bytes
* @hwcnt.backend: Kbase instrumentation backend
+ * @hwcnt_watchdog_timer: Hardware counter watchdog interface.
* @hwcnt_gpu_iface: Backend interface for GPU hardware counter access.
* @hwcnt_gpu_ctx: Context for GPU hardware counter access.
* @hwaccess_lock must be held when calling
@@ -770,8 +771,8 @@ struct kbase_process {
* @cache_clean_in_progress: Set when a cache clean has been started, and
* cleared when it has finished. This prevents multiple
* cache cleans being done simultaneously.
- * @cache_clean_queued: Set if a cache clean is invoked while another is in
- * progress. If this happens, another cache clean needs
+ * @cache_clean_queued: Pending cache clean operations invoked while another is
+ * in progress. If this is not 0, another cache clean needs
* to be triggered immediately after completion of the
* current one.
* @cache_clean_wait: Signalled when a cache clean has finished.
@@ -979,6 +980,15 @@ struct kbase_device {
char devname[DEVNAME_SIZE];
u32 id;
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ void *model;
+ struct kmem_cache *irq_slab;
+ struct workqueue_struct *irq_workq;
+ atomic_t serving_job_irq;
+ atomic_t serving_gpu_irq;
+ atomic_t serving_mmu_irq;
+ spinlock_t reg_op_lock;
+#endif /* CONFIG_MALI_NO_MALI */
struct kbase_pm_device_data pm;
struct kbase_mem_pool_group mem_pools;
@@ -1008,6 +1018,7 @@ struct kbase_device {
#if MALI_USE_CSF
struct kbase_hwcnt_backend_csf_if hwcnt_backend_csf_if_fw;
+ struct kbase_hwcnt_watchdog_interface hwcnt_watchdog_timer;
#else
struct kbase_hwcnt {
spinlock_t lock;
@@ -1037,7 +1048,7 @@ struct kbase_device {
u64 lowest_gpu_freq_khz;
bool cache_clean_in_progress;
- bool cache_clean_queued;
+ u32 cache_clean_queued;
wait_queue_head_t cache_clean_wait;
void *platform_context;
@@ -1205,6 +1216,7 @@ struct kbase_device {
struct priority_control_manager_device *pcm_dev;
struct notifier_block oom_notifier_block;
+
};
/**
@@ -1562,6 +1574,12 @@ struct kbase_sub_alloc {
* pages used for GPU allocations, done for the context,
* to the memory consumed by the process.
* @gpu_va_end: End address of the GPU va space (in 4KB page units)
+ * @running_total_tiler_heap_nr_chunks: Running total of number of chunks in all
+ * tiler heaps of the kbase context.
+ * @running_total_tiler_heap_memory: Running total of the tiler heap memory in the
+ * kbase context.
+ * @peak_total_tiler_heap_memory: Peak value of the total tiler heap memory in the
+ * kbase context.
* @jit_va: Indicates if a JIT_VA zone has been created.
* @mem_profile_data: Buffer containing the profiling information provided by
* Userspace, can be read through the mem_profile debugfs file.
@@ -1588,11 +1606,6 @@ struct kbase_sub_alloc {
* @slots_pullable: Bitmask of slots, indicating the slots for which the
* context has pullable atoms in the runnable tree.
* @work: Work structure used for deferred ASID assignment.
- * @legacy_hwcnt_cli: Pointer to the legacy userspace hardware counters
- * client, there can be only such client per kbase
- * context.
- * @legacy_hwcnt_lock: Lock used to prevent concurrent access to
- * @legacy_hwcnt_cli.
* @completed_jobs: List containing completed atoms for which base_jd_event is
* to be posted.
* @work_count: Number of work items, corresponding to atoms, currently
@@ -1775,6 +1788,11 @@ struct kbase_context {
spinlock_t mm_update_lock;
struct mm_struct __rcu *process_mm;
u64 gpu_va_end;
+#if MALI_USE_CSF
+ u32 running_total_tiler_heap_nr_chunks;
+ u64 running_total_tiler_heap_memory;
+ u64 peak_total_tiler_heap_memory;
+#endif
bool jit_va;
#if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -1788,10 +1806,6 @@ struct kbase_context {
struct list_head job_fault_resume_event_list;
#endif /* CONFIG_DEBUG_FS */
-
- struct kbase_hwcnt_legacy_client *legacy_hwcnt_cli;
- struct mutex legacy_hwcnt_lock;
-
struct kbase_va_region *jit_alloc[1 + BASE_JIT_ALLOC_COUNT];
u8 jit_max_allocations;
u8 jit_current_allocations;
diff --git a/mali_kbase/mali_kbase_gpuprops.c b/mali_kbase/mali_kbase_gpuprops.c
index 967c08e..b5ba642 100644
--- a/mali_kbase/mali_kbase_gpuprops.c
+++ b/mali_kbase/mali_kbase_gpuprops.c
@@ -371,6 +371,7 @@ static void kbase_gpuprops_calculate_props(
gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
#if MALI_USE_CSF
+ CSTD_UNUSED(gpu_id);
gpu_props->thread_props.max_registers =
KBASE_UBFX32(gpu_props->raw_props.thread_features,
0U, 22);
diff --git a/mali_kbase/mali_kbase_hwaccess_instr.h b/mali_kbase/mali_kbase_hwaccess_instr.h
index 819ca13..3766310 100644
--- a/mali_kbase/mali_kbase_hwaccess_instr.h
+++ b/mali_kbase/mali_kbase_hwaccess_instr.h
@@ -144,4 +144,27 @@ void kbase_instr_backend_term(struct kbase_device *kbdev);
void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev);
#endif
+/**
+ * kbase_instr_hwcnt_on_unrecoverable_error() - JM HWC instr backend function
+ * called when unrecoverable errors
+ * are detected.
+ * @kbdev: Kbase device
+ *
+ * This should be called on encountering errors that can only be recovered from
+ * with a reset, or that may put the HWC logic in a state that could result in
+ * a hang, for example when the HW becomes unresponsive.
+ *
+ * Caller must hold kbdev->hwaccess_lock.
+ */
+void kbase_instr_hwcnt_on_unrecoverable_error(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_hwcnt_on_before_reset() - JM HWC instr backend function to be
+ * called immediately before a reset.
+ * Takes us out of the unrecoverable
+ * error state, if we were in it.
+ * @kbdev: Kbase device
+ */
+void kbase_instr_hwcnt_on_before_reset(struct kbase_device *kbdev);
+
#endif /* _KBASE_HWACCESS_INSTR_H_ */
diff --git a/mali_kbase/mali_kbase_hwaccess_jm.h b/mali_kbase/mali_kbase_hwaccess_jm.h
index 8689647..d0207f7 100644
--- a/mali_kbase/mali_kbase_hwaccess_jm.h
+++ b/mali_kbase/mali_kbase_hwaccess_jm.h
@@ -299,4 +299,21 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
*/
bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev);
+/**
+ * kbase_backend_slot_kctx_purge_locked - Perform a purge on the slot_rb tracked
+ * kctx
+ *
+ * @kbdev: Device pointer
+ * @kctx: The kbase context that needs to be purged from slot_rb[]
+ *
+ * For JM GPUs, the L1 read only caches may need a start_flush invalidation,
+ * potentially on all slots (even if the kctx was only using a single slot),
+ * following a context termination or address-space ID recycle. This function
+ * performs a clean-up purge on the given kctx if it has been tracked by the
+ * slot_rb[] objects.
+ *
+ * Caller must hold kbase_device->hwaccess_lock.
+ */
+void kbase_backend_slot_kctx_purge_locked(struct kbase_device *kbdev, struct kbase_context *kctx);
+
#endif /* _KBASE_HWACCESS_JM_H_ */
diff --git a/mali_kbase/mali_kbase_hwaccess_pm.h b/mali_kbase/mali_kbase_hwaccess_pm.h
index 36bbe2d..a8e4b95 100644
--- a/mali_kbase/mali_kbase_hwaccess_pm.h
+++ b/mali_kbase/mali_kbase_hwaccess_pm.h
@@ -85,8 +85,10 @@ void kbase_hwaccess_pm_halt(struct kbase_device *kbdev);
* Perform any backend-specific actions to suspend the GPU
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 if suspend was successful.
*/
-void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev);
+int kbase_hwaccess_pm_suspend(struct kbase_device *kbdev);
/**
* Perform any backend-specific actions to resume the GPU from a suspend
diff --git a/mali_kbase/mali_kbase_hwcnt.c b/mali_kbase/mali_kbase_hwcnt.c
index ea4893d..1fa6640 100644
--- a/mali_kbase/mali_kbase_hwcnt.c
+++ b/mali_kbase/mali_kbase_hwcnt.c
@@ -158,7 +158,6 @@ int kbase_hwcnt_context_init(
return 0;
- destroy_workqueue(hctx->wq);
err_alloc_workqueue:
kfree(hctx);
err_alloc_hctx:
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf.c b/mali_kbase/mali_kbase_hwcnt_backend_csf.c
index 7ba1671..4602138 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_csf.c
+++ b/mali_kbase/mali_kbase_hwcnt_backend_csf.c
@@ -36,16 +36,24 @@
#define BASE_MAX_NR_CLOCKS_REGULATORS 2
#endif
+/* Backend watchdog timer interval in milliseconds: 1 second. */
+#define HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS ((u32)1000)
+
/**
* enum kbase_hwcnt_backend_csf_dump_state - HWC CSF backend dumping states.
*
* @KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE: Initial state, or the state if there is
* an error.
*
- * @KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED: A dump has been requested and we are
- * waiting for an ACK, this ACK could come from either PRFCNT_ACK,
+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED: A user dump has been requested and
+ * we are waiting for an ACK, this ACK could come from either PRFCNT_ACK,
* PROTMODE_ENTER_ACK, or if an error occurs.
*
+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED: A watchdog dump has been
+ * requested and we are waiting for an ACK - this ACK could come from
+ * PRFCNT_ACK, or if an error occurs. PROTMODE_ENTER_ACK does not apply here
+ * since a watchdog request cannot be triggered in protected mode.
+ *
* @KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT: Checking the insert
* immediately after receiving the ACK, so we know which index corresponds to
* the buffer we requested.
@@ -60,18 +68,25 @@
* @KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED: The dump completed successfully.
*
* Valid state transitions:
- * IDLE -> REQUESTED (on dump request)
- * REQUESTED -> QUERYING_INSERT (on dump ack)
+ * IDLE -> REQUESTED (on user dump request)
+ * IDLE -> WATCHDOG_REQUESTED (on watchdog request)
+ * IDLE -> QUERYING_INSERT (on user dump request in protected mode)
+ * REQUESTED -> QUERYING_INSERT (on dump acknowledged from firmware)
+ * WATCHDOG_REQUESTED -> REQUESTED (on user dump request)
+ * WATCHDOG_REQUESTED -> COMPLETED (on dump acknowledged from firmware for watchdog request)
* QUERYING_INSERT -> WORKER_LAUNCHED (on worker submission)
* WORKER_LAUNCHED -> ACCUMULATING (while the worker is accumulating)
* ACCUMULATING -> COMPLETED (on accumulation completion)
- * COMPLETED -> REQUESTED (on dump request)
+ * COMPLETED -> QUERYING_INSERT (on user dump request in protected mode)
+ * COMPLETED -> REQUESTED (on user dump request)
+ * COMPLETED -> WATCHDOG_REQUESTED (on watchdog request)
* COMPLETED -> IDLE (on disable)
* ANY -> IDLE (on error)
*/
enum kbase_hwcnt_backend_csf_dump_state {
KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE,
KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED,
+ KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED,
KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT,
KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED,
KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING,
@@ -136,6 +151,7 @@ enum kbase_hwcnt_backend_csf_enable_state {
* @counter_set: The performance counter set to use.
* @metadata: Hardware counter metadata.
* @prfcnt_info: Performance counter information.
+ * @watchdog_if: Watchdog interface object pointer.
*/
struct kbase_hwcnt_backend_csf_info {
struct kbase_hwcnt_backend_csf *backend;
@@ -146,6 +162,7 @@ struct kbase_hwcnt_backend_csf_info {
enum kbase_hwcnt_set counter_set;
const struct kbase_hwcnt_metadata *metadata;
struct kbase_hwcnt_backend_csf_if_prfcnt_info prfcnt_info;
+ struct kbase_hwcnt_watchdog_interface *watchdog_if;
};
/**
@@ -192,6 +209,10 @@ struct kbase_hwcnt_csf_physical_layout {
* @old_sample_buf: HWC sample buffer to save the previous values
* for delta calculation, size
* prfcnt_info.dump_bytes.
+ * @watchdog_last_seen_insert_idx: The insert index which the watchdog has last
+ * seen, to check any new firmware automatic
+ * samples generated during the watchdog
+ * period.
* @ring_buf: Opaque pointer for ring buffer object.
* @ring_buf_cpu_base: CPU base address of the allocated ring buffer.
* @clk_enable_map: The enable map specifying enabled clock domains.
@@ -204,6 +225,8 @@ struct kbase_hwcnt_csf_physical_layout {
* it is completed accumulating up to the
* insert_index_to_accumulate.
* Should be initialized to the "complete" state.
+ * @user_requested: Flag to indicate a dump_request called from
+ * user.
* @hwc_dump_workq: Single threaded work queue for HWC workers
* execution.
* @hwc_dump_work: Worker to accumulate samples.
@@ -219,6 +242,7 @@ struct kbase_hwcnt_backend_csf {
u64 *to_user_buf;
u64 *accum_buf;
u32 *old_sample_buf;
+ u32 watchdog_last_seen_insert_idx;
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf;
void *ring_buf_cpu_base;
u64 clk_enable_map;
@@ -226,6 +250,7 @@ struct kbase_hwcnt_backend_csf {
u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS];
struct kbase_hwcnt_csf_physical_layout phys_layout;
struct completion dump_completed;
+ bool user_requested;
struct workqueue_struct *hwc_dump_workq;
struct work_struct hwc_dump_work;
struct work_struct hwc_threshold_work;
@@ -594,6 +619,10 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(
backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
backend_csf->info->csf_if->set_extract_index(
backend_csf->info->csf_if->ctx, insert_index_to_stop);
+ /* Update the watchdog last seen index to check any new FW auto samples
+ * in the next watchdog callback.
+ */
+ backend_csf->watchdog_last_seen_insert_idx = insert_index_to_stop;
backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
flags);
}
@@ -612,6 +641,67 @@ static void kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
}
}
+static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info)
+{
+ struct kbase_hwcnt_backend_csf_info *csf_info = info;
+ struct kbase_hwcnt_backend_csf *backend_csf;
+ unsigned long flags;
+
+ csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags);
+
+ if (WARN_ON(!kbasep_hwcnt_backend_csf_backend_exists(csf_info))) {
+ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
+ return;
+ }
+
+ backend_csf = csf_info->backend;
+
+ /* Only do watchdog request when all conditions are met: */
+ if (/* 1. Backend is enabled. */
+ (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) &&
+ /* 2. FW is not in protected mode. */
+ (!csf_info->fw_in_protected_mode) &&
+ /* 3. dump state indicates no other dumping is in progress. */
+ ((backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) ||
+ (backend_csf->dump_state ==
+ KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED))) {
+ u32 extract_index;
+ u32 insert_index;
+
+ /* Read the raw extract and insert indexes from the CSF interface. */
+ csf_info->csf_if->get_indexes(csf_info->csf_if->ctx,
+ &extract_index, &insert_index);
+
+ /* Do watchdog request if no new FW auto samples. */
+ if (insert_index ==
+ backend_csf->watchdog_last_seen_insert_idx) {
+ /* Trigger the watchdog request. */
+ csf_info->csf_if->dump_request(csf_info->csf_if->ctx);
+
+ /* A watchdog dump is required, change the state to
+ * start the request process.
+ */
+ backend_csf->dump_state =
+ KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED;
+ }
+ }
+
+ /* Must schedule another callback when in the transitional state because
+ * this function can be called for the first time before the
+ * performance-counter-enable interrupt has been received.
+ */
+ if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) ||
+ (backend_csf->enable_state ==
+ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED)) {
+ /* Reschedule the timer for next watchdog callback. */
+ csf_info->watchdog_if->modify(
+ csf_info->watchdog_if->timer,
+ HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS);
+ }
+
+ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
+}
+
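To summarise the gating done by the callback above, here is a standalone C sketch of the decision: a watchdog dump is only requested when the backend is enabled, the FW is not in protected mode, no other dump is mid-flight, and the firmware has not already produced a new automatic sample since the last check. The struct and enum are simplified stand-ins for the driver's state, not its real definitions.

#include <stdbool.h>
#include <stdio.h>

enum dump_state { DUMP_IDLE, DUMP_REQUESTED, DUMP_WATCHDOG_REQUESTED,
		  DUMP_QUERYING_INSERT, DUMP_WORKER_LAUNCHED,
		  DUMP_ACCUMULATING, DUMP_COMPLETED };

struct backend_model {
	bool enabled;
	bool fw_in_protected_mode;
	enum dump_state dump_state;
	unsigned int last_seen_insert_idx;
};

static bool watchdog_should_request(const struct backend_model *b,
				    unsigned int fw_insert_idx)
{
	if (!b->enabled || b->fw_in_protected_mode)
		return false;
	if (b->dump_state != DUMP_IDLE && b->dump_state != DUMP_COMPLETED)
		return false;
	/* If the FW already generated a new auto sample, no watchdog dump
	 * is needed this period.
	 */
	return fw_insert_idx == b->last_seen_insert_idx;
}

int main(void)
{
	struct backend_model b = { .enabled = true, .dump_state = DUMP_IDLE,
				   .last_seen_insert_idx = 4 };

	printf("request? %d\n", watchdog_should_request(&b, 4)); /* 1: no new sample */
	printf("request? %d\n", watchdog_should_request(&b, 5)); /* 0: FW auto sample seen */
	return 0;
}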
/**
* kbasep_hwcnt_backend_csf_dump_worker() - HWC dump worker.
* @work: Work structure.
@@ -826,6 +916,7 @@ static int kbasep_hwcnt_backend_csf_dump_enable_nolock(
struct kbase_hwcnt_backend_csf *backend_csf =
(struct kbase_hwcnt_backend_csf *)backend;
struct kbase_hwcnt_backend_csf_if_enable enable;
+ int err;
if (!backend_csf || !enable_map ||
(enable_map->metadata != backend_csf->info->metadata))
@@ -841,6 +932,13 @@ static int kbasep_hwcnt_backend_csf_dump_enable_nolock(
if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED)
return -EIO;
+ err = backend_csf->info->watchdog_if->enable(
+ backend_csf->info->watchdog_if->timer,
+ HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS,
+ kbasep_hwcnt_backend_watchdog_timer_cb, backend_csf->info);
+ if (err)
+ return err;
+
backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
WARN_ON(!completion_done(&backend_csf->dump_completed));
kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
@@ -948,6 +1046,13 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
flags);
+ /* Deregister the timer and block until any timer callback has completed.
+ * We've transitioned out of the ENABLED state so we can guarantee it
+ * won't reschedule itself.
+ */
+ backend_csf->info->watchdog_if->disable(
+ backend_csf->info->watchdog_if->timer);
+
/* Block until any async work has completed. We have transitioned out of
* the ENABLED state so we can guarantee no new work will concurrently
* be submitted.
@@ -978,6 +1083,9 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
break;
}
+ backend_csf->user_requested = false;
+ backend_csf->watchdog_last_seen_insert_idx = 0;
+
backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
flags);
@@ -1006,6 +1114,7 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
struct kbase_hwcnt_backend_csf *backend_csf =
(struct kbase_hwcnt_backend_csf *)backend;
bool do_request = false;
+ bool watchdog_dumping = false;
if (!backend_csf)
return -EINVAL;
@@ -1022,6 +1131,7 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED;
*dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend);
kbasep_hwcnt_backend_csf_cc_update(backend_csf);
+ backend_csf->user_requested = true;
backend_csf->info->csf_if->unlock(
backend_csf->info->csf_if->ctx, flags);
return 0;
@@ -1035,11 +1145,21 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
}
/* Make sure that this is either the first request since enable or the
- * previous dump has completed, so we can avoid midway through a dump.
+ * previous user dump has completed or a watchdog dump is in progress,
+ * so that we never start midway through another user dump.
+ * If a user request arrives while a watchdog dump is in progress,
+ * the user request takes ownership of the watchdog sample by
+ * changing the dump_state, so the interrupt for the watchdog
+ * request is processed instead of being ignored.
*/
if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) &&
(backend_csf->dump_state !=
- KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)) {
+ KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) &&
+ (backend_csf->dump_state !=
+ KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED)) {
+ /* HWC is disabled or another user dump is ongoing,
+ * or we're on fault.
+ */
backend_csf->info->csf_if->unlock(
backend_csf->info->csf_if->ctx, flags);
/* HWC is disabled or another dump is ongoing, or we are on
@@ -1051,6 +1171,10 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
/* Reset the completion so dump_wait() has something to wait on. */
reinit_completion(&backend_csf->dump_completed);
+ if (backend_csf->dump_state ==
+ KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED)
+ watchdog_dumping = true;
+
if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) &&
!backend_csf->info->fw_in_protected_mode) {
/* Only do the request if we are fully enabled and not in
@@ -1078,15 +1202,29 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
*dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend);
kbasep_hwcnt_backend_csf_cc_update(backend_csf);
+ backend_csf->user_requested = true;
- if (do_request)
- backend_csf->info->csf_if->dump_request(
- backend_csf->info->csf_if->ctx);
- else
+ if (do_request) {
+ /* If a watchdog dump is in progress, there is no need to issue
+ * another request; just update the dump_state and take
+ * ownership of the sample that the watchdog requested.
+ */
+ if (!watchdog_dumping)
+ backend_csf->info->csf_if->dump_request(
+ backend_csf->info->csf_if->ctx);
+ } else
kbase_hwcnt_backend_csf_submit_dump_worker(backend_csf->info);
backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
flags);
+
+ /* Push back the watchdog timer so the next regular check is
+ * delayed, since a dump has just been requested.
+ */
+ backend_csf->info->watchdog_if->modify(
+ backend_csf->info->watchdog_if->timer,
+ HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS);
+
return 0;
}
@@ -1105,11 +1243,18 @@ kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend)
wait_for_completion(&backend_csf->dump_completed);
backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
- /* Make sure the last dump actually succeeded. */
- errcode = (backend_csf->dump_state ==
- KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) ?
- 0 :
- -EIO;
+ /* When a user dump was requested, make sure the last dump actually
+ * succeeded.
+ */
+ if (backend_csf->user_requested &&
+ ((backend_csf->dump_state ==
+ KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) ||
+ (backend_csf->dump_state ==
+ KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED)))
+ errcode = 0;
+ else
+ errcode = -EIO;
+
backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
flags);
@@ -1155,13 +1300,16 @@ static int kbasep_hwcnt_backend_csf_dump_get(
(dst_enable_map->metadata != dst->metadata))
return -EINVAL;
+ /* Extract elapsed cycle count for each clock domain if enabled. */
kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) {
if (!kbase_hwcnt_clk_enable_map_enabled(
dst_enable_map->clk_enable_map, clk))
continue;
- /* Extract elapsed cycle count for each clock domain. */
- dst->clk_cnt_buf[clk] = backend_csf->cycle_count_elapsed[clk];
+ /* Reset the counter to zero if accumulation is off. */
+ if (!accumulate)
+ dst->clk_cnt_buf[clk] = 0;
+ dst->clk_cnt_buf[clk] += backend_csf->cycle_count_elapsed[clk];
}
/* We just return the user buffer without checking the current state,
@@ -1279,6 +1427,8 @@ kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info,
backend_csf->enable_state = KBASE_HWCNT_BACKEND_CSF_DISABLED;
backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
complete_all(&backend_csf->dump_completed);
+ backend_csf->user_requested = false;
+ backend_csf->watchdog_last_seen_insert_idx = 0;
*out_backend = backend_csf;
return 0;
@@ -1401,38 +1551,41 @@ static void kbasep_hwcnt_backend_csf_info_destroy(
* used to create backend interface.
* @ring_buf_cnt: The buffer count of the CSF hwcnt backend ring buffer.
* MUST be power of 2.
+ * @watchdog_if: Non-NULL pointer to a hwcnt watchdog interface structure used to create
+ * backend interface.
* @out_info: Non-NULL pointer to where info is stored on success.
* @return 0 on success, else error code.
*/
static int kbasep_hwcnt_backend_csf_info_create(
struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
+ struct kbase_hwcnt_watchdog_interface *watchdog_if,
const struct kbase_hwcnt_backend_csf_info **out_info)
{
struct kbase_hwcnt_backend_csf_info *info = NULL;
- WARN_ON(!csf_if);
- WARN_ON(!out_info);
- WARN_ON(!is_power_of_2(ring_buf_cnt));
+ if (WARN_ON(!csf_if) || WARN_ON(!watchdog_if) || WARN_ON(!out_info) ||
+ WARN_ON(!is_power_of_2(ring_buf_cnt)))
+ return -EINVAL;
- info = kzalloc(sizeof(*info), GFP_KERNEL);
+ info = kmalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
+ *info = (struct kbase_hwcnt_backend_csf_info)
+ {
#if defined(CONFIG_MALI_PRFCNT_SET_SECONDARY)
- info->counter_set = KBASE_HWCNT_SET_SECONDARY;
+ .counter_set = KBASE_HWCNT_SET_SECONDARY,
#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY)
- info->counter_set = KBASE_HWCNT_SET_TERTIARY;
+ .counter_set = KBASE_HWCNT_SET_TERTIARY,
#else
- /* Default to primary */
- info->counter_set = KBASE_HWCNT_SET_PRIMARY;
+ /* Default to primary */
+ .counter_set = KBASE_HWCNT_SET_PRIMARY,
#endif
-
- info->backend = NULL;
- info->csf_if = csf_if;
- info->ring_buf_cnt = ring_buf_cnt;
- info->fw_in_protected_mode = false;
- info->unrecoverable_error_happened = false;
-
+ .backend = NULL, .csf_if = csf_if, .ring_buf_cnt = ring_buf_cnt,
+ .fw_in_protected_mode = false,
+ .unrecoverable_error_happened = false,
+ .watchdog_if = watchdog_if,
+ };
*out_info = info;
return 0;
@@ -1653,6 +1806,14 @@ void kbase_hwcnt_backend_csf_on_prfcnt_sample(
return;
backend_csf = csf_info->backend;
+ /* Skip the dump_work if it's a watchdog request. */
+ if (backend_csf->dump_state ==
+ KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED) {
+ backend_csf->dump_state =
+ KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED;
+ return;
+ }
+
/* If the current state is not REQUESTED, this HWC sample will be
* skipped and processed in next dump_request.
*/
@@ -1831,14 +1992,15 @@ void kbase_hwcnt_backend_csf_metadata_term(
}
}
-int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if,
- u32 ring_buf_cnt,
- struct kbase_hwcnt_backend_interface *iface)
+int kbase_hwcnt_backend_csf_create(
+ struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
+ struct kbase_hwcnt_watchdog_interface *watchdog_if,
+ struct kbase_hwcnt_backend_interface *iface)
{
int errcode;
const struct kbase_hwcnt_backend_csf_info *info = NULL;
- if (!iface || !csf_if)
+ if (!iface || !csf_if || !watchdog_if)
return -EINVAL;
/* The buffer count must be power of 2 */
@@ -1846,7 +2008,7 @@ int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if,
return -EINVAL;
errcode = kbasep_hwcnt_backend_csf_info_create(csf_if, ring_buf_cnt,
- &info);
+ watchdog_if, &info);
if (errcode)
return errcode;
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf.h b/mali_kbase/mali_kbase_hwcnt_backend_csf.h
index bfdf140..e0cafbe 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_csf.h
+++ b/mali_kbase/mali_kbase_hwcnt_backend_csf.h
@@ -29,6 +29,7 @@
#include "mali_kbase_hwcnt_backend.h"
#include "mali_kbase_hwcnt_backend_csf_if.h"
+#include "mali_kbase_hwcnt_watchdog_if.h"
/**
* kbase_hwcnt_backend_csf_create() - Create a CSF hardware counter backend
@@ -37,6 +38,8 @@
* used to create backend interface.
* @ring_buf_cnt: The buffer count of CSF hwcnt backend, used when allocate ring
* buffer, MUST be power of 2.
+ * @watchdog_if: Non-NULL pointer to a hwcnt watchdog interface structure used
+ * to create backend interface.
* @iface: Non-NULL pointer to backend interface structure that is filled
* in on creation success.
*
@@ -44,9 +47,10 @@
*
* Return: 0 on success, else error code.
*/
-int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if,
- u32 ring_buf_cnt,
- struct kbase_hwcnt_backend_interface *iface);
+int kbase_hwcnt_backend_csf_create(
+ struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
+ struct kbase_hwcnt_watchdog_interface *watchdog_if,
+ struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_csf_metadata_init() - Initialize the metadata for a CSF
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c
index 124224d..40cf6bb 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c
+++ b/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c
@@ -38,6 +38,9 @@
#include <linux/log2.h>
#include "mali_kbase_ccswe.h"
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#endif /* CONFIG_MALI_NO_MALI */
/** The number of nanoseconds in a second. */
#define NSECS_IN_SEC 1000000000ull /* ns */
@@ -217,6 +220,26 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info)
{
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ size_t dummy_model_blk_count;
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+
+ prfcnt_info->l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
+ prfcnt_info->core_mask =
+ (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
+ /* 1 FE block + 1 Tiler block + l2_count blocks + shader_core blocks */
+ dummy_model_blk_count =
+ 2 + prfcnt_info->l2_count + fls64(prfcnt_info->core_mask);
+ prfcnt_info->dump_bytes =
+ dummy_model_blk_count * KBASE_DUMMY_MODEL_BLOCK_SIZE;
+ prfcnt_info->prfcnt_block_size =
+ KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK *
+ KBASE_HWCNT_VALUE_HW_BYTES;
+ prfcnt_info->clk_cnt = 1;
+ prfcnt_info->clearing_samples = true;
+ fw_ctx->buf_bytes = prfcnt_info->dump_bytes;
+#else
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
struct kbase_device *kbdev;
u32 prfcnt_size;
@@ -261,6 +284,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
/* Total size must be multiple of block size. */
WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) !=
0);
+#endif
}
static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
@@ -355,6 +379,11 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
*out_ring_buf =
(struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf;
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ /* The dummy model needs the CPU mapping. */
+ gpu_model_set_dummy_prfcnt_base_cpu(fw_ring_buf->cpu_dump_base, kbdev,
+ phys, num_pages);
+#endif /* CONFIG_MALI_NO_MALI */
return 0;
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_jm.c b/mali_kbase/mali_kbase_hwcnt_backend_jm.c
index 56bb1b6..d041391 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_jm.c
+++ b/mali_kbase/mali_kbase_hwcnt_backend_jm.c
@@ -28,6 +28,9 @@
#include "mali_kbase_hwaccess_time.h"
#include "mali_kbase_ccswe.h"
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+#include "backend/gpu/mali_kbase_model_dummy.h"
+#endif /* CONFIG_MALI_NO_MALI */
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
#include "backend/gpu/mali_kbase_pm_internal.h"
@@ -140,6 +143,11 @@ kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
if (!kbdev || !info)
return -EINVAL;
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ info->l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
+ info->core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
+ info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK;
+#else /* CONFIG_MALI_NO_MALI */
{
const struct base_gpu_props *props = &kbdev->gpu_props.props;
const size_t l2_count = props->l2_props.num_l2_slices;
@@ -151,6 +159,7 @@ kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
info->prfcnt_values_per_block =
KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK;
}
+#endif /* CONFIG_MALI_NO_MALI */
/* Determine the number of available clock domains. */
for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) {
@@ -569,6 +578,11 @@ static int kbasep_hwcnt_backend_jm_dump_get(
struct kbase_hwcnt_backend_jm *backend_jm =
(struct kbase_hwcnt_backend_jm *)backend;
size_t clk;
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ struct kbase_device *kbdev;
+ unsigned long flags;
+ int errcode;
+#endif /* CONFIG_MALI_NO_MALI */
if (!backend_jm || !dst || !dst_enable_map ||
(backend_jm->info->metadata != dst->metadata) ||
@@ -582,15 +596,32 @@ static int kbasep_hwcnt_backend_jm_dump_get(
/* Dump sample to the internal 64-bit user buffer. */
kbasep_hwcnt_backend_jm_dump_sample(backend_jm);
+ /* Extract elapsed cycle count for each clock domain if enabled. */
kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) {
if (!kbase_hwcnt_clk_enable_map_enabled(
dst_enable_map->clk_enable_map, clk))
continue;
- /* Extract elapsed cycle count for each clock domain. */
- dst->clk_cnt_buf[clk] = backend_jm->cycle_count_elapsed[clk];
+ /* Reset the counter to zero if accumulation is off. */
+ if (!accumulate)
+ dst->clk_cnt_buf[clk] = 0;
+ dst->clk_cnt_buf[clk] += backend_jm->cycle_count_elapsed[clk];
}
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ kbdev = backend_jm->kctx->kbdev;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ /* Update the current configuration information. */
+ errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev,
+ &backend_jm->curr_config);
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ if (errcode)
+ return errcode;
+#endif /* CONFIG_MALI_NO_MALI */
return kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf,
dst_enable_map, backend_jm->pm_core_mask,
&backend_jm->curr_config, accumulate);
@@ -700,6 +731,9 @@ static int kbasep_hwcnt_backend_jm_create(
int errcode;
struct kbase_device *kbdev;
struct kbase_hwcnt_backend_jm *backend = NULL;
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ size_t page_count;
+#endif
WARN_ON(!info);
WARN_ON(!out_backend);
@@ -739,6 +773,13 @@ static int kbasep_hwcnt_backend_jm_create(
kbase_ccswe_init(&backend->ccswe_shader_cores);
backend->rate_listener.notify = kbasep_hwcnt_backend_jm_on_freq_change;
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ /* The dummy model needs the CPU mapping. */
+ page_count = PFN_UP(info->dump_bytes);
+ gpu_model_set_dummy_prfcnt_base_cpu(backend->cpu_dump_va, kbdev,
+ backend->vmap->cpu_pages,
+ page_count);
+#endif /* CONFIG_MALI_NO_MALI */
*out_backend = backend;
return 0;
diff --git a/mali_kbase/mali_kbase_hwcnt_legacy.c b/mali_kbase/mali_kbase_hwcnt_legacy.c
deleted file mode 100644
index 5ca4c51..0000000
--- a/mali_kbase/mali_kbase_hwcnt_legacy.c
+++ /dev/null
@@ -1,179 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
-/*
- *
- * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-#include "mali_kbase_hwcnt_legacy.h"
-#include "mali_kbase_hwcnt_virtualizer.h"
-#include "mali_kbase_hwcnt_types.h"
-#include "mali_kbase_hwcnt_gpu.h"
-#include "mali_kbase_hwcnt_gpu_narrow.h"
-#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h>
-
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-
-/**
- * struct kbase_hwcnt_legacy_client - Legacy hardware counter client.
- * @user_dump_buf: Pointer to a non-NULL user buffer, where dumps are returned.
- * @enable_map: Counter enable map.
- * @dump_buf: Dump buffer used to manipulate dumps from virtualizer.
- * @hvcli: Hardware counter virtualizer client.
- * @dump_buf_user: Narrow dump buffer used to manipulate dumps before they are
- * copied to user.
- * @metadata_user: For compatibility with the user driver interface, this
- * contains a narrowed version of the hardware counter metadata
- * which is limited to 64 entries per block and 32-bit for each
- * entry.
- */
-struct kbase_hwcnt_legacy_client {
- void __user *user_dump_buf;
- struct kbase_hwcnt_enable_map enable_map;
- struct kbase_hwcnt_dump_buffer dump_buf;
- struct kbase_hwcnt_virtualizer_client *hvcli;
- struct kbase_hwcnt_dump_buffer_narrow dump_buf_user;
- const struct kbase_hwcnt_metadata_narrow *metadata_user;
-};
-
-int kbase_hwcnt_legacy_client_create(
- struct kbase_hwcnt_virtualizer *hvirt,
- struct kbase_ioctl_hwcnt_enable *enable,
- struct kbase_hwcnt_legacy_client **out_hlcli)
-{
- int errcode;
- struct kbase_hwcnt_legacy_client *hlcli;
- const struct kbase_hwcnt_metadata *metadata;
- struct kbase_hwcnt_physical_enable_map phys_em;
-
- if (!hvirt || !enable || !enable->dump_buffer || !out_hlcli)
- return -EINVAL;
-
- metadata = kbase_hwcnt_virtualizer_metadata(hvirt);
-
- hlcli = kzalloc(sizeof(*hlcli), GFP_KERNEL);
- if (!hlcli)
- return -ENOMEM;
-
- errcode = kbase_hwcnt_gpu_metadata_narrow_create(&hlcli->metadata_user,
- metadata);
- if (errcode)
- goto error;
-
- errcode = kbase_hwcnt_dump_buffer_narrow_alloc(hlcli->metadata_user,
- &hlcli->dump_buf_user);
- if (errcode)
- goto error;
-
- hlcli->user_dump_buf = (void __user *)(uintptr_t)enable->dump_buffer;
-
- errcode = kbase_hwcnt_enable_map_alloc(metadata, &hlcli->enable_map);
- if (errcode)
- goto error;
-
- /* Translate from the ioctl enable map to the internal one */
- phys_em.fe_bm = enable->fe_bm;
- phys_em.shader_bm = enable->shader_bm;
- phys_em.tiler_bm = enable->tiler_bm;
- phys_em.mmu_l2_bm = enable->mmu_l2_bm;
- kbase_hwcnt_gpu_enable_map_from_physical(&hlcli->enable_map, &phys_em);
-
- errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hlcli->dump_buf);
- if (errcode)
- goto error;
-
- errcode = kbase_hwcnt_virtualizer_client_create(
- hvirt, &hlcli->enable_map, &hlcli->hvcli);
- if (errcode)
- goto error;
-
- *out_hlcli = hlcli;
- return 0;
-
-error:
- kbase_hwcnt_legacy_client_destroy(hlcli);
- return errcode;
-}
-
-void kbase_hwcnt_legacy_client_destroy(struct kbase_hwcnt_legacy_client *hlcli)
-{
- if (!hlcli)
- return;
-
- kbase_hwcnt_virtualizer_client_destroy(hlcli->hvcli);
- kbase_hwcnt_dump_buffer_free(&hlcli->dump_buf);
- kbase_hwcnt_enable_map_free(&hlcli->enable_map);
- kbase_hwcnt_dump_buffer_narrow_free(&hlcli->dump_buf_user);
- kbase_hwcnt_gpu_metadata_narrow_destroy(hlcli->metadata_user);
- kfree(hlcli);
-}
-
-int kbase_hwcnt_legacy_client_dump(struct kbase_hwcnt_legacy_client *hlcli)
-{
- int errcode;
- u64 ts_start_ns;
- u64 ts_end_ns;
-
- if (!hlcli)
- return -EINVAL;
-
- /* Dump into the kernel buffer */
- errcode = kbase_hwcnt_virtualizer_client_dump(hlcli->hvcli,
- &ts_start_ns, &ts_end_ns, &hlcli->dump_buf);
- if (errcode)
- return errcode;
-
- /* Patch the dump buf headers, to hide the counters that other hwcnt
- * clients are using.
- */
- kbase_hwcnt_gpu_patch_dump_headers(
- &hlcli->dump_buf, &hlcli->enable_map);
-
- /* Copy the dump buffer to the userspace visible buffer. The strict
- * variant will explicitly zero any non-enabled counters to ensure
- * nothing except exactly what the user asked for is made visible.
- *
- * A narrow copy is required since virtualizer has a bigger buffer
- * but user only needs part of it.
- */
- kbase_hwcnt_dump_buffer_copy_strict_narrow(
- &hlcli->dump_buf_user, &hlcli->dump_buf, &hlcli->enable_map);
-
- /* Copy into the user's buffer */
- errcode = copy_to_user(hlcli->user_dump_buf,
- hlcli->dump_buf_user.dump_buf,
- hlcli->dump_buf_user.md_narrow->dump_buf_bytes);
- /* Non-zero errcode implies user buf was invalid or too small */
- if (errcode)
- return -EFAULT;
-
- return 0;
-}
-
-int kbase_hwcnt_legacy_client_clear(struct kbase_hwcnt_legacy_client *hlcli)
-{
- u64 ts_start_ns;
- u64 ts_end_ns;
-
- if (!hlcli)
- return -EINVAL;
-
- /* Dump with a NULL buffer to clear this client's counters */
- return kbase_hwcnt_virtualizer_client_dump(hlcli->hvcli,
- &ts_start_ns, &ts_end_ns, NULL);
-}
diff --git a/mali_kbase/mali_kbase_hwcnt_legacy.h b/mali_kbase/mali_kbase_hwcnt_legacy.h
deleted file mode 100644
index 163ae8d..0000000
--- a/mali_kbase/mali_kbase_hwcnt_legacy.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- *
- * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-/*
- * Legacy hardware counter interface, giving userspace clients simple,
- * synchronous access to hardware counters.
- *
- * Any functions operating on an single legacy hardware counter client instance
- * must be externally synchronised.
- * Different clients may safely be used concurrently.
- */
-
-#ifndef _KBASE_HWCNT_LEGACY_H_
-#define _KBASE_HWCNT_LEGACY_H_
-
-struct kbase_hwcnt_legacy_client;
-struct kbase_ioctl_hwcnt_enable;
-struct kbase_hwcnt_virtualizer;
-
-/**
- * kbase_hwcnt_legacy_client_create() - Create a legacy hardware counter client.
- * @hvirt: Non-NULL pointer to hardware counter virtualizer the client
- * should be attached to.
- * @enable: Non-NULL pointer to hwcnt_enable structure, containing a valid
- * pointer to a user dump buffer large enough to hold a dump, and
- * the counters that should be enabled.
- * @out_hlcli: Non-NULL pointer to where the pointer to the created client will
- * be stored on success.
- *
- * Return: 0 on success, else error code.
- */
-int kbase_hwcnt_legacy_client_create(
- struct kbase_hwcnt_virtualizer *hvirt,
- struct kbase_ioctl_hwcnt_enable *enable,
- struct kbase_hwcnt_legacy_client **out_hlcli);
-
-/**
- * kbase_hwcnt_legacy_client_destroy() - Destroy a legacy hardware counter
- * client.
- * @hlcli: Pointer to the legacy hardware counter client.
- *
- * Will safely destroy a client in any partial state of construction.
- */
-void kbase_hwcnt_legacy_client_destroy(struct kbase_hwcnt_legacy_client *hlcli);
-
-/**
- * kbase_hwcnt_legacy_client_dump() - Perform a hardware counter dump into the
- * client's user buffer.
- * @hlcli: Non-NULL pointer to the legacy hardware counter client.
- *
- * This function will synchronously dump hardware counters into the user buffer
- * specified on client creation, with the counters specified on client creation.
- *
- * The counters are automatically cleared after each dump, such that the next
- * dump performed will return the counter values accumulated between the time of
- * this function call and the next dump.
- *
- * Return: 0 on success, else error code.
- */
-int kbase_hwcnt_legacy_client_dump(struct kbase_hwcnt_legacy_client *hlcli);
-
-/**
- * kbase_hwcnt_legacy_client_clear() - Perform and discard a hardware counter
- * dump.
- * @hlcli: Non-NULL pointer to the legacy hardware counter client.
- *
- * This function will synchronously clear the hardware counters, such that the
- * next dump performed will return the counter values accumulated between the
- * time of this function call and the next dump.
- *
- * Return: 0 on success, else error code.
- */
-int kbase_hwcnt_legacy_client_clear(struct kbase_hwcnt_legacy_client *hlcli);
-
-#endif /* _KBASE_HWCNT_LEGACY_H_ */
diff --git a/mali_kbase/mali_kbase_hwcnt_watchdog_if.h b/mali_kbase/mali_kbase_hwcnt_watchdog_if.h
new file mode 100644
index 0000000..1873318
--- /dev/null
+++ b/mali_kbase/mali_kbase_hwcnt_watchdog_if.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Virtual interface for hardware counter watchdog.
+ */
+
+#ifndef _KBASE_HWCNT_WATCHDOG_IF_H_
+#define _KBASE_HWCNT_WATCHDOG_IF_H_
+
+#include <linux/types.h>
+
+/*
+ * Opaque structure of information used to create a watchdog timer interface.
+ */
+struct kbase_hwcnt_watchdog_info;
+
+/**
+ * typedef kbase_hwcnt_watchdog_callback_fn - Callback function called when the watchdog timer expires
+ *
+ * @user_data: Pointer to the callback user data.
+ */
+typedef void kbase_hwcnt_watchdog_callback_fn(void *user_data);
+
+/**
+ * typedef kbase_hwcnt_watchdog_enable_fn - Enable watchdog timer
+ *
+ * @timer: Non-NULL pointer to a watchdog timer interface context
+ * @period_ms: Period in milliseconds of the watchdog timer
+ * @callback: Non-NULL pointer to a watchdog callback function
+ * @user_data: Pointer to the user data, used when watchdog timer callback is called
+ *
+ * Return: 0 if the watchdog timer was enabled successfully, error code otherwise.
+ */
+typedef int kbase_hwcnt_watchdog_enable_fn(
+ const struct kbase_hwcnt_watchdog_info *timer, u32 period_ms,
+ kbase_hwcnt_watchdog_callback_fn *callback, void *user_data);
+
+/**
+ * typedef kbase_hwcnt_watchdog_disable_fn - Disable watchdog timer
+ *
+ * @timer: Non-NULL pointer to a watchdog timer interface context
+ */
+typedef void
+kbase_hwcnt_watchdog_disable_fn(const struct kbase_hwcnt_watchdog_info *timer);
+
+/**
+ * typedef kbase_hwcnt_watchdog_modify_fn - Modify watchdog timer's timeout
+ *
+ * @timer: Non-NULL pointer to a watchdog timer interface context
+ * @delay_ms: Watchdog timer expiration in milliseconds
+ */
+typedef void
+kbase_hwcnt_watchdog_modify_fn(const struct kbase_hwcnt_watchdog_info *timer,
+ u32 delay_ms);
+
+/**
+ * struct kbase_hwcnt_watchdog_interface - Hardware counter watchdog virtual interface.
+ *
+ * @timer: Immutable watchdog timer info
+ * @enable: Function ptr to enable watchdog
+ * @disable: Function ptr to disable watchdog
+ * @modify: Function ptr to modify watchdog
+ */
+struct kbase_hwcnt_watchdog_interface {
+ const struct kbase_hwcnt_watchdog_info *timer;
+ kbase_hwcnt_watchdog_enable_fn *enable;
+ kbase_hwcnt_watchdog_disable_fn *disable;
+ kbase_hwcnt_watchdog_modify_fn *modify;
+};
+
+#endif /* _KBASE_HWCNT_WATCHDOG_IF_H_ */
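The interface above is consumed only through its function pointers, so callers never touch the underlying timer implementation directly. A minimal usage sketch, assuming an illustrative callback, backend pointer and 1000 ms period (none of these values are defined by the interface itself):

#include "mali_kbase_hwcnt_watchdog_if.h"

/* Illustrative callback; user_data is whatever was passed to enable(). */
static void example_watchdog_cb(void *user_data)
{
        /* e.g. request a watchdog dump on behalf of user_data */
}

static int example_arm_watchdog(struct kbase_hwcnt_watchdog_interface *wd,
                                void *backend)
{
        /* Arm the timer with an assumed 1000 ms period. */
        int err = wd->enable(wd->timer, 1000, example_watchdog_cb, backend);

        if (err)
                return err;

        /* Push the next expiry back, e.g. after an explicit dump request. */
        wd->modify(wd->timer, 1000);

        /* Disarm; the timer implementation waits for any in-flight callback. */
        wd->disable(wd->timer);

        return 0;
}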
diff --git a/mali_kbase/mali_kbase_hwcnt_watchdog_if_timer.c b/mali_kbase/mali_kbase_hwcnt_watchdog_if_timer.c
new file mode 100644
index 0000000..4a03080
--- /dev/null
+++ b/mali_kbase/mali_kbase_hwcnt_watchdog_if_timer.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "mali_kbase.h"
+#include "mali_kbase_hwcnt_watchdog_if.h"
+#include "mali_kbase_hwcnt_watchdog_if_timer.h"
+
+#include <linux/timer.h>
+#include <linux/slab.h>
+
+/**
+ * struct kbase_hwcnt_watchdog_if_timer_info - Timer information for watchdog
+ * interface.
+ *
+ * @watchdog_timer: Watchdog timer
+ * @timer_enabled: True if watchdog timer enabled, otherwise false
+ * @callback: Watchdog callback function
+ * @user_data: Pointer to user data passed as argument to the callback
+ * function
+ */
+struct kbase_hwcnt_watchdog_if_timer_info {
+ struct timer_list watchdog_timer;
+ bool timer_enabled;
+ kbase_hwcnt_watchdog_callback_fn *callback;
+ void *user_data;
+};
+
+/**
+ * kbasep_hwcnt_watchdog_callback() - Watchdog timer callback
+ *
+ * @timer: Timer structure
+ *
+ * Function to be called when watchdog timer expires. Will call the callback
+ * function provided at enable().
+ */
+static void kbasep_hwcnt_watchdog_callback(struct timer_list *const timer)
+{
+ struct kbase_hwcnt_watchdog_if_timer_info *const info =
+ container_of(timer, struct kbase_hwcnt_watchdog_if_timer_info,
+ watchdog_timer);
+ if (info->callback)
+ info->callback(info->user_data);
+}
+
+static int kbasep_hwcnt_watchdog_if_timer_enable(
+ const struct kbase_hwcnt_watchdog_info *const timer,
+ u32 const period_ms, kbase_hwcnt_watchdog_callback_fn *const callback,
+ void *const user_data)
+{
+ struct kbase_hwcnt_watchdog_if_timer_info *const timer_info =
+ (void *)timer;
+
+ if (WARN_ON(!timer) || WARN_ON(!callback))
+ return -EINVAL;
+
+ timer_info->callback = callback;
+ timer_info->user_data = user_data;
+
+ mod_timer(&timer_info->watchdog_timer,
+ jiffies + msecs_to_jiffies(period_ms));
+ timer_info->timer_enabled = true;
+
+ return 0;
+}
+
+static void kbasep_hwcnt_watchdog_if_timer_disable(
+ const struct kbase_hwcnt_watchdog_info *const timer)
+{
+ struct kbase_hwcnt_watchdog_if_timer_info *const timer_info =
+ (void *)timer;
+
+ if (WARN_ON(!timer))
+ return;
+
+ if (!timer_info->timer_enabled)
+ return;
+
+ del_timer_sync(&timer_info->watchdog_timer);
+ timer_info->timer_enabled = false;
+}
+
+static void kbasep_hwcnt_watchdog_if_timer_modify(
+ const struct kbase_hwcnt_watchdog_info *const timer, u32 const delay_ms)
+{
+ struct kbase_hwcnt_watchdog_if_timer_info *const timer_info =
+ (void *)timer;
+
+ if (WARN_ON(!timer))
+ return;
+
+ mod_timer(&timer_info->watchdog_timer,
+ jiffies + msecs_to_jiffies(delay_ms));
+}
+
+void kbase_hwcnt_watchdog_if_timer_destroy(
+ struct kbase_hwcnt_watchdog_interface *const watchdog_if)
+{
+ struct kbase_hwcnt_watchdog_if_timer_info *timer_info;
+
+ if (WARN_ON(!watchdog_if))
+ return;
+
+ timer_info = (void *)watchdog_if->timer;
+
+ if (WARN_ON(!timer_info))
+ return;
+
+ del_timer_sync(&timer_info->watchdog_timer);
+ kfree(timer_info);
+
+ memset(watchdog_if, 0, sizeof(*watchdog_if));
+}
+
+int kbase_hwcnt_watchdog_if_timer_create(
+ struct kbase_hwcnt_watchdog_interface *const watchdog_if)
+{
+ struct kbase_hwcnt_watchdog_if_timer_info *timer_info;
+
+ if (WARN_ON(!watchdog_if))
+ return -EINVAL;
+
+ timer_info = kmalloc(sizeof(*timer_info), GFP_KERNEL);
+ if (!timer_info)
+ return -ENOMEM;
+
+ *timer_info =
+ (struct kbase_hwcnt_watchdog_if_timer_info){ .timer_enabled =
+ false };
+
+ kbase_timer_setup(&timer_info->watchdog_timer,
+ kbasep_hwcnt_watchdog_callback);
+
+ *watchdog_if = (struct kbase_hwcnt_watchdog_interface){
+ .timer = (void *)timer_info,
+ .enable = kbasep_hwcnt_watchdog_if_timer_enable,
+ .disable = kbasep_hwcnt_watchdog_if_timer_disable,
+ .modify = kbasep_hwcnt_watchdog_if_timer_modify,
+ };
+
+ return 0;
+}
diff --git a/mali_kbase/mali_kbase_hwcnt_watchdog_if_timer.h b/mali_kbase/mali_kbase_hwcnt_watchdog_if_timer.h
new file mode 100644
index 0000000..3bd69c3
--- /dev/null
+++ b/mali_kbase/mali_kbase_hwcnt_watchdog_if_timer.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Concrete implementation of kbase_hwcnt_watchdog_interface for HWC backend
+ */
+
+#ifndef _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_
+#define _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_
+
+struct kbase_hwcnt_watchdog_interface;
+
+/**
+ * kbase_hwcnt_watchdog_if_timer_create() - Create a watchdog interface for the hardware counter backend.
+ *
+ * @watchdog_if: Non-NULL pointer to watchdog interface that is filled in on creation success
+ *
+ * Return: 0 on success, error otherwise.
+ */
+int kbase_hwcnt_watchdog_if_timer_create(
+ struct kbase_hwcnt_watchdog_interface *watchdog_if);
+
+/**
+ * kbase_hwcnt_watchdog_if_timer_destroy() - Destroy a watchdog interface for the hardware
+ * counter backend.
+ *
+ * @watchdog_if: Pointer to watchdog interface to destroy
+ */
+void kbase_hwcnt_watchdog_if_timer_destroy(
+ struct kbase_hwcnt_watchdog_interface *watchdog_if);
+
+#endif /* _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_ */
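Together with the updated kbase_hwcnt_backend_csf_create() signature earlier in this change, the expected wiring is roughly the sketch below (error handling trimmed; csf_if, ring_buf_cnt and iface are assumed to come from the usual backend setup):

#include "mali_kbase_hwcnt_backend_csf.h"
#include "mali_kbase_hwcnt_watchdog_if_timer.h"

static int example_csf_backend_setup(struct kbase_hwcnt_backend_csf_if *csf_if,
                                     u32 ring_buf_cnt,
                                     struct kbase_hwcnt_watchdog_interface *wd_if,
                                     struct kbase_hwcnt_backend_interface *iface)
{
        /* Create the concrete timer-backed watchdog first... */
        int err = kbase_hwcnt_watchdog_if_timer_create(wd_if);

        if (err)
                return err;

        /* ...then hand it to the CSF backend, which arms it in dump_enable()
         * and disarms it in dump_disable().
         */
        err = kbase_hwcnt_backend_csf_create(csf_if, ring_buf_cnt, wd_if, iface);
        if (err)
                kbase_hwcnt_watchdog_if_timer_destroy(wd_if);

        return err;
}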
diff --git a/mali_kbase/mali_kbase_jd.c b/mali_kbase/mali_kbase_jd.c
index c892455..08824bd 100644
--- a/mali_kbase/mali_kbase_jd.c
+++ b/mali_kbase/mali_kbase_jd.c
@@ -619,8 +619,8 @@ static void jd_update_jit_usage(struct kbase_jd_atom *katom)
else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP)
size_to_read = sizeof(u64[COUNT]);
- ptr = kbase_vmap(kctx, reg->heap_info_gpu_addr, size_to_read,
- &mapping);
+ ptr = kbase_vmap_prot(kctx, reg->heap_info_gpu_addr, size_to_read,
+ KBASE_REG_CPU_RD, &mapping);
if (!ptr) {
dev_warn(kctx->kbdev->dev,
diff --git a/mali_kbase/mali_kbase_kinstr_prfcnt.c b/mali_kbase/mali_kbase_kinstr_prfcnt.c
index ce996ca..27ff3bb 100644
--- a/mali_kbase/mali_kbase_kinstr_prfcnt.c
+++ b/mali_kbase/mali_kbase_kinstr_prfcnt.c
@@ -19,10 +19,10 @@
*
*/
+#include "mali_kbase.h"
#include "mali_kbase_kinstr_prfcnt.h"
#include "mali_kbase_hwcnt_virtualizer.h"
#include "mali_kbase_hwcnt_types.h"
-#include <uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h>
#include "mali_kbase_hwcnt_gpu.h"
#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h>
#include "mali_malisw.h"
@@ -44,14 +44,12 @@
*/
#define DUMP_INTERVAL_MIN_NS (100 * NSEC_PER_USEC)
-/* The minimum allowed interval between dumps, in microseconds
- * (equivalent to 10KHz)
- */
-#define DUMP_INTERVAL_MIN_US (DUMP_INTERVAL_MIN_NS / 1000)
-
/* The maximum allowed buffers per client */
#define MAX_BUFFER_COUNT 32
+/* The module printing prefix */
+#define KINSTR_PRFCNT_PREFIX "mali_kbase_kinstr_prfcnt: "
+
/**
* struct kbase_kinstr_prfcnt_context - IOCTL interface for userspace hardware
* counters.
@@ -80,11 +78,11 @@ struct kbase_kinstr_prfcnt_context {
/**
* struct kbase_kinstr_prfcnt_sample - Buffer and descriptor for sample data.
- * @sample_meta: Pointer to samle metadata.
+ * @sample_meta: Pointer to sample metadata.
* @dump_buf: Dump buffer containing sample data.
*/
struct kbase_kinstr_prfcnt_sample {
- u64 *sample_meta;
+ struct prfcnt_metadata *sample_meta;
struct kbase_hwcnt_dump_buffer dump_buf;
};
@@ -92,7 +90,8 @@ struct kbase_kinstr_prfcnt_sample {
* struct kbase_kinstr_prfcnt_sample_array - Array of sample data.
* @page_addr: Address of allocated pages. A single allocation is used
* for all Dump Buffers in the array.
- * @page_order: The allocation order of the pages.
+ * @page_order: The allocation order of the pages, i.e. the base-2
+ * logarithm of the number of pages allocated.
* @sample_count: Number of allocated samples.
* @samples: Non-NULL pointer to the array of Dump Buffers.
*/
@@ -107,59 +106,91 @@ struct kbase_kinstr_prfcnt_sample_array {
* struct kbase_kinstr_prfcnt_client_config - Client session configuration.
* @prfcnt_mode: Sampling mode: either manual or periodic.
* @counter_set: Set of performance counter blocks.
+ * @scope: Scope of performance counters to capture.
* @buffer_count: Number of buffers used to store samples.
- * @period_us: Sampling period, in microseconds, or 0 if manual mode.
+ * @period_ns: Sampling period, in nanoseconds, or 0 if manual mode.
* @phys_em: Enable map used by the GPU.
*/
struct kbase_kinstr_prfcnt_client_config {
u8 prfcnt_mode;
u8 counter_set;
+ u8 scope;
u16 buffer_count;
- u64 period_us;
+ u64 period_ns;
struct kbase_hwcnt_physical_enable_map phys_em;
};
/**
+ * struct kbase_kinstr_prfcnt_async - Asynchronous sampling operation to
+ * carry out for a kinstr_prfcnt_client.
+ * @dump_work: Worker for performing asynchronous counter dumps.
+ * @user_data: User data for asynchronous dump in progress.
+ * @ts_end_ns: End timestamp of most recent async dump.
+ */
+struct kbase_kinstr_prfcnt_async {
+ struct work_struct dump_work;
+ u64 user_data;
+ u64 ts_end_ns;
+};
+
+/**
* struct kbase_kinstr_prfcnt_client - A kinstr_prfcnt client attached
* to a kinstr_prfcnt context.
- * @kinstr_ctx: kinstr_prfcnt context client is attached to.
- * @hvcli: Hardware counter virtualizer client.
- * @node: Node used to attach this client to list in kinstr_prfcnt
- * context.
- * @next_dump_time_ns: Time in ns when this client's next periodic dump must
- * occur. If 0, not a periodic client.
- * @dump_interval_ns: Interval between periodic dumps. If 0, not a periodic
- * client.
- * @config: Configuration of the client session.
- * @enable_map: Counters enable map.
- * @tmp_buf: Temporary buffer to use before handing over dump to
- * client.
- * @sample_arr: Array of dump buffers allocated by this client.
- * @dump_bufs_meta: Metadata of dump buffers.
- * @meta_idx: Index of metadata being accessed by userspace.
- * @read_idx: Index of buffer read by userspace.
- * @write_idx: Index of buffer being written by dump worker.
- * @waitq: Client's notification queue.
- * @sample_size: Size of the data required for one sample, in bytes.
- * @sample_count: Number of samples the client is able to capture.
+ * @kinstr_ctx: kinstr_prfcnt context client is attached to.
+ * @hvcli: Hardware counter virtualizer client.
+ * @node: Node used to attach this client to list in
+ * kinstr_prfcnt context.
+ * @cmd_sync_lock: Lock coordinating the reader interface for commands
+ * that need to interact with the async sample dump
+ * worker thread.
+ * @next_dump_time_ns: Time in ns when this client's next periodic dump must
+ * occur. If 0, not a periodic client.
+ * @dump_interval_ns: Interval between periodic dumps. If 0, not a periodic
+ * client.
+ * @sample_flags: Flags for the current active dumping sample, marking
+ * the conditions/events during the dump duration.
+ * @active: True if the client has been started.
+ * @config: Configuration of the client session.
+ * @enable_map: Counters enable map.
+ * @tmp_buf: Temporary buffer to use before handing over dump to
+ * client.
+ * @sample_arr: Array of dump buffers allocated by this client.
+ * @read_idx: Index of buffer read by userspace.
+ * @write_idx: Index of buffer being written by dump worker.
+ * @waitq: Client's notification queue.
+ * @sample_size: Size of the data required for one sample, in bytes.
+ * @sample_count: Number of samples the client is able to capture.
+ * @sync_sample_count: Number of available spaces for synchronous samples.
+ * It can differ from sample_count if asynchronous
+ * sample requests are reserving space in the buffer.
+ * @user_data: User data associated with the session.
+ * This is set when the session is started and stopped.
+ * This value is ignored for control commands that
+ * provide another value.
+ * @async: Asynchronous sampling operations to carry out in this
+ * client's session.
*/
struct kbase_kinstr_prfcnt_client {
struct kbase_kinstr_prfcnt_context *kinstr_ctx;
struct kbase_hwcnt_virtualizer_client *hvcli;
struct list_head node;
+ struct mutex cmd_sync_lock;
u64 next_dump_time_ns;
u32 dump_interval_ns;
+ u32 sample_flags;
+ bool active;
struct kbase_kinstr_prfcnt_client_config config;
struct kbase_hwcnt_enable_map enable_map;
struct kbase_hwcnt_dump_buffer tmp_buf;
struct kbase_kinstr_prfcnt_sample_array sample_arr;
- struct kbase_hwcnt_reader_metadata *dump_bufs_meta;
- atomic_t meta_idx;
atomic_t read_idx;
atomic_t write_idx;
wait_queue_head_t waitq;
size_t sample_size;
size_t sample_count;
+ atomic_t sync_sample_count;
+ u64 user_data;
+ struct kbase_kinstr_prfcnt_async async;
};
static struct prfcnt_enum_item kinstr_prfcnt_supported_requests[] = {
@@ -188,21 +219,6 @@ static struct prfcnt_enum_item kinstr_prfcnt_supported_requests[] = {
};
/**
- * kbasep_kinstr_prfcnt_hwcnt_reader_buffer_ready() - Check if client has ready
- * buffers.
- * @cli: Non-NULL pointer to kinstr_prfcnt client.
- *
- * Return: Non-zero if client has at least one dumping buffer filled that was
- * not notified to user yet.
- */
-static int kbasep_kinstr_prfcnt_hwcnt_reader_buffer_ready(
- struct kbase_kinstr_prfcnt_client *cli)
-{
- WARN_ON(!cli);
- return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx);
-}
-
-/**
* kbasep_kinstr_prfcnt_hwcnt_reader_poll() - hwcnt reader's poll.
* @filp: Non-NULL pointer to file structure.
* @wait: Non-NULL pointer to poll table.
@@ -210,8 +226,15 @@ static int kbasep_kinstr_prfcnt_hwcnt_reader_buffer_ready(
* Return: POLLIN if data can be read without blocking, 0 if data can not be
* read without blocking, else error code.
*/
-static unsigned int kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp,
- poll_table *wait)
+#if KERNEL_VERSION(4, 16, 0) >= LINUX_VERSION_CODE
+static unsigned int
+kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp,
+ struct poll_table_struct *wait)
+#else
+static __poll_t
+kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp,
+ struct poll_table_struct *wait)
+#endif
{
struct kbase_kinstr_prfcnt_client *cli;
@@ -225,13 +248,776 @@ static unsigned int kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp,
poll_wait(filp, &cli->waitq, wait);
- if (kbasep_kinstr_prfcnt_hwcnt_reader_buffer_ready(cli))
+ if (atomic_read(&cli->write_idx) != atomic_read(&cli->read_idx))
return POLLIN;
return 0;
}
/**
+ * kbasep_kinstr_prfcnt_next_dump_time_ns() - Calculate the next periodic
+ * dump time.
+ * @cur_ts_ns: Current time in nanoseconds.
+ * @interval: Interval between dumps in nanoseconds.
+ *
+ * Return: 0 if interval is 0 (i.e. a non-periodic client), or the next dump
+ * time that occurs after cur_ts_ns.
+ */
+static u64 kbasep_kinstr_prfcnt_next_dump_time_ns(u64 cur_ts_ns, u32 interval)
+{
+ /* Non-periodic client */
+ if (interval == 0)
+ return 0;
+
+ /*
+ * Return the next interval after the current time relative to t=0.
+ * This means multiple clients with the same period will synchronize,
+ * regardless of when they were started, allowing the worker to be
+ * scheduled less frequently.
+ */
+ do_div(cur_ts_ns, interval);
+
+ return (cur_ts_ns + 1) * interval;
+}
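As a worked example of the t=0 alignment above: with interval = 100 ms, a client whose cur_ts_ns corresponds to 250 ms gets a next dump time of 300 ms, and a second client with the same period asking at 270 ms also gets 300 ms, so one worker wake-up serves both.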
+
+/**
+ * kbasep_kinstr_prfcnt_timestamp_ns() - Get the current time in nanoseconds.
+ *
+ * Return: Current time in nanoseconds.
+ */
+static u64 kbasep_kinstr_prfcnt_timestamp_ns(void)
+{
+ return ktime_get_raw_ns();
+}
+
+/**
+ * kbasep_kinstr_prfcnt_reschedule_worker() - Update next dump times for all
+ * periodic kinstr_prfcnt clients,
+ * then reschedule the dump worker
+ * appropriately.
+ * @kinstr_ctx: Non-NULL pointer to the kinstr_prfcnt context.
+ *
+ * If there are no periodic clients, then the dump worker will not be
+ * rescheduled. Else, the dump worker will be rescheduled for the next
+ * periodic client dump.
+ */
+static void kbasep_kinstr_prfcnt_reschedule_worker(
+ struct kbase_kinstr_prfcnt_context *kinstr_ctx)
+{
+ u64 cur_ts_ns;
+ u64 shortest_period_ns = U64_MAX;
+ struct kbase_kinstr_prfcnt_client *pos;
+
+ WARN_ON(!kinstr_ctx);
+ lockdep_assert_held(&kinstr_ctx->lock);
+ cur_ts_ns = kbasep_kinstr_prfcnt_timestamp_ns();
+
+ /*
+ * This loop fulfills 2 separate tasks that don't affect each other:
+ *
+ * 1) Determine the shortest period.
+ * 2) Update the next dump time of clients that have already been
+ * dumped. It's important not to alter the next dump time of clients
+ * that haven't been dumped yet.
+ *
+ * For the sake of efficiency, the rescheduling decision ignores the time
+ * of the next dump and just uses the shortest period among all periodic
+ * clients. It is more efficient to serve multiple dump requests at once,
+ * rather than trying to reschedule the worker to serve each request
+ * individually.
+ */
+ list_for_each_entry(pos, &kinstr_ctx->clients, node) {
+ /* Ignore clients that are not periodic or not active. */
+ if (pos->active && pos->dump_interval_ns > 0) {
+ shortest_period_ns =
+ MIN(shortest_period_ns, pos->dump_interval_ns);
+
+ /* Next dump should happen exactly one period after the last dump.
+ * If last dump was overdue and scheduled to happen more than one
+ * period ago, compensate for that by scheduling next dump in the
+ * immediate future.
+ */
+ if (pos->next_dump_time_ns < cur_ts_ns)
+ pos->next_dump_time_ns =
+ MAX(cur_ts_ns + 1,
+ pos->next_dump_time_ns +
+ pos->dump_interval_ns);
+ }
+ }
+
+ /* Cancel the timer if it is already pending */
+ hrtimer_cancel(&kinstr_ctx->dump_timer);
+
+ /* Start the timer if there are periodic clients and kinstr_prfcnt is not
+ * suspended.
+ */
+ if ((shortest_period_ns != U64_MAX) &&
+ (kinstr_ctx->suspend_count == 0)) {
+ u64 next_schedule_time_ns =
+ kbasep_kinstr_prfcnt_next_dump_time_ns(
+ cur_ts_ns, shortest_period_ns);
+ hrtimer_start(&kinstr_ctx->dump_timer,
+ ns_to_ktime(next_schedule_time_ns - cur_ts_ns),
+ HRTIMER_MODE_REL);
+ }
+}
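As a concrete illustration of the policy above: if two active clients have periods of 10 ms and 25 ms, shortest_period_ns is 10 ms, so the hrtimer fires on 10 ms boundaries and each wake-up serves whichever clients are due according to their own next_dump_time_ns, rather than arming a separate timer per client.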
+
+static enum prfcnt_block_type
+kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(u64 type)
+{
+ enum prfcnt_block_type block_type;
+
+ switch (type) {
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:
+ block_type = PRFCNT_BLOCK_TYPE_FE;
+ break;
+
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
+ block_type = PRFCNT_BLOCK_TYPE_TILER;
+ break;
+
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:
+ block_type = PRFCNT_BLOCK_TYPE_SHADER_CORE;
+ break;
+
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
+ block_type = PRFCNT_BLOCK_TYPE_MEMORY;
+ break;
+
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED:
+ default:
+ block_type = PRFCNT_BLOCK_TYPE_RESERVED;
+ break;
+ }
+
+ return block_type;
+}
+
+/**
+ * kbasep_kinstr_prfcnt_set_block_meta_items() - Populate a sample's block meta
+ * item array.
+ * @dst: Non-NULL pointer to the sample's dump buffer object.
+ * @block_meta_base: Non-NULL double pointer to the start of the block meta
+ * data items.
+ * @base_addr: Address of allocated pages for array of samples. Used
+ * to calculate offset of block values.
+ * @counter_set: The SET which blocks represent.
+ */
+int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_dump_buffer *dst,
+ struct prfcnt_metadata **block_meta_base,
+ u64 base_addr, u8 counter_set)
+{
+ size_t grp, blk, blk_inst;
+ struct prfcnt_metadata **ptr_md = block_meta_base;
+ const struct kbase_hwcnt_metadata *metadata;
+
+ if (!dst || !*block_meta_base)
+ return -EINVAL;
+
+ metadata = dst->metadata;
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+ u64 *dst_blk;
+
+ /* Skip unused blocks */
+ if (!kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst))
+ continue;
+
+ dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ (*ptr_md)->hdr.item_type = PRFCNT_SAMPLE_META_TYPE_BLOCK;
+ (*ptr_md)->hdr.item_version = PRFCNT_READER_API_VERSION;
+ (*ptr_md)->u.block_md.block_type =
+ kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(
+ kbase_hwcnt_metadata_block_type(metadata, grp,
+ blk));
+ (*ptr_md)->u.block_md.block_idx = (u8)blk_inst;
+ (*ptr_md)->u.block_md.set = counter_set;
+ (*ptr_md)->u.block_md.block_state = BLOCK_STATE_UNKNOWN;
+ (*ptr_md)->u.block_md.values_offset = (u32)((u64)(uintptr_t)dst_blk - base_addr);
+
+ /* Advance the metadata block pointer to the next item. */
+ (*ptr_md)++;
+ }
+
+ return 0;
+}
+
+/**
+ * kbasep_kinstr_prfcnt_set_sample_metadata() - Set sample metadata for sample
+ * output.
+ * @cli: Non-NULL pointer to a kinstr_prfcnt client.
+ * @dump_buf: Non-NULL pointer to dump buffer where sample is stored.
+ * @ptr_md: Non-NULL pointer to sample metadata.
+ */
+static void kbasep_kinstr_prfcnt_set_sample_metadata(
+ struct kbase_kinstr_prfcnt_client *cli,
+ struct kbase_hwcnt_dump_buffer *dump_buf,
+ struct prfcnt_metadata *ptr_md)
+{
+ u8 clk_cnt, i;
+
+ clk_cnt = cli->kinstr_ctx->metadata->clk_cnt;
+
+ /* PRFCNT_SAMPLE_META_TYPE_SAMPLE must be the first item */
+ ptr_md->hdr.item_type = PRFCNT_SAMPLE_META_TYPE_SAMPLE;
+ ptr_md->hdr.item_version = PRFCNT_READER_API_VERSION;
+ ptr_md->u.sample_md.seq = atomic_read(&cli->write_idx);
+ ptr_md->u.sample_md.flags = cli->sample_flags;
+
+ /* Place the PRFCNT_SAMPLE_META_TYPE_CLOCK optionally as the 2nd */
+ ptr_md++;
+ if (clk_cnt > MAX_REPORTED_DOMAINS)
+ clk_cnt = MAX_REPORTED_DOMAINS;
+
+ /* Handle the prfcnt_clock_metadata meta item */
+ ptr_md->hdr.item_type = PRFCNT_SAMPLE_META_TYPE_CLOCK;
+ ptr_md->hdr.item_version = PRFCNT_READER_API_VERSION;
+ ptr_md->u.clock_md.num_domains = clk_cnt;
+ for (i = 0; i < clk_cnt; i++)
+ ptr_md->u.clock_md.cycles[i] = dump_buf->clk_cnt_buf[i];
+
+ /* Dealing with counter blocks */
+ ptr_md++;
+ if (WARN_ON(kbasep_kinstr_prfcnt_set_block_meta_items(
+ dump_buf, &ptr_md, cli->sample_arr.page_addr, cli->config.counter_set)))
+ return;
+
+ /* Handle the last sentinel item */
+ ptr_md->hdr.item_type = FLEX_LIST_TYPE_NONE;
+ ptr_md->hdr.item_version = 0;
+}
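For reference, the flexible list produced for each sample is laid out as: one PRFCNT_SAMPLE_META_TYPE_SAMPLE item, one PRFCNT_SAMPLE_META_TYPE_CLOCK item carrying up to MAX_REPORTED_DOMAINS cycle counts, one PRFCNT_SAMPLE_META_TYPE_BLOCK item per available block instance, and a terminating FLEX_LIST_TYPE_NONE sentinel.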
+
+/**
+ * kbasep_kinstr_prfcnt_client_output_empty_sample() - Assemble an empty sample
+ * for output.
+ * @cli: Non-NULL pointer to a kinstr_prfcnt client.
+ * @buf_idx: The index to the sample array for saving the sample.
+ */
+static void kbasep_kinstr_prfcnt_client_output_empty_sample(
+ struct kbase_kinstr_prfcnt_client *cli, unsigned int buf_idx)
+{
+ struct kbase_hwcnt_dump_buffer *dump_buf;
+ struct prfcnt_metadata *ptr_md;
+
+ if (WARN_ON(buf_idx >= cli->sample_arr.sample_count))
+ return;
+
+ dump_buf = &cli->sample_arr.samples[buf_idx].dump_buf;
+ ptr_md = cli->sample_arr.samples[buf_idx].sample_meta;
+
+ kbase_hwcnt_dump_buffer_zero(dump_buf, &cli->enable_map);
+
+ /* Use end timestamp from most recent async dump */
+ ptr_md->u.sample_md.timestamp_start = cli->async.ts_end_ns;
+ ptr_md->u.sample_md.timestamp_end = cli->async.ts_end_ns;
+
+ kbasep_kinstr_prfcnt_set_sample_metadata(cli, dump_buf, ptr_md);
+}
+
+/**
+ * kbasep_kinstr_prfcnt_client_output_sample() - Assemble a sample for output.
+ * @cli: Non-NULL pointer to a kinstr_prfcnt client.
+ * @buf_idx: The index to the sample array for saving the sample.
+ * @user_data: User data to return to the user.
+ * @ts_start_ns: Time stamp for the start point of the sample dump.
+ * @ts_end_ns: Time stamp for the end point of the sample dump.
+ */
+static void kbasep_kinstr_prfcnt_client_output_sample(
+ struct kbase_kinstr_prfcnt_client *cli, unsigned int buf_idx,
+ u64 user_data, u64 ts_start_ns, u64 ts_end_ns)
+{
+ struct kbase_hwcnt_dump_buffer *dump_buf;
+ struct kbase_hwcnt_dump_buffer *tmp_buf = &cli->tmp_buf;
+ struct prfcnt_metadata *ptr_md;
+
+ if (WARN_ON(buf_idx >= cli->sample_arr.sample_count))
+ return;
+
+ dump_buf = &cli->sample_arr.samples[buf_idx].dump_buf;
+ ptr_md = cli->sample_arr.samples[buf_idx].sample_meta;
+
+ /* Patch the dump buf headers, to hide the counters that other hwcnt
+ * clients are using.
+ */
+ kbase_hwcnt_gpu_patch_dump_headers(tmp_buf, &cli->enable_map);
+
+ /* Copy the temp buffer to the userspace visible buffer. The strict
+ * variant will explicitly zero any non-enabled counters to ensure
+ * nothing except exactly what the user asked for is made visible.
+ */
+ kbase_hwcnt_dump_buffer_copy_strict(dump_buf, tmp_buf,
+ &cli->enable_map);
+
+ /* PRFCNT_SAMPLE_META_TYPE_SAMPLE must be the first item.
+ * Set timestamp and user data for real dump.
+ */
+ ptr_md->u.sample_md.timestamp_start = ts_start_ns;
+ ptr_md->u.sample_md.timestamp_end = ts_end_ns;
+ ptr_md->u.sample_md.user_data = user_data;
+
+ kbasep_kinstr_prfcnt_set_sample_metadata(cli, dump_buf, ptr_md);
+}
+
+/**
+ * kbasep_kinstr_prfcnt_client_dump() - Perform a dump for a client.
+ * @cli: Non-NULL pointer to a kinstr_prfcnt client.
+ * @event_id: Event type that triggered the dump.
+ * @user_data: User data to return to the user.
+ * @async_dump: Whether this is an asynchronous dump or not.
+ * @empty_sample: Sample block data will be 0 if this is true.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int
+kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli,
+ enum base_hwcnt_reader_event event_id,
+ u64 user_data, bool async_dump,
+ bool empty_sample)
+{
+ int ret;
+ u64 ts_start_ns = 0;
+ u64 ts_end_ns = 0;
+ unsigned int write_idx;
+ unsigned int read_idx;
+ size_t available_samples_count;
+
+ WARN_ON(!cli);
+ lockdep_assert_held(&cli->kinstr_ctx->lock);
+
+ write_idx = atomic_read(&cli->write_idx);
+ read_idx = atomic_read(&cli->read_idx);
+
+ /* Check whether there is space to copy the HWC blocks into. Work out
+ * the number of available samples, taking into account the type of
+ * dump.
+ * Unlike synchronous dumps, asynchronous dumps can reserve space in
+ * the samples array for future dumps. Because of that, the sample
+ * count seen by synchronous dumps is tracked in sync_sample_count,
+ * which starts equal to the size of the whole array, is decremented
+ * whenever an asynchronous dump request becomes pending, and is
+ * incremented again whenever an asynchronous dump request completes.
+ */
+ available_samples_count = async_dump ?
+ cli->sample_arr.sample_count :
+ atomic_read(&cli->sync_sample_count);
+ if (write_idx - read_idx == available_samples_count) {
+ /* For periodic sampling, the current active dump
+ * will be accumulated in the next sample, when
+ * a buffer becomes available.
+ */
+ if (event_id == BASE_HWCNT_READER_EVENT_PERIODIC)
+ cli->sample_flags |= SAMPLE_FLAG_OVERFLOW;
+ return -EBUSY;
+ }
+
+ /* For the rest of the function, use the actual sample_count
+ * that represents the real size of the array.
+ */
+ write_idx %= cli->sample_arr.sample_count;
+
+ if (!empty_sample) {
+ ret = kbase_hwcnt_virtualizer_client_dump(
+ cli->hvcli, &ts_start_ns, &ts_end_ns, &cli->tmp_buf);
+ /* HWC dump error, set the sample with error flag */
+ if (ret)
+ cli->sample_flags |= SAMPLE_FLAG_ERROR;
+
+ /* Make the sample ready and copy it to the userspace mapped buffer */
+ kbasep_kinstr_prfcnt_client_output_sample(
+ cli, write_idx, user_data, ts_start_ns, ts_end_ns);
+ } else {
+ if (!async_dump) {
+ struct prfcnt_metadata *ptr_md;
+ /* User data will not be updated for empty samples. */
+ ptr_md = cli->sample_arr.samples[write_idx].sample_meta;
+ ptr_md->u.sample_md.user_data = user_data;
+ }
+
+ /* Make the sample ready and copy it to the userspace mapped buffer */
+ kbasep_kinstr_prfcnt_client_output_empty_sample(cli, write_idx);
+ }
+
+ /* Notify client. Make sure all changes to memory are visible. */
+ wmb();
+ atomic_inc(&cli->write_idx);
+ if (async_dump) {
+ /* Remember the end timestamp of async dump for empty samples */
+ if (!empty_sample)
+ cli->async.ts_end_ns = ts_end_ns;
+
+ atomic_inc(&cli->sync_sample_count);
+ }
+ wake_up_interruptible(&cli->waitq);
+ /* Reset the flags for the next sample dump */
+ cli->sample_flags = 0;
+
+ return 0;
+}
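
The index arithmetic above relies on write_idx and read_idx being free-running counters; fullness is their difference, and the modulo is applied only when addressing a slot. A minimal illustrative sketch of that scheme (names are hypothetical, not part of the driver changes):

    /* Sketch of the free-running ring-buffer indices used by the dump path. */
    #include <stdbool.h>

    struct sample_ring {
    	unsigned int write_idx; /* incremented for every produced sample */
    	unsigned int read_idx;  /* incremented for every consumed sample */
    	unsigned int capacity;  /* cli->sample_arr.sample_count */
    };

    static bool sample_ring_full(const struct sample_ring *r)
    {
    	/* Holds across unsigned wrap-around as long as the difference
    	 * never exceeds the capacity.
    	 */
    	return r->write_idx - r->read_idx == r->capacity;
    }

    static unsigned int sample_ring_slot(const struct sample_ring *r)
    {
    	/* Only the slot lookup wraps; the counters themselves never do. */
    	return r->write_idx % r->capacity;
    }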
+
+static int
+kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli,
+ u64 user_data)
+{
+ int ret;
+ u64 tm_start, tm_end;
+
+ WARN_ON(!cli);
+ lockdep_assert_held(&cli->cmd_sync_lock);
+
+ /* If the client is already started, the command is a no-op */
+ if (cli->active)
+ return 0;
+
+ kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map,
+ &cli->config.phys_em);
+
+ mutex_lock(&cli->kinstr_ctx->lock);
+ /* Enable HWC using the configuration provided at client creation */
+ ret = kbase_hwcnt_virtualizer_client_set_counters(
+ cli->hvcli, &cli->enable_map, &tm_start, &tm_end, NULL);
+
+ if (!ret) {
+ atomic_set(&cli->sync_sample_count, cli->sample_count);
+ cli->active = true;
+ cli->user_data = user_data;
+ cli->sample_flags = 0;
+
+ if (cli->dump_interval_ns)
+ kbasep_kinstr_prfcnt_reschedule_worker(cli->kinstr_ctx);
+ }
+
+ mutex_unlock(&cli->kinstr_ctx->lock);
+
+ return ret;
+}
+
+static int kbasep_kinstr_prfcnt_client_wait_async_done(
+ struct kbase_kinstr_prfcnt_client *cli)
+{
+ lockdep_assert_held(&cli->cmd_sync_lock);
+
+ return wait_event_interruptible(cli->waitq,
+ atomic_read(&cli->sync_sample_count) ==
+ cli->sample_count);
+}
+
+static int
+kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli,
+ u64 user_data)
+{
+ int ret;
+ u64 tm_start = 0;
+ u64 tm_end = 0;
+ struct kbase_hwcnt_physical_enable_map phys_em;
+ struct kbase_hwcnt_dump_buffer *tmp_buf = NULL;
+ unsigned int write_idx;
+ unsigned int read_idx;
+
+ WARN_ON(!cli);
+ lockdep_assert_held(&cli->cmd_sync_lock);
+
+ /* If the client is not started, the command is invalid */
+ if (!cli->active)
+ return -EINVAL;
+
+ /* Wait until any pending async sample operation is done */
+ ret = kbasep_kinstr_prfcnt_client_wait_async_done(cli);
+
+ if (ret < 0)
+ return -ERESTARTSYS;
+
+ phys_em.fe_bm = 0;
+ phys_em.tiler_bm = 0;
+ phys_em.mmu_l2_bm = 0;
+ phys_em.shader_bm = 0;
+
+ kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &phys_em);
+
+ mutex_lock(&cli->kinstr_ctx->lock);
+
+ /* Check whether there is a buffer to hold the last sample */
+ write_idx = atomic_read(&cli->write_idx);
+ read_idx = atomic_read(&cli->read_idx);
+
+ /* Check if there is a place to save the last stop produced sample */
+ if (write_idx - read_idx < cli->sample_arr.sample_count)
+ tmp_buf = &cli->tmp_buf;
+
+ ret = kbase_hwcnt_virtualizer_client_set_counters(cli->hvcli,
+ &cli->enable_map,
+ &tm_start, &tm_end,
+ &cli->tmp_buf);
+ /* If the last stop sample is in error, set the sample flag */
+ if (ret)
+ cli->sample_flags |= SAMPLE_FLAG_ERROR;
+
+ if (tmp_buf) {
+ write_idx %= cli->sample_arr.sample_count;
+ /* Handle the last stop sample */
+ kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map,
+ &cli->config.phys_em);
+ /* As this is a stop sample, mark it as MANUAL */
+ kbasep_kinstr_prfcnt_client_output_sample(
+ cli, write_idx, user_data, tm_start, tm_end);
+ /* Notify client. Make sure all changes to memory are visible. */
+ wmb();
+ atomic_inc(&cli->write_idx);
+ wake_up_interruptible(&cli->waitq);
+ }
+
+ cli->active = false;
+ cli->user_data = user_data;
+
+ if (cli->dump_interval_ns)
+ kbasep_kinstr_prfcnt_reschedule_worker(cli->kinstr_ctx);
+
+ mutex_unlock(&cli->kinstr_ctx->lock);
+
+ return ret;
+}
+
+static int
+kbasep_kinstr_prfcnt_client_sync_dump(struct kbase_kinstr_prfcnt_client *cli,
+ u64 user_data)
+{
+ int ret;
+ bool empty_sample = false;
+
+ lockdep_assert_held(&cli->cmd_sync_lock);
+
+ /* If the client is not started or not manual, the command is invalid */
+ if (!cli->active || cli->dump_interval_ns)
+ return -EINVAL;
+
+ /* Wait until any pending async sample operation is done. This is
+ * required so that samples follow the order in which they were issued,
+ * as reflected by the sample start timestamp.
+ */
+ if (atomic_read(&cli->sync_sample_count) != cli->sample_count) {
+ /* Return an empty sample instead of performing a real dump,
+ * as the async dump currently in flight will contain the
+ * desired information.
+ */
+ empty_sample = true;
+ ret = kbasep_kinstr_prfcnt_client_wait_async_done(cli);
+
+ if (ret < 0)
+ return -ERESTARTSYS;
+ }
+
+ mutex_lock(&cli->kinstr_ctx->lock);
+
+ ret = kbasep_kinstr_prfcnt_client_dump(cli,
+ BASE_HWCNT_READER_EVENT_MANUAL,
+ user_data, false, empty_sample);
+
+ mutex_unlock(&cli->kinstr_ctx->lock);
+
+ return ret;
+}
+
+static int
+kbasep_kinstr_prfcnt_client_async_dump(struct kbase_kinstr_prfcnt_client *cli,
+ u64 user_data)
+{
+ unsigned int write_idx;
+ unsigned int read_idx;
+ unsigned int active_async_dumps;
+ unsigned int new_async_buf_idx;
+ int ret;
+
+ lockdep_assert_held(&cli->cmd_sync_lock);
+
+ /* If the client is not started or not manual, the command is invalid */
+ if (!cli->active || cli->dump_interval_ns)
+ return -EINVAL;
+
+ mutex_lock(&cli->kinstr_ctx->lock);
+
+ write_idx = atomic_read(&cli->write_idx);
+ read_idx = atomic_read(&cli->read_idx);
+ active_async_dumps =
+ cli->sample_count - atomic_read(&cli->sync_sample_count);
+ new_async_buf_idx = write_idx + active_async_dumps;
+
+ /* Check if there is a place to copy the HWC block into.
+ * If there is, reserve space in the buffer for the asynchronous
+ * operation to make sure that it can actually take place.
+ * Because space is reserved for asynchronous dumps, that reservation
+ * must be taken into consideration here.
+ */
+ ret = (new_async_buf_idx - read_idx == cli->sample_arr.sample_count) ?
+ -EBUSY :
+ 0;
+
+ if (ret == -EBUSY) {
+ mutex_unlock(&cli->kinstr_ctx->lock);
+ return ret;
+ }
+
+ if (active_async_dumps > 0) {
+ struct prfcnt_metadata *ptr_md;
+ unsigned int buf_idx =
+ new_async_buf_idx % cli->sample_arr.sample_count;
+ /* Instead of storing user_data, write it directly to the future
+ * empty sample.
+ */
+ ptr_md = cli->sample_arr.samples[buf_idx].sample_meta;
+ ptr_md->u.sample_md.user_data = user_data;
+
+ atomic_dec(&cli->sync_sample_count);
+ } else {
+ cli->async.user_data = user_data;
+ atomic_dec(&cli->sync_sample_count);
+
+ kbase_hwcnt_virtualizer_queue_work(cli->kinstr_ctx->hvirt,
+ &cli->async.dump_work);
+ }
+
+ mutex_unlock(&cli->kinstr_ctx->lock);
+
+ return ret;
+}
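
The reservation accounting can be summarised in a small, hedged sketch: sync_sample_count starts at the array size, and every pending asynchronous dump lowers it, which both reserves a slot and shrinks the budget seen by synchronous dumps. The helper below is illustrative only, not part of the driver changes:

    #include <errno.h>

    /* Illustrative reservation check mirroring the logic above;
     * sample_count corresponds to cli->sample_count and sync_sample_count
     * to the atomic counter of the same name.
     */
    static int reserve_async_slot(unsigned int write_idx, unsigned int read_idx,
    			      unsigned int sample_count,
    			      unsigned int *sync_sample_count)
    {
    	unsigned int active_async = sample_count - *sync_sample_count;
    	unsigned int new_async_idx = write_idx + active_async;

    	/* Same fullness test as the driver, but counting reserved slots. */
    	if (new_async_idx - read_idx == sample_count)
    		return -EBUSY;

    	/* Reserving a slot shrinks the budget seen by synchronous dumps. */
    	(*sync_sample_count)--;
    	return 0;
    }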
+
+static int
+kbasep_kinstr_prfcnt_client_discard(struct kbase_kinstr_prfcnt_client *cli)
+{
+ WARN_ON(!cli);
+ lockdep_assert_held(&cli->cmd_sync_lock);
+
+ mutex_lock(&cli->kinstr_ctx->lock);
+
+ /* Discard (Clear) all internally buffered samples */
+ atomic_set(&cli->read_idx, atomic_read(&cli->write_idx));
+
+ mutex_unlock(&cli->kinstr_ctx->lock);
+
+ return 0;
+}
+
+/**
+ * kbasep_kinstr_prfcnt_cmd() - Execute command for a client session.
+ * @cli: Non-NULL pointer to kinstr_prfcnt client.
+ * @control_cmd: Control command to execute.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_kinstr_prfcnt_cmd(struct kbase_kinstr_prfcnt_client *cli,
+ struct prfcnt_control_cmd *control_cmd)
+{
+ int ret = 0;
+
+ mutex_lock(&cli->cmd_sync_lock);
+
+ switch (control_cmd->cmd) {
+ case PRFCNT_CONTROL_CMD_START:
+ ret = kbasep_kinstr_prfcnt_client_start(cli,
+ control_cmd->user_data);
+ break;
+ case PRFCNT_CONTROL_CMD_STOP:
+ ret = kbasep_kinstr_prfcnt_client_stop(cli,
+ control_cmd->user_data);
+ break;
+ case PRFCNT_CONTROL_CMD_SAMPLE_SYNC:
+ ret = kbasep_kinstr_prfcnt_client_sync_dump(
+ cli, control_cmd->user_data);
+ break;
+ case PRFCNT_CONTROL_CMD_SAMPLE_ASYNC:
+ ret = kbasep_kinstr_prfcnt_client_async_dump(
+ cli, control_cmd->user_data);
+ break;
+ case PRFCNT_CONTROL_CMD_DISCARD:
+ ret = kbasep_kinstr_prfcnt_client_discard(cli);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ mutex_unlock(&cli->cmd_sync_lock);
+
+ return ret;
+}
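
For context, a hypothetical user-space caller of the command ioctl handled above might look like the sketch below. The struct, field, and ioctl names follow the kernel side of this patch; the helper itself and anything else are assumptions for illustration.

    /* Hypothetical user-space helper; assumes the uapi header that defines
     * struct prfcnt_control_cmd and KBASE_IOCTL_KINSTR_PRFCNT_CMD is included.
     */
    #include <string.h>
    #include <sys/ioctl.h>

    static int prfcnt_start(int client_fd, unsigned long long user_data)
    {
    	struct prfcnt_control_cmd cmd;

    	memset(&cmd, 0, sizeof(cmd));
    	cmd.cmd = PRFCNT_CONTROL_CMD_START;
    	cmd.user_data = user_data;

    	/* 0 on success, otherwise -1 with errno set from the handler above. */
    	return ioctl(client_fd, KBASE_IOCTL_KINSTR_PRFCNT_CMD, &cmd);
    }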
+
+static int
+kbasep_kinstr_prfcnt_get_sample(struct kbase_kinstr_prfcnt_client *cli,
+ struct prfcnt_sample_access *sample_access)
+{
+ unsigned int write_idx;
+ unsigned int read_idx;
+ u64 sample_offset_bytes;
+ struct prfcnt_metadata *sample_meta;
+
+ write_idx = atomic_read(&cli->write_idx);
+ read_idx = atomic_read(&cli->read_idx);
+
+ if (write_idx == read_idx)
+ return -EINVAL;
+
+ read_idx %= cli->sample_arr.sample_count;
+ sample_offset_bytes =
+ (u64)(uintptr_t)cli->sample_arr.samples[read_idx].sample_meta -
+ (u64)(uintptr_t)cli->sample_arr.page_addr;
+ sample_meta =
+ (struct prfcnt_metadata *)cli->sample_arr.samples[read_idx]
+ .sample_meta;
+
+ /* Verify that a valid sample has been dumped at read_idx.
+ * There are situations where this may not be the case,
+ * for instance if the client is trying to get an asynchronous
+ * sample which has not been dumped yet.
+ */
+ if (sample_meta->hdr.item_type != PRFCNT_SAMPLE_META_TYPE_SAMPLE)
+ return -EINVAL;
+ if (sample_meta->hdr.item_version != PRFCNT_READER_API_VERSION)
+ return -EINVAL;
+
+ sample_access->sequence = sample_meta->u.sample_md.seq;
+ sample_access->sample_offset_bytes = sample_offset_bytes;
+
+ /* read_idx is not incremented here, because the interface allows
+ * only one sample to be "in flight" between kernel space and user space.
+ */
+
+ return 0;
+}
+
+static int
+kbasep_kinstr_prfcnt_put_sample(struct kbase_kinstr_prfcnt_client *cli,
+ struct prfcnt_sample_access *sample_access)
+{
+ unsigned int write_idx;
+ unsigned int read_idx;
+ u64 sample_offset_bytes;
+
+ write_idx = atomic_read(&cli->write_idx);
+ read_idx = atomic_read(&cli->read_idx);
+
+ if (write_idx == read_idx)
+ return -EINVAL;
+
+ if (sample_access->sequence != read_idx)
+ return -EINVAL;
+
+ read_idx %= cli->sample_arr.sample_count;
+ sample_offset_bytes =
+ (u64)(uintptr_t)cli->sample_arr.samples[read_idx].sample_meta -
+ (u64)(uintptr_t)cli->sample_arr.page_addr;
+
+ if (sample_access->sample_offset_bytes != sample_offset_bytes)
+ return -EINVAL;
+
+ atomic_inc(&cli->read_idx);
+
+ return 0;
+}
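
Taken together, the two handlers implement a one-sample-at-a-time handshake with user space. A hedged user-space sketch of that handshake (parse_sample() and mmap_base are placeholders, not real API):

    /* Hypothetical consume loop for the GET/PUT handshake above; mmap_base
     * is assumed to be the client's mapped sample array.
     */
    #include <stdint.h>
    #include <sys/ioctl.h>

    void parse_sample(const uint8_t *sample); /* placeholder */

    static int consume_one_sample(int client_fd, const uint8_t *mmap_base)
    {
    	struct prfcnt_sample_access access = { 0 };
    	int err;

    	err = ioctl(client_fd, KBASE_IOCTL_KINSTR_PRFCNT_GET_SAMPLE, &access);
    	if (err)
    		return err; /* e.g. no sample is currently available */

    	/* The sample's metadata starts at this offset inside the mapping. */
    	parse_sample(mmap_base + access.sample_offset_bytes);

    	/* Return the slot unchanged so the kernel can advance read_idx. */
    	return ioctl(client_fd, KBASE_IOCTL_KINSTR_PRFCNT_PUT_SAMPLE, &access);
    }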
+
+/**
* kbasep_kinstr_prfcnt_hwcnt_reader_ioctl() - hwcnt reader's ioctl.
* @filp: Non-NULL pointer to file structure.
* @cmd: User command.
@@ -243,10 +1029,11 @@ static long kbasep_kinstr_prfcnt_hwcnt_reader_ioctl(struct file *filp,
unsigned int cmd,
unsigned long arg)
{
- long rcode;
+ long rcode = 0;
struct kbase_kinstr_prfcnt_client *cli;
+ void __user *uarg = (void __user *)arg;
- if (!filp || (_IOC_TYPE(cmd) != KBASE_HWCNT_READER))
+ if (!filp)
return -EINVAL;
cli = filp->private_data;
@@ -255,8 +1042,36 @@ static long kbasep_kinstr_prfcnt_hwcnt_reader_ioctl(struct file *filp,
return -EINVAL;
switch (_IOC_NR(cmd)) {
+ case _IOC_NR(KBASE_IOCTL_KINSTR_PRFCNT_CMD): {
+ struct prfcnt_control_cmd control_cmd;
+ int err;
+
+ err = copy_from_user(&control_cmd, uarg, sizeof(control_cmd));
+ if (err)
+ return -EFAULT;
+ rcode = kbasep_kinstr_prfcnt_cmd(cli, &control_cmd);
+ } break;
+ case _IOC_NR(KBASE_IOCTL_KINSTR_PRFCNT_GET_SAMPLE): {
+ struct prfcnt_sample_access sample_access;
+ int err;
+
+ memset(&sample_access, 0, sizeof(sample_access));
+ rcode = kbasep_kinstr_prfcnt_get_sample(cli, &sample_access);
+ err = copy_to_user(uarg, &sample_access, sizeof(sample_access));
+ if (err)
+ return -EFAULT;
+ } break;
+ case _IOC_NR(KBASE_IOCTL_KINSTR_PRFCNT_PUT_SAMPLE): {
+ struct prfcnt_sample_access sample_access;
+ int err;
+
+ err = copy_from_user(&sample_access, uarg,
+ sizeof(sample_access));
+ if (err)
+ return -EFAULT;
+ rcode = kbasep_kinstr_prfcnt_put_sample(cli, &sample_access);
+ } break;
default:
- pr_warn("Unknown HWCNT ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd));
rcode = -EINVAL;
break;
}
@@ -279,7 +1094,6 @@ static int kbasep_kinstr_prfcnt_hwcnt_reader_mmap(struct file *filp,
if (!filp || !vma)
return -EINVAL;
-
cli = filp->private_data;
if (!cli)
@@ -334,10 +1148,10 @@ kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli)
return;
kbase_hwcnt_virtualizer_client_destroy(cli->hvcli);
- kfree(cli->dump_bufs_meta);
kbasep_kinstr_prfcnt_sample_array_free(&cli->sample_arr);
kbase_hwcnt_dump_buffer_free(&cli->tmp_buf);
kbase_hwcnt_enable_map_free(&cli->enable_map);
+ mutex_destroy(&cli->cmd_sync_lock);
kfree(cli);
}
@@ -377,6 +1191,31 @@ static const struct file_operations kinstr_prfcnt_client_fops = {
.release = kbasep_kinstr_prfcnt_hwcnt_reader_release,
};
+size_t kbasep_kinstr_prfcnt_get_sample_md_count(const struct kbase_hwcnt_metadata *metadata)
+{
+ size_t grp, blk, blk_inst;
+ size_t md_count = 0;
+
+ if (!metadata)
+ return 0;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+ /* Skip unused blocks */
+ if (!kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst))
+ continue;
+
+ md_count++;
+ }
+
+ /* Add counts for the clock metadata and the sample metadata items */
+ md_count += 2;
+
+ /* Reserve one for last sentinel item. */
+ md_count++;
+
+ return md_count;
+}
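
A worked example of the count above, for a hypothetical configuration:

    /* Hypothetical example: with 7 available block instances, each sample
     * carries 7 (block metadata) + 2 (sample and clock metadata) + 1
     * (sentinel) = 10 prfcnt_metadata items, so the metadata portion of a
     * sample is 10 * sizeof(struct prfcnt_metadata) bytes.
     */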
+
static size_t kbasep_kinstr_prfcnt_get_sample_size(
const struct kbase_hwcnt_metadata *metadata,
struct kbase_hwcnt_dump_buffer *dump_buf)
@@ -384,19 +1223,12 @@ static size_t kbasep_kinstr_prfcnt_get_sample_size(
size_t dump_buf_bytes;
size_t clk_cnt_buf_bytes;
size_t sample_meta_bytes;
- size_t block_count = 0;
- size_t grp, blk, blk_inst;
+ size_t md_count = kbasep_kinstr_prfcnt_get_sample_md_count(metadata);
if (!metadata)
return 0;
- kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
- block_count++;
-
- /* Reserve one for last sentinel item. */
- block_count++;
-
- sample_meta_bytes = sizeof(struct prfcnt_metadata) * block_count;
+ sample_meta_bytes = sizeof(struct prfcnt_metadata) * md_count;
dump_buf_bytes = metadata->dump_buf_bytes;
clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * metadata->clk_cnt;
@@ -411,7 +1243,68 @@ static size_t kbasep_kinstr_prfcnt_get_sample_size(
*/
static void kbasep_kinstr_prfcnt_dump_worker(struct work_struct *work)
{
- /* Do nothing. */
+ struct kbase_kinstr_prfcnt_context *kinstr_ctx = container_of(
+ work, struct kbase_kinstr_prfcnt_context, dump_work);
+ struct kbase_kinstr_prfcnt_client *pos;
+ u64 cur_time_ns;
+
+ mutex_lock(&kinstr_ctx->lock);
+
+ cur_time_ns = kbasep_kinstr_prfcnt_timestamp_ns();
+
+ list_for_each_entry(pos, &kinstr_ctx->clients, node) {
+ if (pos->active && (pos->next_dump_time_ns != 0) &&
+ (pos->next_dump_time_ns < cur_time_ns))
+ kbasep_kinstr_prfcnt_client_dump(
+ pos, BASE_HWCNT_READER_EVENT_PERIODIC,
+ pos->user_data, false, false);
+ }
+
+ kbasep_kinstr_prfcnt_reschedule_worker(kinstr_ctx);
+
+ mutex_unlock(&kinstr_ctx->lock);
+}
+
+/**
+ * kbasep_kinstr_prfcnt_async_dump_worker() - Dump worker for a manual client
+ * to take a single asynchronous
+ * sample.
+ * @work: Work structure.
+ */
+static void kbasep_kinstr_prfcnt_async_dump_worker(struct work_struct *work)
+{
+ struct kbase_kinstr_prfcnt_async *cli_async =
+ container_of(work, struct kbase_kinstr_prfcnt_async, dump_work);
+ struct kbase_kinstr_prfcnt_client *cli = container_of(
+ cli_async, struct kbase_kinstr_prfcnt_client, async);
+
+ mutex_lock(&cli->kinstr_ctx->lock);
+ /* While the async operation is in flight, a sync stop might have been
+ * executed, in which case the dump should be skipped. Further, as this
+ * is an async dump, a buffer is expected to have been reserved for it.
+ * This check avoids the rare corner case where the user side has
+ * issued a stop/start pair before the async work item gets the chance
+ * to execute.
+ */
+ if (cli->active &&
+ (atomic_read(&cli->sync_sample_count) < cli->sample_count))
+ kbasep_kinstr_prfcnt_client_dump(cli,
+ BASE_HWCNT_READER_EVENT_MANUAL,
+ cli->async.user_data, true,
+ false);
+
+ /* While the async operation is in flight, more async dump requests
+ * may have been submitted. In this case, no more async dump work
+ * will be queued. Instead, space will have been reserved for those
+ * dumps and an empty sample is returned for each of them after
+ * handling the current async dump.
+ */
+ while (cli->active &&
+ (atomic_read(&cli->sync_sample_count) < cli->sample_count)) {
+ kbasep_kinstr_prfcnt_client_dump(
+ cli, BASE_HWCNT_READER_EVENT_MANUAL, 0, true, true);
+ }
+ mutex_unlock(&cli->kinstr_ctx->lock);
}
/**
@@ -422,6 +1315,17 @@ static void kbasep_kinstr_prfcnt_dump_worker(struct work_struct *work)
static enum hrtimer_restart
kbasep_kinstr_prfcnt_dump_timer(struct hrtimer *timer)
{
+ struct kbase_kinstr_prfcnt_context *kinstr_ctx = container_of(
+ timer, struct kbase_kinstr_prfcnt_context, dump_timer);
+
+ /* We don't need to check kinstr_ctx->suspend_count here.
+ * Suspend and resume functions already ensure that the worker
+ * is cancelled when the driver is suspended, and resumed when
+ * the suspend_count reaches 0.
+ */
+ kbase_hwcnt_virtualizer_queue_work(kinstr_ctx->hvirt,
+ &kinstr_ctx->dump_work);
+
return HRTIMER_NORESTART;
}
@@ -555,20 +1459,14 @@ static int kbasep_kinstr_prfcnt_sample_array_alloc(
size_t dump_buf_bytes;
size_t clk_cnt_buf_bytes;
size_t sample_meta_bytes;
- size_t block_count = 0;
+ size_t md_count;
size_t sample_size;
- size_t grp, blk, blk_inst;
if (!metadata || !sample_arr)
return -EINVAL;
- kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
- block_count++;
-
- /* Reserve one for last sentinel item. */
- block_count++;
-
- sample_meta_bytes = sizeof(struct prfcnt_metadata) * block_count;
+ md_count = kbasep_kinstr_prfcnt_get_sample_md_count(metadata);
+ sample_meta_bytes = sizeof(struct prfcnt_metadata) * md_count;
dump_buf_bytes = metadata->dump_buf_bytes;
clk_cnt_buf_bytes =
sizeof(*samples->dump_buf.clk_cnt_buf) * metadata->clk_cnt;
@@ -602,7 +1500,8 @@ static int kbasep_kinstr_prfcnt_sample_array_alloc(
/* Internal layout in a sample buffer: [sample metadata, dump_buf, clk_cnt_buf]. */
samples[sample_idx].dump_buf.metadata = metadata;
samples[sample_idx].sample_meta =
- (u64 *)(uintptr_t)(addr + sample_meta_offset);
+ (struct prfcnt_metadata *)(uintptr_t)(
+ addr + sample_meta_offset);
samples[sample_idx].dump_buf.dump_buf =
(u64 *)(uintptr_t)(addr + dump_buf_offset);
samples[sample_idx].dump_buf.clk_cnt_buf =
@@ -724,6 +1623,31 @@ static int kbasep_kinstr_prfcnt_parse_request_enable(
}
/**
+ * kbasep_kinstr_prfcnt_parse_request_scope - Parse a scope request
+ * @req_scope: Performance counters scope request to parse.
+ * @config: Client object the session configuration should be written to.
+ *
+ * This function parses a performance counters scope request.
+ * There are only 2 acceptable outcomes: either the client leaves the scope
+ * as undefined, or all the scope requests are set to the same value.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_kinstr_prfcnt_parse_request_scope(
+ const struct prfcnt_request_scope *req_scope,
+ struct kbase_kinstr_prfcnt_client_config *config)
+{
+ int err = 0;
+
+ if (config->scope == PRFCNT_SCOPE_RESERVED)
+ config->scope = req_scope->scope;
+ else if (config->scope != req_scope->scope)
+ err = -EINVAL;
+
+ return err;
+}
+
+/**
* kbasep_kinstr_prfcnt_parse_setup - Parse session setup
* @kinstr_ctx: Pointer to the kinstr_prfcnt context.
* @setup: Session setup information to parse.
@@ -742,35 +1666,48 @@ static int kbasep_kinstr_prfcnt_parse_setup(
{
uint32_t i;
struct prfcnt_request_item *req_arr;
+ unsigned int item_count = setup->in.request_item_count;
+ unsigned long bytes;
int err = 0;
- if (!setup->in.requests_ptr || (setup->in.request_item_count == 0) ||
- (setup->in.request_item_size == 0)) {
+ /* Limit the request items to 2x the expected count: this accommodates
+ * moderate duplication while rejecting excessive abuse.
+ */
+ if (!setup->in.requests_ptr || (item_count < 2) ||
+ (setup->in.request_item_size == 0) ||
+ item_count > 2 * kinstr_ctx->info_item_count) {
return -EINVAL;
}
- req_arr =
- (struct prfcnt_request_item *)(uintptr_t)setup->in.requests_ptr;
+ bytes = item_count * sizeof(*req_arr);
+ req_arr = kmalloc(bytes, GFP_KERNEL);
+ if (!req_arr)
+ return -ENOMEM;
- if (req_arr[setup->in.request_item_count - 1].hdr.item_type !=
- FLEX_LIST_TYPE_NONE) {
- return -EINVAL;
+ if (copy_from_user(req_arr, u64_to_user_ptr(setup->in.requests_ptr),
+ bytes)) {
+ err = -EFAULT;
+ goto free_buf;
}
- if (req_arr[setup->in.request_item_count - 1].hdr.item_version != 0)
- return -EINVAL;
+ if (req_arr[item_count - 1].hdr.item_type != FLEX_LIST_TYPE_NONE ||
+ req_arr[item_count - 1].hdr.item_version != 0) {
+ err = -EINVAL;
+ goto free_buf;
+ }
/* The session configuration can only feature one value for some
- * properties (like capture mode and block counter set), but the client
- * may potential issue multiple requests and try to set more than one
- * value for those properties. While issuing multiple requests for the
+ * properties (like capture mode, block counter set and scope), but the
+ * client may potentially issue multiple requests and try to set more than
+ * one value for those properties. While issuing multiple requests for the
* same property is allowed by the protocol, asking for different values
* is illegal. Leaving these properties as undefined is illegal, too.
*/
config->prfcnt_mode = PRFCNT_MODE_RESERVED;
config->counter_set = KBASE_HWCNT_SET_UNDEFINED;
+ config->scope = PRFCNT_SCOPE_RESERVED;
- for (i = 0; i < setup->in.request_item_count - 1; i++) {
+ for (i = 0; i < item_count - 1; i++) {
if (req_arr[i].hdr.item_version > PRFCNT_READER_API_VERSION) {
err = -EINVAL;
break;
@@ -797,17 +1734,20 @@ static int kbasep_kinstr_prfcnt_parse_setup(
break;
if (config->prfcnt_mode == PRFCNT_MODE_PERIODIC) {
- config->period_us =
+ config->period_ns =
req_arr[i]
.u.req_mode.mode_config.periodic
- .period_us;
+ .period_ns;
- if ((config->period_us != 0) &&
- (config->period_us <
- DUMP_INTERVAL_MIN_US)) {
- config->period_us =
- DUMP_INTERVAL_MIN_US;
+ if ((config->period_ns != 0) &&
+ (config->period_ns <
+ DUMP_INTERVAL_MIN_NS)) {
+ config->period_ns =
+ DUMP_INTERVAL_MIN_NS;
}
+
+ if (config->period_ns == 0)
+ err = -EINVAL;
}
break;
@@ -816,6 +1756,11 @@ static int kbasep_kinstr_prfcnt_parse_setup(
&req_arr[i].u.req_enable, config);
break;
+ case PRFCNT_REQUEST_TYPE_SCOPE:
+ err = kbasep_kinstr_prfcnt_parse_request_scope(
+ &req_arr[i].u.req_scope, config);
+ break;
+
default:
err = -EINVAL;
break;
@@ -825,14 +1770,19 @@ static int kbasep_kinstr_prfcnt_parse_setup(
break;
}
- /* Verify that properties (like capture mode and block counter set)
- * have been defined by the user space client.
- */
- if (config->prfcnt_mode == PRFCNT_MODE_RESERVED)
- err = -EINVAL;
+free_buf:
+ kfree(req_arr);
- if (config->counter_set == KBASE_HWCNT_SET_UNDEFINED)
- err = -EINVAL;
+ if (!err) {
+ /* Verify that properties (like capture mode and block counter
+ * set) have been defined by the user space client.
+ */
+ if (config->prfcnt_mode == PRFCNT_MODE_RESERVED)
+ err = -EINVAL;
+
+ if (config->counter_set == KBASE_HWCNT_SET_UNDEFINED)
+ err = -EINVAL;
+ }
return err;
}
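
The setup path above follows a common pattern for pulling a bounded, variable-length array from user space: validate the count against an upper bound, allocate a kernel copy, copy_from_user(), then validate the sentinel item. A generic sketch of that pattern (illustrative helper, not part of the patch):

    #include <linux/errno.h>
    #include <linux/kernel.h>
    #include <linux/slab.h>
    #include <linux/types.h>
    #include <linux/uaccess.h>

    /* Illustrative helper: bounded copy of a user-supplied item array. */
    static int copy_bounded_user_array(void **out, u64 user_ptr,
    				   size_t item_count, size_t item_size,
    				   size_t max_items)
    {
    	void *buf;

    	if (!user_ptr || !item_count || item_count > max_items)
    		return -EINVAL;

    	/* kmalloc_array() guards against item_count * item_size overflow. */
    	buf = kmalloc_array(item_count, item_size, GFP_KERNEL);
    	if (!buf)
    		return -ENOMEM;

    	if (copy_from_user(buf, u64_to_user_ptr(user_ptr),
    			   item_count * item_size)) {
    		kfree(buf);
    		return -EFAULT;
    	}

    	*out = buf;
    	return 0;
    }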
@@ -872,8 +1822,12 @@ static int kbasep_kinstr_prfcnt_client_create(
goto error;
cli->config.buffer_count = MAX_BUFFER_COUNT;
- cli->dump_interval_ns = cli->config.period_us * NSEC_PER_USEC;
+ cli->dump_interval_ns = cli->config.period_ns;
cli->next_dump_time_ns = 0;
+ cli->active = false;
+ atomic_set(&cli->write_idx, 0);
+ atomic_set(&cli->read_idx, 0);
+
err = kbase_hwcnt_enable_map_alloc(kinstr_ctx->metadata,
&cli->enable_map);
@@ -888,6 +1842,7 @@ static int kbasep_kinstr_prfcnt_client_create(
kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &phys_em);
cli->sample_count = cli->config.buffer_count;
+ atomic_set(&cli->sync_sample_count, cli->sample_count);
cli->sample_size = kbasep_kinstr_prfcnt_get_sample_size(
kinstr_ctx->metadata, &cli->tmp_buf);
@@ -914,15 +1869,6 @@ static int kbasep_kinstr_prfcnt_client_create(
if (err < 0)
goto error;
- err = -ENOMEM;
-
- cli->dump_bufs_meta =
- kmalloc_array(cli->config.buffer_count,
- sizeof(*cli->dump_bufs_meta), GFP_KERNEL);
-
- if (!cli->dump_bufs_meta)
- goto error;
-
err = kbase_hwcnt_virtualizer_client_create(
kinstr_ctx->hvirt, &cli->enable_map, &cli->hvcli);
@@ -930,6 +1876,9 @@ static int kbasep_kinstr_prfcnt_client_create(
goto error;
init_waitqueue_head(&cli->waitq);
+ INIT_WORK(&cli->async.dump_work,
+ kbasep_kinstr_prfcnt_async_dump_worker);
+ mutex_init(&cli->cmd_sync_lock);
*out_vcli = cli;
return 0;
@@ -965,48 +1914,11 @@ static void kbasep_kinstr_prfcnt_get_request_info_list(
*arr_idx += ARRAY_SIZE(kinstr_prfcnt_supported_requests);
}
-static enum prfcnt_block_type
-kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(u64 type)
-{
- enum prfcnt_block_type block_type;
-
- switch (type) {
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:
- block_type = PRFCNT_BLOCK_TYPE_FE;
- break;
-
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
- block_type = PRFCNT_BLOCK_TYPE_TILER;
- break;
-
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:
- block_type = PRFCNT_BLOCK_TYPE_SHADER_CORE;
- break;
-
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
- block_type = PRFCNT_BLOCK_TYPE_MEMORY;
- break;
-
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED:
- default:
- block_type = PRFCNT_BLOCK_TYPE_RESERVED;
- break;
- }
-
- return block_type;
-}
-
-static int kbasep_kinstr_prfcnt_get_block_info_list(
- const struct kbase_hwcnt_metadata *metadata, size_t block_set,
- struct prfcnt_enum_item *item_arr, size_t *arr_idx)
+int kbasep_kinstr_prfcnt_get_block_info_list(const struct kbase_hwcnt_metadata *metadata,
+ size_t block_set, struct prfcnt_enum_item *item_arr,
+ size_t *arr_idx)
{
- size_t grp;
- size_t blk;
+ size_t grp, blk;
if (!metadata || !item_arr || !arr_idx)
return -EINVAL;
@@ -1015,19 +1927,30 @@ static int kbasep_kinstr_prfcnt_get_block_info_list(
for (blk = 0;
blk < kbase_hwcnt_metadata_block_count(metadata, grp);
blk++, (*arr_idx)++) {
+ size_t blk_inst;
+ size_t unused_blk_inst_count = 0;
+ size_t blk_inst_count =
+ kbase_hwcnt_metadata_block_instance_count(metadata, grp, blk);
+
item_arr[*arr_idx].hdr.item_type =
PRFCNT_ENUM_TYPE_BLOCK;
item_arr[*arr_idx].hdr.item_version =
PRFCNT_READER_API_VERSION;
item_arr[*arr_idx].u.block_counter.set = block_set;
-
item_arr[*arr_idx].u.block_counter.block_type =
kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(
kbase_hwcnt_metadata_block_type(
metadata, grp, blk));
+
+ /* Count unused block instances to update the reported number of instances */
+ for (blk_inst = 0; blk_inst < blk_inst_count; blk_inst++) {
+ if (!kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk,
+ blk_inst))
+ unused_blk_inst_count++;
+ }
+
item_arr[*arr_idx].u.block_counter.num_instances =
- kbase_hwcnt_metadata_block_instance_count(
- metadata, grp, blk);
+ blk_inst_count - unused_blk_inst_count;
item_arr[*arr_idx].u.block_counter.num_values =
kbase_hwcnt_metadata_block_values_count(
metadata, grp, blk);
@@ -1086,8 +2009,11 @@ static int kbasep_kinstr_prfcnt_enum_info_list(
if (enum_info->info_item_count != kinstr_ctx->info_item_count)
return -EINVAL;
- prfcnt_item_arr =
- (struct prfcnt_enum_item *)(uintptr_t)enum_info->info_list_ptr;
+ prfcnt_item_arr = kcalloc(enum_info->info_item_count,
+ sizeof(*prfcnt_item_arr), GFP_KERNEL);
+ if (!prfcnt_item_arr)
+ return -ENOMEM;
+
kbasep_kinstr_prfcnt_get_request_info_list(kinstr_ctx, prfcnt_item_arr,
&arr_idx);
metadata = kbase_hwcnt_virtualizer_metadata(kinstr_ctx->hvirt);
@@ -1118,6 +2044,16 @@ static int kbasep_kinstr_prfcnt_enum_info_list(
FLEX_LIST_TYPE_NONE;
prfcnt_item_arr[enum_info->info_item_count - 1].hdr.item_version = 0;
+ if (!err) {
+ unsigned long bytes =
+ enum_info->info_item_count * sizeof(*prfcnt_item_arr);
+
+ if (copy_to_user(u64_to_user_ptr(enum_info->info_list_ptr),
+ prfcnt_item_arr, bytes))
+ err = -EFAULT;
+ }
+
+ kfree(prfcnt_item_arr);
return err;
}
diff --git a/mali_kbase/mali_kbase_kinstr_prfcnt.h b/mali_kbase/mali_kbase_kinstr_prfcnt.h
index 83d76be..c42408b 100644
--- a/mali_kbase/mali_kbase_kinstr_prfcnt.h
+++ b/mali_kbase/mali_kbase_kinstr_prfcnt.h
@@ -26,6 +26,8 @@
#ifndef _KBASE_KINSTR_PRFCNT_H_
#define _KBASE_KINSTR_PRFCNT_H_
+#include <uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h>
+
struct kbase_kinstr_prfcnt_context;
struct kbase_hwcnt_virtualizer;
struct kbase_ioctl_hwcnt_reader_setup;
@@ -76,6 +78,49 @@ void kbase_kinstr_prfcnt_suspend(struct kbase_kinstr_prfcnt_context *kinstr_ctx)
*/
void kbase_kinstr_prfcnt_resume(struct kbase_kinstr_prfcnt_context *kinstr_ctx);
+#if MALI_KERNEL_TEST_API
+/**
+ * kbasep_kinstr_prfcnt_get_block_info_list() - Get list of all block types
+ * with their information.
+ * @metadata: Non-NULL pointer to the hardware counter metadata.
+ * @block_set: Which SET the blocks will represent.
+ * @item_arr: Non-NULL pointer to array of enumeration items to populate.
+ * @arr_idx: Non-NULL pointer to index of array @item_arr.
+ *
+ * Populate list of counter blocks with information for enumeration.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbasep_kinstr_prfcnt_get_block_info_list(const struct kbase_hwcnt_metadata *metadata,
+ size_t block_set, struct prfcnt_enum_item *item_arr,
+ size_t *arr_idx);
+
+/**
+ * kbasep_kinstr_prfcnt_get_sample_md_count() - Get count of sample
+ * metadata items.
+ * @metadata: Non-NULL pointer to the hardware counter metadata.
+ *
+ * Return: Number of metadata items for available blocks in each sample.
+ */
+size_t kbasep_kinstr_prfcnt_get_sample_md_count(const struct kbase_hwcnt_metadata *metadata);
+
+/**
+ * kbasep_kinstr_prfcnt_set_block_meta_items() - Populate a sample's block meta
+ * item array.
+ * @dst: Non-NULL pointer to the sample's dump buffer object.
+ * @block_meta_base: Non-NULL double pointer to the start of the block meta
+ * data items.
+ * @base_addr: Address of allocated pages for array of samples. Used
+ * to calculate offset of block values.
+ * @counter_set: The SET which blocks represent.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_dump_buffer *dst,
+ struct prfcnt_metadata **block_meta_base,
+ u64 base_addr, u8 counter_set);
+#endif /* MALI_KERNEL_TEST_API */
+
/**
* kbase_kinstr_prfcnt_enum_info - Enumerate performance counter information.
* @kinstr_ctx: Non-NULL pointer to the kinstr_prfcnt context.
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index 320ffef..de854f3 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -4468,8 +4468,8 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx,
addr_start = reg->heap_info_gpu_addr - jit_report_gpu_mem_offset;
- ptr = kbase_vmap(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE,
- &mapping);
+ ptr = kbase_vmap_prot(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE,
+ KBASE_REG_CPU_RD, &mapping);
if (!ptr) {
dev_warn(kctx->kbdev->dev,
"%s: JIT start=0x%llx unable to map memory near end pointer %llx\n",
diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h
index 95533f5..9cb4088 100644
--- a/mali_kbase/mali_kbase_mem.h
+++ b/mali_kbase/mali_kbase_mem.h
@@ -287,6 +287,8 @@ static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_m
* that triggered incremental rendering by growing too much.
* @rbtree: Backlink to the red-black tree of memory regions.
* @start_pfn: The Page Frame Number in GPU virtual address space.
+ * @user_data: The address of GPU command queue when VA region represents
+ * a ring buffer.
* @nr_pages: The size of the region in pages.
* @initial_commit: Initial commit, for aligning the start address and
* correctly growing KBASE_REG_TILER_ALIGN_TOP regions.
@@ -324,6 +326,7 @@ struct kbase_va_region {
struct list_head link;
struct rb_root *rbtree;
u64 start_pfn;
+ void *user_data;
size_t nr_pages;
size_t initial_commit;
size_t threshold_pages;
@@ -476,6 +479,7 @@ struct kbase_va_region {
struct list_head jit_node;
u16 jit_usage_id;
u8 jit_bin_id;
+
#if MALI_JIT_PRESSURE_LIMIT_BASE
/* Pointer to an object in GPU memory defining an end of an allocated
* region
diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c
index 527bec4..d252373 100644
--- a/mali_kbase/mali_kbase_mem_linux.c
+++ b/mali_kbase/mali_kbase_mem_linux.c
@@ -1029,7 +1029,7 @@ int kbase_mem_do_sync_imported(struct kbase_context *kctx,
struct kbase_va_region *reg, enum kbase_sync_type sync_fn)
{
int ret = -EINVAL;
- struct dma_buf *dma_buf;
+ struct dma_buf __maybe_unused *dma_buf;
enum dma_data_direction dir = DMA_BIDIRECTIONAL;
lockdep_assert_held(&kctx->reg_lock);
@@ -3214,8 +3214,12 @@ static unsigned long get_queue_doorbell_pfn(struct kbase_device *kbdev,
* assigned one, otherwise a dummy page. Always return the
* dummy page in no mali builds.
*/
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ return PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_db_page));
+#else
if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
return PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_db_page));
+#endif
return (PFN_DOWN(kbdev->reg_start + CSF_HW_DOORBELL_PAGE_OFFSET +
(u64)queue->doorbell_nr * CSF_HW_DOORBELL_PAGE_SIZE));
}
@@ -3461,8 +3465,12 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf)
/* Don't map in the actual register page if GPU is powered down.
* Always map in the dummy page in no mali builds.
*/
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_user_reg_page));
+#else
if (!kbdev->pm.backend.gpu_powered)
pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_user_reg_page));
+#endif
ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev,
KBASE_MEM_GROUP_CSF_FW, vma,
diff --git a/mali_kbase/mali_kbase_pm.c b/mali_kbase/mali_kbase_pm.c
index 4078da1..af154d5 100644
--- a/mali_kbase/mali_kbase_pm.c
+++ b/mali_kbase/mali_kbase_pm.c
@@ -144,7 +144,7 @@ void kbase_pm_context_idle(struct kbase_device *kbdev)
KBASE_EXPORT_TEST_API(kbase_pm_context_idle);
-void kbase_pm_driver_suspend(struct kbase_device *kbdev)
+int kbase_pm_driver_suspend(struct kbase_device *kbdev)
{
KBASE_DEBUG_ASSERT(kbdev);
@@ -162,7 +162,7 @@ void kbase_pm_driver_suspend(struct kbase_device *kbdev)
mutex_lock(&kbdev->pm.lock);
if (WARN_ON(kbase_pm_is_suspending(kbdev))) {
mutex_unlock(&kbdev->pm.lock);
- return;
+ return 0;
}
kbdev->pm.suspending = true;
mutex_unlock(&kbdev->pm.lock);
@@ -193,7 +193,12 @@ void kbase_pm_driver_suspend(struct kbase_device *kbdev)
*/
kbasep_js_suspend(kbdev);
#else
- kbase_csf_scheduler_pm_suspend(kbdev);
+ if (kbase_csf_scheduler_pm_suspend(kbdev)) {
+ mutex_lock(&kbdev->pm.lock);
+ kbdev->pm.suspending = false;
+ mutex_unlock(&kbdev->pm.lock);
+ return -1;
+ }
#endif
/* Wait for the active count to reach zero. This is not the same as
@@ -209,7 +214,12 @@ void kbase_pm_driver_suspend(struct kbase_device *kbdev)
/* NOTE: We synchronize with anything that was just finishing a
* kbase_pm_context_idle() call by locking the pm.lock below
*/
- kbase_hwaccess_pm_suspend(kbdev);
+ if (kbase_hwaccess_pm_suspend(kbdev)) {
+ mutex_lock(&kbdev->pm.lock);
+ kbdev->pm.suspending = false;
+ mutex_unlock(&kbdev->pm.lock);
+ return -1;
+ }
#ifdef CONFIG_MALI_ARBITER_SUPPORT
if (kbdev->arb.arb_if) {
@@ -218,6 +228,8 @@ void kbase_pm_driver_suspend(struct kbase_device *kbdev)
mutex_unlock(&kbdev->pm.arb_vm_state->vm_state_lock);
}
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
+
+ return 0;
}
void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start)
@@ -273,16 +285,19 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start)
kbase_kinstr_prfcnt_resume(kbdev->kinstr_prfcnt_ctx);
}
-void kbase_pm_suspend(struct kbase_device *kbdev)
+int kbase_pm_suspend(struct kbase_device *kbdev)
{
+ int result = 0;
#ifdef CONFIG_MALI_ARBITER_SUPPORT
if (kbdev->arb.arb_if)
kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_OS_SUSPEND_EVENT);
else
- kbase_pm_driver_suspend(kbdev);
+ result = kbase_pm_driver_suspend(kbdev);
#else
- kbase_pm_driver_suspend(kbdev);
+ result = kbase_pm_driver_suspend(kbdev);
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
+
+ return result;
}
void kbase_pm_resume(struct kbase_device *kbdev)
diff --git a/mali_kbase/mali_kbase_pm.h b/mali_kbase/mali_kbase_pm.h
index 980a8d1..730feea 100644
--- a/mali_kbase/mali_kbase_pm.h
+++ b/mali_kbase/mali_kbase_pm.h
@@ -165,8 +165,10 @@ void kbase_pm_context_idle(struct kbase_device *kbdev);
* @note the mechanisms used here rely on all user-space threads being frozen
* by the OS before we suspend. Otherwise, an IOCTL could occur that powers up
* the GPU e.g. via atom submission.
+ *
+ * Return: 0 on success.
*/
-void kbase_pm_suspend(struct kbase_device *kbdev);
+int kbase_pm_suspend(struct kbase_device *kbdev);
/**
* Resume the GPU, allow register accesses to it, and resume running atoms on
@@ -207,8 +209,10 @@ void kbase_pm_vsync_callback(int buffer_updated, void *data);
* @note the mechanisms used here rely on all user-space threads being frozen
* by the OS before we suspend. Otherwise, an IOCTL could occur that powers up
* the GPU e.g. via atom submission.
+ *
+ * Return: 0 on success.
*/
-void kbase_pm_driver_suspend(struct kbase_device *kbdev);
+int kbase_pm_driver_suspend(struct kbase_device *kbdev);
/**
* kbase_pm_driver_resume() - Put GPU and driver in resume
diff --git a/mali_kbase/mali_kbase_regs_history_debugfs.h b/mali_kbase/mali_kbase_regs_history_debugfs.h
index 26decb4..1b4196d 100644
--- a/mali_kbase/mali_kbase_regs_history_debugfs.h
+++ b/mali_kbase/mali_kbase_regs_history_debugfs.h
@@ -69,7 +69,7 @@ void kbase_io_history_dump(struct kbase_device *kbdev);
*/
void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev);
-#else /* defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_NO_MALI) */
+#else /* !defined(CONFIG_DEBUG_FS) || IS_ENABLED(CONFIG_MALI_NO_MALI) */
#define kbase_io_history_init(...) ((int)0)
diff --git a/mali_kbase/mali_kbase_softjobs.c b/mali_kbase/mali_kbase_softjobs.c
index bee3513..df34854 100644
--- a/mali_kbase/mali_kbase_softjobs.c
+++ b/mali_kbase/mali_kbase_softjobs.c
@@ -95,7 +95,8 @@ static int kbasep_read_soft_event_status(
unsigned char *mapped_evt;
struct kbase_vmap_struct map;
- mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map);
+ mapped_evt = kbase_vmap_prot(kctx, evt, sizeof(*mapped_evt),
+ KBASE_REG_CPU_RD, &map);
if (!mapped_evt)
return -EFAULT;
@@ -116,7 +117,8 @@ static int kbasep_write_soft_event_status(
(new_status != BASE_JD_SOFT_EVENT_RESET))
return -EINVAL;
- mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map);
+ mapped_evt = kbase_vmap_prot(kctx, evt, sizeof(*mapped_evt),
+ KBASE_REG_CPU_WR, &map);
if (!mapped_evt)
return -EFAULT;
@@ -1203,8 +1205,8 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom)
* Write the address of the JIT allocation to the user provided
* GPU allocation.
*/
- ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr),
- &mapping);
+ ptr = kbase_vmap_prot(kctx, info->gpu_alloc_addr, sizeof(*ptr),
+ KBASE_REG_CPU_WR, &mapping);
if (!ptr) {
/*
* Leave the allocations "live" as the JIT free atom
@@ -1649,7 +1651,12 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom)
if (copy_to_user((__user void *)(uintptr_t)katom->jc,
&fence, sizeof(fence)) != 0) {
kbase_sync_fence_out_remove(katom);
- kbase_sync_fence_close_fd(fd);
+ /* fd should have been closed here, but there's
+ * no good way of doing that. Since
+ * copy_to_user() very rarely fails, and the fd
+ * will get closed on process termination, this
+ * won't be a problem.
+ */
fence.basep.fd = -EINVAL;
return -EINVAL;
}
diff --git a/mali_kbase/mali_kbase_sync.h b/mali_kbase/mali_kbase_sync.h
index ad05cdf..11cb8b9 100644
--- a/mali_kbase/mali_kbase_sync.h
+++ b/mali_kbase/mali_kbase_sync.h
@@ -157,21 +157,6 @@ void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom);
void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom);
#endif /* !MALI_USE_CSF */
-/**
- * kbase_sync_fence_close_fd() - Close a file descriptor representing a fence
- * @fd: File descriptor to close
- */
-static inline void kbase_sync_fence_close_fd(int fd)
-{
-#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE
- close_fd(fd);
-#elif KERNEL_VERSION(4, 17, 0) <= LINUX_VERSION_CODE
- ksys_close(fd);
-#else
- sys_close(fd);
-#endif
-}
-
#if !MALI_USE_CSF
/**
* kbase_sync_fence_in_info_get() - Retrieves information about input fence
diff --git a/mali_kbase/mali_malisw.h b/mali_kbase/mali_malisw.h
index 3ddfcd9..92c8d31 100644
--- a/mali_kbase/mali_malisw.h
+++ b/mali_kbase/mali_malisw.h
@@ -98,7 +98,14 @@
/* LINUX_VERSION_CODE < 5.4 */
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
-#define fallthrough CSTD_NOP(...) /* fallthrough */
-#endif
+#if defined(GCC_VERSION) && GCC_VERSION >= 70000
+#ifndef __fallthrough
+#define __fallthrough __attribute__((fallthrough))
+#endif /* __fallthrough */
+#define fallthrough __fallthrough
+#else
+#define fallthrough CSTD_NOP(...) /* fallthrough */
+#endif /* GCC_VERSION >= 70000 */
+#endif /* KERNEL_VERSION(5, 4, 0) */
#endif /* _MALISW_H_ */
diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c
index 5f6cc7a..a450d38 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.c
+++ b/mali_kbase/mmu/mali_kbase_mmu.c
@@ -41,10 +41,91 @@
#include <mmu/mali_kbase_mmu_internal.h>
#include <mali_kbase_cs_experimental.h>
#include <device/mali_kbase_device.h>
+#include <uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h>
+#if !MALI_USE_CSF
+#include <mali_kbase_hwaccess_jm.h>
+#endif
#include <mali_kbase_trace_gpu_mem.h>
/**
+ * mmu_flush_cache_on_gpu_ctrl() - Check if cache flush needs to be done
+ * through GPU_CONTROL interface
+ * @kbdev: kbase device to check GPU model ID on.
+ *
+ * This function returns whether a cache flush for page table update should
+ * run through GPU_CONTROL interface or MMU_AS_CONTROL interface.
+ *
+ * Return: True if cache flush should be done on GPU command.
+ */
+static bool mmu_flush_cache_on_gpu_ctrl(struct kbase_device *kbdev)
+{
+ uint32_t const arch_maj_cur = (kbdev->gpu_props.props.raw_props.gpu_id &
+ GPU_ID2_ARCH_MAJOR) >>
+ GPU_ID2_ARCH_MAJOR_SHIFT;
+
+ return arch_maj_cur > 11;
+}
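
Concretely, for a GPU whose ID encodes an architecture major version of, say, 12 (a hypothetical value used only for illustration):

    /* Example: arch_maj_cur =
     *   (gpu_id & GPU_ID2_ARCH_MAJOR) >> GPU_ID2_ARCH_MAJOR_SHIFT = 12
     * and 12 > 11, so cache maintenance for page table updates is routed
     * through the GPU_CONTROL FLUSH_CACHES commands instead of MMU_AS_CONTROL.
     */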
+
+/**
+ * mmu_flush_invalidate_on_gpu_ctrl() - Flush and invalidate the GPU caches
+ * through GPU_CONTROL interface.
+ * @kbdev: kbase device to issue the MMU operation on.
+ * @as: address space to issue the MMU operation on.
+ * @op_param: parameters for the operation.
+ *
+ * This wrapper function translates AS_COMMAND_FLUSH_PT and AS_COMMAND_FLUSH_MEM
+ * into the equivalent GPU_CONTROL command FLUSH_CACHES.
+ * The function first issues LOCK to the MMU-AS with kbase_mmu_hw_do_operation(),
+ * then issues the cache flush with kbase_gpu_cache_flush_and_busy_wait(), and
+ * finally issues UNLOCK to the MMU-AS with kbase_mmu_hw_do_operation().
+ *
+ * Return: Zero if the operation was successful, non-zero otherwise.
+ */
+static int
+mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev,
+ struct kbase_as *as,
+ struct kbase_mmu_hw_op_param *op_param)
+{
+ u32 flush_op;
+ int ret, ret2;
+
+ if (WARN_ON(kbdev == NULL) ||
+ WARN_ON(as == NULL) ||
+ WARN_ON(op_param == NULL))
+ return -EINVAL;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+ lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
+ /* Translate operation to command */
+ if (op_param->op == KBASE_MMU_OP_FLUSH_PT) {
+ flush_op = GPU_COMMAND_CACHE_CLN_INV_L2;
+ } else if (op_param->op == KBASE_MMU_OP_FLUSH_MEM) {
+ flush_op = GPU_COMMAND_CACHE_CLN_INV_L2_LSC;
+ } else {
+ dev_warn(kbdev->dev, "Invalid flush request (op = %d)\n",
+ op_param->op);
+ return -EINVAL;
+ }
+
+ /* 1. Issue MMU_AS_CONTROL.COMMAND.LOCK operation. */
+ op_param->op = KBASE_MMU_OP_LOCK;
+ ret = kbase_mmu_hw_do_operation(kbdev, as, op_param);
+ if (ret)
+ return ret;
+
+ /* 2. Issue GPU_CONTROL.COMMAND.FLUSH_CACHES operation */
+ ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, flush_op);
+
+ /* 3. Issue MMU_AS_CONTROL.COMMAND.UNLOCK operation. */
+ op_param->op = KBASE_MMU_OP_UNLOCK;
+ ret2 = kbase_mmu_hw_do_operation(kbdev, as, op_param);
+
+ return ret ?: ret2;
+}
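
A brief clarifying note on the combined return value above, phrased as a comment for consistency with the surrounding code:

    /* "return ret ?: ret2;" is the GCC conditional-operator shorthand for
     * "return ret ? ret : ret2;": report the LOCK/cache-flush error if one
     * occurred, otherwise the result of the UNLOCK operation.
     */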
+
+/**
* kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches.
* @kctx: The KBase context.
* @vpfn: The virtual page frame number to start the flush on.
@@ -244,7 +325,11 @@ static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev,
.kctx_id = kctx_id,
.mmu_sync_info = mmu_sync_info,
};
- kbase_mmu_hw_do_operation(kbdev, faulting_as, &op_param);
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
+ mmu_flush_invalidate_on_gpu_ctrl(kbdev, faulting_as, &op_param);
+ } else {
+ kbase_mmu_hw_do_operation(kbdev, faulting_as, &op_param);
+ }
mutex_unlock(&kbdev->mmu_hw_mutex);
@@ -934,7 +1019,13 @@ page_fault_retry:
.kctx_id = kctx->id,
.mmu_sync_info = mmu_sync_info,
};
- kbase_mmu_hw_do_operation(kbdev, faulting_as, &op_param);
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
+ mmu_flush_invalidate_on_gpu_ctrl(kbdev, faulting_as,
+ &op_param);
+ } else {
+ kbase_mmu_hw_do_operation(kbdev, faulting_as,
+ &op_param);
+ }
mutex_unlock(&kbdev->mmu_hw_mutex);
/* AS transaction end */
@@ -1046,11 +1137,7 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev,
int i;
struct page *p;
-#ifdef CONFIG_MALI_2MB_ALLOC
- p = kbase_mem_pool_alloc(&kbdev->mem_pools.large[mmut->group_id]);
-#else /* CONFIG_MALI_2MB_ALLOC */
p = kbase_mem_pool_alloc(&kbdev->mem_pools.small[mmut->group_id]);
-#endif /* CONFIG_MALI_2MB_ALLOC */
if (!p)
return 0;
@@ -1087,12 +1174,7 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev,
return page_to_phys(p);
alloc_free:
-
-#ifdef CONFIG_MALI_2MB_ALLOC
- kbase_mem_pool_free(&kbdev->mem_pools.large[mmut->group_id], p, false);
-#else /* CONFIG_MALI_2MB_ALLOC */
kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, false);
-#endif /* CONFIG_MALI_2MB_ALLOC */
return 0;
}
@@ -1341,11 +1423,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
*/
mutex_unlock(&kctx->mmu.mmu_lock);
err = kbase_mem_pool_grow(
-#ifdef CONFIG_MALI_2MB_ALLOC
- &kbdev->mem_pools.large[
-#else
&kbdev->mem_pools.small[
-#endif
kctx->mmu.group_id],
MIDGARD_MMU_BOTTOMLEVEL);
mutex_lock(&kctx->mmu.mmu_lock);
@@ -1433,11 +1511,7 @@ static void kbase_mmu_free_pgd(struct kbase_device *kbdev,
p = pfn_to_page(PFN_DOWN(pgd));
-#ifdef CONFIG_MALI_2MB_ALLOC
- kbase_mem_pool_free(&kbdev->mem_pools.large[mmut->group_id],
-#else
kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id],
-#endif
p, dirty);
atomic_sub(1, &kbdev->memdev.used_pages);
@@ -1523,11 +1597,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
*/
mutex_unlock(&mmut->mmu_lock);
err = kbase_mem_pool_grow(
-#ifdef CONFIG_MALI_2MB_ALLOC
- &kbdev->mem_pools.large[mmut->group_id],
-#else
&kbdev->mem_pools.small[mmut->group_id],
-#endif
cur_level);
mutex_lock(&mmut->mmu_lock);
} while (!err);
@@ -1681,8 +1751,15 @@ static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx,
.kctx_id = kctx->id,
.mmu_sync_info = mmu_sync_info,
};
- err = kbase_mmu_hw_do_operation(kbdev, &kbdev->as[kctx->as_nr],
- &op_param);
+
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
+ err = mmu_flush_invalidate_on_gpu_ctrl(
+ kbdev, &kbdev->as[kctx->as_nr], &op_param);
+ } else {
+ err = kbase_mmu_hw_do_operation(kbdev, &kbdev->as[kctx->as_nr],
+ &op_param);
+ }
+
if (err) {
/* Flush failed to complete, assume the
* GPU has hung and perform a reset to recover
@@ -1744,7 +1821,13 @@ kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as,
else
op_param.op = KBASE_MMU_OP_FLUSH_PT;
- err = kbase_mmu_hw_do_operation(kbdev, as, &op_param);
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ err = mmu_flush_invalidate_on_gpu_ctrl(kbdev, as, &op_param);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ } else {
+ err = kbase_mmu_hw_do_operation(kbdev, as, &op_param);
+ }
if (err) {
/* Flush failed to complete, assume the GPU has hung and
@@ -1850,6 +1933,15 @@ void kbase_mmu_disable(struct kbase_context *kctx)
kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0);
kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
+#if !MALI_USE_CSF
+ /*
+ * JM GPUs have some L1 read-only caches that need to be invalidated
+ * with START_FLUSH configuration. Purge the MMU disabled kctx from
+ * the slot_rb tracking field so such invalidation is performed when
+ * a new katom is executed on the affected slots.
+ */
+ kbase_backend_slot_kctx_purge_locked(kctx->kbdev, kctx);
+#endif
}
KBASE_EXPORT_TEST_API(kbase_mmu_disable);
@@ -2271,11 +2363,7 @@ int kbase_mmu_init(struct kbase_device *const kbdev,
int err;
err = kbase_mem_pool_grow(
-#ifdef CONFIG_MALI_2MB_ALLOC
- &kbdev->mem_pools.large[mmut->group_id],
-#else
&kbdev->mem_pools.small[mmut->group_id],
-#endif
MIDGARD_MMU_BOTTOMLEVEL);
if (err) {
kbase_mmu_term(kbdev, mmut);
diff --git a/mali_kbase/mmu/mali_kbase_mmu.h b/mali_kbase/mmu/mali_kbase_mmu.h
index 45a628c..fe721fc 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.h
+++ b/mali_kbase/mmu/mali_kbase_mmu.h
@@ -30,8 +30,9 @@ struct kbase_context;
struct kbase_mmu_table;
/**
- * MMU-synchronous caller info. A pointer to this type is passed down from the outer-most callers
- * in the kbase module - where the information resides as to the synchronous / asynchronous
+ * enum kbase_caller_mmu_sync_info - MMU-synchronous caller info.
+ * A pointer to this type is passed down from the outer-most callers in the kbase
+ * module - where the information resides as to the synchronous / asynchronous
* nature of the call flow, with respect to MMU operations. ie - does the call flow relate to
* existing GPU work does it come from requests (like ioctl) from user-space, power management,
* etc.
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw.h b/mali_kbase/mmu/mali_kbase_mmu_hw.h
index 7c0e95e..7cdf426 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw.h
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw.h
@@ -78,7 +78,7 @@ enum kbase_mmu_op_type {
* struct kbase_mmu_hw_op_param - parameters for kbase_mmu_hw_do_operation()
* @vpfn: MMU Virtual Page Frame Number to start the operation on.
* @nr: Number of pages to work on.
- * @type: Operation type (written to ASn_COMMAND).
+ * @op: Operation type (written to ASn_COMMAND).
* @kctx_id: Kernel context ID for MMU command tracepoint
* @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops.
*/
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
index 6306946..0ebc1bc 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
@@ -128,23 +128,19 @@ static int wait_ready(struct kbase_device *kbdev,
unsigned int as_nr)
{
unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
- u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS));
- /* Wait for the MMU status to indicate there is no active command, in
- * case one is pending. Do not log remaining register accesses.
- */
- while (--max_loops && (val & AS_STATUS_AS_ACTIVE))
- val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS));
+ /* Wait for the MMU status to indicate there is no active command. */
+ while (--max_loops &&
+ kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)) &
+ AS_STATUS_AS_ACTIVE) {
+ ;
+ }
if (max_loops == 0) {
dev_err(kbdev->dev, "AS_ACTIVE bit stuck, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n");
return -1;
}
- /* If waiting in loop was performed, log last read value. */
- if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops)
- kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS));
-
return 0;
}
@@ -216,6 +212,11 @@ int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
int ret;
u64 lock_addr = 0x0;
+ if (WARN_ON(kbdev == NULL) ||
+ WARN_ON(as == NULL) ||
+ WARN_ON(op_param == NULL))
+ return -EINVAL;
+
lockdep_assert_held(&kbdev->mmu_hw_mutex);
if (op_param->op == KBASE_MMU_OP_UNLOCK) {
diff --git a/mali_kbase/tests/include/kutf/kutf_utils.h b/mali_kbase/tests/include/kutf/kutf_utils.h
index 18dcc3d..5f6d769 100644
--- a/mali_kbase/tests/include/kutf/kutf_utils.h
+++ b/mali_kbase/tests/include/kutf/kutf_utils.h
@@ -54,6 +54,7 @@
* Return: Returns pointer to allocated string, or NULL on error.
*/
const char *kutf_dsprintf(struct kutf_mempool *pool,
- const char *fmt, ...);
+ const char *fmt, ...) __printf(2, 3);
+
#endif /* _KERNEL_UTF_UTILS_H_ */
diff --git a/mali_kbase/tl/backend/mali_kbase_timeline_csf.c b/mali_kbase/tl/backend/mali_kbase_timeline_csf.c
index c101563..567c5f1 100644
--- a/mali_kbase/tl/backend/mali_kbase_timeline_csf.c
+++ b/mali_kbase/tl/backend/mali_kbase_timeline_csf.c
@@ -44,6 +44,12 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev)
GPU_ID2_ARCH_MAJOR) >>
GPU_ID2_ARCH_MAJOR_SHIFT;
u32 const num_sb_entries = arch_maj >= 11 ? 16 : 8;
+ u32 const supports_gpu_sleep =
+#ifdef KBASE_PM_RUNTIME
+ kbdev->pm.backend.gpu_sleep_supported;
+#else
+ false;
+#endif /* KBASE_PM_RUNTIME */
/* Summarize the Address Space objects. */
for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
@@ -62,11 +68,11 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev)
kbdev);
/* Trace the creation of a new kbase device and set its properties. */
- __kbase_tlstream_tl_kbase_new_device(
- summary, kbdev->gpu_props.props.raw_props.gpu_id,
- kbdev->gpu_props.num_cores, kbdev->csf.global_iface.group_num,
- kbdev->nr_hw_address_spaces, num_sb_entries,
- kbdev_has_cross_stream_sync);
+ __kbase_tlstream_tl_kbase_new_device(summary, kbdev->gpu_props.props.raw_props.gpu_id,
+ kbdev->gpu_props.num_cores,
+ kbdev->csf.global_iface.group_num,
+ kbdev->nr_hw_address_spaces, num_sb_entries,
+ kbdev_has_cross_stream_sync, supports_gpu_sleep);
/* Lock the context list, to ensure no changes to the list are made
* while we're summarizing the contexts and their contents.
@@ -89,7 +95,7 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev)
__kbase_tlstream_tl_kbase_device_program_csg(
summary,
kbdev->gpu_props.props.raw_props.gpu_id,
- group->kctx->id, group->handle, slot_i);
+ group->kctx->id, group->handle, slot_i, 0);
}
/* Reset body stream buffers while holding the kctx lock.
diff --git a/mali_kbase/tl/mali_kbase_tracepoints.c b/mali_kbase/tl/mali_kbase_tracepoints.c
index 54e51f8..abbed05 100644
--- a/mali_kbase/tl/mali_kbase_tracepoints.c
+++ b/mali_kbase/tl/mali_kbase_tracepoints.c
@@ -120,8 +120,14 @@ enum tl_msg_id_obj {
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END,
+ KBASE_TL_KBASE_CSFFW_FW_RELOADING,
+ KBASE_TL_KBASE_CSFFW_FW_ENABLING,
+ KBASE_TL_KBASE_CSFFW_FW_REQUEST_SLEEP,
+ KBASE_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP,
+ KBASE_TL_KBASE_CSFFW_FW_REQUEST_HALT,
+ KBASE_TL_KBASE_CSFFW_FW_DISABLING,
+ KBASE_TL_KBASE_CSFFW_FW_OFF,
KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW,
- KBASE_TL_KBASE_CSFFW_RESET,
KBASE_TL_JS_SCHED_START,
KBASE_TL_JS_SCHED_END,
KBASE_TL_JD_SUBMIT_ATOM_START,
@@ -312,12 +318,12 @@ enum tl_msg_id_aux {
"gpu") \
TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_DEVICE, \
"New KBase Device", \
- "@IIIIII", \
- "kbase_device_id,kbase_device_gpu_core_count,kbase_device_max_num_csgs,kbase_device_as_count,kbase_device_sb_entry_count,kbase_device_has_cross_stream_sync") \
+ "@IIIIIII", \
+ "kbase_device_id,kbase_device_gpu_core_count,kbase_device_max_num_csgs,kbase_device_as_count,kbase_device_sb_entry_count,kbase_device_has_cross_stream_sync,kbase_device_supports_gpu_sleep") \
TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, \
"CSG is programmed to a slot", \
- "@IIII", \
- "kbase_device_id,kernel_ctx_id,gpu_cmdq_grp_handle,kbase_device_csg_slot_index") \
+ "@IIIII", \
+ "kbase_device_id,kernel_ctx_id,gpu_cmdq_grp_handle,kbase_device_csg_slot_index,kbase_device_csg_slot_resumed") \
TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, \
"CSG is deprogrammed from a slot", \
"@II", \
@@ -506,14 +512,38 @@ enum tl_msg_id_aux {
"KCPU Queue ends a group suspend", \
"@pI", \
"kcpu_queue,execute_error") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_RELOADING, \
+ "CSF FW is being reloaded", \
+ "@L", \
+ "csffw_cycle") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_ENABLING, \
+ "CSF FW is being enabled", \
+ "@L", \
+ "csffw_cycle") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_REQUEST_SLEEP, \
+ "CSF FW sleep is requested", \
+ "@L", \
+ "csffw_cycle") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP, \
+ "CSF FW wake up is requested", \
+ "@L", \
+ "csffw_cycle") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_REQUEST_HALT, \
+ "CSF FW halt is requested", \
+ "@L", \
+ "csffw_cycle") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_DISABLING, \
+ "CSF FW is being disabled", \
+ "@L", \
+ "csffw_cycle") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_OFF, \
+ "CSF FW is off", \
+ "@L", \
+ "csffw_cycle") \
TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW, \
"An overflow has happened with the CSFFW Timeline stream", \
"@LL", \
"csffw_timestamp,csffw_cycle") \
- TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_RESET, \
- "A reset has happened with the CSFFW", \
- "@L", \
- "csffw_cycle") \
TRACEPOINT_DESC(KBASE_TL_JS_SCHED_START, \
"Scheduling starts", \
"@I", \
@@ -2046,7 +2076,8 @@ void __kbase_tlstream_tl_kbase_new_device(
u32 kbase_device_max_num_csgs,
u32 kbase_device_as_count,
u32 kbase_device_sb_entry_count,
- u32 kbase_device_has_cross_stream_sync)
+ u32 kbase_device_has_cross_stream_sync,
+ u32 kbase_device_supports_gpu_sleep)
{
const u32 msg_id = KBASE_TL_KBASE_NEW_DEVICE;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2056,6 +2087,7 @@ void __kbase_tlstream_tl_kbase_new_device(
+ sizeof(kbase_device_as_count)
+ sizeof(kbase_device_sb_entry_count)
+ sizeof(kbase_device_has_cross_stream_sync)
+ + sizeof(kbase_device_supports_gpu_sleep)
;
char *buffer;
unsigned long acq_flags;
@@ -2077,6 +2109,8 @@ void __kbase_tlstream_tl_kbase_new_device(
pos, &kbase_device_sb_entry_count, sizeof(kbase_device_sb_entry_count));
pos = kbasep_serialize_bytes(buffer,
pos, &kbase_device_has_cross_stream_sync, sizeof(kbase_device_has_cross_stream_sync));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kbase_device_supports_gpu_sleep, sizeof(kbase_device_supports_gpu_sleep));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
@@ -2086,7 +2120,8 @@ void __kbase_tlstream_tl_kbase_device_program_csg(
u32 kbase_device_id,
u32 kernel_ctx_id,
u32 gpu_cmdq_grp_handle,
- u32 kbase_device_csg_slot_index)
+ u32 kbase_device_csg_slot_index,
+ u32 kbase_device_csg_slot_resumed)
{
const u32 msg_id = KBASE_TL_KBASE_DEVICE_PROGRAM_CSG;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2094,6 +2129,7 @@ void __kbase_tlstream_tl_kbase_device_program_csg(
+ sizeof(kernel_ctx_id)
+ sizeof(gpu_cmdq_grp_handle)
+ sizeof(kbase_device_csg_slot_index)
+ + sizeof(kbase_device_csg_slot_resumed)
;
char *buffer;
unsigned long acq_flags;
@@ -2111,6 +2147,8 @@ void __kbase_tlstream_tl_kbase_device_program_csg(
pos, &gpu_cmdq_grp_handle, sizeof(gpu_cmdq_grp_handle));
pos = kbasep_serialize_bytes(buffer,
pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kbase_device_csg_slot_resumed, sizeof(kbase_device_csg_slot_resumed));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
@@ -3309,14 +3347,12 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow(
+void __kbase_tlstream_tl_kbase_csffw_fw_reloading(
struct kbase_tlstream *stream,
- u64 csffw_timestamp,
u64 csffw_cycle)
{
- const u32 msg_id = KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW;
+ const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_RELOADING;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(csffw_timestamp)
+ sizeof(csffw_cycle)
;
char *buffer;
@@ -3328,18 +3364,104 @@ void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow(
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &csffw_timestamp, sizeof(csffw_timestamp));
+ pos, &csffw_cycle, sizeof(csffw_cycle));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_kbase_csffw_fw_enabling(
+ struct kbase_tlstream *stream,
+ u64 csffw_cycle)
+{
+ const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_ENABLING;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(csffw_cycle)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &csffw_cycle, sizeof(csffw_cycle));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_kbase_csffw_fw_request_sleep(
+ struct kbase_tlstream *stream,
+ u64 csffw_cycle)
+{
+ const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_REQUEST_SLEEP;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(csffw_cycle)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &csffw_cycle, sizeof(csffw_cycle));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_kbase_csffw_fw_request_wakeup(
+ struct kbase_tlstream *stream,
+ u64 csffw_cycle)
+{
+ const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(csffw_cycle)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &csffw_cycle, sizeof(csffw_cycle));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_kbase_csffw_fw_request_halt(
+ struct kbase_tlstream *stream,
+ u64 csffw_cycle)
+{
+ const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_REQUEST_HALT;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(csffw_cycle)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
pos, &csffw_cycle, sizeof(csffw_cycle));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_tl_kbase_csffw_reset(
+void __kbase_tlstream_tl_kbase_csffw_fw_disabling(
struct kbase_tlstream *stream,
u64 csffw_cycle)
{
- const u32 msg_id = KBASE_TL_KBASE_CSFFW_RESET;
+ const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_DISABLING;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(csffw_cycle)
;
@@ -3357,6 +3479,54 @@ void __kbase_tlstream_tl_kbase_csffw_reset(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
+void __kbase_tlstream_tl_kbase_csffw_fw_off(
+ struct kbase_tlstream *stream,
+ u64 csffw_cycle)
+{
+ const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_OFF;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(csffw_cycle)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &csffw_cycle, sizeof(csffw_cycle));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow(
+ struct kbase_tlstream *stream,
+ u64 csffw_timestamp,
+ u64 csffw_cycle)
+{
+ const u32 msg_id = KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(csffw_timestamp)
+ + sizeof(csffw_cycle)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &csffw_timestamp, sizeof(csffw_timestamp));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &csffw_cycle, sizeof(csffw_cycle));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
void __kbase_tlstream_tl_js_sched_start(
struct kbase_tlstream *stream,
u32 dummy)
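The seven new writers above all share one shape: msg_size is the message id plus sizeof(u64) for the timestamp written by kbasep_serialize_timestamp() plus the caller-supplied csffw_cycle, and the fields are serialized in exactly that order. As a reading aid only (the stream is written field by field, never as a struct, and carries no padding), the implied layout of each single-argument FW state message is:

/* Illustrative layout only; not used by the driver. */
struct sketch_csffw_fw_state_msg {
	u32 msg_id;      /* KBASE_TL_KBASE_CSFFW_FW_RELOADING .. _FW_OFF */
	u64 timestamp;   /* emitted by kbasep_serialize_timestamp() */
	u64 csffw_cycle; /* cycle count supplied by the caller */
};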
diff --git a/mali_kbase/tl/mali_kbase_tracepoints.h b/mali_kbase/tl/mali_kbase_tracepoints.h
index 3fc871c..aa10bc0 100644
--- a/mali_kbase/tl/mali_kbase_tracepoints.h
+++ b/mali_kbase/tl/mali_kbase_tracepoints.h
@@ -310,13 +310,15 @@ void __kbase_tlstream_tl_kbase_new_device(
u32 kbase_device_max_num_csgs,
u32 kbase_device_as_count,
u32 kbase_device_sb_entry_count,
- u32 kbase_device_has_cross_stream_sync);
+ u32 kbase_device_has_cross_stream_sync,
+ u32 kbase_device_supports_gpu_sleep);
void __kbase_tlstream_tl_kbase_device_program_csg(
struct kbase_tlstream *stream,
u32 kbase_device_id,
u32 kernel_ctx_id,
u32 gpu_cmdq_grp_handle,
- u32 kbase_device_csg_slot_index);
+ u32 kbase_device_csg_slot_index,
+ u32 kbase_device_csg_slot_resumed);
void __kbase_tlstream_tl_kbase_device_deprogram_csg(
struct kbase_tlstream *stream,
u32 kbase_device_id,
@@ -498,13 +500,31 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end(
struct kbase_tlstream *stream,
const void *kcpu_queue,
u32 execute_error);
-void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow(
+void __kbase_tlstream_tl_kbase_csffw_fw_reloading(
+ struct kbase_tlstream *stream,
+ u64 csffw_cycle);
+void __kbase_tlstream_tl_kbase_csffw_fw_enabling(
+ struct kbase_tlstream *stream,
+ u64 csffw_cycle);
+void __kbase_tlstream_tl_kbase_csffw_fw_request_sleep(
+ struct kbase_tlstream *stream,
+ u64 csffw_cycle);
+void __kbase_tlstream_tl_kbase_csffw_fw_request_wakeup(
+ struct kbase_tlstream *stream,
+ u64 csffw_cycle);
+void __kbase_tlstream_tl_kbase_csffw_fw_request_halt(
struct kbase_tlstream *stream,
- u64 csffw_timestamp,
u64 csffw_cycle);
-void __kbase_tlstream_tl_kbase_csffw_reset(
+void __kbase_tlstream_tl_kbase_csffw_fw_disabling(
struct kbase_tlstream *stream,
u64 csffw_cycle);
+void __kbase_tlstream_tl_kbase_csffw_fw_off(
+ struct kbase_tlstream *stream,
+ u64 csffw_cycle);
+void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow(
+ struct kbase_tlstream *stream,
+ u64 csffw_timestamp,
+ u64 csffw_cycle);
void __kbase_tlstream_tl_js_sched_start(
struct kbase_tlstream *stream,
u32 dummy);
@@ -1684,6 +1704,7 @@ struct kbase_tlstream;
* @kbase_device_sb_entry_count: The number of entries each scoreboard set in the
* physical hardware has available
* @kbase_device_has_cross_stream_sync: Whether cross-stream synchronization is supported
+ * @kbase_device_supports_gpu_sleep: Whether GPU sleep is supported
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE( \
@@ -1693,14 +1714,15 @@ struct kbase_tlstream;
kbase_device_max_num_csgs, \
kbase_device_as_count, \
kbase_device_sb_entry_count, \
- kbase_device_has_cross_stream_sync \
+ kbase_device_has_cross_stream_sync, \
+ kbase_device_supports_gpu_sleep \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_new_device( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kbase_device_id, kbase_device_gpu_core_count, kbase_device_max_num_csgs, kbase_device_as_count, kbase_device_sb_entry_count, kbase_device_has_cross_stream_sync); \
+ kbase_device_id, kbase_device_gpu_core_count, kbase_device_max_num_csgs, kbase_device_as_count, kbase_device_sb_entry_count, kbase_device_has_cross_stream_sync, kbase_device_supports_gpu_sleep); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE( \
@@ -1710,7 +1732,8 @@ struct kbase_tlstream;
kbase_device_max_num_csgs, \
kbase_device_as_count, \
kbase_device_sb_entry_count, \
- kbase_device_has_cross_stream_sync \
+ kbase_device_has_cross_stream_sync, \
+ kbase_device_supports_gpu_sleep \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
@@ -1724,6 +1747,7 @@ struct kbase_tlstream;
* @kernel_ctx_id: Unique ID for the KBase Context
* @gpu_cmdq_grp_handle: GPU Command Queue Group handle which will match userspace
* @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed
+ * @kbase_device_csg_slot_resumed: Whether the csg is being resumed
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( \
@@ -1731,14 +1755,15 @@ struct kbase_tlstream;
kbase_device_id, \
kernel_ctx_id, \
gpu_cmdq_grp_handle, \
- kbase_device_csg_slot_index \
+ kbase_device_csg_slot_index, \
+ kbase_device_csg_slot_resumed \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_device_program_csg( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kbase_device_id, kernel_ctx_id, gpu_cmdq_grp_handle, kbase_device_csg_slot_index); \
+ kbase_device_id, kernel_ctx_id, gpu_cmdq_grp_handle, kbase_device_csg_slot_index, kbase_device_csg_slot_resumed); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( \
@@ -1746,7 +1771,8 @@ struct kbase_tlstream;
kbase_device_id, \
kernel_ctx_id, \
gpu_cmdq_grp_handle, \
- kbase_device_csg_slot_index \
+ kbase_device_csg_slot_index, \
+ kbase_device_csg_slot_resumed \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
@@ -3146,59 +3172,221 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW -
- * An overflow has happened with the CSFFW Timeline stream
+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING -
+ * CSF FW is being reloaded
*
* @kbdev: Kbase device
- * @csffw_timestamp: Timestamp of a CSFFW event
* @csffw_cycle: Cycle number of a CSFFW event
*/
#if MALI_USE_CSF
-#define KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( \
+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING( \
kbdev, \
- csffw_timestamp, \
csffw_cycle \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \
- __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( \
+ __kbase_tlstream_tl_kbase_csffw_fw_reloading( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- csffw_timestamp, csffw_cycle); \
+ csffw_cycle); \
} while (0)
#else
-#define KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( \
+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING( \
kbdev, \
- csffw_timestamp, \
csffw_cycle \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_CSFFW_RESET -
- * A reset has happened with the CSFFW
+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_ENABLING -
+ * CSF FW is being enabled
*
* @kbdev: Kbase device
* @csffw_cycle: Cycle number of a CSFFW event
*/
#if MALI_USE_CSF
-#define KBASE_TLSTREAM_TL_KBASE_CSFFW_RESET( \
+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_ENABLING( \
kbdev, \
csffw_cycle \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \
- __kbase_tlstream_tl_kbase_csffw_reset( \
+ __kbase_tlstream_tl_kbase_csffw_fw_enabling( \
__TL_DISPATCH_STREAM(kbdev, obj), \
csffw_cycle); \
} while (0)
#else
-#define KBASE_TLSTREAM_TL_KBASE_CSFFW_RESET( \
+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_ENABLING( \
+ kbdev, \
+ csffw_cycle \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_SLEEP -
+ * CSF FW sleep is requested
+ *
+ * @kbdev: Kbase device
+ * @csffw_cycle: Cycle number of a CSFFW event
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_SLEEP( \
+ kbdev, \
+ csffw_cycle \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_csffw_fw_request_sleep( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ csffw_cycle); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_SLEEP( \
+ kbdev, \
+ csffw_cycle \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP -
+ * CSF FW wake up is requested
+ *
+ * @kbdev: Kbase device
+ * @csffw_cycle: Cycle number of a CSFFW event
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP( \
+ kbdev, \
+ csffw_cycle \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_csffw_fw_request_wakeup( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ csffw_cycle); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP( \
+ kbdev, \
+ csffw_cycle \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_HALT -
+ * CSF FW halt is requested
+ *
+ * @kbdev: Kbase device
+ * @csffw_cycle: Cycle number of a CSFFW event
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_HALT( \
kbdev, \
csffw_cycle \
) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_csffw_fw_request_halt( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ csffw_cycle); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_HALT( \
+ kbdev, \
+ csffw_cycle \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_DISABLING -
+ * CSF FW is being disabled
+ *
+ * @kbdev: Kbase device
+ * @csffw_cycle: Cycle number of a CSFFW event
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_DISABLING( \
+ kbdev, \
+ csffw_cycle \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_csffw_fw_disabling( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ csffw_cycle); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_DISABLING( \
+ kbdev, \
+ csffw_cycle \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_OFF -
+ * CSF FW is off
+ *
+ * @kbdev: Kbase device
+ * @csffw_cycle: Cycle number of a CSFFW event
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_OFF( \
+ kbdev, \
+ csffw_cycle \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_csffw_fw_off( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ csffw_cycle); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_OFF( \
+ kbdev, \
+ csffw_cycle \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW -
+ * An overflow has happened with the CSFFW Timeline stream
+ *
+ * @kbdev: Kbase device
+ * @csffw_timestamp: Timestamp of a CSFFW event
+ * @csffw_cycle: Cycle number of a CSFFW event
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( \
+ kbdev, \
+ csffw_timestamp, \
+ csffw_cycle \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ csffw_timestamp, csffw_cycle); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( \
+ kbdev, \
+ csffw_timestamp, \
+ csffw_cycle \
+ ) \
do { } while (0)
#endif /* MALI_USE_CSF */
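For completeness, a minimal usage sketch of the new macros (not part of this patch): only the macro name and its guard conditions come from the definitions above; the helper name and the source of the cycle value are assumptions.

#include "mali_kbase.h"
#include "tl/mali_kbase_tracepoints.h"

static void sketch_trace_fw_sleep_request(struct kbase_device *kbdev,
					  u64 fw_cycle)
{
	/* Compiles to a no-op when MALI_USE_CSF is not set, and only reaches
	 * the stream when BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS is enabled
	 * in kbdev->timeline_flags.
	 */
	KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_SLEEP(kbdev, fw_cycle);
}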