author      Sidath Senanayake <sidaths@google.com>    2021-06-15 13:39:30 +0100
committer   Sidath Senanayake <sidaths@google.com>    2021-06-15 14:11:16 +0100
commit      fca8613cfcf585bf9113dca96a05daea9fd89794
tree        f2baa14910f83edf00450bc30d3703eb255a0bba
parent      8037b534570814775d79aeddd06b76e5ee941f59
download    gpu-fca8613cfcf585bf9113dca96a05daea9fd89794.tar.gz
Mali Valhall DDK r31p0 KMD
Provenance: 2ea0ef9bd (collaborate/EAC/v_r31p0)
VX504X08X-BU-00000-r31p0-01eac0 - Valhall Android DDK
VX504X08X-BU-60000-r31p0-01eac0 - Valhall Android Document Bundle
VX504X08X-DC-11001-r31p0-01eac0 - Valhall Android DDK Software Errata
VX504X08X-SW-99006-r31p0-01eac0 - Valhall Android Renderscript AOSP parts
Signed-off-by: Sidath Senanayake <sidaths@google.com>
Change-Id: Ide9d5fdc6d9c95fa66a3546b01f619b43c09496d
132 files changed, 4422 insertions, 3277 deletions
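The first hunk below renames the pcm_scheduler_priority_check() argument from mgm_dev to pcm_dev and adds a data pointer to struct priority_control_manager_device. As a rough illustration of how an integrator's module might use the new member to carry private state into that callback, here is a minimal sketch; the include path, the "example_*" identifiers and the CAP_SYS_NICE policy are assumptions for illustration, not part of this drop:

    #include <linux/module.h>
    #include <linux/capability.h>
    #include <linux/sched.h>
    #include <linux/priority_control_manager.h>  /* header added under common/include in this drop */

    /* Hypothetical platform-private state, reachable through the new ->data member. */
    struct example_pcm_state {
        int default_priority;
    };

    static struct example_pcm_state example_state = { .default_priority = 0 };

    static int example_priority_check(struct priority_control_manager_device *pcm_dev,
                                      struct task_struct *task, int requested_priority)
    {
        struct example_pcm_state *state = pcm_dev->data;

        /* Privileged tasks get what they asked for; everyone else falls back to
         * the platform default held in the private data (policy assumed here).
         */
        if (has_capability(task, CAP_SYS_NICE))
            return requested_priority;
        return state->default_priority;
    }

    /* Device instance wired up with the embedded ops, the new data pointer and
     * the module owner; registration with the platform device is omitted.
     */
    static struct priority_control_manager_device example_pcm_dev = {
        .ops   = { .pcm_scheduler_priority_check = example_priority_check },
        .data  = &example_state,
        .owner = THIS_MODULE,
    };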
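The new uapi header mali_kbase_model_dummy.h (added below) sizes the dummy model's performance-counter dump entirely through macros. As a quick cross-check of those definitions: each block carries 4 header plus 60 counter values (64 x 32-bit words, 256 bytes), and the worst case allows 1 + 1 + 8 memory-system + 32 shader-core blocks, i.e. 42 blocks or 10752 bytes. The small user-space snippet here only recomputes that arithmetic and is not part of the drop:

    #include <stdio.h>

    /* Values mirrored from mali_kbase_model_dummy.h purely as a sanity check. */
    #define VALUES_PER_BLOCK   (4 + 60)                          /* header + counter words */
    #define BLOCK_SIZE_BYTES   (VALUES_PER_BLOCK * sizeof(unsigned int))
    #define MAX_PERF_BLOCKS    (1 + 1 + 8 + 32)                  /* 1 + 1 + memsys + shader blocks */

    int main(void)
    {
        printf("worst-case dump: %zu bytes\n",
               (size_t)MAX_PERF_BLOCKS * BLOCK_SIZE_BYTES);      /* 42 * 256 = 10752 */
        return 0;
    }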
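Further down, union kbase_ioctl_cs_get_glb_iface replaces its out.padding word with out.instr_features, documented as holding the maximum event size in bits 7:4 and the offset update rate in bits 3:0. A caller could unpack the word as sketched here (the helper names are illustrative, not part of the interface):

    #include <linux/types.h>

    /* Bit layout taken from the instr_features documentation in the hunk below. */
    static inline __u32 glb_instr_max_event_size(__u32 instr_features)
    {
        return (instr_features >> 4) & 0xF;  /* bits 7:4 */
    }

    static inline __u32 glb_instr_offset_update_rate(__u32 instr_features)
    {
        return instr_features & 0xF;         /* bits 3:0 */
    }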
diff --git a/common/include/linux/priority_control_manager.h b/common/include/linux/priority_control_manager.h index d3e22f5..df3b3cd 100644 --- a/common/include/linux/priority_control_manager.h +++ b/common/include/linux/priority_control_manager.h @@ -53,7 +53,7 @@ struct priority_control_manager_ops { * Return: The priority that would actually be given, could be lower than requested_priority */ int (*pcm_scheduler_priority_check)( - struct priority_control_manager_device *mgm_dev, + struct priority_control_manager_device *pcm_dev, struct task_struct *task, int requested_priority); }; @@ -62,6 +62,7 @@ struct priority_control_manager_ops { * control manager * * @ops: Callbacks associated with this device + * @data: Pointer to device private data * @owner: Pointer to the module owner * * This structure should be registered with the platform device using @@ -69,6 +70,7 @@ struct priority_control_manager_ops { */ struct priority_control_manager_device { struct priority_control_manager_ops ops; + void *data; struct module *owner; }; diff --git a/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h new file mode 100644 index 0000000..61da071 --- /dev/null +++ b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +/* + * Dummy Model interface + */ + +#ifndef _UAPI_KBASE_MODEL_DUMMY_H_ +#define _UAPI_KBASE_MODEL_DUMMY_H_ + +#include <linux/types.h> + +#define KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS (4) +#define KBASE_DUMMY_MODEL_COUNTER_PER_CORE (60) +#define KBASE_DUMMY_MODEL_COUNTER_PER_CORE_TYPE \ + (64*KBASE_DUMMY_MODEL_COUNTER_PER_CORE) +#define KBASE_DUMMY_MODEL_COUNTERS_PER_BIT (4) +#define KBASE_DUMMY_MODEL_COUNTER_ENABLED(enable_mask, ctr_idx) \ + (enable_mask & (1 << (ctr_idx / KBASE_DUMMY_MODEL_COUNTERS_PER_BIT))) + +#define KBASE_DUMMY_MODEL_HEADERS_PER_BLOCK 4 +#define KBASE_DUMMY_MODEL_COUNTERS_PER_BLOCK 60 +#define KBASE_DUMMY_MODEL_VALUES_PER_BLOCK \ + (KBASE_DUMMY_MODEL_COUNTERS_PER_BLOCK + \ + KBASE_DUMMY_MODEL_HEADERS_PER_BLOCK) +#define KBASE_DUMMY_MODEL_BLOCK_SIZE \ + (KBASE_DUMMY_MODEL_VALUES_PER_BLOCK * sizeof(__u32)) +#define KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS 8 +#define KBASE_DUMMY_MODEL_MAX_SHADER_CORES 32 +#define KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS \ + (1 + 1 + KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS + KBASE_DUMMY_MODEL_MAX_SHADER_CORES) +#define KBASE_DUMMY_MODEL_COUNTER_TOTAL \ + (KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * KBASE_DUMMY_MODEL_COUNTER_PER_CORE_TYPE) + +#endif /* _UAPI_KBASE_MODEL_DUMMY_H_ */ diff --git a/mali_kbase/csf/mali_base_csf_kernel.h b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h index 9a13760..7fa874b 100644 --- a/mali_kbase/csf/mali_base_csf_kernel.h +++ b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h @@ -19,8 +19,10 @@ * */ -#ifndef _BASE_CSF_KERNEL_H_ -#define _BASE_CSF_KERNEL_H_ +#ifndef _UAPI_BASE_CSF_KERNEL_H_ +#define _UAPI_BASE_CSF_KERNEL_H_ + +#include <linux/types.h> /* Memory allocation, access/hint flags. * @@ -203,7 +205,7 @@ /** * Valid set of just-in-time memory allocation flags */ -#define BASE_JIT_ALLOC_VALID_FLAGS ((u8)0) +#define BASE_JIT_ALLOC_VALID_FLAGS ((__u8)0) /* Flags to pass to ::base_context_init. * Flags can be ORed together to enable multiple things. @@ -211,7 +213,7 @@ * These share the same space as BASEP_CONTEXT_FLAG_*, and so must * not collide with them. */ -typedef u32 base_context_create_flags; +typedef __u32 base_context_create_flags; /* No flags set */ #define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0) @@ -228,11 +230,10 @@ typedef u32 base_context_create_flags; #define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \ ((base_context_create_flags)1 << 1) -/* Create CSF event thread. +/* Base context creates a CSF event notification thread. * - * The creation of a CSF event thread is conditional and only allowed in - * unit tests for the moment, in order to avoid clashes with the existing - * Base unit tests. + * The creation of a CSF event notification thread is conditional but + * mandatory for the handling of CSF events. */ #define BASE_CONTEXT_CSF_EVENT_THREAD ((base_context_create_flags)1 << 2) @@ -289,23 +290,26 @@ typedef u32 base_context_create_flags; #define BASE_QUEUE_MAX_PRIORITY (15U) -/* CQS Sync object is an array of u32 event_mem[2], error field index is 1 */ +/* CQS Sync object is an array of __u32 event_mem[2], error field index is 1 */ #define BASEP_EVENT_VAL_INDEX (0U) #define BASEP_EVENT_ERR_INDEX (1U) /* The upper limit for number of objects that could be waited/set per command. * This limit is now enforced as internally the error inherit inputs are - * converted to 32-bit flags in a u32 variable occupying a previously padding + * converted to 32-bit flags in a __u32 variable occupying a previously padding * field. 
*/ #define BASEP_KCPU_CQS_MAX_NUM_OBJS ((size_t)32) +#if MALI_UNIT_TEST /** * enum base_kcpu_command_type - Kernel CPU queue command type. * @BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: fence_signal, * @BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: fence_wait, * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT: cqs_wait, * @BASE_KCPU_COMMAND_TYPE_CQS_SET: cqs_set, + * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: cqs_wait_operation, + * @BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: cqs_set_operation, * @BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: map_import, * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: unmap_import, * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: unmap_import_force, @@ -320,6 +324,8 @@ enum base_kcpu_command_type { BASE_KCPU_COMMAND_TYPE_FENCE_WAIT, BASE_KCPU_COMMAND_TYPE_CQS_WAIT, BASE_KCPU_COMMAND_TYPE_CQS_SET, + BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION, + BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION, BASE_KCPU_COMMAND_TYPE_MAP_IMPORT, BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT, BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE, @@ -327,10 +333,41 @@ enum base_kcpu_command_type { BASE_KCPU_COMMAND_TYPE_JIT_FREE, BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND, BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER, -#if MALI_UNIT_TEST BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME, -#endif /* MALI_UNIT_TEST */ }; +#else +/** + * enum base_kcpu_command_type - Kernel CPU queue command type. + * @BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: fence_signal, + * @BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: fence_wait, + * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT: cqs_wait, + * @BASE_KCPU_COMMAND_TYPE_CQS_SET: cqs_set, + * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: cqs_wait_operation, + * @BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: cqs_set_operation, + * @BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: map_import, + * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: unmap_import, + * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: unmap_import_force, + * @BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: jit_alloc, + * @BASE_KCPU_COMMAND_TYPE_JIT_FREE: jit_free, + * @BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: group_suspend, + * @BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: error_barrier, + */ +enum base_kcpu_command_type { + BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL, + BASE_KCPU_COMMAND_TYPE_FENCE_WAIT, + BASE_KCPU_COMMAND_TYPE_CQS_WAIT, + BASE_KCPU_COMMAND_TYPE_CQS_SET, + BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION, + BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION, + BASE_KCPU_COMMAND_TYPE_MAP_IMPORT, + BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT, + BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE, + BASE_KCPU_COMMAND_TYPE_JIT_ALLOC, + BASE_KCPU_COMMAND_TYPE_JIT_FREE, + BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND, + BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER, +}; +#endif /* MALI_UNIT_TEST */ /** * enum base_queue_group_priority - Priority of a GPU Command Queue Group. 
@@ -363,29 +400,118 @@ enum base_queue_group_priority { }; struct base_kcpu_command_fence_info { - u64 fence; + __u64 fence; }; struct base_cqs_wait_info { - u64 addr; - u32 val; - u32 padding; + __u64 addr; + __u32 val; + __u32 padding; }; struct base_kcpu_command_cqs_wait_info { - u64 objs; - u32 nr_objs; - u32 inherit_err_flags; + __u64 objs; + __u32 nr_objs; + __u32 inherit_err_flags; }; struct base_cqs_set { - u64 addr; + __u64 addr; }; struct base_kcpu_command_cqs_set_info { - u64 objs; - u32 nr_objs; - u32 propagate_flags; + __u64 objs; + __u32 nr_objs; + __u32 padding; +}; + +/** + * basep_cqs_data_type - Enumeration of CQS Data Types + * + * @BASEP_CQS_DATA_TYPE_U32: The Data Type of a CQS Object's value + * is an unsigned 32-bit integer + * @BASEP_CQS_DATA_TYPE_U64: The Data Type of a CQS Object's value + * is an unsigned 64-bit integer + */ +typedef enum PACKED { + BASEP_CQS_DATA_TYPE_U32 = 0, + BASEP_CQS_DATA_TYPE_U64 = 1, +} basep_cqs_data_type; + +/** + * basep_cqs_wait_operation_op - Enumeration of CQS Object Wait + * Operation conditions + * + * @BASEP_CQS_WAIT_OPERATION_LE: CQS Wait Operation indicating that a + * wait will be satisfied when a CQS Object's + * value is Less than or Equal to + * the Wait Operation value + * @BASEP_CQS_WAIT_OPERATION_GT: CQS Wait Operation indicating that a + * wait will be satisfied when a CQS Object's + * value is Greater than the Wait Operation value + */ +typedef enum { + BASEP_CQS_WAIT_OPERATION_LE = 0, + BASEP_CQS_WAIT_OPERATION_GT = 1, +} basep_cqs_wait_operation_op; + +struct base_cqs_wait_operation_info { + __u64 addr; + __u64 val; + __u8 operation; + __u8 data_type; + __u8 padding[6]; +}; + +/** + * struct base_kcpu_command_cqs_wait_operation_info - structure which contains information + * about the Timeline CQS wait objects + * + * @objs: An array of Timeline CQS waits. + * @nr_objs: Number of Timeline CQS waits in the array. + * @inherit_err_flags: Bit-pattern for the CQSs in the array who's error field + * to be served as the source for importing into the + * queue's error-state. + */ +struct base_kcpu_command_cqs_wait_operation_info { + __u64 objs; + __u32 nr_objs; + __u32 inherit_err_flags; +}; + +/** + * basep_cqs_set_operation_op - Enumeration of CQS Set Operations + * + * @BASEP_CQS_SET_OPERATION_ADD: CQS Set operation for adding a value + * to a synchronization object + * @BASEP_CQS_SET_OPERATION_SET: CQS Set operation for setting the value + * of a synchronization object + */ +typedef enum { + BASEP_CQS_SET_OPERATION_ADD = 0, + BASEP_CQS_SET_OPERATION_SET = 1, +} basep_cqs_set_operation_op; + +struct base_cqs_set_operation_info { + __u64 addr; + __u64 val; + __u8 operation; + __u8 data_type; + __u8 padding[6]; +}; + +/** + * struct base_kcpu_command_cqs_set_operation_info - structure which contains information + * about the Timeline CQS set objects + * + * @objs: An array of Timeline CQS sets. + * @nr_objs: Number of Timeline CQS sets in the array. + * @padding: Structure padding, unused bytes. + */ +struct base_kcpu_command_cqs_set_operation_info { + __u64 objs; + __u32 nr_objs; + __u32 padding; }; /** @@ -395,7 +521,7 @@ struct base_kcpu_command_cqs_set_info { * @handle: Address of imported user buffer. */ struct base_kcpu_command_import_info { - u64 handle; + __u64 handle; }; /** @@ -408,9 +534,9 @@ struct base_kcpu_command_import_info { * @padding: Padding to a multiple of 64 bits. 
*/ struct base_kcpu_command_jit_alloc_info { - u64 info; - u8 count; - u8 padding[7]; + __u64 info; + __u8 count; + __u8 padding[7]; }; /** @@ -422,9 +548,9 @@ struct base_kcpu_command_jit_alloc_info { * @padding: Padding to a multiple of 64 bits. */ struct base_kcpu_command_jit_free_info { - u64 ids; - u8 count; - u8 padding[7]; + __u64 ids; + __u8 count; + __u8 padding[7]; }; /** @@ -437,15 +563,15 @@ struct base_kcpu_command_jit_free_info { * @padding: padding to a multiple of 64 bits. */ struct base_kcpu_command_group_suspend_info { - u64 buffer; - u32 size; - u8 group_handle; - u8 padding[3]; + __u64 buffer; + __u32 size; + __u8 group_handle; + __u8 padding[3]; }; #if MALI_UNIT_TEST struct base_kcpu_command_sample_time_info { - u64 time; + __u64 time; }; #endif /* MALI_UNIT_TEST */ @@ -466,12 +592,14 @@ struct base_kcpu_command_sample_time_info { * @info.padding: padding */ struct base_kcpu_command { - u8 type; - u8 padding[sizeof(u64) - sizeof(u8)]; + __u8 type; + __u8 padding[sizeof(__u64) - sizeof(__u8)]; union { struct base_kcpu_command_fence_info fence; struct base_kcpu_command_cqs_wait_info cqs_wait; struct base_kcpu_command_cqs_set_info cqs_set; + struct base_kcpu_command_cqs_wait_operation_info cqs_wait_operation; + struct base_kcpu_command_cqs_set_operation_info cqs_set_operation; struct base_kcpu_command_import_info import; struct base_kcpu_command_jit_alloc_info jit_alloc; struct base_kcpu_command_jit_free_info jit_free; @@ -479,7 +607,7 @@ struct base_kcpu_command { #if MALI_UNIT_TEST struct base_kcpu_command_sample_time_info sample_time; #endif /* MALI_UNIT_TEST */ - u64 padding[2]; /* No sub-struct should be larger */ + __u64 padding[2]; /* No sub-struct should be larger */ } info; }; @@ -490,8 +618,8 @@ struct base_kcpu_command { * @padding: Padding to a multiple of 64 bits. */ struct basep_cs_stream_control { - u32 features; - u32 padding; + __u32 features; + __u32 padding; }; /** @@ -503,10 +631,10 @@ struct basep_cs_stream_control { * @padding: Padding to a multiple of 64 bits. 
*/ struct basep_cs_group_control { - u32 features; - u32 stream_num; - u32 suspend_size; - u32 padding; + __u32 features; + __u32 stream_num; + __u32 suspend_size; + __u32 padding; }; /** @@ -521,9 +649,9 @@ struct basep_cs_group_control { * @padding: Padding to make multiple of 64bits */ struct base_gpu_queue_group_error_fatal_payload { - u64 sideband; - u32 status; - u32 padding; + __u64 sideband; + __u32 status; + __u32 padding; }; /** @@ -539,10 +667,10 @@ struct base_gpu_queue_group_error_fatal_payload { * @padding: Padding to make multiple of 64bits */ struct base_gpu_queue_error_fatal_payload { - u64 sideband; - u32 status; - u8 csi_index; - u8 padding[3]; + __u64 sideband; + __u32 status; + __u8 csi_index; + __u8 padding[3]; }; /** @@ -579,8 +707,8 @@ enum base_gpu_queue_group_error_type { * @payload.fatal_queue: Unrecoverable fault error associated with command queue */ struct base_gpu_queue_group_error { - u8 error_type; - u8 padding[7]; + __u8 error_type; + __u8 padding[7]; union { struct base_gpu_queue_group_error_fatal_payload fatal_group; struct base_gpu_queue_error_fatal_payload fatal_queue; @@ -621,17 +749,17 @@ enum base_csf_notification_type { * */ struct base_csf_notification { - u8 type; - u8 padding[7]; + __u8 type; + __u8 padding[7]; union { struct { - u8 handle; - u8 padding[7]; + __u8 handle; + __u8 padding[7]; struct base_gpu_queue_group_error error; } csg_error; - u8 align[56]; + __u8 align[56]; } payload; }; -#endif /* _BASE_CSF_KERNEL_H_ */ +#endif /* _UAPI_BASE_CSF_KERNEL_H_ */ diff --git a/mali_kbase/csf/mali_gpu_csf_control_registers.h b/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_control_registers.h index 8c4fc82..570cba8 100644 --- a/mali_kbase/csf/mali_gpu_csf_control_registers.h +++ b/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_control_registers.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,10 +23,10 @@ * This header was autogenerated, it should not be edited. */ -#ifndef _GPU_CSF_CONTROL_REGISTERS_H_ -#define _GPU_CSF_CONTROL_REGISTERS_H_ +#ifndef _UAPI_GPU_CSF_CONTROL_REGISTERS_H_ +#define _UAPI_GPU_CSF_CONTROL_REGISTERS_H_ /* GPU_REGISTERS register offsets */ #define GPU_CONTROL_MCU 0x3000 /* () MCU control registers */ -#endif /* _GPU_CSF_CONTROL_REGISTERS_H_ */ +#endif /* _UAPI_GPU_CSF_CONTROL_REGISTERS_H_ */ diff --git a/mali_kbase/csf/mali_gpu_csf_registers.h b/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h index d37b9cc..f233a0d 100644 --- a/mali_kbase/csf/mali_gpu_csf_registers.h +++ b/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h @@ -23,8 +23,8 @@ * This header was autogenerated, it should not be edited. 
*/ -#ifndef _GPU_CSF_REGISTERS_H_ -#define _GPU_CSF_REGISTERS_H_ +#ifndef _UAPI_GPU_CSF_REGISTERS_H_ +#define _UAPI_GPU_CSF_REGISTERS_H_ /* * Begin register sets @@ -155,6 +155,7 @@ #define CSG_PROTM_SUSPEND_BUF_LO 0x0048 /* () Protected mode suspend buffer, low word */ #define CSG_PROTM_SUSPEND_BUF_HI 0x004C /* () Protected mode suspend buffer, high word */ #define CSG_CONFIG 0x0050 /* () CSG configuration options */ +#define CSG_ITER_TRACE_CONFIG 0x0054 /* () CSG trace configuration */ /* CSG_OUTPUT_BLOCK register offsets */ #define CSG_ACK 0x0000 /* () CSG acknowledge flags */ @@ -172,6 +173,7 @@ #define GLB_GROUP_NUM 0x0010 /* () Number of CSG interfaces */ #define GLB_GROUP_STRIDE 0x0014 /* () Stride between CSG interfaces */ #define GLB_PRFCNT_SIZE 0x0018 /* () Size of CSF performance counters */ +#define GLB_INSTR_FEATURES 0x001C /* () TRACE_POINT instrumentation features */ #define GROUP_CONTROL_0 0x1000 /* () CSG control and capabilities */ #define GROUP_CONTROL(n) (GROUP_CONTROL_0 + (n)*256) #define GROUP_CONTROL_REG(n, r) (GROUP_CONTROL(n) + GROUP_CONTROL_BLOCK_REG(r)) @@ -1132,16 +1134,21 @@ #define GLB_REQ_PRFCNT_THRESHOLD_SHIFT 24 #define GLB_REQ_PRFCNT_THRESHOLD_MASK (0x1 << GLB_REQ_PRFCNT_THRESHOLD_SHIFT) #define GLB_REQ_PRFCNT_THRESHOLD_GET(reg_val) \ - (((reg_val)&GLB_REQ_PRFCNT_THRESHOLD_MASK) >> GLB_REQ_PRFCNT_THRESHOLD_SHIFT) + (((reg_val)&GLB_REQ_PRFCNT_THRESHOLD_MASK) >> \ + GLB_REQ_PRFCNT_THRESHOLD_SHIFT) #define GLB_REQ_PRFCNT_THRESHOLD_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_PRFCNT_THRESHOLD_MASK) | \ - (((value) << GLB_REQ_PRFCNT_THRESHOLD_SHIFT) & GLB_REQ_PRFCNT_THRESHOLD_MASK)) + (((reg_val) & ~GLB_REQ_PRFCNT_THRESHOLD_MASK) | \ + (((value) << GLB_REQ_PRFCNT_THRESHOLD_SHIFT) & \ + GLB_REQ_PRFCNT_THRESHOLD_MASK)) #define GLB_REQ_PRFCNT_OVERFLOW_SHIFT 25 #define GLB_REQ_PRFCNT_OVERFLOW_MASK (0x1 << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) -#define GLB_REQ_PRFCNT_OVERFLOW_GET(reg_val) (((reg_val)&GLB_REQ_PRFCNT_OVERFLOW_MASK) >> GLB_REQ_PRFCNT_OVERFLOW_SHIFT) +#define GLB_REQ_PRFCNT_OVERFLOW_GET(reg_val) \ + (((reg_val)&GLB_REQ_PRFCNT_OVERFLOW_MASK) >> \ + GLB_REQ_PRFCNT_OVERFLOW_SHIFT) #define GLB_REQ_PRFCNT_OVERFLOW_SET(reg_val, value) \ - (((reg_val) & ~GLB_REQ_PRFCNT_OVERFLOW_MASK) | \ - (((value) << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) & GLB_REQ_PRFCNT_OVERFLOW_MASK)) + (((reg_val) & ~GLB_REQ_PRFCNT_OVERFLOW_MASK) | \ + (((value) << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) & \ + GLB_REQ_PRFCNT_OVERFLOW_MASK)) #define GLB_REQ_DEBUG_CSF_REQ_SHIFT 30 #define GLB_REQ_DEBUG_CSF_REQ_MASK (0x1 << GLB_REQ_DEBUG_CSF_REQ_SHIFT) #define GLB_REQ_DEBUG_CSF_REQ_GET(reg_val) (((reg_val)&GLB_REQ_DEBUG_CSF_REQ_MASK) >> GLB_REQ_DEBUG_CSF_REQ_SHIFT) @@ -1256,19 +1263,25 @@ (((reg_val) & ~GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK) | \ (((value) << GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT) & GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK)) #define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT 24 -#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) +#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK \ + (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) #define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_GET(reg_val) \ - (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) + (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) >> \ + GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) #define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) & 
GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK)) + (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) & \ + GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK)) #define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT 25 -#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) +#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK \ + (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) #define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_GET(reg_val) \ - (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) + (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) >> \ + GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) #define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SET(reg_val, value) \ - (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) | \ - (((value) << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK)) + (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) & \ + GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK)) #define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT 30 #define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK (0x1 << GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT) #define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_GET(reg_val) \ @@ -1398,4 +1411,4 @@ (((reg_val) & ~CSG_STATUS_STATE_IDLE_MASK) | \ (((value) << CSG_STATUS_STATE_IDLE_SHIFT) & CSG_STATUS_STATE_IDLE_MASK)) -#endif /* _GPU_CSF_REGISTERS_H_ */ +#endif /* _UAPI_GPU_CSF_REGISTERS_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_ioctl.h b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h index 8c63e1c..237cc2e 100644 --- a/mali_kbase/csf/mali_kbase_csf_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h @@ -19,8 +19,8 @@ * */ -#ifndef _KBASE_CSF_IOCTL_H_ -#define _KBASE_CSF_IOCTL_H_ +#ifndef _UAPI_KBASE_CSF_IOCTL_H_ +#define _UAPI_KBASE_CSF_IOCTL_H_ #include <asm-generic/ioctl.h> #include <linux/types.h> @@ -34,10 +34,16 @@ * 1.2: * - Add new CSF GPU_FEATURES register into the property structure * returned by KBASE_IOCTL_GET_GPUPROPS + * 1.3: + * - Add __u32 group_uid member to + * &struct_kbase_ioctl_cs_queue_group_create.out + * 1.4: + * - Replace padding in kbase_ioctl_cs_get_glb_iface with + * instr_features member of same size */ #define BASE_UK_VERSION_MAJOR 1 -#define BASE_UK_VERSION_MINOR 2 +#define BASE_UK_VERSION_MINOR 4 /** * struct kbase_ioctl_version_check - Check version compatibility between @@ -146,6 +152,7 @@ struct kbase_ioctl_cs_queue_terminate { * @out: Output parameters * @out.group_handle: Handle of a newly created queue group. * @out.padding: Currently unused, must be zero + * @out.group_uid: UID of the queue group available to base. */ union kbase_ioctl_cs_queue_group_create { struct { @@ -162,7 +169,8 @@ union kbase_ioctl_cs_queue_group_create { } in; struct { __u8 group_handle; - __u8 padding[7]; + __u8 padding[3]; + __u32 group_uid; } out; }; @@ -287,25 +295,25 @@ struct kbase_ioctl_cs_tiler_heap_term { * union kbase_ioctl_cs_get_glb_iface - Request the global control block * of CSF interface capabilities * - * @in: Input parameters - * @in.max_group_num: The maximum number of groups to be read. Can be 0, in - * which case groups_ptr is unused. - * @in.max_total_stream_num: The maximum number of CSs to be read. Can be 0, in - * which case streams_ptr is unused. - * @in.groups_ptr: Pointer where to store all the group data (sequentially). - * @in.streams_ptr: Pointer where to store all the CS data (sequentially). 
- * @out: Output parameters - * @out.glb_version: Global interface version. - * @out.features: Bit mask of features (e.g. whether certain types of job - * can be suspended). - * @out.group_num: Number of CSGs supported. - * @out.prfcnt_size: Size of CSF performance counters, in bytes. Bits 31:16 - * hold the size of firmware performance counter data - * and 15:0 hold the size of hardware performance counter - * data. - * @out.total_stream_num: Total number of CSs, summed across all groups. - * @out.padding: Will be zeroed. - * + * @in: Input parameters + * @in.max_group_num: The maximum number of groups to be read. Can be 0, in + * which case groups_ptr is unused. + * @in.max_total_stream _num: The maximum number of CSs to be read. Can be 0, in + * which case streams_ptr is unused. + * @in.groups_ptr: Pointer where to store all the group data (sequentially). + * @in.streams_ptr: Pointer where to store all the CS data (sequentially). + * @out: Output parameters + * @out.glb_version: Global interface version. + * @out.features: Bit mask of features (e.g. whether certain types of job + * can be suspended). + * @out.group_num: Number of CSGs supported. + * @out.prfcnt_size: Size of CSF performance counters, in bytes. Bits 31:16 + * hold the size of firmware performance counter data + * and 15:0 hold the size of hardware performance counter + * data. + * @out.total_stream_num: Total number of CSs, summed across all groups. + * @out.instr_features: Instrumentation features. Bits 7:4 hold the maximum + * size of events. Bits 3:0 hold the offset update rate. * */ union kbase_ioctl_cs_get_glb_iface { @@ -321,7 +329,7 @@ union kbase_ioctl_cs_get_glb_iface { __u32 group_num; __u32 prfcnt_size; __u32 total_stream_num; - __u32 padding; + __u32 instr_features; } out; }; @@ -379,4 +387,4 @@ union kbase_ioctl_cs_event_memory_read { #endif /* MALI_UNIT_TEST */ -#endif /* _KBASE_CSF_IOCTL_H_ */ +#endif /* _UAPI_KBASE_CSF_IOCTL_H_ */ diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h index 65a06d2..c87154f 100644 --- a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h +++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h @@ -19,10 +19,12 @@ * */ -#ifndef _KBASE_GPU_REGMAP_CSF_H_ -#define _KBASE_GPU_REGMAP_CSF_H_ +#ifndef _UAPI_KBASE_GPU_REGMAP_CSF_H_ +#define _UAPI_KBASE_GPU_REGMAP_CSF_H_ -#if !MALI_USE_CSF +#include <linux/types.h> + +#if !MALI_USE_CSF && defined(__KERNEL__) #error "Cannot be compiled with JM" #endif @@ -61,8 +63,7 @@ #define VALUE_SHADER_REG_LO(n) (VALUE_SHADER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ #define VALUE_SHADER_REG_HI(n) (VALUE_SHADER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ - -#include "csf/mali_gpu_csf_control_registers.h" +#include "../../csf/mali_gpu_csf_control_registers.h" /* Set to implementation defined, outer caching */ #define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull @@ -188,7 +189,7 @@ /* GPU_COMMAND command + payload */ #define GPU_COMMAND_CODE_PAYLOAD(opcode, payload) \ - ((u32)opcode | ((u32)payload << 8)) + ((__u32)opcode | ((__u32)payload << 8)) /* Final GPU_COMMAND form */ /* No operation, nothing happens */ @@ -282,9 +283,9 @@ /* Implementation-dependent exception codes used to indicate CSG * and CS errors that are not specified in the specs. 
*/ -#define GPU_EXCEPTION_TYPE_SW_FAULT_0 ((u8)0x70) -#define GPU_EXCEPTION_TYPE_SW_FAULT_1 ((u8)0x71) -#define GPU_EXCEPTION_TYPE_SW_FAULT_2 ((u8)0x72) +#define GPU_EXCEPTION_TYPE_SW_FAULT_0 ((__u8)0x70) +#define GPU_EXCEPTION_TYPE_SW_FAULT_1 ((__u8)0x71) +#define GPU_EXCEPTION_TYPE_SW_FAULT_2 ((__u8)0x72) /* GPU_FAULTSTATUS_EXCEPTION_TYPE values */ #define GPU_FAULTSTATUS_EXCEPTION_TYPE_OK 0x00 @@ -331,4 +332,4 @@ /* GPU_CONTROL_MCU.GPU_IRQ_RAWSTAT */ #define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when performance count sample has completed */ -#endif /* _KBASE_GPU_REGMAP_CSF_H_ */ +#endif /* _UAPI_KBASE_GPU_REGMAP_CSF_H_ */ diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h index 1669d5a..1982668 100644 --- a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h +++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,10 +19,10 @@ * */ -#ifndef _KBASE_GPU_REGMAP_JM_H_ -#define _KBASE_GPU_REGMAP_JM_H_ +#ifndef _UAPI_KBASE_GPU_REGMAP_JM_H_ +#define _UAPI_KBASE_GPU_REGMAP_JM_H_ -#if MALI_USE_CSF +#if MALI_USE_CSF && defined(__KERNEL__) #error "Cannot be compiled with CSF" #endif @@ -284,4 +284,4 @@ #define GPU_IRQ_REG_COMMON (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \ | POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED) -#endif /* _KBASE_GPU_REGMAP_JM_H_ */ +#endif /* _UAPI_KBASE_GPU_REGMAP_JM_H_ */ diff --git a/mali_kbase/gpu/mali_kbase_gpu_coherency.h b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h index a075ed0..98186d2 100644 --- a/mali_kbase/gpu/mali_kbase_gpu_coherency.h +++ b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,12 +19,12 @@ * */ -#ifndef _KBASE_GPU_COHERENCY_H_ -#define _KBASE_GPU_COHERENCY_H_ +#ifndef _UAPI_KBASE_GPU_COHERENCY_H_ +#define _UAPI_KBASE_GPU_COHERENCY_H_ #define COHERENCY_ACE_LITE 0 #define COHERENCY_ACE 1 #define COHERENCY_NONE 31 #define COHERENCY_FEATURE_BIT(x) (1 << (x)) -#endif /* _KBASE_GPU_COHERENCY_H_ */ +#endif /* _UAPI_KBASE_GPU_COHERENCY_H_ */ diff --git a/mali_kbase/gpu/mali_kbase_gpu_id.h b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h index 8d687c4..0145920 100644 --- a/mali_kbase/gpu/mali_kbase_gpu_id.h +++ b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,8 +19,10 @@ * */ -#ifndef _KBASE_GPU_ID_H_ -#define _KBASE_GPU_ID_H_ +#ifndef _UAPI_KBASE_GPU_ID_H_ +#define _UAPI_KBASE_GPU_ID_H_ + +#include <linux/types.h> /* GPU_ID register */ #define GPU_ID_VERSION_STATUS_SHIFT 0 @@ -55,18 +57,18 @@ * a product ignoring its version. */ #define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \ - ((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ - (((u32)arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT) | \ - (((u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT) | \ - (((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) + ((((__u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ + (((__u32)arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT) | \ + (((__u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT) | \ + (((__u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) /* Helper macro to create a partial GPU_ID (new format) that specifies the * revision (major, minor, status) of a product */ #define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \ - ((((u32)version_major) << GPU_ID2_VERSION_MAJOR_SHIFT) | \ - (((u32)version_minor) << GPU_ID2_VERSION_MINOR_SHIFT) | \ - (((u32)version_status) << GPU_ID2_VERSION_STATUS_SHIFT)) + ((((__u32)version_major) << GPU_ID2_VERSION_MAJOR_SHIFT) | \ + (((__u32)version_minor) << GPU_ID2_VERSION_MINOR_SHIFT) | \ + (((__u32)version_status) << GPU_ID2_VERSION_STATUS_SHIFT)) /* Helper macro to create a complete GPU_ID (new format) */ #define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \ @@ -80,15 +82,15 @@ * a particular GPU model by its arch_major and product_major. */ #define GPU_ID2_MODEL_MAKE(arch_major, product_major) \ - ((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ - (((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) + ((((__u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ + (((__u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) /* Strip off the non-relevant bits from a product_id value and make it suitable * for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU * model. */ #define GPU_ID2_MODEL_MATCH_VALUE(product_id) \ - ((((u32)product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \ + ((((__u32)product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \ GPU_ID2_PRODUCT_MODEL) #define GPU_ID2_PRODUCT_TMIX GPU_ID2_MODEL_MAKE(6, 0) @@ -110,9 +112,9 @@ * minor, status */ #define GPU_ID_MAKE(id, major, minor, status) \ - ((((u32)id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \ - (((u32)major) << GPU_ID_VERSION_MAJOR_SHIFT) | \ - (((u32)minor) << GPU_ID_VERSION_MINOR_SHIFT) | \ - (((u32)status) << GPU_ID_VERSION_STATUS_SHIFT)) + ((((__u32)id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \ + (((__u32)major) << GPU_ID_VERSION_MAJOR_SHIFT) | \ + (((__u32)minor) << GPU_ID_VERSION_MINOR_SHIFT) | \ + (((__u32)status) << GPU_ID_VERSION_STATUS_SHIFT)) -#endif /* _KBASE_GPU_ID_H_ */ +#endif /* _UAPI_KBASE_GPU_ID_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h new file mode 100644 index 0000000..9977212 --- /dev/null +++ b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h @@ -0,0 +1,424 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _UAPI_KBASE_GPU_REGMAP_H_ +#define _UAPI_KBASE_GPU_REGMAP_H_ + +#include "mali_kbase_gpu_coherency.h" +#include "mali_kbase_gpu_id.h" +#if MALI_USE_CSF +#include "backend/mali_kbase_gpu_regmap_csf.h" +#else +#include "backend/mali_kbase_gpu_regmap_jm.h" +#endif + +/* Begin Register Offsets */ +/* GPU control registers */ + +#define GPU_CONTROL_BASE 0x0000 +#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r)) +#define GPU_ID 0x000 /* (RO) GPU and revision identifier */ +#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */ +#define TILER_FEATURES 0x00C /* (RO) Tiler Features */ +#define MEM_FEATURES 0x010 /* (RO) Memory system features */ +#define MMU_FEATURES 0x014 /* (RO) MMU features */ +#define AS_PRESENT 0x018 /* (RO) Address space slots present */ +#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */ +#define GPU_IRQ_CLEAR 0x024 /* (WO) */ +#define GPU_IRQ_MASK 0x028 /* (RW) */ +#define GPU_IRQ_STATUS 0x02C /* (RO) */ + +#define GPU_COMMAND 0x030 /* (WO) */ +#define GPU_STATUS 0x034 /* (RO) */ + +#define GPU_DBGEN (1 << 8) /* DBGEN wire status */ + +#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */ +#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */ +#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */ + +#define L2_CONFIG 0x048 /* (RW) Level 2 cache configuration */ + +#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ +#define SUPER_L2_COHERENT (1 << 1) /* Shader cores within a core + * supergroup are l2 coherent + */ + +#define PWR_KEY 0x050 /* (WO) Power manager key register */ +#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */ +#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */ +#define GPU_FEATURES_LO 0x060 /* (RO) GPU features, low word */ +#define GPU_FEATURES_HI 0x064 /* (RO) GPU features, high word */ +#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */ +#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */ +#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */ +#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */ + +#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */ +#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */ +#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */ +#define THREAD_FEATURES 0x0AC /* (RO) Thread features */ +#define THREAD_TLS_ALLOC 0x310 /* (RO) Number of threads per core that TLS must be allocated for */ + +#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */ +#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */ +#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed 
texture formats 64..95 */ +#define TEXTURE_FEATURES_3 0x0BC /* (RO) Support flags for texture order */ + +#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2)) + +#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ +#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ + +#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */ +#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */ + +#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ +#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ + +#define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */ +#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */ + +#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ +#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ + +#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ +#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ + +#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ +#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ + +#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */ +#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */ + +#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ +#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ + +#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ +#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ + +#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ +#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ + +#define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */ +#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */ + +#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */ +#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */ + +#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */ +#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */ + +#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */ +#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */ + +#define STACK_PWROFF_LO 0xE30 /* (RO) Core stack power off bitmap, low word */ +#define STACK_PWROFF_HI 0xE34 /* (RO) Core stack power off bitmap, high word */ + +#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */ +#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */ + +#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */ +#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */ + +#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */ +#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */ + +#define ASN_HASH_0 0x02C0 /* (RW) ASN hash function argument 0 */ +#define ASN_HASH(n) (ASN_HASH_0 + (n)*4) +#define ASN_HASH_COUNT 3 + +#define STACK_PWRTRANS_LO 0xE40 /* (RO) Core stack power transition bitmap, low word */ +#define STACK_PWRTRANS_HI 0xE44 
/* (RO) Core stack power transition bitmap, high word */ + +#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */ +#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */ + +#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */ +#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */ + +#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ +#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ + +#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ +#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */ + +#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */ +#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */ +#define L2_MMU_CONFIG 0xF0C /* (RW) L2 cache and MMU configuration (implementation-specific) */ + +/* Job control registers */ + +#define JOB_CONTROL_BASE 0x1000 + +#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r)) + +#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */ +#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */ +#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */ +#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ + +/* MMU control registers */ + +#define MEMORY_MANAGEMENT_BASE 0x2000 +#define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r)) + +#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */ +#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */ +#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ +#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ + +#define MMU_AS0 0x400 /* Configuration registers for address space 0 */ +#define MMU_AS1 0x440 /* Configuration registers for address space 1 */ +#define MMU_AS2 0x480 /* Configuration registers for address space 2 */ +#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */ +#define MMU_AS4 0x500 /* Configuration registers for address space 4 */ +#define MMU_AS5 0x540 /* Configuration registers for address space 5 */ +#define MMU_AS6 0x580 /* Configuration registers for address space 6 */ +#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */ +#define MMU_AS8 0x600 /* Configuration registers for address space 8 */ +#define MMU_AS9 0x640 /* Configuration registers for address space 9 */ +#define MMU_AS10 0x680 /* Configuration registers for address space 10 */ +#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */ +#define MMU_AS12 0x700 /* Configuration registers for address space 12 */ +#define MMU_AS13 0x740 /* Configuration registers for address space 13 */ +#define MMU_AS14 0x780 /* Configuration registers for address space 14 */ +#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */ + +/* MMU address space control registers */ + +#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r)) + +#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */ +#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */ +#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */ +#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. 
*/ +#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */ +#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */ +#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */ +#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */ +#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */ +#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */ +#define AS_STATUS 0x28 /* (RO) Status flags for address space n */ + +/* (RW) Translation table configuration for address space n, low word */ +#define AS_TRANSCFG_LO 0x30 +/* (RW) Translation table configuration for address space n, high word */ +#define AS_TRANSCFG_HI 0x34 +/* (RO) Secondary fault address for address space n, low word */ +#define AS_FAULTEXTRA_LO 0x38 +/* (RO) Secondary fault address for address space n, high word */ +#define AS_FAULTEXTRA_HI 0x3C + +/* End Register Offsets */ + +#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON) + +/* + * MMU_IRQ_RAWSTAT register values. Values are valid also for + * MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers. + */ + +#define MMU_PAGE_FAULT_FLAGS 16 + +/* Macros returning a bitmask to retrieve page fault or bus error flags from + * MMU registers + */ +#define MMU_PAGE_FAULT(n) (1UL << (n)) +#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS)) + +/* + * Begin AARCH64 MMU TRANSTAB register values + */ +#define MMU_HW_OUTA_BITS 40 +#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4)) + +/* + * Begin MMU STATUS register values + */ +#define AS_STATUS_AS_ACTIVE 0x01 + +#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3) + +#define AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFF << AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) +#define AS_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \ + (((reg_val)&AS_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) +#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_0 0xC0 + +#define AS_FAULTSTATUS_ACCESS_TYPE_SHIFT 8 +#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) +#define AS_FAULTSTATUS_ACCESS_TYPE_GET(reg_val) \ + (((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) + +#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0) +#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1) +#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2) +#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3) + +#define AS_FAULTSTATUS_SOURCE_ID_SHIFT 16 +#define AS_FAULTSTATUS_SOURCE_ID_MASK (0xFFFF << AS_FAULTSTATUS_SOURCE_ID_SHIFT) +#define AS_FAULTSTATUS_SOURCE_ID_GET(reg_val) \ + (((reg_val)&AS_FAULTSTATUS_SOURCE_ID_MASK) >> AS_FAULTSTATUS_SOURCE_ID_SHIFT) + +/* + * Begin MMU TRANSCFG register values + */ +#define AS_TRANSCFG_ADRMODE_LEGACY 0 +#define AS_TRANSCFG_ADRMODE_UNMAPPED 1 +#define AS_TRANSCFG_ADRMODE_IDENTITY 2 +#define AS_TRANSCFG_ADRMODE_AARCH64_4K 6 +#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8 + +#define AS_TRANSCFG_ADRMODE_MASK 0xF + +/* + * Begin TRANSCFG register values + */ +#define 
AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24) +#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24) +#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24) + +#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28)) +#define AS_TRANSCFG_PTW_SH_OS (2ull << 28) +#define AS_TRANSCFG_PTW_SH_IS (3ull << 28) +#define AS_TRANSCFG_R_ALLOCATE (1ull << 30) + +/* + * Begin Command Values + */ + +/* AS_COMMAND register commands */ +#define AS_COMMAND_NOP 0x00 /* NOP Operation */ +#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */ +#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */ +#define AS_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */ +/* Flush all L2 caches then issue a flush region command to all MMUs + * (deprecated - only for use with T60x) + */ +#define AS_COMMAND_FLUSH 0x04 +/* Flush all L2 caches then issue a flush region command to all MMUs */ +#define AS_COMMAND_FLUSH_PT 0x04 +/* Wait for memory accesses to complete, flush all the L1s cache then flush all + * L2 caches then issue a flush region command to all MMUs + */ +#define AS_COMMAND_FLUSH_MEM 0x05 + +/* GPU_STATUS values */ +#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ +#define GPU_STATUS_CYCLE_COUNT_ACTIVE (1 << 6) /* Set if the cycle counter is active. */ +#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ + +/* PRFCNT_CONFIG register values */ +#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */ +#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */ +#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */ + +/* The performance counters are disabled. */ +#define PRFCNT_CONFIG_MODE_OFF 0 +/* The performance counters are enabled, but are only written out when a + * PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. + */ +#define PRFCNT_CONFIG_MODE_MANUAL 1 +/* The performance counters are enabled, and are written out each time a tile + * finishes rendering. + */ +#define PRFCNT_CONFIG_MODE_TILE 2 + +/* AS<n>_MEMATTR values from MMU_MEMATTR_STAGE1: */ +/* Use GPU implementation-defined caching policy. */ +#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull +/* The attribute set to force all resources to be cached. */ +#define AS_MEMATTR_FORCE_TO_CACHE_ALL 0x8Full +/* Inner write-alloc cache setup, no outer caching */ +#define AS_MEMATTR_WRITE_ALLOC 0x8Dull + +/* Use GPU implementation-defined caching policy. */ +#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull +/* The attribute set to force all resources to be cached. */ +#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL 0x4Full +/* Inner write-alloc cache setup, no outer caching */ +#define AS_MEMATTR_LPAE_WRITE_ALLOC 0x4Dull +/* Set to implementation defined, outer caching */ +#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF 0x88ull +/* Set to write back memory, outer caching */ +#define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull +/* There is no LPAE support for non-cacheable, since the memory type is always + * write-back. 
+ * Marking this setting as reserved for LPAE + */ +#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED + +/* L2_MMU_CONFIG register */ +#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23) +#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT) + +/* End L2_MMU_CONFIG register */ + +/* THREAD_* registers */ + +/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */ +#define IMPLEMENTATION_UNSPECIFIED 0 +#define IMPLEMENTATION_SILICON 1 +#define IMPLEMENTATION_FPGA 2 +#define IMPLEMENTATION_MODEL 3 + +/* Default values when registers are not supported by the implemented hardware */ +#define THREAD_MT_DEFAULT 256 +#define THREAD_MWS_DEFAULT 256 +#define THREAD_MBS_DEFAULT 256 +#define THREAD_MR_DEFAULT 1024 +#define THREAD_MTQ_DEFAULT 4 +#define THREAD_MTGS_DEFAULT 10 + +/* End THREAD_* registers */ + +/* SHADER_CONFIG register */ +#define SC_LS_ALLOW_ATTR_TYPES (1ul << 16) +#define SC_TLS_HASH_ENABLE (1ul << 17) +#define SC_LS_ATTR_CHECK_DISABLE (1ul << 18) +#define SC_VAR_ALGORITHM (1ul << 29) +/* End SHADER_CONFIG register */ + +/* TILER_CONFIG register */ +#define TC_CLOCK_GATE_OVERRIDE (1ul << 0) +/* End TILER_CONFIG register */ + +/* L2_CONFIG register */ +#define L2_CONFIG_SIZE_SHIFT 16 +#define L2_CONFIG_SIZE_MASK (0xFFul << L2_CONFIG_SIZE_SHIFT) +#define L2_CONFIG_HASH_SHIFT 24 +#define L2_CONFIG_HASH_MASK (0xFFul << L2_CONFIG_HASH_SHIFT) +#define L2_CONFIG_ASN_HASH_ENABLE_SHIFT 24 +#define L2_CONFIG_ASN_HASH_ENABLE_MASK (1ul << L2_CONFIG_ASN_HASH_ENABLE_SHIFT) +/* End L2_CONFIG register */ + +/* IDVS_GROUP register */ +#define IDVS_GROUP_SIZE_SHIFT (16) +#define IDVS_GROUP_MAX_SIZE (0x3F) + +#endif /* _UAPI_KBASE_GPU_REGMAP_H_ */ diff --git a/mali_kbase/jm/mali_base_jm_kernel.h b/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h index a72819e..cd81421 100644 --- a/mali_kbase/jm/mali_base_jm_kernel.h +++ b/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h @@ -19,8 +19,10 @@ * */ -#ifndef _BASE_JM_KERNEL_H_ -#define _BASE_JM_KERNEL_H_ +#ifndef _UAPI_BASE_JM_KERNEL_H_ +#define _UAPI_BASE_JM_KERNEL_H_ + +#include <linux/types.h> /* Memory allocation, access/hint flags. * @@ -207,8 +209,8 @@ #define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0) /** - * If set, the heap info address points to a u32 holding the used size in bytes; - * otherwise it points to a u64 holding the lowest address of unused memory. + * If set, the heap info address points to a __u32 holding the used size in bytes; + * otherwise it points to a __u64 holding the lowest address of unused memory. */ #define BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE (1 << 1) @@ -230,7 +232,7 @@ * These share the same space as BASEP_CONTEXT_FLAG_*, and so must * not collide with them. */ -typedef u32 base_context_create_flags; +typedef __u32 base_context_create_flags; /* No flags set */ #define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0) @@ -320,7 +322,7 @@ typedef u32 base_context_create_flags; * @blob: per-job data array */ struct base_jd_udata { - u64 blob[2]; + __u64 blob[2]; }; /** @@ -333,7 +335,7 @@ struct base_jd_udata { * When the flag is set for a particular dependency to signal that it is an * ordering only dependency then errors will not be propagated. 
*/ -typedef u8 base_jd_dep_type; +typedef __u8 base_jd_dep_type; #define BASE_JD_DEP_TYPE_INVALID (0) /**< Invalid dependency */ #define BASE_JD_DEP_TYPE_DATA (1U << 0) /**< Data dependency */ @@ -349,7 +351,7 @@ typedef u8 base_jd_dep_type; * Special case is ::BASE_JD_REQ_DEP, which is used to express complex * dependencies, and that doesn't execute anything on the hardware. */ -typedef u32 base_jd_core_req; +typedef __u32 base_jd_core_req; /* Requirements that come from the HW */ @@ -581,6 +583,13 @@ typedef u32 base_jd_core_req; */ #define BASE_JD_REQ_END_RENDERPASS ((base_jd_core_req)1 << 19) +/* SW-only requirement: The atom needs to run on a limited core mask affinity. + * + * If this bit is set then the kbase_context.limited_core_mask will be applied + * to the affinity. + */ +#define BASE_JD_REQ_LIMITED_CORE_MASK ((base_jd_core_req)1 << 20) + /* These requirement bits are currently unused in base_jd_core_req */ #define BASEP_JD_REQ_RESERVED \ @@ -591,7 +600,7 @@ typedef u32 base_jd_core_req; BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \ BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END | \ BASE_JD_REQ_JOB_SLOT | BASE_JD_REQ_START_RENDERPASS | \ - BASE_JD_REQ_END_RENDERPASS)) + BASE_JD_REQ_END_RENDERPASS | BASE_JD_REQ_LIMITED_CORE_MASK)) /* Mask of all bits in base_jd_core_req that control the type of the atom. * @@ -636,7 +645,7 @@ enum kbase_jd_atom_state { /** * typedef base_atom_id - Type big enough to store an atom number in. */ -typedef u8 base_atom_id; +typedef __u8 base_atom_id; /** * struct base_dependency - @@ -699,10 +708,10 @@ struct base_dependency { * BASE_JD_REQ_END_RENDERPASS is set in the base_jd_core_req. */ struct base_jd_fragment { - u64 norm_read_norm_write; - u64 norm_read_forced_write; - u64 forced_read_forced_write; - u64 forced_read_norm_write; + __u64 norm_read_norm_write; + __u64 norm_read_forced_write; + __u64 forced_read_forced_write; + __u64 forced_read_norm_write; }; /** @@ -742,7 +751,7 @@ struct base_jd_fragment { * the same context. See KBASE_JS_SYSTEM_PRIORITY_MODE and * KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE for more details. */ -typedef u8 base_jd_prio; +typedef __u8 base_jd_prio; /* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */ #define BASE_JD_PRIO_MEDIUM ((base_jd_prio)0) @@ -793,32 +802,32 @@ typedef u8 base_jd_prio; * @padding: Unused. Must be zero. * * This structure has changed since UK 10.2 for which base_jd_core_req was a - * u16 value. + * __u16 value. * - * In UK 10.3 a core_req field of a u32 type was added to the end of the - * structure, and the place in the structure previously occupied by u16 + * In UK 10.3 a core_req field of a __u32 type was added to the end of the + * structure, and the place in the structure previously occupied by __u16 * core_req was kept but renamed to compat_core_req. * - * From UK 11.20 - compat_core_req is now occupied by u8 jit_id[2]. + * From UK 11.20 - compat_core_req is now occupied by __u8 jit_id[2]. * Compatibility with UK 10.x from UK 11.y is not handled because * the major version increase prevents this. * * For UK 11.20 jit_id[2] must be initialized to zero. 
*/ struct base_jd_atom_v2 { - u64 jc; + __u64 jc; struct base_jd_udata udata; - u64 extres_list; - u16 nr_extres; - u8 jit_id[2]; + __u64 extres_list; + __u16 nr_extres; + __u8 jit_id[2]; struct base_dependency pre_dep[2]; base_atom_id atom_number; base_jd_prio prio; - u8 device_nr; - u8 jobslot; + __u8 device_nr; + __u8 jobslot; base_jd_core_req core_req; - u8 renderpass_id; - u8 padding[7]; + __u8 renderpass_id; + __u8 padding[7]; }; /** @@ -853,20 +862,20 @@ struct base_jd_atom_v2 { * @padding: Unused. Must be zero. */ typedef struct base_jd_atom { - u64 seq_nr; - u64 jc; + __u64 seq_nr; + __u64 jc; struct base_jd_udata udata; - u64 extres_list; - u16 nr_extres; - u8 jit_id[2]; + __u64 extres_list; + __u16 nr_extres; + __u8 jit_id[2]; struct base_dependency pre_dep[2]; base_atom_id atom_number; base_jd_prio prio; - u8 device_nr; - u8 jobslot; + __u8 device_nr; + __u8 jobslot; base_jd_core_req core_req; - u8 renderpass_id; - u8 padding[7]; + __u8 renderpass_id; + __u8 padding[7]; } base_jd_atom; /* Job chain event code bits @@ -1181,11 +1190,11 @@ struct base_jd_event_v2 { */ struct base_dump_cpu_gpu_counters { - u64 system_time; - u64 cycle_counter; - u64 sec; - u32 usec; - u8 padding[36]; + __u64 system_time; + __u64 cycle_counter; + __u64 sec; + __u32 usec; + __u8 padding[36]; }; -#endif /* _BASE_JM_KERNEL_H_ */ +#endif /* _UAPI_BASE_JM_KERNEL_H_ */ diff --git a/mali_kbase/jm/mali_kbase_jm_ioctl.h b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h index 93c9c44..1eb6bcb 100644 --- a/mali_kbase/jm/mali_kbase_jm_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,8 +19,8 @@ * */ -#ifndef _KBASE_JM_IOCTL_H_ -#define _KBASE_JM_IOCTL_H_ +#ifndef _UAPI_KBASE_JM_IOCTL_H_ +#define _UAPI_KBASE_JM_IOCTL_H_ #include <asm-generic/ioctl.h> #include <linux/types.h> @@ -116,9 +116,12 @@ * 11.30: * - Add a new priority level BASE_JD_PRIO_REALTIME * - Add ioctl 54: This controls the priority setting. + * 11.31: + * - Added BASE_JD_REQ_LIMITED_CORE_MASK. + * - Added ioctl 55: set_limited_core_count. */ #define BASE_UK_VERSION_MAJOR 11 -#define BASE_UK_VERSION_MINOR 30 +#define BASE_UK_VERSION_MINOR 31 /** * struct kbase_ioctl_version_check - Check version compatibility between @@ -217,4 +220,4 @@ union kbase_kinstr_jm_fd { #define KBASE_IOCTL_VERSION_CHECK_RESERVED \ _IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check) -#endif /* _KBASE_JM_IOCTL_H_ */ +#endif /* _UAPI_KBASE_JM_IOCTL_H_ */ diff --git a/mali_kbase/mali_base_kernel.h b/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h index 5c173eb..554c5a3 100644 --- a/mali_kbase/mali_base_kernel.h +++ b/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h @@ -23,38 +23,46 @@ * Base structures shared with the kernel. 
*/ -#ifndef _BASE_KERNEL_H_ -#define _BASE_KERNEL_H_ +#ifndef _UAPI_BASE_KERNEL_H_ +#define _UAPI_BASE_KERNEL_H_ + +#include <linux/types.h> struct base_mem_handle { struct { - u64 handle; + __u64 handle; } basep; }; #include "mali_base_mem_priv.h" -#include "gpu/mali_kbase_gpu_coherency.h" #include "gpu/mali_kbase_gpu_id.h" +#include "gpu/mali_kbase_gpu_coherency.h" #define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4 #define BASE_MAX_COHERENT_GROUPS 16 -#if defined CDBG_ASSERT +#if defined(CDBG_ASSERT) #define LOCAL_ASSERT CDBG_ASSERT -#elif defined KBASE_DEBUG_ASSERT +#elif defined(KBASE_DEBUG_ASSERT) #define LOCAL_ASSERT KBASE_DEBUG_ASSERT #else +#if defined(__KERNEL__) #error assert macro not defined! +#else +#define LOCAL_ASSERT(...) ((void)#__VA_ARGS__) +#endif #endif #if defined(PAGE_MASK) && defined(PAGE_SHIFT) #define LOCAL_PAGE_SHIFT PAGE_SHIFT #define LOCAL_PAGE_LSB ~PAGE_MASK #else -#include <osu/mali_osu.h> +#ifndef OSU_CONFIG_CPU_PAGE_SIZE_LOG2 +#define OSU_CONFIG_CPU_PAGE_SIZE_LOG2 12 +#endif -#if defined OSU_CONFIG_CPU_PAGE_SIZE_LOG2 +#if defined(OSU_CONFIG_CPU_PAGE_SIZE_LOG2) #define LOCAL_PAGE_SHIFT OSU_CONFIG_CPU_PAGE_SIZE_LOG2 #define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1) #else @@ -82,7 +90,7 @@ struct base_mem_handle { * More flags can be added to this list, as long as they don't clash * (see BASE_MEM_FLAGS_NR_BITS for the number of the first free bit). */ -typedef u32 base_mem_alloc_flags; +typedef __u32 base_mem_alloc_flags; /* A mask for all the flags which are modifiable via the base_mem_set_flags * interface. @@ -135,8 +143,8 @@ enum base_mem_import_type { */ struct base_mem_import_user_buffer { - u64 ptr; - u64 length; + __u64 ptr; + __u64 length; }; /* Mask to detect 4GB boundary alignment */ @@ -197,8 +205,8 @@ struct base_fence { */ struct base_mem_aliasing_info { struct base_mem_handle handle; - u64 offset; - u64 length; + __u64 offset; + __u64 length; }; /* Maximum percentage of just-in-time memory allocation trimming to perform @@ -220,11 +228,11 @@ struct base_mem_aliasing_info { * An array of structures was not supported */ struct base_jit_alloc_info_10_2 { - u64 gpu_alloc_addr; - u64 va_pages; - u64 commit_pages; - u64 extension; - u8 id; + __u64 gpu_alloc_addr; + __u64 va_pages; + __u64 commit_pages; + __u64 extension; + __u8 id; }; /* base_jit_alloc_info introduced by kernel driver version 11.5, and in use up @@ -247,16 +255,16 @@ struct base_jit_alloc_info_10_2 { * 11.10: Arrays of this structure are supported */ struct base_jit_alloc_info_11_5 { - u64 gpu_alloc_addr; - u64 va_pages; - u64 commit_pages; - u64 extension; - u8 id; - u8 bin_id; - u8 max_allocations; - u8 flags; - u8 padding[2]; - u16 usage_id; + __u64 gpu_alloc_addr; + __u64 va_pages; + __u64 commit_pages; + __u64 extension; + __u8 id; + __u8 bin_id; + __u8 max_allocations; + __u8 flags; + __u8 padding[2]; + __u16 usage_id; }; /** @@ -302,17 +310,17 @@ struct base_jit_alloc_info_11_5 { * 11.20: added @heap_info_gpu_addr */ struct base_jit_alloc_info { - u64 gpu_alloc_addr; - u64 va_pages; - u64 commit_pages; - u64 extension; - u8 id; - u8 bin_id; - u8 max_allocations; - u8 flags; - u8 padding[2]; - u16 usage_id; - u64 heap_info_gpu_addr; + __u64 gpu_alloc_addr; + __u64 va_pages; + __u64 commit_pages; + __u64 extension; + __u8 id; + __u8 bin_id; + __u8 max_allocations; + __u8 flags; + __u8 padding[2]; + __u16 usage_id; + __u64 heap_info_gpu_addr; }; enum base_external_resource_access { @@ -321,7 +329,7 @@ enum base_external_resource_access { }; struct 
base_external_resource { - u64 ext_resource; + __u64 ext_resource; }; @@ -339,13 +347,13 @@ struct base_external_resource { * sized at allocation time. */ struct base_external_resource_list { - u64 count; + __u64 count; struct base_external_resource ext_res[1]; }; struct base_jd_debug_copy_buffer { - u64 address; - u64 size; + __u64 address; + __u64 size; struct base_external_resource extres; }; @@ -457,7 +465,7 @@ struct base_jd_debug_copy_buffer { * population count, since faulty cores may be disabled during production, * producing a non-contiguous mask. * - * The memory requirements for this algorithm can be determined either by a u64 + * The memory requirements for this algorithm can be determined either by a __u64 * population count on the L2_PRESENT mask (a LUT helper already is * required for the above), or simple assumption that there can be no more than * 16 coherent groups, since core groups are typically 4 cores. @@ -496,16 +504,16 @@ struct base_jd_debug_copy_buffer { * @num_exec_engines: The number of execution engines. */ struct mali_base_gpu_core_props { - u32 product_id; - u16 version_status; - u16 minor_revision; - u16 major_revision; - u16 padding; - u32 gpu_freq_khz_max; - u32 log2_program_counter_size; - u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; - u64 gpu_available_memory_size; - u8 num_exec_engines; + __u32 product_id; + __u16 version_status; + __u16 minor_revision; + __u16 major_revision; + __u16 padding; + __u32 gpu_freq_khz_max; + __u32 log2_program_counter_size; + __u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; + __u64 gpu_available_memory_size; + __u8 num_exec_engines; }; /* @@ -513,15 +521,15 @@ struct mali_base_gpu_core_props { * required by upper-level apis. */ struct mali_base_gpu_l2_cache_props { - u8 log2_line_size; - u8 log2_cache_size; - u8 num_l2_slices; /* Number of L2C slices. 1 or higher */ - u8 padding[5]; + __u8 log2_line_size; + __u8 log2_cache_size; + __u8 num_l2_slices; /* Number of L2C slices. 1 or higher */ + __u8 padding[5]; }; struct mali_base_gpu_tiler_props { - u32 bin_size_bytes; /* Max is 4*2^15 */ - u32 max_active_levels; /* Max is 2^15 */ + __u32 bin_size_bytes; /* Max is 4*2^15 */ + __u32 max_active_levels; /* Max is 2^15 */ }; /** @@ -543,15 +551,15 @@ struct mali_base_gpu_tiler_props { * allocated for */ struct mali_base_gpu_thread_props { - u32 max_threads; - u32 max_workgroup_size; - u32 max_barrier_size; - u16 max_registers; - u8 max_task_queue; - u8 max_thread_group_split; - u8 impl_tech; - u8 padding[3]; - u32 tls_alloc; + __u32 max_threads; + __u32 max_workgroup_size; + __u32 max_barrier_size; + __u16 max_registers; + __u8 max_task_queue; + __u8 max_thread_group_split; + __u8 impl_tech; + __u8 padding[3]; + __u32 tls_alloc; }; /** @@ -570,9 +578,9 @@ struct mali_base_gpu_thread_props { * wastage. */ struct mali_base_gpu_coherent_group { - u64 core_mask; - u16 num_cores; - u16 padding[3]; + __u64 core_mask; + __u16 num_cores; + __u16 padding[3]; }; /** @@ -591,17 +599,17 @@ struct mali_base_gpu_coherent_group { * @group: Descriptors of coherent groups * * Note that the sizes of the members could be reduced. However, the \c group - * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte + * member might be 8-byte aligned to ensure the __u64 core_mask is 8-byte * aligned, thus leading to wastage if the other members sizes were reduced. * * The groups are sorted by core mask. The core masks are non-repeating and do * not intersect. 
*/ struct mali_base_gpu_coherent_group_info { - u32 num_groups; - u32 num_core_groups; - u32 coherency; - u32 padding; + __u32 num_groups; + __u32 num_core_groups; + __u32 coherency; + __u32 padding; struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS]; }; @@ -644,37 +652,37 @@ struct mali_base_gpu_coherent_group_info { * */ struct gpu_raw_gpu_props { - u64 shader_present; - u64 tiler_present; - u64 l2_present; - u64 stack_present; - u32 l2_features; - u32 core_features; - u32 mem_features; - u32 mmu_features; + __u64 shader_present; + __u64 tiler_present; + __u64 l2_present; + __u64 stack_present; + __u32 l2_features; + __u32 core_features; + __u32 mem_features; + __u32 mmu_features; - u32 as_present; + __u32 as_present; - u32 js_present; - u32 js_features[GPU_MAX_JOB_SLOTS]; - u32 tiler_features; - u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; + __u32 js_present; + __u32 js_features[GPU_MAX_JOB_SLOTS]; + __u32 tiler_features; + __u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; - u32 gpu_id; + __u32 gpu_id; - u32 thread_max_threads; - u32 thread_max_workgroup_size; - u32 thread_max_barrier_size; - u32 thread_features; + __u32 thread_max_threads; + __u32 thread_max_workgroup_size; + __u32 thread_max_barrier_size; + __u32 thread_features; /* * Note: This is the _selected_ coherency mode rather than the * available modes as exposed in the coherency_features register. */ - u32 coherency_mode; + __u32 coherency_mode; - u32 thread_tls_alloc; - u64 gpu_features; + __u32 thread_tls_alloc; + __u64 gpu_features; }; /** @@ -695,7 +703,7 @@ struct gpu_raw_gpu_props { struct base_gpu_props { struct mali_base_gpu_core_props core_props; struct mali_base_gpu_l2_cache_props l2_props; - u64 unused_1; + __u64 unused_1; struct mali_base_gpu_tiler_props tiler_props; struct mali_base_gpu_thread_props thread_props; struct gpu_raw_gpu_props raw_props; @@ -717,7 +725,7 @@ struct base_gpu_props { * * Return: group ID(0~15) extracted from the parameter */ -static inline int base_mem_group_id_get(base_mem_alloc_flags flags) +static __inline__ int base_mem_group_id_get(base_mem_alloc_flags flags) { LOCAL_ASSERT((flags & ~BASE_MEM_FLAGS_INPUT_MASK) == 0); return (int)((flags & BASE_MEM_GROUP_ID_MASK) >> @@ -736,7 +744,7 @@ static inline int base_mem_group_id_get(base_mem_alloc_flags flags) * The return value can be combined with other flags against base_mem_alloc * to identify a specific memory group. */ -static inline base_mem_alloc_flags base_mem_group_id_set(int id) +static __inline__ base_mem_alloc_flags base_mem_group_id_set(int id) { if ((id < 0) || (id >= BASE_MEM_GROUP_COUNT)) { /* Set to default value when id is out of range. */ @@ -757,7 +765,7 @@ static inline base_mem_alloc_flags base_mem_group_id_set(int id) * * Return: Bitmask of flags to pass to base_context_init. */ -static inline base_context_create_flags base_context_mmu_group_id_set( +static __inline__ base_context_create_flags base_context_mmu_group_id_set( int const group_id) { LOCAL_ASSERT(group_id >= 0); @@ -777,7 +785,7 @@ static inline base_context_create_flags base_context_mmu_group_id_set( * * Return: Physical memory group ID. Valid range is 0..(BASE_MEM_GROUP_COUNT-1). 
*/ -static inline int base_context_mmu_group_id_get( +static __inline__ int base_context_mmu_group_id_get( base_context_create_flags const flags) { LOCAL_ASSERT(flags == (flags & BASEP_CONTEXT_CREATE_ALLOWED_FLAGS)); @@ -809,4 +817,10 @@ static inline int base_context_mmu_group_id_get( BASE_TIMEINFO_KERNEL_SOURCE_FLAG | \ BASE_TIMEINFO_USER_SOURCE_FLAG) -#endif /* _BASE_KERNEL_H_ */ +/* Maximum number of source allocations allowed to create an alias allocation. + * This needs to be 4096 * 6 to allow cube map arrays with up to 4096 array + * layers, since each cube map in the array will have 6 faces. + */ +#define BASE_MEM_ALIAS_MAX_ENTS ((size_t)24576) + +#endif /* _UAPI_BASE_KERNEL_H_ */ diff --git a/mali_kbase/mali_base_mem_priv.h b/common/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h index 9f59a4f..982bd3d 100644 --- a/mali_kbase/mali_base_mem_priv.h +++ b/common/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2010-2015, 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,8 +19,12 @@ * */ -#ifndef _BASE_MEM_PRIV_H_ -#define _BASE_MEM_PRIV_H_ +#ifndef _UAPI_BASE_MEM_PRIV_H_ +#define _UAPI_BASE_MEM_PRIV_H_ + +#include <linux/types.h> + +#include "mali_base_kernel.h" #define BASE_SYNCSET_OP_MSYNC (1U << 0) #define BASE_SYNCSET_OP_CSYNC (1U << 1) @@ -45,10 +49,10 @@ */ struct basep_syncset { struct base_mem_handle mem_handle; - u64 user_addr; - u64 size; - u8 type; - u8 padding[7]; + __u64 user_addr; + __u64 size; + __u8 type; + __u8 padding[7]; }; -#endif +#endif /* _UAPI_BASE_MEM_PRIV_H_ */ diff --git a/mali_kbase/mali_kbase_hwcnt_reader.h b/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h index 9f2172b..615dbb0 100644 --- a/mali_kbase/mali_kbase_hwcnt_reader.h +++ b/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h @@ -19,17 +19,18 @@ * */ -#ifndef _KBASE_HWCNT_READER_H_ -#define _KBASE_HWCNT_READER_H_ +#ifndef _UAPI_KBASE_HWCNT_READER_H_ +#define _UAPI_KBASE_HWCNT_READER_H_ #include <stddef.h> +#include <linux/types.h> /* The ids of ioctl commands. 
*/ #define KBASE_HWCNT_READER 0xBE -#define KBASE_HWCNT_READER_GET_HWVER _IOR(KBASE_HWCNT_READER, 0x00, u32) -#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, u32) -#define KBASE_HWCNT_READER_DUMP _IOW(KBASE_HWCNT_READER, 0x10, u32) -#define KBASE_HWCNT_READER_CLEAR _IOW(KBASE_HWCNT_READER, 0x11, u32) +#define KBASE_HWCNT_READER_GET_HWVER _IOR(KBASE_HWCNT_READER, 0x00, __u32) +#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, __u32) +#define KBASE_HWCNT_READER_DUMP _IOW(KBASE_HWCNT_READER, 0x10, __u32) +#define KBASE_HWCNT_READER_CLEAR _IOW(KBASE_HWCNT_READER, 0x11, __u32) #define KBASE_HWCNT_READER_GET_BUFFER _IOC(_IOC_READ, KBASE_HWCNT_READER, 0x20,\ offsetof(struct kbase_hwcnt_reader_metadata, cycles)) #define KBASE_HWCNT_READER_GET_BUFFER_WITH_CYCLES _IOR(KBASE_HWCNT_READER, 0x20,\ @@ -38,10 +39,10 @@ offsetof(struct kbase_hwcnt_reader_metadata, cycles)) #define KBASE_HWCNT_READER_PUT_BUFFER_WITH_CYCLES _IOW(KBASE_HWCNT_READER, 0x21,\ struct kbase_hwcnt_reader_metadata) -#define KBASE_HWCNT_READER_SET_INTERVAL _IOW(KBASE_HWCNT_READER, 0x30, u32) -#define KBASE_HWCNT_READER_ENABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x40, u32) -#define KBASE_HWCNT_READER_DISABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x41, u32) -#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, u32) +#define KBASE_HWCNT_READER_SET_INTERVAL _IOW(KBASE_HWCNT_READER, 0x30, __u32) +#define KBASE_HWCNT_READER_ENABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x40, __u32) +#define KBASE_HWCNT_READER_DISABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x41, __u32) +#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, __u32) #define KBASE_HWCNT_READER_GET_API_VERSION_WITH_FEATURES \ _IOW(KBASE_HWCNT_READER, 0xFF, \ struct kbase_hwcnt_reader_api_version) @@ -53,8 +54,8 @@ * @shader_cores: the cycles that have elapsed on the GPU shader cores */ struct kbase_hwcnt_reader_metadata_cycles { - u64 top; - u64 shader_cores; + __u64 top; + __u64 shader_cores; }; /** @@ -65,9 +66,9 @@ struct kbase_hwcnt_reader_metadata_cycles { * @cycles: the GPU cycles that occurred since the last sample */ struct kbase_hwcnt_reader_metadata { - u64 timestamp; - u32 event_id; - u32 buffer_idx; + __u64 timestamp; + __u32 event_id; + __u32 buffer_idx; struct kbase_hwcnt_reader_metadata_cycles cycles; }; @@ -84,7 +85,6 @@ enum base_hwcnt_reader_event { BASE_HWCNT_READER_EVENT_PERIODIC, BASE_HWCNT_READER_EVENT_PREJOB, BASE_HWCNT_READER_EVENT_POSTJOB, - BASE_HWCNT_READER_EVENT_COUNT }; @@ -97,9 +97,9 @@ enum base_hwcnt_reader_event { * @features: available features in this API version */ struct kbase_hwcnt_reader_api_version { - u32 version; - u32 features; + __u32 version; + __u32 features; }; -#endif /* _KBASE_HWCNT_READER_H_ */ +#endif /* _UAPI_KBASE_HWCNT_READER_H_ */ diff --git a/mali_kbase/mali_kbase_ioctl.h b/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h index 36dfc34..5ca528a 100644 --- a/mali_kbase/mali_kbase_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h @@ -19,8 +19,8 @@ * */ -#ifndef _KBASE_IOCTL_H_ -#define _KBASE_IOCTL_H_ +#ifndef _UAPI_KBASE_IOCTL_H_ +#define _UAPI_KBASE_IOCTL_H_ #ifdef __cpluscplus extern "C" { @@ -64,16 +64,16 @@ struct kbase_ioctl_set_flags { * @flags may be used in the future to request a different format for the * buffer. With @flags == 0 the following format is used. 
* - * The buffer will be filled with pairs of values, a u32 key identifying the + * The buffer will be filled with pairs of values, a __u32 key identifying the * property followed by the value. The size of the value is identified using * the bottom bits of the key. The value then immediately followed the key and * is tightly packed (there is no padding). All keys and values are * little-endian. * - * 00 = u8 - * 01 = u16 - * 10 = u32 - * 11 = u64 + * 00 = __u8 + * 01 = __u16 + * 10 = __u32 + * 11 = __u64 */ struct kbase_ioctl_get_gpuprops { __u64 buffer; @@ -134,9 +134,9 @@ union kbase_ioctl_mem_query { #define KBASE_IOCTL_MEM_QUERY \ _IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query) -#define KBASE_MEM_QUERY_COMMIT_SIZE ((u64)1) -#define KBASE_MEM_QUERY_VA_SIZE ((u64)2) -#define KBASE_MEM_QUERY_FLAGS ((u64)3) +#define KBASE_MEM_QUERY_COMMIT_SIZE ((__u64)1) +#define KBASE_MEM_QUERY_VA_SIZE ((__u64)2) +#define KBASE_MEM_QUERY_FLAGS ((__u64)3) /** * struct kbase_ioctl_mem_free - Free a memory region @@ -529,7 +529,7 @@ struct kbase_ioctl_mem_profile_add { /** * struct kbase_ioctl_sticky_resource_map - Permanently map an external resource * @count: Number of resources - * @address: Array of u64 GPU addresses of the external resources to map + * @address: Array of __u64 GPU addresses of the external resources to map */ struct kbase_ioctl_sticky_resource_map { __u64 count; @@ -543,7 +543,7 @@ struct kbase_ioctl_sticky_resource_map { * struct kbase_ioctl_sticky_resource_map - Unmap a resource mapped which was * previously permanently mapped * @count: Number of resources - * @address: Array of u64 GPU addresses of the external resources to unmap + * @address: Array of __u64 GPU addresses of the external resources to unmap */ struct kbase_ioctl_sticky_resource_unmap { __u64 count; @@ -581,7 +581,6 @@ union kbase_ioctl_mem_find_gpu_start_and_offset { #define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \ _IOWR(KBASE_IOCTL_TYPE, 31, union kbase_ioctl_mem_find_gpu_start_and_offset) - #define KBASE_IOCTL_CINSTR_GWT_START \ _IO(KBASE_IOCTL_TYPE, 33) @@ -642,7 +641,7 @@ struct kbase_ioctl_mem_exec_init { * @out: Output parameters * @out.sec: Integer field of the monotonic time, unit in seconds. * @out.nsec: Fractional sec of the monotonic time, in nano-seconds. - * @out.padding: Unused, for u64 alignment + * @out.padding: Unused, for __u64 alignment * @out.timestamp: System wide timestamp (counter) value. * @out.cycle_counter: GPU cycle counter value. */ @@ -675,6 +674,19 @@ struct kbase_ioctl_context_priority_check { #define KBASE_IOCTL_CONTEXT_PRIORITY_CHECK \ _IOWR(KBASE_IOCTL_TYPE, 54, struct kbase_ioctl_context_priority_check) +/** + * struct kbase_ioctl_set_limited_core_count - Set the limited core count. 
+ * + * @max_core_count: Maximum core count + */ +struct kbase_ioctl_set_limited_core_count { + __u8 max_core_count; +}; + +#define KBASE_IOCTL_SET_LIMITED_CORE_COUNT \ + _IOW(KBASE_IOCTL_TYPE, 55, struct kbase_ioctl_set_limited_core_count) + + /*************** * test ioctls * ***************/ @@ -685,23 +697,6 @@ struct kbase_ioctl_context_priority_check { #define KBASE_IOCTL_TEST_TYPE (KBASE_IOCTL_TYPE + 1) -/** - * struct kbase_ioctl_tlstream_test - Start a timeline stream test - * - * @tpw_count: number of trace point writers in each context - * @msg_delay: time delay between tracepoints from one writer in milliseconds - * @msg_count: number of trace points written by one writer - * @aux_msg: if non-zero aux messages will be included - */ -struct kbase_ioctl_tlstream_test { - __u32 tpw_count; - __u32 msg_delay; - __u32 msg_count; - __u32 aux_msg; -}; - -#define KBASE_IOCTL_TLSTREAM_TEST \ - _IOW(KBASE_IOCTL_TEST_TYPE, 1, struct kbase_ioctl_tlstream_test) /** * struct kbase_ioctl_tlstream_stats - Read tlstream stats for test purposes @@ -838,4 +833,4 @@ struct kbase_ioctl_tlstream_stats { } #endif -#endif +#endif /* _UAPI_KBASE_IOCTL_H_ */ diff --git a/mali_kbase/mali_kbase_kinstr_jm_reader.h b/common/include/uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h index cbd495f..cb782bd 100644 --- a/mali_kbase/mali_kbase_kinstr_jm_reader.h +++ b/common/include/uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,8 +34,8 @@ * 8. Close the file descriptor */ -#ifndef _KBASE_KINSTR_JM_READER_H_ -#define _KBASE_KINSTR_JM_READER_H_ +#ifndef _UAPI_KBASE_KINSTR_JM_READER_H_ +#define _UAPI_KBASE_KINSTR_JM_READER_H_ /** * enum kbase_kinstr_jm_reader_atom_state - Determines the work state of an atom @@ -66,4 +66,4 @@ enum kbase_kinstr_jm_reader_atom_state { KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT }; -#endif /* _KBASE_KINSTR_JM_READER_H_ */ +#endif /* _UAPI_KBASE_KINSTR_JM_READER_H_ */ diff --git a/mali_kbase/mali_uk.h b/common/include/uapi/gpu/arm/midgard/mali_uk.h index a499e02..81cbb9e 100644 --- a/mali_kbase/mali_uk.h +++ b/common/include/uapi/gpu/arm/midgard/mali_uk.h @@ -24,12 +24,12 @@ * and kernel side of the User-Kernel interface. 
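The GPU properties buffer format described above for KBASE_IOCTL_GET_GPUPROPS, and the new KBASE_IOCTL_SET_LIMITED_CORE_COUNT added at ioctl number 55, can be exercised from userspace along the following lines. This is a minimal sketch rather than driver code: it assumes the buffer has already been filled by KBASE_IOCTL_GET_GPUPROPS with flags == 0, that mali_fd is an already set-up kbase context file descriptor (the device node name is platform specific and not defined in these headers), and that the UAPI header include path matches however the headers are installed.

/* Minimal userspace sketch; the include path and mali_fd setup are assumptions. */
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include "mali_kbase_ioctl.h"	/* kbase UAPI header; path is an assumption */

static uint64_t read_le(const uint8_t *p, size_t n)
{
	uint64_t v = 0;
	size_t i;

	for (i = 0; i < n; i++)
		v |= (uint64_t)p[i] << (8 * i);
	return v;
}

/* Walk the key/value pairs: a little-endian __u32 key whose bottom two bits
 * encode the value size (00 = __u8, 01 = __u16, 10 = __u32, 11 = __u64),
 * immediately followed by the tightly packed little-endian value.
 */
static void parse_gpuprops(const uint8_t *buf, size_t len)
{
	size_t pos = 0;

	while (len - pos >= sizeof(uint32_t)) {
		uint32_t key = (uint32_t)read_le(buf + pos, sizeof(uint32_t));
		size_t vsize = (size_t)1 << (key & 3);
		uint64_t value;

		pos += sizeof(uint32_t);
		if (len - pos < vsize)
			break;
		value = read_le(buf + pos, vsize);
		pos += vsize;

		printf("key 0x%08x (%zu-byte value) = 0x%llx\n",
		       (unsigned int)key, vsize, (unsigned long long)value);
	}
}

/* The new ioctl 55 simply passes the requested maximum core count. */
static int set_limited_core_count(int mali_fd, uint8_t max_core_count)
{
	struct kbase_ioctl_set_limited_core_count arg = {
		.max_core_count = max_core_count,
	};

	return ioctl(mali_fd, KBASE_IOCTL_SET_LIMITED_CORE_COUNT, &arg);
}

The key is treated as opaque here: the driver's property identifiers occupy the remaining bits of the key, with the bottom two bits reserved for the size code described above.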
*/ -#ifndef _UK_H_ -#define _UK_H_ +#ifndef _UAPI_UK_H_ +#define _UAPI_UK_H_ #ifdef __cplusplus extern "C" { -#endif /* __cplusplus */ +#endif /* __cplusplus */ /** * DOC: uk_api User-Kernel Interface API @@ -66,5 +66,5 @@ enum uk_client_id { #ifdef __cplusplus } -#endif /* __cplusplus */ -#endif /* _UK_H_ */ +#endif /* __cplusplus */ +#endif /* _UAPI_UK_H_ */ diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild index 1c9e109..5463a24 100644 --- a/mali_kbase/Kbuild +++ b/mali_kbase/Kbuild @@ -20,11 +20,11 @@ # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= "r30p0-01eac0" +MALI_RELEASE_NAME ?= '"r31p0-01eac0"' # Paths required for build -# make $(src) as absolute path if it isn't already, by prefixing $(srctree) +# make $(src) as absolute path if it is not already, by prefixing $(srctree) src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src)) KBASE_PATH = $(src) KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy @@ -64,7 +64,7 @@ DEFINES = \ -DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \ -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ -DMALI_COVERAGE=$(MALI_COVERAGE) \ - -DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \ + -DMALI_RELEASE_NAME=$(MALI_RELEASE_NAME) \ -DMALI_JIT_PRESSURE_LIMIT_BASE=$(MALI_JIT_PRESSURE_LIMIT_BASE) \ -DMALI_INCREMENTAL_RENDERING=$(MALI_INCREMENTAL_RENDERING) @@ -114,7 +114,6 @@ SRC := \ mali_kbase_mem_profile_debugfs.c \ mmu/mali_kbase_mmu.c \ mmu/mali_kbase_mmu_hw_direct.c \ - mmu/mali_kbase_mmu_mode_lpae.c \ mmu/mali_kbase_mmu_mode_aarch64.c \ mali_kbase_disjoint_events.c \ mali_kbase_debug_mem_view.c \ @@ -170,9 +169,6 @@ ifeq ($(CONFIG_MALI_CINSTR_GWT),y) SRC += mali_kbase_gwt.c endif -ifeq ($(MALI_UNIT_TEST),1) - SRC += tl/mali_kbase_timeline_test.c -endif ifeq ($(MALI_CUSTOMER_RELEASE),0) SRC += mali_kbase_regs_dump_debugfs.c diff --git a/mali_kbase/Makefile b/mali_kbase/Makefile index 2ba2d77..84103af 100644 --- a/mali_kbase/Makefile +++ b/mali_kbase/Makefile @@ -27,7 +27,7 @@ ifeq ($(KBUILD_EXTMOD),) export CONFIG_MALI_MIDGARD?=m ifneq ($(CONFIG_MALI_MIDGARD),n) -export CONFIF_MALI_CSF_SUPPORT?=n +export CONFIG_MALI_CSF_SUPPORT?=n export CONFIG_MALI_KUTF?=m export CONFIG_MALI_REAL_HW?=y @@ -39,7 +39,7 @@ export CONFIG_MALI_DEVFREQ?=y endif DEFINES += -DCONFIG_MALI_MIDGARD=$(CONFIG_MALI_MIDGARD) \ - -DCONFIF_MALI_CSF_SUPPORT=$(CONFIF_MALI_CSF_SUPPORT) \ + -DCONFIG_MALI_CSF_SUPPORT=$(CONFIG_MALI_CSF_SUPPORT) \ -DCONFIG_MALI_KUTF=$(CONFIG_MALI_KUTF) \ -DCONFIG_MALI_REAL_HW=$(CONFIG_MALI_REAL_HW) \ -DCONFIG_MALI_GATOR_SUPPORT=$(CONFIG_MALI_GATOR_SUPPORT) \ @@ -50,13 +50,8 @@ export DEFINES endif endif -BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../.. KBASE_PATH_RELATIVE = $(CURDIR) -ifeq ($(CONFIG_MALI_BUSLOG),y) -#Add bus logger symbols -EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers -endif # we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions all: diff --git a/mali_kbase/arbiter/mali_kbase_arbif.c b/mali_kbase/arbiter/mali_kbase_arbif.c index 5ed5f80..7d6ab0c 100644 --- a/mali_kbase/arbiter/mali_kbase_arbif.c +++ b/mali_kbase/arbiter/mali_kbase_arbif.c @@ -30,6 +30,66 @@ #include <linux/of_platform.h> #include "mali_kbase_arbiter_interface.h" +/* Arbiter interface version against which was implemented this module */ +#define MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION 5 +#if MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION != \ + MALI_KBASE_ARBITER_INTERFACE_VERSION +#error "Unsupported Mali Arbiter interface version." 
+#endif + +static void on_max_config(struct device *dev, uint32_t max_l2_slices, + uint32_t max_core_mask) +{ + struct kbase_device *kbdev; + + if (!dev) { + pr_err("%s(): dev is NULL", __func__); + return; + } + + kbdev = dev_get_drvdata(dev); + if (!kbdev) { + dev_err(dev, "%s(): kbdev is NULL", __func__); + return; + } + + if (!max_l2_slices || !max_core_mask) { + dev_dbg(dev, + "%s(): max_config ignored as one of the fields is zero", + __func__); + return; + } + + /* set the max config info in the kbase device */ + kbase_arbiter_set_max_config(kbdev, max_l2_slices, max_core_mask); +} + +/** + * on_update_freq() - Updates GPU clock frequency + * @dev: arbiter interface device handle + * @freq: GPU clock frequency value reported from arbiter + * + * call back function to update GPU clock frequency with + * new value from arbiter + */ +static void on_update_freq(struct device *dev, uint32_t freq) +{ + struct kbase_device *kbdev; + + if (!dev) { + pr_err("%s(): dev is NULL", __func__); + return; + } + + kbdev = dev_get_drvdata(dev); + if (!kbdev) { + dev_err(dev, "%s(): kbdev is NULL", __func__); + return; + } + + kbase_arbiter_pm_update_gpu_freq(&kbdev->arb.arb_freq, freq); +} + /** * on_gpu_stop() - sends KBASE_VM_GPU_STOP_EVT event on VM stop * @dev: arbiter interface device handle @@ -38,7 +98,18 @@ */ static void on_gpu_stop(struct device *dev) { - struct kbase_device *kbdev = dev_get_drvdata(dev); + struct kbase_device *kbdev; + + if (!dev) { + pr_err("%s(): dev is NULL", __func__); + return; + } + + kbdev = dev_get_drvdata(dev); + if (!kbdev) { + dev_err(dev, "%s(): kbdev is NULL", __func__); + return; + } KBASE_TLSTREAM_TL_ARBITER_STOP_REQUESTED(kbdev, kbdev); kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_STOP_EVT); @@ -52,7 +123,18 @@ static void on_gpu_stop(struct device *dev) */ static void on_gpu_granted(struct device *dev) { - struct kbase_device *kbdev = dev_get_drvdata(dev); + struct kbase_device *kbdev; + + if (!dev) { + pr_err("%s(): dev is NULL", __func__); + return; + } + + kbdev = dev_get_drvdata(dev); + if (!kbdev) { + dev_err(dev, "%s(): kbdev is NULL", __func__); + return; + } KBASE_TLSTREAM_TL_ARBITER_GRANTED(kbdev, kbdev); kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_GRANTED_EVT); @@ -66,7 +148,18 @@ static void on_gpu_granted(struct device *dev) */ static void on_gpu_lost(struct device *dev) { - struct kbase_device *kbdev = dev_get_drvdata(dev); + struct kbase_device *kbdev; + + if (!dev) { + pr_err("%s(): dev is NULL", __func__); + return; + } + + kbdev = dev_get_drvdata(dev); + if (!kbdev) { + dev_err(dev, "%s(): kbdev is NULL", __func__); + return; + } kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_LOST_EVT); } @@ -122,6 +215,12 @@ int kbase_arbif_init(struct kbase_device *kbdev) ops.arb_vm_gpu_stop = on_gpu_stop; ops.arb_vm_gpu_granted = on_gpu_granted; ops.arb_vm_gpu_lost = on_gpu_lost; + ops.arb_vm_max_config = on_max_config; + ops.arb_vm_update_freq = on_update_freq; + + + kbdev->arb.arb_freq.arb_freq = 0; + mutex_init(&kbdev->arb.arb_freq.arb_freq_lock); /* register kbase arbiter_if callbacks */ if (arb_if->vm_ops.vm_arb_register_dev) { @@ -133,6 +232,7 @@ int kbase_arbif_init(struct kbase_device *kbdev) return err; } } + #else /* CONFIG_OF */ dev_dbg(kbdev->dev, "No arbiter without Device Tree support\n"); kbdev->arb.arb_dev = NULL; @@ -162,6 +262,22 @@ void kbase_arbif_destroy(struct kbase_device *kbdev) } /** + * kbase_arbif_get_max_config() - Request max config info + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * 
call back function from arb interface to arbiter requesting max config info + */ +void kbase_arbif_get_max_config(struct kbase_device *kbdev) +{ + struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; + + if (arb_if && arb_if->vm_ops.vm_arb_get_max_config) { + dev_dbg(kbdev->dev, "%s\n", __func__); + arb_if->vm_ops.vm_arb_get_max_config(arb_if); + } +} + +/** * kbase_arbif_gpu_request() - Request GPU from * @kbdev: The kbase device structure for the device (must be a valid pointer) * @@ -173,6 +289,7 @@ void kbase_arbif_gpu_request(struct kbase_device *kbdev) if (arb_if && arb_if->vm_ops.vm_arb_gpu_request) { dev_dbg(kbdev->dev, "%s\n", __func__); + KBASE_TLSTREAM_TL_ARBITER_REQUESTED(kbdev, kbdev); arb_if->vm_ops.vm_arb_gpu_request(arb_if); } } diff --git a/mali_kbase/arbiter/mali_kbase_arbif.h b/mali_kbase/arbiter/mali_kbase_arbif.h index c6a2031..710559c 100644 --- a/mali_kbase/arbiter/mali_kbase_arbif.h +++ b/mali_kbase/arbiter/mali_kbase_arbif.h @@ -72,6 +72,14 @@ int kbase_arbif_init(struct kbase_device *kbdev); void kbase_arbif_destroy(struct kbase_device *kbdev); /** + * kbase_arbif_get_max_config() - Request max config info + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * call back function from arb interface to arbiter requesting max config info + */ +void kbase_arbif_get_max_config(struct kbase_device *kbdev); + +/** * kbase_arbif_gpu_request() - Send GPU request message to the arbiter * @kbdev: The kbase device structure for the device (must be a valid pointer) * diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_defs.h b/mali_kbase/arbiter/mali_kbase_arbiter_defs.h index c754b6e..586c5d4 100644 --- a/mali_kbase/arbiter/mali_kbase_arbiter_defs.h +++ b/mali_kbase/arbiter/mali_kbase_arbiter_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -44,6 +44,8 @@ * @vm_resume_work: Work item for vm_arb_wq to resume current work on GPU * @vm_arb_starting: Work queue resume in progress * @vm_arb_stopping: Work queue suspend in progress + * @interrupts_installed: Flag set when interrupts are installed + * @vm_request_timer: Timer to monitor GPU request */ struct kbase_arbiter_vm_state { struct kbase_device *kbdev; @@ -55,6 +57,8 @@ struct kbase_arbiter_vm_state { struct work_struct vm_resume_work; bool vm_arb_starting; bool vm_arb_stopping; + bool interrupts_installed; + struct hrtimer vm_request_timer; }; /** @@ -62,10 +66,12 @@ struct kbase_arbiter_vm_state { * allocated from the probe method of Mali driver * @arb_if: Pointer to the arbiter interface device * @arb_dev: Pointer to the arbiter device + * @arb_freq: GPU clock frequency retrieved from arbiter. */ struct kbase_arbiter_device { struct arbiter_if_dev *arb_if; struct device *arb_dev; + struct kbase_arbiter_freq arb_freq; }; #endif /* _MALI_KBASE_ARBITER_DEFS_H_ */ diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_interface.h b/mali_kbase/arbiter/mali_kbase_arbiter_interface.h index 958b0a1..84389e8 100644 --- a/mali_kbase/arbiter/mali_kbase_arbiter_interface.h +++ b/mali_kbase/arbiter/mali_kbase_arbiter_interface.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
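For context on how the new max-config path is meant to be driven from the other side of the interface: kbase only issues the request above (kbase_arbif_get_max_config() forwarding to vm_arb_get_max_config), and expects the arbiter module to answer through the arb_vm_max_config callback documented in mali_kbase_arbiter_interface.h below. The following is a purely hypothetical arbiter-side fragment; the vm_dev/vm_ops bookkeeping and the limit values are illustrative assumptions, not anything defined by this patch.

/* Hypothetical arbiter-side sketch; not part of this patch. */
#include <linux/device.h>
#include "mali_kbase_arbiter_interface.h"	/* struct arbiter_if_arb_vm_ops */

static void example_arbiter_report_max_config(struct device *vm_dev,
					      struct arbiter_if_arb_vm_ops *vm_ops)
{
	/* Illustrative limits; a real arbiter derives them from the
	 * resources assigned to the requesting partition.
	 */
	uint32_t max_l2_slices = 2;
	uint32_t max_core_mask = 0xFFFF;

	if (vm_ops && vm_ops->arb_vm_max_config)
		vm_ops->arb_vm_max_config(vm_dev, max_l2_slices, max_core_mask);
}

On the kbase side such a report lands in on_max_config() above, which ignores it if either field is zero and otherwise stores it via kbase_arbiter_set_max_config().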
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,7 +28,7 @@ #define _MALI_KBASE_ARBITER_INTERFACE_H_ /** - * @brief Mali arbiter interface version + * Mali arbiter interface version * * This specifies the current version of the configuration interface. Whenever * the arbiter interface changes, so that integration effort is required, the @@ -39,8 +39,15 @@ * 1 - Added the Mali arbiter configuration interface. * 2 - Strip out reference code from header * 3 - Removed DVFS utilization interface (DVFS moved to arbiter side) + * 4 - Added max_config support + * 5 - Added GPU clock frequency reporting support from arbiter */ -#define MALI_KBASE_ARBITER_INTERFACE_VERSION 3 +#define MALI_KBASE_ARBITER_INTERFACE_VERSION 5 + +/** + * NO_FREQ is used in case platform doesn't support reporting frequency + */ +#define NO_FREQ 0 struct arbiter_if_dev; @@ -86,6 +93,27 @@ struct arbiter_if_arb_vm_ops { * If successful, will respond with a vm_arb_gpu_stopped message. */ void (*arb_vm_gpu_lost)(struct device *dev); + + /** + * arb_vm_max_config() - Send max config info to the VM + * @dev: The arbif kernel module device. + * @max_l2_slices: The maximum number of L2 slices. + * @max_core_mask: The largest core mask. + * + * Informs KBase the maximum resources that can be allocated to the + * partition in use. + */ + void (*arb_vm_max_config)(struct device *dev, uint32_t max_l2_slices, + uint32_t max_core_mask); + + /** + * arb_vm_update_freq() - GPU clock frequency has been updated + * @dev: The arbif kernel module device. + * @freq: GPU clock frequency value reported from arbiter + * + * Informs KBase that the GPU clock frequency has been updated. + */ + void (*arb_vm_update_freq)(struct device *dev, uint32_t freq); }; /** @@ -115,6 +143,13 @@ struct arbiter_if_vm_arb_ops { void (*vm_arb_unregister_dev)(struct arbiter_if_dev *arbif_dev); /** + * vm_arb_gpu_get_max_config() - Request the max config from the + * Arbiter. + * @arbif_dev: The arbiter interface we want to issue the request. + */ + void (*vm_arb_get_max_config)(struct arbiter_if_dev *arbif_dev); + + /** * vm_arb_gpu_request() - Ask the arbiter interface for GPU access. * @arbif_dev: The arbiter interface we want to issue the request. */ diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_pm.c b/mali_kbase/arbiter/mali_kbase_arbiter_pm.c index 08a6872..456cc70 100644 --- a/mali_kbase/arbiter/mali_kbase_arbiter_pm.c +++ b/mali_kbase/arbiter/mali_kbase_arbiter_pm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,14 +20,33 @@ */ /** - * @file mali_kbase_arbiter_pm.c + * @file * Mali arbiter power manager state machine and APIs */ #include <mali_kbase.h> #include <mali_kbase_pm.h> +#include <mali_kbase_hwaccess_jm.h> #include <mali_kbase_irq_internal.h> +#include <mali_kbase_hwcnt_context.h> +#include <mali_kbase_pm_internal.h> #include <tl/mali_kbase_tracepoints.h> +#include <mali_kbase_gpuprops.h> + +/* A dmesg warning will occur if the GPU is not granted + * after the following time (in milliseconds) has ellapsed. 
+ */ +#define GPU_REQUEST_TIMEOUT 1000 + +#define MAX_L2_SLICES_MASK 0xFF + +/* Maximum time in ms, before deferring probe incase + * GPU_GRANTED message is not received + */ +static int gpu_req_timeout = 1; +module_param(gpu_req_timeout, int, 0644); +MODULE_PARM_DESC(gpu_req_timeout, + "On a virtualized platform, if the GPU is not granted within this time(ms) kbase will defer the probe"); static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev); static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld( @@ -195,6 +214,60 @@ static void kbase_arbiter_pm_resume_wq(struct work_struct *data) } /** + * request_timer_callback() - Issue warning on request timer expiration + * @timer: Request hr timer data + * + * Called when the Arbiter takes too long to grant the GPU after a + * request has been made. Issues a warning in dmesg. + * + * Return: Always returns HRTIMER_NORESTART + */ +static enum hrtimer_restart request_timer_callback(struct hrtimer *timer) +{ + struct kbase_arbiter_vm_state *arb_vm_state = container_of(timer, + struct kbase_arbiter_vm_state, vm_request_timer); + + KBASE_DEBUG_ASSERT(arb_vm_state); + KBASE_DEBUG_ASSERT(arb_vm_state->kbdev); + + dev_warn(arb_vm_state->kbdev->dev, + "Still waiting for GPU to be granted from Arbiter after %d ms\n", + GPU_REQUEST_TIMEOUT); + return HRTIMER_NORESTART; +} + +/** + * start_request_timer() - Start a timer after requesting GPU + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Start a timer to track when kbase is waiting for the GPU from the + * Arbiter. If the timer expires before GPU is granted, a warning in + * dmesg will be issued. + */ +static void start_request_timer(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + hrtimer_start(&arb_vm_state->vm_request_timer, + HR_TIMER_DELAY_MSEC(GPU_REQUEST_TIMEOUT), + HRTIMER_MODE_REL); +} + +/** + * cancel_request_timer() - Stop the request timer + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Stops the request timer once GPU has been granted. Safe to call + * even if timer is no longer running. + */ +static void cancel_request_timer(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + hrtimer_cancel(&arb_vm_state->vm_request_timer); +} + +/** * kbase_arbiter_pm_early_init() - Initialize arbiter for VM * Paravirtualized use. 
* @kbdev: The kbase device structure for the device (must be a valid pointer) @@ -230,6 +303,10 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev) INIT_WORK(&arb_vm_state->vm_resume_work, kbase_arbiter_pm_resume_wq); arb_vm_state->vm_arb_starting = false; atomic_set(&kbdev->pm.gpu_users_waiting, 0); + hrtimer_init(&arb_vm_state->vm_request_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + arb_vm_state->vm_request_timer.function = + request_timer_callback; kbdev->pm.arb_vm_state = arb_vm_state; err = kbase_arbif_init(kbdev); @@ -237,17 +314,31 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev) dev_err(kbdev->dev, "Failed to initialise arbif module\n"); goto arbif_init_fail; } + if (kbdev->arb.arb_if) { kbase_arbif_gpu_request(kbdev); dev_dbg(kbdev->dev, "Waiting for initial GPU assignment...\n"); - wait_event(arb_vm_state->vm_state_wait, + err = wait_event_timeout(arb_vm_state->vm_state_wait, arb_vm_state->vm_state == - KBASE_VM_STATE_INITIALIZING_WITH_GPU); + KBASE_VM_STATE_INITIALIZING_WITH_GPU, + msecs_to_jiffies(gpu_req_timeout)); + + if (!err) { + dev_dbg(kbdev->dev, + "Kbase probe Deferred after waiting %d ms to receive GPU_GRANT\n", + gpu_req_timeout); + err = -EPROBE_DEFER; + goto arbif_eprobe_defer; + } + dev_dbg(kbdev->dev, "Waiting for initial GPU assignment - done\n"); } return 0; +arbif_eprobe_defer: + kbase_arbiter_pm_early_term(kbdev); + return err; arbif_init_fail: destroy_workqueue(arb_vm_state->vm_arb_wq); kfree(arb_vm_state); @@ -265,14 +356,15 @@ void kbase_arbiter_pm_early_term(struct kbase_device *kbdev) { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + cancel_request_timer(kbdev); mutex_lock(&arb_vm_state->vm_state_lock); if (arb_vm_state->vm_state > KBASE_VM_STATE_STOPPED_GPU_REQUESTED) { kbase_pm_set_gpu_lost(kbdev, false); kbase_arbif_gpu_stopped(kbdev, false); } mutex_unlock(&arb_vm_state->vm_state_lock); - kbase_arbif_destroy(kbdev); destroy_workqueue(arb_vm_state->vm_arb_wq); + kbase_arbif_destroy(kbdev); arb_vm_state->vm_arb_wq = NULL; kfree(kbdev->pm.arb_vm_state); kbdev->pm.arb_vm_state = NULL; @@ -282,19 +374,36 @@ void kbase_arbiter_pm_early_term(struct kbase_device *kbdev) * kbase_arbiter_pm_release_interrupts() - Release the GPU interrupts * @kbdev: The kbase device structure for the device (must be a valid pointer) * - * Releases interrupts if needed (GPU is available) otherwise does nothing + * Releases interrupts and set the interrupt flag to false */ void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev) { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; mutex_lock(&arb_vm_state->vm_state_lock); - if (!kbdev->arb.arb_if || - arb_vm_state->vm_state > - KBASE_VM_STATE_STOPPED_GPU_REQUESTED) + if (arb_vm_state->interrupts_installed == true) { + arb_vm_state->interrupts_installed = false; kbase_release_interrupts(kbdev); + } + mutex_unlock(&arb_vm_state->vm_state_lock); +} +/** + * kbase_arbiter_pm_install_interrupts() - Install the GPU interrupts + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Install interrupts and set the interrupt_install flag to true. 
+ */ +int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + int err; + + mutex_lock(&arb_vm_state->vm_state_lock); + arb_vm_state->interrupts_installed = true; + err = kbase_install_interrupts(kbdev); mutex_unlock(&arb_vm_state->vm_state_lock); + return err; } /** @@ -317,7 +426,12 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev) dev_dbg(kbdev->dev, "%s %s\n", __func__, kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); - kbase_release_interrupts(kbdev); + + if (arb_vm_state->interrupts_installed) { + arb_vm_state->interrupts_installed = false; + kbase_release_interrupts(kbdev); + } + switch (arb_vm_state->vm_state) { case KBASE_VM_STATE_STOPPING_ACTIVE: request_gpu = true; @@ -338,6 +452,71 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev) kbase_pm_set_gpu_lost(kbdev, false); kbase_arbif_gpu_stopped(kbdev, request_gpu); + if (request_gpu) + start_request_timer(kbdev); +} + +void kbase_arbiter_set_max_config(struct kbase_device *kbdev, + uint32_t max_l2_slices, + uint32_t max_core_mask) +{ + struct kbase_arbiter_vm_state *arb_vm_state; + struct max_config_props max_config; + + if (!kbdev) + return; + + /* Mask the max_l2_slices as it is stored as 8 bits into kbase */ + max_config.l2_slices = max_l2_slices & MAX_L2_SLICES_MASK; + max_config.core_mask = max_core_mask; + arb_vm_state = kbdev->pm.arb_vm_state; + + mutex_lock(&arb_vm_state->vm_state_lock); + /* Just set the max_props in kbase during initialization. */ + if (arb_vm_state->vm_state == KBASE_VM_STATE_INITIALIZING) + kbase_gpuprops_set_max_config(kbdev, &max_config); + else + dev_dbg(kbdev->dev, "Unexpected max_config on VM state %s", + kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); + + mutex_unlock(&arb_vm_state->vm_state_lock); +} + +int kbase_arbiter_pm_gpu_assigned(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state; + int result = -EINVAL; + + if (!kbdev) + return result; + + /* First check the GPU_LOST state */ + kbase_pm_lock(kbdev); + if (kbase_pm_is_gpu_lost(kbdev)) { + kbase_pm_unlock(kbdev); + return 0; + } + kbase_pm_unlock(kbdev); + + /* Then the arbitration state machine */ + arb_vm_state = kbdev->pm.arb_vm_state; + + mutex_lock(&arb_vm_state->vm_state_lock); + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_INITIALIZING: + case KBASE_VM_STATE_SUSPENDED: + case KBASE_VM_STATE_STOPPED: + case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: + case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: + result = 0; + break; + default: + result = 1; + break; + } + mutex_unlock(&arb_vm_state->vm_state_lock); + + return result; } /** @@ -351,6 +530,7 @@ static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev) struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; lockdep_assert_held(&arb_vm_state->vm_state_lock); + cancel_request_timer(kbdev); switch (arb_vm_state->vm_state) { case KBASE_VM_STATE_INITIALIZING: kbase_arbiter_pm_vm_set_state(kbdev, @@ -358,7 +538,14 @@ static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev) break; case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STARTING); + arb_vm_state->interrupts_installed = true; kbase_install_interrupts(kbdev); + /* + * GPU GRANTED received while in stop can be a result of a + * repartitioning. + */ + kbase_gpuprops_req_curr_config_update(kbdev); + /* curr_config will be updated while resuming the PM. 
*/ queue_work(arb_vm_state->vm_arb_wq, &arb_vm_state->vm_resume_work); break; @@ -591,6 +778,7 @@ static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev) kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPED_GPU_REQUESTED); kbase_arbif_gpu_request(kbdev); + start_request_timer(kbdev); /* Release lock and block resume OS function until we have * asynchronously received the GRANT message from the Arbiter and @@ -764,6 +952,7 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPED_GPU_REQUESTED); kbase_arbif_gpu_request(kbdev); + start_request_timer(kbdev); } else if (arb_vm_state->vm_state == KBASE_VM_STATE_INITIALIZING_WITH_GPU) break; @@ -811,3 +1000,60 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, } return res; } + +/** + * kbase_arbiter_pm_update_gpu_freq() - Updates GPU clock frequency received + * from arbiter. + * @arb_freq - Pointer to struchture holding GPU clock frequenecy data + * @freq - New frequency value + */ +void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq, + uint32_t freq) +{ + mutex_lock(&arb_freq->arb_freq_lock); + arb_freq->arb_freq = freq; + mutex_unlock(&arb_freq->arb_freq_lock); +} + +/** + * enumerate_arb_gpu_clk() - Enumerate a GPU clock on the given index + * @kbdev - kbase_device pointer + * @index - GPU clock index + * + * Returns pointer to structure holding GPU clock frequency data reported from + * arbiter, only index 0 is valid. + */ +static void *enumerate_arb_gpu_clk(struct kbase_device *kbdev, + unsigned int index) +{ + if (index == 0) + return &kbdev->arb.arb_freq; + return NULL; +} + +/** + * get_arb_gpu_clk_rate() - Get the current rate of GPU clock frequency value + * @kbdev - kbase_device pointer + * @index - GPU clock index + * + * Returns the GPU clock frequency value saved when gpu is granted from arbiter + */ +static unsigned long get_arb_gpu_clk_rate(struct kbase_device *kbdev, + void *gpu_clk_handle) +{ + uint32_t freq; + struct kbase_arbiter_freq *arb_dev_freq = + (struct kbase_arbiter_freq *) gpu_clk_handle; + + mutex_lock(&arb_dev_freq->arb_freq_lock); + freq = arb_dev_freq->arb_freq; + mutex_unlock(&arb_dev_freq->arb_freq_lock); + return freq; +} + +struct kbase_clk_rate_trace_op_conf arb_clk_rate_trace_ops = { + .get_gpu_clk_rate = get_arb_gpu_clk_rate, + .enumerate_gpu_clk = enumerate_arb_gpu_clk, + .gpu_clk_notifier_register = NULL, + .gpu_clk_notifier_unregister = NULL +}; diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_pm.h b/mali_kbase/arbiter/mali_kbase_arbiter_pm.h index ef82271..0f74b63 100644 --- a/mali_kbase/arbiter/mali_kbase_arbiter_pm.h +++ b/mali_kbase/arbiter/mali_kbase_arbiter_pm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
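The arb_clk_rate_trace_ops table defined above is what the clock rate trace manager picks up when the device tree exposes an arbiter_if node (see the get_clk_rate_trace_callbacks() change further down in mali_kbase_clk_rate_trace_mgr.c). A small sketch of how a consumer could read the arbiter-reported clock through that table; only index 0 enumerates a clock, the returned value is whatever the arbiter last reported (0, i.e. NO_FREQ, means nothing has been reported), and the helper name is illustrative rather than part of the driver.

/* Illustrative consumer of arb_clk_rate_trace_ops; not part of this patch. */
static unsigned long example_read_arb_gpu_clk(struct kbase_device *kbdev)
{
	void *clk = arb_clk_rate_trace_ops.enumerate_gpu_clk(kbdev, 0);

	if (!clk)
		return 0;	/* only index 0 is a valid GPU clock here */

	/* Returns the last frequency value received from the arbiter,
	 * or 0 (NO_FREQ) if none has been reported yet.
	 */
	return arb_clk_rate_trace_ops.get_gpu_clk_rate(kbdev, clk);
}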
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -93,11 +93,19 @@ void kbase_arbiter_pm_early_term(struct kbase_device *kbdev); * kbase_arbiter_pm_release_interrupts() - Release the GPU interrupts * @kbdev: The kbase device structure for the device (must be a valid pointer) * - * Releases interrupts if needed (GPU is available) otherwise does nothing + * Releases interrupts and set the interrupt flag to false */ void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev); /** + * kbase_arbiter_pm_install_interrupts() - Install the GPU interrupts + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Install interrupts and set the interrupt_install flag to true. + */ +int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev); + +/** * kbase_arbiter_pm_vm_event() - Dispatch VM event to the state machine * @kbdev: The kbase device structure for the device (must be a valid pointer) * @@ -133,4 +141,42 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, */ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev); +/** + * kbase_arbiter_set_max_config() - Set the max config data in kbase device. + * @kbdev: The kbase device structure for the device (must be a valid pointer). + * @max_l2_slices: The maximum number of L2 slices. + * @max_core_mask: The largest core mask. + * + * This function handles a stop event for the VM. + * It will update the VM state and forward the stop event to the driver. + */ +void kbase_arbiter_set_max_config(struct kbase_device *kbdev, + uint32_t max_l2_slices, + uint32_t max_core_mask); + +/** + * kbase_arbiter_pm_gpu_assigned() - Determine if this VM has access to the GPU + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: 0 if the VM does not have access, 1 if it does, and a negative number + * if an error occurred + */ +int kbase_arbiter_pm_gpu_assigned(struct kbase_device *kbdev); + +extern struct kbase_clk_rate_trace_op_conf arb_clk_rate_trace_ops; + +/** + * struct kbase_arbiter_freq - Holding the GPU clock frequency data retrieved + * from arbiter + * @arb_freq: GPU clock frequency value + * @arb_freq_lock: Mutex protecting access to arbfreq value + */ +struct kbase_arbiter_freq { + uint32_t arb_freq; + struct mutex arb_freq_lock; +}; + +void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq, + uint32_t freq); + #endif /*_MALI_KBASE_ARBITER_PM_H_ */ diff --git a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h index 84fb1fc..fcf4e5b 100644 --- a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h +++ b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2014-2016, 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016, 2020-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,7 +23,7 @@ #define _KBASE_CACHE_POLICY_BACKEND_H_ #include "mali_kbase.h" -#include "mali_base_kernel.h" +#include <uapi/gpu/arm/midgard/mali_base_kernel.h> /** * kbase_cache_set_coherency_mode() - Sets the system coherency mode diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c index dcd1b02..7076ab4 100644 --- a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c +++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -39,6 +39,38 @@ #define CLK_RATE_TRACE_OPS (NULL) #endif +/** + * get_clk_rate_trace_callbacks() - Returns pointer to clk trace ops. + * @kbdev: Pointer to kbase device, used to check if arbitration is enabled + * when compiled with arbiter support. + * Return: Pointer to clk trace ops if supported or NULL. + */ +static struct kbase_clk_rate_trace_op_conf * +get_clk_rate_trace_callbacks(struct kbase_device *kbdev __maybe_unused) +{ + /* base case */ + struct kbase_clk_rate_trace_op_conf *callbacks = + (struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS; +#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) + const void *arbiter_if_node; + + if (WARN_ON(!kbdev) || WARN_ON(!kbdev->dev)) + return callbacks; + + arbiter_if_node = + of_get_property(kbdev->dev->of_node, "arbiter_if", NULL); + /* Arbitration enabled, override the callback pointer.*/ + if (arbiter_if_node) + callbacks = &arb_clk_rate_trace_ops; + else + dev_dbg(kbdev->dev, + "Arbitration supported but disabled by platform. 
Leaving clk rate callbacks as default.\n"); + +#endif + + return callbacks; +} + static int gpu_clk_rate_change_notifier(struct notifier_block *nb, unsigned long event, void *data) { @@ -69,12 +101,13 @@ static int gpu_clk_rate_change_notifier(struct notifier_block *nb, static int gpu_clk_data_init(struct kbase_device *kbdev, void *gpu_clk_handle, unsigned int index) { - struct kbase_clk_rate_trace_op_conf *callbacks = - (struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS; + struct kbase_clk_rate_trace_op_conf *callbacks; struct kbase_clk_data *clk_data; struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; int ret = 0; + callbacks = get_clk_rate_trace_callbacks(kbdev); + if (WARN_ON(!callbacks) || WARN_ON(!gpu_clk_handle) || WARN_ON(index >= BASE_MAX_NR_CLOCKS_REGULATORS)) @@ -108,8 +141,9 @@ static int gpu_clk_data_init(struct kbase_device *kbdev, clk_data->clk_rate_change_nb.notifier_call = gpu_clk_rate_change_notifier; - ret = callbacks->gpu_clk_notifier_register(kbdev, gpu_clk_handle, - &clk_data->clk_rate_change_nb); + if (callbacks->gpu_clk_notifier_register) + ret = callbacks->gpu_clk_notifier_register(kbdev, + gpu_clk_handle, &clk_data->clk_rate_change_nb); if (ret) { dev_err(kbdev->dev, "Failed to register notifier for clock enumerated at index %u", index); kfree(clk_data); @@ -120,12 +154,13 @@ static int gpu_clk_data_init(struct kbase_device *kbdev, int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev) { - struct kbase_clk_rate_trace_op_conf *callbacks = - (struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS; + struct kbase_clk_rate_trace_op_conf *callbacks; struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; unsigned int i; int ret = 0; + callbacks = get_clk_rate_trace_callbacks(kbdev); + spin_lock_init(&clk_rtm->lock); INIT_LIST_HEAD(&clk_rtm->listeners); @@ -186,9 +221,10 @@ void kbase_clk_rate_trace_manager_term(struct kbase_device *kbdev) if (!clk_rtm->clks[i]) break; - clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister( - kbdev, clk_rtm->clks[i]->gpu_clk_handle, - &clk_rtm->clks[i]->clk_rate_change_nb); + if (clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister) + clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister + (kbdev, clk_rtm->clks[i]->gpu_clk_handle, + &clk_rtm->clks[i]->clk_rate_change_nb); kfree(clk_rtm->clks[i]); } diff --git a/mali_kbase/backend/gpu/mali_kbase_devfreq.c b/mali_kbase/backend/gpu/mali_kbase_devfreq.c index 07767c2..9b82184 100644 --- a/mali_kbase/backend/gpu/mali_kbase_devfreq.c +++ b/mali_kbase/backend/gpu/mali_kbase_devfreq.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -643,7 +643,7 @@ int kbase_devfreq_init(struct kbase_device *kbdev) /* Record the maximum frequency possible */ kbdev->gpu_props.props.core_props.gpu_freq_khz_max = dp->freq_table[0] / 1000; - }; + } err = kbase_devfreq_init_core_mask_table(kbdev); if (err) { diff --git a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c index 4254a64..7542209 100644 --- a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2014-2020 ARM Limited. 
All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -121,6 +121,32 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev, return -EIO; } +int kbase_backend_gpuprops_get_curr_config(struct kbase_device *kbdev, + struct kbase_current_config_regdump *curr_config_regdump) +{ + if (WARN_ON(!kbdev) || WARN_ON(!curr_config_regdump)) + return -EINVAL; + + curr_config_regdump->mem_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(MEM_FEATURES)); + + curr_config_regdump->shader_present_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_PRESENT_LO)); + curr_config_regdump->shader_present_hi = kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_PRESENT_HI)); + + curr_config_regdump->l2_present_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_PRESENT_LO)); + curr_config_regdump->l2_present_hi = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_PRESENT_HI)); + + if (WARN_ON(kbase_is_gpu_removed(kbdev))) + return -EIO; + + return 0; + +} + int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, struct kbase_gpuprops_regdump *regdump) { @@ -156,11 +182,15 @@ int kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev, if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) { u32 l2_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_FEATURES)); + u32 l2_config = + kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG)); + if (kbase_is_gpu_removed(kbdev)) return -EIO; regdump->l2_features = l2_features; + regdump->l2_config = l2_config; } return 0; diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c index 9cc425e..6868dc3 100644 --- a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c @@ -107,7 +107,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, err = 0; - dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx); + dev_dbg(kbdev->dev, "HW counters dumping set-up for context %pK", kctx); return err; out_err: return err; @@ -167,7 +167,7 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); - dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", + dev_dbg(kbdev->dev, "HW counters dumping disabled for context %pK", kctx); err = 0; @@ -214,7 +214,7 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_PRFCNT_SAMPLE); - dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx); + dev_dbg(kbdev->dev, "HW counters dumping done for context %pK", kctx); err = 0; @@ -325,7 +325,7 @@ KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear); int kbase_instr_backend_init(struct kbase_device *kbdev) { - int ret = 0; + spin_lock_init(&kbdev->hwcnt.lock); kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; @@ -344,12 +344,12 @@ int kbase_instr_backend_init(struct kbase_device *kbdev) kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_PRIMARY; #endif #endif - return ret; + return 0; } void kbase_instr_backend_term(struct kbase_device *kbdev) { - (void)kbdev; + CSTD_UNUSED(kbdev); } #ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_defs.h b/mali_kbase/backend/gpu/mali_kbase_instr_defs.h index 
39b009d..05d5193 100644 --- a/mali_kbase/backend/gpu/mali_kbase_instr_defs.h +++ b/mali_kbase/backend/gpu/mali_kbase_instr_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2014, 2016, 2018, 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2016, 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c index 7cfca97..e84f3a9 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c @@ -40,10 +40,12 @@ #include <mali_kbase_regs_history_debugfs.h> static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev); +static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev, + const u64 affinity, const u64 limited_core_mask); static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req core_req, - int js) + int js, const u64 limited_core_mask) { u64 affinity; @@ -72,14 +74,21 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev, */ if (js == 2 && num_core_groups > 1) affinity &= coherency_info->group[1].core_mask; - else + else if (num_core_groups > 1) affinity &= coherency_info->group[0].core_mask; + else + affinity &= kbdev->gpu_props.curr_config.shader_present; } else { /* Use all cores */ affinity = kbdev->pm.backend.shaders_avail & kbdev->pm.debug_core_mask[js]; } + if (core_req & BASE_JD_REQ_LIMITED_CORE_MASK) { + /* Limiting affinity due to BASE_JD_REQ_LIMITED_CORE_MASK by applying the limited core mask. */ + affinity = kbasep_apply_limited_core_mask(kbdev, affinity, limited_core_mask); + } + if (unlikely(!affinity)) { #ifdef CONFIG_MALI_DEBUG u64 shaders_ready = @@ -89,6 +98,16 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev, #endif affinity = kbdev->pm.backend.shaders_avail; + + if (core_req & BASE_JD_REQ_LIMITED_CORE_MASK) { + /* Limiting affinity again to make sure it only enables shader cores with backed TLS memory. 
*/ + affinity = kbasep_apply_limited_core_mask(kbdev, affinity, limited_core_mask); + +#ifdef CONFIG_MALI_DEBUG + /* affinity should never be 0 */ + WARN_ON(!affinity); +#endif + } } kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), @@ -169,7 +188,7 @@ static u64 select_job_chain(struct kbase_jd_atom *katom) } dev_dbg(kctx->kbdev->dev, - "Selected job chain 0x%llx for end atom %p in state %d\n", + "Selected job chain 0x%llx for end atom %pK in state %d\n", jc, (void *)katom, (int)rp->state); katom->jc = jc; @@ -193,7 +212,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, /* Command register must be available */ KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx)); - dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %p\n", + dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n", jc_head, (void *)katom); kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), @@ -201,7 +220,8 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), jc_head >> 32); - affinity = kbase_job_write_affinity(kbdev, katom->core_req, js); + affinity = kbase_job_write_affinity(kbdev, katom->core_req, js, + kctx->limited_core_mask); /* start MMU, medium priority, cache clean/flush on end, clean/flush on * start @@ -257,7 +277,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, katom->start_timestamp = ktime_get(); /* GO ! */ - dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx", + dev_dbg(kbdev->dev, "JS: Submitting atom %pK from ctx %pK to js[%d] with head=0x%llx", katom, kctx, js, jc_head); KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, @@ -431,7 +451,9 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) */ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3076)) { if (completion_code == BASE_JD_EVENT_JOB_BUS_FAULT) { - if (kbase_prepare_to_reset_gpu_locked(kbdev)) + if (kbase_prepare_to_reset_gpu_locked( + kbdev, + RESET_FLAGS_NONE)) kbase_reset_gpu_locked(kbdev); } } @@ -789,7 +811,7 @@ static int softstop_start_rp_nolock( if (!(katom->core_req & BASE_JD_REQ_START_RENDERPASS)) { dev_dbg(kctx->kbdev->dev, - "Atom %p on job slot is not start RP\n", (void *)katom); + "Atom %pK on job slot is not start RP\n", (void *)katom); return -EPERM; } @@ -802,13 +824,13 @@ static int softstop_start_rp_nolock( rp->state != KBASE_JD_RP_RETRY)) return -EINVAL; - dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %p\n", + dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %pK\n", (int)rp->state, (void *)reg); if (WARN_ON(katom != rp->start_katom)) return -EINVAL; - dev_dbg(kctx->kbdev->dev, "Adding region %p to list %p\n", + dev_dbg(kctx->kbdev->dev, "Adding region %pK to list %pK\n", (void *)reg, (void *)&rp->oom_reg_list); list_move_tail(®->link, &rp->oom_reg_list); dev_dbg(kctx->kbdev->dev, "Added region to list\n"); @@ -853,7 +875,7 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) if (timeout != 0) goto exit; - if (kbase_prepare_to_reset_gpu(kbdev)) { + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) { dev_err(kbdev->dev, "Issuing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. 
process exit)\n", ZAP_TIMEOUT); @@ -863,7 +885,7 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) /* Wait for the reset to complete */ kbase_reset_gpu_wait(kbdev); exit: - dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx); + dev_dbg(kbdev->dev, "Zap: Finished Context %pK", kctx); /* Ensure that the signallers of the waitqs have finished */ mutex_lock(&kctx->jctx.lock); @@ -924,7 +946,7 @@ KBASE_EXPORT_TEST_API(kbase_job_slot_term); void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, struct kbase_jd_atom *target_katom, u32 sw_flags) { - dev_dbg(kbdev->dev, "Soft-stop atom %p with flags 0x%x (s:%d)\n", + dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n", target_katom, sw_flags, js); KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK)); @@ -1337,6 +1359,7 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev) /** * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU * @kbdev: kbase device + * @flags: Bitfield indicating impact of reset (see flag defines) * * This function just soft-stops all the slots to ensure that as many jobs as * possible are saved. @@ -1347,10 +1370,12 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev) * false - Another thread is performing a reset, kbase_reset_gpu should * not be called. */ -bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev) +bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, + unsigned int flags) { int i; + CSTD_UNUSED(flags); KBASE_DEBUG_ASSERT(kbdev); #ifdef CONFIG_MALI_ARBITER_SUPPORT @@ -1378,14 +1403,14 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev) return true; } -bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev) +bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags) { - unsigned long flags; + unsigned long lock_flags; bool ret; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - ret = kbase_prepare_to_reset_gpu_locked(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, lock_flags); + ret = kbase_prepare_to_reset_gpu_locked(kbdev, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, lock_flags); return ret; } @@ -1506,3 +1531,21 @@ void kbase_reset_gpu_term(struct kbase_device *kbdev) { destroy_workqueue(kbdev->hwaccess.backend.reset_workq); } + +static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev, + const u64 affinity, const u64 limited_core_mask) +{ + const u64 result = affinity & limited_core_mask; + +#ifdef CONFIG_MALI_DEBUG + dev_dbg(kbdev->dev, + "Limiting affinity due to BASE_JD_REQ_LIMITED_CORE_MASK from 0x%lx to 0x%lx (mask is 0x%lx)\n", + (unsigned long int)affinity, + (unsigned long int)result, + (unsigned long int)limited_core_mask); +#else + CSTD_UNUSED(kbdev); +#endif + + return result; +} diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c index 7104658..5fdf9b6 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. 
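[Editor's note] The mali_kbase_jm_hw.c hunk above introduces BASE_JD_REQ_LIMITED_CORE_MASK handling: the computed job affinity is ANDed with the context's limited core mask, both on the normal path and on the fallback path that uses shaders_avail. The masking itself is a plain bitwise AND; the sketch below is a minimal, self-contained illustration of that behaviour under stated assumptions (the function and variable names are illustrative only, not the driver's API).

#include <stdint.h>
#include <stdio.h>

/* Keep only the shader cores that appear in both the computed affinity and
 * the per-context limited core mask (cores with backed TLS memory). */
static uint64_t apply_limited_core_mask(uint64_t affinity, uint64_t limited_core_mask)
{
	return affinity & limited_core_mask;
}

int main(void)
{
	uint64_t affinity = 0xffull;   /* e.g. shaders_avail & debug_core_mask */
	uint64_t limited  = 0x0full;   /* e.g. the context's limited core mask */
	uint64_t result   = apply_limited_core_mask(affinity, limited);

	/* As in the fallback path above, a zero result would indicate a
	 * misconfiguration and trips WARN_ON() in a CONFIG_MALI_DEBUG build. */
	printf("affinity 0x%llx -> 0x%llx\n",
	       (unsigned long long)affinity, (unsigned long long)result);
	return result ? 0 : 1;
}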
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -1024,7 +1024,7 @@ void kbase_backend_run_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Backend running atom %p\n", (void *)katom); + dev_dbg(kbdev->dev, "Backend running atom %pK\n", (void *)katom); kbase_gpu_enqueue_atom(kbdev, katom); kbase_backend_slot_update(kbdev); @@ -1085,7 +1085,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, struct kbase_context *kctx = katom->kctx; dev_dbg(kbdev->dev, - "Atom %p completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n", + "Atom %pK completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n", (void *)katom, completion_code, job_tail, js); lockdep_assert_held(&kbdev->hwaccess_lock); @@ -1205,7 +1205,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, if (job_tail != 0 && job_tail != katom->jc) { /* Some of the job has been executed */ dev_dbg(kbdev->dev, - "Update job chain address of atom %p to resume from 0x%llx\n", + "Update job chain address of atom %pK to resume from 0x%llx\n", (void *)katom, job_tail); katom->jc = job_tail; @@ -1266,7 +1266,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, if (katom) { dev_dbg(kbdev->dev, - "Cross-slot dependency %p has become runnable.\n", + "Cross-slot dependency %pK has become runnable.\n", (void *)katom); /* Check if there are lower priority jobs to soft stop */ @@ -1666,7 +1666,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev) if (katom) dev_info(kbdev->dev, - " js%d idx%d : katom=%p gpu_rb_state=%d\n", + " js%d idx%d : katom=%pK gpu_rb_state=%d\n", js, idx, katom, katom->gpu_rb_state); else dev_info(kbdev->dev, " js%d idx%d : empty\n", diff --git a/mali_kbase/backend/gpu/mali_kbase_js_backend.c b/mali_kbase/backend/gpu/mali_kbase_js_backend.c index d28e7b0..cab222d 100644 --- a/mali_kbase/backend/gpu/mali_kbase_js_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_js_backend.c @@ -257,7 +257,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) if (reset_needed) { dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issuing GPU soft-reset to resolve."); - if (kbase_prepare_to_reset_gpu_locked(kbdev)) + if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu_locked(kbdev); } /* the timer is re-issued if there is contexts in the run-pool */ diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c index 921849b..0cfa93c 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -498,7 +498,15 @@ static void kbase_pm_hwcnt_disable_worker(struct work_struct *data) /* PM state was updated while we were doing the disable, * so we need to undo the disable we just performed. 
*/ +#if MALI_USE_CSF + unsigned long lock_flags; + + kbase_csf_scheduler_spin_lock(kbdev, &lock_flags); +#endif kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); +#if MALI_USE_CSF + kbase_csf_scheduler_spin_unlock(kbdev, lock_flags); +#endif } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -664,10 +672,15 @@ void kbase_hwaccess_pm_term(struct kbase_device *kbdev) if (kbdev->pm.backend.hwcnt_disabled) { unsigned long flags; - +#if MALI_USE_CSF + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + kbase_csf_scheduler_spin_unlock(kbdev, flags); +#else spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#endif } /* Free any resources the policy allocated */ diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c index c546766..3cf7608 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c @@ -102,10 +102,18 @@ u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->hwaccess_lock); #ifdef CONFIG_MALI_DEVFREQ - return kbdev->pm.backend.ca_cores_enabled & debug_core_mask; + /* + * Although in the init we let the pm_backend->ca_cores_enabled to be + * the max config (it uses the base_gpu_props), at this function we need + * to limit it to be a subgroup of the curr config, otherwise the + * shaders state machine on the PM does not evolve. + */ + return kbdev->gpu_props.curr_config.shader_present & + kbdev->pm.backend.ca_cores_enabled & + debug_core_mask; #else - return kbdev->gpu_props.props.raw_props.shader_present & - debug_core_mask; + return kbdev->gpu_props.curr_config.shader_present & + debug_core_mask; #endif } diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h index 1b4e141..0687a43 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h @@ -61,24 +61,9 @@ enum kbase_pm_core_type { KBASE_PM_CORE_STACK = STACK_PRESENT_LO }; -/** +/* * enum kbase_l2_core_state - The states used for the L2 cache & tiler power * state machine. - * - * @KBASE_L2_OFF: The L2 cache and tiler are off - * @KBASE_L2_PEND_ON: The L2 cache and tiler are powering on - * @KBASE_L2_RESTORE_CLOCKS: The GPU clock is restored. Conditionally used. - * @KBASE_L2_ON_HWCNT_ENABLE: The L2 cache and tiler are on, and hwcnt is being - * enabled - * @KBASE_L2_ON: The L2 cache and tiler are on, and hwcnt is enabled - * @KBASE_L2_ON_HWCNT_DISABLE: The L2 cache and tiler are on, and hwcnt is being - * disabled - * @KBASE_L2_SLOW_DOWN_CLOCKS: The GPU clock is set to appropriate or lowest - * clock. Conditionally used. - * @KBASE_L2_POWER_DOWN: The L2 cache and tiler are about to be powered off - * @KBASE_L2_PEND_OFF: The L2 cache and tiler are powering off - * @KBASE_L2_RESET_WAIT: The GPU is resetting, L2 cache and tiler power state - * are unknown */ enum kbase_l2_core_state { #define KBASEP_L2_STATE(n) KBASE_L2_ ## n, @@ -87,26 +72,8 @@ enum kbase_l2_core_state { }; #if MALI_USE_CSF -/** +/* * enum kbase_mcu_state - The states used for the MCU state machine. - * - * @KBASE_MCU_OFF: The MCU is powered off. - * @KBASE_MCU_PEND_ON_RELOAD: The warm boot of MCU or cold boot of MCU (with - * firmware reloading) is in progress. - * @KBASE_MCU_ON_GLB_REINIT_PEND: The MCU is enabled and Global configuration - * requests have been sent to the firmware. 
- * @KBASE_MCU_ON_HWCNT_ENABLE: The Global requests have completed and MCU is - * now ready for use and hwcnt is being enabled. - * @KBASE_MCU_ON: The MCU is active and hwcnt has been enabled. - * @KBASE_MCU_ON_CORE_MASK_UPDATE_PEND: The MCU is active and mask of enabled - * shader cores is being updated. - * @KBASE_MCU_ON_HWCNT_DISABLE: The MCU is on and hwcnt is being disabled. - * @KBASE_MCU_ON_HALT: The MCU is on and hwcnt has been disabled, - * MCU halt would be triggered. - * @KBASE_MCU_ON_PEND_HALT: MCU halt in progress, confirmation pending. - * @KBASE_MCU_POWER_DOWN: MCU halted operations, pending being disabled. - * @KBASE_MCU_PEND_OFF: MCU is being disabled, pending on powering off. - * @KBASE_MCU_RESET_WAIT: The GPU is resetting, MCU state is unknown. */ enum kbase_mcu_state { #define KBASEP_MCU_STATE(n) KBASE_MCU_ ## n, @@ -115,45 +82,8 @@ enum kbase_mcu_state { }; #endif -/** +/* * enum kbase_shader_core_state - The states used for the shaders' state machine. - * - * @KBASE_SHADERS_OFF_CORESTACK_OFF: The shaders and core stacks are off - * @KBASE_SHADERS_OFF_CORESTACK_PEND_ON: The shaders are off, core stacks have - * been requested to power on and hwcnt - * is being disabled - * @KBASE_SHADERS_PEND_ON_CORESTACK_ON: Core stacks are on, shaders have been - * requested to power on. Or after doing - * partial shader on/off, checking whether - * it's the desired state. - * @KBASE_SHADERS_ON_CORESTACK_ON: The shaders and core stacks are on, and hwcnt - * already enabled. - * @KBASE_SHADERS_ON_CORESTACK_ON_RECHECK: The shaders and core stacks - * are on, hwcnt disabled, and checks - * to powering down or re-enabling - * hwcnt. - * @KBASE_SHADERS_WAIT_OFF_CORESTACK_ON: The shaders have been requested to - * power off, but they remain on for the - * duration of the hysteresis timer - * @KBASE_SHADERS_WAIT_GPU_IDLE: The shaders partial poweroff needs to reach - * a state where jobs on the GPU are finished - * including jobs currently running and in the - * GPU queue because of GPU2017-861 - * @KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON: The hysteresis timer has expired - * @KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON: The core stacks are on and the - * level 2 cache is being flushed. - * @KBASE_SHADERS_READY_OFF_CORESTACK_ON: The core stacks are on and the shaders - * are ready to be powered off. - * @KBASE_SHADERS_PEND_OFF_CORESTACK_ON: The core stacks are on, and the shaders - * have been requested to power off - * @KBASE_SHADERS_OFF_CORESTACK_PEND_OFF: The shaders are off, and the core stacks - * have been requested to power off - * @KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF: Shaders and corestacks are - * off, but the tick timer - * cancellation is still - * pending. 
- * @KBASE_SHADERS_RESET_WAIT: The GPU is resetting, shader and core stack power - * states are unknown */ enum kbase_shader_core_state { #define KBASEP_SHADER_STATE(n) KBASE_SHADERS_ ## n, diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c index da32510..a2f96b5 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c @@ -407,9 +407,9 @@ u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, switch (type) { case KBASE_PM_CORE_L2: - return kbdev->gpu_props.props.raw_props.l2_present; + return kbdev->gpu_props.curr_config.l2_present; case KBASE_PM_CORE_SHADER: - return kbdev->gpu_props.props.raw_props.shader_present; + return kbdev->gpu_props.curr_config.shader_present; case KBASE_PM_CORE_TILER: return kbdev->gpu_props.props.raw_props.tiler_present; case KBASE_PM_CORE_STACK: @@ -695,8 +695,12 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) case KBASE_MCU_ON_HWCNT_ENABLE: backend->hwcnt_desired = true; if (backend->hwcnt_disabled) { + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); kbase_hwcnt_context_enable( kbdev->hwcnt_gpu_ctx); + kbase_csf_scheduler_spin_unlock(kbdev, flags); backend->hwcnt_disabled = false; } backend->mcu_state = KBASE_MCU_ON; @@ -851,7 +855,7 @@ static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state) static int kbase_pm_l2_update_state(struct kbase_device *kbdev) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; - u64 l2_present = kbdev->gpu_props.props.raw_props.l2_present; + u64 l2_present = kbdev->gpu_props.curr_config.l2_present; #if !MALI_USE_CSF u64 tiler_present = kbdev->gpu_props.props.raw_props.tiler_present; #endif @@ -1255,7 +1259,6 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) &kbdev->pm.backend.shader_tick_timer; enum kbase_shader_core_state prev_state; u64 stacks_avail = 0; - int err = 0; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -1350,8 +1353,18 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) backend->pm_shaders_core_mask = shaders_ready; backend->hwcnt_desired = true; if (backend->hwcnt_disabled) { +#if MALI_USE_CSF + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, + &flags); +#endif kbase_hwcnt_context_enable( kbdev->hwcnt_gpu_ctx); +#if MALI_USE_CSF + kbase_csf_scheduler_spin_unlock(kbdev, + flags); +#endif backend->hwcnt_disabled = false; } @@ -1531,8 +1544,18 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) backend->pm_shaders_core_mask = 0; backend->hwcnt_desired = true; if (backend->hwcnt_disabled) { +#if MALI_USE_CSF + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, + &flags); +#endif kbase_hwcnt_context_enable( kbdev->hwcnt_gpu_ctx); +#if MALI_USE_CSF + kbase_csf_scheduler_spin_unlock(kbdev, + flags); +#endif backend->hwcnt_disabled = false; } backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; @@ -1559,7 +1582,7 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) } while (backend->shaders_state != prev_state); - return err; + return 0; } #endif @@ -1883,17 +1906,9 @@ static void kbase_pm_timed_out(struct kbase_device *kbdev) kbase_reg_read(kbdev, GPU_CONTROL_REG( L2_PWRTRANS_LO))); -#if MALI_USE_CSF - /* PM timeout probably means hardware counters will stop working. 
- * Put the backend into the unrecoverable error state to cause - * current and subsequent counter operations to immediately - * fail, avoiding the risk of a hang. - */ - kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface); -#endif - dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, + RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } @@ -2105,6 +2120,13 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) PM_NO_RESET); } } + /* + * This point means that the GPU trasitioned to ON. So there is a chance + * that a repartitioning occurred. In this case the current config + * should be read again. + */ + kbase_gpuprops_get_curr_config_props(kbdev, + &kbdev->gpu_props.curr_config); #endif /* CONFIG_MALI_ARBITER_SUPPORT */ mutex_lock(&kbdev->mmu_hw_mutex); @@ -2253,7 +2275,7 @@ static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer) struct kbasep_reset_timeout_data *rtdata = container_of(timer, struct kbasep_reset_timeout_data, timer); - rtdata->timed_out = 1; + rtdata->timed_out = true; /* Set the wait queue to wake up kbase_pm_init_hw even though the reset * hasn't completed @@ -2263,14 +2285,13 @@ static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer) return HRTIMER_NORESTART; } -static int kbase_set_jm_quirks(struct kbase_device *kbdev, const u32 prod_id) +static int kbase_set_gpu_quirks(struct kbase_device *kbdev, const u32 prod_id) { #if MALI_USE_CSF - kbdev->hw_quirks_jm = kbase_reg_read(kbdev, - GPU_CONTROL_REG(CSF_CONFIG)); + kbdev->hw_quirks_gpu = + kbase_reg_read(kbdev, GPU_CONTROL_REG(CSF_CONFIG)); #else - u32 hw_quirks_jm = kbase_reg_read(kbdev, - GPU_CONTROL_REG(JM_CONFIG)); + u32 hw_quirks_gpu = kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG)); if (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == GPU_ID2_PRODUCT_TMIX) { /* Only for tMIx */ @@ -2284,39 +2305,38 @@ static int kbase_set_jm_quirks(struct kbase_device *kbdev, const u32 prod_id) */ if (coherency_features == COHERENCY_FEATURE_BIT(COHERENCY_ACE)) { - hw_quirks_jm |= (COHERENCY_ACE_LITE | - COHERENCY_ACE) << - JM_FORCE_COHERENCY_FEATURES_SHIFT; + hw_quirks_gpu |= (COHERENCY_ACE_LITE | COHERENCY_ACE) + << JM_FORCE_COHERENCY_FEATURES_SHIFT; } } if (kbase_is_gpu_removed(kbdev)) return -EIO; - kbdev->hw_quirks_jm = hw_quirks_jm; + kbdev->hw_quirks_gpu = hw_quirks_gpu; #endif /* !MALI_USE_CSF */ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_IDVS_GROUP_SIZE)) { int default_idvs_group_size = 0xF; - u32 tmp; + u32 group_size = 0; - if (of_property_read_u32(kbdev->dev->of_node, - "idvs-group-size", &tmp)) - tmp = default_idvs_group_size; + if (of_property_read_u32(kbdev->dev->of_node, "idvs-group-size", + &group_size)) + group_size = default_idvs_group_size; - if (tmp > IDVS_GROUP_MAX_SIZE) { + if (group_size > IDVS_GROUP_MAX_SIZE) { dev_err(kbdev->dev, "idvs-group-size of %d is too large. 
Maximum value is %d", - tmp, IDVS_GROUP_MAX_SIZE); - tmp = default_idvs_group_size; + group_size, IDVS_GROUP_MAX_SIZE); + group_size = default_idvs_group_size; } - kbdev->hw_quirks_jm |= tmp << IDVS_GROUP_SIZE_SHIFT; + kbdev->hw_quirks_gpu |= group_size << IDVS_GROUP_SIZE_SHIFT; } #define MANUAL_POWER_CONTROL ((u32)(1 << 8)) if (corestack_driver_control) - kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL; + kbdev->hw_quirks_gpu |= MANUAL_POWER_CONTROL; return 0; } @@ -2370,18 +2390,17 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) GPU_ID_VERSION_PRODUCT_ID_SHIFT; int error = 0; - kbdev->hw_quirks_jm = 0; + kbdev->hw_quirks_gpu = 0; kbdev->hw_quirks_sc = 0; kbdev->hw_quirks_tiler = 0; kbdev->hw_quirks_mmu = 0; - if (!of_property_read_u32(np, "quirks_jm", - &kbdev->hw_quirks_jm)) { + if (!of_property_read_u32(np, "quirks_gpu", &kbdev->hw_quirks_gpu)) { dev_info(kbdev->dev, - "Found quirks_jm = [0x%x] in Devicetree\n", - kbdev->hw_quirks_jm); + "Found quirks_gpu = [0x%x] in Devicetree\n", + kbdev->hw_quirks_gpu); } else { - error = kbase_set_jm_quirks(kbdev, prod_id); + error = kbase_set_gpu_quirks(kbdev, prod_id); if (error) return error; } @@ -2432,10 +2451,10 @@ static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) kbdev->hw_quirks_mmu); #if MALI_USE_CSF kbase_reg_write(kbdev, GPU_CONTROL_REG(CSF_CONFIG), - kbdev->hw_quirks_jm); + kbdev->hw_quirks_gpu); #else kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG), - kbdev->hw_quirks_jm); + kbdev->hw_quirks_gpu); #endif } @@ -2466,6 +2485,7 @@ void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev) } } +#if !MALI_USE_CSF static void reenable_protected_mode_hwcnt(struct kbase_device *kbdev) { unsigned long irq_flags; @@ -2478,6 +2498,7 @@ static void reenable_protected_mode_hwcnt(struct kbase_device *kbdev) } spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); } +#endif static int kbase_pm_do_reset(struct kbase_device *kbdev) { @@ -2504,7 +2525,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) /* Initialize a structure for tracking the status of the reset */ rtdata.kbdev = kbdev; - rtdata.timed_out = 0; + rtdata.timed_out = false; /* Create a timer to use as a timeout on the reset */ hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); @@ -2516,7 +2537,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) /* Wait for the RESET_COMPLETED interrupt to be raised */ kbase_pm_wait_for_reset(kbdev); - if (rtdata.timed_out == 0) { + if (!rtdata.timed_out) { /* GPU has been reset */ hrtimer_cancel(&rtdata.timer); destroy_hrtimer_on_stack(&rtdata.timer); @@ -2556,7 +2577,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) GPU_COMMAND_HARD_RESET); /* Restart the timer to wait for the hard reset to complete */ - rtdata.timed_out = 0; + rtdata.timed_out = false; hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), HRTIMER_MODE_REL); @@ -2564,7 +2585,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) /* Wait for the RESET_COMPLETED interrupt to be raised */ kbase_pm_wait_for_reset(kbdev); - if (rtdata.timed_out == 0) { + if (!rtdata.timed_out) { /* GPU has been reset */ hrtimer_cancel(&rtdata.timer); destroy_hrtimer_on_stack(&rtdata.timer); @@ -2637,8 +2658,13 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); #if MALI_USE_CSF if (kbdev->protected_mode) { + unsigned long flags; + kbase_ipa_control_protm_exited(kbdev); + + kbase_csf_scheduler_spin_lock(kbdev, &flags); 
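[Editor's note] Several hunks in this commit, including the one at this point, now bracket kbase_hwcnt_context_enable() with the CSF scheduler spinlock when MALI_USE_CSF is set, while JM builds keep the previous locking. Below is a hedged sketch of a helper that could factor out the repeated pattern; the helper name is illustrative, the calls are only those already visible in the hunks above, and callers would still hold whatever outer lock their site requires (e.g. hwaccess_lock).

static void kbasep_hwcnt_context_enable_locked(struct kbase_device *kbdev)
{
#if MALI_USE_CSF
	unsigned long flags;

	/* The CSF scheduler spinlock serialises the enable with the firmware
	 * interrupt handling that also runs under this lock (see the
	 * process_prfcnt_interrupts() changes later in this commit). */
	kbase_csf_scheduler_spin_lock(kbdev, &flags);
	kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
	kbase_csf_scheduler_spin_unlock(kbdev, flags);
#else
	kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
#endif
}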
kbase_hwcnt_backend_csf_protm_exited(&kbdev->hwcnt_gpu_iface); + kbase_csf_scheduler_spin_unlock(kbdev, flags); } #endif kbdev->protected_mode = false; @@ -2685,12 +2711,14 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) kbase_pm_enable_interrupts(kbdev); exit: +#if !MALI_USE_CSF if (!kbdev->pm.backend.protected_entry_transition_override) { /* Re-enable GPU hardware counters if we're resetting from * protected mode. */ reenable_protected_mode_hwcnt(kbdev); } +#endif return err; } @@ -2726,8 +2754,9 @@ kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev) /* This might happen after GPU reset. * Then counter needs to be kicked. */ - if (!(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & - GPU_STATUS_CYCLE_COUNT_ACTIVE)) { + if (!IS_ENABLED(CONFIG_MALI_NO_MALI) && + (!(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & + GPU_STATUS_CYCLE_COUNT_ACTIVE))) { kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_CYCLE_COUNT_START); } diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h index f6b8485..500578f 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -224,6 +224,7 @@ void kbase_pm_reset_done(struct kbase_device *kbdev); * * Return: 0 on success, error code on error */ +int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); #else /** * kbase_pm_wait_for_desired_state - Wait for the desired power state to be @@ -247,8 +248,8 @@ void kbase_pm_reset_done(struct kbase_device *kbdev); * * Return: 0 on success, error code on error */ -#endif int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); +#endif /** * kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on @@ -534,8 +535,22 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, #ifdef CONFIG_MALI_MIDGARD_DVFS +#if MALI_USE_CSF +/** + * kbase_platform_dvfs_event - Report utilisation to DVFS code for CSF GPU + * + * Function provided by platform specific code when DVFS is enabled to allow + * the power management metrics system to report utilisation. + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + * @utilisation: The current calculated utilisation by the metrics system. + * Return: Returns 0 on failure and non zero on success. + */ +int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation); +#else /** - * kbase_platform_dvfs_event - Report utilisation to DVFS code + * kbase_platform_dvfs_event - Report utilisation to DVFS code for JM GPU * * Function provided by platform specific code when DVFS is enabled to allow * the power management metrics system to report utilisation. @@ -548,10 +563,6 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, * group. * Return: Returns 0 on failure and non zero on success. 
*/ - -#if MALI_USE_CSF -int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation); -#else int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2]); #endif diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_l2_states.h b/mali_kbase/backend/gpu/mali_kbase_pm_l2_states.h index b9bd364..d66b928 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_l2_states.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_l2_states.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,6 +24,19 @@ * The function-like macro KBASEP_L2_STATE() must be defined before including * this header file. This header file can be included multiple times in the * same compilation unit with different definitions of KBASEP_L2_STATE(). + * + * @OFF: The L2 cache and tiler are off + * @PEND_ON: The L2 cache and tiler are powering on + * @RESTORE_CLOCKS: The GPU clock is restored. Conditionally used. + * @ON_HWCNT_ENABLE: The L2 cache and tiler are on, and hwcnt is being enabled + * @ON: The L2 cache and tiler are on, and hwcnt is enabled + * @ON_HWCNT_DISABLE: The L2 cache and tiler are on, and hwcnt is being disabled + * @SLOW_DOWN_CLOCKS: The GPU clock is set to appropriate or lowest clock. + * Conditionally used. + * @POWER_DOWN: The L2 cache and tiler are about to be powered off + * @PEND_OFF: The L2 cache and tiler are powering off + * @RESET_WAIT: The GPU is resetting, L2 cache and tiler power state are + * unknown */ KBASEP_L2_STATE(OFF) KBASEP_L2_STATE(PEND_ON) diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h b/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h index c03adf3..eab30eb 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,6 +24,24 @@ * The function-like macro KBASEP_MCU_STATE() must be defined before including * this header file. This header file can be included multiple times in the * same compilation unit with different definitions of KBASEP_MCU_STATE(). + * + * @OFF: The MCU is powered off. + * @PEND_ON_RELOAD: The warm boot of MCU or cold boot of MCU (with + * firmware reloading) is in progress. + * @ON_GLB_REINIT_PEND: The MCU is enabled and Global configuration + * requests have been sent to the firmware. + * @ON_HWCNT_ENABLE: The Global requests have completed and MCU is now + * ready for use and hwcnt is being enabled. + * @ON: The MCU is active and hwcnt has been enabled. + * @ON_CORE_ATTR_UPDATE_PEND: The MCU is active and mask of enabled shader cores + * is being updated. + * @ON_HWCNT_DISABLE: The MCU is on and hwcnt is being disabled. + * @ON_HALT: The MCU is on and hwcnt has been disabled, MCU + * halt would be triggered. + * @ON_PEND_HALT: MCU halt in progress, confirmation pending. + * @POWER_DOWN: MCU halted operations, pending being disabled. + * @PEND_OFF: MCU is being disabled, pending on powering off. 
+ * @RESET_WAIT: The GPU is resetting, MCU state is unknown. */ KBASEP_MCU_STATE(OFF) KBASEP_MCU_STATE(PEND_ON_RELOAD) diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c index e5c7c71..769888f 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -360,9 +360,9 @@ KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_metrics); void kbase_pm_get_dvfs_action(struct kbase_device *kbdev) { int utilisation; - int busy; struct kbasep_pm_metrics *diff; #if !MALI_USE_CSF + int busy; int util_gl_share; int util_cl_share[2]; #endif @@ -377,9 +377,9 @@ void kbase_pm_get_dvfs_action(struct kbase_device *kbdev) utilisation = (100 * diff->time_busy) / max(diff->time_busy + diff->time_idle, 1u); +#if !MALI_USE_CSF busy = max(diff->busy_gl + diff->busy_cl[0] + diff->busy_cl[1], 1u); -#if !MALI_USE_CSF util_gl_share = (100 * diff->busy_gl) / busy; util_cl_share[0] = (100 * diff->busy_cl[0]) / busy; util_cl_share[1] = (100 * diff->busy_cl[1]) / busy; diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c index 97bcb44..5c2aa0c 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c @@ -405,7 +405,7 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, /* Reverse the suspension done */ if (reset_gpu) { dev_warn(kbdev->dev, "Resorting to GPU reset for policy change\n"); - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); kbase_reset_gpu_wait(kbdev); } else if (sched_suspend) diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_shader_states.h b/mali_kbase/backend/gpu/mali_kbase_pm_shader_states.h index 766bf1d..2276713 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_shader_states.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_shader_states.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,6 +25,41 @@ * including this header file. This header file can be included multiple * times in the same compilation unit with different definitions of * KBASEP_SHADER_STATE(). + * + * @OFF_CORESTACK_OFF: The shaders and core stacks are off + * @OFF_CORESTACK_PEND_ON: The shaders are off, core stacks have been + * requested to power on and hwcnt is being + * disabled + * @PEND_ON_CORESTACK_ON: Core stacks are on, shaders have been + * requested to power on. Or after doing + * partial shader on/off, checking whether + * it's the desired state. + * @ON_CORESTACK_ON: The shaders and core stacks are on, and + * hwcnt already enabled. + * @ON_CORESTACK_ON_RECHECK: The shaders and core stacks are on, hwcnt + * disabled, and checks to powering down or + * re-enabling hwcnt. 
+ * @WAIT_OFF_CORESTACK_ON: The shaders have been requested to power + * off, but they remain on for the duration + * of the hysteresis timer + * @WAIT_GPU_IDLE: The shaders partial poweroff needs to + * reach a state where jobs on the GPU are + * finished including jobs currently running + * and in the GPU queue because of + * GPU2017-861 + * @WAIT_FINISHED_CORESTACK_ON: The hysteresis timer has expired + * @L2_FLUSHING_CORESTACK_ON: The core stacks are on and the level 2 + * cache is being flushed. + * @READY_OFF_CORESTACK_ON: The core stacks are on and the shaders are + * ready to be powered off. + * @PEND_OFF_CORESTACK_ON: The core stacks are on, and the shaders + * have been requested to power off + * @OFF_CORESTACK_PEND_OFF: The shaders are off, and the core stacks + * have been requested to power off + * @OFF_CORESTACK_OFF_TIMER_PEND_OFF: Shaders and corestacks are off, but the + * tick timer cancellation is still pending. + * @RESET_WAIT: The GPU is resetting, shader and core + * stack power states are unknown */ KBASEP_SHADER_STATE(OFF_CORESTACK_OFF) KBASEP_SHADER_STATE(OFF_CORESTACK_PEND_ON) diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c index f964af0..ea7b21a 100644 --- a/mali_kbase/backend/gpu/mali_kbase_time.c +++ b/mali_kbase/backend/gpu/mali_kbase_time.c @@ -76,6 +76,9 @@ void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, */ static bool timedwait_cycle_count_active(struct kbase_device *kbdev) { +#ifdef CONFIG_MALI_NO_MALI + return true; +#else bool success = false; const unsigned int timeout = 100; const unsigned long remaining = jiffies + msecs_to_jiffies(timeout); @@ -87,8 +90,8 @@ static bool timedwait_cycle_count_active(struct kbase_device *kbdev) break; } } - return success; +#endif } #endif diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c index e35c570..e3e046c 100644 --- a/mali_kbase/csf/mali_kbase_csf.c +++ b/mali_kbase/csf/mali_kbase_csf.c @@ -27,7 +27,7 @@ #include <linux/export.h> #include <linux/priority_control_manager.h> #include <linux/shmem_fs.h> -#include "mali_gpu_csf_registers.h" +#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h> #include "mali_kbase_csf_tiler_heap.h" #include <mmu/mali_kbase_mmu.h> #include "mali_kbase_csf_timeout.h" @@ -588,7 +588,7 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx, spin_lock_irqsave(&kctx->csf.event_lock, flags); dev_dbg(kctx->kbdev->dev, - "Remove any pending command queue fatal from context %p\n", + "Remove any pending command queue fatal from context %pK\n", (void *)kctx); list_del_init(&queue->error.link); spin_unlock_irqrestore(&kctx->csf.event_lock, flags); @@ -1132,6 +1132,26 @@ static int create_suspend_buffers(struct kbase_context *const kctx, } /** + * generate_group_uid() - Makes an ID unique to all kernel base devices + * and contexts, for a queue group and CSG. 
+ * + * Return: A unique ID in the form of an unsigned 32-bit integer + */ +static u32 generate_group_uid(void) +{ + /* use first KBase device to store max UID */ + struct kbase_device *kbdev = kbase_find_device(-1); + u32 uid = 1; + + if (kbdev) + uid = (u32) atomic_inc_return(&kbdev->group_max_uid_in_devices); + else + WARN(1, "NULL kbase device pointer in group UID generation"); + + return uid; +} + +/** * create_queue_group() - Create a queue group * * @kctx: Address of the kbase context within which the queue group @@ -1142,7 +1162,7 @@ static int create_suspend_buffers(struct kbase_context *const kctx, * Return: a queue group handle on success, or a negative error code on failure. */ static int create_queue_group(struct kbase_context *const kctx, - const union kbase_ioctl_cs_queue_group_create *const create) + union kbase_ioctl_cs_queue_group_create *const create) { int group_handle = find_free_group_handle(kctx); @@ -1178,6 +1198,9 @@ static int create_queue_group(struct kbase_context *const kctx, group->doorbell_nr = KBASEP_USER_DB_NR_INVALID; group->faulted = false; + group->group_uid = generate_group_uid(); + create->out.group_uid = group->group_uid; + INIT_LIST_HEAD(&group->link); INIT_LIST_HEAD(&group->link_to_schedule); INIT_LIST_HEAD(&group->error_fatal.link); @@ -1409,7 +1432,7 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx, spin_lock_irqsave(&kctx->csf.event_lock, flags); dev_dbg(kbdev->dev, - "Remove any pending group fatal error from context %p\n", + "Remove any pending group fatal error from context %pK\n", (void *)group->kctx); list_del_init(&group->error_tiler_oom.link); @@ -1503,7 +1526,7 @@ static void add_error(struct kbase_context *const kctx, error->data = *data; list_add_tail(&error->link, &kctx->csf.error_list); dev_dbg(kctx->kbdev->dev, - "Added error %p of type %d in context %p\n", + "Added error %pK of type %d in context %pK\n", (void *)error, data->type, (void *)kctx); } @@ -1796,7 +1819,7 @@ int kbase_csf_event_wait_add(struct kbase_context *kctx, spin_lock_irqsave(&kctx->csf.event_lock, flags); list_add_tail(&event->link, &kctx->csf.event_callback_list); dev_dbg(kctx->kbdev->dev, - "Added event handler %p with param %p\n", event, + "Added event handler %pK with param %pK\n", event, event->param); spin_unlock_irqrestore(&kctx->csf.event_lock, flags); @@ -1818,7 +1841,7 @@ void kbase_csf_event_wait_remove(struct kbase_context *kctx, if ((event->callback == callback) && (event->param == param)) { list_del(&event->link); dev_dbg(kctx->kbdev->dev, - "Removed event handler %p with param %p\n", + "Removed event handler %pK with param %pK\n", event, event->param); kfree(event); break; @@ -1841,7 +1864,7 @@ bool kbase_csf_read_error(struct kbase_context *kctx, struct kbase_csf_notification, link); list_del_init(&error_data->link); *event_data = error_data->data; - dev_dbg(kctx->kbdev->dev, "Dequeued error %p in context %p\n", + dev_dbg(kctx->kbdev->dev, "Dequeued error %pK in context %pK\n", (void *)error_data, (void *)kctx); } else { got_event = false; @@ -1859,7 +1882,7 @@ bool kbase_csf_error_pending(struct kbase_context *kctx) spin_lock_irqsave(&kctx->csf.event_lock, flags); event_pended = !list_empty(&kctx->csf.error_list); - dev_dbg(kctx->kbdev->dev, "%s error is pending in context %p\n", + dev_dbg(kctx->kbdev->dev, "%s error is pending in context %pK\n", event_pended ? 
"An" : "No", (void *)kctx); spin_unlock_irqrestore(&kctx->csf.event_lock, flags); @@ -1872,7 +1895,7 @@ void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu) unsigned long flags; dev_dbg(kctx->kbdev->dev, - "Signal event (%s GPU notify) for context %p\n", + "Signal event (%s GPU notify) for context %pK\n", notify_gpu ? "with" : "without", (void *)kctx); /* First increment the signal count and wake up event thread. @@ -1903,7 +1926,7 @@ void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu) enum kbase_csf_event_callback_action action; dev_dbg(kctx->kbdev->dev, - "Calling event handler %p with param %p\n", + "Calling event handler %pK with param %pK\n", (void *)event, event->param); action = event->callback(event->param); if (action == KBASE_CSF_EVENT_CALLBACK_REMOVE) { @@ -1926,7 +1949,7 @@ void kbase_csf_event_wait_remove_all(struct kbase_context *kctx) event, next_event, &kctx->csf.event_callback_list, link) { list_del(&event->link); dev_dbg(kctx->kbdev->dev, - "Removed event handler %p with param %p\n", + "Removed event handler %pK with param %pK\n", (void *)event, event->param); kfree(event); } @@ -2231,6 +2254,31 @@ static void protm_event_worker(struct work_struct *data) kbase_csf_scheduler_group_protm_enter(group); } +static void report_queue_fatal_error(struct kbase_queue *const queue, + u32 cs_fatal, u64 cs_fatal_info, + u8 group_handle) +{ + struct base_csf_notification error = + { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, + .payload = { + .csg_error = { + .handle = group_handle, + .error = { + .error_type = + BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, + .payload = { + .fatal_queue = { + .sideband = + cs_fatal_info, + .status = cs_fatal, + .csi_index = + queue->csi_index, + } } } } } }; + + add_error(queue->kctx, &queue->error, &error); + kbase_event_wakeup(queue->kctx); +} + /** * handle_fault_event - Handler for CS fault. * @@ -2268,51 +2316,10 @@ handle_fault_event(struct kbase_queue *const queue, kbase_gpu_exception_name(cs_fault_exception_type), cs_fault_exception_data, cs_fault_info_exception_data); - /* TODO GPUCORE-26291: We've'identified an issue with faulted CSIs not - * making progress in some cases. Until the issue is resolved, - * RESOURCE_EVICTION_TIMEOUT error shall be treated as a fatal error - * to give userspace a chance to terminate the group. This is intended - * to be a temporary workaround. 
- */ if (cs_fault_exception_type == CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT) - kbase_csf_add_queue_fatal_error( - queue, GPU_EXCEPTION_TYPE_SW_FAULT_2, 0); -} - -static void report_queue_fatal_error(struct kbase_queue *const queue, - u32 cs_fatal, u64 cs_fatal_info, - u8 group_handle) -{ - struct base_csf_notification error = { - .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, - .payload = { - .csg_error = { - .handle = group_handle, - .error = { - .error_type = - BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, - .payload = { - .fatal_queue = { - .sideband = cs_fatal_info, - .status = cs_fatal, - .csi_index = queue->csi_index, - } - } - } - } - } - }; - - add_error(queue->kctx, &queue->error, &error); - kbase_event_wakeup(queue->kctx); -} - -void kbase_csf_add_queue_fatal_error(struct kbase_queue *const queue, - u32 cs_fatal, u64 cs_fatal_info) -{ - report_queue_fatal_error(queue, cs_fatal, cs_fatal_info, - queue->group->handle); + report_queue_fatal_error(queue, GPU_EXCEPTION_TYPE_SW_FAULT_2, + 0, queue->group->handle); } /** @@ -2643,8 +2650,20 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, process_cs_interrupts(group, ginfo, irqreq, irqack); } +/** + * process_prfcnt_interrupts - Process performance counter interrupts. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @glb_req: Global request register value. + * @glb_ack: Global acknowledge register value. + * + * Handles interrupts issued by the firmware that relate to the performance + * counters. For example, on completion of a performance counter sample. It is + * expected that the scheduler spinlock is already held on calling this + * function. + */ static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req, - u32 glb_ack, unsigned long *flags) + u32 glb_ack) { const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; @@ -2656,14 +2675,11 @@ static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req, ((glb_req & GLB_REQ_PRFCNT_SAMPLE_MASK) == (glb_ack & GLB_REQ_PRFCNT_SAMPLE_MASK))) { kbdev->csf.hwcnt.request_pending = false; - kbase_csf_scheduler_spin_unlock(kbdev, *flags); dev_dbg(kbdev->dev, "PRFCNT_SAMPLE done interrupt received."); kbase_hwcnt_backend_csf_on_prfcnt_sample( &kbdev->hwcnt_gpu_iface); - - kbase_csf_scheduler_spin_lock(kbdev, flags); } /* Process PRFCNT_ENABLE interrupt. */ @@ -2671,32 +2687,25 @@ static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req, ((glb_req & GLB_REQ_PRFCNT_ENABLE_MASK) == (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK))) { kbdev->csf.hwcnt.enable_pending = false; - kbase_csf_scheduler_spin_unlock(kbdev, *flags); dev_dbg(kbdev->dev, "PRFCNT_ENABLE status changed interrupt received."); - if (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK) { + if (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK) kbase_hwcnt_backend_csf_on_prfcnt_enable( &kbdev->hwcnt_gpu_iface); - } else { + else kbase_hwcnt_backend_csf_on_prfcnt_disable( &kbdev->hwcnt_gpu_iface); - } - - kbase_csf_scheduler_spin_lock(kbdev, flags); } /* Process PRFCNT_THRESHOLD interrupt. 
*/ if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_THRESHOLD_MASK) { - kbase_csf_scheduler_spin_unlock(kbdev, *flags); dev_dbg(kbdev->dev, "PRFCNT_THRESHOLD interrupt received."); kbase_hwcnt_backend_csf_on_prfcnt_threshold( &kbdev->hwcnt_gpu_iface); - kbase_csf_scheduler_spin_lock(kbdev, flags); - /* Set the GLB_REQ.PRFCNT_THRESHOLD flag back to * the same value as GLB_ACK.PRFCNT_THRESHOLD * flag in order to enable reporting of another @@ -2709,13 +2718,11 @@ static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req, /* Process PRFCNT_OVERFLOW interrupt. */ if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_OVERFLOW_MASK) { - kbase_csf_scheduler_spin_unlock(kbdev, *flags); dev_dbg(kbdev->dev, "PRFCNT_OVERFLOW interrupt received."); kbase_hwcnt_backend_csf_on_prfcnt_overflow( &kbdev->hwcnt_gpu_iface); - kbase_csf_scheduler_spin_lock(kbdev, flags); /* Set the GLB_REQ.PRFCNT_OVERFLOW flag back to * the same value as GLB_ACK.PRFCNT_OVERFLOW * flag in order to enable reporting of another @@ -2790,8 +2797,7 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) } } - process_prfcnt_interrupts(kbdev, glb_req, glb_ack, - &flags); + process_prfcnt_interrupts(kbdev, glb_req, glb_ack); kbase_csf_scheduler_spin_unlock(kbdev, flags); diff --git a/mali_kbase/csf/mali_kbase_csf.h b/mali_kbase/csf/mali_kbase_csf.h index 6252515..effd468 100644 --- a/mali_kbase/csf/mali_kbase_csf.h +++ b/mali_kbase/csf/mali_kbase_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -366,19 +366,6 @@ void kbase_csf_add_group_fatal_error( struct base_gpu_queue_group_error const *const err_payload); /** - * kbase_csf_add_queue_fatal_error - Report a fatal queue error to userspace - * - * @queue: Pointer to queue for which fatal event was received. - * @cs_fatal: Fault information - * @cs_fatal_info: Additional fault information - * - * If a queue has already been in fatal error status, - * subsequent fatal error on the queue should never take place. - */ -void kbase_csf_add_queue_fatal_error(struct kbase_queue *const queue, - u32 cs_fatal, u64 cs_fatal_info); - -/** * kbase_csf_interrupt - Handle interrupts issued by CSF firmware. * * @kbdev: The kbase device to handle an IRQ for diff --git a/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c b/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c index fb3a718..b54b2fc 100644 --- a/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c +++ b/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. 
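[Editor's note] The process_prfcnt_interrupts() changes above rely on the GLB_REQ/GLB_ACK toggle convention: an event of a given type is pending while its request and acknowledge bits differ, and writing the acknowledge value back into the request bit re-arms reporting of the next event, as the retained PRFCNT_THRESHOLD and PRFCNT_OVERFLOW comments describe. A minimal standalone sketch of that convention follows; the mask value is illustrative, not the real register layout.

#include <stdint.h>
#include <stdbool.h>

/* Illustrative mask only; the real GLB_REQ_* masks live in the CSF register
 * definitions. */
#define EXAMPLE_PRFCNT_THRESHOLD_MASK (1u << 5)

/* An event is pending while its request and acknowledge bits differ. */
static bool glb_event_pending(uint32_t glb_req, uint32_t glb_ack, uint32_t mask)
{
	return ((glb_req ^ glb_ack) & mask) != 0;
}

/* Copy the acknowledge bit back into the request word to re-arm the event. */
static uint32_t glb_event_rearm(uint32_t glb_req, uint32_t glb_ack, uint32_t mask)
{
	return (glb_req & ~mask) | (glb_ack & mask);
}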
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -154,8 +154,7 @@ int kbase_csf_cpu_queue_dump(struct kbase_context *kctx, mutex_lock(&kctx->csf.lock); - if (kctx->csf.cpu_queue.buffer) - kfree(kctx->csf.cpu_queue.buffer); + kfree(kctx->csf.cpu_queue.buffer); if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) == BASE_CSF_CPU_QUEUE_DUMP_PENDING) { diff --git a/mali_kbase/csf/mali_kbase_csf_defs.h b/mali_kbase/csf/mali_kbase_csf_defs.h index a6f1958..0517399 100644 --- a/mali_kbase/csf/mali_kbase_csf_defs.h +++ b/mali_kbase/csf/mali_kbase_csf_defs.h @@ -401,6 +401,8 @@ struct kbase_protected_suspend_buffer { * @tiler_mask: Mask of tiler endpoints the group is allowed to use. * @fragment_mask: Mask of fragment endpoints the group is allowed to use. * @compute_mask: Mask of compute endpoints the group is allowed to use. + * @group_uid: 32-bit wide unsigned identifier for the group, unique + * across all kbase devices and contexts. * @link: Link to this queue group in the 'runnable_groups' list of * the corresponding kctx. * @link_to_schedule: Link to this queue group in the list of prepared groups @@ -449,6 +451,8 @@ struct kbase_queue_group { u64 fragment_mask; u64 compute_mask; + u32 group_uid; + struct list_head link; struct list_head link_to_schedule; enum kbase_csf_group_state run_state; @@ -801,9 +805,6 @@ struct kbase_csf_csg_slot { * other phases. * @non_idle_scanout_grps: Count on the non-idle groups in the scan-out * list at the scheduling prepare stage. - * @apply_async_protm: Signalling the internal scheduling apply stage to - * act with some special handling for entering the - * protected mode asynchronously. * @pm_active_count: Count indicating if the scheduler is owning a power * management reference count. Reference is taken when * the count becomes 1 and is dropped when the count @@ -853,7 +854,6 @@ struct kbase_csf_scheduler { struct work_struct gpu_idle_work; atomic_t non_idle_offslot_grps; u32 non_idle_scanout_grps; - bool apply_async_protm; u32 pm_active_count; unsigned int csg_scheduling_period_ms; bool tick_timer_active; @@ -1055,7 +1055,7 @@ struct kbase_csf_firmware_interface { struct protected_memory_allocation **pma; }; -/** +/* * struct kbase_csf_hwcnt - Object containing members for handling the dump of * HW counters. * diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.c b/mali_kbase/csf/mali_kbase_csf_firmware.c index ae039aa..73b8e03 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware.c @@ -48,10 +48,17 @@ #define MALI_MAX_FIRMWARE_NAME_LEN ((size_t)20) + static char fw_name[MALI_MAX_FIRMWARE_NAME_LEN] = "mali_csffw.bin"; module_param_string(fw_name, fw_name, sizeof(fw_name), 0644); MODULE_PARM_DESC(fw_name, "firmware image"); +/* The waiting time for firmware to boot */ +static unsigned int csf_firmware_boot_timeout_ms = 500; +module_param(csf_firmware_boot_timeout_ms, uint, 0444); +MODULE_PARM_DESC(csf_firmware_boot_timeout_ms, + "Maximum time to wait for firmware to boot."); + #ifdef CONFIG_MALI_DEBUG /* Makes Driver wait indefinitely for an acknowledgment for the different * requests it sends to firmware. 
Otherwise the timeouts interfere with the @@ -93,7 +100,6 @@ MODULE_PARM_DESC(fw_debug, #define TL_METADATA_ENTRY_NAME_OFFSET (0x8) -#define CSF_FIRMWARE_BOOT_TIMEOUT_MS (500) #define CSF_MAX_FW_STOP_LOOPS (100000) #define CSF_GLB_REQ_CFG_MASK \ @@ -232,7 +238,7 @@ static void stop_csf_firmware(struct kbase_device *kbdev) static void wait_for_firmware_boot(struct kbase_device *kbdev) { const long wait_timeout = - kbase_csf_timeout_in_jiffies(CSF_FIRMWARE_BOOT_TIMEOUT_MS); + kbase_csf_timeout_in_jiffies(csf_firmware_boot_timeout_ms); long remaining; /* Firmware will generate a global interface interrupt once booting @@ -987,6 +993,7 @@ static int parse_capabilities(struct kbase_device *kbdev) iface->group_stride = shared_info[GLB_GROUP_STRIDE/4]; iface->prfcnt_size = shared_info[GLB_PRFCNT_SIZE/4]; + iface->instr_features = shared_info[GLB_INSTR_FEATURES / 4]; if ((GROUP_CONTROL_0 + (unsigned long)iface->group_num * iface->group_stride) > @@ -1239,14 +1246,8 @@ static void handle_internal_firmware_fatal(struct kbase_device *const kbdev) kbase_ctx_sched_release_ctx_lock(kctx); } - /* Internal FW error could mean hardware counters will stop working. - * Put the backend into the unrecoverable error state to cause - * current and subsequent counter operations to immediately - * fail, avoiding the risk of a hang. - */ - kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface); - - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, + RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } @@ -1669,6 +1670,7 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 return pwroff; } + int kbase_csf_firmware_init(struct kbase_device *kbdev) { const struct firmware *firmware; @@ -1836,6 +1838,7 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev) if (ret != 0) goto error; + /* Firmware loaded successfully */ release_firmware(firmware); KBASE_KTRACE_ADD(kbdev, FIRMWARE_BOOT, NULL, @@ -1987,7 +1990,7 @@ void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev) kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); } -int kbase_csf_firmware_ping(struct kbase_device *const kbdev) +void kbase_csf_firmware_ping(struct kbase_device *const kbdev) { const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; @@ -1997,7 +2000,11 @@ int kbase_csf_firmware_ping(struct kbase_device *const kbdev) set_global_request(global_iface, GLB_REQ_PING_MASK); kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); kbase_csf_scheduler_spin_unlock(kbdev, flags); +} +int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev) +{ + kbase_csf_firmware_ping(kbdev); return wait_for_global_request(kbdev, GLB_REQ_PING_MASK); } @@ -2040,11 +2047,17 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev) err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK); if (!err) { + unsigned long irq_flags; + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbdev->protected_mode = true; kbase_ipa_protection_mode_switch_event(kbdev); kbase_ipa_control_protm_entered(kbdev); + + kbase_csf_scheduler_spin_lock(kbdev, &irq_flags); kbase_hwcnt_backend_csf_protm_entered(&kbdev->hwcnt_gpu_iface); + kbase_csf_scheduler_spin_unlock(kbdev, irq_flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } } @@ -2139,26 +2152,28 @@ static u32 copy_grp_and_stm( return total_stream_num; } -u32 kbase_csf_firmware_get_glb_iface(struct kbase_device *kbdev, +u32 kbase_csf_firmware_get_glb_iface( + struct 
kbase_device *kbdev, struct basep_cs_group_control *const group_data, u32 const max_group_num, struct basep_cs_stream_control *const stream_data, u32 const max_total_stream_num, u32 *const glb_version, - u32 *const features, u32 *const group_num, u32 *const prfcnt_size) + u32 *const features, u32 *const group_num, u32 *const prfcnt_size, + u32 *instr_features) { const struct kbase_csf_global_iface * const iface = &kbdev->csf.global_iface; - if (WARN_ON(!glb_version) || - WARN_ON(!features) || - WARN_ON(!group_num) || - WARN_ON(!prfcnt_size)) + if (WARN_ON(!glb_version) || WARN_ON(!features) || + WARN_ON(!group_num) || WARN_ON(!prfcnt_size) || + WARN_ON(!instr_features)) return 0; *glb_version = iface->version; *features = iface->features; *group_num = iface->group_num; *prfcnt_size = iface->prfcnt_size; + *instr_features = iface->instr_features; return copy_grp_and_stm(iface, group_data, max_group_num, stream_data, max_total_stream_num); @@ -2237,9 +2252,9 @@ int kbase_csf_firmware_mcu_shared_mapping_init( mutex_lock(&kbdev->csf.reg_lock); ret = kbase_add_va_region_rbtree(kbdev, va_reg, 0, num_pages, 1); va_reg->flags &= ~KBASE_REG_FREE; - mutex_unlock(&kbdev->csf.reg_lock); if (ret) goto va_region_add_error; + mutex_unlock(&kbdev->csf.reg_lock); gpu_map_properties &= (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR); gpu_map_properties |= gpu_map_prot; @@ -2261,9 +2276,9 @@ int kbase_csf_firmware_mcu_shared_mapping_init( mmu_insert_pages_error: mutex_lock(&kbdev->csf.reg_lock); kbase_remove_va_region(va_reg); - mutex_unlock(&kbdev->csf.reg_lock); va_region_add_error: kbase_free_alloced_region(va_reg); + mutex_unlock(&kbdev->csf.reg_lock); va_region_alloc_error: vunmap(cpu_addr); vmap_error: @@ -2293,8 +2308,8 @@ void kbase_csf_firmware_mcu_shared_mapping_term( if (csf_mapping->va_reg) { mutex_lock(&kbdev->csf.reg_lock); kbase_remove_va_region(csf_mapping->va_reg); - mutex_unlock(&kbdev->csf.reg_lock); kbase_free_alloced_region(csf_mapping->va_reg); + mutex_unlock(&kbdev->csf.reg_lock); } if (csf_mapping->phys) { diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.h b/mali_kbase/csf/mali_kbase_csf_firmware.h index a2dc4fd..13ff701 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware.h +++ b/mali_kbase/csf/mali_kbase_csf_firmware.h @@ -23,7 +23,7 @@ #define _KBASE_CSF_FIRMWARE_H_ #include "device/mali_kbase_device.h" -#include "mali_gpu_csf_registers.h" +#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h> /* * PAGE_KERNEL_RO was only defined on 32bit ARM in 4.19 in: @@ -266,6 +266,7 @@ u32 kbase_csf_firmware_csg_output( * @group_stride: Stride in bytes in JASID0 virtual address between * CSG capability structures. * @prfcnt_size: Performance counters size. + * @instr_features: Instrumentation features. * @groups: Address of an array of CSG capability structures. */ struct kbase_csf_global_iface { @@ -277,6 +278,7 @@ struct kbase_csf_global_iface { u32 group_num; u32 group_stride; u32 prfcnt_size; + u32 instr_features; struct kbase_csf_cmd_stream_group_info *groups; }; @@ -397,13 +399,23 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev); /** * kbase_csf_firmware_ping - Send the ping request to firmware. * - * The function sends the ping request to firmware to confirm it is alive. + * The function sends the ping request to firmware. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +void kbase_csf_firmware_ping(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_ping_wait - Send the ping request to firmware and waits. 
+ * + * The function sends the ping request to firmware and waits to confirm it is + * alive. * * @kbdev: Instance of a GPU platform device that implements a CSF interface. * * Return: 0 on success, or negative on failure. */ -int kbase_csf_firmware_ping(struct kbase_device *kbdev); +int kbase_csf_firmware_ping_wait(struct kbase_device *kbdev); /** * kbase_csf_firmware_set_timeout - Set a hardware endpoint progress timeout. @@ -570,12 +582,14 @@ bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev); * in bytes. Bits 31:16 hold the size of firmware * performance counter data and 15:0 hold the size of * hardware performance counter data. - */ -u32 kbase_csf_firmware_get_glb_iface(struct kbase_device *kbdev, - struct basep_cs_group_control *group_data, u32 max_group_num, - struct basep_cs_stream_control *stream_data, u32 max_total_stream_num, - u32 *glb_version, u32 *features, u32 *group_num, u32 *prfcnt_size); - + * @instr_features: Instrumentation features. Bits 7:4 hold the max size + * of events. Bits 3:0 hold the offset update rate. + */ +u32 kbase_csf_firmware_get_glb_iface( + struct kbase_device *kbdev, struct basep_cs_group_control *group_data, + u32 max_group_num, struct basep_cs_stream_control *stream_data, + u32 max_total_stream_num, u32 *glb_version, u32 *features, + u32 *group_num, u32 *prfcnt_size, u32 *instr_features); /** * Get CSF firmware header timeline metadata content diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c index 6349917..a3901cd 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c @@ -237,6 +237,9 @@ static int invent_capabilities(struct kbase_device *kbdev) iface->kbdev = kbdev; iface->features = 0; iface->prfcnt_size = 64; + iface->instr_features = + 0x81; /* update rate=1, max event size = 1<<8 = 256 */ + iface->group_num = ARRAY_SIZE(interface->csg); iface->group_stride = 0; @@ -463,14 +466,8 @@ static void handle_internal_firmware_fatal(struct kbase_device *const kbdev) kbase_ctx_sched_release_ctx_lock(kctx); } - /* Internal FW error could mean hardware counters will stop working. - * Put the backend into the unrecoverable error state to cause - * current and subsequent counter operations to immediately - * fail, avoiding the risk of a hang. 
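The ping interface is now split: kbase_csf_firmware_ping() only rings the doorbell with GLB_REQ_PING_MASK set, while kbase_csf_firmware_ping_wait() additionally blocks on the acknowledgment. A minimal sketch of how a caller might use the blocking variant, mirroring the firmware_aliveness_monitor() change later in this patch (the wrapper function name is illustrative and not part of the patch):

/* Illustrative caller of the split ping API. */
static void example_check_firmware_alive(struct kbase_device *kbdev)
{
	int err = kbase_csf_firmware_ping_wait(kbdev);

	if (err) {
		/* No acknowledgment in time: mark HW counters unrecoverable
		 * via the new reset flag and request a GPU reset.
		 */
		if (kbase_prepare_to_reset_gpu(kbdev,
				RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
			kbase_reset_gpu(kbdev);
	}
}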
- */ - kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface); - - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, + RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } @@ -1032,7 +1029,7 @@ void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev) kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); } -int kbase_csf_firmware_ping(struct kbase_device *const kbdev) +void kbase_csf_firmware_ping(struct kbase_device *const kbdev) { const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; @@ -1042,7 +1039,11 @@ int kbase_csf_firmware_ping(struct kbase_device *const kbdev) set_global_request(global_iface, GLB_REQ_PING_MASK); kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); kbase_csf_scheduler_spin_unlock(kbdev, flags); +} +int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev) +{ + kbase_csf_firmware_ping(kbdev); return wait_for_global_request(kbdev, GLB_REQ_PING_MASK); } @@ -1170,26 +1171,28 @@ static u32 copy_grp_and_stm( return total_stream_num; } -u32 kbase_csf_firmware_get_glb_iface(struct kbase_device *kbdev, +u32 kbase_csf_firmware_get_glb_iface( + struct kbase_device *kbdev, struct basep_cs_group_control *const group_data, u32 const max_group_num, struct basep_cs_stream_control *const stream_data, u32 const max_total_stream_num, u32 *const glb_version, - u32 *const features, u32 *const group_num, u32 *const prfcnt_size) + u32 *const features, u32 *const group_num, u32 *const prfcnt_size, + u32 *const instr_features) { const struct kbase_csf_global_iface * const iface = &kbdev->csf.global_iface; - if (WARN_ON(!glb_version) || - WARN_ON(!features) || - WARN_ON(!group_num) || - WARN_ON(!prfcnt_size)) + if (WARN_ON(!glb_version) || WARN_ON(!features) || + WARN_ON(!group_num) || WARN_ON(!prfcnt_size) || + WARN_ON(!instr_features)) return 0; *glb_version = iface->version; *features = iface->features; *group_num = iface->group_num; *prfcnt_size = iface->prfcnt_size; + *instr_features = iface->instr_features; return copy_grp_and_stm(iface, group_data, max_group_num, stream_data, max_total_stream_num); @@ -1269,9 +1272,9 @@ int kbase_csf_firmware_mcu_shared_mapping_init( mutex_lock(&kbdev->csf.reg_lock); ret = kbase_add_va_region_rbtree(kbdev, va_reg, 0, num_pages, 1); va_reg->flags &= ~KBASE_REG_FREE; - mutex_unlock(&kbdev->csf.reg_lock); if (ret) goto va_region_add_error; + mutex_unlock(&kbdev->csf.reg_lock); gpu_map_properties &= (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR); gpu_map_properties |= gpu_map_prot; @@ -1293,9 +1296,9 @@ int kbase_csf_firmware_mcu_shared_mapping_init( mmu_insert_pages_error: mutex_lock(&kbdev->csf.reg_lock); kbase_remove_va_region(va_reg); - mutex_unlock(&kbdev->csf.reg_lock); va_region_add_error: kbase_free_alloced_region(va_reg); + mutex_unlock(&kbdev->csf.reg_lock); va_region_alloc_error: vunmap(cpu_addr); vmap_error: @@ -1325,8 +1328,8 @@ void kbase_csf_firmware_mcu_shared_mapping_term( if (csf_mapping->va_reg) { mutex_lock(&kbdev->csf.reg_lock); kbase_remove_va_region(csf_mapping->va_reg); - mutex_unlock(&kbdev->csf.reg_lock); kbase_free_alloced_region(csf_mapping->va_reg); + mutex_unlock(&kbdev->csf.reg_lock); } if (csf_mapping->phys) { diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.c b/mali_kbase/csf/mali_kbase_csf_kcpu.c index e5aee61..1203d2c 100644 --- a/mali_kbase/csf/mali_kbase_csf_kcpu.c +++ b/mali_kbase/csf/mali_kbase_csf_kcpu.c @@ -257,7 +257,7 @@ static int kbase_kcpu_jit_allocate_process( * No prior JIT_FREE command is active. 
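On the new @instr_features field: the kbase_csf_firmware_get_glb_iface() documentation above states that bits 7:4 hold the maximum event size and bits 3:0 hold the offset update rate, and the no-mali backend's dummy value 0x81 is annotated as "update rate=1, max event size = 1<<8 = 256". A small sketch of that reading of the encoding (helper and variable names are illustrative; the log2 interpretation of bits 7:4 is inferred from the 0x81 comment):

/* Illustrative decode of GLB_INSTR_FEATURES as documented above. */
static void example_decode_instr_features(u32 instr_features)
{
	u32 offset_update_rate = instr_features & 0xF;            /* bits 3:0 */
	u32 max_event_size = 1u << ((instr_features >> 4) & 0xF); /* bits 7:4 */

	/* For the dummy value 0x81: rate = 1, size = 1 << 8 = 256. */
	(void)offset_update_rate;
	(void)max_event_size;
}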
Roll * back previous allocations and fail. */ - dev_warn_ratelimited(kctx->kbdev->dev, "JIT alloc command failed: %p\n", cmd); + dev_warn_ratelimited(kctx->kbdev->dev, "JIT alloc command failed: %pK\n", cmd); ret = -ENOMEM; goto fail; } @@ -858,10 +858,7 @@ static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev, "Sync memory %llx already freed", cqs_set->objs[i].addr); queue->has_error = true; } else { - if (cqs_set->propagate_flags & (1 << i)) - evt[BASEP_EVENT_ERR_INDEX] = queue->has_error; - else - evt[BASEP_EVENT_ERR_INDEX] = false; + evt[BASEP_EVENT_ERR_INDEX] = queue->has_error; /* Set to signaled */ evt[BASEP_EVENT_VAL_INDEX]++; kbase_phy_alloc_mapping_put(queue->kctx, mapping); @@ -908,8 +905,267 @@ static int kbase_kcpu_cqs_set_prepare( current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET; current_command->info.cqs_set.nr_objs = nr_objs; current_command->info.cqs_set.objs = objs; - current_command->info.cqs_set.propagate_flags = - cqs_set_info->propagate_flags; + + return 0; +} + +static void cleanup_cqs_wait_operation(struct kbase_kcpu_command_queue *queue, + struct kbase_kcpu_command_cqs_wait_operation_info *cqs_wait_operation) +{ + WARN_ON(!cqs_wait_operation->nr_objs); + WARN_ON(!cqs_wait_operation->objs); + WARN_ON(!cqs_wait_operation->signaled); + WARN_ON(!queue->cqs_wait_count); + + if (--queue->cqs_wait_count == 0) { + kbase_csf_event_wait_remove(queue->kctx, + event_cqs_callback, queue); + } + + kfree(cqs_wait_operation->signaled); + kfree(cqs_wait_operation->objs); + cqs_wait_operation->signaled = NULL; + cqs_wait_operation->objs = NULL; +} + +static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev, + struct kbase_kcpu_command_queue *queue, + struct kbase_kcpu_command_cqs_wait_operation_info *cqs_wait_operation) +{ + u32 i; + + lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + + if (WARN_ON(!cqs_wait_operation->objs)) + return -EINVAL; + + /* Skip the CQS waits that have already been signaled when processing */ + for (i = find_first_zero_bit(cqs_wait_operation->signaled, cqs_wait_operation->nr_objs); i < cqs_wait_operation->nr_objs; i++) { + if (!test_bit(i, cqs_wait_operation->signaled)) { + struct kbase_vmap_struct *mapping; + bool sig_set; + u64 *evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx, + cqs_wait_operation->objs[i].addr, &mapping); + + /* GPUCORE-28172 RDT to review */ + if (!queue->command_started) + queue->command_started = true; + + if (!evt) { + dev_warn(kbdev->dev, + "Sync memory %llx already freed", cqs_wait_operation->objs[i].addr); + queue->has_error = true; + return -EINVAL; + } + + switch (cqs_wait_operation->objs[i].operation) { + case BASEP_CQS_WAIT_OPERATION_LE: + sig_set = *evt <= cqs_wait_operation->objs[i].val; + break; + case BASEP_CQS_WAIT_OPERATION_GT: + sig_set = *evt > cqs_wait_operation->objs[i].val; + break; + default: + dev_warn(kbdev->dev, + "Unsupported CQS wait operation %d", cqs_wait_operation->objs[i].operation); + + kbase_phy_alloc_mapping_put(queue->kctx, mapping); + queue->has_error = true; + + return -EINVAL; + } + + /* Increment evt up to the error_state value depending on the CQS data type */ + switch (cqs_wait_operation->objs[i].data_type) { + default: + dev_warn(kbdev->dev, "Unreachable data_type=%d", cqs_wait_operation->objs[i].data_type); + /* Fallthrough - hint to compiler that there's really only 2 options at present */ + case BASEP_CQS_DATA_TYPE_U32: + evt = (u64 *)((u8 *)evt + sizeof(u32)); + break; + case BASEP_CQS_DATA_TYPE_U64: + evt = (u64 *)((u8 *)evt + 
sizeof(u64)); + break; + } + + if (sig_set) { + bitmap_set(cqs_wait_operation->signaled, i, 1); + if ((cqs_wait_operation->inherit_err_flags & (1U << i)) && + *evt > 0) { + queue->has_error = true; + } + + /* GPUCORE-28172 RDT to review */ + + queue->command_started = false; + } + + kbase_phy_alloc_mapping_put(queue->kctx, mapping); + + if (!sig_set) + break; + } + } + + /* For the queue to progress further, all cqs objects should get + * signaled. + */ + return bitmap_full(cqs_wait_operation->signaled, cqs_wait_operation->nr_objs); +} + +static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue *queue, + struct base_kcpu_command_cqs_wait_operation_info *cqs_wait_operation_info, + struct kbase_kcpu_command *current_command) +{ + struct base_cqs_wait_operation_info *objs; + unsigned int nr_objs = cqs_wait_operation_info->nr_objs; + + lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + + if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) + return -EINVAL; + + if (!nr_objs) + return -EINVAL; + + objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL); + if (!objs) + return -ENOMEM; + + if (copy_from_user(objs, u64_to_user_ptr(cqs_wait_operation_info->objs), + nr_objs * sizeof(*objs))) { + kfree(objs); + return -ENOMEM; + } + + if (++queue->cqs_wait_count == 1) { + if (kbase_csf_event_wait_add(queue->kctx, + event_cqs_callback, queue)) { + kfree(objs); + queue->cqs_wait_count--; + return -ENOMEM; + } + } + + current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION; + current_command->info.cqs_wait_operation.nr_objs = nr_objs; + current_command->info.cqs_wait_operation.objs = objs; + current_command->info.cqs_wait_operation.inherit_err_flags = + cqs_wait_operation_info->inherit_err_flags; + + current_command->info.cqs_wait_operation.signaled = kcalloc(BITS_TO_LONGS(nr_objs), + sizeof(*current_command->info.cqs_wait_operation.signaled), GFP_KERNEL); + if (!current_command->info.cqs_wait_operation.signaled) { + if (--queue->cqs_wait_count == 0) { + kbase_csf_event_wait_remove(queue->kctx, + event_cqs_callback, queue); + } + + kfree(objs); + return -ENOMEM; + } + + return 0; +} + +static void kbase_kcpu_cqs_set_operation_process( + struct kbase_device *kbdev, + struct kbase_kcpu_command_queue *queue, + struct kbase_kcpu_command_cqs_set_operation_info *cqs_set_operation) +{ + unsigned int i; + + lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + + if (WARN_ON(!cqs_set_operation->objs)) + return; + + for (i = 0; i < cqs_set_operation->nr_objs; i++) { + struct kbase_vmap_struct *mapping; + u64 *evt; + + evt = (u64 *)kbase_phy_alloc_mapping_get( + queue->kctx, cqs_set_operation->objs[i].addr, &mapping); + + /* GPUCORE-28172 RDT to review */ + + if (!evt) { + dev_warn(kbdev->dev, + "Sync memory %llx already freed", cqs_set_operation->objs[i].addr); + queue->has_error = true; + } else { + switch (cqs_set_operation->objs[i].operation) { + case BASEP_CQS_SET_OPERATION_ADD: + *evt += cqs_set_operation->objs[i].val; + break; + case BASEP_CQS_SET_OPERATION_SET: + *evt = cqs_set_operation->objs[i].val; + break; + default: + dev_warn(kbdev->dev, + "Unsupported CQS set operation %d", cqs_set_operation->objs[i].operation); + queue->has_error = true; + break; + } + + /* Increment evt up to the error_state value depending on the CQS data type */ + switch (cqs_set_operation->objs[i].data_type) { + default: + dev_warn(kbdev->dev, "Unreachable data_type=%d", cqs_set_operation->objs[i].data_type); + /* Fallthrough - hint to compiler that there's really only 2 options at present */ + 
case BASEP_CQS_DATA_TYPE_U32: + evt = (u64 *)((u8 *)evt + sizeof(u32)); + break; + case BASEP_CQS_DATA_TYPE_U64: + evt = (u64 *)((u8 *)evt + sizeof(u64)); + break; + } + + /* GPUCORE-28172 RDT to review */ + + /* Always propagate errors */ + *evt = queue->has_error; + + kbase_phy_alloc_mapping_put(queue->kctx, mapping); + } + } + + kbase_csf_event_signal_notify_gpu(queue->kctx); + + kfree(cqs_set_operation->objs); + cqs_set_operation->objs = NULL; +} + +static int kbase_kcpu_cqs_set_operation_prepare( + struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_cqs_set_operation_info *cqs_set_operation_info, + struct kbase_kcpu_command *current_command) +{ + struct kbase_context *const kctx = kcpu_queue->kctx; + struct base_cqs_set_operation_info *objs; + unsigned int nr_objs = cqs_set_operation_info->nr_objs; + + lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + + if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) + return -EINVAL; + + if (!nr_objs) + return -EINVAL; + + objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL); + if (!objs) + return -ENOMEM; + + if (copy_from_user(objs, u64_to_user_ptr(cqs_set_operation_info->objs), + nr_objs * sizeof(*objs))) { + kfree(objs); + return -ENOMEM; + } + + current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION; + current_command->info.cqs_set_operation.nr_objs = nr_objs; + current_command->info.cqs_set_operation.objs = objs; return 0; } @@ -1365,6 +1621,28 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, &cmd->info.cqs_set); break; + case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: + status = kbase_kcpu_cqs_wait_operation_process(kbdev, queue, + &cmd->info.cqs_wait_operation); + + if (!status && !ignore_waits) { + process_next = false; + } else { + /* Either all CQS objects were signaled or + * there was an error or the queue itself is + * being deleted. + * In all cases can move to the next command. + * TBD: handle the error + */ + cleanup_cqs_wait_operation(queue, &cmd->info.cqs_wait_operation); + } + + break; + case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: + kbase_kcpu_cqs_set_operation_process(kbdev, queue, + &cmd->info.cqs_set_operation); + + break; case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: /* Clear the queue's error state */ queue->has_error = false; @@ -1404,7 +1682,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, queue->kctx, NULL, cmd->info.import.gpu_va); kbase_gpu_vm_unlock(queue->kctx); - if (ret == false) { + if (!ret) { queue->has_error = true; dev_warn(kbdev->dev, "failed to release the reference. resource not found\n"); @@ -1425,7 +1703,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, queue->kctx, NULL, cmd->info.import.gpu_va); kbase_gpu_vm_unlock(queue->kctx); - if (ret == false) { + if (!ret) { queue->has_error = true; dev_warn(kbdev->dev, "failed to release the reference. 
resource not found\n"); @@ -1591,6 +1869,16 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND( } break; } + case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: + { + /* GPUCORE-28172 RDT to review */ + break; + } + case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: + { + /* GPUCORE-28172 RDT to review */ + break; + } case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER(kbdev, queue); @@ -1758,6 +2046,14 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, ret = kbase_kcpu_cqs_set_prepare(queue, &command.info.cqs_set, kcpu_cmd); break; + case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: + ret = kbase_kcpu_cqs_wait_operation_prepare(queue, + &command.info.cqs_wait_operation, kcpu_cmd); + break; + case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: + ret = kbase_kcpu_cqs_set_operation_prepare(queue, + &command.info.cqs_set_operation, kcpu_cmd); + break; case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: kcpu_cmd->type = BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER; ret = 0; diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.h b/mali_kbase/csf/mali_kbase_csf_kcpu.h index a528572..86aa7dc 100644 --- a/mali_kbase/csf/mali_kbase_csf_kcpu.h +++ b/mali_kbase/csf/mali_kbase_csf_kcpu.h @@ -69,13 +69,10 @@ struct kbase_kcpu_command_fence_info { * @objs: Array of structures which define CQS objects to be used by * the kcpu command. * @nr_objs: Number of CQS objects in the array. - * @propagate_flags: Bit-pattern for the CQSs in the array that are set - * to propagate queue error-state to the flagged CQSs. */ struct kbase_kcpu_command_cqs_set_info { struct base_cqs_set *objs; unsigned int nr_objs; - u32 propagate_flags; }; /** @@ -99,6 +96,36 @@ struct kbase_kcpu_command_cqs_wait_info { }; /** + * struct kbase_kcpu_command_cqs_set_operation_info - Structure which holds information + * about CQS objects for the kcpu CQS timeline set command + * + * @objs: Array of structures which define CQS timeline objects to be used by + * the kcpu command. + * @nr_objs: Number of CQS objects in the array. + */ +struct kbase_kcpu_command_cqs_set_operation_info { + struct base_cqs_set_operation_info *objs; + unsigned int nr_objs; +}; + +/** + * struct kbase_kcpu_command_cqs_wait_operation_info - Structure which holds information + * about CQS objects for the kcpu CQS timeline wait command + * + * @objs: Array of structures which define CQS timeline objects to be used by + * the kcpu command. + * @signaled: Bit array used to report the status of the CQS wait objects. + * 1 is signaled, 0 otherwise. + * @nr_objs: Number of CQS objects in the array. 
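The new BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION command added above waits on a CQS value using a comparison rather than a plain signal. A compact sketch of the wait condition implemented by kbase_kcpu_cqs_wait_operation_process() (standalone helper for illustration only; parameter types are simplified):

/* Illustrative restatement of the CQS wait-operation condition. */
static bool example_cqs_wait_operation_signalled(u64 current_value,
						 u64 wait_value,
						 u32 operation)
{
	switch (operation) {
	case BASEP_CQS_WAIT_OPERATION_LE:
		return current_value <= wait_value; /* wait until value <= target */
	case BASEP_CQS_WAIT_OPERATION_GT:
		return current_value > wait_value;  /* wait until value > target */
	default:
		return false; /* unsupported operations flag a queue error */
	}
}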
+ */ +struct kbase_kcpu_command_cqs_wait_operation_info { + struct base_cqs_wait_operation_info *objs; + unsigned long *signaled; + unsigned int nr_objs; + u32 inherit_err_flags; +}; + +/** * struct kbase_kcpu_command_jit_alloc_info - Structure which holds information * needed for the kcpu command for jit allocations * @@ -200,6 +227,8 @@ struct kbase_kcpu_command { struct kbase_kcpu_command_fence_info fence; struct kbase_kcpu_command_cqs_wait_info cqs_wait; struct kbase_kcpu_command_cqs_set_info cqs_set; + struct kbase_kcpu_command_cqs_wait_operation_info cqs_wait_operation; + struct kbase_kcpu_command_cqs_set_operation_info cqs_set_operation; struct kbase_kcpu_command_import_info import; struct kbase_kcpu_command_jit_alloc_info jit_alloc; struct kbase_kcpu_command_jit_free_info jit_free; diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu_debugfs.c b/mali_kbase/csf/mali_kbase_csf_kcpu_debugfs.c index 5c2e8e3..d59e77c 100644 --- a/mali_kbase/csf/mali_kbase_csf_kcpu_debugfs.c +++ b/mali_kbase/csf/mali_kbase_csf_kcpu_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -95,7 +95,7 @@ static void kbasep_csf_kcpu_debugfs_print_queue(struct seq_file *file, struct kbase_sync_fence_info info; kbase_sync_fence_info_get(cmd->info.fence.fence, &info); - seq_printf(file, ", Fence %p %s %s", + seq_printf(file, ", Fence %pK %s %s", info.fence, info.name, kbase_sync_status_string(info.status)); break; diff --git a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c index b59ffd4..e8da0f3 100644 --- a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c +++ b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c @@ -307,6 +307,31 @@ static void kbase_csf_dump_firmware_trace_buffer(struct kbase_device *kbdev) kfree(buf); } +/** + * kbase_csf_hwcnt_on_reset_error() - Sets HWCNT to appropriate state in the + * event of an error during GPU reset. + * @kbdev: Pointer to KBase device + */ +static void kbase_csf_hwcnt_on_reset_error(struct kbase_device *kbdev) +{ + unsigned long flags; + + /* Treat this as an unrecoverable error for HWCNT */ + kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface); + + /* Re-enable counters to ensure matching enable/disable pair. + * This might reduce the hwcnt disable count to 0, and therefore + * trigger actual re-enabling of hwcnt. + * However, as the backend is now in the unrecoverable error state, + * re-enabling will immediately fail and put the context into the error + * state, preventing the hardware from being touched (which could have + * risked a hang). 
+ */ + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + kbase_csf_scheduler_spin_unlock(kbdev, flags); +} + static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev, bool firmware_inited, bool silent) { @@ -396,8 +421,10 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev, mutex_unlock(&kbdev->pm.lock); - if (WARN_ON(err)) - goto error; + if (WARN_ON(err)) { + kbase_csf_hwcnt_on_reset_error(kbdev); + return err; + } mutex_lock(&kbdev->mmu_hw_mutex); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -414,40 +441,20 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev, err = kbase_pm_wait_for_desired_state(kbdev); mutex_unlock(&kbdev->pm.lock); - if (err) - goto error; + if (WARN_ON(err)) { + kbase_csf_hwcnt_on_reset_error(kbdev); + return err; + } /* Re-enable GPU hardware counters */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_csf_scheduler_spin_lock(kbdev, &flags); kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_csf_scheduler_spin_unlock(kbdev, flags); if (!silent) dev_err(kbdev->dev, "Reset complete"); return 0; -error: - WARN_ON(!err); - - /* If hardware init failed, we assume hardware counters will - * not work and put the backend into the unrecoverable error - * state. - */ - kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface); - - /* Re-enable counters to ensure matching enable/disable pair. - * This might reduce the hwcnt disable count to 0, and therefore - * trigger actual re-enabling of hwcnt. - * However, as the backend is now in the unrecoverable error state, - * re-enabling will immediately fail and put the context into the error - * state, preventing the hardware from being touched (which could have - * risked a hang). 
- */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return err; } static void kbase_csf_reset_gpu_worker(struct work_struct *data) @@ -484,25 +491,29 @@ static void kbase_csf_reset_gpu_worker(struct work_struct *data) kbase_csf_reset_end_hw_access(kbdev, err, firmware_inited); } -bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev) +bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags) { + if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR) + kbase_hwcnt_backend_csf_on_unrecoverable_error( + &kbdev->hwcnt_gpu_iface); + if (atomic_cmpxchg(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_NOT_PENDING, KBASE_CSF_RESET_GPU_PREPARED) != - KBASE_CSF_RESET_GPU_NOT_PENDING) { + KBASE_CSF_RESET_GPU_NOT_PENDING) /* Some other thread is already resetting the GPU */ return false; - } return true; } KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu); -bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev) +bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, + unsigned int flags) { lockdep_assert_held(&kbdev->hwaccess_lock); - return kbase_prepare_to_reset_gpu(kbdev); + return kbase_prepare_to_reset_gpu(kbdev, flags); } void kbase_reset_gpu(struct kbase_device *kbdev) diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c index b9dc59c..84d6f81 100644 --- a/mali_kbase/csf/mali_kbase_csf_scheduler.c +++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c @@ -28,8 +28,8 @@ #include "../tl/mali_kbase_tracepoints.h" #include "backend/gpu/mali_kbase_pm_internal.h" #include <linux/export.h> -#include "mali_gpu_csf_registers.h" -#include <mali_base_kernel.h> +#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h> +#include <uapi/gpu/arm/midgard/mali_base_kernel.h> /* Value to indicate that a queue group is not groups_to_schedule list */ #define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX) @@ -373,6 +373,45 @@ static bool queue_group_scheduled_locked(struct kbase_queue_group *group) } /** + * scheduler_wait_protm_quit() - Wait for GPU to exit protected mode. + * + * @kbdev: Pointer to the GPU device + * + * This function waits for the GPU to exit protected mode which is confirmed + * when active_protm_grp is set to NULL. + */ +static void scheduler_wait_protm_quit(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + long wt = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); + long remaining; + + lockdep_assert_held(&scheduler->lock); + + remaining = wait_event_timeout(kbdev->csf.event_wait, + !kbase_csf_scheduler_protected_mode_in_use(kbdev), wt); + + if (!remaining) + dev_warn(kbdev->dev, "Timeout, protm_quit wait skipped"); +} + +/** + * scheduler_force_protm_exit() - Force GPU to exit protected mode. + * + * @kbdev: Pointer to the GPU device + * + * This function sends a ping request to the firmware and waits for the GPU + * to exit protected mode. + */ +static void scheduler_force_protm_exit(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + kbase_csf_firmware_ping(kbdev); + scheduler_wait_protm_quit(kbdev); +} + +/** * scheduler_timer_is_enabled_nolock() - Check if the scheduler wakes up * automatically for periodic tasks. 
* @@ -607,7 +646,7 @@ static int halt_stream_sync(struct kbase_queue *queue) if (!remaining) { dev_warn(kbdev->dev, "Timed out waiting for queue to start on csi %d bound to group %d on slot %d", csi_index, group->handle, group->csg_nr); - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); return -ETIMEDOUT; @@ -629,26 +668,14 @@ static int halt_stream_sync(struct kbase_queue *queue) (CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK)) == CS_ACK_STATE_STOP), remaining); - /* Queues that have failed to stop in time shall raise a fatal error - * as their group would fail to suspend which could no longer be safely - * resumed. - */ if (!remaining) { - unsigned long flags; - dev_warn(kbdev->dev, "Timed out waiting for queue to stop on csi %d bound to group %d on slot %d", queue->csi_index, group->handle, group->csg_nr); - spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); - kbase_csf_add_queue_fatal_error( - queue, GPU_EXCEPTION_TYPE_SW_FAULT_2, 0); - spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, - flags); - /* TODO GPUCORE-25328: The CSG can't be terminated, the GPU * will be reset as a work-around. */ - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); } return (remaining) ? 0 : -ETIMEDOUT; @@ -722,27 +749,6 @@ static int sched_halt_stream(struct kbase_queue *queue) } } retry: - /* First wait for the group to reach a stable state. IDLE state is - * an intermediate state that is only set by Scheduler at the start - * of a tick (prior to scanout) for groups that received idle - * notification, then later the idle group is moved to one of the - * suspended states or the runnable state. - */ - while (group->run_state == KBASE_CSF_GROUP_IDLE) { - mutex_unlock(&scheduler->lock); - remaining = wait_event_timeout(kbdev->csf.event_wait, - group->run_state != - KBASE_CSF_GROUP_IDLE, - kbdev->csf.fw_timeout_ms); - mutex_lock(&scheduler->lock); - if (!remaining) { - dev_warn(kbdev->dev, - "Timed out waiting for state change of Group-%d when stopping a queue on csi %d", - group->handle, queue->csi_index); - } - } - - WARN_ON(group->run_state == KBASE_CSF_GROUP_IDLE); /* Update the group state so that it can get scheduled soon */ update_idle_suspended_group_state(group); @@ -1559,7 +1565,7 @@ static void update_offslot_non_idle_cnt_on_grp_suspend( lockdep_assert_held(&scheduler->lock); - if (scheduler->state == SCHED_BUSY || scheduler->apply_async_protm) { + if (scheduler->state == SCHED_BUSY) { /* active phase or, async entering the protected mode */ if (group->prepared_seq_num >= scheduler->non_idle_scanout_grps) { @@ -1731,7 +1737,6 @@ static bool cleanup_csg_slot(struct kbase_queue_group *group) /* The csg does not need cleanup other than drop its AS */ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); as_fault = kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT); - WARN_ON(kctx->mmu_flush_pend_state != KCTX_MMU_FLUSH_NOT_PEND); kbase_ctx_sched_release_ctx(kctx); if (unlikely(group->faulted)) as_fault = true; @@ -1779,11 +1784,12 @@ static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio) csg_slot = &kbdev->csf.scheduler.csg_slots[slot]; ginfo = &kbdev->csf.global_iface.groups[slot]; + /* CSGs remaining on-slot can be either idle or runnable. + * This also applies in protected mode. 
+ */ WARN_ON(!((group->run_state == KBASE_CSF_GROUP_RUNNABLE) || (group->run_state == KBASE_CSF_GROUP_IDLE))); - group->run_state = KBASE_CSF_GROUP_RUNNABLE; - /* Update consumes a group from scanout */ update_offslot_non_idle_cnt_for_onslot_grp(group); @@ -1858,12 +1864,11 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_ctx_sched_retain_ctx(kctx); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - kbase_mmu_deferred_flush_invalidate(kctx); mutex_unlock(&kbdev->mmu_hw_mutex); if (kctx->as_nr == KBASEP_AS_NR_INVALID) { - dev_dbg(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n", - group->handle, kctx->tgid, kctx->id, slot); + dev_warn(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n", + group->handle, kctx->tgid, kctx->id, slot); return; } @@ -1896,6 +1901,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_OTHER, tiler_mask & U32_MAX); + ep_cfg = CSG_EP_REQ_COMPUTE_EP_SET(ep_cfg, compute_max); ep_cfg = CSG_EP_REQ_FRAGMENT_EP_SET(ep_cfg, fragment_max); ep_cfg = CSG_EP_REQ_TILER_EP_SET(ep_cfg, tiler_max); @@ -2043,7 +2049,7 @@ static int term_group_sync(struct kbase_queue_group *group) if (!remaining) { dev_warn(kbdev->dev, "term request timed out for group %d on slot %d", group->handle, group->csg_nr); - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); err = -ETIMEDOUT; } @@ -2112,9 +2118,10 @@ static int scheduler_group_schedule(struct kbase_queue_group *group) { struct kbase_context *kctx = group->kctx; struct kbase_device *kbdev = kctx->kbdev; + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; lockdep_assert_held(&kctx->csf.lock); - lockdep_assert_held(&kbdev->csf.scheduler.lock); + lockdep_assert_held(&scheduler->lock); KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SCHEDULE, group, group->run_state); if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) @@ -2125,8 +2132,39 @@ static int scheduler_group_schedule(struct kbase_queue_group *group) if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE) update_idle_suspended_group_state(group); - else + else { + struct kbase_queue_group *protm_grp; + unsigned long flags; + + WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked( + group)); + group->run_state = KBASE_CSF_GROUP_RUNNABLE; + + /* A normal mode CSG could be idle onslot during + * protected mode. In this case clear the + * appropriate bit in csg_slots_idle_mask. + */ + spin_lock_irqsave(&scheduler->interrupt_lock, flags); + protm_grp = scheduler->active_protm_grp; + if (protm_grp && protm_grp != group) + clear_bit((unsigned int)group->csg_nr, + scheduler->csg_slots_idle_mask); + spin_unlock_irqrestore(&scheduler->interrupt_lock, + flags); + + /* If GPU is in protected mode then any doorbells rang + * would have no effect. Check if GPU is in protected + * mode and if this group has higher priority than the + * active protected mode group. If so prompt the FW + * to exit protected mode. 
+ */ + if (protm_grp && + group->scan_seq_num < protm_grp->scan_seq_num) { + /* Prompt the FW to exit protected mode */ + scheduler_force_protm_exit(kbdev); + } + } } else if (!queue_group_scheduled_locked(group)) { insert_group_to_runnable(&kbdev->csf.scheduler, group, KBASE_CSF_GROUP_RUNNABLE); @@ -2511,7 +2549,7 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) */ dev_warn( kbdev->dev, - "Group %p on slot %u failed to suspend\n", + "Group %pK on slot %u failed to suspend\n", (void *)group, i); /* The group has failed suspension, stop @@ -2541,11 +2579,13 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) if (WARN_ON(i == num_groups)) break; program_vacant_csg_slot(kbdev, (s8)i); - if (WARN_ON(!csg_slot_in_use(kbdev, (int)i))) + if (!csg_slot_in_use(kbdev, (int)i)) { + dev_warn(kbdev->dev, "Couldn't use CSG slot %d despite being vacant", i); break; + } } } else { - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); } } @@ -2611,7 +2651,7 @@ static void wait_csg_slots_start(struct kbase_device *kbdev) dev_warn(kbdev->dev, "Timed out waiting for CSG slots to start, slots: 0x%*pb\n", num_groups, slot_mask); - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); break; } @@ -3287,7 +3327,8 @@ static void scheduler_handle_idle_slots(struct kbase_device *kbdev) continue; if (WARN_ON(!group)) continue; - if (WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE)) + if (WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE && + group->run_state != KBASE_CSF_GROUP_IDLE)) continue; if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT)) continue; @@ -3295,7 +3336,8 @@ static void scheduler_handle_idle_slots(struct kbase_device *kbdev) if (group_on_slot_is_idle(kbdev, i)) { group->run_state = KBASE_CSF_GROUP_IDLE; set_bit(i, scheduler->csg_slots_idle_mask); - } + } else + group->run_state = KBASE_CSF_GROUP_RUNNABLE; } bitmap_or(scheduler->csg_slots_idle_mask, @@ -3381,7 +3423,7 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, dev_warn(kbdev->dev, "Timed out waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n", kbdev->csf.global_iface.group_num, slot_mask); - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); if (is_suspend) { @@ -3526,21 +3568,6 @@ static int scheduler_prepare(struct kbase_device *kbdev) return 0; } -static void scheduler_wait_protm_quit(struct kbase_device *kbdev) -{ - struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - long wt = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); - long remaining; - - lockdep_assert_held(&scheduler->lock); - - remaining = wait_event_timeout(kbdev->csf.event_wait, - !kbase_csf_scheduler_protected_mode_in_use(kbdev), wt); - - if (!remaining) - dev_warn(kbdev->dev, "Timeout, protm_quit wait skipped"); -} - static void scheduler_handle_idle_timer_onoff(struct kbase_device *kbdev) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; @@ -3572,6 +3599,8 @@ static void schedule_actions(struct kbase_device *kbdev) unsigned long flags; struct kbase_queue_group *protm_grp; int ret; + bool skip_idle_slots_update; + bool new_protm_top_grp = false; kbase_reset_gpu_assert_prevented(kbdev); lockdep_assert_held(&scheduler->lock); @@ -3582,7 +3611,14 @@ static void schedule_actions(struct kbase_device *kbdev) return; } - 
scheduler_handle_idle_slots(kbdev); + spin_lock_irqsave(&scheduler->interrupt_lock, flags); + skip_idle_slots_update = kbase_csf_scheduler_protected_mode_in_use(kbdev); + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + + /* Skip updating on-slot idle CSGs if GPU is in protected mode. */ + if (!skip_idle_slots_update) + scheduler_handle_idle_slots(kbdev); + scheduler_prepare(kbdev); spin_lock_irqsave(&scheduler->interrupt_lock, flags); protm_grp = scheduler->active_protm_grp; @@ -3613,12 +3649,12 @@ static void schedule_actions(struct kbase_device *kbdev) scheduler->top_grp->kctx->tgid, scheduler->top_grp->kctx->id); - /* Due to GPUCORE-24491 only the top-group is allowed - * to be on slot and all other on slot groups have to - * be suspended before entering protected mode. - * This would change in GPUCORE-24492. + /* When entering protected mode all CSG slots can be occupied + * but only the protected mode CSG will be running. Any event + * that would trigger the execution of an on-slot idle CSG will + * need to be handled by the host during protected mode. */ - scheduler->num_csg_slots_for_tick = 1; + new_protm_top_grp = true; } spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); @@ -3635,12 +3671,12 @@ static void schedule_actions(struct kbase_device *kbdev) * locked in the secure mode. */ if (protm_grp) - scheduler_wait_protm_quit(kbdev); + scheduler_force_protm_exit(kbdev); wait_csg_slots_start(kbdev); wait_csg_slots_finish_prio_update(kbdev); - if (scheduler->num_csg_slots_for_tick == 1) { + if (new_protm_top_grp) { scheduler_group_check_protm_enter(kbdev, scheduler->top_grp); } @@ -3913,8 +3949,7 @@ void kbase_csf_scheduler_reset(struct kbase_device *kbdev) WARN_ON(!kbase_reset_gpu_is_active(kbdev)); KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET, NULL, 0u); - if (!kbase_csf_scheduler_protected_mode_in_use(kbdev) && - !suspend_active_queue_groups_on_reset(kbdev)) { + if (!suspend_active_queue_groups_on_reset(kbdev)) { /* As all groups have been successfully evicted from the CSG * slots, clear out thee scheduler data fields and return */ @@ -4002,21 +4037,14 @@ static void firmware_aliveness_monitor(struct work_struct *work) kbase_pm_wait_for_desired_state(kbdev); - err = kbase_csf_firmware_ping(kbdev); + err = kbase_csf_firmware_ping_wait(kbdev); if (err) { - /* FW not responding means hardware counters will stop working. - * Put the backend into the unrecoverable error state to cause - * current and subsequent counter operations to immediately - * fail, avoiding the risk of a hang. - */ - kbase_hwcnt_backend_csf_on_unrecoverable_error( - &kbdev->hwcnt_gpu_iface); - /* It is acceptable to enqueue a reset whilst we've prevented * them, it will happen after we've allowed them again */ - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu( + kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } else if (get_nr_active_csgs(kbdev) == 1) { queue_delayed_work(system_long_wq, @@ -4132,7 +4160,9 @@ static bool group_sync_updated(struct kbase_queue_group *group) bool updated = false; int stream; - WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC); + /* Groups can also be blocked on-slot during protected mode. 
*/ + WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC && + group->run_state != KBASE_CSF_GROUP_IDLE); for (stream = 0; stream < MAX_SUPPORTED_STREAMS_PER_GROUP; ++stream) { struct kbase_queue *const queue = group->bound_queues[stream]; @@ -4233,40 +4263,159 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group) mutex_lock(&scheduler->lock); - /* Check if the group is now eligible for execution in protected mode - * and accordingly undertake full scheduling actions as due to - * GPUCORE-24491 the on slot groups other than the top group have to - * be suspended first before entering protected mode. - */ - if (scheduler_get_protm_enter_async_group(kbdev, group)) { - scheduler->apply_async_protm = true; - schedule_actions(kbdev); - scheduler->apply_async_protm = false; - } + /* Check if the group is now eligible for execution in protected mode. */ + if (scheduler_get_protm_enter_async_group(kbdev, group)) + scheduler_group_check_protm_enter(kbdev, group); mutex_unlock(&scheduler->lock); kbase_reset_gpu_allow(kbdev); } /** + * check_sync_update_for_idle_group_protm() - Check the sync wait condition + * for all the queues bound to + * the given group. + * + * @group: Pointer to the group that requires evaluation. + * + * This function is called if the GPU is in protected mode and there are on + * slot idle groups with higher priority than the active protected mode group. + * This function will evaluate the sync condition, if any, of all the queues + * bound to the given group. + * + * Return true if the sync condition of at least one queue has been satisfied. + */ +static bool check_sync_update_for_idle_group_protm( + struct kbase_queue_group *group) +{ + struct kbase_device *const kbdev = group->kctx->kbdev; + struct kbase_csf_scheduler *const scheduler = + &kbdev->csf.scheduler; + bool sync_update_done = false; + int i; + + lockdep_assert_held(&scheduler->lock); + + for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { + struct kbase_queue *queue = group->bound_queues[i]; + + if (queue && queue->enabled && !sync_update_done) { + struct kbase_csf_cmd_stream_group_info *const ginfo = + &kbdev->csf.global_iface.groups[group->csg_nr]; + struct kbase_csf_cmd_stream_info *const stream = + &ginfo->streams[queue->csi_index]; + u32 status = kbase_csf_firmware_cs_output( + stream, CS_STATUS_WAIT); + unsigned long flags; + + if (!CS_STATUS_WAIT_SYNC_WAIT_GET(status)) + continue; + + /* Save the information of sync object of the command + * queue so the callback function, 'group_sync_updated' + * can evaluate the sync object when it gets updated + * later. 
+ */ + queue->status_wait = status; + queue->sync_ptr = kbase_csf_firmware_cs_output( + stream, CS_STATUS_WAIT_SYNC_POINTER_LO); + queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output( + stream, CS_STATUS_WAIT_SYNC_POINTER_HI) << 32; + queue->sync_value = kbase_csf_firmware_cs_output( + stream, CS_STATUS_WAIT_SYNC_VALUE); + + if (!evaluate_sync_update(queue)) + continue; + + /* Update csg_slots_idle_mask and group's run_state */ + spin_lock_irqsave(&scheduler->interrupt_lock, flags); + clear_bit((unsigned int)group->csg_nr, + scheduler->csg_slots_idle_mask); + spin_unlock_irqrestore(&scheduler->interrupt_lock, + flags); + group->run_state = KBASE_CSF_GROUP_RUNNABLE; + + KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u); + sync_update_done = true; + } + } + + return sync_update_done; +} + +/** + * check_sync_update_for_idle_groups_protm() - Check the sync wait condition + * for the idle groups on slot + * during protected mode. + * + * @kbdev: Pointer to the GPU device + * + * This function checks the gpu queues of all the idle groups on slot during + * protected mode that has a higher priority than the active protected mode + * group. + * + * Return true if the sync condition of at least one queue in a group has been + * satisfied. + */ +static bool check_sync_update_for_idle_groups_protm(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + struct kbase_queue_group *protm_grp; + bool exit_protm = false; + unsigned long flags; + u32 num_groups; + u32 i; + + lockdep_assert_held(&scheduler->lock); + + spin_lock_irqsave(&scheduler->interrupt_lock, flags); + protm_grp = scheduler->active_protm_grp; + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + + if (!protm_grp) + return exit_protm; + + num_groups = kbdev->csf.global_iface.group_num; + + for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) { + struct kbase_csf_csg_slot *csg_slot = + &scheduler->csg_slots[i]; + struct kbase_queue_group *group = csg_slot->resident_group; + + if (group->scan_seq_num < protm_grp->scan_seq_num) { + /* If sync update has been performed for the group that + * has a higher priority than the protm group, then we + * need to exit protected mode. + */ + if (check_sync_update_for_idle_group_protm(group)) + exit_protm = true; + } + } + + return exit_protm; +} + +/** * check_group_sync_update_worker() - Check the sync wait condition for all the * blocked queue groups * * @work: Pointer to the context-specific work item for evaluating the wait * condition for all the queue groups in idle_wait_groups list. * - * This function checks the gpu queues of all the groups present in - * idle_wait_groups list of a context. If the sync wait condition - * for at least one queue bound to the group has been satisfied then - * the group is moved to the per context list of runnable groups so - * that Scheduler can consider scheduling the group in next tick. + * This function checks the gpu queues of all the groups present in both + * idle_wait_groups list of a context and all on slot idle groups (if GPU + * is in protected mode). + * If the sync wait condition for at least one queue bound to the group has + * been satisfied then the group is moved to the per context list of + * runnable groups so that Scheduler can consider scheduling the group + * in next tick or exit protected mode. 
*/ static void check_group_sync_update_worker(struct work_struct *work) { struct kbase_context *const kctx = container_of(work, struct kbase_context, csf.sched.sync_update_work); - struct kbase_csf_scheduler *const scheduler = - &kctx->kbdev->csf.scheduler; + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; mutex_lock(&scheduler->lock); @@ -4280,13 +4429,16 @@ static void check_group_sync_update_worker(struct work_struct *work) * groups list of the context. */ update_idle_suspended_group_state(group); - KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u); } } } else { WARN_ON(!list_empty(&kctx->csf.sched.idle_wait_groups)); } + if (check_sync_update_for_idle_groups_protm(kbdev)) + scheduler_force_protm_exit(kbdev); + mutex_unlock(&scheduler->lock); } @@ -4402,7 +4554,6 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev) scheduler->tock_pending_request = false; scheduler->active_protm_grp = NULL; scheduler->gpu_idle_fw_timer_enabled = false; - scheduler->apply_async_protm = false; scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS; scheduler_doorbell_init(kbdev); diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.h b/mali_kbase/csf/mali_kbase_csf_scheduler.h index 20d1bc9..1607ff6 100644 --- a/mali_kbase/csf/mali_kbase_csf_scheduler.h +++ b/mali_kbase/csf/mali_kbase_csf_scheduler.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -125,7 +125,7 @@ struct kbase_queue_group *kbase_csf_scheduler_get_group_on_slot( * kbase_csf_scheduler_group_deschedule() - Deschedule a GPU command queue * group from the firmware. * - * @group: Pointer to the queue group to be scheduled. + * @group: Pointer to the queue group to be descheduled. * * This function would disable the scheduling of GPU command queue group on * firmware. @@ -174,7 +174,7 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx); int kbase_csf_scheduler_init(struct kbase_device *kbdev); /** - * kbase_csf_scheduler_context_init() - Terminate the context-specific part + * kbase_csf_scheduler_context_term() - Terminate the context-specific part * for CSF scheduler. * * @kctx: Pointer to kbase context that is being terminated. diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c index 0b4fb5a..9e4ed17 100644 --- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c +++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -596,14 +596,14 @@ int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx, if (likely(heap)) { err = alloc_new_chunk(heap, nr_in_flight, pending_frag_count, new_chunk_ptr); - } - KBASE_TLSTREAM_AUX_TILER_HEAP_STATS( - kctx->kbdev, kctx->id, heap->heap_id, - PFN_UP(heap->chunk_size * heap->max_chunks), - PFN_UP(heap->chunk_size * heap->chunk_count), heap->max_chunks, - heap->chunk_size, heap->chunk_count, heap->target_in_flight, - nr_in_flight); + KBASE_TLSTREAM_AUX_TILER_HEAP_STATS( + kctx->kbdev, kctx->id, heap->heap_id, + PFN_UP(heap->chunk_size * heap->max_chunks), + PFN_UP(heap->chunk_size * heap->chunk_count), + heap->max_chunks, heap->chunk_size, heap->chunk_count, + heap->target_in_flight, nr_in_flight); + } mutex_unlock(&kctx->csf.tiler_heaps.lock); diff --git a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c index 7e9eb75..afcc90b 100644 --- a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c +++ b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c @@ -289,10 +289,6 @@ int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev, trace_buffer->trace_enable_entry_count = entry[6]; trace_buffer->num_pages = trace_buffer_data[i].size; - /* Temporary workaround until handled by GPUCORE-27330 */ - if (!strcmp(trace_buffer_data[i].name, "timeline")) - trace_buffer->updatable = 0; - for (j = 0; j < CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX; j++) { trace_buffer->trace_enable_init_mask[j] = trace_buffer_data[i].trace_enable_init_mask[j]; @@ -456,6 +452,7 @@ int kbase_csf_firmware_trace_buffer_update_trace_enable_bit( dev_warn( kbdev->dev, "GPU reset already in progress when enabling firmware timeline."); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return -EAGAIN; } } diff --git a/mali_kbase/device/backend/mali_kbase_device_csf.c b/mali_kbase/device/backend/mali_kbase_device_csf.c index f657bcb..cb2c2e2 100644 --- a/mali_kbase/device/backend/mali_kbase_device_csf.c +++ b/mali_kbase/device/backend/mali_kbase_device_csf.c @@ -23,6 +23,7 @@ #include "../mali_kbase_device.h" #include <mali_kbase_hwaccess_backend.h> +#include <mali_kbase_hwcnt_backend_csf_if_fw.h> #include <mali_kbase_ctx_sched.h> #include <mali_kbase_reset_gpu.h> #include <csf/mali_kbase_csf.h> @@ -170,6 +171,77 @@ static void kbase_backend_late_term(struct kbase_device *kbdev) kbase_hwaccess_pm_term(kbdev); } +/** + * kbase_device_hwcnt_backend_csf_if_init - Create hardware counter backend + * firmware interface. + * @kbdev: Device pointer + */ +static int kbase_device_hwcnt_backend_csf_if_init(struct kbase_device *kbdev) +{ + return kbase_hwcnt_backend_csf_if_fw_create( + kbdev, &kbdev->hwcnt_backend_csf_if_fw); +} + +/** + * kbase_device_hwcnt_backend_csf_if_term - Terminate hardware counter backend + * firmware interface. + * @kbdev: Device pointer + */ +static void kbase_device_hwcnt_backend_csf_if_term(struct kbase_device *kbdev) +{ + kbase_hwcnt_backend_csf_if_fw_destroy(&kbdev->hwcnt_backend_csf_if_fw); +} + +/** + * kbase_device_hwcnt_backend_csf_init - Create hardware counter backend. 
+ * @kbdev: Device pointer + */ + +static int kbase_device_hwcnt_backend_csf_init(struct kbase_device *kbdev) +{ + return kbase_hwcnt_backend_csf_create( + &kbdev->hwcnt_backend_csf_if_fw, + KBASE_HWCNT_BACKEND_CSF_RING_BUFFER_COUNT, + &kbdev->hwcnt_gpu_iface); +} + +/** + * kbase_device_hwcnt_backend_csf_term - Terminate hardware counter backend. + * @kbdev: Device pointer + */ +static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev) +{ + kbase_hwcnt_backend_csf_destroy(&kbdev->hwcnt_gpu_iface); +} + +/** + * kbase_device_hwcnt_backend_csf_metadata_init - Initialize hardware counter + * metadata. + * @kbdev: Device pointer + */ +static int +kbase_device_hwcnt_backend_csf_metadata_init(struct kbase_device *kbdev) +{ + /* For CSF GPUs, HWC metadata needs to query information from CSF + * firmware, so the initialization of HWC metadata only can be called + * after firmware initialized, but firmware initialization depends on + * HWC backend initialization, so we need to separate HWC backend + * metadata initialization from HWC backend initialization. + */ + return kbase_hwcnt_backend_csf_metadata_init(&kbdev->hwcnt_gpu_iface); +} + +/** + * kbase_device_hwcnt_backend_csf_metadata_term - Terminate hardware counter + * metadata. + * @kbdev: Device pointer + */ +static void +kbase_device_hwcnt_backend_csf_metadata_term(struct kbase_device *kbdev) +{ + kbase_hwcnt_backend_csf_metadata_term(&kbdev->hwcnt_gpu_iface); +} + static const struct kbase_device_init dev_init[] = { #ifdef CONFIG_MALI_NO_MALI {kbase_gpu_device_create, kbase_gpu_device_destroy, @@ -244,12 +316,10 @@ static const struct kbase_device_init dev_init[] = { * paragraph that starts with "Word of warning", currently the * second-last paragraph. */ - {kbase_sysfs_init, kbase_sysfs_term, "SysFS group creation failed"}, + {kbase_sysfs_init, kbase_sysfs_term, + "SysFS group creation failed"}, {kbase_device_misc_register, kbase_device_misc_deregister, "Misc device registration failed"}, -#ifdef CONFIG_MALI_BUSLOG - {buslog_init, buslog_term, "Bus log client registration failed"}, -#endif {kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer, "GPU property population failed"}, #endif diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c index 4d11a82..259e42a 100644 --- a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c +++ b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c @@ -69,17 +69,9 @@ static bool kbase_gpu_fault_interrupt(struct kbase_device *kbdev) if (!as_valid || (as_nr == MCU_AS_NR)) { kbase_report_gpu_fault(kbdev, status, as_nr, as_valid); - /* MCU bus fault could mean hardware counters will stop - * working. - * Put the backend into the unrecoverable error state to - * cause current and subsequent counter operations to - * immediately fail, avoiding the risk of a hang. - */ - kbase_hwcnt_backend_csf_on_unrecoverable_error( - &kbdev->hwcnt_gpu_iface); - dev_err(kbdev->dev, "GPU bus fault triggering gpu-reset ...\n"); - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu( + kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } else { /* Handle Bus fault */ @@ -133,16 +125,8 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) } kbase_csf_scheduler_spin_unlock(kbdev, flags); - /* Protected fault means we're unlikely to have the counter - * operations we might do during reset acknowledged. 
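The static hardware-counter wrappers added to mali_kbase_device_csf.c above are thin on purpose: each one gives a setup or teardown step a uniform signature so it can slot into the table-driven dev_init[] sequence, and the metadata stage is ordered after firmware load because it queries the firmware. A minimal standalone sketch of that table pattern, with invented stage names rather than the driver's real callbacks, could look like this:

#include <stdio.h>

/* Illustrative only: stand-ins for the real kbase init/term callbacks. */
struct device_stage {
        int (*init)(void);
        void (*term)(void);
        const char *err_mes;
};

static int if_init(void)       { puts("hwcnt csf_if: create");  return 0; }
static void if_term(void)      { puts("hwcnt csf_if: destroy"); }
static int backend_init(void)  { puts("hwcnt backend: create"); return 0; }
static void backend_term(void) { puts("hwcnt backend: destroy"); }
static int fw_init(void)       { puts("firmware: load");        return 0; }
static void fw_term(void)      { puts("firmware: unload"); }
static int meta_init(void)     { puts("hwcnt metadata: init");  return 0; }
static void meta_term(void)    { puts("hwcnt metadata: term"); }

/* Metadata comes after firmware, mirroring the ordering in dev_init[]. */
static const struct device_stage stages[] = {
        { if_init,      if_term,      "csf_if creation failed" },
        { backend_init, backend_term, "backend creation failed" },
        { fw_init,      fw_term,      "firmware load failed" },
        { meta_init,    meta_term,    "metadata init failed" },
};

int main(void)
{
        size_t i, n = sizeof(stages) / sizeof(stages[0]);

        for (i = 0; i < n; i++) {
                if (stages[i].init()) {
                        fprintf(stderr, "%s\n", stages[i].err_mes);
                        while (i--)   /* unwind completed stages in reverse */
                                stages[i].term();
                        return 1;
                }
        }
        for (i = n; i--;)             /* full teardown, also in reverse */
                stages[i].term();
        return 0;
}

On failure the loop unwinds only the stages that completed, in reverse order, which is the role kbase_device_term_partial() plays for the real table.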
- * Put the backend into the unrecoverable error state to cause - * current and subsequent counter operations to immediately - * fail, avoiding the risk of a hang. - */ - kbase_hwcnt_backend_csf_on_unrecoverable_error( - &kbdev->hwcnt_gpu_iface); - - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu( + kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } diff --git a/mali_kbase/device/backend/mali_kbase_device_jm.c b/mali_kbase/device/backend/mali_kbase_device_jm.c index 8052fba..9301310 100644 --- a/mali_kbase/device/backend/mali_kbase_device_jm.c +++ b/mali_kbase/device/backend/mali_kbase_device_jm.c @@ -21,6 +21,7 @@ #include "../mali_kbase_device_internal.h" #include "../mali_kbase_device.h" +#include "../mali_kbase_hwaccess_instr.h" #include <mali_kbase_config_defaults.h> #include <mali_kbase_hwaccess_backend.h> @@ -107,6 +108,7 @@ static int kbase_backend_late_init(struct kbase_device *kbdev) return 0; fail_update_l2_features: + kbase_backend_devfreq_term(kbdev); fail_devfreq_init: kbase_job_slot_term(kbdev); fail_job_slot: @@ -144,6 +146,16 @@ static void kbase_backend_late_term(struct kbase_device *kbdev) kbase_hwaccess_pm_term(kbdev); } +static int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev) +{ + return kbase_hwcnt_backend_jm_create(kbdev, &kbdev->hwcnt_gpu_iface); +} + +static void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev) +{ + kbase_hwcnt_backend_jm_destroy(&kbdev->hwcnt_gpu_iface); +} + static const struct kbase_device_init dev_init[] = { #ifdef CONFIG_MALI_NO_MALI {kbase_gpu_device_create, kbase_gpu_device_destroy, @@ -183,6 +195,8 @@ static const struct kbase_device_init dev_init[] = { {kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term, "Clock rate trace manager initialization failed"}, + {kbase_instr_backend_init, kbase_instr_backend_term, + "Instrumentation backend initialization failed"}, {kbase_device_hwcnt_backend_jm_init, kbase_device_hwcnt_backend_jm_term, "GPU hwcnt backend creation failed"}, @@ -215,9 +229,6 @@ static const struct kbase_device_init dev_init[] = { {kbase_sysfs_init, kbase_sysfs_term, "SysFS group creation failed"}, {kbase_device_misc_register, kbase_device_misc_deregister, "Misc device registration failed"}, -#ifdef CONFIG_MALI_BUSLOG - {buslog_init, buslog_term, "Bus log client registration failed"}, -#endif {kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer, "GPU property population failed"}, #endif @@ -254,7 +265,8 @@ int kbase_device_init(struct kbase_device *kbdev) for (i = 0; i < ARRAY_SIZE(dev_init); i++) { err = dev_init[i].init(kbdev); if (err) { - dev_err(kbdev->dev, "%s error = %d\n", + if (err != -EPROBE_DEFER) + dev_err(kbdev->dev, "%s error = %d\n", dev_init[i].err_mes, err); kbase_device_term_partial(kbdev, i); break; diff --git a/mali_kbase/device/mali_kbase_device.c b/mali_kbase/device/mali_kbase_device.c index a90c8cd..5e900d0 100644 --- a/mali_kbase/device/mali_kbase_device.c +++ b/mali_kbase/device/mali_kbase_device.c @@ -40,9 +40,6 @@ #include <tl/mali_kbase_timeline.h> #include "mali_kbase_vinstr.h" -#if MALI_USE_CSF -#include <mali_kbase_hwcnt_backend_csf_if_fw.h> -#endif #include "mali_kbase_hwcnt_context.h" #include "mali_kbase_hwcnt_virtualizer.h" @@ -227,10 +224,6 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) if (err) goto dma_set_mask_failed; -#if !MALI_USE_CSF - spin_lock_init(&kbdev->hwcnt.lock); -#endif - err = kbase_ktrace_init(kbdev); if (err) goto term_as; @@ -241,20 +234,11 @@ int 
kbase_device_misc_init(struct kbase_device * const kbdev) atomic_set(&kbdev->ctx_num, 0); -#if !MALI_USE_CSF - err = kbase_instr_backend_init(kbdev); - if (err) - goto term_trace; -#endif - kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD; kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS; - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - kbdev->mmu_mode = kbase_mmu_mode_get_aarch64(); - else - kbdev->mmu_mode = kbase_mmu_mode_get_lpae(); + kbdev->mmu_mode = kbase_mmu_mode_get_aarch64(); mutex_init(&kbdev->kctx_list_lock); INIT_LIST_HEAD(&kbdev->kctx_list); @@ -263,11 +247,6 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) return 0; -#if !MALI_USE_CSF -term_trace: - kbase_ktrace_term(kbdev); -#endif - term_as: kbase_device_all_as_term(kbdev); dma_set_mask_failed: @@ -285,10 +264,6 @@ void kbase_device_misc_term(struct kbase_device *kbdev) kbase_debug_assert_register_hook(NULL, NULL); #endif -#if !MALI_USE_CSF - kbase_instr_backend_term(kbdev); -#endif - kbase_ktrace_term(kbdev); kbase_device_all_as_term(kbdev); @@ -311,60 +286,6 @@ void kbase_increment_device_id(void) kbase_dev_nr++; } -#if MALI_USE_CSF - -int kbase_device_hwcnt_backend_csf_if_init(struct kbase_device *kbdev) -{ - return kbase_hwcnt_backend_csf_if_fw_create( - kbdev, &kbdev->hwcnt_backend_csf_if_fw); -} - -void kbase_device_hwcnt_backend_csf_if_term(struct kbase_device *kbdev) -{ - kbase_hwcnt_backend_csf_if_fw_destroy(&kbdev->hwcnt_backend_csf_if_fw); -} - -int kbase_device_hwcnt_backend_csf_init(struct kbase_device *kbdev) -{ - return kbase_hwcnt_backend_csf_create( - &kbdev->hwcnt_backend_csf_if_fw, - KBASE_HWCNT_BACKEND_CSF_RING_BUFFER_COUNT, - &kbdev->hwcnt_gpu_iface); -} - -void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev) -{ - kbase_hwcnt_backend_csf_destroy(&kbdev->hwcnt_gpu_iface); -} - -int kbase_device_hwcnt_backend_csf_metadata_init(struct kbase_device *kbdev) -{ - /* For CSF GPUs, HWC metadata needs to query informatoin from CSF - * firmware, so the initialization of HWC metadata only can be called - * after firmware initialised, but firmware initialization depends on - * HWC backend initialization, so we need to separate HWC backend - * metadata initialization from HWC backend initialization. - */ - return kbase_hwcnt_backend_csf_metadata_init(&kbdev->hwcnt_gpu_iface); -} - -void kbase_device_hwcnt_backend_csf_metadata_term(struct kbase_device *kbdev) -{ - kbase_hwcnt_backend_csf_metadata_term(&kbdev->hwcnt_gpu_iface); -} -#else - -int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev) -{ - return kbase_hwcnt_backend_jm_create(kbdev, &kbdev->hwcnt_gpu_iface); -} - -void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev) -{ - kbase_hwcnt_backend_jm_destroy(&kbdev->hwcnt_gpu_iface); -} -#endif /* MALI_USE_CSF */ - int kbase_device_hwcnt_context_init(struct kbase_device *kbdev) { return kbase_hwcnt_context_init(&kbdev->hwcnt_gpu_iface, @@ -484,7 +405,14 @@ int kbase_device_early_init(struct kbase_device *kbdev) /* We're done accessing the GPU registers for now. 
*/ kbase_pm_register_access_disable(kbdev); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbdev->arb.arb_if) + err = kbase_arbiter_pm_install_interrupts(kbdev); + else + err = kbase_install_interrupts(kbdev); +#else err = kbase_install_interrupts(kbdev); +#endif if (err) goto fail_interrupts; diff --git a/mali_kbase/device/mali_kbase_device_internal.h b/mali_kbase/device/mali_kbase_device_internal.h index 2705e67..067f33c 100644 --- a/mali_kbase/device/mali_kbase_device_internal.h +++ b/mali_kbase/device/mali_kbase_device_internal.h @@ -42,18 +42,6 @@ void kbase_device_vinstr_term(struct kbase_device *kbdev); int kbase_device_timeline_init(struct kbase_device *kbdev); void kbase_device_timeline_term(struct kbase_device *kbdev); -#if MALI_USE_CSF -int kbase_device_hwcnt_backend_csf_init(struct kbase_device *kbdev); -void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev); -int kbase_device_hwcnt_backend_csf_if_init(struct kbase_device *kbdev); -void kbase_device_hwcnt_backend_csf_if_term(struct kbase_device *kbdev); -int kbase_device_hwcnt_backend_csf_metadata_init(struct kbase_device *kbdev); -void kbase_device_hwcnt_backend_csf_metadata_term(struct kbase_device *kbdev); -#else -int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev); -void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev); -#endif - int kbase_device_hwcnt_context_init(struct kbase_device *kbdev); void kbase_device_hwcnt_context_term(struct kbase_device *kbdev); diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c index fa70afc..16eae0a 100644 --- a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c +++ b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,7 +20,7 @@ */ #include <mali_kbase.h> -#include "csf/mali_gpu_csf_registers.h" +#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h> #include "../mali_kbase_gpu_fault.h" const char *kbase_gpu_exception_name(u32 const exception_code) diff --git a/mali_kbase/gpu/mali_kbase_gpu.h b/mali_kbase/gpu/mali_kbase_gpu.h deleted file mode 100644 index dba0e28..0000000 --- a/mali_kbase/gpu/mali_kbase_gpu.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - */ - -#ifndef _KBASE_GPU_H_ -#define _KBASE_GPU_H_ - -#include "mali_kbase_gpu_regmap.h" -#include "mali_kbase_gpu_fault.h" -#include "mali_kbase_gpu_coherency.h" -#include "mali_kbase_gpu_id.h" - -#endif /* _KBASE_GPU_H_ */ diff --git a/mali_kbase/gpu/mali_kbase_gpu_regmap.h b/mali_kbase/gpu/mali_kbase_gpu_regmap.h index b7a566f..05a229d 100644 --- a/mali_kbase/gpu/mali_kbase_gpu_regmap.h +++ b/mali_kbase/gpu/mali_kbase_gpu_regmap.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,420 +22,12 @@ #ifndef _KBASE_GPU_REGMAP_H_ #define _KBASE_GPU_REGMAP_H_ -#include "mali_kbase_gpu_coherency.h" -#include "mali_kbase_gpu_id.h" -#if MALI_USE_CSF -#include "backend/mali_kbase_gpu_regmap_csf.h" -#else -#include "backend/mali_kbase_gpu_regmap_jm.h" -#endif - -/* Begin Register Offsets */ -/* GPU control registers */ - -#define GPU_CONTROL_BASE 0x0000 -#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r)) -#define GPU_ID 0x000 /* (RO) GPU and revision identifier */ -#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */ -#define TILER_FEATURES 0x00C /* (RO) Tiler Features */ -#define MEM_FEATURES 0x010 /* (RO) Memory system features */ -#define MMU_FEATURES 0x014 /* (RO) MMU features */ -#define AS_PRESENT 0x018 /* (RO) Address space slots present */ -#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */ -#define GPU_IRQ_CLEAR 0x024 /* (WO) */ -#define GPU_IRQ_MASK 0x028 /* (RW) */ -#define GPU_IRQ_STATUS 0x02C /* (RO) */ - -#define GPU_COMMAND 0x030 /* (WO) */ -#define GPU_STATUS 0x034 /* (RO) */ - -#define GPU_DBGEN (1 << 8) /* DBGEN wire status */ - -#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */ -#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */ -#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */ - -#define L2_CONFIG 0x048 /* (RW) Level 2 cache configuration */ - -#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ -#define SUPER_L2_COHERENT (1 << 1) /* Shader cores within a core - * supergroup are l2 coherent - */ - -#define PWR_KEY 0x050 /* (WO) Power manager key register */ -#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */ -#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */ -#define GPU_FEATURES_LO 0x060 /* (RO) GPU features, low word */ -#define GPU_FEATURES_HI 0x064 /* (RO) GPU features, high word */ -#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */ -#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */ -#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */ -#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */ - -#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */ -#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */ -#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */ -#define THREAD_FEATURES 0x0AC /* (RO) Thread features */ -#define THREAD_TLS_ALLOC 0x310 /* (RO) Number of threads per core that TLS must be allocated for */ - -#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */ -#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 
32..63 */ -#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */ -#define TEXTURE_FEATURES_3 0x0BC /* (RO) Support flags for texture order */ - -#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2)) - -#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ -#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ - -#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */ -#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */ - -#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ -#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ - -#define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */ -#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */ - -#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ -#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ - -#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ -#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ - -#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ -#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ - -#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */ -#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */ - -#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ -#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ - -#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ -#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ - -#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ -#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ - -#define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */ -#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */ - -#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */ -#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */ - -#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */ -#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */ - -#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */ -#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */ - -#define STACK_PWROFF_LO 0xE30 /* (RO) Core stack power off bitmap, low word */ -#define STACK_PWROFF_HI 0xE34 /* (RO) Core stack power off bitmap, high word */ - -#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */ -#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */ - -#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */ -#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */ - -#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */ -#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */ - -#define ASN_HASH_0 0x02C0 /* (RW) ASN hash function argument 0 */ -#define ASN_HASH(n) (ASN_HASH_0 + (n)*4) -#define ASN_HASH_COUNT 3 - -#define STACK_PWRTRANS_LO 0xE40 /* (RO) 
Core stack power transition bitmap, low word */ -#define STACK_PWRTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */ - -#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */ -#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */ - -#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */ -#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */ - -#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ -#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ - -#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ -#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */ - -#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */ -#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */ -#define L2_MMU_CONFIG 0xF0C /* (RW) L2 cache and MMU configuration (implementation-specific) */ - -/* Job control registers */ - -#define JOB_CONTROL_BASE 0x1000 - -#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r)) - -#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */ -#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */ -#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */ -#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ - -/* MMU control registers */ - -#define MEMORY_MANAGEMENT_BASE 0x2000 -#define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r)) - -#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */ -#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */ -#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ -#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ - -#define MMU_AS0 0x400 /* Configuration registers for address space 0 */ -#define MMU_AS1 0x440 /* Configuration registers for address space 1 */ -#define MMU_AS2 0x480 /* Configuration registers for address space 2 */ -#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */ -#define MMU_AS4 0x500 /* Configuration registers for address space 4 */ -#define MMU_AS5 0x540 /* Configuration registers for address space 5 */ -#define MMU_AS6 0x580 /* Configuration registers for address space 6 */ -#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */ -#define MMU_AS8 0x600 /* Configuration registers for address space 8 */ -#define MMU_AS9 0x640 /* Configuration registers for address space 9 */ -#define MMU_AS10 0x680 /* Configuration registers for address space 10 */ -#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */ -#define MMU_AS12 0x700 /* Configuration registers for address space 12 */ -#define MMU_AS13 0x740 /* Configuration registers for address space 13 */ -#define MMU_AS14 0x780 /* Configuration registers for address space 14 */ -#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */ - -/* MMU address space control registers */ - -#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r)) - -#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */ -#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */ -#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */ -#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. 
*/ -#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */ -#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */ -#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */ -#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */ -#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */ -#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */ -#define AS_STATUS 0x28 /* (RO) Status flags for address space n */ - -/* (RW) Translation table configuration for address space n, low word */ -#define AS_TRANSCFG_LO 0x30 -/* (RW) Translation table configuration for address space n, high word */ -#define AS_TRANSCFG_HI 0x34 -/* (RO) Secondary fault address for address space n, low word */ -#define AS_FAULTEXTRA_LO 0x38 -/* (RO) Secondary fault address for address space n, high word */ -#define AS_FAULTEXTRA_HI 0x3C - -/* End Register Offsets */ +#include <uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h> /* Include POWER_CHANGED_SINGLE in debug builds for use in irq latency test. */ #ifdef CONFIG_MALI_DEBUG +#undef GPU_IRQ_REG_ALL #define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE) -#else /* CONFIG_MALI_DEBUG */ -#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON) #endif /* CONFIG_MALI_DEBUG */ -/* - * MMU_IRQ_RAWSTAT register values. Values are valid also for - * MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers. - */ - -#define MMU_PAGE_FAULT_FLAGS 16 - -/* Macros returning a bitmask to retrieve page fault or bus error flags from - * MMU registers - */ -#define MMU_PAGE_FAULT(n) (1UL << (n)) -#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS)) - -/* - * Begin LPAE MMU TRANSTAB register values - */ -#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK 0xfffff000 -#define AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED (0u << 0) -#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY (1u << 1) -#define AS_TRANSTAB_LPAE_ADRMODE_TABLE (3u << 0) -#define AS_TRANSTAB_LPAE_READ_INNER (1u << 2) -#define AS_TRANSTAB_LPAE_SHARE_OUTER (1u << 4) - -#define AS_TRANSTAB_LPAE_ADRMODE_MASK 0x00000003 - -/* - * Begin AARCH64 MMU TRANSTAB register values - */ -#define MMU_HW_OUTA_BITS 40 -#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4)) - -/* - * Begin MMU STATUS register values - */ -#define AS_STATUS_AS_ACTIVE 0x01 - -#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3) - -#define AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0 -#define AS_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFF << AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) -#define AS_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \ - (((reg_val)&AS_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) -#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_0 0xC0 - -#define AS_FAULTSTATUS_ACCESS_TYPE_SHIFT 8 -#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) -#define AS_FAULTSTATUS_ACCESS_TYPE_GET(reg_val) \ - (((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) - -#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0) -#define 
AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1) -#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2) -#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3) - -#define AS_FAULTSTATUS_SOURCE_ID_SHIFT 16 -#define AS_FAULTSTATUS_SOURCE_ID_MASK (0xFFFF << AS_FAULTSTATUS_SOURCE_ID_SHIFT) -#define AS_FAULTSTATUS_SOURCE_ID_GET(reg_val) \ - (((reg_val)&AS_FAULTSTATUS_SOURCE_ID_MASK) >> AS_FAULTSTATUS_SOURCE_ID_SHIFT) - -/* - * Begin MMU TRANSCFG register values - */ -#define AS_TRANSCFG_ADRMODE_LEGACY 0 -#define AS_TRANSCFG_ADRMODE_UNMAPPED 1 -#define AS_TRANSCFG_ADRMODE_IDENTITY 2 -#define AS_TRANSCFG_ADRMODE_AARCH64_4K 6 -#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8 - -#define AS_TRANSCFG_ADRMODE_MASK 0xF - -/* - * Begin TRANSCFG register values - */ -#define AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24) -#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24) -#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24) - -#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28)) -#define AS_TRANSCFG_PTW_SH_OS (2ull << 28) -#define AS_TRANSCFG_PTW_SH_IS (3ull << 28) -#define AS_TRANSCFG_R_ALLOCATE (1ull << 30) - -/* - * Begin Command Values - */ - -/* AS_COMMAND register commands */ -#define AS_COMMAND_NOP 0x00 /* NOP Operation */ -#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */ -#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */ -#define AS_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */ -/* Flush all L2 caches then issue a flush region command to all MMUs - * (deprecated - only for use with T60x) - */ -#define AS_COMMAND_FLUSH 0x04 -/* Flush all L2 caches then issue a flush region command to all MMUs */ -#define AS_COMMAND_FLUSH_PT 0x04 -/* Wait for memory accesses to complete, flush all the L1s cache then flush all - * L2 caches then issue a flush region command to all MMUs - */ -#define AS_COMMAND_FLUSH_MEM 0x05 - -/* GPU_STATUS values */ -#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ -#define GPU_STATUS_CYCLE_COUNT_ACTIVE (1 << 6) /* Set if the cycle counter is active. */ -#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ - -/* PRFCNT_CONFIG register values */ -#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */ -#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */ -#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */ - -/* The performance counters are disabled. */ -#define PRFCNT_CONFIG_MODE_OFF 0 -/* The performance counters are enabled, but are only written out when a - * PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. - */ -#define PRFCNT_CONFIG_MODE_MANUAL 1 -/* The performance counters are enabled, and are written out each time a tile - * finishes rendering. - */ -#define PRFCNT_CONFIG_MODE_TILE 2 - -/* AS<n>_MEMATTR values from MMU_MEMATTR_STAGE1: */ -/* Use GPU implementation-defined caching policy. */ -#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull -/* The attribute set to force all resources to be cached. */ -#define AS_MEMATTR_FORCE_TO_CACHE_ALL 0x8Full -/* Inner write-alloc cache setup, no outer caching */ -#define AS_MEMATTR_WRITE_ALLOC 0x8Dull - -/* Use GPU implementation-defined caching policy. */ -#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull -/* The attribute set to force all resources to be cached. 
*/ -#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL 0x4Full -/* Inner write-alloc cache setup, no outer caching */ -#define AS_MEMATTR_LPAE_WRITE_ALLOC 0x4Dull -/* Set to implementation defined, outer caching */ -#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF 0x88ull -/* Set to write back memory, outer caching */ -#define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull -/* There is no LPAE support for non-cacheable, since the memory type is always - * write-back. - * Marking this setting as reserved for LPAE - */ -#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED - -/* L2_MMU_CONFIG register */ -#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23) -#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT) - -/* End L2_MMU_CONFIG register */ - -/* THREAD_* registers */ - -/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */ -#define IMPLEMENTATION_UNSPECIFIED 0 -#define IMPLEMENTATION_SILICON 1 -#define IMPLEMENTATION_FPGA 2 -#define IMPLEMENTATION_MODEL 3 - -/* Default values when registers are not supported by the implemented hardware */ -#define THREAD_MT_DEFAULT 256 -#define THREAD_MWS_DEFAULT 256 -#define THREAD_MBS_DEFAULT 256 -#define THREAD_MR_DEFAULT 1024 -#define THREAD_MTQ_DEFAULT 4 -#define THREAD_MTGS_DEFAULT 10 - -/* End THREAD_* registers */ - -/* SHADER_CONFIG register */ -#define SC_LS_ALLOW_ATTR_TYPES (1ul << 16) -#define SC_TLS_HASH_ENABLE (1ul << 17) -#define SC_LS_ATTR_CHECK_DISABLE (1ul << 18) -#define SC_VAR_ALGORITHM (1ul << 29) -/* End SHADER_CONFIG register */ - -/* TILER_CONFIG register */ -#define TC_CLOCK_GATE_OVERRIDE (1ul << 0) -/* End TILER_CONFIG register */ - -/* L2_CONFIG register */ -#define L2_CONFIG_SIZE_SHIFT 16 -#define L2_CONFIG_SIZE_MASK (0xFFul << L2_CONFIG_SIZE_SHIFT) -#define L2_CONFIG_HASH_SHIFT 24 -#define L2_CONFIG_HASH_MASK (0xFFul << L2_CONFIG_HASH_SHIFT) -#define L2_CONFIG_ASN_HASH_ENABLE_SHIFT 24 -#define L2_CONFIG_ASN_HASH_ENABLE_MASK (1ul << L2_CONFIG_ASN_HASH_ENABLE_SHIFT) -/* End L2_CONFIG register */ - -/* IDVS_GROUP register */ -#define IDVS_GROUP_SIZE_SHIFT (16) -#define IDVS_GROUP_MAX_SIZE (0x3F) - #endif /* _KBASE_GPU_REGMAP_H_ */ diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c index d7648cd..00c0f60 100644 --- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c +++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c @@ -23,7 +23,9 @@ #include "mali_kbase_ipa_counter_common_jm.h" #include "mali_kbase.h" - +#ifdef CONFIG_MALI_NO_MALI +#include <backend/gpu/mali_kbase_model_dummy.h> +#endif /* Performance counter blocks base offsets */ #define JM_BASE (0 * KBASE_IPA_NR_BYTES_PER_BLOCK) @@ -94,10 +96,15 @@ static u32 kbase_g7x_power_model_get_memsys_counter(struct kbase_ipa_model_vinst static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data, u32 counter_block_offset) { +#ifdef CONFIG_MALI_NO_MALI + const u32 sc_base = MEMSYS_BASE + + (KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS * + KBASE_IPA_NR_BYTES_PER_BLOCK); +#else const u32 sc_base = MEMSYS_BASE + (model_data->kbdev->gpu_props.props.l2_props.num_l2_slices * KBASE_IPA_NR_BYTES_PER_BLOCK); - +#endif return sc_base + counter_block_offset; } diff --git a/mali_kbase/jm/mali_kbase_jm_js.h b/mali_kbase/jm/mali_kbase_jm_js.h index 06adb36..e327536 100644 --- a/mali_kbase/jm/mali_kbase_jm_js.h +++ b/mali_kbase/jm/mali_kbase_jm_js.h @@ -657,7 +657,7 @@ static inline bool kbasep_js_is_submit_allowed( test_bit = (u16) (1u << kctx->as_nr); is_allowed = (bool) 
(js_devdata->runpool_irq.submit_allowed & test_bit); - dev_dbg(kctx->kbdev->dev, "JS: submit %s allowed on %p (as=%d)", + dev_dbg(kctx->kbdev->dev, "JS: submit %s allowed on %pK (as=%d)", is_allowed ? "is" : "isn't", (void *)kctx, kctx->as_nr); return is_allowed; } @@ -684,7 +684,7 @@ static inline void kbasep_js_set_submit_allowed( set_bit = (u16) (1u << kctx->as_nr); - dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", + dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %pK (as=%d)", kctx, kctx->as_nr); js_devdata->runpool_irq.submit_allowed |= set_bit; @@ -715,7 +715,7 @@ static inline void kbasep_js_clear_submit_allowed( clear_bit = (u16) (1u << kctx->as_nr); clear_mask = ~clear_bit; - dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", + dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %pK (as=%d)", kctx, kctx->as_nr); js_devdata->runpool_irq.submit_allowed &= clear_mask; diff --git a/mali_kbase/jm/mali_kbase_js_defs.h b/mali_kbase/jm/mali_kbase_js_defs.h index 997cd49..183f0b0 100644 --- a/mali_kbase/jm/mali_kbase_js_defs.h +++ b/mali_kbase/jm/mali_kbase_js_defs.h @@ -171,7 +171,8 @@ enum { * Internal atom priority defines for kbase_jd_atom::sched_prio */ enum { - KBASE_JS_ATOM_SCHED_PRIO_REALTIME = 0, + KBASE_JS_ATOM_SCHED_PRIO_FIRST = 0, + KBASE_JS_ATOM_SCHED_PRIO_REALTIME = KBASE_JS_ATOM_SCHED_PRIO_FIRST, KBASE_JS_ATOM_SCHED_PRIO_HIGH, KBASE_JS_ATOM_SCHED_PRIO_MED, KBASE_JS_ATOM_SCHED_PRIO_LOW, diff --git a/mali_kbase/mali_base_hwconfig_features.h b/mali_kbase/mali_base_hwconfig_features.h index d6f31cf..bdc769f 100644 --- a/mali_kbase/mali_base_hwconfig_features.h +++ b/mali_kbase/mali_base_hwconfig_features.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -49,7 +49,6 @@ enum base_hw_feature { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_TLS_HASHING, BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, @@ -85,7 +84,6 @@ static const enum base_hw_feature base_hw_features_tMIx[] = { BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_END }; @@ -112,7 +110,6 @@ static const enum base_hw_feature base_hw_features_tHEx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_END }; @@ -139,7 +136,6 @@ static const enum base_hw_feature base_hw_features_tSIx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_END }; @@ -166,7 +162,6 @@ static const enum base_hw_feature base_hw_features_tDVx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_END }; @@ -193,7 +188,6 @@ static const enum base_hw_feature base_hw_features_tNOx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_TLS_HASHING, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END @@ -222,7 +216,6 @@ static const enum base_hw_feature base_hw_features_tGOx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_TLS_HASHING, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END @@ -250,7 +243,6 @@ static const enum base_hw_feature base_hw_features_tTRx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_END @@ -278,7 +270,6 @@ static const enum base_hw_feature base_hw_features_tNAx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_END @@ -306,7 +297,6 @@ static const enum base_hw_feature base_hw_features_tBEx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, @@ -335,7 +325,6 @@ static const enum base_hw_feature base_hw_features_tBAx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, @@ -364,7 +353,6 @@ static const enum base_hw_feature base_hw_features_tDUx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, @@ -393,7 +381,6 @@ static const enum base_hw_feature 
base_hw_features_tODx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_END diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h index 0afabb1..a61eeb2 100644 --- a/mali_kbase/mali_base_hwconfig_issues.h +++ b/mali_kbase/mali_base_hwconfig_issues.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/mali_kbase/mali_kbase.h b/mali_kbase/mali_kbase.h index a78ff43..b6683b9 100644 --- a/mali_kbase/mali_kbase.h +++ b/mali_kbase/mali_kbase.h @@ -45,7 +45,7 @@ #include <linux/workqueue.h> #include <linux/interrupt.h> -#include "mali_base_kernel.h" +#include <uapi/gpu/arm/midgard/mali_base_kernel.h> #include <mali_kbase_linux.h> /* @@ -64,7 +64,7 @@ #include "mali_kbase_gpu_memory_debugfs.h" #include "mali_kbase_mem_profile_debugfs.h" #include "mali_kbase_gpuprops.h" -#include "mali_kbase_ioctl.h" +#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h> #if !MALI_USE_CSF #include "mali_kbase_debug_job_fault.h" #include "mali_kbase_jd_debugfs.h" @@ -213,10 +213,6 @@ void registers_unmap(struct kbase_device *kbdev); int kbase_device_coherency_init(struct kbase_device *kbdev); -#ifdef CONFIG_MALI_BUSLOG -int buslog_init(struct kbase_device *kbdev); -void buslog_term(struct kbase_device *kbdev); -#endif #if !MALI_USE_CSF int kbase_jd_init(struct kbase_context *kctx); diff --git a/mali_kbase/mali_kbase_cache_policy.h b/mali_kbase/mali_kbase_cache_policy.h index 817710a..2cd3079 100644 --- a/mali_kbase/mali_kbase_cache_policy.h +++ b/mali_kbase/mali_kbase_cache_policy.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2012-2013, 2015, 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2013, 2015, 2020-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,7 +27,7 @@ #define _KBASE_CACHE_POLICY_H_ #include "mali_kbase.h" -#include "mali_base_kernel.h" +#include <uapi/gpu/arm/midgard/mali_base_kernel.h> /** * kbase_cache_enabled - Choose the cache policy for a specific region diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c index 4e5155a..96fcbcd 100644 --- a/mali_kbase/mali_kbase_core_linux.c +++ b/mali_kbase/mali_kbase_core_linux.c @@ -53,7 +53,7 @@ #include <mali_kbase_hwaccess_instr.h> #endif #include <mali_kbase_reset_gpu.h> -#include "mali_kbase_ioctl.h" +#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h> #if !MALI_USE_CSF #include "mali_kbase_kinstr_jm.h" #endif @@ -1150,10 +1150,7 @@ static int kbase_api_mem_alias(struct kbase_context *kctx, u64 flags; int err; - if (alias->in.nents == 0 || alias->in.nents > 2048) - return -EINVAL; - - if (alias->in.stride > (U64_MAX / 2048)) + if (alias->in.nents == 0 || alias->in.nents > BASE_MEM_ALIAS_MAX_ENTS) return -EINVAL; ai = vmalloc(sizeof(*ai) * alias->in.nents); @@ -1357,18 +1354,6 @@ static int kbase_api_sticky_resource_unmap(struct kbase_context *kctx, } #if MALI_UNIT_TEST -static int kbase_api_tlstream_test(struct kbase_context *kctx, - struct kbase_ioctl_tlstream_test *test) -{ - kbase_timeline_test( - kctx->kbdev, - test->tpw_count, - test->msg_delay, - test->msg_count, - test->aux_msg); - - return 0; -} static int kbase_api_tlstream_stats(struct kbase_context *kctx, struct kbase_ioctl_tlstream_stats *stats) @@ -1508,14 +1493,11 @@ static int kbase_ioctl_cs_get_glb_iface(struct kbase_context *kctx, } if (!err) { - param->out.total_stream_num = - kbase_csf_firmware_get_glb_iface(kctx->kbdev, - group_data, max_group_num, - stream_data, max_total_stream_num, - ¶m->out.glb_version, ¶m->out.features, - ¶m->out.group_num, ¶m->out.prfcnt_size); - - param->out.padding = 0; + param->out.total_stream_num = kbase_csf_firmware_get_glb_iface( + kctx->kbdev, group_data, max_group_num, stream_data, + max_total_stream_num, ¶m->out.glb_version, + ¶m->out.features, ¶m->out.group_num, + ¶m->out.prfcnt_size, ¶m->out.instr_features); if (copy_to_user(user_groups, group_data, MIN(max_group_num, param->out.group_num) * @@ -1619,6 +1601,23 @@ static int kbasep_ioctl_context_priority_check(struct kbase_context *kctx, return ret; \ } while (0) +static int kbasep_ioctl_set_limited_core_count(struct kbase_context *kctx, + struct kbase_ioctl_set_limited_core_count *set_limited_core_count) +{ + const u64 shader_core_mask = + kbase_pm_get_present_cores(kctx->kbdev, KBASE_PM_CORE_SHADER); + const u64 limited_core_mask = + ((u64)1 << (set_limited_core_count->max_core_count)) - 1; + + if ((shader_core_mask & limited_core_mask) == 0) { + /* At least one shader core must be available after applying the mask */ + return -EINVAL; + } + + kctx->limited_core_mask = limited_core_mask; + return 0; +} + static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct kbase_file *const kfile = filp->private_data; @@ -1980,12 +1979,6 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) break; #endif /* MALI_USE_CSF */ #if MALI_UNIT_TEST - case KBASE_IOCTL_TLSTREAM_TEST: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_TEST, - kbase_api_tlstream_test, - struct kbase_ioctl_tlstream_test, - kctx); - break; case KBASE_IOCTL_TLSTREAM_STATS: KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_TLSTREAM_STATS, 
kbase_api_tlstream_stats, @@ -1999,6 +1992,12 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct kbase_ioctl_context_priority_check, kctx); break; + case KBASE_IOCTL_SET_LIMITED_CORE_COUNT: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_LIMITED_CORE_COUNT, + kbasep_ioctl_set_limited_core_count, + struct kbase_ioctl_set_limited_core_count, + kctx); + break; } dev_warn(kbdev->dev, "Unknown ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd)); @@ -2115,7 +2114,7 @@ static unsigned int kbase_poll(struct file *filp, poll_table *wait) void kbase_event_wakeup(struct kbase_context *kctx) { KBASE_DEBUG_ASSERT(kctx); - dev_dbg(kctx->kbdev->dev, "Waking event queue for context %p\n", + dev_dbg(kctx->kbdev->dev, "Waking event queue for context %pK\n", (void *)kctx); wake_up_interruptible(&kctx->event_queue); } @@ -3086,7 +3085,7 @@ static ssize_t kbase_show_gpuinfo(struct device *dev, { .id = GPU_ID2_PRODUCT_TBEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, .name = "Mali-G78" }, { .id = GPU_ID2_PRODUCT_TBAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-TBAX" }, + .name = "Mali-G78AE" }, { .id = GPU_ID2_PRODUCT_LBEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, .name = "Mali-G68" }, { .id = GPU_ID2_PRODUCT_TNAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, @@ -4094,21 +4093,28 @@ static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data) { struct kbase_device *kbdev = container_of(data, struct kbase_device, protected_mode_hwcnt_disable_work); + spinlock_t *backend_lock; unsigned long flags; bool do_disable; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); +#if MALI_USE_CSF + backend_lock = &kbdev->csf.scheduler.interrupt_lock; +#else + backend_lock = &kbdev->hwaccess_lock; +#endif + + spin_lock_irqsave(backend_lock, flags); do_disable = !kbdev->protected_mode_hwcnt_desired && !kbdev->protected_mode_hwcnt_disabled; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(backend_lock, flags); if (!do_disable) return; kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(backend_lock, flags); do_disable = !kbdev->protected_mode_hwcnt_desired && !kbdev->protected_mode_hwcnt_disabled; @@ -4128,9 +4134,10 @@ static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data) kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(backend_lock, flags); } +#ifndef PLATFORM_PROTECTED_CALLBACKS static int kbasep_protected_mode_enable(struct protected_mode_device *pdev) { struct kbase_device *kbdev = pdev->data; @@ -4150,7 +4157,6 @@ static const struct protected_mode_ops kbasep_native_protected_ops = { .protected_mode_disable = kbasep_protected_mode_disable }; -#ifndef PLATFORM_PROTECTED_CALLBACKS #define PLATFORM_PROTECTED_CALLBACKS (&kbasep_native_protected_ops) #endif /* PLATFORM_PROTECTED_CALLBACKS */ @@ -4330,6 +4336,7 @@ int kbase_device_pm_init(struct kbase_device *kbdev) u32 gpu_model_id; if (kbase_is_pv_enabled(kbdev->dev->of_node)) { + dev_info(kbdev->dev, "Arbitration interface enabled\n"); if (kbase_is_pm_enabled(kbdev->dev->of_node)) { /* Arbitration AND power management invalid */ dev_err(kbdev->dev, "Invalid combination of arbitration AND power management\n"); @@ -4353,7 +4360,8 @@ int kbase_device_pm_init(struct kbase_device *kbdev) gpu_model_id = GPU_ID2_MODEL_MATCH_VALUE(product_id); if (gpu_model_id != GPU_ID2_PRODUCT_TGOX - && gpu_model_id != GPU_ID2_PRODUCT_TNOX) { + && gpu_model_id != 
GPU_ID2_PRODUCT_TNOX + && gpu_model_id != GPU_ID2_PRODUCT_TBAX) { kbase_arbiter_pm_early_term(kbdev); dev_err(kbdev->dev, "GPU platform not suitable for arbitration\n"); return -EPERM; @@ -4542,7 +4550,7 @@ void power_control_term(struct kbase_device *kbdev) static void trigger_reset(struct kbase_device *kbdev) { kbase_pm_context_active(kbdev); - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); kbase_pm_context_idle(kbdev); } @@ -4570,7 +4578,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\ MAKE_QUIRK_ACCESSORS(sc); MAKE_QUIRK_ACCESSORS(tiler); MAKE_QUIRK_ACCESSORS(mmu); -MAKE_QUIRK_ACCESSORS(jm); +MAKE_QUIRK_ACCESSORS(gpu); static ssize_t kbase_device_debugfs_reset_write(struct file *file, const char __user *ubuf, size_t count, loff_t *ppos) @@ -4691,7 +4699,9 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev) kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname, NULL); if (!kbdev->mali_debugfs_directory) { - dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n"); + dev_err(kbdev->dev, + "Couldn't create mali debugfs directory: %s\n", + kbdev->devname); err = -ENOMEM; goto out; } @@ -4746,9 +4756,8 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev) debugfs_create_file("quirks_mmu", 0644, kbdev->mali_debugfs_directory, kbdev, &fops_mmu_quirks); - debugfs_create_file("quirks_jm", 0644, - kbdev->mali_debugfs_directory, kbdev, - &fops_jm_quirks); + debugfs_create_file("quirks_gpu", 0644, kbdev->mali_debugfs_directory, + kbdev, &fops_gpu_quirks); debugfs_create_bool("infinite_cache", mode, debugfs_ctx_defaults_directory, @@ -4878,40 +4887,6 @@ int kbase_device_coherency_init(struct kbase_device *kbdev) return 0; } -#ifdef CONFIG_MALI_BUSLOG - -/* Callback used by the kbase bus logger client, to initiate a GPU reset - * when the bus log is restarted. GPU reset is used as reference point - * in HW bus log analyses. 
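Returning to the KBASE_IOCTL_SET_LIMITED_CORE_COUNT handler added earlier in this diff: the mask arithmetic there is terse, so a self-contained sketch may help. The present-core mask below is invented, and it is assumed that max_core_count stays below 64 so the shift is well defined:

#include <stdint.h>
#include <stdio.h>

/* Sketch of the limited-core-count check; all values here are made up. */
static int set_limited_core_count(uint64_t shader_core_mask,
                                  unsigned int max_core_count,
                                  uint64_t *limited_core_mask)
{
        /* Assumes max_core_count < 64; e.g. 4 gives 0b1111 (cores 0..3). */
        uint64_t mask = ((uint64_t)1 << max_core_count) - 1;

        /* At least one physically present core must survive the mask. */
        if ((shader_core_mask & mask) == 0)
                return -1;

        *limited_core_mask = mask;
        return 0;
}

int main(void)
{
        uint64_t present = 0x3F;        /* pretend 6 shader cores are present */
        uint64_t limited;

        if (set_limited_core_count(present, 4, &limited) == 0)
                printf("limited_core_mask = 0x%llx\n",
                       (unsigned long long)limited);

        /* A count of 0 gives mask 0, which the check rejects. */
        if (set_limited_core_count(present, 0, &limited) != 0)
                puts("rejected: no cores would remain usable");
        return 0;
}

A requested count of N keeps cores 0..N-1; intersecting that mask with the present-core bitmap is what guarantees at least one usable shader core remains after the limit is applied.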
- */ -static void kbase_logging_started_cb(void *data) -{ - struct kbase_device *kbdev = (struct kbase_device *)data; - - if (kbase_prepare_to_reset_gpu(kbdev)) - kbase_reset_gpu(kbdev); - dev_info(kbdev->dev, "KBASE - Bus logger restarted\n"); -} - -int buslog_init(struct kbase_device *kbdev) -{ - int err = 0; - - err = bl_core_client_register(kbdev->devname, - kbase_logging_started_cb, - kbdev, &kbdev->buslogger, - THIS_MODULE, NULL); - if (err == 0) - bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024); - - return err; -} - -void buslog_term(struct kbase_device *kbdev) -{ - bl_core_client_unregister(kbdev->buslogger); -} -#endif #if MALI_USE_CSF /** @@ -5222,7 +5197,8 @@ static int kbase_platform_device_probe(struct platform_device *pdev) if (err) { if (err == -EPROBE_DEFER) - dev_err(kbdev->dev, "Device initialization Deferred\n"); + dev_info(kbdev->dev, + "Device initialization Deferred\n"); else dev_err(kbdev->dev, "Device initialization failed\n"); @@ -5448,7 +5424,6 @@ static struct platform_driver kbase_platform_driver = { .remove = kbase_platform_device_remove, .driver = { .name = kbase_drv_name, - .owner = THIS_MODULE, .pm = &kbase_pm_ops, .of_match_table = of_match_ptr(kbase_dt_ids), }, diff --git a/mali_kbase/mali_kbase_ctx_sched.c b/mali_kbase/mali_kbase_ctx_sched.c index f59a2d7..c63bc8d 100644 --- a/mali_kbase/mali_kbase_ctx_sched.c +++ b/mali_kbase/mali_kbase_ctx_sched.c @@ -365,8 +365,7 @@ void kbase_ctx_sched_release_ctx_lock(struct kbase_context *kctx) } #if MALI_USE_CSF -bool kbase_ctx_sched_refcount_mmu_flush(struct kbase_context *kctx, - bool sync) +bool kbase_ctx_sched_inc_refcount_if_as_valid(struct kbase_context *kctx) { struct kbase_device *kbdev; bool added_ref = false; @@ -383,20 +382,16 @@ bool kbase_ctx_sched_refcount_mmu_flush(struct kbase_context *kctx, mutex_lock(&kbdev->mmu_hw_mutex); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - added_ref = kbase_ctx_sched_inc_refcount_nolock(kctx); - - WARN_ON(added_ref && - (kctx->mmu_flush_pend_state != KCTX_MMU_FLUSH_NOT_PEND)); - - if (!added_ref && (kctx->as_nr != KBASEP_AS_NR_INVALID)) { - enum kbase_ctx_mmu_flush_pending_state new_state = - sync ? KCTX_MMU_FLUSH_PEND_SYNC : - KCTX_MMU_FLUSH_PEND_NO_SYNC; + if ((kctx->as_nr != KBASEP_AS_NR_INVALID) && + (kctx == kbdev->as_to_kctx[kctx->as_nr])) { + atomic_inc(&kctx->refcount); - WARN_ON(kctx != kbdev->as_to_kctx[kctx->as_nr]); + if (kbdev->as_free & (1u << kctx->as_nr)) + kbdev->as_free &= ~(1u << kctx->as_nr); - if (kctx->mmu_flush_pend_state != KCTX_MMU_FLUSH_PEND_SYNC) - kctx->mmu_flush_pend_state = new_state; + KBASE_KTRACE_ADD(kbdev, SCHED_RETAIN_CTX_NOLOCK, kctx, + kbase_ktrace_get_ctx_refcnt(kctx)); + added_ref = true; } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); diff --git a/mali_kbase/mali_kbase_ctx_sched.h b/mali_kbase/mali_kbase_ctx_sched.h index 1aa3762..cadb735 100644 --- a/mali_kbase/mali_kbase_ctx_sched.h +++ b/mali_kbase/mali_kbase_ctx_sched.h @@ -222,23 +222,20 @@ void kbase_ctx_sched_release_ctx_lock(struct kbase_context *kctx); #if MALI_USE_CSF /** - * kbase_ctx_sched_refcount_mmu_flush - Refcount the context for the MMU flush - * operation. + * kbase_ctx_sched_inc_refcount_if_as_valid - Refcount the context if it has GPU + * address space slot assigned to it. * - * @kctx: Context to be refcounted. - * @sync: Flag passed to the caller function kbase_mmu_flush_invalidate(). + * @kctx: Context to be refcounted * - * This function takes a reference on the context for the MMU flush operation. 
- * The refcount is taken only if the context is busy/active. - * If the context isn't active but has a GPU address space slot assigned to it - * then a flag is set to indicate that MMU flush operation is pending, which - * will be performed when the context becomes active. + * This function takes a reference on the context if it has a GPU address space + * slot assigned to it. The address space slot will not be available for + * re-assignment until the reference is released. * * Return: true if refcount succeeded and the address space slot will not be - * reassigned, false if the refcount failed (because the context was inactive) + * reassigned, false if the refcount failed (because the address space slot + * was not assigned). */ -bool kbase_ctx_sched_refcount_mmu_flush(struct kbase_context *kctx, - bool sync); +bool kbase_ctx_sched_inc_refcount_if_as_valid(struct kbase_context *kctx); #endif #endif /* _KBASE_CTX_SCHED_H_ */ diff --git a/mali_kbase/mali_kbase_debug_job_fault.c b/mali_kbase/mali_kbase_debug_job_fault.c index 6902ded..7dfdff1 100644 --- a/mali_kbase/mali_kbase_debug_job_fault.c +++ b/mali_kbase/mali_kbase_debug_job_fault.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2012-2016, 2018-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -549,6 +549,14 @@ void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx) { WARN_ON(!kbase_ctx_flag(kctx, KCTX_DYING)); + /* Return early if the job fault part of the kbase_device is not + * initialized yet. An error can happen during the device probe after + * the privileged Kbase context was created for the HW counter dumping + * but before the job fault part is initialized. + */ + if (!kctx->kbdev->job_fault_resume_workq) + return; + kbase_ctx_remove_pending_event(kctx); } diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h index d813f2f..5b7591c 100644 --- a/mali_kbase/mali_kbase_defs.h +++ b/mali_kbase/mali_kbase_defs.h @@ -48,9 +48,6 @@ #include <linux/file.h> #include <linux/sizes.h> -#ifdef CONFIG_MALI_BUSLOG -#include <linux/bus_logger.h> -#endif #if defined(CONFIG_SYNC) #include <sync.h> @@ -554,7 +551,6 @@ struct kbase_mmu_mode { unsigned long flags; }; -struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void); struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); #define DEVNAME_SIZE 16 @@ -624,8 +620,8 @@ struct kbase_process { * issues present in the GPU. * @hw_quirks_mmu: Configuration to be used for the MMU as per the HW * issues present in the GPU. - * @hw_quirks_jm: Configuration to be used for the Job Manager as per - * the HW issues present in the GPU. + * @hw_quirks_gpu: Configuration to be used for the Job Manager or CSF/MCU + * subsystems as per the HW issues present in the GPU. * @entry: Links the device instance to the global list of GPU * devices. The list would have as many entries as there * are GPU device instances. @@ -710,6 +706,8 @@ struct kbase_process { * @nr_hw_address_spaces: Number of address spaces actually available in the * GPU, remains constant after driver initialisation. * @nr_user_address_spaces: Number of address spaces available to user contexts + * @hwcnt_backend_csf_if_fw: Firmware interface to access CSF GPU performance + * counters. 
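For the kbase_ctx_sched_inc_refcount_if_as_valid() rework described just above, the behaviour is easier to see in isolation: a reference is taken only while the context still owns its address-space slot, and holding that reference keeps the slot from being re-assigned until the reference is released. The toy types below, and the locking that is deliberately omitted, are stand-ins rather than the driver's real ones:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define AS_INVALID (-1)

/* Toy model: take a reference only while the AS slot is still ours. */
struct toy_ctx {
        int as_nr;              /* assigned address-space slot, or AS_INVALID */
        atomic_int refcount;
};

static struct toy_ctx *as_to_ctx[8];    /* slot -> owning context */

static bool inc_refcount_if_as_valid(struct toy_ctx *ctx)
{
        /* The real code performs this check under hwaccess_lock. */
        if (ctx->as_nr == AS_INVALID || as_to_ctx[ctx->as_nr] != ctx)
                return false;   /* slot lost or never assigned: no ref taken */

        atomic_fetch_add(&ctx->refcount, 1);
        return true;            /* slot stays ours until the ref is released */
}

int main(void)
{
        struct toy_ctx ctx = { .as_nr = 3, .refcount = 0 };

        as_to_ctx[3] = &ctx;
        printf("got ref: %d\n", inc_refcount_if_as_valid(&ctx));  /* 1 */

        as_to_ctx[3] = NULL;    /* slot handed to another context */
        printf("got ref: %d\n", inc_refcount_if_as_valid(&ctx));  /* 0 */
        return 0;
}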
* @hwcnt: Structure used for instrumentation and HW counters * dumping * @hwcnt.lock: The lock should be used when accessing any of the @@ -754,6 +752,8 @@ struct kbase_process { * including any contexts that might be created for * hardware counters. * @kctx_list_lock: Lock protecting concurrent accesses to @kctx_list. + * @group_max_uid_in_devices: Max value of any queue group UID in any kernel + * context in the kbase device. * @devfreq_profile: Describes devfreq profile for the Mali GPU device, passed * to devfreq_add_device() to add devfreq feature to Mali * GPU device. @@ -918,7 +918,7 @@ struct kbase_device { u32 hw_quirks_sc; u32 hw_quirks_tiler; u32 hw_quirks_mmu; - u32 hw_quirks_jm; + u32 hw_quirks_gpu; struct list_head entry; struct device *dev; @@ -1016,6 +1016,7 @@ struct kbase_device { struct list_head kctx_list; struct mutex kctx_list_lock; + atomic_t group_max_uid_in_devices; #ifdef CONFIG_MALI_DEVFREQ struct devfreq_dev_profile devfreq_profile; @@ -1120,9 +1121,6 @@ struct kbase_device { struct work_struct protected_mode_hwcnt_disable_work; -#ifdef CONFIG_MALI_BUSLOG - struct bus_logger_client *buslogger; -#endif bool irq_reset_flush; @@ -1225,7 +1223,7 @@ struct kbase_file { unsigned long api_version; atomic_t setup_state; }; - +#if MALI_JIT_PRESSURE_LIMIT_BASE /** * enum kbase_context_flags - Flags for kbase contexts * @@ -1285,6 +1283,9 @@ struct kbase_file { * refcount for the context drops to 0 or on when the address spaces are * re-enabled on GPU reset or power cycle. * + * @KCTX_JPL_ENABLED: Set when JIT physical page limit is less than JIT virtual + * address page limit, so we must take care to not exceed the physical limit + * * All members need to be separate bits. This enum is intended for use in a * bitmask where multiple values get OR-ed together. */ @@ -1305,38 +1306,90 @@ enum kbase_context_flags { KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13, KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14, KCTX_AS_DISABLED_ON_FAULT = 1U << 15, -#if MALI_JIT_PRESSURE_LIMIT_BASE - /* - * Set when JIT physical page limit is less than JIT virtual address - * page limit, so we must take care to not exceed the physical limit - */ KCTX_JPL_ENABLED = 1U << 16, -#endif /* !MALI_JIT_PRESSURE_LIMIT_BASE */ }; - -#if MALI_USE_CSF +#else /** - * enum kbase_ctx_mmu_flush_pending_state - State for the pending mmu flush - * operation for a kbase context. + * enum kbase_context_flags - Flags for kbase contexts + * + * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit + * process on a 64-bit kernel. + * + * @KCTX_RUNNABLE_REF: Set when context is counted in + * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing. + * + * @KCTX_ACTIVE: Set when the context is active. + * + * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this + * context. + * + * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been + * initialized. + * + * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new + * allocations. Existing allocations will not change. + * + * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs. + * + * @KCTX_PRIVILEGED:Set if the context uses an address space and should be kept + * scheduled in. + * + * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool. + * This is only ever updated whilst the jsctx_mutex is held. + * + * @KCTX_DYING: Set when the context process is in the process of being evicted. 
+ * + * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this + * context, to disable use of implicit dma-buf fences. This is used to avoid + * potential synchronization deadlocks. + * + * @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory + * allocations. For 64-bit clients it is enabled by default, and disabled by + * default on 32-bit clients. Being able to clear this flag is only used for + * testing purposes of the custom zone allocation on 64-bit user-space builds, + * where we also require more control than is available through e.g. the JIT + * allocation mechanism. However, the 64-bit user-space client must still + * reserve a JIT region using KBASE_IOCTL_MEM_JIT_INIT + * + * @KCTX_PULLED_SINCE_ACTIVE_JS0: Set when the context has had an atom pulled + * from it for job slot 0. This is reset when the context first goes active or + * is re-activated on that slot. + * + * @KCTX_PULLED_SINCE_ACTIVE_JS1: Set when the context has had an atom pulled + * from it for job slot 1. This is reset when the context first goes active or + * is re-activated on that slot. * - * @KCTX_MMU_FLUSH_NOT_PEND: Set when there is no MMU flush operation pending - * for a kbase context or deferred flush operation - * is performed. + * @KCTX_PULLED_SINCE_ACTIVE_JS2: Set when the context has had an atom pulled + * from it for job slot 2. This is reset when the context first goes active or + * is re-activated on that slot. * - * @KCTX_MMU_FLUSH_PEND_NO_SYNC: Set when the MMU flush operation is deferred - * for a kbase context when it is inactive and - * the sync flag passed is 0. + * @KCTX_AS_DISABLED_ON_FAULT: Set when the GPU address space is disabled for + * the context due to unhandled page(or bus) fault. It is cleared when the + * refcount for the context drops to 0 or on when the address spaces are + * re-enabled on GPU reset or power cycle. * - * @KCTX_MMU_FLUSH_PEND_SYNC: Set when the MMU flush operation is deferred - * for a kbase context when it is inactive and - * the sync flag passed is 1. + * All members need to be separate bits. This enum is intended for use in a + * bitmask where multiple values get OR-ed together. */ -enum kbase_ctx_mmu_flush_pending_state { - KCTX_MMU_FLUSH_NOT_PEND, - KCTX_MMU_FLUSH_PEND_NO_SYNC, - KCTX_MMU_FLUSH_PEND_SYNC, +enum kbase_context_flags { + KCTX_COMPAT = 1U << 0, + KCTX_RUNNABLE_REF = 1U << 1, + KCTX_ACTIVE = 1U << 2, + KCTX_PULLED = 1U << 3, + KCTX_MEM_PROFILE_INITIALIZED = 1U << 4, + KCTX_INFINITE_CACHE = 1U << 5, + KCTX_SUBMIT_DISABLED = 1U << 6, + KCTX_PRIVILEGED = 1U << 7, + KCTX_SCHEDULED = 1U << 8, + KCTX_DYING = 1U << 9, + KCTX_NO_IMPLICIT_SYNC = 1U << 10, + KCTX_FORCE_SAME_VA = 1U << 11, + KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12, + KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13, + KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14, + KCTX_AS_DISABLED_ON_FAULT = 1U << 15, }; -#endif +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ struct kbase_sub_alloc { struct list_head link; @@ -1616,12 +1669,8 @@ struct kbase_reg_zone { * @kinstr_jm: Kernel job manager instrumentation context handle * @tl_kctx_list_node: List item into the device timeline's list of * contexts, for timeline summarization. - * @mmu_flush_pend_state: Tracks if the MMU flush operations are pending for the - * context. The flush required due to unmap is also - * tracked. It is supposed to be in - * KCTX_MMU_FLUSH_NOT_PEND state whilst a context is - * active and shall be updated with mmu_hw_mutex lock - * held. 
+ * @limited_core_mask: The mask that is applied to the affinity in case of atoms + * marked with BASE_JD_REQ_LIMITED_CORE_MASK. * * A kernel base context is an entity among which the GPU is scheduled. * Each context has its own GPU address space. @@ -1769,9 +1818,7 @@ struct kbase_context { #endif struct list_head tl_kctx_list_node; -#if MALI_USE_CSF - enum kbase_ctx_mmu_flush_pending_state mmu_flush_pend_state; -#endif + u64 limited_core_mask; }; #ifdef CONFIG_MALI_CINSTR_GWT diff --git a/mali_kbase/mali_kbase_event.c b/mali_kbase/mali_kbase_event.c index 04687ee..25a379d 100644 --- a/mali_kbase/mali_kbase_event.c +++ b/mali_kbase/mali_kbase_event.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2010-2016,2018-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016,2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -42,7 +42,7 @@ static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, stru KBASE_TLSTREAM_TL_DEL_ATOM(kbdev, katom); katom->status = KBASE_JD_ATOM_STATE_UNUSED; - dev_dbg(kbdev->dev, "Atom %p status to unused\n", (void *)katom); + dev_dbg(kbdev->dev, "Atom %pK status to unused\n", (void *)katom); wake_up(&katom->completed); return data; @@ -79,7 +79,7 @@ int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *ueve mutex_unlock(&ctx->event_mutex); - dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom); + dev_dbg(ctx->kbdev->dev, "event dequeuing %pK\n", (void *)atom); uevent->event_code = atom->event_code; uevent->atom_number = (atom - ctx->jctx.atoms); @@ -164,11 +164,11 @@ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) { struct kbase_device *kbdev = ctx->kbdev; - dev_dbg(kbdev->dev, "Posting event for atom %p\n", (void *)atom); + dev_dbg(kbdev->dev, "Posting event for atom %pK\n", (void *)atom); if (WARN_ON(atom->status != KBASE_JD_ATOM_STATE_COMPLETED)) { dev_warn(kbdev->dev, - "%s: Atom %d (%p) not completed (status %d)\n", + "%s: Atom %d (%pK) not completed (status %d)\n", __func__, kbase_jd_atom_id(atom->kctx, atom), atom->kctx, diff --git a/mali_kbase/mali_kbase_gpu_memory_debugfs.c b/mali_kbase/mali_kbase_gpu_memory_debugfs.c index 45ce740..a10b2bb 100644 --- a/mali_kbase/mali_kbase_gpu_memory_debugfs.c +++ b/mali_kbase/mali_kbase_gpu_memory_debugfs.c @@ -56,7 +56,7 @@ static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data) /* output the memory usage and cap for each kctx * opened on this device */ - seq_printf(sfile, " %s-0x%p %10u\n", + seq_printf(sfile, " %s-0x%pK %10u\n", "kctx", kctx, atomic_read(&(kctx->used_pages))); diff --git a/mali_kbase/mali_kbase_gpuprops.c b/mali_kbase/mali_kbase_gpuprops.c index 9da0b00..49f96f6 100644 --- a/mali_kbase/mali_kbase_gpuprops.c +++ b/mali_kbase/mali_kbase_gpuprops.c @@ -28,7 +28,7 @@ #include <mali_kbase_gpuprops.h> #include <mali_kbase_hwaccess_gpuprops.h> #include <mali_kbase_config_defaults.h> -#include "mali_kbase_ioctl.h" +#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h> #include <linux/clk.h> #include <mali_kbase_pm_internal.h> #include <linux/of_platform.h> @@ -104,6 +104,71 @@ static void kbase_gpuprops_construct_coherent_groups( } /** + * kbase_gpuprops_get_curr_config_props - Get the current allocated resources + * @kbdev: The &struct kbase_device structure for the device + * @curr_config: The &struct curr_config_props structure to 
receive the result + * + * Fill the &struct curr_config_props structure with values from the GPU + * configuration registers. + * + * Return: Zero on success, Linux error code on failure + */ +int kbase_gpuprops_get_curr_config_props(struct kbase_device *kbdev, + struct curr_config_props * const curr_config) +{ + struct kbase_current_config_regdump curr_config_regdump; + int err; + + if (WARN_ON(!kbdev) || WARN_ON(!curr_config)) + return -EINVAL; + + /* If update not needed just return. */ + if (!curr_config->update_needed) + return 0; + + /* Dump relevant registers */ + err = kbase_backend_gpuprops_get_curr_config(kbdev, + &curr_config_regdump); + if (err) + return err; + + curr_config->l2_slices = + KBASE_UBFX32(curr_config_regdump.mem_features, 8U, 4) + 1; + + curr_config->l2_present = + ((u64) curr_config_regdump.l2_present_hi << 32) + + curr_config_regdump.l2_present_lo; + + curr_config->shader_present = + ((u64) curr_config_regdump.shader_present_hi << 32) + + curr_config_regdump.shader_present_lo; + + curr_config->num_cores = hweight64(curr_config->shader_present); + + curr_config->update_needed = false; + + return 0; +} + +/** + * kbase_gpuprops_req_curr_config_update - Request Current Config Update + * @kbdev: The &struct kbase_device structure for the device + * + * Requests the current configuration to be updated next time the + * kbase_gpuprops_get_curr_config_props() is called. + * + * Return: Zero on success, Linux error code on failure + */ +int kbase_gpuprops_req_curr_config_update(struct kbase_device *kbdev) +{ + if (WARN_ON(!kbdev)) + return -EINVAL; + + kbdev->gpu_props.curr_config.update_needed = true; + return 0; +} + +/** * kbase_gpuprops_get_props - Get the GPU configuration * @gpu_props: The &struct base_gpu_props structure * @kbdev: The &struct kbase_device structure for the device @@ -183,6 +248,59 @@ void kbase_gpuprops_update_core_props_gpu_id( } /** + * kbase_gpuprops_update_max_config_props - Updates the max config properties in + * the base_gpu_props. + * @base_props: The &struct base_gpu_props structure + * @kbdev: The &struct kbase_device structure for the device + * + * Updates the &struct base_gpu_props structure with the max config properties. + */ +static void kbase_gpuprops_update_max_config_props( + struct base_gpu_props * const base_props, struct kbase_device *kbdev) +{ + int l2_n = 0; + + if (WARN_ON(!kbdev) || WARN_ON(!base_props)) + return; + + /* return if the max_config is not set during arbif initialization */ + if (kbdev->gpu_props.max_config.core_mask == 0) + return; + + /* + * Set the base_props with the maximum config values to ensure that the + * user space will always be based on the maximum resources available. + */ + base_props->l2_props.num_l2_slices = + kbdev->gpu_props.max_config.l2_slices; + base_props->raw_props.shader_present = + kbdev->gpu_props.max_config.core_mask; + /* + * Update l2_present in the raw data to be consistent with the + * max_config.l2_slices number. + */ + base_props->raw_props.l2_present = 0; + for (l2_n = 0; l2_n < base_props->l2_props.num_l2_slices; l2_n++) { + base_props->raw_props.l2_present <<= 1; + base_props->raw_props.l2_present |= 0x1; + } + /* + * Update the coherency_info data using just one core group. For + * architectures where the max_config is provided by the arbiter it is + * not necessary to split the shader core groups in different coherent + * groups. 
+ */ + base_props->coherency_info.coherency = + base_props->raw_props.mem_features; + base_props->coherency_info.num_core_groups = 1; + base_props->coherency_info.num_groups = 1; + base_props->coherency_info.group[0].core_mask = + kbdev->gpu_props.max_config.core_mask; + base_props->coherency_info.group[0].num_cores = + hweight32(kbdev->gpu_props.max_config.core_mask); +} + +/** * kbase_gpuprops_calculate_props - Calculate the derived properties * @gpu_props: The &struct base_gpu_props structure * @kbdev: The &struct kbase_device structure for the device @@ -297,8 +415,30 @@ static void kbase_gpuprops_calculate_props( gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT; gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT; } - /* Initialize the coherent_group structure for each group */ - kbase_gpuprops_construct_coherent_groups(gpu_props); + + /* + * If the maximum resources allocated information is available it is + * necessary to update the base_gpu_props with the max_config info to + * the userspace. This is applicable to systems that receive this + * information from the arbiter. + */ + if (kbdev->gpu_props.max_config.core_mask) + /* Update the max config properties in the base_gpu_props */ + kbase_gpuprops_update_max_config_props(gpu_props, + kbdev); + else + /* Initialize the coherent_group structure for each group */ + kbase_gpuprops_construct_coherent_groups(gpu_props); +} + +void kbase_gpuprops_set_max_config(struct kbase_device *kbdev, + const struct max_config_props *max_config) +{ + if (WARN_ON(!kbdev) || WARN_ON(!max_config)) + return; + + kbdev->gpu_props.max_config.l2_slices = max_config->l2_slices; + kbdev->gpu_props.max_config.core_mask = max_config->core_mask; } void kbase_gpuprops_set(struct kbase_device *kbdev) @@ -306,7 +446,8 @@ void kbase_gpuprops_set(struct kbase_device *kbdev) struct kbase_gpu_props *gpu_props; struct gpu_raw_gpu_props *raw; - KBASE_DEBUG_ASSERT(kbdev != NULL); + if (WARN_ON(!kbdev)) + return; gpu_props = &kbdev->gpu_props; raw = &gpu_props->props.raw_props; @@ -326,9 +467,19 @@ void kbase_gpuprops_set(struct kbase_device *kbdev) gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8); gpu_props->num_cores = hweight64(raw->shader_present); - gpu_props->num_core_groups = hweight64(raw->l2_present); + gpu_props->num_core_groups = + gpu_props->props.coherency_info.num_core_groups; gpu_props->num_address_spaces = hweight32(raw->as_present); gpu_props->num_job_slots = hweight32(raw->js_present); + + /* + * Current configuration is used on HW interactions so that the maximum + * config is just used for user space avoiding interactions with parts + * of the hardware that might not be allocated to the kbase instance at + * that moment. 
+ */
+	kbase_gpuprops_req_curr_config_update(kbdev);
+	kbase_gpuprops_get_curr_config_props(kbdev, &gpu_props->curr_config);
 }
 
 int kbase_gpuprops_set_features(struct kbase_device *kbdev)
@@ -494,7 +645,10 @@ int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev)
 			goto exit;
 
 		dev_info(kbdev->dev, "Reflected L2_FEATURES is 0x%x\n",
-				regdump.l2_features);
+			 regdump.l2_features);
+		dev_info(kbdev->dev, "Reflected L2_CONFIG is 0x%08x\n",
+			 regdump.l2_config);
+
 		/* Update gpuprops with reflected L2_FEATURES */
 		gpu_props->raw_props.l2_features = regdump.l2_features;
 
diff --git a/mali_kbase/mali_kbase_gpuprops.h b/mali_kbase/mali_kbase_gpuprops.h
index 7c7b123..72f76c3 100644
--- a/mali_kbase/mali_kbase_gpuprops.h
+++ b/mali_kbase/mali_kbase_gpuprops.h
@@ -115,4 +115,38 @@ int kbase_device_populate_max_freq(struct kbase_device *kbdev);
 void kbase_gpuprops_update_core_props_gpu_id(
 	struct base_gpu_props * const gpu_props);
 
+/**
+ * kbase_gpuprops_set_max_config - Set the max config information
+ * @kbdev: Device pointer
+ * @max_config: Maximum configuration data to be updated
+ *
+ * This function sets max_config in the kbase_gpu_props.
+ */
+void kbase_gpuprops_set_max_config(struct kbase_device *kbdev,
+		const struct max_config_props *max_config);
+
+/**
+ * kbase_gpuprops_get_curr_config_props - Get the current allocated resources
+ * @kbdev: The &struct kbase_device structure for the device
+ * @curr_config: The &struct curr_config_props structure to receive the result
+ *
+ * Fill the &struct curr_config_props structure with values from the GPU
+ * configuration registers.
+ *
+ * Return: Zero on success, Linux error code on failure
+ */
+int kbase_gpuprops_get_curr_config_props(struct kbase_device *kbdev,
+		struct curr_config_props * const curr_config);
+
+/**
+ * kbase_gpuprops_req_curr_config_update - Request Current Config Update
+ * @kbdev: The &struct kbase_device structure for the device
+ *
+ * Requests the current configuration to be updated next time the
+ * kbase_gpuprops_get_curr_config_props() is called.
+ *
+ * Return: Zero on success, Linux error code on failure
+ */
+int kbase_gpuprops_req_curr_config_update(struct kbase_device *kbdev);
+
 #endif /* _KBASE_GPUPROPS_H_ */
diff --git a/mali_kbase/mali_kbase_gpuprops_types.h b/mali_kbase/mali_kbase_gpuprops_types.h
index 8ecb54f..8b37b88 100644
--- a/mali_kbase/mali_kbase_gpuprops_types.h
+++ b/mali_kbase/mali_kbase_gpuprops_types.h
@@ -26,7 +26,7 @@
 #ifndef _KBASE_GPUPROPS_TYPES_H_
 #define _KBASE_GPUPROPS_TYPES_H_
 
-#include "mali_base_kernel.h"
+#include <uapi/gpu/arm/midgard/mali_base_kernel.h>
 
 #define KBASE_GPU_SPEED_MHZ 123
 #define KBASE_GPU_PC_SIZE_LOG2 24U
@@ -34,6 +34,7 @@ struct kbase_gpuprops_regdump {
 	u32 gpu_id;
 	u32 l2_features;
+	u32 l2_config;
 	u32 core_features;
 	u32 tiler_features;
 	u32 mem_features;
@@ -60,6 +61,28 @@ struct kbase_gpuprops_regdump {
 	u32 gpu_features_hi;
 };
 
+/**
+ * struct kbase_current_config_regdump - Register dump for current resources
+ * allocated to the GPU.
+ * @mem_features: Memory system features. Contains information about the
+ * features of the memory system. Used here to get the L2 slice
+ * count.
+ * @shader_present_lo: Shader core present bitmap. Low word.
+ * @shader_present_hi: Shader core present bitmap. High word.
+ * @l2_present_lo: L2 cache present bitmap. Low word.
+ * @l2_present_hi: L2 cache present bitmap. High word.
+ *
+ * Register dump structure used to store the register data related to the
+ * current resources allocated to the GPU.
+ */
+struct kbase_current_config_regdump {
+	u32 mem_features;
+	u32 shader_present_lo;
+	u32 shader_present_hi;
+	u32 l2_present_lo;
+	u32 l2_present_hi;
+};
+
 struct kbase_gpu_cache_props {
 	u8 associativity;
 	u8 external_bus_width;
@@ -74,6 +97,50 @@ struct kbase_gpu_mmu_props {
 	u8 pa_bits;
 };
 
+/**
+ * struct max_config_props - Properties based on the maximum resources
+ * available.
+ * @l2_slices: Maximum number of L2 slices that can be assigned to the GPU
+ * during runtime.
+ * @padding: Padding to a multiple of 64 bits.
+ * @core_mask: Largest core mask bitmap that can be assigned to the GPU during
+ * runtime.
+ *
+ * Properties based on the maximum resources available (not necessarily
+ * allocated at that moment). Used to provide the maximum configuration to the
+ * userspace allowing the applications to allocate enough resources in case the
+ * real allocated resources change.
+ */
+struct max_config_props {
+	u8 l2_slices;
+	u8 padding[3];
+	u32 core_mask;
+};
+
+/**
+ * struct curr_config_props - Properties based on the current resources
+ * allocated to the GPU.
+ * @l2_present: Current L2 present bitmap that is allocated to the GPU.
+ * @shader_present: Current shader present bitmap that is allocated to the GPU.
+ * @num_cores: Current number of shader cores allocated to the GPU.
+ * @l2_slices: Current number of L2 slices allocated to the GPU.
+ * @update_needed: Defines if it is necessary to re-read the registers to
+ * update the current allocated resources.
+ * @padding: Padding to a multiple of 64 bits.
+ *
+ * Properties based on the current resource available. Used for operations with
+ * hardware interactions to avoid using userspace data that can be based on
+ * the maximum resource available.
+ */
+struct curr_config_props {
+	u64 l2_present;
+	u64 shader_present;
+	u16 num_cores;
+	u8 l2_slices;
+	bool update_needed;
+	u8 padding[4];
+};
+
 struct kbase_gpu_props {
 	/* kernel-only properties */
 	u8 num_cores;
@@ -86,6 +153,12 @@ struct kbase_gpu_props {
 	struct kbase_gpu_mem_props mem;
 	struct kbase_gpu_mmu_props mmu;
 
+	/* Properties based on the current resource available */
+	struct curr_config_props curr_config;
+
+	/* Properties based on the maximum resource available */
+	struct max_config_props max_config;
+
 	/* Properties shared with userspace */
 	struct base_gpu_props props;
 
diff --git a/mali_kbase/mali_kbase_gwt.h b/mali_kbase/mali_kbase_gwt.h
index f349d8f..32b0f5f 100644
--- a/mali_kbase/mali_kbase_gwt.h
+++ b/mali_kbase/mali_kbase_gwt.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
 *
- * (C) COPYRIGHT 2010-2017, 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2017, 2020-2021 ARM Limited. All rights reserved.
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,7 +23,7 @@ #define _KBASE_GWT_H #include <mali_kbase.h> -#include <mali_kbase_ioctl.h> +#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h> /** * kbase_gpu_gwt_start - Start the GPU write tracking diff --git a/mali_kbase/mali_kbase_hw.c b/mali_kbase/mali_kbase_hw.c index d2063bb..b1758d7 100644 --- a/mali_kbase/mali_kbase_hw.c +++ b/mali_kbase/mali_kbase_hw.c @@ -126,91 +126,91 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( }; static const struct base_hw_product base_hw_products[] = { - {GPU_ID2_PRODUCT_TMIX, - {{GPU_ID2_VERSION_MAKE(0, 0, 1), - base_hw_issues_tMIx_r0p0_05dev0}, - {GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1}, - {U32_MAX /* sentinel value */, NULL} } }, - - {GPU_ID2_PRODUCT_THEX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1}, - {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tHEx_r0p1}, - {GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tHEx_r0p2}, - {GPU_ID2_VERSION_MAKE(0, 3, 0), base_hw_issues_tHEx_r0p3}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TSIX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1}, - {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tSIx_r1p0}, - {GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tSIx_r1p1}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TDVX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDVx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TNOX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNOx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TGOX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0}, - {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TTRX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tTRx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTRx_r0p1}, - {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tTRx_r0p1}, - {GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tTRx_r0p2}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TNAX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNAx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tNAx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tNAx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tNAx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tNAx_r0p1}, - {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tNAx_r0p1}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_LBEX, - {{GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_lBEx_r1p0}, - {GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_lBEx_r1p1}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TBEX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tBEx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tBEx_r0p1}, - {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TBAX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBAx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tBAx_r0p0}, - {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBAx_r1p0}, 
- {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TDUX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TODX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tODx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tODx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_LODX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0}, - {U32_MAX, NULL} } }, + { GPU_ID2_PRODUCT_TMIX, + { { GPU_ID2_VERSION_MAKE(0, 0, 1), + base_hw_issues_tMIx_r0p0_05dev0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1 }, + { U32_MAX /* sentinel value */, NULL } } }, + + { GPU_ID2_PRODUCT_THEX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1 }, + { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tHEx_r0p1 }, + { GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tHEx_r0p2 }, + { GPU_ID2_VERSION_MAKE(0, 3, 0), base_hw_issues_tHEx_r0p3 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TSIX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1 }, + { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tSIx_r1p0 }, + { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tSIx_r1p1 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TDVX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDVx_r0p0 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TNOX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNOx_r0p0 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TGOX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0 }, + { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TTRX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tTRx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTRx_r0p1 }, + { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tTRx_r0p1 }, + { GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tTRx_r0p2 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TNAX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNAx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tNAx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tNAx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tNAx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tNAx_r0p1 }, + { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tNAx_r0p1 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_LBEX, + { { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_lBEx_r1p0 }, + { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_lBEx_r1p1 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TBEX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tBEx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tBEx_r0p1 }, + { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TBAX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBAx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tBAx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tBAx_r0p0 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TDUX, + { { 
GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0 },
+	    { U32_MAX, NULL } } },
+
+	{ GPU_ID2_PRODUCT_TODX,
+	  { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 },
+	    { GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tODx_r0p0 },
+	    { GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tODx_r0p0 },
+	    { U32_MAX, NULL } } },
+
+	{ GPU_ID2_PRODUCT_LODX,
+	  { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 },
+	    { U32_MAX, NULL } } },
 	};
 
 	u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
diff --git a/mali_kbase/mali_kbase_hwaccess_gpuprops.h b/mali_kbase/mali_kbase_hwaccess_gpuprops.h
index 5e5f9dc..0fca83e 100644
--- a/mali_kbase/mali_kbase_hwaccess_gpuprops.h
+++ b/mali_kbase/mali_kbase_hwaccess_gpuprops.h
@@ -40,6 +40,23 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev,
 		struct kbase_gpuprops_regdump *regdump);
 
 /**
+ * kbase_backend_gpuprops_get_curr_config() - Fill @curr_config_regdump with
+ * relevant GPU properties read from
+ * the GPU registers.
+ * @kbdev: Device pointer.
+ * @curr_config_regdump: Pointer to struct kbase_current_config_regdump
+ * structure.
+ *
+ * The caller should ensure that the GPU remains powered-on during this function
+ * and the caller must ensure this function returns success before using the
+ * values returned in the curr_config_regdump in any part of the kernel.
+ *
+ * Return: Zero for success or a Linux error code
+ */
+int kbase_backend_gpuprops_get_curr_config(struct kbase_device *kbdev,
+		struct kbase_current_config_regdump *curr_config_regdump);
+
+/**
 * kbase_backend_gpuprops_get_features - Fill @regdump with GPU properties read
 *				       from GPU
 * @kbdev: Device pointer
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf.c b/mali_kbase/mali_kbase_hwcnt_backend_csf.c
index c1bc7fc..4bc62c1 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_csf.c
+++ b/mali_kbase/mali_kbase_hwcnt_backend_csf.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
 *
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -127,41 +127,31 @@ enum kbase_hwcnt_backend_csf_enable_state {
 * struct kbase_hwcnt_backend_csf_info - Information used to create an instance
 *                                       of a CSF hardware counter backend.
 * @backend:                      Pointer to access CSF backend.
- * @lock:                         Spinlock protecting backend and its internal
- *                                states.
 * @fw_in_protected_mode:         True if FW is running in protected mode, else
 *                                false.
 * @unrecoverable_error_happened: True if an unrecoverable error happened, else
 *                                false.
- * @csf_if:                       CSF interface object pointer. Functions inside
- *                                this interface MUST never be called while
- *                                holding the spin lock, as that could cause
- *                                deadlocks.
+ * @csf_if:                       CSF interface object pointer.
 * @ring_buf_cnt:                 Dump buffer count in the ring buffer.
 * @counter_set:                  The performance counter set to use.
 * @metadata:                     Hardware counter metadata.
- * @dump_bytes:                   Bytes of GPU memory required to perform a
- *                                hardware counter dump.
- * @gpu_info:                     GPU information to initialise HWC dump memory
- *                                layout.
+ * @prfcnt_info:                  Performance counter information.
*/ struct kbase_hwcnt_backend_csf_info { struct kbase_hwcnt_backend_csf *backend; - spinlock_t lock; bool fw_in_protected_mode; bool unrecoverable_error_happened; struct kbase_hwcnt_backend_csf_if *csf_if; u32 ring_buf_cnt; enum kbase_hwcnt_set counter_set; const struct kbase_hwcnt_metadata *metadata; - size_t dump_bytes; - struct kbase_hwcnt_gpu_info gpu_info; + struct kbase_hwcnt_backend_csf_if_prfcnt_info prfcnt_info; }; /** * struct kbase_hwcnt_csf_physical_layout - HWC sample memory physical layout * information. - * @fe_cnt: FroneEnd block count. + * @fe_cnt: Front end block count. * @tiler_cnt: Tiler block count. * @mmu_l2_cnt: Memory system(MMU and L2 cache) block count. * @shader_cnt: Shader Core block count. @@ -207,7 +197,7 @@ struct kbase_hwcnt_csf_physical_layout { * count for sample period. * @phys_layout: Physical memory layout information of HWC * sample buffer. - * @dump_completed: Completion signalled by the dump worker when + * @dump_completed: Completion signaled by the dump worker when * it is completed accumulating up to the * insert_index_to_accumulate. * Should be initialized to the "complete" state. @@ -242,7 +232,7 @@ bool kbasep_hwcnt_backend_csf_backend_exists( struct kbase_hwcnt_backend_csf_info *csf_info) { WARN_ON(!csf_info); - lockdep_assert_held(&csf_info->lock); + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); return (csf_info->backend != NULL); } @@ -280,6 +270,9 @@ kbasep_hwcnt_backend_csf_cc_update(struct kbase_hwcnt_backend_csf *backend_csf) u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS]; size_t clk; + backend_csf->info->csf_if->assert_lock_held( + backend_csf->info->csf_if->ctx); + backend_csf->info->csf_if->get_gpu_cycle_count( backend_csf->info->csf_if->ctx, cycle_counts, backend_csf->clk_enable_map); @@ -310,10 +303,9 @@ kbasep_hwcnt_backend_csf_timestamp_ns(struct kbase_hwcnt_backend *backend) } /** kbasep_hwcnt_backend_csf_process_enable_map() - Process the enable_map to - * guarantee the header is - * enabled, the header will be - * used when do the samples - * delta calculation. + * guarantee headers are + * enabled if any counter is + * required. *@phys_enable_map: HWC physical enable map to be processed. 
*/ static void kbasep_hwcnt_backend_csf_process_enable_map( @@ -338,21 +330,21 @@ static void kbasep_hwcnt_backend_csf_process_enable_map( } static void kbasep_hwcnt_backend_csf_init_layout( - const struct kbase_hwcnt_gpu_info *gpu_info, + const struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info, struct kbase_hwcnt_csf_physical_layout *phys_layout) { - WARN_ON(!gpu_info); + WARN_ON(!prfcnt_info); WARN_ON(!phys_layout); phys_layout->fe_cnt = 1; phys_layout->tiler_cnt = 1; - phys_layout->mmu_l2_cnt = gpu_info->l2_count; - phys_layout->shader_cnt = fls64(gpu_info->core_mask); + phys_layout->mmu_l2_cnt = prfcnt_info->l2_count; + phys_layout->shader_cnt = fls64(prfcnt_info->core_mask); phys_layout->block_cnt = phys_layout->fe_cnt + phys_layout->tiler_cnt + phys_layout->mmu_l2_cnt + phys_layout->shader_cnt; - phys_layout->shader_avail_mask = gpu_info->core_mask; + phys_layout->shader_avail_mask = prfcnt_info->core_mask; phys_layout->headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; phys_layout->counters_per_block = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; @@ -363,9 +355,12 @@ static void kbasep_hwcnt_backend_csf_init_layout( static void kbasep_hwcnt_backend_csf_reset_internal_buffers( struct kbase_hwcnt_backend_csf *backend_csf) { - memset(backend_csf->to_user_buf, 0, backend_csf->info->dump_bytes); - memset(backend_csf->accum_buf, 0, backend_csf->info->dump_bytes); - memset(backend_csf->old_sample_buf, 0, backend_csf->info->dump_bytes); + memset(backend_csf->to_user_buf, 0, + backend_csf->info->prfcnt_info.dump_bytes); + memset(backend_csf->accum_buf, 0, + backend_csf->info->prfcnt_info.dump_bytes); + memset(backend_csf->old_sample_buf, 0, + backend_csf->info->prfcnt_info.dump_bytes); } static void kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header( @@ -389,12 +384,12 @@ static void kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header( u32 idx; u32 *sample; char *cpu_dump_base; + size_t dump_bytes = backend_csf->info->prfcnt_info.dump_bytes; cpu_dump_base = (char *)backend_csf->ring_buf_cpu_base; for (idx = 0; idx < backend_csf->info->ring_buf_cnt; idx++) { - sample = (u32 *)&cpu_dump_base[idx * - backend_csf->info->dump_bytes]; + sample = (u32 *)&cpu_dump_base[idx * dump_bytes]; kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header( backend_csf, sample); } @@ -405,19 +400,20 @@ static void kbasep_hwcnt_backend_csf_update_user_sample( { /* Copy the data into the sample and wait for the user to get it. */ memcpy(backend_csf->to_user_buf, backend_csf->accum_buf, - backend_csf->info->dump_bytes); + backend_csf->info->prfcnt_info.dump_bytes); /* After copied data into user sample, clear the accumulator values to * prepare for the next accumulator, such as the next request or * threshold. 
*/ - memset(backend_csf->accum_buf, 0, backend_csf->info->dump_bytes); + memset(backend_csf->accum_buf, 0, + backend_csf->info->prfcnt_info.dump_bytes); } static void kbasep_hwcnt_backend_csf_accumulate_sample( const struct kbase_hwcnt_csf_physical_layout *phys_layout, size_t dump_bytes, u32 *accum_buf, const u32 *old_sample_buf, - const u32 *new_sample_buf) + const u32 *new_sample_buf, bool clearing_samples) { size_t block_idx, ctr_idx; const u32 *old_block = old_sample_buf; @@ -425,6 +421,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample( u32 *acc_block = accum_buf; for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) { + const u32 old_enable_mask = + old_block[phys_layout->offset_enable_mask]; const u32 new_enable_mask = new_block[phys_layout->offset_enable_mask]; @@ -442,11 +440,63 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample( phys_layout->headers_per_block * KBASE_HWCNT_VALUE_BYTES); - /* Accumulate the counters. */ - for (ctr_idx = phys_layout->headers_per_block; - ctr_idx < phys_layout->values_per_block; - ctr_idx++) { - acc_block[ctr_idx] += new_block[ctr_idx]; + /* Accumulate counter samples + * + * When accumulating samples we need to take into + * account whether the counter sampling method involves + * clearing counters back to zero after each sample is + * taken. + * + * The intention for CSF was that all HW should use + * counters which wrap to zero when their maximum value + * is reached. This, combined with non-clearing + * sampling, enables multiple concurrent users to + * request samples without interfering with each other. + * + * However some early HW may not support wrapping + * counters, for these GPUs counters must be cleared on + * sample to avoid loss of data due to counters + * saturating at their maximum value. + */ + if (!clearing_samples) { + if (old_enable_mask == 0) { + /* Hardware block was previously + * unavailable. Accumulate the new + * counters only, as we know previous + * values are zeroes. + */ + for (ctr_idx = + phys_layout + ->headers_per_block; + ctr_idx < + phys_layout->values_per_block; + ctr_idx++) { + acc_block[ctr_idx] += + new_block[ctr_idx]; + } + } else { + /* Hardware block was previously + * available. Accumulate the delta + * between old and new counter values. + */ + for (ctr_idx = + phys_layout + ->headers_per_block; + ctr_idx < + phys_layout->values_per_block; + ctr_idx++) { + acc_block[ctr_idx] += + new_block[ctr_idx] - + old_block[ctr_idx]; + } + } + } else { + for (ctr_idx = phys_layout->headers_per_block; + ctr_idx < phys_layout->values_per_block; + ctr_idx++) { + acc_block[ctr_idx] += + new_block[ctr_idx]; + } } } old_block += phys_layout->values_per_block; @@ -467,9 +517,11 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples( u32 insert_index_to_stop) { u32 raw_idx; + unsigned long flags; u8 *cpu_dump_base = (u8 *)backend_csf->ring_buf_cpu_base; const size_t ring_buf_cnt = backend_csf->info->ring_buf_cnt; - const size_t buf_dump_bytes = backend_csf->info->dump_bytes; + const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes; + bool clearing_samples = backend_csf->info->prfcnt_info.clearing_samples; u32 *old_sample_buf = backend_csf->old_sample_buf; u32 *new_sample_buf; @@ -478,9 +530,10 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples( return; /* Sync all the buffers to CPU side before read the data. 
*/ - backend_csf->info->csf_if->ring_buf_sync( - backend_csf->info->csf_if->ctx, backend_csf->ring_buf, - extract_index_to_start, (insert_index_to_stop - 1), true); + backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, + backend_csf->ring_buf, + extract_index_to_start, + insert_index_to_stop, true); /* Consider u32 wrap case, '!=' is used here instead of '<' operator */ for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop; @@ -495,7 +548,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples( kbasep_hwcnt_backend_csf_accumulate_sample( &backend_csf->phys_layout, buf_dump_bytes, - backend_csf->accum_buf, old_sample_buf, new_sample_buf); + backend_csf->accum_buf, old_sample_buf, new_sample_buf, + clearing_samples); old_sample_buf = new_sample_buf; } @@ -514,23 +568,28 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples( } /* Sync zeroed buffers to avoid coherency issues on future use. */ - backend_csf->info->csf_if->ring_buf_sync( - backend_csf->info->csf_if->ctx, backend_csf->ring_buf, - extract_index_to_start, (insert_index_to_stop - 1), false); + backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, + backend_csf->ring_buf, + extract_index_to_start, + insert_index_to_stop, false); /* After consuming all samples between extract_idx and insert_idx, * set the raw extract index to insert_idx so that the sample buffers * can be released back to the ring buffer pool. */ + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); backend_csf->info->csf_if->set_extract_index( backend_csf->info->csf_if->ctx, insert_index_to_stop); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); } static void kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( struct kbase_hwcnt_backend_csf *backend_csf, enum kbase_hwcnt_backend_csf_enable_state new_state) { - lockdep_assert_held(&backend_csf->info->lock); + backend_csf->info->csf_if->assert_lock_held( + backend_csf->info->csf_if->ctx); if (backend_csf->enable_state != new_state) { backend_csf->enable_state = new_state; @@ -558,21 +617,19 @@ static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work) WARN_ON(!work); backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, hwc_dump_work); - - spin_lock_irqsave(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); /* Assert the backend is not destroyed. */ WARN_ON(backend_csf != backend_csf->info->backend); /* The backend was disabled or had an error while the worker was being * launched. */ - if (backend_csf->enable_state != - KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED && - backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { + if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); WARN_ON(!completion_done(&backend_csf->dump_completed)); - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, flags); return; } @@ -581,12 +638,14 @@ static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work) backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING; insert_index_to_acc = backend_csf->insert_index_to_accumulate; - spin_unlock_irqrestore(&backend_csf->info->lock, flags); /* Read the raw extract and insert indexes from the CSF interface. 
*/ backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx, &extract_index, &insert_index); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); + /* Accumulate up to the insert we grabbed at the prfcnt request * interrupt. */ @@ -599,19 +658,18 @@ static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work) kbasep_hwcnt_backend_csf_update_user_sample(backend_csf); /* Dump done, set state back to COMPLETED for next request. */ - spin_lock_irqsave(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); /* Assert the backend is not destroyed. */ WARN_ON(backend_csf != backend_csf->info->backend); /* The backend was disabled or had an error while we were accumulating. */ - if (backend_csf->enable_state != - KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED && - backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { + if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); WARN_ON(!completion_done(&backend_csf->dump_completed)); - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, flags); return; } @@ -621,7 +679,8 @@ static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work) /* Our work here is done - set the wait object and unblock waiters. */ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; complete_all(&backend_csf->dump_completed); - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); } /** @@ -643,20 +702,21 @@ static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work) backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, hwc_threshold_work); + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + + /* Assert the backend is not destroyed. */ + WARN_ON(backend_csf != backend_csf->info->backend); /* Read the raw extract and insert indexes from the CSF interface. */ backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx, &extract_index, &insert_index); - spin_lock_irqsave(&backend_csf->info->lock, flags); - /* Assert the backend is not destroyed. */ - WARN_ON(backend_csf != backend_csf->info->backend); - /* The backend was disabled or had an error while the worker was being * launched. */ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, flags); return; } @@ -667,14 +727,19 @@ static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work) if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) && (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)) { - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, flags); return; } - spin_unlock_irqrestore(&backend_csf->info->lock, flags); - - /* Accumulate everything we possibly can. We grabbed offsets before the - * spin lock, so we know it is not possible for a concurrent dump's - * insert_to_accumulate to exceed the insert we grabbed. + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); + + /* Accumulate everything we possibly can. 
We grabbed the insert index + * immediately after we acquired the lock but before we checked whether + * a concurrent dump was triggered. This ensures that if a concurrent + * dump was triggered between releasing the lock and now, we know for a + * fact that our insert will not exceed the concurrent dump's + * insert_to_accumulate, so we don't risk accumulating too much data. */ kbasep_hwcnt_backend_csf_accumulate_samples(backend_csf, extract_index, insert_index); @@ -685,45 +750,31 @@ static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work) static void kbase_hwcnt_backend_csf_submit_dump_worker( struct kbase_hwcnt_backend_csf_info *csf_info) { - unsigned long flags; u32 extract_index; - u32 insert_index; WARN_ON(!csf_info); - - csf_info->csf_if->get_indexes(csf_info->csf_if->ctx, &extract_index, - &insert_index); - - spin_lock_irqsave(&csf_info->lock, flags); - - /* Make sure the backend exists and is in the correct state. - * A lot of things could have happened to it in the period before we - * acquired the lock. + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); + + WARN_ON(!kbasep_hwcnt_backend_csf_backend_exists(csf_info)); + WARN_ON(csf_info->backend->enable_state != + KBASE_HWCNT_BACKEND_CSF_ENABLED); + WARN_ON(csf_info->backend->dump_state != + KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT); + + /* Save insert index now so that the dump worker only accumulates the + * HWC data associated with this request. Extract index is not stored + * as that needs to be checked when accumulating to prevent re-reading + * buffers that have already been read and returned to the GPU. */ - if (kbasep_hwcnt_backend_csf_backend_exists(csf_info) && - (csf_info->backend->enable_state == - KBASE_HWCNT_BACKEND_CSF_ENABLED || - csf_info->backend->enable_state == - KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) && - csf_info->backend->dump_state == - KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT) { - csf_info->backend->insert_index_to_accumulate = insert_index; - csf_info->backend->dump_state = - KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED; - - /* Submit the accumulator task into the work queue. */ - while (true != queue_work(csf_info->backend->hwc_dump_workq, - &csf_info->backend->hwc_dump_work)) { - /* Spin until we have guaranteed the work has been - * submitted. - * Without this there is a potential race where a prior - * submission of the work may still technically be on - * the queue, even though all of its work is complete. - */ - } - } - - spin_unlock_irqrestore(&csf_info->lock, flags); + csf_info->csf_if->get_indexes( + csf_info->csf_if->ctx, &extract_index, + &csf_info->backend->insert_index_to_accumulate); + csf_info->backend->dump_state = + KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED; + + /* Submit the accumulator task into the work queue. 
*/ + queue_work(csf_info->backend->hwc_dump_workq, + &csf_info->backend->hwc_dump_work); } static void kbasep_hwcnt_backend_csf_get_physical_enable( @@ -753,59 +804,36 @@ static void kbasep_hwcnt_backend_csf_get_physical_enable( enable->clk_enable_map = enable_map->clk_enable_map; } -static int kbasep_hwcnt_backend_csf_dump_enable_impl( +/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */ +static int kbasep_hwcnt_backend_csf_dump_enable_nolock( struct kbase_hwcnt_backend *backend, - const struct kbase_hwcnt_enable_map *enable_map, - struct kbase_hwcnt_backend_csf_if_enable *out_enable) + const struct kbase_hwcnt_enable_map *enable_map) { - unsigned long flags; struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; struct kbase_hwcnt_backend_csf_if_enable enable; - WARN_ON(!out_enable); - if (!backend_csf || !enable_map || (enable_map->metadata != backend_csf->info->metadata)) return -EINVAL; + backend_csf->info->csf_if->assert_lock_held( + backend_csf->info->csf_if->ctx); + kbasep_hwcnt_backend_csf_get_physical_enable(backend_csf, enable_map, &enable); - spin_lock_irqsave(&backend_csf->info->lock, flags); /* enable_state should be DISABLED before we transfer it to enabled */ - if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED) { - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED) return -EIO; - } backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; WARN_ON(!completion_done(&backend_csf->dump_completed)); kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED); - spin_unlock_irqrestore(&backend_csf->info->lock, flags); - *out_enable = enable; - return 0; -} - -/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */ -static int kbasep_hwcnt_backend_csf_dump_enable_nolock( - struct kbase_hwcnt_backend *backend, - const struct kbase_hwcnt_enable_map *enable_map) -{ - int errcode; - struct kbase_hwcnt_backend_csf *backend_csf = - (struct kbase_hwcnt_backend_csf *)backend; - struct kbase_hwcnt_backend_csf_if_enable enable; - - errcode = kbasep_hwcnt_backend_csf_dump_enable_impl(backend, enable_map, - &enable); - if (errcode) - return errcode; - - backend_csf->info->csf_if->dump_enable_nolock( - backend_csf->info->csf_if->ctx, backend_csf->ring_buf, &enable); + backend_csf->info->csf_if->dump_enable(backend_csf->info->csf_if->ctx, + backend_csf->ring_buf, &enable); kbasep_hwcnt_backend_csf_cc_initial_sample(backend_csf, enable_map); @@ -818,33 +846,33 @@ static int kbasep_hwcnt_backend_csf_dump_enable( const struct kbase_hwcnt_enable_map *enable_map) { int errcode; + unsigned long flags; struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; - struct kbase_hwcnt_backend_csf_if_enable enable; - - errcode = kbasep_hwcnt_backend_csf_dump_enable_impl(backend, enable_map, - &enable); - if (errcode) - return errcode; - - backend_csf->info->csf_if->dump_enable(backend_csf->info->csf_if->ctx, - backend_csf->ring_buf, &enable); - kbasep_hwcnt_backend_csf_cc_initial_sample(backend_csf, enable_map); + if (!backend_csf) + return -EINVAL; - return 0; + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + errcode = kbasep_hwcnt_backend_csf_dump_enable_nolock(backend, + enable_map); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); + return errcode; } static void 
kbasep_hwcnt_backend_csf_wait_enable_transition_complete( struct kbase_hwcnt_backend_csf *backend_csf, unsigned long *lock_flags) { - lockdep_assert_held(&backend_csf->info->lock); + backend_csf->info->csf_if->assert_lock_held( + backend_csf->info->csf_if->ctx); while ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) || (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED)) { - spin_unlock_irqrestore(&backend_csf->info->lock, *lock_flags); + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, *lock_flags); wait_event( backend_csf->enable_state_waitq, @@ -853,7 +881,8 @@ static void kbasep_hwcnt_backend_csf_wait_enable_transition_complete( (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED)); - spin_lock_irqsave(&backend_csf->info->lock, *lock_flags); + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, + lock_flags); } } @@ -868,7 +897,7 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) WARN_ON(!backend_csf); - spin_lock_irqsave(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); /* Make sure we wait until any previous enable or disable have completed * before doing anything. @@ -882,7 +911,8 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) /* If we are already disabled or in an unrecoverable error * state, there is nothing for us to do. */ - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, flags); return; } @@ -901,7 +931,8 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); WARN_ON(!completion_done(&backend_csf->dump_completed)); - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); /* Block until any async work has completed. We have transitioned out of * the ENABLED state so we can guarantee no new work will concurrently @@ -909,23 +940,16 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) */ flush_workqueue(backend_csf->hwc_dump_workq); + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + if (do_disable) backend_csf->info->csf_if->dump_disable( backend_csf->info->csf_if->ctx); - spin_lock_irqsave(&backend_csf->info->lock, flags); - kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf, &flags); switch (backend_csf->enable_state) { - case KBASE_HWCNT_BACKEND_CSF_DISABLED: - case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED: - case KBASE_HWCNT_BACKEND_CSF_ENABLED: - case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED: - case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR: - WARN_ON(true); - break; case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER: kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED); @@ -935,9 +959,13 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) backend_csf, KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR); break; + default: + WARN_ON(true); + break; } - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); /* After disable, zero the header of all buffers in the ring buffer back * to 0 to prepare for the next enable. 
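With the backend's private spinlock gone, all synchronisation in this file goes through the CSF interface's lock()/unlock() callbacks, so kbasep_hwcnt_backend_csf_wait_enable_transition_complete() has to release that lock before sleeping on enable_state_waitq and re-take it before re-checking the state. A minimal sketch of this drop-wait-retake pattern follows; the names hwc_if and hwc_wait_transition are illustrative stand-ins, not real kbase symbols.

/* Sketch only: illustrates the locking pattern, not actual kbase code. */
#include <linux/spinlock.h>
#include <linux/wait.h>

enum hwc_state { HWC_DISABLED, HWC_TRANSITIONING, HWC_ENABLED };

struct hwc_if {
	spinlock_t lock;		/* stands in for csf_if->lock()/unlock() */
	wait_queue_head_t waitq;	/* stands in for enable_state_waitq */
	enum hwc_state state;
};

/* Called with ifc->lock held (irqsave); returns with it held again. */
static void hwc_wait_transition(struct hwc_if *ifc, unsigned long *flags)
{
	while (ifc->state == HWC_TRANSITIONING) {
		/* Never sleep while holding the lock. */
		spin_unlock_irqrestore(&ifc->lock, *flags);
		wait_event(ifc->waitq, ifc->state != HWC_TRANSITIONING);
		/* Re-take the lock and loop to re-check the state. */
		spin_lock_irqsave(&ifc->lock, *flags);
	}
}

The same discipline is why kbasep_hwcnt_backend_csf_dump_disable() drops the interface lock around flush_workqueue() and only re-takes it afterwards to inspect enable_state.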
@@ -947,7 +975,7 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) /* Sync zeroed buffers to avoid coherency issues on future use. */ backend_csf->info->csf_if->ring_buf_sync( backend_csf->info->csf_if->ctx, backend_csf->ring_buf, 0, - (backend_csf->info->ring_buf_cnt - 1), false); + backend_csf->info->ring_buf_cnt, false); /* Reset accumulator, old_sample_buf and user_sample to all-0 to prepare * for next enable. @@ -968,12 +996,27 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend, if (!backend_csf) return -EINVAL; - spin_lock_irqsave(&backend_csf->info->lock, flags); - /* Make sure we are enabled or becoming enabled. */ - if ((backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) && - (backend_csf->enable_state != - KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED)) { - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + + /* If we're transitioning to enabled there's nothing to accumulate, and + * the user dump buffer is already zeroed. We can just short circuit to + * the DUMP_COMPLETED state. + */ + if (backend_csf->enable_state == + KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) { + backend_csf->dump_state = + KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; + *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend); + kbasep_hwcnt_backend_csf_cc_update(backend_csf); + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, flags); + return 0; + } + + /* Otherwise, make sure we're already enabled. */ + if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, flags); return -EIO; } @@ -983,27 +1026,14 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend, if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) && (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)) { - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, flags); /* HWC is disabled or another dump is ongoing, or we are on * fault. */ return -EIO; } - /* If we are transitioning to enabled there is nothing to accumulate, - * and the user dump buffer is already zeroed. - * We can just short circuit to the DUMP_COMPLETED state. - */ - if (backend_csf->enable_state == - KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) { - backend_csf->dump_state = - KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; - spin_unlock_irqrestore(&backend_csf->info->lock, flags); - *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend); - kbasep_hwcnt_backend_csf_cc_update(backend_csf); - return 0; - } - /* Reset the completion so dump_wait() has something to wait on. */ reinit_completion(&backend_csf->dump_completed); @@ -1022,7 +1052,6 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend, backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT; } - spin_unlock_irqrestore(&backend_csf->info->lock, flags); /* CSF firmware might enter protected mode now, but still call request. 
* That is fine, as we changed state while holding the lock, so the @@ -1036,13 +1065,14 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend, *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend); kbasep_hwcnt_backend_csf_cc_update(backend_csf); - if (do_request) { + if (do_request) backend_csf->info->csf_if->dump_request( backend_csf->info->csf_if->ctx); - } else { + else kbase_hwcnt_backend_csf_submit_dump_worker(backend_csf->info); - } + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); return 0; } @@ -1060,13 +1090,14 @@ kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend) wait_for_completion(&backend_csf->dump_completed); - spin_lock_irqsave(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); /* Make sure the last dump actually succeeded. */ errcode = (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) ? 0 : -EIO; - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); return errcode; } @@ -1144,10 +1175,8 @@ kbasep_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_csf *backend_csf) destroy_workqueue(backend_csf->hwc_dump_workq); - if (backend_csf->info->csf_if->ring_buf_free) { - backend_csf->info->csf_if->ring_buf_free( - backend_csf->info->csf_if->ctx, backend_csf->ring_buf); - } + backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx, + backend_csf->ring_buf); kfree(backend_csf->accum_buf); backend_csf->accum_buf = NULL; @@ -1183,18 +1212,21 @@ kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info, goto alloc_error; backend_csf->info = csf_info; - kbasep_hwcnt_backend_csf_init_layout(&csf_info->gpu_info, + kbasep_hwcnt_backend_csf_init_layout(&csf_info->prfcnt_info, &backend_csf->phys_layout); - backend_csf->accum_buf = kzalloc(csf_info->dump_bytes, GFP_KERNEL); + backend_csf->accum_buf = + kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL); if (!backend_csf->accum_buf) goto err_alloc_acc_buf; - backend_csf->old_sample_buf = kzalloc(csf_info->dump_bytes, GFP_KERNEL); + backend_csf->old_sample_buf = + kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL); if (!backend_csf->old_sample_buf) goto err_alloc_pre_sample_buf; - backend_csf->to_user_buf = kzalloc(csf_info->dump_bytes, GFP_KERNEL); + backend_csf->to_user_buf = + kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL); if (!backend_csf->to_user_buf) goto err_alloc_user_sample_buf; @@ -1210,7 +1242,7 @@ kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info, /* Sync zeroed buffers to avoid coherency issues on use. */ backend_csf->info->csf_if->ring_buf_sync( backend_csf->info->csf_if->ctx, backend_csf->ring_buf, 0, - (backend_csf->info->ring_buf_cnt - 1), false); + backend_csf->info->ring_buf_cnt, false); init_completion(&backend_csf->dump_completed); @@ -1278,17 +1310,17 @@ kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *info, /* If it was not created before, attach it to csf_info. * Use spin lock to avoid concurrent initialization. 
*/ - spin_lock_irqsave(&csf_info->lock, flags); + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); if (csf_info->backend == NULL) { csf_info->backend = backend_csf; *out_backend = (struct kbase_hwcnt_backend *)backend_csf; success = true; - if (csf_info->unrecoverable_error_happened) { + if (csf_info->unrecoverable_error_happened) backend_csf->enable_state = KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR; - } } - spin_unlock_irqrestore(&csf_info->lock, flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); /* Destroy the new created backend if the backend has already created * before. In normal case, this won't happen if the client call init() @@ -1317,9 +1349,10 @@ static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend) /* Set the backend in csf_info to NULL so we won't handle any external * notification anymore since we are terminating. */ - spin_lock_irqsave(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); backend_csf->info->backend = NULL; - spin_unlock_irqrestore(&backend_csf->info->lock, flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); kbasep_hwcnt_backend_csf_destroy(backend_csf); } @@ -1370,8 +1403,6 @@ static int kbasep_hwcnt_backend_csf_info_create( if (!info) return -ENOMEM; - spin_lock_init(&info->lock); - #if defined(CONFIG_MALI_PRFCNT_SET_SECONDARY) info->counter_set = KBASE_HWCNT_SET_SECONDARY; #elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY) @@ -1405,11 +1436,12 @@ kbasep_hwcnt_backend_csf_metadata(const struct kbase_hwcnt_backend_info *info) } static void kbasep_hwcnt_backend_csf_handle_unrecoverable_error( - struct kbase_hwcnt_backend_csf *backend_csf, unsigned long *lock_flags) + struct kbase_hwcnt_backend_csf *backend_csf) { bool do_disable = false; - lockdep_assert_held(&backend_csf->info->lock); + backend_csf->info->csf_if->assert_lock_held( + backend_csf->info->csf_if->ctx); /* We are already in or transitioning to the unrecoverable error state. * Early out. @@ -1451,18 +1483,16 @@ static void kbasep_hwcnt_backend_csf_handle_unrecoverable_error( * disabled, - we don't want to disable twice if an unrecoverable error * happens while we are disabling. */ - if (do_disable) { - spin_unlock_irqrestore(&backend_csf->info->lock, *lock_flags); + if (do_disable) backend_csf->info->csf_if->dump_disable( backend_csf->info->csf_if->ctx); - spin_lock_irqsave(&backend_csf->info->lock, *lock_flags); - } } static void kbasep_hwcnt_backend_csf_handle_recoverable_error( - struct kbase_hwcnt_backend_csf *backend_csf, unsigned long *lock_flags) + struct kbase_hwcnt_backend_csf *backend_csf) { - lockdep_assert_held(&backend_csf->info->lock); + backend_csf->info->csf_if->assert_lock_held( + backend_csf->info->csf_if->ctx); switch (backend_csf->enable_state) { case KBASE_HWCNT_BACKEND_CSF_DISABLED: @@ -1478,8 +1508,8 @@ static void kbasep_hwcnt_backend_csf_handle_recoverable_error( /* A seemingly recoverable error that occurs while we are * transitioning to enabled is probably unrecoverable. */ - kbasep_hwcnt_backend_csf_handle_unrecoverable_error(backend_csf, - lock_flags); + kbasep_hwcnt_backend_csf_handle_unrecoverable_error( + backend_csf); return; case KBASE_HWCNT_BACKEND_CSF_ENABLED: /* Start transitioning to the disabled state. 
We can't wait for @@ -1496,14 +1526,8 @@ static void kbasep_hwcnt_backend_csf_handle_recoverable_error( backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; complete_all(&backend_csf->dump_completed); - /* Unlock spin lock before we call csf_if disable(). */ - spin_unlock_irqrestore(&backend_csf->info->lock, *lock_flags); - backend_csf->info->csf_if->dump_disable( backend_csf->info->csf_if->ctx); - - /* Lock spin lock again to match the spin lock pairs. */ - spin_lock_irqsave(&backend_csf->info->lock, *lock_flags); return; } } @@ -1511,44 +1535,27 @@ static void kbasep_hwcnt_backend_csf_handle_recoverable_error( void kbase_hwcnt_backend_csf_protm_entered( struct kbase_hwcnt_backend_interface *iface) { - unsigned long flags; - struct kbase_hwcnt_backend_csf_info *csf_info; - struct kbase_hwcnt_backend_csf *backend_csf; - - csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + struct kbase_hwcnt_backend_csf_info *csf_info = + (struct kbase_hwcnt_backend_csf_info *)iface->info; - spin_lock_irqsave(&csf_info->lock, flags); + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); csf_info->fw_in_protected_mode = true; - /* Early out if the backend does not exist. */ - if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { - spin_unlock_irqrestore(&csf_info->lock, flags); - return; - } - - backend_csf = csf_info->backend; - /* If we are not in REQUESTED state, we don't need to do the dumping. */ - if (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED) { - spin_unlock_irqrestore(&csf_info->lock, flags); - return; - } - backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT; - - spin_unlock_irqrestore(&csf_info->lock, flags); - kbase_hwcnt_backend_csf_submit_dump_worker(csf_info); + /* Call on_prfcnt_sample() to trigger collection of the protected mode + * entry auto-sample if there is currently a pending dump request. + */ + kbase_hwcnt_backend_csf_on_prfcnt_sample(iface); } void kbase_hwcnt_backend_csf_protm_exited( struct kbase_hwcnt_backend_interface *iface) { - unsigned long flags; struct kbase_hwcnt_backend_csf_info *csf_info; csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; - spin_lock_irqsave(&csf_info->lock, flags); + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); csf_info->fw_in_protected_mode = false; - spin_unlock_irqrestore(&csf_info->lock, flags); } void kbase_hwcnt_backend_csf_on_unrecoverable_error( @@ -1559,18 +1566,17 @@ void kbase_hwcnt_backend_csf_on_unrecoverable_error( csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; - spin_lock_irqsave(&csf_info->lock, flags); + csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags); csf_info->unrecoverable_error_happened = true; /* Early out if the backend does not exist. 
*/ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { - spin_unlock_irqrestore(&csf_info->lock, flags); + csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); return; } - kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend, - &flags); + kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend); - spin_unlock_irqrestore(&csf_info->lock, flags); + csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); } void kbase_hwcnt_backend_csf_on_before_reset( @@ -1582,11 +1588,11 @@ void kbase_hwcnt_backend_csf_on_before_reset( csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; - spin_lock_irqsave(&csf_info->lock, flags); + csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags); csf_info->unrecoverable_error_happened = false; /* Early out if the backend does not exist. */ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { - spin_unlock_irqrestore(&csf_info->lock, flags); + csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); return; } backend_csf = csf_info->backend; @@ -1605,7 +1611,7 @@ void kbase_hwcnt_backend_csf_on_before_reset( * really matter, the power is being pulled. */ kbasep_hwcnt_backend_csf_handle_unrecoverable_error( - csf_info->backend, &flags); + csf_info->backend); } /* A reset is the only way to exit the unrecoverable error state */ @@ -1615,81 +1621,66 @@ void kbase_hwcnt_backend_csf_on_before_reset( backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED); } - spin_unlock_irqrestore(&csf_info->lock, flags); + csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); } void kbase_hwcnt_backend_csf_on_prfcnt_sample( struct kbase_hwcnt_backend_interface *iface) { - unsigned long flags; struct kbase_hwcnt_backend_csf_info *csf_info; struct kbase_hwcnt_backend_csf *backend_csf; csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); - spin_lock_irqsave(&csf_info->lock, flags); /* Early out if the backend does not exist. */ - if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { - spin_unlock_irqrestore(&csf_info->lock, flags); + if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) return; - } backend_csf = csf_info->backend; /* If the current state is not REQUESTED, this HWC sample will be * skipped and processed in next dump_request. */ - if (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED) { - spin_unlock_irqrestore(&csf_info->lock, flags); + if (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED) return; - } backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT; - spin_unlock_irqrestore(&csf_info->lock, flags); kbase_hwcnt_backend_csf_submit_dump_worker(csf_info); } void kbase_hwcnt_backend_csf_on_prfcnt_threshold( struct kbase_hwcnt_backend_interface *iface) { - unsigned long flags; struct kbase_hwcnt_backend_csf_info *csf_info; struct kbase_hwcnt_backend_csf *backend_csf; csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); - spin_lock_irqsave(&csf_info->lock, flags); /* Early out if the backend does not exist. 
*/ - if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { - spin_unlock_irqrestore(&csf_info->lock, flags); + if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) return; - } backend_csf = csf_info->backend; - if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) { + if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) /* Submit the threshold work into the work queue to consume the * available samples. */ queue_work(backend_csf->hwc_dump_workq, &backend_csf->hwc_threshold_work); - } - - spin_unlock_irqrestore(&csf_info->lock, flags); } void kbase_hwcnt_backend_csf_on_prfcnt_overflow( struct kbase_hwcnt_backend_interface *iface) { - unsigned long flags; struct kbase_hwcnt_backend_csf_info *csf_info; csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); - spin_lock_irqsave(&csf_info->lock, flags); /* Early out if the backend does not exist. */ - if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { - spin_unlock_irqrestore(&csf_info->lock, flags); + if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) return; - } /* Called when an overflow occurs. We treat this as a recoverable error, * so we start transitioning to the disabled state. @@ -1698,27 +1689,21 @@ void kbase_hwcnt_backend_csf_on_prfcnt_overflow( * complex recovery code when we can just turn ourselves off instead for * a while. */ - kbasep_hwcnt_backend_csf_handle_recoverable_error(csf_info->backend, - &flags); - - spin_unlock_irqrestore(&csf_info->lock, flags); + kbasep_hwcnt_backend_csf_handle_recoverable_error(csf_info->backend); } void kbase_hwcnt_backend_csf_on_prfcnt_enable( struct kbase_hwcnt_backend_interface *iface) { - unsigned long flags; struct kbase_hwcnt_backend_csf_info *csf_info; struct kbase_hwcnt_backend_csf *backend_csf; csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); - spin_lock_irqsave(&csf_info->lock, flags); /* Early out if the backend does not exist. */ - if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { - spin_unlock_irqrestore(&csf_info->lock, flags); + if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) return; - } backend_csf = csf_info->backend; if (backend_csf->enable_state == @@ -1735,27 +1720,22 @@ void kbase_hwcnt_backend_csf_on_prfcnt_enable( * we reset. */ kbasep_hwcnt_backend_csf_handle_unrecoverable_error( - csf_info->backend, &flags); + csf_info->backend); } - - spin_unlock_irqrestore(&csf_info->lock, flags); } void kbase_hwcnt_backend_csf_on_prfcnt_disable( struct kbase_hwcnt_backend_interface *iface) { - unsigned long flags; struct kbase_hwcnt_backend_csf_info *csf_info; struct kbase_hwcnt_backend_csf *backend_csf; csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); - spin_lock_irqsave(&csf_info->lock, flags); /* Early out if the backend does not exist. */ - if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { - spin_unlock_irqrestore(&csf_info->lock, flags); + if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) return; - } backend_csf = csf_info->backend; if (backend_csf->enable_state == @@ -1773,10 +1753,8 @@ void kbase_hwcnt_backend_csf_on_prfcnt_disable( * we reset. 
*/ kbasep_hwcnt_backend_csf_handle_unrecoverable_error( - csf_info->backend, &flags); + csf_info->backend); } - - spin_unlock_irqrestore(&csf_info->lock, flags); } int kbase_hwcnt_backend_csf_metadata_init( @@ -1784,28 +1762,29 @@ int kbase_hwcnt_backend_csf_metadata_init( { int errcode; struct kbase_hwcnt_backend_csf_info *csf_info; + struct kbase_hwcnt_gpu_info gpu_info; if (!iface) return -EINVAL; csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; - WARN_ON(!csf_info->csf_if->get_gpu_info); - csf_info->csf_if->get_gpu_info(csf_info->csf_if->ctx, - &csf_info->dump_bytes, - &csf_info->gpu_info.l2_count, - &csf_info->gpu_info.core_mask, - &csf_info->gpu_info.clk_cnt); + WARN_ON(!csf_info->csf_if->get_prfcnt_info); + + csf_info->csf_if->get_prfcnt_info(csf_info->csf_if->ctx, + &csf_info->prfcnt_info); /* The clock domain counts should not exceed the number of maximum * number of clock regulators. */ - if (csf_info->gpu_info.clk_cnt > BASE_MAX_NR_CLOCKS_REGULATORS) + if (csf_info->prfcnt_info.clk_cnt > BASE_MAX_NR_CLOCKS_REGULATORS) return -EIO; - errcode = kbase_hwcnt_csf_metadata_create(&csf_info->gpu_info, - csf_info->counter_set, - &csf_info->metadata); + gpu_info.l2_count = csf_info->prfcnt_info.l2_count; + gpu_info.core_mask = csf_info->prfcnt_info.core_mask; + gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt; + errcode = kbase_hwcnt_csf_metadata_create( + &gpu_info, csf_info->counter_set, &csf_info->metadata); if (errcode) return errcode; @@ -1813,7 +1792,8 @@ int kbase_hwcnt_backend_csf_metadata_init( * Dump abstraction size should be exactly the same size and layout as * the physical dump size, for backwards compatibility. */ - WARN_ON(csf_info->dump_bytes != csf_info->metadata->dump_buf_bytes); + WARN_ON(csf_info->prfcnt_info.dump_bytes != + csf_info->metadata->dump_buf_bytes); return 0; } @@ -1868,8 +1848,7 @@ int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, return 0; } -void kbase_hwcnt_backend_csf_destroy( - struct kbase_hwcnt_backend_interface *iface) +void kbase_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_interface *iface) { if (!iface) return; diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf.h b/mali_kbase/mali_kbase_hwcnt_backend_csf.h index 93938f0..7506274 100644 --- a/mali_kbase/mali_kbase_hwcnt_backend_csf.h +++ b/mali_kbase/mali_kbase_hwcnt_backend_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -40,7 +40,7 @@ * @iface: Non-NULL pointer to backend interface structure that is filled * in on creation success. * - * Calls to iface->dump_enable_nolock() require kbdev->hwaccess_lock held. + * Calls to iface->dump_enable_nolock() require the CSF Scheduler IRQ lock. * * Return: 0 on success, else error code. */ @@ -77,7 +77,7 @@ void kbase_hwcnt_backend_csf_destroy( struct kbase_hwcnt_backend_interface *iface); /** - * kbase_hwcnt_backend_csf_protm_entered() - CSf HWC backend function to receive + * kbase_hwcnt_backend_csf_protm_entered() - CSF HWC backend function to receive * notification that protected mode * has been entered. * @iface: Non-NULL pointer to HWC backend interface. 
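The hunks above turn the protected-mode and performance-counter notifications into lock-asserting helpers, so surrounding code is expected to hold the CSF scheduler IRQ spinlock when calling them. A hedged sketch of such a call site, using the scheduler spinlock helpers that appear elsewhere in this change; notify_protm_entered() is a hypothetical wrapper, not driver code.

	/* Hypothetical call site: the notifier no longer takes a lock itself,
	 * so the caller brackets it with the scheduler IRQ spinlock.
	 */
	static void notify_protm_entered(struct kbase_device *kbdev,
					 struct kbase_hwcnt_backend_interface *iface)
	{
		unsigned long flags;

		kbase_csf_scheduler_spin_lock(kbdev, &flags);
		kbase_hwcnt_backend_csf_protm_entered(iface);
		kbase_csf_scheduler_spin_unlock(kbdev, flags);
	}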
@@ -86,7 +86,7 @@ void kbase_hwcnt_backend_csf_protm_entered( struct kbase_hwcnt_backend_interface *iface); /** - * kbase_hwcnt_backend_csf_protm_exited() - CSf HWC backend function to receive + * kbase_hwcnt_backend_csf_protm_exited() - CSF HWC backend function to receive * notification that protected mode has * been exited. * @iface: Non-NULL pointer to HWC backend interface. @@ -95,22 +95,20 @@ void kbase_hwcnt_backend_csf_protm_exited( struct kbase_hwcnt_backend_interface *iface); /** - * kbase_hwcnt_backend_csf_on_unrecoverable_error() - CSf HWC backend function - * to be called when an - * unrecoverable error - * occurs, such as the - * firmware has died or bus - * error, this puts us into - * the unrecoverable error - * state, which we can only - * get out of by a reset. + * kbase_hwcnt_backend_csf_on_unrecoverable_error() - CSF HWC backend function + * called when unrecoverable + * errors are detected. * @iface: Non-NULL pointer to HWC backend interface. + * + * This should be called on encountering errors that can only be recovered from + * with reset, or that may put HWC logic in state that could result in hang. For + * example, on bus error, or when FW becomes unresponsive. */ void kbase_hwcnt_backend_csf_on_unrecoverable_error( struct kbase_hwcnt_backend_interface *iface); /** - * kbase_hwcnt_backend_csf_on_before_reset() - CSf HWC backend function to be + * kbase_hwcnt_backend_csf_on_before_reset() - CSF HWC backend function to be * called immediately before a * reset. Takes us out of the * unrecoverable error state, if we diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf_if.h b/mali_kbase/mali_kbase_hwcnt_backend_csf_if.h index e86d240..b4ddd31 100644 --- a/mali_kbase/mali_kbase_hwcnt_backend_csf_if.h +++ b/mali_kbase/mali_kbase_hwcnt_backend_csf_if.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -61,19 +61,63 @@ struct kbase_hwcnt_backend_csf_if_enable { }; /** - * typedef kbase_hwcnt_backend_csf_if_get_gpu_info_fn - Get GPU information - * @ctx: Non-NULL pointer to a CSF context. - * @dump_size: Non-NULL pointer to where the dump size of performance counter - * sample is stored. - * @l2_count: Non-NULL pointer to where the MMU L2 cache count is stored. - * @core_mask: Non-NULL pointer to where shader core mask is stored. + * struct kbase_hwcnt_backend_csf_if_prfcnt_info - Performance counter + * information. + * @dump_bytes: Bytes of GPU memory required to perform a performance + * counter dump. + * @l2_count: The MMU L2 cache count. + * @core_mask: Shader core mask. + * @clk_cnt: Clock domain count in the system. + * @clearing_samples: Indicates whether counters are cleared after each sample + * is taken. + */ +struct kbase_hwcnt_backend_csf_if_prfcnt_info { + size_t dump_bytes; + size_t l2_count; + u64 core_mask; + u8 clk_cnt; + bool clearing_samples; +}; + +/** + * typedef kbase_hwcnt_backend_csf_if_assert_lock_held_fn - Assert that the + * backend spinlock is + * held. + * @ctx: Non-NULL pointer to a CSF context. + */ +typedef void (*kbase_hwcnt_backend_csf_if_assert_lock_held_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx); + +/** + * typedef kbase_hwcnt_backend_csf_if_lock_fn - Acquire backend spinlock. + * + * @ctx: Non-NULL pointer to a CSF context. 
+ * @flags: Pointer to the memory location that would store the previous + * interrupt state. + */ +typedef void (*kbase_hwcnt_backend_csf_if_lock_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long *flags); + +/** + * typedef kbase_hwcnt_backend_csf_if_unlock_fn - Release backend spinlock. * - * @clk_cnt: Non-NULL pointer to where clock domain count in the system is - * stored. + * @ctx: Non-NULL pointer to a CSF context. + * @flags: Previously stored interrupt state when Scheduler interrupt + * spinlock was acquired. */ -typedef void (*kbase_hwcnt_backend_csf_if_get_gpu_info_fn)( - struct kbase_hwcnt_backend_csf_if_ctx *ctx, size_t *dump_size, - size_t *l2_count, u64 *core_mask, u8 *clk_cnt); +typedef void (*kbase_hwcnt_backend_csf_if_unlock_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long flags); + +/** + * typedef kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn - Get performance + * counter information. + * @ctx: Non-NULL pointer to a CSF context. + * @prfcnt_info: Non-NULL pointer to struct where performance counter + * information should be stored. + */ +typedef void (*kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info); /** * typedef kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn - Allocate a ring buffer @@ -105,14 +149,13 @@ typedef int (*kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn)( * inclusive. * @buf_index_last: The last buffer index in the ring buffer to be synced, * exclusive. - * @for_cpu: The direction of sync to be applied. - * It is set to true when CPU cache needs to be invalidated - * before reading the ring buffer contents. And set to false - * when CPU cache needs to be flushed after writing to the - * ring buffer. + * @for_cpu: The direction of sync to be applied, set to true when CPU + * cache needs invalidating before reading the buffer, and set + * to false after CPU writes to flush these before this memory + * is overwritten by the GPU. * - * After HWC sample request is done in GPU side, we need to sync the dump memory - * to CPU side to access the HWC data. + * Flush cached HWC dump buffer data to ensure that all writes from GPU and CPU + * are correctly observed. */ typedef void (*kbase_hwcnt_backend_csf_if_ring_buf_sync_fn)( struct kbase_hwcnt_backend_csf_if_ctx *ctx, @@ -147,25 +190,10 @@ typedef u64 (*kbase_hwcnt_backend_csf_if_timestamp_ns_fn)( * @ctx: Non-NULL pointer to a CSF interface context. * @ring_buf: Non-NULL pointer to the ring buffer which used to setup the HWC. * @enable: Non-NULL pointer to the enable map of HWC. - */ -typedef void (*kbase_hwcnt_backend_csf_if_dump_enable_fn)( - struct kbase_hwcnt_backend_csf_if_ctx *ctx, - struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, - struct kbase_hwcnt_backend_csf_if_enable *enable); - -/** - * typedef kbase_hwcnt_backend_csf_if_dump_enable_nolock_fn - Setup and enable - * hardware counter - * in CSF interface. - * @ctx: Non-NULL pointer to a CSF interface context. - * @ring_buf: Non-NULL pointer to the ring buffer which used to setup the HWC. - * @enable: Non-NULL pointer to the enable map of HWC. * - * Exactly the same as kbase_hwcnt_backend_csf_if_dump_enable_fn(), except must - * be called in an atomic context with the spinlock documented by the specific - * backend interface held. + * Requires lock to be taken before calling. 
*/ -typedef void (*kbase_hwcnt_backend_csf_if_dump_enable_nolock_fn)( +typedef void (*kbase_hwcnt_backend_csf_if_dump_enable_fn)( struct kbase_hwcnt_backend_csf_if_ctx *ctx, struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, struct kbase_hwcnt_backend_csf_if_enable *enable); @@ -174,13 +202,18 @@ typedef void (*kbase_hwcnt_backend_csf_if_dump_enable_nolock_fn)( * typedef kbase_hwcnt_backend_csf_if_dump_disable_fn - Disable hardware counter * in CSF interface. * @ctx: Non-NULL pointer to a CSF interface context. + * + * Requires lock to be taken before calling. */ typedef void (*kbase_hwcnt_backend_csf_if_dump_disable_fn)( struct kbase_hwcnt_backend_csf_if_ctx *ctx); /** * typedef kbase_hwcnt_backend_csf_if_dump_request_fn - Request a HWC dump. + * * @ctx: Non-NULL pointer to the interface context. + * + * Requires lock to be taken before calling. */ typedef void (*kbase_hwcnt_backend_csf_if_dump_request_fn)( struct kbase_hwcnt_backend_csf_if_ctx *ctx); @@ -189,9 +222,12 @@ typedef void (*kbase_hwcnt_backend_csf_if_dump_request_fn)( * typedef kbase_hwcnt_backend_csf_if_get_indexes_fn - Get current extract and * insert indexes of the * ring buffer. + * * @ctx: Non-NULL pointer to a CSF interface context. * @extract_index: Non-NULL pointer where current extract index to be saved. * @insert_index: Non-NULL pointer where current insert index to be saved. + * + * Requires lock to be taken before calling. */ typedef void (*kbase_hwcnt_backend_csf_if_get_indexes_fn)( struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index, @@ -201,8 +237,11 @@ typedef void (*kbase_hwcnt_backend_csf_if_get_indexes_fn)( * typedef kbase_hwcnt_backend_csf_if_set_extract_index_fn - Update the extract * index of the ring * buffer. + * * @ctx: Non-NULL pointer to a CSF interface context. * @extract_index: New extract index to be set. + * + * Requires lock to be taken before calling. */ typedef void (*kbase_hwcnt_backend_csf_if_set_extract_index_fn)( struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_index); @@ -213,9 +252,11 @@ typedef void (*kbase_hwcnt_backend_csf_if_set_extract_index_fn)( * @ctx: Non-NULL pointer to a CSF interface context. * @cycle_counts: Non-NULL pointer to an array where cycle counts to be saved, * the array size should be at least as big as the number of - * clock domains returned by get_gpu_info interface. + * clock domains returned by get_prfcnt_info interface. * @clk_enable_map: An array of bitfields, each bit specifies an enabled clock * domain. + * + * Requires lock to be taken before calling. */ typedef void (*kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn)( struct kbase_hwcnt_backend_csf_if_ctx *ctx, u64 *cycle_counts, @@ -225,7 +266,11 @@ typedef void (*kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn)( * struct kbase_hwcnt_backend_csf_if - Hardware counter backend CSF virtual * interface. * @ctx: CSF interface context. - * @get_gpu_info: Function ptr to get HWC related information. + * @assert_lock_held: Function ptr to assert backend spinlock is held. + * @lock: Function ptr to acquire backend spinlock. + * @unlock: Function ptr to release backend spinlock. + * @get_prfcnt_info: Function ptr to get performance counter related + * information. * @ring_buf_alloc: Function ptr to allocate ring buffer for CSF HWC. * @ring_buf_sync: Function ptr to sync ring buffer to CPU. * @ring_buf_free: Function ptr to free ring buffer for CSF HWC. 
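To illustrate the query interface that replaces get_gpu_info: a provider now fills a single kbase_hwcnt_backend_csf_if_prfcnt_info structure in one call instead of returning each property through separate out-pointers. The sketch below uses made-up example values and a placeholder function name; only the struct fields are taken from this change.

	/* Illustrative provider, example values only. */
	static void example_get_prfcnt_info(
		struct kbase_hwcnt_backend_csf_if_ctx *ctx,
		struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info)
	{
		prfcnt_info->dump_bytes = 32 * 1024;  /* bytes needed per dump */
		prfcnt_info->l2_count = 2;            /* L2 slice count */
		prfcnt_info->core_mask = 0xFF;        /* shader core bitmap */
		prfcnt_info->clk_cnt = 1;             /* clock domain count */
		prfcnt_info->clearing_samples = true; /* counters reset per sample */
	}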
@@ -243,13 +288,15 @@ typedef void (*kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn)( */ struct kbase_hwcnt_backend_csf_if { struct kbase_hwcnt_backend_csf_if_ctx *ctx; - kbase_hwcnt_backend_csf_if_get_gpu_info_fn get_gpu_info; + kbase_hwcnt_backend_csf_if_assert_lock_held_fn assert_lock_held; + kbase_hwcnt_backend_csf_if_lock_fn lock; + kbase_hwcnt_backend_csf_if_unlock_fn unlock; + kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn get_prfcnt_info; kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn ring_buf_alloc; kbase_hwcnt_backend_csf_if_ring_buf_sync_fn ring_buf_sync; kbase_hwcnt_backend_csf_if_ring_buf_free_fn ring_buf_free; kbase_hwcnt_backend_csf_if_timestamp_ns_fn timestamp_ns; kbase_hwcnt_backend_csf_if_dump_enable_fn dump_enable; - kbase_hwcnt_backend_csf_if_dump_enable_nolock_fn dump_enable_nolock; kbase_hwcnt_backend_csf_if_dump_disable_fn dump_disable; kbase_hwcnt_backend_csf_if_dump_request_fn dump_request; kbase_hwcnt_backend_csf_if_get_indexes_fn get_indexes; diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c index 7a3b239..67ca4cb 100644 --- a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c +++ b/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,7 +28,7 @@ #include <device/mali_kbase_device.h> #include "mali_kbase_hwcnt_gpu.h" #include "mali_kbase_hwcnt_types.h" -#include "csf/mali_gpu_csf_registers.h" +#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h> #include "csf/mali_kbase_csf_firmware.h" #include "mali_kbase_hwcnt_backend_csf_if_fw.h" @@ -88,6 +88,50 @@ struct kbase_hwcnt_backend_csf_if_fw_ctx { struct kbase_ccswe ccswe_shader_cores; }; +static void kbasep_hwcnt_backend_csf_if_fw_assert_lock_held( + struct kbase_hwcnt_backend_csf_if_ctx *ctx) +{ + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; + struct kbase_device *kbdev; + + WARN_ON(!ctx); + + fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + kbdev = fw_ctx->kbdev; + + kbase_csf_scheduler_spin_lock_assert_held(kbdev); +} + +static void +kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + unsigned long *flags) +{ + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; + struct kbase_device *kbdev; + + WARN_ON(!ctx); + + fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + kbdev = fw_ctx->kbdev; + + kbase_csf_scheduler_spin_lock(kbdev, flags); +} + +static void kbasep_hwcnt_backend_csf_if_fw_unlock( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long flags) +{ + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; + struct kbase_device *kbdev; + + WARN_ON(!ctx); + + fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + kbdev = fw_ctx->kbdev; + + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + kbase_csf_scheduler_spin_unlock(kbdev, flags); +} + /** * kbasep_hwcnt_backend_csf_if_fw_on_freq_change() - On freq change callback * @@ -170,16 +214,18 @@ static void kbasep_hwcnt_backend_csf_if_fw_cc_disable( rtm, &fw_ctx->rate_listener); } -static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_info( - struct kbase_hwcnt_backend_csf_if_ctx *ctx, size_t *dump_size, - size_t *l2_count, u64 *core_mask, u8 *clk_cnt) +static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( + 
struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info) { #ifdef CONFIG_MALI_NO_MALI - *l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS; - *core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1; - *dump_size = KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * - KBASE_DUMMY_MODEL_BLOCK_SIZE; - *clk_cnt = 1; + prfcnt_info->l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS; + prfcnt_info->core_mask = + (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1; + prfcnt_info->dump_bytes = KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * + KBASE_DUMMY_MODEL_BLOCK_SIZE; + prfcnt_info->clk_cnt = 1; + prfcnt_info->clearing_samples = false; #else struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; struct kbase_device *kbdev; @@ -188,10 +234,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_info( u32 prfcnt_fw_size = 0; WARN_ON(!ctx); - WARN_ON(!dump_size); - WARN_ON(!l2_count); - WARN_ON(!core_mask); - WARN_ON(!clk_cnt); + WARN_ON(!prfcnt_info); fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; kbdev = fw_ctx->kbdev; @@ -199,12 +242,14 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_info( prfcnt_hw_size = (prfcnt_size & 0xFF) << 8; prfcnt_fw_size = (prfcnt_size >> 16) << 8; fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size; - *dump_size = fw_ctx->buf_bytes; + prfcnt_info->dump_bytes = fw_ctx->buf_bytes; - *l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices; - *core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask; + prfcnt_info->l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices; + prfcnt_info->core_mask = + kbdev->gpu_props.props.coherency_info.group[0].core_mask; - *clk_cnt = fw_ctx->clk_cnt; + prfcnt_info->clk_cnt = fw_ctx->clk_cnt; + prfcnt_info->clearing_samples = true; #endif } @@ -331,9 +376,14 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync( WARN_ON(!ctx); WARN_ON(!ring_buf); - /* Get the buffer indexes in the ring buffer. */ + /* The index arguments for this function form an inclusive, exclusive + * range. + * However, when masking back to the available buffers we will make this + * inclusive at both ends so full flushes are not 0 -> 0. + */ ring_buf_index_first = buf_index_first & (fw_ring_buf->buf_count - 1); - ring_buf_index_last = buf_index_last & (fw_ring_buf->buf_count - 1); + ring_buf_index_last = + (buf_index_last - 1) & (fw_ring_buf->buf_count - 1); /* The start address is the offset of the first buffer. */ start_address = fw_ctx->buf_bytes * ring_buf_index_first; @@ -348,6 +398,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync( /* sync the first part to the end of ring buffer. 
*/ for (i = pg_first; i < fw_ring_buf->num_pages; i++) { struct page *pg = as_page(fw_ring_buf->phys[i]); + if (for_cpu) { kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg), @@ -367,6 +418,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync( for (i = pg_first; i <= pg_last; i++) { struct page *pg = as_page(fw_ring_buf->phys[i]); + if (for_cpu) { kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg), PAGE_SIZE, @@ -420,12 +472,11 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free( } } -static void kbasep_hwcnt_backend_csf_if_fw_dump_enable_nolock( +static void kbasep_hwcnt_backend_csf_if_fw_dump_enable( struct kbase_hwcnt_backend_csf_if_ctx *ctx, struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, struct kbase_hwcnt_backend_csf_if_enable *enable) { - unsigned long flags; u32 prfcnt_config; struct kbase_device *kbdev; struct kbase_csf_global_iface *global_iface; @@ -437,18 +488,15 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_enable_nolock( WARN_ON(!ctx); WARN_ON(!ring_buf); WARN_ON(!enable); + kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); kbdev = fw_ctx->kbdev; global_iface = &kbdev->csf.global_iface; - lockdep_assert_held(&kbdev->hwaccess_lock); - /* Configure */ prfcnt_config = fw_ring_buf->buf_count; prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT; - kbase_csf_scheduler_spin_lock(kbdev, &flags); - /* Configure the ring buffer base address */ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID, fw_ring_buf->as_nr); @@ -503,52 +551,25 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_enable_nolock( prfcnt_config = kbase_csf_firmware_global_input_read(global_iface, GLB_PRFCNT_CONFIG); - kbase_csf_scheduler_spin_unlock(kbdev, flags); - kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx, enable->clk_enable_map); } -static void kbasep_hwcnt_backend_csf_if_fw_dump_enable( - struct kbase_hwcnt_backend_csf_if_ctx *ctx, - struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, - struct kbase_hwcnt_backend_csf_if_enable *enable) -{ - unsigned long flags; - struct kbase_device *kbdev; - struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = - (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; - - WARN_ON(!ctx); - WARN_ON(!ring_buf); - WARN_ON(!enable); - - kbdev = fw_ctx->kbdev; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - kbasep_hwcnt_backend_csf_if_fw_dump_enable_nolock(ctx, ring_buf, - enable); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -} - static void kbasep_hwcnt_backend_csf_if_fw_dump_disable( struct kbase_hwcnt_backend_csf_if_ctx *ctx) { - unsigned long flags; struct kbase_device *kbdev; struct kbase_csf_global_iface *global_iface; struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; WARN_ON(!ctx); + kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); kbdev = fw_ctx->kbdev; global_iface = &kbdev->csf.global_iface; /* Disable the HWC */ - kbase_csf_scheduler_spin_lock(kbdev, &flags); kbdev->csf.hwcnt.enable_pending = true; kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, 0, GLB_REQ_PRFCNT_ENABLE_MASK); @@ -569,7 +590,6 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_disable( * happens. 
*/ kbdev->csf.hwcnt.request_pending = false; - kbase_csf_scheduler_spin_unlock(kbdev, flags); kbasep_hwcnt_backend_csf_if_fw_cc_disable(fw_ctx); } @@ -577,7 +597,6 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_disable( static void kbasep_hwcnt_backend_csf_if_fw_dump_request( struct kbase_hwcnt_backend_csf_if_ctx *ctx) { - unsigned long flags; u32 glb_req; struct kbase_device *kbdev; struct kbase_csf_global_iface *global_iface; @@ -585,57 +604,52 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_request( (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; WARN_ON(!ctx); + kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); kbdev = fw_ctx->kbdev; global_iface = &kbdev->csf.global_iface; /* Trigger dumping */ - kbase_csf_scheduler_spin_lock(kbdev, &flags); kbdev->csf.hwcnt.request_pending = true; glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ); glb_req ^= GLB_REQ_PRFCNT_SAMPLE_MASK; kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req, GLB_REQ_PRFCNT_SAMPLE_MASK); kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); - kbase_csf_scheduler_spin_unlock(kbdev, flags); } static void kbasep_hwcnt_backend_csf_if_fw_get_indexes( struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index, u32 *insert_index) { - unsigned long flags; struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; WARN_ON(!ctx); WARN_ON(!extract_index); WARN_ON(!insert_index); + kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); - kbase_csf_scheduler_spin_lock(fw_ctx->kbdev, &flags); *extract_index = kbase_csf_firmware_global_input_read( &fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT); *insert_index = kbase_csf_firmware_global_output( &fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_INSERT); - kbase_csf_scheduler_spin_unlock(fw_ctx->kbdev, flags); } static void kbasep_hwcnt_backend_csf_if_fw_set_extract_index( struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_idx) { - unsigned long flags; struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; WARN_ON(!ctx); + kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); /* Set the raw extract index to release the buffer back to the ring * buffer. 
*/ - kbase_csf_scheduler_spin_lock(fw_ctx->kbdev, &flags); kbase_csf_firmware_global_input(&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT, extract_idx); - kbase_csf_scheduler_spin_unlock(fw_ctx->kbdev, flags); } static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count( @@ -649,6 +663,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count( WARN_ON(!ctx); WARN_ON(!cycle_counts); + kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); for (clk = 0; clk < fw_ctx->clk_cnt; clk++) { if (!(clk_enable_map & (1ull << clk))) @@ -749,14 +764,16 @@ int kbase_hwcnt_backend_csf_if_fw_create( return errcode; if_fw->ctx = (struct kbase_hwcnt_backend_csf_if_ctx *)ctx; - if_fw->get_gpu_info = kbasep_hwcnt_backend_csf_if_fw_get_gpu_info; + if_fw->assert_lock_held = + kbasep_hwcnt_backend_csf_if_fw_assert_lock_held; + if_fw->lock = kbasep_hwcnt_backend_csf_if_fw_lock; + if_fw->unlock = kbasep_hwcnt_backend_csf_if_fw_unlock; + if_fw->get_prfcnt_info = kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info; if_fw->ring_buf_alloc = kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc; if_fw->ring_buf_sync = kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync; if_fw->ring_buf_free = kbasep_hwcnt_backend_csf_if_fw_ring_buf_free; if_fw->timestamp_ns = kbasep_hwcnt_backend_csf_if_fw_timestamp_ns; if_fw->dump_enable = kbasep_hwcnt_backend_csf_if_fw_dump_enable; - if_fw->dump_enable_nolock = - kbasep_hwcnt_backend_csf_if_fw_dump_enable_nolock; if_fw->dump_disable = kbasep_hwcnt_backend_csf_if_fw_dump_disable; if_fw->dump_request = kbasep_hwcnt_backend_csf_if_fw_dump_request; if_fw->get_gpu_cycle_count = diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.h b/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.h index d72851e..f55efb6 100644 --- a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.h +++ b/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/mali_kbase/mali_kbase_hwcnt_backend_jm.c b/mali_kbase/mali_kbase_hwcnt_backend_jm.c index c6c672c..4168472 100644 --- a/mali_kbase/mali_kbase_hwcnt_backend_jm.c +++ b/mali_kbase/mali_kbase_hwcnt_backend_jm.c @@ -62,6 +62,8 @@ struct kbase_hwcnt_backend_jm_info { * @enabled: True if dumping has been enabled, else false. * @pm_core_mask: PM state sync-ed shaders core mask for the enabled * dumping. + * @curr_config: Current allocated hardware resources to correctly map the src + * raw dump buffer to the dst dump buffer. * @clk_enable_map: The enable map specifying enabled clock domains. * @cycle_count_elapsed: * Cycle count elapsed for a given sample period. @@ -81,6 +83,7 @@ struct kbase_hwcnt_backend_jm { struct kbase_vmap_struct *vmap; bool enabled; u64 pm_core_mask; + struct kbase_hwcnt_curr_config curr_config; u64 clk_enable_map; u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS]; u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS]; @@ -89,15 +92,16 @@ struct kbase_hwcnt_backend_jm { }; /** - * kbase_hwcnt_gpu_info_init() - Initialise an info structure used to create the - * hwcnt metadata. + * kbasep_hwcnt_backend_jm_gpu_info_init() - Initialise an info structure used + * to create the hwcnt metadata. * @kbdev: Non-NULL pointer to kbase device. * @info: Non-NULL pointer to data structure to be filled in. 
* * The initialised info struct will only be valid for use while kbdev is valid. */ -static int kbase_hwcnt_gpu_info_init(struct kbase_device *kbdev, - struct kbase_hwcnt_gpu_info *info) +static int +kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev, + struct kbase_hwcnt_gpu_info *info) { size_t clk; @@ -240,6 +244,37 @@ static void kbasep_hwcnt_backend_jm_cc_disable( } +/** + * kbasep_hwcnt_gpu_update_curr_config() - Update the destination buffer with + * current config information. + * @kbdev: Non-NULL pointer to kbase device. + * @curr_config: Non-NULL pointer to return the current configuration of + * hardware allocated to the GPU. + * + * The current configuration information is used for architectures where the + * max_config interface is available from the Arbiter. In this case the current + * allocated hardware is not always the same, so the current config information + * is used to correctly map the current allocated resources to the memory layout + * that is copied to the user space. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_gpu_update_curr_config( + struct kbase_device *kbdev, + struct kbase_hwcnt_curr_config *curr_config) +{ + if (WARN_ON(!kbdev) || WARN_ON(!curr_config)) + return -EINVAL; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + curr_config->num_l2_slices = + kbdev->gpu_props.curr_config.l2_slices; + curr_config->shader_present = + kbdev->gpu_props.curr_config.shader_present; + return 0; +} + /* JM backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */ static u64 kbasep_hwcnt_backend_jm_timestamp_ns( struct kbase_hwcnt_backend *backend) @@ -287,11 +322,18 @@ static int kbasep_hwcnt_backend_jm_dump_enable_nolock( timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend); + /* Update the current configuration information. */ + errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, + &backend_jm->curr_config); + if (errcode) + goto error; + errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable); if (errcode) goto error; backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev); + backend_jm->enabled = true; kbasep_hwcnt_backend_jm_cc_enable(backend_jm, enable_map, timestamp_ns); @@ -372,7 +414,7 @@ static int kbasep_hwcnt_backend_jm_dump_request( size_t clk; int ret; - if (!backend_jm || !backend_jm->enabled) + if (!backend_jm || !backend_jm->enabled || !dump_time_ns) return -EINVAL; kbdev = backend_jm->kctx->kbdev; @@ -441,6 +483,11 @@ static int kbasep_hwcnt_backend_jm_dump_get( struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; size_t clk; +#ifdef CONFIG_MALI_NO_MALI + struct kbase_device *kbdev; + unsigned long flags; + int errcode; +#endif if (!backend_jm || !dst || !dst_enable_map || (backend_jm->info->metadata != dst->metadata) || @@ -460,9 +507,24 @@ static int kbasep_hwcnt_backend_jm_dump_get( dst->clk_cnt_buf[clk] = backend_jm->cycle_count_elapsed[clk]; } +#ifdef CONFIG_MALI_NO_MALI + kbdev = backend_jm->kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + /* Update the current configuration information. 
*/ + errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, + &backend_jm->curr_config); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (errcode) + return errcode; +#endif + return kbase_hwcnt_jm_dump_get(dst, backend_jm->cpu_dump_va, dst_enable_map, backend_jm->pm_core_mask, - accumulate); + &backend_jm->curr_config, accumulate); } /** @@ -684,7 +746,7 @@ static int kbasep_hwcnt_backend_jm_info_create( WARN_ON(!kbdev); WARN_ON(!out_info); - errcode = kbase_hwcnt_gpu_info_init(kbdev, &hwcnt_gpu_info); + errcode = kbasep_hwcnt_backend_jm_gpu_info_init(kbdev, &hwcnt_gpu_info); if (errcode) return errcode; diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.c b/mali_kbase/mali_kbase_hwcnt_gpu.c index 91d1f8c..4fba6b6 100644 --- a/mali_kbase/mali_kbase_hwcnt_gpu.c +++ b/mali_kbase/mali_kbase_hwcnt_gpu.c @@ -242,6 +242,13 @@ int kbase_hwcnt_jm_metadata_create( if (!gpu_info || !out_metadata || !out_dump_bytes) return -EINVAL; + /* + * For architectures where a max_config interface is available + * from the arbiter, the v5 dump bytes and the metadata v5 are + * based on the maximum possible allocation of the HW in the + * GPU cause it needs to be prepared for the worst case where + * all the available L2 cache and Shader cores are allocated. + */ dump_bytes = kbasep_hwcnt_backend_jm_dump_bytes(gpu_info); errcode = kbasep_hwcnt_backend_gpu_metadata_create( gpu_info, false, counter_set, &metadata); @@ -260,8 +267,7 @@ int kbase_hwcnt_jm_metadata_create( return 0; } -void kbase_hwcnt_jm_metadata_destroy( - const struct kbase_hwcnt_metadata *metadata) +void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata) { if (!metadata) return; @@ -318,15 +324,41 @@ static bool is_block_type_shader( return is_shader; } +static bool is_block_type_l2_cache( + const u64 grp_type, + const u64 blk_type) +{ + bool is_l2_cache = false; + + switch (grp_type) { + case KBASE_HWCNT_GPU_GROUP_TYPE_V5: + if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS || + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2) + is_l2_cache = true; + break; + default: + /* Warn on unknown group type */ + WARN_ON(true); + } + + return is_l2_cache; +} + int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, const struct kbase_hwcnt_enable_map *dst_enable_map, - u64 pm_core_mask, bool accumulate) + u64 pm_core_mask, + const struct kbase_hwcnt_curr_config *curr_config, + bool accumulate) { const struct kbase_hwcnt_metadata *metadata; const u32 *dump_src; size_t src_offset, grp, blk, blk_inst; u64 core_mask = pm_core_mask; + /* Variables to deal with the current configuration */ + int l2_count = 0; + bool hw_res_available = true; + if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata)) return -EINVAL; @@ -348,15 +380,43 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, const bool is_shader_core = is_block_type_shader( kbase_hwcnt_metadata_group_type(metadata, grp), blk_type, blk); + const bool is_l2_cache = is_block_type_l2_cache( + kbase_hwcnt_metadata_group_type(metadata, grp), + blk_type); + + /* + * If l2 blocks is greater than the current allocated number of + * L2 slices, there is no hw allocated to that block. 
+ */ + if (is_l2_cache) { + l2_count++; + if (l2_count > curr_config->num_l2_slices) + hw_res_available = false; + else + hw_res_available = true; + } + /* + * For the shader cores, the current shader_mask allocated is + * always a subgroup of the maximum shader_mask, so after + * jumping any L2 cache not available the available shader cores + * will always have a matching set of blk instances available to + * accumulate them. + */ + else { + hw_res_available = true; + } - /* Early out if no values in the dest block are enabled */ + /* + * Early out if no values in the dest block are enabled or if + * the resource target of the block is not available in the HW. + */ if (kbase_hwcnt_enable_map_block_enabled( dst_enable_map, grp, blk, blk_inst)) { u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( dst, grp, blk, blk_inst); const u32 *src_blk = dump_src + src_offset; - if (!is_shader_core || (core_mask & 1)) { + if ((!is_shader_core || (core_mask & 1)) && hw_res_available) { if (accumulate) { kbase_hwcnt_dump_buffer_block_accumulate( dst_blk, src_blk, hdr_cnt, @@ -372,7 +432,9 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, } } - src_offset += (hdr_cnt + ctr_cnt); + /* Just increase the src_offset if the HW is available */ + if (hw_res_available) + src_offset += (hdr_cnt + ctr_cnt); if (is_shader_core) core_mask = core_mask >> 1; } @@ -380,10 +442,9 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, return 0; } -int kbase_hwcnt_csf_dump_get( - struct kbase_hwcnt_dump_buffer *dst, void *src, - const struct kbase_hwcnt_enable_map *dst_enable_map, - bool accumulate) +int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, + const struct kbase_hwcnt_enable_map *dst_enable_map, + bool accumulate) { const struct kbase_hwcnt_metadata *metadata; const u32 *dump_src; diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.h b/mali_kbase/mali_kbase_hwcnt_gpu.h index 4ebff2d..9b846a9 100644 --- a/mali_kbase/mali_kbase_hwcnt_gpu.h +++ b/mali_kbase/mali_kbase_hwcnt_gpu.h @@ -128,6 +128,50 @@ struct kbase_hwcnt_gpu_info { }; /** + * struct kbase_hwcnt_curr_config - Current Configuration of HW allocated to the + * GPU. + * @num_l2_slices: Current number of L2 slices allocated to the GPU. + * @shader_present: Current shader present bitmap that is allocated to the GPU. + * + * For architectures with the max_config interface available from the Arbiter, + * the current resources allocated may change during runtime due to a + * re-partitioning (possible with partition manager). Thus, the HWC needs to be + * prepared to report any possible set of counters. For this reason the memory + * layout in the userspace is based on the maximum possible allocation. On the + * other hand, each partition has just the view of its currently allocated + * resources. Therefore, it is necessary to correctly map the dumped HWC values + * from the registers into this maximum memory layout so that it can be exposed + * to the userspace side correctly. + * + * For L2 cache just the number is enough once the allocated ones will be + * accumulated on the first L2 slots available in the destination buffer. + * + * For the correct mapping of the shader cores it is necessary to jump all the + * L2 cache slots in the destination buffer that are not allocated. But, it is + * not necessary to add any logic to map the shader cores bitmap into the memory + * layout because the shader_present allocated will always be a subset of the + * maximum shader_present. 
It is possible because: + 1 - Partitions are made of slices and they are always ordered from the ones + * with more shader cores to the ones with fewer. + 2 - The shader cores in a slice are always contiguous. + 3 - A partition can only have a contiguous set of slices allocated to it. + For example, suppose 4 slices are available in total: one with 4 cores, two + * with 3 cores and one with 2 cores. The maximum possible shader_present would be: + 0x0011|0111|0111|1111 -> note the order and that the shader cores are + contiguous in any slice. + Supposing that a partition takes the two slices in the middle, the current + config shader_present for this partition would be: + 0x0111|0111 -> note that this is a subset of the maximum above and the slices + are contiguous. + Therefore, by directly copying any subset of the maximum possible + shader_present, the mapping is already achieved. + */ +struct kbase_hwcnt_curr_config { + size_t num_l2_slices; + u64 shader_present; +}; + +/** * kbase_hwcnt_jm_metadata_create() - Create hardware counter metadata for the * JM GPUs. * @info: Non-NULL pointer to info struct. @@ -186,6 +230,8 @@ void kbase_hwcnt_csf_metadata_destroy( * kbase_hwcnt_jm_metadata_create. * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. * @pm_core_mask: PM state synchronized shaders core mask with the dump. + * @curr_config: Currently allocated hardware resources, used to correctly map + * the src raw dump buffer to the dst dump buffer. + * @accumulate: True if counters in src should be accumulated into dst, * rather than copied. * @@ -197,7 +243,9 @@ void kbase_hwcnt_csf_metadata_destroy( */ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, const struct kbase_hwcnt_enable_map *dst_enable_map, - const u64 pm_core_mask, bool accumulate); + const u64 pm_core_mask, + const struct kbase_hwcnt_curr_config *curr_config, + bool accumulate); /** * kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw @@ -217,10 +265,9 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, * * Return: 0 on success, else error code. */ -int kbase_hwcnt_csf_dump_get( - struct kbase_hwcnt_dump_buffer *dst, void *src, - const struct kbase_hwcnt_enable_map *dst_enable_map, - bool accumulate) +int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, + const struct kbase_hwcnt_enable_map *dst_enable_map, + bool accumulate); /** * kbase_hwcnt_gpu_enable_map_to_physical() - Convert an enable map abstraction diff --git a/mali_kbase/mali_kbase_hwcnt_legacy.c b/mali_kbase/mali_kbase_hwcnt_legacy.c index bd523dd..e87dbbf 100644 --- a/mali_kbase/mali_kbase_hwcnt_legacy.c +++ b/mali_kbase/mali_kbase_hwcnt_legacy.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,7 +23,7 @@ #include "mali_kbase_hwcnt_virtualizer.h" #include "mali_kbase_hwcnt_types.h" #include "mali_kbase_hwcnt_gpu.h" -#include "mali_kbase_ioctl.h" +#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h> #include <linux/slab.h> #include <linux/uaccess.h> diff --git a/mali_kbase/mali_kbase_jd.c b/mali_kbase/mali_kbase_jd.c index 1cf24a2..949c041 100644 --- a/mali_kbase/mali_kbase_jd.c +++ b/mali_kbase/mali_kbase_jd.c @@ -74,7 +74,7 @@ static void jd_mark_atom_complete(struct kbase_jd_atom *katom) { katom->status = KBASE_JD_ATOM_STATE_COMPLETED; kbase_kinstr_jm_atom_complete(katom); - dev_dbg(katom->kctx->kbdev->dev, "Atom %p status to completed\n", + dev_dbg(katom->kctx->kbdev->dev, "Atom %pK status to completed\n", (void *)katom); } @@ -89,7 +89,7 @@ static bool jd_run_atom(struct kbase_jd_atom *katom) { struct kbase_context *kctx = katom->kctx; - dev_dbg(kctx->kbdev->dev, "JD run atom %p in kctx %p\n", + dev_dbg(kctx->kbdev->dev, "JD run atom %pK in kctx %pK\n", (void *)katom, (void *)kctx); KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); @@ -99,23 +99,23 @@ static bool jd_run_atom(struct kbase_jd_atom *katom) trace_sysgraph(SGR_SUBMIT, kctx->id, kbase_jd_atom_id(katom->kctx, katom)); jd_mark_atom_complete(katom); - return 0; + return false; } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { /* Soft-job */ if (katom->will_fail_event_code) { kbase_finish_soft_job(katom); jd_mark_atom_complete(katom); - return 0; + return false; } if (kbase_process_soft_job(katom) == 0) { kbase_finish_soft_job(katom); jd_mark_atom_complete(katom); } - return 0; + return false; } katom->status = KBASE_JD_ATOM_STATE_IN_JS; - dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", (void *)katom); + dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n", (void *)katom); /* Queue an action about whether we should try scheduling a context */ return kbasep_js_add_job(kctx, katom); } @@ -758,7 +758,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, list_del(runnable_jobs.next); node->in_jd_list = false; - dev_dbg(kctx->kbdev->dev, "List node %p has status %d\n", + dev_dbg(kctx->kbdev->dev, "List node %pK has status %d\n", node, node->status); KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED); @@ -901,7 +901,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, unsigned long flags; enum kbase_jd_atom_state status; - dev_dbg(kbdev->dev, "User did JD submit atom %p\n", (void *)katom); + dev_dbg(kbdev->dev, "User did JD submit atom %pK\n", (void *)katom); /* Update the TOTAL number of jobs. This includes those not tracked by * the scheduler: 'not ready to run' and 'dependency-only' jobs. @@ -976,7 +976,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT; katom->status = KBASE_JD_ATOM_STATE_COMPLETED; dev_dbg(kbdev->dev, - "Atom %p status to completed\n", + "Atom %pK status to completed\n", (void *)katom); /* Wrong dependency setup. Atom will be sent @@ -1019,7 +1019,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, /* Atom has completed, propagate the error code if any */ katom->event_code = dep_atom->event_code; katom->status = KBASE_JD_ATOM_STATE_QUEUED; - dev_dbg(kbdev->dev, "Atom %p status to queued\n", + dev_dbg(kbdev->dev, "Atom %pK status to queued\n", (void *)katom); /* This atom will be sent back to user space. 
@@ -1062,7 +1062,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, */ katom->event_code = BASE_JD_EVENT_DONE; katom->status = KBASE_JD_ATOM_STATE_QUEUED; - dev_dbg(kbdev->dev, "Atom %p status to queued\n", (void *)katom); + dev_dbg(kbdev->dev, "Atom %pK status to queued\n", (void *)katom); /* For invalid priority, be most lenient and choose the default */ sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio); @@ -1199,7 +1199,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, bool need_to_try_schedule_context; katom->status = KBASE_JD_ATOM_STATE_IN_JS; - dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", + dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n", (void *)katom); need_to_try_schedule_context = kbasep_js_add_job(kctx, katom); @@ -1270,7 +1270,7 @@ int kbase_jd_submit(struct kbase_context *kctx, if (unlikely(jd_atom_is_v2)) { if (copy_from_user(&user_atom.jc, user_addr, sizeof(struct base_jd_atom_v2)) != 0) { - dev_err(kbdev->dev, + dev_dbg(kbdev->dev, "Invalid atom address %p passed to job_submit\n", user_addr); err = -EFAULT; @@ -1281,7 +1281,7 @@ int kbase_jd_submit(struct kbase_context *kctx, user_atom.seq_nr = 0; } else { if (copy_from_user(&user_atom, user_addr, stride) != 0) { - dev_err(kbdev->dev, + dev_dbg(kbdev->dev, "Invalid atom address %p passed to job_submit\n", user_addr); err = -EFAULT; @@ -1420,7 +1420,7 @@ void kbase_jd_done_worker(struct work_struct *data) js_kctx_info = &kctx->jctx.sched_info; js_devdata = &kbdev->js_data; - dev_dbg(kbdev->dev, "Enter atom %p done worker for kctx %p\n", + dev_dbg(kbdev->dev, "Enter atom %pK done worker for kctx %pK\n", (void *)katom, (void *)kctx); KBASE_KTRACE_ADD_JM(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0); @@ -1444,7 +1444,7 @@ void kbase_jd_done_worker(struct work_struct *data) if (katom->event_code == BASE_JD_EVENT_STOPPED) { unsigned long flags; - dev_dbg(kbdev->dev, "Atom %p has been promoted to stopped\n", + dev_dbg(kbdev->dev, "Atom %pK has been promoted to stopped\n", (void *)katom); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); @@ -1452,7 +1452,7 @@ void kbase_jd_done_worker(struct work_struct *data) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); katom->status = KBASE_JD_ATOM_STATE_IN_JS; - dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", + dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n", (void *)katom); kbase_js_unpull(kctx, katom); @@ -1568,7 +1568,7 @@ void kbase_jd_done_worker(struct work_struct *data) KBASE_KTRACE_ADD_JM(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0); - dev_dbg(kbdev->dev, "Leave atom %p done worker for kctx %p\n", + dev_dbg(kbdev->dev, "Leave atom %pK done worker for kctx %pK\n", (void *)katom, (void *)kctx); } @@ -1698,7 +1698,7 @@ void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom) kctx = katom->kctx; KBASE_DEBUG_ASSERT(kctx != NULL); - dev_dbg(kbdev->dev, "JD: cancelling atom %p\n", (void *)katom); + dev_dbg(kbdev->dev, "JD: cancelling atom %pK\n", (void *)katom); KBASE_KTRACE_ADD_JM(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0); /* This should only be done from a context that is not scheduled */ diff --git a/mali_kbase/mali_kbase_jd_debugfs.c b/mali_kbase/mali_kbase_jd_debugfs.c index 940b920..f423758 100644 --- a/mali_kbase/mali_kbase_jd_debugfs.c +++ b/mali_kbase/mali_kbase_jd_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. 
All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,7 +28,7 @@ #if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) #include <mali_kbase_sync.h> #endif -#include <mali_kbase_ioctl.h> +#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h> struct kbase_jd_debugfs_depinfo { u8 id; @@ -46,13 +46,13 @@ static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom, case BASE_JD_REQ_SOFT_FENCE_TRIGGER: res = kbase_sync_fence_out_info_get(atom, &info); if (res == 0) - seq_printf(sfile, "Sa([%p]%d) ", + seq_printf(sfile, "Sa([%pK]%d) ", info.fence, info.status); break; case BASE_JD_REQ_SOFT_FENCE_WAIT: res = kbase_sync_fence_in_info_get(atom, &info); if (res == 0) - seq_printf(sfile, "Wa([%p]%d) ", + seq_printf(sfile, "Wa([%pK]%d) ", info.fence, info.status); break; default: diff --git a/mali_kbase/mali_kbase_jm.c b/mali_kbase/mali_kbase_jm.c index be14b45..73e9905 100644 --- a/mali_kbase/mali_kbase_jm.c +++ b/mali_kbase/mali_kbase_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -45,7 +45,7 @@ static bool kbase_jm_next_job(struct kbase_device *kbdev, int js, kctx = kbdev->hwaccess.active_kctx[js]; dev_dbg(kbdev->dev, - "Trying to run the next %d jobs in kctx %p (s:%d)\n", + "Trying to run the next %d jobs in kctx %pK (s:%d)\n", nr_jobs_to_submit, (void *)kctx, js); if (!kctx) @@ -117,7 +117,7 @@ void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { if (kbdev->hwaccess.active_kctx[js] == kctx) { - dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n", + dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n", (void *)kctx, js); kbdev->hwaccess.active_kctx[js] = NULL; } @@ -129,7 +129,7 @@ struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, { lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Atom %p is returning with event code 0x%x\n", + dev_dbg(kbdev->dev, "Atom %pK is returning with event code 0x%x\n", (void *)katom, katom->event_code); if (katom->event_code != BASE_JD_EVENT_STOPPED && diff --git a/mali_kbase/mali_kbase_js.c b/mali_kbase/mali_kbase_js.c index ea317b2..6bb57e6 100644 --- a/mali_kbase/mali_kbase_js.c +++ b/mali_kbase/mali_kbase_js.c @@ -162,7 +162,7 @@ jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) none_to_pull = RB_EMPTY_ROOT(&rb->runnable_tree); dev_dbg(kctx->kbdev->dev, - "Slot %d (prio %d) is %spullable in kctx %p\n", + "Slot %d (prio %d) is %spullable in kctx %pK\n", js, prio, none_to_pull ? 
"not " : "", kctx); return none_to_pull; @@ -186,7 +186,7 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - for (prio = KBASE_JS_ATOM_SCHED_PRIO_REALTIME; + for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { if (!jsctx_rb_none_to_pull_prio(kctx, js, prio)) return false; @@ -236,7 +236,7 @@ jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, WARN_ON(!(entry->core_req & BASE_JD_REQ_END_RENDERPASS)); dev_dbg(kctx->kbdev->dev, - "Del runnable atom %p from X_DEP list\n", + "Del runnable atom %pK from X_DEP list\n", (void *)entry); list_del(&entry->queue); @@ -252,7 +252,7 @@ jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, WARN_ON(!(entry->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); dev_dbg(kctx->kbdev->dev, - "Del blocked atom %p from X_DEP list\n", + "Del blocked atom %pK from X_DEP list\n", (void *)entry); list_del(queue->x_dep_head.next); @@ -279,7 +279,7 @@ jsctx_queue_foreach(struct kbase_context *kctx, int js, { int prio; - for (prio = KBASE_JS_ATOM_SCHED_PRIO_REALTIME; + for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) jsctx_queue_foreach_prio(kctx, js, prio, callback); } @@ -303,7 +303,7 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) lockdep_assert_held(&kctx->kbdev->hwaccess_lock); dev_dbg(kctx->kbdev->dev, - "Peeking runnable tree of kctx %p for prio %d (s:%d)\n", + "Peeking runnable tree of kctx %pK for prio %d (s:%d)\n", (void *)kctx, prio, js); node = rb_first(&rb->runnable_tree); @@ -335,7 +335,7 @@ jsctx_rb_peek(struct kbase_context *kctx, int js) lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - for (prio = KBASE_JS_ATOM_SCHED_PRIO_REALTIME; + for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { struct kbase_jd_atom *katom; @@ -365,7 +365,7 @@ jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - dev_dbg(kctx->kbdev->dev, "Erasing atom %p from runnable tree of kctx %p\n", + dev_dbg(kctx->kbdev->dev, "Erasing atom %pK from runnable tree of kctx %pK\n", (void *)katom, (void *)kctx); /* Atoms must be pulled in the correct order. 
*/ @@ -387,7 +387,7 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Adding atom %p to runnable tree of kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "Adding atom %pK to runnable tree of kctx %pK (s:%d)\n", (void *)katom, (void *)kctx, js); while (*new) { @@ -542,7 +542,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) sema_init(&jsdd->schedule_sem, 1); for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) { - for (j = 0; j < KBASE_JS_ATOM_SCHED_PRIO_COUNT; ++j) { + for (j = KBASE_JS_ATOM_SCHED_PRIO_FIRST; j < KBASE_JS_ATOM_SCHED_PRIO_COUNT; ++j) { INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i][j]); INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i][j]); } @@ -610,7 +610,7 @@ int kbasep_js_kctx_init(struct kbase_context *const kctx) init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait); - for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { + for (i = KBASE_JS_ATOM_SCHED_PRIO_FIRST; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) { INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head); kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT; @@ -684,7 +684,7 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, bool ret = false; lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Add pullable tail kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "Add pullable tail kctx %pK (s:%d)\n", (void *)kctx, js); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) @@ -726,7 +726,7 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock( bool ret = false; lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Add pullable head kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "Add pullable head kctx %pK (s:%d)\n", (void *)kctx, js); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) @@ -802,7 +802,7 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, bool ret = false; lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Add unpullable tail kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "Add unpullable tail kctx %pK (s:%d)\n", (void *)kctx, js); list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], @@ -885,7 +885,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( lockdep_assert_held(&kbdev->hwaccess_lock); - for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { + for (i = KBASE_JS_ATOM_SCHED_PRIO_FIRST; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { if (list_empty(&kbdev->js_data.ctx_list_pullable[js][i])) continue; @@ -895,7 +895,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); dev_dbg(kbdev->dev, - "Popped %p from the pullable queue (s:%d)\n", + "Popped %pK from the pullable queue (s:%d)\n", (void *)kctx, js); return kctx; } @@ -949,25 +949,25 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, if (is_scheduled) { if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) { - dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %p\n", + dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %pK\n", (void *)kctx); return false; } } katom = jsctx_rb_peek(kctx, js); if (!katom) { - dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%d)\n", (void *)kctx, js); return false; /* No pullable atoms */ } if (kctx->blocked_js[js][katom->sched_priority]) { dev_dbg(kbdev->dev, - "JS: kctx %p is 
blocked from submitting atoms at priority %d (s:%d)\n", + "JS: kctx %pK is blocked from submitting atoms at priority %d (s:%d)\n", (void *)kctx, katom->sched_priority, js); return false; } if (atomic_read(&katom->blocked)) { - dev_dbg(kbdev->dev, "JS: Atom %p is blocked in js_ctx_pullable\n", + dev_dbg(kbdev->dev, "JS: Atom %pK is blocked in js_ctx_pullable\n", (void *)katom); return false; /* next atom blocked */ } @@ -976,20 +976,20 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || katom->x_pre_dep->will_fail_event_code) { dev_dbg(kbdev->dev, - "JS: X pre-dep %p is not present in slot FIFO or will fail\n", + "JS: X pre-dep %pK is not present in slot FIFO or will fail\n", (void *)katom->x_pre_dep); return false; } if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) { dev_dbg(kbdev->dev, - "JS: Atom %p has cross-slot fail dependency and atoms on slot (s:%d)\n", + "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%d)\n", (void *)katom, js); return false; } } - dev_dbg(kbdev->dev, "JS: Atom %p is pullable in kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "JS: Atom %pK is pullable in kctx %pK (s:%d)\n", (void *)katom, (void *)kctx, js); return true; @@ -1013,7 +1013,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, int dep_prio = dep_atom->sched_priority; dev_dbg(kbdev->dev, - "Checking dep %d of atom %p (s:%d) on %p (s:%d)\n", + "Checking dep %d of atom %pK (s:%d) on %pK (s:%d)\n", i, (void *)katom, js, (void *)dep_atom, dep_js); /* Dependent atom must already have been submitted */ @@ -1115,7 +1115,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, int dep_js = kbase_js_get_slot(kbdev, dep_atom); dev_dbg(kbdev->dev, - "Clearing dep %d of atom %p (s:%d) on %p (s:%d)\n", + "Clearing dep %d of atom %pK (s:%d) on %pK (s:%d)\n", i, (void *)katom, js, (void *)dep_atom, dep_js); @@ -1130,7 +1130,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, katom->atom_flags |= KBASE_KATOM_FLAG_X_DEP_BLOCKED; - dev_dbg(kbdev->dev, "Set X_DEP flag on atom %p\n", + dev_dbg(kbdev->dev, "Set X_DEP flag on atom %pK\n", (void *)katom); katom->x_pre_dep = dep_atom; @@ -1154,7 +1154,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, } } else { dev_dbg(kbdev->dev, - "Deps of atom %p (s:%d) could not be represented\n", + "Deps of atom %pK (s:%d) could not be represented\n", (void *)katom, js); } @@ -1195,7 +1195,7 @@ void kbase_js_update_ctx_priority(struct kbase_context *kctx) /* Determine the new priority for context, as per the priority * of currently in-use atoms. 
*/ - for (prio = KBASE_JS_ATOM_SCHED_PRIO_REALTIME; + for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { if (kctx->atoms_count[prio]) { new_priority = prio; @@ -1237,7 +1237,7 @@ static int js_add_start_rp(struct kbase_jd_atom *const start_katom) if (rp->state != KBASE_JD_RP_COMPLETE) return -EINVAL; - dev_dbg(kctx->kbdev->dev, "JS add start atom %p of RP %d\n", + dev_dbg(kctx->kbdev->dev, "JS add start atom %pK of RP %d\n", (void *)start_katom, start_katom->renderpass_id); /* The following members are read when updating the job slot @@ -1280,7 +1280,7 @@ static int js_add_end_rp(struct kbase_jd_atom *const end_katom) rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; - dev_dbg(kbdev->dev, "JS add end atom %p in state %d of RP %d\n", + dev_dbg(kbdev->dev, "JS add end atom %pK in state %d of RP %d\n", (void *)end_katom, (int)rp->state, end_katom->renderpass_id); if (rp->state == KBASE_JD_RP_COMPLETE) @@ -1347,7 +1347,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx, /* Refcount ctx.nr_jobs */ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX); ++(js_kctx_info->ctx.nr_jobs); - dev_dbg(kbdev->dev, "Add atom %p to kctx %p; now %d in ctx\n", + dev_dbg(kbdev->dev, "Add atom %pK to kctx %pK; now %d in ctx\n", (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); /* Lock for state available during IRQ */ @@ -1360,14 +1360,14 @@ bool kbasep_js_add_job(struct kbase_context *kctx, /* Dependencies could not be represented */ --(js_kctx_info->ctx.nr_jobs); dev_dbg(kbdev->dev, - "Remove atom %p from kctx %p; now %d in ctx\n", + "Remove atom %pK from kctx %pK; now %d in ctx\n", (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); /* Setting atom status back to queued as it still has unresolved * dependencies */ atom->status = KBASE_JD_ATOM_STATE_QUEUED; - dev_dbg(kbdev->dev, "Atom %p status to queued\n", (void *)atom); + dev_dbg(kbdev->dev, "Atom %pK status to queued\n", (void *)atom); /* Undo the count, as the atom will get added again later but * leave the context priority adjusted or boosted, in case if @@ -1430,7 +1430,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx, * context on the Queue */ KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx); + dev_dbg(kbdev->dev, "JS: Enqueue Context %pK", kctx); /* Queue was updated - caller must try to schedule the * head context @@ -1439,7 +1439,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx, } } out_unlock: - dev_dbg(kbdev->dev, "Enqueue of kctx %p is %srequired\n", + dev_dbg(kbdev->dev, "Enqueue of kctx %pK is %srequired\n", kctx, enqueue_required ? 
"" : "not "); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); @@ -1468,7 +1468,7 @@ void kbasep_js_remove_job(struct kbase_device *kbdev, KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0); --(js_kctx_info->ctx.nr_jobs); dev_dbg(kbdev->dev, - "Remove atom %p from kctx %p; now %d in ctx\n", + "Remove atom %pK from kctx %pK; now %d in ctx\n", (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -1660,7 +1660,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( /* Last reference, and we've been told to remove this context * from the Run Pool */ - dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because refcount=%d, jobs=%d, allowed=%d", + dev_dbg(kbdev->dev, "JS: RunPool Remove Context %pK because refcount=%d, jobs=%d, allowed=%d", kctx, new_ref_count, js_kctx_info->ctx.nr_jobs, kbasep_js_is_submit_allowed(js_devdata, kctx)); @@ -1670,7 +1670,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( for (slot = 0; slot < num_slots; slot++) { if (kbdev->hwaccess.active_kctx[slot] == kctx) { - dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n", + dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n", (void *)kctx, slot); kbdev->hwaccess.active_kctx[slot] = NULL; } @@ -1773,7 +1773,7 @@ void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, * happens asynchronously */ dev_dbg(kbdev->dev, - "JS: ** Killing Context %p on RunPool Remove **", kctx); + "JS: ** Killing Context %pK on RunPool Remove **", kctx); kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel); } } @@ -1879,7 +1879,7 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, bool kctx_suspended = false; int as_nr; - dev_dbg(kbdev->dev, "Scheduling kctx %p (s:%d)\n", kctx, js); + dev_dbg(kbdev->dev, "Scheduling kctx %pK (s:%d)\n", kctx, js); js_devdata = &kbdev->js_data; js_kctx_info = &kctx->jctx.sched_info; @@ -2025,7 +2025,7 @@ static bool kbase_js_use_ctx(struct kbase_device *kbdev, kbase_backend_use_ctx_sched(kbdev, kctx, js)) { dev_dbg(kbdev->dev, - "kctx %p already has ASID - mark as active (s:%d)\n", + "kctx %pK already has ASID - mark as active (s:%d)\n", (void *)kctx, js); if (kbdev->hwaccess.active_kctx[js] != kctx) { @@ -2200,7 +2200,7 @@ void kbasep_js_resume(struct kbase_device *kbdev) mutex_lock(&js_devdata->queue_mutex); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { - for (prio = KBASE_JS_ATOM_SCHED_PRIO_REALTIME; + for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { struct kbase_context *kctx, *n; unsigned long flags; @@ -2336,7 +2336,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, int js = katom->slot_nr; struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; - dev_dbg(kctx->kbdev->dev, "Add atom %p to X_DEP list (s:%d)\n", + dev_dbg(kctx->kbdev->dev, "Add atom %pK to X_DEP list (s:%d)\n", (void *)katom, js); list_add_tail(&katom->queue, &queue->x_dep_head); @@ -2346,7 +2346,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, add_required = false; } } else { - dev_dbg(kctx->kbdev->dev, "Atom %p not added to X_DEP list\n", + dev_dbg(kctx->kbdev->dev, "Atom %pK not added to X_DEP list\n", (void *)katom); } @@ -2360,7 +2360,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, } dev_dbg(kctx->kbdev->dev, - "Enqueue of kctx %p is %srequired to submit atom %p\n", + "Enqueue of kctx %pK is %srequired to submit atom %pK\n", kctx, enqueue_required ? 
"" : "not ", katom); return enqueue_required; @@ -2387,7 +2387,7 @@ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) if (!kbase_js_atom_blocked_on_x_dep(katom)) { dev_dbg(kctx->kbdev->dev, - "Del atom %p from X_DEP list in js_move_to_tree\n", + "Del atom %pK from X_DEP list in js_move_to_tree\n", (void *)katom); list_del(&katom->queue); @@ -2405,7 +2405,7 @@ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) } } else { dev_dbg(kctx->kbdev->dev, - "Atom %p blocked on x-dep in js_move_to_tree\n", + "Atom %pK blocked on x-dep in js_move_to_tree\n", (void *)katom); break; } @@ -2449,7 +2449,7 @@ static void kbase_js_evict_deps(struct kbase_context *kctx, /* Remove dependency.*/ x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; - dev_dbg(kctx->kbdev->dev, "Cleared X_DEP flag on atom %p\n", + dev_dbg(kctx->kbdev->dev, "Cleared X_DEP flag on atom %pK\n", (void *)x_dep); /* Fail if it had a data dependency. */ @@ -2471,14 +2471,14 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) KBASE_DEBUG_ASSERT(kctx); kbdev = kctx->kbdev; - dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %pK (s:%d)\n", (void *)kctx, js); js_devdata = &kbdev->js_data; lockdep_assert_held(&kbdev->hwaccess_lock); if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) { - dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %p\n", + dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %pK\n", (void *)kctx); return NULL; } @@ -2491,18 +2491,18 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) katom = jsctx_rb_peek(kctx, js); if (!katom) { - dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%d)\n", (void *)kctx, js); return NULL; } if (kctx->blocked_js[js][katom->sched_priority]) { dev_dbg(kbdev->dev, - "JS: kctx %p is blocked from submitting atoms at priority %d (s:%d)\n", + "JS: kctx %pK is blocked from submitting atoms at priority %d (s:%d)\n", (void *)kctx, katom->sched_priority, js); return NULL; } if (atomic_read(&katom->blocked)) { - dev_dbg(kbdev->dev, "JS: Atom %p is blocked in js_pull\n", + dev_dbg(kbdev->dev, "JS: Atom %pK is blocked in js_pull\n", (void *)katom); return NULL; } @@ -2524,14 +2524,14 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || katom->x_pre_dep->will_fail_event_code) { dev_dbg(kbdev->dev, - "JS: X pre-dep %p is not present in slot FIFO or will fail\n", + "JS: X pre-dep %pK is not present in slot FIFO or will fail\n", (void *)katom->x_pre_dep); return NULL; } if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && kbase_backend_nr_atoms_on_slot(kbdev, js)) { dev_dbg(kbdev->dev, - "JS: Atom %p has cross-slot fail dependency and atoms on slot (s:%d)\n", + "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%d)\n", (void *)katom, js); return NULL; } @@ -2556,7 +2556,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) katom->ticks = 0; - dev_dbg(kbdev->dev, "JS: successfully pulled atom %p from kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "JS: successfully pulled atom %pK from kctx %pK (s:%d)\n", (void *)katom, (void *)kctx, js); return katom; @@ -2599,7 +2599,7 @@ static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom) return; dev_dbg(kctx->kbdev->dev, - "JS return start atom %p in state %d of RP %d\n", + "JS return start atom %pK in state %d of RP %d\n", (void *)start_katom, 
(int)rp->state, start_katom->renderpass_id); @@ -2627,7 +2627,7 @@ static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom) /* Prevent the tiler job being pulled for execution in the * job scheduler again. */ - dev_dbg(kbdev->dev, "Blocking start atom %p\n", + dev_dbg(kbdev->dev, "Blocking start atom %pK\n", (void *)start_katom); atomic_inc(&start_katom->blocked); @@ -2639,14 +2639,14 @@ static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom) /* Was the fragment job chain submitted to kbase yet? */ end_katom = rp->end_katom; if (end_katom) { - dev_dbg(kctx->kbdev->dev, "JS return add end atom %p\n", + dev_dbg(kctx->kbdev->dev, "JS return add end atom %pK\n", (void *)end_katom); if (rp->state == KBASE_JD_RP_RETRY_OOM) { /* Allow the end of the renderpass to be pulled for * execution again to continue incremental rendering. */ - dev_dbg(kbdev->dev, "Unblocking end atom %p\n", + dev_dbg(kbdev->dev, "Unblocking end atom %pK\n", (void *)end_katom); atomic_dec(&end_katom->blocked); WARN_ON(!(end_katom->atom_flags & @@ -2708,7 +2708,7 @@ static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom) return; dev_dbg(kctx->kbdev->dev, - "JS return end atom %p in state %d of RP %d\n", + "JS return end atom %pK in state %d of RP %d\n", (void *)end_katom, (int)rp->state, end_katom->renderpass_id); if (WARN_ON(rp->state != KBASE_JD_RP_OOM && @@ -2730,14 +2730,14 @@ static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); dev_dbg(kbdev->dev, - "Reset backing to %zu pages for region %p\n", + "Reset backing to %zu pages for region %pK\n", reg->threshold_pages, (void *)reg); if (!WARN_ON(reg->flags & KBASE_REG_VA_FREED)) kbase_mem_shrink(kctx, reg, reg->threshold_pages); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - dev_dbg(kbdev->dev, "Deleting region %p from list\n", + dev_dbg(kbdev->dev, "Deleting region %pK from list\n", (void *)reg); list_del_init(®->link); kbase_va_region_alloc_put(kctx, reg); @@ -2755,7 +2755,7 @@ static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom) */ start_katom = rp->start_katom; if (!WARN_ON(!start_katom)) { - dev_dbg(kbdev->dev, "Unblocking start atom %p\n", + dev_dbg(kbdev->dev, "Unblocking start atom %pK\n", (void *)start_katom); atomic_dec(&start_katom->blocked); (void)kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, @@ -2781,7 +2781,7 @@ static void js_return_worker(struct work_struct *data) unsigned long flags; base_jd_core_req core_req = katom->core_req; - dev_dbg(kbdev->dev, "%s for atom %p with event code 0x%x\n", + dev_dbg(kbdev->dev, "%s for atom %pK with event code 0x%x\n", __func__, (void *)katom, katom->event_code); if (katom->event_code != BASE_JD_EVENT_END_RP_DONE) @@ -2826,12 +2826,12 @@ static void js_return_worker(struct work_struct *data) if (!atomic_read(&kctx->atoms_pulled)) { dev_dbg(kbdev->dev, - "No atoms currently pulled from context %p\n", + "No atoms currently pulled from context %pK\n", (void *)kctx); if (!kctx->slots_pullable) { dev_dbg(kbdev->dev, - "Context %p %s counted as runnable\n", + "Context %pK %s counted as runnable\n", (void *)kctx, kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF) ? "is" : "isn't"); @@ -2867,7 +2867,7 @@ static void js_return_worker(struct work_struct *data) if (context_idle) { dev_dbg(kbdev->dev, - "Context %p %s counted as active\n", + "Context %pK %s counted as active\n", (void *)kctx, kbase_ctx_flag(kctx, KCTX_ACTIVE) ? 
"is" : "isn't"); @@ -2906,13 +2906,13 @@ static void js_return_worker(struct work_struct *data) kbase_backend_complete_wq_post_sched(kbdev, core_req); - dev_dbg(kbdev->dev, "Leaving %s for atom %p\n", + dev_dbg(kbdev->dev, "Leaving %s for atom %pK\n", __func__, (void *)katom); } void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) { - dev_dbg(kctx->kbdev->dev, "Unpulling atom %p in kctx %p\n", + dev_dbg(kctx->kbdev->dev, "Unpulling atom %pK in kctx %pK\n", (void *)katom, (void *)kctx); lockdep_assert_held(&kctx->kbdev->hwaccess_lock); @@ -2967,7 +2967,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx, return false; dev_dbg(kctx->kbdev->dev, - "Start atom %p is done in state %d of RP %d\n", + "Start atom %pK is done in state %d of RP %d\n", (void *)start_katom, (int)rp->state, start_katom->renderpass_id); @@ -2979,7 +2979,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx, unsigned long flags; dev_dbg(kctx->kbdev->dev, - "Start atom %p completed before soft-stop\n", + "Start atom %pK completed before soft-stop\n", (void *)start_katom); kbase_gpu_vm_lock(kctx); @@ -2991,7 +2991,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx, struct kbase_va_region, link); WARN_ON(reg->flags & KBASE_REG_VA_FREED); - dev_dbg(kctx->kbdev->dev, "Deleting region %p from list\n", + dev_dbg(kctx->kbdev->dev, "Deleting region %pK from list\n", (void *)reg); list_del_init(®->link); kbase_va_region_alloc_put(kctx, reg); @@ -3001,7 +3001,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx, kbase_gpu_vm_unlock(kctx); } else { dev_dbg(kctx->kbdev->dev, - "Start atom %p did not exceed memory threshold\n", + "Start atom %pK did not exceed memory threshold\n", (void *)start_katom); WARN_ON(rp->state != KBASE_JD_RP_START && @@ -3018,7 +3018,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx, /* Allow the end of the renderpass to be pulled for * execution again to continue incremental rendering. */ - dev_dbg(kbdev->dev, "Unblocking end atom %p!\n", + dev_dbg(kbdev->dev, "Unblocking end atom %pK!\n", (void *)end_katom); atomic_dec(&end_katom->blocked); @@ -3062,7 +3062,7 @@ static void js_complete_end_rp(struct kbase_context *kctx, if (WARN_ON(rp->end_katom != end_katom)) return; - dev_dbg(kbdev->dev, "End atom %p is done in state %d of RP %d\n", + dev_dbg(kbdev->dev, "End atom %pK is done in state %d of RP %d\n", (void *)end_katom, (int)rp->state, end_katom->renderpass_id); if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE) || @@ -3096,7 +3096,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, kbdev = kctx->kbdev; atom_slot = katom->slot_nr; - dev_dbg(kbdev->dev, "%s for atom %p (s:%d)\n", + dev_dbg(kbdev->dev, "%s for atom %pK (s:%d)\n", __func__, (void *)katom, atom_slot); /* Update the incremental rendering state machine. 
@@ -3115,7 +3115,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { - dev_dbg(kbdev->dev, "Atom %p is in runnable_tree\n", + dev_dbg(kbdev->dev, "Atom %pK is in runnable_tree\n", (void *)katom); context_idle = !atomic_dec_return(&kctx->atoms_pulled); @@ -3136,7 +3136,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, if (!kctx->atoms_pulled_slot_pri[atom_slot][prio] && kctx->blocked_js[atom_slot][prio]) { dev_dbg(kbdev->dev, - "kctx %p is no longer blocked from submitting on slot %d at priority %d\n", + "kctx %pK is no longer blocked from submitting on slot %d at priority %d\n", (void *)kctx, atom_slot, prio); kctx->blocked_js[atom_slot][prio] = false; @@ -3190,7 +3190,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, * jd_done_worker(). */ if (context_idle) { - dev_dbg(kbdev->dev, "kctx %p is no longer active\n", + dev_dbg(kbdev->dev, "kctx %pK is no longer active\n", (void *)kctx); kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); } @@ -3241,7 +3241,7 @@ static bool js_end_rp_is_complete(struct kbase_jd_atom *const end_katom) return true; dev_dbg(kbdev->dev, - "JS complete end atom %p in state %d of RP %d\n", + "JS complete end atom %pK in state %d of RP %d\n", (void *)end_katom, (int)rp->state, end_katom->renderpass_id); @@ -3270,7 +3270,7 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, struct kbase_jd_atom *x_dep = katom->x_post_dep; kbdev = kctx->kbdev; - dev_dbg(kbdev->dev, "Atom %p complete in kctx %p (post-dep %p)\n", + dev_dbg(kbdev->dev, "Atom %pK complete in kctx %pK (post-dep %pK)\n", (void *)katom, (void *)kctx, (void *)x_dep); lockdep_assert_held(&kctx->kbdev->hwaccess_lock); @@ -3286,7 +3286,7 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, katom->event_code = katom->will_fail_event_code; katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED; - dev_dbg(kbdev->dev, "Atom %p status to HW completed\n", (void *)katom); + dev_dbg(kbdev->dev, "Atom %pK status to HW completed\n", (void *)katom); if (katom->event_code != BASE_JD_EVENT_DONE) { kbase_js_evict_deps(kctx, katom, katom->slot_nr, @@ -3308,7 +3308,7 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr, false); x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; - dev_dbg(kbdev->dev, "Cleared X_DEP flag on atom %p\n", + dev_dbg(kbdev->dev, "Cleared X_DEP flag on atom %pK\n", (void *)x_dep); kbase_js_move_to_tree(x_dep); @@ -3319,13 +3319,13 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, x_dep->slot_nr); if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { - dev_dbg(kbdev->dev, "Atom %p is in runnable tree\n", + dev_dbg(kbdev->dev, "Atom %pK is in runnable tree\n", (void *)x_dep); return x_dep; } } else { dev_dbg(kbdev->dev, - "No cross-slot dep to unblock for atom %p\n", + "No cross-slot dep to unblock for atom %pK\n", (void *)katom); } @@ -3356,13 +3356,13 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom) if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { - dev_dbg(kbdev->dev, "Atom %p is not blocked on a cross-slot dependency", + dev_dbg(kbdev->dev, "Atom %pK is not blocked on a cross-slot dependency", (void *)katom); return false; } if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS)) { - dev_dbg(kbdev->dev, "Atom %p is blocked on a cross-slot dependency", + dev_dbg(kbdev->dev, 
"Atom %pK is blocked on a cross-slot dependency", (void *)katom); return true; } @@ -3388,12 +3388,12 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom) * if it only depends on the tiler job chain. */ if (katom->x_pre_dep != rp->start_katom) { - dev_dbg(kbdev->dev, "Dependency is on %p not start atom %p\n", + dev_dbg(kbdev->dev, "Dependency is on %pK not start atom %pK\n", (void *)katom->x_pre_dep, (void *)rp->start_katom); return true; } - dev_dbg(kbdev->dev, "Ignoring cross-slot dep on atom %p\n", + dev_dbg(kbdev->dev, "Ignoring cross-slot dep on atom %pK\n", (void *)katom->x_pre_dep); return false; @@ -3407,7 +3407,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) bool ctx_waiting[BASE_JM_MAX_NR_SLOTS]; int js; - dev_dbg(kbdev->dev, "%s kbdev %p mask 0x%x\n", + dev_dbg(kbdev->dev, "%s kbdev %pK mask 0x%x\n", __func__, (void *)kbdev, (unsigned int)js_mask); js_devdata = &kbdev->js_data; @@ -3442,7 +3442,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) context_idle = true; dev_dbg(kbdev->dev, - "kctx %p is not active (s:%d)\n", + "kctx %pK is not active (s:%d)\n", (void *)kctx, js); if (kbase_pm_context_active_handle_suspend( @@ -3472,7 +3472,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) &kctx->jctx.sched_info.ctx.jsctx_mutex); dev_dbg(kbdev->dev, - "kctx %p cannot be used at this time\n", + "kctx %pK cannot be used at this time\n", kctx); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -3514,7 +3514,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) bool pullable; dev_dbg(kbdev->dev, - "No atoms pulled from kctx %p (s:%d)\n", + "No atoms pulled from kctx %pK (s:%d)\n", (void *)kctx, js); pullable = kbase_js_ctx_pullable(kctx, js, @@ -3576,7 +3576,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) break; /* Could not run atoms on this slot */ } - dev_dbg(kbdev->dev, "Push kctx %p to back of list\n", + dev_dbg(kbdev->dev, "Push kctx %pK to back of list\n", (void *)kctx); if (kbase_js_ctx_pullable(kctx, js, true)) timer_sync |= @@ -3598,7 +3598,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { if (kbdev->hwaccess.active_kctx[js] == last_active[js] && ctx_waiting[js]) { - dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n", + dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n", (void *)last_active[js], js); kbdev->hwaccess.active_kctx[js] = NULL; } @@ -3629,7 +3629,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) mutex_lock(&js_kctx_info->ctx.jsctx_mutex); kbase_ctx_flag_set(kctx, KCTX_DYING); - dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx); + dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %pK", kctx); /* * At this point we know: @@ -3693,7 +3693,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u, kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx); + dev_dbg(kbdev->dev, "Zap: Ctx %pK scheduled=0", kctx); /* Only cancel jobs when we evicted from the * queue. No Power Manager active reference was held. 
@@ -3714,7 +3714,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) * Pool */ KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u, kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx); + dev_dbg(kbdev->dev, "Zap: Ctx %pK is in RunPool", kctx); /* Disable the ctx from submitting any more jobs */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -3732,7 +3732,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) */ KBASE_DEBUG_ASSERT(was_retained); - dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx); + dev_dbg(kbdev->dev, "Zap: Ctx %pK Kill Any Running jobs", kctx); /* Cancel any remaining running jobs for this kctx - if any. * Submit is disallowed which takes effect immediately, so no @@ -3745,7 +3745,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) mutex_unlock(&js_devdata->queue_mutex); mutex_unlock(&kctx->jctx.lock); - dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)", + dev_dbg(kbdev->dev, "Zap: Ctx %pK Release (may or may not schedule out immediately)", kctx); kbasep_js_runpool_release_ctx(kbdev, kctx); diff --git a/mali_kbase/mali_kbase_kinstr_jm.c b/mali_kbase/mali_kbase_kinstr_jm.c index 76cff41..cc8dd86 100644 --- a/mali_kbase/mali_kbase_kinstr_jm.c +++ b/mali_kbase/mali_kbase_kinstr_jm.c @@ -25,7 +25,7 @@ */ #include "mali_kbase_kinstr_jm.h" -#include "mali_kbase_kinstr_jm_reader.h" +#include <uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h> #include "mali_kbase.h" #include "mali_kbase_linux.h" diff --git a/mali_kbase/mali_kbase_kinstr_jm.h b/mali_kbase/mali_kbase_kinstr_jm.h index 74fe5cf..2b81636 100644 --- a/mali_kbase/mali_kbase_kinstr_jm.h +++ b/mali_kbase/mali_kbase_kinstr_jm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2019, 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -63,7 +63,7 @@ #ifndef _KBASE_KINSTR_JM_H_ #define _KBASE_KINSTR_JM_H_ -#include "mali_kbase_kinstr_jm_reader.h" +#include <uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h> #ifdef __KERNEL__ #include <linux/version.h> diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c index fd992e2..326917c 100644 --- a/mali_kbase/mali_kbase_mem.c +++ b/mali_kbase/mali_kbase_mem.c @@ -849,7 +849,7 @@ bool kbase_has_exec_va_zone(struct kbase_context *kctx) * * Return: true if any allocs exist on any zone, false otherwise */ -bool kbase_region_tracker_has_allocs(struct kbase_context *kctx) +static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx) { unsigned int zone_idx; @@ -1393,7 +1393,7 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) if (WARN_ON(kbase_is_region_invalid(reg))) return; - dev_dbg(kctx->kbdev->dev, "Freeing memory region %p\n", + dev_dbg(kctx->kbdev->dev, "Freeing memory region %pK\n", (void *)reg); #if MALI_USE_CSF if (reg->flags & KBASE_REG_CSF_EVENT) @@ -1916,7 +1916,7 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re KBASE_DEBUG_ASSERT(kctx != NULL); KBASE_DEBUG_ASSERT(reg != NULL); - dev_dbg(kctx->kbdev->dev, "%s %p in kctx %p\n", + dev_dbg(kctx->kbdev->dev, "%s %pK in kctx %pK\n", __func__, (void *)reg, (void *)kctx); lockdep_assert_held(&kctx->reg_lock); @@ -1975,7 +1975,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) struct kbase_va_region *reg; KBASE_DEBUG_ASSERT(kctx != NULL); - dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %p\n", + dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %pK\n", __func__, gpu_addr, (void *)kctx); if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) { @@ -2772,6 +2772,7 @@ void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed); } } +KBASE_EXPORT_TEST_API(kbase_free_phy_pages_helper_locked); #if MALI_USE_CSF /** @@ -4233,8 +4234,11 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) div_u64(old_pages * (100 - kctx->trim_level), 100)); u64 delta = old_pages - new_size; - if (delta) + if (delta) { + mutex_lock(&kctx->reg_lock); kbase_mem_shrink(kctx, reg, old_pages - delta); + mutex_unlock(&kctx->reg_lock); + } } #if MALI_JIT_PRESSURE_LIMIT_BASE diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h index cda6b57..d12ec31 100644 --- a/mali_kbase/mali_kbase_mem.h +++ b/mali_kbase/mali_kbase_mem.h @@ -31,7 +31,7 @@ #endif #include <linux/kref.h> -#include "mali_base_kernel.h" +#include <uapi/gpu/arm/midgard/mali_base_kernel.h> #include <mali_kbase_hw.h> #include "mali_kbase_pm.h" #include "mali_kbase_defs.h" @@ -549,7 +549,7 @@ static inline struct kbase_va_region *kbase_va_region_alloc_get( WARN_ON(!region->va_refcnt); /* non-atomic as kctx->reg_lock is held */ - dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %p\n", + dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %pK\n", region->va_refcnt, (void *)region); region->va_refcnt++; @@ -566,7 +566,7 @@ static inline struct kbase_va_region *kbase_va_region_alloc_put( /* non-atomic as kctx->reg_lock is held */ region->va_refcnt--; - dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %p\n", + dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %pK\n", region->va_refcnt, (void *)region); if (!region->va_refcnt) 
kbase_region_refcnt_free(region); diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c index 7c9c08e..cc80927 100644 --- a/mali_kbase/mali_kbase_mem_linux.c +++ b/mali_kbase/mali_kbase_mem_linux.c @@ -42,7 +42,7 @@ #include <mali_kbase.h> #include <mali_kbase_mem_linux.h> #include <tl/mali_kbase_tracepoints.h> -#include <mali_kbase_ioctl.h> +#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h> #include <mmu/mali_kbase_mmu.h> #include <mali_kbase_caps.h> #include <mali_kbase_trace_gpu_mem.h> @@ -1104,7 +1104,7 @@ int kbase_mem_do_sync_imported(struct kbase_context *kctx, dir); #endif /* KBASE_MEM_ION_SYNC_WORKAROUND */ break; - }; + } if (unlikely(ret)) dev_warn(kctx->kbdev->dev, @@ -2718,7 +2718,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, { struct kbase_va_region *reg = NULL; void *kaddr = NULL; - size_t nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + size_t nr_pages = vma_pages(vma); int err = 0; int free_on_close = 0; struct device *dev = kctx->kbdev->dev; @@ -3333,7 +3333,7 @@ static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, { unsigned long cookie = vma->vm_pgoff - PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE); - size_t nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + size_t nr_pages = vma_pages(vma); struct kbase_queue *queue; int err = 0; diff --git a/mali_kbase/mali_kbase_mem_pool.c b/mali_kbase/mali_kbase_mem_pool.c index 9b5854a..1874a6f 100644 --- a/mali_kbase/mali_kbase_mem_pool.c +++ b/mali_kbase/mali_kbase_mem_pool.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -309,7 +309,7 @@ void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size) kbase_mem_pool_unlock(pool); } - +KBASE_EXPORT_TEST_API(kbase_mem_pool_set_max_size); static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s, struct shrink_control *sc) @@ -804,8 +804,8 @@ void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool, nr_to_pool = kbase_mem_pool_capacity(pool); nr_to_pool = min(nr_pages, nr_to_pool); - kbase_mem_pool_add_array_locked(pool, nr_pages, pages, false, - dirty); + kbase_mem_pool_add_array_locked(pool, nr_to_pool, pages, false, + dirty); i += nr_to_pool; } diff --git a/mali_kbase/mali_kbase_mipe_gen_header.h b/mali_kbase/mali_kbase_mipe_gen_header.h index 87eb65b..d1ea7ad 100644 --- a/mali_kbase/mali_kbase_mipe_gen_header.h +++ b/mali_kbase/mali_kbase_mipe_gen_header.h @@ -39,14 +39,14 @@ * defined. See documentation below: */ -/** +/* * The name of the variable where the result BLOB will be stored. */ #if !defined(MIPE_HEADER_BLOB_VAR_NAME) #error "MIPE_HEADER_BLOB_VAR_NAME must be defined!" #endif -/** +/* * A compiler attribute for the BLOB variable. * * e.g. __attribute__((section("my_section"))) @@ -77,7 +77,7 @@ #error "MIPE_HEADER_STREAM_ID must be defined!" #endif -/** +/* * MIPE packet class. * * See enum tl_packet_class. @@ -86,7 +86,7 @@ #error "MIPE_HEADER_PKT_CLASS must be defined!" #endif -/** +/* * The list of tracepoints to process. * * It should be defined as follows: @@ -105,14 +105,14 @@ #error "MIPE_HEADER_TRACEPOINT_LIST must be defined!" #endif -/** +/* * The number of entries in MIPE_HEADER_TRACEPOINT_LIST. 
*/ #if !defined(MIPE_HEADER_TRACEPOINT_LIST_SIZE) #error "MIPE_HEADER_TRACEPOINT_LIST_SIZE must be defined!" #endif -/** +/* * The list of enums to process. * * It should be defined as follows: @@ -129,7 +129,7 @@ */ #if defined(MIPE_HEADER_ENUM_LIST) -/** +/* * Tracepoint message ID used for enums declaration. */ #if !defined(MIPE_HEADER_ENUM_MSG_ID) diff --git a/mali_kbase/mali_kbase_pm.c b/mali_kbase/mali_kbase_pm.c index da09a97..3ded47b 100644 --- a/mali_kbase/mali_kbase_pm.c +++ b/mali_kbase/mali_kbase_pm.c @@ -256,9 +256,15 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start) kbase_pm_context_idle(kbdev); /* Re-enable GPU hardware counters */ +#if MALI_USE_CSF + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + kbase_csf_scheduler_spin_unlock(kbdev, flags); +#else spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#endif /* Resume vinstr */ kbase_vinstr_resume(kbdev->vinstr_ctx); diff --git a/mali_kbase/mali_kbase_reset_gpu.h b/mali_kbase/mali_kbase_reset_gpu.h index 4f66972..cb8a082 100644 --- a/mali_kbase/mali_kbase_reset_gpu.h +++ b/mali_kbase/mali_kbase_reset_gpu.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -143,8 +143,16 @@ void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev); void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev); /** + * Flags for kbase_prepare_to_reset_gpu + */ +#define RESET_FLAGS_NONE ((unsigned int)0) +/* This reset should be treated as an unrecoverable error by HW counter logic */ +#define RESET_FLAGS_HWC_UNRECOVERABLE_ERROR ((unsigned int)(1 << 0)) + +/** * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU. * @kbdev: Device pointer + * @flags: Bitfield indicating impact of reset (see flag defines) * * Caller is expected to hold the kbdev->hwaccess_lock. * @@ -153,18 +161,20 @@ void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev); * - false - Another thread is performing a reset, kbase_reset_gpu should * not be called. */ -bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev); +bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, + unsigned int flags); /** * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU. * @kbdev: Device pointer - * + * @flags: Bitfield indicating impact of reset (see flag defines) + * Return: a boolean which should be interpreted as follows: * - true - Prepared for reset, kbase_reset_gpu should be called. * - false - Another thread is performing a reset, kbase_reset_gpu should * not be called. 
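 *
 * A minimal illustrative call sequence (sketch only, using the
 * RESET_FLAGS_NONE flag defined above):
 *
 *   if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
 *       kbase_reset_gpu(kbdev);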
*/ -bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev); +bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags); /** * kbase_reset_gpu - Reset the GPU diff --git a/mali_kbase/mali_kbase_softjobs.c b/mali_kbase/mali_kbase_softjobs.c index 654c029..e14a4be 100644 --- a/mali_kbase/mali_kbase_softjobs.c +++ b/mali_kbase/mali_kbase_softjobs.c @@ -27,7 +27,7 @@ #include <mali_kbase_sync.h> #endif #include <linux/dma-mapping.h> -#include <mali_base_kernel.h> +#include <uapi/gpu/arm/midgard/mali_base_kernel.h> #include <mali_kbase_hwaccess_time.h> #include <mali_kbase_kinstr_jm.h> #include <mali_kbase_mem_linux.h> @@ -145,6 +145,9 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) * delay suspend until we process the atom (which may be at the end of a * long chain of dependencies */ +#ifdef CONFIG_MALI_ARBITER_SUPPORT + atomic_inc(&kctx->kbdev->pm.gpu_users_waiting); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); if (pm_active_err) { struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data; @@ -162,6 +165,10 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) return pm_active_err; } +#ifdef CONFIG_MALI_ARBITER_SUPPORT + else + atomic_dec(&kctx->kbdev->pm.gpu_users_waiting); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time, &ts); @@ -291,7 +298,7 @@ static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom) if (!kbase_sync_fence_in_info_get(dep, &info)) { dev_warn(dev, - "\tVictim trigger atom %d fence [%p] %s: %s\n", + "\tVictim trigger atom %d fence [%pK] %s: %s\n", kbase_jd_atom_id(kctx, dep), info.fence, info.name, @@ -320,11 +327,11 @@ static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom) return; } - dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%p] after %dms\n", + dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%pK] after %dms\n", kctx->tgid, kctx->id, kbase_jd_atom_id(kctx, katom), info.fence, timeout_ms); - dev_warn(dev, "\tGuilty fence [%p] %s: %s\n", + dev_warn(dev, "\tGuilty fence [%pK] %s: %s\n", info.fence, info.name, kbase_sync_status_string(info.status)); @@ -1422,41 +1429,27 @@ static int kbase_ext_res_prepare(struct kbase_jd_atom *katom) struct base_external_resource_list *ext_res; u64 count = 0; size_t copy_size; - int ret; user_ext_res = (__user struct base_external_resource_list *) (uintptr_t) katom->jc; /* Fail the job if there is no info structure */ - if (!user_ext_res) { - ret = -EINVAL; - goto fail; - } + if (!user_ext_res) + return -EINVAL; - if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) { - ret = -EINVAL; - goto fail; - } + if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) + return -EINVAL; /* Is the number of external resources in range? 
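 * Bounding count by BASE_EXT_RES_COUNT_MAX also keeps the copy_size
 * computation below from overflowing.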
*/ - if (!count || count > BASE_EXT_RES_COUNT_MAX) { - ret = -EINVAL; - goto fail; - } + if (!count || count > BASE_EXT_RES_COUNT_MAX) + return -EINVAL; /* Copy the information for safe access and future storage */ copy_size = sizeof(*ext_res); copy_size += sizeof(struct base_external_resource) * (count - 1); - ext_res = kzalloc(copy_size, GFP_KERNEL); - if (!ext_res) { - ret = -ENOMEM; - goto fail; - } - - if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) { - ret = -EINVAL; - goto free_info; - } + ext_res = memdup_user(user_ext_res, copy_size); + if (IS_ERR(ext_res)) + return PTR_ERR(ext_res); /* * Overwrite the count with the first value incase it was changed @@ -1467,11 +1460,6 @@ static int kbase_ext_res_prepare(struct kbase_jd_atom *katom) katom->softjob_data = ext_res; return 0; - -free_info: - kfree(ext_res); -fail: - return ret; } static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map) @@ -1793,6 +1781,9 @@ void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev) if (kbase_process_soft_job(katom_iter) == 0) { kbase_finish_soft_job(katom_iter); resched |= jd_done_nolock(katom_iter, NULL); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + atomic_dec(&kbdev->pm.gpu_users_waiting); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ } mutex_unlock(&kctx->jctx.lock); } diff --git a/mali_kbase/mali_kbase_sync_common.c b/mali_kbase/mali_kbase_sync_common.c index 2061f53..39a68c2 100644 --- a/mali_kbase/mali_kbase_sync_common.c +++ b/mali_kbase/mali_kbase_sync_common.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2012-2016, 2018-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,7 +20,7 @@ */ /* - * @file mali_kbase_sync_common.c + * @file * * Common code for our explicit fence functionality */ diff --git a/mali_kbase/mali_kbase_vinstr.c b/mali_kbase/mali_kbase_vinstr.c index bc985cb..4ac0d0e 100644 --- a/mali_kbase/mali_kbase_vinstr.c +++ b/mali_kbase/mali_kbase_vinstr.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,9 +22,9 @@ #include "mali_kbase_vinstr.h" #include "mali_kbase_hwcnt_virtualizer.h" #include "mali_kbase_hwcnt_types.h" -#include "mali_kbase_hwcnt_reader.h" +#include <uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h> #include "mali_kbase_hwcnt_gpu.h" -#include "mali_kbase_ioctl.h" +#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h> #include "mali_malisw.h" #include "mali_kbase_debug.h" @@ -898,11 +898,12 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_get_api_version( struct kbase_vinstr_client *cli, unsigned long arg, size_t size) { long ret = -EINVAL; - u8 clk_cnt = cli->vctx->metadata->clk_cnt; if (size == sizeof(u32)) { ret = put_user(HWCNT_READER_API, (u32 __user *)arg); } else if (size == sizeof(struct kbase_hwcnt_reader_api_version)) { + u8 clk_cnt = cli->vctx->metadata->clk_cnt; + unsigned long bytes = 0; struct kbase_hwcnt_reader_api_version api_version = { .version = HWCNT_READER_API, .features = KBASE_HWCNT_READER_API_VERSION_NO_FEATURE, @@ -915,8 +916,16 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_get_api_version( api_version.features |= KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES; - ret = copy_to_user( + bytes = copy_to_user( (void __user *)arg, &api_version, sizeof(api_version)); + + /* copy_to_user returns zero in case of success. + * If it fails, it returns the number of bytes that could NOT be copied + */ + if (bytes == 0) + ret = 0; + else + ret = -EFAULT; } return ret; } @@ -1042,7 +1051,16 @@ static int kbasep_vinstr_hwcnt_reader_mmap( return -EINVAL; vm_size = vma->vm_end - vma->vm_start; - size = cli->dump_bufs.buf_cnt * cli->vctx->metadata->dump_buf_bytes; + + /* The mapping is allowed to span the entirety of the page allocation, + * not just the chunk where the dump buffers are allocated. + * This accommodates the corner case where the combined size of the + * dump buffers is smaller than a single page. + * This does not pose a security risk as the pages are zeroed on + * allocation, and anything out of bounds of the dump buffers is never + * written to. + */ + size = (1ull << cli->dump_bufs.page_order) * PAGE_SIZE; if (vma->vm_pgoff > (size >> PAGE_SHIFT)) return -EINVAL; diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c index 6b7cb42..8240817 100644 --- a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c +++ b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c @@ -83,10 +83,19 @@ static void submit_work_pagefault(struct kbase_device *kbdev, u32 as_nr, .addr = fault->addr, }; - if (WARN_ON(!queue_work(as->pf_wq, &as->work_pagefault))) + /* + * A page fault work item could already be pending for the + * context's address space, when the page fault occurs for + * MCU's address space. + */ + if (!queue_work(as->pf_wq, &as->work_pagefault)) kbase_ctx_sched_release_ctx(kctx); - else + else { + dev_dbg(kbdev->dev, + "Page fault is already pending for as %u\n", + as_nr); atomic_inc(&kbdev->faults_pending); + } } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } @@ -117,15 +126,9 @@ void kbase_mmu_report_mcu_as_fault_and_reset(struct kbase_device *kbdev, for (as_no = 1; as_no < kbdev->nr_hw_address_spaces; as_no++) submit_work_pagefault(kbdev, as_no, fault); - /* MCU AS fault could mean hardware counters will stop working. 
- * Put the backend into the unrecoverable error state to cause - * current and subsequent counter operations to immediately - * fail, avoiding the risk of a hang. - */ - kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface); - /* GPU reset is required to recover */ - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, + RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } KBASE_EXPORT_TEST_API(kbase_mmu_report_mcu_as_fault_and_reset); diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c index 18a74ab..ae334c1 100644 --- a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c +++ b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c @@ -206,7 +206,7 @@ static void kbase_mmu_interrupt_process(struct kbase_device *kbdev, lockdep_assert_held(&kbdev->hwaccess_lock); dev_dbg(kbdev->dev, - "Entering %s kctx %p, as %p\n", + "Entering %s kctx %pK, as %pK\n", __func__, (void *)kctx, (void *)as); if (!kctx) { @@ -255,14 +255,10 @@ static void kbase_mmu_interrupt_process(struct kbase_device *kbdev, */ kbasep_js_clear_submit_allowed(js_devdata, kctx); - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - dev_warn(kbdev->dev, - "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n", - as->number, fault->addr, - fault->extra_addr); - else - dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n", - as->number, fault->addr); + dev_warn(kbdev->dev, + "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n", + as->number, fault->addr, + fault->extra_addr); /* * We need to switch to UNMAPPED mode - but we do this in a @@ -276,7 +272,7 @@ static void kbase_mmu_interrupt_process(struct kbase_device *kbdev, } dev_dbg(kbdev->dev, - "Leaving %s kctx %p, as %p\n", + "Leaving %s kctx %pK, as %pK\n", __func__, (void *)kctx, (void *)as); } @@ -375,14 +371,11 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) /* record the fault status */ fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, AS_FAULTSTATUS)); - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { - fault->extra_addr = kbase_reg_read(kbdev, - MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)); - fault->extra_addr <<= 32; - fault->extra_addr |= kbase_reg_read(kbdev, - MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)); - } + fault->extra_addr = kbase_reg_read(kbdev, + MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)); + fault->extra_addr <<= 32; + fault->extra_addr |= kbase_reg_read(kbdev, + MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)); if (kbase_as_has_bus_fault(as, fault)) { /* Mark bus fault as handled. 
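[Editor's note on the reset-flag plumbing above: the patch replaces the open-coded calls to kbase_hwcnt_backend_csf_on_unrecoverable_error() at each fault site with a single RESET_FLAGS_HWC_UNRECOVERABLE_ERROR bit passed to kbase_prepare_to_reset_gpu(). The code that consumes the flag is not part of this commit excerpt, so the sketch below is only an illustration of the assumed handling, based on the flag's documented meaning; the example_* name is hypothetical.]

/* Illustration only: how the reset path could consume the new flag.
 * Not taken from this patch; the example_* name is hypothetical.
 */
static void example_handle_reset_flags(struct kbase_device *kbdev,
				       unsigned int flags)
{
#if MALI_USE_CSF
	if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR) {
		/* Fail current and future counter operations immediately
		 * rather than risking a hang across the reset.
		 */
		kbase_hwcnt_backend_csf_on_unrecoverable_error(
			&kbdev->hwcnt_gpu_iface);
	}
#endif
	/* ...then continue with the normal reset preparation. */
}

[With this split, fault handlers such as kbase_mmu_report_mcu_as_fault_and_reset() above only pass the flag instead of calling the counter backend directly.]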
@@ -423,7 +416,7 @@ int kbase_mmu_switch_to_ir(struct kbase_context *const kctx, struct kbase_va_region *const reg) { dev_dbg(kctx->kbdev->dev, - "Switching to incremental rendering for region %p\n", + "Switching to incremental rendering for region %pK\n", (void *)reg); return kbase_job_slot_softstop_start_rp(kctx, reg); } diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c index 51bee43..0761f68 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.c +++ b/mali_kbase/mmu/mali_kbase_mmu.c @@ -561,7 +561,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); dev_dbg(kbdev->dev, - "Entering %s %p, fault_pfn %lld, as_no %d\n", + "Entering %s %pK, fault_pfn %lld, as_no %d\n", __func__, (void *)data, fault_pfn, as_no); /* Grab the context that was already refcounted in kbase_mmu_interrupt() @@ -634,21 +634,13 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) goto fault_done; case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT: - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Address size fault", fault); - else - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Unknown fault code", fault); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Address size fault", fault); goto fault_done; case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT: - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Memory attributes fault", fault); - else - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Unknown fault code", fault); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Memory attributes fault", fault); goto fault_done; default: @@ -852,7 +844,7 @@ page_fault_retry: if (kbase_mmu_switch_to_ir(kctx, region) >= 0) { dev_dbg(kctx->kbdev->dev, - "Get region %p for IR\n", + "Get region %pK for IR\n", (void *)region); kbase_va_region_alloc_get(kctx, region); } @@ -980,7 +972,7 @@ fault_done: release_ctx(kbdev, kctx); atomic_dec(&kbdev->faults_pending); - dev_dbg(kbdev->dev, "Leaving page_fault_worker %p\n", (void *)data); + dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK\n", (void *)data); } static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, @@ -1557,7 +1549,7 @@ static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx, */ dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); - if (kbase_prepare_to_reset_gpu_locked(kbdev)) + if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu_locked(kbdev); } } @@ -1613,17 +1605,8 @@ static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev, */ dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); -#if MALI_USE_CSF - /* A GPU hang could mean hardware counters will stop working. - * Put the backend into the unrecoverable error state to cause - * current and subsequent counter operations to immediately - * fail, avoiding the risk of a hang. 
- */ - kbase_hwcnt_backend_csf_on_unrecoverable_error( - &kbdev->hwcnt_gpu_iface); -#endif /* MALI_USE_CSF */ - - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu( + kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } @@ -1659,7 +1642,7 @@ static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, ctx_is_in_runpool = kbase_ctx_sched_inc_refcount(kctx); mutex_unlock(&kbdev->js_data.queue_mutex); #else - ctx_is_in_runpool = kbase_ctx_sched_refcount_mmu_flush(kctx, sync); + ctx_is_in_runpool = kbase_ctx_sched_inc_refcount_if_as_valid(kctx); #endif /* !MALI_USE_CSF */ if (ctx_is_in_runpool) { @@ -1681,11 +1664,6 @@ void kbase_mmu_update(struct kbase_device *kbdev, KBASE_DEBUG_ASSERT(as_nr != KBASEP_AS_NR_INVALID); kbdev->mmu_mode->update(kbdev, mmut, as_nr); - -#if MALI_USE_CSF - if (mmut->kctx) - mmut->kctx->mmu_flush_pend_state = KCTX_MMU_FLUSH_NOT_PEND; -#endif } KBASE_EXPORT_TEST_API(kbase_mmu_update); @@ -1719,10 +1697,6 @@ void kbase_mmu_disable(struct kbase_context *kctx) kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true); kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr); - -#if MALI_USE_CSF - kctx->mmu_flush_pend_state = KCTX_MMU_FLUSH_NOT_PEND; -#endif } KBASE_EXPORT_TEST_API(kbase_mmu_disable); @@ -2312,30 +2286,3 @@ void kbase_flush_mmu_wqs(struct kbase_device *kbdev) flush_workqueue(as->pf_wq); } } - -#if MALI_USE_CSF -void kbase_mmu_deferred_flush_invalidate(struct kbase_context *kctx) -{ - struct kbase_device *kbdev = kctx->kbdev; - - lockdep_assert_held(&kbdev->mmu_hw_mutex); - - if (kctx->as_nr == KBASEP_AS_NR_INVALID) - return; - - if (kctx->mmu_flush_pend_state == KCTX_MMU_FLUSH_NOT_PEND) - return; - - WARN_ON(!atomic_read(&kctx->refcount)); - - /* Specify the entire address space as the locked region. - * The flush of entire L2 cache and complete TLB invalidation will - * anyways happen for the exisiting CSF GPUs, regardless of the locked - * range. This may have to be revised later on. - */ - kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, - kctx->mmu_flush_pend_state == KCTX_MMU_FLUSH_PEND_SYNC); - - kctx->mmu_flush_pend_state = KCTX_MMU_FLUSH_NOT_PEND; -} -#endif diff --git a/mali_kbase/mmu/mali_kbase_mmu.h b/mali_kbase/mmu/mali_kbase_mmu.h index 1d877ac..bf4fd91 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.h +++ b/mali_kbase/mmu/mali_kbase_mmu.h @@ -152,21 +152,4 @@ int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev, u32 status, void kbase_mmu_gpu_fault_interrupt(struct kbase_device *kbdev, u32 status, u32 as_nr, u64 address, bool as_valid); -#if MALI_USE_CSF -/** - * kbase_mmu_deferred_flush_invalidate() - Perform deferred MMU flush - * operations for a Kbase context. - * @kctx: Pointer to the Kbase context for which MMU flush operations - * are pending. - * - * This function performs the MMU flush operations that are pending for a Kbase - * context. The flush operations will be deferred if the context is inactive, - * i.e. kctx->refcount is zero which happens when all the queue groups of a - * context have gone off CSG slots. - * This needs to be called when first queue group of the context is put back - * on the CSG slot. 
- */ -void kbase_mmu_deferred_flush_invalidate(struct kbase_context *kctx); -#endif - #endif /* _KBASE_MMU_H_ */ diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c index b0596af..88fd9cf 100644 --- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c +++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c @@ -124,38 +124,33 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as) struct kbase_mmu_setup *current_setup = &as->current_setup; u64 transcfg = 0; - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { - transcfg = current_setup->transcfg; + transcfg = current_setup->transcfg; - /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK - * Clear PTW_MEMATTR bits - */ - transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK; - /* Enable correct PTW_MEMATTR bits */ - transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; - /* Ensure page-tables reads use read-allocate cache-policy in - * the L2 - */ - transcfg |= AS_TRANSCFG_R_ALLOCATE; - - if (kbdev->system_coherency != COHERENCY_NONE) { - /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) - * Clear PTW_SH bits - */ - transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK); - /* Enable correct PTW_SH bits */ - transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS); - } + /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK + * Clear PTW_MEMATTR bits + */ + transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK; + /* Enable correct PTW_MEMATTR bits */ + transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; + /* Ensure page-tables reads use read-allocate cache-policy in + * the L2 + */ + transcfg |= AS_TRANSCFG_R_ALLOCATE; - kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO), - transcfg); - kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI), - (transcfg >> 32) & 0xFFFFFFFFUL); - } else { - if (kbdev->system_coherency != COHERENCY_NONE) - current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER; + if (kbdev->system_coherency != COHERENCY_NONE) { + /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) + * Clear PTW_SH bits + */ + transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK); + /* Enable correct PTW_SH bits */ + transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS); } + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO), + transcfg); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI), + (transcfg >> 32) & 0xFFFFFFFFUL); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO), current_setup->transtab & 0xFFFFFFFFUL); kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI), diff --git a/mali_kbase/mmu/mali_kbase_mmu_mode_lpae.c b/mali_kbase/mmu/mali_kbase_mmu_mode_lpae.c deleted file mode 100644 index 09793e1..0000000 --- a/mali_kbase/mmu/mali_kbase_mmu_mode_lpae.c +++ /dev/null @@ -1,195 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * - * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - */ - -#include "mali_kbase.h" -#include <gpu/mali_kbase_gpu_regmap.h> -#include "mali_kbase_defs.h" - -#define ENTRY_TYPE_MASK 3ULL -#define ENTRY_IS_ATE 1ULL -#define ENTRY_IS_INVAL 2ULL -#define ENTRY_IS_PTE 3ULL - -#define ENTRY_ATTR_BITS (7ULL << 2) /* bits 4:2 */ -#define ENTRY_RD_BIT (1ULL << 6) -#define ENTRY_WR_BIT (1ULL << 7) -#define ENTRY_SHARE_BITS (3ULL << 8) /* bits 9:8 */ -#define ENTRY_ACCESS_BIT (1ULL << 10) -#define ENTRY_NX_BIT (1ULL << 54) - -#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \ - ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT) - -/* Helper Function to perform assignment of page table entries, to - * ensure the use of strd, which is required on LPAE systems. - */ -static inline void page_table_entry_set(u64 *pte, u64 phy) -{ - WRITE_ONCE(*pte, phy); -} - -static void mmu_get_as_setup(struct kbase_mmu_table *mmut, - struct kbase_mmu_setup * const setup) -{ - /* Set up the required caching policies at the correct indices - * in the memattr register. - */ - setup->memattr = - (AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY << - (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | - (AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL << - (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | - (AS_MEMATTR_LPAE_WRITE_ALLOC << - (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | - (AS_MEMATTR_LPAE_OUTER_IMPL_DEF << - (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | - (AS_MEMATTR_LPAE_OUTER_WA << - (AS_MEMATTR_INDEX_OUTER_WA * 8)) | - 0; /* The other indices are unused for now */ - - setup->transtab = ((u64)mmut->pgd & - ((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) | - AS_TRANSTAB_LPAE_ADRMODE_TABLE | - AS_TRANSTAB_LPAE_READ_INNER; - - setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY; -} - -static void mmu_update(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - int as_nr) -{ - struct kbase_as *as; - struct kbase_mmu_setup *current_setup; - - if (WARN_ON(as_nr == KBASEP_AS_NR_INVALID)) - return; - - as = &kbdev->as[as_nr]; - current_setup = &as->current_setup; - - mmu_get_as_setup(mmut, current_setup); - - /* Apply the address space setting */ - kbase_mmu_hw_configure(kbdev, as); -} - -static void mmu_disable_as(struct kbase_device *kbdev, int as_nr) -{ - struct kbase_as * const as = &kbdev->as[as_nr]; - struct kbase_mmu_setup * const current_setup = &as->current_setup; - - current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED; - - /* Apply the address space setting */ - kbase_mmu_hw_configure(kbdev, as); -} - -static phys_addr_t pte_to_phy_addr(u64 entry) -{ - if (!(entry & 1)) - return 0; - - return entry & ~0xFFF; -} - -static int ate_is_valid(u64 ate, int const level) -{ - return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE); -} - -static int pte_is_valid(u64 pte, int const level) -{ - return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE); -} - -/* - * Map KBASE_REG flags to MMU flags - */ -static u64 get_mmu_flags(unsigned long flags) -{ - u64 mmu_flags; - unsigned long memattr_idx; - - memattr_idx = KBASE_REG_MEMATTR_VALUE(flags); - if (WARN(memattr_idx == AS_MEMATTR_INDEX_NON_CACHEABLE, - "Legacy Mode MMU cannot honor GPU non-cachable memory, will use default instead\n")) - memattr_idx = AS_MEMATTR_INDEX_DEFAULT; - /* store mem_attr index as 4:2, noting that: - * - macro called above ensures 3 bits already - * - all AS_MEMATTR_INDEX_<...> macros only use 3 bits - */ - mmu_flags = memattr_idx << 2; - - /* write perm if requested */ - mmu_flags |= (flags & KBASE_REG_GPU_WR) ? 
ENTRY_WR_BIT : 0; - /* read perm if requested */ - mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0; - /* nx if requested */ - mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0; - - if (flags & KBASE_REG_SHARE_BOTH) { - /* inner and outer shareable */ - mmu_flags |= SHARE_BOTH_BITS; - } else if (flags & KBASE_REG_SHARE_IN) { - /* inner shareable coherency */ - mmu_flags |= SHARE_INNER_BITS; - } - - return mmu_flags; -} - -static void entry_set_ate(u64 *entry, - struct tagged_addr phy, - unsigned long flags, - int const level) -{ - page_table_entry_set(entry, as_phys_addr_t(phy) | get_mmu_flags(flags) | - ENTRY_IS_ATE); -} - -static void entry_set_pte(u64 *entry, phys_addr_t phy) -{ - page_table_entry_set(entry, (phy & ~0xFFF) | ENTRY_IS_PTE); -} - -static void entry_invalidate(u64 *entry) -{ - page_table_entry_set(entry, ENTRY_IS_INVAL); -} - -static struct kbase_mmu_mode const lpae_mode = { - .update = mmu_update, - .get_as_setup = mmu_get_as_setup, - .disable_as = mmu_disable_as, - .pte_to_phy_addr = pte_to_phy_addr, - .ate_is_valid = ate_is_valid, - .pte_is_valid = pte_is_valid, - .entry_set_ate = entry_set_ate, - .entry_set_pte = entry_set_pte, - .entry_invalidate = entry_invalidate, - .flags = 0 -}; - -struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void) -{ - return &lpae_mode; -} diff --git a/mali_kbase/tests/Mconfig b/mali_kbase/tests/Mconfig index 2630736..a21810b 100644 --- a/mali_kbase/tests/Mconfig +++ b/mali_kbase/tests/Mconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # -# (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -40,6 +40,10 @@ config BUILD_CSF_TESTS config BUILD_ARBIF_TESTS bool - default y if UNIT_TEST_KERNEL_MODULES && MALI_ARBITER_SUPPORT + default y if UNIT_TEST_CODE && MALI_ARBITER_SUPPORT default n +config BUILD_ARBIF_KERNEL_TESTS + bool + default y if BUILD_KERNEL_MODULES && BUILD_ARBIF_TESTS + default n diff --git a/mali_kbase/tl/backend/mali_kbase_timeline_csf.c b/mali_kbase/tl/backend/mali_kbase_timeline_csf.c index 42f1e2d..7455ce2 100644 --- a/mali_kbase/tl/backend/mali_kbase_timeline_csf.c +++ b/mali_kbase/tl/backend/mali_kbase_timeline_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -159,7 +159,7 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev) * this iteration of the loop, so will start to correctly update * the object model state. */ - }; + } mutex_unlock(&timeline->tl_kctx_list_lock); diff --git a/mali_kbase/tl/backend/mali_kbase_timeline_jm.c b/mali_kbase/tl/backend/mali_kbase_timeline_jm.c index f016e8b..6659d2d 100644 --- a/mali_kbase/tl/backend/mali_kbase_timeline_jm.c +++ b/mali_kbase/tl/backend/mali_kbase_timeline_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -74,7 +74,7 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev) kctx, kctx->id, (u32)(kctx->tgid)); - }; + } /* Reset body stream buffers while holding the kctx lock. * This ensures we can't fire both summary and normal tracepoints for diff --git a/mali_kbase/tl/mali_kbase_timeline.c b/mali_kbase/tl/mali_kbase_timeline.c index 4f955a1..20d7b16 100644 --- a/mali_kbase/tl/mali_kbase_timeline.c +++ b/mali_kbase/tl/mali_kbase_timeline.c @@ -186,7 +186,7 @@ static void kbase_tlstream_current_devfreq_target(struct kbase_device *kbdev) int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) { - int ret; + int ret = 0; u32 timeline_flags = TLSTREAM_ENABLED | flags; struct kbase_timeline *timeline = kbdev->timeline; @@ -262,6 +262,9 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) ret = -EBUSY; } + if (ret >= 0) + timeline->last_acquire_time = ktime_get(); + return ret; } diff --git a/mali_kbase/tl/mali_kbase_timeline.h b/mali_kbase/tl/mali_kbase_timeline.h index 9315fcc..0465352 100644 --- a/mali_kbase/tl/mali_kbase_timeline.h +++ b/mali_kbase/tl/mali_kbase_timeline.h @@ -107,32 +107,6 @@ void kbase_timeline_pre_kbase_context_destroy(struct kbase_context *kctx); void kbase_timeline_post_kbase_context_destroy(struct kbase_context *kctx); #if MALI_UNIT_TEST -/** - * kbase_timeline_test - start timeline stream data generator - * @kbdev: Kernel common context - * @tpw_count: Number of trace point writers in each context - * @msg_delay: Time delay in milliseconds between trace points written by one - * writer - * @msg_count: Number of trace points written by one writer - * @aux_msg: If non-zero aux messages will be included - * - * This test starts a requested number of asynchronous writers in both IRQ and - * thread context. Each writer will generate required number of test - * tracepoints (tracepoints with embedded information about writer that - * should be verified by user space reader). Tracepoints will be emitted in - * all timeline body streams. If aux_msg is non-zero writer will also - * generate not testable tracepoints (tracepoints without information about - * writer). These tracepoints are used to check correctness of remaining - * timeline message generating functions. Writer will wait requested time - * between generating another set of messages. This call blocks until all - * writers finish. - */ -void kbase_timeline_test( - struct kbase_device *kbdev, - unsigned int tpw_count, - unsigned int msg_delay, - unsigned int msg_count, - int aux_msg); /** * kbase_timeline_stats - read timeline stream statistics diff --git a/mali_kbase/tl/mali_kbase_timeline_io.c b/mali_kbase/tl/mali_kbase_timeline_io.c index 8587ba0..e3b6fbc 100644 --- a/mali_kbase/tl/mali_kbase_timeline_io.c +++ b/mali_kbase/tl/mali_kbase_timeline_io.c @@ -24,6 +24,7 @@ #include "mali_kbase_tracepoints.h" #include "mali_kbase_timeline.h" +#include <linux/delay.h> #include <linux/poll.h> /* The timeline stream file operations functions. 
*/ @@ -46,7 +47,8 @@ const struct file_operations kbasep_tlstream_fops = { /** * kbasep_timeline_io_packet_pending - check timeline streams for pending - *packets + * packets + * * @timeline: Timeline instance * @ready_stream: Pointer to variable where stream will be placed * @rb_idx_raw: Pointer to variable where read buffer index will be placed @@ -86,8 +88,8 @@ kbasep_timeline_io_packet_pending(struct kbase_timeline *timeline, } /** - * kbasep_timeline_has_header_data() - - * check timeline headers for pending packets + * kbasep_timeline_has_header_data() - check timeline headers for pending + * packets * * @timeline: Timeline instance * @@ -139,6 +141,7 @@ static inline int copy_stream_header(char __user *buffer, size_t size, /** * kbasep_timeline_copy_header - copy timeline headers to the user + * * @timeline: Timeline instance * @buffer: Pointer to the buffer provided by user * @size: Maximum amount of data that can be stored in the buffer @@ -174,6 +177,7 @@ static inline int kbasep_timeline_copy_headers(struct kbase_timeline *timeline, /** * kbasep_timeline_io_read - copy data from streams to buffer provided by user + * * @filp: Pointer to file structure * @buffer: Pointer to the buffer provided by user * @size: Maximum amount of data that can be stored in the buffer @@ -198,7 +202,7 @@ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, if (!buffer) return -EINVAL; - if ((*f_pos < 0) || (size < PACKET_SIZE)) + if (*f_pos < 0) return -EINVAL; mutex_lock(&timeline->reader_lock); @@ -217,10 +221,10 @@ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, } /* If we already read some packets and there is no - * packet pending then return back to user. - * If we don't have any data yet, wait for packet to be - * submitted. - */ + * packet pending then return back to user. + * If we don't have any data yet, wait for packet to be + * submitted. + */ if (copy_len > 0) { if (!kbasep_timeline_io_packet_pending( timeline, &stream, &rb_idx_raw)) @@ -241,8 +245,8 @@ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, } /* Check if this packet fits into the user buffer. - * If so copy its content. - */ + * If so copy its content. + */ rb_idx = rb_idx_raw % PACKET_COUNT; rb_size = atomic_read(&stream->buffer[rb_idx].size); if (rb_size > size - copy_len) @@ -254,10 +258,10 @@ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, } /* If the distance between read buffer index and write - * buffer index became more than PACKET_COUNT, then overflow - * happened and we need to ignore the last portion of bytes - * that we have just sent to user. - */ + * buffer index became more than PACKET_COUNT, then overflow + * happened and we need to ignore the last portion of bytes + * that we have just sent to user. 
+ */ smp_rmb(); wb_idx_raw = atomic_read(&stream->wbi); @@ -321,6 +325,8 @@ static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait) static int kbasep_timeline_io_release(struct inode *inode, struct file *filp) { struct kbase_timeline *timeline; + ktime_t elapsed_time; + s64 elapsed_time_ms, time_to_sleep; KBASE_DEBUG_ASSERT(inode); KBASE_DEBUG_ASSERT(filp); @@ -330,6 +336,18 @@ static int kbasep_timeline_io_release(struct inode *inode, struct file *filp) timeline = (struct kbase_timeline *)filp->private_data; + /* Get the amount of time passed since the timeline was acquired and ensure + * we sleep for long enough such that it has been at least + * TIMELINE_HYSTERESIS_TIMEOUT_MS amount of time between acquire and release. + * This prevents userspace from spamming acquire and release too quickly. + */ + elapsed_time = ktime_sub(ktime_get(), timeline->last_acquire_time); + elapsed_time_ms = ktime_to_ms(elapsed_time); + time_to_sleep = MIN(TIMELINE_HYSTERESIS_TIMEOUT_MS, + TIMELINE_HYSTERESIS_TIMEOUT_MS - elapsed_time_ms); + if (time_to_sleep > 0) + msleep(time_to_sleep); + #if MALI_USE_CSF kbase_csf_tl_reader_stop(&timeline->csf_tl_reader); #endif diff --git a/mali_kbase/tl/mali_kbase_timeline_priv.h b/mali_kbase/tl/mali_kbase_timeline_priv.h index 2825f77..8a58a13 100644 --- a/mali_kbase/tl/mali_kbase_timeline_priv.h +++ b/mali_kbase/tl/mali_kbase_timeline_priv.h @@ -34,6 +34,11 @@ #include <linux/atomic.h> #include <linux/mutex.h> +/* The minimum amount of time timeline must be acquired for before release is + * allowed, to prevent DoS attacks. + */ +#define TIMELINE_HYSTERESIS_TIMEOUT_MS ((s64)500) + /** * struct kbase_timeline - timeline state structure * @streams: The timeline streams generated by kernel @@ -49,6 +54,7 @@ * otherwise. See kbase_timeline_io_acquire(). * @obj_header_btc: Remaining bytes to copy for the object stream header * @aux_header_btc: Remaining bytes to copy for the aux stream header + * @last_acquire_time: The time at which timeline was last acquired. * @csf_tl_reader: CSFFW timeline reader */ struct kbase_timeline { @@ -65,6 +71,7 @@ struct kbase_timeline { atomic_t *timeline_flags; size_t obj_header_btc; size_t aux_header_btc; + ktime_t last_acquire_time; #if MALI_USE_CSF struct kbase_csf_tl_reader csf_tl_reader; #endif diff --git a/mali_kbase/tl/mali_kbase_tlstream.c b/mali_kbase/tl/mali_kbase_tlstream.c index c6eb3c8..202c12f 100644 --- a/mali_kbase/tl/mali_kbase_tlstream.c +++ b/mali_kbase/tl/mali_kbase_tlstream.c @@ -56,20 +56,19 @@ static void kbasep_packet_header_setup( * @numbered: non-zero if the stream is numbered * * Function updates mutable part of packet header in the given buffer. - * Note that value of data_size must not including size of the header. + * Note that value of data_size must not include size of the header. 
*/ static void kbasep_packet_header_update( char *buffer, size_t data_size, int numbered) { - u32 word0; u32 word1 = MIPE_PACKET_HEADER_W1((u32)data_size, !!numbered); KBASE_DEBUG_ASSERT(buffer); - CSTD_UNUSED(word0); - memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1)); + /* we copy the contents of word1 to its respective position in the buffer */ + memcpy(&buffer[sizeof(u32)], &word1, sizeof(word1)); } /** diff --git a/mali_kbase/tl/mali_kbase_tracepoints.c b/mali_kbase/tl/mali_kbase_tracepoints.c index 479f0f4..ece23b3 100644 --- a/mali_kbase/tl/mali_kbase_tracepoints.c +++ b/mali_kbase/tl/mali_kbase_tracepoints.c @@ -69,6 +69,7 @@ enum tl_msg_id_obj { KBASE_TL_ARBITER_STARTED, KBASE_TL_ARBITER_STOP_REQUESTED, KBASE_TL_ARBITER_STOPPED, + KBASE_TL_ARBITER_REQUESTED, KBASE_JD_GPU_SOFT_RESET, KBASE_TL_KBASE_NEW_DEVICE, KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, @@ -288,6 +289,10 @@ enum tl_msg_id_aux { "Driver has stopped using gpu", \ "@p", \ "gpu") \ + TRACEPOINT_DESC(KBASE_TL_ARBITER_REQUESTED, \ + "Driver has requested the arbiter for gpu access", \ + "@p", \ + "gpu") \ TRACEPOINT_DESC(KBASE_JD_GPU_SOFT_RESET, \ "gpu soft reset", \ "@p", \ @@ -1565,6 +1570,28 @@ void __kbase_tlstream_tl_arbiter_stopped( kbase_tlstream_msgbuf_release(stream, acq_flags); } +void __kbase_tlstream_tl_arbiter_requested( + struct kbase_tlstream *stream, + const void *gpu) +{ + const u32 msg_id = KBASE_TL_ARBITER_REQUESTED; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + void __kbase_tlstream_jd_gpu_soft_reset( struct kbase_tlstream *stream, const void *gpu) diff --git a/mali_kbase/tl/mali_kbase_tracepoints.h b/mali_kbase/tl/mali_kbase_tracepoints.h index a3fd7c1..f3f554a 100644 --- a/mali_kbase/tl/mali_kbase_tracepoints.h +++ b/mali_kbase/tl/mali_kbase_tracepoints.h @@ -237,6 +237,9 @@ void __kbase_tlstream_tl_arbiter_stop_requested( void __kbase_tlstream_tl_arbiter_stopped( struct kbase_tlstream *stream, const void *gpu); +void __kbase_tlstream_tl_arbiter_requested( + struct kbase_tlstream *stream, + const void *gpu); void __kbase_tlstream_jd_gpu_soft_reset( struct kbase_tlstream *stream, const void *gpu); @@ -1301,6 +1304,25 @@ struct kbase_tlstream; } while (0) /** + * KBASE_TLSTREAM_TL_ARBITER_REQUESTED - + * Driver has requested the arbiter for gpu access + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_TL_ARBITER_REQUESTED( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_arbiter_requested( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu); \ + } while (0) + +/** * KBASE_TLSTREAM_JD_GPU_SOFT_RESET - * gpu soft reset * diff --git a/mali_pixel/memory_group_manager.c b/mali_pixel/memory_group_manager.c deleted file mode 100644 index 6e10722..0000000 --- a/mali_pixel/memory_group_manager.c +++ /dev/null @@ -1,492 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -#include <linux/fs.h> -#include <linux/of.h> -#include <linux/slab.h> -#include <linux/platform_device.h> -#include <linux/version.h> -#include <linux/module.h> -#ifdef CONFIG_DEBUG_FS -#include <linux/debugfs.h> -#endif -#include <linux/mm.h> -#include <linux/memory_group_manager.h> - -#if (KERNEL_VERSION(4, 20, 0) > LINUX_VERSION_CODE) -static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, - unsigned long addr, unsigned long pfn, pgprot_t pgprot) -{ - int err; - -#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \ - ((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \ - (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE))) - if (pgprot_val(pgprot) != pgprot_val(vma->vm_page_prot)) - return VM_FAULT_SIGBUS; - - err = vm_insert_pfn(vma, addr, pfn); -#else - err = vm_insert_pfn_prot(vma, addr, pfn, pgprot); -#endif - - if (unlikely(err == -ENOMEM)) - return VM_FAULT_OOM; - if (unlikely(err < 0 && err != -EBUSY)) - return VM_FAULT_SIGBUS; - - return VM_FAULT_NOPAGE; -} -#endif - -#define IMPORTED_MEMORY_ID (MEMORY_GROUP_MANAGER_NR_GROUPS - 1) - -/** - * struct mgm_group - Structure to keep track of the number of allocated - * pages per group - * - * @size: The number of allocated small(4KB) pages - * @lp_size: The number of allocated large(2MB) pages - * @insert_pfn: The number of calls to map pages for CPU access. - * @update_gpu_pte: The number of calls to update GPU page table entries. - * - * This structure allows page allocation information to be displayed via - * debugfs. Display is organized per group with small and large sized pages. - */ -struct mgm_group { - size_t size; - size_t lp_size; - size_t insert_pfn; - size_t update_gpu_pte; -}; - -/** - * struct mgm_groups - Structure for groups of memory group manager - * - * @groups: To keep track of the number of allocated pages of all groups - * @dev: device attached - * @mgm_debugfs_root: debugfs root directory of memory group manager - * - * This structure allows page allocation information to be displayed via - * debugfs. Display is organized per group with small and large sized pages. 
- */ -struct mgm_groups { - struct mgm_group groups[MEMORY_GROUP_MANAGER_NR_GROUPS]; - struct device *dev; -#ifdef CONFIG_DEBUG_FS - struct dentry *mgm_debugfs_root; -#endif -}; - -#ifdef CONFIG_DEBUG_FS - -static int mgm_size_get(void *data, u64 *val) -{ - struct mgm_group *group = data; - - *val = group->size; - - return 0; -} - -static int mgm_lp_size_get(void *data, u64 *val) -{ - struct mgm_group *group = data; - - *val = group->lp_size; - - return 0; -} - -static int mgm_insert_pfn_get(void *data, u64 *val) -{ - struct mgm_group *group = data; - - *val = group->insert_pfn; - - return 0; -} - -static int mgm_update_gpu_pte_get(void *data, u64 *val) -{ - struct mgm_group *group = data; - - *val = group->update_gpu_pte; - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(fops_mgm_size, mgm_size_get, NULL, "%llu\n"); -DEFINE_SIMPLE_ATTRIBUTE(fops_mgm_lp_size, mgm_lp_size_get, NULL, "%llu\n"); - -DEFINE_SIMPLE_ATTRIBUTE(fops_mgm_insert_pfn, mgm_insert_pfn_get, NULL, - "%llu\n"); - -DEFINE_SIMPLE_ATTRIBUTE(fops_mgm_update_gpu_pte, mgm_update_gpu_pte_get, NULL, - "%llu\n"); - -static void mgm_term_debugfs(struct mgm_groups *data) -{ - debugfs_remove_recursive(data->mgm_debugfs_root); -} - -#define MGM_DEBUGFS_GROUP_NAME_MAX 10 -static int mgm_initialize_debugfs(struct mgm_groups *mgm_data) -{ - int i; - struct dentry *e, *g; - char debugfs_group_name[MGM_DEBUGFS_GROUP_NAME_MAX]; - - /* - * Create root directory of memory-group-manager - */ - mgm_data->mgm_debugfs_root = - debugfs_create_dir("physical-memory-group-manager", NULL); - if (IS_ERR(mgm_data->mgm_debugfs_root)) { - dev_err(mgm_data->dev, "fail to create debugfs root directory\n"); - return -ENODEV; - } - - /* - * Create debugfs files per group - */ - for (i = 0; i < MEMORY_GROUP_MANAGER_NR_GROUPS; i++) { - scnprintf(debugfs_group_name, MGM_DEBUGFS_GROUP_NAME_MAX, - "group_%d", i); - g = debugfs_create_dir(debugfs_group_name, - mgm_data->mgm_debugfs_root); - if (IS_ERR(g)) { - dev_err(mgm_data->dev, "fail to create group[%d]\n", i); - goto remove_debugfs; - } - - e = debugfs_create_file("size", 0444, g, &mgm_data->groups[i], - &fops_mgm_size); - if (IS_ERR(e)) { - dev_err(mgm_data->dev, "fail to create size[%d]\n", i); - goto remove_debugfs; - } - - e = debugfs_create_file("lp_size", 0444, g, - &mgm_data->groups[i], &fops_mgm_lp_size); - if (IS_ERR(e)) { - dev_err(mgm_data->dev, - "fail to create lp_size[%d]\n", i); - goto remove_debugfs; - } - - e = debugfs_create_file("insert_pfn", 0444, g, - &mgm_data->groups[i], &fops_mgm_insert_pfn); - if (IS_ERR(e)) { - dev_err(mgm_data->dev, - "fail to create insert_pfn[%d]\n", i); - goto remove_debugfs; - } - - e = debugfs_create_file("update_gpu_pte", 0444, g, - &mgm_data->groups[i], &fops_mgm_update_gpu_pte); - if (IS_ERR(e)) { - dev_err(mgm_data->dev, - "fail to create update_gpu_pte[%d]\n", i); - goto remove_debugfs; - } - } - - return 0; - -remove_debugfs: - mgm_term_debugfs(mgm_data); - return -ENODEV; -} - -#else - -static void mgm_term_debugfs(struct mgm_groups *data) -{ -} - -static int mgm_initialize_debugfs(struct mgm_groups *mgm_data) -{ - return 0; -} - -#endif /* CONFIG_DEBUG_FS */ - -#define ORDER_SMALL_PAGE 0 -#define ORDER_LARGE_PAGE 9 -static void update_size(struct memory_group_manager_device *mgm_dev, int - group_id, int order, bool alloc) -{ - struct mgm_groups *data = mgm_dev->data; - - switch (order) { - case ORDER_SMALL_PAGE: - if (alloc) - data->groups[group_id].size++; - else { - WARN_ON(data->groups[group_id].size == 0); - data->groups[group_id].size--; - } - break; - - 
case ORDER_LARGE_PAGE: - if (alloc) - data->groups[group_id].lp_size++; - else { - WARN_ON(data->groups[group_id].lp_size == 0); - data->groups[group_id].lp_size--; - } - break; - - default: - dev_err(data->dev, "Unknown order(%d)\n", order); - break; - } -} - -static struct page *example_mgm_alloc_page( - struct memory_group_manager_device *mgm_dev, int group_id, - gfp_t gfp_mask, unsigned int order) -{ - struct mgm_groups *const data = mgm_dev->data; - struct page *p; - - dev_dbg(data->dev, "%s(mgm_dev=%p, group_id=%d gfp_mask=0x%x order=%u\n", - __func__, (void *)mgm_dev, group_id, gfp_mask, order); - - if (WARN_ON(group_id < 0) || - WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) - return NULL; - - p = alloc_pages(gfp_mask, order); - - if (p) { - update_size(mgm_dev, group_id, order, true); - } else { - struct mgm_groups *data = mgm_dev->data; - - dev_err(data->dev, "alloc_pages failed\n"); - } - - return p; -} - -static void example_mgm_free_page( - struct memory_group_manager_device *mgm_dev, int group_id, - struct page *page, unsigned int order) -{ - struct mgm_groups *const data = mgm_dev->data; - - dev_dbg(data->dev, "%s(mgm_dev=%p, group_id=%d page=%p order=%u\n", - __func__, (void *)mgm_dev, group_id, (void *)page, order); - - if (WARN_ON(group_id < 0) || - WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) - return; - - __free_pages(page, order); - - update_size(mgm_dev, group_id, order, false); -} - -static int example_mgm_get_import_memory_id( - struct memory_group_manager_device *mgm_dev, - struct memory_group_manager_import_data *import_data) -{ - struct mgm_groups *const data = mgm_dev->data; - - dev_dbg(data->dev, "%s(mgm_dev=%p, import_data=%p (type=%d)\n", - __func__, (void *)mgm_dev, (void *)import_data, - (int)import_data->type); - - if (!WARN_ON(!import_data)) { - WARN_ON(!import_data->u.dma_buf); - - WARN_ON(import_data->type != - MEMORY_GROUP_MANAGER_IMPORT_TYPE_DMA_BUF); - } - - return IMPORTED_MEMORY_ID; -} - -static u64 example_mgm_update_gpu_pte( - struct memory_group_manager_device *const mgm_dev, int const group_id, - int const mmu_level, u64 pte) -{ - struct mgm_groups *const data = mgm_dev->data; - const u32 pbha_bit_pos = 59; /* bits 62:59 */ - const u32 pbha_bit_mask = 0xf; /* 4-bit */ - - dev_dbg(data->dev, - "%s(mgm_dev=%p, group_id=%d, mmu_level=%d, pte=0x%llx)\n", - __func__, (void *)mgm_dev, group_id, mmu_level, pte); - - if (WARN_ON(group_id < 0) || - WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) - return pte; - - pte |= ((u64)group_id & pbha_bit_mask) << pbha_bit_pos; - - data->groups[group_id].update_gpu_pte++; - - return pte; -} - -static vm_fault_t example_mgm_vmf_insert_pfn_prot( - struct memory_group_manager_device *const mgm_dev, int const group_id, - struct vm_area_struct *const vma, unsigned long const addr, - unsigned long const pfn, pgprot_t const prot) -{ - struct mgm_groups *const data = mgm_dev->data; - vm_fault_t fault; - - dev_dbg(data->dev, - "%s(mgm_dev=%p, group_id=%d, vma=%p, addr=0x%lx, pfn=0x%lx, prot=0x%llx)\n", - __func__, (void *)mgm_dev, group_id, (void *)vma, addr, pfn, - (unsigned long long int) pgprot_val(prot)); - - if (WARN_ON(group_id < 0) || - WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) - return VM_FAULT_SIGBUS; - - fault = vmf_insert_pfn_prot(vma, addr, pfn, prot); - - if (fault == VM_FAULT_NOPAGE) - data->groups[group_id].insert_pfn++; - else - dev_err(data->dev, "vmf_insert_pfn_prot failed\n"); - - return fault; -} - -static int mgm_initialize_data(struct mgm_groups *mgm_data) -{ - int i; - 
- for (i = 0; i < MEMORY_GROUP_MANAGER_NR_GROUPS; i++) { - mgm_data->groups[i].size = 0; - mgm_data->groups[i].lp_size = 0; - mgm_data->groups[i].insert_pfn = 0; - mgm_data->groups[i].update_gpu_pte = 0; - } - - return mgm_initialize_debugfs(mgm_data); -} - -static void mgm_term_data(struct mgm_groups *data) -{ - int i; - - for (i = 0; i < MEMORY_GROUP_MANAGER_NR_GROUPS; i++) { - if (data->groups[i].size != 0) - dev_warn(data->dev, - "%zu 0-order pages in group(%d) leaked\n", - data->groups[i].size, i); - if (data->groups[i].lp_size != 0) - dev_warn(data->dev, - "%zu 9 order pages in group(%d) leaked\n", - data->groups[i].lp_size, i); - } - - mgm_term_debugfs(data); -} - -static int memory_group_manager_probe(struct platform_device *pdev) -{ - struct memory_group_manager_device *mgm_dev; - struct mgm_groups *mgm_data; - - mgm_dev = kzalloc(sizeof(*mgm_dev), GFP_KERNEL); - if (!mgm_dev) - return -ENOMEM; - - mgm_dev->owner = THIS_MODULE; - mgm_dev->ops.mgm_alloc_page = example_mgm_alloc_page; - mgm_dev->ops.mgm_free_page = example_mgm_free_page; - mgm_dev->ops.mgm_get_import_memory_id = - example_mgm_get_import_memory_id; - mgm_dev->ops.mgm_vmf_insert_pfn_prot = example_mgm_vmf_insert_pfn_prot; - mgm_dev->ops.mgm_update_gpu_pte = example_mgm_update_gpu_pte; - - mgm_data = kzalloc(sizeof(*mgm_data), GFP_KERNEL); - if (!mgm_data) { - kfree(mgm_dev); - return -ENOMEM; - } - - mgm_dev->data = mgm_data; - mgm_data->dev = &pdev->dev; - - if (mgm_initialize_data(mgm_data)) { - kfree(mgm_data); - kfree(mgm_dev); - return -ENOENT; - } - - platform_set_drvdata(pdev, mgm_dev); - dev_info(&pdev->dev, "Memory group manager probed successfully\n"); - - return 0; -} - -static int memory_group_manager_remove(struct platform_device *pdev) -{ - struct memory_group_manager_device *mgm_dev = - platform_get_drvdata(pdev); - struct mgm_groups *mgm_data = mgm_dev->data; - - mgm_term_data(mgm_data); - kfree(mgm_data); - - kfree(mgm_dev); - - dev_info(&pdev->dev, "Memory group manager removed successfully\n"); - - return 0; -} - -static const struct of_device_id memory_group_manager_dt_ids[] = { - { .compatible = "arm,physical-memory-group-manager" }, - { /* sentinel */ } -}; -MODULE_DEVICE_TABLE(of, memory_group_manager_dt_ids); - -static struct platform_driver memory_group_manager_driver = { - .probe = memory_group_manager_probe, - .remove = memory_group_manager_remove, - .driver = { - .name = "physical-memory-group-manager", - .owner = THIS_MODULE, - .of_match_table = of_match_ptr(memory_group_manager_dt_ids), - /* - * Prevent the mgm_dev from being unbound and freed, as other's - * may have pointers to it and would get confused, or crash, if - * it suddenly disappear. - */ - .suppress_bind_attrs = true, - } -}; - -module_platform_driver(memory_group_manager_driver); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("ARM Ltd."); -MODULE_VERSION("1.0"); |
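[Editor's note on the KBASE_TL_ARBITER_REQUESTED tracepoint introduced earlier in this patch (tl/mali_kbase_tracepoints.*): only the serialisation side is defined here, no call site is included in this excerpt. The snippet below is a hypothetical illustration of how arbiter code would emit it, following the pattern of the other KBASE_TLSTREAM_TL_ARBITER_* wrapper macros, which are no-ops unless the timeline stream has been acquired (TLSTREAM_ENABLED set in kbdev->timeline_flags).]

	/* Hypothetical call site, e.g. where the driver requests GPU access
	 * from the arbiter. The second argument is the GPU object pointer
	 * serialised into the object stream; passing kbdev itself here is an
	 * assumption mirroring the other arbiter tracepoints.
	 */
	KBASE_TLSTREAM_TL_ARBITER_REQUESTED(kbdev, kbdev);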