182 files changed, 7735 insertions, 3813 deletions
diff --git a/common/include/linux/version_compat_defs.h b/common/include/linux/version_compat_defs.h new file mode 100644 index 0000000..a8e0874 --- /dev/null +++ b/common/include/linux/version_compat_defs.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _VERSION_COMPAT_DEFS_H_ +#define _VERSION_COMPAT_DEFS_H_ + +#include <linux/version.h> + +#if KERNEL_VERSION(4, 16, 0) >= LINUX_VERSION_CODE +typedef unsigned int __poll_t; +#endif + +#endif /* _VERSION_COMPAT_DEFS_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h index 9d677ca..613eb1f 100644 --- a/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h +++ b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,11 +43,18 @@ (KBASE_DUMMY_MODEL_VALUES_PER_BLOCK * sizeof(__u32)) #define KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS 8 #define KBASE_DUMMY_MODEL_MAX_SHADER_CORES 32 -#define KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS \ +#define KBASE_DUMMY_MODEL_MAX_FIRMWARE_BLOCKS 0 +#define KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS \ (1 + 1 + KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS + KBASE_DUMMY_MODEL_MAX_SHADER_CORES) +#define KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS \ + (KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS + KBASE_DUMMY_MODEL_MAX_FIRMWARE_BLOCKS) #define KBASE_DUMMY_MODEL_COUNTER_TOTAL \ (KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * \ KBASE_DUMMY_MODEL_COUNTER_PER_CORE) +#define KBASE_DUMMY_MODEL_MAX_VALUES_PER_SAMPLE \ + (KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * KBASE_DUMMY_MODEL_VALUES_PER_BLOCK) +#define KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE \ + (KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE) #define DUMMY_IMPLEMENTATION_SHADER_PRESENT (0xFull) #define DUMMY_IMPLEMENTATION_TILER_PRESENT (0x1ull) diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h index 7f7b9dd..3b02350 100644 --- a/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h +++ b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h @@ -23,99 +23,16 @@ #define _UAPI_BASE_CSF_KERNEL_H_ #include <linux/types.h> +#include "../mali_base_common_kernel.h" -/* Memory allocation, access/hint flags. +/* Memory allocation, access/hint flags & mask specific to CSF GPU. 
* * See base_mem_alloc_flags. */ -/* IN */ -/* Read access CPU side - */ -#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0) - -/* Write access CPU side - */ -#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1) - -/* Read access GPU side - */ -#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2) - -/* Write access GPU side - */ -#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3) - -/* Execute allowed on the GPU side - */ -#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4) - -/* Will be permanently mapped in kernel space. - * Flag is only allowed on allocations originating from kbase. - */ -#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5) - -/* The allocation will completely reside within the same 4GB chunk in the GPU - * virtual space. - * Since this flag is primarily required only for the TLS memory which will - * not be used to contain executable code and also not used for Tiler heap, - * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags. - */ -#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6) - -/* Userspace is not allowed to free this memory. - * Flag is only allowed on allocations originating from kbase. - */ -#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7) - /* Must be FIXED memory. */ #define BASE_MEM_FIXED ((base_mem_alloc_flags)1 << 8) -/* Grow backing store on GPU Page Fault - */ -#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9) - -/* Page coherence Outer shareable, if available - */ -#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10) - -/* Page coherence Inner shareable - */ -#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11) - -/* IN/OUT */ -/* Should be cached on the CPU, returned if actually cached - */ -#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12) - -/* IN/OUT */ -/* Must have same VA on both the GPU and the CPU - */ -#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13) - -/* OUT */ -/* Must call mmap to acquire a GPU address for the alloc - */ -#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14) - -/* IN */ -/* Page coherence Outer shareable, required. - */ -#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15) - -/* Protected memory - */ -#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16) - -/* Not needed physical memory - */ -#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17) - -/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the - * addresses to be the same - */ -#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18) - /* CSF event memory * * If Outer shareable coherence is not specified or not available, then on @@ -131,46 +48,15 @@ #define BASE_MEM_RESERVED_BIT_20 ((base_mem_alloc_flags)1 << 20) -/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu - * mode. Some components within the GPU might only be able to access memory - * that is GPU cacheable. Refer to the specific GPU implementation for more - * details. The 3 shareability flags will be ignored for GPU uncached memory. - * If used while importing USER_BUFFER type memory, then the import will fail - * if the memory is not aligned to GPU and CPU cache line width. - */ -#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21) - -/* - * Bits [22:25] for group_id (0~15). - * - * base_mem_group_id_set() should be used to pack a memory group ID into a - * base_mem_alloc_flags value instead of accessing the bits directly. 
- * base_mem_group_id_get() should be used to extract the memory group ID from - * a base_mem_alloc_flags value. - */ -#define BASEP_MEM_GROUP_ID_SHIFT 22 -#define BASE_MEM_GROUP_ID_MASK \ - ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT) - -/* Must do CPU cache maintenance when imported memory is mapped/unmapped - * on GPU. Currently applicable to dma-buf type only. - */ -#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26) - -/* OUT */ -/* Kernel side cache sync ops required */ -#define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28) /* Must be FIXABLE memory: its GPU VA will be determined at a later point, * at which time it will be at a fixed GPU VA. */ #define BASE_MEM_FIXABLE ((base_mem_alloc_flags)1 << 29) -/* Number of bits used as flags for base memory management - * - * Must be kept in sync with the base_mem_alloc_flags flags +/* Note that the number of bits used for base_mem_alloc_flags + * must be less than BASE_MEM_FLAGS_NR_BITS !!! */ -#define BASE_MEM_FLAGS_NR_BITS 30 /* A mask of all the flags which are only valid for allocations within kbase, * and may not be passed from user space. @@ -178,62 +64,23 @@ #define BASEP_MEM_FLAGS_KERNEL_ONLY \ (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE) -/* A mask for all output bits, excluding IN/OUT bits. - */ -#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP - -/* A mask for all input bits, including IN/OUT bits. - */ -#define BASE_MEM_FLAGS_INPUT_MASK \ - (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) - /* A mask of all currently reserved flags */ #define BASE_MEM_FLAGS_RESERVED BASE_MEM_RESERVED_BIT_20 -#define BASEP_MEM_INVALID_HANDLE (0ul) -#define BASE_MEM_MMU_DUMP_HANDLE (1ul << LOCAL_PAGE_SHIFT) -#define BASE_MEM_TRACE_BUFFER_HANDLE (2ul << LOCAL_PAGE_SHIFT) -#define BASE_MEM_MAP_TRACKING_HANDLE (3ul << LOCAL_PAGE_SHIFT) -#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ul << LOCAL_PAGE_SHIFT) -/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */ +/* Special base mem handles specific to CSF. + */ #define BASEP_MEM_CSF_USER_REG_PAGE_HANDLE (47ul << LOCAL_PAGE_SHIFT) #define BASEP_MEM_CSF_USER_IO_PAGES_HANDLE (48ul << LOCAL_PAGE_SHIFT) -#define BASE_MEM_COOKIE_BASE (64ul << LOCAL_PAGE_SHIFT) -#define BASE_MEM_FIRST_FREE_ADDRESS \ - ((BITS_PER_LONG << LOCAL_PAGE_SHIFT) + BASE_MEM_COOKIE_BASE) #define KBASE_CSF_NUM_USER_IO_PAGES_HANDLE \ ((BASE_MEM_COOKIE_BASE - BASEP_MEM_CSF_USER_IO_PAGES_HANDLE) >> \ LOCAL_PAGE_SHIFT) -/** - * Valid set of just-in-time memory allocation flags - */ +/* Valid set of just-in-time memory allocation flags */ #define BASE_JIT_ALLOC_VALID_FLAGS ((__u8)0) -/* Flags to pass to ::base_context_init. - * Flags can be ORed together to enable multiple things. - * - * These share the same space as BASEP_CONTEXT_FLAG_*, and so must - * not collide with them. - */ -typedef __u32 base_context_create_flags; - -/* No flags set */ -#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0) - -/* Base context is embedded in a cctx object (flag used for CINSTR - * software counter macros) - */ -#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0) - -/* Base context is a 'System Monitor' context for Hardware counters. - * - * One important side effect of this is that job submission is disabled. - */ -#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \ - ((base_context_create_flags)1 << 1) +/* flags for base context specific to CSF */ /* Base context creates a CSF event notification thread. 
* @@ -242,22 +89,6 @@ typedef __u32 base_context_create_flags; */ #define BASE_CONTEXT_CSF_EVENT_THREAD ((base_context_create_flags)1 << 2) -/* Bit-shift used to encode a memory group ID in base_context_create_flags - */ -#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3) - -/* Bitmask used to encode a memory group ID in base_context_create_flags - */ -#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \ - ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) - -/* Bitpattern describing the base_context_create_flags that can be - * passed to the kernel - */ -#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \ - (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \ - BASEP_CONTEXT_MMU_GROUP_ID_MASK) - /* Bitpattern describing the ::base_context_create_flags that can be * passed to base_context_init() */ @@ -266,15 +97,7 @@ typedef __u32 base_context_create_flags; BASE_CONTEXT_CSF_EVENT_THREAD | \ BASEP_CONTEXT_CREATE_KERNEL_FLAGS) -/* Enable additional tracepoints for latency measurements (TL_ATOM_READY, - * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) - */ -#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0) - -/* Indicate that job dumping is enabled. This could affect certain timers - * to account for the performance impact. - */ -#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1) +/* Flags for base tracepoint specific to CSF */ /* Enable KBase tracepoints for CSF builds */ #define BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS (1 << 2) @@ -306,6 +129,10 @@ typedef __u32 base_context_create_flags; */ #define BASEP_KCPU_CQS_MAX_NUM_OBJS ((size_t)32) +/* CSF CSI EXCEPTION_HANDLER_FLAGS */ +#define BASE_CSF_TILER_OOM_EXCEPTION_FLAG (1u << 0) +#define BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK (BASE_CSF_TILER_OOM_EXCEPTION_FLAG) + /** * enum base_kcpu_command_type - Kernel CPU queue command type. * @BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: fence_signal, @@ -725,4 +552,45 @@ struct base_csf_notification { } payload; }; +/** + * struct mali_base_gpu_core_props - GPU core props info + * + * @product_id: Pro specific value. + * @version_status: Status of the GPU release. No defined values, but starts at + * 0 and increases by one for each release status (alpha, beta, EAC, etc.). + * 4 bit values (0-15). + * @minor_revision: Minor release number of the GPU. "P" part of an "RnPn" + * release number. + * 8 bit values (0-255). + * @major_revision: Major release number of the GPU. "R" part of an "RnPn" + * release number. + * 4 bit values (0-15). + * @padding: padding to align to 8-byte + * @gpu_freq_khz_max: The maximum GPU frequency. Reported to applications by + * clGetDeviceInfo() + * @log2_program_counter_size: Size of the shader program counter, in bits. + * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU. This + * is a bitpattern where a set bit indicates that the format is supported. + * Before using a texture format, it is recommended that the corresponding + * bit be checked. + * @gpu_available_memory_size: Theoretical maximum memory available to the GPU. + * It is unlikely that a client will be able to allocate all of this memory + * for their own purposes, but this at least provides an upper bound on the + * memory available to the GPU. + * This is required for OpenCL's clGetDeviceInfo() call when + * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The + * client will not be expecting to allocate anywhere near this value. 
+ */ +struct mali_base_gpu_core_props { + __u32 product_id; + __u16 version_status; + __u16 minor_revision; + __u16 major_revision; + __u16 padding; + __u32 gpu_freq_khz_max; + __u32 log2_program_counter_size; + __u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; + __u64 gpu_available_memory_size; +}; + #endif /* _UAPI_BASE_CSF_KERNEL_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h index 1794ddc..cbb7310 100644 --- a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h @@ -56,10 +56,18 @@ * - Added new Base memory allocation interface * 1.10: * - First release of new HW performance counters interface. + * 1.11: + * - Dummy model (no mali) backend will now clear HWC values after each sample + * 1.12: + * - Added support for incremental rendering flag in CSG create call + * 1.13: + * - Added ioctl to query a register of USER page. + * 1.14: + * - Added support for passing down the buffer descriptor VA in tiler heap init */ #define BASE_UK_VERSION_MAJOR 1 -#define BASE_UK_VERSION_MINOR 10 +#define BASE_UK_VERSION_MINOR 14 /** * struct kbase_ioctl_version_check - Check version compatibility between @@ -245,6 +253,9 @@ union kbase_ioctl_cs_queue_group_create_1_6 { * allowed to use. * @in.compute_max: Maximum number of compute endpoints the group is allowed * to use. + * @in.csi_handlers: Flags to signal that the application intends to use CSI + * exception handlers in some linear buffers to deal with + * the given exception types. * @in.padding: Currently unused, must be zero * @out: Output parameters * @out.group_handle: Handle of a newly created queue group. @@ -261,9 +272,10 @@ union kbase_ioctl_cs_queue_group_create { __u8 tiler_max; __u8 fragment_max; __u8 compute_max; - __u8 padding[3]; + __u8 csi_handlers; + __u8 padding[2]; /** - * @reserved: Reserved + * @in.reserved: Reserved */ __u64 reserved; } in; @@ -353,6 +365,7 @@ struct kbase_ioctl_kcpu_queue_enqueue { * allowed. * @in.group_id: Group ID to be used for physical allocations. * @in.padding: Padding + * @in.buf_desc_va: Buffer descriptor GPU VA for tiler heap reclaims. * @out: Output parameters * @out.gpu_heap_va: GPU VA (virtual address) of Heap context that was set up * for the heap. @@ -368,6 +381,7 @@ union kbase_ioctl_cs_tiler_heap_init { __u16 target_in_flight; __u8 group_id; __u8 padding; + __u64 buf_desc_va; } in; struct { __u64 gpu_heap_va; @@ -379,6 +393,43 @@ union kbase_ioctl_cs_tiler_heap_init { _IOWR(KBASE_IOCTL_TYPE, 48, union kbase_ioctl_cs_tiler_heap_init) /** + * union kbase_ioctl_cs_tiler_heap_init_1_13 - Initialize chunked tiler memory heap, + * earlier version upto 1.13 + * @in: Input parameters + * @in.chunk_size: Size of each chunk. + * @in.initial_chunks: Initial number of chunks that heap will be created with. + * @in.max_chunks: Maximum number of chunks that the heap is allowed to use. + * @in.target_in_flight: Number of render-passes that the driver should attempt to + * keep in flight for which allocation of new chunks is + * allowed. + * @in.group_id: Group ID to be used for physical allocations. + * @in.padding: Padding + * @out: Output parameters + * @out.gpu_heap_va: GPU VA (virtual address) of Heap context that was set up + * for the heap. 
+ * @out.first_chunk_va: GPU VA of the first chunk allocated for the heap, + * actually points to the header of heap chunk and not to + * the low address of free memory in the chunk. + */ +union kbase_ioctl_cs_tiler_heap_init_1_13 { + struct { + __u32 chunk_size; + __u32 initial_chunks; + __u32 max_chunks; + __u16 target_in_flight; + __u8 group_id; + __u8 padding; + } in; + struct { + __u64 gpu_heap_va; + __u64 first_chunk_va; + } out; +}; + +#define KBASE_IOCTL_CS_TILER_HEAP_INIT_1_13 \ + _IOWR(KBASE_IOCTL_TYPE, 48, union kbase_ioctl_cs_tiler_heap_init_1_13) + +/** * struct kbase_ioctl_cs_tiler_heap_term - Terminate a chunked tiler heap * instance * @@ -479,6 +530,29 @@ union kbase_ioctl_mem_alloc_ex { #define KBASE_IOCTL_MEM_ALLOC_EX _IOWR(KBASE_IOCTL_TYPE, 59, union kbase_ioctl_mem_alloc_ex) +/** + * union kbase_ioctl_read_user_page - Read a register of USER page + * + * @in: Input parameters. + * @in.offset: Register offset in USER page. + * @in.padding: Padding to round up to a multiple of 8 bytes, must be zero. + * @out: Output parameters. + * @out.val_lo: Value of 32bit register or the 1st half of 64bit register to be read. + * @out.val_hi: Value of the 2nd half of 64bit register to be read. + */ +union kbase_ioctl_read_user_page { + struct { + __u32 offset; + __u32 padding; + } in; + struct { + __u32 val_lo; + __u32 val_hi; + } out; +}; + +#define KBASE_IOCTL_READ_USER_PAGE _IOWR(KBASE_IOCTL_TYPE, 60, union kbase_ioctl_read_user_page) + /*************** * test ioctls * ***************/ diff --git a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h new file mode 100644 index 0000000..75ae6a1 --- /dev/null +++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _UAPI_KBASE_GPU_REGMAP_CSF_H_ +#define _UAPI_KBASE_GPU_REGMAP_CSF_H_ + +/* IPA control registers */ +#define IPA_CONTROL_BASE 0x40000 +#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE + (r)) +#define STATUS 0x004 /* (RO) Status register */ + +#endif /* _UAPI_KBASE_GPU_REGMAP_CSF_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h index deca665..ebe3b3e 100644 --- a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h +++ b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,7 +22,9 @@ #ifndef _UAPI_KBASE_GPU_REGMAP_H_ #define _UAPI_KBASE_GPU_REGMAP_H_ -#if !MALI_USE_CSF +#if MALI_USE_CSF +#include "backend/mali_kbase_gpu_regmap_csf.h" +#else #include "backend/mali_kbase_gpu_regmap_jm.h" #endif /* !MALI_USE_CSF */ diff --git a/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h b/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h index b63575e..ae43908 100644 --- a/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h +++ b/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h @@ -23,100 +23,16 @@ #define _UAPI_BASE_JM_KERNEL_H_ #include <linux/types.h> +#include "../mali_base_common_kernel.h" -/* Memory allocation, access/hint flags. +/* Memory allocation, access/hint flags & mask specific to JM GPU. * * See base_mem_alloc_flags. */ -/* IN */ -/* Read access CPU side - */ -#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0) - -/* Write access CPU side - */ -#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1) - -/* Read access GPU side - */ -#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2) - -/* Write access GPU side - */ -#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3) - -/* Execute allowed on the GPU side - */ -#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4) - -/* Will be permanently mapped in kernel space. - * Flag is only allowed on allocations originating from kbase. - */ -#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5) - -/* The allocation will completely reside within the same 4GB chunk in the GPU - * virtual space. - * Since this flag is primarily required only for the TLS memory which will - * not be used to contain executable code and also not used for Tiler heap, - * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags. - */ -#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6) - -/* Userspace is not allowed to free this memory. - * Flag is only allowed on allocations originating from kbase. - */ -#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7) - -/* Used as BASE_MEM_FIXED in other backends - */ +/* Used as BASE_MEM_FIXED in other backends */ #define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8) -/* Grow backing store on GPU Page Fault - */ -#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9) - -/* Page coherence Outer shareable, if available - */ -#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10) - -/* Page coherence Inner shareable - */ -#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11) - -/* IN/OUT */ -/* Should be cached on the CPU, returned if actually cached - */ -#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12) - -/* IN/OUT */ -/* Must have same VA on both the GPU and the CPU - */ -#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13) - -/* OUT */ -/* Must call mmap to acquire a GPU address for the allocation - */ -#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14) - -/* IN */ -/* Page coherence Outer shareable, required. 
- */ -#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15) - -/* Protected memory - */ -#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16) - -/* Not needed physical memory - */ -#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17) - -/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the - * addresses to be the same - */ -#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18) - /** * BASE_MEM_RESERVED_BIT_19 - Bit 19 is reserved. * @@ -131,47 +47,15 @@ */ #define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20) -/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu - * mode. Some components within the GPU might only be able to access memory - * that is GPU cacheable. Refer to the specific GPU implementation for more - * details. The 3 shareability flags will be ignored for GPU uncached memory. - * If used while importing USER_BUFFER type memory, then the import will fail - * if the memory is not aligned to GPU and CPU cache line width. - */ -#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21) - -/* - * Bits [22:25] for group_id (0~15). - * - * base_mem_group_id_set() should be used to pack a memory group ID into a - * base_mem_alloc_flags value instead of accessing the bits directly. - * base_mem_group_id_get() should be used to extract the memory group ID from - * a base_mem_alloc_flags value. - */ -#define BASEP_MEM_GROUP_ID_SHIFT 22 -#define BASE_MEM_GROUP_ID_MASK \ - ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT) - -/* Must do CPU cache maintenance when imported memory is mapped/unmapped - * on GPU. Currently applicable to dma-buf type only. - */ -#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26) - /* Use the GPU VA chosen by the kernel client */ #define BASE_MEM_FLAG_MAP_FIXED ((base_mem_alloc_flags)1 << 27) -/* OUT */ -/* Kernel side cache sync ops required */ -#define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28) - /* Force trimming of JIT allocations when creating a new allocation */ #define BASEP_MEM_PERFORM_JIT_TRIM ((base_mem_alloc_flags)1 << 29) -/* Number of bits used as flags for base memory management - * - * Must be kept in sync with the base_mem_alloc_flags flags +/* Note that the number of bits used for base_mem_alloc_flags + * must be less than BASE_MEM_FLAGS_NR_BITS !!! */ -#define BASE_MEM_FLAGS_NR_BITS 30 /* A mask of all the flags which are only valid for allocations within kbase, * and may not be passed from user space. @@ -180,29 +64,11 @@ (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | \ BASE_MEM_FLAG_MAP_FIXED | BASEP_MEM_PERFORM_JIT_TRIM) -/* A mask for all output bits, excluding IN/OUT bits. - */ -#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP - -/* A mask for all input bits, including IN/OUT bits. 
- */ -#define BASE_MEM_FLAGS_INPUT_MASK \ - (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) - /* A mask of all currently reserved flags */ #define BASE_MEM_FLAGS_RESERVED \ (BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_19) -#define BASEP_MEM_INVALID_HANDLE (0ul) -#define BASE_MEM_MMU_DUMP_HANDLE (1ul << LOCAL_PAGE_SHIFT) -#define BASE_MEM_TRACE_BUFFER_HANDLE (2ul << LOCAL_PAGE_SHIFT) -#define BASE_MEM_MAP_TRACKING_HANDLE (3ul << LOCAL_PAGE_SHIFT) -#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ul << LOCAL_PAGE_SHIFT) -/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */ -#define BASE_MEM_COOKIE_BASE (64ul << LOCAL_PAGE_SHIFT) -#define BASE_MEM_FIRST_FREE_ADDRESS \ - ((BITS_PER_LONG << LOCAL_PAGE_SHIFT) + BASE_MEM_COOKIE_BASE) /* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the * initial commit is aligned to 'extension' pages, where 'extension' must be a power @@ -227,47 +93,6 @@ #define BASE_JIT_ALLOC_VALID_FLAGS \ (BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP | BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) -/** - * typedef base_context_create_flags - Flags to pass to ::base_context_init. - * - * Flags can be ORed together to enable multiple things. - * - * These share the same space as BASEP_CONTEXT_FLAG_*, and so must - * not collide with them. - */ -typedef __u32 base_context_create_flags; - -/* No flags set */ -#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0) - -/* Base context is embedded in a cctx object (flag used for CINSTR - * software counter macros) - */ -#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0) - -/* Base context is a 'System Monitor' context for Hardware counters. - * - * One important side effect of this is that job submission is disabled. - */ -#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \ - ((base_context_create_flags)1 << 1) - -/* Bit-shift used to encode a memory group ID in base_context_create_flags - */ -#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3) - -/* Bitmask used to encode a memory group ID in base_context_create_flags - */ -#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \ - ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) - -/* Bitpattern describing the base_context_create_flags that can be - * passed to the kernel - */ -#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \ - (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \ - BASEP_CONTEXT_MMU_GROUP_ID_MASK) - /* Bitpattern describing the ::base_context_create_flags that can be * passed to base_context_init() */ @@ -287,16 +112,7 @@ typedef __u32 base_context_create_flags; #define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED \ ((base_context_create_flags)(1 << 31)) -/* Enable additional tracepoints for latency measurements (TL_ATOM_READY, - * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) - */ -#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0) - -/* Indicate that job dumping is enabled. This could affect certain timers - * to account for the performance impact. - */ -#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1) - +/* Flags for base tracepoint specific to JM */ #define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \ BASE_TLSTREAM_JOB_DUMPING_ENABLED) /* @@ -509,9 +325,6 @@ typedef __u32 base_jd_core_req; * takes priority * * This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms. - * - * If the core availability policy is keeping the required core group turned - * off, then the job will fail with a BASE_JD_EVENT_PM_EVENT error code. 
*/ #define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11) @@ -1019,11 +832,6 @@ enum { * BASE_JD_EVENT_JOB_CONFIG_FAULT, or if the * platform doesn't support the feature specified in * the atom. - * @BASE_JD_EVENT_PM_EVENT: TODO: remove as it's not used - * @BASE_JD_EVENT_TIMED_OUT: TODO: remove as it's not used - * @BASE_JD_EVENT_BAG_INVALID: TODO: remove as it's not used - * @BASE_JD_EVENT_PROGRESS_REPORT: TODO: remove as it's not used - * @BASE_JD_EVENT_BAG_DONE: TODO: remove as it's not used * @BASE_JD_EVENT_DRV_TERMINATED: this is a special event generated to indicate * to userspace that the KBase context has been * destroyed and Base should stop listening for @@ -1118,17 +926,10 @@ enum base_jd_event_code { /* SW defined exceptions */ BASE_JD_EVENT_MEM_GROWTH_FAILED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000, - BASE_JD_EVENT_TIMED_OUT = - BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001, BASE_JD_EVENT_JOB_CANCELLED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002, BASE_JD_EVENT_JOB_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003, - BASE_JD_EVENT_PM_EVENT = - BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004, - - BASE_JD_EVENT_BAG_INVALID = - BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003, BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_RESERVED | 0x3FF, @@ -1136,10 +937,6 @@ enum base_jd_event_code { BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000, - BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | - BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000, - BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | - BASE_JD_SW_EVENT_BAG | 0x000, BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000, @@ -1206,4 +1003,49 @@ struct base_dump_cpu_gpu_counters { __u8 padding[36]; }; +/** + * struct mali_base_gpu_core_props - GPU core props info + * + * @product_id: Pro specific value. + * @version_status: Status of the GPU release. No defined values, but starts at + * 0 and increases by one for each release status (alpha, beta, EAC, etc.). + * 4 bit values (0-15). + * @minor_revision: Minor release number of the GPU. "P" part of an "RnPn" + * release number. + * 8 bit values (0-255). + * @major_revision: Major release number of the GPU. "R" part of an "RnPn" + * release number. + * 4 bit values (0-15). + * @padding: padding to align to 8-byte + * @gpu_freq_khz_max: The maximum GPU frequency. Reported to applications by + * clGetDeviceInfo() + * @log2_program_counter_size: Size of the shader program counter, in bits. + * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU. This + * is a bitpattern where a set bit indicates that the format is supported. + * Before using a texture format, it is recommended that the corresponding + * bit be checked. + * @gpu_available_memory_size: Theoretical maximum memory available to the GPU. + * It is unlikely that a client will be able to allocate all of this memory + * for their own purposes, but this at least provides an upper bound on the + * memory available to the GPU. + * This is required for OpenCL's clGetDeviceInfo() call when + * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The + * client will not be expecting to allocate anywhere near this value. + * @num_exec_engines: The number of execution engines. Only valid for tGOX + * (Bifrost) GPUs, where GPU_HAS_REG_CORE_FEATURES is defined. Otherwise, + * this is always 0. 
+ */ +struct mali_base_gpu_core_props { + __u32 product_id; + __u16 version_status; + __u16 minor_revision; + __u16 major_revision; + __u16 padding; + __u32 gpu_freq_khz_max; + __u32 log2_program_counter_size; + __u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; + __u64 gpu_available_memory_size; + __u8 num_exec_engines; +}; + #endif /* _UAPI_BASE_JM_KERNEL_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h index 215f12d..20d931a 100644 --- a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h @@ -125,9 +125,11 @@ * - Removed Kernel legacy HWC interface * 11.34: * - First release of new HW performance counters interface. + * 11.35: + * - Dummy model (no mali) backend will now clear HWC values after each sample */ #define BASE_UK_VERSION_MAJOR 11 -#define BASE_UK_VERSION_MINOR 34 +#define BASE_UK_VERSION_MINOR 35 /** * struct kbase_ioctl_version_check - Check version compatibility between diff --git a/common/include/uapi/gpu/arm/midgard/mali_base_common_kernel.h b/common/include/uapi/gpu/arm/midgard/mali_base_common_kernel.h new file mode 100644 index 0000000..f837814 --- /dev/null +++ b/common/include/uapi/gpu/arm/midgard/mali_base_common_kernel.h @@ -0,0 +1,231 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _UAPI_BASE_COMMON_KERNEL_H_ +#define _UAPI_BASE_COMMON_KERNEL_H_ + +#include <linux/types.h> + +struct base_mem_handle { + struct { + __u64 handle; + } basep; +}; + +#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4 + +/* Memory allocation, access/hint flags & mask. + * + * See base_mem_alloc_flags. + */ + +/* IN */ +/* Read access CPU side + */ +#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0) + +/* Write access CPU side + */ +#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1) + +/* Read access GPU side + */ +#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2) + +/* Write access GPU side + */ +#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3) + +/* Execute allowed on the GPU side + */ +#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4) + +/* Will be permanently mapped in kernel space. + * Flag is only allowed on allocations originating from kbase. + */ +#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5) + +/* The allocation will completely reside within the same 4GB chunk in the GPU + * virtual space. + * Since this flag is primarily required only for the TLS memory which will + * not be used to contain executable code and also not used for Tiler heap, + * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags. 
+ */ +#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6) + +/* Userspace is not allowed to free this memory. + * Flag is only allowed on allocations originating from kbase. + */ +#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7) + +/* Grow backing store on GPU Page Fault + */ +#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9) + +/* Page coherence Outer shareable, if available + */ +#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10) + +/* Page coherence Inner shareable + */ +#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11) + +/* IN/OUT */ +/* Should be cached on the CPU, returned if actually cached + */ +#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12) + +/* IN/OUT */ +/* Must have same VA on both the GPU and the CPU + */ +#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13) + +/* OUT */ +/* Must call mmap to acquire a GPU address for the allocation + */ +#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14) + +/* IN */ +/* Page coherence Outer shareable, required. + */ +#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15) + +/* Protected memory + */ +#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16) + +/* Not needed physical memory + */ +#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17) + +/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the + * addresses to be the same + */ +#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18) + +/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu + * mode. Some components within the GPU might only be able to access memory + * that is GPU cacheable. Refer to the specific GPU implementation for more + * details. The 3 shareability flags will be ignored for GPU uncached memory. + * If used while importing USER_BUFFER type memory, then the import will fail + * if the memory is not aligned to GPU and CPU cache line width. + */ +#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21) + +/* + * Bits [22:25] for group_id (0~15). + * + * base_mem_group_id_set() should be used to pack a memory group ID into a + * base_mem_alloc_flags value instead of accessing the bits directly. + * base_mem_group_id_get() should be used to extract the memory group ID from + * a base_mem_alloc_flags value. + */ +#define BASEP_MEM_GROUP_ID_SHIFT 22 +#define BASE_MEM_GROUP_ID_MASK ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT) + +/* Must do CPU cache maintenance when imported memory is mapped/unmapped + * on GPU. Currently applicable to dma-buf type only. + */ +#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26) + +/* OUT */ +/* Kernel side cache sync ops required */ +#define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28) + +/* Number of bits used as flags for base memory management + * + * Must be kept in sync with the base_mem_alloc_flags flags + */ +#define BASE_MEM_FLAGS_NR_BITS 30 + +/* A mask for all output bits, excluding IN/OUT bits. + */ +#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP + +/* A mask for all input bits, including IN/OUT bits. + */ +#define BASE_MEM_FLAGS_INPUT_MASK \ + (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) + +/* Special base mem handles. 
+ */ +#define BASEP_MEM_INVALID_HANDLE (0ul) +#define BASE_MEM_MMU_DUMP_HANDLE (1ul << LOCAL_PAGE_SHIFT) +#define BASE_MEM_TRACE_BUFFER_HANDLE (2ul << LOCAL_PAGE_SHIFT) +#define BASE_MEM_MAP_TRACKING_HANDLE (3ul << LOCAL_PAGE_SHIFT) +#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ul << LOCAL_PAGE_SHIFT) +/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */ +#define BASE_MEM_COOKIE_BASE (64ul << LOCAL_PAGE_SHIFT) +#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << LOCAL_PAGE_SHIFT) + BASE_MEM_COOKIE_BASE) + +/* Flags to pass to ::base_context_init. + * Flags can be ORed together to enable multiple things. + * + * These share the same space as BASEP_CONTEXT_FLAG_*, and so must + * not collide with them. + */ +typedef __u32 base_context_create_flags; + +/* Flags for base context */ + +/* No flags set */ +#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0) + +/* Base context is embedded in a cctx object (flag used for CINSTR + * software counter macros) + */ +#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0) + +/* Base context is a 'System Monitor' context for Hardware counters. + * + * One important side effect of this is that job submission is disabled. + */ +#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED ((base_context_create_flags)1 << 1) + +/* Bit-shift used to encode a memory group ID in base_context_create_flags + */ +#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3) + +/* Bitmask used to encode a memory group ID in base_context_create_flags + */ +#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \ + ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) + +/* Bitpattern describing the base_context_create_flags that can be + * passed to the kernel + */ +#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \ + (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | BASEP_CONTEXT_MMU_GROUP_ID_MASK) + +/* Flags for base tracepoint + */ + +/* Enable additional tracepoints for latency measurements (TL_ATOM_READY, + * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) + */ +#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0) + +/* Indicate that job dumping is enabled. This could affect certain timers + * to account for the performance impact. + */ +#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1) + +#endif /* _UAPI_BASE_COMMON_KERNEL_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h b/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h index f3ffb36..6adbd81 100644 --- a/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h +++ b/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h @@ -27,19 +27,10 @@ #define _UAPI_BASE_KERNEL_H_ #include <linux/types.h> - -struct base_mem_handle { - struct { - __u64 handle; - } basep; -}; - #include "mali_base_mem_priv.h" #include "gpu/mali_kbase_gpu_id.h" #include "gpu/mali_kbase_gpu_coherency.h" -#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4 - #define BASE_MAX_COHERENT_GROUPS 16 #if defined(PAGE_MASK) && defined(PAGE_SHIFT) @@ -458,49 +449,6 @@ struct base_jd_debug_copy_buffer { * 16 coherent groups, since core groups are typically 4 cores. */ -/** - * struct mali_base_gpu_core_props - GPU core props info - * - * @product_id: Pro specific value. - * @version_status: Status of the GPU release. No defined values, but starts at - * 0 and increases by one for each release status (alpha, beta, EAC, etc.). - * 4 bit values (0-15). - * @minor_revision: Minor release number of the GPU. "P" part of an "RnPn" - * release number. - * 8 bit values (0-255). 
- * @major_revision: Major release number of the GPU. "R" part of an "RnPn" - * release number. - * 4 bit values (0-15). - * @padding: padding to allign to 8-byte - * @gpu_freq_khz_max: The maximum GPU frequency. Reported to applications by - * clGetDeviceInfo() - * @log2_program_counter_size: Size of the shader program counter, in bits. - * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU. This - * is a bitpattern where a set bit indicates that the format is supported. - * Before using a texture format, it is recommended that the corresponding - * bit be checked. - * @gpu_available_memory_size: Theoretical maximum memory available to the GPU. - * It is unlikely that a client will be able to allocate all of this memory - * for their own purposes, but this at least provides an upper bound on the - * memory available to the GPU. - * This is required for OpenCL's clGetDeviceInfo() call when - * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The - * client will not be expecting to allocate anywhere near this value. - * @num_exec_engines: The number of execution engines. - */ -struct mali_base_gpu_core_props { - __u32 product_id; - __u16 version_status; - __u16 minor_revision; - __u16 major_revision; - __u16 padding; - __u32 gpu_freq_khz_max; - __u32 log2_program_counter_size; - __u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; - __u64 gpu_available_memory_size; - __u8 num_exec_engines; -}; - /* * More information is possible - but associativity and bus width are not * required by upper-level apis. @@ -531,7 +479,7 @@ struct mali_base_gpu_tiler_props { * field. * @impl_tech: 0 = Not specified, 1 = Silicon, 2 = FPGA, * 3 = SW Model/Emulation - * @padding: padding to allign to 8-byte + * @padding: padding to align to 8-byte * @tls_alloc: Number of threads per core that TLS must be * allocated for */ @@ -551,7 +499,7 @@ struct mali_base_gpu_thread_props { * struct mali_base_gpu_coherent_group - descriptor for a coherent group * @core_mask: Core restriction mask required for the group * @num_cores: Number of cores in the group - * @padding: padding to allign to 8-byte + * @padding: padding to align to 8-byte * * \c core_mask exposes all cores in that coherent group, and \c num_cores * provides a cached population-count for that mask. @@ -581,7 +529,7 @@ struct mali_base_gpu_coherent_group { * are in the group[] member. Use num_groups instead. * @coherency: Coherency features of the memory, accessed by gpu_mem_features * methods - * @padding: padding to allign to 8-byte + * @padding: padding to align to 8-byte * @group: Descriptors of coherent groups * * Note that the sizes of the members could be reduced. However, the \c group @@ -599,6 +547,12 @@ struct mali_base_gpu_coherent_group_info { struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS]; }; +#if MALI_USE_CSF +#include "csf/mali_base_csf_kernel.h" +#else +#include "jm/mali_base_jm_kernel.h" +#endif + /** * struct gpu_raw_gpu_props - A complete description of the GPU's Hardware * Configuration Discovery registers. 
@@ -696,12 +650,6 @@ struct base_gpu_props { struct mali_base_gpu_coherent_group_info coherency_info; }; -#if MALI_USE_CSF -#include "csf/mali_base_csf_kernel.h" -#else -#include "jm/mali_base_jm_kernel.h" -#endif - #define BASE_MEM_GROUP_ID_GET(flags) \ ((flags & BASE_MEM_GROUP_ID_MASK) >> BASEP_MEM_GROUP_ID_SHIFT) diff --git a/common/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h b/common/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h index 304a334..70f5b09 100644 --- a/common/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h +++ b/common/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2015, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,8 +23,7 @@ #define _UAPI_BASE_MEM_PRIV_H_ #include <linux/types.h> - -#include "mali_base_kernel.h" +#include "mali_base_common_kernel.h" #define BASE_SYNCSET_OP_MSYNC (1U << 0) #define BASE_SYNCSET_OP_CSYNC (1U << 1) diff --git a/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h b/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h index d1d5f3d..73d54e9 100644 --- a/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h @@ -563,7 +563,8 @@ union kbase_ioctl_mem_find_gpu_start_and_offset { _IO(KBASE_IOCTL_TYPE, 34) /** - * union kbase_ioctl_gwt_dump - Used to collect all GPU write fault addresses. + * union kbase_ioctl_cinstr_gwt_dump - Used to collect all GPU write fault + * addresses. * @in: Input parameters * @in.addr_buffer: Address of buffer to hold addresses of gpu modified areas. * @in.size_buffer: Address of buffer to hold size of modified areas (in pages) @@ -683,7 +684,7 @@ struct kbase_ioctl_kinstr_prfcnt_enum_info { _IOWR(KBASE_IOCTL_TYPE, 56, struct kbase_ioctl_kinstr_prfcnt_enum_info) /** - * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader + * struct kbase_ioctl_kinstr_prfcnt_setup - Setup HWC dumper/reader * @in: input parameters. * @in.request_item_count: Number of requests in the requests array. * @in.request_item_size: Size in bytes of each request in the requests array. diff --git a/common/include/uapi/gpu/arm/midgard/mali_uk.h b/common/include/uapi/gpu/arm/midgard/mali_uk.h deleted file mode 100644 index 78946f6..0000000 --- a/common/include/uapi/gpu/arm/midgard/mali_uk.h +++ /dev/null @@ -1,70 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * - * (C) COPYRIGHT 2010, 2012-2015, 2018, 2020-2022 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - */ - -/** - * DOC: Types and definitions that are common across OSs for both the user - * and kernel side of the User-Kernel interface. - */ - -#ifndef _UAPI_UK_H_ -#define _UAPI_UK_H_ - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -/** - * DOC: uk_api User-Kernel Interface API - * - * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device - * drivers developed as part of the Midgard DDK. Currently that includes the Base driver. - * - * It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent - * kernel-side API (UKK) via an OS-specific communication mechanism. - * - * This API is internal to the Midgard DDK and is not exposed to any applications. - * - */ - -/** - * enum uk_client_id - These are identifiers for kernel-side drivers - * implementing a UK interface, aka UKK clients. - * @UK_CLIENT_MALI_T600_BASE: Value used to identify the Base driver UK client. - * @UK_CLIENT_COUNT: The number of uk clients supported. This must be - * the last member of the enum - * - * The UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this - * identifier to select a UKK client to the uku_open() function. - * - * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id - * enumeration and the uku_open() implemenation for the various OS ports need to be updated to - * provide a mapping of the identifier to the OS specific device name. - * - */ -enum uk_client_id { - UK_CLIENT_MALI_T600_BASE, - UK_CLIENT_COUNT -}; - -#ifdef __cplusplus -} -#endif /* __cplusplus */ -#endif /* _UAPI_UK_H_ */ diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild index ca02444..81b6e93 100644 --- a/mali_kbase/Kbuild +++ b/mali_kbase/Kbuild @@ -71,7 +71,7 @@ endif # # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= '"r36p0-01eac0"' +MALI_RELEASE_NAME ?= '"r38p1-01eac0"' # We are building for Pixel CONFIG_MALI_PLATFORM_NAME="pixel" @@ -116,7 +116,7 @@ endif # # Experimental features must default to disabled, e.g.: # MALI_EXPERIMENTAL_FEATURE ?= 0 -MALI_INCREMENTAL_RENDERING ?= 0 +MALI_INCREMENTAL_RENDERING_JM ?= 0 # # ccflags @@ -129,7 +129,7 @@ ccflags-y += \ -DMALI_COVERAGE=$(MALI_COVERAGE) \ -DMALI_RELEASE_NAME=$(MALI_RELEASE_NAME) \ -DMALI_JIT_PRESSURE_LIMIT_BASE=$(MALI_JIT_PRESSURE_LIMIT_BASE) \ - -DMALI_INCREMENTAL_RENDERING=$(MALI_INCREMENTAL_RENDERING) \ + -DMALI_INCREMENTAL_RENDERING_JM=$(MALI_INCREMENTAL_RENDERING_JM) \ -DMALI_PLATFORM_DIR=$(MALI_PLATFORM_DIR) ifeq ($(KBUILD_EXTMOD),) @@ -187,6 +187,7 @@ mali_kbase-y := \ mali_kbase_mem_profile_debugfs.o \ mali_kbase_disjoint_events.o \ mali_kbase_debug_mem_view.o \ + mali_kbase_debug_mem_zones.o \ mali_kbase_smc.o \ mali_kbase_mem_pool.o \ mali_kbase_mem_pool_debugfs.o \ diff --git a/mali_kbase/Kconfig b/mali_kbase/Kconfig index ef16a7d..de27ae4 100644 --- a/mali_kbase/Kconfig +++ b/mali_kbase/Kconfig @@ -217,6 +217,20 @@ config MALI_GEM5_BUILD comment "Debug options" depends on MALI_MIDGARD && MALI_EXPERT +config MALI_FW_CORE_DUMP + bool "Enable support for FW core dump" + depends on MALI_MIDGARD && MALI_EXPERT && MALI_CSF_SUPPORT + default n + help + Adds ability to request firmware core dump + + Example: + * To explicitly request core dump: + echo 1 >/sys/kernel/debug/mali0/fw_core_dump + * To output current core dump (after explicitly requesting a core dump, + or kernel driver reported an internal firmware error): + cat 
/sys/kernel/debug/mali0/fw_core_dump + config MALI_DEBUG bool "Enable debug build" depends on MALI_MIDGARD && MALI_EXPERT diff --git a/mali_kbase/Makefile b/mali_kbase/Makefile index f64f568..d8522fc 100644 --- a/mali_kbase/Makefile +++ b/mali_kbase/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -149,16 +149,19 @@ ifeq ($(CONFIG_MALI_MIDGARD),m) ifeq ($(CONFIG_MALI_KUTF), y) CONFIG_MALI_KUTF_IRQ_TEST ?= y CONFIG_MALI_KUTF_CLK_RATE_TRACE ?= y + CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST ?= y else # Prevent misuse when CONFIG_MALI_KUTF=n CONFIG_MALI_KUTF_IRQ_TEST = n CONFIG_MALI_KUTF_CLK_RATE_TRACE = n + CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n endif else # Prevent misuse when CONFIG_MALI_DEBUG=n CONFIG_MALI_KUTF = n CONFIG_MALI_KUTF_IRQ_TEST = n CONFIG_MALI_KUTF_CLK_RATE_TRACE = n + CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n endif else # Prevent misuse when CONFIG_MALI_MIDGARD=n @@ -168,6 +171,7 @@ else CONFIG_MALI_KUTF = n CONFIG_MALI_KUTF_IRQ_TEST = n CONFIG_MALI_KUTF_CLK_RATE_TRACE = n + CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n endif # All Mali CONFIG should be listed here @@ -207,6 +211,7 @@ CONFIGS := \ CONFIG_MALI_KUTF \ CONFIG_MALI_KUTF_IRQ_TEST \ CONFIG_MALI_KUTF_CLK_RATE_TRACE \ + CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \ CONFIG_MALI_XEN # Pixel integration CONFIG options @@ -247,6 +252,26 @@ EXTRA_CFLAGS += -DCONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME) # KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions # +# The following were added to align with W=1 in scripts/Makefile.extrawarn +# from the Linux source tree +KBUILD_CFLAGS += -Wall -Werror +KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter +KBUILD_CFLAGS += -Wmissing-declarations +KBUILD_CFLAGS += -Wmissing-format-attribute +KBUILD_CFLAGS += -Wmissing-prototypes +KBUILD_CFLAGS += -Wold-style-definition +KBUILD_CFLAGS += -Wmissing-include-dirs +KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable) +KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable) +KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned) +KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation) +# The following turn off the warnings enabled by -Wextra +KBUILD_CFLAGS += -Wno-missing-field-initializers +KBUILD_CFLAGS += -Wno-sign-compare +KBUILD_CFLAGS += -Wno-type-limits + +KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 + all: $(MAKE) -C $(KDIR) M=$(M) W=1 $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules diff --git a/mali_kbase/Mconfig b/mali_kbase/Mconfig index f76d68b..d03322c 100644 --- a/mali_kbase/Mconfig +++ b/mali_kbase/Mconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -65,8 +65,7 @@ config MALI_CSF_SUPPORT config MALI_DEVFREQ bool "Enable devfreq support for Mali" depends on MALI_MIDGARD - default y if PLATFORM_JUNO - default y if PLATFORM_CUSTOM + default y help Support devfreq for Mali. @@ -192,6 +191,20 @@ config MALI_CORESTACK If unsure, say N. 
+config MALI_FW_CORE_DUMP + bool "Enable support for FW core dump" + depends on MALI_MIDGARD && MALI_EXPERT && MALI_CSF_SUPPORT + default n + help + Adds ability to request firmware core dump + + Example: + * To explicitly request core dump: + echo 1 >/sys/kernel/debug/mali0/fw_core_dump + * To output current core dump (after explicitly requesting a core dump, + or kernel driver reported an internal firmware error): + cat /sys/kernel/debug/mali0/fw_core_dump + choice prompt "Error injection level" depends on MALI_MIDGARD && MALI_EXPERT diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_pm.c b/mali_kbase/arbiter/mali_kbase_arbiter_pm.c index d813a04..667552c 100644 --- a/mali_kbase/arbiter/mali_kbase_arbiter_pm.c +++ b/mali_kbase/arbiter/mali_kbase_arbiter_pm.c @@ -955,7 +955,6 @@ static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld( int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler) { - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; int res = 0; @@ -1008,11 +1007,9 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, /* Need to synchronously wait for GPU assignment */ atomic_inc(&kbdev->pm.gpu_users_waiting); mutex_unlock(&arb_vm_state->vm_state_lock); - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&js_devdata->runpool_mutex); + kbase_pm_unlock(kbdev); kbase_arbiter_pm_vm_wait_gpu_assignment(kbdev); - mutex_lock(&js_devdata->runpool_mutex); - mutex_lock(&kbdev->pm.lock); + kbase_pm_lock(kbdev); mutex_lock(&arb_vm_state->vm_state_lock); atomic_dec(&kbdev->pm.gpu_users_waiting); } @@ -1111,7 +1108,7 @@ static int arb_gpu_clk_notifier_register(struct kbase_device *kbdev, } /** - * gpu_clk_notifier_unregister() - Unregister clock rate change notifier + * arb_gpu_clk_notifier_unregister() - Unregister clock rate change notifier * @kbdev: kbase_device pointer * @gpu_clk_handle: Handle unique to the enumerated GPU clock * @nb: notifier block containing the callback function pointer diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c index d6b9750..ddd03ca 100644 --- a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c +++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -72,49 +72,6 @@ get_clk_rate_trace_callbacks(__maybe_unused struct kbase_device *kbdev) return callbacks; } -int kbase_lowest_gpu_freq_init(struct kbase_device *kbdev) -{ - /* Uses default reference frequency defined in below macro */ - u64 lowest_freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ; - - /* Only check lowest frequency in cases when OPPs are used and - * present in the device tree. - */ -#ifdef CONFIG_PM_OPP - struct dev_pm_opp *opp_ptr; - unsigned long found_freq = 0; - - /* find lowest frequency OPP */ - opp_ptr = dev_pm_opp_find_freq_ceil(kbdev->dev, &found_freq); - if (IS_ERR(opp_ptr)) { - dev_err(kbdev->dev, - "No OPPs found in device tree! 
Scaling timeouts using %llu kHz", - (unsigned long long)lowest_freq_khz); - } else { -#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE - dev_pm_opp_put(opp_ptr); /* decrease OPP refcount */ -#endif - /* convert found frequency to KHz */ - found_freq /= 1000; - - /* If lowest frequency in OPP table is still higher - * than the reference, then keep the reference frequency - * as the one to use for scaling . - */ - if (found_freq < lowest_freq_khz) - lowest_freq_khz = found_freq; - } -#else - dev_err(kbdev->dev, - "No operating-points-v2 node or operating-points property in DT"); -#endif - - kbdev->lowest_gpu_freq_khz = lowest_freq_khz; - dev_dbg(kbdev->dev, "Lowest frequency identified is %llu kHz", - kbdev->lowest_gpu_freq_khz); - return 0; -} - static int gpu_clk_rate_change_notifier(struct notifier_block *nb, unsigned long event, void *data) { diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h index a6ee959..35b3b8d 100644 --- a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h +++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -61,20 +61,6 @@ struct kbase_clk_data { int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev); /** - * kbase_init_lowest_gpu_freq() - Find the lowest frequency that the GPU can - * run as using the device tree, and save this - * within kbdev. - * @kbdev: Pointer to kbase device. - * - * This function could be called from kbase_clk_rate_trace_manager_init, - * but is left separate as it can be called as soon as - * dev_pm_opp_of_add_table() has been called to initialize the OPP table. - * - * Return: 0 in any case. - */ -int kbase_lowest_gpu_freq_init(struct kbase_device *kbdev); - -/** * kbase_clk_rate_trace_manager_term - Terminate GPU clock rate trace manager. * * @kbdev: Device pointer diff --git a/mali_kbase/backend/gpu/mali_kbase_devfreq.c b/mali_kbase/backend/gpu/mali_kbase_devfreq.c index 00b32b9..09c1863 100644 --- a/mali_kbase/backend/gpu/mali_kbase_devfreq.c +++ b/mali_kbase/backend/gpu/mali_kbase_devfreq.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -57,7 +57,7 @@ static unsigned long get_voltage(struct kbase_device *kbdev, unsigned long freq) opp = dev_pm_opp_find_freq_exact(kbdev->dev, freq, true); if (IS_ERR_OR_NULL(opp)) - dev_err(kbdev->dev, "Failed to get opp (%ld)\n", PTR_ERR(opp)); + dev_err(kbdev->dev, "Failed to get opp (%d)\n", PTR_ERR_OR_ZERO(opp)); else { voltage = dev_pm_opp_get_voltage(opp); #if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE @@ -133,8 +133,8 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) rcu_read_unlock(); #endif if (IS_ERR_OR_NULL(opp)) { - dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp)); - return PTR_ERR(opp); + dev_err(dev, "Failed to get opp (%d)\n", PTR_ERR_OR_ZERO(opp)); + return IS_ERR(opp) ? 
PTR_ERR(opp) : -ENODEV; } #if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE dev_pm_opp_put(opp); @@ -317,6 +317,7 @@ static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev, dp->max_state = i; + /* Have the lowest clock as suspend clock. * It may be overridden by 'opp-mali-errata-1485982'. */ @@ -636,6 +637,7 @@ int kbase_devfreq_init(struct kbase_device *kbdev) struct devfreq_dev_profile *dp; int err; unsigned int i; + bool free_devfreq_freq_table = true; if (kbdev->nr_clocks == 0) { dev_err(kbdev->dev, "Clock not available for devfreq\n"); @@ -669,32 +671,35 @@ int kbase_devfreq_init(struct kbase_device *kbdev) dp->freq_table[0] / 1000; } - err = kbase_devfreq_init_core_mask_table(kbdev); +#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) + err = kbase_ipa_init(kbdev); if (err) { - kbase_devfreq_term_freq_table(kbdev); - return err; + dev_err(kbdev->dev, "IPA initialization failed"); + goto ipa_init_failed; } +#endif + + err = kbase_devfreq_init_core_mask_table(kbdev); + if (err) + goto init_core_mask_table_failed; kbdev->devfreq = devfreq_add_device(kbdev->dev, dp, "simple_ondemand", NULL); if (IS_ERR(kbdev->devfreq)) { err = PTR_ERR(kbdev->devfreq); kbdev->devfreq = NULL; - kbase_devfreq_term_core_mask_table(kbdev); - kbase_devfreq_term_freq_table(kbdev); - dev_err(kbdev->dev, "Fail to add devfreq device(%d)\n", err); - return err; + dev_err(kbdev->dev, "Fail to add devfreq device(%d)", err); + goto devfreq_add_dev_failed; } + /* Explicit free of freq table isn't needed after devfreq_add_device() */ + free_devfreq_freq_table = false; + /* Initialize devfreq suspend/resume workqueue */ err = kbase_devfreq_work_init(kbdev); if (err) { - if (devfreq_remove_device(kbdev->devfreq)) - dev_err(kbdev->dev, "Fail to rm devfreq\n"); - kbdev->devfreq = NULL; - kbase_devfreq_term_core_mask_table(kbdev); - dev_err(kbdev->dev, "Fail to init devfreq workqueue\n"); - return err; + dev_err(kbdev->dev, "Fail to init devfreq workqueue"); + goto devfreq_work_init_failed; } /* devfreq_add_device only copies a few of kbdev->dev's fields, so @@ -705,26 +710,20 @@ int kbase_devfreq_init(struct kbase_device *kbdev) err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq); if (err) { dev_err(kbdev->dev, - "Failed to register OPP notifier (%d)\n", err); + "Failed to register OPP notifier (%d)", err); goto opp_notifier_failed; } #if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) - err = kbase_ipa_init(kbdev); - if (err) { - dev_err(kbdev->dev, "IPA initialization failed\n"); - goto ipa_init_failed; - } - kbdev->devfreq_cooling = of_devfreq_cooling_register_power( kbdev->dev->of_node, kbdev->devfreq, &kbase_ipa_power_model_ops); if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) { - err = PTR_ERR(kbdev->devfreq_cooling); + err = PTR_ERR_OR_ZERO(kbdev->devfreq_cooling); dev_err(kbdev->dev, - "Failed to register cooling device (%d)\n", - err); + "Failed to register cooling device (%d)", err); + err = err == 0 ? 
-ENODEV : err; goto cooling_reg_failed; } #endif @@ -733,21 +732,29 @@ int kbase_devfreq_init(struct kbase_device *kbdev) #if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) cooling_reg_failed: - kbase_ipa_term(kbdev); -ipa_init_failed: devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); #endif /* CONFIG_DEVFREQ_THERMAL */ opp_notifier_failed: kbase_devfreq_work_term(kbdev); +devfreq_work_init_failed: if (devfreq_remove_device(kbdev->devfreq)) - dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); + dev_err(kbdev->dev, "Failed to terminate devfreq (%d)", err); kbdev->devfreq = NULL; +devfreq_add_dev_failed: kbase_devfreq_term_core_mask_table(kbdev); +init_core_mask_table_failed: +#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) + kbase_ipa_term(kbdev); +ipa_init_failed: +#endif + if (free_devfreq_freq_table) + kbase_devfreq_term_freq_table(kbdev); + return err; } @@ -760,8 +767,6 @@ void kbase_devfreq_term(struct kbase_device *kbdev) #if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) if (kbdev->devfreq_cooling) devfreq_cooling_unregister(kbdev->devfreq_cooling); - - kbase_ipa_term(kbdev); #endif devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); @@ -775,4 +780,8 @@ void kbase_devfreq_term(struct kbase_device *kbdev) kbdev->devfreq = NULL; kbase_devfreq_term_core_mask_table(kbdev); + +#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) + kbase_ipa_term(kbdev); +#endif } diff --git a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c index 0ea14bc..10e92ec 100644 --- a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -40,19 +40,7 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev, registers.l2_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_FEATURES)); - registers.core_features = 0; -#if !MALI_USE_CSF - /* TGOx */ - registers.core_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(CORE_FEATURES)); -#else /* !MALI_USE_CSF */ - if (!(((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) == - GPU_ID2_PRODUCT_TDUX) || - ((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) == - GPU_ID2_PRODUCT_TODX))) - registers.core_features = - kbase_reg_read(kbdev, GPU_CONTROL_REG(CORE_FEATURES)); -#endif /* MALI_USE_CSF */ + registers.tiler_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_FEATURES)); registers.mem_features = kbase_reg_read(kbdev, @@ -170,6 +158,11 @@ int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, regdump->coherency_features = coherency_features; + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CORE_FEATURES)) + regdump->core_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(CORE_FEATURES)); + else + regdump->core_features = 0; + kbase_pm_register_access_disable(kbdev); return error; diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c index 0ece571..b89b917 100644 --- a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2022 ARM Limited. 
All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,6 +29,20 @@ #include <device/mali_kbase_device.h> #include <backend/gpu/mali_kbase_instr_internal.h> +static int wait_prfcnt_ready(struct kbase_device *kbdev) +{ + u32 loops; + + for (loops = 0; loops < KBASE_PRFCNT_ACTIVE_MAX_LOOPS; loops++) { + const u32 prfcnt_active = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & + GPU_STATUS_PRFCNT_ACTIVE; + if (!prfcnt_active) + return 0; + } + + dev_err(kbdev->dev, "PRFCNT_ACTIVE bit stuck\n"); + return -EBUSY; +} int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, struct kbase_context *kctx, @@ -43,20 +57,20 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, /* alignment failure */ if ((enable->dump_buffer == 0ULL) || (enable->dump_buffer & (2048 - 1))) - goto out_err; + return err; spin_lock_irqsave(&kbdev->hwcnt.lock, flags); if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { /* Instrumentation is already enabled */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - goto out_err; + return err; } if (kbase_is_gpu_removed(kbdev)) { /* GPU has been removed by Arbiter */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - goto out_err; + return err; } /* Enable interrupt */ @@ -81,9 +95,19 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT; #endif + /* Wait until prfcnt config register can be written */ + err = wait_prfcnt_ready(kbdev); + if (err) + return err; + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), prfcnt_config | PRFCNT_CONFIG_MODE_OFF); + /* Wait until prfcnt is disabled before writing configuration registers */ + err = wait_prfcnt_ready(kbdev); + if (err) + return err; + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), enable->dump_buffer & 0xFFFFFFFF); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), @@ -111,12 +135,8 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - err = 0; - dev_dbg(kbdev->dev, "HW counters dumping set-up for context %pK", kctx); - return err; - out_err: - return err; + return 0; } static void kbasep_instr_hwc_disable_hw_prfcnt(struct kbase_device *kbdev) @@ -135,7 +155,10 @@ static void kbasep_instr_hwc_disable_hw_prfcnt(struct kbase_device *kbdev) kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~PRFCNT_SAMPLE_COMPLETED); - /* Disable the counters */ + /* Wait until prfcnt config register can be written, then disable the counters. + * Return value is ignored as we are disabling anyway. 
+ */ + wait_prfcnt_ready(kbdev); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0); kbdev->hwcnt.kctx = NULL; @@ -146,7 +169,6 @@ static void kbasep_instr_hwc_disable_hw_prfcnt(struct kbase_device *kbdev) int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) { unsigned long flags, pm_flags; - int err = -EINVAL; struct kbase_device *kbdev = kctx->kbdev; while (1) { @@ -167,14 +189,14 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) /* Instrumentation is not enabled */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); - return err; + return -EINVAL; } if (kbdev->hwcnt.kctx != kctx) { /* Instrumentation has been setup for another context */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); - return err; + return -EINVAL; } if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) @@ -233,6 +255,11 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) */ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING; + /* Wait until prfcnt is ready to request dump */ + err = wait_prfcnt_ready(kbdev); + if (err) + goto unlock; + /* Reconfigure the dump address */ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), kbdev->hwcnt.addr & 0xFFFFFFFF); @@ -248,11 +275,8 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) dev_dbg(kbdev->dev, "HW counters dumping done for context %pK", kctx); - err = 0; - unlock: spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - return err; } KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump); @@ -346,21 +370,24 @@ int kbase_instr_hwcnt_clear(struct kbase_context *kctx) */ if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) - goto out; + goto unlock; if (kbase_is_gpu_removed(kbdev)) { /* GPU has been removed by Arbiter */ - goto out; + goto unlock; } + /* Wait until prfcnt is ready to clear */ + err = wait_prfcnt_ready(kbdev); + if (err) + goto unlock; + /* Clear the counters */ KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, 0); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_PRFCNT_CLEAR); - err = 0; - -out: +unlock: spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); return err; } diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c index 32bdf72..20905f7 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c @@ -191,9 +191,7 @@ static u64 select_job_chain(struct kbase_jd_atom *katom) return jc; } -void kbase_job_hw_submit(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, - int js) +int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js) { struct kbase_context *kctx; u32 cfg; @@ -202,13 +200,13 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, struct slot_rb *ptr_slot_rb = &kbdev->hwaccess.backend.slot_rb[js]; lockdep_assert_held(&kbdev->hwaccess_lock); - KBASE_DEBUG_ASSERT(kbdev); - KBASE_DEBUG_ASSERT(katom); kctx = katom->kctx; /* Command register must be available */ - KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx)); + if (WARN(!kbasep_jm_is_js_free(kbdev, js, kctx), + "Attempting to assign to occupied slot %d in kctx %pK\n", js, (void *)kctx)) + return -EPERM; dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n", jc_head, (void *)katom); @@ -281,7 +279,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, /* Write an approximate start timestamp. 
* It's approximate because there might be a job in the HEAD register. */ - katom->start_timestamp = ktime_get(); + katom->start_timestamp = ktime_get_raw(); /* GO ! */ dev_dbg(kbdev->dev, "JS: Submitting atom %pK from ctx %pK to js[%d] with head=0x%llx", @@ -329,6 +327,8 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), JS_COMMAND_START); + + return 0; } /** @@ -393,11 +393,9 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) lockdep_assert_held(&kbdev->hwaccess_lock); - KBASE_DEBUG_ASSERT(kbdev); - KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ, NULL, NULL, 0, done); - end_timestamp = ktime_get(); + end_timestamp = ktime_get_raw(); while (done) { u32 failed = done >> 16; @@ -409,7 +407,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) * numbered interrupts before the higher numbered ones. */ i = ffs(finished) - 1; - KBASE_DEBUG_ASSERT(i >= 0); + if (WARN(i < 0, "%s: called without receiving any interrupts\n", __func__)) + break; do { int nr_done; @@ -590,7 +589,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) failed = done >> 16; finished = (done & 0xFFFF) | failed; if (done) - end_timestamp = ktime_get(); + end_timestamp = ktime_get_raw(); } while (finished & (1 << i)); kbasep_job_slot_update_head_start_timestamp(kbdev, i, @@ -619,7 +618,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, u64 job_in_head_before; u32 status_reg_after; - KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK))); + WARN_ON(action & (~JS_COMMAND_MASK)); /* Check the head pointer */ job_in_head_before = ((u64) kbase_reg_read(kbdev, @@ -697,7 +696,8 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, head, head->jc, js); break; default: - BUG(); + WARN(1, "Unknown action %d on atom %pK in kctx %pK\n", action, + (void *)target_katom, (void *)target_katom->kctx); break; } } else { @@ -726,7 +726,8 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL, 0, js); break; default: - BUG(); + WARN(1, "Unknown action %d on atom %pK in kctx %pK\n", action, + (void *)target_katom, (void *)target_katom->kctx); break; } } @@ -752,9 +753,7 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, int i; bool stop_sent = false; - KBASE_DEBUG_ASSERT(kctx != NULL); kbdev = kctx->kbdev; - KBASE_DEBUG_ASSERT(kbdev != NULL); lockdep_assert_held(&kbdev->hwaccess_lock); @@ -934,7 +933,11 @@ void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n", target_katom, sw_flags, js); - KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK)); + if (sw_flags & JS_COMMAND_MASK) { + WARN(true, "Atom %pK in kctx %pK received non-NOP flags %d\n", (void *)target_katom, + target_katom ? 
(void *)target_katom->kctx : NULL, sw_flags); + sw_flags &= ~((u32)JS_COMMAND_MASK); + } kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom, JS_COMMAND_SOFT_STOP | sw_flags); } @@ -1052,17 +1055,14 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) { unsigned long flags; struct kbase_device *kbdev; - ktime_t end_timestamp = ktime_get(); + ktime_t end_timestamp = ktime_get_raw(); struct kbasep_js_device_data *js_devdata; bool silent = false; u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; - KBASE_DEBUG_ASSERT(data); - kbdev = container_of(data, struct kbase_device, hwaccess.backend.reset_work); - KBASE_DEBUG_ASSERT(kbdev); js_devdata = &kbdev->js_data; if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == @@ -1097,7 +1097,7 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) return; } - KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false); + WARN(kbdev->irq_reset_flush, "%s: GPU reset already in flight\n", __func__); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); spin_lock(&kbdev->mmu_mask_change); @@ -1138,7 +1138,8 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) mutex_lock(&kbdev->pm.lock); /* We hold the pm lock, so there ought to be a current policy */ - KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy); + if (unlikely(!kbdev->pm.backend.pm_current_policy)) + dev_warn(kbdev->dev, "No power policy set!"); /* All slot have been soft-stopped and we've waited * SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we @@ -1235,8 +1236,6 @@ static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer) struct kbase_device *kbdev = container_of(timer, struct kbase_device, hwaccess.backend.reset_timer); - KBASE_DEBUG_ASSERT(kbdev); - /* Reset still pending? 
*/ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) == @@ -1257,8 +1256,6 @@ static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev) int i; int pending_jobs = 0; - KBASE_DEBUG_ASSERT(kbdev); - /* Count the number of jobs */ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i); @@ -1316,8 +1313,6 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, { int i; - KBASE_DEBUG_ASSERT(kbdev); - #ifdef CONFIG_MALI_ARBITER_SUPPORT if (kbase_pm_is_gpu_lost(kbdev)) { /* GPU access has been removed, reset will be done by @@ -1371,13 +1366,11 @@ KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu); */ void kbase_reset_gpu(struct kbase_device *kbdev) { - KBASE_DEBUG_ASSERT(kbdev); - /* Note this is an assert/atomic_set because it is a software issue for * a race to be occurring here */ - KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) == - KBASE_RESET_GPU_PREPARED); + if (WARN_ON(atomic_read(&kbdev->hwaccess.backend.reset_gpu) != KBASE_RESET_GPU_PREPARED)) + return; atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_COMMITTED); @@ -1395,13 +1388,11 @@ KBASE_EXPORT_TEST_API(kbase_reset_gpu); void kbase_reset_gpu_locked(struct kbase_device *kbdev) { - KBASE_DEBUG_ASSERT(kbdev); - /* Note this is an assert/atomic_set because it is a software issue for * a race to be occurring here */ - KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) == - KBASE_RESET_GPU_PREPARED); + if (WARN_ON(atomic_read(&kbdev->hwaccess.backend.reset_gpu) != KBASE_RESET_GPU_PREPARED)) + return; atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_COMMITTED); diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h index 1039e85..1ebb843 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2016, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016, 2018-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -76,7 +76,6 @@ static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, } #endif - /** * kbase_job_hw_submit() - Submit a job to the GPU * @kbdev: Device pointer @@ -88,10 +87,10 @@ static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, * * The following locking conditions are made on the caller: * - it must hold the hwaccess_lock + * + * Return: 0 if the job was successfully submitted to hardware, an error otherwise. 
*/ -void kbase_job_hw_submit(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, - int js); +int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js); #if !MALI_USE_CSF /** diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c index 48d1de8..4fe8046 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c @@ -346,16 +346,35 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, katom->protected_state.exit != KBASE_ATOM_EXIT_PROTECTED_CHECK) kbdev->protected_mode_transition = false; + + /* If the atom is at KBASE_ATOM_ENTER_PROTECTED_HWCNT state, it means + * one of two events prevented it from progressing to the next state and + * ultimately reach protected mode: + * - hwcnts were enabled, and the atom had to schedule a worker to + * disable them. + * - the hwcnts were already disabled, but some other error occurred. + * In the first case, if the worker has not yet completed + * (kbdev->protected_mode_hwcnt_disabled == false), we need to re-enable + * them and signal to the worker they have already been enabled + */ + if (kbase_jd_katom_is_protected(katom) && + (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_HWCNT)) { + kbdev->protected_mode_hwcnt_desired = true; + if (kbdev->protected_mode_hwcnt_disabled) { + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + kbdev->protected_mode_hwcnt_disabled = false; + } + } + /* If the atom has suspended hwcnt but has not yet entered * protected mode, then resume hwcnt now. If the GPU is now in * protected mode then hwcnt will be resumed by GPU reset so * don't resume it here. */ if (kbase_jd_katom_is_protected(katom) && - ((katom->protected_state.enter == - KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) || - (katom->protected_state.enter == - KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY))) { + ((katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) || + (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY) || + (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_FINISHED))) { WARN_ON(!kbdev->protected_mode_hwcnt_disabled); kbdev->protected_mode_hwcnt_desired = true; if (kbdev->protected_mode_hwcnt_disabled) { @@ -506,17 +525,14 @@ static int kbase_jm_protected_entry(struct kbase_device *kbdev, KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev); if (err) { /* - * Failed to switch into protected mode, resume - * GPU hwcnt and fail atom. + * Failed to switch into protected mode. + * + * At this point we expect: + * katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION && + * katom->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_FINISHED + * ==> + * kbdev->protected_mode_hwcnt_disabled = false */ - WARN_ON(!kbdev->protected_mode_hwcnt_disabled); - kbdev->protected_mode_hwcnt_desired = true; - if (kbdev->protected_mode_hwcnt_disabled) { - kbase_hwcnt_context_enable( - kbdev->hwcnt_gpu_ctx); - kbdev->protected_mode_hwcnt_disabled = false; - } - katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); /* @@ -536,12 +552,9 @@ static int kbase_jm_protected_entry(struct kbase_device *kbdev, /* * Protected mode sanity checks. 
*/ - KBASE_DEBUG_ASSERT_MSG( - kbase_jd_katom_is_protected(katom[idx]) == - kbase_gpu_in_protected_mode(kbdev), - "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", - kbase_jd_katom_is_protected(katom[idx]), - kbase_gpu_in_protected_mode(kbdev)); + WARN(kbase_jd_katom_is_protected(katom[idx]) != kbase_gpu_in_protected_mode(kbdev), + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", + kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev)); katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; @@ -951,18 +964,6 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) cores_ready = kbase_pm_cores_requested(kbdev, true); - if (katom[idx]->event_code == - BASE_JD_EVENT_PM_EVENT) { - KBASE_KTRACE_ADD_JM_SLOT_INFO( - kbdev, JM_MARK_FOR_RETURN_TO_JS, - katom[idx]->kctx, katom[idx], - katom[idx]->jc, js, - katom[idx]->event_code); - katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_RETURN_TO_JS; - break; - } - if (!cores_ready) break; @@ -1011,9 +1012,10 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) kbase_pm_request_gpu_cycle_counter_l2_is_on( kbdev); - kbase_job_hw_submit(kbdev, katom[idx], js); - katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_SUBMITTED; + if (!kbase_job_hw_submit(kbdev, katom[idx], js)) + katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_SUBMITTED; + else + break; kbasep_platform_event_work_begin(katom[idx]); @@ -1346,11 +1348,9 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, } else { char js_string[16]; - trace_gpu_sched_switch(kbasep_make_job_slot_string(js, - js_string, - sizeof(js_string)), - ktime_to_ns(ktime_get()), 0, 0, - 0); + trace_gpu_sched_switch(kbasep_make_job_slot_string(js, js_string, + sizeof(js_string)), + ktime_to_ns(ktime_get_raw()), 0, 0, 0); } } #endif @@ -1406,14 +1406,14 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) if (katom->protected_state.exit == KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) { /* protected mode sanity checks */ - KBASE_DEBUG_ASSERT_MSG( - kbase_jd_katom_is_protected(katom) == kbase_gpu_in_protected_mode(kbdev), - "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", - kbase_jd_katom_is_protected(katom), kbase_gpu_in_protected_mode(kbdev)); - KBASE_DEBUG_ASSERT_MSG( - (kbase_jd_katom_is_protected(katom) && js == 0) || - !kbase_jd_katom_is_protected(katom), - "Protected atom on JS%d not supported", js); + WARN(kbase_jd_katom_is_protected(katom) != + kbase_gpu_in_protected_mode(kbdev), + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", + kbase_jd_katom_is_protected(katom), + kbase_gpu_in_protected_mode(kbdev)); + WARN(!(kbase_jd_katom_is_protected(katom) && js == 0) && + kbase_jd_katom_is_protected(katom), + "Protected atom on JS%d not supported", js); } if ((katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) && !kbase_ctx_flag(katom->kctx, KCTX_DYING)) @@ -1804,11 +1804,9 @@ void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, base_jd_core_req core_req) { if (!kbdev->pm.active_count) { - mutex_lock(&kbdev->js_data.runpool_mutex); - mutex_lock(&kbdev->pm.lock); + kbase_pm_lock(kbdev); kbase_pm_update_active(kbdev); - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&kbdev->js_data.runpool_mutex); + kbase_pm_unlock(kbdev); } } diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c index 603ffcf..961a951 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c +++ 
b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -80,6 +80,7 @@ static bool ipa_control_timer_enabled; #endif #define LO_MASK(M) ((M) & 0xFFFFFFFF) +#define HI_MASK(M) ((M) & 0xFFFFFFFF00000000) static u32 get_implementation_register(u32 reg) { @@ -104,20 +105,15 @@ static u32 get_implementation_register(u32 reg) } struct { + spinlock_t access_lock; +#if !MALI_USE_CSF unsigned long prfcnt_base; +#endif /* !MALI_USE_CSF */ u32 *prfcnt_base_cpu; - struct kbase_device *kbdev; - struct tagged_addr *pages; - size_t page_count; u32 time; - struct { - u32 jm; - u32 tiler; - u32 l2; - u32 shader; - } prfcnt_en; + struct gpu_model_prfcnt_en prfcnt_en; u64 l2_present; u64 shader_present; @@ -181,7 +177,9 @@ struct control_reg_values_t { struct dummy_model_t { int reset_completed; int reset_completed_mask; +#if !MALI_USE_CSF int prfcnt_sample_completed; +#endif /* !MALI_USE_CSF */ int power_changed_mask; /* 2bits: _ALL,_SINGLE */ int power_changed; /* 1bit */ bool clean_caches_completed; @@ -464,6 +462,7 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, u32 event_index; u64 value = 0; u32 core; + unsigned long flags; if (WARN_ON(core_type >= KBASE_IPA_CORE_TYPE_NUM)) return 0; @@ -487,6 +486,8 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, event_index -= 4; + spin_lock_irqsave(&performance_counters.access_lock, flags); + switch (core_type) { case KBASE_IPA_CORE_TYPE_CSHW: core_count = 1; @@ -514,28 +515,46 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, event_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE; } + spin_unlock_irqrestore(&performance_counters.access_lock, flags); + if (is_low_word) return (value & U32_MAX); else return (value >> 32); } +#endif /* MALI_USE_CSF */ -void gpu_model_clear_prfcnt_values(void) +/** + * gpu_model_clear_prfcnt_values_nolock - Clear performance counter values + * + * Sets all performance counter values to zero. The performance counter access + * lock must be held when calling this function. 
+ */ +static void gpu_model_clear_prfcnt_values_nolock(void) { - memset(performance_counters.cshw_counters, 0, - sizeof(performance_counters.cshw_counters)); - - memset(performance_counters.tiler_counters, 0, - sizeof(performance_counters.tiler_counters)); - - memset(performance_counters.l2_counters, 0, - sizeof(performance_counters.l2_counters)); - + lockdep_assert_held(&performance_counters.access_lock); +#if !MALI_USE_CSF + memset(performance_counters.jm_counters, 0, sizeof(performance_counters.jm_counters)); +#else + memset(performance_counters.cshw_counters, 0, sizeof(performance_counters.cshw_counters)); +#endif /* !MALI_USE_CSF */ + memset(performance_counters.tiler_counters, 0, sizeof(performance_counters.tiler_counters)); + memset(performance_counters.l2_counters, 0, sizeof(performance_counters.l2_counters)); memset(performance_counters.shader_counters, 0, sizeof(performance_counters.shader_counters)); } + +#if MALI_USE_CSF +void gpu_model_clear_prfcnt_values(void) +{ + unsigned long flags; + + spin_lock_irqsave(&performance_counters.access_lock, flags); + gpu_model_clear_prfcnt_values_nolock(); + spin_unlock_irqrestore(&performance_counters.access_lock, flags); +} KBASE_EXPORT_TEST_API(gpu_model_clear_prfcnt_values); -#endif +#endif /* MALI_USE_CSF */ /** * gpu_model_dump_prfcnt_blocks() - Dump performance counter values to buffer @@ -545,17 +564,20 @@ KBASE_EXPORT_TEST_API(gpu_model_clear_prfcnt_values); * @block_count: Number of blocks to dump * @prfcnt_enable_mask: Counter enable mask * @blocks_present: Available blocks bit mask + * + * The performance counter access lock must be held before calling this + * function. */ -static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index, - u32 block_count, - u32 prfcnt_enable_mask, - u64 blocks_present) +static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index, u32 block_count, + u32 prfcnt_enable_mask, u64 blocks_present) { u32 block_idx, counter; u32 counter_value = 0; u32 *prfcnt_base; u32 index = 0; + lockdep_assert_held(&performance_counters.access_lock); + prfcnt_base = performance_counters.prfcnt_base_cpu; for (block_idx = 0; block_idx < block_count; block_idx++) { @@ -594,35 +616,18 @@ static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index, } } -/** - * gpu_model_sync_dummy_prfcnt() - Synchronize dumped performance counter values - * - * Used to ensure counter values are not lost if cache invalidation is performed - * prior to reading. 
- */ -static void gpu_model_sync_dummy_prfcnt(void) -{ - int i; - struct page *pg; - - for (i = 0; i < performance_counters.page_count; i++) { - pg = as_page(performance_counters.pages[i]); - kbase_sync_single_for_device(performance_counters.kbdev, - kbase_dma_addr(pg), PAGE_SIZE, - DMA_BIDIRECTIONAL); - } -} - -static void midgard_model_dump_prfcnt(void) +static void gpu_model_dump_nolock(void) { u32 index = 0; + lockdep_assert_held(&performance_counters.access_lock); + #if !MALI_USE_CSF - gpu_model_dump_prfcnt_blocks(performance_counters.jm_counters, &index, - 1, 0xffffffff, 0x1); + gpu_model_dump_prfcnt_blocks(performance_counters.jm_counters, &index, 1, + performance_counters.prfcnt_en.fe, 0x1); #else - gpu_model_dump_prfcnt_blocks(performance_counters.cshw_counters, &index, - 1, 0xffffffff, 0x1); + gpu_model_dump_prfcnt_blocks(performance_counters.cshw_counters, &index, 1, + performance_counters.prfcnt_en.fe, 0x1); #endif /* !MALI_USE_CSF */ gpu_model_dump_prfcnt_blocks(performance_counters.tiler_counters, &index, 1, @@ -637,12 +642,48 @@ static void midgard_model_dump_prfcnt(void) performance_counters.prfcnt_en.shader, performance_counters.shader_present); - gpu_model_sync_dummy_prfcnt(); + /* Counter values are cleared after each dump */ + gpu_model_clear_prfcnt_values_nolock(); /* simulate a 'long' time between samples */ performance_counters.time += 10; } +#if !MALI_USE_CSF +static void midgard_model_dump_prfcnt(void) +{ + unsigned long flags; + + spin_lock_irqsave(&performance_counters.access_lock, flags); + gpu_model_dump_nolock(); + spin_unlock_irqrestore(&performance_counters.access_lock, flags); +} +#else +void gpu_model_prfcnt_dump_request(u32 *sample_buf, struct gpu_model_prfcnt_en enable_maps) +{ + unsigned long flags; + + if (WARN_ON(!sample_buf)) + return; + + spin_lock_irqsave(&performance_counters.access_lock, flags); + performance_counters.prfcnt_base_cpu = sample_buf; + performance_counters.prfcnt_en = enable_maps; + gpu_model_dump_nolock(); + spin_unlock_irqrestore(&performance_counters.access_lock, flags); +} + +void gpu_model_glb_request_job_irq(void *model) +{ + unsigned long flags; + + spin_lock_irqsave(&hw_error_status.access_lock, flags); + hw_error_status.job_irq_status |= JOB_IRQ_GLOBAL_IF; + spin_unlock_irqrestore(&hw_error_status.access_lock, flags); + gpu_device_raise_irq(model, GPU_DUMMY_JOB_IRQ); +} +#endif /* !MALI_USE_CSF */ + static void init_register_statuses(struct dummy_model_t *dummy) { int i; @@ -673,6 +714,8 @@ static void init_register_statuses(struct dummy_model_t *dummy) static void update_register_statuses(struct dummy_model_t *dummy, int job_slot) { + lockdep_assert_held(&hw_error_status.access_lock); + if (hw_error_status.errors_mask & IS_A_JOB_ERROR) { if (job_slot == hw_error_status.current_job_slot) { #if !MALI_USE_CSF @@ -922,6 +965,7 @@ static void update_job_irq_js_state(struct dummy_model_t *dummy, int mask) { int i; + lockdep_assert_held(&hw_error_status.access_lock); pr_debug("%s", "Updating the JS_ACTIVE register"); for (i = 0; i < NUM_SLOTS; i++) { @@ -990,6 +1034,9 @@ void *midgard_model_create(const void *config) { struct dummy_model_t *dummy = NULL; + spin_lock_init(&hw_error_status.access_lock); + spin_lock_init(&performance_counters.access_lock); + dummy = kzalloc(sizeof(*dummy), GFP_KERNEL); if (dummy) { @@ -1009,14 +1056,18 @@ static void midgard_model_get_outputs(void *h) { struct dummy_model_t *dummy = (struct dummy_model_t *)h; + lockdep_assert_held(&hw_error_status.access_lock); + if (hw_error_status.job_irq_status) 
gpu_device_raise_irq(dummy, GPU_DUMMY_JOB_IRQ); if ((dummy->power_changed && dummy->power_changed_mask) || (dummy->reset_completed & dummy->reset_completed_mask) || hw_error_status.gpu_error_irq || - (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled) || - dummy->prfcnt_sample_completed) +#if !MALI_USE_CSF + dummy->prfcnt_sample_completed || +#endif + (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled)) gpu_device_raise_irq(dummy, GPU_DUMMY_GPU_IRQ); if (hw_error_status.mmu_irq_rawstat & hw_error_status.mmu_irq_mask) @@ -1028,6 +1079,8 @@ static void midgard_model_update(void *h) struct dummy_model_t *dummy = (struct dummy_model_t *)h; int i; + lockdep_assert_held(&hw_error_status.access_lock); + for (i = 0; i < NUM_SLOTS; i++) { if (!dummy->slots[i].job_active) continue; @@ -1074,6 +1127,8 @@ static void invalidate_active_jobs(struct dummy_model_t *dummy) { int i; + lockdep_assert_held(&hw_error_status.access_lock); + for (i = 0; i < NUM_SLOTS; i++) { if (dummy->slots[i].job_active) { hw_error_status.job_irq_rawstat |= (1 << (16 + i)); @@ -1085,7 +1140,11 @@ static void invalidate_active_jobs(struct dummy_model_t *dummy) u8 midgard_model_write_reg(void *h, u32 addr, u32 value) { + unsigned long flags; struct dummy_model_t *dummy = (struct dummy_model_t *)h; + + spin_lock_irqsave(&hw_error_status.access_lock, flags); + #if !MALI_USE_CSF if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) && (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) { @@ -1188,9 +1247,10 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) if (value & (1 << 17)) dummy->clean_caches_completed = false; - if (value & (1 << 16)) +#if !MALI_USE_CSF + if (value & PRFCNT_SAMPLE_COMPLETED) dummy->prfcnt_sample_completed = 0; - +#endif /* !MALI_USE_CSF */ /*update error status */ hw_error_status.gpu_error_irq &= ~(value); } else if (addr == GPU_CONTROL_REG(GPU_COMMAND)) { @@ -1214,9 +1274,11 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) pr_debug("clean caches requested"); dummy->clean_caches_completed = true; break; +#if !MALI_USE_CSF case GPU_COMMAND_PRFCNT_SAMPLE: midgard_model_dump_prfcnt(); dummy->prfcnt_sample_completed = 1; +#endif /* !MALI_USE_CSF */ default: break; } @@ -1346,20 +1408,24 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) mem_addr_space, addr, value); break; } - } else if (addr >= GPU_CONTROL_REG(PRFCNT_BASE_LO) && - addr <= GPU_CONTROL_REG(PRFCNT_MMU_L2_EN)) { + } else { switch (addr) { +#if !MALI_USE_CSF case PRFCNT_BASE_LO: - performance_counters.prfcnt_base |= value; + performance_counters.prfcnt_base = + HI_MASK(performance_counters.prfcnt_base) | value; + performance_counters.prfcnt_base_cpu = + (u32 *)(uintptr_t)performance_counters.prfcnt_base; break; case PRFCNT_BASE_HI: - performance_counters.prfcnt_base |= ((u64) value) << 32; + performance_counters.prfcnt_base = + LO_MASK(performance_counters.prfcnt_base) | (((u64)value) << 32); + performance_counters.prfcnt_base_cpu = + (u32 *)(uintptr_t)performance_counters.prfcnt_base; break; -#if !MALI_USE_CSF case PRFCNT_JM_EN: - performance_counters.prfcnt_en.jm = value; + performance_counters.prfcnt_en.fe = value; break; -#endif /* !MALI_USE_CSF */ case PRFCNT_SHADER_EN: performance_counters.prfcnt_en.shader = value; break; @@ -1369,9 +1435,7 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) case PRFCNT_MMU_L2_EN: performance_counters.prfcnt_en.l2 = value; break; - } - } else { - switch (addr) { +#endif /* !MALI_USE_CSF */ case TILER_PWRON_LO: dummy->power_on |= (value & 1) 
<< 1; /* Also ensure L2 is powered on */ @@ -1416,6 +1480,7 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) case PWR_OVERRIDE0: #if !MALI_USE_CSF case JM_CONFIG: + case PRFCNT_CONFIG: #else /* !MALI_USE_CSF */ case CSF_CONFIG: #endif /* !MALI_USE_CSF */ @@ -1434,13 +1499,18 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) midgard_model_update(dummy); midgard_model_get_outputs(dummy); + spin_unlock_irqrestore(&hw_error_status.access_lock, flags); return 1; } u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) { + unsigned long flags; struct dummy_model_t *dummy = (struct dummy_model_t *)h; + + spin_lock_irqsave(&hw_error_status.access_lock, flags); + *value = 0; /* 0 by default */ #if !MALI_USE_CSF if (addr == JOB_CONTROL_REG(JOB_IRQ_JS_STATE)) { @@ -1475,24 +1545,31 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) #endif /* !MALI_USE_CSF */ else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { *value = (dummy->reset_completed_mask << 8) | - (dummy->power_changed_mask << 9) | (1 << 7) | 1; + ((dummy->clean_caches_completed_irq_enabled ? 1u : 0u) << 17) | + (dummy->power_changed_mask << 9) | (1 << 7) | 1; pr_debug("GPU_IRQ_MASK read %x", *value); } else if (addr == GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) { *value = (dummy->power_changed << 9) | (dummy->power_changed << 10) | (dummy->reset_completed << 8) | +#if !MALI_USE_CSF + (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) | +#endif /* !MALI_USE_CSF */ ((dummy->clean_caches_completed ? 1u : 0u) << 17) | - (dummy->prfcnt_sample_completed << 16) | hw_error_status.gpu_error_irq; + hw_error_status.gpu_error_irq; pr_debug("GPU_IRQ_RAWSTAT read %x", *value); } else if (addr == GPU_CONTROL_REG(GPU_IRQ_STATUS)) { *value = ((dummy->power_changed && (dummy->power_changed_mask & 0x1)) << 9) | ((dummy->power_changed && (dummy->power_changed_mask & 0x2)) << 10) | ((dummy->reset_completed & dummy->reset_completed_mask) << 8) | +#if !MALI_USE_CSF + (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) | +#endif /* !MALI_USE_CSF */ (((dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled) ? 
1u : 0u) << 17) | - (dummy->prfcnt_sample_completed << 16) | hw_error_status.gpu_error_irq; + hw_error_status.gpu_error_irq; pr_debug("GPU_IRQ_STAT read %x", *value); } else if (addr == GPU_CONTROL_REG(GPU_STATUS)) { *value = 0; @@ -1827,6 +1904,8 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER, counter_index, is_low_word); + } else if (addr == USER_REG(LATEST_FLUSH)) { + *value = 0; } #endif else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) { @@ -1840,18 +1919,20 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) *value = 0; } + spin_unlock_irqrestore(&hw_error_status.access_lock, flags); CSTD_UNUSED(dummy); return 1; } -static u32 set_user_sample_core_type(u64 *counters, - u32 *usr_data_start, u32 usr_data_offset, - u32 usr_data_size, u32 core_count) +static u32 set_user_sample_core_type(u64 *counters, u32 *usr_data_start, u32 usr_data_offset, + u32 usr_data_size, u32 core_count) { u32 sample_size; u32 *usr_data = NULL; + lockdep_assert_held(&performance_counters.access_lock); + sample_size = core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u32); @@ -1866,11 +1947,7 @@ static u32 set_user_sample_core_type(u64 *counters, u32 i; for (i = 0; i < loop_cnt; i++) { - if (copy_from_user(&counters[i], &usr_data[i], - sizeof(u32))) { - model_error_log(KBASE_CORE, "Unable to set counter sample 2"); - break; - } + counters[i] = usr_data[i]; } } @@ -1884,6 +1961,8 @@ static u32 set_kernel_sample_core_type(u64 *counters, u32 sample_size; u64 *usr_data = NULL; + lockdep_assert_held(&performance_counters.access_lock); + sample_size = core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u64); @@ -1900,49 +1979,70 @@ static u32 set_kernel_sample_core_type(u64 *counters, } /* Counter values injected through ioctl are of 32 bits */ -void gpu_model_set_dummy_prfcnt_sample(u32 *usr_data, u32 usr_data_size) +int gpu_model_set_dummy_prfcnt_user_sample(u32 __user *data, u32 size) { + unsigned long flags; + u32 *user_data; u32 offset = 0; + if (data == NULL || size == 0 || size > KBASE_DUMMY_MODEL_COUNTER_TOTAL * sizeof(u32)) + return -EINVAL; + + /* copy_from_user might sleep so can't be called from inside a spinlock + * allocate a temporary buffer for user data and copy to that before taking + * the lock + */ + user_data = kmalloc(size, GFP_KERNEL); + if (!user_data) + return -ENOMEM; + + if (copy_from_user(user_data, data, size)) { + model_error_log(KBASE_CORE, "Unable to copy prfcnt data from userspace"); + kfree(user_data); + return -EINVAL; + } + + spin_lock_irqsave(&performance_counters.access_lock, flags); #if !MALI_USE_CSF - offset = set_user_sample_core_type(performance_counters.jm_counters, - usr_data, offset, usr_data_size, 1); + offset = set_user_sample_core_type(performance_counters.jm_counters, user_data, offset, + size, 1); #else - offset = set_user_sample_core_type(performance_counters.cshw_counters, - usr_data, offset, usr_data_size, 1); + offset = set_user_sample_core_type(performance_counters.cshw_counters, user_data, offset, + size, 1); #endif /* !MALI_USE_CSF */ - offset = set_user_sample_core_type(performance_counters.tiler_counters, - usr_data, offset, usr_data_size, - hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT)); - offset = set_user_sample_core_type(performance_counters.l2_counters, - usr_data, offset, usr_data_size, - KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS); - offset = set_user_sample_core_type(performance_counters.shader_counters, - usr_data, offset, usr_data_size, - 
KBASE_DUMMY_MODEL_MAX_SHADER_CORES); + offset = set_user_sample_core_type(performance_counters.tiler_counters, user_data, offset, + size, hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT)); + offset = set_user_sample_core_type(performance_counters.l2_counters, user_data, offset, + size, KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS); + offset = set_user_sample_core_type(performance_counters.shader_counters, user_data, offset, + size, KBASE_DUMMY_MODEL_MAX_SHADER_CORES); + spin_unlock_irqrestore(&performance_counters.access_lock, flags); + + kfree(user_data); + return 0; } /* Counter values injected through kutf are of 64 bits */ -void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *usr_data, u32 usr_data_size) +void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *data, u32 size) { + unsigned long flags; u32 offset = 0; + spin_lock_irqsave(&performance_counters.access_lock, flags); #if !MALI_USE_CSF - offset = set_kernel_sample_core_type(performance_counters.jm_counters, - usr_data, offset, usr_data_size, 1); + offset = set_kernel_sample_core_type(performance_counters.jm_counters, data, offset, size, + 1); #else - offset = set_kernel_sample_core_type(performance_counters.cshw_counters, - usr_data, offset, usr_data_size, 1); + offset = set_kernel_sample_core_type(performance_counters.cshw_counters, data, offset, size, + 1); #endif /* !MALI_USE_CSF */ - offset = set_kernel_sample_core_type(performance_counters.tiler_counters, - usr_data, offset, usr_data_size, - hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT)); - offset = set_kernel_sample_core_type(performance_counters.l2_counters, - usr_data, offset, usr_data_size, - hweight64(performance_counters.l2_present)); - offset = set_kernel_sample_core_type(performance_counters.shader_counters, - usr_data, offset, usr_data_size, - hweight64(performance_counters.shader_present)); + offset = set_kernel_sample_core_type(performance_counters.tiler_counters, data, offset, + size, hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT)); + offset = set_kernel_sample_core_type(performance_counters.l2_counters, data, offset, size, + hweight64(performance_counters.l2_present)); + offset = set_kernel_sample_core_type(performance_counters.shader_counters, data, offset, + size, hweight64(performance_counters.shader_present)); + spin_unlock_irqrestore(&performance_counters.access_lock, flags); } KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_kernel_sample); @@ -1977,21 +2077,12 @@ void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev, } KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_cores); -void gpu_model_set_dummy_prfcnt_base_cpu(u32 *base, struct kbase_device *kbdev, - struct tagged_addr *pages, - size_t page_count) -{ - performance_counters.prfcnt_base_cpu = base; - performance_counters.kbdev = kbdev; - performance_counters.pages = pages; - performance_counters.page_count = page_count; -} - int gpu_model_control(void *model, struct kbase_model_control_params *params) { struct dummy_model_t *dummy = (struct dummy_model_t *)model; int i; + unsigned long flags; if (params->command == KBASE_MC_DISABLE_JOBS) { for (i = 0; i < NUM_SLOTS; i++) @@ -2000,8 +2091,10 @@ int gpu_model_control(void *model, return -EINVAL; } + spin_lock_irqsave(&hw_error_status.access_lock, flags); midgard_model_update(dummy); midgard_model_get_outputs(dummy); + spin_unlock_irqrestore(&hw_error_status.access_lock, flags); return 0; } diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.h b/mali_kbase/backend/gpu/mali_kbase_model_dummy.h index 87690f4..8eaf1b0 100644 --- 
a/mali_kbase/backend/gpu/mali_kbase_model_dummy.h +++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.h @@ -116,6 +116,8 @@ struct kbase_error_atom { /*struct to track the system error state*/ struct error_status_t { + spinlock_t access_lock; + u32 errors_mask; u32 mmu_table_level; int faulty_mmu_as; @@ -138,6 +140,20 @@ struct error_status_t { u64 as_transtab[NUM_MMU_AS]; }; +/** + * struct gpu_model_prfcnt_en - Performance counter enable masks + * @fe: Enable mask for front-end block + * @tiler: Enable mask for tiler block + * @l2: Enable mask for L2/Memory system blocks + * @shader: Enable mask for shader core blocks + */ +struct gpu_model_prfcnt_en { + u32 fe; + u32 tiler; + u32 l2; + u32 shader; +}; + void *midgard_model_create(const void *config); void midgard_model_destroy(void *h); u8 midgard_model_write_reg(void *h, u32 addr, u32 value); @@ -148,18 +164,53 @@ int job_atom_inject_error(struct kbase_error_params *params); int gpu_model_control(void *h, struct kbase_model_control_params *params); -void gpu_model_set_dummy_prfcnt_sample(u32 *usr_data, u32 usr_data_size); -void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *usr_data, u32 usr_data_size); +/** + * gpu_model_set_dummy_prfcnt_user_sample() - Set performance counter values + * @data: Userspace pointer to array of counter values + * @size: Size of counter value array + * + * Counter values set by this function will be used for one sample dump only + * after which counters will be cleared back to zero. + * + * Return: 0 on success, else error code. + */ +int gpu_model_set_dummy_prfcnt_user_sample(u32 __user *data, u32 size); + +/** + * gpu_model_set_dummy_prfcnt_kernel_sample() - Set performance counter values + * @data: Pointer to array of counter values + * @size: Size of counter value array + * + * Counter values set by this function will be used for one sample dump only + * after which counters will be cleared back to zero. + */ +void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *data, u32 size); + void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev, u64 *l2_present, u64 *shader_present); void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev, u64 l2_present, u64 shader_present); -void gpu_model_set_dummy_prfcnt_base_cpu(u32 *base, struct kbase_device *kbdev, - struct tagged_addr *pages, - size_t page_count); + /* Clear the counter values array maintained by the dummy model */ void gpu_model_clear_prfcnt_values(void); +#if MALI_USE_CSF +/** + * gpu_model_prfcnt_dump_request() - Request performance counter sample dump. + * @sample_buf: Pointer to KBASE_DUMMY_MODEL_MAX_VALUES_PER_SAMPLE sized array + * in which to store dumped performance counter values. + * @enable_maps: Physical enable maps for performance counter blocks. + */ +void gpu_model_prfcnt_dump_request(uint32_t *sample_buf, struct gpu_model_prfcnt_en enable_maps); + +/** + * gpu_model_glb_request_job_irq() - Trigger job interrupt with global request + * flag set. + * @model: Model pointer returned by midgard_model_create(). 
+ */ +void gpu_model_glb_request_job_irq(void *model); +#endif /* MALI_USE_CSF */ + enum gpu_dummy_irq { GPU_DUMMY_JOB_IRQ, GPU_DUMMY_GPU_IRQ, diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c index 3d92251..fcf98b0 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c @@ -437,8 +437,7 @@ static void kbase_pm_l2_clock_slow(struct kbase_device *kbdev) return; /* Stop the metrics gathering framework */ - if (kbase_pm_metrics_is_active(kbdev)) - kbase_pm_metrics_stop(kbdev); + kbase_pm_metrics_stop(kbdev); /* Keep the current freq to restore it upon resume */ kbdev->previous_frequency = clk_get_rate(clk); @@ -880,7 +879,7 @@ void kbase_pm_power_changed(struct kbase_device *kbdev) kbase_pm_update_state(kbdev); #if !MALI_USE_CSF - kbase_backend_slot_update(kbdev); + kbase_backend_slot_update(kbdev); #endif /* !MALI_USE_CSF */ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -990,7 +989,7 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev) { unsigned long flags; - ktime_t end_timestamp = ktime_get(); + ktime_t end_timestamp = ktime_get_raw(); struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; if (!kbdev->arb.arb_if) @@ -1065,6 +1064,7 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->csf.scheduler.lock); lockdep_assert_held(&kbdev->pm.lock); +#ifdef CONFIG_MALI_DEBUG /* In case of no active CSG on slot, powering up L2 could be skipped and * proceed directly to suspend GPU. * ToDo: firmware has to be reloaded after wake-up as no halt command @@ -1074,6 +1074,7 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev) dev_info( kbdev->dev, "No active CSGs. Can skip the power up of L2 and go for suspension directly"); +#endif ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev); if (ret) { diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c index 7d14be9..a4d7168 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -92,29 +92,10 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) * for those cores to get powered down */ if ((core_mask & old_core_mask) != old_core_mask) { - bool can_wait; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - can_wait = kbdev->pm.backend.gpu_ready && kbase_pm_is_mcu_desired(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - /* This check is ideally not required, the wait function can - * deal with the GPU power down. But it has been added to - * address the scenario where down-scaling request comes from - * the platform specific code soon after the GPU power down - * and at the time same time application thread tries to - * power up the GPU (on the flush of GPU queue). - * The platform specific @ref callback_power_on that gets - * invoked on power up does not return until down-scaling - * request is complete. 
The check mitigates the race caused by - * the problem in platform specific code. - */ - if (likely(can_wait)) { - if (kbase_pm_wait_for_desired_state(kbdev)) { - dev_warn(kbdev->dev, - "Wait for update of core_mask from %llx to %llx failed", - old_core_mask, core_mask); - } + if (kbase_pm_wait_for_cores_down_scale(kbdev)) { + dev_warn(kbdev->dev, + "Wait for update of core_mask from %llx to %llx failed", + old_core_mask, core_mask); } } #endif diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h index a249b1e..66ca0b6 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -136,7 +136,7 @@ struct kbasep_pm_metrics { * or removed from a GPU slot. * @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device. * @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. - * @lock: spinlock protecting the kbasep_pm_metrics_data structure + * @lock: spinlock protecting the kbasep_pm_metrics_state structure * @platform_data: pointer to data controlled by platform specific code * @kbdev: pointer to kbase device for which metrics are collected * @values: The current values of the power management metrics. The @@ -145,7 +145,7 @@ struct kbasep_pm_metrics { * @initialized: tracks whether metrics_state has been initialized or not. * @timer: timer to regularly make DVFS decisions based on the power * management metrics. - * @timer_active: boolean indicating @timer is running + * @timer_state: atomic indicating current @timer state, on, off, or stopped. * @dvfs_last: values of the PM metrics from the last DVFS tick * @dvfs_diff: different between the current and previous PM metrics. */ @@ -169,7 +169,7 @@ struct kbasep_pm_metrics_state { #ifdef CONFIG_MALI_MIDGARD_DVFS bool initialized; struct hrtimer timer; - bool timer_active; + atomic_t timer_state; struct kbasep_pm_metrics dvfs_last; struct kbasep_pm_metrics dvfs_diff; #endif @@ -572,7 +572,7 @@ struct kbase_pm_backend_data { }; #if MALI_USE_CSF -/* CSF PM flag, signaling that the MCU CORE should be kept on */ +/* CSF PM flag, signaling that the MCU shader Core should be kept on */ #define CSF_DYNAMIC_PM_CORE_KEEP_ON (1 << 0) /* CSF PM flag, signaling no scheduler suspension on idle groups */ #define CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE (1 << 1) diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c index 52e228c..aab07c9 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -656,6 +656,38 @@ static void kbase_pm_enable_mcu_db_notification(struct kbase_device *kbdev) val &= ~MCU_CNTRL_DOORBELL_DISABLE_MASK; kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), val); } + +/** + * wait_mcu_as_inactive - Wait for AS used by MCU FW to get configured + * + * @kbdev: Pointer to the device. + * + * This function is called to wait for the AS used by MCU FW to get configured + * before DB notification on MCU is enabled, as a workaround for HW issue. + */ +static void wait_mcu_as_inactive(struct kbase_device *kbdev) +{ + unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TURSEHW_2716)) + return; + + /* Wait for the AS_ACTIVE_INT bit to become 0 for the AS used by MCU FW */ + while (--max_loops && + kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) & + AS_STATUS_AS_ACTIVE_INT) + ; + + if (!WARN_ON_ONCE(max_loops == 0)) + return; + + dev_err(kbdev->dev, "AS_ACTIVE_INT bit stuck for AS %d used by MCU FW", MCU_AS_NR); + + if (kbase_prepare_to_reset_gpu(kbdev, 0)) + kbase_reset_gpu(kbdev); +} #endif /** @@ -665,10 +697,10 @@ static void kbase_pm_enable_mcu_db_notification(struct kbase_device *kbdev) * @kbdev: Pointer to the device * @enable: boolean indicating to enable interrupts or not * - * The POWER_CHANGED_ALL and POWER_CHANGED_SINGLE interrupts can be disabled - * after L2 has been turned on when FW is controlling the power for the shader - * cores. Correspondingly, the interrupts can be re-enabled after the MCU has - * been disabled before the power down of L2. + * The POWER_CHANGED_ALL interrupt can be disabled after L2 has been turned on + * when FW is controlling the power for the shader cores. Correspondingly, the + * interrupts can be re-enabled after the MCU has been disabled before the + * power down of L2. 
*/ static void kbasep_pm_toggle_power_interrupt(struct kbase_device *kbdev, bool enable) { @@ -679,15 +711,15 @@ static void kbasep_pm_toggle_power_interrupt(struct kbase_device *kbdev, bool en irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); #ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS - (void)enable; /* For IFPO, we require the POWER_CHANGED_ALL interrupt to be always on */ - irq_mask |= POWER_CHANGED_ALL | POWER_CHANGED_SINGLE; -#else - if (enable) - irq_mask |= POWER_CHANGED_ALL | POWER_CHANGED_SINGLE; - else - irq_mask &= ~(POWER_CHANGED_ALL | POWER_CHANGED_SINGLE); -#endif /* CONFIG_MALI_HOST_CONTROLS_SC_RAILS */ + enable = true; +#endif + if (enable) { + irq_mask |= POWER_CHANGED_ALL; + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), POWER_CHANGED_ALL); + } else { + irq_mask &= ~POWER_CHANGED_ALL; + } kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask); } @@ -921,7 +953,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) case KBASE_MCU_ON_PEND_HALT: if (kbase_csf_firmware_mcu_halted(kbdev)) { - KBASE_KTRACE_ADD(kbdev, MCU_HALTED, NULL, + KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_MCU_HALTED, NULL, kbase_csf_ktrace_gpu_cycle_cnt(kbdev)); if (kbdev->csf.firmware_hctl_core_pwr) backend->mcu_state = @@ -968,7 +1000,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) case KBASE_MCU_ON_PEND_SLEEP: if (kbase_csf_firmware_is_mcu_in_sleep(kbdev)) { - KBASE_KTRACE_ADD(kbdev, MCU_IN_SLEEP, NULL, + KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_MCU_SLEEP, NULL, kbase_csf_ktrace_gpu_cycle_cnt(kbdev)); backend->mcu_state = KBASE_MCU_IN_SLEEP; kbase_pm_enable_db_mirror_interrupt(kbdev); @@ -984,6 +1016,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) case KBASE_MCU_IN_SLEEP: if (kbase_pm_is_mcu_desired(kbdev) && backend->l2_state == KBASE_L2_ON) { + wait_mcu_as_inactive(kbdev); KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP( kbdev, kbase_backend_get_cycle_cnt(kbdev)); kbase_pm_enable_mcu_db_notification(kbdev); @@ -994,6 +1027,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) if (!kbdev->csf.firmware_hctl_core_pwr) kbasep_pm_toggle_power_interrupt(kbdev, false); backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); } break; #endif @@ -1120,13 +1154,24 @@ static bool can_power_down_l2(struct kbase_device *kbdev) #endif } +static bool need_tiler_control(struct kbase_device *kbdev) +{ +#if MALI_USE_CSF + if (kbase_pm_no_mcu_core_pwroff(kbdev)) + return true; + else + return false; +#else + return true; +#endif +} + static int kbase_pm_l2_update_state(struct kbase_device *kbdev) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; u64 l2_present = kbdev->gpu_props.curr_config.l2_present; -#if !MALI_USE_CSF u64 tiler_present = kbdev->gpu_props.props.raw_props.tiler_present; -#endif + bool l2_power_up_done; enum kbase_l2_core_state prev_state; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -1137,24 +1182,18 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) KBASE_PM_CORE_L2); u64 l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2); - -#if !MALI_USE_CSF - u64 tiler_trans = kbase_pm_get_trans_cores(kbdev, - KBASE_PM_CORE_TILER); - u64 tiler_ready = kbase_pm_get_ready_cores(kbdev, - KBASE_PM_CORE_TILER); -#endif +#ifdef CONFIG_MALI_ARBITER_SUPPORT + u64 tiler_trans = kbase_pm_get_trans_cores( + kbdev, KBASE_PM_CORE_TILER); + u64 tiler_ready = kbase_pm_get_ready_cores( + kbdev, KBASE_PM_CORE_TILER); /* * kbase_pm_get_ready_cores 
and kbase_pm_get_trans_cores * are vulnerable to corruption if gpu is lost */ if (kbase_is_gpu_removed(kbdev) -#ifdef CONFIG_MALI_ARBITER_SUPPORT || kbase_pm_is_gpu_lost(kbdev)) { -#else - ) { -#endif backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF; backend->hwcnt_desired = false; @@ -1177,32 +1216,45 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) } break; } +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ /* mask off ready from trans in case transitions finished * between the register reads */ l2_trans &= ~l2_ready; -#if !MALI_USE_CSF - tiler_trans &= ~tiler_ready; -#endif + prev_state = backend->l2_state; switch (backend->l2_state) { case KBASE_L2_OFF: if (kbase_pm_is_l2_desired(kbdev)) { +#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) + /* Enable HW timer of IPA control before + * L2 cache is powered-up. + */ + kbase_ipa_control_handle_gpu_sleep_exit(kbdev); +#endif /* * Set the desired config for L2 before * powering it on */ kbase_pm_l2_config_override(kbdev); kbase_pbha_write_settings(kbdev); -#if !MALI_USE_CSF - /* L2 is required, power on. Powering on the - * tiler will also power the first L2 cache. - */ - kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, - tiler_present, ACTION_PWRON); + /* If Host is controlling the power for shader + * cores, then it also needs to control the + * power for Tiler. + * Powering on the tiler will also power the + * L2 cache. + */ + if (need_tiler_control(kbdev)) { + kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, tiler_present, + ACTION_PWRON); + } else { + kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_present, + ACTION_PWRON); + } +#if !MALI_USE_CSF /* If we have more than one L2 cache then we * must power them on explicitly. */ @@ -1212,30 +1264,36 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) ACTION_PWRON); /* Clear backend slot submission kctx */ kbase_pm_l2_clear_backend_slot_submit_kctx(kbdev); -#else - /* With CSF firmware, Host driver doesn't need to - * handle power management with both shader and tiler cores. - * The CSF firmware will power up the cores appropriately. - * So only power the l2 cache explicitly. - */ - kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, - l2_present, ACTION_PWRON); #endif backend->l2_state = KBASE_L2_PEND_ON; } break; case KBASE_L2_PEND_ON: -#if !MALI_USE_CSF - if (!l2_trans && l2_ready == l2_present && !tiler_trans - && tiler_ready == tiler_present) { - KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, - tiler_ready); -#else + l2_power_up_done = false; if (!l2_trans && l2_ready == l2_present) { - KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, - l2_ready); -#endif + if (need_tiler_control(kbdev)) { +#ifndef CONFIG_MALI_ARBITER_SUPPORT + u64 tiler_trans = kbase_pm_get_trans_cores( + kbdev, KBASE_PM_CORE_TILER); + u64 tiler_ready = kbase_pm_get_ready_cores( + kbdev, KBASE_PM_CORE_TILER); +#endif + + tiler_trans &= ~tiler_ready; + if (!tiler_trans && tiler_ready == tiler_present) { + KBASE_KTRACE_ADD(kbdev, + PM_CORES_CHANGE_AVAILABLE_TILER, + NULL, tiler_ready); + l2_power_up_done = true; + } + } else { + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, + l2_ready); + l2_power_up_done = true; + } + } + if (l2_power_up_done) { /* * Ensure snoops are enabled after L2 is powered * up. Note that kbase keeps track of the snoop @@ -1431,12 +1489,26 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) /* We only need to check the L2 here - if the L2 * is off then the tiler is definitely also off. 
*/ - if (!l2_trans && !l2_ready) + if (!l2_trans && !l2_ready) { +#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) + /* Allow clock gating within the GPU and prevent it + * from being seen as active during sleep. + */ + kbase_ipa_control_handle_gpu_sleep_enter(kbdev); +#endif /* L2 is now powered off */ backend->l2_state = KBASE_L2_OFF; + } } else { - if (!kbdev->cache_clean_in_progress) + if (!kbdev->cache_clean_in_progress) { +#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) + /* Allow clock gating within the GPU and prevent it + * from being seen as active during sleep. + */ + kbase_ipa_control_handle_gpu_sleep_enter(kbdev); +#endif backend->l2_state = KBASE_L2_OFF; + } } break; @@ -2293,12 +2365,14 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev) /* Wait for cores */ #if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE - remaining = wait_event_killable_timeout( + remaining = wait_event_killable_timeout(kbdev->pm.backend.gpu_in_desired_state_wait, + kbase_pm_is_in_desired_state_with_l2_powered(kbdev), + timeout); #else remaining = wait_event_timeout( -#endif kbdev->pm.backend.gpu_in_desired_state_wait, kbase_pm_is_in_desired_state_with_l2_powered(kbdev), timeout); +#endif if (!remaining) { kbase_pm_timed_out(kbdev); @@ -2353,6 +2427,66 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) } KBASE_EXPORT_TEST_API(kbase_pm_wait_for_desired_state); +#if MALI_USE_CSF +/** + * core_mask_update_done - Check if downscaling of shader cores is done + * + * @kbdev: The kbase device structure for the device. + * + * This function checks if the downscaling of cores is effectively complete. + * + * Return: true if the downscale is done. + */ +static bool core_mask_update_done(struct kbase_device *kbdev) +{ + bool update_done = false; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + /* If MCU is in stable ON state then it implies that the downscale + * request had completed. + * If MCU is not active then it implies all cores are off, so can + * consider the downscale request as complete. + */ + if ((kbdev->pm.backend.mcu_state == KBASE_MCU_ON) || + kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state)) + update_done = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return update_done; +} + +int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev) +{ + long timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT)); + long remaining; + int err = 0; + + /* Wait for core mask update to complete */ +#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE + remaining = wait_event_killable_timeout( + kbdev->pm.backend.gpu_in_desired_state_wait, + core_mask_update_done(kbdev), timeout); +#else + remaining = wait_event_timeout( + kbdev->pm.backend.gpu_in_desired_state_wait, + core_mask_update_done(kbdev), timeout); +#endif + + if (!remaining) { + kbase_pm_timed_out(kbdev); + err = -ETIMEDOUT; + } else if (remaining < 0) { + dev_info( + kbdev->dev, + "Wait for cores down scaling got interrupted"); + err = (int)remaining; + } + + return err; +} +#endif + void kbase_pm_enable_interrupts(struct kbase_device *kbdev) { unsigned long flags; @@ -2416,14 +2550,21 @@ static void update_user_reg_page_mapping(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->pm.lock); mutex_lock(&kbdev->csf.reg_lock); - if (kbdev->csf.mali_file_inode) { - /* This would zap the pte corresponding to the mapping of User - * register page for all the Kbase contexts. 
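Both wait sites above repeat the same kernel-version check before choosing between wait_event_killable_timeout() and wait_event_timeout(). A hypothetical wrapper, not part of the driver, that would centralise that check at a single place:

/* Hypothetical compat helper (illustration only): use the killable variant on
 * kernels that provide it (4.13.1 onwards), otherwise fall back to the plain
 * timed wait, so call sites need no #if of their own.
 */
#include <linux/version.h>
#include <linux/wait.h>

#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE
#define kbase_wait_event_compat_timeout(wq, cond, timeout) \
	wait_event_killable_timeout(wq, cond, timeout)
#else
#define kbase_wait_event_compat_timeout(wq, cond, timeout) \
	wait_event_timeout(wq, cond, timeout)
#endif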
- */ - unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping, - BASEP_MEM_CSF_USER_REG_PAGE_HANDLE, - PAGE_SIZE, 1); + + /* Only if the mappings for USER page exist, update all PTEs associated to it */ + if (kbdev->csf.nr_user_page_mapped > 0) { + if (likely(kbdev->csf.mali_file_inode)) { + /* This would zap the pte corresponding to the mapping of User + * register page for all the Kbase contexts. + */ + unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping, + BASEP_MEM_CSF_USER_REG_PAGE_HANDLE, PAGE_SIZE, 1); + } else { + dev_err(kbdev->dev, + "Device file inode not exist even if USER page previously mapped"); + } } + mutex_unlock(&kbdev->csf.reg_lock); } #endif diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h index 68ded7d..cd5a6a3 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h @@ -269,6 +269,37 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); */ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev); +#if MALI_USE_CSF +/** + * kbase_pm_wait_for_cores_down_scale - Wait for the downscaling of shader cores + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This function can be called to ensure that the downscaling of cores is + * effectively complete and it would be safe to lower the voltage. + * The function assumes that caller had exercised the MCU state machine for the + * downscale request through the kbase_pm_update_state() function. + * + * This function needs to be used by the caller to safely wait for the completion + * of downscale request, instead of kbase_pm_wait_for_desired_state(). + * The downscale request would trigger a state change in MCU state machine + * and so when MCU reaches the stable ON state, it can be inferred that + * downscaling is complete. But it has been observed that the wake up of the + * waiting thread can get delayed by few milli seconds and by the time the + * thread wakes up the power down transition could have started (after the + * completion of downscale request). + * On the completion of power down transition another wake up signal would be + * sent, but again by the time thread wakes up the power up transition can begin. + * And the power up transition could then get blocked inside the platform specific + * callback_power_on() function due to the thread that called into Kbase (from the + * platform specific code) to perform the downscaling and then ended up waiting + * for the completion of downscale request. + * + * Return: 0 on success, error code on error or remaining jiffies on timeout. 
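A sketch of the calling pattern the documentation above describes. The helper name example_downscale_cores() is illustrative, and the kbase_pm_set_debug_core_mask() call assumes the CSF variant of that function; only kbase_pm_update_state() and kbase_pm_wait_for_cores_down_scale() are taken from this patch:

/* Illustration only: shrink the shader core mask, exercise the MCU state
 * machine, then wait with the dedicated helper instead of
 * kbase_pm_wait_for_desired_state().
 */
static int example_downscale_cores(struct kbase_device *kbdev, u64 new_core_mask)
{
	unsigned long flags;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbase_pm_set_debug_core_mask(kbdev, new_core_mask); /* assumed CSF signature */
	kbase_pm_update_state(kbdev); /* drive the downscale request, as required above */
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	return kbase_pm_wait_for_cores_down_scale(kbdev);
}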
+ */ +int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev); +#endif + /** * kbase_pm_update_dynamic_cores_onoff - Update the L2 and shader power state * machines after changing shader core @@ -800,7 +831,7 @@ bool kbase_pm_no_runnables_sched_suspendable(struct kbase_device *kbdev) /** * kbase_pm_no_mcu_core_pwroff - Check whether the PM is required to keep the - * MCU core powered in accordance to the active + * MCU shader Core powered in accordance to the active * power management policy * * @kbdev: Device pointer diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c index f85b466..2df6804 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,6 +24,7 @@ */ #include <mali_kbase.h> +#include <mali_kbase_config_defaults.h> #include <mali_kbase_pm.h> #include <backend/gpu/mali_kbase_pm_internal.h> @@ -48,27 +49,51 @@ #define GPU_ACTIVE_SCALING_FACTOR ((u64)1E9) #endif +/* + * Possible state transitions + * ON -> ON | OFF | STOPPED + * STOPPED -> ON | OFF + * OFF -> ON + * + * + * ┌─e─┐┌────────────f─────────────┐ + * │ v│ v + * └───ON ──a──> STOPPED ──b──> OFF + * ^^ │ │ + * │└──────c─────┘ │ + * │ │ + * └─────────────d─────────────┘ + * + * Transition effects: + * a. None + * b. Timer expires without restart + * c. Timer is not stopped, timer period is unaffected + * d. Timer must be restarted + * e. Callback is executed and the timer is restarted + * f. Timer is cancelled, or the callback is waited on if currently executing. 
This is called during + * tear-down and should not be subject to a race from an OFF->ON transition + */ +enum dvfs_metric_timer_state { TIMER_OFF, TIMER_STOPPED, TIMER_ON }; + #ifdef CONFIG_MALI_MIDGARD_DVFS static enum hrtimer_restart dvfs_callback(struct hrtimer *timer) { - unsigned long flags; struct kbasep_pm_metrics_state *metrics; - KBASE_DEBUG_ASSERT(timer != NULL); + if (WARN_ON(!timer)) + return HRTIMER_NORESTART; metrics = container_of(timer, struct kbasep_pm_metrics_state, timer); - kbase_pm_get_dvfs_action(metrics->kbdev); - spin_lock_irqsave(&metrics->lock, flags); + /* Transition (b) to fully off if timer was stopped, don't restart the timer in this case */ + if (atomic_cmpxchg(&metrics->timer_state, TIMER_STOPPED, TIMER_OFF) != TIMER_ON) + return HRTIMER_NORESTART; - if (metrics->timer_active) - hrtimer_start(timer, - HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period), - HRTIMER_MODE_REL); - - spin_unlock_irqrestore(&metrics->lock, flags); + kbase_pm_get_dvfs_action(metrics->kbdev); - return HRTIMER_NORESTART; + /* Set the new expiration time and restart (transition e) */ + hrtimer_forward_now(timer, HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period)); + return HRTIMER_RESTART; } #endif /* CONFIG_MALI_MIDGARD_DVFS */ @@ -83,7 +108,7 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev) KBASE_DEBUG_ASSERT(kbdev != NULL); kbdev->pm.backend.metrics.kbdev = kbdev; - kbdev->pm.backend.metrics.time_period_start = ktime_get(); + kbdev->pm.backend.metrics.time_period_start = ktime_get_raw(); kbdev->pm.backend.metrics.values.time_busy = 0; kbdev->pm.backend.metrics.values.time_idle = 0; kbdev->pm.backend.metrics.values.time_in_protm = 0; @@ -111,7 +136,7 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev) #else KBASE_DEBUG_ASSERT(kbdev != NULL); kbdev->pm.backend.metrics.kbdev = kbdev; - kbdev->pm.backend.metrics.time_period_start = ktime_get(); + kbdev->pm.backend.metrics.time_period_start = ktime_get_raw(); kbdev->pm.backend.metrics.gpu_active = false; kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; @@ -134,6 +159,7 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev) HRTIMER_MODE_REL); kbdev->pm.backend.metrics.timer.function = dvfs_callback; kbdev->pm.backend.metrics.initialized = true; + atomic_set(&kbdev->pm.backend.metrics.timer_state, TIMER_OFF); kbase_pm_metrics_start(kbdev); #endif /* CONFIG_MALI_MIDGARD_DVFS */ @@ -152,16 +178,12 @@ KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init); void kbasep_pm_metrics_term(struct kbase_device *kbdev) { #ifdef CONFIG_MALI_MIDGARD_DVFS - unsigned long flags; - KBASE_DEBUG_ASSERT(kbdev != NULL); - spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); - kbdev->pm.backend.metrics.timer_active = false; - spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); - - hrtimer_cancel(&kbdev->pm.backend.metrics.timer); + /* Cancel the timer, and block if the callback is currently executing (transition f) */ kbdev->pm.backend.metrics.initialized = false; + atomic_set(&kbdev->pm.backend.metrics.timer_state, TIMER_OFF); + hrtimer_cancel(&kbdev->pm.backend.metrics.timer); #endif /* CONFIG_MALI_MIDGARD_DVFS */ #if MALI_USE_CSF @@ -199,7 +221,7 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev) * elapsed time. The lock taken inside kbase_ipa_control_query() * function can cause lot of variation. */ - now = ktime_get(); + now = ktime_get_raw(); if (err) { dev_err(kbdev->dev, @@ -231,12 +253,14 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev) * time. 
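The three-state timer scheme introduced above can be exercised outside the kernel. A minimal userspace model, assuming only that it mirrors transitions a, b, d and e of the diagram; the names and printf() stand-ins for hrtimer_start() and kbase_pm_get_dvfs_action() are illustrative, not driver code:

#include <stdatomic.h>
#include <stdio.h>

enum dvfs_metric_timer_state { TIMER_OFF, TIMER_STOPPED, TIMER_ON };

static atomic_int timer_state = TIMER_OFF;

/* metrics_start(): go to ON from any state; only (re)arm the timer if it was
 * fully off (transition d), otherwise it is still pending (transition c).
 */
static void metrics_start(void)
{
	if (atomic_exchange(&timer_state, TIMER_ON) == TIMER_OFF)
		printf("hrtimer_start()\n");
}

/* metrics_stop(): ON -> STOPPED (transition a); the timer keeps running until
 * its callback notices the stop.
 */
static void metrics_stop(void)
{
	int expected = TIMER_ON;

	atomic_compare_exchange_strong(&timer_state, &expected, TIMER_STOPPED);
}

/* timer_callback(): STOPPED -> OFF without restart (transition b); if still
 * ON, do the DVFS work and restart (transition e). Returns 1 for "restart".
 */
static int timer_callback(void)
{
	int expected = TIMER_STOPPED;

	if (atomic_compare_exchange_strong(&timer_state, &expected, TIMER_OFF))
		return 0;
	if (expected != TIMER_ON)
		return 0; /* already fully off */
	printf("kbase_pm_get_dvfs_action()\n");
	return 1;
}

int main(void)
{
	metrics_start();
	metrics_stop();
	printf("restart after stop: %d\n", timer_callback()); /* prints 0 */
	metrics_start();
	printf("restart while on:   %d\n", timer_callback()); /* prints 1 */
	return 0;
}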
*/ if (!kbdev->pm.backend.metrics.skip_gpu_active_sanity_check) { - /* Use a margin value that is approximately 1% of the time - * difference. + /* The margin is scaled to allow for the worst-case + * scenario where the samples are maximally separated, + * plus a small offset for sampling errors. */ - u64 margin_ns = diff_ns >> 6; + u64 const MARGIN_NS = + IPA_CONTROL_TIMER_DEFAULT_VALUE_MS * NSEC_PER_MSEC * 3 / 2; - if (gpu_active_counter > (diff_ns + margin_ns)) { + if (gpu_active_counter > (diff_ns + MARGIN_NS)) { dev_info( kbdev->dev, "GPU activity takes longer than time interval: %llu ns > %llu ns", @@ -331,7 +355,7 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, #if MALI_USE_CSF kbase_pm_get_dvfs_utilisation_calc(kbdev); #else - kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get()); + kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get_raw()); #endif memset(diff, 0, sizeof(*diff)); @@ -396,57 +420,33 @@ void kbase_pm_get_dvfs_action(struct kbase_device *kbdev) bool kbase_pm_metrics_is_active(struct kbase_device *kbdev) { - bool isactive; - unsigned long flags; - KBASE_DEBUG_ASSERT(kbdev != NULL); - spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); - isactive = kbdev->pm.backend.metrics.timer_active; - spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); - - return isactive; + return atomic_read(&kbdev->pm.backend.metrics.timer_state) == TIMER_ON; } KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active); void kbase_pm_metrics_start(struct kbase_device *kbdev) { - unsigned long flags; - bool update = true; + struct kbasep_pm_metrics_state *metrics = &kbdev->pm.backend.metrics; - if (unlikely(!kbdev->pm.backend.metrics.initialized)) + if (unlikely(!metrics->initialized)) return; - spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); - if (!kbdev->pm.backend.metrics.timer_active) - kbdev->pm.backend.metrics.timer_active = true; - else - update = false; - spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); - - if (update) - hrtimer_start(&kbdev->pm.backend.metrics.timer, - HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period), - HRTIMER_MODE_REL); + /* Transition to ON, from a stopped state (transition c) */ + if (atomic_xchg(&metrics->timer_state, TIMER_ON) == TIMER_OFF) + /* Start the timer only if it's been fully stopped (transition d)*/ + hrtimer_start(&metrics->timer, HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period), + HRTIMER_MODE_REL); } void kbase_pm_metrics_stop(struct kbase_device *kbdev) { - unsigned long flags; - bool update = true; - if (unlikely(!kbdev->pm.backend.metrics.initialized)) return; - spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); - if (kbdev->pm.backend.metrics.timer_active) - kbdev->pm.backend.metrics.timer_active = false; - else - update = false; - spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); - - if (update) - hrtimer_cancel(&kbdev->pm.backend.metrics.timer); + /* Timer is Stopped if its currently on (transition a) */ + atomic_cmpxchg(&kbdev->pm.backend.metrics.timer_state, TIMER_ON, TIMER_STOPPED); } @@ -512,7 +512,7 @@ void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp) spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); if (!timestamp) { - now = ktime_get(); + now = ktime_get_raw(); timestamp = &now; } diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c index 5f16434..deeb1b5 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c @@ -310,7 +310,7 @@ void 
kbase_pm_set_policy(struct kbase_device *kbdev, mutex_lock(&kbdev->pm.backend.policy_change_lock); if (kbase_reset_gpu_prevent_and_wait(kbdev)) { - dev_warn(kbdev->dev, "Set PM policy failed to prevent gpu reset"); + dev_warn(kbdev->dev, "Set PM policy failing to prevent gpu reset"); reset_op_prevented = false; } @@ -332,7 +332,7 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, * the always_on policy, reflected by the CSF_DYNAMIC_PM_CORE_KEEP_ON * flag bit. */ - sched_suspend = kbdev->csf.firmware_inited && reset_op_prevented && + sched_suspend = reset_op_prevented && (CSF_DYNAMIC_PM_CORE_KEEP_ON & (new_policy_csf_pm_sched_flags | kbdev->pm.backend.csf_pm_sched_flags)); diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c index a83206a..5110e3d 100644 --- a/mali_kbase/backend/gpu/mali_kbase_time.c +++ b/mali_kbase/backend/gpu/mali_kbase_time.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2016, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,6 +21,9 @@ #include <mali_kbase.h> #include <mali_kbase_hwaccess_time.h> +#if MALI_USE_CSF +#include <csf/mali_kbase_csf_timeout.h> +#endif #include <device/mali_kbase_device.h> #include <backend/gpu/mali_kbase_pm_internal.h> #include <mali_kbase_config_defaults.h> @@ -113,13 +116,17 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, */ u64 timeout, nr_cycles = 0; - /* Default value to mean 'no cap' */ - u64 timeout_cap = U64_MAX; - u64 freq_khz = kbdev->lowest_gpu_freq_khz; + u64 freq_khz; + /* Only for debug messages, safe default in case it's mis-maintained */ const char *selector_str = "(unknown)"; - WARN_ON(!freq_khz); + if (WARN(!kbdev->lowest_gpu_freq_khz, + "Lowest frequency uninitialized! Using reference frequency for scaling")) { + freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ; + } else { + freq_khz = kbdev->lowest_gpu_freq_khz; + } switch (selector) { case KBASE_TIMEOUT_SELECTOR_COUNT: @@ -135,16 +142,15 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, fallthrough; case CSF_FIRMWARE_TIMEOUT: selector_str = "CSF_FIRMWARE_TIMEOUT"; - nr_cycles = CSF_FIRMWARE_TIMEOUT_CYCLES; - /* Setup a cap on CSF FW timeout to FIRMWARE_PING_INTERVAL_MS, - * if calculated timeout exceeds it. This should be adapted to - * a direct timeout comparison once the - * FIRMWARE_PING_INTERVAL_MS option is added to this timeout - * function. A compile-time check such as BUILD_BUG_ON can also - * be done once the firmware ping interval in cycles becomes - * available as a macro. + /* Any FW timeout cannot be longer than the FW ping interval, after which + * the firmware_aliveness_monitor will be triggered and may restart + * the GPU if the FW is unresponsive. 
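The firmware timeout above is expressed as a GPU cycle count, capped against the ping-interval cycle count, and only then scaled by the lowest GPU frequency. A standalone sketch of that arithmetic, assuming it mirrors the div_u64()-based scaling in kbase_get_timeout_ms(); the values in main() are made up for illustration:

#include <limits.h>
#include <stdint.h>
#include <stdio.h>

/* freq_khz is cycles per millisecond, so cycles / freq_khz yields milliseconds.
 * Scaling against the lowest frequency means a down-clocked GPU only ever gets
 * a longer timeout, never a shorter one.
 */
static unsigned int timeout_cycles_to_ms(uint64_t nr_cycles, uint64_t freq_khz)
{
	uint64_t timeout_ms = nr_cycles / freq_khz;

	return timeout_ms > UINT_MAX ? UINT_MAX : (unsigned int)timeout_ms;
}

int main(void)
{
	/* e.g. a 100M-cycle budget against a 100 MHz (100000 kHz) floor: 1000 ms */
	printf("%u ms\n", timeout_cycles_to_ms(100000000ULL, 100000ULL));
	return 0;
}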
*/ - timeout_cap = FIRMWARE_PING_INTERVAL_MS; + nr_cycles = min(CSF_FIRMWARE_PING_TIMEOUT_CYCLES, CSF_FIRMWARE_TIMEOUT_CYCLES); + + if (nr_cycles == CSF_FIRMWARE_PING_TIMEOUT_CYCLES) + dev_warn(kbdev->dev, "Capping %s to CSF_FIRMWARE_PING_TIMEOUT\n", + selector_str); break; case CSF_PM_TIMEOUT: selector_str = "CSF_PM_TIMEOUT"; @@ -154,21 +160,33 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, selector_str = "CSF_GPU_RESET_TIMEOUT"; nr_cycles = CSF_GPU_RESET_TIMEOUT_CYCLES; break; + case CSF_CSG_SUSPEND_TIMEOUT: + selector_str = "CSF_CSG_SUSPEND_TIMEOUT"; + nr_cycles = CSF_CSG_SUSPEND_TIMEOUT_CYCLES; + break; + case CSF_FIRMWARE_BOOT_TIMEOUT: + selector_str = "CSF_FIRMWARE_BOOT_TIMEOUT"; + nr_cycles = CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES; + break; + case CSF_FIRMWARE_PING_TIMEOUT: + selector_str = "CSF_FIRMWARE_PING_TIMEOUT"; + nr_cycles = CSF_FIRMWARE_PING_TIMEOUT_CYCLES; + break; + case CSF_SCHED_PROTM_PROGRESS_TIMEOUT: + selector_str = "CSF_SCHED_PROTM_PROGRESS_TIMEOUT"; + nr_cycles = kbase_csf_timeout_get(kbdev); + break; #endif } timeout = div_u64(nr_cycles, freq_khz); - if (timeout > timeout_cap) { - dev_dbg(kbdev->dev, "Capped %s %llu to %llu", selector_str, - (unsigned long long)timeout, (unsigned long long)timeout_cap); - timeout = timeout_cap; - } if (WARN(timeout > UINT_MAX, "Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms", (unsigned long long)timeout, selector_str, (unsigned long long)freq_khz)) timeout = UINT_MAX; return (unsigned int)timeout; } +KBASE_EXPORT_TEST_API(kbase_get_timeout_ms); u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev) { diff --git a/mali_kbase/build.bp b/mali_kbase/build.bp index 04768fe..96aa329 100644 --- a/mali_kbase/build.bp +++ b/mali_kbase/build.bp @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -139,6 +139,12 @@ bob_defaults { mali_host_controls_sc_rails: { kbuild_options: ["CONFIG_MALI_HOST_CONTROLS_SC_RAILS=y"], }, + platform_is_fpga: { + kbuild_options: ["CONFIG_MALI_IS_FPGA=y"], + }, + mali_fw_core_dump: { + kbuild_options: ["CONFIG_MALI_FW_CORE_DUMP=y"], + }, kbuild_options: [ "CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}", "MALI_CUSTOMER_RELEASE={{.release}}", @@ -159,7 +165,7 @@ bob_defaults { // is an umbrella feature that would be open for inappropriate use // (catch-all for experimental CS code without separating it into // different features). - "MALI_INCREMENTAL_RENDERING={{.incremental_rendering}}", + "MALI_INCREMENTAL_RENDERING_JM={{.incremental_rendering_jm}}", "MALI_GPU_TIMESTAMP_CORRECTION={{.gpu_timestamp_correction}}", "MALI_BASE_CSF_PERFORMANCE_TESTS={{.base_csf_performance_tests}}", "MALI_GPU_TIMESTAMP_INTERPOLATION={{.gpu_timestamp_interpolation}}", diff --git a/mali_kbase/context/backend/mali_kbase_context_csf.c b/mali_kbase/context/backend/mali_kbase_context_csf.c index 34504f7..201349c 100644 --- a/mali_kbase/context/backend/mali_kbase_context_csf.c +++ b/mali_kbase/context/backend/mali_kbase_context_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -39,11 +39,13 @@ #include <csf/mali_kbase_csf_tiler_heap_debugfs.h> #include <csf/mali_kbase_csf_cpu_queue_debugfs.h> #include <mali_kbase_debug_mem_view.h> +#include <mali_kbase_debug_mem_zones.h> #include <mali_kbase_mem_pool_debugfs.h> void kbase_context_debugfs_init(struct kbase_context *const kctx) { kbase_debug_mem_view_init(kctx); + kbase_debug_mem_zones_init(kctx); kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx); kbase_jit_debugfs_init(kctx); kbase_csf_queue_group_debugfs_init(kctx); diff --git a/mali_kbase/context/backend/mali_kbase_context_jm.c b/mali_kbase/context/backend/mali_kbase_context_jm.c index 74402ec..4091fb7 100644 --- a/mali_kbase/context/backend/mali_kbase_context_jm.c +++ b/mali_kbase/context/backend/mali_kbase_context_jm.c @@ -36,11 +36,13 @@ #if IS_ENABLED(CONFIG_DEBUG_FS) #include <mali_kbase_debug_mem_view.h> +#include <mali_kbase_debug_mem_zones.h> #include <mali_kbase_mem_pool_debugfs.h> void kbase_context_debugfs_init(struct kbase_context *const kctx) { kbase_debug_mem_view_init(kctx); + kbase_debug_mem_zones_init(kctx); kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx); kbase_jit_debugfs_init(kctx); kbasep_jd_debugfs_ctx_init(kctx); diff --git a/mali_kbase/context/mali_kbase_context.c b/mali_kbase/context/mali_kbase_context.c index c7d7585..95bd641 100644 --- a/mali_kbase/context/mali_kbase_context.c +++ b/mali_kbase/context/mali_kbase_context.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -286,7 +286,9 @@ static void kbase_remove_kctx_from_process(struct kbase_context *kctx) /* Add checks, so that the terminating process Should not * hold any gpu_memory. */ + spin_lock(&kctx->kbdev->gpu_mem_usage_lock); WARN_ON(kprcs->total_gpu_pages); + spin_unlock(&kctx->kbdev->gpu_mem_usage_lock); WARN_ON(!RB_EMPTY_ROOT(&kprcs->dma_buf_root)); kobject_del(&kprcs->kobj); kobject_put(&kprcs->kobj); diff --git a/mali_kbase/csf/Kbuild b/mali_kbase/csf/Kbuild index 29983fb..11672a1 100644 --- a/mali_kbase/csf/Kbuild +++ b/mali_kbase/csf/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -34,12 +34,14 @@ mali_kbase-y += \ csf/mali_kbase_csf_protected_memory.o \ csf/mali_kbase_csf_tiler_heap_debugfs.o \ csf/mali_kbase_csf_cpu_queue_debugfs.o \ - csf/mali_kbase_csf_event.o + csf/mali_kbase_csf_event.o \ + csf/mali_kbase_csf_firmware_log.o mali_kbase-$(CONFIG_MALI_REAL_HW) += csf/mali_kbase_csf_firmware.o mali_kbase-$(CONFIG_MALI_NO_MALI) += csf/mali_kbase_csf_firmware_no_mali.o + ifeq ($(KBUILD_EXTMOD),) # in-tree -include $(src)/csf/ipa_control/Kbuild diff --git a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c index a56b689..ccdc48c 100644 --- a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c +++ b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,6 +20,7 @@ */ #include <mali_kbase.h> +#include <mali_kbase_config_defaults.h> #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" #include "mali_kbase_csf_ipa_control.h" @@ -44,19 +45,9 @@ #define COMMAND_RESET_ACK ((u32)5) /* - * Default value for the TIMER register of the IPA Control interface, - * expressed in milliseconds. - * - * The chosen value is a trade off between two requirements: the IPA Control - * interface should sample counters with a resolution in the order of - * milliseconds, while keeping GPU overhead as limited as possible. - */ -#define TIMER_DEFAULT_VALUE_MS ((u32)10) /* 10 milliseconds */ - -/* * Number of timer events per second. */ -#define TIMER_EVENTS_PER_SECOND ((u32)1000 / TIMER_DEFAULT_VALUE_MS) +#define TIMER_EVENTS_PER_SECOND ((u32)1000 / IPA_CONTROL_TIMER_DEFAULT_VALUE_MS) /* * Maximum number of loops polling the GPU before we assume the GPU has hung. @@ -602,9 +593,10 @@ int kbase_ipa_control_register( */ for (session_idx = 0; session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS; session_idx++) { - session = &ipa_ctrl->sessions[session_idx]; - if (!session->active) + if (!ipa_ctrl->sessions[session_idx].active) { + session = &ipa_ctrl->sessions[session_idx]; break; + } } if (!session) { @@ -659,7 +651,7 @@ int kbase_ipa_control_register( /* Reports to this client for GPU time spent in protected mode * should begin from the point of registration. */ - session->last_query_time = ktime_get_ns(); + session->last_query_time = ktime_get_raw_ns(); /* Initially, no time has been spent in protected mode */ session->protm_time = 0; @@ -829,7 +821,7 @@ int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client, } if (protected_time) { - u64 time_now = ktime_get_ns(); + u64 time_now = ktime_get_raw_ns(); /* This is the amount of protected-mode time spent prior to * the current protm period. 
@@ -973,6 +965,43 @@ void kbase_ipa_control_handle_gpu_reset_post(struct kbase_device *kbdev) } KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_reset_post); +#ifdef KBASE_PM_RUNTIME +void kbase_ipa_control_handle_gpu_sleep_enter(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (kbdev->pm.backend.mcu_state == KBASE_MCU_IN_SLEEP) { + /* GPU Sleep is treated as a power down */ + kbase_ipa_control_handle_gpu_power_off(kbdev); + + /* SELECT_CSHW register needs to be cleared to prevent any + * IPA control message to be sent to the top level GPU HWCNT. + */ + kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_LO), 0); + kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_HI), 0); + + /* No need to issue the APPLY command here */ + } +} +KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_sleep_enter); + +void kbase_ipa_control_handle_gpu_sleep_exit(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (kbdev->pm.backend.mcu_state == KBASE_MCU_IN_SLEEP) { + /* To keep things simple, currently exit from + * GPU Sleep is treated as a power on event where + * all 4 SELECT registers are reconfigured. + * On exit from sleep, reconfiguration is needed + * only for the SELECT_CSHW register. + */ + kbase_ipa_control_handle_gpu_power_on(kbdev); + } +} +KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_sleep_exit); +#endif + #if MALI_UNIT_TEST void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev, u32 clk_index, u32 clk_rate_hz) @@ -992,14 +1021,14 @@ void kbase_ipa_control_protm_entered(struct kbase_device *kbdev) struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; lockdep_assert_held(&kbdev->hwaccess_lock); - ipa_ctrl->protm_start = ktime_get_ns(); + ipa_ctrl->protm_start = ktime_get_raw_ns(); } void kbase_ipa_control_protm_exited(struct kbase_device *kbdev) { struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; size_t i; - u64 time_now = ktime_get_ns(); + u64 time_now = ktime_get_raw_ns(); u32 status; lockdep_assert_held(&kbdev->hwaccess_lock); diff --git a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.h b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.h index 0469c48..69ff897 100644 --- a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.h +++ b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -198,6 +198,33 @@ void kbase_ipa_control_handle_gpu_reset_pre(struct kbase_device *kbdev); */ void kbase_ipa_control_handle_gpu_reset_post(struct kbase_device *kbdev); +#ifdef KBASE_PM_RUNTIME +/** + * kbase_ipa_control_handle_gpu_sleep_enter - Handle the pre GPU Sleep event + * + * @kbdev: Pointer to kbase device. + * + * This function is called after MCU has been put to sleep state & L2 cache has + * been powered down. The top level part of GPU is still powered up when this + * function is called. + */ +void kbase_ipa_control_handle_gpu_sleep_enter(struct kbase_device *kbdev); + +/** + * kbase_ipa_control_handle_gpu_sleep_exit - Handle the post GPU Sleep event + * + * @kbdev: Pointer to kbase device. + * + * This function is called when L2 needs to be powered up and MCU can exit the + * sleep state. 
The top level part of GPU is powered up when this function is + * called. + * + * This function must be called only if kbase_ipa_control_handle_gpu_sleep_enter() + * was called previously. + */ +void kbase_ipa_control_handle_gpu_sleep_exit(struct kbase_device *kbdev); +#endif + #if MALI_UNIT_TEST /** * kbase_ipa_control_rate_change_notify_test - Notify GPU rate change diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c index 2678baf..12ab66f 100644 --- a/mali_kbase/csf/mali_kbase_csf.c +++ b/mali_kbase/csf/mali_kbase_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -35,7 +35,7 @@ #include <mali_kbase_hwaccess_time.h> #include "mali_kbase_csf_event.h" #include <mali_linux_trace.h> - +#include <linux/protected_memory_allocator.h> #define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK) #define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK) @@ -61,7 +61,7 @@ const u8 kbasep_csf_relative_to_queue_group_priority[KBASE_QUEUE_GROUP_PRIORITY_ * * @protm_grp: Possibly schedulable group that requested protected mode in the interrupt. * If NULL, no such case observed in the tracked interrupt case. - * @idle_seq: The highest priority group that notified idle. If no such instnace in the + * @idle_seq: The highest priority group that notified idle. If no such instance in the * interrupt case, marked with the largest field value: U32_MAX. * @idle_slot: The slot number if @p idle_seq is valid in the given tracking case. 
*/ @@ -131,13 +131,13 @@ static int get_user_pages_mmap_handle(struct kbase_context *kctx, return 0; } -static void gpu_munmap_user_io_pages(struct kbase_context *kctx, - struct kbase_va_region *reg) +static void gpu_munmap_user_io_pages(struct kbase_context *kctx, struct kbase_va_region *reg, + struct tagged_addr *phys) { size_t num_pages = 2; - kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, - reg->start_pfn, num_pages, MCU_AS_NR); + kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, reg->start_pfn, phys, + num_pages, MCU_AS_NR); WARN_ON(reg->flags & KBASE_REG_FREE); @@ -178,12 +178,6 @@ static int gpu_mmap_user_io_pages(struct kbase_device *kbdev, */ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; -#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \ - ((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \ - (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE))) - mem_flags |= - KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); -#else if (kbdev->system_coherency == COHERENCY_NONE) { mem_flags |= KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); @@ -191,7 +185,6 @@ static int gpu_mmap_user_io_pages(struct kbase_device *kbdev, mem_flags |= KBASE_REG_SHARE_BOTH | KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED); } -#endif mutex_lock(&kbdev->csf.reg_lock); ret = kbase_add_va_region_rbtree(kbdev, reg, 0, num_pages, 1); @@ -220,8 +213,7 @@ static int gpu_mmap_user_io_pages(struct kbase_device *kbdev, return 0; bad_insert_output_page: - kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, - reg->start_pfn, 1, MCU_AS_NR); + kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn, phys, 1, MCU_AS_NR); bad_insert: mutex_lock(&kbdev->csf.reg_lock); kbase_remove_va_region(kbdev, reg); @@ -250,6 +242,8 @@ static int kernel_map_user_io_pages(struct kbase_context *kctx, { struct page *page_list[2]; pgprot_t cpu_map_prot; + unsigned long flags; + char *user_io_addr; int ret = 0; size_t i; @@ -264,27 +258,25 @@ static int kernel_map_user_io_pages(struct kbase_context *kctx, /* The pages are mapped to Userspace also, so use the same mapping * attributes as used inside the CPU page fault handler. 
*/ -#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \ - ((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \ - (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE))) - cpu_map_prot = pgprot_device(PAGE_KERNEL); -#else if (kctx->kbdev->system_coherency == COHERENCY_NONE) cpu_map_prot = pgprot_writecombine(PAGE_KERNEL); else cpu_map_prot = PAGE_KERNEL; -#endif for (i = 0; i < ARRAY_SIZE(page_list); i++) page_list[i] = as_page(queue->phys[i]); - queue->user_io_addr = vmap(page_list, ARRAY_SIZE(page_list), VM_MAP, cpu_map_prot); + user_io_addr = vmap(page_list, ARRAY_SIZE(page_list), VM_MAP, cpu_map_prot); - if (!queue->user_io_addr) + if (!user_io_addr) ret = -ENOMEM; else atomic_add(ARRAY_SIZE(page_list), &kctx->permanent_mapped_pages); + kbase_csf_scheduler_spin_lock(kctx->kbdev, &flags); + queue->user_io_addr = user_io_addr; + kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags); + unlock: kbase_gpu_vm_unlock(kctx); return ret; @@ -321,7 +313,7 @@ static void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, { const size_t num_pages = 2; - gpu_munmap_user_io_pages(kctx, queue->reg); + gpu_munmap_user_io_pages(kctx, queue->reg, &queue->phys[0]); kernel_unmap_user_io_pages(kctx, queue); kbase_mem_pool_free_pages( @@ -820,8 +812,8 @@ static void pending_submission_worker(struct kthread_work *work) if (!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND) dev_dbg(kbdev->dev, "queue is not bound to a group"); - else - WARN_ON(kbase_csf_scheduler_queue_start(queue)); + else if (kbase_csf_scheduler_queue_start(queue)) + dev_dbg(kbdev->dev, "Failed to start queue"); } } @@ -954,7 +946,7 @@ static void unbind_stopped_queue(struct kbase_context *kctx, kbase_csf_scheduler_spin_lock(kctx->kbdev, &flags); bitmap_clear(queue->group->protm_pending_bitmap, queue->csi_index, 1); - KBASE_KTRACE_ADD_CSF_GRP_Q(kctx->kbdev, PROTM_PENDING_CLEAR, + KBASE_KTRACE_ADD_CSF_GRP_Q(kctx->kbdev, CSI_PROTM_PEND_CLEAR, queue->group, queue, queue->group->protm_pending_bitmap[0]); queue->group->bound_queues[queue->csi_index] = NULL; queue->group = NULL; @@ -1364,10 +1356,13 @@ static int create_queue_group(struct kbase_context *const kctx, group->tiler_max = create->in.tiler_max; group->fragment_max = create->in.fragment_max; group->compute_max = create->in.compute_max; + group->csi_handlers = create->in.csi_handlers; group->priority = kbase_csf_priority_queue_group_priority_to_relative( kbase_csf_priority_check(kctx->kbdev, create->in.priority)); group->doorbell_nr = KBASEP_USER_DB_NR_INVALID; group->faulted = false; + group->cs_unrecoverable = false; + group->reevaluate_idle_status = false; group->group_uid = generate_group_uid(); @@ -1411,6 +1406,14 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx, const u32 tiler_count = hweight64(create->in.tiler_mask); const u32 fragment_count = hweight64(create->in.fragment_mask); const u32 compute_count = hweight64(create->in.compute_mask); + size_t i; + + for (i = 0; i < sizeof(create->in.padding); i++) { + if (create->in.padding[i] != 0) { + dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n"); + return -EINVAL; + } + } rt_mutex_lock(&kctx->csf.lock); @@ -1429,6 +1432,10 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx, "No CSG has at least %d CSs", create->in.cs_min); err = -EINVAL; + } else if (create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK) { + dev_warn(kctx->kbdev->dev, "Unknown exception handler flags set: %u", + create->in.csi_handlers & 
~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK); + err = -EINVAL; } else if (create->in.reserved) { dev_warn(kctx->kbdev->dev, "Reserved field was set to non-0"); err = -EINVAL; @@ -1467,9 +1474,8 @@ static void term_normal_suspend_buffer(struct kbase_context *const kctx, lockdep_assert_held(&kctx->csf.lock); - WARN_ON(kbase_mmu_teardown_pages( - kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, - s_buf->reg->start_pfn, nr_pages, MCU_AS_NR)); + WARN_ON(kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, + s_buf->reg->start_pfn, s_buf->phy, nr_pages, MCU_AS_NR)); WARN_ON(s_buf->reg->flags & KBASE_REG_FREE); @@ -1499,10 +1505,16 @@ static void term_protected_suspend_buffer(struct kbase_device *const kbdev, { const size_t nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + struct tagged_addr *phys = kmalloc(sizeof(*phys) * nr_pages, GFP_KERNEL); + size_t i = 0; - WARN_ON(kbase_mmu_teardown_pages( - kbdev, &kbdev->csf.mcu_mmu, - s_buf->reg->start_pfn, nr_pages, MCU_AS_NR)); + for (i = 0; phys && i < nr_pages; i++) + phys[i] = as_tagged(s_buf->pma[i]->pa); + + WARN_ON(kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, s_buf->reg->start_pfn, phys, + nr_pages, MCU_AS_NR)); + + kfree(phys); WARN_ON(s_buf->reg->flags & KBASE_REG_FREE); @@ -1732,7 +1744,6 @@ void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev, int kbase_csf_ctx_init(struct kbase_context *kctx) { - struct kbase_device *kbdev = kctx->kbdev; int err = -ENOMEM; INIT_LIST_HEAD(&kctx->csf.queue_list); @@ -1741,19 +1752,6 @@ int kbase_csf_ctx_init(struct kbase_context *kctx) kbase_csf_event_init(kctx); kctx->csf.user_reg_vma = NULL; - mutex_lock(&kbdev->pm.lock); - /* The inode information for /dev/malixx file is not available at the - * time of device probe as the inode is created when the device node - * is created by udevd (through mknod). 
- */ - if (kctx->filp) { - if (!kbdev->csf.mali_file_inode) - kbdev->csf.mali_file_inode = kctx->filp->f_inode; - - /* inode is unique for a file */ - WARN_ON(kbdev->csf.mali_file_inode != kctx->filp->f_inode); - } - mutex_unlock(&kbdev->pm.lock); /* Mark all the cookies as 'free' */ bitmap_fill(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE); @@ -1763,14 +1761,9 @@ int kbase_csf_ctx_init(struct kbase_context *kctx) if (unlikely(!kctx->csf.wq)) goto out; - kthread_init_worker(&kctx->csf.pending_submission_worker); - kctx->csf.pending_sub_worker_thread = kbase_create_realtime_thread( - kctx->kbdev, - kthread_worker_fn, - &kctx->csf.pending_submission_worker, - "mali_submit"); - - if (IS_ERR(kctx->csf.pending_sub_worker_thread)) { + err = kbase_create_realtime_thread(kctx->kbdev, kthread_worker_fn, + &kctx->csf.pending_submission_worker, "mali_submit"); + if (err) { dev_err(kctx->kbdev->dev, "error initializing pending submission worker thread"); goto out_err_kthread; } @@ -1798,7 +1791,7 @@ out_err_tiler_heap_context: out_err_kcpu_queue_context: kbase_csf_scheduler_context_term(kctx); out_err_scheduler_context: - kthread_stop(kctx->csf.pending_sub_worker_thread); + kbase_destroy_kworker_stack(&kctx->csf.pending_submission_worker); out_err_kthread: destroy_workqueue(kctx->csf.wq); out: @@ -1957,8 +1950,7 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) rt_mutex_unlock(&kctx->csf.lock); - kthread_flush_worker(&kctx->csf.pending_submission_worker); - kthread_stop(kctx->csf.pending_sub_worker_thread); + kbase_destroy_kworker_stack(&kctx->csf.pending_submission_worker); kbase_csf_tiler_heap_context_term(kctx); kbase_csf_kcpu_queue_context_term(kctx); @@ -1972,7 +1964,7 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) * handle_oom_event - Handle the OoM event generated by the firmware for the * CSI. * - * @kctx: Pointer to the kbase context in which the tiler heap was initialized. + * @group: Pointer to the CSG group the oom-event belongs to. * @stream: Pointer to the structure containing info provided by the firmware * about the CSI. * @@ -1987,9 +1979,10 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) * Return: 0 if successfully handled the request, otherwise a negative error * code on failure. 
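A condensed sketch of the acknowledgement decision the handler makes once the chunk allocation has been attempted. This is an illustration of the logic that follows, not a separate driver function; the name and parameters are hypothetical:

/* Hypothetical condensation of the tiler OoM acknowledge policy: a zero chunk
 * pointer tells the firmware either to start incremental rendering (when the
 * group opted in and no fragment work is pending) or to wait for fragment jobs
 * to release chunks (-EBUSY); any other error is propagated.
 */
static int example_oom_ack_chunk(bool ir_allowed, u32 pending_frag_count,
				 int alloc_err, u64 new_chunk_va, u64 *ack_va)
{
	if (ir_allowed && pending_frag_count == 0 &&
	    (alloc_err == -ENOMEM || alloc_err == -EBUSY)) {
		*ack_va = 0; /* enter incremental rendering */
		return 0;
	}
	if (alloc_err == -EBUSY) {
		*ack_va = 0; /* wait for fragment jobs to free a chunk */
		return 0;
	}
	if (alloc_err)
		return alloc_err;

	*ack_va = new_chunk_va; /* hand the freshly grown chunk to the firmware */
	return 0;
}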
*/ -static int handle_oom_event(struct kbase_context *const kctx, - struct kbase_csf_cmd_stream_info const *const stream) +static int handle_oom_event(struct kbase_queue_group *const group, + struct kbase_csf_cmd_stream_info const *const stream) { + struct kbase_context *const kctx = group->kctx; u64 gpu_heap_va = kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_LO) | ((u64)kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_HI) << 32); @@ -2016,12 +2009,18 @@ static int handle_oom_event(struct kbase_context *const kctx, err = kbase_csf_tiler_heap_alloc_new_chunk(kctx, gpu_heap_va, renderpasses_in_flight, pending_frag_count, &new_chunk_ptr); - /* It is okay to acknowledge with a NULL chunk (firmware will then wait - * for the fragment jobs to complete and release chunks) - */ - if (err == -EBUSY) + if ((group->csi_handlers & BASE_CSF_TILER_OOM_EXCEPTION_FLAG) && + (pending_frag_count == 0) && (err == -ENOMEM || err == -EBUSY)) { + /* The group allows incremental rendering, trigger it */ new_chunk_ptr = 0; - else if (err) + dev_dbg(kctx->kbdev->dev, "Group-%d (slot-%d) enter incremental render\n", + group->handle, group->csg_nr); + } else if (err == -EBUSY) { + /* Acknowledge with a NULL chunk (firmware will then wait for + * the fragment jobs to complete and release chunks) + */ + new_chunk_ptr = 0; + } else if (err) return err; kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_START_LO, @@ -2136,7 +2135,7 @@ static void kbase_queue_oom_event(struct kbase_queue *const queue) if (cs_oom_ack == cs_oom_req) goto unlock; - err = handle_oom_event(kctx, stream); + err = handle_oom_event(group, stream); kbase_csf_scheduler_spin_lock(kbdev, &flags); kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_oom_ack, @@ -2273,7 +2272,7 @@ static void protm_event_worker(struct work_struct *data) struct kbase_queue_group *const group = container_of(data, struct kbase_queue_group, protm_event_work); - KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_BEGIN, + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START, group, 0u); kbase_csf_scheduler_group_protm_enter(group); KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END, @@ -2440,6 +2439,11 @@ handle_fatal_event(struct kbase_queue *const queue, CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR) { queue_work(system_wq, &kbdev->csf.fw_error_work); } else { + if (cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE) { + queue->group->cs_unrecoverable = true; + if (kbase_prepare_to_reset_gpu(queue->kctx->kbdev, RESET_FLAGS_NONE)) + kbase_reset_gpu(queue->kctx->kbdev); + } get_queue(queue); queue->cs_fatal = cs_fatal; queue->cs_fatal_info = cs_fatal_info; @@ -2493,8 +2497,9 @@ static void handle_queue_exception_event(struct kbase_queue *const queue, * @ginfo: The CSG interface provided by the firmware. * @irqreq: CSG's IRQ request bitmask (one bit per CS). * @irqack: CSG's IRQ acknowledge bitmask (one bit per CS). - * @track: Pointer that tracks the highest idle CSG and the newly possible viable - * protcted mode requesting group, in current IRQ context. + * @track: Pointer that tracks the highest scanout priority idle CSG + * and any newly potentially viable protected mode requesting + * CSG in current IRQ context. 
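To make the new branch structure in handle_oom_event() easier to follow: the firmware is acknowledged with a NULL chunk either when the group has opted in to incremental rendering (CSI handler flag set, no fragment work pending) and chunk allocation failed with -ENOMEM or -EBUSY, or when allocation failed with plain -EBUSY; any other error is propagated. A standalone restatement of that decision with hypothetical names (this is not the driver function itself):

#include <errno.h>
#include <stdint.h>

/* Hypothetical stand-in for BASE_CSF_TILER_OOM_EXCEPTION_FLAG. */
#define TILER_OOM_EXCEPTION_FLAG (1u << 0)

/*
 * Decide which chunk pointer to acknowledge a tiler OOM event with.
 * Returns 0 when the event can be acknowledged, or the allocation
 * error when it has to be propagated to the caller.
 */
static int decide_oom_ack(uint8_t csi_handlers, uint32_t pending_frag_count,
                          int alloc_err, uint64_t new_chunk, uint64_t *chunk_ptr)
{
        if ((csi_handlers & TILER_OOM_EXCEPTION_FLAG) && pending_frag_count == 0 &&
            (alloc_err == -ENOMEM || alloc_err == -EBUSY)) {
                *chunk_ptr = 0; /* trigger incremental rendering */
                return 0;
        }
        if (alloc_err == -EBUSY) {
                *chunk_ptr = 0; /* firmware waits for fragment jobs to release chunks */
                return 0;
        }
        if (alloc_err)
                return alloc_err;
        *chunk_ptr = new_chunk; /* a new chunk was allocated successfully */
        return 0;
}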
* * If the interrupt request bitmask differs from the acknowledge bitmask * then the firmware is notifying the host of an event concerning those @@ -2537,7 +2542,8 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, if ((cs_req & CS_REQ_EXCEPTION_MASK) ^ (cs_ack & CS_ACK_EXCEPTION_MASK)) { - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_FAULT_INTERRUPT, group, queue, cs_req ^ cs_ack); + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT, + group, queue, cs_req ^ cs_ack); handle_queue_exception_event(queue, cs_req, cs_ack); } @@ -2549,16 +2555,18 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, u32 const cs_req_remain = cs_req & ~CS_REQ_EXCEPTION_MASK; u32 const cs_ack_remain = cs_ack & ~CS_ACK_EXCEPTION_MASK; - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_IGNORED_INTERRUPTS_GROUP_SUSPEND, - group, queue, cs_req_remain ^ cs_ack_remain); + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, + CSI_INTERRUPT_GROUP_SUSPENDS_IGNORED, + group, queue, + cs_req_remain ^ cs_ack_remain); continue; } if (((cs_req & CS_REQ_TILER_OOM_MASK) ^ (cs_ack & CS_ACK_TILER_OOM_MASK))) { get_queue(queue); - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_TILER_OOM_INTERRUPT, group, queue, - cs_req ^ cs_ack); + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_TILER_OOM, + group, queue, cs_req ^ cs_ack); if (WARN_ON(!queue_work(wq, &queue->oom_event_work))) { /* The work item shall not have been * already queued, there can be only @@ -2571,8 +2579,8 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, if ((cs_req & CS_REQ_PROTM_PEND_MASK) ^ (cs_ack & CS_ACK_PROTM_PEND_MASK)) { - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_INTERRUPT, group, queue, - cs_req ^ cs_ack); + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_PROTM_PEND, + group, queue, cs_req ^ cs_ack); dev_dbg(kbdev->dev, "Protected mode entry request for queue on csi %d bound to group-%d on slot %d", @@ -2580,7 +2588,7 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, group->csg_nr); bitmap_set(group->protm_pending_bitmap, i, 1); - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, PROTM_PENDING_SET, group, queue, + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_SET, group, queue, group->protm_pending_bitmap[0]); protm_pend = true; } @@ -2611,7 +2619,7 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, * @kbdev: Instance of a GPU platform device that implements a CSF interface. * @csg_nr: CSG number. * @track: Pointer that tracks the highest idle CSG and the newly possible viable - * protcted mode requesting group, in current IRQ context. + * protected mode requesting group, in current IRQ context. * * Handles interrupts for a CSG and for CSs within it. 
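The per-CS checks in process_cs_interrupts() all follow the same convention: an event of a given class is outstanding while its REQ and ACK bits differ, and it is acknowledged by making them equal again (which side owns which register varies between the CS, CSG and GLB interfaces). A generic illustration of that bit protocol, independent of the driver:

#include <stdbool.h>
#include <stdint.h>

/* An event class is pending while its REQ and ACK bits disagree. */
static bool event_pending(uint32_t req, uint32_t ack, uint32_t mask)
{
        return ((req ^ ack) & mask) != 0;
}

/* Acknowledge by copying the REQ bits of that class into ACK. */
static uint32_t event_ack(uint32_t req, uint32_t ack, uint32_t mask)
{
        return (ack & ~mask) | (req & mask);
}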
* @@ -2634,7 +2642,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c if (WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num)) return; - KBASE_KTRACE_ADD(kbdev, CSG_INTERRUPT_PROCESS, NULL, csg_nr); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr); ginfo = &kbdev->csf.global_iface.groups[csg_nr]; req = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ); @@ -2674,7 +2682,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SYNC_UPDATE_INTERRUPT, group, req ^ ack); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_SYNC_UPDATE, group, req ^ ack); /* SYNC_UPDATE events shall invalidate GPU idle event */ atomic_set(&kbdev->csf.scheduler.gpu_no_longer_idle, true); @@ -2691,7 +2699,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c set_bit(csg_nr, scheduler->csg_slots_idle_mask); KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, group, scheduler->csg_slots_idle_mask[0]); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_IDLE_INTERRUPT, group, req ^ ack); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_IDLE, group, req ^ ack); dev_dbg(kbdev->dev, "Idle notification received for Group %u on slot %d\n", group->handle, csg_nr); @@ -2699,7 +2707,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c /* If there are non-idle CSGs waiting for a slot, fire * a tock for a replacement. */ - kthread_mod_delayed_work(&scheduler->csf_worker, &scheduler->tock_work, 0); + kbase_csf_scheduler_invoke_tock(kbdev); } if (group->scan_seq_num < track->idle_seq) { @@ -2712,7 +2720,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack, CSG_REQ_PROGRESS_TIMER_EVENT_MASK); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_PROGRESS_TIMER_INTERRUPT, + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROGRESS_TIMER_EVENT, group, req ^ ack); dev_info(kbdev->dev, "[%llu] Iterator PROGRESS_TIMER timeout notification received for group %u of ctx %d_%d on slot %d\n", @@ -2874,7 +2882,7 @@ static inline void process_protm_exit(struct kbase_device *kbdev, u32 glb_ack) GLB_REQ_PROTM_EXIT_MASK); if (likely(scheduler->active_protm_grp)) { - KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_EXIT_PROTM, + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_EXIT, scheduler->active_protm_grp, 0u); scheduler->active_protm_grp = NULL; } else { @@ -2898,19 +2906,22 @@ static inline void process_tracked_info_for_protm(struct kbase_device *kbdev, kbase_csf_scheduler_spin_lock_assert_held(kbdev); + if (likely(current_protm_pending_seq == KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID)) + return; + /* Handle protm from the tracked information */ if (track->idle_seq < current_protm_pending_seq) { /* If the protm enter was prevented due to groups priority, then fire a tock * for the scheduler to re-examine the case. 
- */ + */ dev_dbg(kbdev->dev, "Attempt pending protm from idle slot %d\n", track->idle_slot); - kthread_mod_delayed_work(&scheduler->csf_worker, &scheduler->tock_work, 0); + kbase_csf_scheduler_invoke_tock(kbdev); } else if (group) { u32 i, num_groups = kbdev->csf.global_iface.group_num; struct kbase_queue_group *grp; bool tock_triggered = false; - /* A new protem request, and track->idle_seq is not sufficient, check across + /* A new protm request, and track->idle_seq is not sufficient, check across * previously notified idle CSGs in the current tick/tock cycle. */ for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) { @@ -2927,7 +2938,7 @@ static inline void process_tracked_info_for_protm(struct kbase_device *kbdev, tock_triggered = true; dev_dbg(kbdev->dev, "Attempt new protm from tick/tock idle slot %d\n", i); - kthread_mod_delayed_work(&scheduler->csf_worker, &scheduler->tock_work, 0); + kbase_csf_scheduler_invoke_tock(kbdev); break; } } @@ -2975,7 +2986,7 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) lockdep_assert_held(&kbdev->hwaccess_lock); - KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT, NULL, val); + KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_START, NULL, val); kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val); order_job_irq_clear_with_iface_mem_read(); @@ -3010,7 +3021,7 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) global_iface, GLB_REQ); glb_ack = kbase_csf_firmware_global_output( global_iface, GLB_ACK); - KBASE_KTRACE_ADD(kbdev, GLB_REQ_ACQ, NULL, glb_req ^ glb_ack); + KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_GLB_REQ_ACK, NULL, glb_req ^ glb_ack); check_protm_enter_req_complete(kbdev, glb_req, glb_ack); diff --git a/mali_kbase/csf/mali_kbase_csf.h b/mali_kbase/csf/mali_kbase_csf.h index f689205..0b87f50 100644 --- a/mali_kbase/csf/mali_kbase_csf.h +++ b/mali_kbase/csf/mali_kbase_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -45,8 +45,6 @@ */ #define KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID (U32_MAX) -#define FIRMWARE_PING_INTERVAL_MS (12000) /* 12 seconds */ - /* 60ms optimizes power while minimizing latency impact for UI test cases. */ #define FIRMWARE_IDLE_HYSTERESIS_TIME_MS (60) #define MALI_HOST_CONTROLS_SC_RAILS_IDLE_TIMER_US (600) @@ -162,7 +160,7 @@ int kbase_csf_queue_bind(struct kbase_context *kctx, * resources allocated for this queue if there * are any. * - * @queue: Pointer to queue to be unbound. + * @queue: Pointer to queue to be unbound. * @process_exit: Flag to indicate if process exit is happening. */ void kbase_csf_queue_unbind(struct kbase_queue *queue, bool process_exit); diff --git a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c index 6b1186e..e598f8b 100644 --- a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c +++ b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,11 +23,11 @@ #include <mali_kbase.h> #include <linux/seq_file.h> #include <linux/delay.h> -#include <csf/mali_kbase_csf_trace_buffer.h> #include <backend/gpu/mali_kbase_pm_internal.h> #if IS_ENABLED(CONFIG_DEBUG_FS) #include "mali_kbase_csf_tl_reader.h" +#include <linux/version_compat_defs.h> #define MAX_SCHED_STATE_STRING_LEN (16) static const char *scheduler_state_to_string(struct kbase_device *kbdev, @@ -77,16 +77,32 @@ static const char *blocked_reason_to_string(u32 reason_id) return cs_blocked_reason[reason_id]; } +static bool sb_source_supported(u32 glb_version) +{ + bool supported = false; + + if (((GLB_VERSION_MAJOR_GET(glb_version) == 3) && + (GLB_VERSION_MINOR_GET(glb_version) >= 5)) || + ((GLB_VERSION_MAJOR_GET(glb_version) == 2) && + (GLB_VERSION_MINOR_GET(glb_version) >= 6)) || + ((GLB_VERSION_MAJOR_GET(glb_version) == 1) && + (GLB_VERSION_MINOR_GET(glb_version) >= 3))) + supported = true; + + return supported; +} + static void kbasep_csf_scheduler_dump_active_queue_cs_status_wait( - struct seq_file *file, u32 wait_status, u32 wait_sync_value, - u64 wait_sync_live_value, u64 wait_sync_pointer, u32 sb_status, - u32 blocked_reason) + struct seq_file *file, u32 glb_version, u32 wait_status, u32 wait_sync_value, + u64 wait_sync_live_value, u64 wait_sync_pointer, u32 sb_status, u32 blocked_reason) { #define WAITING "Waiting" #define NOT_WAITING "Not waiting" seq_printf(file, "SB_MASK: %d\n", CS_STATUS_WAIT_SB_MASK_GET(wait_status)); + if (sb_source_supported(glb_version)) + seq_printf(file, "SB_SOURCE: %d\n", CS_STATUS_WAIT_SB_SOURCE_GET(wait_status)); seq_printf(file, "PROGRESS_WAIT: %s\n", CS_STATUS_WAIT_PROGRESS_WAIT_GET(wait_status) ? 
WAITING : NOT_WAITING); @@ -156,10 +172,13 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file, struct kbase_vmap_struct *mapping; u64 *evt; u64 wait_sync_live_value; + u32 glb_version; if (!queue) return; + glb_version = queue->kctx->kbdev->csf.global_iface.version; + if (WARN_ON(queue->csi_index == KBASEP_IF_NR_INVALID || !queue->group)) return; @@ -200,9 +219,8 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file, } kbasep_csf_scheduler_dump_active_queue_cs_status_wait( - file, wait_status, wait_sync_value, - wait_sync_live_value, wait_sync_pointer, - sb_status, blocked_reason); + file, glb_version, wait_status, wait_sync_value, + wait_sync_live_value, wait_sync_pointer, sb_status, blocked_reason); } } else { struct kbase_device const *const kbdev = @@ -257,9 +275,8 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file, } kbasep_csf_scheduler_dump_active_queue_cs_status_wait( - file, wait_status, wait_sync_value, - wait_sync_live_value, wait_sync_pointer, sb_status, - blocked_reason); + file, glb_version, wait_status, wait_sync_value, wait_sync_live_value, + wait_sync_pointer, sb_status, blocked_reason); /* Dealing with cs_trace */ if (kbase_csf_scheduler_queue_has_trace(queue)) kbasep_csf_scheduler_dump_active_cs_trace(file, stream); @@ -500,11 +517,7 @@ static const struct file_operations kbasep_csf_queue_group_debugfs_fops = { void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx) { struct dentry *file; -#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) const mode_t mode = 0444; -#else - const mode_t mode = 0400; -#endif if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) return; @@ -556,14 +569,11 @@ static int kbasep_csf_debugfs_scheduling_timer_kick_set( return 0; } -DEFINE_SIMPLE_ATTRIBUTE(kbasep_csf_debugfs_scheduling_timer_enabled_fops, - &kbasep_csf_debugfs_scheduling_timer_enabled_get, - &kbasep_csf_debugfs_scheduling_timer_enabled_set, - "%llu\n"); -DEFINE_SIMPLE_ATTRIBUTE(kbasep_csf_debugfs_scheduling_timer_kick_fops, - NULL, - &kbasep_csf_debugfs_scheduling_timer_kick_set, - "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(kbasep_csf_debugfs_scheduling_timer_enabled_fops, + &kbasep_csf_debugfs_scheduling_timer_enabled_get, + &kbasep_csf_debugfs_scheduling_timer_enabled_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(kbasep_csf_debugfs_scheduling_timer_kick_fops, NULL, + &kbasep_csf_debugfs_scheduling_timer_kick_set, "%llu\n"); /** * kbase_csf_debugfs_scheduler_state_get() - Get the state of scheduler. @@ -671,7 +681,6 @@ void kbase_csf_debugfs_init(struct kbase_device *kbdev) &kbasep_csf_debugfs_scheduler_state_fops); kbase_csf_tl_reader_debugfs_init(kbdev); - kbase_csf_firmware_trace_buffer_debugfs_init(kbdev); } #else diff --git a/mali_kbase/csf/mali_kbase_csf_defs.h b/mali_kbase/csf/mali_kbase_csf_defs.h index d65f729..836b558 100644 --- a/mali_kbase/csf/mali_kbase_csf_defs.h +++ b/mali_kbase/csf/mali_kbase_csf_defs.h @@ -55,7 +55,7 @@ #define CSF_FIRMWARE_ENTRY_ZERO (1ul << 31) /** - * enum kbase_csf_bind_state - bind state of the queue + * enum kbase_csf_queue_bind_state - bind state of the queue * * @KBASE_CSF_QUEUE_UNBOUND: Set when the queue is registered or when the link * between queue and the group to which it was bound or being bound is removed. @@ -259,6 +259,11 @@ enum kbase_queue_group_priority { * @CSF_PM_TIMEOUT: Timeout for GPU Power Management to reach the desired * Shader, L2 and MCU state. * @CSF_GPU_RESET_TIMEOUT: Waiting timeout for GPU reset to complete. 
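The sb_source_supported() helper added in the debugfs hunk above prints the SB_SOURCE field only for global interface versions 1.3+, 2.6+ or 3.5+. Assuming the usual packing of the version word (major in bits 31..24, minor in bits 23..16, patch in the low bits), the check can be exercised in isolation like this (standalone sketch, not driver code):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed field layout of the GLB_VERSION word. */
#define GLB_VERSION_MAJOR(v) (((v) >> 24) & 0xffu)
#define GLB_VERSION_MINOR(v) (((v) >> 16) & 0xffu)

static bool sb_source_supported(uint32_t glb_version)
{
        uint32_t major = GLB_VERSION_MAJOR(glb_version);
        uint32_t minor = GLB_VERSION_MINOR(glb_version);

        return (major == 3 && minor >= 5) || (major == 2 && minor >= 6) ||
               (major == 1 && minor >= 3);
}

int main(void)
{
        uint32_t v2_6 = (2u << 24) | (6u << 16);
        uint32_t v2_5 = (2u << 24) | (5u << 16);

        printf("2.6: %d, 2.5: %d\n", sb_source_supported(v2_6), sb_source_supported(v2_5));
        return 0;
}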
+ * @CSF_CSG_SUSPEND_TIMEOUT: Timeout given for all active CSGs to be suspended. + * @CSF_FIRMWARE_BOOT_TIMEOUT: Maximum time to wait for firmware to boot. + * @CSF_FIRMWARE_PING_TIMEOUT: Maximum time to wait for firmware to respond + * to a ping from KBase. + * @CSF_SCHED_PROTM_PROGRESS_TIMEOUT: Timeout used to prevent protected mode execution hang. * @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in * the enum. */ @@ -266,6 +271,10 @@ enum kbase_timeout_selector { CSF_FIRMWARE_TIMEOUT, CSF_PM_TIMEOUT, CSF_GPU_RESET_TIMEOUT, + CSF_CSG_SUSPEND_TIMEOUT, + CSF_FIRMWARE_BOOT_TIMEOUT, + CSF_FIRMWARE_PING_TIMEOUT, + CSF_SCHED_PROTM_PROGRESS_TIMEOUT, /* Must be the last in the enum */ KBASE_TIMEOUT_SELECTOR_COUNT @@ -446,6 +455,7 @@ struct kbase_protected_suspend_buffer { * allowed to use. * @compute_max: Maximum number of compute endpoints the group is * allowed to use. + * @csi_handlers: Requested CSI exception handler flags for the group. * @tiler_mask: Mask of tiler endpoints the group is allowed to use. * @fragment_mask: Mask of fragment endpoints the group is allowed to use. * @compute_mask: Mask of compute endpoints the group is allowed to use. @@ -467,6 +477,12 @@ struct kbase_protected_suspend_buffer { * @faulted: Indicates that a GPU fault occurred for the queue group. * This flag persists until the fault has been queued to be * reported to userspace. + * @cs_unrecoverable: Flag to unblock the thread waiting for CSG termination in + * case of CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE + * @reevaluate_idle_status : Flag set when work is submitted for the normal group + * or it becomes unblocked during protected mode. The + * flag helps Scheduler confirm if the group actually + * became non idle or not. * @bound_queues: Array of registered queues bound to this queue group. * @doorbell_nr: Index of the hardware doorbell page assigned to the * group. @@ -494,6 +510,7 @@ struct kbase_queue_group { u8 tiler_max; u8 fragment_max; u8 compute_max; + u8 csi_handlers; u64 tiler_mask; u64 fragment_mask; @@ -507,6 +524,8 @@ struct kbase_queue_group { u32 prepared_seq_num; u32 scan_seq_num; bool faulted; + bool cs_unrecoverable; + bool reevaluate_idle_status; struct kbase_queue *bound_queues[MAX_SUPPORTED_STREAMS_PER_GROUP]; @@ -529,12 +548,10 @@ struct kbase_queue_group { * @lock: Lock preventing concurrent access to @array and the @in_use bitmap. * @array: Array of pointers to kernel CPU command queues. * @in_use: Bitmap which indicates which kernel CPU command queues are in use. - * @csf_kcpu_worker: Dedicated worker for processing kernel CPU command - * queues. - * @csf_kcpu_thread: The kthread used to process kernel CPU command queues. * @num_cmds: The number of commands that have been enqueued across * all the KCPU command queues. This could be used as a * timestamp to determine the command's enqueueing time. + * @jit_lock: Lock protecting jit_cmds_head and jit_blocked_queues. * @jit_cmds_head: A list of the just-in-time memory commands, both * allocate & free, in submission order, protected * by kbase_csf_kcpu_queue_context.lock. 
@@ -547,10 +564,8 @@ struct kbase_csf_kcpu_queue_context { struct mutex lock; struct kbase_kcpu_command_queue *array[KBASEP_MAX_KCPU_QUEUES]; DECLARE_BITMAP(in_use, KBASEP_MAX_KCPU_QUEUES); - struct kthread_worker csf_kcpu_worker; - struct task_struct *csf_kcpu_thread; - u64 num_cmds; - + atomic64_t num_cmds; + spinlock_t jit_lock; struct list_head jit_cmds_head; struct list_head jit_blocked_queues; }; @@ -608,6 +623,8 @@ struct kbase_csf_heap_context_allocator { * @ctx_alloc: Allocator for heap context structures. * @nr_of_heaps: Total number of tiler heaps that were added during the * life time of the context. + * @est_count_pages: Estimated potentially freeable pages from all the heaps + * on the @list. * * This contains all of the CSF state relating to chunked tiler heaps for one * @kbase_context. It is not the same as a heap context structure allocated by @@ -618,30 +635,66 @@ struct kbase_csf_tiler_heap_context { struct list_head list; struct kbase_csf_heap_context_allocator ctx_alloc; u64 nr_of_heaps; + atomic_t est_count_pages; +}; + +#define CSF_CTX_RECLAIM_CANDI_FLAG (1ul << 0) +#define CSF_CTX_RECLAIM_SCAN_FLAG (1ul << 1) +/** + * struct kbase_kctx_heap_info - Object representing the data section of a kctx + * for tiler heap reclaim manger + * @mgr_link: Link for hooking up to the heap reclaim manger's kctx lists + * @attach_jiffies: jiffies when the kctx is attached to the reclaim manager. + * @nr_scan_pages: Number of a better estimated freeable pages from the kctx + * after all its CSGs are off-slots and have been properly + * gone through the freeable pages count process. This field + * is updated when the kctx is moved to the reclaim manager's + * pending scan (freeing) action list, after the counting. + * @nr_est_pages: Estimated number of pages of the kctx when all its CSGs are + * off-slot. This is a nominal value used for estimating an + * available page counts from the kctx. The kctx is on the + * reclaim manager's candidate list, waiting for count. + * @flags: reflecting the kctx's internal state in relation to the + * scheduler's heap reclaim manager. + * @on_slot_grps: Number of on-slot groups from this kctx. In principle, if a + * kctx has groups on-slot, the scheduler will detach it from + * the tiler heap reclaim manager, i.e. no tiler heap memory + * reclaiming operations on the kctx. + */ +struct kbase_kctx_heap_info { + struct list_head mgr_link; + unsigned long attach_jiffies; + u32 nr_scan_pages; + u32 nr_est_pages; + u16 flags; + u8 on_slot_grps; }; /** * struct kbase_csf_scheduler_context - Object representing the scheduler's * context for a GPU address space. * - * @runnable_groups: Lists of runnable GPU command queue groups in the kctx, - * one per queue group relative-priority level. - * @num_runnable_grps: Total number of runnable groups across all priority - * levels in @runnable_groups. - * @idle_wait_groups: A list of GPU command queue groups in which all enabled - * GPU command queues are idle and at least one of them - * is blocked on a sync wait operation. - * @num_idle_wait_grps: Length of the @idle_wait_groups list. - * @sync_update_worker: Dedicated workqueue to process work items corresponding - * to the sync_update events by sync_set/sync_add - * instruction execution on CSs bound to groups - * of @idle_wait_groups list. - * @sync_update_worker_thread: Task struct for @csf_worker. 
- * @sync_update_work: work item to process the sync_update events by - * sync_set / sync_add instruction execution on command - * streams bound to groups of @idle_wait_groups list. - * @ngrp_to_schedule: Number of groups added for the context to the - * 'groups_to_schedule' list of scheduler instance. + * @runnable_groups: Lists of runnable GPU command queue groups in the kctx, + * one per queue group relative-priority level. + * @num_runnable_grps: Total number of runnable groups across all priority + * levels in @runnable_groups. + * @idle_wait_groups: A list of GPU command queue groups in which all enabled + * GPU command queues are idle and at least one of them + * is blocked on a sync wait operation. + * @num_idle_wait_grps: Length of the @idle_wait_groups list. + * @sync_update_worker: Dedicated workqueue to process work items corresponding + * to the sync_update events by sync_set/sync_add + * instruction execution on CSs bound to groups + * of @idle_wait_groups list. + * @sync_update_work: work item to process the sync_update events by + * sync_set / sync_add instruction execution on command + * streams bound to groups of @idle_wait_groups list. + * @ngrp_to_schedule: Number of groups added for the context to the + * 'groups_to_schedule' list of scheduler instance. + * @heap_info: Heap reclaim information data of the kctx. As the + * reclaim action needs to be coordinated with the scheduler + * operations, the data is placed inside the scheduler's + * context object for this linkage. */ struct kbase_csf_scheduler_context { struct list_head runnable_groups[KBASE_QUEUE_GROUP_PRIORITY_COUNT]; @@ -649,9 +702,9 @@ struct kbase_csf_scheduler_context { struct list_head idle_wait_groups; u32 num_idle_wait_grps; struct kthread_worker sync_update_worker; - struct task_struct *sync_update_worker_thread; struct kthread_work sync_update_work; u32 ngrp_to_schedule; + struct kbase_kctx_heap_info heap_info; }; /** @@ -735,7 +788,6 @@ struct kbase_csf_event { * @sched: Object representing the scheduler's context * @pending_submission_worker: Worker for the pending submission work item * @pending_submission_work: Work item to process pending kicked GPU command queues. - * @pending_sub_work_thread: task_struct for @pending_submission_worker * @cpu_queue: CPU queue information. Only be available when DEBUG_FS * is enabled. */ @@ -756,7 +808,6 @@ struct kbase_csf_context { struct kbase_csf_scheduler_context sched; struct kthread_worker pending_submission_worker; struct kthread_work pending_submission_work; - struct task_struct *pending_sub_worker_thread; #if IS_ENABLED(CONFIG_DEBUG_FS) struct kbase_csf_cpu_queue_context cpu_queue; #endif @@ -799,6 +850,25 @@ struct kbase_csf_csg_slot { }; /** + * struct kbase_csf_sched_heap_reclaim_mgr - Object for managing tiler heap reclaim + * kctx lists inside the CSF device's scheduler. + * + * @candidate_ctxs: List of kctxs that have all their CSGs off-slots. Candidates + * are ready for reclaim count examinations. + * @scan_list_ctxs: List counted kctxs, ready for reclaim scan operations. + * @est_cand_pages: Estimated pages based on chunks that could be free-able from the + * candidate list. For each addition of an acandidate, the number is + * increased with an estimate, and decreased vice versa. + * @mgr_scan_pages: Number of pagess free-able in the scan list, device wide. 
+ */ +struct kbase_csf_sched_heap_reclaim_mgr { + struct list_head candidate_ctxs; + struct list_head scan_list_ctxs; + atomic_t est_cand_pages; + atomic_t mgr_scan_pages; +}; + +/** * struct kbase_csf_scheduler - Object representing the scheduler used for * CSF for an instance of GPU platform device. * @lock: Lock to serialize the scheduler operations and @@ -862,7 +932,6 @@ struct kbase_csf_csg_slot { * then it will only perform scheduling under the * influence of external factors e.g., IRQs, IOCTLs. * @csf_worker: Dedicated kthread_worker to execute the @tick_work. - * @csf_worker_thread: Task struct for @csf_worker. * @tick_timer: High-resolution timer employed to schedule tick * workqueue items (kernel-provided delayed_work * items do not use hrtimer and for some reason do @@ -871,6 +940,8 @@ struct kbase_csf_csg_slot { * operation to implement timeslice-based scheduling. * @tock_work: Work item that would perform the schedule on tock * operation to implement the asynchronous scheduling. + * @pending_tock_work: Indicates that the tock work item should re-execute + * once it's finished instead of going back to sleep. * @ping_work: Work item that would ping the firmware at regular * intervals, only if there is a single active CSG * slot, to check if firmware is alive and would @@ -880,8 +951,6 @@ struct kbase_csf_csg_slot { * @top_grp. * @top_grp: Pointer to queue group inside @groups_to_schedule * list that was assigned the highest slot priority. - * @tock_pending_request: A "tock" request is pending: a group that is not - * currently on the GPU demands to be scheduled. * @active_protm_grp: Indicates if firmware has been permitted to let GPU * enter protected mode with the given group. On exit * from protected mode the pointer is reset to NULL. @@ -939,6 +1008,8 @@ struct kbase_csf_csg_slot { * is disabled on FW side. It is set for the power * policy where the power managment of shader cores * needs to be done by the Host. + * @protm_enter_time: GPU protected mode enter time. + * @reclaim_mgr: CSGs tiler heap manager object. */ struct kbase_csf_scheduler { struct mutex lock; @@ -962,14 +1033,13 @@ struct kbase_csf_scheduler { unsigned long last_schedule; bool timer_enabled; struct kthread_worker csf_worker; - struct task_struct *csf_worker_thread; struct hrtimer tick_timer; struct kthread_work tick_work; struct kthread_delayed_work tock_work; + atomic_t pending_tock_work; struct delayed_work ping_work; struct kbase_context *top_ctx; struct kbase_queue_group *top_grp; - bool tock_pending_request; struct kbase_queue_group *active_protm_grp; struct delayed_work gpu_idle_work; struct workqueue_struct *idle_wq; @@ -986,6 +1056,8 @@ struct kbase_csf_scheduler { bool gpu_idle_work_pending; #endif bool gpu_idle_fw_timer_enabled; + ktime_t protm_enter_time; + struct kbase_csf_sched_heap_reclaim_mgr reclaim_mgr; }; /* @@ -1206,6 +1278,57 @@ struct kbase_csf_hwcnt { bool enable_pending; }; +/* + * struct kbase_csf_mcu_fw - Object containing device loaded MCU firmware data. + * + * @size: Loaded firmware data size. Meaningful only when the + * other field @p data is not NULL. + * @data: Pointer to the device retained firmware data. If NULL + * means not loaded yet or error in loading stage. + */ +struct kbase_csf_mcu_fw { + size_t size; + u8 *data; +}; + +/* + * Firmware log polling period. 
+ */ +#define KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS 25 + +/** + * enum kbase_csf_firmware_log_mode - Firmware log operating mode + * + * @KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL: Manual mode, firmware log can be read + * manually by the userspace (and it will also be dumped automatically into + * dmesg on GPU reset). + * + * @KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT: Automatic printing mode, firmware log + * will be periodically emptied into dmesg, manual reading through debugfs is + * disabled. + */ +enum kbase_csf_firmware_log_mode { + KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL, + KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT +}; + +/** + * struct kbase_csf_firmware_log - Object containing members for handling firmware log. + * + * @mode: Firmware log operating mode. + * @busy: Indicating whether a firmware log operation is in progress. + * @poll_work: Work item that would poll firmware log buffer + * at regular intervals to perform any periodic + * activities required by current log mode. + * @dump_buf: Buffer used for dumping the log. + */ +struct kbase_csf_firmware_log { + enum kbase_csf_firmware_log_mode mode; + atomic_t busy; + struct delayed_work poll_work; + u8 *dump_buf; +}; + /** * struct kbase_csf_device - Object representing CSF for an instance of GPU * platform device. @@ -1249,11 +1372,14 @@ struct kbase_csf_hwcnt { * in the address space of every process, that created * a Base context, to enable the access to LATEST_FLUSH * register from userspace. + * @nr_user_page_mapped: The number of clients using the mapping of USER page. + * This is used to maintain backward compatibility. + * It's protected by @reg_lock. * @mali_file_inode: Pointer to the inode corresponding to mali device * file. This is needed in order to switch to the * @dummy_user_reg_page on GPU power down. * All instances of the mali device file will point to - * the same inode. + * the same inode. It's protected by @reg_lock. * @reg_lock: Lock to serialize the MCU firmware related actions * that affect all contexts such as allocation of * regions from shared interface area, assignment of @@ -1318,6 +1444,9 @@ struct kbase_csf_hwcnt { * for any request sent to the firmware. * @hwcnt: Contain members required for handling the dump of * HW counters. + * @fw: Copy of the loaded MCU firmware image. + * @fw_log: Contain members required for handling firmware log. + * @tiler_heap_reclaim: Tiler heap reclaim shrinker object. 
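The new @reclaim_mgr and @tiler_heap_reclaim members tie the scheduler's tiler-heap bookkeeping to the kernel's memory shrinker framework; the callbacks themselves arrive in later hunks of the series. As a reminder of how that framework is driven, a generic shrinker skeleton (illustrative only, not the patch's implementation; on newer kernels register_shrinker() also takes a name argument):

#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/shrinker.h>

struct reclaim_example {
        atomic_t est_pages;     /* pages believed to be freeable */
        struct shrinker shrinker;
};

/* Tell the mm core how many objects could be reclaimed right now. */
static unsigned long reclaim_example_count(struct shrinker *s, struct shrink_control *sc)
{
        struct reclaim_example *mgr = container_of(s, struct reclaim_example, shrinker);

        return (unsigned long)atomic_read(&mgr->est_pages);
}

/* Free objects here; SHRINK_STOP means nothing more can be done now. */
static unsigned long reclaim_example_scan(struct shrinker *s, struct shrink_control *sc)
{
        return SHRINK_STOP;
}

static int reclaim_example_register(struct reclaim_example *mgr)
{
        mgr->shrinker.count_objects = reclaim_example_count;
        mgr->shrinker.scan_objects = reclaim_example_scan;
        mgr->shrinker.seeks = DEFAULT_SEEKS;
        return register_shrinker(&mgr->shrinker);
}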
*/ struct kbase_csf_device { struct kbase_mmu_table mcu_mmu; @@ -1332,6 +1461,7 @@ struct kbase_csf_device { u32 db_file_offsets; struct tagged_addr dummy_db_page; struct tagged_addr dummy_user_reg_page; + u32 nr_user_page_mapped; struct inode *mali_file_inode; struct mutex reg_lock; wait_queue_head_t event_wait; @@ -1358,6 +1488,9 @@ struct kbase_csf_device { u32 gpu_idle_dur_count; unsigned int fw_timeout_ms; struct kbase_csf_hwcnt hwcnt; + struct kbase_csf_mcu_fw fw; + struct kbase_csf_firmware_log fw_log; + struct shrinker tiler_heap_reclaim; }; /** diff --git a/mali_kbase/csf/mali_kbase_csf_event.c b/mali_kbase/csf/mali_kbase_csf_event.c index e336658..52a6b10 100644 --- a/mali_kbase/csf/mali_kbase_csf_event.c +++ b/mali_kbase/csf/mali_kbase_csf_event.c @@ -102,7 +102,7 @@ static void sync_update_notify_gpu(struct kbase_context *kctx) if (can_notify_gpu) { kbase_csf_ring_doorbell(kctx->kbdev, CSF_KERNEL_DOORBELL_NR); - KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT_NOTIFY_GPU, kctx, 0u); + KBASE_KTRACE_ADD(kctx->kbdev, CSF_SYNC_UPDATE_NOTIFY_GPU_EVENT, kctx, 0u); } spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); diff --git a/mali_kbase/csf/mali_kbase_csf_event.h b/mali_kbase/csf/mali_kbase_csf_event.h index 4c853b5..52122a9 100644 --- a/mali_kbase/csf/mali_kbase_csf_event.h +++ b/mali_kbase/csf/mali_kbase_csf_event.h @@ -30,8 +30,8 @@ struct kbase_csf_event; enum kbase_csf_event_callback_action; /** - * kbase_csf_event_callback_action - type for callback functions to be - * called upon CSF events. + * kbase_csf_event_callback - type for callback functions to be + * called upon CSF events. * @param: Generic parameter to pass to the callback function. * * This is the type of callback functions that can be registered diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.c b/mali_kbase/csf/mali_kbase_csf_firmware.c index da89d73..b5e3f0c 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware.c @@ -21,6 +21,7 @@ #include "mali_kbase.h" #include "mali_kbase_csf_firmware_cfg.h" +#include "mali_kbase_csf_firmware_log.h" #include "mali_kbase_csf_trace_buffer.h" #include "mali_kbase_csf_timeout.h" #include "mali_kbase_mem.h" @@ -44,11 +45,13 @@ #include <linux/mman.h> #include <linux/string.h> #include <linux/mutex.h> +#include <linux/ctype.h> #if (KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE) #include <linux/set_memory.h> #endif #include <mmu/mali_kbase_mmu.h> #include <asm/arch_timer.h> +#include <linux/delay.h> #define MALI_MAX_FIRMWARE_NAME_LEN ((size_t)20) @@ -57,7 +60,7 @@ module_param_string(fw_name, fw_name, sizeof(fw_name), 0644); MODULE_PARM_DESC(fw_name, "firmware image"); /* The waiting time for firmware to boot */ -static unsigned int csf_firmware_boot_timeout_ms = 500; +static unsigned int csf_firmware_boot_timeout_ms; module_param(csf_firmware_boot_timeout_ms, uint, 0444); MODULE_PARM_DESC(csf_firmware_boot_timeout_ms, "Maximum time to wait for firmware to boot."); @@ -93,6 +96,7 @@ MODULE_PARM_DESC(fw_debug, #define CSF_FIRMWARE_ENTRY_TYPE_FUTF_TEST (2) #define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3) #define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4) +#define CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA (6) #define CSF_FIRMWARE_CACHE_MODE_NONE (0ul << 3) #define CSF_FIRMWARE_CACHE_MODE_CACHED (1ul << 3) @@ -103,12 +107,18 @@ MODULE_PARM_DESC(fw_debug, #define TL_METADATA_ENTRY_NAME_OFFSET (0x8) +#define BUILD_INFO_METADATA_SIZE_OFFSET (0x4) +#define BUILD_INFO_GIT_SHA_LEN (40U) +#define BUILD_INFO_GIT_DIRTY_LEN (1U) 
+#define BUILD_INFO_GIT_SHA_PATTERN "git_sha: " + #define CSF_MAX_FW_STOP_LOOPS (100000) #define CSF_GLB_REQ_CFG_MASK \ (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK) + static inline u32 input_page_read(const u32 *const input, const u32 offset) { WARN_ON(offset % sizeof(u32)); @@ -249,10 +259,15 @@ static void stop_csf_firmware(struct kbase_device *kbdev) static void wait_for_firmware_boot(struct kbase_device *kbdev) { - const long wait_timeout = - kbase_csf_timeout_in_jiffies(csf_firmware_boot_timeout_ms); + long wait_timeout; long remaining; + if (!csf_firmware_boot_timeout_ms) + csf_firmware_boot_timeout_ms = + kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_BOOT_TIMEOUT); + + wait_timeout = kbase_csf_timeout_in_jiffies(csf_firmware_boot_timeout_ms); + /* Firmware will generate a global interface interrupt once booting * is complete */ @@ -429,24 +444,17 @@ static int reload_fw_image(struct kbase_device *kbdev) { const u32 magic = FIRMWARE_HEADER_MAGIC; struct kbase_csf_firmware_interface *interface; - const struct firmware *firmware; + struct kbase_csf_mcu_fw *const mcu_fw = &kbdev->csf.fw; int ret = 0; - if (request_firmware(&firmware, fw_name, kbdev->dev) != 0) { - dev_err(kbdev->dev, - "Failed to reload firmware image '%s'\n", - fw_name); - return -ENOENT; - } - - /* Do couple of basic sanity checks */ - if (firmware->size < FIRMWARE_HEADER_LENGTH) { - dev_err(kbdev->dev, "Firmware image unexpectedly too small\n"); + if (WARN_ON(mcu_fw->data == NULL)) { + dev_err(kbdev->dev, "Firmware image copy not loaded\n"); ret = -EINVAL; goto out; } - if (memcmp(firmware->data, &magic, sizeof(magic)) != 0) { + /* Do a basic sanity check on MAGIC signature */ + if (memcmp(mcu_fw->data, &magic, sizeof(magic)) != 0) { dev_err(kbdev->dev, "Incorrect magic value, firmware image could have been corrupted\n"); ret = -EINVAL; goto out; @@ -461,16 +469,14 @@ static int reload_fw_image(struct kbase_device *kbdev) continue; } - load_fw_image_section(kbdev, firmware->data, interface->phys, - interface->num_pages, interface->flags, - interface->data_start, interface->data_end); + load_fw_image_section(kbdev, mcu_fw->data, interface->phys, interface->num_pages, + interface->flags, interface->data_start, interface->data_end); } kbdev->csf.firmware_full_reload_needed = false; kbase_csf_firmware_reload_trace_buffers_data(kbdev); out: - release_firmware(firmware); return ret; } @@ -540,8 +546,8 @@ static inline bool entry_find_large_page_to_reuse( * Return: 0 if successful, negative error code on failure */ static int parse_memory_setup_entry(struct kbase_device *kbdev, - const struct firmware *fw, - const u32 *entry, unsigned int size) + const struct kbase_csf_mcu_fw *const fw, const u32 *entry, + unsigned int size) { int ret = 0; const u32 flags = entry[0]; @@ -773,7 +779,8 @@ out: * @size: Size (in bytes) of the section */ static int parse_timeline_metadata_entry(struct kbase_device *kbdev, - const struct firmware *fw, const u32 *entry, unsigned int size) + const struct kbase_csf_mcu_fw *const fw, const u32 *entry, + unsigned int size) { const u32 data_start = entry[0]; const u32 data_size = entry[1]; @@ -816,6 +823,57 @@ static int parse_timeline_metadata_entry(struct kbase_device *kbdev, } /** + * parse_build_info_metadata_entry() - Process a "build info metadata" section + * @kbdev: Kbase device structure + * @fw: Firmware image containing the section + * @entry: Pointer to the section + * @size: Size (in bytes) of the section + * + * 
This prints the git SHA of the firmware on frimware load. + * + * Return: 0 if successful, negative error code on failure + */ +static int parse_build_info_metadata_entry(struct kbase_device *kbdev, + const struct kbase_csf_mcu_fw *const fw, + const u32 *entry, unsigned int size) +{ + const u32 meta_start_addr = entry[0]; + char *ptr = NULL; + size_t sha_pattern_len = strlen(BUILD_INFO_GIT_SHA_PATTERN); + + /* Only print git SHA to avoid releasing sensitive information */ + ptr = strstr(fw->data + meta_start_addr, BUILD_INFO_GIT_SHA_PATTERN); + /* Check that we won't overrun the found string */ + if (ptr && + strlen(ptr) >= BUILD_INFO_GIT_SHA_LEN + BUILD_INFO_GIT_DIRTY_LEN + sha_pattern_len) { + char git_sha[BUILD_INFO_GIT_SHA_LEN + BUILD_INFO_GIT_DIRTY_LEN + 1]; + int i = 0; + + /* Move ptr to start of SHA */ + ptr += sha_pattern_len; + for (i = 0; i < BUILD_INFO_GIT_SHA_LEN; i++) { + /* Ensure that the SHA is made up of hex digits */ + if (!isxdigit(ptr[i])) + break; + + git_sha[i] = ptr[i]; + } + + /* Check if the next char indicates git SHA is dirty */ + if (ptr[i] == ' ' || ptr[i] == '+') { + git_sha[i] = ptr[i]; + i++; + } + git_sha[i] = '\0'; + + dev_info(kbdev->dev, "Mali firmware git_sha: %s\n", git_sha); + } else + dev_info(kbdev->dev, "Mali firmware git_sha not found or invalid\n"); + + return 0; +} + +/** * load_firmware_entry() - Process an entry from a firmware image * * @kbdev: Kbase device @@ -831,9 +889,8 @@ static int parse_timeline_metadata_entry(struct kbase_device *kbdev, * * Return: 0 if successful, negative error code on failure */ -static int load_firmware_entry(struct kbase_device *kbdev, - const struct firmware *fw, - u32 offset, u32 header) +static int load_firmware_entry(struct kbase_device *kbdev, const struct kbase_csf_mcu_fw *const fw, + u32 offset, u32 header) { const unsigned int type = entry_type(header); unsigned int size = entry_size(header); @@ -895,6 +952,13 @@ static int load_firmware_entry(struct kbase_device *kbdev, return -EINVAL; } return parse_timeline_metadata_entry(kbdev, fw, entry, size); + case CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA: + if (size < BUILD_INFO_METADATA_SIZE_OFFSET + sizeof(*entry)) { + dev_err(kbdev->dev, "Build info metadata entry too short (size=%u)\n", + size); + return -EINVAL; + } + return parse_build_info_metadata_entry(kbdev, fw, entry, size); } if (!optional) { @@ -1298,6 +1362,26 @@ u32 kbase_csf_firmware_global_output( KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_output); /** + * csf_doorbell_offset() - Calculate the offset to the CSF host doorbell + * @doorbell_nr: Doorbell number + * + * Return: CSF host register offset for the specified doorbell number. + */ +static u32 csf_doorbell_offset(int doorbell_nr) +{ + WARN_ON(doorbell_nr < 0); + WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL); + + return CSF_HW_DOORBELL_PAGE_OFFSET + (doorbell_nr * CSF_HW_DOORBELL_PAGE_SIZE); +} + +void kbase_csf_ring_doorbell(struct kbase_device *kbdev, int doorbell_nr) +{ + kbase_reg_write(kbdev, csf_doorbell_offset(doorbell_nr), (u32)1); +} +EXPORT_SYMBOL(kbase_csf_ring_doorbell); + +/** * handle_internal_firmware_fatal - Handler for CS internal firmware fault. 
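parse_build_info_metadata_entry() above deliberately logs only the firmware's git SHA (40 hex digits plus an optional dirty-tree marker) rather than the whole build-info blob. The same string handling can be tried in isolation with a userspace sketch and made-up input (hypothetical helper, not the driver function):

#include <ctype.h>
#include <stdio.h>
#include <string.h>

#define GIT_SHA_LEN 40
#define GIT_DIRTY_LEN 1
#define GIT_SHA_PATTERN "git_sha: "

/* Copy the SHA (and optional dirty marker) into out[GIT_SHA_LEN + GIT_DIRTY_LEN + 1]. */
static int extract_git_sha(const char *meta, char *out)
{
        size_t pattern_len = strlen(GIT_SHA_PATTERN);
        const char *p = strstr(meta, GIT_SHA_PATTERN);
        int i;

        if (!p || strlen(p) < GIT_SHA_LEN + GIT_DIRTY_LEN + pattern_len)
                return -1;

        p += pattern_len;
        for (i = 0; i < GIT_SHA_LEN; i++) {
                if (!isxdigit((unsigned char)p[i]))
                        break;
                out[i] = p[i];
        }
        if (p[i] == ' ' || p[i] == '+') {       /* dirty-tree marker */
                out[i] = p[i];
                i++;
        }
        out[i] = '\0';
        return 0;
}

int main(void)
{
        char sha[GIT_SHA_LEN + GIT_DIRTY_LEN + 1];
        const char *meta = "git_sha: 0123456789abcdef0123456789abcdef01234567+ built by gcc";

        if (!extract_git_sha(meta, sha))
                printf("Mali firmware git_sha: %s\n", sha);
        return 0;
}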
* * @kbdev: Pointer to kbase device @@ -1479,6 +1563,7 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev) kbdev->csf.gpu_idle_dur_count); } + static void global_init(struct kbase_device *const kbdev, u64 core_mask) { u32 const ack_irq_mask = @@ -1660,7 +1745,7 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev) if (version != kbdev->csf.global_iface.version) dev_err(kbdev->dev, "Version check failed in firmware reboot."); - KBASE_KTRACE_ADD(kbdev, FIRMWARE_REBOOT, NULL, 0u); + KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_REBOOT, NULL, 0u); /* Tell MCU state machine to transit to next state */ kbdev->csf.firmware_reloaded = true; @@ -1694,8 +1779,9 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_u dev_warn(kbdev->dev, "No GPU clock, unexpected intregration issue!"); spin_unlock(&kbdev->pm.clk_rtm.lock); - dev_info(kbdev->dev, "Can't get the timestamp frequency, " - "use cycle counter format with firmware idle hysteresis!"); + dev_info( + kbdev->dev, + "Can't get the timestamp frequency, use cycle counter format with firmware idle hysteresis!"); } /* Formula for dur_val = ((dur_us/MICROSECONDS_PER_SECOND) * freq_HZ) >> 10) */ @@ -1827,8 +1913,9 @@ static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u3 dev_warn(kbdev->dev, "No GPU clock, unexpected integration issue!"); spin_unlock(&kbdev->pm.clk_rtm.lock); - dev_info(kbdev->dev, "Can't get the timestamp frequency, " - "use cycle counter with MCU Core Poweroff timer!"); + dev_info( + kbdev->dev, + "Can't get the timestamp frequency, use cycle counter with MCU shader Core Poweroff timer!"); } /* Formula for dur_val = ((dur_us/1e6) * freq_HZ) >> 10) */ @@ -1852,7 +1939,14 @@ static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u3 u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev) { - return kbdev->csf.mcu_core_pwroff_dur_us; + u32 pwroff; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + pwroff = kbdev->csf.mcu_core_pwroff_dur_us; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return pwroff; } u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur) @@ -1865,7 +1959,7 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 kbdev->csf.mcu_core_pwroff_dur_count = pwroff; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - dev_dbg(kbdev->dev, "MCU Core Poweroff input update: 0x%.8x", pwroff); + dev_dbg(kbdev->dev, "MCU shader Core Poweroff input update: 0x%.8x", pwroff); return pwroff; } @@ -1947,11 +2041,28 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) kbdev->csf.fw_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT); + INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces); + INIT_LIST_HEAD(&kbdev->csf.firmware_config); + INIT_LIST_HEAD(&kbdev->csf.firmware_timeline_metadata); + INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list); + INIT_WORK(&kbdev->csf.firmware_reload_work, + kbase_csf_firmware_reload_worker); + INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker); + INIT_WORK(&kbdev->csf.coredump_work, coredump_worker); + + mutex_init(&kbdev->csf.reg_lock); + + kbdev->csf.fw = (struct kbase_csf_mcu_fw){ .data = NULL }; + + return 0; +} + +int kbase_csf_firmware_late_init(struct kbase_device *kbdev) +{ kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS; #ifdef KBASE_PM_RUNTIME if (kbase_pm_gpu_sleep_allowed(kbdev)) - kbdev->csf.gpu_idle_hysteresis_ms /= - 
FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; + kbdev->csf.gpu_idle_hysteresis_ms /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; #endif WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms); @@ -1959,7 +2070,7 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count( kbdev, MALI_HOST_CONTROLS_SC_RAILS_IDLE_TIMER_US); - /* Set to the lowest posssible value for FW to immediately write + /* Set to the lowest possible value for FW to immediately write * to the power off register to disable the cores. */ kbdev->csf.mcu_core_pwroff_dur_count = 1; @@ -1971,23 +2082,13 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US); #endif - INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces); - INIT_LIST_HEAD(&kbdev->csf.firmware_config); - INIT_LIST_HEAD(&kbdev->csf.firmware_timeline_metadata); - INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list); - INIT_WORK(&kbdev->csf.firmware_reload_work, - kbase_csf_firmware_reload_worker); - INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker); - INIT_WORK(&kbdev->csf.coredump_work, coredump_worker); - - mutex_init(&kbdev->csf.reg_lock); - return 0; } -int kbase_csf_firmware_init(struct kbase_device *kbdev) +int kbase_csf_firmware_load_init(struct kbase_device *kbdev) { - const struct firmware *firmware; + const struct firmware *firmware = NULL; + struct kbase_csf_mcu_fw *const mcu_fw = &kbdev->csf.fw; const u32 magic = FIRMWARE_HEADER_MAGIC; u8 version_major, version_minor; u32 version_hash; @@ -2014,7 +2115,7 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev) if (ret != 0) { dev_err(kbdev->dev, "Failed to setup the rb tree for managing shared interface segment\n"); - goto error; + goto err_out; } if (request_firmware(&firmware, fw_name, kbdev->dev) != 0) { @@ -2022,43 +2123,59 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev) "Failed to load firmware image '%s'\n", fw_name); ret = -ENOENT; - goto error; + } else { + /* Try to save a copy and then release the loaded firmware image */ + mcu_fw->size = firmware->size; + mcu_fw->data = vmalloc((unsigned long)mcu_fw->size); + + if (mcu_fw->data == NULL) { + ret = -ENOMEM; + } else { + memcpy(mcu_fw->data, firmware->data, mcu_fw->size); + dev_dbg(kbdev->dev, "Firmware image (%zu-bytes) retained in csf.fw\n", + mcu_fw->size); + } + + release_firmware(firmware); } - if (firmware->size < FIRMWARE_HEADER_LENGTH) { + /* If error in loading or saving the image, branches to error out */ + if (ret) + goto err_out; + + if (mcu_fw->size < FIRMWARE_HEADER_LENGTH) { dev_err(kbdev->dev, "Firmware too small\n"); ret = -EINVAL; - goto error; + goto err_out; } - if (memcmp(firmware->data, &magic, sizeof(magic)) != 0) { + if (memcmp(mcu_fw->data, &magic, sizeof(magic)) != 0) { dev_err(kbdev->dev, "Incorrect firmware magic\n"); ret = -EINVAL; - goto error; + goto err_out; } - version_minor = firmware->data[4]; - version_major = firmware->data[5]; + version_minor = mcu_fw->data[4]; + version_major = mcu_fw->data[5]; if (version_major != FIRMWARE_HEADER_VERSION) { dev_err(kbdev->dev, "Firmware header version %d.%d not understood\n", version_major, version_minor); ret = -EINVAL; - goto error; + goto err_out; } - memcpy(&version_hash, &firmware->data[8], sizeof(version_hash)); + memcpy(&version_hash, &mcu_fw->data[8], sizeof(version_hash)); dev_notice(kbdev->dev, "Loading Mali firmware 0x%x", version_hash); - memcpy(&entry_end_offset, &firmware->data[0x10], - sizeof(entry_end_offset)); + memcpy(&entry_end_offset, 
&mcu_fw->data[0x10], sizeof(entry_end_offset)); - if (entry_end_offset > firmware->size) { + if (entry_end_offset > mcu_fw->size) { dev_err(kbdev->dev, "Firmware image is truncated\n"); ret = -EINVAL; - goto error; + goto err_out; } entry_offset = FIRMWARE_HEADER_LENGTH; @@ -2066,15 +2183,14 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev) u32 header; unsigned int size; - memcpy(&header, &firmware->data[entry_offset], sizeof(header)); + memcpy(&header, &mcu_fw->data[entry_offset], sizeof(header)); size = entry_size(header); - ret = load_firmware_entry(kbdev, firmware, entry_offset, - header); + ret = load_firmware_entry(kbdev, mcu_fw, entry_offset, header); if (ret != 0) { dev_err(kbdev->dev, "Failed to load firmware image\n"); - goto error; + goto err_out; } entry_offset += size; } @@ -2082,25 +2198,25 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev) if (!kbdev->csf.shared_interface) { dev_err(kbdev->dev, "Shared interface region not found\n"); ret = -EINVAL; - goto error; + goto err_out; } else { ret = setup_shared_iface_static_region(kbdev); if (ret != 0) { dev_err(kbdev->dev, "Failed to insert a region for shared iface entry parsed from fw image\n"); - goto error; + goto err_out; } } ret = kbase_csf_firmware_trace_buffers_init(kbdev); if (ret != 0) { dev_err(kbdev->dev, "Failed to initialize trace buffers\n"); - goto error; + goto err_out; } ret = kbasep_platform_fw_config_init(kbdev); if (ret != 0) { dev_err(kbdev->dev, "Failed to perform platform specific FW configuration"); - goto error; + goto err_out; } /* Make sure L2 cache is powered up */ @@ -2113,50 +2229,54 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev) ret = parse_capabilities(kbdev); if (ret != 0) - goto error; + goto err_out; ret = kbase_csf_doorbell_mapping_init(kbdev); if (ret != 0) - goto error; + goto err_out; ret = kbase_csf_scheduler_init(kbdev); if (ret != 0) - goto error; + goto err_out; ret = kbase_csf_setup_dummy_user_reg_page(kbdev); if (ret != 0) - goto error; + goto err_out; ret = kbase_csf_timeout_init(kbdev); if (ret != 0) - goto error; + goto err_out; ret = global_init_on_boot(kbdev); if (ret != 0) - goto error; + goto err_out; ret = kbase_csf_firmware_cfg_init(kbdev); if (ret != 0) - goto error; + goto err_out; ret = kbase_device_csf_iterator_trace_init(kbdev); if (ret != 0) - goto error; + goto err_out; - /* Firmware loaded successfully */ - release_firmware(firmware); - KBASE_KTRACE_ADD(kbdev, FIRMWARE_BOOT, NULL, + ret = kbase_csf_firmware_log_init(kbdev); + if (ret != 0) { + dev_err(kbdev->dev, "Failed to initialize FW trace (err %d)", ret); + goto err_out; + } + + /* Firmware loaded successfully, ret = 0 */ + KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_BOOT, NULL, (((u64)version_hash) << 32) | (((u64)version_major) << 8) | version_minor); return 0; -error: - kbase_csf_firmware_term(kbdev); - release_firmware(firmware); +err_out: + kbase_csf_firmware_unload_term(kbdev); return ret; } -void kbase_csf_firmware_term(struct kbase_device *kbdev) +void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) { unsigned long flags; int ret = 0; @@ -2167,6 +2287,8 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev) WARN(ret, "failed to wait for GPU reset"); + kbase_csf_firmware_log_term(kbdev); + kbase_csf_firmware_cfg_term(kbdev); kbase_csf_timeout_term(kbdev); @@ -2242,6 +2364,13 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev) kfree(metadata); } + if (kbdev->csf.fw.data) { + /* Free the copy of the firmware image */ + vfree(kbdev->csf.fw.data); + 
kbdev->csf.fw.data = NULL; + dev_dbg(kbdev->dev, "Free retained image csf.fw (%zu-bytes)\n", kbdev->csf.fw.size); + } + /* This will also free up the region allocated for the shared interface * entry parsed from the firmware image. */ @@ -2344,10 +2473,46 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev) void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev) { - int err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK); + int err; + + lockdep_assert_held(&kbdev->mmu_hw_mutex); + + err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK); + + if (!err) { +#define WAIT_TIMEOUT 5000 /* 50ms timeout */ +#define DELAY_TIME_IN_US 10 + const int max_iterations = WAIT_TIMEOUT; + int loop; + + /* Wait for the GPU to actually enter protected mode */ + for (loop = 0; loop < max_iterations; loop++) { + unsigned long flags; + bool pmode_exited; + + if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & + GPU_STATUS_PROTECTED_MODE_ACTIVE) + break; + + /* Check if GPU already exited the protected mode */ + kbase_csf_scheduler_spin_lock(kbdev, &flags); + pmode_exited = + !kbase_csf_scheduler_protected_mode_in_use(kbdev); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + if (pmode_exited) + break; + + udelay(DELAY_TIME_IN_US); + } + + if (loop == max_iterations) { + dev_err(kbdev->dev, "Timeout for actual pmode entry after PROTM_ENTER ack"); + err = -ETIMEDOUT; + } + } if (err) { - if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } @@ -2555,7 +2720,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( gpu_map_prot = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); cpu_map_prot = pgprot_writecombine(cpu_map_prot); - }; + } phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL); if (!phys) @@ -2656,3 +2821,4 @@ void kbase_csf_firmware_mcu_shared_mapping_term( vunmap(csf_mapping->cpu_addr); kfree(csf_mapping->phys); } + diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.h b/mali_kbase/csf/mali_kbase_csf_firmware.h index 74bae39..edb1563 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware.h +++ b/mali_kbase/csf/mali_kbase_csf_firmware.h @@ -246,6 +246,7 @@ void kbase_csf_firmware_csg_input_mask( u32 kbase_csf_firmware_csg_output( const struct kbase_csf_cmd_stream_group_info *info, u32 offset); + /** * struct kbase_csf_global_iface - Global CSF interface * provided by the firmware. @@ -324,24 +325,13 @@ u32 kbase_csf_firmware_global_input_read( u32 kbase_csf_firmware_global_output( const struct kbase_csf_global_iface *iface, u32 offset); -/* Calculate the offset to the Hw doorbell page corresponding to the - * doorbell number. 
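The protected-mode entry wait added above is a bounded busy-poll: 5000 iterations of 10us (50ms in total) reading GPU_STATUS, with an early exit if the scheduler reports that protected mode has already been left, and a GPU reset on timeout. Stripped of the kbase specifics, the underlying pattern looks like this (generic sketch, hypothetical helper):

#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/types.h>

/*
 * Poll until done(data) reports true, waiting delay_us between reads and
 * giving up after max_iters iterations.
 */
static int poll_with_timeout(bool (*done)(void *data), void *data,
                             unsigned int delay_us, unsigned int max_iters)
{
        unsigned int i;

        for (i = 0; i < max_iters; i++) {
                if (done(data))
                        return 0;
                udelay(delay_us);
        }
        return -ETIMEDOUT;
}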
+/** + * kbase_csf_ring_doorbell() - Ring the doorbell + * + * @kbdev: An instance of the GPU platform device + * @doorbell_nr: Index of the HW doorbell page */ -static u32 csf_doorbell_offset(int doorbell_nr) -{ - WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL); - - return CSF_HW_DOORBELL_PAGE_OFFSET + - (doorbell_nr * CSF_HW_DOORBELL_PAGE_SIZE); -} - -static inline void kbase_csf_ring_doorbell(struct kbase_device *kbdev, - int doorbell_nr) -{ - WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL); - - kbase_reg_write(kbdev, csf_doorbell_offset(doorbell_nr), (u32)1); -} +void kbase_csf_ring_doorbell(struct kbase_device *kbdev, int doorbell_nr); /** * kbase_csf_read_firmware_memory - Read a value in a GPU address @@ -374,7 +364,7 @@ void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, u32 gpu_addr, u32 value); /** - * kbase_csf_firmware_early_init() - Early initializatin for the firmware. + * kbase_csf_firmware_early_init() - Early initialization for the firmware. * @kbdev: Kbase device * * Initialize resources related to the firmware. Must be called at kbase probe. @@ -384,22 +374,33 @@ void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, int kbase_csf_firmware_early_init(struct kbase_device *kbdev); /** - * kbase_csf_firmware_init() - Load the firmware for the CSF MCU + * kbase_csf_firmware_late_init() - Late initialization for the firmware. + * @kbdev: Kbase device + * + * Initialize resources related to the firmware. But must be called after + * backend late init is done. Must be used at probe time only. + * + * Return: 0 if successful, negative error code on failure + */ +int kbase_csf_firmware_late_init(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_load_init() - Load the firmware for the CSF MCU * @kbdev: Kbase device * * Request the firmware from user space and load it into memory. * * Return: 0 if successful, negative error code on failure */ -int kbase_csf_firmware_init(struct kbase_device *kbdev); +int kbase_csf_firmware_load_init(struct kbase_device *kbdev); /** - * kbase_csf_firmware_term() - Unload the firmware + * kbase_csf_firmware_unload_term() - Unload the firmware * @kbdev: Kbase device * - * Frees the memory allocated by kbase_csf_firmware_init() + * Frees the memory allocated by kbase_csf_firmware_load_init() */ -void kbase_csf_firmware_term(struct kbase_device *kbdev); +void kbase_csf_firmware_unload_term(struct kbase_device *kbdev); /** * kbase_csf_firmware_ping - Send the ping request to firmware. @@ -454,8 +455,8 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev); * * @kbdev: Instance of a GPU platform device that implements a CSF interface. * - * This function needs to be called after kbase_csf_wait_protected_mode_enter() - * to wait for the protected mode entry to complete. GPU reset is triggered if + * This function needs to be called after kbase_csf_enter_protected_mode() to + * wait for the GPU to actually enter protected mode. GPU reset is triggered if * the wait is unsuccessful. */ void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev); @@ -523,9 +524,9 @@ bool kbase_csf_firmware_is_mcu_in_sleep(struct kbase_device *kbdev); #endif /** - * kbase_trigger_firmware_reload - Trigger the reboot of MCU firmware, for the - * cold boot case firmware image would be - * reloaded from filesystem into memory. + * kbase_csf_firmware_trigger_reload() - Trigger the reboot of MCU firmware, for + * the cold boot case firmware image would + * be reloaded from filesystem into memory. 
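With the header changes above, firmware bring-up is split into three probe-time stages plus a single teardown entry point. An illustrative ordering of the renamed functions (the real call sites are in the kbase device init tables; this wrapper is only a sketch and is not compilable on its own):

static int example_fw_bringup(struct kbase_device *kbdev)
{
        int ret;

        ret = kbase_csf_firmware_early_init(kbdev);     /* at probe */
        if (ret)
                return ret;

        ret = kbase_csf_firmware_late_init(kbdev);      /* after backend late init */
        if (ret)
                return ret;

        ret = kbase_csf_firmware_load_init(kbdev);      /* request and load the MCU image */
        if (ret)
                return ret;

        /* ... normal operation ... */

        kbase_csf_firmware_unload_term(kbdev);          /* frees what load_init set up */
        return 0;
}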
* * @kbdev: Instance of a GPU platform device that implements a CSF interface. */ @@ -738,18 +739,18 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev); u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur); /** - * kbase_csf_firmware_get_mcu_core_pwroff_time - Get the MCU core power-off + * kbase_csf_firmware_get_mcu_core_pwroff_time - Get the MCU shader Core power-off * time value * * @kbdev: Instance of a GPU platform device that implements a CSF interface. * - * Return: the internally recorded MCU core power-off (nominal) value. The unit + * Return: the internally recorded MCU shader Core power-off (nominal) timeout value. The unit * of the value is in micro-seconds. */ u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev); /** - * kbase_csf_firmware_set_mcu_core_pwroff_time - Set the MCU core power-off + * kbase_csf_firmware_set_mcu_core_pwroff_time - Set the MCU shader Core power-off * time value * * @kbdev: Instance of a GPU platform device that implements a CSF interface. @@ -766,7 +767,7 @@ u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev); * returned value is the source configuration flag, and it is set to '1' * when CYCLE_COUNTER alternative source is used. * - * The configured MCU core power-off timer will only have effect when the host + * The configured MCU shader Core power-off timer will only have effect when the host * driver has delegated the shader cores' power management to MCU. * * Return: the actual internal core power-off timer value in register defined @@ -805,4 +806,6 @@ static inline u32 kbase_csf_interface_version(u32 major, u32 minor, u32 patch) * Return: 0 if success, or negative error code on failure. */ int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev); + + #endif diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c index b270c6e..ef8f328 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,6 +22,7 @@ #include <mali_kbase.h> #include "mali_kbase_csf_firmware_cfg.h" #include <mali_kbase_reset_gpu.h> +#include <linux/version.h> #if CONFIG_SYSFS #define CSF_FIRMWARE_CFG_SYSFS_DIR_NAME "firmware_config" @@ -209,11 +210,18 @@ static struct attribute *fw_cfg_attrs[] = { &fw_cfg_attr_cur, NULL, }; +#if (KERNEL_VERSION(5, 2, 0) <= LINUX_VERSION_CODE) +ATTRIBUTE_GROUPS(fw_cfg); +#endif static struct kobj_type fw_cfg_kobj_type = { .release = &fw_cfg_kobj_release, .sysfs_ops = &fw_cfg_ops, +#if (KERNEL_VERSION(5, 2, 0) <= LINUX_VERSION_CODE) + .default_groups = fw_cfg_groups, +#else .default_attrs = fw_cfg_attrs, +#endif }; int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev) @@ -273,9 +281,8 @@ void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev) } int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev, - const struct firmware *fw, - const u32 *entry, - unsigned int size, bool updatable) + const struct kbase_csf_mcu_fw *const fw, + const u32 *entry, unsigned int size, bool updatable) { const char *name = (char *)&entry[3]; struct firmware_config *config; @@ -336,8 +343,8 @@ void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev) } int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev, - const struct firmware *fw, - const u32 *entry, unsigned int size) + const struct kbase_csf_mcu_fw *const fw, + const u32 *entry, unsigned int size) { return 0; } diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h index edf62ed..770fedb 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h +++ b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -67,10 +67,8 @@ void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev); * Return: 0 if successful, negative error code on failure */ int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev, - const struct firmware *fw, - const u32 *entry, - unsigned int size, - bool updatable); + const struct kbase_csf_mcu_fw *const fw, + const u32 *entry, unsigned int size, bool updatable); /** * kbase_csf_firmware_cfg_find_config_address() - Get a FW config option address diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_log.c b/mali_kbase/csf/mali_kbase_csf_firmware_log.c new file mode 100644 index 0000000..bfcc6c8 --- /dev/null +++ b/mali_kbase/csf/mali_kbase_csf_firmware_log.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
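In the firmware_config sysfs change above, ATTRIBUTE_GROUPS(fw_cfg) generates the fw_cfg_groups array consumed by .default_groups on 5.2+ kernels, while older kernels keep .default_attrs. Roughly what the macro provides (simplified; the real definition in <linux/sysfs.h> also covers binary attributes):

static const struct attribute_group fw_cfg_group = {
        .attrs = fw_cfg_attrs,
};

static const struct attribute_group *fw_cfg_groups[] = {
        &fw_cfg_group,
        NULL,
};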
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include <mali_kbase.h> +#include <csf/mali_kbase_csf_firmware_log.h> +#include <csf/mali_kbase_csf_trace_buffer.h> +#include <linux/debugfs.h> +#include <linux/string.h> +#include <linux/workqueue.h> + +#if defined(CONFIG_DEBUG_FS) + +static int kbase_csf_firmware_log_enable_mask_read(void *data, u64 *val) +{ + struct kbase_device *kbdev = (struct kbase_device *)data; + struct firmware_trace_buffer *tb = + kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); + + if (tb == NULL) { + dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); + return -EIO; + } + /* The enabled traces limited to u64 here, regarded practical */ + *val = kbase_csf_firmware_trace_buffer_get_active_mask64(tb); + return 0; +} + +static int kbase_csf_firmware_log_enable_mask_write(void *data, u64 val) +{ + struct kbase_device *kbdev = (struct kbase_device *)data; + struct firmware_trace_buffer *tb = + kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); + u64 new_mask; + unsigned int enable_bits_count; + + if (tb == NULL) { + dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); + return -EIO; + } + + /* Ignore unsupported types */ + enable_bits_count = kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count(tb); + if (enable_bits_count > 64) { + dev_dbg(kbdev->dev, "Limit enabled bits count from %u to 64", enable_bits_count); + enable_bits_count = 64; + } + new_mask = val & ((1 << enable_bits_count) - 1); + + if (new_mask != kbase_csf_firmware_trace_buffer_get_active_mask64(tb)) + return kbase_csf_firmware_trace_buffer_set_active_mask64(tb, new_mask); + else + return 0; +} + +static int kbasep_csf_firmware_log_debugfs_open(struct inode *in, struct file *file) +{ + struct kbase_device *kbdev = in->i_private; + + file->private_data = kbdev; + dev_dbg(kbdev->dev, "Opened firmware trace buffer dump debugfs file"); + + return 0; +} + +static ssize_t kbasep_csf_firmware_log_debugfs_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + struct kbase_device *kbdev = file->private_data; + struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + unsigned int n_read; + unsigned long not_copied; + /* Limit reads to the kernel dump buffer size */ + size_t mem = MIN(size, FIRMWARE_LOG_DUMP_BUF_SIZE); + int ret; + + struct firmware_trace_buffer *tb = + kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); + + if (tb == NULL) { + dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); + return -EIO; + } + + if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0) + return -EBUSY; + + /* Reading from userspace is only allowed in manual mode */ + if (fw_log->mode != KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL) { + ret = -EINVAL; + goto out; + } + + n_read = kbase_csf_firmware_trace_buffer_read_data(tb, fw_log->dump_buf, mem); + + /* Do the copy, if we have obtained some trace data */ + not_copied = (n_read) ? 
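The enable-mask write handler above clamps the user-supplied value to the number of trace enable bits the buffer supports. The (1 << enable_bits_count) form is an int expression, so for widths of 32 or more a shift-width-safe variant of the same clamp looks like this (stand-alone sketch, user-space C):

#include <stdint.h>

static uint64_t clamp_enable_mask(uint64_t val, unsigned int enable_bits_count)
{
        /* (1 << n) - 1 is undefined for n >= 32 (int shift), and even a
         * 64-bit shift by 64 is undefined, hence the explicit full-width case.
         */
        if (enable_bits_count >= 64)
                return val;

        return val & ((UINT64_C(1) << enable_bits_count) - 1);
}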
copy_to_user(buf, fw_log->dump_buf, n_read) : 0; + + if (not_copied) { + dev_err(kbdev->dev, "Couldn't copy trace buffer data to user space buffer"); + ret = -EFAULT; + goto out; + } + + *ppos += n_read; + ret = n_read; + +out: + atomic_set(&fw_log->busy, 0); + return ret; +} + +static int kbase_csf_firmware_log_mode_read(void *data, u64 *val) +{ + struct kbase_device *kbdev = (struct kbase_device *)data; + struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + + *val = fw_log->mode; + return 0; +} + +static int kbase_csf_firmware_log_mode_write(void *data, u64 val) +{ + struct kbase_device *kbdev = (struct kbase_device *)data; + struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + int ret = 0; + + if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0) + return -EBUSY; + + if (val == fw_log->mode) + goto out; + + switch (val) { + case KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL: + cancel_delayed_work_sync(&fw_log->poll_work); + break; + case KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT: + schedule_delayed_work(&fw_log->poll_work, + msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS)); + break; + default: + ret = -EINVAL; + goto out; + } + + fw_log->mode = val; + +out: + atomic_set(&fw_log->busy, 0); + return ret; +} + +DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_enable_mask_fops, + kbase_csf_firmware_log_enable_mask_read, + kbase_csf_firmware_log_enable_mask_write, "%llx\n"); + +static const struct file_operations kbasep_csf_firmware_log_debugfs_fops = { + .owner = THIS_MODULE, + .open = kbasep_csf_firmware_log_debugfs_open, + .read = kbasep_csf_firmware_log_debugfs_read, + .llseek = no_llseek, +}; + +DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_mode_fops, kbase_csf_firmware_log_mode_read, + kbase_csf_firmware_log_mode_write, "%llu\n"); + +#endif /* CONFIG_DEBUG_FS */ + +static void kbase_csf_firmware_log_poll(struct work_struct *work) +{ + struct kbase_device *kbdev = + container_of(work, struct kbase_device, csf.fw_log.poll_work.work); + struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + + schedule_delayed_work(&fw_log->poll_work, + msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS)); + + kbase_csf_firmware_log_dump_buffer(kbdev); +} + +int kbase_csf_firmware_log_init(struct kbase_device *kbdev) +{ + struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + + /* Add one byte for null-termination */ + fw_log->dump_buf = kmalloc(FIRMWARE_LOG_DUMP_BUF_SIZE + 1, GFP_KERNEL); + if (fw_log->dump_buf == NULL) + return -ENOMEM; + + /* Ensure null-termination for all strings */ + fw_log->dump_buf[FIRMWARE_LOG_DUMP_BUF_SIZE] = 0; + + fw_log->mode = KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL; + + atomic_set(&fw_log->busy, 0); + INIT_DEFERRABLE_WORK(&fw_log->poll_work, kbase_csf_firmware_log_poll); + +#if defined(CONFIG_DEBUG_FS) + debugfs_create_file("fw_trace_enable_mask", 0644, kbdev->mali_debugfs_directory, kbdev, + &kbase_csf_firmware_log_enable_mask_fops); + debugfs_create_file("fw_traces", 0444, kbdev->mali_debugfs_directory, kbdev, + &kbasep_csf_firmware_log_debugfs_fops); + debugfs_create_file("fw_trace_mode", 0644, kbdev->mali_debugfs_directory, kbdev, + &kbase_csf_firmware_log_mode_fops); +#endif /* CONFIG_DEBUG_FS */ + + return 0; +} + +void kbase_csf_firmware_log_term(struct kbase_device *kbdev) +{ + struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + + cancel_delayed_work_sync(&fw_log->poll_work); + kfree(fw_log->dump_buf); +} + +void kbase_csf_firmware_log_dump_buffer(struct kbase_device *kbdev) +{ + struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; 
+ u8 *buf = fw_log->dump_buf, *p, *pnewline, *pend, *pendbuf; + unsigned int read_size, remaining_size; + struct firmware_trace_buffer *tb = + kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); + + if (tb == NULL) { + dev_dbg(kbdev->dev, "Can't get the trace buffer, firmware trace dump skipped"); + return; + } + + if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0) + return; + + /* FW should only print complete messages, so there's no need to handle + * partial messages over multiple invocations of this function + */ + + p = buf; + pendbuf = &buf[FIRMWARE_LOG_DUMP_BUF_SIZE]; + + while ((read_size = kbase_csf_firmware_trace_buffer_read_data(tb, p, pendbuf - p))) { + pend = p + read_size; + p = buf; + + while (p < pend && (pnewline = memchr(p, '\n', pend - p))) { + /* Null-terminate the string */ + *pnewline = 0; + + dev_err(kbdev->dev, "FW> %s", p); + + p = pnewline + 1; + } + + remaining_size = pend - p; + + if (!remaining_size) { + p = buf; + } else if (remaining_size < FIRMWARE_LOG_DUMP_BUF_SIZE) { + /* Copy unfinished string to the start of the buffer */ + memmove(buf, p, remaining_size); + p = &buf[remaining_size]; + } else { + /* Print abnormally long string without newlines */ + dev_err(kbdev->dev, "FW> %s", buf); + p = buf; + } + } + + if (p != buf) { + /* Null-terminate and print last unfinished string */ + *p = 0; + dev_err(kbdev->dev, "FW> %s", buf); + } + + atomic_set(&fw_log->busy, 0); +} diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_log.h b/mali_kbase/csf/mali_kbase_csf_firmware_log.h new file mode 100644 index 0000000..6655f6f --- /dev/null +++ b/mali_kbase/csf/mali_kbase_csf_firmware_log.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_FIRMWARE_LOG_H_ +#define _KBASE_CSF_FIRMWARE_LOG_H_ + +#include <mali_kbase.h> + +/* + * Firmware log dumping buffer size. + */ +#define FIRMWARE_LOG_DUMP_BUF_SIZE PAGE_SIZE + +/** + * kbase_csf_firmware_log_init - Initialize firmware log handling. + * + * @kbdev: Pointer to the Kbase device + * + * Return: The initialization error code. + */ +int kbase_csf_firmware_log_init(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_log_term - Terminate firmware log handling. + * + * @kbdev: Pointer to the Kbase device + */ +void kbase_csf_firmware_log_term(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_log_dump_buffer - Read remaining data in the firmware log + * buffer and print it to dmesg. 
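kbase_csf_firmware_log_dump_buffer() above reads the trace buffer in chunks with arbitrary boundaries, prints every complete newline-terminated message, and carries an unterminated tail to the front of the buffer for the next read. A stand-alone illustration of that carry-over parsing (user-space C, simplified to drop input that would overflow the buffer):

#include <stdio.h>
#include <string.h>

#define BUF_SIZE 32

static void feed(char *buf, size_t *used, const char *chunk)
{
        size_t len = strlen(chunk);
        char *p, *nl;

        if (len > BUF_SIZE - *used)
                len = BUF_SIZE - *used;         /* keep the sketch simple: drop overflow */
        memcpy(buf + *used, chunk, len);
        *used += len;

        p = buf;
        while ((nl = memchr(p, '\n', buf + *used - p))) {
                *nl = '\0';                     /* terminate in place, as in the driver */
                printf("FW> %s\n", p);          /* dev_err(...) in the driver */
                p = nl + 1;
        }

        *used = buf + *used - p;                /* carry the unfinished message... */
        memmove(buf, p, *used);                 /* ...to the start of the buffer */
}

int main(void)
{
        char buf[BUF_SIZE];
        size_t used = 0;

        feed(buf, &used, "boot ok\npower ");    /* second message still incomplete */
        feed(buf, &used, "on\n");               /* ...and completes here */
        return 0;
}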
+ * + * @kbdev: Pointer to the Kbase device + */ +void kbase_csf_firmware_log_dump_buffer(struct kbase_device *kbdev); + +#endif /* _KBASE_CSF_FIRMWARE_LOG_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c index 8a961a7..d03cf73 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -32,6 +32,7 @@ #include "mali_kbase_csf_scheduler.h" #include "mmu/mali_kbase_mmu.h" #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" +#include <backend/gpu/mali_kbase_model_dummy.h> #include <linux/list.h> #include <linux/slab.h> @@ -103,6 +104,7 @@ struct dummy_firmware_interface { (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK) + static inline u32 input_page_read(const u32 *const input, const u32 offset) { WARN_ON(offset % sizeof(u32)); @@ -227,7 +229,8 @@ static int invent_capabilities(struct kbase_device *kbdev) iface->version = 1; iface->kbdev = kbdev; iface->features = 0; - iface->prfcnt_size = 64; + iface->prfcnt_size = + GLB_PRFCNT_SIZE_HARDWARE_SIZE_SET(0, KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE); if (iface->version >= kbase_csf_interface_version(1, 1, 0)) { /* update rate=1, max event size = 1<<8 = 256 */ @@ -371,37 +374,6 @@ u32 kbase_csf_firmware_csg_output( } KBASE_EXPORT_TEST_API(kbase_csf_firmware_csg_output); -static void -csf_firmware_prfcnt_process(const struct kbase_csf_global_iface *const iface, - const u32 glb_req) -{ - struct kbase_device *kbdev = iface->kbdev; - u32 glb_ack = output_page_read(iface->output, GLB_ACK); - /* If the value of GLB_REQ.PRFCNT_SAMPLE is different from the value of - * GLB_ACK.PRFCNT_SAMPLE, the CSF will sample the performance counters. - */ - if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_SAMPLE_MASK) { - /* NO_MALI only uses the first buffer in the ring buffer. */ - input_page_write(iface->input, GLB_PRFCNT_EXTRACT, 0); - output_page_write(iface->output, GLB_PRFCNT_INSERT, 1); - kbase_reg_write(kbdev, GPU_COMMAND, GPU_COMMAND_PRFCNT_SAMPLE); - } - - /* Propagate enable masks to model if request to enable. */ - if (glb_req & GLB_REQ_PRFCNT_ENABLE_MASK) { - u32 tiler_en, l2_en, sc_en; - - tiler_en = input_page_read(iface->input, GLB_PRFCNT_TILER_EN); - l2_en = input_page_read(iface->input, GLB_PRFCNT_MMU_L2_EN); - sc_en = input_page_read(iface->input, GLB_PRFCNT_SHADER_EN); - - /* NO_MALI platform enabled all CSHW counters by default. */ - kbase_reg_write(kbdev, PRFCNT_TILER_EN, tiler_en); - kbase_reg_write(kbdev, PRFCNT_MMU_L2_EN, l2_en); - kbase_reg_write(kbdev, PRFCNT_SHADER_EN, sc_en); - } -} - void kbase_csf_firmware_global_input( const struct kbase_csf_global_iface *const iface, const u32 offset, const u32 value) @@ -412,9 +384,17 @@ void kbase_csf_firmware_global_input( input_page_write(iface->input, offset, value); if (offset == GLB_REQ) { - csf_firmware_prfcnt_process(iface, value); - /* NO_MALI: Immediately acknowledge requests */ - output_page_write(iface->output, GLB_ACK, value); + /* NO_MALI: Immediately acknowledge requests - except for PRFCNT_ENABLE + * and PRFCNT_SAMPLE. 
These will be processed along with the + * corresponding performance counter registers when the global doorbell + * is rung in order to emulate the performance counter sampling behavior + * of the real firmware. + */ + const u32 ack = output_page_read(iface->output, GLB_ACK); + const u32 req_mask = ~(GLB_REQ_PRFCNT_ENABLE_MASK | GLB_REQ_PRFCNT_SAMPLE_MASK); + const u32 toggled = (value ^ ack) & req_mask; + + output_page_write(iface->output, GLB_ACK, ack ^ toggled); } } KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input); @@ -455,6 +435,99 @@ u32 kbase_csf_firmware_global_output( KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_output); /** + * csf_doorbell_prfcnt() - Process CSF performance counter doorbell request + * + * @kbdev: An instance of the GPU platform device + */ +static void csf_doorbell_prfcnt(struct kbase_device *kbdev) +{ + struct kbase_csf_global_iface *iface; + u32 req; + u32 ack; + u32 extract_index; + + if (WARN_ON(!kbdev)) + return; + + iface = &kbdev->csf.global_iface; + + req = input_page_read(iface->input, GLB_REQ); + ack = output_page_read(iface->output, GLB_ACK); + extract_index = input_page_read(iface->input, GLB_PRFCNT_EXTRACT); + + /* Process enable bit toggle */ + if ((req ^ ack) & GLB_REQ_PRFCNT_ENABLE_MASK) { + if (req & GLB_REQ_PRFCNT_ENABLE_MASK) { + /* Reset insert index to zero on enable bit set */ + output_page_write(iface->output, GLB_PRFCNT_INSERT, 0); + WARN_ON(extract_index != 0); + } + ack ^= GLB_REQ_PRFCNT_ENABLE_MASK; + } + + /* Process sample request */ + if ((req ^ ack) & GLB_REQ_PRFCNT_SAMPLE_MASK) { + const u32 ring_size = GLB_PRFCNT_CONFIG_SIZE_GET( + input_page_read(iface->input, GLB_PRFCNT_CONFIG)); + u32 insert_index = output_page_read(iface->output, GLB_PRFCNT_INSERT); + + const bool prev_overflow = (req ^ ack) & GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK; + const bool prev_threshold = (req ^ ack) & GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK; + + /* If ringbuffer is full toggle PRFCNT_OVERFLOW and skip sample */ + if (insert_index - extract_index >= ring_size) { + WARN_ON(insert_index - extract_index > ring_size); + if (!prev_overflow) + ack ^= GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK; + } else { + struct gpu_model_prfcnt_en enable_maps = { + .fe = input_page_read(iface->input, GLB_PRFCNT_CSF_EN), + .tiler = input_page_read(iface->input, GLB_PRFCNT_TILER_EN), + .l2 = input_page_read(iface->input, GLB_PRFCNT_MMU_L2_EN), + .shader = input_page_read(iface->input, GLB_PRFCNT_SHADER_EN), + }; + + const u64 prfcnt_base = + input_page_read(iface->input, GLB_PRFCNT_BASE_LO) + + ((u64)input_page_read(iface->input, GLB_PRFCNT_BASE_HI) << 32); + + u32 *sample_base = (u32 *)(uintptr_t)prfcnt_base + + (KBASE_DUMMY_MODEL_MAX_VALUES_PER_SAMPLE * + (insert_index % ring_size)); + + /* trigger sample dump in the dummy model */ + gpu_model_prfcnt_dump_request(sample_base, enable_maps); + + /* increment insert index and toggle PRFCNT_SAMPLE bit in ACK */ + output_page_write(iface->output, GLB_PRFCNT_INSERT, ++insert_index); + ack ^= GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK; + } + + /* When the ringbuffer reaches 50% capacity toggle PRFCNT_THRESHOLD */ + if (!prev_threshold && (insert_index - extract_index >= (ring_size / 2))) + ack ^= GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK; + } + + /* Update GLB_ACK */ + output_page_write(iface->output, GLB_ACK, ack); +} + +void kbase_csf_ring_doorbell(struct kbase_device *kbdev, int doorbell_nr) +{ + WARN_ON(doorbell_nr < 0); + WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL); + + if (WARN_ON(!kbdev)) + return; + + if (doorbell_nr == 
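Two small pieces of arithmetic carry the NO_MALI emulation above: request/acknowledge bits form a toggle protocol (a request is pending while the REQ and ACK copies differ, and is acknowledged by flipping the ACK copy), and PRFCNT_INSERT/PRFCNT_EXTRACT are free-running indices, so occupancy is simply insert minus extract and stays correct across wrap-around as long as the arithmetic is unsigned. A stand-alone illustration:

#include <assert.h>
#include <stdint.h>

int main(void)
{
        /* Toggle-bit handshake */
        uint32_t req = 0x5, ack = 0x1, mask = 0x4;      /* illustrative bit values */
        uint32_t pending = (req ^ ack) & mask;          /* non-zero: request outstanding */

        ack ^= pending;                                 /* acknowledge by toggling */
        assert(((req ^ ack) & mask) == 0);

        /* Free-running ring-buffer indices */
        uint32_t ring_size = 4;
        uint32_t extract = 0xFFFFFFFEu;                 /* about to wrap */
        uint32_t insert = extract + 3;                  /* wraps past zero */

        assert(insert - extract == 3);                  /* occupancy survives the wrap */
        assert(insert - extract < ring_size);           /* still room for one more sample */
        return 0;
}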
CSF_KERNEL_DOORBELL_NR) { + csf_doorbell_prfcnt(kbdev); + gpu_model_glb_request_job_irq(kbdev->model); + } +} +EXPORT_SYMBOL(kbase_csf_ring_doorbell); + +/** * handle_internal_firmware_fatal - Handler for CS internal firmware fault. * * @kbdev: Pointer to kbase device @@ -631,17 +704,16 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev) kbdev->csf.gpu_idle_dur_count); } + static void global_init(struct kbase_device *const kbdev, u64 core_mask) { - u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK | - GLB_ACK_IRQ_MASK_PING_MASK | - GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | - GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK | - GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK | - GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | - GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | - GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK | - GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK; + u32 const ack_irq_mask = + GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK | GLB_ACK_IRQ_MASK_PING_MASK | + GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK | + GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK | + GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK | + GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK | + 0; const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; @@ -797,8 +869,9 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_m dev_warn(kbdev->dev, "No GPU clock, unexpected intregration issue!"); spin_unlock(&kbdev->pm.clk_rtm.lock); - dev_info(kbdev->dev, "Can't get the timestamp frequency, " - "use cycle counter format with firmware idle hysteresis!"); + dev_info( + kbdev->dev, + "Can't get the timestamp frequency, use cycle counter format with firmware idle hysteresis!"); } /* Formula for dur_val = ((dur_ms/1000) * freq_HZ) >> 10) */ @@ -914,8 +987,9 @@ static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u3 dev_warn(kbdev->dev, "No GPU clock, unexpected integration issue!"); spin_unlock(&kbdev->pm.clk_rtm.lock); - dev_info(kbdev->dev, "Can't get the timestamp frequency, " - "use cycle counter with MCU Core Poweroff timer!"); + dev_info( + kbdev->dev, + "Can't get the timestamp frequency, use cycle counter with MCU shader Core Poweroff timer!"); } /* Formula for dur_val = ((dur_us/1e6) * freq_HZ) >> 10) */ @@ -939,7 +1013,14 @@ static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u3 u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev) { - return kbdev->csf.mcu_core_pwroff_dur_us; + u32 pwroff; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + pwroff = kbdev->csf.mcu_core_pwroff_dur_us; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return pwroff; } u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur) @@ -952,7 +1033,7 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 kbdev->csf.mcu_core_pwroff_dur_count = pwroff; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - dev_dbg(kbdev->dev, "MCU Core Poweroff input update: 0x%.8x", pwroff); + dev_dbg(kbdev->dev, "MCU shader Core Poweroff input update: 0x%.8x", pwroff); return pwroff; } @@ -965,16 +1046,6 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) kbdev->csf.fw_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT); - kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS; -#ifdef KBASE_PM_RUNTIME - if (kbase_pm_gpu_sleep_allowed(kbdev)) - 
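The duration-to-count conversions above follow the formula quoted in the code, dur_val = ((dur_ms / 1000) * freq_HZ) >> 10, i.e. the duration expressed in units of 1024 GPU cycles. A worked stand-alone example (the multiplication is done before the division here so the integer arithmetic does not truncate to zero for sub-second durations; the driver additionally packs a clock-source flag into the register value):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t freq_hz = 100000000ull;        /* illustrative 100 MHz timestamp clock */
        uint64_t dur_ms = 10;                   /* 10 ms hysteresis */
        uint64_t count = (dur_ms * freq_hz / 1000) >> 10;

        printf("%llu\n", (unsigned long long)count);    /* 976 = ~1,000,000 cycles / 1024 */
        return 0;
}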
kbdev->csf.gpu_idle_hysteresis_ms /= - FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; -#endif - WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms); - kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count( - kbdev, kbdev->csf.gpu_idle_hysteresis_ms); - INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces); INIT_LIST_HEAD(&kbdev->csf.firmware_config); INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list); @@ -987,7 +1058,21 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) return 0; } -int kbase_csf_firmware_init(struct kbase_device *kbdev) +int kbase_csf_firmware_late_init(struct kbase_device *kbdev) +{ + kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS; +#ifdef KBASE_PM_RUNTIME + if (kbase_pm_gpu_sleep_allowed(kbdev)) + kbdev->csf.gpu_idle_hysteresis_ms /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; +#endif + WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms); + kbdev->csf.gpu_idle_dur_count = + convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ms); + + return 0; +} + +int kbase_csf_firmware_load_init(struct kbase_device *kbdev) { int ret; @@ -1053,11 +1138,11 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev) return 0; error: - kbase_csf_firmware_term(kbdev); + kbase_csf_firmware_unload_term(kbdev); return ret; } -void kbase_csf_firmware_term(struct kbase_device *kbdev) +void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) { cancel_work_sync(&kbdev->csf.fw_error_work); @@ -1392,7 +1477,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( gpu_map_prot = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); cpu_map_prot = pgprot_writecombine(cpu_map_prot); - }; + } phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL); if (!phys) @@ -1430,9 +1515,9 @@ int kbase_csf_firmware_mcu_shared_mapping_init( gpu_map_properties &= (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR); gpu_map_properties |= gpu_map_prot; - ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, - va_reg->start_pfn, &phys[0], num_pages, - gpu_map_properties, KBASE_MEM_GROUP_CSF_FW); + ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn, + &phys[0], num_pages, gpu_map_properties, + KBASE_MEM_GROUP_CSF_FW, NULL); if (ret) goto mmu_insert_pages_error; @@ -1493,3 +1578,4 @@ void kbase_csf_firmware_mcu_shared_mapping_term( vunmap(csf_mapping->cpu_addr); kfree(csf_mapping->phys); } + diff --git a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c index 4b3931f..1876d50 100644 --- a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c +++ b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -154,8 +154,8 @@ u64 kbase_csf_heap_context_allocator_alloc( struct kbase_csf_heap_context_allocator *const ctx_alloc) { struct kbase_context *const kctx = ctx_alloc->kctx; - u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | - BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE; + u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR | + BASEP_MEM_NO_USER_FREE | BASE_MEM_PROT_CPU_RD; u64 nr_pages = PFN_UP(HEAP_CTX_REGION_SIZE); u64 heap_gpu_va = 0; @@ -164,10 +164,6 @@ u64 kbase_csf_heap_context_allocator_alloc( */ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; -#ifdef CONFIG_MALI_VECTOR_DUMP - flags |= BASE_MEM_PROT_CPU_RD; -#endif - mutex_lock(&ctx_alloc->lock); /* If the pool of heap contexts wasn't already allocated then diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.c b/mali_kbase/csf/mali_kbase_csf_kcpu.c index 2991060..1321d06 100644 --- a/mali_kbase/csf/mali_kbase_csf_kcpu.c +++ b/mali_kbase/csf/mali_kbase_csf_kcpu.c @@ -33,6 +33,10 @@ static DEFINE_SPINLOCK(kbase_csf_fence_lock); #endif +#ifdef CONFIG_MALI_FENCE_DEBUG +#define FENCE_WAIT_TIMEOUT_MS 3000 +#endif + static void kcpu_queue_process(struct kbase_kcpu_command_queue *kcpu_queue, bool drain_queue); @@ -51,7 +55,7 @@ static int kbase_kcpu_map_import_prepare( long i; int ret = 0; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); /* Take the processes mmap lock */ down_read(kbase_mem_get_process_mmap_lock()); @@ -110,7 +114,7 @@ static int kbase_kcpu_unmap_import_prepare_internal( struct kbase_va_region *reg; int ret = 0; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); kbase_gpu_vm_lock(kctx); @@ -178,7 +182,9 @@ static void kbase_jit_add_to_pending_alloc_list( &kctx->csf.kcpu_queues.jit_blocked_queues; struct kbase_kcpu_command_queue *blocked_queue; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); + + spin_lock(&kctx->csf.kcpu_queues.jit_lock); list_for_each_entry(blocked_queue, &kctx->csf.kcpu_queues.jit_blocked_queues, @@ -194,6 +200,8 @@ static void kbase_jit_add_to_pending_alloc_list( } list_add_tail(&queue->jit_blocked, target_list_head); + + spin_unlock(&kctx->csf.kcpu_queues.jit_lock); } /** @@ -223,10 +231,12 @@ static int kbase_kcpu_jit_allocate_process( u32 i; int ret; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); if (alloc_info->blocked) { + spin_lock(&kctx->csf.kcpu_queues.jit_lock); list_del(&queue->jit_blocked); + spin_unlock(&kctx->csf.kcpu_queues.jit_lock); alloc_info->blocked = false; } @@ -250,6 +260,7 @@ static int kbase_kcpu_jit_allocate_process( bool can_block = false; struct kbase_kcpu_command const *jit_cmd; + spin_lock(&kctx->csf.kcpu_queues.jit_lock); list_for_each_entry(jit_cmd, &kctx->csf.kcpu_queues.jit_cmds_head, info.jit_alloc.node) { if (jit_cmd == cmd) break; @@ -268,6 +279,7 @@ static int kbase_kcpu_jit_allocate_process( } } } + spin_unlock(&kctx->csf.kcpu_queues.jit_lock); if (!can_block) { /* @@ -350,7 +362,7 @@ static int kbase_kcpu_jit_allocate_prepare( int ret = 0; u32 i; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); if (!data || count > kcpu_queue->kctx->jit_max_allocations || count > ARRAY_SIZE(kctx->jit_alloc)) { @@ -388,8 +400,10 @@ static int 
kbase_kcpu_jit_allocate_prepare( } current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_ALLOC; + spin_lock(&kctx->csf.kcpu_queues.jit_lock); list_add_tail(¤t_command->info.jit_alloc.node, &kctx->csf.kcpu_queues.jit_cmds_head); + spin_unlock(&kctx->csf.kcpu_queues.jit_lock); current_command->info.jit_alloc.info = info; current_command->info.jit_alloc.count = count; current_command->info.jit_alloc.blocked = false; @@ -411,7 +425,9 @@ static void kbase_kcpu_jit_allocate_finish( struct kbase_kcpu_command_queue *queue, struct kbase_kcpu_command *cmd) { - lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); + + spin_lock(&queue->kctx->csf.kcpu_queues.jit_lock); /* Remove this command from the jit_cmds_head list */ list_del(&cmd->info.jit_alloc.node); @@ -425,6 +441,8 @@ static void kbase_kcpu_jit_allocate_finish( cmd->info.jit_alloc.blocked = false; } + spin_unlock(&queue->kctx->csf.kcpu_queues.jit_lock); + kfree(cmd->info.jit_alloc.info); } @@ -437,8 +455,6 @@ static void kbase_kcpu_jit_retry_pending_allocs(struct kbase_context *kctx) { struct kbase_kcpu_command_queue *blocked_queue; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); - /* * Reschedule all queues blocked by JIT_ALLOC commands. * NOTE: This code traverses the list of blocked queues directly. It @@ -446,10 +462,10 @@ static void kbase_kcpu_jit_retry_pending_allocs(struct kbase_context *kctx) * time. This precondition is true since we're holding the * kbase_csf_kcpu_queue_context.lock . */ - list_for_each_entry(blocked_queue, - &kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked) - kthread_queue_work(&kctx->csf.kcpu_queues.csf_kcpu_worker, - &blocked_queue->work); + spin_lock(&kctx->csf.kcpu_queues.jit_lock); + list_for_each_entry(blocked_queue, &kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked) + kthread_queue_work(&blocked_queue->csf_kcpu_worker, &blocked_queue->work); + spin_unlock(&kctx->csf.kcpu_queues.jit_lock); } static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue, @@ -466,7 +482,7 @@ static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue, if (WARN_ON(!ids)) return -EINVAL; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END(queue->kctx->kbdev, queue); @@ -498,16 +514,18 @@ static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue, queue->kctx->kbdev, queue, item_err, pages_used); } - /* Free the list of ids */ - kfree(ids); - /* * Remove this command from the jit_cmds_head list and retry pending * allocations. 
*/ + spin_lock(&kctx->csf.kcpu_queues.jit_lock); list_del(&cmd->info.jit_free.node); + spin_unlock(&kctx->csf.kcpu_queues.jit_lock); kbase_kcpu_jit_retry_pending_allocs(kctx); + /* Free the list of ids */ + kfree(ids); + return rc; } @@ -523,7 +541,7 @@ static int kbase_kcpu_jit_free_prepare( int ret; u32 i; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); /* Sanity checks */ if (!count || count > ARRAY_SIZE(kctx->jit_alloc)) { @@ -569,8 +587,10 @@ static int kbase_kcpu_jit_free_prepare( } current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_FREE; + spin_lock(&kctx->csf.kcpu_queues.jit_lock); list_add_tail(¤t_command->info.jit_free.node, &kctx->csf.kcpu_queues.jit_cmds_head); + spin_unlock(&kctx->csf.kcpu_queues.jit_lock); current_command->info.jit_free.ids = ids; current_command->info.jit_free.count = count; @@ -598,7 +618,7 @@ static int kbase_csf_queue_group_suspend_prepare( int pinned_pages = 0, ret = 0; struct kbase_va_region *reg; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); if (suspend_buf->size < csg_suspend_buf_size) return -EINVAL; @@ -700,10 +720,8 @@ static enum kbase_csf_event_callback_action event_cqs_callback(void *param) { struct kbase_kcpu_command_queue *kcpu_queue = (struct kbase_kcpu_command_queue *)param; - struct kbase_context *const kctx = kcpu_queue->kctx; - kthread_queue_work(&kctx->csf.kcpu_queues.csf_kcpu_worker, - &kcpu_queue->work); + kthread_queue_work(&kcpu_queue->csf_kcpu_worker, &kcpu_queue->work); return KBASE_CSF_EVENT_CALLBACK_KEEP; } @@ -733,7 +751,7 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev, { u32 i; - lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); if (WARN_ON(!cqs_wait->objs)) return -EINVAL; @@ -750,7 +768,7 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev, KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START(kbdev, queue); queue->command_started = true; - KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_WAIT_START, + KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_WAIT_START, queue, cqs_wait->nr_objs, 0); } @@ -772,7 +790,7 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev, error = true; } - KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_WAIT_END, + KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_WAIT_END, queue, cqs_wait->objs[i].addr, error); @@ -801,7 +819,7 @@ static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue, struct base_cqs_wait_info *objs; unsigned int nr_objs = cqs_wait_info->nr_objs; - lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) return -EINVAL; @@ -855,7 +873,7 @@ static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev, { unsigned int i; - lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); if (WARN_ON(!cqs_set->objs)) return; @@ -879,7 +897,7 @@ static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev, evt[BASEP_EVENT_VAL_INDEX]++; kbase_phy_alloc_mapping_put(queue->kctx, mapping); - KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_SET, + KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_SET, queue, cqs_set->objs[i].addr, evt[BASEP_EVENT_ERR_INDEX]); } @@ -896,11 +914,10 @@ static int kbase_kcpu_cqs_set_prepare( struct base_kcpu_command_cqs_set_info *cqs_set_info, struct kbase_kcpu_command *current_command) { - struct kbase_context *const kctx = kcpu_queue->kctx; struct base_cqs_set *objs; unsigned int nr_objs 
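The JIT bookkeeping above moves from the context-wide kcpu_queues.lock to a per-queue mutex, with a dedicated jit_lock spinlock nested inside it that only covers edits to the shared jit_cmds_head / jit_blocked_queues lists (queueing kthread work under the spinlock is fine, as it does not sleep). A hedged sketch of that nesting, reusing the field names from the hunks (not compilable on its own):

static void jit_list_update_example(struct kbase_kcpu_command_queue *queue,
                                    struct kbase_kcpu_command *cmd)
{
        struct kbase_context *kctx = queue->kctx;

        lockdep_assert_held(&queue->lock);              /* outer, sleepable per-queue mutex */

        spin_lock(&kctx->csf.kcpu_queues.jit_lock);     /* inner spinlock, list edits only */
        list_del(&cmd->info.jit_free.node);
        spin_unlock(&kctx->csf.kcpu_queues.jit_lock);
}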
= cqs_set_info->nr_objs; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) return -EINVAL; @@ -950,7 +967,7 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev, { u32 i; - lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); if (WARN_ON(!cqs_wait_operation->objs)) return -EINVAL; @@ -1037,7 +1054,7 @@ static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue struct base_cqs_wait_operation_info *objs; unsigned int nr_objs = cqs_wait_operation_info->nr_objs; - lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) return -EINVAL; @@ -1092,7 +1109,7 @@ static void kbase_kcpu_cqs_set_operation_process( { unsigned int i; - lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); if (WARN_ON(!cqs_set_operation->objs)) return; @@ -1159,11 +1176,10 @@ static int kbase_kcpu_cqs_set_operation_prepare( struct base_kcpu_command_cqs_set_operation_info *cqs_set_operation_info, struct kbase_kcpu_command *current_command) { - struct kbase_context *const kctx = kcpu_queue->kctx; struct base_cqs_set_operation_info *objs; unsigned int nr_objs = cqs_set_operation_info->nr_objs; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) return -EINVAL; @@ -1202,12 +1218,15 @@ static void kbase_csf_fence_wait_callback(struct dma_fence *fence, struct kbase_kcpu_command_queue *kcpu_queue = fence_info->kcpu_queue; struct kbase_context *const kctx = kcpu_queue->kctx; - KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, FENCE_WAIT_END, kcpu_queue, +#ifdef CONFIG_MALI_FENCE_DEBUG + /* Fence gets signaled. Deactivate the timer for fence-wait timeout */ + del_timer(&kcpu_queue->fence_timeout); +#endif + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, kcpu_queue, fence->context, fence->seqno); /* Resume kcpu command queue processing. */ - kthread_queue_work(&kctx->csf.kcpu_queues.csf_kcpu_worker, - &kcpu_queue->work); + kthread_queue_work(&kcpu_queue->csf_kcpu_worker, &kcpu_queue->work); } static void kbase_kcpu_fence_wait_cancel( @@ -1216,7 +1235,7 @@ static void kbase_kcpu_fence_wait_cancel( { struct kbase_context *const kctx = kcpu_queue->kctx; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); if (WARN_ON(!fence_info->fence)) return; @@ -1225,8 +1244,15 @@ static void kbase_kcpu_fence_wait_cancel( bool removed = dma_fence_remove_callback(fence_info->fence, &fence_info->fence_cb); +#ifdef CONFIG_MALI_FENCE_DEBUG + /* Fence-wait cancelled or fence signaled. In the latter case + * the timer would already have been deactivated inside + * kbase_csf_fence_wait_callback(). + */ + del_timer_sync(&kcpu_queue->fence_timeout); +#endif if (removed) - KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, FENCE_WAIT_END, + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, kcpu_queue, fence_info->fence->context, fence_info->fence->seqno); } @@ -1238,6 +1264,80 @@ static void kbase_kcpu_fence_wait_cancel( fence_info->fence = NULL; } +#ifdef CONFIG_MALI_FENCE_DEBUG +/** + * fence_timeout_callback() - Timeout callback function for fence-wait + * + * @timer: Timer struct + * + * Context and seqno of the timed-out fence will be displayed in dmesg. 
+ * If the fence has been signalled a work will be enqueued to process + * the fence-wait without displaying debugging information. + */ +static void fence_timeout_callback(struct timer_list *timer) +{ + struct kbase_kcpu_command_queue *kcpu_queue = + container_of(timer, struct kbase_kcpu_command_queue, fence_timeout); + struct kbase_context *const kctx = kcpu_queue->kctx; + struct kbase_kcpu_command *cmd = &kcpu_queue->commands[kcpu_queue->start_offset]; + struct kbase_kcpu_command_fence_info *fence_info; +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence; +#else + struct dma_fence *fence; +#endif + struct kbase_sync_fence_info info; + + if (cmd->type != BASE_KCPU_COMMAND_TYPE_FENCE_WAIT) { + dev_err(kctx->kbdev->dev, + "%s: Unexpected command type %d in ctx:%d_%d kcpu queue:%u", __func__, + cmd->type, kctx->tgid, kctx->id, kcpu_queue->id); + return; + } + + fence_info = &cmd->info.fence; + + fence = kbase_fence_get(fence_info); + if (!fence) { + dev_err(kctx->kbdev->dev, "no fence found in ctx:%d_%d kcpu queue:%u", kctx->tgid, + kctx->id, kcpu_queue->id); + return; + } + + kbase_sync_fence_info_get(fence, &info); + + if (info.status == 1) { + kthread_queue_work(&kcpu_queue->csf_kcpu_worker, &kcpu_queue->work); + } else if (info.status == 0) { + dev_warn(kctx->kbdev->dev, "fence has not yet signalled in %ums", + FENCE_WAIT_TIMEOUT_MS); + dev_warn(kctx->kbdev->dev, + "ctx:%d_%d kcpu queue:%u still waiting for fence[%pK] context#seqno:%s", + kctx->tgid, kctx->id, kcpu_queue->id, fence, info.name); + } else { + dev_warn(kctx->kbdev->dev, "fence has got error"); + dev_warn(kctx->kbdev->dev, + "ctx:%d_%d kcpu queue:%u faulty fence[%pK] context#seqno:%s error(%d)", + kctx->tgid, kctx->id, kcpu_queue->id, fence, info.name, info.status); + } + + kbase_fence_put(fence); +} + +/** + * fence_timeout_start() - Start a timer to check fence-wait timeout + * + * @cmd: KCPU command queue + * + * Activate a timer to check whether a fence-wait command in the queue + * gets completed within FENCE_WAIT_TIMEOUT_MS + */ +static void fence_timeout_start(struct kbase_kcpu_command_queue *cmd) +{ + mod_timer(&cmd->fence_timeout, jiffies + msecs_to_jiffies(FENCE_WAIT_TIMEOUT_MS)); +} +#endif + /** * kbase_kcpu_fence_wait_process() - Process the kcpu fence wait command * @@ -1257,8 +1357,9 @@ static int kbase_kcpu_fence_wait_process( #else struct dma_fence *fence; #endif + struct kbase_context *const kctx = kcpu_queue->kctx; - lockdep_assert_held(&kcpu_queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); if (WARN_ON(!fence_info->fence)) return -EINVAL; @@ -1272,14 +1373,26 @@ static int kbase_kcpu_fence_wait_process( &fence_info->fence_cb, kbase_csf_fence_wait_callback); - KBASE_KTRACE_ADD_CSF_KCPU(kcpu_queue->kctx->kbdev, - FENCE_WAIT_START, kcpu_queue, + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, + KCPU_FENCE_WAIT_START, kcpu_queue, fence->context, fence->seqno); fence_status = cb_err; - if (cb_err == 0) + if (cb_err == 0) { kcpu_queue->fence_wait_processed = true; - else if (cb_err == -ENOENT) +#ifdef CONFIG_MALI_FENCE_DEBUG + fence_timeout_start(kcpu_queue); +#endif + } else if (cb_err == -ENOENT) { fence_status = dma_fence_get_status(fence); + if (!fence_status) { + struct kbase_sync_fence_info info; + + kbase_sync_fence_info_get(fence, &info); + dev_warn(kctx->kbdev->dev, + "Unexpected status for fence %s of ctx:%d_%d kcpu queue:%u", + info.name, kctx->tgid, kctx->id, kcpu_queue->id); + } + } } /* @@ -1302,7 +1415,6 @@ static int kbase_kcpu_fence_wait_prepare( 
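The fence-wait timeout above is a per-queue watchdog: the timer is set up once when the queue is created (via the driver's kbase_timer_setup() wrapper), armed when a fence wait registers its callback, and disarmed either from the signal callback (del_timer, non-blocking) or synchronously on the cancel path (del_timer_sync). A hedged kernel-style sketch of that generic pattern with illustrative names:

#include <linux/jiffies.h>
#include <linux/timer.h>
#include <linux/types.h>

struct watched_wait {
        struct timer_list watchdog;
};

static void watchdog_expired(struct timer_list *t)
{
        struct watched_wait *w = from_timer(w, t, watchdog);

        /* Still waiting after the timeout: report it, but do not fail the wait. */
        (void)w;
}

static void watched_wait_init(struct watched_wait *w)
{
        timer_setup(&w->watchdog, watchdog_expired, 0);
}

static void watched_wait_arm(struct watched_wait *w, unsigned int timeout_ms)
{
        mod_timer(&w->watchdog, jiffies + msecs_to_jiffies(timeout_ms));
}

static void watched_wait_done(struct watched_wait *w, bool may_sleep)
{
        if (may_sleep)
                del_timer_sync(&w->watchdog);   /* cancel / teardown path */
        else
                del_timer(&w->watchdog);        /* completion-callback path */
}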
struct base_kcpu_command_fence_info *fence_info, struct kbase_kcpu_command *current_command) { - struct kbase_context *const kctx = kcpu_queue->kctx; #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence_in; #else @@ -1310,7 +1422,7 @@ static int kbase_kcpu_fence_wait_prepare( #endif struct base_fence fence; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence))) @@ -1324,7 +1436,6 @@ static int kbase_kcpu_fence_wait_prepare( current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_WAIT; current_command->info.fence.fence = fence_in; current_command->info.fence.kcpu_queue = kcpu_queue; - return 0; } @@ -1341,14 +1452,16 @@ static int kbase_kcpu_fence_signal_process( ret = dma_fence_signal(fence_info->fence); if (unlikely(ret < 0)) { - dev_warn(kctx->kbdev->dev, - "fence_signal() failed with %d\n", ret); + dev_warn(kctx->kbdev->dev, "dma_fence(%d) has been signalled already\n", ret); + /* Treated as a success */ + ret = 0; } - KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, FENCE_SIGNAL, kcpu_queue, + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_SIGNAL, kcpu_queue, fence_info->fence->context, fence_info->fence->seqno); + /* dma_fence refcount needs to be decreased to release it. */ dma_fence_put(fence_info->fence); fence_info->fence = NULL; @@ -1360,7 +1473,6 @@ static int kbase_kcpu_fence_signal_prepare( struct base_kcpu_command_fence_info *fence_info, struct kbase_kcpu_command *current_command) { - struct kbase_context *const kctx = kcpu_queue->kctx; #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence_out; #else @@ -1371,7 +1483,7 @@ static int kbase_kcpu_fence_signal_prepare( int ret = 0; int fd; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence))) @@ -1399,9 +1511,6 @@ static int kbase_kcpu_fence_signal_prepare( /* create a sync_file fd representing the fence */ sync_file = sync_file_create(fence_out); if (!sync_file) { -#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE) - dma_fence_put(fence_out); -#endif ret = -ENOMEM; goto file_create_fail; } @@ -1433,8 +1542,16 @@ static int kbase_kcpu_fence_signal_prepare( fd_flags_fail: fput(sync_file->file); file_create_fail: + /* + * Upon failure, dma_fence refcount that was increased by + * dma_fence_get() or sync_file_create() needs to be decreased + * to release it. + */ dma_fence_put(fence_out); + current_command->info.fence.fence = NULL; + kfree(fence_out); + return ret; } #endif /* CONFIG_SYNC_FILE */ @@ -1444,11 +1561,9 @@ static void kcpu_queue_process_worker(struct kthread_work *data) struct kbase_kcpu_command_queue *queue = container_of(data, struct kbase_kcpu_command_queue, work); - mutex_lock(&queue->kctx->csf.kcpu_queues.lock); - + mutex_lock(&queue->lock); kcpu_queue_process(queue, false); - - mutex_unlock(&queue->kctx->csf.kcpu_queues.lock); + mutex_unlock(&queue->lock); } static int delete_queue(struct kbase_context *kctx, u32 id) @@ -1461,9 +1576,20 @@ static int delete_queue(struct kbase_context *kctx, u32 id) struct kbase_kcpu_command_queue *queue = kctx->csf.kcpu_queues.array[id]; - KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_DESTROY, + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_DELETE, queue, queue->num_pending_cmds, queue->cqs_wait_count); + /* Disassociate the queue from the system to prevent further + * submissions. 
Draining pending commands would be acceptable + * even if a new queue is created using the same ID. + */ + kctx->csf.kcpu_queues.array[id] = NULL; + bitmap_clear(kctx->csf.kcpu_queues.in_use, id, 1); + + mutex_unlock(&kctx->csf.kcpu_queues.lock); + + mutex_lock(&queue->lock); + /* Drain the remaining work for this queue first and go past * all the waits. */ @@ -1475,17 +1601,16 @@ static int delete_queue(struct kbase_context *kctx, u32 id) /* All CQS wait commands should have been cleaned up */ WARN_ON(queue->cqs_wait_count); - kctx->csf.kcpu_queues.array[id] = NULL; - bitmap_clear(kctx->csf.kcpu_queues.in_use, id, 1); - /* Fire the tracepoint with the mutex held to enforce correct * ordering with the summary stream. */ KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE(kctx->kbdev, queue); - mutex_unlock(&kctx->csf.kcpu_queues.lock); + mutex_unlock(&queue->lock); + + kbase_destroy_kworker_stack(&queue->csf_kcpu_worker); - kthread_cancel_work_sync(&queue->work); + mutex_destroy(&queue->lock); kfree(queue); } else { @@ -1552,7 +1677,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool process_next = true; size_t i; - lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); for (i = 0; i != queue->num_pending_cmds; ++i) { struct kbase_kcpu_command *cmd = @@ -1971,13 +2096,13 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, } mutex_lock(&kctx->csf.kcpu_queues.lock); + queue = kctx->csf.kcpu_queues.array[enq->id]; + mutex_unlock(&kctx->csf.kcpu_queues.lock); - if (!kctx->csf.kcpu_queues.array[enq->id]) { - ret = -EINVAL; - goto out; - } + if (queue == NULL) + return -EINVAL; - queue = kctx->csf.kcpu_queues.array[enq->id]; + mutex_lock(&queue->lock); if (kcpu_queue_get_space(queue) < enq->nr_commands) { ret = -EBUSY; @@ -1992,7 +2117,7 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, * for the possibility to roll back. 
*/ - for (i = 0; (i != enq->nr_commands) && !ret; ++i, ++kctx->csf.kcpu_queues.num_cmds) { + for (i = 0; (i != enq->nr_commands) && !ret; ++i) { struct kbase_kcpu_command *kcpu_cmd = &queue->commands[(u8)(queue->start_offset + queue->num_pending_cmds + i)]; struct base_kcpu_command command; @@ -2015,7 +2140,7 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, } } - kcpu_cmd->enqueue_ts = kctx->csf.kcpu_queues.num_cmds; + kcpu_cmd->enqueue_ts = atomic64_read(&kctx->csf.kcpu_queues.num_cmds); switch (command.type) { case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: #if IS_ENABLED(CONFIG_SYNC_FILE) @@ -2086,6 +2211,8 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, ret = -EINVAL; break; } + + atomic64_inc(&kctx->csf.kcpu_queues.num_cmds); } if (!ret) { @@ -2102,15 +2229,14 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, } queue->num_pending_cmds += enq->nr_commands; - kthread_queue_work(&kctx->csf.kcpu_queues.csf_kcpu_worker, - &queue->work); + kthread_queue_work(&queue->csf_kcpu_worker, &queue->work); } else { /* Roll back the number of enqueued commands */ - kctx->csf.kcpu_queues.num_cmds -= i; + atomic64_sub(i, &kctx->csf.kcpu_queues.num_cmds); } out: - mutex_unlock(&kctx->csf.kcpu_queues.lock); + mutex_unlock(&queue->lock); return ret; } @@ -2124,20 +2250,9 @@ int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx) for (idx = 0; idx < KBASEP_MAX_KCPU_QUEUES; ++idx) kctx->csf.kcpu_queues.array[idx] = NULL; - kthread_init_worker(&kctx->csf.kcpu_queues.csf_kcpu_worker); - kctx->csf.kcpu_queues.csf_kcpu_thread = kbase_create_realtime_thread( - kctx->kbdev, - kthread_worker_fn, - &kctx->csf.kcpu_queues.csf_kcpu_worker, - "mali_kbase_csf_kcpu"); - - if (IS_ERR(kctx->csf.kcpu_queues.csf_kcpu_thread)) { - return -ENOMEM; - } - mutex_init(&kctx->csf.kcpu_queues.lock); - kctx->csf.kcpu_queues.num_cmds = 0; + atomic64_set(&kctx->csf.kcpu_queues.num_cmds, 0); return 0; } @@ -2155,9 +2270,6 @@ void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx) (void)delete_queue(kctx, id); } - kthread_flush_worker(&kctx->csf.kcpu_queues.csf_kcpu_worker); - kthread_stop(kctx->csf.kcpu_queues.csf_kcpu_thread); - mutex_destroy(&kctx->csf.kcpu_queues.lock); } @@ -2201,8 +2313,17 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, goto out; } + ret = kbase_create_realtime_thread( + kctx->kbdev, kthread_worker_fn, &queue->csf_kcpu_worker, "mali_kbase_csf_kcpu_%i", idx); + + if (ret) { + kfree(queue); + goto out; + } + bitmap_set(kctx->csf.kcpu_queues.in_use, idx, 1); kctx->csf.kcpu_queues.array[idx] = queue; + mutex_init(&queue->lock); queue->kctx = kctx; queue->start_offset = 0; queue->num_pending_cmds = 0; @@ -2226,8 +2347,11 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE(kctx->kbdev, queue, queue->id, kctx->id, queue->num_pending_cmds); - KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_NEW, queue, + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_CREATE, queue, queue->fence_context, 0); +#ifdef CONFIG_MALI_FENCE_DEBUG + kbase_timer_setup(&queue->fence_timeout, fence_timeout_callback); +#endif out: mutex_unlock(&kctx->csf.kcpu_queues.lock); diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.h b/mali_kbase/csf/mali_kbase_csf_kcpu.h index 417a096..f982f56 100644 --- a/mali_kbase/csf/mali_kbase_csf_kcpu.h +++ b/mali_kbase/csf/mali_kbase_csf_kcpu.h @@ -47,9 +47,9 @@ struct kbase_kcpu_command_import_info { * struct kbase_kcpu_command_fence_info - Structure which holds information * about the fence object 
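The enqueue path above now holds the context-wide kcpu_queues.lock only long enough to look the queue up, then serialises on the per-queue lock, while command ordering across the whole context is kept by a shared atomic64 counter: each command's enqueue timestamp is a read of the counter, the counter is incremented per accepted command, and a failed batch subtracts what it added. A stand-alone illustration of that counter bookkeeping (user-space C11):

#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>

int main(void)
{
        atomic_uint_fast64_t num_cmds;
        uint64_t ts[3];
        unsigned int i, enqueued = 0;

        atomic_init(&num_cmds, 0);

        /* Enqueue a batch of three commands */
        for (i = 0; i < 3; i++) {
                ts[i] = atomic_load(&num_cmds); /* timestamp = commands counted so far */
                atomic_fetch_add(&num_cmds, 1);
                enqueued++;
        }
        assert(ts[2] == 2);

        /* The batch failed after the fact: roll the counter back */
        atomic_fetch_sub(&num_cmds, enqueued);
        assert(atomic_load(&num_cmds) == 0);
        return 0;
}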
enqueued in the kcpu command queue * - * @fence_cb: Fence callback - * @fence: Fence - * @kcpu_queue: kcpu command queue + * @fence_cb: Fence callback + * @fence: Fence + * @kcpu_queue: kcpu command queue */ struct kbase_kcpu_command_fence_info { #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) @@ -184,7 +184,7 @@ struct kbase_suspend_copy_buffer { }; /** - * struct base_kcpu_command_group_suspend - structure which contains + * struct kbase_kcpu_command_group_suspend_info - structure which contains * suspend buffer data captured for a suspended queue group. * * @sus_buf: Pointer to the structure which contains details of the @@ -198,7 +198,7 @@ struct kbase_kcpu_command_group_suspend_info { /** - * struct kbase_cpu_command - Command which is to be part of the kernel + * struct kbase_kcpu_command - Command which is to be part of the kernel * command queue * * @type: Type of the command. @@ -236,9 +236,12 @@ struct kbase_kcpu_command { /** * struct kbase_kcpu_command_queue - a command queue executed by the kernel * + * @lock: Lock to protect accesses to this queue. * @kctx: The context to which this command queue belongs. * @commands: Array of commands which have been successfully * enqueued to this command queue. + * @csf_kcpu_worker: Dedicated worker for processing kernel CPU command + * queues. * @work: struct work_struct which contains a pointer to * the function which handles processing of kcpu * commands enqueued into a kcpu command queue; @@ -271,10 +274,13 @@ struct kbase_kcpu_command { * or without errors since last cleaned. * @jit_blocked: Used to keep track of command queues blocked * by a pending JIT allocation command. + * @fence_timeout: Timer used to detect the fence wait timeout. */ struct kbase_kcpu_command_queue { + struct mutex lock; struct kbase_context *kctx; struct kbase_kcpu_command commands[KBASEP_KCPU_QUEUE_SIZE]; + struct kthread_worker csf_kcpu_worker; struct kthread_work work; u8 start_offset; u8 id; @@ -287,6 +293,9 @@ struct kbase_kcpu_command_queue { bool command_started; struct list_head jit_blocked; bool has_error; +#ifdef CONFIG_MALI_FENCE_DEBUG + struct timer_list fence_timeout; +#endif /* CONFIG_MALI_FENCE_DEBUG */ }; /** diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu_debugfs.c b/mali_kbase/csf/mali_kbase_csf_kcpu_debugfs.c index 0a2cde0..fa87777 100644 --- a/mali_kbase/csf/mali_kbase_csf_kcpu_debugfs.c +++ b/mali_kbase/csf/mali_kbase_csf_kcpu_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,7 +30,7 @@ #if IS_ENABLED(CONFIG_DEBUG_FS) /** - * kbasep_csf_kcpu_debugfs_print_queue() - Print additional info for KCPU + * kbasep_csf_kcpu_debugfs_print_cqs_waits() - Print additional info for KCPU * queues blocked on CQS wait commands. 
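The new fence_timeout member above gives each KCPU queue a fence-wait watchdog when CONFIG_MALI_FENCE_DEBUG is enabled; kbase_csf_kcpu_queue_new() only initialises it via kbase_timer_setup(). A hedged sketch of how such a watchdog is typically armed and cancelled around a fence wait; the arming points, helper names and the timeout value below are assumptions, not taken from these hunks:

#ifdef CONFIG_MALI_FENCE_DEBUG
/* Placeholder period; the driver's real fence-wait timeout is not shown in
 * these hunks.
 */
#define EXAMPLE_FENCE_WAIT_TIMEOUT_MS 10000

/* Arm the per-queue watchdog when a fence wait starts... */
static void example_fence_wait_arm(struct kbase_kcpu_command_queue *queue)
{
	mod_timer(&queue->fence_timeout,
		  jiffies + msecs_to_jiffies(EXAMPLE_FENCE_WAIT_TIMEOUT_MS));
}

/* ...and cancel it once the fence signals or the wait is torn down. The
 * callback registered through kbase_timer_setup() would typically recover
 * the queue with from_timer() and log the stuck fence.
 */
static void example_fence_wait_cancel(struct kbase_kcpu_command_queue *queue)
{
	del_timer_sync(&queue->fence_timeout);
}
#endif /* CONFIG_MALI_FENCE_DEBUG */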
* * @file: The seq_file to print to @@ -167,11 +167,7 @@ static const struct file_operations kbasep_csf_kcpu_debugfs_fops = { void kbase_csf_kcpu_debugfs_init(struct kbase_context *kctx) { struct dentry *file; -#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) const mode_t mode = 0444; -#else - const mode_t mode = 0400; -#endif if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) return; diff --git a/mali_kbase/csf/mali_kbase_csf_registers.h b/mali_kbase/csf/mali_kbase_csf_registers.h index 99de444..6cbb4f0 100644 --- a/mali_kbase/csf/mali_kbase_csf_registers.h +++ b/mali_kbase/csf/mali_kbase_csf_registers.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -387,7 +387,7 @@ /* CS_BASE register */ #define CS_BASE_POINTER_SHIFT 0 -#define CS_BASE_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_BASE_POINTER_SHIFT) +#define CS_BASE_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_BASE_POINTER_SHIFT) #define CS_BASE_POINTER_GET(reg_val) (((reg_val)&CS_BASE_POINTER_MASK) >> CS_BASE_POINTER_SHIFT) #define CS_BASE_POINTER_SET(reg_val, value) \ (((reg_val) & ~CS_BASE_POINTER_MASK) | (((value) << CS_BASE_POINTER_SHIFT) & CS_BASE_POINTER_MASK)) @@ -401,7 +401,8 @@ /* CS_TILER_HEAP_START register */ #define CS_TILER_HEAP_START_POINTER_SHIFT 0 -#define CS_TILER_HEAP_START_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_TILER_HEAP_START_POINTER_SHIFT) +#define CS_TILER_HEAP_START_POINTER_MASK \ + (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_TILER_HEAP_START_POINTER_SHIFT) #define CS_TILER_HEAP_START_POINTER_GET(reg_val) \ (((reg_val)&CS_TILER_HEAP_START_POINTER_MASK) >> CS_TILER_HEAP_START_POINTER_SHIFT) #define CS_TILER_HEAP_START_POINTER_SET(reg_val, value) \ @@ -412,7 +413,8 @@ /* CS_TILER_HEAP_END register */ #define CS_TILER_HEAP_END_POINTER_SHIFT 0 -#define CS_TILER_HEAP_END_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_TILER_HEAP_END_POINTER_SHIFT) +#define CS_TILER_HEAP_END_POINTER_MASK \ + (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_TILER_HEAP_END_POINTER_SHIFT) #define CS_TILER_HEAP_END_POINTER_GET(reg_val) \ (((reg_val)&CS_TILER_HEAP_END_POINTER_MASK) >> CS_TILER_HEAP_END_POINTER_SHIFT) #define CS_TILER_HEAP_END_POINTER_SET(reg_val, value) \ @@ -423,7 +425,7 @@ /* CS_USER_INPUT register */ #define CS_USER_INPUT_POINTER_SHIFT 0 -#define CS_USER_INPUT_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_USER_INPUT_POINTER_SHIFT) +#define CS_USER_INPUT_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_USER_INPUT_POINTER_SHIFT) #define CS_USER_INPUT_POINTER_GET(reg_val) (((reg_val)&CS_USER_INPUT_POINTER_MASK) >> CS_USER_INPUT_POINTER_SHIFT) #define CS_USER_INPUT_POINTER_SET(reg_val, value) \ (((reg_val) & ~CS_USER_INPUT_POINTER_MASK) | \ @@ -431,7 +433,7 @@ /* CS_USER_OUTPUT register */ #define CS_USER_OUTPUT_POINTER_SHIFT 0 -#define CS_USER_OUTPUT_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_USER_OUTPUT_POINTER_SHIFT) +#define CS_USER_OUTPUT_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_USER_OUTPUT_POINTER_SHIFT) #define CS_USER_OUTPUT_POINTER_GET(reg_val) (((reg_val)&CS_USER_OUTPUT_POINTER_MASK) >> CS_USER_OUTPUT_POINTER_SHIFT) #define CS_USER_OUTPUT_POINTER_SET(reg_val, value) \ (((reg_val) & ~CS_USER_OUTPUT_POINTER_MASK) | \ @@ -470,7 +472,8 @@ /* CS_INSTR_BUFFER_BASE register */ #define CS_INSTR_BUFFER_BASE_POINTER_SHIFT (0) -#define 
CS_INSTR_BUFFER_BASE_POINTER_MASK ((u64)0xFFFFFFFFFFFFFFFF << CS_INSTR_BUFFER_BASE_POINTER_SHIFT) +#define CS_INSTR_BUFFER_BASE_POINTER_MASK \ + (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_INSTR_BUFFER_BASE_POINTER_SHIFT) #define CS_INSTR_BUFFER_BASE_POINTER_GET(reg_val) \ (((reg_val)&CS_INSTR_BUFFER_BASE_POINTER_MASK) >> CS_INSTR_BUFFER_BASE_POINTER_SHIFT) #define CS_INSTR_BUFFER_BASE_POINTER_SET(reg_val, value) \ @@ -479,8 +482,8 @@ /* CS_INSTR_BUFFER_OFFSET_POINTER register */ #define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT (0) -#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK \ - (((u64)0xFFFFFFFFFFFFFFFF) << CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT) +#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK \ + ((GPU_ULL(0xFFFFFFFFFFFFFFFF)) << CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT) #define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_GET(reg_val) \ (((reg_val)&CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK) >> CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT) #define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SET(reg_val, value) \ @@ -529,7 +532,8 @@ /* CS_STATUS_CMD_PTR register */ #define CS_STATUS_CMD_PTR_POINTER_SHIFT 0 -#define CS_STATUS_CMD_PTR_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_STATUS_CMD_PTR_POINTER_SHIFT) +#define CS_STATUS_CMD_PTR_POINTER_MASK \ + (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_STATUS_CMD_PTR_POINTER_SHIFT) #define CS_STATUS_CMD_PTR_POINTER_GET(reg_val) \ (((reg_val)&CS_STATUS_CMD_PTR_POINTER_MASK) >> CS_STATUS_CMD_PTR_POINTER_SHIFT) #define CS_STATUS_CMD_PTR_POINTER_SET(reg_val, value) \ @@ -543,6 +547,13 @@ #define CS_STATUS_WAIT_SB_MASK_SET(reg_val, value) \ (((reg_val) & ~CS_STATUS_WAIT_SB_MASK_MASK) | \ (((value) << CS_STATUS_WAIT_SB_MASK_SHIFT) & CS_STATUS_WAIT_SB_MASK_MASK)) +#define CS_STATUS_WAIT_SB_SOURCE_SHIFT 16 +#define CS_STATUS_WAIT_SB_SOURCE_MASK (0xF << CS_STATUS_WAIT_SB_SOURCE_SHIFT) +#define CS_STATUS_WAIT_SB_SOURCE_GET(reg_val) \ + (((reg_val)&CS_STATUS_WAIT_SB_SOURCE_MASK) >> CS_STATUS_WAIT_SB_SOURCE_SHIFT) +#define CS_STATUS_WAIT_SB_SOURCE_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_WAIT_SB_SOURCE_MASK) | \ + (((value) << CS_STATUS_WAIT_SB_SOURCE_SHIFT) & CS_STATUS_WAIT_SB_SOURCE_MASK)) #define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT 24 #define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK (0xF << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) #define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(reg_val) \ @@ -608,7 +619,8 @@ /* CS_STATUS_WAIT_SYNC_POINTER register */ #define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT 0 -#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) +#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK \ + (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) #define CS_STATUS_WAIT_SYNC_POINTER_POINTER_GET(reg_val) \ (((reg_val)&CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK) >> CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) #define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SET(reg_val, value) \ @@ -694,6 +706,7 @@ (((value) << CS_FATAL_EXCEPTION_TYPE_SHIFT) & CS_FATAL_EXCEPTION_TYPE_MASK)) /* CS_FATAL_EXCEPTION_TYPE values */ #define CS_FATAL_EXCEPTION_TYPE_CS_CONFIG_FAULT 0x40 +#define CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE 0x41 #define CS_FATAL_EXCEPTION_TYPE_CS_ENDPOINT_FAULT 0x44 #define CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT 0x48 #define CS_FATAL_EXCEPTION_TYPE_CS_INVALID_INSTRUCTION 0x49 @@ -709,7 +722,8 @@ /* CS_FAULT_INFO register */ #define CS_FAULT_INFO_EXCEPTION_DATA_SHIFT 0 -#define CS_FAULT_INFO_EXCEPTION_DATA_MASK (0xFFFFFFFFFFFFFFFF << 
CS_FAULT_INFO_EXCEPTION_DATA_SHIFT) +#define CS_FAULT_INFO_EXCEPTION_DATA_MASK \ + (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_FAULT_INFO_EXCEPTION_DATA_SHIFT) #define CS_FAULT_INFO_EXCEPTION_DATA_GET(reg_val) \ (((reg_val)&CS_FAULT_INFO_EXCEPTION_DATA_MASK) >> CS_FAULT_INFO_EXCEPTION_DATA_SHIFT) #define CS_FAULT_INFO_EXCEPTION_DATA_SET(reg_val, value) \ @@ -718,7 +732,8 @@ /* CS_FATAL_INFO register */ #define CS_FATAL_INFO_EXCEPTION_DATA_SHIFT 0 -#define CS_FATAL_INFO_EXCEPTION_DATA_MASK (0xFFFFFFFFFFFFFFFF << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT) +#define CS_FATAL_INFO_EXCEPTION_DATA_MASK \ + (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT) #define CS_FATAL_INFO_EXCEPTION_DATA_GET(reg_val) \ (((reg_val)&CS_FATAL_INFO_EXCEPTION_DATA_MASK) >> CS_FATAL_INFO_EXCEPTION_DATA_SHIFT) #define CS_FATAL_INFO_EXCEPTION_DATA_SET(reg_val, value) \ @@ -750,7 +765,7 @@ /* CS_HEAP_ADDRESS register */ #define CS_HEAP_ADDRESS_POINTER_SHIFT 0 -#define CS_HEAP_ADDRESS_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_HEAP_ADDRESS_POINTER_SHIFT) +#define CS_HEAP_ADDRESS_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_HEAP_ADDRESS_POINTER_SHIFT) #define CS_HEAP_ADDRESS_POINTER_GET(reg_val) (((reg_val)&CS_HEAP_ADDRESS_POINTER_MASK) >> CS_HEAP_ADDRESS_POINTER_SHIFT) #define CS_HEAP_ADDRESS_POINTER_SET(reg_val, value) \ (((reg_val) & ~CS_HEAP_ADDRESS_POINTER_MASK) | \ @@ -761,14 +776,14 @@ /* CS_INSERT register */ #define CS_INSERT_VALUE_SHIFT 0 -#define CS_INSERT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_INSERT_VALUE_SHIFT) +#define CS_INSERT_VALUE_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_INSERT_VALUE_SHIFT) #define CS_INSERT_VALUE_GET(reg_val) (((reg_val)&CS_INSERT_VALUE_MASK) >> CS_INSERT_VALUE_SHIFT) #define CS_INSERT_VALUE_SET(reg_val, value) \ (((reg_val) & ~CS_INSERT_VALUE_MASK) | (((value) << CS_INSERT_VALUE_SHIFT) & CS_INSERT_VALUE_MASK)) /* CS_EXTRACT_INIT register */ #define CS_EXTRACT_INIT_VALUE_SHIFT 0 -#define CS_EXTRACT_INIT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_EXTRACT_INIT_VALUE_SHIFT) +#define CS_EXTRACT_INIT_VALUE_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_EXTRACT_INIT_VALUE_SHIFT) #define CS_EXTRACT_INIT_VALUE_GET(reg_val) (((reg_val)&CS_EXTRACT_INIT_VALUE_MASK) >> CS_EXTRACT_INIT_VALUE_SHIFT) #define CS_EXTRACT_INIT_VALUE_SET(reg_val, value) \ (((reg_val) & ~CS_EXTRACT_INIT_VALUE_MASK) | \ @@ -779,7 +794,7 @@ /* CS_EXTRACT register */ #define CS_EXTRACT_VALUE_SHIFT 0 -#define CS_EXTRACT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_EXTRACT_VALUE_SHIFT) +#define CS_EXTRACT_VALUE_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_EXTRACT_VALUE_SHIFT) #define CS_EXTRACT_VALUE_GET(reg_val) (((reg_val)&CS_EXTRACT_VALUE_MASK) >> CS_EXTRACT_VALUE_SHIFT) #define CS_EXTRACT_VALUE_SET(reg_val, value) \ (((reg_val) & ~CS_EXTRACT_VALUE_MASK) | (((value) << CS_EXTRACT_VALUE_SHIFT) & CS_EXTRACT_VALUE_MASK)) @@ -932,7 +947,7 @@ /* CSG_SUSPEND_BUF register */ #define CSG_SUSPEND_BUF_POINTER_SHIFT 0 -#define CSG_SUSPEND_BUF_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CSG_SUSPEND_BUF_POINTER_SHIFT) +#define CSG_SUSPEND_BUF_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CSG_SUSPEND_BUF_POINTER_SHIFT) #define CSG_SUSPEND_BUF_POINTER_GET(reg_val) (((reg_val)&CSG_SUSPEND_BUF_POINTER_MASK) >> CSG_SUSPEND_BUF_POINTER_SHIFT) #define CSG_SUSPEND_BUF_POINTER_SET(reg_val, value) \ (((reg_val) & ~CSG_SUSPEND_BUF_POINTER_MASK) | \ @@ -940,7 +955,8 @@ /* CSG_PROTM_SUSPEND_BUF register */ #define CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT 0 -#define CSG_PROTM_SUSPEND_BUF_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) +#define 
CSG_PROTM_SUSPEND_BUF_POINTER_MASK \ + (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) #define CSG_PROTM_SUSPEND_BUF_POINTER_GET(reg_val) \ (((reg_val)&CSG_PROTM_SUSPEND_BUF_POINTER_MASK) >> CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) #define CSG_PROTM_SUSPEND_BUF_POINTER_SET(reg_val, value) \ @@ -1408,7 +1424,7 @@ /* GLB_ALLOC_EN register */ #define GLB_ALLOC_EN_MASK_SHIFT 0 -#define GLB_ALLOC_EN_MASK_MASK (0xFFFFFFFFFFFFFFFF << GLB_ALLOC_EN_MASK_SHIFT) +#define GLB_ALLOC_EN_MASK_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << GLB_ALLOC_EN_MASK_SHIFT) #define GLB_ALLOC_EN_MASK_GET(reg_val) (((reg_val)&GLB_ALLOC_EN_MASK_MASK) >> GLB_ALLOC_EN_MASK_SHIFT) #define GLB_ALLOC_EN_MASK_SET(reg_val, value) \ (((reg_val) & ~GLB_ALLOC_EN_MASK_MASK) | (((value) << GLB_ALLOC_EN_MASK_SHIFT) & GLB_ALLOC_EN_MASK_MASK)) @@ -1521,4 +1537,44 @@ (((value) << GLB_REQ_ITER_TRACE_ENABLE_SHIFT) & \ GLB_REQ_ITER_TRACE_ENABLE_MASK)) +/* GLB_PRFCNT_CONFIG register */ +#define GLB_PRFCNT_CONFIG_SIZE_SHIFT (0) +#define GLB_PRFCNT_CONFIG_SIZE_MASK (0xFF << GLB_PRFCNT_CONFIG_SIZE_SHIFT) +#define GLB_PRFCNT_CONFIG_SIZE_GET(reg_val) \ + (((reg_val)&GLB_PRFCNT_CONFIG_SIZE_MASK) >> GLB_PRFCNT_CONFIG_SIZE_SHIFT) +#define GLB_PRFCNT_CONFIG_SIZE_SET(reg_val, value) \ + (((reg_val) & ~GLB_PRFCNT_CONFIG_SIZE_MASK) | \ + (((value) << GLB_PRFCNT_CONFIG_SIZE_SHIFT) & GLB_PRFCNT_CONFIG_SIZE_MASK)) +#define GLB_PRFCNT_CONFIG_SET_SELECT_SHIFT GPU_U(8) +#define GLB_PRFCNT_CONFIG_SET_SELECT_MASK (GPU_U(0x3) << GLB_PRFCNT_CONFIG_SET_SELECT_SHIFT) +#define GLB_PRFCNT_CONFIG_SET_SELECT_GET(reg_val) \ + (((reg_val)&GLB_PRFCNT_CONFIG_SET_SELECT_MASK) >> GLB_PRFCNT_CONFIG_SET_SELECT_SHIFT) +#define GLB_PRFCNT_CONFIG_SET_SELECT_SET(reg_val, value) \ + (((reg_val) & ~GLB_PRFCNT_CONFIG_SET_SELECT_MASK) | \ + (((value) << GLB_PRFCNT_CONFIG_SET_SELECT_SHIFT) & GLB_PRFCNT_CONFIG_SET_SELECT_MASK)) + +/* GLB_PRFCNT_SIZE register */ +#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_SET_MOD(value) ((value) >> 8) +#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET_MOD(value) ((value) << 8) +#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_SHIFT GPU_U(0) +#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_MASK (GPU_U(0xFFFF) << GLB_PRFCNT_SIZE_HARDWARE_SIZE_SHIFT) +#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET(reg_val) \ + (GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET_MOD(((reg_val)&GLB_PRFCNT_SIZE_HARDWARE_SIZE_MASK) >> \ + GLB_PRFCNT_SIZE_HARDWARE_SIZE_SHIFT)) +#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_SET(reg_val, value) \ + (((reg_val) & ~GLB_PRFCNT_SIZE_HARDWARE_SIZE_MASK) | \ + ((GLB_PRFCNT_SIZE_HARDWARE_SIZE_SET_MOD(value) << GLB_PRFCNT_SIZE_HARDWARE_SIZE_SHIFT) & \ + GLB_PRFCNT_SIZE_HARDWARE_SIZE_MASK)) +#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET_MOD(value) ((value) >> 8) +#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET_MOD(value) ((value) << 8) +#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT GPU_U(16) +#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK (GPU_U(0xFFFF) << GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT) +#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET(reg_val) \ + (GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET_MOD(((reg_val)&GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK) >> \ + GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT)) +#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET(reg_val, value) \ + (((reg_val) & ~GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK) | \ + ((GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET_MOD(value) << GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT) & \ + GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK)) + #endif /* _KBASE_CSF_REGISTERS_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c index 1c5dbc9..108e734 100644 --- 
a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c +++ b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,7 +29,7 @@ #include <csf/mali_kbase_csf_trace_buffer.h> #include <csf/ipa_control/mali_kbase_csf_ipa_control.h> #include <mali_kbase_reset_gpu.h> -#include <linux/string.h> +#include <csf/mali_kbase_csf_firmware_log.h> enum kbasep_soft_reset_status { RESET_SUCCESS = 0, @@ -257,68 +257,6 @@ static void kbase_csf_debug_dump_registers(struct kbase_device *kbdev) kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG))); } -static void kbase_csf_dump_firmware_trace_buffer(struct kbase_device *kbdev) -{ - u8 *buf, *p, *pnewline, *pend, *pendbuf; - unsigned int read_size, remaining_size; - struct firmware_trace_buffer *tb = - kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME); - - if (tb == NULL) { - dev_dbg(kbdev->dev, "Can't get the trace buffer, firmware trace dump skipped"); - return; - } - - buf = kmalloc(PAGE_SIZE + 1, GFP_KERNEL); - if (buf == NULL) { - dev_err(kbdev->dev, "Short of memory, firmware trace dump skipped"); - return; - } - - buf[PAGE_SIZE] = 0; - - p = buf; - pendbuf = &buf[PAGE_SIZE]; - - dev_err(kbdev->dev, "Firmware trace buffer dump:"); - while ((read_size = kbase_csf_firmware_trace_buffer_read_data(tb, p, - pendbuf - p))) { - pend = p + read_size; - p = buf; - - while (p < pend && (pnewline = memchr(p, '\n', pend - p))) { - /* Null-terminate the string */ - *pnewline = 0; - - dev_err(kbdev->dev, "FW> %s", p); - - p = pnewline + 1; - } - - remaining_size = pend - p; - - if (!remaining_size) { - p = buf; - } else if (remaining_size < PAGE_SIZE) { - /* Copy unfinished string to the start of the buffer */ - memmove(buf, p, remaining_size); - p = &buf[remaining_size]; - } else { - /* Print abnormal page-long string without newlines */ - dev_err(kbdev->dev, "FW> %s", buf); - p = buf; - } - } - - if (p != buf) { - /* Null-terminate and print last unfinished string */ - *p = 0; - dev_err(kbdev->dev, "FW> %s", buf); - } - - kfree(buf); -} - /** * kbase_csf_hwcnt_on_reset_error() - Sets HWCNT to appropriate state in the * event of an error during GPU reset. @@ -389,7 +327,7 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic if (!silent) { kbase_csf_debug_dump_registers(kbdev); if (likely(firmware_inited)) - kbase_csf_dump_firmware_trace_buffer(kbdev); + kbase_csf_firmware_log_dump_buffer(kbdev); } spin_lock_irqsave(&kbdev->hwaccess_lock, flags); diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c index 9924ab5..905923a 100644 --- a/mali_kbase/csf/mali_kbase_csf_scheduler.c +++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c @@ -33,6 +33,7 @@ #include <uapi/gpu/arm/midgard/mali_base_kernel.h> #include <mali_kbase_hwaccess_time.h> #include <trace/events/power.h> +#include "mali_kbase_csf_tiler_heap.h" /* Value to indicate that a queue group is not groups_to_schedule list */ #define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX) @@ -85,6 +86,21 @@ /* A GPU address space slot is reserved for MCU. */ #define NUM_RESERVED_AS_SLOTS (1) +/* Heap deferral time in ms from a CSG suspend to be included in reclaim scan list. 
The + * value corresponds to realtime priority CSGs. Other priorites are of derived time value + * from this, with the realtime case the highest delay. + */ +#define HEAP_RECLAIM_PRIO_DEFERRAL_MS (1000) + +/* Additional heap deferral time in ms if a CSG suspended is in state of WAIT_SYNC */ +#define HEAP_RECLAIM_WAIT_SYNC_DEFERRAL_MS (200) + +/* Tiler heap reclaim count size for limiting a count run length */ +#define HEAP_RECLAIM_COUNT_BATCH_SIZE (HEAP_SHRINKER_BATCH << 6) + +/* Tiler heap reclaim scan (free) method size for limiting a scan run length */ +#define HEAP_RECLAIM_SCAN_BATCH_SIZE (HEAP_SHRINKER_BATCH << 7) + static int scheduler_group_schedule(struct kbase_queue_group *group); static void remove_group_from_idle_wait(struct kbase_queue_group *const group); static @@ -298,7 +314,8 @@ static int force_scheduler_to_exit_sleep(struct kbase_device *kbdev) goto out; } - if (suspend_active_groups_on_powerdown(kbdev, true)) + ret = suspend_active_groups_on_powerdown(kbdev, true); + if (ret) goto out; kbase_pm_lock(kbdev); @@ -346,7 +363,7 @@ static enum hrtimer_restart tick_timer_callback(struct hrtimer *timer) struct kbase_device *kbdev = container_of(timer, struct kbase_device, csf.scheduler.tick_timer); - kbase_csf_scheduler_advance_tick(kbdev); + kbase_csf_scheduler_tick_advance(kbdev); return HRTIMER_NORESTART; } @@ -553,7 +570,7 @@ static void update_on_slot_queues_offsets(struct kbase_device *kbdev) for (j = 0; j < max_streams; ++j) { struct kbase_queue *const queue = group->bound_queues[j]; - if (queue) { + if (queue && queue->user_io_addr) { u64 const *const output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE); @@ -589,7 +606,7 @@ bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps); can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev); - KBASE_KTRACE_ADD(kbdev, SCHEDULER_CAN_IDLE, NULL, + KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND, NULL, ((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32)); if (!non_idle_offslot_grps) { @@ -614,7 +631,7 @@ bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) #endif } else { /* Advance the scheduling tick to get the non-idle suspended groups loaded soon */ - kbase_csf_scheduler_advance_tick_nolock(kbdev); + kbase_csf_scheduler_tick_advance_nolock(kbdev); } return ack_gpu_idle_event; @@ -686,6 +703,12 @@ static bool on_slot_group_idle_locked(struct kbase_queue_group *group) return (group->run_state == KBASE_CSF_GROUP_IDLE); } +static bool can_schedule_idle_group(struct kbase_queue_group *group) +{ + return (on_slot_group_idle_locked(group) || + (group->priority == KBASE_QUEUE_GROUP_PRIORITY_REALTIME)); +} + static bool queue_group_scheduled(struct kbase_queue_group *group) { return (group->run_state != KBASE_CSF_GROUP_INACTIVE && @@ -701,34 +724,39 @@ static bool queue_group_scheduled_locked(struct kbase_queue_group *group) } /** - * scheduler_wait_protm_quit() - Wait for GPU to exit protected mode. + * scheduler_protm_wait_quit() - Wait for GPU to exit protected mode. * * @kbdev: Pointer to the GPU device * * This function waits for the GPU to exit protected mode which is confirmed * when active_protm_grp is set to NULL. + * + * Return: true on success, false otherwise. 
*/ -static void scheduler_wait_protm_quit(struct kbase_device *kbdev) +static bool scheduler_protm_wait_quit(struct kbase_device *kbdev) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; long wt = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); long remaining; + bool success = true; lockdep_assert_held(&scheduler->lock); - KBASE_KTRACE_ADD(kbdev, SCHEDULER_WAIT_PROTM_QUIT, NULL, - jiffies_to_msecs(wt)); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_PROTM_WAIT_QUIT_START, NULL, jiffies_to_msecs(wt)); remaining = wait_event_timeout(kbdev->csf.event_wait, !kbase_csf_scheduler_protected_mode_in_use(kbdev), wt); - if (!remaining) + if (!remaining) { dev_warn(kbdev->dev, "[%llu] Timeout (%d ms), protm_quit wait skipped", kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms); + success = false; + } + + KBASE_KTRACE_ADD(kbdev, SCHEDULER_PROTM_WAIT_QUIT_END, NULL, jiffies_to_msecs(remaining)); - KBASE_KTRACE_ADD(kbdev, SCHEDULER_WAIT_PROTM_QUIT_DONE, NULL, - jiffies_to_msecs(remaining)); + return success; } /** @@ -738,13 +766,39 @@ static void scheduler_wait_protm_quit(struct kbase_device *kbdev) * * This function sends a ping request to the firmware and waits for the GPU * to exit protected mode. + * + * If the GPU does not exit protected mode, it is considered as hang. + * A GPU reset would then be triggered. */ static void scheduler_force_protm_exit(struct kbase_device *kbdev) { + unsigned long flags; + lockdep_assert_held(&kbdev->csf.scheduler.lock); kbase_csf_firmware_ping(kbdev); - scheduler_wait_protm_quit(kbdev); + + if (scheduler_protm_wait_quit(kbdev)) + return; + + dev_err(kbdev->dev, "Possible GPU hang in Protected mode"); + + spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); + if (kbdev->csf.scheduler.active_protm_grp) { + dev_err(kbdev->dev, + "Group-%d of context %d_%d ran in protected mode for too long on slot %d", + kbdev->csf.scheduler.active_protm_grp->handle, + kbdev->csf.scheduler.active_protm_grp->kctx->tgid, + kbdev->csf.scheduler.active_protm_grp->kctx->id, + kbdev->csf.scheduler.active_protm_grp->csg_nr); + } + spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); + + /* The GPU could be stuck in Protected mode. To prevent a hang, + * a GPU reset is performed. 
+ */ + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) + kbase_reset_gpu(kbdev); } /** @@ -1116,8 +1170,8 @@ static void update_idle_suspended_group_state(struct kbase_queue_group *group) return; new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps); - KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, - group, new_val); + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, + new_val); } int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group) @@ -1246,11 +1300,10 @@ static int halt_stream_sync(struct kbase_queue *queue) kbase_csf_firmware_cs_input_mask(stream, CS_REQ, CS_REQ_STATE_STOP, CS_REQ_STATE_MASK); + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQ, group, queue, 0u); kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, true); spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQ, group, queue, 0u); - /* Timed wait */ remaining = wait_event_timeout(kbdev->csf.event_wait, (CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK)) @@ -1321,8 +1374,7 @@ static int sched_halt_stream(struct kbase_queue *queue) long remaining; int slot; int err = 0; - const u32 group_schedule_timeout = - 20 * kbdev->csf.scheduler.csg_scheduling_period_ms; + const u32 group_schedule_timeout = kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT); if (WARN_ON(!group)) return -EINVAL; @@ -1736,8 +1788,8 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue) KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_START, group, queue, group->run_state); - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_STATUS_WAIT, queue->group, - queue, queue->status_wait); + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group, queue, + queue->status_wait); if (group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED) { err = -EIO; @@ -1789,9 +1841,9 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue) start_stream_sync(queue); } } - queue_delayed_work(system_long_wq, - &kbdev->csf.scheduler.ping_work, - msecs_to_jiffies(FIRMWARE_PING_INTERVAL_MS)); + queue_delayed_work(system_long_wq, &kbdev->csf.scheduler.ping_work, + msecs_to_jiffies(kbase_get_timeout_ms( + kbdev, CSF_FIRMWARE_PING_TIMEOUT))); } } @@ -1826,7 +1878,8 @@ static enum kbase_csf_csg_slot_state update_csg_slot_status( slot_state = CSG_SLOT_RUNNING; atomic_set(&csg_slot->state, slot_state); csg_slot->trigger_jiffies = jiffies; - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STARTED, csg_slot->resident_group, state); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_RUNNING, csg_slot->resident_group, + state); dev_dbg(kbdev->dev, "Group %u running on slot %d\n", csg_slot->resident_group->handle, slot); } @@ -1942,7 +1995,7 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend) flags); atomic_set(&csg_slot[slot].state, CSG_SLOT_DOWN2STOP); csg_slot[slot].trigger_jiffies = jiffies; - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP, group, halt_cmd); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP_REQ, group, halt_cmd); KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG( kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot); @@ -1985,10 +2038,10 @@ static bool evaluate_sync_update(struct kbase_queue *queue) sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr, &mapping); - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE, queue->group, - queue, queue->sync_ptr); - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_BLOCKED_REASON, - queue->group, queue, 
queue->blocked_reason); + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVAL_START, queue->group, queue, + queue->sync_ptr); + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_BLOCKED_REASON, queue->group, queue, + queue->blocked_reason); if (!sync_ptr) { dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX already freed", @@ -2003,11 +2056,11 @@ static bool evaluate_sync_update(struct kbase_queue *queue) (sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE)); sync_current_val = READ_ONCE(*sync_ptr); - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_CURRENT_VAL, queue->group, - queue, sync_current_val); + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_CUR_VAL, queue->group, queue, + sync_current_val); - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_TEST_VAL, queue->group, - queue, queue->sync_value); + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_TEST_VAL, queue->group, queue, + queue->sync_value); if (((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) && (sync_current_val > queue->sync_value)) || @@ -2024,8 +2077,7 @@ static bool evaluate_sync_update(struct kbase_queue *queue) kbase_phy_alloc_mapping_put(queue->kctx, mapping); out: - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVALUATED, - queue->group, queue, updated); + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVAL_END, queue->group, queue, updated); return updated; } @@ -2059,8 +2111,8 @@ bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo, queue->saved_cmd_ptr = cmd_ptr; #endif - KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_STATUS_WAIT, - queue->group, queue, status); + KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group, + queue, status); if (CS_STATUS_WAIT_SYNC_WAIT_GET(status)) { queue->status_wait = status; @@ -2114,12 +2166,10 @@ static void schedule_in_cycle(struct kbase_queue_group *group, bool force) * of work needs to be enforced in situation such as entering into * protected mode). 
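schedule_in_cycle() below now delegates to kbase_csf_scheduler_invoke_tock() instead of manipulating tock_pending_request and the delayed work directly. That helper's definition is not in these hunks; the sketch here is one plausible shape based on the inline code it replaces, and the pending_tock_work field name is an assumption:

/* Sketch only: coalesce repeated tock requests and kick the scheduler's
 * kthread worker with zero delay, mirroring the removed inline code.
 */
static inline void example_invoke_tock(struct kbase_device *kbdev)
{
	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;

	if (atomic_cmpxchg(&scheduler->pending_tock_work, false, true) == false)
		kthread_mod_delayed_work(&scheduler->csf_worker,
					 &scheduler->tock_work, 0);
}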
*/ - if ((likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) && - !scheduler->tock_pending_request) { - scheduler->tock_pending_request = true; + if (likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) { dev_dbg(kbdev->dev, "Kicking async for group %d\n", group->handle); - kthread_mod_delayed_work(&scheduler->csf_worker, &scheduler->tock_work, 0); + kbase_csf_scheduler_invoke_tock(kbdev); } } @@ -2146,7 +2196,7 @@ void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, list_add_tail(&group->link, &kctx->csf.sched.runnable_groups[group->priority]); kctx->csf.sched.num_runnable_grps++; - KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_INSERT_RUNNABLE, group, + KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_INSERT, group, kctx->csf.sched.num_runnable_grps); /* Add the kctx if not yet in runnable kctxs */ @@ -2154,7 +2204,7 @@ void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, /* First runnable csg, adds to the runnable_kctxs */ INIT_LIST_HEAD(&kctx->csf.link); list_add_tail(&kctx->csf.link, &scheduler->runnable_kctxs); - KBASE_KTRACE_ADD(kbdev, SCHEDULER_INSERT_RUNNABLE, kctx, 0u); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_INSERT, kctx, 0u); } scheduler->total_runnable_grps++; @@ -2211,7 +2261,7 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, if (kbase_prepare_to_reset_gpu(kctx->kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kctx->kbdev); - KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_EXIT_PROTM, + KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_PROTM_EXIT, scheduler->active_protm_grp, 0u); scheduler->active_protm_grp = NULL; } @@ -2241,13 +2291,12 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, } kctx->csf.sched.num_runnable_grps--; - KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_REMOVE_RUNNABLE, group, + KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_RUNNABLE_REMOVE, group, kctx->csf.sched.num_runnable_grps); new_head_grp = (!list_empty(list)) ? list_first_entry(list, struct kbase_queue_group, link) : NULL; - KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_HEAD_RUNNABLE, new_head_grp, - 0u); + KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_RUNNABLE_HEAD, new_head_grp, 0u); if (kctx->csf.sched.num_runnable_grps == 0) { struct kbase_context *new_head_kctx; @@ -2256,13 +2305,11 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, list_del_init(&kctx->csf.link); if (scheduler->top_ctx == kctx) scheduler->top_ctx = NULL; - KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_REMOVE_RUNNABLE, kctx, - 0u); + KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_RUNNABLE_KCTX_REMOVE, kctx, 0u); new_head_kctx = (!list_empty(kctx_list)) ? 
list_first_entry(kctx_list, struct kbase_context, csf.link) : NULL; - KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_HEAD_RUNNABLE, - new_head_kctx, 0u); + KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_RUNNABLE_KCTX_HEAD, new_head_kctx, 0u); } WARN_ON(scheduler->total_runnable_grps == 0); @@ -2289,7 +2336,7 @@ static void insert_group_to_idle_wait(struct kbase_queue_group *const group) list_add_tail(&group->link, &kctx->csf.sched.idle_wait_groups); kctx->csf.sched.num_idle_wait_grps++; - KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_INSERT_IDLE_WAIT, group, + KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_INSERT, group, kctx->csf.sched.num_idle_wait_grps); group->run_state = KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC; dev_dbg(kctx->kbdev->dev, @@ -2310,13 +2357,12 @@ static void remove_group_from_idle_wait(struct kbase_queue_group *const group) list_del_init(&group->link); WARN_ON(kctx->csf.sched.num_idle_wait_grps == 0); kctx->csf.sched.num_idle_wait_grps--; - KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_REMOVE_IDLE_WAIT, group, + KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_REMOVE, group, kctx->csf.sched.num_idle_wait_grps); new_head_grp = (!list_empty(list)) ? list_first_entry(list, struct kbase_queue_group, link) : NULL; - KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_HEAD_IDLE_WAIT, - new_head_grp, 0u); + KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_HEAD, new_head_grp, 0u); group->run_state = KBASE_CSF_GROUP_INACTIVE; } @@ -2342,8 +2388,7 @@ static void update_offslot_non_idle_cnt_for_faulty_grp(struct kbase_queue_group if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) { int new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC, - group, new_val); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val); } } @@ -2359,8 +2404,7 @@ static void update_offslot_non_idle_cnt_for_onslot_grp(struct kbase_queue_group if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) { int new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC, - group, new_val); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val); } } @@ -2380,15 +2424,15 @@ static void update_offslot_non_idle_cnt_on_grp_suspend( if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) { int new_val = atomic_inc_return( &scheduler->non_idle_offslot_grps); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, - group, new_val); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, + group, new_val); } } else { if (group->run_state != KBASE_CSF_GROUP_SUSPENDED) { int new_val = atomic_dec_return( &scheduler->non_idle_offslot_grps); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC, - group, new_val); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, + group, new_val); } } } else { @@ -2396,8 +2440,8 @@ static void update_offslot_non_idle_cnt_on_grp_suspend( if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) { int new_val = atomic_inc_return( &scheduler->non_idle_offslot_grps); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, - group, new_val); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, + new_val); } } } @@ -2436,6 +2480,145 @@ static bool confirm_cmd_buf_empty(struct kbase_queue const *queue) return cs_idle; } +static void detach_from_sched_reclaim_mgr(struct kbase_context *kctx) +{ + struct kbase_csf_scheduler 
*const scheduler = &kctx->kbdev->csf.scheduler; + struct kbase_kctx_heap_info *heap_info = &kctx->csf.sched.heap_info; + + lockdep_assert_held(&scheduler->lock); + + if (!list_empty(&heap_info->mgr_link)) { + WARN_ON(!heap_info->flags); + list_del_init(&heap_info->mgr_link); + + if (heap_info->flags & CSF_CTX_RECLAIM_CANDI_FLAG) + WARN_ON(atomic_sub_return(heap_info->nr_est_pages, + &scheduler->reclaim_mgr.est_cand_pages) < 0); + if (heap_info->flags & CSF_CTX_RECLAIM_SCAN_FLAG) + WARN_ON(atomic_sub_return(heap_info->nr_scan_pages, + &scheduler->reclaim_mgr.mgr_scan_pages) < 0); + + dev_dbg(kctx->kbdev->dev, "Reclaim_mgr_detach: ctx_%d_%d, flags = 0x%x\n", + kctx->tgid, kctx->id, heap_info->flags); + /* Clear on detaching */ + heap_info->nr_est_pages = 0; + heap_info->nr_scan_pages = 0; + heap_info->flags = 0; + } +} + +static void attach_to_sched_reclaim_mgr(struct kbase_context *kctx) +{ + struct kbase_kctx_heap_info *const heap_info = &kctx->csf.sched.heap_info; + struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler; + + lockdep_assert_held(&scheduler->lock); + + if (WARN_ON(!list_empty(&heap_info->mgr_link))) + list_del_init(&heap_info->mgr_link); + + list_add_tail(&heap_info->mgr_link, &scheduler->reclaim_mgr.candidate_ctxs); + + /* Read the kctx's tiler heap estimate of pages; this separates it away + * from the kctx's tiler heap side updates/changes. The value remains static + * for the duration of this kctx on the reclaim manager's candidate_ctxs list. + */ + heap_info->nr_est_pages = (u32)atomic_read(&kctx->csf.tiler_heaps.est_count_pages); + atomic_add(heap_info->nr_est_pages, &scheduler->reclaim_mgr.est_cand_pages); + + heap_info->attach_jiffies = jiffies; + heap_info->flags = CSF_CTX_RECLAIM_CANDI_FLAG; + + dev_dbg(kctx->kbdev->dev, "Reclaim_mgr_attach: ctx_%d_%d, est_count_pages = %u\n", + kctx->tgid, kctx->id, heap_info->nr_est_pages); +} + +static void update_kctx_heap_info_on_grp_on_slot(struct kbase_queue_group *group) +{ + struct kbase_context *kctx = group->kctx; + struct kbase_kctx_heap_info *heap_info = &kctx->csf.sched.heap_info; + + lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); + + heap_info->on_slot_grps++; + /* If the kctx's on-slot CSG count transitioned 0 => 1, detach the kctx from scheduler->reclaim_mgr */ + if (heap_info->on_slot_grps == 1) { + dev_dbg(kctx->kbdev->dev, + "CSG_%d_%d_%d on-slot, remove kctx from reclaim manager\n", + group->kctx->tgid, group->kctx->id, group->handle); + + detach_from_sched_reclaim_mgr(kctx); + } +} + +static void update_kctx_heap_info_on_grp_evict(struct kbase_queue_group *group) +{ + struct kbase_context *kctx = group->kctx; + struct kbase_kctx_heap_info *const heap_info = &kctx->csf.sched.heap_info; + struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler; + const u32 num_groups = kctx->kbdev->csf.global_iface.group_num; + u32 on_slot_grps = 0; + u32 i; + + lockdep_assert_held(&scheduler->lock); + + /* Group eviction from the scheduler is a bit more complex, but fairly + * infrequent in operation. Take the opportunity to actually count the + * on-slot CSGs from the given kctx, for robustness and clearer code logic. 
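attach_to_sched_reclaim_mgr() above records attach_jiffies so that a freshly off-slot context is not reclaimed immediately. A small sketch of how the HEAP_RECLAIM_*_DEFERRAL_MS values are intended to gate reclaim eligibility; the helper is illustrative only and applies the realtime deferral to all priorities, since the per-priority derivation is not spelled out in these hunks:

/* Illustrative only: a context attached to the reclaim manager becomes
 * eligible for scanning once its deferral window has elapsed, with extra
 * grace time when it still has CSGs suspended on a sync wait.
 */
static bool example_reclaim_deferral_expired(const struct kbase_kctx_heap_info *heap_info,
					     bool has_wait_sync_grps)
{
	unsigned long defer_ms = HEAP_RECLAIM_PRIO_DEFERRAL_MS;

	if (has_wait_sync_grps)
		defer_ms += HEAP_RECLAIM_WAIT_SYNC_DEFERRAL_MS;

	return time_after(jiffies,
			  heap_info->attach_jiffies + msecs_to_jiffies(defer_ms));
}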
+ */ + for_each_set_bit(i, scheduler->csg_inuse_bitmap, num_groups) { + struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i]; + struct kbase_queue_group *grp = csg_slot->resident_group; + + if (unlikely(!grp)) + continue; + + if (grp->kctx == kctx) + on_slot_grps++; + } + + heap_info->on_slot_grps = on_slot_grps; + + /* If the kctx has no other CSGs on-slot, handle the heap reclaim related actions */ + if (!heap_info->on_slot_grps) { + if (kctx->csf.sched.num_runnable_grps || kctx->csf.sched.num_idle_wait_grps) { + /* The kctx has other operational CSGs, attach it if not yet done */ + if (list_empty(&heap_info->mgr_link)) { + dev_dbg(kctx->kbdev->dev, + "CSG_%d_%d_%d evict, add kctx to reclaim manager\n", + group->kctx->tgid, group->kctx->id, group->handle); + + attach_to_sched_reclaim_mgr(kctx); + } + } else { + /* The kctx is a zombie after the group eviction, drop it out */ + dev_dbg(kctx->kbdev->dev, + "CSG_%d_%d_%d evict leading to zombie kctx, dettach from reclaim manager\n", + group->kctx->tgid, group->kctx->id, group->handle); + + detach_from_sched_reclaim_mgr(kctx); + } + } +} + +static void update_kctx_heap_info_on_grp_suspend(struct kbase_queue_group *group) +{ + struct kbase_context *kctx = group->kctx; + struct kbase_kctx_heap_info *heap_info = &kctx->csf.sched.heap_info; + + lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); + + if (!WARN_ON(heap_info->on_slot_grps == 0)) + heap_info->on_slot_grps--; + /* If the kctx has no CSGs on-slot, attach it to scheduler's reclaim manager */ + if (heap_info->on_slot_grps == 0) { + dev_dbg(kctx->kbdev->dev, "CSG_%d_%d_%d off-slot, add kctx to reclaim manager\n", + group->kctx->tgid, group->kctx->id, group->handle); + + attach_to_sched_reclaim_mgr(kctx); + } +} + static void save_csg_slot(struct kbase_queue_group *group) { struct kbase_device *kbdev = group->kctx->kbdev; @@ -2506,6 +2689,7 @@ static void save_csg_slot(struct kbase_queue_group *group) } update_offslot_non_idle_cnt_on_grp_suspend(group); + update_kctx_heap_info_on_grp_suspend(group); } } @@ -2640,7 +2824,7 @@ static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio) group->handle, group->kctx->tgid, group->kctx->id, slot, prev_prio, prio); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_PRIO_UPDATE, group, prev_prio); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_PRIO_UPDATE, group, prev_prio); set_bit(slot, kbdev->csf.scheduler.csg_slots_prio_update); } @@ -2790,12 +2974,13 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, dev_dbg(kbdev->dev, "Starting group %d of context %d_%d on slot %d with priority %u\n", group->handle, kctx->tgid, kctx->id, slot, prio); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_START, group, - (((u64)ep_cfg) << 32) | - ((((u32)kctx->as_nr) & 0xF) << 16) | - (state & (CSG_REQ_STATE_MASK >> CS_REQ_STATE_SHIFT))); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_START_REQ, group, + (((u64)ep_cfg) << 32) | ((((u32)kctx->as_nr) & 0xF) << 16) | + (state & (CSG_REQ_STATE_MASK >> CS_REQ_STATE_SHIFT))); kbasep_platform_event_work_begin(group); + /* Update the heap reclaim manager */ + update_kctx_heap_info_on_grp_on_slot(group); /* Programming a slot consumes a group from scanout */ update_offslot_non_idle_cnt_for_onslot_grp(group); @@ -2835,8 +3020,8 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, group->run_state == KBASE_CSF_GROUP_RUNNABLE)) { int new_val = atomic_dec_return( &scheduler->non_idle_offslot_grps); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC, - group, 
new_val); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, + new_val); } for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { @@ -2860,14 +3045,16 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, if (fault) group->run_state = KBASE_CSF_GROUP_FAULT_EVICTED; - KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_EVICT_SCHED, group, - (((u64)scheduler->total_runnable_grps) << 32) | - ((u32)group->run_state)); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_EVICT, group, + (((u64)scheduler->total_runnable_grps) << 32) | + ((u32)group->run_state)); dev_dbg(kbdev->dev, "group %d exited scheduler, num_runnable_grps %d\n", group->handle, scheduler->total_runnable_grps); /* Notify a group has been evicted */ wake_up_all(&kbdev->csf.event_wait); } + + update_kctx_heap_info_on_grp_evict(group); } static int term_group_sync(struct kbase_queue_group *group) @@ -2879,7 +3066,8 @@ static int term_group_sync(struct kbase_queue_group *group) term_csg_slot(group); remaining = wait_event_timeout(kbdev->csf.event_wait, - csg_slot_stopped_locked(kbdev, group->csg_nr), remaining); + group->cs_unrecoverable || csg_slot_stopped_locked(kbdev, group->csg_nr), + remaining); if (!remaining) { dev_warn(kbdev->dev, "[%llu] term request timeout (%d ms) for group %d of context %d_%d on slot %d", @@ -2900,6 +3088,7 @@ void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group) { struct kbase_device *kbdev = group->kctx->kbdev; struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + bool wait_for_termination = true; bool on_slot; kbase_reset_gpu_assert_failed_or_prevented(kbdev); @@ -2914,39 +3103,28 @@ void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group) #ifdef KBASE_PM_RUNTIME /* If the queue group is on slot and Scheduler is in SLEEPING state, - * then we need to wait here for Scheduler to exit the sleep state - * (i.e. wait for the runtime suspend or power down of GPU). This would - * be better than aborting the power down. The group will be suspended - * anyways on power down, so won't have to send the CSG termination - * request to FW. + * then we need to wake up the Scheduler to exit the sleep state rather + * than waiting for the runtime suspend or power down of GPU. + * The group termination is usually triggered in the context of Application + * thread and it has been seen that certain Apps can destroy groups at + * random points and not necessarily when the App is exiting. */ if (on_slot && (scheduler->state == SCHED_SLEEPING)) { - if (wait_for_scheduler_to_exit_sleep(kbdev)) { + scheduler_wakeup(kbdev, true); + + /* Wait for MCU firmware to start running */ + if (kbase_csf_scheduler_wait_mcu_active(kbdev)) { dev_warn( kbdev->dev, - "Wait for scheduler to exit sleep state timedout when terminating group %d of context %d_%d on slot %d", + "[%llu] Wait for MCU active failed when terminating group %d of context %d_%d on slot %d", + kbase_backend_get_cycle_cnt(kbdev), group->handle, group->kctx->tgid, group->kctx->id, group->csg_nr); - - scheduler_wakeup(kbdev, true); - - /* Wait for MCU firmware to start running */ - if (kbase_csf_scheduler_wait_mcu_active(kbdev)) - dev_warn( - kbdev->dev, - "[%llu] Wait for MCU active failed when terminating group %d of context %d_%d on slot %d", - kbase_backend_get_cycle_cnt(kbdev), - group->handle, group->kctx->tgid, - group->kctx->id, group->csg_nr); + /* No point in waiting for CSG termination if MCU didn't + * become active. 
+ */ + wait_for_termination = false; } - - /* Check the group state again as scheduler lock would have been - * released when waiting for the exit from SLEEPING state. - */ - if (!queue_group_scheduled_locked(group)) - goto unlock; - - on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group); } #endif if (!on_slot) { @@ -2954,7 +3132,11 @@ void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group) } else { bool as_faulty; - term_group_sync(group); + if (likely(wait_for_termination)) + term_group_sync(group); + else + term_csg_slot(group); + /* Treat the csg been terminated */ as_faulty = cleanup_csg_slot(group); /* remove from the scheduler list */ @@ -3013,6 +3195,8 @@ static int scheduler_group_schedule(struct kbase_queue_group *group) if (protm_grp && protm_grp != group) { clear_bit((unsigned int)group->csg_nr, scheduler->csg_slots_idle_mask); + /* Request the update to confirm the condition inferred. */ + group->reevaluate_idle_status = true; KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, scheduler->csg_slots_idle_mask[0]); } @@ -3039,8 +3223,7 @@ static int scheduler_group_schedule(struct kbase_queue_group *group) /* A new group into the scheduler */ new_val = atomic_inc_return( &kbdev->csf.scheduler.non_idle_offslot_grps); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, - group, new_val); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, new_val); } /* Since a group has become active now, check if GPU needs to be @@ -3706,7 +3889,7 @@ void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev, */ WARN_ON(!kbase_reset_gpu_is_active(kbdev)); - KBASE_KTRACE_ADD(kbdev, EVICT_CTX_SLOTS, kctx, 0u); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_EVICT_CTX_SLOTS_START, kctx, 0u); for (slot = 0; slot < num_groups; slot++) { group = kbdev->csf.scheduler.csg_slots[slot].resident_group; if (group && group->kctx == kctx) { @@ -3783,8 +3966,8 @@ static bool scheduler_slot_protm_ack(struct kbase_device *const kbdev, struct kbase_queue *queue = group->bound_queues[i]; clear_bit(i, group->protm_pending_bitmap); - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, PROTM_PENDING_CLEAR, group, - queue, group->protm_pending_bitmap[0]); + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_CLEAR, group, queue, + group->protm_pending_bitmap[0]); if (!WARN_ON(!queue) && queue->enabled) { struct kbase_csf_cmd_stream_info *stream = @@ -3820,6 +4003,42 @@ static bool scheduler_slot_protm_ack(struct kbase_device *const kbdev, } /** + * protm_enter_set_next_pending_seq - Update the scheduler's field of + * tick_protm_pending_seq to that from the next available on-slot protm + * pending CSG. + * + * @kbdev: Pointer to the GPU device. + * + * If applicable, the function updates the scheduler's tick_protm_pending_seq + * field from the next available on-slot protm pending CSG. If not, the field + * is set to KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID. 
+ */ +static void protm_enter_set_next_pending_seq(struct kbase_device *const kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + u32 num_groups = kbdev->csf.global_iface.group_num; + u32 num_csis = kbdev->csf.global_iface.groups[0].stream_num; + DECLARE_BITMAP(active_csgs, MAX_SUPPORTED_CSGS) = { 0 }; + u32 i; + + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + + bitmap_xor(active_csgs, scheduler->csg_slots_idle_mask, scheduler->csg_inuse_bitmap, + num_groups); + /* Reset the tick's pending protm seq number to invalid initially */ + scheduler->tick_protm_pending_seq = KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID; + for_each_set_bit(i, active_csgs, num_groups) { + struct kbase_queue_group *group = scheduler->csg_slots[i].resident_group; + + /* Set to the next pending protm group's scan_seq_number */ + if ((group != scheduler->active_protm_grp) && + (!bitmap_empty(group->protm_pending_bitmap, num_csis)) && + (group->scan_seq_num < scheduler->tick_protm_pending_seq)) + scheduler->tick_protm_pending_seq = group->scan_seq_num; + } +} + +/** * scheduler_group_check_protm_enter - Request the given group to be evaluated * for triggering the protected mode. * @@ -3842,6 +4061,12 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, lockdep_assert_held(&scheduler->lock); + /* This lock is taken to prevent the issuing of MMU command during the + * transition to protected mode. This helps avoid the scenario where the + * entry to protected mode happens with a memory region being locked and + * the same region is then accessed by the GPU in protected mode. + */ + mutex_lock(&kbdev->mmu_hw_mutex); spin_lock_irqsave(&scheduler->interrupt_lock, flags); /* Check if the previous transition to enter & exit the protected @@ -3849,8 +4074,7 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, */ protm_in_use = kbase_csf_scheduler_protected_mode_in_use(kbdev) || kbdev->protected_mode; - KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_CHECK_PROTM_ENTER, input_grp, - protm_in_use); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER_CHECK, input_grp, protm_in_use); /* Firmware samples the PROTM_PEND ACK bit for CSs when * Host sends PROTM_ENTER global request. So if PROTM_PEND ACK bit @@ -3890,22 +4114,62 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, /* Switch to protected mode */ scheduler->active_protm_grp = input_grp; - KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_ENTER_PROTM, - input_grp, 0u); - /* Reset the tick's pending protm seq number */ - scheduler->tick_protm_pending_seq = - KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER, input_grp, + 0u); kbase_csf_enter_protected_mode(kbdev); + /* Set the pending protm seq number to the next one */ + protm_enter_set_next_pending_seq(kbdev); + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); kbase_csf_wait_protected_mode_enter(kbdev); + mutex_unlock(&kbdev->mmu_hw_mutex); + + scheduler->protm_enter_time = ktime_get_raw(); + return; } } } spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); +} + +/** + * scheduler_check_pmode_progress - Check if protected mode execution is progressing + * + * @kbdev: Pointer to the GPU device. + * + * This function is called when the GPU is in protected mode. + * + * It will check if the time spent in protected mode is less + * than CSF_SCHED_PROTM_PROGRESS_TIMEOUT. 
If not, a PROTM_EXIT + * request is sent to the FW. + */ +static void scheduler_check_pmode_progress(struct kbase_device *kbdev) +{ + u64 protm_spent_time_ms; + u64 protm_progress_timeout = + kbase_get_timeout_ms(kbdev, CSF_SCHED_PROTM_PROGRESS_TIMEOUT); + s64 diff_ms_signed = + ktime_ms_delta(ktime_get_raw(), kbdev->csf.scheduler.protm_enter_time); + + if (diff_ms_signed < 0) + return; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + protm_spent_time_ms = (u64)diff_ms_signed; + if (protm_spent_time_ms < protm_progress_timeout) + return; + + dev_dbg(kbdev->dev, "Protected mode progress timeout: %llu >= %llu", + protm_spent_time_ms, protm_progress_timeout); + + /* Prompt the FW to exit protected mode */ + scheduler_force_protm_exit(kbdev); } static void scheduler_apply(struct kbase_device *kbdev) @@ -4021,7 +4285,7 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, } if (queue_group_idle_locked(group)) { - if (on_slot_group_idle_locked(group)) + if (can_schedule_idle_group(group)) list_add_tail(&group->link_to_schedule, &scheduler->idle_groups_to_schedule); continue; @@ -4107,10 +4371,9 @@ static void scheduler_rotate_groups(struct kbase_device *kbdev) new_head_grp = (!list_empty(list)) ? list_first_entry(list, struct kbase_queue_group, link) : NULL; - KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_ROTATE_RUNNABLE, - top_grp, top_ctx->csf.sched.num_runnable_grps); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_HEAD_RUNNABLE, - new_head_grp, 0u); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_ROTATE, top_grp, + top_ctx->csf.sched.num_runnable_grps); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_HEAD, new_head_grp, 0u); dev_dbg(kbdev->dev, "groups rotated for a context, num_runnable_groups: %u\n", scheduler->top_ctx->csf.sched.num_runnable_grps); @@ -4141,13 +4404,12 @@ static void scheduler_rotate_ctxs(struct kbase_device *kbdev) struct kbase_context *new_head_kctx; list_move_tail(&pos->csf.link, list); - KBASE_KTRACE_ADD(kbdev, SCHEDULER_ROTATE_RUNNABLE, pos, - 0u); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_ROTATE, pos, 0u); new_head_kctx = (!list_empty(list)) ? list_first_entry(list, struct kbase_context, csf.link) : NULL; - KBASE_KTRACE_ADD(kbdev, SCHEDULER_HEAD_RUNNABLE, - new_head_kctx, 0u); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_HEAD, new_head_kctx, + 0u); dev_dbg(kbdev->dev, "contexts rotated\n"); } } @@ -4162,12 +4424,17 @@ static void scheduler_rotate_ctxs(struct kbase_device *kbdev) * @kbdev: Pointer to the GPU device. * @csg_bitmap: Bitmap of the CSG slots for which * the status update request completed successfully. - * @failed_csg_bitmap: Bitmap of the CSG slots for which + * @failed_csg_bitmap: Bitmap of the idle CSG slots for which * the status update request timedout. * * This function sends a CSG status update request for all the CSG slots - * present in the bitmap scheduler->csg_slots_idle_mask and wait for the - * request to complete. + * present in the bitmap scheduler->csg_slots_idle_mask. Additionally, if + * the group's 'reevaluate_idle_status' field is set, the nominally non-idle + * slots are also included in the status update for a confirmation of their + * status. The function wait for the status update request to complete and + * returns the update completed slots bitmap and any timed out idle-flagged + * slots bitmap. + * * The bits set in the scheduler->csg_slots_idle_mask bitmap are cleared by * this function. 
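The status-update mechanism documented above is a request/ack toggle handshake per CSG slot: the host flips the STATUS_UPDATE request bit so it no longer matches the ack bit, rings the doorbell, and the firmware flips the ack bit back once the status fields have been refreshed. A minimal per-slot sketch using the firmware interface accessors that appear in the hunk below; the aggregation over slots and the timed wait are omitted:

/* Sketch of the per-slot STATUS_UPDATE request; the real code batches all
 * requested slots into one doorbell ring and then waits, with a timeout, for
 * every toggled REQ bit to be acknowledged.
 */
static void example_request_csg_status_update(struct kbase_csf_cmd_stream_group_info *ginfo)
{
	u32 csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);

	/* Toggle the request bit away from the current ack value */
	csg_req ^= CSG_REQ_STATUS_UPDATE_MASK;
	kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
					  CSG_REQ_STATUS_UPDATE_MASK);
}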
*/ @@ -4179,6 +4446,7 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; unsigned long flags, i; + u32 active_chk = 0; lockdep_assert_held(&scheduler->lock); @@ -4190,6 +4458,7 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, struct kbase_csf_cmd_stream_group_info *const ginfo = &global_iface->groups[i]; u32 csg_req; + bool idle_flag; if (WARN_ON(!group)) { clear_bit(i, scheduler->csg_inuse_bitmap); @@ -4197,30 +4466,47 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, continue; } - if (test_bit(i, scheduler->csg_slots_idle_mask)) { - clear_bit(i, scheduler->csg_slots_idle_mask); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, - scheduler->csg_slots_idle_mask[0]); + idle_flag = test_bit(i, scheduler->csg_slots_idle_mask); + if (idle_flag || group->reevaluate_idle_status) { + if (idle_flag) { +#ifdef CONFIG_MALI_DEBUG + if (!bitmap_empty(group->protm_pending_bitmap, + ginfo->stream_num)) { + dev_warn(kbdev->dev, + "Idle bit set for group %d of ctx %d_%d on slot %d with pending protm execution", + group->handle, group->kctx->tgid, + group->kctx->id, (int)i); + } +#endif + clear_bit(i, scheduler->csg_slots_idle_mask); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, + scheduler->csg_slots_idle_mask[0]); + } else { + /* Updates include slots for which reevaluation is needed. + * Here one tracks the extra included slots in active_chk. + * For protm pending slots, their status of activeness are + * assured so no need to request an update. + */ + active_chk |= BIT(i); + group->reevaluate_idle_status = false; + } KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_UPDATE_IDLE_SLOT_REQ, group, i); - csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); csg_req ^= CSG_REQ_STATUS_UPDATE_MASK; kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, CSG_REQ_STATUS_UPDATE_MASK); - set_bit(i, csg_bitmap); - } else if (group->run_state == KBASE_CSF_GROUP_IDLE) { - /* In interrupt context, some previously 'nominal' idle - * on-slot group could have been de-idled. Its idle flag may - * have been cleared, mark the correct run_state for the next - * tick/tock cycle here in the scheduler process context. + /* Track the slot update requests in csg_bitmap. + * Note, if the scheduler requested extended update, the resulting + * csg_bitmap would be the idle_flags + active_chk. Otherwise it's + * identical to the idle_flags. */ + set_bit(i, csg_bitmap); + } else { group->run_state = KBASE_CSF_GROUP_RUNNABLE; } } - /* All the idle flags transferred to csg_bitmap, check its empty here */ - WARN_ON(!bitmap_empty(scheduler->csg_slots_idle_mask, num_groups)); /* The groups are aggregated into a single kernel doorbell request */ if (!bitmap_empty(csg_bitmap, num_groups)) { @@ -4243,9 +4529,19 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, /* Store the bitmap of timed out slots */ bitmap_copy(failed_csg_bitmap, csg_bitmap, num_groups); csg_bitmap[0] = ~csg_bitmap[0] & db_slots; + + /* Mask off any failed bit position contributed from active ones, as the + * intention is to retain the failed bit pattern contains only those from + * idle flags reporting back to the caller. This way, any failed to update + * original idle flag would be kept as 'idle' (an informed guess, as the + * update did not come to a conclusive result). So will be the failed + * active ones be treated as still 'non-idle'. 
This is for a graceful + * handling to the unexpected timeout condition. + */ + failed_csg_bitmap[0] &= ~active_chk; + } else { - KBASE_KTRACE_ADD(kbdev, SLOTS_STATUS_UPDATE_ACK, NULL, - db_slots); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_UPDATE_IDLE_SLOTS_ACK, NULL, db_slots); csg_bitmap[0] = db_slots; } } else { @@ -4326,8 +4622,7 @@ static void scheduler_scan_idle_groups(struct kbase_device *kbdev) list_for_each_entry_safe(group, n, &scheduler->idle_groups_to_schedule, link_to_schedule) { - - WARN_ON(!on_slot_group_idle_locked(group)); + WARN_ON(!can_schedule_idle_group(group)); if (!scheduler->ngrp_to_schedule) { /* keep the top csg's origin */ @@ -4462,7 +4757,7 @@ static bool all_on_slot_groups_remained_idle(struct kbase_device *kbdev) u64 const *output_addr; u64 cur_extract_ofs; - if (!queue) + if (!queue || !queue->user_io_addr) continue; output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE); @@ -4569,7 +4864,7 @@ static bool scheduler_suspend_on_idle(struct kbase_device *kbdev) atomic_read( &kbdev->csf.scheduler.non_idle_offslot_grps)); /* Bring forward the next tick */ - kbase_csf_scheduler_advance_tick(kbdev); + kbase_csf_scheduler_tick_advance(kbdev); return false; } @@ -4592,14 +4887,14 @@ static void gpu_idle_worker(struct work_struct *work) bool scheduler_is_idle_suspendable = false; bool all_groups_suspended = false; - KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_BEGIN, NULL, 0u); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_START, NULL, 0u); #define __ENCODE_KTRACE_INFO(reset, idle, all_suspend) \ (((u32)reset) | (((u32)idle) << 4) | (((u32)all_suspend) << 8)) if (kbase_reset_gpu_try_prevent(kbdev)) { dev_warn(kbdev->dev, "Quit idle for failing to prevent gpu reset.\n"); - KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_END, NULL, + KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_END, NULL, __ENCODE_KTRACE_INFO(true, false, false)); return; } @@ -4614,11 +4909,11 @@ static void gpu_idle_worker(struct work_struct *work) scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev); if (scheduler_is_idle_suspendable) { - KBASE_KTRACE_ADD(kbdev, GPU_IDLE_HANDLING_START, NULL, + KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_HANDLING_START, NULL, kbase_csf_ktrace_gpu_cycle_cnt(kbdev)); #ifdef KBASE_PM_RUNTIME if (kbase_pm_gpu_sleep_allowed(kbdev) && - scheduler->total_runnable_grps) + kbase_csf_scheduler_get_nr_active_csgs(kbdev)) scheduler_sleep_on_idle(kbdev); else #endif @@ -4630,9 +4925,8 @@ unlock: #endif mutex_unlock(&scheduler->lock); kbase_reset_gpu_allow(kbdev); - KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_END, NULL, - __ENCODE_KTRACE_INFO(false, - scheduler_is_idle_suspendable, + KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_END, NULL, + __ENCODE_KTRACE_INFO(false, scheduler_is_idle_suspendable, all_groups_suspended)); #undef __ENCODE_KTRACE_INFO } @@ -4925,7 +5219,7 @@ static int scheduler_prepare(struct kbase_device *kbdev) */ atomic_set(&scheduler->non_idle_offslot_grps, scheduler->non_idle_scanout_grps); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, NULL, + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, NULL, scheduler->non_idle_scanout_grps); /* Adds those idle but runnable groups to the scanout list */ @@ -5123,8 +5417,12 @@ redo_local_tock: dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d", protm_grp->handle); new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC, - protm_grp, new_val); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, 
SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, protm_grp, + new_val); + + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + + scheduler_check_pmode_progress(kbdev); } else if (scheduler->top_grp) { if (protm_grp) dev_dbg(kbdev->dev, "Scheduler drop protm exec: group-%d", @@ -5178,11 +5476,9 @@ redo_local_tock: goto redo_local_tock; } } - - return; + } else { + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); } - - spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); } /** @@ -5236,14 +5532,11 @@ static bool can_skip_scheduling(struct kbase_device *kbdev) static void schedule_on_tock(struct kthread_work *work) { - struct kbase_device *kbdev = container_of(work, struct kbase_device, - csf.scheduler.tock_work.work); + struct kbase_device *kbdev = + container_of(work, struct kbase_device, csf.scheduler.tock_work.work); struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; int err; - /* Tock work item is serviced */ - scheduler->tock_pending_request = false; - err = kbase_reset_gpu_try_prevent(kbdev); /* Regardless of whether reset failed or is currently happening, exit * early @@ -5259,8 +5552,9 @@ static void schedule_on_tock(struct kthread_work *work) scheduler->state = SCHED_BUSY; /* Undertaking schedule action steps */ - KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK, NULL, 0u); - schedule_actions(kbdev, false); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_START, NULL, 0u); + while (atomic_cmpxchg(&scheduler->pending_tock_work, true, false) == true) + schedule_actions(kbdev, false); /* Record time information on a non-skipped tock */ scheduler->last_schedule = jiffies; @@ -5284,8 +5578,8 @@ exit_no_schedule_unlock: static void schedule_on_tick(struct kthread_work *work) { - struct kbase_device *kbdev = container_of(work, struct kbase_device, - csf.scheduler.tick_work); + struct kbase_device *kbdev = + container_of(work, struct kbase_device, csf.scheduler.tick_work); struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; int err = kbase_reset_gpu_try_prevent(kbdev); @@ -5304,8 +5598,7 @@ static void schedule_on_tick(struct kthread_work *work) scheduler->state = SCHED_BUSY; /* Undertaking schedule action steps */ - KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK, NULL, - scheduler->total_runnable_grps); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_START, NULL, scheduler->total_runnable_grps); schedule_actions(kbdev, true); /* Record time information */ @@ -5566,8 +5859,7 @@ static bool scheduler_handle_reset_in_protected_mode(struct kbase_device *kbdev) * anyways. 
*/ new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, - group, new_val); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, new_val); } unlock: @@ -5575,10 +5867,15 @@ unlock: return suspend_on_slot_groups; } +static void cancel_tick_work(struct kbase_csf_scheduler *const scheduler) +{ + kthread_cancel_work_sync(&scheduler->tick_work); +} + static void cancel_tock_work(struct kbase_csf_scheduler *const scheduler) { + atomic_set(&scheduler->pending_tock_work, false); kthread_cancel_delayed_work_sync(&scheduler->tock_work); - scheduler->tock_pending_request = false; } static void scheduler_inner_reset(struct kbase_device *kbdev) @@ -5592,7 +5889,7 @@ static void scheduler_inner_reset(struct kbase_device *kbdev) /* Cancel any potential queued delayed work(s) */ cancel_delayed_work_sync(&scheduler->gpu_idle_work); cancel_tick_timer(kbdev); - kthread_cancel_work_sync(&scheduler->tick_work); + cancel_tick_work(scheduler); cancel_tock_work(scheduler); cancel_delayed_work_sync(&scheduler->ping_work); @@ -5601,8 +5898,8 @@ static void scheduler_inner_reset(struct kbase_device *kbdev) spin_lock_irqsave(&scheduler->interrupt_lock, flags); bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS); if (scheduler->active_protm_grp) - KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_EXIT_PROTM, - scheduler->active_protm_grp, 0u); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_EXIT, scheduler->active_protm_grp, + 0u); scheduler->active_protm_grp = NULL; memset(kbdev->csf.scheduler.csg_slots, 0, num_groups * sizeof(struct kbase_csf_csg_slot)); @@ -5625,7 +5922,7 @@ void kbase_csf_scheduler_reset(struct kbase_device *kbdev) WARN_ON(!kbase_reset_gpu_is_active(kbdev)); - KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET, NULL, 0u); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET_START, NULL, 0u); if (scheduler_handle_reset_in_protected_mode(kbdev) && !suspend_active_queue_groups_on_reset(kbdev)) { @@ -5727,9 +6024,9 @@ static void firmware_aliveness_monitor(struct work_struct *work) kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } else if (kbase_csf_scheduler_get_nr_active_csgs(kbdev) == 1) { - queue_delayed_work(system_long_wq, - &kbdev->csf.scheduler.ping_work, - msecs_to_jiffies(FIRMWARE_PING_INTERVAL_MS)); + queue_delayed_work( + system_long_wq, &kbdev->csf.scheduler.ping_work, + msecs_to_jiffies(kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_PING_TIMEOUT))); } kbase_pm_context_idle(kbdev); @@ -6036,7 +6333,7 @@ static bool check_sync_update_for_on_slot_group( stream, CS_STATUS_WAIT); unsigned long flags; - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_STATUS_WAIT, + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group, queue, status); if (!CS_STATUS_WAIT_SYNC_WAIT_GET(status)) @@ -6080,6 +6377,10 @@ static bool check_sync_update_for_on_slot_group( scheduler->csg_slots_idle_mask[0]); spin_unlock_irqrestore( &scheduler->interrupt_lock, flags); + /* Request the scheduler to confirm the condition inferred + * here inside the protected mode. + */ + group->reevaluate_idle_status = true; group->run_state = KBASE_CSF_GROUP_RUNNABLE; } @@ -6176,11 +6477,6 @@ static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev) continue; if (check_sync_update_for_on_slot_group(group)) { - /* As sync update has been performed for an on-slot - * group, when MCU is in sleep state, ring the doorbell - * so that FW can re-evaluate the SYNC_WAIT on wakeup. 
- */ - kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); scheduler_wakeup(kbdev, true); return; } @@ -6234,7 +6530,7 @@ static void check_group_sync_update_worker(struct kthread_work *work) mutex_lock(&scheduler->lock); - KBASE_KTRACE_ADD(kbdev, GROUP_SYNC_UPDATE_WORKER_BEGIN, kctx, 0u); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START, kctx, 0u); if (kctx->csf.sched.num_idle_wait_grps != 0) { struct kbase_queue_group *group, *temp; @@ -6281,7 +6577,7 @@ static void check_group_sync_update_worker(struct kthread_work *work) check_sync_update_after_sc_power_down(kbdev); #endif - KBASE_KTRACE_ADD(kbdev, GROUP_SYNC_UPDATE_WORKER_END, kctx, 0u); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_END, kctx, 0u); mutex_unlock(&scheduler->lock); } @@ -6291,7 +6587,8 @@ enum kbase_csf_event_callback_action check_group_sync_update_cb(void *param) { struct kbase_context *const kctx = param; - KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT, kctx, 0u); + KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_GROUP_SYNC_UPDATE_EVENT, kctx, 0u); + kthread_queue_work(&kctx->csf.sched.sync_update_worker, &kctx->csf.sched.sync_update_work); @@ -6313,15 +6610,12 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) kctx->csf.sched.num_idle_wait_grps = 0; kctx->csf.sched.ngrp_to_schedule = 0; - kthread_init_worker(&kctx->csf.sched.sync_update_worker); - kctx->csf.sched.sync_update_worker_thread = kbase_create_realtime_thread( - kctx->kbdev, - kthread_worker_fn, - &kctx->csf.sched.sync_update_worker, - "mali_kbase_csf_sync_update"); - if (IS_ERR(kctx->csf.sched.sync_update_worker_thread)) { + err = kbase_create_realtime_thread(kctx->kbdev, kthread_worker_fn, + &kctx->csf.sched.sync_update_worker, + "mali_kbase_csf_sync_update"); + if (err) { dev_err(kctx->kbdev->dev, - "Failed to initialize scheduler context workqueue"); + "Failed to initialize scheduler context kworker"); return -ENOMEM; } @@ -6333,10 +6627,13 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) if (err) { dev_err(kctx->kbdev->dev, "Failed to register a sync update callback"); - kthread_flush_worker(&kctx->csf.sched.sync_update_worker); - kthread_stop(kctx->csf.sched.sync_update_worker_thread); + kbase_destroy_kworker_stack(&kctx->csf.sched.sync_update_worker); } + /* Per-kctx heap_info object initialization */ + memset(&kctx->csf.sched.heap_info, 0, sizeof(struct kbase_kctx_heap_info)); + INIT_LIST_HEAD(&kctx->csf.sched.heap_info.mgr_link); + return err; } @@ -6344,8 +6641,7 @@ void kbase_csf_scheduler_context_term(struct kbase_context *kctx) { kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx); kthread_cancel_work_sync(&kctx->csf.sched.sync_update_work); - kthread_flush_worker(&kctx->csf.sched.sync_update_worker); - kthread_stop(kctx->csf.sched.sync_update_worker_thread); + kbase_destroy_kworker_stack(&kctx->csf.sched.sync_update_worker); } int kbase_csf_scheduler_init(struct kbase_device *kbdev) @@ -6367,21 +6663,33 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev) return 0; } +static void scheduler_init_heap_reclaim_mgr(struct kbase_csf_scheduler *const scheduler) +{ + INIT_LIST_HEAD(&scheduler->reclaim_mgr.candidate_ctxs); + INIT_LIST_HEAD(&scheduler->reclaim_mgr.scan_list_ctxs); + atomic_set(&scheduler->reclaim_mgr.est_cand_pages, 0); + atomic_set(&scheduler->reclaim_mgr.mgr_scan_pages, 0); +} + +static void scheduler_term_heap_reclaim_mgr(struct kbase_csf_scheduler *const scheduler) +{ + WARN_ON(!list_empty(&scheduler->reclaim_mgr.candidate_ctxs)); + 
WARN_ON(!list_empty(&scheduler->reclaim_mgr.scan_list_ctxs)); + WARN_ON(atomic_read(&scheduler->reclaim_mgr.est_cand_pages)); + WARN_ON(atomic_read(&scheduler->reclaim_mgr.mgr_scan_pages)); +} + int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) { + int err; struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; scheduler->timer_enabled = true; - kthread_init_worker(&scheduler->csf_worker); - scheduler->csf_worker_thread = kbase_create_realtime_thread( - kbdev, - kthread_worker_fn, - &scheduler->csf_worker, - "csf_scheduler"); - - if (!scheduler->csf_worker_thread) { - dev_err(kbdev->dev, "Failed to allocate scheduler workqueue\n"); + err = kbase_create_realtime_thread(kbdev, kthread_worker_fn, &scheduler->csf_worker, + "csf_scheduler"); + if (err) { + dev_err(kbdev->dev, "Failed to allocate scheduler kworker\n"); return -ENOMEM; } scheduler->idle_wq = alloc_ordered_workqueue( @@ -6389,13 +6697,13 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) if (!scheduler->idle_wq) { dev_err(kbdev->dev, "Failed to allocate GPU idle scheduler workqueue\n"); - kthread_flush_worker(&kbdev->csf.scheduler.csf_worker); - kthread_stop(kbdev->csf.scheduler.csf_worker_thread);; + kbase_destroy_kworker_stack(&kbdev->csf.scheduler.csf_worker); return -ENOMEM; } kthread_init_work(&scheduler->tick_work, schedule_on_tick); kthread_init_delayed_work(&scheduler->tock_work, schedule_on_tock); + atomic_set(&scheduler->pending_tock_work, false); INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor); @@ -6417,7 +6725,6 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) scheduler->top_ctx = NULL; scheduler->top_grp = NULL; scheduler->last_schedule = 0; - scheduler->tock_pending_request = false; scheduler->active_protm_grp = NULL; scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS; scheduler_doorbell_init(kbdev); @@ -6436,6 +6743,9 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) scheduler->tick_timer.function = tick_timer_callback; scheduler->tick_timer_active = false; + scheduler_init_heap_reclaim_mgr(scheduler); + kbase_csf_tiler_heap_register_shrinker(kbdev); + return 0; } @@ -6466,7 +6776,7 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev) mutex_unlock(&kbdev->csf.scheduler.lock); cancel_delayed_work_sync(&kbdev->csf.scheduler.ping_work); cancel_tick_timer(kbdev); - kthread_cancel_work_sync(&kbdev->csf.scheduler.tick_work); + cancel_tick_work(&kbdev->csf.scheduler); cancel_tock_work(&kbdev->csf.scheduler); mutex_destroy(&kbdev->csf.scheduler.lock); kfree(kbdev->csf.scheduler.csg_slots); @@ -6478,10 +6788,11 @@ void kbase_csf_scheduler_early_term(struct kbase_device *kbdev) { if (kbdev->csf.scheduler.idle_wq) destroy_workqueue(kbdev->csf.scheduler.idle_wq); - if (kbdev->csf.scheduler.csf_worker_thread) { - kthread_flush_worker(&kbdev->csf.scheduler.csf_worker); - kthread_stop(kbdev->csf.scheduler.csf_worker_thread); - } + if (kbdev->csf.scheduler.csf_worker.task) + kbase_destroy_kworker_stack(&kbdev->csf.scheduler.csf_worker); + + kbase_csf_tiler_heap_unregister_shrinker(kbdev); + scheduler_term_heap_reclaim_mgr(&kbdev->csf.scheduler); } /** @@ -6546,13 +6857,12 @@ void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev, if (currently_enabled && !enable) { scheduler->timer_enabled = false; cancel_tick_timer(kbdev); - kthread_cancel_delayed_work_sync(&scheduler->tock_work); - scheduler->tock_pending_request = false; mutex_unlock(&scheduler->lock); /* The non-sync version to cancel the normal work 
item is not * available, so need to drop the lock before cancellation. */ - kthread_cancel_work_sync(&scheduler->tick_work); + cancel_tick_work(scheduler); + cancel_tock_work(scheduler); return; } @@ -6624,7 +6934,7 @@ int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev) struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; /* Cancel any potential queued delayed work(s) */ - kthread_cancel_work_sync(&scheduler->tick_work); + cancel_tick_work(scheduler); cancel_tock_work(scheduler); result = kbase_reset_gpu_prevent_and_wait(kbdev); @@ -6804,3 +7114,204 @@ void kbase_csf_scheduler_force_wakeup(struct kbase_device *kbdev) scheduler_wakeup(kbdev, true); mutex_unlock(&scheduler->lock); } + +static bool defer_count_unused_heap_pages(struct kbase_context *kctx) +{ + struct kbase_kctx_heap_info *info = &kctx->csf.sched.heap_info; + u32 prio, shift; + unsigned long ms; + + for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_LOW; + prio++) { + if (!list_empty(&kctx->csf.sched.runnable_groups[prio])) + break; + } + + shift = (prio == KBASE_QUEUE_GROUP_PRIORITY_REALTIME) ? 0 : prio + 1; + /* Delay time from priority */ + ms = HEAP_RECLAIM_PRIO_DEFERRAL_MS >> shift; + + WARN_ON(!(info->flags & CSF_CTX_RECLAIM_CANDI_FLAG)); + + if (kctx->csf.sched.num_idle_wait_grps) + ms += HEAP_RECLAIM_WAIT_SYNC_DEFERRAL_MS; + + return time_before(jiffies, info->attach_jiffies + msecs_to_jiffies(ms)); +} + +static unsigned long +reclaim_count_candidates_heap_pages(struct kbase_device *kbdev, unsigned long freed_pages, + struct kbase_csf_tiler_heap_shrink_control *shrink_ctrl) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + struct kbase_csf_sched_heap_reclaim_mgr *const mgr = &scheduler->reclaim_mgr; + struct kbase_kctx_heap_info *info, *tmp; + unsigned long count = 0; + u32 cnt_ctxs = 0; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + list_for_each_entry_safe(info, tmp, &mgr->candidate_ctxs, mgr_link) { + struct kbase_context *kctx = + container_of(info, struct kbase_context, csf.sched.heap_info); + + /* If the kctx not yet exhausted its deferral time, keep it as a candidate */ + if (defer_count_unused_heap_pages(kctx)) + continue; + + /* Count the freeable pages of the kctx */ + info->nr_scan_pages = shrink_ctrl->count_cb(kctx); + + dev_dbg(kctx->kbdev->dev, "kctx_%d_%d heap pages count : %u\n", kctx->tgid, + kctx->id, info->nr_scan_pages); + cnt_ctxs++; + + /* The kctx is either moved to the pages freeable kctx list, or removed + * from the manager if no pages are available for reclaim. + */ + if (info->nr_scan_pages) { + /* Move the kctx to the scan_list inside the manager */ + list_move_tail(&info->mgr_link, &mgr->scan_list_ctxs); + WARN_ON(atomic_sub_return(info->nr_est_pages, &mgr->est_cand_pages) < 0); + atomic_add(info->nr_scan_pages, &mgr->mgr_scan_pages); + info->flags = CSF_CTX_RECLAIM_SCAN_FLAG; + count += info->nr_scan_pages; + } else + detach_from_sched_reclaim_mgr(kctx); + + /* Combine with the shrinker scan method freed pages to determine the count + * has done enough to avoid holding the scheduler lock too long. 
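+		 * Any candidates left unprocessed when the batch limit
+		 * (HEAP_RECLAIM_COUNT_BATCH_SIZE) is reached stay on the
+		 * candidate list and are considered again on a later
+		 * shrinker invocation.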
+ */ + if ((freed_pages + count) > HEAP_RECLAIM_COUNT_BATCH_SIZE) + break; + } + + dev_dbg(kbdev->dev, + "Reclaim CSF count unused heap pages: %lu (processed kctxs: %u, from_scan: %lu)\n", + count, cnt_ctxs, freed_pages); + + return count; +} + +static unsigned long +reclaim_free_counted_heap_pages(struct kbase_device *kbdev, + struct kbase_csf_tiler_heap_shrink_control *shrink_ctrl) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + struct kbase_csf_sched_heap_reclaim_mgr *const mgr = &scheduler->reclaim_mgr; + unsigned long freed = 0; + u32 cnt_ctxs = 0; + struct kbase_kctx_heap_info *info, *tmp; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + if (WARN_ON(!shrink_ctrl->scan_cb)) + return 0; + + list_for_each_entry_safe(info, tmp, &mgr->scan_list_ctxs, mgr_link) { + struct kbase_context *kctx = + container_of(info, struct kbase_context, csf.sched.heap_info); + /* Attempt freeing all the counted heap pages from the kctx */ + u32 n = shrink_ctrl->scan_cb(kctx, info->nr_scan_pages); + + /* The free is attempted on all the counted heap pages. If the kctx has + * all its counted heap pages freed, or, it can't offer anymore, drop + * it from the reclaim manger, otherwise leave it remaining in. If the + * kctx changes its state (i.e. some CSGs becoming on-slot), the + * scheduler will pull it out. + */ + if (n >= info->nr_scan_pages || n == 0) + detach_from_sched_reclaim_mgr(kctx); + else + info->nr_scan_pages -= n; + + freed += n; + cnt_ctxs++; + + /* Enough has been freed, break for a gap to avoid holding the lock too long */ + if (freed >= HEAP_RECLAIM_SCAN_BATCH_SIZE) + break; + } + + dev_dbg(kbdev->dev, "Reclaim CSF heap free heap pages: %lu (processed kctxs: %u)\n", freed, + cnt_ctxs); + + return freed; +} + +unsigned long +kbase_csf_scheduler_count_free_heap_pages(struct kbase_device *kbdev, + struct kbase_csf_tiler_heap_shrink_control *shrink_ctrl) +{ + struct kbase_csf_sched_heap_reclaim_mgr *mgr = &kbdev->csf.scheduler.reclaim_mgr; + + unsigned long scan_count = atomic_read(&mgr->mgr_scan_pages); + unsigned long est_count = atomic_read(&mgr->est_cand_pages); + unsigned long total; + bool counted = false; + + if (mutex_trylock(&kbdev->csf.scheduler.lock)) { + reclaim_count_candidates_heap_pages(kbdev, 0, shrink_ctrl); + mutex_unlock(&kbdev->csf.scheduler.lock); + counted = true; + scan_count = atomic_read(&mgr->mgr_scan_pages); + /* We've processed the candidates, so overwrites the estimated to 0 */ + est_count = 0; + } + + total = scan_count + est_count; + dev_dbg(kbdev->dev, "Reclaim count unused pages: %lu (scan: %lu, extra_est: %lu, %d/)\n", + total, scan_count, est_count, counted); + + return total; +} + +unsigned long +kbase_csf_scheduler_scan_free_heap_pages(struct kbase_device *kbdev, + struct kbase_csf_tiler_heap_shrink_control *shrink_ctrl) +{ + struct kbase_csf_sched_heap_reclaim_mgr *mgr = &kbdev->csf.scheduler.reclaim_mgr; + struct shrink_control *sc = shrink_ctrl->sc; + unsigned long freed = 0; + unsigned long count = 0; + unsigned long avail = 0; + + /* If Scheduler is busy in action, return 0 */ + if (!mutex_trylock(&kbdev->csf.scheduler.lock)) { + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + + /* Wait for roughly 2-ms */ + wait_event_timeout(kbdev->csf.event_wait, (scheduler->state != SCHED_BUSY), + msecs_to_jiffies(2)); + if (!mutex_trylock(&kbdev->csf.scheduler.lock)) { + dev_dbg(kbdev->dev, + "Reclaim scan see device busy (freed: 0, number to scan: %lu)\n", + sc->nr_to_scan); + return 0; + } + } + + avail = 
atomic_read(&mgr->mgr_scan_pages); + if (avail) { + freed = reclaim_free_counted_heap_pages(kbdev, shrink_ctrl); + if (freed < sc->nr_to_scan && atomic_read(&mgr->est_cand_pages)) + count = reclaim_count_candidates_heap_pages(kbdev, freed, shrink_ctrl); + } else { + count = reclaim_count_candidates_heap_pages(kbdev, freed, shrink_ctrl); + } + + /* If having done count in this call, try reclaim free again */ + if (count) + freed += reclaim_free_counted_heap_pages(kbdev, shrink_ctrl); + + mutex_unlock(&kbdev->csf.scheduler.lock); + + dev_info(kbdev->dev, + "Reclaim scan freed pages: %lu (avail: %lu, extra: %lu, number to scan: %lu)\n", + freed, avail, count, sc->nr_to_scan); + + /* On no avilablity, and with no new extra count, return STOP */ + if (!avail && !count) + return SHRINK_STOP; + else + return freed; +} diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.h b/mali_kbase/csf/mali_kbase_csf_scheduler.h index 7c39415..358d18a 100644 --- a/mali_kbase/csf/mali_kbase_csf_scheduler.h +++ b/mali_kbase/csf/mali_kbase_csf_scheduler.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,6 +24,7 @@ #include "mali_kbase_csf.h" #include "mali_kbase_csf_event.h" +#include "mali_kbase_csf_tiler_heap_def.h" /** * kbase_csf_scheduler_queue_start() - Enable the running of GPU command queue @@ -472,7 +473,7 @@ static inline bool kbase_csf_scheduler_all_csgs_idle(struct kbase_device *kbdev) } /** - * kbase_csf_scheduler_advance_tick_nolock() - Advance the scheduling tick + * kbase_csf_scheduler_tick_advance_nolock() - Advance the scheduling tick * * @kbdev: Pointer to the device * @@ -482,23 +483,23 @@ static inline bool kbase_csf_scheduler_all_csgs_idle(struct kbase_device *kbdev) * The caller must hold the interrupt lock. */ static inline void -kbase_csf_scheduler_advance_tick_nolock(struct kbase_device *kbdev) +kbase_csf_scheduler_tick_advance_nolock(struct kbase_device *kbdev) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; lockdep_assert_held(&scheduler->interrupt_lock); if (scheduler->tick_timer_active) { - KBASE_KTRACE_ADD(kbdev, SCHEDULER_ADVANCE_TICK, NULL, 0u); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_ADVANCE, NULL, 0u); scheduler->tick_timer_active = false; kthread_queue_work(&scheduler->csf_worker, &scheduler->tick_work); } else { - KBASE_KTRACE_ADD(kbdev, SCHEDULER_NOADVANCE_TICK, NULL, 0u); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_NOADVANCE, NULL, 0u); } } /** - * kbase_csf_scheduler_advance_tick() - Advance the scheduling tick + * kbase_csf_scheduler_tick_advance() - Advance the scheduling tick * * @kbdev: Pointer to the device * @@ -506,13 +507,13 @@ kbase_csf_scheduler_advance_tick_nolock(struct kbase_device *kbdev) * immediate execution, but only if the tick hrtimer is active. If the timer * is inactive then the tick work item is already in flight. 
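+ * Unlike kbase_csf_scheduler_tick_advance_nolock(), this helper acquires the
+ * scheduler's interrupt lock internally, so it must not be called with that
+ * lock already held.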
*/ -static inline void kbase_csf_scheduler_advance_tick(struct kbase_device *kbdev) +static inline void kbase_csf_scheduler_tick_advance(struct kbase_device *kbdev) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; unsigned long flags; spin_lock_irqsave(&scheduler->interrupt_lock, flags); - kbase_csf_scheduler_advance_tick_nolock(kbdev); + kbase_csf_scheduler_tick_advance_nolock(kbdev); spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); } @@ -537,6 +538,22 @@ static inline void kbase_csf_scheduler_invoke_tick(struct kbase_device *kbdev) } /** + * kbase_csf_scheduler_invoke_tock() - Invoke the scheduling tock + * + * @kbdev: Pointer to the device + * + * This function will queue the scheduling tock work item for immediate + * execution. + */ +static inline void kbase_csf_scheduler_invoke_tock(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + + if (atomic_cmpxchg(&scheduler->pending_tock_work, false, true) == false) + kthread_mod_delayed_work(&scheduler->csf_worker, &scheduler->tock_work, 0); +} + +/** * kbase_csf_scheduler_queue_has_trace() - report whether the queue has been * configured to operate with the * cs_trace feature. @@ -674,4 +691,35 @@ bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev); void turn_on_sc_power_rails(struct kbase_device *kbdev); #endif +/* Forward declaration */ +struct kbase_csf_tiler_heap_shrink_control; + +/** + * kbase_csf_scheduler_count_free_heap_pages() - Undertake shrinker reclaim count action + * + * @kbdev: Pointer to the device + * @shrink_ctrl: Pointer to the kbase CSF schrink control object. + * + * This function is called from CSF tiler heap memory shrinker reclaim 'count_objects' operation. + * + * Return: number of potentially freeable tiler heap pages. + */ +unsigned long +kbase_csf_scheduler_count_free_heap_pages(struct kbase_device *kbdev, + struct kbase_csf_tiler_heap_shrink_control *shrink_ctrl); + +/** + * kbase_csf_scheduler_scan_free_heap_pages() - Undertake shrinker reclaim scan action + * + * @kbdev: Pointer to the device + * @shrink_ctrl: Pointer to the kbase CSF schrink control object. + * + * This function is called from CSF tiler heap memory shrinker reclaim 'scan_objects' operation. + * + * Return: number of actually freed tiler heap pagess. + */ +unsigned long +kbase_csf_scheduler_scan_free_heap_pages(struct kbase_device *kbdev, + struct kbase_csf_tiler_heap_shrink_control *shrink_ctrl); + #endif /* _KBASE_CSF_SCHEDULER_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c index 85babf9..b0d3825 100644 --- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c +++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -179,9 +179,8 @@ static int create_chunk(struct kbase_csf_tiler_heap *const heap, int err = 0; struct kbase_context *const kctx = heap->kctx; u64 nr_pages = PFN_UP(heap->chunk_size); - u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | - BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE | - BASE_MEM_COHERENT_LOCAL; + u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR | + BASEP_MEM_NO_USER_FREE | BASE_MEM_COHERENT_LOCAL | BASE_MEM_PROT_CPU_RD; struct kbase_csf_tiler_heap_chunk *chunk = NULL; /* Calls to this function are inherently synchronous, with respect to @@ -191,10 +190,6 @@ static int create_chunk(struct kbase_csf_tiler_heap *const heap, flags |= kbase_mem_group_id_set(kctx->jit_group_id); -#if defined(CONFIG_MALI_DEBUG) || defined(CONFIG_MALI_VECTOR_DUMP) - flags |= BASE_MEM_PROT_CPU_RD; -#endif - chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); if (unlikely(!chunk)) { dev_err(kctx->kbdev->dev, @@ -234,26 +229,39 @@ static int create_chunk(struct kbase_csf_tiler_heap *const heap, return err; } +static void mark_free_mem_bypassing_pool(struct kbase_va_region *reg) +{ + if (WARN_ON(reg->gpu_alloc == NULL)) + return; + + reg->gpu_alloc->evicted = reg->gpu_alloc->nents; + kbase_mem_evictable_mark_reclaim(reg->gpu_alloc); +} + /** * delete_chunk - Delete a tiler heap chunk * * @heap: Pointer to the tiler heap for which @chunk was allocated. * @chunk: Pointer to a chunk to be deleted. + * @reclaim: Indicating the deletion is from shrinking reclaim or not. * * This function frees a tiler heap chunk previously allocated by @create_chunk * and removes it from the list of chunks associated with the heap. * * WARNING: The deleted chunk is not unlinked from the list of chunks used by * the GPU, therefore it is only safe to use this function when - * deleting a heap. + * deleting a heap, or under reclaim operations when the relevant CSGS + * are off-slots for the given kctx. 
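+ * When @reclaim is true, the chunk's backing pages are accounted as evicted
+ * via mark_free_mem_bypassing_pool() before the region is freed.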
*/ static void delete_chunk(struct kbase_csf_tiler_heap *const heap, - struct kbase_csf_tiler_heap_chunk *const chunk) + struct kbase_csf_tiler_heap_chunk *const chunk, bool reclaim) { struct kbase_context *const kctx = heap->kctx; kbase_gpu_vm_lock(kctx); chunk->region->flags &= ~KBASE_REG_NO_USER_FREE; + if (reclaim) + mark_free_mem_bypassing_pool(chunk->region); kbase_mem_free_region(kctx, chunk->region); kbase_gpu_vm_unlock(kctx); list_del(&chunk->link); @@ -277,7 +285,7 @@ static void delete_all_chunks(struct kbase_csf_tiler_heap *heap) struct kbase_csf_tiler_heap_chunk *chunk = list_entry( entry, struct kbase_csf_tiler_heap_chunk, link); - delete_chunk(heap, chunk); + delete_chunk(heap, chunk, false); } } @@ -334,12 +342,19 @@ static void delete_heap(struct kbase_csf_tiler_heap *heap) heap->gpu_va); list_del(&heap->link); + atomic_sub(PFN_UP(heap->chunk_size), &kctx->csf.tiler_heaps.est_count_pages); WARN_ON(heap->chunk_count); KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, heap->heap_id, 0, 0, heap->max_chunks, heap->chunk_size, 0, heap->target_in_flight, 0); + if (heap->buf_desc_va) { + kbase_gpu_vm_lock(kctx); + heap->buf_desc_reg->flags &= ~KBASE_REG_NO_USER_FREE; + kbase_gpu_vm_unlock(kctx); + } + kfree(heap); } @@ -385,6 +400,7 @@ int kbase_csf_tiler_heap_context_init(struct kbase_context *const kctx) INIT_LIST_HEAD(&kctx->csf.tiler_heaps.list); mutex_init(&kctx->csf.tiler_heaps.lock); + atomic_set(&kctx->csf.tiler_heaps.est_count_pages, 0); dev_dbg(kctx->kbdev->dev, "Initialized a context for tiler heaps\n"); @@ -405,25 +421,27 @@ void kbase_csf_tiler_heap_context_term(struct kbase_context *const kctx) delete_heap(heap); } + WARN_ON(atomic_read(&kctx->csf.tiler_heaps.est_count_pages) != 0); mutex_unlock(&kctx->csf.tiler_heaps.lock); mutex_destroy(&kctx->csf.tiler_heaps.lock); kbase_csf_heap_context_allocator_term(&kctx->csf.tiler_heaps.ctx_alloc); } -int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, - u32 const chunk_size, u32 const initial_chunks, u32 const max_chunks, - u16 const target_in_flight, u64 *const heap_gpu_va, - u64 *const first_chunk_va) +int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_size, + u32 const initial_chunks, u32 const max_chunks, + u16 const target_in_flight, u64 const buf_desc_va, + u64 *const heap_gpu_va, u64 *const first_chunk_va) { int err = 0; struct kbase_csf_tiler_heap *heap = NULL; struct kbase_csf_heap_context_allocator *const ctx_alloc = &kctx->csf.tiler_heaps.ctx_alloc; + struct kbase_va_region *reg = NULL; dev_dbg(kctx->kbdev->dev, - "Creating a tiler heap with %u chunks (limit: %u) of size %u\n", - initial_chunks, max_chunks, chunk_size); + "Creating a tiler heap with %u chunks (limit: %u) of size %u, buf_desc_va: 0x%llx", + initial_chunks, max_chunks, chunk_size, buf_desc_va); if (!kbase_mem_allow_alloc(kctx)) return -EINVAL; @@ -443,17 +461,35 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, if (target_in_flight == 0) return -EINVAL; + /* Check on the buffer descriptor virtual Address */ + if (buf_desc_va) { + kbase_gpu_vm_lock(kctx); + reg = kbase_region_tracker_find_region_enclosing_address(kctx, buf_desc_va); + if (kbase_is_region_invalid_or_free(reg) || !(reg->flags & KBASE_REG_CPU_RD) || + (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)) { + kbase_gpu_vm_unlock(kctx); + return -EINVAL; + } + + reg->flags |= KBASE_REG_NO_USER_FREE; + kbase_gpu_vm_unlock(kctx); + } + heap = kzalloc(sizeof(*heap), GFP_KERNEL); if (unlikely(!heap)) { - dev_err(kctx->kbdev->dev, 
- "No kernel memory for a new tiler heap\n"); - return -ENOMEM; + dev_err(kctx->kbdev->dev, "No kernel memory for a new tiler heap"); + err = -ENOMEM; + goto err_out; } heap->kctx = kctx; heap->chunk_size = chunk_size; heap->max_chunks = max_chunks; heap->target_in_flight = target_in_flight; + heap->buf_desc_va = buf_desc_va; + heap->buf_desc_reg = reg; + heap->desc_chk_flags = 0; + heap->desc_chk_cnt = 0; INIT_LIST_HEAD(&heap->chunks_list); heap->gpu_va = kbase_csf_heap_context_allocator_alloc(ctx_alloc); @@ -468,9 +504,7 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, kbase_csf_heap_context_allocator_free(ctx_alloc, heap->gpu_va); } - if (unlikely(err)) { - kfree(heap); - } else { + if (likely(!err)) { struct kbase_csf_tiler_heap_chunk const *chunk = list_first_entry( &heap->chunks_list, struct kbase_csf_tiler_heap_chunk, link); @@ -494,16 +528,27 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, kctx->kbdev, kctx->id, heap->heap_id, chunk->gpu_va); } #endif + kctx->running_total_tiler_heap_nr_chunks += heap->chunk_count; + kctx->running_total_tiler_heap_memory += (u64)heap->chunk_size * heap->chunk_count; + if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory) + kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory; - dev_dbg(kctx->kbdev->dev, "Created tiler heap 0x%llX\n", heap->gpu_va); + /* Assuming at least one chunk reclaimable per heap on (estimated) count */ + atomic_add(PFN_UP(heap->chunk_size), &kctx->csf.tiler_heaps.est_count_pages); + dev_dbg(kctx->kbdev->dev, + "Created tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d", + heap->gpu_va, buf_desc_va, kctx->tgid, kctx->id); mutex_unlock(&kctx->csf.tiler_heaps.lock); - kctx->running_total_tiler_heap_nr_chunks += heap->chunk_count; - kctx->running_total_tiler_heap_memory += - heap->chunk_size * heap->chunk_count; - if (kctx->running_total_tiler_heap_memory > - kctx->peak_total_tiler_heap_memory) - kctx->peak_total_tiler_heap_memory = - kctx->running_total_tiler_heap_memory; + + return 0; + } + +err_out: + kfree(heap); + if (buf_desc_va) { + kbase_gpu_vm_lock(kctx); + reg->flags &= ~KBASE_REG_NO_USER_FREE; + kbase_gpu_vm_unlock(kctx); } return err; } @@ -526,7 +571,6 @@ int kbase_csf_tiler_heap_term(struct kbase_context *const kctx, } else err = -EINVAL; - mutex_unlock(&kctx->csf.tiler_heaps.lock); if (likely(kctx->running_total_tiler_heap_memory >= heap_size)) kctx->running_total_tiler_heap_memory -= heap_size; else @@ -537,6 +581,11 @@ int kbase_csf_tiler_heap_term(struct kbase_context *const kctx, else dev_warn(kctx->kbdev->dev, "Running total tiler chunk count lower than expected!"); + if (!err) + dev_dbg(kctx->kbdev->dev, + "Terminated tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d", + heap->gpu_va, heap->buf_desc_va, kctx->tgid, kctx->id); + mutex_unlock(&kctx->csf.tiler_heaps.lock); return err; } @@ -637,3 +686,352 @@ int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx, return err; } + +static bool delete_chunk_from_gpu_va(struct kbase_csf_tiler_heap *heap, u64 chunk_gpu_va, + u64 *hdr_val) +{ + struct kbase_context *kctx = heap->kctx; + struct kbase_csf_tiler_heap_chunk *chunk; + + list_for_each_entry(chunk, &heap->chunks_list, link) { + struct kbase_vmap_struct map; + u64 *chunk_hdr; + + if (chunk->gpu_va != chunk_gpu_va) + continue; + /* Found it, extract next chunk header before delete it */ + chunk_hdr = kbase_vmap_prot(kctx, chunk_gpu_va, sizeof(*chunk_hdr), + KBASE_REG_CPU_RD, &map); + + if 
(unlikely(!chunk_hdr)) { + dev_warn( + kctx->kbdev->dev, + "Failed to map tiler heap(0x%llX) chunk(0x%llX) for reclaim extract next header", + heap->gpu_va, chunk_gpu_va); + return false; + } + + *hdr_val = *chunk_hdr; + kbase_vunmap(kctx, &map); + + dev_dbg(kctx->kbdev->dev, + "Scan reclaim delete chunk(0x%llx) in heap(0x%llx), header value(0x%llX)", + chunk_gpu_va, heap->gpu_va, *hdr_val); + delete_chunk(heap, chunk, true); + + return true; + } + + dev_warn(kctx->kbdev->dev, + "Failed to find tiler heap(0x%llX) chunk(0x%llX) for reclaim-delete", heap->gpu_va, + chunk_gpu_va); + return false; +} + +static bool heap_buffer_decsriptor_checked(struct kbase_csf_tiler_heap *const heap) +{ + return heap->desc_chk_flags & HEAP_BUF_DESCRIPTOR_CHECKED; +} + +static void sanity_check_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap, + struct kbase_csf_gpu_buffer_heap *desc) +{ + u64 ptr_addr = desc->pointer & CHUNK_ADDR_MASK; + + lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); + + if (ptr_addr) { + struct kbase_csf_tiler_heap_chunk *chunk; + + /* desc->pointer must be a chunk in the given heap */ + list_for_each_entry(chunk, &heap->chunks_list, link) { + if (chunk->gpu_va == ptr_addr) { + dev_dbg(heap->kctx->kbdev->dev, + "Buffer descriptor 0x%llX sanity check ok, HW reclaim allowed", + heap->buf_desc_va); + + heap->desc_chk_flags = HEAP_BUF_DESCRIPTOR_CHECKED; + return; + } + } + } + /* If there is no match, defer the check to next time */ + dev_dbg(heap->kctx->kbdev->dev, "Buffer descriptor 0x%llX runtime sanity check deferred", + heap->buf_desc_va); +} + +static bool can_read_hw_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap, u64 *ptr_u64) +{ + struct kbase_context *kctx = heap->kctx; + bool checked = false; + + lockdep_assert_held(&kctx->csf.tiler_heaps.lock); + + /* Initialize the descriptor pointer value to 0 */ + *ptr_u64 = 0; + + if (heap_buffer_decsriptor_checked(heap)) + return true; + + /* The BufferDescriptor on heap is a hint on creation, do a sanity check at runtime */ + if (heap->buf_desc_va) { + struct kbase_vmap_struct map; + struct kbase_csf_gpu_buffer_heap *desc = kbase_vmap_prot( + kctx, heap->buf_desc_va, sizeof(*desc), KBASE_REG_CPU_RD, &map); + + if (unlikely(!desc)) { + dev_warn_once(kctx->kbdev->dev, + "Sanity check: buffer descriptor 0x%llX map failed", + heap->buf_desc_va); + goto out; + } + + sanity_check_gpu_buffer_heap(heap, desc); + checked = heap_buffer_decsriptor_checked(heap); + if (checked) + *ptr_u64 = desc->pointer & CHUNK_ADDR_MASK; + + kbase_vunmap(kctx, &map); + } + +out: + return checked; +} + +static u32 delete_hoarded_chunks(struct kbase_csf_tiler_heap *heap) +{ + u32 freed = 0; + u64 gpu_va = 0; + struct kbase_context *kctx = heap->kctx; + + lockdep_assert_held(&kctx->csf.tiler_heaps.lock); + + if (can_read_hw_gpu_buffer_heap(heap, &gpu_va)) { + u64 chunk_hdr_val; + u64 *hw_hdr; + struct kbase_vmap_struct map; + + if (!gpu_va) { + struct kbase_csf_gpu_buffer_heap *desc = kbase_vmap_prot( + kctx, heap->buf_desc_va, sizeof(*desc), KBASE_REG_CPU_RD, &map); + + if (unlikely(!desc)) { + dev_warn( + kctx->kbdev->dev, + "Failed to map Buffer descriptor 0x%llX for HW reclaim scan", + heap->buf_desc_va); + goto out; + } + + gpu_va = desc->pointer & CHUNK_ADDR_MASK; + kbase_vunmap(kctx, &map); + + if (!gpu_va) { + dev_dbg(kctx->kbdev->dev, + "Buffer descriptor 0x%llX has no chunks (NULL) for reclaim scan", + heap->buf_desc_va); + goto out; + } + } + + /* Map the HW chunk header here with RD/WR for likely update */ + hw_hdr = kbase_vmap_prot(kctx, 
gpu_va, sizeof(*hw_hdr), + KBASE_REG_CPU_RD | KBASE_REG_CPU_WR, &map); + if (unlikely(!hw_hdr)) { + dev_warn(kctx->kbdev->dev, + "Failed to map HW chnker header 0x%llX for HW reclaim scan", + gpu_va); + goto out; + } + + /* Move onto the next chunk relevant information */ + chunk_hdr_val = *hw_hdr; + gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK; + + while (gpu_va && heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) { + bool success = delete_chunk_from_gpu_va(heap, gpu_va, &chunk_hdr_val); + + if (!success) + break; + + freed++; + /* On success, chunk_hdr_val is updated, extract the next chunk address */ + gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK; + } + + /* Update the existing hardware chunk header, after reclaim deletion of chunks */ + *hw_hdr = chunk_hdr_val; + kbase_vunmap(kctx, &map); + dev_dbg(heap->kctx->kbdev->dev, + "HW reclaim scan freed chunks: %u, set hw_hdr[0]: 0x%llX", freed, + chunk_hdr_val); + } else + dev_dbg(kctx->kbdev->dev, + "Skip HW reclaim scan, (disabled: buffer descriptor 0x%llX)", + heap->buf_desc_va); + +out: + return freed; +} + +static u64 delete_unused_chunk_pages(struct kbase_csf_tiler_heap *heap) +{ + u32 freed_chunks = 0; + u64 freed_pages = 0; + u64 gpu_va; + u64 chunk_hdr_val; + struct kbase_context *kctx = heap->kctx; + unsigned long prot = KBASE_REG_CPU_RD | KBASE_REG_CPU_WR; + struct kbase_vmap_struct map; + u64 *ctx_ptr; + + lockdep_assert_held(&kctx->csf.tiler_heaps.lock); + + ctx_ptr = kbase_vmap_prot(kctx, heap->gpu_va, sizeof(*ctx_ptr), prot, &map); + if (unlikely(!ctx_ptr)) { + dev_dbg(kctx->kbdev->dev, + "Failed to map tiler heap context 0x%llX for reclaim_scan", heap->gpu_va); + goto out; + } + + /* Extract the first chunk address from the context's free_list_head */ + chunk_hdr_val = *ctx_ptr; + gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK; + + while (gpu_va) { + u64 hdr_val; + bool success = delete_chunk_from_gpu_va(heap, gpu_va, &hdr_val); + + if (!success) + break; + + freed_chunks++; + chunk_hdr_val = hdr_val; + /* extract the next chunk address */ + gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK; + } + + /* Update the post-scan deletion to context header */ + *ctx_ptr = chunk_hdr_val; + kbase_vunmap(kctx, &map); + + /* Try to scan the HW hoarded list of unused chunks */ + freed_chunks += delete_hoarded_chunks(heap); + freed_pages = freed_chunks * PFN_UP(heap->chunk_size); + dev_dbg(heap->kctx->kbdev->dev, + "Scan reclaim freed chunks/pages %u/%llu, set heap-ctx_u64[0]: 0x%llX", + freed_chunks, freed_pages, chunk_hdr_val); + + /* Update context tiler heaps memory usage */ + kctx->running_total_tiler_heap_memory -= freed_pages << PAGE_SHIFT; + kctx->running_total_tiler_heap_nr_chunks -= freed_chunks; +out: + return freed_pages; +} + +static u32 scan_kctx_unused_heap_pages_cb(struct kbase_context *kctx, u32 to_free) +{ + u64 freed = 0; + struct kbase_csf_tiler_heap *heap; + + mutex_lock(&kctx->csf.tiler_heaps.lock); + + list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link) { + freed += delete_unused_chunk_pages(heap); + /* If freed enough, then stop here */ + if (freed >= to_free) + break; + } + + mutex_unlock(&kctx->csf.tiler_heaps.lock); + /* The scan is surely not more than 4-G pages, but for logic flow limit it */ + if (WARN_ON(unlikely(freed > U32_MAX))) + return U32_MAX; + else + return (u32)freed; +} + +static u64 count_unused_heap_pages(struct kbase_csf_tiler_heap *heap) +{ + u32 chunk_cnt = 0; + u64 page_cnt = 0; + + lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); + + /* Here the count is basically an informed estimate, avoiding the 
costly mapping/unmaping + * in the chunk list walk. The downside is that the number is a less reliable guide for + * later on scan (free) calls on this heap for what actually is freeable. + */ + if (heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) { + chunk_cnt = heap->chunk_count - HEAP_SHRINK_STOP_LIMIT; + page_cnt = chunk_cnt * PFN_UP(heap->chunk_size); + } + + dev_dbg(heap->kctx->kbdev->dev, + "Reclaim count chunks/pages %u/%llu (estimated), heap_va: 0x%llX", chunk_cnt, + page_cnt, heap->gpu_va); + + return page_cnt; +} + +static u32 count_kctx_unused_heap_pages_cb(struct kbase_context *kctx) +{ + u64 page_cnt = 0; + struct kbase_csf_tiler_heap *heap; + + mutex_lock(&kctx->csf.tiler_heaps.lock); + + list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link) + page_cnt += count_unused_heap_pages(heap); + + mutex_unlock(&kctx->csf.tiler_heaps.lock); + + /* The count is surely not more than 4-G pages, but for logic flow limit it */ + if (WARN_ON(unlikely(page_cnt > U32_MAX))) + return U32_MAX; + else + return (u32)page_cnt; +} + +static unsigned long kbase_csf_tiler_heap_reclaim_count_objects(struct shrinker *s, + struct shrink_control *sc) +{ + struct kbase_device *kbdev = container_of(s, struct kbase_device, csf.tiler_heap_reclaim); + struct kbase_csf_tiler_heap_shrink_control shrink_ctrl = { + .sc = sc, + .count_cb = count_kctx_unused_heap_pages_cb, + .scan_cb = scan_kctx_unused_heap_pages_cb, + }; + + return kbase_csf_scheduler_count_free_heap_pages(kbdev, &shrink_ctrl); +} + +static unsigned long kbase_csf_tiler_heap_reclaim_scan_objects(struct shrinker *s, + struct shrink_control *sc) +{ + struct kbase_device *kbdev = container_of(s, struct kbase_device, csf.tiler_heap_reclaim); + struct kbase_csf_tiler_heap_shrink_control shrink_ctrl = { + .sc = sc, + .count_cb = count_kctx_unused_heap_pages_cb, + .scan_cb = scan_kctx_unused_heap_pages_cb, + }; + + return kbase_csf_scheduler_scan_free_heap_pages(kbdev, &shrink_ctrl); +} + +void kbase_csf_tiler_heap_register_shrinker(struct kbase_device *kbdev) +{ + struct shrinker *reclaim = &kbdev->csf.tiler_heap_reclaim; + + reclaim->count_objects = kbase_csf_tiler_heap_reclaim_count_objects; + reclaim->scan_objects = kbase_csf_tiler_heap_reclaim_scan_objects; + reclaim->seeks = HEAP_SHRINKER_SEEKS; + reclaim->batch = HEAP_SHRINKER_BATCH; + + register_shrinker(reclaim); +} + +void kbase_csf_tiler_heap_unregister_shrinker(struct kbase_device *kbdev) +{ + unregister_shrinker(&kbdev->csf.tiler_heap_reclaim); +} diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.h b/mali_kbase/csf/mali_kbase_csf_tiler_heap.h index 4031ad4..da60c59 100644 --- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.h +++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,7 +23,6 @@ #define _KBASE_CSF_TILER_HEAP_H_ #include <mali_kbase.h> - /** * kbase_csf_tiler_heap_context_init - Initialize the tiler heaps context for a * GPU address space @@ -58,6 +57,12 @@ void kbase_csf_tiler_heap_context_term(struct kbase_context *kctx); * @target_in_flight: Number of render-passes that the driver should attempt to * keep in flight for which allocation of new chunks is * allowed. Must not be zero. 
+ * @buf_desc_va: Buffer descriptor GPU virtual address. This is a hint for + * indicating that the caller is intending to perform tiler heap + * chunks reclaim for those that are hoarded with hardware while + * the associated shader activites are suspended and the CSGs are + * off slots. If the referred reclaiming is not desired, can + * set it to 0. * @gpu_heap_va: Where to store the GPU virtual address of the context that was * set up for the tiler heap. * @first_chunk_va: Where to store the GPU virtual address of the first chunk @@ -66,13 +71,12 @@ void kbase_csf_tiler_heap_context_term(struct kbase_context *kctx); * * Return: 0 if successful or a negative error code on failure. */ -int kbase_csf_tiler_heap_init(struct kbase_context *kctx, - u32 chunk_size, u32 initial_chunks, u32 max_chunks, - u16 target_in_flight, u64 *gpu_heap_va, - u64 *first_chunk_va); +int kbase_csf_tiler_heap_init(struct kbase_context *kctx, u32 chunk_size, u32 initial_chunks, + u32 max_chunks, u16 target_in_flight, u64 const buf_desc_va, + u64 *gpu_heap_va, u64 *first_chunk_va); /** - * kbasep_cs_tiler_heap_term - Terminate a chunked tiler memory heap. + * kbase_csf_tiler_heap_term - Terminate a chunked tiler memory heap. * * @kctx: Pointer to the kbase context in which the tiler heap was initialized. * @gpu_heap_va: The GPU virtual address of the context that was set up for the @@ -112,4 +116,21 @@ int kbase_csf_tiler_heap_term(struct kbase_context *kctx, u64 gpu_heap_va); */ int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx, u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr); + +/** + * kbase_csf_tiler_heap_register_shrinker - Register shrinker for tiler heap. + * + * @kbdev: Pointer to the device. + * + */ +void kbase_csf_tiler_heap_register_shrinker(struct kbase_device *kbdev); + +/** + * kbase_csf_tiler_heap_unregister_shrinker - Unregister shrinker for tiler heap on device + * shut down. + * + * @kbdev: Pointer to the device. + * + */ +void kbase_csf_tiler_heap_unregister_shrinker(struct kbase_device *kbdev); #endif diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap_def.h b/mali_kbase/csf/mali_kbase_csf_tiler_heap_def.h index 2c006d9..70dbb6c 100644 --- a/mali_kbase/csf/mali_kbase_csf_tiler_heap_def.h +++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap_def.h @@ -56,6 +56,15 @@ ((CHUNK_HDR_NEXT_ADDR_MASK >> CHUNK_HDR_NEXT_ADDR_POS) << \ CHUNK_HDR_NEXT_ADDR_ENCODE_SHIFT) +/* Tiler heap shrink stop limit for maintaining a minimum number of chunks */ +#define HEAP_SHRINK_STOP_LIMIT (1) + +/* Tiler heap shrinker seek value, needs to be higher than jit and memory pools */ +#define HEAP_SHRINKER_SEEKS (DEFAULT_SEEKS + 2) + +/* Tiler heap shrinker batch value */ +#define HEAP_SHRINKER_BATCH (512) + /** * struct kbase_csf_tiler_heap_chunk - A tiler heap chunk managed by the kernel * @@ -78,6 +87,8 @@ struct kbase_csf_tiler_heap_chunk { u64 gpu_va; }; +#define HEAP_BUF_DESCRIPTOR_CHECKED (1 << 0) + /** * struct kbase_csf_tiler_heap - A tiler heap managed by the kernel * @@ -85,6 +96,16 @@ struct kbase_csf_tiler_heap_chunk { * associated. * @link: Link to this heap in a list of tiler heaps belonging to * the @kbase_csf_tiler_heap_context. + * @chunks_list: Linked list of allocated chunks. + * @gpu_va: The GPU virtual address of the heap context structure that + * was allocated for the firmware. This is also used to + * uniquely identify the heap. + * @heap_id: Unique id representing the heap, assigned during heap + * initialization. 
+ * @buf_desc_va: Buffer decsriptor GPU VA. Can be 0 for backward compatible + * to earlier version base interfaces. + * @buf_desc_reg: Pointer to the VA region that covers the provided buffer + * descriptor memory object pointed to by buf_desc_va. * @chunk_size: Size of each chunk, in bytes. Must be page-aligned. * @chunk_count: The number of chunks currently allocated. Must not be * zero or greater than @max_chunks. @@ -93,22 +114,56 @@ struct kbase_csf_tiler_heap_chunk { * @target_in_flight: Number of render-passes that the driver should attempt * to keep in flight for which allocation of new chunks is * allowed. Must not be zero. - * @gpu_va: The GPU virtual address of the heap context structure that - * was allocated for the firmware. This is also used to - * uniquely identify the heap. - * @heap_id: Unique id representing the heap, assigned during heap - * initialization. - * @chunks_list: Linked list of allocated chunks. + * @desc_chk_flags: Runtime sanity check flags on heap chunk reclaim. + * @desc_chk_cnt: Counter for providing a deferral gap if runtime sanity check + * needs to be retried later. */ struct kbase_csf_tiler_heap { struct kbase_context *kctx; struct list_head link; + struct list_head chunks_list; + u64 gpu_va; + u64 heap_id; + u64 buf_desc_va; + struct kbase_va_region *buf_desc_reg; u32 chunk_size; u32 chunk_count; u32 max_chunks; u16 target_in_flight; - u64 gpu_va; - u64 heap_id; - struct list_head chunks_list; + u8 desc_chk_flags; + u8 desc_chk_cnt; +}; + +/** + * struct kbase_csf_gpu_buffer_heap - A gpu buffer object specific to tiler heap + * + * @cdsbp_0: Descriptor_type and buffer_type + * @size: The size of the current heap chunk + * @pointer: Pointer to the current heap chunk + * @low_pointer: Pointer to low end of current heap chunk + * @high_pointer: Pointer to high end of current heap chunk + */ +struct kbase_csf_gpu_buffer_heap { + u32 cdsbp_0; + u32 size; + u64 pointer; + u64 low_pointer; + u64 high_pointer; +} __packed; + +/** + * struct kbase_csf_tiler_heap_shrink_control - Kbase wraper object that wraps around + * kernel shrink_control + * + * @sc: Pointer to shrinker control object in reclaim callback. + * @count_cb: Functin pointer for counting tiler heap free list. + * @scan_cb: Functin pointer for counting tiler heap free list. + */ + +struct kbase_csf_tiler_heap_shrink_control { + struct shrink_control *sc; + u32 (*count_cb)(struct kbase_context *kctx); + u32 (*scan_cb)(struct kbase_context *kctx, u32 pages); }; + #endif /* !_KBASE_CSF_TILER_HEAP_DEF_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_tl_reader.c b/mali_kbase/csf/mali_kbase_csf_tl_reader.c index f40be8f..27677ba 100644 --- a/mali_kbase/csf/mali_kbase_csf_tl_reader.c +++ b/mali_kbase/csf/mali_kbase_csf_tl_reader.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -38,10 +38,7 @@ #if IS_ENABLED(CONFIG_DEBUG_FS) #include "tl/mali_kbase_timeline_priv.h" #include <linux/debugfs.h> - -#if (KERNEL_VERSION(4, 7, 0) > LINUX_VERSION_CODE) -#define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE -#endif +#include <linux/version_compat_defs.h> #endif /* Name of the CSFFW timeline tracebuffer. 
*/ @@ -301,7 +298,7 @@ int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self) dev_warn( kbdev->dev, "Unable to parse CSFFW tracebuffer event header."); - ret = -EBUSY; + ret = -EBUSY; break; } @@ -322,7 +319,7 @@ int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self) dev_warn(kbdev->dev, "event_id: %u, can't read with event_size: %u.", event_id, event_size); - ret = -EBUSY; + ret = -EBUSY; break; } diff --git a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c index c6b89f5..9ce6776 100644 --- a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c +++ b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,12 +28,7 @@ #include <linux/list.h> #include <linux/mman.h> - -#if IS_ENABLED(CONFIG_DEBUG_FS) -#if (KERNEL_VERSION(4, 7, 0) > LINUX_VERSION_CODE) -#define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE -#endif -#endif +#include <linux/version_compat_defs.h> /** * struct firmware_trace_buffer - Trace Buffer within the MCU firmware @@ -127,9 +122,9 @@ static const struct firmware_trace_buffer_data trace_buffer_data[] = { #endif #ifdef CONFIG_MALI_PIXEL_GPU_SSCD /* Enable all the logs */ - { FW_TRACE_BUF_NAME, { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }, FW_TRACE_BUF_NR_PAGES }, + { FIRMWARE_LOG_BUF_NAME, { 0xFFFFFFFF }, FW_TRACE_BUF_NR_PAGES }, #else - { FW_TRACE_BUF_NAME, { 0 }, FW_TRACE_BUF_NR_PAGES }, + { FIRMWARE_LOG_BUF_NAME, { 0 }, FW_TRACE_BUF_NR_PAGES }, #endif /* CONFIG_MALI_PIXEL_GPU_SSCD */ { "benchmark", { 0 }, 2 }, { "timeline", { 0 }, KBASE_CSF_TL_BUFFER_NR_PAGES }, @@ -517,10 +512,16 @@ unsigned int kbase_csf_firmware_trace_buffer_read_data( } EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_read_data); -#if IS_ENABLED(CONFIG_DEBUG_FS) +static void update_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, u64 mask) +{ + unsigned int i; + + for (i = 0; i < tb->trace_enable_entry_count; i++) + kbasep_csf_firmware_trace_buffer_update_trace_enable_bit(tb, i, (mask >> i) & 1); +} #define U32_BITS 32 -static u64 get_trace_buffer_active_mask64(struct firmware_trace_buffer *tb) +u64 kbase_csf_firmware_trace_buffer_get_active_mask64(struct firmware_trace_buffer *tb) { u64 active_mask = tb->trace_enable_init_mask[0]; @@ -530,18 +531,7 @@ static u64 get_trace_buffer_active_mask64(struct firmware_trace_buffer *tb) return active_mask; } -static void update_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, - u64 mask) -{ - unsigned int i; - - for (i = 0; i < tb->trace_enable_entry_count; i++) - kbasep_csf_firmware_trace_buffer_update_trace_enable_bit( - tb, i, (mask >> i) & 1); -} - -static int set_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, - u64 mask) +int kbase_csf_firmware_trace_buffer_set_active_mask64(struct firmware_trace_buffer *tb, u64 mask) { struct kbase_device *kbdev = tb->kbdev; unsigned long flags; @@ -569,124 +559,3 @@ static int set_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, return err; } - -static int kbase_csf_firmware_trace_enable_mask_read(void *data, u64 *val) -{ - struct kbase_device *kbdev = (struct kbase_device *)data; - struct firmware_trace_buffer *tb = - 
kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME); - - if (tb == NULL) { - dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); - return -EIO; - } - /* The enabled traces limited to u64 here, regarded practical */ - *val = get_trace_buffer_active_mask64(tb); - return 0; -} - -static int kbase_csf_firmware_trace_enable_mask_write(void *data, u64 val) -{ - struct kbase_device *kbdev = (struct kbase_device *)data; - struct firmware_trace_buffer *tb = - kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME); - u64 new_mask; - unsigned int enable_bits_count; - - if (tb == NULL) { - dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); - return -EIO; - } - - /* Ignore unsupported types */ - enable_bits_count = - kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count(tb); - if (enable_bits_count > 64) { - dev_dbg(kbdev->dev, "Limit enabled bits count from %u to 64", - enable_bits_count); - enable_bits_count = 64; - } - new_mask = val & ((1 << enable_bits_count) - 1); - - if (new_mask != get_trace_buffer_active_mask64(tb)) - return set_trace_buffer_active_mask64(tb, new_mask); - else - return 0; -} - -static int kbasep_csf_firmware_trace_debugfs_open(struct inode *in, - struct file *file) -{ - struct kbase_device *kbdev = in->i_private; - - file->private_data = kbdev; - dev_dbg(kbdev->dev, "Opened firmware trace buffer dump debugfs file"); - - return 0; -} - -static ssize_t kbasep_csf_firmware_trace_debugfs_read(struct file *file, - char __user *buf, size_t size, loff_t *ppos) -{ - struct kbase_device *kbdev = file->private_data; - u8 *pbyte; - unsigned int n_read; - unsigned long not_copied; - /* Limit the kernel buffer to no more than two pages */ - size_t mem = MIN(size, 2 * PAGE_SIZE); - unsigned long flags; - - struct firmware_trace_buffer *tb = - kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME); - - if (tb == NULL) { - dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); - return -EIO; - } - - pbyte = kmalloc(mem, GFP_KERNEL); - if (pbyte == NULL) { - dev_err(kbdev->dev, "Couldn't allocate memory for trace buffer dump"); - return -ENOMEM; - } - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - n_read = kbase_csf_firmware_trace_buffer_read_data(tb, pbyte, mem); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - /* Do the copy, if we have obtained some trace data */ - not_copied = (n_read) ? 
copy_to_user(buf, pbyte, n_read) : 0; - kfree(pbyte); - - if (!not_copied) { - *ppos += n_read; - return n_read; - } - - dev_err(kbdev->dev, "Couldn't copy trace buffer data to user space buffer"); - return -EFAULT; -} - - -DEFINE_SIMPLE_ATTRIBUTE(kbase_csf_firmware_trace_enable_mask_fops, - kbase_csf_firmware_trace_enable_mask_read, - kbase_csf_firmware_trace_enable_mask_write, "%llx\n"); - -static const struct file_operations kbasep_csf_firmware_trace_debugfs_fops = { - .owner = THIS_MODULE, - .open = kbasep_csf_firmware_trace_debugfs_open, - .read = kbasep_csf_firmware_trace_debugfs_read, - .llseek = no_llseek, -}; - -void kbase_csf_firmware_trace_buffer_debugfs_init(struct kbase_device *kbdev) -{ - debugfs_create_file("fw_trace_enable_mask", 0644, - kbdev->mali_debugfs_directory, kbdev, - &kbase_csf_firmware_trace_enable_mask_fops); - - debugfs_create_file("fw_traces", 0444, - kbdev->mali_debugfs_directory, kbdev, - &kbasep_csf_firmware_trace_debugfs_fops); -} -#endif /* CONFIG_DEBUG_FS */ diff --git a/mali_kbase/csf/mali_kbase_csf_trace_buffer.h b/mali_kbase/csf/mali_kbase_csf_trace_buffer.h index 6c3907c..037dc22 100644 --- a/mali_kbase/csf/mali_kbase_csf_trace_buffer.h +++ b/mali_kbase/csf/mali_kbase_csf_trace_buffer.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,7 +25,7 @@ #include <linux/types.h> #define CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX (4) -#define FW_TRACE_BUF_NAME "fwlog" +#define FIRMWARE_LOG_BUF_NAME "fwlog" #define FW_TRACE_BUF_NR_PAGES 4 /* Forward declarations */ @@ -59,7 +59,7 @@ struct kbase_device; int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev); /** - * kbase_csf_firmware_trace_buffer_term - Terminate trace buffers + * kbase_csf_firmware_trace_buffers_term - Terminate trace buffers * * @kbdev: Device pointer */ @@ -166,15 +166,23 @@ bool kbase_csf_firmware_trace_buffer_is_empty( unsigned int kbase_csf_firmware_trace_buffer_read_data( struct firmware_trace_buffer *trace_buffer, u8 *data, unsigned int num_bytes); -#if IS_ENABLED(CONFIG_DEBUG_FS) /** - * kbase_csf_fw_trace_buffer_debugfs_init() - Add debugfs entries for setting - * enable mask and dumping the binary - * firmware trace buffer + * kbase_csf_firmware_trace_buffer_get_active_mask64 - Get trace buffer active mask * - * @kbdev: Pointer to the device + * @tb: Trace buffer handle + * + * Return: Trace buffer active mask. + */ +u64 kbase_csf_firmware_trace_buffer_get_active_mask64(struct firmware_trace_buffer *tb); + +/** + * kbase_csf_firmware_trace_buffer_set_active_mask64 - Set trace buffer active mask + * + * @tb: Trace buffer handle + * @mask: New active mask + * + * Return: 0 if successful, negative error code on failure. 
*/ -void kbase_csf_firmware_trace_buffer_debugfs_init(struct kbase_device *kbdev); -#endif /* CONFIG_DEBUG_FS */ +int kbase_csf_firmware_trace_buffer_set_active_mask64(struct firmware_trace_buffer *tb, u64 mask); #endif /* _KBASE_CSF_TRACE_BUFFER_H_ */ diff --git a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_csf.h b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_csf.h index 2506ce1..9e4da9f 100644 --- a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_csf.h +++ b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -42,67 +42,67 @@ int dummy_array[] = { /* * Generic CSF events */ - KBASE_KTRACE_CODE_MAKE_CODE(EVICT_CTX_SLOTS), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_EVICT_CTX_SLOTS_START), /* info_val[0:7] == fw version_minor * info_val[15:8] == fw version_major * info_val[63:32] == fw version_hash */ - KBASE_KTRACE_CODE_MAKE_CODE(FIRMWARE_BOOT), - KBASE_KTRACE_CODE_MAKE_CODE(FIRMWARE_REBOOT), - KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK), + KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_BOOT), + KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_REBOOT), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_START), KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_END), /* info_val == total number of runnable groups across all kctxs */ - KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_START), KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_END), - KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RESET), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RESET_START), /* info_val = timeout in ms */ - KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_WAIT_PROTM_QUIT), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_WAIT_QUIT_START), /* info_val = remaining ms timeout, or 0 if timedout */ - KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_WAIT_PROTM_QUIT_DONE), - KBASE_KTRACE_CODE_MAKE_CODE(SYNC_UPDATE_EVENT), - KBASE_KTRACE_CODE_MAKE_CODE(SYNC_UPDATE_EVENT_NOTIFY_GPU), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_WAIT_QUIT_END), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GROUP_SYNC_UPDATE_EVENT), + KBASE_KTRACE_CODE_MAKE_CODE(CSF_SYNC_UPDATE_NOTIFY_GPU_EVENT), /* info_val = JOB_IRQ_STATUS */ - KBASE_KTRACE_CODE_MAKE_CODE(CSF_INTERRUPT), + KBASE_KTRACE_CODE_MAKE_CODE(CSF_INTERRUPT_START), /* info_val = JOB_IRQ_STATUS */ KBASE_KTRACE_CODE_MAKE_CODE(CSF_INTERRUPT_END), /* info_val = JOB_IRQ_STATUS */ - KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_PROCESS), + KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_PROCESS_START), /* info_val = GLB_REQ ^ GLB_ACQ */ - KBASE_KTRACE_CODE_MAKE_CODE(GLB_REQ_ACQ), + KBASE_KTRACE_CODE_MAKE_CODE(CSF_INTERRUPT_GLB_REQ_ACK), /* info_val[31:0] = num non idle offslot groups * info_val[32] = scheduler can suspend on idle */ - KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_CAN_IDLE), - KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_ADVANCE_TICK), - KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NOADVANCE_TICK), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_ADVANCE), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_NOADVANCE), /* kctx is added to the back of the list */ - KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_INSERT_RUNNABLE), - KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_REMOVE_RUNNABLE), + 
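With the debugfs-only helpers removed, the trace buffer header now exports a get/set pair so other components can drive the firmware trace enable mask directly. A debugfs wrapper equivalent to the deleted one could be rebuilt on top of the exported functions as sketched below; the attribute name and fops symbol are illustrative, while kbase_csf_firmware_get_trace_buffer(), FIRMWARE_LOG_BUF_NAME and the two *_active_mask64 calls come from this patch.

static int fw_trace_mask_get(void *data, u64 *val)
{
	struct kbase_device *kbdev = data;
	struct firmware_trace_buffer *tb =
		kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);

	if (!tb)
		return -EIO;

	*val = kbase_csf_firmware_trace_buffer_get_active_mask64(tb);
	return 0;
}

static int fw_trace_mask_set(void *data, u64 val)
{
	struct kbase_device *kbdev = data;
	struct firmware_trace_buffer *tb =
		kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);

	if (!tb)
		return -EIO;

	return kbase_csf_firmware_trace_buffer_set_active_mask64(tb, val);
}

DEFINE_DEBUGFS_ATTRIBUTE(fw_trace_mask_fops, fw_trace_mask_get, fw_trace_mask_set, "%llx\n");
/* debugfs_create_file("fw_trace_enable_mask", 0644, parent_dir, kbdev, &fw_trace_mask_fops); */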
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RUNNABLE_KCTX_INSERT), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RUNNABLE_KCTX_REMOVE), /* kctx is moved to the back of the list */ - KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_ROTATE_RUNNABLE), - KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_HEAD_RUNNABLE), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RUNNABLE_KCTX_ROTATE), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RUNNABLE_KCTX_HEAD), - KBASE_KTRACE_CODE_MAKE_CODE(IDLE_WORKER_BEGIN), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_WORKER_START), /* 4-bit encoding of boolean values (ease of reading as hex values) * * info_val[3:0] = was reset active/failed to be prevented * info_val[7:4] = whether scheduler was both idle and suspendable * info_val[11:8] = whether all groups were suspended */ - KBASE_KTRACE_CODE_MAKE_CODE(IDLE_WORKER_END), - KBASE_KTRACE_CODE_MAKE_CODE(GROUP_SYNC_UPDATE_WORKER_BEGIN), - KBASE_KTRACE_CODE_MAKE_CODE(GROUP_SYNC_UPDATE_WORKER_END), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_WORKER_END), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_END), /* info_val = bitmask of slots that gave an ACK for STATUS_UPDATE */ - KBASE_KTRACE_CODE_MAKE_CODE(SLOTS_STATUS_UPDATE_ACK), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_UPDATE_IDLE_SLOTS_ACK), /* info_val[63:0] = GPU cycle counter, used mainly for benchmarking * purpose. */ - KBASE_KTRACE_CODE_MAKE_CODE(GPU_IDLE_HANDLING_START), - KBASE_KTRACE_CODE_MAKE_CODE(MCU_HALTED), - KBASE_KTRACE_CODE_MAKE_CODE(MCU_IN_SLEEP), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_WORKER_HANDLING_START), + KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_MCU_HALTED), + KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_MCU_SLEEP), /* * Group events @@ -111,17 +111,17 @@ int dummy_array[] = { * info_val[19:16] == as_nr * info_val[63:32] == endpoint config (max number of endpoints allowed) */ - KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_START), + KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_START_REQ), /* info_val == CSG_REQ state issued */ - KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STOP), + KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STOP_REQ), /* info_val == CSG_ACK state */ - KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STARTED), + KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_RUNNING), /* info_val == CSG_ACK state */ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STOPPED), /* info_val == slot cleaned */ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_CLEANED), /* info_val = slot requesting STATUS_UPDATE */ - KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STATUS_UPDATE), + KBASE_KTRACE_CODE_MAKE_CODE(CSG_UPDATE_IDLE_SLOT_REQ), /* info_val = scheduler's new csg_slots_idle_mask[0] * group->csg_nr indicates which bit was set */ @@ -133,13 +133,13 @@ int dummy_array[] = { */ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_IDLE_CLEAR), /* info_val == previous priority */ - KBASE_KTRACE_CODE_MAKE_CODE(CSG_PRIO_UPDATE), + KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_PRIO_UPDATE), /* info_val == CSG_REQ ^ CSG_ACK */ - KBASE_KTRACE_CODE_MAKE_CODE(CSG_SYNC_UPDATE_INTERRUPT), + KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_SYNC_UPDATE), /* info_val == CSG_REQ ^ CSG_ACK */ - KBASE_KTRACE_CODE_MAKE_CODE(CSG_IDLE_INTERRUPT), + KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_IDLE), /* info_val == CSG_REQ ^ CSG_ACK */ - KBASE_KTRACE_CODE_MAKE_CODE(CSG_PROGRESS_TIMER_INTERRUPT), + KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_PROGRESS_TIMER_EVENT), /* info_val[31:0] == CSG_REQ ^ CSG_ACQ * info_val[63:32] == CSG_IRQ_REQ ^ CSG_IRQ_ACK */ @@ -152,34 +152,34 @@ int dummy_array[] = { /* info_val[31:0] == new run state of the 
evicted group * info_val[63:32] == number of runnable groups */ - KBASE_KTRACE_CODE_MAKE_CODE(GROUP_EVICT_SCHED), + KBASE_KTRACE_CODE_MAKE_CODE(GROUP_EVICT), /* info_val == new num_runnable_grps * group is added to the back of the list for its priority level */ - KBASE_KTRACE_CODE_MAKE_CODE(GROUP_INSERT_RUNNABLE), + KBASE_KTRACE_CODE_MAKE_CODE(GROUP_RUNNABLE_INSERT), /* info_val == new num_runnable_grps */ - KBASE_KTRACE_CODE_MAKE_CODE(GROUP_REMOVE_RUNNABLE), + KBASE_KTRACE_CODE_MAKE_CODE(GROUP_RUNNABLE_REMOVE), /* info_val == num_runnable_grps * group is moved to the back of the list for its priority level */ - KBASE_KTRACE_CODE_MAKE_CODE(GROUP_ROTATE_RUNNABLE), - KBASE_KTRACE_CODE_MAKE_CODE(GROUP_HEAD_RUNNABLE), + KBASE_KTRACE_CODE_MAKE_CODE(GROUP_RUNNABLE_ROTATE), + KBASE_KTRACE_CODE_MAKE_CODE(GROUP_RUNNABLE_HEAD), /* info_val == new num_idle_wait_grps * group is added to the back of the list */ - KBASE_KTRACE_CODE_MAKE_CODE(GROUP_INSERT_IDLE_WAIT), + KBASE_KTRACE_CODE_MAKE_CODE(GROUP_IDLE_WAIT_INSERT), /* info_val == new num_idle_wait_grps * group is added to the back of the list */ - KBASE_KTRACE_CODE_MAKE_CODE(GROUP_REMOVE_IDLE_WAIT), - KBASE_KTRACE_CODE_MAKE_CODE(GROUP_HEAD_IDLE_WAIT), + KBASE_KTRACE_CODE_MAKE_CODE(GROUP_IDLE_WAIT_REMOVE), + KBASE_KTRACE_CODE_MAKE_CODE(GROUP_IDLE_WAIT_HEAD), /* info_val == is scheduler running with protected mode tasks */ - KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_CHECK_PROTM_ENTER), - KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_ENTER_PROTM), - KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_EXIT_PROTM), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_ENTER_CHECK), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_ENTER), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_EXIT), /* info_val[31:0] == number of GPU address space slots in use * info_val[63:32] == number of runnable groups */ @@ -187,11 +187,11 @@ int dummy_array[] = { /* info_val == new count of off-slot non-idle groups * no group indicates it was set rather than incremented */ - KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_INC), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_GRP_INC), /* info_val == new count of off-slot non-idle groups */ - KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_DEC), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC), - KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_BEGIN), + KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_START), KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_END), /* @@ -201,42 +201,42 @@ int dummy_array[] = { KBASE_KTRACE_CODE_MAKE_CODE(CSI_START), /* info_val == queue->enabled before stop */ KBASE_KTRACE_CODE_MAKE_CODE(CSI_STOP), - KBASE_KTRACE_CODE_MAKE_CODE(CSI_STOP_REQUESTED), + KBASE_KTRACE_CODE_MAKE_CODE(CSI_STOP_REQ), /* info_val == CS_REQ ^ CS_ACK that were not processed due to the group * being suspended */ - KBASE_KTRACE_CODE_MAKE_CODE(CSI_IGNORED_INTERRUPTS_GROUP_SUSPEND), + KBASE_KTRACE_CODE_MAKE_CODE(CSI_INTERRUPT_GROUP_SUSPENDS_IGNORED), /* info_val == CS_REQ ^ CS_ACK */ - KBASE_KTRACE_CODE_MAKE_CODE(CSI_FAULT_INTERRUPT), + KBASE_KTRACE_CODE_MAKE_CODE(CSI_INTERRUPT_FAULT), /* info_val == CS_REQ ^ CS_ACK */ - KBASE_KTRACE_CODE_MAKE_CODE(CSI_TILER_OOM_INTERRUPT), + KBASE_KTRACE_CODE_MAKE_CODE(CSI_INTERRUPT_TILER_OOM), /* info_val == CS_REQ ^ CS_ACK */ - KBASE_KTRACE_CODE_MAKE_CODE(CSI_PROTM_PEND_INTERRUPT), + KBASE_KTRACE_CODE_MAKE_CODE(CSI_INTERRUPT_PROTM_PEND), /* info_val == CS_ACK_PROTM_PEND ^ CS_REQ_PROTM_PEND */ KBASE_KTRACE_CODE_MAKE_CODE(CSI_PROTM_ACK), /* info_val == group->run_State (for group the queue is 
bound to) */ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_START), KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_STOP), /* info_val == contents of CS_STATUS_WAIT_SYNC_POINTER */ - KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE), + KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_EVAL_START), /* info_val == bool for result of the evaluation */ - KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_EVALUATED), + KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_EVAL_END), /* info_val == contents of CS_STATUS_WAIT */ - KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_STATUS_WAIT), + KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_WAIT_STATUS), /* info_val == current sync value pointed to by queue->sync_ptr */ - KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_CURRENT_VAL), + KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_CUR_VAL), /* info_val == current value of CS_STATUS_WAIT_SYNC_VALUE */ - KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_TEST_VAL), + KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_TEST_VAL), /* info_val == current value of CS_STATUS_BLOCKED_REASON */ - KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_BLOCKED_REASON), + KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_BLOCKED_REASON), /* info_val = group's new protm_pending_bitmap[0] * queue->csi_index indicates which bit was set */ - KBASE_KTRACE_CODE_MAKE_CODE(PROTM_PENDING_SET), + KBASE_KTRACE_CODE_MAKE_CODE(CSI_PROTM_PEND_SET), /* info_val = group's new protm_pending_bitmap[0] * queue->csi_index indicates which bit was cleared */ - KBASE_KTRACE_CODE_MAKE_CODE(PROTM_PENDING_CLEAR), + KBASE_KTRACE_CODE_MAKE_CODE(CSI_PROTM_PEND_CLEAR), /* * KCPU queue events @@ -244,42 +244,42 @@ int dummy_array[] = { /* KTrace info_val == KCPU queue fence context * KCPU extra_info_val == N/A. */ - KBASE_KTRACE_CODE_MAKE_CODE(KCPU_QUEUE_NEW), + KBASE_KTRACE_CODE_MAKE_CODE(KCPU_QUEUE_CREATE), /* KTrace info_val == Number of pending commands in KCPU queue when * it is destroyed. * KCPU extra_info_val == Number of CQS wait operations present in * the KCPU queue when it is destroyed. */ - KBASE_KTRACE_CODE_MAKE_CODE(KCPU_QUEUE_DESTROY), + KBASE_KTRACE_CODE_MAKE_CODE(KCPU_QUEUE_DELETE), /* KTrace info_val == CQS event memory address * KCPU extra_info_val == Upper 32 bits of event memory, i.e. contents * of error field. */ - KBASE_KTRACE_CODE_MAKE_CODE(CQS_SET), + KBASE_KTRACE_CODE_MAKE_CODE(KCPU_CQS_SET), /* KTrace info_val == Number of CQS objects to be waited upon * KCPU extra_info_val == N/A. */ - KBASE_KTRACE_CODE_MAKE_CODE(CQS_WAIT_START), + KBASE_KTRACE_CODE_MAKE_CODE(KCPU_CQS_WAIT_START), /* KTrace info_val == CQS event memory address * KCPU extra_info_val == 1 if CQS was signaled with an error and queue * inherited the error, otherwise 0. */ - KBASE_KTRACE_CODE_MAKE_CODE(CQS_WAIT_END), + KBASE_KTRACE_CODE_MAKE_CODE(KCPU_CQS_WAIT_END), /* KTrace info_val == Fence context * KCPU extra_info_val == Fence seqno. */ - KBASE_KTRACE_CODE_MAKE_CODE(FENCE_SIGNAL), + KBASE_KTRACE_CODE_MAKE_CODE(KCPU_FENCE_SIGNAL), /* KTrace info_val == Fence context * KCPU extra_info_val == Fence seqno. */ - KBASE_KTRACE_CODE_MAKE_CODE(FENCE_WAIT_START), + KBASE_KTRACE_CODE_MAKE_CODE(KCPU_FENCE_WAIT_START), /* KTrace info_val == Fence context * KCPU extra_info_val == Fence seqno. 
*/ - KBASE_KTRACE_CODE_MAKE_CODE(FENCE_WAIT_END), + KBASE_KTRACE_CODE_MAKE_CODE(KCPU_FENCE_WAIT_END), #if 0 /* Dummy section to avoid breaking formatting */ }; #endif -/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ + /* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ diff --git a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_csf.c b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_csf.c index 824ca4b..cff6f89 100644 --- a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_csf.c +++ b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -98,6 +98,9 @@ void kbasep_ktrace_add_csf(struct kbase_device *kbdev, struct kbase_ktrace_msg *trace_msg; struct kbase_context *kctx = NULL; + if (unlikely(!kbasep_ktrace_initialized(&kbdev->ktrace))) + return; + spin_lock_irqsave(&kbdev->ktrace.lock, irqflags); /* Reserve and update indices */ @@ -165,6 +168,9 @@ void kbasep_ktrace_add_csf_kcpu(struct kbase_device *kbdev, struct kbase_ktrace_msg *trace_msg; struct kbase_context *kctx = queue->kctx; + if (unlikely(!kbasep_ktrace_initialized(&kbdev->ktrace))) + return; + spin_lock_irqsave(&kbdev->ktrace.lock, irqflags); /* Reserve and update indices */ diff --git a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_defs_csf.h b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_defs_csf.h index 7f32cd2..1896e10 100644 --- a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_defs_csf.h +++ b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_defs_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -47,7 +47,7 @@ * 1.3: * Add a lot of extra new traces. Tweak some existing scheduler related traces * to contain extra information information/happen at slightly different times. - * SCHEDULER_EXIT_PROTM now has group information + * SCHEDULER_PROTM_EXIT now has group information */ #define KBASE_KTRACE_VERSION_MAJOR 1 #define KBASE_KTRACE_VERSION_MINOR 3 diff --git a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_jm.c b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_jm.c index 05d1677..6597a15 100644 --- a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_jm.c +++ b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -80,6 +80,9 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev, unsigned long irqflags; struct kbase_ktrace_msg *trace_msg; + if (unlikely(!kbasep_ktrace_initialized(&kbdev->ktrace))) + return; + spin_lock_irqsave(&kbdev->ktrace.lock, irqflags); /* Reserve and update indices */ diff --git a/mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_csf.h b/mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_csf.h index 9ee7f81..86e81e5 100644 --- a/mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_csf.h +++ b/mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,37 +30,36 @@ /* * Generic CSF events - using the common DEFINE_MALI_ADD_EVENT */ -DEFINE_MALI_ADD_EVENT(EVICT_CTX_SLOTS); -DEFINE_MALI_ADD_EVENT(FIRMWARE_BOOT); -DEFINE_MALI_ADD_EVENT(FIRMWARE_REBOOT); -DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK); +DEFINE_MALI_ADD_EVENT(SCHEDULER_EVICT_CTX_SLOTS_START); +DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_BOOT); +DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_REBOOT); +DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_START); DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_END); -DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK); +DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_START); DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_END); -DEFINE_MALI_ADD_EVENT(SCHEDULER_RESET); -DEFINE_MALI_ADD_EVENT(SCHEDULER_WAIT_PROTM_QUIT); -DEFINE_MALI_ADD_EVENT(SCHEDULER_WAIT_PROTM_QUIT_DONE); -DEFINE_MALI_ADD_EVENT(SYNC_UPDATE_EVENT); -DEFINE_MALI_ADD_EVENT(SYNC_UPDATE_EVENT_NOTIFY_GPU); -DEFINE_MALI_ADD_EVENT(CSF_INTERRUPT); +DEFINE_MALI_ADD_EVENT(SCHEDULER_RESET_START); +DEFINE_MALI_ADD_EVENT(SCHEDULER_PROTM_WAIT_QUIT_START); +DEFINE_MALI_ADD_EVENT(SCHEDULER_PROTM_WAIT_QUIT_END); +DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_EVENT); +DEFINE_MALI_ADD_EVENT(CSF_SYNC_UPDATE_NOTIFY_GPU_EVENT); +DEFINE_MALI_ADD_EVENT(CSF_INTERRUPT_START); DEFINE_MALI_ADD_EVENT(CSF_INTERRUPT_END); -DEFINE_MALI_ADD_EVENT(CSG_INTERRUPT_PROCESS); -DEFINE_MALI_ADD_EVENT(GLB_REQ_ACQ); -DEFINE_MALI_ADD_EVENT(SCHEDULER_CAN_IDLE); -DEFINE_MALI_ADD_EVENT(SCHEDULER_ADVANCE_TICK); -DEFINE_MALI_ADD_EVENT(SCHEDULER_NOADVANCE_TICK); -DEFINE_MALI_ADD_EVENT(SCHEDULER_INSERT_RUNNABLE); -DEFINE_MALI_ADD_EVENT(SCHEDULER_REMOVE_RUNNABLE); -DEFINE_MALI_ADD_EVENT(SCHEDULER_ROTATE_RUNNABLE); -DEFINE_MALI_ADD_EVENT(SCHEDULER_HEAD_RUNNABLE); -DEFINE_MALI_ADD_EVENT(IDLE_WORKER_BEGIN); -DEFINE_MALI_ADD_EVENT(IDLE_WORKER_END); -DEFINE_MALI_ADD_EVENT(GROUP_SYNC_UPDATE_WORKER_BEGIN); -DEFINE_MALI_ADD_EVENT(GROUP_SYNC_UPDATE_WORKER_END); -DEFINE_MALI_ADD_EVENT(SLOTS_STATUS_UPDATE_ACK); -DEFINE_MALI_ADD_EVENT(GPU_IDLE_HANDLING_START); -DEFINE_MALI_ADD_EVENT(MCU_HALTED); -DEFINE_MALI_ADD_EVENT(MCU_IN_SLEEP); +DEFINE_MALI_ADD_EVENT(CSF_INTERRUPT_GLB_REQ_ACK); +DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND); +DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_ADVANCE); +DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_NOADVANCE); +DEFINE_MALI_ADD_EVENT(SCHEDULER_RUNNABLE_KCTX_INSERT); +DEFINE_MALI_ADD_EVENT(SCHEDULER_RUNNABLE_KCTX_REMOVE); +DEFINE_MALI_ADD_EVENT(SCHEDULER_RUNNABLE_KCTX_ROTATE); 
+DEFINE_MALI_ADD_EVENT(SCHEDULER_RUNNABLE_KCTX_HEAD); +DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_WORKER_START); +DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_WORKER_END); +DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START); +DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_END); +DEFINE_MALI_ADD_EVENT(SCHEDULER_UPDATE_IDLE_SLOTS_ACK); +DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_WORKER_HANDLING_START); +DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_MCU_HALTED); +DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_MCU_SLEEP); DECLARE_EVENT_CLASS(mali_csf_grp_q_template, TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group, @@ -130,37 +129,38 @@ DECLARE_EVENT_CLASS(mali_csf_grp_q_template, __entry->kctx_tgid, __entry->kctx_id, __entry->group_handle, \ __entry->csg_nr, __entry->slot_prio, __entry->info_val)) -DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_START); -DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STOP); -DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STARTED); +DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_START_REQ); +DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STOP_REQ); +DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_RUNNING); DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STOPPED); DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_CLEANED); -DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STATUS_UPDATE); +DEFINE_MALI_CSF_GRP_EVENT(CSG_UPDATE_IDLE_SLOT_REQ); DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_IDLE_SET); DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_IDLE_CLEAR); -DEFINE_MALI_CSF_GRP_EVENT(CSG_PRIO_UPDATE); -DEFINE_MALI_CSF_GRP_EVENT(CSG_SYNC_UPDATE_INTERRUPT); -DEFINE_MALI_CSF_GRP_EVENT(CSG_IDLE_INTERRUPT); -DEFINE_MALI_CSF_GRP_EVENT(CSG_PROGRESS_TIMER_INTERRUPT); +DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_PRIO_UPDATE); +DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_SYNC_UPDATE); +DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_IDLE); +DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_PROGRESS_TIMER_EVENT); +DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_PROCESS_START); DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_PROCESS_END); DEFINE_MALI_CSF_GRP_EVENT(GROUP_SYNC_UPDATE_DONE); DEFINE_MALI_CSF_GRP_EVENT(GROUP_DESCHEDULE); DEFINE_MALI_CSF_GRP_EVENT(GROUP_SCHEDULE); -DEFINE_MALI_CSF_GRP_EVENT(GROUP_EVICT_SCHED); -DEFINE_MALI_CSF_GRP_EVENT(GROUP_INSERT_RUNNABLE); -DEFINE_MALI_CSF_GRP_EVENT(GROUP_REMOVE_RUNNABLE); -DEFINE_MALI_CSF_GRP_EVENT(GROUP_ROTATE_RUNNABLE); -DEFINE_MALI_CSF_GRP_EVENT(GROUP_HEAD_RUNNABLE); -DEFINE_MALI_CSF_GRP_EVENT(GROUP_INSERT_IDLE_WAIT); -DEFINE_MALI_CSF_GRP_EVENT(GROUP_REMOVE_IDLE_WAIT); -DEFINE_MALI_CSF_GRP_EVENT(GROUP_HEAD_IDLE_WAIT); -DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_CHECK_PROTM_ENTER); -DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_ENTER_PROTM); -DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_EXIT_PROTM); +DEFINE_MALI_CSF_GRP_EVENT(GROUP_EVICT); +DEFINE_MALI_CSF_GRP_EVENT(GROUP_RUNNABLE_INSERT); +DEFINE_MALI_CSF_GRP_EVENT(GROUP_RUNNABLE_REMOVE); +DEFINE_MALI_CSF_GRP_EVENT(GROUP_RUNNABLE_ROTATE); +DEFINE_MALI_CSF_GRP_EVENT(GROUP_RUNNABLE_HEAD); +DEFINE_MALI_CSF_GRP_EVENT(GROUP_IDLE_WAIT_INSERT); +DEFINE_MALI_CSF_GRP_EVENT(GROUP_IDLE_WAIT_REMOVE); +DEFINE_MALI_CSF_GRP_EVENT(GROUP_IDLE_WAIT_HEAD); +DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_PROTM_ENTER_CHECK); +DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_PROTM_ENTER); +DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_PROTM_EXIT); DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_TOP_GRP); -DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_INC); -DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_DEC); -DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_BEGIN); +DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_GRP_INC); +DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC); 
+DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_START); DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_END); #undef DEFINE_MALI_CSF_GRP_EVENT @@ -176,22 +176,22 @@ DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_END); DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_START); DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_STOP); -DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_STOP_REQUESTED); -DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_IGNORED_INTERRUPTS_GROUP_SUSPEND); -DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_FAULT_INTERRUPT); -DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_TILER_OOM_INTERRUPT); -DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_PROTM_PEND_INTERRUPT); +DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_STOP_REQ); +DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_INTERRUPT_GROUP_SUSPENDS_IGNORED); +DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_INTERRUPT_FAULT); +DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_INTERRUPT_TILER_OOM); +DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_INTERRUPT_PROTM_PEND); DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_PROTM_ACK); DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_START); DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_STOP); -DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE); -DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_EVALUATED); -DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_STATUS_WAIT); -DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_CURRENT_VAL); -DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_TEST_VAL); -DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_BLOCKED_REASON); -DEFINE_MALI_CSF_GRP_Q_EVENT(PROTM_PENDING_SET); -DEFINE_MALI_CSF_GRP_Q_EVENT(PROTM_PENDING_CLEAR); +DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_EVAL_START); +DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_EVAL_END); +DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_WAIT_STATUS); +DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_CUR_VAL); +DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_TEST_VAL); +DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_BLOCKED_REASON); +DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_PROTM_PEND_SET); +DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_PROTM_PEND_CLEAR); #undef DEFINE_MALI_CSF_GRP_Q_EVENT @@ -230,14 +230,14 @@ DECLARE_EVENT_CLASS(mali_csf_kcpu_queue_template, u64 info_val1, u64 info_val2), \ TP_ARGS(queue, info_val1, info_val2)) -DEFINE_MALI_CSF_KCPU_EVENT(KCPU_QUEUE_NEW); -DEFINE_MALI_CSF_KCPU_EVENT(KCPU_QUEUE_DESTROY); -DEFINE_MALI_CSF_KCPU_EVENT(CQS_SET); -DEFINE_MALI_CSF_KCPU_EVENT(CQS_WAIT_START); -DEFINE_MALI_CSF_KCPU_EVENT(CQS_WAIT_END); -DEFINE_MALI_CSF_KCPU_EVENT(FENCE_SIGNAL); -DEFINE_MALI_CSF_KCPU_EVENT(FENCE_WAIT_START); -DEFINE_MALI_CSF_KCPU_EVENT(FENCE_WAIT_END); +DEFINE_MALI_CSF_KCPU_EVENT(KCPU_QUEUE_CREATE); +DEFINE_MALI_CSF_KCPU_EVENT(KCPU_QUEUE_DELETE); +DEFINE_MALI_CSF_KCPU_EVENT(KCPU_CQS_SET); +DEFINE_MALI_CSF_KCPU_EVENT(KCPU_CQS_WAIT_START); +DEFINE_MALI_CSF_KCPU_EVENT(KCPU_CQS_WAIT_END); +DEFINE_MALI_CSF_KCPU_EVENT(KCPU_FENCE_SIGNAL); +DEFINE_MALI_CSF_KCPU_EVENT(KCPU_FENCE_WAIT_START); +DEFINE_MALI_CSF_KCPU_EVENT(KCPU_FENCE_WAIT_END); #undef DEFINE_MALI_CSF_KCPU_EVENT diff --git a/mali_kbase/debug/mali_kbase_debug_ktrace.c b/mali_kbase/debug/mali_kbase_debug_ktrace.c index 9bf8610..f521b47 100644 --- a/mali_kbase/debug/mali_kbase_debug_ktrace.c +++ b/mali_kbase/debug/mali_kbase_debug_ktrace.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,13 +27,13 @@ int kbase_ktrace_init(struct kbase_device *kbdev) #if KBASE_KTRACE_TARGET_RBUF struct kbase_ktrace_msg *rbuf; + spin_lock_init(&kbdev->ktrace.lock); rbuf = kmalloc_array(KBASE_KTRACE_SIZE, sizeof(*rbuf), GFP_KERNEL); if (!rbuf) return -EINVAL; kbdev->ktrace.rbuf = rbuf; - spin_lock_init(&kbdev->ktrace.lock); #endif /* KBASE_KTRACE_TARGET_RBUF */ return 0; } @@ -42,6 +42,7 @@ void kbase_ktrace_term(struct kbase_device *kbdev) { #if KBASE_KTRACE_TARGET_RBUF kfree(kbdev->ktrace.rbuf); + kbdev->ktrace.rbuf = NULL; #endif /* KBASE_KTRACE_TARGET_RBUF */ } @@ -183,6 +184,9 @@ void kbasep_ktrace_add(struct kbase_device *kbdev, enum kbase_ktrace_code code, unsigned long irqflags; struct kbase_ktrace_msg *trace_msg; + if (unlikely(!kbasep_ktrace_initialized(&kbdev->ktrace))) + return; + WARN_ON((flags & ~KBASE_KTRACE_FLAG_COMMON_ALL)); spin_lock_irqsave(&kbdev->ktrace.lock, irqflags); diff --git a/mali_kbase/debug/mali_kbase_debug_ktrace.h b/mali_kbase/debug/mali_kbase_debug_ktrace.h index f1e6d3d..31a15a0 100644 --- a/mali_kbase/debug/mali_kbase_debug_ktrace.h +++ b/mali_kbase/debug/mali_kbase_debug_ktrace.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -82,6 +82,18 @@ void kbase_ktrace_debugfs_init(struct kbase_device *kbdev); */ #if KBASE_KTRACE_TARGET_RBUF /** + * kbasep_ktrace_initialized - Check whether kbase ktrace is initialized + * + * @ktrace: ktrace of kbase device. + * + * Return: true if ktrace has been initialized. + */ +static inline bool kbasep_ktrace_initialized(struct kbase_ktrace *ktrace) +{ + return ktrace->rbuf != NULL; +} + +/** * kbasep_ktrace_add - internal function to add trace to the ringbuffer. * @kbdev: kbase device * @code: ktrace code diff --git a/mali_kbase/debug/mali_kbase_debug_ktrace_defs.h b/mali_kbase/debug/mali_kbase_debug_ktrace_defs.h index 4694b78..8d9e11e 100644 --- a/mali_kbase/debug/mali_kbase_debug_ktrace_defs.h +++ b/mali_kbase/debug/mali_kbase_debug_ktrace_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -138,8 +138,8 @@ enum kbase_ktrace_code { }; /** - * struct kbase_ktrace - object representing a trace message added to trace - * buffer trace_rbuf in &kbase_device + * struct kbase_ktrace_msg - object representing a trace message added to trace + * buffer trace_rbuf in &kbase_device * @timestamp: CPU timestamp at which the trace message was added. * @thread_id: id of the thread in the context of which trace message was * added. 
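The ktrace changes above follow one pattern: the ring buffer pointer doubles as an "initialized" flag. kbase_ktrace_init() now initialises the spinlock first and leaves rbuf as the last thing to become non-NULL, kbase_ktrace_term() NULLs it again, and every add path (kbasep_ktrace_add, kbasep_ktrace_add_csf, kbasep_ktrace_add_csf_kcpu, kbasep_ktrace_add_jm) bails out early via kbasep_ktrace_initialized(). Condensed below, with the surrounding fields assumed rather than copied from the driver:

#include <linux/spinlock.h>

/* Minimal sketch of the guard; only the rbuf/lock idea is taken from the patch. */
struct example_ktrace {
	spinlock_t lock;
	void *rbuf;		/* non-NULL only between init and term */
};

static inline bool example_ktrace_initialized(struct example_ktrace *kt)
{
	return kt->rbuf != NULL;
}

static void example_ktrace_add(struct example_ktrace *kt /*, code, info... */)
{
	unsigned long flags;

	/* Trace points can fire from early-init or late-term code paths,
	 * so drop the message instead of touching an unallocated buffer.
	 */
	if (unlikely(!example_ktrace_initialized(kt)))
		return;

	spin_lock_irqsave(&kt->lock, flags);
	/* ... reserve a slot in kt->rbuf and fill in the message ... */
	spin_unlock_irqrestore(&kt->lock, flags);
}

Moving spin_lock_init() ahead of the allocation keeps the lock valid regardless of whether the buffer allocation succeeds, which fits the new early placement of kbase_ktrace_init() inside kbase_device_early_init() later in this series.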
diff --git a/mali_kbase/device/backend/mali_kbase_device_csf.c b/mali_kbase/device/backend/mali_kbase_device_csf.c index 5325658..51abad0 100644 --- a/mali_kbase/device/backend/mali_kbase_device_csf.c +++ b/mali_kbase/device/backend/mali_kbase_device_csf.c @@ -43,6 +43,7 @@ #include <mali_kbase_hwcnt_virtualizer.h> #include <mali_kbase_kinstr_prfcnt.h> #include <mali_kbase_vinstr.h> +#include <tl/mali_kbase_timeline.h> /** * kbase_device_firmware_hwcnt_term - Terminate CSF firmware and HWC @@ -60,7 +61,7 @@ static void kbase_device_firmware_hwcnt_term(struct kbase_device *kbdev) kbase_vinstr_term(kbdev->vinstr_ctx); kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt); kbase_hwcnt_backend_csf_metadata_term(&kbdev->hwcnt_gpu_iface); - kbase_csf_firmware_term(kbdev); + kbase_csf_firmware_unload_term(kbdev); } } @@ -191,7 +192,7 @@ static int kbase_csf_early_init(struct kbase_device *kbdev) } /** - * kbase_csf_early_init - Early termination for firmware & scheduler. + * kbase_csf_early_term() - Early termination for firmware & scheduler. * @kbdev: Device pointer */ static void kbase_csf_early_term(struct kbase_device *kbdev) @@ -200,6 +201,19 @@ static void kbase_csf_early_term(struct kbase_device *kbdev) } /** + * kbase_csf_late_init - late initialization for firmware. + * @kbdev: Device pointer + * + * Return: 0 on success, error code otherwise. + */ +static int kbase_csf_late_init(struct kbase_device *kbdev) +{ + int err = kbase_csf_firmware_late_init(kbdev); + + return err; +} + +/** * kbase_device_hwcnt_watchdog_if_init - Create hardware counter watchdog * interface. * @kbdev: Device pointer @@ -269,59 +283,46 @@ static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev) static const struct kbase_device_init dev_init[] = { #if IS_ENABLED(CONFIG_MALI_NO_MALI) - { kbase_gpu_device_create, kbase_gpu_device_destroy, - "Dummy model initialization failed" }, + { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" }, #else { assign_irqs, NULL, "IRQ search failed" }, { registers_map, registers_unmap, "Register map failed" }, #endif - { power_control_init, power_control_term, - "Power control initialization failed" }, + { power_control_init, power_control_term, "Power control initialization failed" }, { kbase_device_io_history_init, kbase_device_io_history_term, "Register access history initialization failed" }, - { kbase_device_early_init, kbase_device_early_term, - "Early device initialization failed" }, - { kbase_device_populate_max_freq, NULL, - "Populating max frequency failed" }, + { kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" }, + { kbase_device_populate_max_freq, NULL, "Populating max frequency failed" }, + { kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" }, { kbase_device_misc_init, kbase_device_misc_term, "Miscellaneous device initialization failed" }, { kbase_device_pcm_dev_init, kbase_device_pcm_dev_term, "Priority control manager initialization failed" }, - { kbase_ctx_sched_init, kbase_ctx_sched_term, - "Context scheduler initialization failed" }, - { kbase_mem_init, kbase_mem_term, - "Memory subsystem initialization failed" }, + { kbase_ctx_sched_init, kbase_ctx_sched_term, "Context scheduler initialization failed" }, + { kbase_mem_init, kbase_mem_term, "Memory subsystem initialization failed" }, { kbase_csf_protected_memory_init, kbase_csf_protected_memory_term, "Protected memory allocator initialization failed" }, { kbase_device_coherency_init, NULL, "Device 
coherency init failed" }, { kbase_protected_mode_init, kbase_protected_mode_term, "Protected mode subsystem initialization failed" }, - { kbase_device_list_init, kbase_device_list_term, - "Device list setup failed" }, + { kbase_device_list_init, kbase_device_list_term, "Device list setup failed" }, { kbase_device_timeline_init, kbase_device_timeline_term, "Timeline stream initialization failed" }, { kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term, "Clock rate trace manager initialization failed" }, - { kbase_lowest_gpu_freq_init, NULL, - "Lowest freq initialization failed" }, - { kbase_device_hwcnt_watchdog_if_init, - kbase_device_hwcnt_watchdog_if_term, + { kbase_device_hwcnt_watchdog_if_init, kbase_device_hwcnt_watchdog_if_term, "GPU hwcnt backend watchdog interface creation failed" }, - { kbase_device_hwcnt_backend_csf_if_init, - kbase_device_hwcnt_backend_csf_if_term, + { kbase_device_hwcnt_backend_csf_if_init, kbase_device_hwcnt_backend_csf_if_term, "GPU hwcnt backend CSF interface creation failed" }, - { kbase_device_hwcnt_backend_csf_init, - kbase_device_hwcnt_backend_csf_term, + { kbase_device_hwcnt_backend_csf_init, kbase_device_hwcnt_backend_csf_term, "GPU hwcnt backend creation failed" }, { kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term, "GPU hwcnt context initialization failed" }, - { kbase_backend_late_init, kbase_backend_late_term, - "Late backend initialization failed" }, - { kbase_csf_early_init, kbase_csf_early_term, - "Early CSF initialization failed" }, + { kbase_csf_early_init, kbase_csf_early_term, "Early CSF initialization failed" }, + { kbase_backend_late_init, kbase_backend_late_term, "Late backend initialization failed" }, + { kbase_csf_late_init, NULL, "Late CSF initialization failed" }, { NULL, kbase_device_firmware_hwcnt_term, NULL }, - { kbase_device_debugfs_init, kbase_device_debugfs_term, - "DebugFS initialization failed" }, + { kbase_device_debugfs_init, kbase_device_debugfs_term, "DebugFS initialization failed" }, /* Sysfs init needs to happen before registering the device with * misc_register(), otherwise it causes a race condition between * registering the device and a uevent event being generated for @@ -339,8 +340,7 @@ static const struct kbase_device_init dev_init[] = { "Misc device registration failed" }, { kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer, "GPU property population failed" }, - { kbase_device_late_init, kbase_device_late_term, - "Late device initialization failed" }, + { kbase_device_late_init, kbase_device_late_term, "Late device initialization failed" }, }; static void kbase_device_term_partial(struct kbase_device *kbdev, @@ -468,7 +468,7 @@ static int kbase_csf_firmware_deferred_init(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->fw_load_lock); - err = kbase_csf_firmware_init(kbdev); + err = kbase_csf_firmware_load_init(kbdev); if (!err) { unsigned long flags; @@ -498,11 +498,12 @@ int kbase_device_firmware_init_once(struct kbase_device *kbdev) ret = kbase_device_hwcnt_csf_deferred_init(kbdev); if (ret) { - kbase_csf_firmware_term(kbdev); + kbase_csf_firmware_unload_term(kbdev); goto out; } kbase_csf_debugfs_init(kbdev); + kbase_timeline_io_debugfs_init(kbdev); out: kbase_pm_context_idle(kbdev); } diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c index 1e914d0..fcd0c50 100644 --- a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c +++ b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c @@ 
-1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -177,7 +177,9 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) dev_dbg(kbdev->dev, "Doorbell mirror interrupt received"); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); +#ifdef CONFIG_MALI_DEBUG WARN_ON(!kbase_csf_scheduler_get_nr_active_csgs(kbdev)); +#endif kbase_pm_disable_db_mirror_interrupt(kbdev); kbdev->pm.backend.exit_gpu_sleep_mode = true; kbase_csf_scheduler_invoke_tick(kbdev); diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_jm.c b/mali_kbase/device/backend/mali_kbase_device_hw_jm.c index ff57cf6..e6f0197 100644 --- a/mali_kbase/device/backend/mali_kbase_device_hw_jm.c +++ b/mali_kbase/device/backend/mali_kbase_device_hw_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -63,9 +63,6 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) if (val & RESET_COMPLETED) kbase_pm_reset_done(kbdev); - if (val & PRFCNT_SAMPLE_COMPLETED) - kbase_instr_hwcnt_sample_done(kbdev); - /* Defer clearing CLEAN_CACHES_COMPLETED to kbase_clean_caches_done. * We need to acquire hwaccess_lock to avoid a race condition with * kbase_gpu_cache_flush_and_busy_wait @@ -73,6 +70,13 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val & ~CLEAN_CACHES_COMPLETED); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val & ~CLEAN_CACHES_COMPLETED); + /* kbase_instr_hwcnt_sample_done frees the HWCNT pipeline to request another + * sample. Therefore this must be called after clearing the IRQ to avoid a + * race between clearing and the next sample raising the IRQ again. + */ + if (val & PRFCNT_SAMPLE_COMPLETED) + kbase_instr_hwcnt_sample_done(kbdev); + /* kbase_pm_check_transitions (called by kbase_pm_power_changed) must * be called after the IRQ has been cleared. This is because it might * trigger further power transitions and we don't want to miss the @@ -105,8 +109,7 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) #if !IS_ENABLED(CONFIG_MALI_NO_MALI) void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value) { - KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); - KBASE_DEBUG_ASSERT(kbdev->dev != NULL); + WARN_ON(!kbdev->pm.backend.gpu_powered); writel(value, kbdev->reg + offset); @@ -123,8 +126,7 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) { u32 val; - KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); - KBASE_DEBUG_ASSERT(kbdev->dev != NULL); + WARN_ON(!kbdev->pm.backend.gpu_powered); val = readl(kbdev->reg + offset); diff --git a/mali_kbase/device/backend/mali_kbase_device_jm.c b/mali_kbase/device/backend/mali_kbase_device_jm.c index 260afef..9287d73 100644 --- a/mali_kbase/device/backend/mali_kbase_device_jm.c +++ b/mali_kbase/device/backend/mali_kbase_device_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. 
All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -241,7 +241,7 @@ static const struct kbase_device_init dev_init[] = { "Timeline stream initialization failed" }, { kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term, "Clock rate trace manager initialization failed" }, - { kbase_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" }, + { kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" }, { kbase_instr_backend_init, kbase_instr_backend_term, "Instrumentation backend initialization failed" }, { kbase_device_hwcnt_watchdog_if_init, kbase_device_hwcnt_watchdog_if_term, @@ -326,20 +326,19 @@ int kbase_device_init(struct kbase_device *kbdev) if (err) return err; - kthread_init_worker(&kbdev->job_done_worker); - kbdev->job_done_worker_thread = kbase_create_realtime_thread(kbdev, + err = kbase_create_realtime_thread(kbdev, kthread_worker_fn, &kbdev->job_done_worker, "mali_jd_thread"); - if (IS_ERR(kbdev->job_done_worker_thread)) - return PTR_ERR(kbdev->job_done_worker_thread); + if (err) + return err; err = kbase_pm_apc_init(kbdev); if (err) return err; kthread_init_worker(&kbdev->event_worker); - kbdev->event_worker_thread = kthread_run(kthread_worker_fn, - &kbdev->event_worker, "mali_event_thread"); - if (IS_ERR(kbdev->event_worker_thread)) { + kbdev->event_worker.task = + kthread_run(kthread_worker_fn, &kbdev->event_worker, "mali_event_thread"); + if (IS_ERR(kbdev->event_worker.task)) { err = -ENOMEM; } diff --git a/mali_kbase/device/mali_kbase_device.c b/mali_kbase/device/mali_kbase_device.c index c123010..9571830 100644 --- a/mali_kbase/device/mali_kbase_device.c +++ b/mali_kbase/device/mali_kbase_device.c @@ -279,9 +279,7 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) goto dma_set_mask_failed; - /* There is no limit for Mali, so set to max. We only do this if dma_parms - * is already allocated by the platform. - */ + /* There is no limit for Mali, so set to max. 
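The device-init change above also switches the event worker to the kthread_worker API: the worker is initialised, a dedicated thread is started with kthread_worker_fn, and the resulting task is stored in event_worker.task. For reference, the generic lifecycle looks like the sketch below; the work item, thread name and error handling details are illustrative rather than lifted from the driver.

#include <linux/kthread.h>
#include <linux/err.h>

static void example_work_fn(struct kthread_work *work)
{
	/* Runs on the dedicated worker thread. */
}

static int example_start_worker(struct kthread_worker *worker)
{
	struct task_struct *task;

	kthread_init_worker(worker);
	task = kthread_run(kthread_worker_fn, worker, "example_worker");
	if (IS_ERR(task))
		return PTR_ERR(task);

	return 0;
}

/* Queueing and teardown:
 *	struct kthread_work work;
 *
 *	kthread_init_work(&work, example_work_fn);
 *	kthread_queue_work(worker, &work);
 *	kthread_flush_worker(worker);
 *	kthread_stop(worker->task);	// worker->task is set by kthread_worker_fn
 */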
*/ if (kbdev->dev->dma_parms) err = dma_set_max_seg_size(kbdev->dev, UINT_MAX); if (err) @@ -293,12 +291,9 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) if (err) goto dma_set_mask_failed; - err = kbase_ktrace_init(kbdev); - if (err) - goto term_as; err = kbase_pbha_read_dtb(kbdev); if (err) - goto term_ktrace; + goto term_as; init_waitqueue_head(&kbdev->cache_clean_wait); @@ -308,7 +303,11 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD; - kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS; +#if MALI_USE_CSF + kbdev->reset_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT); +#else + kbdev->reset_timeout_ms = JM_DEFAULT_RESET_TIMEOUT_MS; +#endif /* MALI_USE_CSF */ kbdev->mmu_mode = kbase_mmu_mode_get_aarch64(); @@ -326,8 +325,6 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) } return 0; -term_ktrace: - kbase_ktrace_term(kbdev); term_as: kbase_device_all_as_term(kbdev); dma_set_mask_failed: @@ -344,9 +341,6 @@ void kbase_device_misc_term(struct kbase_device *kbdev) #if KBASE_KTRACE_ENABLE kbase_debug_assert_register_hook(NULL, NULL); #endif - - kbase_ktrace_term(kbdev); - kbase_device_all_as_term(kbdev); @@ -484,10 +478,14 @@ int kbase_device_early_init(struct kbase_device *kbdev) { int err; + err = kbase_ktrace_init(kbdev); + if (err) + return err; + err = kbasep_platform_device_init(kbdev); if (err) - return err; + goto ktrace_term; err = kbase_pm_runtime_init(kbdev); if (err) @@ -501,7 +499,12 @@ int kbase_device_early_init(struct kbase_device *kbdev) /* Ensure we can access the GPU registers */ kbase_pm_register_access_enable(kbdev); - /* Find out GPU properties based on the GPU feature registers */ + /* + * Find out GPU properties based on the GPU feature registers. + * Note that this does not populate the few properties that depend on + * hw_features being initialized. Those are set by kbase_gpuprops_set_features + * soon after this in the init process. + */ kbase_gpuprops_set(kbdev); /* We're done accessing the GPU registers for now. */ @@ -524,6 +527,8 @@ fail_interrupts: kbase_pm_runtime_term(kbdev); fail_runtime_pm: kbasep_platform_device_term(kbdev); +ktrace_term: + kbase_ktrace_term(kbdev); return err; } @@ -540,6 +545,7 @@ void kbase_device_early_term(struct kbase_device *kbdev) #endif /* CONFIG_MALI_ARBITER_SUPPORT */ kbase_pm_runtime_term(kbdev); kbasep_platform_device_term(kbdev); + kbase_ktrace_term(kbdev); } int kbase_device_late_init(struct kbase_device *kbdev) diff --git a/mali_kbase/device/mali_kbase_device.h b/mali_kbase/device/mali_kbase_device.h index 5ff970a..6706a61 100644 --- a/mali_kbase/device/mali_kbase_device.h +++ b/mali_kbase/device/mali_kbase_device.h @@ -39,7 +39,7 @@ const struct list_head *kbase_device_get_list(void); void kbase_device_put_list(const struct list_head *dev_list); /** - * Kbase_increment_device_id - increment device id. + * kbase_increment_device_id - increment device id. * * Used to increment device id on successful initialization of the device. */ @@ -116,6 +116,22 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset); bool kbase_is_gpu_removed(struct kbase_device *kbdev); /** + * kbase_gpu_cache_flush_pa_range_and_busy_wait() - Start a cache physical range flush + * and busy wait + * + * @kbdev: kbase device to issue the MMU operation on. + * @phys: Starting address of the physical range to start the operation on. + * @nr_bytes: Number of bytes to work on. 
+ * @flush_op: Flush command register value to be sent to HW + * + * Issue a cache flush physical range command, then busy wait an irq status. + * This function will clear FLUSH_PA_RANGE_COMPLETED irq mask bit + * and busy-wait the rawstat register. + * + * Return: 0 if successful or a negative error code on failure. + */ +#define kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op) (0) +/** * kbase_gpu_cache_flush_and_busy_wait - Start a cache flush and busy wait * @kbdev: Kbase device * @flush_op: Flush command register value to be sent to HW @@ -188,7 +204,7 @@ int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev, void kbase_gpu_cache_clean_wait_complete(struct kbase_device *kbdev); /** - * kbase_clean_caches_done - Issue preiously queued cache clean request or + * kbase_clean_caches_done - Issue previously queued cache clean request or * wake up the requester that issued cache clean. * @kbdev: Kbase device * diff --git a/mali_kbase/device/mali_kbase_device_hw.c b/mali_kbase/device/mali_kbase_device_hw.c index 249d5f8..4e03e44 100644 --- a/mali_kbase/device/mali_kbase_device_hw.c +++ b/mali_kbase/device/mali_kbase_device_hw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2016, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,6 +27,9 @@ #include <mali_kbase_reset_gpu.h> #include <mmu/mali_kbase_mmu.h> +#define U64_LO_MASK ((1ULL << 32) - 1) +#define U64_HI_MASK (~U64_LO_MASK) + #if !IS_ENABLED(CONFIG_MALI_NO_MALI) bool kbase_is_gpu_removed(struct kbase_device *kbdev) { @@ -38,8 +41,9 @@ bool kbase_is_gpu_removed(struct kbase_device *kbdev) } #endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */ -static int busy_wait_cache_clean_irq(struct kbase_device *kbdev) +static int busy_wait_on_irq(struct kbase_device *kbdev, u32 irq_bit) { + char *irq_flag_name; /* Previously MMU-AS command was used for L2 cache flush on page-table update. * And we're using the same max-loops count for GPU command, because amount of * L2 cache flush overhead are same between them. @@ -48,28 +52,42 @@ static int busy_wait_cache_clean_irq(struct kbase_device *kbdev) /* Wait for the GPU cache clean operation to complete */ while (--max_loops && - !(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & - CLEAN_CACHES_COMPLETED)) { + !(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & irq_bit)) { ; } /* reset gpu if time-out occurred */ if (max_loops == 0) { + switch (irq_bit) { + case CLEAN_CACHES_COMPLETED: + irq_flag_name = "CLEAN_CACHES_COMPLETED"; + break; + case FLUSH_PA_RANGE_COMPLETED: + irq_flag_name = "FLUSH_PA_RANGE_COMPLETED"; + break; + default: + irq_flag_name = "UNKNOWN"; + break; + } + dev_err(kbdev->dev, - "CLEAN_CACHES_COMPLETED bit stuck, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n"); + "Stuck waiting on %s bit, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n", + irq_flag_name); + if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu_locked(kbdev); return -EBUSY; } - /* Clear the interrupt CLEAN_CACHES_COMPLETED bit. 
*/ - KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, CLEAN_CACHES_COMPLETED); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), - CLEAN_CACHES_COMPLETED); + /* Clear the interrupt bit. */ + KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, irq_bit); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), irq_bit); return 0; } +#define kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op) (0) + int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev, u32 flush_op) { @@ -97,7 +115,7 @@ int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev, irq_mask & ~CLEAN_CACHES_COMPLETED); /* busy wait irq status to be enabled */ - ret = busy_wait_cache_clean_irq(kbdev); + ret = busy_wait_on_irq(kbdev, (u32)CLEAN_CACHES_COMPLETED); if (ret) return ret; @@ -118,7 +136,7 @@ int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev, kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op); /* 3. Busy-wait irq status to be enabled. */ - ret = busy_wait_cache_clean_irq(kbdev); + ret = busy_wait_on_irq(kbdev, (u32)CLEAN_CACHES_COMPLETED); if (ret) return ret; diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c index 893a335..15bfd03 100644 --- a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c +++ b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -86,6 +86,9 @@ const char *kbase_gpu_exception_name(u32 const exception_code) case CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR: e = "FIRMWARE_INTERNAL_ERROR"; break; + case CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE: + e = "CS_UNRECOVERABLE"; + break; case CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT: e = "RESOURCE_EVICTION_TIMEOUT"; break; diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h index f6945b3..6ef61ce 100644 --- a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h +++ b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. 
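/*
 * A minimal sketch of the bounded polling pattern that busy_wait_on_irq()
 * above generalises: spin on GPU_IRQ_RAWSTAT until the requested bit is set
 * (or the loop bound expires), then acknowledge it through GPU_IRQ_CLEAR.
 * The loop bound here is illustrative only; the driver reuses its MMU-AS
 * max-loops constant as noted in the comment above.
 */
static int example_busy_wait_irq(struct kbase_device *kbdev, u32 irq_bit)
{
	unsigned int max_loops = 100000; /* illustrative bound only */

	while (--max_loops &&
	       !(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & irq_bit))
		;

	if (!max_loops)
		return -EBUSY; /* caller may escalate to a GPU reset */

	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), irq_bit);
	return 0;
}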
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -35,10 +35,7 @@ #define MCU_SUBSYSTEM_BASE 0x20000 /* IPA control registers */ -#define IPA_CONTROL_BASE 0x40000 -#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE+(r)) #define COMMAND 0x000 /* (WO) Command register */ -#define STATUS 0x004 /* (RO) Status register */ #define TIMER 0x008 /* (RW) Timer control register */ #define SELECT_CSHW_LO 0x010 /* (RW) Counter select for CS hardware, low word */ @@ -68,6 +65,8 @@ #define VALUE_SHADER_REG_LO(n) (VALUE_SHADER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ #define VALUE_SHADER_REG_HI(n) (VALUE_SHADER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ +#define AS_STATUS_AS_ACTIVE_INT 0x2 + /* Set to implementation defined, outer caching */ #define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull /* Set to write back memory, outer caching */ @@ -125,42 +124,18 @@ #define MCU_STATUS_HALTED (1 << 1) -#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory - * region base address, low word - */ -#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory - * region base address, high word - */ -#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter - * configuration - */ - -#define PRFCNT_CSHW_EN 0x06C /* (RW) Performance counter - * enable for CS Hardware - */ - -#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable - * flags for shader cores - */ -#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable - * flags for tiler - */ -#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable - * flags for MMU/L2 cache - */ - /* JOB IRQ flags */ #define JOB_IRQ_GLOBAL_IF (1 << 31) /* Global interface interrupt received */ /* GPU_COMMAND codes */ #define GPU_COMMAND_CODE_NOP 0x00 /* No operation, nothing happens */ #define GPU_COMMAND_CODE_RESET 0x01 /* Reset the GPU */ -#define GPU_COMMAND_CODE_PRFCNT 0x02 /* Clear or sample performance counters */ #define GPU_COMMAND_CODE_TIME 0x03 /* Configure time sources */ #define GPU_COMMAND_CODE_FLUSH_CACHES 0x04 /* Flush caches */ #define GPU_COMMAND_CODE_SET_PROTECTED_MODE 0x05 /* Places the GPU in protected mode */ #define GPU_COMMAND_CODE_FINISH_HALT 0x06 /* Halt CSF */ #define GPU_COMMAND_CODE_CLEAR_FAULT 0x07 /* Clear GPU_FAULTSTATUS and GPU_FAULTADDRESS, TODX */ +#define GPU_COMMAND_CODE_FLUSH_PA_RANGE 0x08 /* Flush the GPU caches for a physical range, TITX */ /* GPU_COMMAND_RESET payloads */ @@ -179,27 +154,34 @@ */ #define GPU_COMMAND_RESET_PAYLOAD_HARD_RESET 0x02 -/* GPU_COMMAND_PRFCNT payloads */ -#define GPU_COMMAND_PRFCNT_PAYLOAD_SAMPLE 0x01 /* Sample performance counters */ -#define GPU_COMMAND_PRFCNT_PAYLOAD_CLEAR 0x02 /* Clear performance counters */ - /* GPU_COMMAND_TIME payloads */ #define GPU_COMMAND_TIME_DISABLE 0x00 /* Disable cycle counter */ #define GPU_COMMAND_TIME_ENABLE 0x01 /* Enable cycle counter */ /* GPU_COMMAND_FLUSH_CACHES payloads bits for L2 caches */ -#define GPU_COMMAND_FLUSH_PAYLOAD_L2_NONE 0x000 /* No flush */ -#define GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN 0x001 /* CLN only */ -#define GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE 0x003 /* CLN + INV */ +#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_NONE 0x000 /* No flush */ +#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN 0x001 /* CLN only */ +#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE 0x003 /* CLN + INV */ /* GPU_COMMAND_FLUSH_CACHES payloads bits for Load-store caches */ -#define 
GPU_COMMAND_FLUSH_PAYLOAD_LSC_NONE 0x000 /* No flush */ -#define GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN 0x010 /* CLN only */ -#define GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN_INVALIDATE 0x030 /* CLN + INV */ +#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_NONE 0x000 /* No flush */ +#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN 0x010 /* CLN only */ +#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE 0x030 /* CLN + INV */ /* GPU_COMMAND_FLUSH_CACHES payloads bits for Other caches */ -#define GPU_COMMAND_FLUSH_PAYLOAD_OTHER_NONE 0x000 /* No flush */ -#define GPU_COMMAND_FLUSH_PAYLOAD_OTHER_INVALIDATE 0x200 /* INV only */ +#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE 0x000 /* No flush */ +#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_INVALIDATE 0x200 /* INV only */ + +/* GPU_COMMAND_FLUSH_PA_RANGE payload bits for flush modes */ +#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_NONE 0x00 /* No flush */ +#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN 0x01 /* CLN only */ +#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_INVALIDATE 0x02 /* INV only */ +#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE 0x03 /* CLN + INV */ + +/* GPU_COMMAND_FLUSH_PA_RANGE payload bits for which caches should be the target of the command */ +#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_OTHER_CACHE 0x10 /* Other caches */ +#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_LSC_CACHE 0x20 /* Load-store caches */ +#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE 0x40 /* L2 caches */ /* GPU_COMMAND command + payload */ #define GPU_COMMAND_CODE_PAYLOAD(opcode, payload) \ @@ -218,14 +200,6 @@ #define GPU_COMMAND_HARD_RESET \ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_HARD_RESET) -/* Clear all performance counters, setting them all to zero. 
*/ -#define GPU_COMMAND_PRFCNT_CLEAR \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_PRFCNT, GPU_COMMAND_PRFCNT_PAYLOAD_CLEAR) - -/* Sample all performance counters, writing them out to memory */ -#define GPU_COMMAND_PRFCNT_SAMPLE \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_PRFCNT, GPU_COMMAND_PRFCNT_PAYLOAD_SAMPLE) - /* Starts the cycle counter, and system timestamp propagation */ #define GPU_COMMAND_CYCLE_COUNT_START \ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_ENABLE) @@ -235,28 +209,53 @@ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_DISABLE) /* Clean and invalidate L2 cache (Equivalent to FLUSH_PT) */ -#define GPU_COMMAND_CACHE_CLN_INV_L2 \ - GPU_COMMAND_CODE_PAYLOAD( \ - GPU_COMMAND_CODE_FLUSH_CACHES, \ - (GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE | \ - GPU_COMMAND_FLUSH_PAYLOAD_LSC_NONE | \ - GPU_COMMAND_FLUSH_PAYLOAD_OTHER_NONE)) +#define GPU_COMMAND_CACHE_CLN_INV_L2 \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \ + (GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE | \ + GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_NONE | \ + GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE)) /* Clean and invalidate L2 and LSC caches (Equivalent to FLUSH_MEM) */ -#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC \ - GPU_COMMAND_CODE_PAYLOAD( \ - GPU_COMMAND_CODE_FLUSH_CACHES, \ - (GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE | \ - GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN_INVALIDATE | \ - GPU_COMMAND_FLUSH_PAYLOAD_OTHER_NONE)) +#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \ + (GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE | \ + GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE | \ + GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE)) /* Clean and invalidate L2, LSC, and Other caches */ -#define GPU_COMMAND_CACHE_CLN_INV_FULL \ - GPU_COMMAND_CODE_PAYLOAD( \ - GPU_COMMAND_CODE_FLUSH_CACHES, \ - (GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE | \ - GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN_INVALIDATE | \ - GPU_COMMAND_FLUSH_PAYLOAD_OTHER_INVALIDATE)) +#define GPU_COMMAND_CACHE_CLN_INV_FULL \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \ + (GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE | \ + GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE | \ + GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_INVALIDATE)) + +/* Clean and invalidate only LSC cache */ +#define GPU_COMMAND_CACHE_CLN_INV_LSC \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \ + (GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_NONE | \ + GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE | \ + GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE)) + +/* Clean and invalidate physical range L2 cache (equivalent to FLUSH_PT) */ +#define GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2 \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_PA_RANGE, \ + (GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE | \ + GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE)) + +/* Clean and invalidate physical range L2 and LSC cache (equivalent to FLUSH_MEM) */ +#define GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_PA_RANGE, \ + (GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE | \ + GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_LSC_CACHE | \ + GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE)) + +/* Clean and invalidate physical range L2, LSC and Other caches */ +#define GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_FULL \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_PA_RANGE, \ + (GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE | 
\ + GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_OTHER_CACHE | \ + GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_LSC_CACHE | \ + GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE)) /* Merge cache flush commands */ #define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) ((cmd1) | (cmd2)) @@ -337,14 +336,16 @@ (((value) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) & GPU_FAULTSTATUS_ADDRESS_VALID_MASK)) /* IRQ flags */ -#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ -#define GPU_PROTECTED_FAULT (1 << 1) /* A GPU fault has occurred in protected mode */ -#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */ -#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ -#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */ -#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ -#define DOORBELL_MIRROR (1 << 18) /* Mirrors the doorbell interrupt line to the CPU */ -#define MCU_STATUS_GPU_IRQ (1 << 19) /* MCU requires attention */ +#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ +#define GPU_PROTECTED_FAULT (1 << 1) /* A GPU fault has occurred in protected mode */ +#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */ +#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ +#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */ +#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ +#define DOORBELL_MIRROR (1 << 18) /* Mirrors the doorbell interrupt line to the CPU */ +#define MCU_STATUS_GPU_IRQ (1 << 19) /* MCU requires attention */ +#define FLUSH_PA_RANGE_COMPLETED \ + (1 << 20) /* Set when a physical range cache clean operation has completed. */ /* * In Debug build, @@ -362,7 +363,11 @@ #define GPU_IRQ_REG_COMMON (GPU_FAULT | GPU_PROTECTED_FAULT | RESET_COMPLETED \ | POWER_CHANGED_ALL | MCU_STATUS_GPU_IRQ) -/* GPU_CONTROL_MCU.GPU_IRQ_RAWSTAT */ -#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when performance count sample has completed */ +/* GPU_FEATURES register */ +#define GPU_FEATURES_RAY_TRACING_SHIFT GPU_U(2) +#define GPU_FEATURES_RAY_TRACING_MASK (GPU_U(0x1) << GPU_FEATURES_RAY_TRACING_SHIFT) +#define GPU_FEATURES_RAY_TRACING_GET(reg_val) \ + (((reg_val)&GPU_FEATURES_RAY_TRACING_MASK) >> GPU_FEATURES_RAY_TRACING_SHIFT) +/* End of GPU_FEATURES register */ #endif /* _KBASE_GPU_REGMAP_CSF_H_ */ diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h index d1cd8fc..c349f4b 100644 --- a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h +++ b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -262,19 +262,22 @@ #define GPU_COMMAND_CACHE_CLN_INV_L2 GPU_COMMAND_CLEAN_INV_CACHES #define GPU_COMMAND_CACHE_CLN_INV_L2_LSC GPU_COMMAND_CLEAN_INV_CACHES #define GPU_COMMAND_CACHE_CLN_INV_FULL GPU_COMMAND_CLEAN_INV_CACHES +#define GPU_COMMAND_CACHE_CLN_INV_LSC GPU_COMMAND_CLEAN_INV_CACHES /* Merge cache flush commands */ #define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) \ ((cmd1) > (cmd2) ? 
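/*
 * Usage sketch for the renamed flush-command macros above (illustrative, not
 * part of the patch): on CSF, GPU_COMMAND_FLUSH_CACHE_MERGE() is a bitwise
 * OR, so merging an L2-only clean+invalidate with an L2+LSC one yields the
 * stronger L2+LSC request while leaving the Other caches untouched.
 */
static u32 example_merged_flush_cmd(void)
{
	u32 flush_cmd = GPU_COMMAND_CACHE_CLN_INV_L2;

	/* Also request the LSC flush; the OR keeps the existing L2 bits */
	flush_cmd = GPU_COMMAND_FLUSH_CACHE_MERGE(flush_cmd, GPU_COMMAND_CACHE_CLN_INV_L2_LSC);

	return flush_cmd; /* CLN+INV of L2 and LSC, Other caches untouched */
}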
(cmd1) : (cmd2)) /* IRQ flags */ -#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ -#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */ -#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */ -#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ -#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */ -#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */ -#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ +#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ +#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */ +#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */ +#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ +#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */ +#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */ +#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ +#define FLUSH_PA_RANGE_COMPLETED \ + (1 << 20) /* Set when a physical range cache clean operation has completed. */ /* * In Debug build, diff --git a/mali_kbase/gpu/mali_kbase_gpu_regmap.h b/mali_kbase/gpu/mali_kbase_gpu_regmap.h index 1d2a49b..1f4e5f0 100644 --- a/mali_kbase/gpu/mali_kbase_gpu_regmap.h +++ b/mali_kbase/gpu/mali_kbase_gpu_regmap.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,8 +34,12 @@ /* GPU_U definition */ #ifdef __ASSEMBLER__ #define GPU_U(x) x +#define GPU_UL(x) x +#define GPU_ULL(x) x #else #define GPU_U(x) x##u +#define GPU_UL(x) x##ul +#define GPU_ULL(x) x##ull #endif /* __ASSEMBLER__ */ /* Begin Register Offsets */ @@ -96,6 +100,7 @@ #define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2)) + #define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ #define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ @@ -355,8 +360,8 @@ (((value) << AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) & \ AS_LOCKADDR_LOCKADDR_SIZE_MASK)) #define AS_LOCKADDR_LOCKADDR_BASE_SHIFT GPU_U(12) -#define AS_LOCKADDR_LOCKADDR_BASE_MASK \ - (GPU_U(0xFFFFFFFFFFFFF) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT) +#define AS_LOCKADDR_LOCKADDR_BASE_MASK \ + (GPU_ULL(0xFFFFFFFFFFFFF) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT) #define AS_LOCKADDR_LOCKADDR_BASE_GET(reg_val) \ (((reg_val)&AS_LOCKADDR_LOCKADDR_BASE_MASK) >> \ AS_LOCKADDR_LOCKADDR_BASE_SHIFT) @@ -364,6 +369,11 @@ (((reg_val) & ~AS_LOCKADDR_LOCKADDR_BASE_MASK) | \ (((value) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT) & \ AS_LOCKADDR_LOCKADDR_BASE_MASK)) +#define AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT (6) +#define AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK ((0xF) << AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT) +#define AS_LOCKADDR_FLUSH_SKIP_LEVELS_SET(reg_val, value) \ + (((reg_val) & ~AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK) | \ + ((value << AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT) & AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK)) /* GPU_STATUS values */ #define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_csf.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_csf.c index 81dc56b..60b061e 100644 --- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_csf.c +++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_csf.c @@ -281,7 +281,7 @@ int kbase_ipa_counter_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) if (WARN_ON(ret)) return ret; - now = ktime_get(); + now = ktime_get_raw(); diff = ktime_sub(now, kbdev->ipa.last_sample_time); diff_ms = ktime_to_ms(diff); diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.c index e240117..34515a9 100644 --- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.c +++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved. 
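/*
 * Sanity sketch for the AS_LOCKADDR field macros added earlier in this hunk
 * (mali_kbase_gpu_regmap.h); illustrative only, not part of the patch.
 * GPU_ULL() keeps the 52-bit base mask 64-bit wide across the << 12 shift,
 * and FLUSH_SKIP_LEVELS is a 4-bit field at bits [9:6].
 */
static_assert(AS_LOCKADDR_LOCKADDR_BASE_MASK == 0xFFFFFFFFFFFFF000ull,
	      "base field must cover bits [63:12]");
static_assert(AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK == 0x3C0,
	      "flush-skip-levels field must cover bits [9:6]");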
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,7 +31,7 @@ #define DEFAULT_MIN_SAMPLE_CYCLES 10000 /** - * read_hwcnt() - read a counter value + * kbase_ipa_read_hwcnt() - read a counter value * @model_data: pointer to model data * @offset: offset, in bytes, into vinstr buffer * diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h index e1718c6..4479a4b 100644 --- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h +++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017-2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2018, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -83,7 +83,7 @@ struct kbase_ipa_model_vinstr_data { }; /** - * struct ipa_group - represents a single IPA group + * struct kbase_ipa_group - represents a single IPA group * @name: name of the IPA group * @default_value: default value of coefficient for IPA group. * Coefficients are interpreted as fractions where the @@ -152,7 +152,7 @@ s64 kbase_ipa_single_counter( s32 coeff, u32 counter); /** - * attach_vinstr() - attach a vinstr_buffer to an IPA model. + * kbase_ipa_attach_vinstr() - attach a vinstr_buffer to an IPA model. * @model_data: pointer to model data * * Attach a vinstr_buffer to an IPA model. The vinstr_buffer @@ -164,7 +164,7 @@ s64 kbase_ipa_single_counter( int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data); /** - * detach_vinstr() - detach a vinstr_buffer from an IPA model. + * kbase_ipa_detach_vinstr() - detach a vinstr_buffer from an IPA model. * @model_data: pointer to model data * * Detach a vinstr_buffer from an IPA model. diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c index f11be0d..eaa2258 100644 --- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c +++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -59,9 +59,11 @@ #define SC_BEATS_WR_TIB (KBASE_IPA_NR_BYTES_PER_CNT * 62) /** - * get_jm_counter() - get performance counter offset inside the Job Manager block + * kbase_g7x_power_model_get_jm_counter() - get performance counter offset + * inside the Job Manager block * @model_data: pointer to GPU model data. - * @counter_block_offset: offset in bytes of the performance counter inside the Job Manager block. + * @counter_block_offset: offset in bytes of the performance counter inside + * the Job Manager block. * * Return: Block offset in bytes of the required performance counter. 
*/ @@ -72,9 +74,11 @@ static u32 kbase_g7x_power_model_get_jm_counter(struct kbase_ipa_model_vinstr_da } /** - * get_memsys_counter() - get performance counter offset inside the Memory System block + * kbase_g7x_power_model_get_memsys_counter() - get performance counter offset + * inside the Memory System block * @model_data: pointer to GPU model data. - * @counter_block_offset: offset in bytes of the performance counter inside the (first) Memory System block. + * @counter_block_offset: offset in bytes of the performance counter inside + * the (first) Memory System block. * * Return: Block offset in bytes of the required performance counter. */ @@ -88,9 +92,11 @@ static u32 kbase_g7x_power_model_get_memsys_counter(struct kbase_ipa_model_vinst } /** - * get_sc_counter() - get performance counter offset inside the Shader Cores block + * kbase_g7x_power_model_get_sc_counter() - get performance counter offset + * inside the Shader Cores block * @model_data: pointer to GPU model data. - * @counter_block_offset: offset in bytes of the performance counter inside the (first) Shader Cores block. + * @counter_block_offset: offset in bytes of the performance counter inside + * the (first) Shader Cores block. * * Return: Block offset in bytes of the required performance counter. */ @@ -110,10 +116,12 @@ static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_da } /** - * memsys_single_counter() - calculate energy for a single Memory System performance counter. + * kbase_g7x_sum_all_memsys_blocks() - calculate energy for a single Memory + * System performance counter. * @model_data: pointer to GPU model data. * @coeff: default value of coefficient for IPA group. - * @counter_block_offset: offset in bytes of the counter inside the block it belongs to. + * @counter_block_offset: offset in bytes of the counter inside the block it + * belongs to. * * Return: Energy estimation for a single Memory System performance counter. */ @@ -130,12 +138,15 @@ static s64 kbase_g7x_sum_all_memsys_blocks( } /** - * sum_all_shader_cores() - calculate energy for a Shader Cores performance counter for all cores. + * kbase_g7x_sum_all_shader_cores() - calculate energy for a Shader Cores + * performance counter for all cores. * @model_data: pointer to GPU model data. * @coeff: default value of coefficient for IPA group. - * @counter_block_offset: offset in bytes of the counter inside the block it belongs to. + * @counter_block_offset: offset in bytes of the counter inside the block it + * belongs to. * - * Return: Energy estimation for a Shader Cores performance counter for all cores. + * Return: Energy estimation for a Shader Cores performance counter for all + * cores. */ static s64 kbase_g7x_sum_all_shader_cores( struct kbase_ipa_model_vinstr_data *model_data, @@ -150,7 +161,7 @@ static s64 kbase_g7x_sum_all_shader_cores( } /** - * jm_single_counter() - calculate energy for a single Job Manager performance counter. + * kbase_g7x_jm_single_counter() - calculate energy for a single Job Manager performance counter. * @model_data: pointer to GPU model data. * @coeff: default value of coefficient for IPA group. * @counter_block_offset: offset in bytes of the counter inside the block it belongs to. @@ -170,7 +181,7 @@ static s64 kbase_g7x_jm_single_counter( } /** - * get_active_cycles() - return the GPU_ACTIVE counter + * kbase_g7x_get_active_cycles() - return the GPU_ACTIVE counter * @model_data: pointer to GPU model data. 
* * Return: the number of cycles the GPU was active during the counter sampling diff --git a/mali_kbase/ipa/mali_kbase_ipa.c b/mali_kbase/ipa/mali_kbase_ipa.c index 428e68b..8b8bbd1 100644 --- a/mali_kbase/ipa/mali_kbase_ipa.c +++ b/mali_kbase/ipa/mali_kbase_ipa.c @@ -324,7 +324,7 @@ int kbase_ipa_init(struct kbase_device *kbdev) kbdev->ipa.configured_model = default_model; } - kbdev->ipa.last_sample_time = ktime_get(); + kbdev->ipa.last_sample_time = ktime_get_raw(); end: if (err) @@ -750,7 +750,7 @@ void kbase_ipa_reset_data(struct kbase_device *kbdev) mutex_lock(&kbdev->ipa.lock); - now = ktime_get(); + now = ktime_get_raw(); diff = ktime_sub(now, kbdev->ipa.last_sample_time); elapsed_time = ktime_to_ms(diff); @@ -765,7 +765,7 @@ void kbase_ipa_reset_data(struct kbase_device *kbdev) if (model != kbdev->ipa.fallback_model) model->ops->reset_counter_data(model); - kbdev->ipa.last_sample_time = ktime_get(); + kbdev->ipa.last_sample_time = ktime_get_raw(); } mutex_unlock(&kbdev->ipa.lock); diff --git a/mali_kbase/ipa/mali_kbase_ipa_debugfs.c b/mali_kbase/ipa/mali_kbase_ipa_debugfs.c index d554fff..a8523a7 100644 --- a/mali_kbase/ipa/mali_kbase_ipa_debugfs.c +++ b/mali_kbase/ipa/mali_kbase_ipa_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,6 +20,7 @@ */ #include <linux/debugfs.h> +#include <linux/version_compat_defs.h> #include <linux/list.h> #include <linux/mutex.h> @@ -27,10 +28,6 @@ #include "mali_kbase_ipa.h" #include "mali_kbase_ipa_debugfs.h" -#if (KERNEL_VERSION(4, 7, 0) > LINUX_VERSION_CODE) -#define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE -#endif - struct kbase_ipa_model_param { char *name; union { diff --git a/mali_kbase/ipa/mali_kbase_ipa_simple.c b/mali_kbase/ipa/mali_kbase_ipa_simple.c index fadae7d..f748144 100644 --- a/mali_kbase/ipa/mali_kbase_ipa_simple.c +++ b/mali_kbase/ipa/mali_kbase_ipa_simple.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2016-2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2018, 2020-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -321,8 +321,9 @@ static int kbase_simple_power_model_recalculate(struct kbase_ipa_model *model) mutex_lock(&model->kbdev->ipa.lock); if (IS_ERR_OR_NULL(tz)) { - pr_warn_ratelimited("Error %ld getting thermal zone \'%s\', not yet ready?\n", - PTR_ERR(tz), tz_name); + pr_warn_ratelimited( + "Error %d getting thermal zone \'%s\', not yet ready?\n", + PTR_ERR_OR_ZERO(tz), tz_name); return -EPROBE_DEFER; } diff --git a/mali_kbase/jm/mali_kbase_jm_defs.h b/mali_kbase/jm/mali_kbase_jm_defs.h index c9b9ea0..66cf323 100644 --- a/mali_kbase/jm/mali_kbase_jm_defs.h +++ b/mali_kbase/jm/mali_kbase_jm_defs.h @@ -194,8 +194,6 @@ struct kbase_jd_atom_dependency { static inline const struct kbase_jd_atom * kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep) { - KBASE_DEBUG_ASSERT(dep != NULL); - return (const struct kbase_jd_atom *)(dep->atom); } @@ -209,8 +207,6 @@ kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep) static inline u8 kbase_jd_katom_dep_type( const struct kbase_jd_atom_dependency *dep) { - KBASE_DEBUG_ASSERT(dep != NULL); - return dep->dep_type; } @@ -227,8 +223,6 @@ static inline void kbase_jd_katom_dep_set( { struct kbase_jd_atom_dependency *dep; - KBASE_DEBUG_ASSERT(const_dep != NULL); - dep = (struct kbase_jd_atom_dependency *)const_dep; dep->atom = a; @@ -245,8 +239,6 @@ static inline void kbase_jd_katom_dep_clear( { struct kbase_jd_atom_dependency *dep; - KBASE_DEBUG_ASSERT(const_dep != NULL); - dep = (struct kbase_jd_atom_dependency *)const_dep; dep->atom = NULL; @@ -504,7 +496,6 @@ enum kbase_atom_exit_protected_state { * BASE_JD_REQ_START_RENDERPASS set in its core requirements * with an atom that has BASE_JD_REQ_END_RENDERPASS set. 
* @jc_fragment: Set of GPU fragment job chains - * @retry_count: TODO: Not used,to be removed */ struct kbase_jd_atom { struct kthread_work work; @@ -615,8 +606,6 @@ struct kbase_jd_atom { u32 atom_flags; - int retry_count; - enum kbase_atom_gpu_rb_state gpu_rb_state; bool need_cache_flush_cores_retained; @@ -660,7 +649,7 @@ static inline bool kbase_jd_katom_is_protected( } /** - * kbase_atom_is_younger - query if one atom is younger by age than another + * kbase_jd_atom_is_younger - query if one atom is younger by age than another * * @katom_a: the first atom * @katom_b: the second atom diff --git a/mali_kbase/jm/mali_kbase_jm_js.h b/mali_kbase/jm/mali_kbase_jm_js.h index 74d02f5..d03bcc0 100644 --- a/mali_kbase/jm/mali_kbase_jm_js.h +++ b/mali_kbase/jm/mali_kbase_jm_js.h @@ -29,6 +29,8 @@ #include "mali_kbase_js_ctx_attr.h" +#define JS_MAX_RUNNING_JOBS 8 + /** * kbasep_js_devdata_init - Initialize the Job Scheduler * @kbdev: The kbase_device to operate on @@ -618,7 +620,7 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *katom); void kbase_js_sched(struct kbase_device *kbdev, int js_mask); /** - * kbase_jd_zap_context - Attempt to deschedule a context that is being + * kbase_js_zap_context - Attempt to deschedule a context that is being * destroyed * @kctx: Context pointer * @@ -705,8 +707,10 @@ static inline bool kbasep_js_is_submit_allowed( bool is_allowed; /* Ensure context really is scheduled in */ - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + if (WARN((kctx->as_nr == KBASEP_AS_NR_INVALID) || !kbase_ctx_flag(kctx, KCTX_SCHEDULED), + "%s: kctx %pK has assigned AS %d and context flag %d\n", __func__, (void *)kctx, + kctx->as_nr, atomic_read(&kctx->flags))) + return false; test_bit = (u16) (1u << kctx->as_nr); @@ -733,8 +737,10 @@ static inline void kbasep_js_set_submit_allowed( u16 set_bit; /* Ensure context really is scheduled in */ - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + if (WARN((kctx->as_nr == KBASEP_AS_NR_INVALID) || !kbase_ctx_flag(kctx, KCTX_SCHEDULED), + "%s: kctx %pK has assigned AS %d and context flag %d\n", __func__, (void *)kctx, + kctx->as_nr, atomic_read(&kctx->flags))) + return; set_bit = (u16) (1u << kctx->as_nr); @@ -763,8 +769,10 @@ static inline void kbasep_js_clear_submit_allowed( u16 clear_mask; /* Ensure context really is scheduled in */ - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + if (WARN((kctx->as_nr == KBASEP_AS_NR_INVALID) || !kbase_ctx_flag(kctx, KCTX_SCHEDULED), + "%s: kctx %pK has assigned AS %d and context flag %d\n", __func__, (void *)kctx, + kctx->as_nr, atomic_read(&kctx->flags))) + return; clear_bit = (u16) (1u << kctx->as_nr); clear_mask = ~clear_bit; @@ -798,7 +806,7 @@ static inline void kbasep_js_atom_retained_state_init_invalid( * @retained_state: where to copy * @katom: where to copy from * - * Copy atom state that can be made available after jd_done_nolock() is called + * Copy atom state that can be made available after kbase_jd_done_nolock() is called * on that atom. 
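/*
 * Worked example of the per-address-space submit bitmask manipulated by the
 * kbasep_js_*_submit_allowed helpers above, for a context bound to AS 3
 * (values illustrative, not part of the patch):
 */
static void example_submit_bitmask(void)
{
	u16 submit_allowed = 0;

	submit_allowed |= (u16)(1u << 3);  /* set_submit_allowed for AS 3   -> 0x0008 */
	submit_allowed &= (u16)~(1u << 3); /* clear_submit_allowed for AS 3 -> 0x0000 */
	(void)submit_allowed;
}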
*/ static inline void kbasep_js_atom_retained_state_copy( @@ -872,9 +880,6 @@ static inline void kbase_js_runpool_inc_context_count( struct kbasep_js_device_data *js_devdata; struct kbasep_js_kctx_info *js_kctx_info; - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kctx != NULL); - js_devdata = &kbdev->js_data; js_kctx_info = &kctx->jctx.sched_info; @@ -882,13 +887,12 @@ static inline void kbase_js_runpool_inc_context_count( lockdep_assert_held(&js_devdata->runpool_mutex); /* Track total contexts */ - KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX); + WARN_ON_ONCE(js_devdata->nr_all_contexts_running >= JS_MAX_RUNNING_JOBS); ++(js_devdata->nr_all_contexts_running); if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { /* Track contexts that can submit jobs */ - KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running < - S8_MAX); + WARN_ON_ONCE(js_devdata->nr_user_contexts_running >= JS_MAX_RUNNING_JOBS); ++(js_devdata->nr_user_contexts_running); } } @@ -909,9 +913,6 @@ static inline void kbase_js_runpool_dec_context_count( struct kbasep_js_device_data *js_devdata; struct kbasep_js_kctx_info *js_kctx_info; - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kctx != NULL); - js_devdata = &kbdev->js_data; js_kctx_info = &kctx->jctx.sched_info; @@ -920,12 +921,12 @@ static inline void kbase_js_runpool_dec_context_count( /* Track total contexts */ --(js_devdata->nr_all_contexts_running); - KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0); + WARN_ON_ONCE(js_devdata->nr_all_contexts_running < 0); if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { /* Track contexts that can submit jobs */ --(js_devdata->nr_user_contexts_running); - KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0); + WARN_ON_ONCE(js_devdata->nr_user_contexts_running < 0); } } @@ -984,6 +985,7 @@ static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio) * kbasep_js_sched_prio_to_atom_prio - Convert relative scheduler priority * to atom priority (base_jd_prio). * + * @kbdev: Device pointer * @sched_prio: Relative scheduler priority to translate. * * This function will convert relative scheduler priority back into base_jd_prio @@ -999,7 +1001,7 @@ static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio) * 0..BASE_JD_NR_PRIO_LEVELS-1. On failure: BASE_JD_PRIO_INVALID. */ static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(struct kbase_device *kbdev, - int sched_prio) + int sched_prio) { if (likely(sched_prio >= 0 && sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT)) return kbasep_js_relative_priority_to_atom[sched_prio]; diff --git a/mali_kbase/jm/mali_kbase_js_defs.h b/mali_kbase/jm/mali_kbase_js_defs.h index c5cb9ea..15576fb 100644 --- a/mali_kbase/jm/mali_kbase_js_defs.h +++ b/mali_kbase/jm/mali_kbase_js_defs.h @@ -387,7 +387,7 @@ struct kbasep_js_kctx_info { * @sched_priority: priority * @device_nr: Core group atom was executed on * - * Subset of atom state that can be available after jd_done_nolock() is called + * Subset of atom state that can be available after kbase_jd_done_nolock() is called * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(), * because the original atom could disappear. 
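/*
 * Illustrative round trip through the two priority converters above
 * (assumes a valid kbdev in scope; BASE_JD_PRIO_MEDIUM is just an example
 * input): converting an atom priority to a scheduler level and back should
 * be the identity for valid levels.
 */
static void example_priority_round_trip(struct kbase_device *kbdev)
{
	const int sched_prio = kbasep_js_atom_prio_to_sched_prio(BASE_JD_PRIO_MEDIUM);
	const base_jd_prio prio = kbasep_js_sched_prio_to_atom_prio(kbdev, sched_prio);

	WARN_ON(prio != BASE_JD_PRIO_MEDIUM);
}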
*/ diff --git a/mali_kbase/mali_base_hwconfig_features.h b/mali_kbase/mali_base_hwconfig_features.h index a713681..3669f7e 100644 --- a/mali_kbase/mali_base_hwconfig_features.h +++ b/mali_kbase/mali_base_hwconfig_features.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -38,6 +38,7 @@ enum base_hw_feature { BASE_HW_FEATURE_ASN_HASH, BASE_HW_FEATURE_GPU_SLEEP, BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, + BASE_HW_FEATURE_CORE_FEATURES, BASE_HW_FEATURE_END }; @@ -87,6 +88,7 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tGOx[ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_TLS_HASHING, BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_CORE_FEATURES, BASE_HW_FEATURE_END }; @@ -151,6 +153,7 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tGRx[ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_CORE_FEATURES, BASE_HW_FEATURE_END }; @@ -159,6 +162,7 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tVAx[ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_CORE_FEATURES, BASE_HW_FEATURE_END }; @@ -169,6 +173,7 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tTUx[ BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_ASN_HASH, BASE_HW_FEATURE_GPU_SLEEP, + BASE_HW_FEATURE_CORE_FEATURES, BASE_HW_FEATURE_END }; diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h index 8766a6d..3917301 100644 --- a/mali_kbase/mali_base_hwconfig_issues.h +++ b/mali_kbase/mali_base_hwconfig_issues.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -61,6 +61,9 @@ enum base_hw_issue { BASE_HW_ISSUE_GPU2019_3212, BASE_HW_ISSUE_TURSEHW_1997, BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_TURSEHW_2716, + BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_END }; @@ -598,6 +601,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tODx_r0p0 BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3212, BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_END }; @@ -608,6 +612,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tOD BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3212, BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_END }; @@ -616,6 +621,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0 BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_END }; @@ -625,6 +631,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGR BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_END }; @@ -633,6 +640,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0 BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_END }; @@ -642,6 +650,19 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVA BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_END +}; + +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TURSEHW_1997, + BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_TURSEHW_2716, + BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_END }; @@ -651,23 +672,31 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTU BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_TURSEHW_2716, + BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TURSEHW_1997, BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_TURSEHW_2716, + BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p0[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p1[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_TURSEHW_2716, + BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_END }; diff --git a/mali_kbase/mali_kbase.h b/mali_kbase/mali_kbase.h index 53ee51e..b04cf94 100644 --- a/mali_kbase/mali_kbase.h +++ b/mali_kbase/mali_kbase.h @@ -87,14 +87,7 @@ #if MALI_USE_CSF #include "csf/mali_kbase_csf.h" -#endif - -#ifndef u64_to_user_ptr -/* Introduced in Linux v4.6 */ -#define u64_to_user_ptr(x) ((void __user 
*)(uintptr_t)x) -#endif -#if MALI_USE_CSF /* Physical memory group ID for CSF user I/O. */ #define KBASE_MEM_GROUP_CSF_IO BASE_MEM_GROUP_DEFAULT @@ -266,7 +259,7 @@ void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom); void kbase_jd_zap_context(struct kbase_context *kctx); /* - * jd_done_nolock - Perform the necessary handling of an atom that has completed + * kbase_jd_done_nolock - Perform the necessary handling of an atom that has completed * the execution. * * @katom: Pointer to the atom that completed the execution @@ -282,7 +275,7 @@ void kbase_jd_zap_context(struct kbase_context *kctx); * * The caller must hold the kbase_jd_context.lock. */ -bool jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately); +bool kbase_jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately); void kbase_jd_free_external_resources(struct kbase_jd_atom *katom); void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom); @@ -559,6 +552,21 @@ static inline bool kbase_pm_is_active(struct kbase_device *kbdev) } /** + * kbase_pm_lowest_gpu_freq_init() - Find the lowest frequency that the GPU can + * run at, using the device tree, and save this + * within kbdev. + * @kbdev: Pointer to kbase device. + * + * This function could be called from kbase_clk_rate_trace_manager_init, + * but is left separate as it can be called as soon as + * dev_pm_opp_of_add_table() has been called to initialize the OPP table, + * which occurs in power_control_init(). + * + * Return: 0 in any case. + */ +int kbase_pm_lowest_gpu_freq_init(struct kbase_device *kbdev); + +/** * kbase_pm_metrics_start - Start the utilization metrics timer + @kbdev: Pointer to the kbase device for which to start the utilization * metrics calculation thread. @@ -807,16 +815,23 @@ void kbase_device_pcm_dev_term(struct kbase_device *const kbdev); * * @kbdev: the kbase device * @threadfn: the function the realtime thread will execute - * @data: pointer to the thread's data + * @worker: pointer to the thread's kworker * @namefmt: a name for the thread. * * Creates a realtime kthread with priority &KBASE_RT_THREAD_PRIO and restricted * to cores defined by &KBASE_RT_THREAD_CPUMASK_MIN and &KBASE_RT_THREAD_CPUMASK_MAX. * - * Return: A valid &struct task_struct pointer on success, or an ERR_PTR on failure. + * Return: Zero on success, or a PTR_ERR on failure. + */ +int kbase_create_realtime_thread(struct kbase_device *kbdev, + int (*threadfn)(void *data), struct kthread_worker *worker, const char namefmt[], ...); + +/** + * kbase_destroy_kworker_stack - Destroy a kthread_worker and its thread on the stack + * + * @worker: pointer to the thread's kworker */ -struct task_struct * kbase_create_realtime_thread(struct kbase_device *kbdev, - int (*threadfn)(void *data), void *data, const char namefmt[]); +void kbase_destroy_kworker_stack(struct kthread_worker *worker); #if !defined(UINT64_MAX) #define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL) diff --git a/mali_kbase/mali_kbase_config_defaults.h b/mali_kbase/mali_kbase_config_defaults.h index 18e40b5..60fe2ce 100644 --- a/mali_kbase/mali_kbase_config_defaults.h +++ b/mali_kbase/mali_kbase_config_defaults.h @@ -89,6 +89,18 @@ enum { KBASE_3BIT_AID_4 = 0x7 }; +#if MALI_USE_CSF +/* + * Default value for the TIMER register of the IPA Control interface, + * expressed in milliseconds.
+ * + * The chosen value is a trade off between two requirements: the IPA Control + * interface should sample counters with a resolution in the order of + * milliseconds, while keeping GPU overhead as limited as possible. + */ +#define IPA_CONTROL_TIMER_DEFAULT_VALUE_MS ((u32)10) /* 10 milliseconds */ +#endif /* MALI_USE_CSF */ + /* Default period for DVFS sampling (can be overridden by platform header) */ #ifndef DEFAULT_PM_DVFS_PERIOD #define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */ @@ -158,11 +170,6 @@ enum { */ #define DEFAULT_JS_RESET_TICKS_DUMPING (15020) /* 1502s */ -/* Default number of milliseconds given for other jobs on the GPU to be - * soft-stopped when the GPU needs to be reset. - */ -#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */ - /* Nominal reference frequency that was used to obtain all following * <...>_TIMEOUT_CYCLES macros, in kHz. * @@ -180,7 +187,7 @@ enum { * Based on 75000ms timeout at nominal 100MHz, as is required for Android - based * on scaling from a 50MHz GPU system. */ -#define CSF_FIRMWARE_TIMEOUT_CYCLES (7500000000) +#define CSF_FIRMWARE_TIMEOUT_CYCLES (7500000000ull) /* Timeout in clock cycles for GPU Power Management to reach the desired * Shader, L2 and MCU state. @@ -191,10 +198,28 @@ enum { /* Waiting timeout in clock cycles for GPU reset to complete. * - * Based on 2500ms timeout at 100MHz, scaled from a 50MHz GPU system. + * Based on 2500ms timeout at 100MHz, scaled from a 50MHz GPU system */ #define CSF_GPU_RESET_TIMEOUT_CYCLES (250000000) +/* Waiting timeout in clock cycles for all active CSGs to be suspended. + * + * Based on 1500ms timeout at 100MHz, scaled from a 50MHz GPU system. + */ +#define CSF_CSG_SUSPEND_TIMEOUT_CYCLES (150000000) + +/* Waiting timeout in clock cycles for GPU firmware to boot. + * + * Based on 250ms timeout at 100MHz, scaled from a 50MHz GPU system. + */ +#define CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES (25000000) + +/* Waiting timeout for a ping request to be acknowledged, in clock cycles. + * + * Based on 6000ms timeout at 100MHz, scaled from a 50MHz GPU system. + */ +#define CSF_FIRMWARE_PING_TIMEOUT_CYCLES (600000000ull) + #else /* MALI_USE_CSF */ /* A default timeout in clock cycles to be used when an invalid timeout @@ -202,6 +227,11 @@ enum { */ #define JM_DEFAULT_TIMEOUT_CYCLES (150000000) +/* Default number of milliseconds given for other jobs on the GPU to be + * soft-stopped when the GPU needs to be reset. + */ +#define JM_DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */ + #endif /* MALI_USE_CSF */ /* Default timeslice that a context is scheduled in for, in nanoseconds. 
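/*
 * Worked example for the cycle-based timeouts above (illustrative, not part
 * of the patch): the CSF reset timeout set in kbase_device_misc_init()
 * derives from CSF_CSG_SUSPEND_TIMEOUT_CYCLES = 150000000 cycles, which is
 * 150000000 / 100000 kHz = 1500 ms at the nominal 100 MHz reference and
 * 3000 ms on a 50 MHz system. A simplified form of the scaling (the real
 * conversion lives behind kbase_get_timeout_ms(); assumes <linux/math64.h>
 * for div64_u64()):
 */
static inline u64 example_timeout_cycles_to_ms(u64 timeout_cycles, u64 freq_khz)
{
	/* freq_khz is numerically the number of GPU cycles per millisecond */
	return div64_u64(timeout_cycles, freq_khz);
}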
diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c index bf7d524..a16dbad 100644 --- a/mali_kbase/mali_kbase_core_linux.c +++ b/mali_kbase/mali_kbase_core_linux.c @@ -99,6 +99,7 @@ #include <linux/compat.h> /* is_compat_task/in_compat_syscall */ #include <linux/mman.h> #include <linux/version.h> +#include <linux/version_compat_defs.h> #include <mali_kbase_hw.h> #if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) #include <mali_kbase_sync.h> @@ -171,6 +172,11 @@ static const struct mali_kbase_capability_def kbase_caps_table[MALI_KBASE_NUM_CA #endif }; +#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) +/* Mutex to synchronize the probe of multiple kbase instances */ +static struct mutex kbase_probe_mutex; +#endif + /** * mali_kbase_supports_cap - Query whether a kbase capability is supported * @@ -199,10 +205,14 @@ bool mali_kbase_supports_cap(unsigned long api_version, enum mali_kbase_cap cap) return supported; } -struct task_struct *kbase_create_realtime_thread(struct kbase_device *kbdev, - int (*threadfn)(void *data), void *data, const char namefmt[]) +int kbase_create_realtime_thread(struct kbase_device *kbdev, + int (*threadfn)(void *data), struct kthread_worker *worker, const char namefmt[], ...) { + struct task_struct *task; unsigned int i; + va_list args; + char name_buf[128]; + int len; cpumask_t mask = { CPU_BITS_NONE }; @@ -210,24 +220,51 @@ struct task_struct *kbase_create_realtime_thread(struct kbase_device *kbdev, .sched_priority = KBASE_RT_THREAD_PRIO, }; - struct task_struct *ret = kthread_create(kthread_worker_fn, data, namefmt); + kthread_init_worker(worker); + + /* Construct the thread name */ + va_start(args, namefmt); + len = vsnprintf(name_buf, sizeof(name_buf), namefmt, args); + va_end(args); + if (len + 1 > sizeof(name_buf)) { + dev_warn(kbdev->dev, "RT thread name truncated to %s", name_buf); + } + + task = kthread_create(kthread_worker_fn, worker, name_buf); - if (!IS_ERR(ret)) { + if (!IS_ERR(task)) { for (i = KBASE_RT_THREAD_CPUMASK_MIN; i <= KBASE_RT_THREAD_CPUMASK_MAX ; i++) cpumask_set_cpu(i, &mask); - kthread_bind_mask(ret, &mask); + kthread_bind_mask(task, &mask); - wake_up_process(ret); + /* Link the worker and the thread */ + worker->task = task; + wake_up_process(task); - if (sched_setscheduler_nocheck(ret, SCHED_FIFO, ¶m)) - dev_warn(kbdev->dev, "%s not set to RT prio", namefmt); + if (sched_setscheduler_nocheck(task, SCHED_FIFO, ¶m)) + dev_warn(kbdev->dev, "%s not set to RT prio", name_buf); else dev_dbg(kbdev->dev, "%s set to RT prio: %i", - namefmt, param.sched_priority); + name_buf, param.sched_priority); + } else { + return PTR_ERR(task); } - return ret; + return 0; +} + +void kbase_destroy_kworker_stack(struct kthread_worker *worker) +{ + struct task_struct *task; + + task = worker->task; + if (WARN_ON(!task)) + return; + + kthread_flush_worker(worker); + kthread_stop(task); + WARN_ON(!list_empty(&worker->work_list)); } /** @@ -341,10 +378,9 @@ static int kbase_file_create_kctx(struct kbase_file *kfile, * * @kfile: A device file created by kbase_file_new() * - * This function returns an error code (encoded with ERR_PTR) if no context - * has been created for the given @kfile. This makes it safe to use in - * circumstances where the order of initialization cannot be enforced, but - * only if the caller checks the return value. + * This function returns NULL if no context has been created for the given @kfile. 
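/*
 * Illustrative pairing of the reworked thread helpers above: the kworker
 * lives in caller-provided storage, work is queued with the stock
 * kthread_worker API from <linux/kthread.h>, and teardown goes through
 * kbase_destroy_kworker_stack(). The work function and thread name here are
 * hypothetical, not part of the patch.
 */
static void example_work_fn(struct kthread_work *work)
{
	/* deferred device work would run on the realtime kworker here */
}

static int example_use_rt_worker(struct kbase_device *kbdev)
{
	struct kthread_worker worker;
	struct kthread_work work;
	int err;

	err = kbase_create_realtime_thread(kbdev, kthread_worker_fn, &worker,
					   "mali_example_rt");
	if (err)
		return err;

	kthread_init_work(&work, example_work_fn);
	kthread_queue_work(&worker, &work);
	kthread_flush_work(&work);

	kbase_destroy_kworker_stack(&worker);
	return 0;
}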
+ * This makes it safe to use in circumstances where the order of initialization + * cannot be enforced, but only if the caller checks the return value. * * Return: Address of the kernel base context associated with the @kfile, or * NULL if no context exists. @@ -532,27 +568,6 @@ void kbase_release_device(struct kbase_device *kbdev) EXPORT_SYMBOL(kbase_release_device); #if IS_ENABLED(CONFIG_DEBUG_FS) -#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && \ - !(KERNEL_VERSION(4, 4, 28) <= LINUX_VERSION_CODE && \ - KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE) -/* - * Older versions, before v4.6, of the kernel doesn't have - * kstrtobool_from_user(), except longterm 4.4.y which had it added in 4.4.28 - */ -static int kstrtobool_from_user(const char __user *s, size_t count, bool *res) -{ - char buf[4]; - - count = min(count, sizeof(buf) - 1); - - if (copy_from_user(buf, s, count)) - return -EFAULT; - buf[count] = '\0'; - - return strtobool(buf, res); -} -#endif - static ssize_t write_ctx_infinite_cache(struct file *f, const char __user *ubuf, size_t size, loff_t *off) { struct kbase_context *kctx = f->private_data; @@ -664,13 +679,8 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile, kbdev = kfile->kbdev; -#if (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE) kctx = kbase_create_context(kbdev, in_compat_syscall(), flags, kfile->api_version, kfile->filp); -#else - kctx = kbase_create_context(kbdev, is_compat_task(), - flags, kfile->api_version, kfile->filp); -#endif /* (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE) */ /* if bad flags, will stay stuck in setup mode */ if (!kctx) @@ -691,16 +701,8 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile, /* we don't treat this as a fail - just warn about it */ dev_warn(kbdev->dev, "couldn't create debugfs dir for kctx\n"); } else { -#if (KERNEL_VERSION(4, 7, 0) > LINUX_VERSION_CODE) - /* prevent unprivileged use of debug file system - * in old kernel version - */ - debugfs_create_file("infinite_cache", 0600, kctx->kctx_dentry, - kctx, &kbase_infinite_cache_fops); -#else debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry, kctx, &kbase_infinite_cache_fops); -#endif debugfs_create_file("force_same_va", 0600, kctx->kctx_dentry, kctx, &kbase_force_same_va_fops); @@ -1046,9 +1048,9 @@ static int kbase_api_get_cpu_gpu_timeinfo(struct kbase_context *kctx, union kbase_ioctl_get_cpu_gpu_timeinfo *timeinfo) { u32 flags = timeinfo->in.request_flags; - struct timespec64 ts; - u64 timestamp; - u64 cycle_cnt; + struct timespec64 ts = { 0 }; + u64 timestamp = 0; + u64 cycle_cnt = 0; kbase_pm_context_active(kctx->kbdev); @@ -1077,11 +1079,7 @@ static int kbase_api_get_cpu_gpu_timeinfo(struct kbase_context *kctx, static int kbase_api_hwcnt_set(struct kbase_context *kctx, struct kbase_ioctl_hwcnt_values *values) { - gpu_model_set_dummy_prfcnt_sample( - (u32 __user *)(uintptr_t)values->data, - values->size); - - return 0; + return gpu_model_set_dummy_prfcnt_user_sample(u64_to_user_ptr(values->data), values->size); } #endif /* CONFIG_MALI_NO_MALI */ @@ -1569,9 +1567,22 @@ static int kbasep_cs_tiler_heap_init(struct kbase_context *kctx, kctx->jit_group_id = heap_init->in.group_id; return kbase_csf_tiler_heap_init(kctx, heap_init->in.chunk_size, - heap_init->in.initial_chunks, heap_init->in.max_chunks, - heap_init->in.target_in_flight, - &heap_init->out.gpu_heap_va, &heap_init->out.first_chunk_va); + heap_init->in.initial_chunks, heap_init->in.max_chunks, + heap_init->in.target_in_flight, heap_init->in.buf_desc_va, + 
&heap_init->out.gpu_heap_va, + &heap_init->out.first_chunk_va); +} + +static int kbasep_cs_tiler_heap_init_1_13(struct kbase_context *kctx, + union kbase_ioctl_cs_tiler_heap_init_1_13 *heap_init) +{ + kctx->jit_group_id = heap_init->in.group_id; + + return kbase_csf_tiler_heap_init(kctx, heap_init->in.chunk_size, + heap_init->in.initial_chunks, heap_init->in.max_chunks, + heap_init->in.target_in_flight, 0, + &heap_init->out.gpu_heap_va, + &heap_init->out.first_chunk_va); } static int kbasep_cs_tiler_heap_term(struct kbase_context *kctx, @@ -1653,6 +1664,31 @@ static int kbasep_ioctl_cs_cpu_queue_dump(struct kbase_context *kctx, cpu_queue_info->size); } +#define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1) +static int kbase_ioctl_read_user_page(struct kbase_context *kctx, + union kbase_ioctl_read_user_page *user_page) +{ + struct kbase_device *kbdev = kctx->kbdev; + unsigned long flags; + + /* As of now, only LATEST_FLUSH is supported */ + if (unlikely(user_page->in.offset != LATEST_FLUSH)) + return -EINVAL; + + /* Validating padding that must be zero */ + if (unlikely(user_page->in.padding != 0)) + return -EINVAL; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + if (!kbdev->pm.backend.gpu_powered) + user_page->out.val_lo = POWER_DOWN_LATEST_FLUSH_VALUE; + else + user_page->out.val_lo = kbase_reg_read(kbdev, USER_REG(LATEST_FLUSH)); + user_page->out.val_hi = 0; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return 0; +} #endif /* MALI_USE_CSF */ static int kbasep_ioctl_context_priority_check(struct kbase_context *kctx, @@ -2110,6 +2146,11 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) union kbase_ioctl_cs_tiler_heap_init, kctx); break; + case KBASE_IOCTL_CS_TILER_HEAP_INIT_1_13: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_TILER_HEAP_INIT_1_13, + kbasep_cs_tiler_heap_init_1_13, + union kbase_ioctl_cs_tiler_heap_init_1_13, kctx); + break; case KBASE_IOCTL_CS_TILER_HEAP_TERM: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_TILER_HEAP_TERM, kbasep_cs_tiler_heap_term, @@ -2128,6 +2169,10 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct kbase_ioctl_cs_cpu_queue_info, kctx); break; + case KBASE_IOCTL_READ_USER_PAGE: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_READ_USER_PAGE, kbase_ioctl_read_user_page, + union kbase_ioctl_read_user_page, kctx); + break; #endif /* MALI_USE_CSF */ #if MALI_UNIT_TEST case KBASE_IOCTL_TLSTREAM_STATS: @@ -2251,18 +2296,28 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof } #endif /* MALI_USE_CSF */ -static unsigned int kbase_poll(struct file *filp, poll_table *wait) +static __poll_t kbase_poll(struct file *filp, poll_table *wait) { struct kbase_file *const kfile = filp->private_data; struct kbase_context *const kctx = kbase_file_get_kctx_if_setup_complete(kfile); - if (unlikely(!kctx)) + if (unlikely(!kctx)) { +#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) return POLLERR; +#else + return EPOLLERR; +#endif + } poll_wait(filp, &kctx->event_queue, wait); - if (kbase_event_pending(kctx)) + if (kbase_event_pending(kctx)) { +#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) return POLLIN | POLLRDNORM; +#else + return EPOLLIN | EPOLLRDNORM; +#endif + } return 0; } @@ -3267,22 +3322,20 @@ static ssize_t gpuinfo_show(struct device *dev, .name = "Mali-G510" }, { .id = GPU_ID2_PRODUCT_TVAX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, .name = "Mali-G310" }, - { .id = GPU_ID2_PRODUCT_TTUX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-TTUX" }, - { .id = 
GPU_ID2_PRODUCT_LTUX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-LTUX" }, }; const char *product_name = "(Unknown Mali GPU)"; struct kbase_device *kbdev; u32 gpu_id; unsigned int product_id, product_id_mask; unsigned int i; + struct kbase_gpu_props *gpu_props; kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + gpu_props = &kbdev->gpu_props; + gpu_id = gpu_props->props.raw_props.gpu_id; product_id = gpu_id >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT; product_id_mask = GPU_ID2_PRODUCT_MODEL >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT; @@ -3296,6 +3349,32 @@ static ssize_t gpuinfo_show(struct device *dev, } } +#if MALI_USE_CSF + if ((product_id & product_id_mask) == + ((GPU_ID2_PRODUCT_TTUX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT) & product_id_mask)) { + const bool rt_supported = + GPU_FEATURES_RAY_TRACING_GET(gpu_props->props.raw_props.gpu_features); + const u8 nr_cores = gpu_props->num_cores; + + /* Mali-G715-Immortalis if number of cores > 10 and ray tracing is supported. + * Mali-G715 if number of cores > 10 without ray tracing support. + * Mali-G715 if 7 <= number of cores <= 10, regardless of ray tracing. + * Mali-G615 if number of cores < 7. + */ + if ((nr_cores > 10) && rt_supported) + product_name = "Mali-G715-Immortalis"; + else if (nr_cores >= 7) + product_name = "Mali-G715"; + + if (nr_cores < 7) { + dev_warn(kbdev->dev, "nr_cores(%u) GPU ID must be G615", nr_cores); + product_name = "Mali-G615"; + } else + dev_dbg(kbdev->dev, "GPU ID_Name: %s, nr_cores(%u)\n", product_name, + nr_cores); + } +#endif /* MALI_USE_CSF */ + return scnprintf(buf, PAGE_SIZE, "%s %d cores r%dp%d 0x%04X\n", product_name, kbdev->gpu_props.num_cores, (gpu_id & GPU_ID_VERSION_MAJOR) >> KBASE_GPU_ID_VERSION_MAJOR_SHIFT, @@ -3368,6 +3447,46 @@ static ssize_t dvfs_period_show(struct device *dev, static DEVICE_ATTR_RW(dvfs_period); +int kbase_pm_lowest_gpu_freq_init(struct kbase_device *kbdev) +{ + /* Uses the default reference frequency defined in the macro below */ + u64 lowest_freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ; + + /* Only check lowest frequency in cases when OPPs are used and + * present in the device tree. + */ +#ifdef CONFIG_PM_OPP + struct dev_pm_opp *opp_ptr; + unsigned long found_freq = 0; + + /* find the lowest frequency OPP */ + opp_ptr = dev_pm_opp_find_freq_ceil(kbdev->dev, &found_freq); + if (IS_ERR(opp_ptr)) { + dev_err(kbdev->dev, "No OPPs found in device tree! Scaling timeouts using %llu kHz", + (unsigned long long)lowest_freq_khz); + } else { +#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE + dev_pm_opp_put(opp_ptr); /* decrease OPP refcount */ +#endif + /* convert the found frequency to kHz */ + found_freq /= 1000; + + /* If the lowest frequency in the OPP table is still higher + * than the reference, then keep the reference frequency + * as the one to use for scaling. + */ + if (found_freq < lowest_freq_khz) + lowest_freq_khz = found_freq; + } +#else + dev_err(kbdev->dev, "No operating-points-v2 node or operating-points property in DT"); +#endif + + kbdev->lowest_gpu_freq_khz = lowest_freq_khz; + dev_dbg(kbdev->dev, "Lowest frequency identified is %llu kHz", kbdev->lowest_gpu_freq_khz); + return 0; +} + /** * pm_poweroff_store - Store callback for the pm_poweroff sysfs file.
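Aside: kbase_pm_lowest_gpu_freq_init() above records the lowest operating frequency so that GPU-cycle-based timeouts can be scaled to wall-clock time (the "Scaling timeouts using %llu kHz" message points at this use). A minimal sketch of that conversion, with a hypothetical helper name and a timeout expressed in cycles (illustrative only, not the driver's API):

#include <linux/math64.h>
#include <linux/types.h>

/* A frequency in kHz is a number of cycles per millisecond, so dividing
 * (and rounding up) never under-estimates the wait at the slowest OPP.
 */
static inline u64 timeout_cycles_to_ms(u64 timeout_cycles, u64 lowest_freq_khz)
{
	return div64_u64(timeout_cycles + lowest_freq_khz - 1, lowest_freq_khz);
}
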
* @dev: The device with sysfs file is for @@ -4533,7 +4652,7 @@ int power_control_init(struct kbase_device *kbdev) } } if (err == -EPROBE_DEFER) { - while ((i > 0) && (i < BASE_MAX_NR_CLOCKS_REGULATORS)) + while (i > 0) regulator_put(kbdev->regulators[--i]); return err; } @@ -4570,7 +4689,7 @@ int power_control_init(struct kbase_device *kbdev) } } if (err == -EPROBE_DEFER) { - while ((i > 0) && (i < BASE_MAX_NR_CLOCKS_REGULATORS)) { + while (i > 0) { clk_disable_unprepare(kbdev->clocks[--i]); clk_put(kbdev->clocks[i]); } @@ -4591,6 +4710,11 @@ int power_control_init(struct kbase_device *kbdev) if (kbdev->nr_regulators > 0) { kbdev->opp_table = dev_pm_opp_set_regulators(kbdev->dev, regulator_names, BASE_MAX_NR_CLOCKS_REGULATORS); + + if (IS_ERR_OR_NULL(kbdev->opp_table)) { + err = PTR_ERR(kbdev->opp_table); + goto regulators_probe_defer; + } } #endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */ err = dev_pm_opp_of_add_table(kbdev->dev); @@ -4598,6 +4722,20 @@ int power_control_init(struct kbase_device *kbdev) #endif /* CONFIG_PM_OPP */ return 0; +#if defined(CONFIG_PM_OPP) && \ + ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && defined(CONFIG_REGULATOR)) +regulators_probe_defer: + for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { + if (kbdev->clocks[i]) { + if (__clk_is_enabled(kbdev->clocks[i])) + clk_disable_unprepare(kbdev->clocks[i]); + clk_put(kbdev->clocks[i]); + kbdev->clocks[i] = NULL; + } else + break; + } +#endif + clocks_probe_defer: #if defined(CONFIG_REGULATOR) for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) @@ -4657,18 +4795,18 @@ static int type##_quirks_set(void *data, u64 val) \ kbdev = (struct kbase_device *)data; \ kbdev->hw_quirks_##type = (u32)val; \ trigger_reset(kbdev); \ - return 0;\ + return 0; \ } \ \ static int type##_quirks_get(void *data, u64 *val) \ { \ - struct kbase_device *kbdev;\ - kbdev = (struct kbase_device *)data;\ - *val = kbdev->hw_quirks_##type;\ - return 0;\ + struct kbase_device *kbdev; \ + kbdev = (struct kbase_device *)data; \ + *val = kbdev->hw_quirks_##type; \ + return 0; \ } \ -DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\ - type##_quirks_set, "%llu\n") +DEFINE_DEBUGFS_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get, \ + type##_quirks_set, "%llu\n") MAKE_QUIRK_ACCESSORS(sc); MAKE_QUIRK_ACCESSORS(tiler); @@ -4698,8 +4836,7 @@ static int kbase_device_debugfs_reset_write(void *data, u64 wait_for_reset) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fops_trigger_reset, - NULL, &kbase_device_debugfs_reset_write, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_trigger_reset, NULL, &kbase_device_debugfs_reset_write, "%llu\n"); /** * debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read @@ -4790,12 +4927,7 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev) /* prevent unprivileged use of debug file system * in old kernel version */ -#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) - /* only for newer kernel version debug file system is safe */ const mode_t mode = 0644; -#else - const mode_t mode = 0600; -#endif kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname, NULL); @@ -4897,9 +5029,11 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev) kbdev->mali_debugfs_directory, kbdev, &kbasep_serialize_jobs_debugfs_fops); + kbase_timeline_io_debugfs_init(kbdev); #endif kbase_dvfs_status_debugfs_init(kbdev); + return 0; out: @@ -5096,10 +5230,11 @@ static ssize_t fw_timeout_store(struct device *dev, ret = kstrtouint(buf, 0, &fw_timeout); if (ret || fw_timeout == 0) { - 
dev_err(kbdev->dev, "%s\n%s\n%u", - "Couldn't process fw_timeout write operation.", - "Use format 'fw_timeout_ms', and fw_timeout_ms > 0", - FIRMWARE_PING_INTERVAL_MS); + dev_err(kbdev->dev, + "Couldn't process fw_timeout write operation.\n" + "Use format 'fw_timeout_ms', and fw_timeout_ms > 0\n" + "Default fw_timeout: %u", + kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_PING_TIMEOUT)); return -EINVAL; } @@ -5203,6 +5338,66 @@ static ssize_t idle_hysteresis_time_show(struct device *dev, } static DEVICE_ATTR_RW(idle_hysteresis_time); + +/** + * mcu_shader_pwroff_timeout_show - Get the MCU shader Core power-off time value. + * + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer for the sysfs file contents + * + * Get the internally recorded MCU shader Core power-off (nominal) timeout value. + * The unit of the value is in micro-seconds. + * + * Return: The number of bytes output to @buf if the + * function succeeded. A negative value on failure. + */ +static ssize_t mcu_shader_pwroff_timeout_show(struct device *dev, struct device_attribute *attr, + char *const buf) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + u32 pwroff; + + if (!kbdev) + return -ENODEV; + + pwroff = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev); + return scnprintf(buf, PAGE_SIZE, "%u\n", pwroff); +} + +/** + * mcu_shader_pwroff_timeout_store - Set the MCU shader core power-off time value. + * + * @dev: The device this sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes to write to the sysfs file + * + * The duration value (unit: micro-seconds) for configuring the MCU Shader Core + * timer, used when the shader cores' power transitions are delegated to the + * MCU (normal operational mode). + * + * Return: @count if the function succeeded. An error code on failure.
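For reference, the new attribute can be exercised from user space like any other sysfs file. A small sketch, assuming the device's sysfs directory is reachable via /sys/class/misc/mali0/device (the path depends on the platform and is an assumption here):

#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/class/misc/mali0/device/mcu_shader_pwroff_timeout";
	char buf[32];
	ssize_t n;
	int fd;

	/* Request a 400 microsecond nominal shader core power-off delay. */
	fd = open(path, O_WRONLY);
	if (fd < 0)
		return 1;
	if (write(fd, "400", 3) < 0)
		perror("write");
	close(fd);

	/* Read back the value the driver recorded. */
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return 1;
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		printf("mcu_shader_pwroff_timeout: %s", buf);
	}
	close(fd);
	return 0;
}
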
+ */ +static ssize_t mcu_shader_pwroff_timeout_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + u32 dur; + + if (!kbdev) + return -ENODEV; + + if (kstrtouint(buf, 0, &dur)) + return -EINVAL; + + kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, dur); + + return count; +} + +static DEVICE_ATTR_RW(mcu_shader_pwroff_timeout); + #endif /* MALI_USE_CSF */ static struct attribute *kbase_scheduling_attrs[] = { @@ -5263,6 +5458,7 @@ static struct attribute *kbase_attrs[] = { &dev_attr_csg_scheduling_period.attr, &dev_attr_fw_timeout.attr, &dev_attr_idle_hysteresis_time.attr, + &dev_attr_mcu_shader_pwroff_timeout.attr, #endif /* !MALI_USE_CSF */ &dev_attr_power_policy.attr, &dev_attr_core_mask.attr, @@ -5401,7 +5597,9 @@ static int kbase_platform_device_probe(struct platform_device *pdev) kbdev->dev = &pdev->dev; dev_set_drvdata(kbdev->dev, kbdev); - +#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) + mutex_lock(&kbase_probe_mutex); +#endif err = kbase_device_init(kbdev); if (err) { @@ -5413,10 +5611,16 @@ static int kbase_platform_device_probe(struct platform_device *pdev) dev_set_drvdata(kbdev->dev, NULL); kbase_device_free(kbdev); +#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) + mutex_unlock(&kbase_probe_mutex); +#endif } else { dev_info(kbdev->dev, "Probed as %s\n", dev_name(kbdev->mdev.this_device)); kbase_increment_device_id(); +#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) + mutex_unlock(&kbase_probe_mutex); +#endif #ifdef CONFIG_MALI_ARBITER_SUPPORT mutex_lock(&kbdev->pm.lock); kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_INITIALIZED_EVT); @@ -5488,13 +5692,8 @@ static int kbase_device_resume(struct device *dev) #ifdef CONFIG_MALI_DEVFREQ dev_dbg(dev, "Callback %s\n", __func__); - if (kbdev->devfreq) { - mutex_lock(&kbdev->pm.lock); - if (kbdev->pm.active_count > 0) - kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_RESUME); - mutex_unlock(&kbdev->pm.lock); - flush_workqueue(kbdev->devfreq_queue.workq); - } + if (kbdev->devfreq) + kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_RESUME); #endif return 0; } @@ -5649,41 +5848,43 @@ static struct platform_driver kbase_platform_driver = { }, }; -/* - * The driver will not provide a shortcut to create the Mali platform device - * anymore when using Device Tree. - */ -#if IS_ENABLED(CONFIG_OF) +#if (KERNEL_VERSION(5, 3, 0) > LINUX_VERSION_CODE) && IS_ENABLED(CONFIG_OF) module_platform_driver(kbase_platform_driver); #else - static int __init kbase_driver_init(void) { int ret; +#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) + mutex_init(&kbase_probe_mutex); +#endif + +#ifndef CONFIG_OF ret = kbase_platform_register(); if (ret) return ret; - +#endif ret = platform_driver_register(&kbase_platform_driver); - - if (ret) +#ifndef CONFIG_OF + if (ret) { kbase_platform_unregister(); - + return ret; + } +#endif return ret; } static void __exit kbase_driver_exit(void) { platform_driver_unregister(&kbase_platform_driver); +#ifndef CONFIG_OF kbase_platform_unregister(); +#endif } module_init(kbase_driver_init); module_exit(kbase_driver_exit); - -#endif /* CONFIG_OF */ - +#endif MODULE_LICENSE("GPL"); MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \ __stringify(BASE_UK_VERSION_MAJOR) "." 
\ diff --git a/mali_kbase/mali_kbase_cs_experimental.h b/mali_kbase/mali_kbase_cs_experimental.h index 4dc09e4..7e885ca 100644 --- a/mali_kbase/mali_kbase_cs_experimental.h +++ b/mali_kbase/mali_kbase_cs_experimental.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,9 +30,9 @@ */ static inline void mali_kbase_print_cs_experimental(void) { -#if MALI_INCREMENTAL_RENDERING - pr_info("mali_kbase: INCREMENTAL_RENDERING (experimental) enabled"); -#endif /* MALI_INCREMENTAL_RENDERING */ +#if MALI_INCREMENTAL_RENDERING_JM + pr_info("mali_kbase: INCREMENTAL_RENDERING_JM (experimental) enabled"); +#endif /* MALI_INCREMENTAL_RENDERING_JM */ } #endif /* _KBASE_CS_EXPERIMENTAL_H_ */ diff --git a/mali_kbase/mali_kbase_ctx_sched.c b/mali_kbase/mali_kbase_ctx_sched.c index 53da266..66149f9 100644 --- a/mali_kbase/mali_kbase_ctx_sched.c +++ b/mali_kbase/mali_kbase_ctx_sched.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,7 +23,9 @@ #include <mali_kbase_defs.h> #include "mali_kbase_ctx_sched.h" #include "tl/mali_kbase_tracepoints.h" -#if !MALI_USE_CSF +#if MALI_USE_CSF +#include "mali_kbase_reset_gpu.h" +#else #include <mali_kbase_hwaccess_jm.h> #endif @@ -152,7 +154,19 @@ void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx) struct kbase_device *const kbdev = kctx->kbdev; lockdep_assert_held(&kbdev->hwaccess_lock); - WARN_ON(atomic_read(&kctx->refcount) == 0); +#if MALI_USE_CSF + /* We expect the context to be active when this function is called, + * except for the case where a page fault is reported for it during + * the GPU reset sequence, in which case we can expect the refcount + * to be 0. + */ + WARN_ON(!atomic_read(&kctx->refcount) && !kbase_reset_gpu_is_active(kbdev)); +#else + /* We expect the context to be active (and thus refcount should be non-zero) + * when this function is called + */ + WARN_ON(!atomic_read(&kctx->refcount)); +#endif WARN_ON(kctx->as_nr == KBASEP_AS_NR_INVALID); WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx); diff --git a/mali_kbase/mali_kbase_debug.h b/mali_kbase/mali_kbase_debug.h index 10a3c85..c43d15d 100644 --- a/mali_kbase/mali_kbase_debug.h +++ b/mali_kbase/mali_kbase_debug.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2012-2015, 2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015, 2017, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -65,7 +65,7 @@ struct kbasep_debug_assert_cb { #endif /** - * KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) - (Private) system printing + * KBASEP_DEBUG_ASSERT_OUT() - (Private) system printing * function associated to the @ref KBASE_DEBUG_ASSERT_MSG event. 
* @trace: location in the code from where the message is printed * @function: function from where the message is printed @@ -125,7 +125,7 @@ struct kbasep_debug_assert_cb { #endif /* KBASE_DEBUG_DISABLE_ASSERTS */ /** - * KBASE_DEBUG_CODE( X ) - Executes the code inside the macro only in debug mode + * KBASE_DEBUG_CODE() - Executes the code inside the macro only in debug mode * @X: Code to compile only in debug mode. */ #ifdef CONFIG_MALI_DEBUG diff --git a/mali_kbase/mali_kbase_debug_job_fault.c b/mali_kbase/mali_kbase_debug_job_fault.c index 4f021b3..d6518b4 100644 --- a/mali_kbase/mali_kbase_debug_job_fault.c +++ b/mali_kbase/mali_kbase_debug_job_fault.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016, 2018-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -87,8 +87,7 @@ static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx) static int wait_for_job_fault(struct kbase_device *kbdev) { -#if KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE && \ - KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE +#if KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE int ret = wait_event_interruptible_timeout(kbdev->job_fault_wq, kbase_is_job_fault_event_pending(kbdev), msecs_to_jiffies(2000)); diff --git a/mali_kbase/mali_kbase_debug_mem_zones.c b/mali_kbase/mali_kbase_debug_mem_zones.c new file mode 100644 index 0000000..1f8db32 --- /dev/null +++ b/mali_kbase/mali_kbase_debug_mem_zones.c @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * Debugfs interface to dump information about GPU_VA memory zones + */ + +#include "mali_kbase_debug_mem_zones.h" +#include "mali_kbase.h" + +#include <linux/list.h> +#include <linux/file.h> + +#if IS_ENABLED(CONFIG_DEBUG_FS) + +/** + * debug_mem_zones_show - Show information about GPU_VA memory zones + * @sfile: The debugfs entry + * @data: Data associated with the entry + * + * This function is called to get the contents of the @c mem_zones debugfs file. + * This lists the start address and size (in pages) of each initialized memory + * zone within GPU_VA memory. 
+ * + * Return: + * 0 if successfully prints data in debugfs entry file + * -1 if it encountered an error + */ +static int debug_mem_zones_show(struct seq_file *sfile, void *data) +{ + struct kbase_context *const kctx = sfile->private; + size_t i; + + const char *zone_names[KBASE_REG_ZONE_MAX] = { + "SAME_VA", + "CUSTOM_VA", + "EXEC_VA" +#if MALI_USE_CSF + , + "MCU_SHARED_VA", + "EXEC_FIXED_VA", + "FIXED_VA" +#endif + }; + + kbase_gpu_vm_lock(kctx); + + for (i = 0; i < KBASE_REG_ZONE_MAX; i++) { + struct kbase_reg_zone *reg_zone = &kctx->reg_zone[i]; + + if (reg_zone->base_pfn) { + seq_printf(sfile, "%15s %zu 0x%.16llx 0x%.16llx\n", zone_names[i], i, + reg_zone->base_pfn, reg_zone->va_size_pages); + } + } + + kbase_gpu_vm_unlock(kctx); + return 0; +} + +/* + * File operations related to debugfs entry for mem_zones + */ +static int debug_mem_zones_open(struct inode *in, struct file *file) +{ + return single_open(file, debug_mem_zones_show, in->i_private); +} + +static const struct file_operations kbase_debug_mem_zones_fops = { + .owner = THIS_MODULE, + .open = debug_mem_zones_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/* + * Initialize debugfs entry for mem_zones + */ +void kbase_debug_mem_zones_init(struct kbase_context *const kctx) +{ + /* Caller already ensures this, but we keep the pattern for + * maintenance safety. + */ + if (WARN_ON(!kctx) || WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry))) + return; + + debugfs_create_file("mem_zones", 0400, kctx->kctx_dentry, kctx, + &kbase_debug_mem_zones_fops); +} +#else +/* + * Stub functions for when debugfs is disabled + */ +void kbase_debug_mem_zones_init(struct kbase_context *const kctx) +{ +} +#endif diff --git a/mali_kbase/mali_kbase_debug_mem_zones.h b/mali_kbase/mali_kbase_debug_mem_zones.h new file mode 100644 index 0000000..acf349b --- /dev/null +++ b/mali_kbase/mali_kbase_debug_mem_zones.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_DEBUG_MEM_ZONES_H +#define _KBASE_DEBUG_MEM_ZONES_H + +#include <mali_kbase.h> + +/** + * kbase_debug_mem_zones_init() - Initialize the mem_zones sysfs file + * @kctx: Pointer to kernel base context + * + * This function creates a "mem_zones" file which can be used to determine the + * address ranges of GPU memory zones, in the GPU Virtual-Address space. + * + * The file is cleaned up by a call to debugfs_remove_recursive() deleting the + * parent directory. 
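As a usage note, the zone layout can then be inspected from user space by reading the new file. A sketch, assuming debugfs is mounted at /sys/kernel/debug and using an illustrative per-context directory name (the exact naming of the context directory is not shown in this patch):

#include <stdio.h>

int main(void)
{
	/* "mali0/ctx/1234_5" is illustrative only. */
	FILE *f = fopen("/sys/kernel/debug/mali0/ctx/1234_5/mem_zones", "r");
	char line[256];

	if (!f)
		return 1;
	/* Each line: zone name, zone index, base PFN, size in pages. */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}
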
+ */ +void kbase_debug_mem_zones_init(struct kbase_context *kctx); + +#endif diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h index df373cb..6c4e3e8 100644 --- a/mali_kbase/mali_kbase_defs.h +++ b/mali_kbase/mali_kbase_defs.h @@ -978,11 +978,8 @@ struct kbase_process { * @total_gpu_pages for both native and dma-buf imported * allocations. * @job_done_worker: Worker for job_done work. - * @job_done_worker_thread: Thread for job_done work. * @event_worker: Worker for event work. - * @event_worker_thread: Thread for event work. * @apc.worker: Worker for async power control work. - * @apc.thread: Thread for async power control work. * @apc.power_on_work: Work struct for powering on the GPU. * @apc.power_off_work: Work struct for powering off the GPU. * @apc.end_ts: The latest end timestamp to power off the GPU. @@ -1189,11 +1186,8 @@ struct kbase_device { #endif bool poweroff_pending; -#if (KERNEL_VERSION(4, 4, 0) <= LINUX_VERSION_CODE) bool infinite_cache_active_default; -#else - u32 infinite_cache_active_default; -#endif + struct kbase_mem_pool_group_config mem_pool_defaults; u32 current_gpu_coherency_mode; @@ -1242,9 +1236,7 @@ struct kbase_device { struct kbasep_js_device_data js_data; struct kthread_worker job_done_worker; - struct task_struct *job_done_worker_thread; struct kthread_worker event_worker; - struct task_struct *event_worker_thread; /* See KBASE_JS_*_PRIORITY_MODE for details. */ u32 js_ctx_scheduling_mode; @@ -1260,7 +1252,6 @@ struct kbase_device { struct { struct kthread_worker worker; - struct task_struct *thread; struct kthread_work power_on_work; struct kthread_work power_off_work; ktime_t end_ts; @@ -2042,5 +2033,7 @@ static inline u64 kbase_get_lock_region_min_size_log2(struct kbase_gpu_props con #define KBASE_CLEAN_CACHE_MAX_LOOPS 100000 /* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */ #define KBASE_AS_INACTIVE_MAX_LOOPS 100000 +/* Maximum number of loops polling the GPU PRFCNT_ACTIVE bit before we assume the GPU has hung */ +#define KBASE_PRFCNT_ACTIVE_MAX_LOOPS 100000000 #endif /* _KBASE_DEFS_H_ */ diff --git a/mali_kbase/mali_kbase_dma_fence.c b/mali_kbase/mali_kbase_dma_fence.c index c4129ff..ca3863f 100644 --- a/mali_kbase/mali_kbase_dma_fence.c +++ b/mali_kbase/mali_kbase_dma_fence.c @@ -161,7 +161,7 @@ kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom) if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) { /* Wait was cancelled - zap the atom */ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - if (jd_done_nolock(katom, true)) + if (kbase_jd_done_nolock(katom, true)) kbase_js_sched_all(katom->kctx->kbdev); } } @@ -193,10 +193,10 @@ kbase_dma_fence_work(struct work_struct *pwork) kbase_fence_free_callbacks(katom); /* * Queue atom on GPU, unless it has already completed due to a failing - * dependency. Run jd_done_nolock() on the katom if it is completed. + * dependency. Run kbase_jd_done_nolock() on the katom if it is completed. 
*/ if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED)) - jd_done_nolock(katom, true); + kbase_jd_done_nolock(katom, true); else kbase_jd_dep_clear_locked(katom); diff --git a/mali_kbase/mali_kbase_dma_fence.h b/mali_kbase/mali_kbase_dma_fence.h index be69118..53effbc 100644 --- a/mali_kbase/mali_kbase_dma_fence.h +++ b/mali_kbase/mali_kbase_dma_fence.h @@ -88,7 +88,7 @@ int kbase_dma_fence_wait(struct kbase_jd_atom *katom, struct kbase_dma_fence_resv_info *info); /** - * kbase_dma_fence_cancel_ctx() - Cancel all dma-fences blocked atoms on kctx + * kbase_dma_fence_cancel_all_atoms() - Cancel all dma-fences blocked atoms on kctx * @kctx: Pointer to kbase context * * This function will cancel and clean up all katoms on @kctx that is waiting @@ -105,7 +105,7 @@ void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx); * This function cancels all dma-buf fence callbacks on @katom, but does not * cancel the katom itself. * - * The caller is responsible for ensuring that jd_done_nolock is called on + * The caller is responsible for ensuring that kbase_jd_done_nolock is called on * @katom. * * Locking: jctx.lock must be held when calling this function. diff --git a/mali_kbase/mali_kbase_dvfs_debugfs.c b/mali_kbase/mali_kbase_dvfs_debugfs.c index 1e584de..e4cb716 100644 --- a/mali_kbase/mali_kbase_dvfs_debugfs.c +++ b/mali_kbase/mali_kbase_dvfs_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -68,11 +68,7 @@ static const struct file_operations kbasep_dvfs_utilization_debugfs_fops = { void kbase_dvfs_status_debugfs_init(struct kbase_device *kbdev) { struct dentry *file; -#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) const mode_t mode = 0444; -#else - const mode_t mode = 0400; -#endif if (WARN_ON(!kbdev || IS_ERR_OR_NULL(kbdev->mali_debugfs_directory))) return; diff --git a/mali_kbase/mali_kbase_fence.h b/mali_kbase/mali_kbase_fence.h index 2842280..0f9b73a 100644 --- a/mali_kbase/mali_kbase_fence.h +++ b/mali_kbase/mali_kbase_fence.h @@ -104,7 +104,7 @@ static inline void kbase_fence_out_remove(struct kbase_jd_atom *katom) #if defined(CONFIG_SYNC_FILE) /** - * kbase_fence_out_remove() - Removes the input fence from atom + * kbase_fence_in_remove() - Removes the input fence from atom * @katom: Atom to remove input fence for * * This will also release the reference to this fence which the atom keeps @@ -272,6 +272,16 @@ bool kbase_fence_free_callbacks(struct kbase_jd_atom *katom); #endif /* !MALI_USE_CSF */ /** + * kbase_fence_get() - Retrieve fence for a KCPUQ fence command. + * @fence_info: KCPUQ fence command + * + * A ref will be taken for the fence, so use @kbase_fence_put() to release it + * + * Return: The fence, or NULL if there is no fence for KCPUQ fence command + */ +#define kbase_fence_get(fence_info) dma_fence_get((fence_info)->fence) + +/** * kbase_fence_put() - Releases a reference to a fence * @fence: Fence to release reference for. */ diff --git a/mali_kbase/mali_kbase_fence_ops.c b/mali_kbase/mali_kbase_fence_ops.c index 14ddf03..be14155 100644 --- a/mali_kbase/mali_kbase_fence_ops.c +++ b/mali_kbase/mali_kbase_fence_ops.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. 
All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -69,9 +69,11 @@ kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size) } #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +extern const struct fence_ops kbase_fence_ops; /* silence checker warning */ const struct fence_ops kbase_fence_ops = { .wait = fence_default_wait, #else +extern const struct dma_fence_ops kbase_fence_ops; /* silence checker warning */ const struct dma_fence_ops kbase_fence_ops = { .wait = dma_fence_default_wait, #endif diff --git a/mali_kbase/mali_kbase_gpuprops.c b/mali_kbase/mali_kbase_gpuprops.c index 5e490b6..c5ed338 100644 --- a/mali_kbase/mali_kbase_gpuprops.c +++ b/mali_kbase/mali_kbase_gpuprops.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -198,7 +198,6 @@ static int kbase_gpuprops_get_props(struct base_gpu_props * const gpu_props, gpu_props->raw_props.mem_features = regdump.mem_features; gpu_props->raw_props.mmu_features = regdump.mmu_features; gpu_props->raw_props.l2_features = regdump.l2_features; - gpu_props->raw_props.core_features = regdump.core_features; gpu_props->raw_props.as_present = regdump.as_present; gpu_props->raw_props.js_present = regdump.js_present; @@ -326,9 +325,6 @@ static void kbase_gpuprops_calculate_props( totalram_pages() << PAGE_SHIFT; #endif - gpu_props->core_props.num_exec_engines = - KBASE_UBFX32(gpu_props->raw_props.core_features, 0, 4); - for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i]; @@ -507,6 +503,21 @@ int kbase_gpuprops_set_features(struct kbase_device *kbdev) if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_GROUP_SPLIT)) gpu_props->thread_props.max_thread_group_split = 0; + /* + * The CORE_FEATURES register has different meanings depending on GPU. + * On tGOx, bits[3:0] encode num_exec_engines. + * On CSF GPUs, bits[7:0] is an enumeration that needs to be parsed, + * instead. + * GPUs like tTIx have additional fields like LSC_SIZE that are + * otherwise reserved/RAZ on older GPUs. 
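To make the bits[3:0] parsing concrete, here is a stand-alone equivalent of the KBASE_UBFX32() extraction used just below (a sketch for illustration, not the driver's macro definition):

#include <linux/types.h>

/* Extract an unsigned bit-field of 'size' bits (size < 32) starting at 'offset'. */
static inline u32 ubfx32(u32 value, unsigned int offset, unsigned int size)
{
	return (value >> offset) & ((1u << size) - 1u);
}

/* On Job Manager GPUs such as tGOx: num_exec_engines = ubfx32(core_features, 0, 4). */
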
+ */ + gpu_props->raw_props.core_features = regdump.core_features; + +#if !MALI_USE_CSF + gpu_props->core_props.num_exec_engines = + KBASE_UBFX32(gpu_props->raw_props.core_features, 0, 4); +#endif + return err; } @@ -694,94 +705,102 @@ static struct { #define PROP(name, member) \ {KBASE_GPUPROP_ ## name, offsetof(struct base_gpu_props, member), \ sizeof(((struct base_gpu_props *)0)->member)} - PROP(PRODUCT_ID, core_props.product_id), - PROP(VERSION_STATUS, core_props.version_status), - PROP(MINOR_REVISION, core_props.minor_revision), - PROP(MAJOR_REVISION, core_props.major_revision), - PROP(GPU_FREQ_KHZ_MAX, core_props.gpu_freq_khz_max), - PROP(LOG2_PROGRAM_COUNTER_SIZE, core_props.log2_program_counter_size), - PROP(TEXTURE_FEATURES_0, core_props.texture_features[0]), - PROP(TEXTURE_FEATURES_1, core_props.texture_features[1]), - PROP(TEXTURE_FEATURES_2, core_props.texture_features[2]), - PROP(TEXTURE_FEATURES_3, core_props.texture_features[3]), - PROP(GPU_AVAILABLE_MEMORY_SIZE, core_props.gpu_available_memory_size), - PROP(NUM_EXEC_ENGINES, core_props.num_exec_engines), - - PROP(L2_LOG2_LINE_SIZE, l2_props.log2_line_size), - PROP(L2_LOG2_CACHE_SIZE, l2_props.log2_cache_size), - PROP(L2_NUM_L2_SLICES, l2_props.num_l2_slices), - - PROP(TILER_BIN_SIZE_BYTES, tiler_props.bin_size_bytes), - PROP(TILER_MAX_ACTIVE_LEVELS, tiler_props.max_active_levels), - - PROP(MAX_THREADS, thread_props.max_threads), - PROP(MAX_WORKGROUP_SIZE, thread_props.max_workgroup_size), - PROP(MAX_BARRIER_SIZE, thread_props.max_barrier_size), - PROP(MAX_REGISTERS, thread_props.max_registers), - PROP(MAX_TASK_QUEUE, thread_props.max_task_queue), - PROP(MAX_THREAD_GROUP_SPLIT, thread_props.max_thread_group_split), - PROP(IMPL_TECH, thread_props.impl_tech), - PROP(TLS_ALLOC, thread_props.tls_alloc), - - PROP(RAW_SHADER_PRESENT, raw_props.shader_present), - PROP(RAW_TILER_PRESENT, raw_props.tiler_present), - PROP(RAW_L2_PRESENT, raw_props.l2_present), - PROP(RAW_STACK_PRESENT, raw_props.stack_present), - PROP(RAW_L2_FEATURES, raw_props.l2_features), - PROP(RAW_CORE_FEATURES, raw_props.core_features), - PROP(RAW_MEM_FEATURES, raw_props.mem_features), - PROP(RAW_MMU_FEATURES, raw_props.mmu_features), - PROP(RAW_AS_PRESENT, raw_props.as_present), - PROP(RAW_JS_PRESENT, raw_props.js_present), - PROP(RAW_JS_FEATURES_0, raw_props.js_features[0]), - PROP(RAW_JS_FEATURES_1, raw_props.js_features[1]), - PROP(RAW_JS_FEATURES_2, raw_props.js_features[2]), - PROP(RAW_JS_FEATURES_3, raw_props.js_features[3]), - PROP(RAW_JS_FEATURES_4, raw_props.js_features[4]), - PROP(RAW_JS_FEATURES_5, raw_props.js_features[5]), - PROP(RAW_JS_FEATURES_6, raw_props.js_features[6]), - PROP(RAW_JS_FEATURES_7, raw_props.js_features[7]), - PROP(RAW_JS_FEATURES_8, raw_props.js_features[8]), - PROP(RAW_JS_FEATURES_9, raw_props.js_features[9]), - PROP(RAW_JS_FEATURES_10, raw_props.js_features[10]), - PROP(RAW_JS_FEATURES_11, raw_props.js_features[11]), - PROP(RAW_JS_FEATURES_12, raw_props.js_features[12]), - PROP(RAW_JS_FEATURES_13, raw_props.js_features[13]), - PROP(RAW_JS_FEATURES_14, raw_props.js_features[14]), - PROP(RAW_JS_FEATURES_15, raw_props.js_features[15]), - PROP(RAW_TILER_FEATURES, raw_props.tiler_features), - PROP(RAW_TEXTURE_FEATURES_0, raw_props.texture_features[0]), - PROP(RAW_TEXTURE_FEATURES_1, raw_props.texture_features[1]), - PROP(RAW_TEXTURE_FEATURES_2, raw_props.texture_features[2]), - PROP(RAW_TEXTURE_FEATURES_3, raw_props.texture_features[3]), - PROP(RAW_GPU_ID, raw_props.gpu_id), - PROP(RAW_THREAD_MAX_THREADS, 
raw_props.thread_max_threads), - PROP(RAW_THREAD_MAX_WORKGROUP_SIZE, - raw_props.thread_max_workgroup_size), +#define BACKWARDS_COMPAT_PROP(name, type) \ + { \ + KBASE_GPUPROP_##name, SIZE_MAX, sizeof(type) \ + } + PROP(PRODUCT_ID, core_props.product_id), + PROP(VERSION_STATUS, core_props.version_status), + PROP(MINOR_REVISION, core_props.minor_revision), + PROP(MAJOR_REVISION, core_props.major_revision), + PROP(GPU_FREQ_KHZ_MAX, core_props.gpu_freq_khz_max), + PROP(LOG2_PROGRAM_COUNTER_SIZE, core_props.log2_program_counter_size), + PROP(TEXTURE_FEATURES_0, core_props.texture_features[0]), + PROP(TEXTURE_FEATURES_1, core_props.texture_features[1]), + PROP(TEXTURE_FEATURES_2, core_props.texture_features[2]), + PROP(TEXTURE_FEATURES_3, core_props.texture_features[3]), + PROP(GPU_AVAILABLE_MEMORY_SIZE, core_props.gpu_available_memory_size), + +#if MALI_USE_CSF + BACKWARDS_COMPAT_PROP(NUM_EXEC_ENGINES, u8), +#else + PROP(NUM_EXEC_ENGINES, core_props.num_exec_engines), +#endif + + PROP(L2_LOG2_LINE_SIZE, l2_props.log2_line_size), + PROP(L2_LOG2_CACHE_SIZE, l2_props.log2_cache_size), + PROP(L2_NUM_L2_SLICES, l2_props.num_l2_slices), + + PROP(TILER_BIN_SIZE_BYTES, tiler_props.bin_size_bytes), + PROP(TILER_MAX_ACTIVE_LEVELS, tiler_props.max_active_levels), + + PROP(MAX_THREADS, thread_props.max_threads), + PROP(MAX_WORKGROUP_SIZE, thread_props.max_workgroup_size), + PROP(MAX_BARRIER_SIZE, thread_props.max_barrier_size), + PROP(MAX_REGISTERS, thread_props.max_registers), + PROP(MAX_TASK_QUEUE, thread_props.max_task_queue), + PROP(MAX_THREAD_GROUP_SPLIT, thread_props.max_thread_group_split), + PROP(IMPL_TECH, thread_props.impl_tech), + PROP(TLS_ALLOC, thread_props.tls_alloc), + + PROP(RAW_SHADER_PRESENT, raw_props.shader_present), + PROP(RAW_TILER_PRESENT, raw_props.tiler_present), + PROP(RAW_L2_PRESENT, raw_props.l2_present), + PROP(RAW_STACK_PRESENT, raw_props.stack_present), + PROP(RAW_L2_FEATURES, raw_props.l2_features), + PROP(RAW_CORE_FEATURES, raw_props.core_features), + PROP(RAW_MEM_FEATURES, raw_props.mem_features), + PROP(RAW_MMU_FEATURES, raw_props.mmu_features), + PROP(RAW_AS_PRESENT, raw_props.as_present), + PROP(RAW_JS_PRESENT, raw_props.js_present), + PROP(RAW_JS_FEATURES_0, raw_props.js_features[0]), + PROP(RAW_JS_FEATURES_1, raw_props.js_features[1]), + PROP(RAW_JS_FEATURES_2, raw_props.js_features[2]), + PROP(RAW_JS_FEATURES_3, raw_props.js_features[3]), + PROP(RAW_JS_FEATURES_4, raw_props.js_features[4]), + PROP(RAW_JS_FEATURES_5, raw_props.js_features[5]), + PROP(RAW_JS_FEATURES_6, raw_props.js_features[6]), + PROP(RAW_JS_FEATURES_7, raw_props.js_features[7]), + PROP(RAW_JS_FEATURES_8, raw_props.js_features[8]), + PROP(RAW_JS_FEATURES_9, raw_props.js_features[9]), + PROP(RAW_JS_FEATURES_10, raw_props.js_features[10]), + PROP(RAW_JS_FEATURES_11, raw_props.js_features[11]), + PROP(RAW_JS_FEATURES_12, raw_props.js_features[12]), + PROP(RAW_JS_FEATURES_13, raw_props.js_features[13]), + PROP(RAW_JS_FEATURES_14, raw_props.js_features[14]), + PROP(RAW_JS_FEATURES_15, raw_props.js_features[15]), + PROP(RAW_TILER_FEATURES, raw_props.tiler_features), + PROP(RAW_TEXTURE_FEATURES_0, raw_props.texture_features[0]), + PROP(RAW_TEXTURE_FEATURES_1, raw_props.texture_features[1]), + PROP(RAW_TEXTURE_FEATURES_2, raw_props.texture_features[2]), + PROP(RAW_TEXTURE_FEATURES_3, raw_props.texture_features[3]), + PROP(RAW_GPU_ID, raw_props.gpu_id), + PROP(RAW_THREAD_MAX_THREADS, raw_props.thread_max_threads), + PROP(RAW_THREAD_MAX_WORKGROUP_SIZE, raw_props.thread_max_workgroup_size), 
PROP(RAW_THREAD_MAX_BARRIER_SIZE, raw_props.thread_max_barrier_size), - PROP(RAW_THREAD_FEATURES, raw_props.thread_features), - PROP(RAW_COHERENCY_MODE, raw_props.coherency_mode), - PROP(RAW_THREAD_TLS_ALLOC, raw_props.thread_tls_alloc), - PROP(RAW_GPU_FEATURES, raw_props.gpu_features), - PROP(COHERENCY_NUM_GROUPS, coherency_info.num_groups), - PROP(COHERENCY_NUM_CORE_GROUPS, coherency_info.num_core_groups), - PROP(COHERENCY_COHERENCY, coherency_info.coherency), - PROP(COHERENCY_GROUP_0, coherency_info.group[0].core_mask), - PROP(COHERENCY_GROUP_1, coherency_info.group[1].core_mask), - PROP(COHERENCY_GROUP_2, coherency_info.group[2].core_mask), - PROP(COHERENCY_GROUP_3, coherency_info.group[3].core_mask), - PROP(COHERENCY_GROUP_4, coherency_info.group[4].core_mask), - PROP(COHERENCY_GROUP_5, coherency_info.group[5].core_mask), - PROP(COHERENCY_GROUP_6, coherency_info.group[6].core_mask), - PROP(COHERENCY_GROUP_7, coherency_info.group[7].core_mask), - PROP(COHERENCY_GROUP_8, coherency_info.group[8].core_mask), - PROP(COHERENCY_GROUP_9, coherency_info.group[9].core_mask), - PROP(COHERENCY_GROUP_10, coherency_info.group[10].core_mask), - PROP(COHERENCY_GROUP_11, coherency_info.group[11].core_mask), - PROP(COHERENCY_GROUP_12, coherency_info.group[12].core_mask), - PROP(COHERENCY_GROUP_13, coherency_info.group[13].core_mask), - PROP(COHERENCY_GROUP_14, coherency_info.group[14].core_mask), - PROP(COHERENCY_GROUP_15, coherency_info.group[15].core_mask), + PROP(RAW_THREAD_FEATURES, raw_props.thread_features), + PROP(RAW_COHERENCY_MODE, raw_props.coherency_mode), + PROP(RAW_THREAD_TLS_ALLOC, raw_props.thread_tls_alloc), + PROP(RAW_GPU_FEATURES, raw_props.gpu_features), + PROP(COHERENCY_NUM_GROUPS, coherency_info.num_groups), + PROP(COHERENCY_NUM_CORE_GROUPS, coherency_info.num_core_groups), + PROP(COHERENCY_COHERENCY, coherency_info.coherency), + PROP(COHERENCY_GROUP_0, coherency_info.group[0].core_mask), + PROP(COHERENCY_GROUP_1, coherency_info.group[1].core_mask), + PROP(COHERENCY_GROUP_2, coherency_info.group[2].core_mask), + PROP(COHERENCY_GROUP_3, coherency_info.group[3].core_mask), + PROP(COHERENCY_GROUP_4, coherency_info.group[4].core_mask), + PROP(COHERENCY_GROUP_5, coherency_info.group[5].core_mask), + PROP(COHERENCY_GROUP_6, coherency_info.group[6].core_mask), + PROP(COHERENCY_GROUP_7, coherency_info.group[7].core_mask), + PROP(COHERENCY_GROUP_8, coherency_info.group[8].core_mask), + PROP(COHERENCY_GROUP_9, coherency_info.group[9].core_mask), + PROP(COHERENCY_GROUP_10, coherency_info.group[10].core_mask), + PROP(COHERENCY_GROUP_11, coherency_info.group[11].core_mask), + PROP(COHERENCY_GROUP_12, coherency_info.group[12].core_mask), + PROP(COHERENCY_GROUP_13, coherency_info.group[13].core_mask), + PROP(COHERENCY_GROUP_14, coherency_info.group[14].core_mask), + PROP(COHERENCY_GROUP_15, coherency_info.group[15].core_mask), #undef PROP }; @@ -818,7 +837,14 @@ int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev) for (i = 0; i < count; i++) { u32 type = gpu_property_mapping[i].type; u8 type_size; - void *field = ((u8 *)props) + gpu_property_mapping[i].offset; + const size_t offset = gpu_property_mapping[i].offset; + const u64 dummy_backwards_compat_value = (u64)0; + const void *field; + + if (likely(offset < sizeof(struct base_gpu_props))) + field = ((const u8 *)props) + offset; + else + field = &dummy_backwards_compat_value; switch (gpu_property_mapping[i].size) { case 1: @@ -844,16 +870,16 @@ int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev) switch (type_size) 
{ case KBASE_GPUPROP_VALUE_SIZE_U8: - WRITE_U8(*((u8 *)field)); + WRITE_U8(*((const u8 *)field)); break; case KBASE_GPUPROP_VALUE_SIZE_U16: - WRITE_U16(*((u16 *)field)); + WRITE_U16(*((const u16 *)field)); break; case KBASE_GPUPROP_VALUE_SIZE_U32: - WRITE_U32(*((u32 *)field)); + WRITE_U32(*((const u32 *)field)); break; case KBASE_GPUPROP_VALUE_SIZE_U64: - WRITE_U64(*((u64 *)field)); + WRITE_U64(*((const u64 *)field)); break; default: /* Cannot be reached */ WARN_ON(1); diff --git a/mali_kbase/mali_kbase_hw.c b/mali_kbase/mali_kbase_hw.c index 75e4aaf..f205617 100644 --- a/mali_kbase/mali_kbase_hw.c +++ b/mali_kbase/mali_kbase_hw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -137,8 +137,7 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( static const struct base_hw_product base_hw_products[] = { { GPU_ID2_PRODUCT_TMIX, - { { GPU_ID2_VERSION_MAKE(0, 0, 1), - base_hw_issues_tMIx_r0p0_05dev0 }, + { { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tMIx_r0p0_05dev0 }, { GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0 }, { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1 }, { U32_MAX /* sentinel value */, NULL } } }, @@ -233,11 +232,15 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( { GPU_ID2_PRODUCT_TTUX, { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 }, { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 }, + { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 }, + { GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p1 }, { U32_MAX, NULL } } }, { GPU_ID2_PRODUCT_LTUX, { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 }, { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 }, + { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 }, + { GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p1 }, { U32_MAX, NULL } } }, }; @@ -294,12 +297,8 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( */ issues = fallback_issues; -#if MALI_CUSTOMER_RELEASE dev_warn(kbdev->dev, "GPU hardware issue table may need updating:\n" -#else - dev_info(kbdev->dev, -#endif "r%dp%d status %d is unknown; treating as r%dp%d status %d", (gpu_id & GPU_ID2_VERSION_MAJOR) >> GPU_ID2_VERSION_MAJOR_SHIFT, diff --git a/mali_kbase/mali_kbase_hwaccess_jm.h b/mali_kbase/mali_kbase_hwaccess_jm.h index 95d7624..124a6d6 100644 --- a/mali_kbase/mali_kbase_hwaccess_jm.h +++ b/mali_kbase/mali_kbase_hwaccess_jm.h @@ -236,7 +236,7 @@ void kbase_backend_timeouts_changed(struct kbase_device *kbdev); int kbase_backend_slot_free(struct kbase_device *kbdev, int js); /** - * kbase_job_check_enter_disjoint - potentially leave disjoint state + * kbase_job_check_leave_disjoint - potentially leave disjoint state * @kbdev: kbase device * @target_katom: atom which is finishing * diff --git a/mali_kbase/mali_kbase_hwaccess_pm.h b/mali_kbase/mali_kbase_hwaccess_pm.h index 1c153c4..effb2ff 100644 --- a/mali_kbase/mali_kbase_hwaccess_pm.h +++ b/mali_kbase/mali_kbase_hwaccess_pm.h @@ -209,7 +209,7 @@ int kbase_pm_list_policies(struct kbase_device *kbdev, const struct kbase_pm_policy * const **list); /** - * kbase_protected_most_enable - Enable protected mode + * kbase_pm_protected_mode_enable() - Enable protected mode * * @kbdev: 
Address of the instance of a GPU platform device. * @@ -218,7 +218,7 @@ int kbase_pm_list_policies(struct kbase_device *kbdev, int kbase_pm_protected_mode_enable(struct kbase_device *kbdev); /** - * kbase_protected_mode_disable - Disable protected mode + * kbase_pm_protected_mode_disable() - Disable protected mode * * @kbdev: Address of the instance of a GPU platform device. * diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf.c b/mali_kbase/mali_kbase_hwcnt_backend_csf.c index c42f2a0..99e8be7 100644 --- a/mali_kbase/mali_kbase_hwcnt_backend_csf.c +++ b/mali_kbase/mali_kbase_hwcnt_backend_csf.c @@ -36,8 +36,13 @@ #define BASE_MAX_NR_CLOCKS_REGULATORS 2 #endif +#if IS_ENABLED(CONFIG_MALI_IS_FPGA) && !IS_ENABLED(CONFIG_MALI_NO_MALI) +/* Backend watch dog timer interval in milliseconds: 18 seconds. */ +#define HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS ((u32)18000) +#else /* Backend watch dog timer interval in milliseconds: 1 second. */ #define HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS ((u32)1000) +#endif /* IS_FPGA && !NO_MALI */ /** * enum kbase_hwcnt_backend_csf_dump_state - HWC CSF backend dumping states. @@ -168,23 +173,29 @@ struct kbase_hwcnt_backend_csf_info { /** * struct kbase_hwcnt_csf_physical_layout - HWC sample memory physical layout * information. + * @hw_block_cnt: Total number of hardware counters blocks. The hw counters blocks are + * sub-categorized into 4 classes: front-end, tiler, memory system, and shader. + * hw_block_cnt = fe_cnt + tiler_cnt + mmu_l2_cnt + shader_cnt. * @fe_cnt: Front end block count. * @tiler_cnt: Tiler block count. - * @mmu_l2_cnt: Memory system(MMU and L2 cache) block count. + * @mmu_l2_cnt: Memory system (MMU and L2 cache) block count. * @shader_cnt: Shader Core block count. - * @block_cnt: Total block count (sum of all other block counts). + * @fw_block_cnt: Total number of firmware counters blocks. + * @block_cnt: Total block count (sum of all counter blocks: hw_block_cnt + fw_block_cnt). * @shader_avail_mask: Bitmap of all shader cores in the system. * @enable_mask_offset: Offset in array elements of enable mask in each block * starting from the beginning of block. - * @headers_per_block: Header size per block. - * @counters_per_block: Counters size per block. - * @values_per_block: Total size per block. + * @headers_per_block: For any block, the number of counters designated as block's header. + * @counters_per_block: For any block, the number of counters designated as block's payload. + * @values_per_block: For any block, the number of counters in total (header + payload). 
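Given that layout, a dump can be pictured as a flat array of 32-bit counter values, values_per_block entries per block, with the firmware blocks placed before the hardware blocks. A small indexing sketch under those assumptions (illustrative, not driver code):

#include <linux/types.h>

/* Read counter 'counter' of block 'block' from one raw dump buffer. */
static inline u32 sample_value(const u32 *dump, size_t values_per_block,
			       size_t block, size_t counter)
{
	return dump[block * values_per_block + counter];
}
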
*/ struct kbase_hwcnt_csf_physical_layout { + u8 hw_block_cnt; u8 fe_cnt; u8 tiler_cnt; u8 mmu_l2_cnt; u8 shader_cnt; + u8 fw_block_cnt; u8 block_cnt; u64 shader_avail_mask; size_t enable_mask_offset; @@ -361,29 +372,38 @@ static void kbasep_hwcnt_backend_csf_init_layout( const struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info, struct kbase_hwcnt_csf_physical_layout *phys_layout) { - u8 shader_core_cnt; + size_t shader_core_cnt; size_t values_per_block; + size_t fw_blocks_count; + size_t hw_blocks_count; WARN_ON(!prfcnt_info); WARN_ON(!phys_layout); shader_core_cnt = fls64(prfcnt_info->core_mask); - values_per_block = - prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES; + values_per_block = prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES; + fw_blocks_count = div_u64(prfcnt_info->prfcnt_fw_size, prfcnt_info->prfcnt_block_size); + hw_blocks_count = div_u64(prfcnt_info->prfcnt_hw_size, prfcnt_info->prfcnt_block_size); + + /* The number of hardware counters reported by the GPU matches the legacy guess-work we + * have done in the past + */ + WARN_ON(hw_blocks_count != KBASE_HWCNT_V5_FE_BLOCK_COUNT + + KBASE_HWCNT_V5_TILER_BLOCK_COUNT + + prfcnt_info->l2_count + shader_core_cnt); *phys_layout = (struct kbase_hwcnt_csf_physical_layout){ .fe_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT, .tiler_cnt = KBASE_HWCNT_V5_TILER_BLOCK_COUNT, .mmu_l2_cnt = prfcnt_info->l2_count, .shader_cnt = shader_core_cnt, - .block_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT + - KBASE_HWCNT_V5_TILER_BLOCK_COUNT + - prfcnt_info->l2_count + shader_core_cnt, + .fw_block_cnt = fw_blocks_count, + .hw_block_cnt = hw_blocks_count, + .block_cnt = fw_blocks_count + hw_blocks_count, .shader_avail_mask = prfcnt_info->core_mask, .headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, .values_per_block = values_per_block, - .counters_per_block = - values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + .counters_per_block = values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK, .enable_mask_offset = KBASE_HWCNT_V5_PRFCNT_EN_HEADER, }; } @@ -458,7 +478,15 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample( u64 *acc_block = accum_buf; const size_t values_per_block = phys_layout->values_per_block; - for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) { + /* Performance counter blocks for firmware are stored before blocks for hardware. + * We skip over the firmware's performance counter blocks (counters dumping is not + * supported for firmware blocks, only hardware ones). 
+ */ + old_block += values_per_block * phys_layout->fw_block_cnt; + new_block += values_per_block * phys_layout->fw_block_cnt; + + for (block_idx = phys_layout->fw_block_cnt; block_idx < phys_layout->block_cnt; + block_idx++) { const u32 old_enable_mask = old_block[phys_layout->enable_mask_offset]; const u32 new_enable_mask = @@ -546,8 +574,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample( old_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); WARN_ON(new_block != new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); - WARN_ON(acc_block != - accum_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); + WARN_ON(acc_block != accum_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES) - + (values_per_block * phys_layout->fw_block_cnt)); (void)dump_bytes; } @@ -562,7 +590,7 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples( const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes; bool clearing_samples = backend_csf->info->prfcnt_info.clearing_samples; u32 *old_sample_buf = backend_csf->old_sample_buf; - u32 *new_sample_buf; + u32 *new_sample_buf = old_sample_buf; if (extract_index_to_start == insert_index_to_stop) /* No samples to accumulate. Early out. */ @@ -1434,7 +1462,6 @@ kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info, *out_backend = backend_csf; return 0; - destroy_workqueue(backend_csf->hwc_dump_workq); err_alloc_workqueue: backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx, backend_csf->ring_buf); @@ -1938,7 +1965,6 @@ void kbase_hwcnt_backend_csf_on_prfcnt_disable( int kbase_hwcnt_backend_csf_metadata_init( struct kbase_hwcnt_backend_interface *iface) { - int errcode; struct kbase_hwcnt_backend_csf_info *csf_info; struct kbase_hwcnt_gpu_info gpu_info; @@ -1964,19 +1990,8 @@ int kbase_hwcnt_backend_csf_metadata_init( gpu_info.prfcnt_values_per_block = csf_info->prfcnt_info.prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES; - errcode = kbase_hwcnt_csf_metadata_create( - &gpu_info, csf_info->counter_set, &csf_info->metadata); - if (errcode) - return errcode; - - /* - * Dump abstraction size should be exactly twice the size and layout as - * the physical dump size since 64-bit per value used in metadata. - */ - WARN_ON(csf_info->prfcnt_info.dump_bytes * 2 != - csf_info->metadata->dump_buf_bytes); - - return 0; + return kbase_hwcnt_csf_metadata_create(&gpu_info, csf_info->counter_set, + &csf_info->metadata); } void kbase_hwcnt_backend_csf_metadata_term( diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf_if.h b/mali_kbase/mali_kbase_hwcnt_backend_csf_if.h index 9c4fef5..24b26c2 100644 --- a/mali_kbase/mali_kbase_hwcnt_backend_csf_if.h +++ b/mali_kbase/mali_kbase_hwcnt_backend_csf_if.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -55,8 +55,12 @@ struct kbase_hwcnt_backend_csf_if_enable { /** * struct kbase_hwcnt_backend_csf_if_prfcnt_info - Performance counter * information. + * @prfcnt_hw_size: Total length in bytes of all the hardware counters data. The hardware + * counters are sub-divided into 4 classes: front-end, shader, tiler, and + * memory system (l2 cache + MMU). + * @prfcnt_fw_size: Total length in bytes of all the firmware counters data. 
* @dump_bytes: Bytes of GPU memory required to perform a performance - * counter dump. + * counter dump. dump_bytes = prfcnt_hw_size + prfcnt_fw_size. * @prfcnt_block_size: Bytes of each performance counter block. * @l2_count: The MMU L2 cache count. * @core_mask: Shader core mask. @@ -65,6 +69,8 @@ struct kbase_hwcnt_backend_csf_if_enable { * is taken. */ struct kbase_hwcnt_backend_csf_if_prfcnt_info { + size_t prfcnt_hw_size; + size_t prfcnt_fw_size; size_t dump_bytes; size_t prfcnt_block_size; size_t l2_count; @@ -273,8 +279,6 @@ typedef void kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn( * @timestamp_ns: Function ptr to get the current CSF interface * timestamp. * @dump_enable: Function ptr to enable dumping. - * @dump_enable_nolock: Function ptr to enable dumping while the - * backend-specific spinlock is already held. * @dump_disable: Function ptr to disable dumping. * @dump_request: Function ptr to request a dump. * @get_indexes: Function ptr to get extract and insert indexes of the diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c index 15ffbfa..bc1d719 100644 --- a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c +++ b/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -221,30 +221,29 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info) { #if IS_ENABLED(CONFIG_MALI_NO_MALI) - size_t dummy_model_blk_count; struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; - prfcnt_info->l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS; - prfcnt_info->core_mask = - (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1; - /* 1 FE block + 1 Tiler block + l2_count blocks + shader_core blocks */ - dummy_model_blk_count = - 2 + prfcnt_info->l2_count + fls64(prfcnt_info->core_mask); - prfcnt_info->dump_bytes = - dummy_model_blk_count * KBASE_DUMMY_MODEL_BLOCK_SIZE; - prfcnt_info->prfcnt_block_size = - KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK * - KBASE_HWCNT_VALUE_HW_BYTES; - prfcnt_info->clk_cnt = 1; - prfcnt_info->clearing_samples = true; + *prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){ + .l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS, + .core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1, + .prfcnt_hw_size = + KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE, + .prfcnt_fw_size = + KBASE_DUMMY_MODEL_MAX_FIRMWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE, + .dump_bytes = KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE, + .prfcnt_block_size = KBASE_DUMMY_MODEL_BLOCK_SIZE, + .clk_cnt = 1, + .clearing_samples = true, + }; + fw_ctx->buf_bytes = prfcnt_info->dump_bytes; #else struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; struct kbase_device *kbdev; u32 prfcnt_size; - u32 prfcnt_hw_size = 0; - u32 prfcnt_fw_size = 0; + u32 prfcnt_hw_size; + u32 prfcnt_fw_size; u32 prfcnt_block_size = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK * KBASE_HWCNT_VALUE_HW_BYTES; @@ -254,8 +253,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; kbdev = fw_ctx->kbdev; prfcnt_size = 
kbdev->csf.global_iface.prfcnt_size; - prfcnt_hw_size = (prfcnt_size & 0xFF) << 8; - prfcnt_fw_size = (prfcnt_size >> 16) << 8; + prfcnt_hw_size = GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET(prfcnt_size); + prfcnt_fw_size = GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET(prfcnt_size); fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size; /* Read the block size if the GPU has the register PRFCNT_FEATURES @@ -269,14 +268,16 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( << 8; } - prfcnt_info->dump_bytes = fw_ctx->buf_bytes; - prfcnt_info->prfcnt_block_size = prfcnt_block_size; - prfcnt_info->l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices; - prfcnt_info->core_mask = - kbdev->gpu_props.props.coherency_info.group[0].core_mask; - - prfcnt_info->clk_cnt = fw_ctx->clk_cnt; - prfcnt_info->clearing_samples = true; + *prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){ + .prfcnt_hw_size = prfcnt_hw_size, + .prfcnt_fw_size = prfcnt_fw_size, + .dump_bytes = fw_ctx->buf_bytes, + .prfcnt_block_size = prfcnt_block_size, + .l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices, + .core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask, + .clk_cnt = fw_ctx->clk_cnt, + .clearing_samples = true, + }; /* Block size must be multiple of counter size. */ WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) != @@ -368,7 +369,11 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( kfree(page_list); +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + fw_ring_buf->gpu_dump_base = (uintptr_t)cpu_addr; +#else fw_ring_buf->gpu_dump_base = gpu_va_base; +#endif /* CONFIG_MALI_NO_MALI */ fw_ring_buf->cpu_dump_base = cpu_addr; fw_ring_buf->phys = phys; fw_ring_buf->num_pages = num_pages; @@ -379,12 +384,6 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( *out_ring_buf = (struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf; -#if IS_ENABLED(CONFIG_MALI_NO_MALI) - /* The dummy model needs the CPU mapping. */ - gpu_model_set_dummy_prfcnt_base_cpu(fw_ring_buf->cpu_dump_base, kbdev, - phys, num_pages); -#endif /* CONFIG_MALI_NO_MALI */ - return 0; mmu_insert_failed: @@ -422,6 +421,14 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync( WARN_ON(!ctx); WARN_ON(!ring_buf); +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + /* When using the dummy backend syncing the ring buffer is unnecessary as + * the ring buffer is only accessed by the CPU. It may also cause data loss + * due to cache invalidation so return early. + */ + return; +#endif /* CONFIG_MALI_NO_MALI */ + /* The index arguments for this function form an inclusive, exclusive * range. 
* However, when masking back to the available buffers we will make this @@ -500,10 +507,9 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free( if (fw_ring_buf->phys) { u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START; - WARN_ON(kbase_mmu_teardown_pages( - fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu, - gpu_va_base >> PAGE_SHIFT, fw_ring_buf->num_pages, - MCU_AS_NR)); + WARN_ON(kbase_mmu_teardown_pages(fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu, + gpu_va_base >> PAGE_SHIFT, fw_ring_buf->phys, + fw_ring_buf->num_pages, MCU_AS_NR)); vunmap(fw_ring_buf->cpu_dump_base); @@ -540,8 +546,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_enable( global_iface = &kbdev->csf.global_iface; /* Configure */ - prfcnt_config = fw_ring_buf->buf_count; - prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT; + prfcnt_config = GLB_PRFCNT_CONFIG_SIZE_SET(0, fw_ring_buf->buf_count); + prfcnt_config = GLB_PRFCNT_CONFIG_SET_SELECT_SET(prfcnt_config, enable->counter_set); /* Configure the ring buffer base address */ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID, diff --git a/mali_kbase/mali_kbase_hwcnt_backend_jm.c b/mali_kbase/mali_kbase_hwcnt_backend_jm.c index e418212..98019e7 100644 --- a/mali_kbase/mali_kbase_hwcnt_backend_jm.c +++ b/mali_kbase/mali_kbase_hwcnt_backend_jm.c @@ -414,7 +414,12 @@ static int kbasep_hwcnt_backend_jm_dump_enable_nolock( enable.tiler_bm = phys_enable_map.tiler_bm; enable.mmu_l2_bm = phys_enable_map.mmu_l2_bm; enable.counter_set = phys_counter_set; +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + /* The dummy model needs the CPU mapping. */ + enable.dump_buffer = (uintptr_t)backend_jm->cpu_dump_va; +#else enable.dump_buffer = backend_jm->gpu_dump_va; +#endif /* CONFIG_MALI_NO_MALI */ enable.dump_buffer_bytes = backend_jm->info->dump_bytes; timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend); @@ -733,9 +738,6 @@ static int kbasep_hwcnt_backend_jm_create( int errcode; struct kbase_device *kbdev; struct kbase_hwcnt_backend_jm *backend = NULL; -#if IS_ENABLED(CONFIG_MALI_NO_MALI) - size_t page_count; -#endif WARN_ON(!info); WARN_ON(!out_backend); @@ -775,14 +777,6 @@ static int kbasep_hwcnt_backend_jm_create( kbase_ccswe_init(&backend->ccswe_shader_cores); backend->rate_listener.notify = kbasep_hwcnt_backend_jm_on_freq_change; -#if IS_ENABLED(CONFIG_MALI_NO_MALI) - /* The dummy model needs the CPU mapping. */ - page_count = PFN_UP(info->dump_bytes); - gpu_model_set_dummy_prfcnt_base_cpu(backend->cpu_dump_va, kbdev, - backend->vmap->cpu_pages, - page_count); -#endif /* CONFIG_MALI_NO_MALI */ - *out_backend = backend; return 0; diff --git a/mali_kbase/mali_kbase_hwcnt_backend_jm_watchdog.c b/mali_kbase/mali_kbase_hwcnt_backend_jm_watchdog.c index cdf3cd9..3d786ca 100644 --- a/mali_kbase/mali_kbase_hwcnt_backend_jm_watchdog.c +++ b/mali_kbase/mali_kbase_hwcnt_backend_jm_watchdog.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,7 +27,13 @@ #include <mali_kbase_hwcnt_backend.h> #include <mali_kbase_hwcnt_watchdog_if.h> +#if IS_ENABLED(CONFIG_MALI_IS_FPGA) && !IS_ENABLED(CONFIG_MALI_NO_MALI) +/* Backend watch dog timer interval in milliseconds: 18 seconds. 
*/ +static const u32 hwcnt_backend_watchdog_timer_interval_ms = 18000; +#else +/* Backend watch dog timer interval in milliseconds: 1 second. */ static const u32 hwcnt_backend_watchdog_timer_interval_ms = 1000; +#endif /* IS_FPGA && !NO_MALI */ /* * IDLE_BUFFER_EMPTY -> USER_DUMPING_BUFFER_EMPTY on dump_request. diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.c b/mali_kbase/mali_kbase_hwcnt_gpu.c index 752d096..5f5c36f 100644 --- a/mali_kbase/mali_kbase_hwcnt_gpu.c +++ b/mali_kbase/mali_kbase_hwcnt_gpu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,7 +22,6 @@ #include "mali_kbase_hwcnt_gpu.h" #include "mali_kbase_hwcnt_types.h" -#include <linux/bug.h> #include <linux/err.h> /** enum enable_map_idx - index into a block enable map that spans multiple u64 array elements @@ -44,13 +43,13 @@ static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, if (is_csf) *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2; else - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED; + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED; break; case KBASE_HWCNT_SET_TERTIARY: if (is_csf) *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3; else - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED; + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED; break; default: WARN_ON(true); @@ -66,7 +65,7 @@ static void kbasep_get_tiler_block_type(u64 *dst, break; case KBASE_HWCNT_SET_SECONDARY: case KBASE_HWCNT_SET_TERTIARY: - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED; + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED; break; default: WARN_ON(true); @@ -87,7 +86,7 @@ static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, if (is_csf) *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3; else - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED; + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED; break; default: WARN_ON(true); @@ -105,7 +104,7 @@ static void kbasep_get_memsys_block_type(u64 *dst, *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2; break; case KBASE_HWCNT_SET_TERTIARY: - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED; + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED; break; default: WARN_ON(true); @@ -320,7 +319,8 @@ static bool is_block_type_shader( if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC || blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 || - blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3) + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3 || + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED) is_shader = true; return is_shader; @@ -335,7 +335,8 @@ static bool is_block_type_l2_cache( switch (grp_type) { case KBASE_HWCNT_GPU_GROUP_TYPE_V5: if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS || - blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2) + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 || + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED) is_l2_cache = true; break; default: @@ -383,6 +384,8 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, const bool is_l2_cache = is_block_type_l2_cache( kbase_hwcnt_metadata_group_type(metadata, grp), blk_type); + const bool is_undefined = kbase_hwcnt_is_block_type_undefined( 
+ kbase_hwcnt_metadata_group_type(metadata, grp), blk_type); bool hw_res_available = true; /* @@ -414,8 +417,23 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( dst, grp, blk, blk_inst); const u64 *src_blk = dump_src + src_offset; + bool blk_powered; + + if (!is_shader_core) { + /* Under the current PM system, counters will + * only be enabled after all non shader core + * blocks are powered up. + */ + blk_powered = true; + } else { + /* Check the PM core mask to see if the shader + * core is powered up. + */ + blk_powered = core_mask & 1; + } - if ((!is_shader_core || (core_mask & 1)) && hw_res_available) { + if (blk_powered && !is_undefined && hw_res_available) { + /* Only powered and defined blocks have valid data. */ if (accumulate) { kbase_hwcnt_dump_buffer_block_accumulate( dst_blk, src_blk, hdr_cnt, @@ -425,9 +443,18 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, dst_blk, src_blk, (hdr_cnt + ctr_cnt)); } - } else if (!accumulate) { - kbase_hwcnt_dump_buffer_block_zero( - dst_blk, (hdr_cnt + ctr_cnt)); + } else { + /* Even though the block might be undefined, the + * user has enabled counter collection for it. + * We should not propagate garbage data. + */ + if (accumulate) { + /* No-op to preserve existing values */ + } else { + /* src is garbage, so zero the dst */ + kbase_hwcnt_dump_buffer_block_zero(dst_blk, + (hdr_cnt + ctr_cnt)); + } } } @@ -462,6 +489,9 @@ int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, const size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk); + const uint64_t blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk); + const bool is_undefined = kbase_hwcnt_is_block_type_undefined( + kbase_hwcnt_metadata_group_type(metadata, grp), blk_type); /* * Skip block if no values in the destination block are enabled. @@ -472,12 +502,26 @@ int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, dst, grp, blk, blk_inst); const u64 *src_blk = dump_src + src_offset; - if (accumulate) { - kbase_hwcnt_dump_buffer_block_accumulate( - dst_blk, src_blk, hdr_cnt, ctr_cnt); + if (!is_undefined) { + if (accumulate) { + kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk, + hdr_cnt, ctr_cnt); + } else { + kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, + (hdr_cnt + ctr_cnt)); + } } else { - kbase_hwcnt_dump_buffer_block_copy( - dst_blk, src_blk, (hdr_cnt + ctr_cnt)); + /* Even though the block might be undefined, the + * user has enabled counter collection for it. + * We should not propagate garbage data. + */ + if (accumulate) { + /* No-op to preserve existing values */ + } else { + /* src is garbage, so zero the dst */ + kbase_hwcnt_dump_buffer_block_zero(dst_blk, + (hdr_cnt + ctr_cnt)); + } } } @@ -564,7 +608,10 @@ void kbase_hwcnt_gpu_enable_map_to_physical( break; switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: /* Nothing to do in this case. 
*/ break; case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: @@ -664,7 +711,10 @@ void kbase_hwcnt_gpu_enable_map_from_physical( break; switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: /* Nothing to do in this case. */ break; case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.h b/mali_kbase/mali_kbase_hwcnt_gpu.h index 648f85f..f890d45 100644 --- a/mali_kbase/mali_kbase_hwcnt_gpu.h +++ b/mali_kbase/mali_kbase_hwcnt_gpu.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,6 +22,7 @@ #ifndef _KBASE_HWCNT_GPU_H_ #define _KBASE_HWCNT_GPU_H_ +#include <linux/bug.h> #include <linux/types.h> struct kbase_device; @@ -60,33 +61,40 @@ enum kbase_hwcnt_gpu_group_type { /** * enum kbase_hwcnt_gpu_v5_block_type - GPU V5 hardware counter block types, * used to identify metadata blocks. - * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED: Undefined block (e.g. if a - * counter set that a block - * doesn't support is used). * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: Front End block (Job manager * or CSF HW). * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: Secondary Front End block (Job * manager or CSF HW). * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: Tertiary Front End block (Job * manager or CSF HW). + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: Undefined Front End block + * (e.g. if a counter set that + * a block doesn't support is + * used). * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: Tiler block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: Undefined Tiler block. * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: Shader Core block. * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: Secondary Shader Core block. * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: Tertiary Shader Core block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: Undefined Shader Core block. * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: Memsys block. * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: Secondary Memsys block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: Undefined Memsys block. */ enum kbase_hwcnt_gpu_v5_block_type { - KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED, KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE, KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2, KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED, KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED, KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC, KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2, KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED, KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS, KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED, }; /** @@ -188,6 +196,27 @@ struct kbase_hwcnt_curr_config { }; /** + * kbase_hwcnt_is_block_type_undefined() - Check if a block type is undefined. + * + * @grp_type: Hardware counter group type. + * @blk_type: Hardware counter block type. 
+ * + * Return: true if the block type is undefined, else false. + */ +static inline bool kbase_hwcnt_is_block_type_undefined(const uint64_t grp_type, + const uint64_t blk_type) +{ + /* Warn on unknown group type */ + if (WARN_ON(grp_type != KBASE_HWCNT_GPU_GROUP_TYPE_V5)) + return false; + + return (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED || + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED || + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED || + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED); +} + +/** * kbase_hwcnt_jm_metadata_create() - Create hardware counter metadata for the * JM GPUs. * @info: Non-NULL pointer to info struct. diff --git a/mali_kbase/mali_kbase_hwcnt_gpu_narrow.c b/mali_kbase/mali_kbase_hwcnt_gpu_narrow.c index e2caa1c..2a1cde7 100644 --- a/mali_kbase/mali_kbase_hwcnt_gpu_narrow.c +++ b/mali_kbase/mali_kbase_hwcnt_gpu_narrow.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -161,7 +161,7 @@ void kbase_hwcnt_dump_buffer_narrow_free( return; kfree(dump_buf_narrow->dump_buf); - *dump_buf_narrow = (struct kbase_hwcnt_dump_buffer_narrow){ 0 }; + *dump_buf_narrow = (struct kbase_hwcnt_dump_buffer_narrow){ NULL }; } int kbase_hwcnt_dump_buffer_narrow_array_alloc( diff --git a/mali_kbase/mali_kbase_jd.c b/mali_kbase/mali_kbase_jd.c index 79c8ebb..8667819 100644 --- a/mali_kbase/mali_kbase_jd.c +++ b/mali_kbase/mali_kbase_jd.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -82,7 +82,7 @@ static void jd_mark_atom_complete(struct kbase_jd_atom *katom) * Returns whether the JS needs a reschedule. * * Note that the caller must also check the atom status and - * if it is KBASE_JD_ATOM_STATE_COMPLETED must call jd_done_nolock + * if it is KBASE_JD_ATOM_STATE_COMPLETED must call kbase_jd_done_nolock */ static bool jd_run_atom(struct kbase_jd_atom *katom) { @@ -148,7 +148,7 @@ void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom) if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) { /* The atom has already finished */ - resched |= jd_done_nolock(katom, true); + resched |= kbase_jd_done_nolock(katom, true); } if (resched) @@ -778,7 +778,7 @@ static void jd_mark_simple_gfx_frame_atoms(struct kbase_jd_atom *katom) * * The caller must hold the kbase_jd_context.lock. 
*/ -bool jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately) +bool kbase_jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately) { struct kbase_context *kctx = katom->kctx; struct list_head completed_jobs; @@ -786,6 +786,8 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately) bool need_to_try_schedule_context = false; int i; + lockdep_assert_held(&kctx->jctx.lock); + KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_START(kctx->kbdev, katom); INIT_LIST_HEAD(&completed_jobs); @@ -918,7 +920,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately) return need_to_try_schedule_context; } -KBASE_EXPORT_TEST_API(jd_done_nolock); +KBASE_EXPORT_TEST_API(kbase_jd_done_nolock); #if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) enum { @@ -1026,7 +1028,6 @@ static bool jd_submit_atom(struct kbase_context *const kctx, katom->jobslot = user_atom->jobslot; katom->seq_nr = user_atom->seq_nr; katom->atom_flags = 0; - katom->retry_count = 0; katom->need_cache_flush_cores_retained = 0; katom->pre_dep = NULL; katom->post_dep = NULL; @@ -1087,7 +1088,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, * dependencies. */ jd_trace_atom_submit(kctx, katom, NULL); - return jd_done_nolock(katom, true); + return kbase_jd_done_nolock(katom, true); } } } @@ -1151,7 +1152,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, if (err >= 0) kbase_finish_soft_job(katom); } - return jd_done_nolock(katom, true); + return kbase_jd_done_nolock(katom, true); } katom->will_fail_event_code = katom->event_code; @@ -1177,7 +1178,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, /* Create a new atom. */ jd_trace_atom_submit(kctx, katom, &katom->sched_priority); -#if !MALI_INCREMENTAL_RENDERING +#if !MALI_INCREMENTAL_RENDERING_JM /* Reject atoms for incremental rendering if not supported */ if (katom->core_req & (BASE_JD_REQ_START_RENDERPASS|BASE_JD_REQ_END_RENDERPASS)) { @@ -1185,9 +1186,9 @@ static bool jd_submit_atom(struct kbase_context *const kctx, "Rejecting atom with unsupported core_req 0x%x\n", katom->core_req); katom->event_code = BASE_JD_EVENT_JOB_INVALID; - return jd_done_nolock(katom, true); + return kbase_jd_done_nolock(katom, true); } -#endif /* !MALI_INCREMENTAL_RENDERING */ +#endif /* !MALI_INCREMENTAL_RENDERING_JM */ if (katom->core_req & BASE_JD_REQ_END_RENDERPASS) { WARN_ON(katom->jc != 0); @@ -1199,7 +1200,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, */ dev_err(kctx->kbdev->dev, "Rejecting atom with jc = NULL\n"); katom->event_code = BASE_JD_EVENT_JOB_INVALID; - return jd_done_nolock(katom, true); + return kbase_jd_done_nolock(katom, true); } /* Reject atoms with an invalid device_nr */ @@ -1209,7 +1210,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, "Rejecting atom with invalid device_nr %d\n", katom->device_nr); katom->event_code = BASE_JD_EVENT_JOB_INVALID; - return jd_done_nolock(katom, true); + return kbase_jd_done_nolock(katom, true); } /* Reject atoms with invalid core requirements */ @@ -1219,7 +1220,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, "Rejecting atom with invalid core requirements\n"); katom->event_code = BASE_JD_EVENT_JOB_INVALID; katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE; - return jd_done_nolock(katom, true); + return kbase_jd_done_nolock(katom, true); } /* Reject soft-job atom of certain types from accessing external resources */ @@ -1230,7 +1231,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, dev_err(kctx->kbdev->dev, "Rejecting 
soft-job atom accessing external resources\n"); katom->event_code = BASE_JD_EVENT_JOB_INVALID; - return jd_done_nolock(katom, true); + return kbase_jd_done_nolock(katom, true); } if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { @@ -1238,7 +1239,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, if (kbase_jd_pre_external_resources(katom, user_atom) != 0) { /* setup failed (no access, bad resource, unknown resource types, etc.) */ katom->event_code = BASE_JD_EVENT_JOB_INVALID; - return jd_done_nolock(katom, true); + return kbase_jd_done_nolock(katom, true); } } @@ -1249,7 +1250,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, * JIT IDs - atom is invalid. */ katom->event_code = BASE_JD_EVENT_JOB_INVALID; - return jd_done_nolock(katom, true); + return kbase_jd_done_nolock(katom, true); } #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ @@ -1263,13 +1264,13 @@ static bool jd_submit_atom(struct kbase_context *const kctx, if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) { if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) { katom->event_code = BASE_JD_EVENT_JOB_INVALID; - return jd_done_nolock(katom, true); + return kbase_jd_done_nolock(katom, true); } } else { /* Soft-job */ if (kbase_prepare_soft_job(katom) != 0) { katom->event_code = BASE_JD_EVENT_JOB_INVALID; - return jd_done_nolock(katom, true); + return kbase_jd_done_nolock(katom, true); } } @@ -1293,7 +1294,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { if (kbase_process_soft_job(katom) == 0) { kbase_finish_soft_job(katom); - return jd_done_nolock(katom, true); + return kbase_jd_done_nolock(katom, true); } return false; } @@ -1323,7 +1324,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, } /* This is a pure dependency. Resolve it immediately */ - return jd_done_nolock(katom, true); + return kbase_jd_done_nolock(katom, true); } int kbase_jd_submit(struct kbase_context *kctx, @@ -1580,8 +1581,8 @@ void kbase_jd_done_worker(struct kthread_work *data) kbasep_js_remove_job(kbdev, kctx, katom); rt_mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); rt_mutex_unlock(&js_devdata->queue_mutex); - /* jd_done_nolock() requires the jsctx_mutex lock to be dropped */ - jd_done_nolock(katom, false); + /* kbase_jd_done_nolock() requires the jsctx_mutex lock to be dropped */ + kbase_jd_done_nolock(katom, false); /* katom may have been freed now, do not use! */ @@ -1647,7 +1648,7 @@ void kbase_jd_done_worker(struct kthread_work *data) kbase_js_sched_all(kbdev); if (!atomic_dec_return(&kctx->work_count)) { - /* If worker now idle then post all events that jd_done_nolock() + /* If worker now idle then post all events that kbase_jd_done_nolock() * has queued */ rt_mutex_lock(&jctx->lock); @@ -1693,6 +1694,7 @@ static void jd_cancel_worker(struct kthread_work *data) struct kbase_jd_context *jctx; struct kbase_context *kctx; struct kbasep_js_kctx_info *js_kctx_info; + bool need_to_try_schedule_context; bool attr_state_changed; struct kbase_device *kbdev; @@ -1720,12 +1722,13 @@ static void jd_cancel_worker(struct kthread_work *data) rt_mutex_lock(&jctx->lock); - jd_done_nolock(katom, true); + need_to_try_schedule_context = kbase_jd_done_nolock(katom, true); /* Because we're zapping, we're not adding any more jobs to this ctx, so no need to * schedule the context. There's also no need for the jsctx_mutex to have been taken * around this too. 
*/ KBASE_DEBUG_ASSERT(!need_to_try_schedule_context); + CSTD_UNUSED(need_to_try_schedule_context); /* katom may have been freed now, do not use! */ rt_mutex_unlock(&jctx->lock); @@ -1764,6 +1767,8 @@ void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, kbdev = kctx->kbdev; KBASE_DEBUG_ASSERT(kbdev); + lockdep_assert_held(&kbdev->hwaccess_lock); + if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; diff --git a/mali_kbase/mali_kbase_jd_debugfs.c b/mali_kbase/mali_kbase_jd_debugfs.c index f9b41d5..0d6230d 100644 --- a/mali_kbase/mali_kbase_jd_debugfs.c +++ b/mali_kbase/mali_kbase_jd_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -72,9 +72,7 @@ static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom, #endif seq_printf(sfile, -#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE) - "Sd(%u#%u: %s) ", -#elif (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) +#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) "Sd(%llu#%u: %s) ", #else "Sd(%llu#%llu: %s) ", @@ -93,9 +91,7 @@ static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom, #endif seq_printf(sfile, -#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE) - "Wd(%u#%u: %s) ", -#elif (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) +#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) "Wd(%llu#%u: %s) ", #else "Wd(%llu#%llu: %s) ", @@ -164,7 +160,7 @@ static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data) BASE_UK_VERSION_MINOR); /* Print table heading */ - seq_puts(sfile, " ID, Core req, St, CR, Predeps, Start time, Additional info...\n"); + seq_puts(sfile, " ID, Core req, St, Predeps, Start time, Additional info...\n"); atoms = kctx->jctx.atoms; /* General atom states */ @@ -184,8 +180,8 @@ static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data) * it is valid */ if (ktime_to_ns(atom->start_timestamp)) - start_timestamp = ktime_to_ns( - ktime_sub(ktime_get(), atom->start_timestamp)); + start_timestamp = + ktime_to_ns(ktime_sub(ktime_get_raw(), atom->start_timestamp)); kbasep_jd_debugfs_atom_deps(deps, atom); @@ -230,11 +226,7 @@ static const struct file_operations kbasep_jd_debugfs_atoms_fops = { void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx) { -#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) const mode_t mode = 0444; -#else - const mode_t mode = 0400; -#endif /* Caller already ensures this, but we keep the pattern for * maintenance safety. diff --git a/mali_kbase/mali_kbase_kinstr_jm.c b/mali_kbase/mali_kbase_kinstr_jm.c index 84efbb3..34ba196 100644 --- a/mali_kbase/mali_kbase_kinstr_jm.c +++ b/mali_kbase/mali_kbase_kinstr_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -45,6 +45,7 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/version.h> +#include <linux/version_compat_defs.h> #include <linux/wait.h> /* Define static_assert(). 
@@ -60,10 +61,6 @@ #define __static_assert(e, msg, ...) _Static_assert(e, msg) #endif -#if KERNEL_VERSION(4, 16, 0) >= LINUX_VERSION_CODE -typedef unsigned int __poll_t; -#endif - #ifndef ENOTSUP #define ENOTSUP EOPNOTSUPP #endif @@ -637,11 +634,11 @@ static __poll_t reader_poll(struct file *const file, struct reader_changes *changes; if (unlikely(!file || !wait)) - return -EINVAL; + return (__poll_t)-EINVAL; reader = file->private_data; if (unlikely(!reader)) - return -EBADF; + return (__poll_t)-EBADF; changes = &reader->changes; @@ -666,7 +663,7 @@ static const struct file_operations file_operations = { static const size_t kbase_kinstr_jm_readers_max = 16; /** - * kbasep_kinstr_jm_release() - Invoked when the reference count is dropped + * kbase_kinstr_jm_release() - Invoked when the reference count is dropped * @ref: the context reference count */ static void kbase_kinstr_jm_release(struct kref *const ref) @@ -737,7 +734,7 @@ static int kbase_kinstr_jm_readers_add(struct kbase_kinstr_jm *const ctx, } /** - * readers_del() - Deletes a reader from the list of readers + * kbase_kinstr_jm_readers_del() - Deletes a reader from the list of readers * @ctx: the instrumentation context * @reader: the reader to delete */ diff --git a/mali_kbase/mali_kbase_kinstr_jm.h b/mali_kbase/mali_kbase_kinstr_jm.h index 2c904e5..84fabac 100644 --- a/mali_kbase/mali_kbase_kinstr_jm.h +++ b/mali_kbase/mali_kbase_kinstr_jm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -71,8 +71,6 @@ #else /* empty wrapper macros for userspace */ #define static_branch_unlikely(key) (1) -#define KERNEL_VERSION(a, b, c) (0) -#define LINUX_VERSION_CODE (1) #endif /* __KERNEL__ */ /* Forward declarations */ diff --git a/mali_kbase/mali_kbase_kinstr_prfcnt.c b/mali_kbase/mali_kbase_kinstr_prfcnt.c index afc008b..b7c8a16 100644 --- a/mali_kbase/mali_kbase_kinstr_prfcnt.c +++ b/mali_kbase/mali_kbase_kinstr_prfcnt.c @@ -36,6 +36,7 @@ #include <linux/mutex.h> #include <linux/poll.h> #include <linux/slab.h> +#include <linux/version_compat_defs.h> #include <linux/workqueue.h> /* The minimum allowed interval between dumps, in nanoseconds @@ -87,16 +88,13 @@ struct kbase_kinstr_prfcnt_sample { /** * struct kbase_kinstr_prfcnt_sample_array - Array of sample data. - * @page_addr: Address of allocated pages. A single allocation is used + * @user_buf: Address of allocated userspace buffer. A single allocation is used * for all Dump Buffers in the array. - * @page_order: The allocation order of the pages, the order is on a - * logarithmic scale. * @sample_count: Number of allocated samples. * @samples: Non-NULL pointer to the array of Dump Buffers. */ struct kbase_kinstr_prfcnt_sample_array { - u64 page_addr; - unsigned int page_order; + u8 *user_buf; size_t sample_count; struct kbase_kinstr_prfcnt_sample *samples; }; @@ -229,25 +227,19 @@ static struct prfcnt_enum_item kinstr_prfcnt_supported_requests[] = { * Return: POLLIN if data can be read without blocking, 0 if data can not be * read without blocking, else error code. 
*/ -#if KERNEL_VERSION(4, 16, 0) >= LINUX_VERSION_CODE -static unsigned int -kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp, - struct poll_table_struct *wait) -#else static __poll_t kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp, struct poll_table_struct *wait) -#endif { struct kbase_kinstr_prfcnt_client *cli; if (!filp || !wait) - return -EINVAL; + return (__poll_t)-EINVAL; cli = filp->private_data; if (!cli) - return -EINVAL; + return (__poll_t)-EINVAL; poll_wait(filp, &cli->waitq, wait); @@ -392,7 +384,10 @@ kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(u64 type) block_type = PRFCNT_BLOCK_TYPE_MEMORY; break; - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: default: block_type = PRFCNT_BLOCK_TYPE_RESERVED; break; @@ -429,7 +424,7 @@ static int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *enable_map, struct kbase_hwcnt_dump_buffer *dst, struct prfcnt_metadata **block_meta_base, - u64 base_addr, u8 counter_set) + u8 *base_addr, u8 counter_set) { size_t grp, blk, blk_inst; struct prfcnt_metadata **ptr_md = block_meta_base; @@ -440,7 +435,7 @@ int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *ena metadata = dst->metadata; kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { - u64 *dst_blk; + u8 *dst_blk; /* Skip unavailable or non-enabled blocks */ if (kbase_kinstr_is_block_type_reserved(metadata, grp, blk) || @@ -448,7 +443,7 @@ int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *ena !kbase_hwcnt_enable_map_block_enabled(enable_map, grp, blk, blk_inst)) continue; - dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); + dst_blk = (u8 *)kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); (*ptr_md)->hdr.item_type = PRFCNT_SAMPLE_META_TYPE_BLOCK; (*ptr_md)->hdr.item_version = PRFCNT_READER_API_VERSION; (*ptr_md)->u.block_md.block_type = @@ -458,7 +453,7 @@ int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *ena (*ptr_md)->u.block_md.block_idx = (u8)blk_inst; (*ptr_md)->u.block_md.set = counter_set; (*ptr_md)->u.block_md.block_state = BLOCK_STATE_UNKNOWN; - (*ptr_md)->u.block_md.values_offset = (u32)((u64)(uintptr_t)dst_blk - base_addr); + (*ptr_md)->u.block_md.values_offset = (u32)(dst_blk - base_addr); /* update the buf meta data block pointer to next item */ (*ptr_md)++; @@ -504,7 +499,7 @@ static void kbasep_kinstr_prfcnt_set_sample_metadata( /* Dealing with counter blocks */ ptr_md++; if (WARN_ON(kbasep_kinstr_prfcnt_set_block_meta_items(&cli->enable_map, dump_buf, &ptr_md, - cli->sample_arr.page_addr, + cli->sample_arr.user_buf, cli->config.counter_set))) return; @@ -1017,12 +1012,8 @@ kbasep_kinstr_prfcnt_get_sample(struct kbase_kinstr_prfcnt_client *cli, } read_idx %= cli->sample_arr.sample_count; - sample_offset_bytes = - (u64)(uintptr_t)cli->sample_arr.samples[read_idx].sample_meta - - (u64)(uintptr_t)cli->sample_arr.page_addr; - sample_meta = - (struct prfcnt_metadata *)cli->sample_arr.samples[read_idx] - .sample_meta; + sample_meta = cli->sample_arr.samples[read_idx].sample_meta; + sample_offset_bytes = (u8 *)sample_meta - cli->sample_arr.user_buf; /* Verify that a valid sample has been dumped in the read_idx. 
* There are situations where this may not be the case, @@ -1067,8 +1058,7 @@ kbasep_kinstr_prfcnt_put_sample(struct kbase_kinstr_prfcnt_client *cli, read_idx %= cli->sample_arr.sample_count; sample_offset_bytes = - (u64)(uintptr_t)cli->sample_arr.samples[read_idx].sample_meta - - (u64)(uintptr_t)cli->sample_arr.page_addr; + (u8 *)cli->sample_arr.samples[read_idx].sample_meta - cli->sample_arr.user_buf; if (sample_access->sample_offset_bytes != sample_offset_bytes) { err = -EINVAL; @@ -1160,40 +1150,15 @@ static int kbasep_kinstr_prfcnt_hwcnt_reader_mmap(struct file *filp, struct vm_area_struct *vma) { struct kbase_kinstr_prfcnt_client *cli; - unsigned long vm_size, size, addr, pfn, offset; if (!filp || !vma) return -EINVAL; - cli = filp->private_data; + cli = filp->private_data; if (!cli) return -EINVAL; - vm_size = vma->vm_end - vma->vm_start; - - /* The mapping is allowed to span the entirety of the page allocation, - * not just the chunk where the dump buffers are allocated. - * This accommodates the corner case where the combined size of the - * dump buffers is smaller than a single page. - * This does not pose a security risk as the pages are zeroed on - * allocation, and anything out of bounds of the dump buffers is never - * written to. - */ - size = (1ull << cli->sample_arr.page_order) * PAGE_SIZE; - - if (vma->vm_pgoff > (size >> PAGE_SHIFT)) - return -EINVAL; - - offset = vma->vm_pgoff << PAGE_SHIFT; - - if (vm_size > size - offset) - return -EINVAL; - - addr = __pa(cli->sample_arr.page_addr + offset); - pfn = addr >> PAGE_SHIFT; - - return remap_pfn_range(vma, vma->vm_start, pfn, vm_size, - vma->vm_page_prot); + return remap_vmalloc_range(vma, cli->sample_arr.user_buf, 0); } static void kbasep_kinstr_prfcnt_sample_array_free( @@ -1202,8 +1167,8 @@ static void kbasep_kinstr_prfcnt_sample_array_free( if (!sample_arr) return; - kfree((void *)sample_arr->samples); - kfree((void *)(size_t)sample_arr->page_addr); + kfree(sample_arr->samples); + vfree(sample_arr->user_buf); memset(sample_arr, 0, sizeof(*sample_arr)); } @@ -1443,8 +1408,6 @@ void kbase_kinstr_prfcnt_term(struct kbase_kinstr_prfcnt_context *kinstr_ctx) if (!kinstr_ctx) return; - cancel_work_sync(&kinstr_ctx->dump_work); - /* Non-zero client count implies client leak */ if (WARN_ON(kinstr_ctx->client_count > 0)) { struct kbase_kinstr_prfcnt_client *pos, *n; @@ -1456,6 +1419,8 @@ void kbase_kinstr_prfcnt_term(struct kbase_kinstr_prfcnt_context *kinstr_ctx) } } + cancel_work_sync(&kinstr_ctx->dump_work); + WARN_ON(kinstr_ctx->client_count > 0); kfree(kinstr_ctx); } @@ -1530,8 +1495,6 @@ static int kbasep_kinstr_prfcnt_sample_array_alloc(struct kbase_kinstr_prfcnt_cl struct kbase_kinstr_prfcnt_sample_array *sample_arr = &cli->sample_arr; struct kbase_kinstr_prfcnt_sample *samples; size_t sample_idx; - u64 addr; - unsigned int order; size_t dump_buf_bytes; size_t clk_cnt_buf_bytes; size_t sample_meta_bytes; @@ -1554,16 +1517,13 @@ static int kbasep_kinstr_prfcnt_sample_array_alloc(struct kbase_kinstr_prfcnt_cl if (!samples) return -ENOMEM; - order = get_order(sample_size * buffer_count); - addr = (u64)(uintptr_t)kzalloc(sample_size * buffer_count, GFP_KERNEL); + sample_arr->user_buf = vmalloc_user(sample_size * buffer_count); - if (!addr) { - kfree((void *)samples); + if (!sample_arr->user_buf) { + kfree(samples); return -ENOMEM; } - sample_arr->page_addr = addr; - sample_arr->page_order = order; sample_arr->sample_count = buffer_count; sample_arr->samples = samples; @@ -1577,12 +1537,11 @@ static int 
kbasep_kinstr_prfcnt_sample_array_alloc(struct kbase_kinstr_prfcnt_cl /* Internal layout in a sample buffer: [sample metadata, dump_buf, clk_cnt_buf]. */ samples[sample_idx].dump_buf.metadata = metadata; samples[sample_idx].sample_meta = - (struct prfcnt_metadata *)(uintptr_t)( - addr + sample_meta_offset); + (struct prfcnt_metadata *)(sample_arr->user_buf + sample_meta_offset); samples[sample_idx].dump_buf.dump_buf = - (u64 *)(uintptr_t)(addr + dump_buf_offset); + (u64 *)(sample_arr->user_buf + dump_buf_offset); samples[sample_idx].dump_buf.clk_cnt_buf = - (u64 *)(uintptr_t)(addr + clk_cnt_buf_offset); + (u64 *)(sample_arr->user_buf + clk_cnt_buf_offset); } return 0; @@ -2033,7 +1992,6 @@ static int kbasep_kinstr_prfcnt_enum_info_count( struct kbase_kinstr_prfcnt_context *kinstr_ctx, struct kbase_ioctl_kinstr_prfcnt_enum_info *enum_info) { - int err = 0; uint32_t count = 0; size_t block_info_count = 0; const struct kbase_hwcnt_metadata *metadata; @@ -2054,7 +2012,7 @@ static int kbasep_kinstr_prfcnt_enum_info_count( enum_info->info_item_size = sizeof(struct prfcnt_enum_item); kinstr_ctx->info_item_count = count; - return err; + return 0; } static int kbasep_kinstr_prfcnt_enum_info_list( @@ -2167,15 +2125,10 @@ int kbase_kinstr_prfcnt_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx, } bytes = item_count * sizeof(*req_arr); - req_arr = kmalloc(bytes, GFP_KERNEL); + req_arr = memdup_user(u64_to_user_ptr(setup->in.requests_ptr), bytes); - if (!req_arr) - return -ENOMEM; - - if (copy_from_user(req_arr, u64_to_user_ptr(setup->in.requests_ptr), bytes)) { - err = -EFAULT; - goto free_buf; - } + if (IS_ERR(req_arr)) + return PTR_ERR(req_arr); err = kbasep_kinstr_prfcnt_client_create(kinstr_ctx, setup, &cli, req_arr); diff --git a/mali_kbase/mali_kbase_kinstr_prfcnt.h b/mali_kbase/mali_kbase_kinstr_prfcnt.h index ec42ce0..e834926 100644 --- a/mali_kbase/mali_kbase_kinstr_prfcnt.h +++ b/mali_kbase/mali_kbase_kinstr_prfcnt.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -124,7 +124,7 @@ size_t kbasep_kinstr_prfcnt_get_sample_md_count(const struct kbase_hwcnt_metadat int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *enable_map, struct kbase_hwcnt_dump_buffer *dst, struct prfcnt_metadata **block_meta_base, - u64 base_addr, u8 counter_set); + u8 *base_addr, u8 counter_set); /** * kbasep_kinstr_prfcnt_client_create() - Create a kinstr_prfcnt client. 
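The kinstr_prfcnt hunks above move the client sample array from a kzalloc'd physical allocation mapped with remap_pfn_range() to a vmalloc_user() buffer mapped with remap_vmalloc_range(). As a minimal sketch of that pairing — outside the patch itself, with hypothetical names standing in for the driver's structures — the allocate/mmap/free path looks roughly like this:

#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/vmalloc.h>

/* Hypothetical buffer standing in for sample_arr->user_buf. */
static u8 *example_user_buf;

static int example_buf_alloc(size_t bytes)
{
        /* Zeroed, page-aligned memory that may legally be mapped to user space. */
        example_user_buf = vmalloc_user(bytes);
        return example_user_buf ? 0 : -ENOMEM;
}

static int example_mmap(struct file *filp, struct vm_area_struct *vma)
{
        /* remap_vmalloc_range() validates the VMA size against the allocation,
         * so no manual pfn or offset arithmetic is required here.
         */
        return remap_vmalloc_range(vma, example_user_buf, 0);
}

static void example_buf_free(void)
{
        vfree(example_user_buf);
        example_user_buf = NULL;
}

The key point is that vmalloc_user() allocates memory that is explicitly allowed to be mapped to user space, which is what lets the mmap handler collapse to a single remap_vmalloc_range() call.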
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c index 989ce1e..fcbaf2b 100644 --- a/mali_kbase/mali_kbase_mem.c +++ b/mali_kbase/mali_kbase_mem.c @@ -1803,9 +1803,8 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, return err; bad_insert: - kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn, reg->nr_pages, - kctx->as_nr); + kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, + reg->nr_pages, kctx->as_nr); kbase_remove_va_region(kctx->kbdev, reg); @@ -1820,6 +1819,7 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) { int err = 0; + struct kbase_mem_phy_alloc *alloc; if (reg->start_pfn == 0) return 0; @@ -1827,11 +1827,12 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) if (!reg->gpu_alloc) return -EINVAL; + alloc = reg->gpu_alloc; + /* Tear down GPU page tables, depending on memory type. */ - switch (reg->gpu_alloc->type) { + switch (alloc->type) { case KBASE_MEM_TYPE_ALIAS: { size_t i = 0; - struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; /* Due to the way the number of valid PTEs and ATEs are tracked * currently, only the GPU virtual range that is backed & mapped @@ -1843,9 +1844,8 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) if (alloc->imported.alias.aliased[i].alloc) { int err_loop = kbase_mmu_teardown_pages( kctx->kbdev, &kctx->mmu, - reg->start_pfn + - (i * - alloc->imported.alias.stride), + reg->start_pfn + (i * alloc->imported.alias.stride), + alloc->pages + (i * alloc->imported.alias.stride), alloc->imported.alias.aliased[i].length, kctx->as_nr); if (WARN_ON_ONCE(err_loop)) @@ -1855,32 +1855,32 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) } break; case KBASE_MEM_TYPE_IMPORTED_UMM: - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn, reg->nr_pages, kctx->as_nr); + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + alloc->pages, reg->nr_pages, kctx->as_nr); break; default: - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn, kbase_reg_current_backed_size(reg), - kctx->as_nr); + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + alloc->pages, kbase_reg_current_backed_size(reg), + kctx->as_nr); break; } /* Update tracking, and other cleanup, depending on memory type. */ - switch (reg->gpu_alloc->type) { + switch (alloc->type) { case KBASE_MEM_TYPE_ALIAS: /* We mark the source allocs as unmapped from the GPU when * putting reg's allocs */ break; case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { - struct kbase_alloc_import_user_buf *user_buf = ®->gpu_alloc->imported.user_buf; + struct kbase_alloc_import_user_buf *user_buf = &alloc->imported.user_buf; if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) { user_buf->current_mapping_usage_count &= ~PINNED_ON_IMPORT; /* The allocation could still have active mappings. 
*/ if (user_buf->current_mapping_usage_count == 0) { - kbase_jd_user_buf_unmap(kctx, reg->gpu_alloc, reg, + kbase_jd_user_buf_unmap(kctx, alloc, reg, (reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR))); } @@ -3422,7 +3422,7 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, } /** - * Acquire the per-context region list lock + * kbase_gpu_vm_lock() - Acquire the per-context region list lock * @kctx: KBase context */ void kbase_gpu_vm_lock(struct kbase_context *kctx) @@ -3434,7 +3434,7 @@ void kbase_gpu_vm_lock(struct kbase_context *kctx) KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock); /** - * Release the per-context region list lock + * kbase_gpu_vm_unlock() - Release the per-context region list lock * @kctx: KBase context */ void kbase_gpu_vm_unlock(struct kbase_context *kctx) @@ -3672,12 +3672,7 @@ void kbase_jit_debugfs_init(struct kbase_context *kctx) /* prevent unprivileged use of debug file system * in old kernel version */ -#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) - /* only for newer kernel version debug file system is safe */ const mode_t mode = 0444; -#else - const mode_t mode = 0400; -#endif /* Caller already ensures this, but we keep the pattern for * maintenance safety. @@ -3766,6 +3761,7 @@ int kbase_jit_init(struct kbase_context *kctx) INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker); #if MALI_USE_CSF + spin_lock_init(&kctx->csf.kcpu_queues.jit_lock); INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_cmds_head); INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_blocked_queues); #else /* !MALI_USE_CSF */ @@ -4203,9 +4199,7 @@ static bool jit_allow_allocate(struct kbase_context *kctx, const struct base_jit_alloc_info *info, bool ignore_pressure_limit) { -#if MALI_USE_CSF - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); -#else +#if !MALI_USE_CSF lockdep_assert_held(&kctx->jctx.lock); #endif @@ -4298,9 +4292,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, */ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC; -#if MALI_USE_CSF - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); -#else +#if !MALI_USE_CSF lockdep_assert_held(&kctx->jctx.lock); #endif @@ -4813,18 +4805,7 @@ int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR); -#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE - pinned_pages = get_user_pages(NULL, mm, address, alloc->imported.user_buf.nr_pages, -#if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \ -KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE - write ? FOLL_WRITE : 0, pages, NULL); -#else - write, 0, pages, NULL); -#endif -#elif KERNEL_VERSION(4, 9, 0) > LINUX_VERSION_CODE - pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages, - write, 0, pages, NULL); -#elif KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE +#if KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages, write ? 
FOLL_WRITE : 0, pages, NULL); #elif KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE @@ -4860,11 +4841,11 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc; struct page **pages; struct tagged_addr *pa; - long i; + long i, dma_mapped_pages; unsigned long address; struct device *dev; - unsigned long offset; - unsigned long local_size; + unsigned long offset_within_page; + unsigned long remaining_size; unsigned long gwt_mask = ~0; /* Calls to this function are inherently asynchronous, with respect to * MMU operations. @@ -4884,17 +4865,16 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, pinned_pages = alloc->nents; pages = alloc->imported.user_buf.pages; dev = kctx->kbdev->dev; - offset = address & ~PAGE_MASK; - local_size = alloc->imported.user_buf.size; + offset_within_page = address & ~PAGE_MASK; + remaining_size = alloc->imported.user_buf.size; for (i = 0; i < pinned_pages; i++) { - dma_addr_t dma_addr; - unsigned long min; - - min = MIN(PAGE_SIZE - offset, local_size); - dma_addr = dma_map_page(dev, pages[i], - offset, min, + unsigned long map_size = + MIN(PAGE_SIZE - offset_within_page, remaining_size); + dma_addr_t dma_addr = dma_map_page(dev, pages[i], + offset_within_page, map_size, DMA_BIDIRECTIONAL); + err = dma_mapping_error(dev, dma_addr); if (err) goto unwind; @@ -4902,8 +4882,8 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, alloc->imported.user_buf.dma_addrs[i] = dma_addr; pa[i] = as_tagged(page_to_phys(pages[i])); - local_size -= min; - offset = 0; + remaining_size -= map_size; + offset_within_page = 0; } #ifdef CONFIG_MALI_CINSTR_GWT @@ -4921,10 +4901,19 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, /* fall down */ unwind: alloc->nents = 0; - while (i--) { + offset_within_page = address & ~PAGE_MASK; + remaining_size = alloc->imported.user_buf.size; + dma_mapped_pages = i; + /* Run the unmap loop in the same order as map loop */ + for (i = 0; i < dma_mapped_pages; i++) { + unsigned long unmap_size = + MIN(PAGE_SIZE - offset_within_page, remaining_size); + dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], - PAGE_SIZE, DMA_BIDIRECTIONAL); + unmap_size, DMA_BIDIRECTIONAL); + remaining_size -= unmap_size; + offset_within_page = 0; } /* The user buffer could already have been previously pinned before @@ -4950,7 +4939,8 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem { long i; struct page **pages; - unsigned long size = alloc->imported.user_buf.size; + unsigned long offset_within_page = alloc->imported.user_buf.address & ~PAGE_MASK; + unsigned long remaining_size = alloc->imported.user_buf.size; lockdep_assert_held(&kctx->reg_lock); @@ -4964,11 +4954,11 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem #endif for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { - unsigned long local_size; + unsigned long unmap_size = + MIN(remaining_size, PAGE_SIZE - offset_within_page); dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; - local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK)); - dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size, + dma_unmap_page(kctx->kbdev->dev, dma_addr, unmap_size, DMA_BIDIRECTIONAL); if (writeable) set_page_dirty_lock(pages[i]); @@ -4977,7 +4967,8 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem pages[i] = NULL; #endif - size -= local_size; + remaining_size -= unmap_size; + offset_within_page = 0; } #if !MALI_USE_CSF 
alloc->nents = 0; @@ -5089,6 +5080,7 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_r if (!kbase_is_region_invalid_or_free(reg)) { kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + alloc->pages, kbase_reg_current_backed_size(reg), kctx->as_nr); } diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h index 1c7169b..2013d38 100644 --- a/mali_kbase/mali_kbase_mem.h +++ b/mali_kbase/mali_kbase_mem.h @@ -1735,8 +1735,8 @@ void kbase_jit_report_update_pressure(struct kbase_context *kctx, unsigned int flags); /** - * jit_trim_necessary_pages() - calculate and trim the least pages possible to - * satisfy a new JIT allocation + * kbase_jit_trim_necessary_pages() - calculate and trim the least pages + * possible to satisfy a new JIT allocation * * @kctx: Pointer to the kbase context * @needed_pages: Number of JIT physical pages by which trimming is requested. @@ -1983,7 +1983,7 @@ static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool) } /** - * kbase_mem_pool_lock - Release a memory pool + * kbase_mem_pool_unlock - Release a memory pool * @pool: Memory pool to lock */ static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool) diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c index 327b7dc..c0ee10c 100644 --- a/mali_kbase/mali_kbase_mem_linux.c +++ b/mali_kbase/mali_kbase_mem_linux.c @@ -31,13 +31,11 @@ #include <linux/fs.h> #include <linux/version.h> #include <linux/dma-mapping.h> -#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE) -#include <linux/dma-attrs.h> -#endif /* LINUX_VERSION_CODE < 4.8.0 */ #include <linux/dma-buf.h> #include <linux/shrinker.h> #include <linux/cache.h> #include <linux/memory_group_manager.h> +#include <linux/math64.h> #include <mali_kbase.h> #include <mali_kbase_mem_linux.h> @@ -84,10 +82,8 @@ #define IR_THRESHOLD_STEPS (256u) #if MALI_USE_CSF -static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, - struct vm_area_struct *vma); -static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, - struct vm_area_struct *vma); +static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, struct vm_area_struct *vma); +static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, struct vm_area_struct *vma); #endif static int kbase_vmap_phy_pages(struct kbase_context *kctx, @@ -115,6 +111,7 @@ static bool is_process_exiting(struct vm_area_struct *vma) */ if (atomic_read(&vma->vm_mm->mm_users)) return false; + return true; } @@ -1120,19 +1117,7 @@ int kbase_mem_do_sync_imported(struct kbase_context *kctx, ret = 0; } #else - /* Though the below version check could be superfluous depending upon the version condition - * used for enabling KBASE_MEM_ION_SYNC_WORKAROUND, we still keep this check here to allow - * ease of modification for non-ION systems or systems where ION has been patched. 
- */ -#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS) - dma_buf_end_cpu_access(dma_buf, - 0, dma_buf->size, - dir); - ret = 0; -#else - ret = dma_buf_end_cpu_access(dma_buf, - dir); -#endif + ret = dma_buf_end_cpu_access(dma_buf, dir); #endif /* KBASE_MEM_ION_SYNC_WORKAROUND */ break; case KBASE_SYNC_TO_CPU: @@ -1149,11 +1134,7 @@ int kbase_mem_do_sync_imported(struct kbase_context *kctx, ret = 0; } #else - ret = dma_buf_begin_cpu_access(dma_buf, -#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS) - 0, dma_buf->size, -#endif - dir); + ret = dma_buf_begin_cpu_access(dma_buf, dir); #endif /* KBASE_MEM_ION_SYNC_WORKAROUND */ break; } @@ -1329,11 +1310,8 @@ int kbase_mem_umm_map(struct kbase_context *kctx, return 0; bad_pad_insert: - kbase_mmu_teardown_pages(kctx->kbdev, - &kctx->mmu, - reg->start_pfn, - alloc->nents, - kctx->as_nr); + kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, + alloc->nents, kctx->as_nr); bad_insert: kbase_mem_umm_unmap_attachment(kctx, alloc); bad_map_attachment: @@ -1361,11 +1339,8 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx, if (!kbase_is_region_invalid_or_free(reg) && reg->gpu_alloc == alloc) { int err; - err = kbase_mmu_teardown_pages(kctx->kbdev, - &kctx->mmu, - reg->start_pfn, - reg->nr_pages, - kctx->as_nr); + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + alloc->pages, reg->nr_pages, kctx->as_nr); WARN_ON(err); } @@ -1558,13 +1533,15 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( struct kbase_context *kctx, unsigned long address, unsigned long size, u64 *va_pages, u64 *flags) { - long i; + long i, dma_mapped_pages; struct kbase_va_region *reg; struct rb_root *rbtree; long faulted_pages; int zone = KBASE_REG_ZONE_CUSTOM_VA; bool shared_zone = false; u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev); + unsigned long offset_within_page; + unsigned long remaining_size; struct kbase_alloc_import_user_buf *user_buf; struct page **pages = NULL; int write; @@ -1683,18 +1660,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR); -#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE - faulted_pages = get_user_pages(current, current->mm, address, *va_pages, -#if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \ -KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE - write ? FOLL_WRITE : 0, pages, NULL); -#else - write, 0, pages, NULL); -#endif -#elif KERNEL_VERSION(4, 9, 0) > LINUX_VERSION_CODE - faulted_pages = get_user_pages(address, *va_pages, - write, 0, pages, NULL); -#elif KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE +#if KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE faulted_pages = get_user_pages(address, *va_pages, write ? 
FOLL_WRITE : 0, pages, NULL); #else @@ -1727,29 +1693,27 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE if (pages) { struct device *dev = kctx->kbdev->dev; - unsigned long local_size = user_buf->size; - unsigned long offset = user_buf->address & ~PAGE_MASK; struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg); /* Top bit signifies that this was pinned on import */ user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT; + offset_within_page = user_buf->address & ~PAGE_MASK; + remaining_size = user_buf->size; for (i = 0; i < faulted_pages; i++) { - dma_addr_t dma_addr; - unsigned long min; + unsigned long map_size = + MIN(PAGE_SIZE - offset_within_page, remaining_size); + dma_addr_t dma_addr = dma_map_page(dev, pages[i], + offset_within_page, map_size, DMA_BIDIRECTIONAL); - min = MIN(PAGE_SIZE - offset, local_size); - dma_addr = dma_map_page(dev, pages[i], - offset, min, - DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, dma_addr)) goto unwind_dma_map; user_buf->dma_addrs[i] = dma_addr; pa[i] = as_tagged(page_to_phys(pages[i])); - local_size -= min; - offset = 0; + remaining_size -= map_size; + offset_within_page = 0; } reg->gpu_alloc->nents = faulted_pages; @@ -1758,10 +1722,19 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE return reg; unwind_dma_map: - while (i--) { + offset_within_page = user_buf->address & ~PAGE_MASK; + remaining_size = user_buf->size; + dma_mapped_pages = i; + /* Run the unmap loop in the same order as map loop */ + for (i = 0; i < dma_mapped_pages; i++) { + unsigned long unmap_size = + MIN(PAGE_SIZE - offset_within_page, remaining_size); + dma_unmap_page(kctx->kbdev->dev, user_buf->dma_addrs[i], - PAGE_SIZE, DMA_BIDIRECTIONAL); + unmap_size, DMA_BIDIRECTIONAL); + remaining_size -= unmap_size; + offset_within_page = 0; } fault_mismatch: if (pages) { @@ -1793,6 +1766,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 gpu_va; size_t i; bool coherent; + uint64_t max_stride; /* Calls to this function are inherently asynchronous, with respect to * MMU operations. 
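The rewritten import loop above walks the pinned user pages with two running values, offset_within_page and remaining_size: the first page may be mapped from a non-zero offset, every later page is mapped from offset zero, and the unwind path replays the identical sizes so each dma_unmap_page() length matches the earlier dma_map_page(). A small user-space sketch of that arithmetic (the helper name and the sample values are illustrative, not part of the driver):

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SIZE 4096UL
	#define MIN(a, b) ((a) < (b) ? (a) : (b))

	/* Print the per-page DMA mapping sizes for a user buffer at 'address' of
	 * 'size' bytes, mirroring the offset_within_page/remaining_size loop above.
	 */
	static void show_dma_chunks(unsigned long address, unsigned long size)
	{
		unsigned long offset_within_page = address & (PAGE_SIZE - 1);
		unsigned long remaining_size = size;
		unsigned int i = 0;

		while (remaining_size) {
			unsigned long map_size = MIN(PAGE_SIZE - offset_within_page, remaining_size);

			printf("page %u: offset %lu, map %lu bytes\n", i++, offset_within_page, map_size);
			remaining_size -= map_size;
			offset_within_page = 0;	/* only the first page can start mid-page */
		}
	}

	int main(void)
	{
		/* 8704 bytes starting 3840 bytes into a page: 256 + 4096 + 4096 + 256 */
		show_dma_chunks(0x1000F00UL, 0x2200UL);
		return 0;
	}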
@@ -1825,7 +1799,9 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, if (!nents) goto bad_nents; - if (stride > U64_MAX / nents) + max_stride = div64_u64(U64_MAX, nents); + + if (stride > max_stride) goto bad_size; if ((nents * stride) > (U64_MAX / PAGE_SIZE)) @@ -2217,10 +2193,11 @@ static int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx, u64 const new_pages, u64 const old_pages) { u64 delta = old_pages - new_pages; + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; int ret = 0; - ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn + new_pages, delta, kctx->as_nr); + ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + new_pages, + alloc->pages + new_pages, delta, kctx->as_nr); return ret; } @@ -3434,13 +3411,6 @@ static vm_fault_t kbase_csf_user_io_pages_vm_fault(struct vm_fault *vmf) /* Always map the doorbell page as uncached */ doorbell_pgprot = pgprot_device(vma->vm_page_prot); -#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \ - ((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \ - (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE))) - vma->vm_page_prot = doorbell_pgprot; - input_page_pgprot = doorbell_pgprot; - output_page_pgprot = doorbell_pgprot; -#else if (kbdev->system_coherency == COHERENCY_NONE) { input_page_pgprot = pgprot_writecombine(vma->vm_page_prot); output_page_pgprot = pgprot_writecombine(vma->vm_page_prot); @@ -3448,7 +3418,6 @@ static vm_fault_t kbase_csf_user_io_pages_vm_fault(struct vm_fault *vmf) input_page_pgprot = vma->vm_page_prot; output_page_pgprot = vma->vm_page_prot; } -#endif doorbell_cpu_addr = vma->vm_start; @@ -3572,13 +3541,71 @@ map_failed: return err; } +/** + * kbase_csf_user_reg_vm_open - VMA open function for the USER page + * + * @vma: Pointer to the struct containing information about + * the userspace mapping of USER page. + * Note: + * This function isn't expected to be called. If called (i.e. mremap), + * set private_data as NULL to indicate to close() and fault() functions. + */ +static void kbase_csf_user_reg_vm_open(struct vm_area_struct *vma) +{ + pr_debug("Unexpected call to the open method for USER register mapping"); + vma->vm_private_data = NULL; +} + +/** + * kbase_csf_user_reg_vm_close - VMA close function for the USER page + * + * @vma: Pointer to the struct containing information about + * the userspace mapping of USER page. + */ static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma) { struct kbase_context *kctx = vma->vm_private_data; - WARN_ON(!kctx->csf.user_reg_vma); + if (!kctx) { + pr_debug("Close function called for the unexpected mapping"); + return; + } + + if (unlikely(!kctx->csf.user_reg_vma)) + dev_warn(kctx->kbdev->dev, "user_reg_vma pointer unexpectedly NULL"); kctx->csf.user_reg_vma = NULL; + + mutex_lock(&kctx->kbdev->csf.reg_lock); + if (unlikely(kctx->kbdev->csf.nr_user_page_mapped == 0)) + dev_warn(kctx->kbdev->dev, "Unexpected value for the USER page mapping counter"); + else + kctx->kbdev->csf.nr_user_page_mapped--; + mutex_unlock(&kctx->kbdev->csf.reg_lock); +} + +/** + * kbase_csf_user_reg_vm_mremap - VMA mremap function for the USER page + * + * @vma: Pointer to the struct containing information about + * the userspace mapping of USER page. + * + * Return: -EINVAL + * + * Note: + * User space must not attempt mremap on USER page mapping. + * This function will return an error to fail the attempt.
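The kbase_mem_alias() hunk above replaces the open-coded "stride > U64_MAX / nents" test with div64_u64(), presumably because a plain 64-by-64 division would pull in a compiler helper on 32-bit kernels, while div64_u64() from <linux/math64.h> (newly included in this patch) is always available. A user-space sketch of the same overflow guard, with an illustrative function name that is not part of the driver:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Reject stride/nents pairs whose product cannot be represented in 64 bits,
	 * mirroring the max_stride check added to kbase_mem_alias().
	 */
	static bool alias_extent_overflows(uint64_t stride, uint64_t nents)
	{
		uint64_t max_stride;

		if (nents == 0)
			return true;	/* the caller already rejects zero nents */

		max_stride = UINT64_MAX / nents;	/* div64_u64(U64_MAX, nents) in the driver */

		return stride > max_stride;
	}

	int main(void)
	{
		printf("%d\n", alias_extent_overflows(1ULL << 40, 1ULL << 25));	/* 1: 2^65 overflows */
		printf("%d\n", alias_extent_overflows(1ULL << 20, 1ULL << 20));	/* 0: 2^40 fits */
		return 0;
	}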
+ */ +static int +#if ((KERNEL_VERSION(5, 13, 0) <= LINUX_VERSION_CODE) || \ + (KERNEL_VERSION(5, 11, 0) > LINUX_VERSION_CODE)) +kbase_csf_user_reg_vm_mremap(struct vm_area_struct *vma) +#else +kbase_csf_user_reg_vm_mremap(struct vm_area_struct *vma, unsigned long flags) +#endif +{ + pr_debug("Unexpected call to mremap method for USER page mapping vma\n"); + return -EINVAL; } #if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) @@ -3591,19 +3618,24 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; #endif struct kbase_context *kctx = vma->vm_private_data; - struct kbase_device *kbdev = kctx->kbdev; - struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev; - unsigned long pfn = PFN_DOWN(kbdev->reg_start + USER_BASE); + struct kbase_device *kbdev; + struct memory_group_manager_device *mgm_dev; + unsigned long pfn; size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start); vm_fault_t ret = VM_FAULT_SIGBUS; unsigned long flags; /* Few sanity checks up front */ - if (WARN_ON(nr_pages != 1) || - WARN_ON(vma != kctx->csf.user_reg_vma) || - WARN_ON(vma->vm_pgoff != - PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE))) + if (!kctx || (nr_pages != 1) || (vma != kctx->csf.user_reg_vma) || + (vma->vm_pgoff != PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE))) { + pr_warn("Unexpected CPU page fault on USER page mapping for process %s tgid %d pid %d\n", + current->comm, current->tgid, current->pid); return VM_FAULT_SIGBUS; + } + + kbdev = kctx->kbdev; + mgm_dev = kbdev->mgm_dev; + pfn = PFN_DOWN(kbdev->reg_start + USER_BASE); mutex_lock(&kbdev->csf.reg_lock); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -3628,14 +3660,31 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf) } static const struct vm_operations_struct kbase_csf_user_reg_vm_ops = { + .open = kbase_csf_user_reg_vm_open, .close = kbase_csf_user_reg_vm_close, + .mremap = kbase_csf_user_reg_vm_mremap, .fault = kbase_csf_user_reg_vm_fault }; +/** + * kbase_csf_cpu_mmap_user_reg_page - Memory map method for USER page. + * + * @kctx: Pointer of the kernel context. + * @vma: Pointer to the struct containing the information about + * the userspace mapping of USER page. + * + * Return: 0 on success, error code otherwise. + * + * Note: + * New Base will request Kbase to read the LATEST_FLUSH of USER page on its behalf. + * But this function needs to be kept for backward-compatibility as old Base (<=1.12) + * will try to mmap USER page for direct access when it creates a base context. 
+ */ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, struct vm_area_struct *vma) { size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start); + struct kbase_device *kbdev = kctx->kbdev; /* Few sanity checks */ if (kctx->csf.user_reg_vma) @@ -3659,6 +3708,17 @@ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, kctx->csf.user_reg_vma = vma; + mutex_lock(&kbdev->csf.reg_lock); + kbdev->csf.nr_user_page_mapped++; + + if (!kbdev->csf.mali_file_inode) + kbdev->csf.mali_file_inode = kctx->filp->f_inode; + + if (unlikely(kbdev->csf.mali_file_inode != kctx->filp->f_inode)) + dev_warn(kbdev->dev, "Device file inode pointer not same for all contexts"); + + mutex_unlock(&kbdev->csf.reg_lock); + vma->vm_ops = &kbase_csf_user_reg_vm_ops; vma->vm_private_data = kctx; diff --git a/mali_kbase/mali_kbase_mem_linux.h b/mali_kbase/mali_kbase_mem_linux.h index 1f6877a..5e5d991 100644 --- a/mali_kbase/mali_kbase_mem_linux.h +++ b/mali_kbase/mali_kbase_mem_linux.h @@ -439,18 +439,7 @@ u32 kbase_get_cache_line_alignment(struct kbase_device *kbdev); static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, pgprot_t pgprot) { - int err; - -#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \ - ((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \ - (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE))) - if (pgprot_val(pgprot) != pgprot_val(vma->vm_page_prot)) - return VM_FAULT_SIGBUS; - - err = vm_insert_pfn(vma, addr, pfn); -#else - err = vm_insert_pfn_prot(vma, addr, pfn, pgprot); -#endif + int err = vm_insert_pfn_prot(vma, addr, pfn, pgprot); if (unlikely(err == -ENOMEM)) return VM_FAULT_OOM; diff --git a/mali_kbase/mali_kbase_mem_pool_debugfs.c b/mali_kbase/mali_kbase_mem_pool_debugfs.c index cfb43b0..3b1b2ba 100644 --- a/mali_kbase/mali_kbase_mem_pool_debugfs.c +++ b/mali_kbase/mali_kbase_mem_pool_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -168,13 +168,7 @@ static const struct file_operations kbase_mem_pool_debugfs_max_size_fops = { void kbase_mem_pool_debugfs_init(struct dentry *parent, struct kbase_context *kctx) { - /* prevent unprivileged use of debug file in old kernel version */ -#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) - /* only for newer kernel version debug file system is safe */ const mode_t mode = 0644; -#else - const mode_t mode = 0600; -#endif debugfs_create_file("mem_pool_size", mode, parent, &kctx->mem_pools.small, &kbase_mem_pool_debugfs_fops); diff --git a/mali_kbase/mali_kbase_mem_pool_group.h b/mali_kbase/mali_kbase_mem_pool_group.h index c50ffdb..f97f47d 100644 --- a/mali_kbase/mali_kbase_mem_pool_group.h +++ b/mali_kbase/mali_kbase_mem_pool_group.h @@ -49,8 +49,8 @@ static inline struct kbase_mem_pool *kbase_mem_pool_group_select( } /** - * kbase_mem_pool_group_config_init - Set the initial configuration for a - * set of memory pools + * kbase_mem_pool_group_config_set_max_size - Set the initial configuration for + * a set of memory pools * * @configs: Initial configuration for the set of memory pools * @max_size: Maximum number of free 4 KiB pages each pool can hold @@ -92,7 +92,7 @@ int kbase_mem_pool_group_init(struct kbase_mem_pool_group *mem_pools, struct kbase_mem_pool_group *next_pools); /** - * kbase_mem_pool_group_term - Mark a set of memory pools as dying + * kbase_mem_pool_group_mark_dying - Mark a set of memory pools as dying * * @mem_pools: Set of memory pools to mark * diff --git a/mali_kbase/mali_kbase_mem_profile_debugfs.c b/mali_kbase/mali_kbase_mem_profile_debugfs.c index 92ab1b8..9317023 100644 --- a/mali_kbase/mali_kbase_mem_profile_debugfs.c +++ b/mali_kbase/mali_kbase_mem_profile_debugfs.c @@ -69,11 +69,7 @@ static const struct file_operations kbasep_mem_profile_debugfs_fops = { int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, size_t size) { -#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) const mode_t mode = 0444; -#else - const mode_t mode = 0400; -#endif int err = 0; mutex_lock(&kctx->mem_profile_lock); diff --git a/mali_kbase/mali_kbase_pbha_debugfs.c b/mali_kbase/mali_kbase_pbha_debugfs.c index 47eab63..4130dd6 100644 --- a/mali_kbase/mali_kbase_pbha_debugfs.c +++ b/mali_kbase/mali_kbase_pbha_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -120,14 +120,10 @@ static const struct file_operations pbha_int_id_overrides_fops = { void kbase_pbha_debugfs_init(struct kbase_device *kbdev) { if (kbasep_pbha_supported(kbdev)) { -#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) - /* only for newer kernel version debug file system is safe */ const mode_t mode = 0644; -#else - const mode_t mode = 0600; -#endif struct dentry *debugfs_pbha_dir = debugfs_create_dir( "pbha", kbdev->mali_debugfs_directory); + if (IS_ERR_OR_NULL(debugfs_pbha_dir)) { dev_err(kbdev->dev, "Couldn't create mali debugfs page-based hardware attributes directory\n"); diff --git a/mali_kbase/mali_kbase_pbha_debugfs.h b/mali_kbase/mali_kbase_pbha_debugfs.h index 3f477b4..508ecdf 100644 --- a/mali_kbase/mali_kbase_pbha_debugfs.h +++ b/mali_kbase/mali_kbase_pbha_debugfs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,7 +25,7 @@ #include <mali_kbase.h> /** - * kbasep_pbha_debugfs_init - Initialize pbha debugfs directory + * kbase_pbha_debugfs_init - Initialize pbha debugfs directory * * @kbdev: Device pointer */ diff --git a/mali_kbase/mali_kbase_platform_fake.c b/mali_kbase/mali_kbase_platform_fake.c index bf525ed..761a636 100644 --- a/mali_kbase/mali_kbase_platform_fake.c +++ b/mali_kbase/mali_kbase_platform_fake.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2014, 2016-2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2014, 2016-2017, 2020-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -39,7 +39,8 @@ static struct platform_device *mali_device; #ifndef CONFIG_OF /** - * Convert data in struct kbase_io_resources struct to Linux-specific resources + * kbasep_config_parse_io_resources - Convert data in struct kbase_io_resources + * struct to Linux-specific resources * @io_resources: Input IO resource data * @linux_resources: Pointer to output array of Linux resource structures * diff --git a/mali_kbase/mali_kbase_pm.c b/mali_kbase/mali_kbase_pm.c index de2422c..1545f3e 100644 --- a/mali_kbase/mali_kbase_pm.c +++ b/mali_kbase/mali_kbase_pm.c @@ -462,11 +462,12 @@ static enum hrtimer_restart kbase_pm_apc_timer_callback(struct hrtimer *timer) int kbase_pm_apc_init(struct kbase_device *kbdev) { - kthread_init_worker(&kbdev->apc.worker); - kbdev->apc.thread = kbase_create_realtime_thread(kbdev, + int ret; + + ret = kbase_create_realtime_thread(kbdev, kthread_worker_fn, &kbdev->apc.worker, "mali_apc_thread"); - if (IS_ERR(kbdev->apc.thread)) - return PTR_ERR(kbdev->apc.thread); + if (ret) + return ret; /* * We initialize power off and power on work on init as they will each @@ -486,6 +487,5 @@ int kbase_pm_apc_init(struct kbase_device *kbdev) void kbase_pm_apc_term(struct kbase_device *kbdev) { hrtimer_cancel(&kbdev->apc.timer); - kthread_flush_worker(&kbdev->apc.worker); - kthread_stop(kbdev->apc.thread); + kbase_destroy_kworker_stack(&kbdev->apc.worker); } diff --git a/mali_kbase/mali_kbase_regs_history_debugfs.c b/mali_kbase/mali_kbase_regs_history_debugfs.c index f8dec6b..c19b4a3 100644 --- a/mali_kbase/mali_kbase_regs_history_debugfs.c +++ b/mali_kbase/mali_kbase_regs_history_debugfs.c @@ -25,6 +25,7 @@ #if defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_NO_MALI) #include <linux/debugfs.h> +#include <linux/version_compat_defs.h> /** * kbase_io_history_resize - resize the register access history buffer. @@ -158,11 +159,8 @@ static int regs_history_size_set(void *data, u64 val) return kbase_io_history_resize(h, (u16)val); } - -DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops, - regs_history_size_get, - regs_history_size_set, - "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(regs_history_size_fops, regs_history_size_get, regs_history_size_set, + "%llu\n"); /** * regs_history_show - show callback for the register access history file. diff --git a/mali_kbase/mali_kbase_smc.h b/mali_kbase/mali_kbase_smc.h index 91eb9ee..40a3483 100644 --- a/mali_kbase/mali_kbase_smc.h +++ b/mali_kbase/mali_kbase_smc.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2015, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -49,7 +49,7 @@ u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2); /** - * kbase_invoke_smc_fid - Perform a secure monitor call + * kbase_invoke_smc - Perform a secure monitor call * @oen: Owning Entity number (SIP, STD etc). * @function_number: The function number within the OEN. * @smc64: use SMC64 calling convention instead of SMC32. 
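The regs_history change above moves from DEFINE_SIMPLE_ATTRIBUTE to DEFINE_DEBUGFS_ATTRIBUTE, the variant whose read/write wrappers take the debugfs file-removal protection before calling the get/set callbacks. A minimal, hypothetical module showing the same pattern (the names and the exposed value are illustrative, not kbase code):

	#include <linux/debugfs.h>
	#include <linux/module.h>

	static u64 example_threshold;	/* hypothetical value exposed through debugfs */
	static struct dentry *example_dir;

	static int example_threshold_get(void *data, u64 *val)
	{
		*val = *(u64 *)data;
		return 0;
	}

	static int example_threshold_set(void *data, u64 val)
	{
		*(u64 *)data = val;
		return 0;
	}

	/* Generates example_threshold_fops with removal-safe read/write wrappers. */
	DEFINE_DEBUGFS_ATTRIBUTE(example_threshold_fops, example_threshold_get,
				 example_threshold_set, "%llu\n");

	static int __init example_init(void)
	{
		example_dir = debugfs_create_dir("example", NULL);
		debugfs_create_file("threshold", 0644, example_dir, &example_threshold,
				    &example_threshold_fops);
		return 0;
	}

	static void __exit example_exit(void)
	{
		debugfs_remove_recursive(example_dir);
	}

	module_init(example_init);
	module_exit(example_exit);
	MODULE_LICENSE("GPL");

debugfs_create_file() works with these fops, as the driver does here; debugfs_create_file_unsafe() is the lighter-weight pairing the kernel documentation suggests for attributes defined this way.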
diff --git a/mali_kbase/mali_kbase_softjobs.c b/mali_kbase/mali_kbase_softjobs.c index ae3b9ad..665bc09 100644 --- a/mali_kbase/mali_kbase_softjobs.c +++ b/mali_kbase/mali_kbase_softjobs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -75,7 +75,7 @@ static void kbasep_add_waiting_with_timeout(struct kbase_jd_atom *katom) /* Record the start time of this atom so we could cancel it at * the right time. */ - katom->start_timestamp = ktime_get(); + katom->start_timestamp = ktime_get_raw(); /* Add the atom to the waiting list before the timer is * (re)started to make sure that it gets processed. @@ -215,7 +215,7 @@ void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom) rt_mutex_lock(&kctx->jctx.lock); kbasep_remove_waiting_soft_job(katom); kbase_finish_soft_job(katom); - if (jd_done_nolock(katom, true)) + if (kbase_jd_done_nolock(katom, true)) kbase_js_sched_all(kctx->kbdev); rt_mutex_unlock(&kctx->jctx.lock); } @@ -229,7 +229,7 @@ static void kbasep_soft_event_complete_job(struct kthread_work *work) int resched; rt_mutex_lock(&kctx->jctx.lock); - resched = jd_done_nolock(katom, true); + resched = kbase_jd_done_nolock(katom, true); rt_mutex_unlock(&kctx->jctx.lock); if (resched) @@ -390,7 +390,7 @@ void kbasep_soft_job_timeout_worker(struct timer_list *timer) soft_job_timeout); u32 timeout_ms = (u32)atomic_read( &kctx->kbdev->js_data.soft_job_timeout_ms); - ktime_t cur_time = ktime_get(); + ktime_t cur_time = ktime_get_raw(); bool restarting = false; unsigned long lflags; struct list_head *entry, *tmp; @@ -500,7 +500,7 @@ out: static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom) { katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - if (jd_done_nolock(katom, true)) + if (kbase_jd_done_nolock(katom, true)) kbase_js_sched_all(katom->kctx->kbdev); } @@ -812,11 +812,7 @@ int kbase_mem_copy_from_extres(struct kbase_context *kctx, dma_to_copy = min(dma_buf->size, (size_t)(buf_data->nr_extres_pages * PAGE_SIZE)); - ret = dma_buf_begin_cpu_access(dma_buf, -#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS) - 0, dma_to_copy, -#endif - DMA_FROM_DEVICE); + ret = dma_buf_begin_cpu_access(dma_buf, DMA_FROM_DEVICE); if (ret) goto out_unlock; @@ -843,11 +839,7 @@ int kbase_mem_copy_from_extres(struct kbase_context *kctx, break; } } - dma_buf_end_cpu_access(dma_buf, -#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS) - 0, dma_to_copy, -#endif - DMA_FROM_DEVICE); + dma_buf_end_cpu_access(dma_buf, DMA_FROM_DEVICE); break; } default: @@ -1357,7 +1349,7 @@ static void kbasep_jit_finish_worker(struct kthread_work *work) rt_mutex_lock(&kctx->jctx.lock); kbase_finish_soft_job(katom); - resched = jd_done_nolock(katom, true); + resched = kbase_jd_done_nolock(katom, true); rt_mutex_unlock(&kctx->jctx.lock); if (resched) @@ -1798,7 +1790,7 @@ void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev) if (kbase_process_soft_job(katom_iter) == 0) { kbase_finish_soft_job(katom_iter); - resched |= jd_done_nolock(katom_iter, true); + resched |= kbase_jd_done_nolock(katom_iter, true); #ifdef CONFIG_MALI_ARBITER_SUPPORT atomic_dec(&kbdev->pm.gpu_users_waiting); #endif /* CONFIG_MALI_ARBITER_SUPPORT */ diff --git 
a/mali_kbase/mali_kbase_sync_android.c b/mali_kbase/mali_kbase_sync_android.c index fa17877..ae6e669 100644 --- a/mali_kbase/mali_kbase_sync_android.c +++ b/mali_kbase/mali_kbase_sync_android.c @@ -441,7 +441,7 @@ void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom) kbasep_remove_waiting_soft_job(katom); kbase_finish_soft_job(katom); - if (jd_done_nolock(katom, true)) + if (kbase_jd_done_nolock(katom, true)) kbase_js_sched_all(katom->kctx->kbdev); } diff --git a/mali_kbase/mali_kbase_sync_file.c b/mali_kbase/mali_kbase_sync_file.c index 1462a6b..649a862 100644 --- a/mali_kbase/mali_kbase_sync_file.c +++ b/mali_kbase/mali_kbase_sync_file.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -251,7 +251,7 @@ void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom) kbasep_remove_waiting_soft_job(katom); kbase_finish_soft_job(katom); - if (jd_done_nolock(katom, true)) + if (kbase_jd_done_nolock(katom, true)) kbase_js_sched_all(katom->kctx->kbdev); } @@ -298,10 +298,7 @@ void kbase_sync_fence_info_get(struct dma_fence *fence, info->status = 0; /* still active (unsignaled) */ } -#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE) - scnprintf(info->name, sizeof(info->name), "%u#%u", - fence->context, fence->seqno); -#elif (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) +#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) scnprintf(info->name, sizeof(info->name), "%llu#%u", fence->context, fence->seqno); #else diff --git a/mali_kbase/mali_kbase_vinstr.c b/mali_kbase/mali_kbase_vinstr.c index d7a6c98..e9f843b 100644 --- a/mali_kbase/mali_kbase_vinstr.c +++ b/mali_kbase/mali_kbase_vinstr.c @@ -38,6 +38,7 @@ #include <linux/mutex.h> #include <linux/poll.h> #include <linux/slab.h> +#include <linux/version_compat_defs.h> #include <linux/workqueue.h> /* Hwcnt reader API version */ @@ -113,9 +114,7 @@ struct kbase_vinstr_client { wait_queue_head_t waitq; }; -static unsigned int kbasep_vinstr_hwcnt_reader_poll( - struct file *filp, - poll_table *wait); +static __poll_t kbasep_vinstr_hwcnt_reader_poll(struct file *filp, poll_table *wait); static long kbasep_vinstr_hwcnt_reader_ioctl( struct file *filp, @@ -517,8 +516,6 @@ void kbase_vinstr_term(struct kbase_vinstr_context *vctx) if (!vctx) return; - cancel_work_sync(&vctx->dump_work); - /* Non-zero client count implies client leak */ if (WARN_ON(vctx->client_count != 0)) { struct kbase_vinstr_client *pos, *n; @@ -530,6 +527,7 @@ void kbase_vinstr_term(struct kbase_vinstr_context *vctx) } } + cancel_work_sync(&vctx->dump_work); kbase_hwcnt_gpu_metadata_narrow_destroy(vctx->metadata_user); WARN_ON(vctx->client_count != 0); @@ -1039,18 +1037,16 @@ static long kbasep_vinstr_hwcnt_reader_ioctl( * Return: POLLIN if data can be read without blocking, 0 if data can not be * read without blocking, else error code. 
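The vinstr hunks that follow retype the hwcnt reader's poll callback to return __poll_t, with the driver keeping its (__poll_t)-EINVAL returns for bad arguments. For reference, a generic sketch of a poll hook in the __poll_t style, using an assumed data-ready flag and hypothetical names rather than the kbase structures; returning an EPOLL* mask such as EPOLLERR is the more common convention for the error case:

	#include <linux/fs.h>
	#include <linux/poll.h>
	#include <linux/wait.h>

	struct example_reader {
		wait_queue_head_t waitq;	/* woken by the producer side */
		bool data_ready;		/* assumed flag set when a sample is queued */
	};

	/* Conventional __poll_t-returning poll hook: always register the wait queue,
	 * then report readability through the EPOLL* bit mask.
	 */
	static __poll_t example_reader_poll(struct file *filp, poll_table *wait)
	{
		struct example_reader *rd = filp->private_data;

		if (!rd)
			return EPOLLERR;

		poll_wait(filp, &rd->waitq, wait);

		if (READ_ONCE(rd->data_ready))
			return EPOLLIN | EPOLLRDNORM;

		return 0;
	}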
*/ -static unsigned int kbasep_vinstr_hwcnt_reader_poll( - struct file *filp, - poll_table *wait) +static __poll_t kbasep_vinstr_hwcnt_reader_poll(struct file *filp, poll_table *wait) { struct kbase_vinstr_client *cli; if (!filp || !wait) - return -EINVAL; + return (__poll_t)-EINVAL; cli = filp->private_data; if (!cli) - return -EINVAL; + return (__poll_t)-EINVAL; poll_wait(filp, &cli->waitq, wait); if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli)) diff --git a/mali_kbase/mali_malisw.h b/mali_kbase/mali_malisw.h index fc8dcbc..d25c29f 100644 --- a/mali_kbase/mali_malisw.h +++ b/mali_kbase/mali_malisw.h @@ -19,7 +19,7 @@ * */ -/** +/* * Kernel-wide include for common macros and types. */ diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c index c9ba3fc..04f5cdf 100644 --- a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c +++ b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -152,8 +152,8 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, /* terminal fault, print info about the fault */ dev_err(kbdev->dev, - "GPU bus fault in AS%d at VA 0x%016llX\n" - "VA_VALID: %s\n" + "GPU bus fault in AS%d at PA 0x%016llX\n" + "PA_VALID: %s\n" "raw fault status: 0x%X\n" "exception type 0x%X: %s\n" "access type 0x%X: %s\n" diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c index fad5554..3130b33 100644 --- a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c +++ b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -66,7 +66,7 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, /* terminal fault, print info about the fault */ dev_err(kbdev->dev, - "GPU bus fault in AS%d at VA 0x%016llX\n" + "GPU bus fault in AS%d at PA 0x%016llX\n" "raw fault status: 0x%X\n" "exception type 0x%X: %s\n" "exception data 0x%X\n" diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c index fbdb7a9..c98d830 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.c +++ b/mali_kbase/mmu/mali_kbase_mmu.c @@ -49,8 +49,25 @@ #include <mali_kbase_trace_gpu_mem.h> #include <backend/gpu/mali_kbase_pm_internal.h> +/* Threshold used to decide whether to flush full caches or just a physical range */ +#define KBASE_PA_RANGE_THRESHOLD_NR_PAGES 20 #define MGM_DEFAULT_PTE_GROUP (0) +/* Macro to convert updated PGDs to flags indicating levels to skip in flush */ +#define pgd_level_to_skip_flush(dirty_pgds) (~(dirty_pgds) & 0xF) + +/* Small wrapper function to factor out GPU-dependent context releasing */ +static void release_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ +#if MALI_USE_CSF + CSTD_UNUSED(kbdev); + kbase_ctx_sched_release_ctx_lock(kctx); +#else /* MALI_USE_CSF */ + kbasep_js_runpool_release_ctx(kbdev, kctx); +#endif /* MALI_USE_CSF */ +} + static void mmu_hw_operation_begin(struct kbase_device *kbdev) { #if !IS_ENABLED(CONFIG_MALI_NO_MALI) @@ -110,94 +127,66 @@ static bool mmu_flush_cache_on_gpu_ctrl(struct kbase_device *kbdev) return arch_maj_cur > 11; } -/* Small wrapper function to factor out GPU-dependent context releasing */ -static void release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) -{ -#if MALI_USE_CSF - CSTD_UNUSED(kbdev); - kbase_ctx_sched_release_ctx_lock(kctx); -#else /* MALI_USE_CSF */ - kbasep_js_runpool_release_ctx(kbdev, kctx); -#endif /* MALI_USE_CSF */ -} - /** - * mmu_flush_invalidate_on_gpu_ctrl() - Flush and invalidate the GPU caches - * through GPU_CONTROL interface. - * @kbdev: kbase device to issue the MMU operation on. - * @as: address space to issue the MMU operation on. - * @op_param: parameters for the operation. - * - * This wrapper function alternates AS_COMMAND_FLUSH_PT and AS_COMMAND_FLUSH_MEM - * to equivalent GPU_CONTROL command FLUSH_CACHES. - * The function first issue LOCK to MMU-AS with kbase_mmu_hw_do_operation(). - * And issues cache-flush with kbase_gpu_cache_flush_and_busy_wait() function - * then issue UNLOCK to MMU-AS with kbase_mmu_hw_do_operation(). + * mmu_invalidate() - Perform an invalidate operation on MMU caches. + * @kbdev: The Kbase device. + * @kctx: The Kbase context. + * @as_nr: GPU address space number for which invalidate is required. + * @op_param: Non-NULL pointer to struct containing information about the MMU + * operation to perform. * - * Return: Zero if the operation was successful, non-zero otherwise. + * Perform an MMU invalidate operation on a particular address space + * by issuing an UNLOCK command.
*/ -static int -mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, - struct kbase_as *as, - struct kbase_mmu_hw_op_param *op_param) +static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr, + const struct kbase_mmu_hw_op_param *op_param) { - u32 flush_op; - int ret, ret2; - - if (WARN_ON(kbdev == NULL) || - WARN_ON(as == NULL) || - WARN_ON(op_param == NULL)) - return -EINVAL; + int err = 0; + unsigned long flags; - lockdep_assert_held(&kbdev->hwaccess_lock); - lockdep_assert_held(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - /* Translate operation to command */ - if (op_param->op == KBASE_MMU_OP_FLUSH_PT) { - flush_op = GPU_COMMAND_CACHE_CLN_INV_L2; - } else if (op_param->op == KBASE_MMU_OP_FLUSH_MEM) { - flush_op = GPU_COMMAND_CACHE_CLN_INV_L2_LSC; - } else { - dev_warn(kbdev->dev, "Invalid flush request (op = %d)\n", - op_param->op); - return -EINVAL; + if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) { + as_nr = kctx ? kctx->as_nr : as_nr; + err = kbase_mmu_hw_do_unlock(kbdev, &kbdev->as[as_nr], op_param); } - /* 1. Issue MMU_AS_CONTROL.COMMAND.LOCK operation. */ - op_param->op = KBASE_MMU_OP_LOCK; - ret = kbase_mmu_hw_do_flush_locked(kbdev, as, op_param); - if (ret) - return ret; - - /* 2. Issue GPU_CONTROL.COMMAND.FLUSH_CACHES operation */ - ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, flush_op); - - /* 3. Issue MMU_AS_CONTROL.COMMAND.UNLOCK operation. */ - op_param->op = KBASE_MMU_OP_UNLOCK; - ret2 = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, op_param); + if (err) { + dev_err(kbdev->dev, + "Invalidate after GPU page table update did not complete. Issuing GPU soft-reset to recover"); + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu(kbdev); + } - return ret ?: ret2; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } +/* Perform a flush/invalidate on a particular address space + */ static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param) { int err = 0; unsigned long flags; + /* AS transaction begin */ mutex_lock(&kbdev->mmu_hw_mutex); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + if (kbdev->pm.backend.gpu_powered) err = kbase_mmu_hw_do_flush_locked(kbdev, as, op_param); + if (err) { /* Flush failed to complete, assume the GPU has hung and * perform a reset to recover. */ - dev_err(kbdev->dev, - "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover"); - if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) - kbase_reset_gpu_locked(kbdev); + dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover"); + + if (kbase_prepare_to_reset_gpu( + kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu(kbdev); } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&kbdev->mmu_hw_mutex); /* AS transaction end */ @@ -221,14 +210,19 @@ static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as * If operation is set to KBASE_MMU_OP_FLUSH_MEM then this function will issue * a cache flush + invalidate to the L2 and GPU Load/Store caches as well as * invalidating the TLBs. + * + * If operation is set to KBASE_MMU_OP_UNLOCK then this function will only + * invalidate the MMU caches and TLBs. 
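The new dirty_pgds bookkeeping that threads through these functions records, per page-table level, whether a PGD entry was actually written; pgd_level_to_skip_flush() then inverts those bits into the 4-bit flush_skip_levels mask, so levels that were never touched can be skipped when the MMU operation is issued. A tiny user-space illustration of the mask arithmetic (assuming level 0 is the top level and level 3 the bottom of the 4-level table, as elsewhere in this file):

	#include <stdint.h>
	#include <stdio.h>

	/* Same expression as the driver macro: invert the dirty bits and keep the
	 * four page-table levels.
	 */
	#define pgd_level_to_skip_flush(dirty_pgds) (~(dirty_pgds) & 0xF)

	int main(void)
	{
		uint64_t dirty_pgds = 0;

		dirty_pgds |= 1ULL << 3;	/* only a bottom-level PGD was written */
		printf("skip levels: 0x%llx\n",
		       (unsigned long long)pgd_level_to_skip_flush(dirty_pgds));	/* 0x7 */

		dirty_pgds |= 1ULL << 2;	/* a level-2 entry was created as well */
		printf("skip levels: 0x%llx\n",
		       (unsigned long long)pgd_level_to_skip_flush(dirty_pgds));	/* 0x3 */

		return 0;
	}

The same arithmetic explains the two special cases later in the patch: the duplicate-page-fault path sets flush_skip_levels to 0xF directly to skip every level, while kbase_mmu_flush_noretain() passes 0xF through the macro to obtain a skip mask of 0, meaning no level is skipped.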
*/ static void mmu_flush_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr, const struct kbase_mmu_hw_op_param *op_param) { bool ctx_is_in_runpool; + /* Early out if there is nothing to do */ if (op_param->nr == 0) return; + /* If no context is provided then MMU operation is performed on address * space which does not belong to user space context. Otherwise, retain * refcount to context provided and release after flush operation. @@ -243,49 +237,67 @@ static void mmu_flush_invalidate(struct kbase_device *kbdev, struct kbase_contex #else ctx_is_in_runpool = kbase_ctx_sched_inc_refcount_if_as_valid(kctx); #endif /* !MALI_USE_CSF */ + if (ctx_is_in_runpool) { KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr], op_param); + release_ctx(kbdev, kctx); } } } /** - * mmu_flush_invalidate_teardown_pages() - Perform flush operation after unmapping pages. - * - * @kbdev: Pointer to kbase device. - * @kctx: Pointer to kbase context. - * @as_nr: Address space number, for GPU cache maintenance operations - * that happen outside a specific kbase context. - * @op_param: Non-NULL pointer to struct containing information about the flush - * operation to perform. + * mmu_flush_invalidate_on_gpu_ctrl() - Perform a flush operation on GPU caches via + * the GPU_CONTROL interface + * @kbdev: The Kbase device. + * @kctx: The Kbase context. + * @as_nr: GPU address space number for which flush + invalidate is required. + * @op_param: Non-NULL pointer to struct containing information about the MMU + * operation to perform. * - * This function will do one of three things: - * 1. Invalidate the MMU caches, followed by a partial GPU cache flush of the - * individual pages that were unmapped if feature is supported on GPU. - * 2. Perform a full GPU cache flush through the GPU_CONTROL interface if feature is - * supported on GPU or, - * 3. Perform a full GPU cache flush through the MMU_CONTROL interface. + * Perform a flush/invalidate on a particular address space via the GPU_CONTROL + * interface. */ -static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, - struct kbase_context *kctx, int as_nr, - struct kbase_mmu_hw_op_param *op_param) +static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_context *kctx, + int as_nr, const struct kbase_mmu_hw_op_param *op_param) { - /* Full cache flush through the MMU_COMMAND */ - mmu_flush_invalidate(kbdev, kctx, as_nr, op_param); + int err = 0; + unsigned long flags; + + /* AS transaction begin */ + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) { + as_nr = kctx ? kctx->as_nr : as_nr; + err = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[as_nr], + op_param); + } + + if (err) { + /* Flush failed to complete, assume the GPU has hung and + * perform a reset to recover. + */ + dev_err(kbdev->dev, + "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); + + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu(kbdev); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); } -/** - * kbase_mmu_sync_pgd() - sync page directory to memory when needed. - * @kbdev: Device pointer. - * @handle: Address of DMA region. - * @size: Size of the region to sync. - * - * This should be called after each page directory update. 
- */ -static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, - dma_addr_t handle, size_t size) +static void kbase_mmu_sync_pgd_gpu(struct kbase_device *kbdev, struct kbase_context *kctx, + phys_addr_t phys, size_t size, + enum kbase_mmu_op_type flush_op) +{ +} + +static void kbase_mmu_sync_pgd_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size) { /* In non-coherent system, ensure the GPU can read * the pages from memory @@ -295,6 +307,34 @@ static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, DMA_TO_DEVICE); } +/** + * kbase_mmu_sync_pgd() - sync page directory to memory when needed. + * @kbdev: Device pointer. + * @kctx: Context pointer. + * @phys: Starting physical address of the destination region. + * @handle: Address of DMA region. + * @size: Size of the region to sync. + * @flush_op: MMU cache flush operation to perform on the physical address + * range, if GPU control is available. + * + * This function is called whenever the association between a virtual address + * range and a physical address range changes, because a mapping is created or + * destroyed. + * One of the effects of this operation is performing an MMU cache flush + * operation only on the physical address range affected by this function, if + * GPU control is available. + * + * This should be called after each page directory update. + */ +static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, struct kbase_context *kctx, + phys_addr_t phys, dma_addr_t handle, size_t size, + enum kbase_mmu_op_type flush_op) +{ + + kbase_mmu_sync_pgd_cpu(kbdev, handle, size); + kbase_mmu_sync_pgd_gpu(kbdev, kctx, phys, size, flush_op); +} + /* * Definitions: * - PGD: Page Directory. @@ -305,8 +345,8 @@ static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, */ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int group_id); + struct tagged_addr *phys, size_t nr, unsigned long flags, + int group_id, u64 *dirty_pgds); /** * kbase_mmu_update_and_free_parent_pgds() - Update number of valid entries and @@ -317,14 +357,15 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, * @pgds: Physical addresses of page directories to be freed. * @vpfn: The virtual page frame number. * @level: The level of MMU page table. + * @flush_op: The type of MMU flush operation to perform. * @dirty_pgds: Flags to track every level where a PGD has been updated. * @free_pgds_list: Linked list of the page directory pages to free. */ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, phys_addr_t *pgds, - u64 vpfn, int level, u64 *dirty_pgds, + u64 vpfn, int level, + enum kbase_mmu_op_type flush_op, u64 *dirty_pgds, struct list_head *free_pgds_list); - /** * kbase_mmu_free_pgd() - Free memory of the page directory * @@ -470,8 +511,10 @@ static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev, struct kbase_as *faulting_as, u64 start_pfn, size_t nr, - u32 kctx_id) + u32 kctx_id, u64 dirty_pgds) { + int err; + /* Calls to this function are inherently synchronous, with respect to * MMU operations. 
*/ @@ -484,22 +527,23 @@ static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev, KBASE_MMU_FAULT_TYPE_PAGE); /* flush L2 and unlock the VA (resumes the MMU) */ - op_param = (struct kbase_mmu_hw_op_param){ - .vpfn = start_pfn, - .nr = nr, - .op = KBASE_MMU_OP_FLUSH_PT, - .kctx_id = kctx_id, - .mmu_sync_info = mmu_sync_info, - }; + op_param.vpfn = start_pfn; + op_param.nr = nr; + op_param.op = KBASE_MMU_OP_FLUSH_PT; + op_param.kctx_id = kctx_id; + op_param.mmu_sync_info = mmu_sync_info; if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { unsigned long irq_flags; spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); - mmu_flush_invalidate_on_gpu_ctrl(kbdev, faulting_as, &op_param); + op_param.flush_skip_levels = + pgd_level_to_skip_flush(dirty_pgds); + err = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, faulting_as, + &op_param); spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); } else { mmu_hw_operation_begin(kbdev); - kbase_mmu_hw_do_flush_locked(kbdev, faulting_as, &op_param); + err = kbase_mmu_hw_do_flush(kbdev, faulting_as, &op_param); mmu_hw_operation_end(kbdev); } @@ -539,6 +583,7 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, u64 fault_pfn, pfn_offset; int ret; int as_no; + u64 dirty_pgds = 0; as_no = faulting_as->number; kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); @@ -597,12 +642,11 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, } /* Now make this faulting page writable to GPU. */ - ret = kbase_mmu_update_pages_no_flush(kctx, fault_pfn, - fault_phys_addr, - 1, region->flags, region->gpu_alloc->group_id); + ret = kbase_mmu_update_pages_no_flush(kctx, fault_pfn, fault_phys_addr, 1, region->flags, + region->gpu_alloc->group_id, &dirty_pgds); kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, fault_pfn, 1, - kctx->id); + kctx->id, dirty_pgds); kbase_gpu_vm_unlock(kctx); } @@ -837,7 +881,6 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, return true; } - void kbase_mmu_page_fault_worker(struct work_struct *data) { u64 fault_pfn; @@ -1052,16 +1095,29 @@ page_fault_retry: * transaction (which should cause the other page fault to be * raised again). */ - op_param = (struct kbase_mmu_hw_op_param){ - .vpfn = 0, - .nr = 0, - .op = KBASE_MMU_OP_UNLOCK, - .kctx_id = kctx->id, - .mmu_sync_info = mmu_sync_info, - }; - mmu_hw_operation_begin(kbdev); - kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as, &op_param); - mmu_hw_operation_end(kbdev); + op_param.mmu_sync_info = mmu_sync_info; + op_param.kctx_id = kctx->id; + if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) { + mmu_hw_operation_begin(kbdev); + err = kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as, + &op_param); + mmu_hw_operation_end(kbdev); + } else { + /* Can safely skip the invalidate for all levels in case + * of duplicate page faults. 
+ */ + op_param.flush_skip_levels = 0xF; + op_param.vpfn = fault_pfn; + op_param.nr = 1; + err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, + &op_param); + } + + if (err) { + dev_err(kbdev->dev, + "Invalidation for MMU did not complete on handling page fault @ 0x%llx", + fault->addr); + } mutex_unlock(&kbdev->mmu_hw_mutex); @@ -1089,16 +1145,29 @@ page_fault_retry: KBASE_MMU_FAULT_TYPE_PAGE); /* See comment [1] about UNLOCK usage */ - op_param = (struct kbase_mmu_hw_op_param){ - .vpfn = 0, - .nr = 0, - .op = KBASE_MMU_OP_UNLOCK, - .kctx_id = kctx->id, - .mmu_sync_info = mmu_sync_info, - }; - mmu_hw_operation_begin(kbdev); - kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as, &op_param); - mmu_hw_operation_end(kbdev); + op_param.mmu_sync_info = mmu_sync_info; + op_param.kctx_id = kctx->id; + if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) { + mmu_hw_operation_begin(kbdev); + err = kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as, + &op_param); + mmu_hw_operation_end(kbdev); + } else { + /* Can safely skip the invalidate for all levels in case + * of duplicate page faults. + */ + op_param.flush_skip_levels = 0xF; + op_param.vpfn = fault_pfn; + op_param.nr = 1; + err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, + &op_param); + } + + if (err) { + dev_err(kbdev->dev, + "Invalidation for MMU did not complete on handling page fault @ 0x%llx", + fault->addr); + } mutex_unlock(&kbdev->mmu_hw_mutex); @@ -1164,7 +1233,7 @@ page_fault_retry: (u64)new_pages); trace_mali_mmu_page_fault_grow(region, fault, new_pages); -#if MALI_INCREMENTAL_RENDERING +#if MALI_INCREMENTAL_RENDERING_JM /* Switch to incremental rendering if we have nearly run out of * memory in a JIT memory allocation. */ @@ -1200,24 +1269,22 @@ page_fault_retry: kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); - /* flush L2 and unlock the VA (resumes the MMU) */ - op_param = (struct kbase_mmu_hw_op_param){ - .vpfn = fault->addr >> PAGE_SHIFT, - .nr = new_pages, - .op = KBASE_MMU_OP_FLUSH_PT, - .kctx_id = kctx->id, - .mmu_sync_info = mmu_sync_info, - }; + op_param.vpfn = region->start_pfn + pfn_offset; + op_param.nr = new_pages; + op_param.op = KBASE_MMU_OP_FLUSH_PT; + op_param.kctx_id = kctx->id; + op_param.mmu_sync_info = mmu_sync_info; if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { - unsigned long irq_flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); - err = mmu_flush_invalidate_on_gpu_ctrl(kbdev, faulting_as, - &op_param); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); + /* Unlock to invalidate the TLB (and resume the MMU) */ + op_param.flush_skip_levels = + pgd_level_to_skip_flush(dirty_pgds); + err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, + &op_param); } else { + /* flush L2 and unlock the VA (resumes the MMU) */ mmu_hw_operation_begin(kbdev); - err = kbase_mmu_hw_do_flush_locked(kbdev, faulting_as, &op_param); + err = kbase_mmu_hw_do_flush(kbdev, faulting_as, + &op_param); mmu_hw_operation_end(kbdev); } @@ -1335,15 +1402,18 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, { u64 *page; struct page *p; + phys_addr_t pgd; p = kbase_mem_pool_alloc(&kbdev->mem_pools.small[mmut->group_id]); if (!p) - return 0; + return KBASE_MMU_INVALID_PGD_ADDRESS; page = kmap(p); if (page == NULL) goto alloc_free; + pgd = page_to_phys(p); + /* If the MMU tables belong to a context then account the memory usage * to that context, otherwise the MMU tables are device wide and are * only accounted to the device. 
@@ -1366,23 +1436,26 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, kbdev->mmu_mode->entries_invalidate(page, KBASE_MMU_PAGE_ENTRIES); - kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE); + /* As this page is newly created, there is no content to + * clean or invalidate in the GPU caches. + */ + kbase_mmu_sync_pgd_cpu(kbdev, kbase_dma_addr(p), PAGE_SIZE); kunmap(p); - return page_to_phys(p); + return pgd; alloc_free: kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, false); - return 0; + return KBASE_MMU_INVALID_PGD_ADDRESS; } /* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the * new table from the pool if needed and possible */ -static int mmu_get_next_pgd(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - phys_addr_t *pgd, u64 vpfn, int level) +static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + phys_addr_t *pgd, u64 vpfn, int level, bool *newly_created_pgd, + u64 *dirty_pgds) { u64 *page; phys_addr_t target_pgd; @@ -1406,15 +1479,13 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, return -EINVAL; } - target_pgd = kbdev->mmu_mode->pte_to_phy_addr( - kbdev->mgm_dev->ops.mgm_pte_to_original_pte( - kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[vpfn])); - - if (!target_pgd) { + if (!kbdev->mmu_mode->pte_is_valid(page[vpfn], level)) { + enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE; unsigned int current_valid_entries; u64 managed_pte; + target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut); - if (!target_pgd) { + if (target_pgd == KBASE_MMU_INVALID_PGD_ADDRESS) { dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure\n", __func__); kunmap(p); @@ -1427,8 +1498,30 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, managed_pte); kbdev->mmu_mode->set_num_valid_entries(page, current_valid_entries + 1); - kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE); /* Rely on the caller to update the address space flags. */ + if (newly_created_pgd && !*newly_created_pgd) { + *newly_created_pgd = true; + /* If code reaches here we know parent PGD of target PGD was + * not newly created and should be flushed. + */ + flush_op = KBASE_MMU_OP_FLUSH_PT; + + if (dirty_pgds) + *dirty_pgds |= 1ULL << level; + } + + /* A new valid entry is added to an existing PGD. Perform the + * invalidate operation for GPU cache as it could hold a + * cacheline that contains the entry (in an invalid form).
+ */ + kbase_mmu_sync_pgd(kbdev, mmut->kctx, + *pgd + (vpfn * sizeof(u64)), + kbase_dma_addr(p) + (vpfn * sizeof(u64)), + sizeof(u64), flush_op); + } else { + target_pgd = kbdev->mmu_mode->pte_to_phy_addr( + kbdev->mgm_dev->ops.mgm_pte_to_original_pte( + kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[vpfn])); } kunmap(p); @@ -1440,11 +1533,9 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, /* * Returns the PGD for the specified level of translation */ -static int mmu_get_pgd_at_level(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - u64 vpfn, - int level, - phys_addr_t *out_pgd) +static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + int level, phys_addr_t *out_pgd, bool *newly_created_pgd, + u64 *dirty_pgds) { phys_addr_t pgd; int l; @@ -1453,7 +1544,8 @@ static int mmu_get_pgd_at_level(struct kbase_device *kbdev, pgd = mmut->pgd; for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) { - int err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l); + int err = + mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l, newly_created_pgd, dirty_pgds); /* Handle failure condition */ if (err) { dev_dbg(kbdev->dev, @@ -1468,13 +1560,11 @@ static int mmu_get_pgd_at_level(struct kbase_device *kbdev, return 0; } -static int mmu_get_bottom_pgd(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - u64 vpfn, - phys_addr_t *out_pgd) +static int mmu_get_bottom_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + phys_addr_t *out_pgd, bool *newly_created_pgd, u64 *dirty_pgds) { - return mmu_get_pgd_at_level(kbdev, mmut, vpfn, MIDGARD_MMU_BOTTOMLEVEL, - out_pgd); + return mmu_get_pgd_at_level(kbdev, mmut, vpfn, MIDGARD_MMU_BOTTOMLEVEL, out_pgd, + newly_created_pgd, dirty_pgds); } static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, @@ -1538,6 +1628,9 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, goto next; } + if (dirty_pgds && pcount > 0) + *dirty_pgds |= 1ULL << level; + num_of_valid_entries = mmu_mode->get_num_valid_entries(page); if (WARN_ON_ONCE(num_of_valid_entries < pcount)) num_of_valid_entries = 0; @@ -1553,19 +1646,21 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, list_add(&p->lru, free_pgds_list); kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, - dirty_pgds, free_pgds_list); - + KBASE_MMU_OP_NONE, dirty_pgds, + free_pgds_list); vpfn += count; continue; } mmu_mode->set_num_valid_entries(page, num_of_valid_entries); - - kbase_mmu_sync_pgd(kbdev, - kbase_dma_addr(phys_to_page(pgd)) + sizeof(u64) * idx, - sizeof(u64) * pcount); - kunmap(phys_to_page(pgd)); + /* MMU cache flush strategy is NONE because GPU cache maintenance is + * going to be done by the caller + */ + kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (idx * sizeof(u64)), + kbase_dma_addr(p) + sizeof(u64) * idx, sizeof(u64) * pcount, + KBASE_MMU_OP_NONE); + kunmap(p); next: vpfn += count; } @@ -1584,6 +1679,7 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev, op_param.op = KBASE_MMU_OP_FLUSH_PT; op_param.mmu_sync_info = mmu_sync_info; op_param.kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF; + op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds); #if MALI_USE_CSF as_nr = mmut->kctx ? 
mmut->kctx->as_nr : MCU_AS_NR; @@ -1591,7 +1687,18 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev, WARN_ON(!mmut->kctx); #endif - mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param); + /* MMU cache flush strategy depends on whether GPU control commands for + * flushing physical address ranges are supported. The new physical pages + * are not present in GPU caches therefore they don't need any cache + * maintenance, but PGDs in the page table may or may not be created anew. + * + * Operations that affect the whole GPU cache shall only be done if it's + * impossible to update physical ranges. + */ + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) + mmu_invalidate(kbdev, mmut->kctx, as_nr, &op_param); + else + mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param); } /* @@ -1613,6 +1720,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, size_t remain = nr; int err; struct kbase_device *kbdev; + enum kbase_mmu_op_type flush_op; u64 dirty_pgds = 0; LIST_HEAD(free_pgds_list); @@ -1636,6 +1744,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; struct page *p; register unsigned int num_of_valid_entries; + bool newly_created_pgd = false; if (count > remain) count = remain; @@ -1648,8 +1757,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, * 256 pages at once (on average). Do we really care? */ do { - err = mmu_get_bottom_pgd(kbdev, &kctx->mmu, - vpfn, &pgd); + err = mmu_get_bottom_pgd(kbdev, &kctx->mmu, vpfn, &pgd, &newly_created_pgd, + &dirty_pgds); if (err != -ENOMEM) break; /* Fill the memory pool with enough pages for @@ -1669,7 +1778,6 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, /* Invalidate the pages we have partially * completed */ - mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn, start_vpfn + recover_count, &dirty_pgds, &free_pgds_list); @@ -1712,9 +1820,21 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, vpfn += count; remain -= count; - kbase_mmu_sync_pgd(kbdev, - kbase_dma_addr(p) + (index * sizeof(u64)), - count * sizeof(u64)); + if (count > 0 && !newly_created_pgd) + dirty_pgds |= 1ULL << MIDGARD_MMU_BOTTOMLEVEL; + + /* MMU cache flush operation here will depend on whether bottom level + * PGD is newly created or not. + * + * If bottom level PGD is newly created then no cache maintenance is + * required as the PGD will not exist in GPU cache. Otherwise GPU cache + * maintenance is required for existing PGD. + */ + flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT; + + kbase_mmu_sync_pgd(kbdev, kctx, pgd + (index * sizeof(u64)), + kbase_dma_addr(p) + (index * sizeof(u64)), count * sizeof(u64), + flush_op); kunmap(p); /* We have started modifying the page table. @@ -1737,7 +1857,6 @@ fail_unlock: mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds, mmu_sync_info); kbase_mmu_free_pgds_list(kbdev, &kctx->mmu, &free_pgds_list); - return err; } @@ -1783,6 +1902,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu struct page *p; int cur_level; register unsigned int num_of_valid_entries; + enum kbase_mmu_op_type flush_op; + bool newly_created_pgd = false; if (count > remain) count = remain; @@ -1800,8 +1921,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu * 256 pages at once (on average). Do we really care? 
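Both insertion paths settle on the same rule when syncing a bottom-level PGD: a directory page allocated during this very call cannot be resident in the GPU cache, so it needs no maintenance, while an entry written into a pre-existing PGD must be cleaned and invalidated (KBASE_MMU_OP_FLUSH_PT) and its level recorded in dirty_pgds for the final flush. A compact user-space restatement of that decision, with illustrative names outside the driver:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	enum mmu_op { MMU_OP_NONE, MMU_OP_FLUSH_PT };

	/* Mirror of the driver's choice: cache maintenance is only needed when the
	 * bottom-level PGD already existed before this insertion.
	 */
	static enum mmu_op pgd_sync_flush_op(bool newly_created_pgd)
	{
		return newly_created_pgd ? MMU_OP_NONE : MMU_OP_FLUSH_PT;
	}

	int main(void)
	{
		uint64_t dirty_pgds = 0;
		bool newly_created_pgd = false;	/* pretend the bottom PGD pre-existed */
		int bottom_level = 3;

		if (pgd_sync_flush_op(newly_created_pgd) == MMU_OP_FLUSH_PT)
			dirty_pgds |= 1ULL << bottom_level;	/* remember it for the final flush */

		printf("dirty_pgds = 0x%llx\n", (unsigned long long)dirty_pgds);	/* 0x8 */
		return 0;
	}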
*/ do { - err = mmu_get_pgd_at_level(kbdev, mmut, insert_vpfn, - cur_level, &pgd); + err = mmu_get_pgd_at_level(kbdev, mmut, insert_vpfn, cur_level, &pgd, + &newly_created_pgd, dirty_pgds); if (err != -ENOMEM) break; /* Fill the memory pool with enough pages for @@ -1815,8 +1936,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu } while (!err); if (err) { - dev_warn(kbdev->dev, - "%s: mmu_get_bottom_pgd failure\n", __func__); + dev_warn(kbdev->dev, "%s: mmu_get_pgd_at_level failure\n", __func__); if (insert_vpfn != start_vpfn) { /* Invalidate the pages we have partially * completed @@ -1837,7 +1957,6 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu /* Invalidate the pages we have partially * completed */ - mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, dirty_pgds, &free_pgds_list); @@ -1877,13 +1996,28 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries); + if (dirty_pgds && count > 0 && !newly_created_pgd) + *dirty_pgds |= 1ULL << cur_level; + phys += count; insert_vpfn += count; remain -= count; - kbase_mmu_sync_pgd(kbdev, - kbase_dma_addr(p) + (vindex * sizeof(u64)), - count * sizeof(u64)); + /* For the most part, the creation of a new virtual memory mapping does + * not require cache flush operations, because the operation results + * in the creation of new memory pages which are not present in GPU + * caches. Therefore the default operation is NONE. + * + * However, it is quite common for the mapping to start and/or finish + * at an already existing PGD. Moreover, the PTEs modified are not + * necessarily aligned with GPU cache lines. Therefore, GPU cache + * maintenance is required for existing PGDs. + */ + flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT; + + kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (vindex * sizeof(u64)), + kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64), + flush_op); kunmap(p); } @@ -1916,9 +2050,12 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, u64 dirty_pgds = 0; LIST_HEAD(free_pgds_list); + /* Early out if there is nothing to do */ + if (nr == 0) + return 0; + err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, &dirty_pgds); - if (err) return err; @@ -1930,7 +2067,7 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); /** - * kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches + * kbase_mmu_flush_noretain() - Flush and invalidate the GPU caches * without retaining the kbase context. * @kctx: The KBase context. * @vpfn: The virtual page frame number to start the flush on. @@ -1939,17 +2076,15 @@ KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any * other locking. */ -static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx, - u64 vpfn, size_t nr) +static void kbase_mmu_flush_noretain(struct kbase_context *kctx, u64 vpfn, size_t nr) { struct kbase_device *kbdev = kctx->kbdev; - struct kbase_mmu_hw_op_param op_param; int err; - /* Calls to this function are inherently asynchronous, with respect to * MMU operations.
*/ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + struct kbase_mmu_hw_op_param op_param; lockdep_assert_held(&kctx->kbdev->hwaccess_lock); lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex); @@ -1959,155 +2094,32 @@ static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx, return; /* flush L2 and unlock the VA (resumes the MMU) */ - op_param = (struct kbase_mmu_hw_op_param){ - .vpfn = vpfn, - .nr = nr, - .op = KBASE_MMU_OP_FLUSH_MEM, - .kctx_id = kctx->id, - .mmu_sync_info = mmu_sync_info, - }; - + op_param.vpfn = vpfn; + op_param.nr = nr; + op_param.op = KBASE_MMU_OP_FLUSH_MEM; + op_param.kctx_id = kctx->id; + op_param.mmu_sync_info = mmu_sync_info; if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { - err = mmu_flush_invalidate_on_gpu_ctrl( - kbdev, &kbdev->as[kctx->as_nr], &op_param); + /* Value used to prevent skipping of any levels when flushing */ + op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF); + err = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[kctx->as_nr], + &op_param); } else { - err = kbase_mmu_hw_do_flush_locked(kbdev, &kbdev->as[kctx->as_nr], &op_param); + err = kbase_mmu_hw_do_flush_locked(kbdev, &kbdev->as[kctx->as_nr], + &op_param); } if (err) { /* Flush failed to complete, assume the * GPU has hung and perform a reset to recover */ - dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); + dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover"); if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu_locked(kbdev); } } -/* Perform a flush/invalidate on a particular address space - */ -static void -kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as, - u64 vpfn, size_t nr, bool sync, u32 kctx_id, - enum kbase_caller_mmu_sync_info mmu_sync_info) -{ - int err; - bool gpu_powered; - unsigned long flags; - struct kbase_mmu_hw_op_param op_param; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - gpu_powered = kbdev->pm.backend.gpu_powered; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - /* GPU is off so there's no need to perform flush/invalidate. - * But even if GPU is not actually powered down, after gpu_powered flag - * was set to false, it is still safe to skip the flush/invalidate. - * The TLB invalidation will anyways be performed due to AS_COMMAND_UPDATE - * which is sent when address spaces are restored after gpu_powered flag - * is set to true. Flushing of L2 cache is certainly not required as L2 - * cache is definitely off if gpu_powered is false. - */ - if (!gpu_powered) - return; - - if (kbase_pm_context_active_handle_suspend(kbdev, - KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { - /* GPU has just been powered off due to system suspend. - * So again, no need to perform flush/invalidate. - */ - return; - } - - /* - * Taking a pm reference does not guarantee that the GPU has finished powering up. - * It's possible that the power up has been deferred until after a scheduled power down. - * We must wait here for the L2 to be powered up, and holding a pm reference guarantees that - * it will not be powered down afterwards. 
- */ - err = kbase_pm_wait_for_l2_powered(kbdev); - if (err) { - dev_err(kbdev->dev, "Wait for L2 power up failed, skipping MMU command"); - /* Drop the pm ref */ - goto idle; - } - - /* AS transaction begin */ - mutex_lock(&kbdev->mmu_hw_mutex); - - op_param = (struct kbase_mmu_hw_op_param){ - .vpfn = vpfn, - .nr = nr, - .kctx_id = kctx_id, - .mmu_sync_info = mmu_sync_info, - }; - - if (sync) - op_param.op = KBASE_MMU_OP_FLUSH_MEM; - else - op_param.op = KBASE_MMU_OP_FLUSH_PT; - - if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - err = mmu_flush_invalidate_on_gpu_ctrl(kbdev, as, &op_param); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - } else { - mmu_hw_operation_begin(kbdev); - err = kbase_mmu_hw_do_flush_locked(kbdev, as, &op_param); - mmu_hw_operation_end(kbdev); - } - - if (err) { - /* Flush failed to complete, assume the GPU has hung and - * perform a reset to recover - */ - dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); - - if (kbase_prepare_to_reset_gpu( - kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) - kbase_reset_gpu(kbdev); - } - - mutex_unlock(&kbdev->mmu_hw_mutex); - /* AS transaction end */ - -idle: - kbase_pm_context_idle(kbdev); -} - -static void -kbase_mmu_flush_invalidate(struct kbase_context *kctx, u64 vpfn, size_t nr, - bool sync, - enum kbase_caller_mmu_sync_info mmu_sync_info) -{ - struct kbase_device *kbdev; - bool ctx_is_in_runpool; - - /* Early out if there is nothing to do */ - if (nr == 0) - return; - - kbdev = kctx->kbdev; -#if !MALI_USE_CSF - rt_mutex_lock(&kbdev->js_data.queue_mutex); - ctx_is_in_runpool = kbase_ctx_sched_inc_refcount(kctx); - rt_mutex_unlock(&kbdev->js_data.queue_mutex); -#else - ctx_is_in_runpool = kbase_ctx_sched_inc_refcount_if_as_valid(kctx); -#endif /* !MALI_USE_CSF */ - - if (ctx_is_in_runpool) { - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - - kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr], - vpfn, nr, sync, kctx->id, - mmu_sync_info); - - release_ctx(kbdev, kctx); - } -} - void kbase_mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, int as_nr) @@ -2147,7 +2159,7 @@ void kbase_mmu_disable(struct kbase_context *kctx) * The job scheduler code will already be holding the locks and context * so just do the flush. */ - kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0); + kbase_mmu_flush_noretain(kctx, 0, ~0); kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr); #if !MALI_USE_CSF @@ -2164,7 +2176,8 @@ KBASE_EXPORT_TEST_API(kbase_mmu_disable); static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, phys_addr_t *pgds, - u64 vpfn, int level, u64 *dirty_pgds, + u64 vpfn, int level, + enum kbase_mmu_op_type flush_op, u64 *dirty_pgds, struct list_head *free_pgds_list) { int current_level; @@ -2180,11 +2193,23 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, kbdev->mmu_mode->get_num_valid_entries(current_page); int index = (vpfn >> ((3 - current_level) * 9)) & 0x1FF; + /* We need to track every level that needs updating */ + if (dirty_pgds) + *dirty_pgds |= 1ULL << current_level; + kbdev->mmu_mode->entries_invalidate(¤t_page[index], 1); if (current_valid_entries == 1 && current_level != MIDGARD_MMU_LEVEL(0)) { kunmap(p); + /* Ensure the cacheline containing the last valid entry + * of PGD is invalidated from the GPU cache, before the + * PGD page is freed. 
+ */ + kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, + current_pgd + (index * sizeof(u64)), + sizeof(u64), flush_op); + list_add(&p->lru, free_pgds_list); } else { current_valid_entries--; @@ -2193,14 +2218,62 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, current_page, current_valid_entries); kunmap(p); - kbase_mmu_sync_pgd(kbdev, - kbase_dma_addr(p) + (index * sizeof(u64)), sizeof(u64)); + + kbase_mmu_sync_pgd(kbdev, mmut->kctx, current_pgd + (index * sizeof(u64)), + kbase_dma_addr(p) + (index * sizeof(u64)), sizeof(u64), + flush_op); break; } } } -/* +/** + * mmu_flush_invalidate_teardown_pages() - Perform flush operation after unmapping pages. + * + * @kbdev: Pointer to kbase device. + * @kctx: Pointer to kbase context. + * @as_nr: Address space number, for GPU cache maintenance operations + * that happen outside a specific kbase context. + * @phys: Array of physical pages to flush. + * @op_param: Non-NULL pointer to struct containing information about the flush + * operation to perform. + * + * This function will do one of three things: + * 1. Invalidate the MMU caches, followed by a partial GPU cache flush of the + * individual pages that were unmapped if feature is supported on GPU. + * 2. Perform a full GPU cache flush through the GPU_CONTROL interface if feature is + * supported on GPU or, + * 3. Perform a full GPU cache flush through the MMU_CONTROL interface. + */ +static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, + struct kbase_context *kctx, int as_nr, + struct tagged_addr *phys, + struct kbase_mmu_hw_op_param *op_param) +{ + + if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) { + mmu_flush_invalidate(kbdev, kctx, as_nr, op_param); + return; + } else if (op_param->op == KBASE_MMU_OP_FLUSH_MEM) { + mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, as_nr, op_param); + return; + } + +} + +/** + * kbase_mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table + * + * @kbdev: Pointer to kbase device. + * @mmut: Pointer to GPU MMU page table. + * @vpfn: Start page frame number of the GPU virtual pages to unmap. + * @phys: Array of physical pages currently mapped to the virtual + * pages to unmap, or NULL. This is only used for GPU cache + * maintenance. + * @nr: Number of pages to unmap. + * @as_nr: Address space number, for GPU cache maintenance operations + * that happen outside a specific kbase context. + * * We actually discard the ATE and free the page table pages if no valid entries * exist in PGD. * @@ -2209,14 +2282,22 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, * These locks must be taken in the correct order with respect to others * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more * information. + * + * The @p phys pointer to physical pages is not necessary for unmapping virtual memory, + * but it is used for fine-grained GPU cache maintenance. If @p phys is NULL, + * GPU cache maintenance will be done as usual, that is invalidating the whole GPU caches + * instead of specific physical address ranges. + * + * Return: 0 on success, otherwise an error code. 
*/ -int kbase_mmu_teardown_pages(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, u64 vpfn, size_t nr, int as_nr) +int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, int as_nr) { u64 start_vpfn = vpfn; size_t requested_nr = nr; - struct kbase_mmu_hw_op_param op_param; + enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE; struct kbase_mmu_mode const *mmu_mode; + struct kbase_mmu_hw_op_param op_param; int err = -EFAULT; u64 dirty_pgds = 0; LIST_HEAD(free_pgds_list); @@ -2230,6 +2311,19 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, /* early out if nothing to do */ return 0; } + /* MMU cache flush strategy depends on the number of pages to unmap. In both cases + * the operation is invalidate but the granularity of cache maintenance may change + * according to the situation. + * + * If GPU control command operations are present and the number of pages is "small", + * then the optimal strategy is flushing on the physical address range of the pages + * which are affected by the operation. That implies both the PGDs which are modified + * or removed from the page table and the physical pages which are freed from memory. + * + * Otherwise, there's no alternative to invalidating the whole GPU cache. + */ + if (mmu_flush_cache_on_gpu_ctrl(kbdev) && phys && nr <= KBASE_PA_RANGE_THRESHOLD_NR_PAGES) + flush_op = KBASE_MMU_OP_FLUSH_PT; if (!rt_mutex_trylock(&mmut->mmu_lock)) { /* @@ -2329,6 +2423,9 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, continue; } + if (pcount > 0) + dirty_pgds |= 1ULL << level; + num_of_valid_entries = mmu_mode->get_num_valid_entries(page); if (WARN_ON_ONCE(num_of_valid_entries < pcount)) num_of_valid_entries = 0; @@ -2341,10 +2438,19 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, if (!num_of_valid_entries) { kunmap(p); + /* Ensure the cacheline(s) containing the last valid entries + * of PGD is invalidated from the GPU cache, before the + * PGD page is freed. + */ + kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, + pgd + (index * sizeof(u64)), + pcount * sizeof(u64), flush_op); + list_add(&p->lru, &free_pgds_list); kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, - &dirty_pgds, &free_pgds_list); + flush_op, &dirty_pgds, + &free_pgds_list); vpfn += count; nr -= count; @@ -2353,11 +2459,9 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, mmu_mode->set_num_valid_entries(page, num_of_valid_entries); - - kbase_mmu_sync_pgd( - kbdev, kbase_dma_addr(p) + (index * sizeof(u64)), - pcount * sizeof(u64)); - + kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), + kbase_dma_addr(p) + (index * sizeof(u64)), pcount * sizeof(u64), + flush_op); next: kunmap(p); vpfn += count; @@ -2369,13 +2473,14 @@ out: /* Set up MMU operation parameters. See above about MMU cache flush strategy. */ op_param = (struct kbase_mmu_hw_op_param){ .vpfn = start_vpfn, - .mmu_sync_info = mmu_sync_info, .nr = requested_nr, + .mmu_sync_info = mmu_sync_info, .kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF, - .op = KBASE_MMU_OP_FLUSH_MEM, + .op = (flush_op == KBASE_MMU_OP_FLUSH_PT) ? 
KBASE_MMU_OP_FLUSH_PT : + KBASE_MMU_OP_FLUSH_MEM, + .flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds), }; - - mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, &op_param); + mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, &op_param); kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list); @@ -2397,6 +2502,7 @@ KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); * @flags: Flags * @group_id: The physical memory group in which the page was allocated. * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @dirty_pgds: Flags to track every level where a PGD has been updated. * * This will update page table entries that already exist on the GPU based on * the new flags that are passed (the physical pages pointed to by the page @@ -2409,8 +2515,8 @@ KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); * successfully, otherwise an error code. */ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int const group_id) + struct tagged_addr *phys, size_t nr, unsigned long flags, + int const group_id, u64 *dirty_pgds) { phys_addr_t pgd; u64 *pgd_page; @@ -2444,7 +2550,8 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, if (is_huge(*phys) && (index == index_in_large_page(*phys))) cur_level = MIDGARD_MMU_LEVEL(2); - err = mmu_get_pgd_at_level(kbdev, &kctx->mmu, vpfn, cur_level, &pgd); + err = mmu_get_pgd_at_level(kbdev, &kctx->mmu, vpfn, cur_level, &pgd, NULL, + dirty_pgds); if (WARN_ON(err)) goto fail_unlock; @@ -2471,9 +2578,9 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, pgd_page[level_index] = kbase_mmu_create_ate(kbdev, *target_phys, flags, MIDGARD_MMU_LEVEL(2), group_id); - kbase_mmu_sync_pgd(kbdev, - kbase_dma_addr(p) + (level_index * sizeof(u64)), - sizeof(u64)); + kbase_mmu_sync_pgd(kbdev, kctx, pgd + (level_index * sizeof(u64)), + kbase_dma_addr(p) + (level_index * sizeof(u64)), + sizeof(u64), KBASE_MMU_OP_NONE); } else { for (i = 0; i < count; i++) { #ifdef CONFIG_MALI_DEBUG @@ -2485,14 +2592,21 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, phys[i], flags, MIDGARD_MMU_BOTTOMLEVEL, group_id); } - kbase_mmu_sync_pgd(kbdev, - kbase_dma_addr(p) + (index * sizeof(u64)), - count * sizeof(u64)); + + /* MMU cache flush strategy is NONE because GPU cache maintenance + * will be done by the caller. + */ + kbase_mmu_sync_pgd(kbdev, kctx, pgd + (index * sizeof(u64)), + kbase_dma_addr(p) + (index * sizeof(u64)), + count * sizeof(u64), KBASE_MMU_OP_NONE); } kbdev->mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries); + if (dirty_pgds && count > 0) + *dirty_pgds |= 1ULL << cur_level; + phys += count; vpfn += count; nr -= count; @@ -2513,15 +2627,29 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, unsigned long flags, int const group_id) { int err; + struct kbase_mmu_hw_op_param op_param; + u64 dirty_pgds = 0; /* Calls to this function are inherently asynchronous, with respect to * MMU operations. 
*/ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; - err = kbase_mmu_update_pages_no_flush(kctx, vpfn, phys, nr, flags, - group_id); - kbase_mmu_flush_invalidate(kctx, vpfn, nr, true, mmu_sync_info); + err = kbase_mmu_update_pages_no_flush(kctx, vpfn, phys, nr, flags, group_id, &dirty_pgds); + + op_param = (const struct kbase_mmu_hw_op_param){ + .vpfn = vpfn, + .nr = nr, + .op = KBASE_MMU_OP_FLUSH_MEM, + .kctx_id = kctx->id, + .mmu_sync_info = mmu_sync_info, + .flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds), + }; + + if (mmu_flush_cache_on_gpu_ctrl(kctx->kbdev)) + mmu_flush_invalidate_on_gpu_ctrl(kctx->kbdev, kctx, kctx->as_nr, &op_param); + else + mmu_flush_invalidate(kctx->kbdev, kctx, kctx->as_nr, &op_param); return err; } @@ -2583,7 +2711,7 @@ int kbase_mmu_init(struct kbase_device *const kbdev, mmut->group_id = group_id; rt_mutex_init(&mmut->mmu_lock); mmut->kctx = kctx; - mmut->pgd = 0; + mmut->pgd = KBASE_MMU_INVALID_PGD_ADDRESS; /* Preallocate MMU depth of 3 pages for mmu_teardown_level to use */ for (level = MIDGARD_MMU_TOPLEVEL; @@ -2601,7 +2729,7 @@ int kbase_mmu_init(struct kbase_device *const kbdev, * kbase_mmu_alloc_pgd will allocate out of that pool. This is done to * avoid allocations from the kernel happening with the lock held. */ - while (!mmut->pgd) { + while (mmut->pgd == KBASE_MMU_INVALID_PGD_ADDRESS) { int err; err = kbase_mem_pool_grow( @@ -2624,7 +2752,7 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) { int level; - if (mmut->pgd) { + if (mmut->pgd != KBASE_MMU_INVALID_PGD_ADDRESS) { rt_mutex_lock(&mmut->mmu_lock); mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL); rt_mutex_unlock(&mmut->mmu_lock); diff --git a/mali_kbase/mmu/mali_kbase_mmu.h b/mali_kbase/mmu/mali_kbase_mmu.h index 1c8e8b0..5330306 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.h +++ b/mali_kbase/mmu/mali_kbase_mmu.h @@ -25,6 +25,7 @@ #include <uapi/gpu/arm/midgard/mali_base_kernel.h> #define KBASE_MMU_PAGE_ENTRIES 512 +#define KBASE_MMU_INVALID_PGD_ADDRESS (~(phys_addr_t)0) struct kbase_context; struct kbase_mmu_table; @@ -142,9 +143,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, unsigned long flags, int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info); -int kbase_mmu_teardown_pages(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, u64 vpfn, - size_t nr, int as_nr); +int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, int as_nr); int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int const group_id); diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw.h b/mali_kbase/mmu/mali_kbase_mmu_hw.h index 4f73380..438dd5e 100644 --- a/mali_kbase/mmu/mali_kbase_mmu_hw.h +++ b/mali_kbase/mmu/mali_kbase_mmu_hw.h @@ -75,12 +75,14 @@ enum kbase_mmu_op_type { }; /** - * struct kbase_mmu_hw_op_param - parameters for kbase_mmu_hw_do_operation() - * @vpfn: MMU Virtual Page Frame Number to start the operation on. - * @nr: Number of pages to work on. - * @op: Operation type (written to ASn_COMMAND). - * @kctx_id: Kernel context ID for MMU command tracepoint - * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops. + * struct kbase_mmu_hw_op_param - parameters for kbase_mmu_hw_do_* functions + * @vpfn: MMU Virtual Page Frame Number to start the operation on. + * @nr: Number of pages to work on. 
+ * @op: Operation type (written to ASn_COMMAND). + * @kctx_id: Kernel context ID for MMU command tracepoint. + * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops. + * @flush_skip_levels: Page table levels to skip flushing. (Only + * applicable if GPU supports feature) */ struct kbase_mmu_hw_op_param { u64 vpfn; @@ -88,6 +90,7 @@ struct kbase_mmu_hw_op_param { enum kbase_mmu_op_type op; u32 kctx_id; enum kbase_caller_mmu_sync_info mmu_sync_info; + u64 flush_skip_levels; }; /** @@ -111,13 +114,11 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, * @op_param: Pointer to struct containing information about the MMU * operation to perform. * - * This function should be called for GPU where GPU command is used to flush - * the cache(s) instead of MMU command. - * * Return: 0 if issuing the command was successful, otherwise an error code. */ int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param); + /** * kbase_mmu_hw_do_unlock - Issue UNLOCK command to the MMU and wait for it * to complete before returning. @@ -144,10 +145,13 @@ int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as, * GPUs where MMU command to flush the cache(s) is deprecated. * mmu_hw_mutex needs to be held when calling this function. * - * Return: 0 if the operation was successful, non-zero otherwise. + * Context: Acquires the hwaccess_lock, expects the caller to hold the mmu_hw_mutex + * + * Return: Zero if the operation was successful, non-zero otherwise. */ int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param); + /** * kbase_mmu_hw_do_flush_locked - Issue a flush operation to the MMU. * @@ -162,12 +166,29 @@ int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, * Both mmu_hw_mutex and hwaccess_lock need to be held when calling this * function. * - * Return: 0 if the operation was successful, non-zero otherwise. + * Return: Zero if the operation was successful, non-zero otherwise. */ int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param); /** + * kbase_mmu_hw_do_flush_on_gpu_ctrl - Issue a flush operation to the MMU. + * + * @kbdev: Kbase device to issue the MMU operation on. + * @as: Address space to issue the MMU operation on. + * @op_param: Pointer to struct containing information about the MMU + * operation to perform. + * + * Issue a flush operation on the address space as per the information + * specified inside @op_param. GPU command is used to flush the cache(s) + * instead of the MMU command. + * + * Return: Zero if the operation was successful, non-zero otherwise. + */ +int kbase_mmu_hw_do_flush_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param); + +/** * kbase_mmu_hw_clear_fault - Clear a fault that has been previously reported by * the MMU. * @kbdev: kbase device to clear the fault from. 
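For context on how the new flush_skip_levels field is meant to be consumed: the callers above accumulate a dirty_pgds bitmask (one bit per Midgard MMU level that was actually modified) and convert it with pgd_level_to_skip_flush(). That helper is not part of this hunk, so the standalone sketch below only illustrates the idea under the assumption that the skip mask is the complement of the dirty bits over the four levels, which is consistent with pgd_level_to_skip_flush(0xF) being used above to prevent skipping any level. The sketch_ names are invented for this example.

#include <stdint.h>
#include <stdio.h>

#define MIDGARD_MMU_TOPLEVEL    0
#define MIDGARD_MMU_BOTTOMLEVEL 3

/* Hypothetical stand-in for pgd_level_to_skip_flush(): levels that were not
 * dirtied do not need their cachelines flushed, so they can be skipped.
 */
static uint64_t sketch_level_to_skip_flush(uint64_t dirty_pgds)
{
        return ~dirty_pgds & ((1ULL << (MIDGARD_MMU_BOTTOMLEVEL + 1)) - 1);
}

int main(void)
{
        uint64_t dirty_pgds = 0;

        /* An insertion that only touched an already existing bottom-level PGD */
        dirty_pgds |= 1ULL << MIDGARD_MMU_BOTTOMLEVEL;

        /* Levels 0..2 were untouched, so a FLUSH_PT issued through GPU control
         * could skip them entirely.
         */
        printf("dirty=0x%llx skip=0x%llx\n",
               (unsigned long long)dirty_pgds,
               (unsigned long long)sketch_level_to_skip_flush(dirty_pgds));
        return 0;
}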
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c index cf89c0e..1a6157a 100644 --- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c +++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c @@ -26,12 +26,17 @@ #include <mali_kbase_mem.h> #include <mmu/mali_kbase_mmu_hw.h> #include <tl/mali_kbase_tracepoints.h> +#include <linux/delay.h> + /** * lock_region() - Generate lockaddr to lock memory region in MMU - * @gpu_props: GPU properties for finding the MMU lock region size - * @lockaddr: Address and size of memory region to lock - * @op_param: Pointer to a struct containing information about the MMU operation. + * + * @gpu_props: GPU properties for finding the MMU lock region size. + * @lockaddr: Address and size of memory region to lock. + * @op_param: Pointer to a struct containing the starting page frame number of + * the region to lock, the number of pages to lock and page table + * levels to skip when flushing (if supported). * * The lockaddr value is a combination of the starting address and * the size of the region that encompasses all the memory pages to lock. @@ -62,13 +67,13 @@ * * Return: 0 if success, or an error code on failure. */ - static int lock_region(struct kbase_gpu_props const *gpu_props, u64 *lockaddr, const struct kbase_mmu_hw_op_param *op_param) { const u64 lockaddr_base = op_param->vpfn << PAGE_SHIFT; const u64 lockaddr_end = ((op_param->vpfn + op_param->nr) << PAGE_SHIFT) - 1; u64 lockaddr_size_log2; + if (op_param->nr == 0) return -EINVAL; @@ -121,14 +126,13 @@ static int lock_region(struct kbase_gpu_props const *gpu_props, u64 *lockaddr, */ *lockaddr = lockaddr_base & ~((1ull << lockaddr_size_log2) - 1); *lockaddr |= lockaddr_size_log2 - 1; - return 0; } static int wait_ready(struct kbase_device *kbdev, unsigned int as_nr) { - unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; + u32 max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; /* Wait for the MMU status to indicate there is no active command. */ while (--max_loops && @@ -167,6 +171,100 @@ static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd) return status; } +#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_NO_MALI) +static int wait_cores_power_trans_complete(struct kbase_device *kbdev) +{ +#define WAIT_TIMEOUT 1000 /* 1ms timeout */ +#define DELAY_TIME_IN_US 1 + const int max_iterations = WAIT_TIMEOUT; + int loop; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (loop = 0; loop < max_iterations; loop++) { + u32 lo = + kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_PWRTRANS_LO)); + u32 hi = + kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_PWRTRANS_HI)); + + if (!lo && !hi) + break; + + udelay(DELAY_TIME_IN_US); + } + + if (loop == max_iterations) { + dev_warn(kbdev->dev, "SHADER_PWRTRANS set for too long"); + return -ETIMEDOUT; + } + + return 0; +} + +/** + * apply_hw_issue_GPU2019_3901_wa - Apply WA for the HW issue GPU2019_3901 + * + * @kbdev: Kbase device to issue the MMU operation on. + * @mmu_cmd: Pointer to the variable containing the value of MMU command + * that needs to be sent to flush the L2 cache and do an + * implicit unlock. + * @as_nr: Address space number for which MMU command needs to be + * sent. + * @hwaccess_locked: Flag to indicate if hwaccess_lock is held by the caller. + * + * This function ensures that the flush of LSC is not missed for the pages that + * were unmapped from the GPU, due to the power down transition of shader cores. + * + * Return: 0 if the WA was successfully applied, non-zero otherwise.
+ */ +static int apply_hw_issue_GPU2019_3901_wa(struct kbase_device *kbdev, + u32 *mmu_cmd, unsigned int as_nr, bool hwaccess_locked) +{ + unsigned long flags = 0; + int ret = 0; + + if (!hwaccess_locked) + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + /* Check if L2 is OFF. The cores also must be OFF if L2 is not up, so + * the workaround can be safely skipped. + */ + if (kbdev->pm.backend.l2_state != KBASE_L2_OFF) { + if (*mmu_cmd != AS_COMMAND_FLUSH_MEM) { + dev_warn(kbdev->dev, + "Unexpected mmu command received"); + ret = -EINVAL; + goto unlock; + } + + /* Wait for the LOCK MMU command to complete, issued by the caller */ + ret = wait_ready(kbdev, as_nr); + if (ret) + goto unlock; + + ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, + GPU_COMMAND_CACHE_CLN_INV_LSC); + if (ret) + goto unlock; + + ret = wait_cores_power_trans_complete(kbdev); + if (ret) + goto unlock; + + /* As LSC is guaranteed to have been flushed we can use FLUSH_PT + * MMU command to only flush the L2. + */ + *mmu_cmd = AS_COMMAND_FLUSH_PT; + } + +unlock: + if (!hwaccess_locked) + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return ret; +} +#endif + void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as) { struct kbase_mmu_setup *current_setup = &as->current_setup; @@ -224,6 +322,27 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as) #endif } +/** + * mmu_command_instr - Record an MMU command for instrumentation purposes. + * + * @kbdev: Kbase device used to issue MMU operation on. + * @kctx_id: Kernel context ID for MMU command tracepoint. + * @cmd: Command issued to the MMU. + * @lock_addr: Address of memory region locked for the operation. + * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops. + */ +static void mmu_command_instr(struct kbase_device *kbdev, u32 kctx_id, u32 cmd, u64 lock_addr, + enum kbase_caller_mmu_sync_info mmu_sync_info) +{ + u64 lock_addr_base = AS_LOCKADDR_LOCKADDR_BASE_GET(lock_addr); + u32 lock_addr_size = AS_LOCKADDR_LOCKADDR_SIZE_GET(lock_addr); + + bool is_mmu_synchronous = (mmu_sync_info == CALLER_MMU_SYNC); + + KBASE_TLSTREAM_AUX_MMU_COMMAND(kbdev, kctx_id, cmd, is_mmu_synchronous, lock_addr_base, + lock_addr_size); +} + /* Helper function to program the LOCKADDR register before LOCK/UNLOCK command * is issued. */ @@ -231,7 +350,9 @@ static int mmu_hw_set_lock_addr(struct kbase_device *kbdev, int as_nr, u64 *lock const struct kbase_mmu_hw_op_param *op_param) { int ret; + ret = lock_region(&kbdev->gpu_props, lock_addr, op_param); + if (!ret) { /* Set the region that needs to be updated */ kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_LOCKADDR_LO), @@ -241,6 +362,7 @@ static int mmu_hw_set_lock_addr(struct kbase_device *kbdev, int as_nr, u64 *lock } return ret; } + /** * mmu_hw_do_lock_no_wait - Issue LOCK command to the MMU and return without * waiting for it's completion. 
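The LOCKADDR value produced by lock_region() above, and decoded again in mmu_command_instr() via AS_LOCKADDR_LOCKADDR_BASE_GET/SIZE_GET, packs the region base and the log2 of the region size into a single u64. A minimal standalone sketch of that encoding is shown below; the 6-bit size field and the sketch_ helper names are assumptions made for illustration and are not the driver's definitions.

#include <stdint.h>
#include <stdio.h>

/* Illustrative encode: the base is aligned down to the power-of-two region
 * size, and (log2(size) - 1) is stored in the low bits, mirroring what
 * lock_region() computes in this patch.
 */
static uint64_t sketch_encode_lockaddr(uint64_t base, unsigned int size_log2)
{
        uint64_t lockaddr = base & ~((1ULL << size_log2) - 1);

        lockaddr |= size_log2 - 1;
        return lockaddr;
}

static void sketch_decode_lockaddr(uint64_t lockaddr)
{
        unsigned int size_log2 = (unsigned int)(lockaddr & 0x3F) + 1;
        uint64_t base = lockaddr & ~0x3FULL;

        printf("base=0x%llx size=2^%u bytes\n", (unsigned long long)base, size_log2);
}

int main(void)
{
        /* A 64 KiB (2^16 byte) region covering an address around 0x81234000 */
        uint64_t lockaddr = sketch_encode_lockaddr(0x81234000ULL, 16);

        sketch_decode_lockaddr(lockaddr);
        return 0;
}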
@@ -256,74 +378,191 @@ static int mmu_hw_do_lock_no_wait(struct kbase_device *kbdev, struct kbase_as *a const struct kbase_mmu_hw_op_param *op_param) { int ret; + ret = mmu_hw_set_lock_addr(kbdev, as->number, lock_addr, op_param); + if (!ret) write_cmd(kbdev, as->number, AS_COMMAND_LOCK); + return ret; } + +static int mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param) +{ + int ret; + u64 lock_addr = 0x0; + + if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) + return -EINVAL; + + ret = mmu_hw_do_lock_no_wait(kbdev, as, &lock_addr, op_param); + + if (!ret) + ret = wait_ready(kbdev, as->number); + + if (!ret) + mmu_command_instr(kbdev, op_param->kctx_id, AS_COMMAND_LOCK, lock_addr, + op_param->mmu_sync_info); + else + dev_err(kbdev->dev, "AS_ACTIVE bit stuck after sending UNLOCK command"); + + return ret; +} + int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param) { int ret = 0; + if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) return -EINVAL; + ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK); + /* Wait for UNLOCK command to complete */ if (!ret) ret = wait_ready(kbdev, as->number); + + if (!ret) { + u64 lock_addr = 0x0; + /* read MMU_AS_CONTROL.LOCKADDR register */ + lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI)) + << 32; + lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO)); + + mmu_command_instr(kbdev, op_param->kctx_id, AS_COMMAND_UNLOCK, + lock_addr, op_param->mmu_sync_info); + } + return ret; } + int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param) { int ret = 0; u64 lock_addr = 0x0; + if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) return -EINVAL; + ret = mmu_hw_set_lock_addr(kbdev, as->number, &lock_addr, op_param); + if (!ret) - ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, op_param); + ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, + op_param); + return ret; } + static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, - const struct kbase_mmu_hw_op_param *op_param, bool hwaccess_locked) + const struct kbase_mmu_hw_op_param *op_param, bool hwaccess_locked) { int ret; u64 lock_addr = 0x0; u32 mmu_cmd = AS_COMMAND_FLUSH_MEM; + if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) return -EINVAL; + /* MMU operations can be either FLUSH_PT or FLUSH_MEM, anything else at * this point would be unexpected. */ - if (op_param->op != KBASE_MMU_OP_FLUSH_PT && op_param->op != KBASE_MMU_OP_FLUSH_MEM) { + if (op_param->op != KBASE_MMU_OP_FLUSH_PT && + op_param->op != KBASE_MMU_OP_FLUSH_MEM) { dev_err(kbdev->dev, "Unexpected flush operation received"); return -EINVAL; } + lockdep_assert_held(&kbdev->mmu_hw_mutex); + if (op_param->op == KBASE_MMU_OP_FLUSH_PT) mmu_cmd = AS_COMMAND_FLUSH_PT; + /* Lock the region that needs to be updated */ ret = mmu_hw_do_lock_no_wait(kbdev, as, &lock_addr, op_param); if (ret) return ret; + +#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_NO_MALI) + /* WA for the BASE_HW_ISSUE_GPU2019_3901. No runtime check is used here + * as the WA is applicable to all CSF GPUs where FLUSH_MEM/PT command is + * supported, and this function doesn't gets called for the GPUs where + * FLUSH_MEM/PT command is deprecated. 
+ */ + if (mmu_cmd == AS_COMMAND_FLUSH_MEM) { + ret = apply_hw_issue_GPU2019_3901_wa(kbdev, &mmu_cmd, + as->number, hwaccess_locked); + if (ret) + return ret; + } +#endif + write_cmd(kbdev, as->number, mmu_cmd); + /* Wait for the command to complete */ ret = wait_ready(kbdev, as->number); + + if (!ret) + mmu_command_instr(kbdev, op_param->kctx_id, mmu_cmd, lock_addr, + op_param->mmu_sync_info); + return ret; } + int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param) { lockdep_assert_held(&kbdev->hwaccess_lock); + return mmu_hw_do_flush(kbdev, as, op_param, true); } + int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param) { return mmu_hw_do_flush(kbdev, as, op_param, false); } +int kbase_mmu_hw_do_flush_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param) +{ + int ret, ret2; + u32 gpu_cmd = GPU_COMMAND_CACHE_CLN_INV_L2_LSC; + + if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) + return -EINVAL; + + /* MMU operations can be either FLUSH_PT or FLUSH_MEM, anything else at + * this point would be unexpected. + */ + if (op_param->op != KBASE_MMU_OP_FLUSH_PT && + op_param->op != KBASE_MMU_OP_FLUSH_MEM) { + dev_err(kbdev->dev, "Unexpected flush operation received"); + return -EINVAL; + } + + lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->mmu_hw_mutex); + + if (op_param->op == KBASE_MMU_OP_FLUSH_PT) + gpu_cmd = GPU_COMMAND_CACHE_CLN_INV_L2; + + /* 1. Issue MMU_AS_CONTROL.COMMAND.LOCK operation. */ + ret = mmu_hw_do_lock(kbdev, as, op_param); + if (ret) + return ret; + + /* 2. Issue GPU_CONTROL.COMMAND.FLUSH_CACHES operation */ + ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, gpu_cmd); + + /* 3. Issue MMU_AS_CONTROL.COMMAND.UNLOCK operation. */ + ret2 = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, op_param); + + return ret ?: ret2; +} + void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, enum kbase_mmu_fault_type type) { diff --git a/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c b/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c index 0dd8a55..ff1d902 100644 --- a/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c +++ b/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,6 +29,7 @@ #include "mali_kbase_config_platform.h" + static void enable_gpu_power_control(struct kbase_device *kbdev) { unsigned int i; @@ -50,7 +51,6 @@ static void enable_gpu_power_control(struct kbase_device *kbdev) } } - static void disable_gpu_power_control(struct kbase_device *kbdev) { unsigned int i; @@ -99,9 +99,8 @@ static int pm_callback_power_on(struct kbase_device *kbdev) #else spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#ifdef KBASE_PM_RUNTIME error = pm_runtime_get_sync(kbdev->dev); - enable_gpu_power_control(kbdev); - if (error == 1) { /* * Let core know that the chip has not been @@ -109,8 +108,11 @@ static int pm_callback_power_on(struct kbase_device *kbdev) */ ret = 0; } - dev_dbg(kbdev->dev, "pm_runtime_get_sync returned %d\n", error); +#else + enable_gpu_power_control(kbdev); +#endif /* KBASE_PM_RUNTIME */ + #endif /* MALI_USE_CSF */ return ret; @@ -126,7 +128,9 @@ static void pm_callback_power_off(struct kbase_device *kbdev) WARN_ON(kbdev->pm.backend.gpu_powered); #if MALI_USE_CSF if (likely(kbdev->csf.firmware_inited)) { +#ifdef CONFIG_MALI_DEBUG WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev)); +#endif WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_OFF); } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -241,7 +245,9 @@ static int pm_callback_runtime_on(struct kbase_device *kbdev) { dev_dbg(kbdev->dev, "%s\n", __func__); +#if !MALI_USE_CSF enable_gpu_power_control(kbdev); +#endif return 0; } @@ -249,7 +255,9 @@ static void pm_callback_runtime_off(struct kbase_device *kbdev) { dev_dbg(kbdev->dev, "%s\n", __func__); +#if !MALI_USE_CSF disable_gpu_power_control(kbdev); +#endif } static void pm_callback_resume(struct kbase_device *kbdev) diff --git a/mali_kbase/platform/meson/Kbuild b/mali_kbase/platform/meson/Kbuild new file mode 100644 index 0000000..3f55378 --- /dev/null +++ b/mali_kbase/platform/meson/Kbuild @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2012-2017, 2019-2021 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +mali_kbase-y += \ + platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_meson.o \ + platform/$(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o diff --git a/mali_kbase/platform/meson/mali_kbase_config_meson.c b/mali_kbase/platform/meson/mali_kbase_config_meson.c new file mode 100644 index 0000000..c999a52 --- /dev/null +++ b/mali_kbase/platform/meson/mali_kbase_config_meson.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2015, 2017, 2019, 2021, 2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include <mali_kbase.h> +#include <mali_kbase_config.h> +#include <backend/gpu/mali_kbase_pm_internal.h> + +static struct kbase_platform_config dummy_platform_config; + +struct kbase_platform_config *kbase_get_platform_config(void) +{ + return &dummy_platform_config; +} + +#ifndef CONFIG_OF +int kbase_platform_register(void) +{ + return 0; +} + +void kbase_platform_unregister(void) +{ +} +#endif + +#ifdef CONFIG_MALI_MIDGARD_DVFS +#if MALI_USE_CSF +int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation) +#else +int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2]) +#endif +{ + return 1; +} +#endif /* CONFIG_MALI_MIDGARD_DVFS */ diff --git a/mali_kbase/platform/meson/mali_kbase_config_platform.h b/mali_kbase/platform/meson/mali_kbase_config_platform.h new file mode 100644 index 0000000..06279e2 --- /dev/null +++ b/mali_kbase/platform/meson/mali_kbase_config_platform.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2014-2017, 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/** + * Power management configuration + * + * Attached value: pointer to @ref kbase_pm_callback_conf + * Default value: See @ref kbase_pm_callback_conf + */ +#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) + +/** + * Platform specific configuration functions + * + * Attached value: pointer to @ref kbase_platform_funcs_conf + * Default value: See @ref kbase_platform_funcs_conf + */ +#define PLATFORM_FUNCS (NULL) + +extern struct kbase_pm_callback_conf pm_callbacks; + +/** + * Autosuspend delay + * + * The delay time (in milliseconds) to be used for autosuspend + */ +#define AUTO_SUSPEND_DELAY (100) diff --git a/mali_kbase/platform/meson/mali_kbase_runtime_pm.c b/mali_kbase/platform/meson/mali_kbase_runtime_pm.c new file mode 100644 index 0000000..c00cbcb --- /dev/null +++ b/mali_kbase/platform/meson/mali_kbase_runtime_pm.c @@ -0,0 +1,265 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2015, 2017-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include <mali_kbase.h> +#include <mali_kbase_defs.h> +#include <device/mali_kbase_device.h> + +#include <linux/pm_runtime.h> +#include <linux/reset.h> +#include <linux/clk.h> +#include <linux/clk-provider.h> +#include <linux/delay.h> +#include <linux/regulator/consumer.h> + +#include "mali_kbase_config_platform.h" + + +static struct reset_control **resets; +static int nr_resets; + +static int resets_init(struct kbase_device *kbdev) +{ + struct device_node *np; + int i; + int err = 0; + + np = kbdev->dev->of_node; + + nr_resets = of_count_phandle_with_args(np, "resets", "#reset-cells"); + if (nr_resets <= 0) { + dev_err(kbdev->dev, "Failed to get GPU resets from dtb\n"); + return nr_resets; + } + + resets = devm_kcalloc(kbdev->dev, nr_resets, sizeof(*resets), + GFP_KERNEL); + if (!resets) + return -ENOMEM; + + for (i = 0; i < nr_resets; ++i) { + resets[i] = devm_reset_control_get_exclusive_by_index( + kbdev->dev, i); + if (IS_ERR(resets[i])) { + err = PTR_ERR(resets[i]); + nr_resets = i; + break; + } + } + + return err; +} + +static int pm_callback_soft_reset(struct kbase_device *kbdev) +{ + int ret, i; + + if (!resets) { + ret = resets_init(kbdev); + if (ret) + return ret; + } + + for (i = 0; i < nr_resets; ++i) + reset_control_assert(resets[i]); + + udelay(10); + + for (i = 0; i < nr_resets; ++i) + reset_control_deassert(resets[i]); + + udelay(10); + + /* Override Power Management Settings, values from manufacturer's defaults */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), + 0xfff | (0x20 << 16)); + + /* + * RESET_COMPLETED interrupt will be raised, so continue with + * the normal soft reset procedure + */ + return 0; +} + +static void enable_gpu_power_control(struct kbase_device *kbdev) +{ + unsigned int i; + +#if defined(CONFIG_REGULATOR) + for (i = 0; i < kbdev->nr_regulators; i++) { + if (WARN_ON(kbdev->regulators[i] == NULL)) + ; + else if (!regulator_is_enabled(kbdev->regulators[i])) + WARN_ON(regulator_enable(kbdev->regulators[i])); + } +#endif + + for (i = 0; i < kbdev->nr_clocks; i++) { + if (WARN_ON(kbdev->clocks[i] == NULL)) + ; + else if (!__clk_is_enabled(kbdev->clocks[i])) + WARN_ON(clk_prepare_enable(kbdev->clocks[i])); + } +} + +static void disable_gpu_power_control(struct kbase_device *kbdev) +{ + unsigned int i; + + for (i = 0; i < kbdev->nr_clocks; i++) { + if (WARN_ON(kbdev->clocks[i] == NULL)) + ; + else if (__clk_is_enabled(kbdev->clocks[i])) { + clk_disable_unprepare(kbdev->clocks[i]); + WARN_ON(__clk_is_enabled(kbdev->clocks[i])); + } + } + +#if defined(CONFIG_REGULATOR) + for (i = 0; i < kbdev->nr_regulators; i++) { + if (WARN_ON(kbdev->regulators[i] == NULL)) + ; + else if (regulator_is_enabled(kbdev->regulators[i])) + WARN_ON(regulator_disable(kbdev->regulators[i])); + } +#endif +} + +static 
int pm_callback_power_on(struct kbase_device *kbdev) +{ + int ret = 1; /* Assume GPU has been powered off */ + int error; + + dev_dbg(kbdev->dev, "%s %p\n", __func__, (void *)kbdev->dev->pm_domain); + +#ifdef KBASE_PM_RUNTIME + error = pm_runtime_get_sync(kbdev->dev); + if (error == 1) { + /* + * Let core know that the chip has not been + * powered off, so we can save on re-initialization. + */ + ret = 0; + } + dev_dbg(kbdev->dev, "pm_runtime_get_sync returned %d\n", error); +#else + enable_gpu_power_control(kbdev); +#endif + + return ret; +} + +static void pm_callback_power_off(struct kbase_device *kbdev) +{ + dev_dbg(kbdev->dev, "%s\n", __func__); + +#ifdef KBASE_PM_RUNTIME + pm_runtime_mark_last_busy(kbdev->dev); + pm_runtime_put_autosuspend(kbdev->dev); +#else + /* Power down the GPU immediately as runtime PM is disabled */ + disable_gpu_power_control(kbdev); +#endif +} + +#ifdef KBASE_PM_RUNTIME +static int kbase_device_runtime_init(struct kbase_device *kbdev) +{ + int ret = 0; + + dev_dbg(kbdev->dev, "%s\n", __func__); + + pm_runtime_set_autosuspend_delay(kbdev->dev, AUTO_SUSPEND_DELAY); + pm_runtime_use_autosuspend(kbdev->dev); + + pm_runtime_set_active(kbdev->dev); + pm_runtime_enable(kbdev->dev); + + if (!pm_runtime_enabled(kbdev->dev)) { + dev_warn(kbdev->dev, "pm_runtime not enabled"); + ret = -EINVAL; + } else if (atomic_read(&kbdev->dev->power.usage_count)) { + dev_warn(kbdev->dev, "%s: Device runtime usage count unexpectedly non zero %d", + __func__, atomic_read(&kbdev->dev->power.usage_count)); + ret = -EINVAL; + } + + return ret; +} + +static void kbase_device_runtime_disable(struct kbase_device *kbdev) +{ + dev_dbg(kbdev->dev, "%s\n", __func__); + + if (atomic_read(&kbdev->dev->power.usage_count)) + dev_warn(kbdev->dev, "%s: Device runtime usage count unexpectedly non zero %d", + __func__, atomic_read(&kbdev->dev->power.usage_count)); + + pm_runtime_disable(kbdev->dev); +} +#endif /* KBASE_PM_RUNTIME */ + +static int pm_callback_runtime_on(struct kbase_device *kbdev) +{ + dev_dbg(kbdev->dev, "%s\n", __func__); + + enable_gpu_power_control(kbdev); + return 0; +} + +static void pm_callback_runtime_off(struct kbase_device *kbdev) +{ + dev_dbg(kbdev->dev, "%s\n", __func__); + + disable_gpu_power_control(kbdev); +} + +static void pm_callback_resume(struct kbase_device *kbdev) +{ + int ret = pm_callback_runtime_on(kbdev); + + WARN_ON(ret); +} + +static void pm_callback_suspend(struct kbase_device *kbdev) +{ + pm_callback_runtime_off(kbdev); +} + +struct kbase_pm_callback_conf pm_callbacks = { + .power_on_callback = pm_callback_power_on, + .power_off_callback = pm_callback_power_off, + .power_suspend_callback = pm_callback_suspend, + .power_resume_callback = pm_callback_resume, + .soft_reset_callback = pm_callback_soft_reset, +#ifdef KBASE_PM_RUNTIME + .power_runtime_init_callback = kbase_device_runtime_init, + .power_runtime_term_callback = kbase_device_runtime_disable, + .power_runtime_on_callback = pm_callback_runtime_on, + .power_runtime_off_callback = pm_callback_runtime_off, +#else /* KBASE_PM_RUNTIME */ + .power_runtime_init_callback = NULL, + .power_runtime_term_callback = NULL, + .power_runtime_on_callback = NULL, + .power_runtime_off_callback = NULL, +#endif /* KBASE_PM_RUNTIME */ +}; diff --git a/mali_kbase/platform/pixel/pixel_gpu_sscd.c b/mali_kbase/platform/pixel/pixel_gpu_sscd.c index 44a55d9..7a0885c 100644 --- a/mali_kbase/platform/pixel/pixel_gpu_sscd.c +++ b/mali_kbase/platform/pixel/pixel_gpu_sscd.c @@ -116,7 +116,7 @@ static void get_fw_trace(struct 
kbase_device *kbdev, struct sscd_segment *seg) .version = 1, }; - tb = kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME); + tb = kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); if (tb == NULL) { dev_err(kbdev->dev, "pixel: failed to open firmware trace buffer"); diff --git a/mali_kbase/tests/Kbuild b/mali_kbase/tests/Kbuild index ee3de7b..38e4dd4 100644 --- a/mali_kbase/tests/Kbuild +++ b/mali_kbase/tests/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -27,4 +27,5 @@ subdir-ccflags-y += -I$(src)/include \ obj-$(CONFIG_MALI_KUTF) += kutf/ obj-$(CONFIG_MALI_KUTF_IRQ_TEST) += mali_kutf_irq_test/ obj-$(CONFIG_MALI_KUTF_CLK_RATE_TRACE) += mali_kutf_clk_rate_trace/kernel/ +obj-$(CONFIG_MALI_KUTF_MGM_INTEGRATION) += mali_kutf_mgm_integration_test/ diff --git a/mali_kbase/tests/Kconfig b/mali_kbase/tests/Kconfig index a86e1ce..e6f0376 100644 --- a/mali_kbase/tests/Kconfig +++ b/mali_kbase/tests/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -52,6 +52,18 @@ config MALI_KUTF_CLK_RATE_TRACE Modules: - mali_kutf_clk_rate_trace_test_portal.ko +config MALI_KUTF_MGM_INTEGRATION_TEST + bool "Build Mali KUTF MGM integration test module" + depends on MALI_KUTF + default y + help + This option will build the MGM integration test module. + It can test the implementation of PTE translation for specific + group ids. + + Modules: + - mali_kutf_mgm_integration_test.ko + comment "Enable MALI_DEBUG for KUTF modules support" depends on MALI_MIDGARD && !MALI_DEBUG && MALI_KUTF diff --git a/mali_kbase/tests/Mconfig b/mali_kbase/tests/Mconfig index 167facd..4203971 100644 --- a/mali_kbase/tests/Mconfig +++ b/mali_kbase/tests/Mconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -52,6 +52,18 @@ config MALI_KUTF_CLK_RATE_TRACE Modules: - mali_kutf_clk_rate_trace_test_portal.ko +config MALI_KUTF_MGM_INTEGRATION_TEST + bool "Build Mali KUTF MGM integration test module" + depends on MALI_KUTF + default y + help + This option will build the MGM integration test module. + It can test the implementation of PTE translation for specific + group ids. + + Modules: + - mali_kutf_mgm_integration_test.ko + # Enable MALI_DEBUG for KUTF modules support diff --git a/mali_kbase/tests/kutf/kutf_helpers_user.c b/mali_kbase/tests/kutf/kutf_helpers_user.c index f88e138..c4e2943 100644 --- a/mali_kbase/tests/kutf/kutf_helpers_user.c +++ b/mali_kbase/tests/kutf/kutf_helpers_user.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,7 +28,7 @@ #include <linux/slab.h> #include <linux/export.h> -const char *valtype_names[] = { +static const char *const valtype_names[] = { "INVALID", "U64", "STR", diff --git a/mali_kbase/tests/kutf/kutf_suite.c b/mali_kbase/tests/kutf/kutf_suite.c index 91065b5..4468066 100644 --- a/mali_kbase/tests/kutf/kutf_suite.c +++ b/mali_kbase/tests/kutf/kutf_suite.c @@ -106,22 +106,16 @@ struct kutf_convert_table { enum kutf_result_status result; }; -struct kutf_convert_table kutf_convert[] = { -#define ADD_UTF_RESULT(_name) \ -{ \ - #_name, \ - _name, \ -}, -ADD_UTF_RESULT(KUTF_RESULT_BENCHMARK) -ADD_UTF_RESULT(KUTF_RESULT_SKIP) -ADD_UTF_RESULT(KUTF_RESULT_UNKNOWN) -ADD_UTF_RESULT(KUTF_RESULT_PASS) -ADD_UTF_RESULT(KUTF_RESULT_DEBUG) -ADD_UTF_RESULT(KUTF_RESULT_INFO) -ADD_UTF_RESULT(KUTF_RESULT_WARN) -ADD_UTF_RESULT(KUTF_RESULT_FAIL) -ADD_UTF_RESULT(KUTF_RESULT_FATAL) -ADD_UTF_RESULT(KUTF_RESULT_ABORT) +static const struct kutf_convert_table kutf_convert[] = { +#define ADD_UTF_RESULT(_name) \ + { \ +#_name, _name, \ + } + ADD_UTF_RESULT(KUTF_RESULT_BENCHMARK), ADD_UTF_RESULT(KUTF_RESULT_SKIP), + ADD_UTF_RESULT(KUTF_RESULT_UNKNOWN), ADD_UTF_RESULT(KUTF_RESULT_PASS), + ADD_UTF_RESULT(KUTF_RESULT_DEBUG), ADD_UTF_RESULT(KUTF_RESULT_INFO), + ADD_UTF_RESULT(KUTF_RESULT_WARN), ADD_UTF_RESULT(KUTF_RESULT_FAIL), + ADD_UTF_RESULT(KUTF_RESULT_FATAL), ADD_UTF_RESULT(KUTF_RESULT_ABORT), }; #define UTF_CONVERT_SIZE (ARRAY_SIZE(kutf_convert)) @@ -191,8 +185,7 @@ static void kutf_set_expected_result(struct kutf_context *context, * * Return: 1 if test result was successfully converted to string, 0 otherwise */ -static int kutf_result_to_string(char **result_str, - enum kutf_result_status result) +static int kutf_result_to_string(const char **result_str, enum kutf_result_status result) { int i; int ret = 0; @@ -382,7 +375,7 @@ static ssize_t kutf_debugfs_run_read(struct file *file, char __user *buf, struct kutf_result *res; unsigned long bytes_not_copied; ssize_t bytes_copied = 0; - char *kutf_str_ptr = NULL; + const char *kutf_str_ptr = NULL; size_t kutf_str_len = 0; size_t message_len = 0; char separator = ':'; @@ -599,11 +592,7 @@ static int create_fixture_variant(struct kutf_test_function *test_func, goto fail_file; } -#if KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE tmp = debugfs_create_file_unsafe( -#else - tmp = debugfs_create_file( -#endif "run", 0600, test_fix->dir, test_fix, &kutf_debugfs_run_ops); diff --git a/mali_kbase/tests/kutf/kutf_utils.c b/mali_kbase/tests/kutf/kutf_utils.c index 2ae1510..21f5fad 100644 --- a/mali_kbase/tests/kutf/kutf_utils.c +++ b/mali_kbase/tests/kutf/kutf_utils.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2017, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,7 +31,7 @@ static char tmp_buffer[KUTF_MAX_DSPRINTF_LEN]; -DEFINE_MUTEX(buffer_lock); +static DEFINE_MUTEX(buffer_lock); const char *kutf_dsprintf(struct kutf_mempool *pool, const char *fmt, ...) 
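The constified kutf_convert[] table reworked above keeps using the ADD_UTF_RESULT() stringification macro so that each enum value and its printable name cannot drift out of sync. A minimal standalone version of the same pattern, with invented result names, could look like this:

#include <stdio.h>

enum sketch_result { SKETCH_PASS, SKETCH_FAIL };

struct sketch_convert_entry {
        const char *name;
        enum sketch_result result;
};

/* #_name stringifies the enum identifier, so the table entry always matches
 * the enum value it was built from.
 */
#define ADD_SKETCH_RESULT(_name) { #_name, _name }

static const struct sketch_convert_entry sketch_convert[] = {
        ADD_SKETCH_RESULT(SKETCH_PASS),
        ADD_SKETCH_RESULT(SKETCH_FAIL),
};

int main(void)
{
        size_t i;

        for (i = 0; i < sizeof(sketch_convert) / sizeof(sketch_convert[0]); i++)
                printf("%d -> %s\n", sketch_convert[i].result, sketch_convert[i].name);
        return 0;
}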
diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c index 935f8ca..2d7289d 100644 --- a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c +++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c @@ -46,7 +46,7 @@ #define MINOR_FOR_FIRST_KBASE_DEV (-1) /* KUTF test application pointer for this test */ -struct kutf_application *kutf_app; +static struct kutf_application *kutf_app; enum portal_server_state { PORTAL_STATE_NO_CLK, @@ -113,7 +113,7 @@ struct kbasep_cmd_name_pair { const char *name; }; -struct kbasep_cmd_name_pair kbasep_portal_cmd_name_map[] = { +static const struct kbasep_cmd_name_pair kbasep_portal_cmd_name_map[] = { { PORTAL_CMD_GET_PLATFORM, GET_PLATFORM }, { PORTAL_CMD_GET_CLK_RATE_MGR, GET_CLK_RATE_MGR }, { PORTAL_CMD_GET_CLK_RATE_TRACE, GET_CLK_RATE_TRACE }, @@ -128,7 +128,7 @@ struct kbasep_cmd_name_pair kbasep_portal_cmd_name_map[] = { * this pointer is engaged, new requests for create fixture will fail * hence limiting the use of the portal at any time to a singleton. */ -struct kutf_clk_rate_trace_fixture_data *g_ptr_portal_data; +static struct kutf_clk_rate_trace_fixture_data *g_ptr_portal_data; #define PORTAL_MSG_LEN (KUTF_MAX_LINE_LENGTH - MAX_REPLY_NAME_LEN) static char portal_msg_buf[PORTAL_MSG_LEN]; @@ -825,14 +825,14 @@ static void *mali_kutf_clk_rate_trace_create_fixture( if (!data) return NULL; - *data = (const struct kutf_clk_rate_trace_fixture_data) { 0 }; + *data = (const struct kutf_clk_rate_trace_fixture_data){ NULL }; pr_debug("Hooking up the test portal to kbdev clk rate trace\n"); spin_lock(&kbdev->pm.clk_rtm.lock); if (g_ptr_portal_data != NULL) { pr_warn("Test portal is already in use, run aborted\n"); - kutf_test_fail(context, "Portal allows single session only"); spin_unlock(&kbdev->pm.clk_rtm.lock); + kutf_test_fail(context, "Portal allows single session only"); return NULL; } @@ -909,7 +909,7 @@ static int __init mali_kutf_clk_rate_trace_test_module_init(void) { struct kutf_suite *suite; unsigned int filters; - union kutf_callback_data suite_data = { 0 }; + union kutf_callback_data suite_data = { NULL }; pr_debug("Creating app\n"); diff --git a/mali_kbase/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c b/mali_kbase/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c index 5824a4c..2d6e689 100644 --- a/mali_kbase/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c +++ b/mali_kbase/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c @@ -40,7 +40,7 @@ */ /* KUTF test application pointer for this test */ -struct kutf_application *irq_app; +static struct kutf_application *irq_app; /** * struct kutf_irq_fixture_data - test fixture used by the test functions. diff --git a/mali_kbase/tests/mali_kutf_mgm_integration_test/Kbuild b/mali_kbase/tests/mali_kutf_mgm_integration_test/Kbuild new file mode 100644 index 0000000..e9bff98 --- /dev/null +++ b/mali_kbase/tests/mali_kutf_mgm_integration_test/Kbuild @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +ifeq ($(CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST),y) +obj-m += mali_kutf_mgm_integration_test.o + +mali_kutf_mgm_integration_test-y := mali_kutf_mgm_integration_test_main.o +endif diff --git a/mali_kbase/tests/mali_kutf_mgm_integration_test/build.bp b/mali_kbase/tests/mali_kutf_mgm_integration_test/build.bp new file mode 100644 index 0000000..2e4a083 --- /dev/null +++ b/mali_kbase/tests/mali_kutf_mgm_integration_test/build.bp @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ +bob_kernel_module { + name: "mali_kutf_mgm_integration_test", + defaults: [ + "mali_kbase_shared_config_defaults", + "kernel_test_configs", + "kernel_test_includes", + ], + srcs: [ + "Kbuild", + "mali_kutf_mgm_integration_test_main.c", + ], + extra_symbols: [ + "mali_kbase", + "kutf", + ], + enabled: false, + mali_kutf_mgm_integration_test: { + kbuild_options: ["CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST=y"], + enabled: true, + }, +}
\ No newline at end of file diff --git a/mali_kbase/tests/mali_kutf_mgm_integration_test/mali_kutf_mgm_integration_test_main.c b/mali_kbase/tests/mali_kutf_mgm_integration_test/mali_kutf_mgm_integration_test_main.c new file mode 100644 index 0000000..5a42bd6 --- /dev/null +++ b/mali_kbase/tests/mali_kutf_mgm_integration_test/mali_kutf_mgm_integration_test_main.c @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ +#include <linux/module.h> +#include "mali_kbase.h" +#include <kutf/kutf_suite.h> +#include <kutf/kutf_utils.h> +#include <kutf/kutf_helpers.h> +#include <kutf/kutf_helpers_user.h> + +#define MINOR_FOR_FIRST_KBASE_DEV (-1) + +#define BASE_MEM_GROUP_COUNT (16) +#define PA_MAX ((1ULL << 48) - 1) +#define PA_START_BIT 12 +#define ENTRY_ACCESS_BIT (1ULL << 10) + +#define ENTRY_IS_ATE_L3 3ULL +#define ENTRY_IS_ATE_L02 1ULL + +#define MGM_INTEGRATION_SUITE_NAME "mgm_integration" +#define MGM_INTEGRATION_PTE_TRANSLATION "pte_translation" + +static char msg_buf[KUTF_MAX_LINE_LENGTH]; + +/* KUTF test application pointer for this test */ +struct kutf_application *mgm_app; + +/** + * struct kutf_mgm_fixture_data - test fixture used by test functions + * @kbdev: kbase device for the GPU. + * @group_id: Memory group ID to test based on fixture index. + */ +struct kutf_mgm_fixture_data { + struct kbase_device *kbdev; + int group_id; +}; + +/** + * mali_kutf_mgm_pte_translation_test() - Tests forward and reverse translation + * of PTE by the MGM module + * @context: KUTF context within which to perform the test. + * + * This test creates PTEs with physical addresses in the range + * 0x0000-0xFFFFFFFFF000 and tests that mgm_update_gpu_pte() returns a different + * PTE and mgm_pte_to_original_pte() returns the original PTE. This is tested + * at MMU level 2 and 3 as mgm_update_gpu_pte() is called for ATEs only. + * + * This test is run for a specific group_id depending on the fixture_id. 
+ */ +static void mali_kutf_mgm_pte_translation_test(struct kutf_context *context) +{ + struct kutf_mgm_fixture_data *data = context->fixture; + struct kbase_device *kbdev = data->kbdev; + struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev; + u64 addr; + + for (addr = 1 << (PA_START_BIT - 1); addr <= PA_MAX; addr <<= 1) { + /* Mask 1 << 11 by ~0xFFF to get 0x0000 at first iteration */ + phys_addr_t pa = addr; + u8 mmu_level; + + /* Test MMU level 3 and 2 (2MB pages) only */ + for (mmu_level = MIDGARD_MMU_LEVEL(2); mmu_level <= MIDGARD_MMU_LEVEL(3); + mmu_level++) { + u64 translated_pte; + u64 returned_pte; + u64 original_pte; + + if (mmu_level == MIDGARD_MMU_LEVEL(3)) + original_pte = + (pa & PAGE_MASK) | ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L3; + else + original_pte = + (pa & PAGE_MASK) | ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L02; + + dev_dbg(kbdev->dev, "Testing group_id=%u, mmu_level=%u, pte=0x%llx\n", + data->group_id, mmu_level, original_pte); + + translated_pte = mgm_dev->ops.mgm_update_gpu_pte(mgm_dev, data->group_id, + mmu_level, original_pte); + if (translated_pte == original_pte) { + snprintf( + msg_buf, sizeof(msg_buf), + "PTE unchanged. translated_pte (0x%llx) == original_pte (0x%llx) for mmu_level=%u, group_id=%d", + translated_pte, original_pte, mmu_level, data->group_id); + kutf_test_fail(context, msg_buf); + return; + } + + returned_pte = mgm_dev->ops.mgm_pte_to_original_pte( + mgm_dev, data->group_id, mmu_level, translated_pte); + dev_dbg(kbdev->dev, "\treturned_pte=%llx\n", returned_pte); + + if (returned_pte != original_pte) { + snprintf( + msg_buf, sizeof(msg_buf), + "Original PTE not returned. returned_pte (0x%llx) != original_pte (0x%llx) for mmu_level=%u, group_id=%d", + returned_pte, original_pte, mmu_level, data->group_id); + kutf_test_fail(context, msg_buf); + return; + } + } + } + snprintf(msg_buf, sizeof(msg_buf), "Translation passed for group_id=%d", data->group_id); + kutf_test_pass(context, msg_buf); +} + +/** + * mali_kutf_mgm_integration_create_fixture() - Creates the fixture data + * required for all tests in the mgm integration suite. + * @context: KUTF context. + * + * Return: Fixture data created on success or NULL on failure + */ +static void *mali_kutf_mgm_integration_create_fixture(struct kutf_context *context) +{ + struct kutf_mgm_fixture_data *data; + struct kbase_device *kbdev; + + pr_debug("Finding kbase device\n"); + kbdev = kbase_find_device(MINOR_FOR_FIRST_KBASE_DEV); + if (kbdev == NULL) { + kutf_test_fail(context, "Failed to find kbase device"); + return NULL; + } + pr_debug("Creating fixture\n"); + + data = kutf_mempool_alloc(&context->fixture_pool, sizeof(struct kutf_mgm_fixture_data)); + if (!data) + return NULL; + data->kbdev = kbdev; + data->group_id = context->fixture_index; + + pr_debug("Fixture created\n"); + return data; +} + +/** + * mali_kutf_mgm_integration_remove_fixture() - Destroy fixture data previously + * created by mali_kutf_mgm_integration_create_fixture. + * @context: KUTF context. + */ +static void mali_kutf_mgm_integration_remove_fixture(struct kutf_context *context) +{ + struct kutf_mgm_fixture_data *data = context->fixture; + struct kbase_device *kbdev = data->kbdev; + + kbase_release_device(kbdev); +} + +/** + * mali_kutf_mgm_integration_test_main_init() - Module entry point for this test. + * + * Return: 0 on success, error code on failure. 
+ */ +static int __init mali_kutf_mgm_integration_test_main_init(void) +{ + struct kutf_suite *suite; + + mgm_app = kutf_create_application("mgm"); + + if (mgm_app == NULL) { + pr_warn("Creation of mgm KUTF app failed!\n"); + return -ENOMEM; + } + suite = kutf_create_suite(mgm_app, MGM_INTEGRATION_SUITE_NAME, BASE_MEM_GROUP_COUNT, + mali_kutf_mgm_integration_create_fixture, + mali_kutf_mgm_integration_remove_fixture); + if (suite == NULL) { + pr_warn("Creation of %s suite failed!\n", MGM_INTEGRATION_SUITE_NAME); + kutf_destroy_application(mgm_app); + return -ENOMEM; + } + kutf_add_test(suite, 0x0, MGM_INTEGRATION_PTE_TRANSLATION, + mali_kutf_mgm_pte_translation_test); + return 0; +} + +/** + * mali_kutf_mgm_integration_test_main_exit() - Module exit point for this test. + */ +static void __exit mali_kutf_mgm_integration_test_main_exit(void) +{ + kutf_destroy_application(mgm_app); +} + +module_init(mali_kutf_mgm_integration_test_main_init); +module_exit(mali_kutf_mgm_integration_test_main_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("ARM Ltd."); +MODULE_VERSION("1.0"); diff --git a/mali_kbase/tl/mali_kbase_timeline.c b/mali_kbase/tl/mali_kbase_timeline.c index d656c03..09de3f0 100644 --- a/mali_kbase/tl/mali_kbase_timeline.c +++ b/mali_kbase/tl/mali_kbase_timeline.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,7 +26,6 @@ #include <mali_kbase.h> #include <mali_kbase_jm.h> -#include <linux/anon_inodes.h> #include <linux/atomic.h> #include <linux/file.h> #include <linux/mutex.h> @@ -35,7 +34,7 @@ #include <linux/stringify.h> #include <linux/timer.h> #include <linux/wait.h> - +#include <linux/delay.h> /* The period of autoflush checker execution in milliseconds. */ #define AUTOFLUSH_INTERVAL 1000 /* ms */ @@ -184,90 +183,109 @@ static void kbase_tlstream_current_devfreq_target(struct kbase_device *kbdev) } #endif /* CONFIG_MALI_DEVFREQ */ -int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) +int kbase_timeline_acquire(struct kbase_device *kbdev, u32 flags) { - int ret = 0; + int err = 0; u32 timeline_flags = TLSTREAM_ENABLED | flags; - struct kbase_timeline *timeline = kbdev->timeline; + struct kbase_timeline *timeline; + int rcode; + + if (WARN_ON(!kbdev) || WARN_ON(flags & ~BASE_TLSTREAM_FLAGS_MASK)) + return -EINVAL; + + timeline = kbdev->timeline; + if (WARN_ON(!timeline)) + return -EFAULT; - if (!atomic_cmpxchg(timeline->timeline_flags, 0, timeline_flags)) { - int rcode; + if (atomic_cmpxchg(timeline->timeline_flags, 0, timeline_flags)) + return -EBUSY; #if MALI_USE_CSF - if (flags & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) { - ret = kbase_csf_tl_reader_start( - &timeline->csf_tl_reader, kbdev); - if (ret) { - atomic_set(timeline->timeline_flags, 0); - return ret; - } - } -#endif - ret = anon_inode_getfd( - "[mali_tlstream]", - &kbasep_tlstream_fops, - timeline, - O_RDONLY | O_CLOEXEC); - if (ret < 0) { + if (flags & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) { + err = kbase_csf_tl_reader_start(&timeline->csf_tl_reader, kbdev); + if (err) { atomic_set(timeline->timeline_flags, 0); -#if MALI_USE_CSF - kbase_csf_tl_reader_stop(&timeline->csf_tl_reader); -#endif - return ret; + return err; } + } +#endif - /* Reset and initialize header streams. 
*/ - kbase_tlstream_reset( - &timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]); + /* Reset and initialize header streams. */ + kbase_tlstream_reset(&timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]); - timeline->obj_header_btc = obj_desc_header_size; - timeline->aux_header_btc = aux_desc_header_size; + timeline->obj_header_btc = obj_desc_header_size; + timeline->aux_header_btc = aux_desc_header_size; #if !MALI_USE_CSF - /* If job dumping is enabled, readjust the software event's - * timeout as the default value of 3 seconds is often - * insufficient. - */ - if (flags & BASE_TLSTREAM_JOB_DUMPING_ENABLED) { - dev_info(kbdev->dev, - "Job dumping is enabled, readjusting the software event's timeout\n"); - atomic_set(&kbdev->js_data.soft_job_timeout_ms, - 1800000); - } + /* If job dumping is enabled, readjust the software event's + * timeout as the default value of 3 seconds is often + * insufficient. + */ + if (flags & BASE_TLSTREAM_JOB_DUMPING_ENABLED) { + dev_info(kbdev->dev, + "Job dumping is enabled, readjusting the software event's timeout\n"); + atomic_set(&kbdev->js_data.soft_job_timeout_ms, 1800000); + } #endif /* !MALI_USE_CSF */ - /* Summary stream was cleared during acquire. - * Create static timeline objects that will be - * read by client. - */ - kbase_create_timeline_objects(kbdev); + /* Summary stream was cleared during acquire. + * Create static timeline objects that will be + * read by client. + */ + kbase_create_timeline_objects(kbdev); #ifdef CONFIG_MALI_DEVFREQ - /* Devfreq target tracepoints are only fired when the target - * changes, so we won't know the current target unless we - * send it now. - */ - kbase_tlstream_current_devfreq_target(kbdev); + /* Devfreq target tracepoints are only fired when the target + * changes, so we won't know the current target unless we + * send it now. + */ + kbase_tlstream_current_devfreq_target(kbdev); #endif /* CONFIG_MALI_DEVFREQ */ - /* Start the autoflush timer. - * We must do this after creating timeline objects to ensure we - * don't auto-flush the streams which will be reset during the - * summarization process. - */ - atomic_set(&timeline->autoflush_timer_active, 1); - rcode = mod_timer(&timeline->autoflush_timer, - jiffies + - msecs_to_jiffies(AUTOFLUSH_INTERVAL)); - CSTD_UNUSED(rcode); - } else { - ret = -EBUSY; - } + /* Start the autoflush timer. + * We must do this after creating timeline objects to ensure we + * don't auto-flush the streams which will be reset during the + * summarization process. + */ + atomic_set(&timeline->autoflush_timer_active, 1); + rcode = mod_timer(&timeline->autoflush_timer, + jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); + CSTD_UNUSED(rcode); + + timeline->last_acquire_time = ktime_get_raw(); + + return err; +} + +void kbase_timeline_release(struct kbase_timeline *timeline) +{ + ktime_t elapsed_time; + s64 elapsed_time_ms, time_to_sleep; + + if (WARN_ON(!timeline) || WARN_ON(!atomic_read(timeline->timeline_flags))) + return; + + /* Get the amount of time passed since the timeline was acquired and ensure + * we sleep for long enough such that it has been at least + * TIMELINE_HYSTERESIS_TIMEOUT_MS amount of time between acquire and release. + * This prevents userspace from spamming acquire and release too quickly. + */ + elapsed_time = ktime_sub(ktime_get_raw(), timeline->last_acquire_time); + elapsed_time_ms = ktime_to_ms(elapsed_time); + time_to_sleep = (elapsed_time_ms < 0 ? 
TIMELINE_HYSTERESIS_TIMEOUT_MS : + TIMELINE_HYSTERESIS_TIMEOUT_MS - elapsed_time_ms); + if (time_to_sleep > 0) + msleep_interruptible(time_to_sleep); - if (ret >= 0) - timeline->last_acquire_time = ktime_get(); +#if MALI_USE_CSF + kbase_csf_tl_reader_stop(&timeline->csf_tl_reader); +#endif - return ret; + /* Stop autoflush timer before releasing access to streams. */ + atomic_set(&timeline->autoflush_timer_active, 0); + del_timer_sync(&timeline->autoflush_timer); + + atomic_set(timeline->timeline_flags, 0); } int kbase_timeline_streams_flush(struct kbase_timeline *timeline) @@ -275,11 +293,17 @@ int kbase_timeline_streams_flush(struct kbase_timeline *timeline) enum tl_stream_type stype; bool has_bytes = false; size_t nbytes = 0; + + if (WARN_ON(!timeline)) + return -EINVAL; + #if MALI_USE_CSF - int ret = kbase_csf_tl_reader_flush_buffer(&timeline->csf_tl_reader); + { + int ret = kbase_csf_tl_reader_flush_buffer(&timeline->csf_tl_reader); - if (ret > 0) - has_bytes = true; + if (ret > 0) + has_bytes = true; + } #endif for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) { diff --git a/mali_kbase/tl/mali_kbase_timeline.h b/mali_kbase/tl/mali_kbase_timeline.h index 96a4b18..62be6c6 100644 --- a/mali_kbase/tl/mali_kbase_timeline.h +++ b/mali_kbase/tl/mali_kbase_timeline.h @@ -117,4 +117,12 @@ void kbase_timeline_post_kbase_context_destroy(struct kbase_context *kctx); void kbase_timeline_stats(struct kbase_timeline *timeline, u32 *bytes_collected, u32 *bytes_generated); #endif /* MALI_UNIT_TEST */ +/** + * kbase_timeline_io_debugfs_init - Add a debugfs entry for reading timeline stream data + * + * @kbdev: An instance of the GPU platform device, allocated from the probe + * method of the driver. + */ +void kbase_timeline_io_debugfs_init(struct kbase_device *kbdev); + #endif /* _KBASE_TIMELINE_H */ diff --git a/mali_kbase/tl/mali_kbase_timeline_io.c b/mali_kbase/tl/mali_kbase_timeline_io.c index 3391e75..03178cc 100644 --- a/mali_kbase/tl/mali_kbase_timeline_io.c +++ b/mali_kbase/tl/mali_kbase_timeline_io.c @@ -24,27 +24,20 @@ #include "mali_kbase_tracepoints.h" #include "mali_kbase_timeline.h" -#include <linux/delay.h> +#include <device/mali_kbase_device.h> + #include <linux/poll.h> +#include <linux/version_compat_defs.h> +#include <linux/anon_inodes.h> /* The timeline stream file operations functions. */ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, size_t size, loff_t *f_pos); -static unsigned int kbasep_timeline_io_poll(struct file *filp, - poll_table *wait); +static __poll_t kbasep_timeline_io_poll(struct file *filp, poll_table *wait); static int kbasep_timeline_io_release(struct inode *inode, struct file *filp); static int kbasep_timeline_io_fsync(struct file *filp, loff_t start, loff_t end, int datasync); -/* The timeline stream file operations structure. 
*/ -const struct file_operations kbasep_tlstream_fops = { - .owner = THIS_MODULE, - .release = kbasep_timeline_io_release, - .read = kbasep_timeline_io_read, - .poll = kbasep_timeline_io_poll, - .fsync = kbasep_timeline_io_fsync, -}; - /** * kbasep_timeline_io_packet_pending - check timeline streams for pending * packets @@ -292,7 +285,7 @@ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, * * Return: POLLIN if data can be read without blocking, otherwise zero */ -static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait) +static __poll_t kbasep_timeline_io_poll(struct file *filp, poll_table *wait) { struct kbase_tlstream *stream; unsigned int rb_idx; @@ -302,20 +295,90 @@ static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait) KBASE_DEBUG_ASSERT(wait); if (WARN_ON(!filp->private_data)) - return -EFAULT; + return (__force __poll_t)-EFAULT; timeline = (struct kbase_timeline *)filp->private_data; /* If there are header bytes to copy, read will not block */ if (kbasep_timeline_has_header_data(timeline)) - return POLLIN; + return (__force __poll_t)POLLIN; poll_wait(filp, &timeline->event_queue, wait); if (kbasep_timeline_io_packet_pending(timeline, &stream, &rb_idx)) - return POLLIN; + return (__force __poll_t)POLLIN; return 0; } +int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) +{ + /* The timeline stream file operations structure. */ + static const struct file_operations kbasep_tlstream_fops = { + .owner = THIS_MODULE, + .release = kbasep_timeline_io_release, + .read = kbasep_timeline_io_read, + .poll = kbasep_timeline_io_poll, + .fsync = kbasep_timeline_io_fsync, + }; + int err; + + if (WARN_ON(!kbdev) || (flags & ~BASE_TLSTREAM_FLAGS_MASK)) + return -EINVAL; + + err = kbase_timeline_acquire(kbdev, flags); + if (err) + return err; + + err = anon_inode_getfd("[mali_tlstream]", &kbasep_tlstream_fops, kbdev->timeline, + O_RDONLY | O_CLOEXEC); + if (err < 0) + kbase_timeline_release(kbdev->timeline); + + return err; +} + +#if IS_ENABLED(CONFIG_DEBUG_FS) +static int kbasep_timeline_io_open(struct inode *in, struct file *file) +{ + struct kbase_device *const kbdev = in->i_private; + + if (WARN_ON(!kbdev)) + return -EFAULT; + + file->private_data = kbdev->timeline; + return kbase_timeline_acquire(kbdev, BASE_TLSTREAM_FLAGS_MASK & + ~BASE_TLSTREAM_JOB_DUMPING_ENABLED); +} + +void kbase_timeline_io_debugfs_init(struct kbase_device *const kbdev) +{ + static const struct file_operations kbasep_tlstream_debugfs_fops = { + .owner = THIS_MODULE, + .open = kbasep_timeline_io_open, + .release = kbasep_timeline_io_release, + .read = kbasep_timeline_io_read, + .poll = kbasep_timeline_io_poll, + .fsync = kbasep_timeline_io_fsync, + }; + struct dentry *file; + + if (WARN_ON(!kbdev) || WARN_ON(IS_ERR_OR_NULL(kbdev->mali_debugfs_directory))) + return; + + file = debugfs_create_file("tlstream", 0444, kbdev->mali_debugfs_directory, kbdev, + &kbasep_tlstream_debugfs_fops); + + if (IS_ERR_OR_NULL(file)) + dev_warn(kbdev->dev, "Unable to create timeline debugfs entry"); +} +#else +/* + * Stub function for when debugfs is disabled + */ +void kbase_timeline_io_debugfs_init(struct kbase_device *const kbdev) +{ +} +#endif + /** * kbasep_timeline_io_release - release timeline stream descriptor * @inode: Pointer to inode structure @@ -325,55 +388,18 @@ static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait) */ static int kbasep_timeline_io_release(struct inode *inode, struct file *filp) { - struct 
kbase_timeline *timeline; - ktime_t elapsed_time; - s64 elapsed_time_ms, time_to_sleep; - - KBASE_DEBUG_ASSERT(inode); - KBASE_DEBUG_ASSERT(filp); - KBASE_DEBUG_ASSERT(filp->private_data); - CSTD_UNUSED(inode); - timeline = (struct kbase_timeline *)filp->private_data; - - /* Get the amount of time passed since the timeline was acquired and ensure - * we sleep for long enough such that it has been at least - * TIMELINE_HYSTERESIS_TIMEOUT_MS amount of time between acquire and release. - * This prevents userspace from spamming acquire and release too quickly. - */ - elapsed_time = ktime_sub(ktime_get(), timeline->last_acquire_time); - elapsed_time_ms = ktime_to_ms(elapsed_time); - time_to_sleep = MIN(TIMELINE_HYSTERESIS_TIMEOUT_MS, - TIMELINE_HYSTERESIS_TIMEOUT_MS - elapsed_time_ms); - if (time_to_sleep > 0) - msleep(time_to_sleep); - -#if MALI_USE_CSF - kbase_csf_tl_reader_stop(&timeline->csf_tl_reader); -#endif - - /* Stop autoflush timer before releasing access to streams. */ - atomic_set(&timeline->autoflush_timer_active, 0); - del_timer_sync(&timeline->autoflush_timer); - - atomic_set(timeline->timeline_flags, 0); + kbase_timeline_release(filp->private_data); return 0; } static int kbasep_timeline_io_fsync(struct file *filp, loff_t start, loff_t end, int datasync) { - struct kbase_timeline *timeline; - CSTD_UNUSED(start); CSTD_UNUSED(end); CSTD_UNUSED(datasync); - if (WARN_ON(!filp->private_data)) - return -EFAULT; - - timeline = (struct kbase_timeline *)filp->private_data; - - return kbase_timeline_streams_flush(timeline); + return kbase_timeline_streams_flush(filp->private_data); } diff --git a/mali_kbase/tl/mali_kbase_timeline_priv.h b/mali_kbase/tl/mali_kbase_timeline_priv.h index bf2c385..de30bcc 100644 --- a/mali_kbase/tl/mali_kbase_timeline_priv.h +++ b/mali_kbase/tl/mali_kbase_timeline_priv.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -51,7 +51,7 @@ * @event_queue: Timeline stream event queue * @bytes_collected: Number of bytes read by user * @timeline_flags: Zero, if timeline is disabled. Timeline stream flags - * otherwise. See kbase_timeline_io_acquire(). + * otherwise. See kbase_timeline_acquire(). * @obj_header_btc: Remaining bytes to copy for the object stream header * @aux_header_btc: Remaining bytes to copy for the aux stream header * @last_acquire_time: The time at which timeline was last acquired. @@ -77,8 +77,27 @@ struct kbase_timeline { #endif }; -extern const struct file_operations kbasep_tlstream_fops; - void kbase_create_timeline_objects(struct kbase_device *kbdev); +/** + * kbase_timeline_acquire - acquire timeline for a userspace client. + * @kbdev: An instance of the GPU platform device, allocated from the probe + * method of the driver. + * @flags: Timeline stream flags + * + * Each timeline instance can be acquired by only one userspace client at a time. + * + * Return: Zero on success, error number on failure (e.g. if already acquired). + */ +int kbase_timeline_acquire(struct kbase_device *kbdev, u32 flags); + +/** + * kbase_timeline_release - release timeline for a userspace client. + * @timeline: Timeline instance to be stopped. It must be previously acquired + * with kbase_timeline_acquire(). 
+ * + * Releasing the timeline instance allows it to be acquired by another userspace client. + */ +void kbase_timeline_release(struct kbase_timeline *timeline); + #endif /* _KBASE_TIMELINE_PRIV_H */ diff --git a/mali_kbase/tl/mali_kbase_tracepoints.c b/mali_kbase/tl/mali_kbase_tracepoints.c index 6aae4e0..3ac7850 100644 --- a/mali_kbase/tl/mali_kbase_tracepoints.c +++ b/mali_kbase/tl/mali_kbase_tracepoints.c @@ -305,11 +305,11 @@ enum tl_msg_id_obj { "@p", \ "atom") \ TRACEPOINT_DESC(KBASE_TL_JD_DONE_NO_LOCK_START, \ - "Within function jd_done_nolock", \ + "Within function kbase_jd_done_nolock", \ "@p", \ "atom") \ TRACEPOINT_DESC(KBASE_TL_JD_DONE_NO_LOCK_END, \ - "Within function jd_done_nolock - end", \ + "Within function kbase_jd_done_nolock - end", \ "@p", \ "atom") \ TRACEPOINT_DESC(KBASE_TL_JD_DONE_START, \ diff --git a/mali_kbase/tl/mali_kbase_tracepoints.h b/mali_kbase/tl/mali_kbase_tracepoints.h index b15fe6a..f01fc54 100644 --- a/mali_kbase/tl/mali_kbase_tracepoints.h +++ b/mali_kbase/tl/mali_kbase_tracepoints.h @@ -1686,7 +1686,7 @@ struct kbase_tlstream; } while (0) /** - * KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_START - Within function jd_done_nolock + * KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_START - Within function kbase_jd_done_nolock * * @kbdev: Kbase device * @atom: Atom identifier @@ -1705,7 +1705,7 @@ struct kbase_tlstream; } while (0) /** - * KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_END - Within function jd_done_nolock - end + * KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_END - Within function kbase_jd_done_nolock - end * * @kbdev: Kbase device * @atom: Atom identifier diff --git a/mali_pixel/memory_group_manager.c b/mali_pixel/memory_group_manager.c index faa414e..436404e 100644 --- a/mali_pixel/memory_group_manager.c +++ b/mali_pixel/memory_group_manager.c @@ -549,7 +549,7 @@ static u64 mgm_update_gpu_pte( switch (group_id) { case MGM_RESERVED_GROUP_ID: - case MGM_IMPORTED_MEMORY_GROUP_ID: + case MGM_IMPORTED_MEMORY_GROUP_ID: /* The reserved group doesn't set PBHA bits */ /* TODO: Determine what to do with imported memory */ break; @@ -745,13 +745,14 @@ static int memory_group_manager_probe(struct platform_device *pdev) return -ENOMEM; mgm_dev->owner = THIS_MODULE; - mgm_dev->ops.mgm_alloc_page = mgm_alloc_page; - mgm_dev->ops.mgm_free_page = mgm_free_page; - mgm_dev->ops.mgm_get_import_memory_id = - mgm_get_import_memory_id; - mgm_dev->ops.mgm_vmf_insert_pfn_prot = mgm_vmf_insert_pfn_prot; - mgm_dev->ops.mgm_update_gpu_pte = mgm_update_gpu_pte; - mgm_dev->ops.mgm_pte_to_original_pte = mgm_pte_to_original_pte; + mgm_dev->ops = (struct memory_group_manager_ops){ + .mgm_alloc_page = mgm_alloc_page, + .mgm_free_page = mgm_free_page, + .mgm_get_import_memory_id = mgm_get_import_memory_id, + .mgm_update_gpu_pte = mgm_update_gpu_pte, + .mgm_pte_to_original_pte = mgm_pte_to_original_pte, + .mgm_vmf_insert_pfn_prot = mgm_vmf_insert_pfn_prot, + }; mgm_data = kzalloc(sizeof(*mgm_data), GFP_KERNEL); if (!mgm_data) { |