author | Vamsidhar reddy Gaddam <gvamsi@google.com> | 2024-03-13 09:45:22 +0000 |
---|---|---|
committer | Vamsidhar reddy Gaddam <gvamsi@google.com> | 2024-03-13 09:45:22 +0000 |
commit | a999cd8fd398aed7390c8e5d99795e9b735d6ba7 (patch) | |
tree | 79503e1b07ccfd66140fb903be3a0f2e0ace147c | |
parent | 049a542207ed694271316782397b78b2e202086a (diff) | |
download | gpu-a999cd8fd398aed7390c8e5d99795e9b735d6ba7.tar.gz | |
Update KMD to r48p0
Provenance: ipdelivery@02a6b5e039b17fd395ddc13d09efbe440223a56c
Change-Id: Ia6d72d40f5c57508d818ad24e57547c1a411d644
Signed-off-by: Vamsidhar reddy Gaddam <gvamsi@google.com>
91 files changed, 2048 insertions, 1198 deletions
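The most visible uAPI addition in this drop is CS_FAULT reporting to userspace (CSF interface versions 1.25–1.27 in the ioctl header below): queue groups gain a `cs_fault_report_enable` flag at creation time, and a new `KBASE_IOCTL_QUEUE_GROUP_CLEAR_FAULTS` ioctl (command 61) re-enables fault reporting for a set of queues after a fault has been delivered. The following is a minimal userspace sketch of that ioctl, not code from this tree: it assumes the uAPI headers added here are on the include path and that `fd` is an already-opened, version-checked kbase device file descriptor; the helper name and include path are illustrative.

```c
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>

/* uAPI header updated in this commit; path follows the tree layout above (assumed). */
#include <uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h>

/*
 * Re-enable CS_FAULT reporting for a set of GPU queues.
 * queue_buf_gpu_vas: array of GPU VAs of the buffers backing the queues.
 * Returns 0 on success, -1 with errno set on failure.
 */
static int clear_queue_faults(int fd, const uint64_t *queue_buf_gpu_vas, uint32_t nr_queues)
{
	struct kbase_ioctl_queue_group_clear_faults args;

	memset(&args, 0, sizeof(args));                      /* padding must be zero */
	args.addr = (uint64_t)(uintptr_t)queue_buf_gpu_vas;  /* CPU VA of the GPU-VA array */
	args.nr_queues = nr_queues;

	return ioctl(fd, KBASE_IOCTL_QUEUE_GROUP_CLEAR_FAULTS, &args);
}
```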
diff --git a/common/include/linux/mali_hw_access.h b/common/include/linux/mali_hw_access.h new file mode 100644 index 0000000..ca73036 --- /dev/null +++ b/common/include/linux/mali_hw_access.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _MALI_HW_ACCESS_H_ +#define _MALI_HW_ACCESS_H_ + +#include <asm/arch_timer.h> +#include <linux/io.h> + + +#define mali_readl(addr) readl(addr) +#define mali_readq(addr) readq(addr) +#define mali_writel(val, addr) writel(val, addr) +#define mali_writeq(val, addr) writeq(val, addr) +#define mali_ioremap(addr, size) ioremap(addr, size) +#define mali_iounmap(addr) iounmap(addr) +#define mali_arch_timer_get_cntfrq() arch_timer_get_cntfrq() + + +#endif /* _MALI_HW_ACCESS_H_ */ diff --git a/common/include/linux/memory_group_manager.h b/common/include/linux/memory_group_manager.h index 3820f1b..e92d3de 100644 --- a/common/include/linux/memory_group_manager.h +++ b/common/include/linux/memory_group_manager.h @@ -32,6 +32,10 @@ typedef int vm_fault_t; #define MEMORY_GROUP_MANAGER_NR_GROUPS (16) +#define PTE_PBHA_SHIFT (59) +#define PTE_PBHA_MASK ((uint64_t)0xf << PTE_PBHA_SHIFT) +#define PTE_RES_BIT_MULTI_AS_SHIFT (63) + struct memory_group_manager_device; struct memory_group_manager_import_data; diff --git a/common/include/linux/version_compat_defs.h b/common/include/linux/version_compat_defs.h index 366b50c..f8594a6 100644 --- a/common/include/linux/version_compat_defs.h +++ b/common/include/linux/version_compat_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -348,4 +348,43 @@ static inline long kbase_pin_user_pages_remote(struct task_struct *tsk, struct m #endif /* (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) */ +#if (KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE) +/* Null definition */ +#define ALLOW_ERROR_INJECTION(fname, err_type) +#endif /* (KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE) */ + +#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE +#define KBASE_REGISTER_SHRINKER(reclaim, name, priv_data) register_shrinker(reclaim) + +#elif ((KERNEL_VERSION(6, 7, 0) > LINUX_VERSION_CODE) && \ + !(defined(__ANDROID_COMMON_KERNEL__) && (KERNEL_VERSION(6, 6, 0) == LINUX_VERSION_CODE))) +#define KBASE_REGISTER_SHRINKER(reclaim, name, priv_data) register_shrinker(reclaim, name) + +#else +#define KBASE_REGISTER_SHRINKER(reclaim, name, priv_data) \ + do { \ + reclaim->private_data = priv_data; \ + shrinker_register(reclaim); \ + } while (0) + +#endif /* KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE */ + +#if ((KERNEL_VERSION(6, 7, 0) > LINUX_VERSION_CODE) && \ + !(defined(__ANDROID_COMMON_KERNEL__) && (KERNEL_VERSION(6, 6, 0) == LINUX_VERSION_CODE))) +#define KBASE_UNREGISTER_SHRINKER(reclaim) unregister_shrinker(&reclaim) +#define KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, type, var) container_of(s, type, var) +#define DEFINE_KBASE_SHRINKER struct shrinker +#define KBASE_INIT_RECLAIM(var, attr, name) (&((var)->attr)) +#define KBASE_SET_RECLAIM(var, attr, reclaim) ((var)->attr = (*reclaim)) + +#else +#define KBASE_UNREGISTER_SHRINKER(reclaim) shrinker_free(reclaim) +#define KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, type, var) s->private_data +#define DEFINE_KBASE_SHRINKER struct shrinker * +#define KBASE_SHRINKER_ALLOC(name) shrinker_alloc(0, name) +#define KBASE_INIT_RECLAIM(var, attr, name) (KBASE_SHRINKER_ALLOC(name)) +#define KBASE_SET_RECLAIM(var, attr, reclaim) ((var)->attr = reclaim) + +#endif + #endif /* _VERSION_COMPAT_DEFS_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h index 564f477..b80817f 100644 --- a/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h +++ b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h index 0fb8242..3fff8b2 100644 --- a/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h +++ b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h @@ -46,7 +46,11 @@ */ #define BASE_MEM_CSF_EVENT ((base_mem_alloc_flags)1 << 19) -#define BASE_MEM_RESERVED_BIT_20 ((base_mem_alloc_flags)1 << 20) +/* Unused bit for CSF, only used in JM for BASE_MEM_TILER_ALIGN_TOP */ +#define BASE_MEM_UNUSED_BIT_20 ((base_mem_alloc_flags)1 << 20) + +/* Unused bit for CSF, only used in JM for BASE_MEM_FLAG_MAP_FIXED */ +#define BASE_MEM_UNUSED_BIT_27 ((base_mem_alloc_flags)1 << 27) /* Must be FIXABLE memory: its GPU VA will be determined at a later point, * at which time it will be at a fixed GPU VA. @@ -62,9 +66,15 @@ */ #define BASEP_MEM_FLAGS_KERNEL_ONLY (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE) -/* A mask of all currently reserved flags - */ -#define BASE_MEM_FLAGS_RESERVED BASE_MEM_RESERVED_BIT_20 +/* A mask of all flags that should not be queried */ +#define BASE_MEM_DONT_QUERY \ + (BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_IMPORT_SHARED | BASE_MEM_SAME_VA) + +/* A mask of all currently reserved flags */ +#define BASE_MEM_FLAGS_RESERVED ((base_mem_alloc_flags)0) + +/* A mask of all bits that are not used by a flag on CSF */ +#define BASE_MEM_FLAGS_UNUSED (BASE_MEM_UNUSED_BIT_20 | BASE_MEM_UNUSED_BIT_27) /* Special base mem handles specific to CSF. */ @@ -474,7 +484,26 @@ struct base_gpu_queue_error_fatal_payload { }; /** - * enum base_gpu_queue_group_error_type - GPU Fatal error type. + * struct base_gpu_queue_error_fault_payload - Recoverable fault + * error information related to GPU command queue. + * + * @sideband: Additional information about this recoverable fault. + * @status: Recoverable fault information. + * This consists of exception type (least significant byte) and + * data (remaining bytes). One example of exception type is + * INSTR_INVALID_PC (0x50). + * @csi_index: Index of the CSF interface the queue is bound to. + * @padding: Padding to make multiple of 64bits + */ +struct base_gpu_queue_error_fault_payload { + __u64 sideband; + __u32 status; + __u8 csi_index; + __u8 padding[3]; +}; + +/** + * enum base_gpu_queue_group_error_type - GPU error type. * * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL: Fatal error associated with GPU * command queue group. @@ -484,7 +513,9 @@ struct base_gpu_queue_error_fatal_payload { * progress timeout. * @BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM: Fatal error due to running out * of tiler heap memory. - * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT: The number of fatal error types + * @BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FAULT: Fault error reported for GPU + * command queue. + * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT: The number of GPU error types * * This type is used for &struct_base_gpu_queue_group_error.error_type. 
*/ @@ -493,6 +524,7 @@ enum base_gpu_queue_group_error_type { BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT, BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM, + BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FAULT, BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT }; @@ -512,6 +544,7 @@ struct base_gpu_queue_group_error { union { struct base_gpu_queue_group_error_fatal_payload fatal_group; struct base_gpu_queue_error_fatal_payload fatal_queue; + struct base_gpu_queue_error_fault_payload fault_queue; } payload; }; diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h index 537c90d..c56e071 100644 --- a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -95,15 +95,23 @@ * 1.22: * - Add comp_pri_threshold and comp_pri_ratio attributes to * kbase_ioctl_cs_queue_group_create. + * - Made the BASE_MEM_DONT_NEED memory flag queryable. * 1.23: * - Disallows changing the sharability on the GPU of imported dma-bufs to * BASE_MEM_COHERENT_SYSTEM using KBASE_IOCTL_MEM_FLAGS_CHANGE. * 1.24: * - Implement full block state support for hardware counters. + * 1.25: + * - Add support for CS_FAULT reporting to userspace + * 1.26: + * - Made the BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP and BASE_MEM_KERNEL_SYNC memory + * flags queryable. + * 1.27: + * - Implement support for HWC block state availability. */ #define BASE_UK_VERSION_MAJOR 1 -#define BASE_UK_VERSION_MINOR 24 +#define BASE_UK_VERSION_MINOR 27 /** * struct kbase_ioctl_version_check - Check version compatibility between @@ -340,6 +348,8 @@ union kbase_ioctl_cs_queue_group_create_1_18 { * @in.csi_handlers: Flags to signal that the application intends to use CSI * exception handlers in some linear buffers to deal with * the given exception types. + * @in.cs_fault_report_enable: Flag to indicate reporting of CS_FAULTs + * to userspace. * @in.padding: Currently unused, must be zero * @out: Output parameters * @out.group_handle: Handle of a newly created queue group. @@ -360,7 +370,8 @@ union kbase_ioctl_cs_queue_group_create { /** * @in.reserved: Reserved, currently unused, must be zero. 
*/ - __u16 reserved; + __u8 reserved; + __u8 cs_fault_report_enable; /** * @in.dvs_buf: buffer for deferred vertex shader */ @@ -637,6 +648,22 @@ union kbase_ioctl_read_user_page { #define KBASE_IOCTL_READ_USER_PAGE _IOWR(KBASE_IOCTL_TYPE, 60, union kbase_ioctl_read_user_page) +/** + * struct kbase_ioctl_queue_group_clear_faults - Re-enable CS FAULT reporting for the GPU queues + * + * @addr: CPU VA to an array of GPU VAs of the buffers backing the queues + * @nr_queues: Number of queues in the array + * @padding: Padding to round up to a multiple of 8 bytes, must be zero + */ +struct kbase_ioctl_queue_group_clear_faults { + __u64 addr; + __u32 nr_queues; + __u8 padding[4]; +}; + +#define KBASE_IOCTL_QUEUE_GROUP_CLEAR_FAULTS \ + _IOW(KBASE_IOCTL_TYPE, 61, struct kbase_ioctl_queue_group_clear_faults) + /*************** * test ioctls * ***************/ diff --git a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h index d347854..d4d12ae 100644 --- a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h +++ b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h b/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h index 9478334..25563e4 100644 --- a/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h +++ b/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h @@ -30,15 +30,11 @@ * See base_mem_alloc_flags. */ -/* Used as BASE_MEM_FIXED in other backends */ -#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8) +/* Unused bit for JM, only used in CSF for BASE_MEM_FIXED */ +#define BASE_MEM_UNUSED_BIT_8 ((base_mem_alloc_flags)1 << 8) -/** - * BASE_MEM_RESERVED_BIT_19 - Bit 19 is reserved. 
- * - * Do not remove, use the next unreserved bit for new flags - */ -#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19) +/* Unused bit for JM, only used in CSF for BASE_CSF_EVENT */ +#define BASE_MEM_UNUSED_BIT_19 ((base_mem_alloc_flags)1 << 19) /** * BASE_MEM_TILER_ALIGN_TOP - Memory starting from the end of the initial commit is aligned @@ -64,9 +60,15 @@ (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | BASE_MEM_FLAG_MAP_FIXED | \ BASEP_MEM_PERFORM_JIT_TRIM) -/* A mask of all currently reserved flags - */ -#define BASE_MEM_FLAGS_RESERVED (BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_19) +/* A mask of all flags that should not be queried */ +#define BASE_MEM_DONT_QUERY \ + (BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_IMPORT_SHARED | BASE_MEM_SAME_VA) + +/* A mask of all currently reserved flags */ +#define BASE_MEM_FLAGS_RESERVED ((base_mem_alloc_flags)0) + +/* A mask of all bits that are not used by a flag on JM */ +#define BASE_MEM_FLAGS_UNUSED (BASE_MEM_UNUSED_BIT_8 | BASE_MEM_UNUSED_BIT_19) /* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the * initial commit is aligned to 'extension' pages, where 'extension' must be a power diff --git a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h index 2a7a06a..43e35a7 100644 --- a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h @@ -149,15 +149,19 @@ * from the parent process. * 11.40: * - Remove KBASE_IOCTL_HWCNT_READER_SETUP and KBASE_HWCNT_READER_* ioctls. + * - Made the BASE_MEM_DONT_NEED memory flag queryable. * 11.41: * - Disallows changing the sharability on the GPU of imported dma-bufs to * BASE_MEM_COHERENT_SYSTEM using KBASE_IOCTL_MEM_FLAGS_CHANGE. * 11.42: * - Implement full block state support for hardware counters. + * 11.43: + * - Made the BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP and BASE_MEM_KERNEL_SYNC memory + * flags queryable. */ #define BASE_UK_VERSION_MAJOR 11 -#define BASE_UK_VERSION_MINOR 42 +#define BASE_UK_VERSION_MINOR 43 /** * struct kbase_ioctl_version_check - Check version compatibility between diff --git a/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h b/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h index cb1a1e8..19df2d1 100644 --- a/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h +++ b/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h @@ -89,10 +89,9 @@ typedef __u32 base_mem_alloc_flags; /* A mask of all the flags that can be returned via the base_mem_get_flags() * interface. 
*/ -#define BASE_MEM_FLAGS_QUERYABLE \ - (BASE_MEM_FLAGS_INPUT_MASK & \ - ~(BASE_MEM_SAME_VA | BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_IMPORT_SHARED | \ - BASE_MEM_FLAGS_RESERVED | BASEP_MEM_FLAGS_KERNEL_ONLY)) +#define BASE_MEM_FLAGS_QUERYABLE \ + (BASE_MEM_FLAGS_INPUT_MASK & ~(BASE_MEM_DONT_QUERY | BASE_MEM_FLAGS_RESERVED | \ + BASE_MEM_FLAGS_UNUSED | BASEP_MEM_FLAGS_KERNEL_ONLY)) /** * enum base_mem_import_type - Memory types supported by @a base_mem_import diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild index 598b021..8a85857 100644 --- a/mali_kbase/Kbuild +++ b/mali_kbase/Kbuild @@ -69,7 +69,7 @@ endif # # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= '"r47p0-01eac0"' +MALI_RELEASE_NAME ?= '"r48p0-01eac0"' # Set up defaults if not defined by build system ifeq ($(CONFIG_MALI_DEBUG), y) MALI_UNIT_TEST = 1 diff --git a/mali_kbase/Kconfig b/mali_kbase/Kconfig index c49c49b..57e8e32 100644 --- a/mali_kbase/Kconfig +++ b/mali_kbase/Kconfig @@ -72,7 +72,6 @@ config MALI_NO_MALI_DEFAULT_GPU help This option sets the default GPU to identify as for No Mali builds. - endchoice menu "Platform specific options" @@ -217,16 +216,6 @@ config MALI_CORESTACK If unsure, say N. -comment "Platform options" - depends on MALI_MIDGARD && MALI_EXPERT - -config MALI_ERROR_INJECT - bool "Enable No Mali error injection" - depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI - default n - help - Enables insertion of errors to test module failure and recovery mechanisms. - comment "Debug options" depends on MALI_MIDGARD && MALI_EXPERT diff --git a/mali_kbase/Makefile b/mali_kbase/Makefile index 07c3019..acbd4fa 100644 --- a/mali_kbase/Makefile +++ b/mali_kbase/Makefile @@ -76,7 +76,6 @@ ifeq ($(MALI_KCONFIG_EXT_PREFIX),) else # Prevent misuse when CONFIG_MALI_NO_MALI=n CONFIG_MALI_REAL_HW = y - CONFIG_MALI_ERROR_INJECT = n endif @@ -108,7 +107,6 @@ ifeq ($(MALI_KCONFIG_EXT_PREFIX),) CONFIG_MALI_JOB_DUMP = n CONFIG_MALI_NO_MALI = n CONFIG_MALI_REAL_HW = y - CONFIG_MALI_ERROR_INJECT = n CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED = n CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS = n @@ -171,7 +169,6 @@ ifeq ($(MALI_KCONFIG_EXT_PREFIX),) CONFIG_MALI_PWRSOFT_765 \ CONFIG_MALI_JOB_DUMP \ CONFIG_MALI_NO_MALI \ - CONFIG_MALI_ERROR_INJECT \ CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED \ CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE \ CONFIG_MALI_PRFCNT_SET_PRIMARY \ @@ -272,6 +269,8 @@ CFLAGS_MODULE += -Wmissing-field-initializers CFLAGS_MODULE += -Wno-type-limits CFLAGS_MODULE += $(call cc-option, -Wmaybe-uninitialized) CFLAGS_MODULE += $(call cc-option, -Wunused-macros) +# The following ensures the stack frame does not get larger than a page +CFLAGS_MODULE += -Wframe-larger-than=4096 KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2 diff --git a/mali_kbase/Mconfig b/mali_kbase/Mconfig index b3c5323..07bb50e 100644 --- a/mali_kbase/Mconfig +++ b/mali_kbase/Mconfig @@ -64,7 +64,6 @@ config MALI_NO_MALI All calls to the simulated hardware will complete immediately as if the hardware completed the task. - endchoice @@ -206,45 +205,6 @@ config LARGE_PAGE_SUPPORT If in doubt, say Y. -choice - prompt "Error injection level" - depends on MALI_MIDGARD && MALI_EXPERT - default MALI_ERROR_INJECT_NONE - help - Enables insertion of errors to test module failure and recovery mechanisms. - -config MALI_ERROR_INJECT_NONE - bool "disabled" - depends on MALI_MIDGARD && MALI_EXPERT - help - Error injection is disabled. 
- -config MALI_ERROR_INJECT_TRACK_LIST - bool "error track list" - depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI - help - Errors to inject are pre-configured by the user. - -config MALI_ERROR_INJECT_RANDOM - bool "random error injection" - depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI - help - Injected errors are random, rather than user-driven. - -endchoice - -config MALI_ERROR_INJECT_ON - string - depends on MALI_MIDGARD && MALI_EXPERT - default "0" if MALI_ERROR_INJECT_NONE - default "1" if MALI_ERROR_INJECT_TRACK_LIST - default "2" if MALI_ERROR_INJECT_RANDOM - -config MALI_ERROR_INJECT - bool - depends on MALI_MIDGARD && MALI_EXPERT - default y if !MALI_ERROR_INJECT_NONE - config MALI_DEBUG bool "Enable debug build" depends on MALI_MIDGARD && MALI_EXPERT diff --git a/mali_kbase/backend/gpu/Kbuild b/mali_kbase/backend/gpu/Kbuild index a06b15d..c91f147 100644 --- a/mali_kbase/backend/gpu/Kbuild +++ b/mali_kbase/backend/gpu/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -47,12 +47,7 @@ endif mali_kbase-$(CONFIG_MALI_DEVFREQ) += \ backend/gpu/mali_kbase_devfreq.o -ifneq ($(CONFIG_MALI_REAL_HW),y) - mali_kbase-y += backend/gpu/mali_kbase_model_linux.o -endif +mali_kbase-$(CONFIG_MALI_NO_MALI) += backend/gpu/mali_kbase_model_linux.o # NO_MALI Dummy model interface mali_kbase-$(CONFIG_MALI_NO_MALI) += backend/gpu/mali_kbase_model_dummy.o -# HW error simulation -mali_kbase-$(CONFIG_MALI_NO_MALI) += backend/gpu/mali_kbase_model_error_generator.o - diff --git a/mali_kbase/backend/gpu/mali_kbase_devfreq.c b/mali_kbase/backend/gpu/mali_kbase_devfreq.c index 905d188..2c1feed 100644 --- a/mali_kbase/backend/gpu/mali_kbase_devfreq.c +++ b/mali_kbase/backend/gpu/mali_kbase_devfreq.c @@ -431,7 +431,7 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) err = of_property_read_u64(node, "opp-hz-real", real_freqs); #endif if (err < 0) { - dev_warn(kbdev->dev, "Failed to read opp-hz-real property with error %d\n", + dev_warn(kbdev->dev, "Failed to read opp-hz-real property with error %d", err); continue; } @@ -439,8 +439,8 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) err = of_property_read_u32_array(node, "opp-microvolt", opp_volts, kbdev->nr_regulators); if (err < 0) { - dev_warn(kbdev->dev, - "Failed to read opp-microvolt property with error %d\n", err); + dev_warn(kbdev->dev, "Failed to read opp-microvolt property with error %d", + err); continue; } #endif @@ -450,11 +450,12 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) if (core_mask != shader_present && corestack_driver_control) { dev_warn( kbdev->dev, - "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU\n", + "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU", opp_freq); continue; } + core_count_p = of_get_property(node, "opp-core-count", NULL); if (core_count_p) { u64 remaining_core_mask = kbdev->gpu_props.shader_present; diff --git a/mali_kbase/backend/gpu/mali_kbase_irq_internal.h b/mali_kbase/backend/gpu/mali_kbase_irq_internal.h index 4374793..4798df9 100644 --- a/mali_kbase/backend/gpu/mali_kbase_irq_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_irq_internal.h @@ -74,7 +74,7 @@ void 
kbase_synchronize_irqs(struct kbase_device *kbdev); * Return: 0 on success. Error code (negative) on failure. */ int kbase_validate_interrupts(struct kbase_device *const kbdev); -#endif /* CONFIG_MALI_REAL_HW */ +#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ #endif /* CONFIG_MALI_DEBUG */ /** diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c index 3868799..a8b75f2 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c @@ -1437,7 +1437,7 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) * then leave it in the RB and next time we're kicked * it will be processed again from the starting state. */ - if (keep_in_jm_rb) { + if (!kbase_is_gpu_removed(kbdev) && keep_in_jm_rb) { katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; /* As the atom was not removed, increment the * index so that we read the correct atom in the diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c index c340760..b034ffe 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c +++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,42 +25,8 @@ * insmod'ing mali_kbase.ko with no arguments after a build with "scons * gpu=tXYZ" will yield the expected GPU ID for tXYZ. This can always be * overridden by passing the 'no_mali_gpu' argument to insmod. - * - * - if CONFIG_MALI_ERROR_INJECT is defined the error injection system is - * activated. */ -/* Implementation of failure injection system: - * - * Error conditions are generated by gpu_generate_error(). - * According to CONFIG_MALI_ERROR_INJECT definition gpu_generate_error() either - * generates an error HW condition randomly (CONFIG_MALI_ERROR_INJECT_RANDOM) or - * checks if there is (in error_track_list) an error configuration to be set for - * the current job chain (CONFIG_MALI_ERROR_INJECT_RANDOM not defined). - * Each error condition will trigger a specific "state" for a certain set of - * registers as per Midgard Architecture Specifications doc. 
- * - * According to Midgard Architecture Specifications doc the following registers - * are always affected by error conditions: - * - * JOB Exception: - * JOB_IRQ_RAWSTAT - * JOB<n> STATUS AREA - * - * MMU Exception: - * MMU_IRQ_RAWSTAT - * AS<n>_FAULTSTATUS - * AS<n>_FAULTADDRESS - * - * GPU Exception: - * GPU_IRQ_RAWSTAT - * GPU_FAULTSTATUS - * GPU_FAULTADDRESS - * - * For further clarification on the model behaviour upon specific error - * conditions the user may refer to the Midgard Architecture Specification - * document - */ #include <mali_kbase.h> #include <device/mali_kbase_device.h> #include <hw_access/mali_kbase_hw_access_regmap.h> @@ -126,7 +92,7 @@ struct error_status_t hw_error_status; */ struct control_reg_values_t { const char *name; - u32 gpu_id; + u64 gpu_id; u32 as_present; u32 thread_max_threads; u32 thread_max_workgroup_size; @@ -524,7 +490,7 @@ MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as"); static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, u32 cnt_idx, bool is_low_word) { - u64 *counters_data; + u64 *counters_data = NULL; u32 core_count = 0; u32 event_index; u64 value = 0; @@ -580,6 +546,9 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, u32 cn break; } + if (unlikely(counters_data == NULL)) + return 0; + for (core = 0; core < core_count; core++) { value += counters_data[event_index]; event_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE; @@ -1172,9 +1141,6 @@ static void midgard_model_update(void *h) /*this job is done assert IRQ lines */ signal_int(dummy, i); -#ifdef CONFIG_MALI_ERROR_INJECT - midgard_set_error(i); -#endif /* CONFIG_MALI_ERROR_INJECT */ update_register_statuses(dummy, i); /*if this job slot returned failures we cannot use it */ if (hw_error_status.job_irq_rawstat & (1u << (i + 16))) { @@ -1564,6 +1530,7 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value) case L2_PWROFF_HI: case PWR_KEY: case PWR_OVERRIDE0: + case PWR_OVERRIDE1: #if MALI_USE_CSF case SHADER_PWRFEATURES: case CSF_CONFIG: @@ -1607,8 +1574,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) #else /* !MALI_USE_CSF */ if (addr == GPU_CONTROL_REG(GPU_ID)) { #endif /* !MALI_USE_CSF */ - - *value = dummy->control_reg_values->gpu_id; + *value = dummy->control_reg_values->gpu_id & U32_MAX; } else if (addr == JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)) { *value = hw_error_status.job_irq_rawstat; pr_debug("%s", "JS_IRQ_RAWSTAT being read"); @@ -2166,9 +2132,3 @@ int gpu_model_control(void *model, struct kbase_model_control_params *params) return 0; } - -u64 midgard_model_arch_timer_get_cntfrq(void *h) -{ - CSTD_UNUSED(h); - return arch_timer_get_cntfrq(); -} diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.h b/mali_kbase/backend/gpu/mali_kbase_model_linux.h index 65eb620..d38bb88 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_linux.h +++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.h @@ -48,12 +48,8 @@ /* * Include Model definitions */ - -#if IS_ENABLED(CONFIG_MALI_NO_MALI) #include <backend/gpu/mali_kbase_model_dummy.h> -#endif /* IS_ENABLED(CONFIG_MALI_NO_MALI) */ -#if !IS_ENABLED(CONFIG_MALI_REAL_HW) /** * kbase_gpu_device_create() - Generic create function. * @@ -117,15 +113,6 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value); void midgard_model_read_reg(void *h, u32 addr, u32 *const value); /** - * midgard_model_arch_timer_get_cntfrq - Get Model specific System Timer Frequency - * - * @h: Model handle. 
- * - * Return: Frequency in Hz - */ -u64 midgard_model_arch_timer_get_cntfrq(void *h); - -/** * gpu_device_raise_irq() - Private IRQ raise function. * * @model: Model handle. @@ -155,6 +142,5 @@ void gpu_device_set_data(void *model, void *data); * Return: Pointer to the data carried by model. */ void *gpu_device_get_data(void *model); -#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ #endif /* _KBASE_MODEL_LINUX_H_ */ diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c index 030d56a..801db54 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c @@ -36,6 +36,9 @@ #include <linux/version_compat_defs.h> #include <linux/pm_runtime.h> #include <mali_kbase_reset_gpu.h> +#ifdef CONFIG_MALI_ARBITER_SUPPORT +#include <csf/mali_kbase_csf_scheduler.h> +#endif /* !CONFIG_MALI_ARBITER_SUPPORT */ #endif /* !MALI_USE_CSF */ #include <hwcnt/mali_kbase_hwcnt_context.h> #include <backend/gpu/mali_kbase_pm_internal.h> @@ -860,9 +863,11 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask) } KBASE_EXPORT_TEST_API(kbase_pm_set_debug_core_mask); #else -void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask_js0, - u64 new_core_mask_js1, u64 new_core_mask_js2) +void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 *new_core_mask, + size_t new_core_mask_size) { + size_t i; + lockdep_assert_held(&kbdev->hwaccess_lock); lockdep_assert_held(&kbdev->pm.lock); @@ -870,13 +875,14 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask_ dev_warn_once( kbdev->dev, "Change of core mask not supported for slot 0 as dummy job WA is enabled"); - new_core_mask_js0 = kbdev->pm.debug_core_mask[0]; + new_core_mask[0] = kbdev->pm.debug_core_mask[0]; } - kbdev->pm.debug_core_mask[0] = new_core_mask_js0; - kbdev->pm.debug_core_mask[1] = new_core_mask_js1; - kbdev->pm.debug_core_mask[2] = new_core_mask_js2; - kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 | new_core_mask_js2; + kbdev->pm.debug_core_mask_all = 0; + for (i = 0; i < new_core_mask_size; i++) { + kbdev->pm.debug_core_mask[i] = new_core_mask[i]; + kbdev->pm.debug_core_mask_all |= new_core_mask[i]; + } kbase_pm_update_dynamic_cores_onoff(kbdev); } @@ -962,7 +968,9 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev) { unsigned long flags; -#if !MALI_USE_CSF +#if MALI_USE_CSF + unsigned long flags_sched; +#else ktime_t end_timestamp = ktime_get_raw(); #endif struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; @@ -981,24 +989,41 @@ void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev) */ WARN(!kbase_is_gpu_removed(kbdev), "GPU is still available after GPU lost event\n"); - /* Full GPU reset will have been done by hypervisor, so - * cancel - */ +#if MALI_USE_CSF + /* Full GPU reset will have been done by hypervisor, so cancel */ + kbase_reset_gpu_prevent_and_wait(kbdev); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_csf_scheduler_spin_lock(kbdev, &flags_sched); + atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING); + kbase_csf_scheduler_spin_unlock(kbdev, flags_sched); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + kbase_synchronize_irqs(kbdev); + + /* Scheduler reset happens outside of spinlock due to the mutex it acquires */ + kbase_csf_scheduler_reset(kbdev); + + /* Update kbase status */ + 
spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->protected_mode = false; + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#else + /* Full GPU reset will have been done by hypervisor, so cancel */ atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING); hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); + kbase_synchronize_irqs(kbdev); /* Clear all jobs running on the GPU */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbdev->protected_mode = false; -#if !MALI_USE_CSF kbase_backend_reset(kbdev, &end_timestamp); kbase_pm_metrics_update(kbdev, NULL); -#endif kbase_pm_update_state(kbdev); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -#if !MALI_USE_CSF /* Cancel any pending HWC dumps */ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING || @@ -1008,12 +1033,11 @@ void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev) wake_up(&kbdev->hwcnt.backend.wait); } spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -#endif +#endif /* MALI_USE_CSF */ } mutex_unlock(&arb_vm_state->vm_state_lock); mutex_unlock(&kbdev->pm.lock); } - #endif /* CONFIG_MALI_ARBITER_SUPPORT */ #if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) @@ -1069,7 +1093,7 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev) */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (kbdev->pm.backend.gpu_sleep_mode_active && kbdev->pm.backend.exit_gpu_sleep_mode && - !work_pending(&kbdev->csf.scheduler.gpu_idle_work)) { + !atomic_read(&kbdev->csf.scheduler.pending_gpu_idle_work)) { u32 glb_req = kbase_csf_firmware_global_input_read(&kbdev->csf.global_iface, GLB_REQ); u32 glb_ack = kbase_csf_firmware_global_output(&kbdev->csf.global_iface, GLB_ACK); diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c index f042b48..7bbfef8 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c @@ -3139,6 +3139,7 @@ static int kbase_set_tiler_quirks(struct kbase_device *kbdev) return 0; } + static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) { struct device_node *np = kbdev->dev->of_node; @@ -3191,6 +3192,7 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) error = kbase_set_mmu_quirks(kbdev); } + return error; } @@ -3210,6 +3212,7 @@ static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) #else kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(JM_CONFIG), kbdev->hw_quirks_gpu); #endif + } void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c index dfdf469..c403161 100644 --- a/mali_kbase/backend/gpu/mali_kbase_time.c +++ b/mali_kbase/backend/gpu/mali_kbase_time.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,10 +30,7 @@ #include <mali_kbase_config_defaults.h> #include <linux/version_compat_defs.h> #include <asm/arch_timer.h> - -#if !IS_ENABLED(CONFIG_MALI_REAL_HW) -#include <backend/gpu/mali_kbase_model_linux.h> -#endif +#include <linux/mali_hw_access.h> struct kbase_timeout_info { char *selector_str; @@ -47,6 +44,7 @@ static struct kbase_timeout_info timeout_info[KBASE_TIMEOUT_SELECTOR_COUNT] = { [CSF_PM_TIMEOUT] = { "CSF_PM_TIMEOUT", CSF_PM_TIMEOUT_CYCLES }, [CSF_GPU_RESET_TIMEOUT] = { "CSF_GPU_RESET_TIMEOUT", CSF_GPU_RESET_TIMEOUT_CYCLES }, [CSF_CSG_SUSPEND_TIMEOUT] = { "CSF_CSG_SUSPEND_TIMEOUT", CSF_CSG_SUSPEND_TIMEOUT_CYCLES }, + [CSF_CSG_TERM_TIMEOUT] = { "CSF_CSG_TERM_TIMEOUT", CSF_CSG_TERM_TIMEOUT_CYCLES }, [CSF_FIRMWARE_BOOT_TIMEOUT] = { "CSF_FIRMWARE_BOOT_TIMEOUT", CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES }, [CSF_FIRMWARE_PING_TIMEOUT] = { "CSF_FIRMWARE_PING_TIMEOUT", @@ -307,11 +305,7 @@ static void get_cpu_gpu_time(struct kbase_device *kbdev, u64 *cpu_ts, u64 *gpu_t u64 kbase_arch_timer_get_cntfrq(struct kbase_device *kbdev) { - u64 freq = arch_timer_get_cntfrq(); - -#if !IS_ENABLED(CONFIG_MALI_REAL_HW) - freq = midgard_model_arch_timer_get_cntfrq(kbdev->model); -#endif + u64 freq = mali_arch_timer_get_cntfrq(); dev_dbg(kbdev->dev, "System Timer Freq = %lluHz", freq); diff --git a/mali_kbase/build.bp b/mali_kbase/build.bp index 77e193a..a0570c2 100644 --- a/mali_kbase/build.bp +++ b/mali_kbase/build.bp @@ -71,18 +71,6 @@ bob_defaults { mali_real_hw: { kbuild_options: ["CONFIG_MALI_REAL_HW=y"], }, - mali_error_inject_none: { - kbuild_options: ["CONFIG_MALI_ERROR_INJECT_NONE=y"], - }, - mali_error_inject_track_list: { - kbuild_options: ["CONFIG_MALI_ERROR_INJECT_TRACK_LIST=y"], - }, - mali_error_inject_random: { - kbuild_options: ["CONFIG_MALI_ERROR_INJECT_RANDOM=y"], - }, - mali_error_inject: { - kbuild_options: ["CONFIG_MALI_ERROR_INJECT=y"], - }, mali_debug: { kbuild_options: [ "CONFIG_MALI_DEBUG=y", diff --git a/mali_kbase/context/backend/mali_kbase_context_csf.c b/mali_kbase/context/backend/mali_kbase_context_csf.c index 8b14108..1bf5f9b 100644 --- a/mali_kbase/context/backend/mali_kbase_context_csf.c +++ b/mali_kbase/context/backend/mali_kbase_context_csf.c @@ -187,11 +187,17 @@ void kbase_destroy_context(struct kbase_context *kctx) * Customer side that a hang could occur if context termination is * not blocked until the resume of GPU device. */ +#ifdef CONFIG_MALI_ARBITER_SUPPORT + atomic_inc(&kbdev->pm.gpu_users_waiting); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ while (kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { dev_info(kbdev->dev, "Suspend in progress when destroying context"); wait_event(kbdev->pm.resume_wait, !kbase_pm_is_suspending(kbdev)); } +#ifdef CONFIG_MALI_ARBITER_SUPPORT + atomic_dec(&kbdev->pm.gpu_users_waiting); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ /* Have synchronized against the System suspend and incremented the * pm.active_count. So any subsequent invocation of System suspend diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c index a8b5052..894eac1 100644 --- a/mali_kbase/csf/mali_kbase_csf.c +++ b/mali_kbase/csf/mali_kbase_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -534,6 +534,8 @@ static int csf_queue_register_internal(struct kbase_context *kctx, queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED; + queue->clear_faults = true; + INIT_LIST_HEAD(&queue->link); atomic_set(&queue->pending_kick, 0); INIT_LIST_HEAD(&queue->pending_kick_link); @@ -729,7 +731,7 @@ out: } /** - * get_bound_queue_group - Get the group to which a queue was bound + * get_bound_queue_group() - Get the group to which a queue was bound * * @queue: Pointer to the queue for this group * @@ -842,6 +844,48 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, int csi_index kbase_csf_ring_csg_doorbell(kbdev, csg_nr); } +int kbase_csf_queue_group_clear_faults(struct kbase_context *kctx, + struct kbase_ioctl_queue_group_clear_faults *faults) +{ + void __user *user_bufs = u64_to_user_ptr(faults->addr); + u32 i; + int ret = 0; + struct kbase_device *kbdev = kctx->kbdev; + const u32 nr_queues = faults->nr_queues; + + if (unlikely(nr_queues > kbdev->csf.global_iface.groups[0].stream_num)) { + dev_warn(kbdev->dev, "Invalid nr_queues %u", nr_queues); + return -EINVAL; + } + + for (i = 0; i < nr_queues; ++i) { + u64 buf_gpu_addr; + struct kbase_va_region *region; + + if (copy_from_user(&buf_gpu_addr, user_bufs, sizeof(buf_gpu_addr))) + return -EFAULT; + + kbase_gpu_vm_lock(kctx); + region = kbase_region_tracker_find_region_enclosing_address(kctx, buf_gpu_addr); + if (likely(!kbase_is_region_invalid_or_free(region))) { + struct kbase_queue *queue = region->user_data; + + queue->clear_faults = true; + } else { + dev_warn(kbdev->dev, "GPU queue %u without a valid command buffer region", + i); + ret = -EFAULT; + goto out_unlock; + } + kbase_gpu_vm_unlock(kctx); + + user_bufs = (void __user *)((uintptr_t)user_bufs + sizeof(buf_gpu_addr)); + } +out_unlock: + kbase_gpu_vm_unlock(kctx); + return ret; +} + int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_kick *kick) { struct kbase_device *kbdev = kctx->kbdev; @@ -863,7 +907,7 @@ int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue struct kbase_queue *queue = region->user_data; if (queue && (queue->bind_state == KBASE_CSF_QUEUE_BOUND)) { - spin_lock(&kbdev->csf.pending_gpuq_kicks_lock); + spin_lock(&kbdev->csf.pending_gpuq_kick_queues_lock); if (list_empty(&queue->pending_kick_link)) { /* Queue termination shall block until this * kick has been handled. 
@@ -871,10 +915,12 @@ int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue atomic_inc(&queue->pending_kick); list_add_tail( &queue->pending_kick_link, - &kbdev->csf.pending_gpuq_kicks[queue->group_priority]); - complete(&kbdev->csf.scheduler.kthread_signal); + &kbdev->csf.pending_gpuq_kick_queues[queue->group_priority]); + if (atomic_cmpxchg(&kbdev->csf.pending_gpuq_kicks, false, true) == + false) + complete(&kbdev->csf.scheduler.kthread_signal); } - spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); + spin_unlock(&kbdev->csf.pending_gpuq_kick_queues_lock); } } else { dev_dbg(kbdev->dev, @@ -1090,12 +1136,11 @@ static int create_normal_suspend_buffer(struct kbase_context *const kctx, } static void timer_event_worker(struct work_struct *data); -static void protm_event_worker(struct work_struct *data); static void term_normal_suspend_buffer(struct kbase_context *const kctx, struct kbase_normal_suspend_buffer *s_buf); /** - * create_suspend_buffers - Setup normal and protected mode + * create_suspend_buffers() - Setup normal and protected mode * suspend buffers. * * @kctx: Address of the kbase context within which the queue group @@ -1201,7 +1246,8 @@ static int create_queue_group(struct kbase_context *const kctx, INIT_LIST_HEAD(&group->link_to_schedule); INIT_LIST_HEAD(&group->error_fatal.link); INIT_WORK(&group->timer_event_work, timer_event_worker); - INIT_WORK(&group->protm_event_work, protm_event_worker); + INIT_LIST_HEAD(&group->protm_event_work); + atomic_set(&group->pending_protm_event_work, 0); bitmap_zero(group->protm_pending_bitmap, MAX_SUPPORTED_STREAMS_PER_GROUP); group->run_state = KBASE_CSF_GROUP_INACTIVE; @@ -1374,7 +1420,7 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group) } /** - * term_queue_group - Terminate a GPU command queue group. + * term_queue_group() - Terminate a GPU command queue group. * * @group: Pointer to GPU command queue group data. * @@ -1402,8 +1448,8 @@ static void term_queue_group(struct kbase_queue_group *group) } /** - * wait_group_deferred_deschedule_completion - Wait for refcount of the group to - * become 0 that was taken when the group deschedule had to be deferred. + * wait_group_deferred_deschedule_completion() - Wait for refcount of the group + * to become 0 that was taken when the group deschedule had to be deferred. * * @group: Pointer to GPU command queue group that is being deleted. * @@ -1432,7 +1478,10 @@ static void wait_group_deferred_deschedule_completion(struct kbase_queue_group * static void cancel_queue_group_events(struct kbase_queue_group *group) { cancel_work_sync(&group->timer_event_work); - cancel_work_sync(&group->protm_event_work); + + /* Drain a pending protected mode request if any */ + kbase_csf_scheduler_wait_for_kthread_pending_work(group->kctx->kbdev, + &group->pending_protm_event_work); } static void remove_pending_group_fatal_error(struct kbase_queue_group *group) @@ -1587,6 +1636,7 @@ int kbase_csf_ctx_init(struct kbase_context *kctx) INIT_LIST_HEAD(&kctx->csf.queue_list); INIT_LIST_HEAD(&kctx->csf.link); + atomic_set(&kctx->csf.pending_sync_update, 0); kbase_csf_event_init(kctx); @@ -1822,7 +1872,7 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) } /** - * handle_oom_event - Handle the OoM event generated by the firmware for the + * handle_oom_event() - Handle the OoM event generated by the firmware for the * CSI. * * @group: Pointer to the CSG group the oom-event belongs to. 
@@ -1892,7 +1942,7 @@ static int handle_oom_event(struct kbase_queue_group *const group, } /** - * report_tiler_oom_error - Report a CSG error due to a tiler heap OOM event + * report_tiler_oom_error() - Report a CSG error due to a tiler heap OOM event * * @group: Pointer to the GPU command queue group that encountered the error */ @@ -1935,7 +1985,7 @@ static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev) } /** - * kbase_queue_oom_event - Handle tiler out-of-memory for a GPU command queue. + * kbase_queue_oom_event() - Handle tiler out-of-memory for a GPU command queue. * * @queue: Pointer to queue for which out-of-memory event was received. * @@ -2023,7 +2073,7 @@ unlock: } /** - * oom_event_worker - Tiler out-of-memory handler called from a workqueue. + * oom_event_worker() - Tiler out-of-memory handler called from a workqueue. * * @data: Pointer to a work_struct embedded in GPU command queue data. * @@ -2051,7 +2101,8 @@ static void oom_event_worker(struct work_struct *data) } /** - * report_group_timeout_error - Report the timeout error for the group to userspace. + * report_group_timeout_error() - Report the timeout error for the group to + * userspace. * * @group: Pointer to the group for which timeout error occurred */ @@ -2075,7 +2126,7 @@ static void report_group_timeout_error(struct kbase_queue_group *const group) } /** - * timer_event_worker - Handle the progress timeout error for the group + * timer_event_worker() - Handle the progress timeout error for the group * * @data: Pointer to a work_struct embedded in GPU command queue group data. * @@ -2110,7 +2161,7 @@ static void timer_event_worker(struct work_struct *data) } /** - * handle_progress_timer_event - Progress timer timeout event handler. + * handle_progress_timer_event() - Progress timer timeout event handler. * * @group: Pointer to GPU queue group for which the timeout event is received. * @@ -2201,41 +2252,7 @@ static void report_group_fatal_error(struct kbase_queue_group *const group) } /** - * protm_event_worker - Protected mode switch request event handler - * called from a workqueue. - * - * @data: Pointer to a work_struct embedded in GPU command queue group data. - * - * Request to switch to protected mode. - */ -static void protm_event_worker(struct work_struct *data) -{ - struct kbase_queue_group *const group = - container_of(data, struct kbase_queue_group, protm_event_work); - struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf; - int err = 0; - - KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START, group, 0u); - - err = alloc_grp_protected_suspend_buffer_pages(group); - if (!err) { - kbase_csf_scheduler_group_protm_enter(group); - } else if (err == -ENOMEM && sbuf->alloc_retries <= PROTM_ALLOC_MAX_RETRIES) { - sbuf->alloc_retries++; - /* try again to allocate pages */ - queue_work(group->kctx->csf.wq, &group->protm_event_work); - } else if (sbuf->alloc_retries >= PROTM_ALLOC_MAX_RETRIES || err != -ENOMEM) { - dev_err(group->kctx->kbdev->dev, - "Failed to allocate physical pages for Protected mode suspend buffer for the group %d of context %d_%d", - group->handle, group->kctx->tgid, group->kctx->id); - report_group_fatal_error(group); - } - - KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END, group, 0u); -} - -/** - * handle_fault_event - Handler for CS fault. + * handle_fault_event() - Handler for CS fault. * * @queue: Pointer to queue for which fault event was received. 
* @cs_ack: Value of the CS_ACK register in the CS kernel input page used for @@ -2331,7 +2348,7 @@ static void report_queue_fatal_error(struct kbase_queue *const queue, u32 cs_fat } /** - * cs_error_worker - Handle the CS_FATAL/CS_FAULT error for the GPU queue + * cs_error_worker() - Handle the CS_FATAL/CS_FAULT error for the GPU queue * * @data: Pointer to a work_struct embedded in GPU command queue. * @@ -2409,7 +2426,7 @@ unlock: } /** - * handle_fatal_event - Handler for CS fatal. + * handle_fatal_event() - Handler for CS fatal. * * @queue: Pointer to queue for which fatal event was received. * @stream: Pointer to the structure containing info provided by the @@ -2471,7 +2488,7 @@ static void handle_fatal_event(struct kbase_queue *const queue, } /** - * process_cs_interrupts - Process interrupts for a CS. + * process_cs_interrupts() - Process interrupts for a CS. * * @group: Pointer to GPU command queue group data. * @ginfo: The CSG interface provided by the firmware. @@ -2585,7 +2602,7 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, } if (!group->protected_suspend_buf.pma) - queue_work(group->kctx->csf.wq, &group->protm_event_work); + kbase_csf_scheduler_enqueue_protm_event_work(group); if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) { clear_bit(group->csg_nr, scheduler->csg_slots_idle_mask); @@ -2598,7 +2615,7 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, } /** - * process_csg_interrupts - Process interrupts for a CSG. + * process_csg_interrupts() - Process interrupts for a CSG. * * @kbdev: Instance of a GPU platform device that implements a CSF interface. * @csg_nr: CSG number. @@ -2718,7 +2735,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, u32 const c } /** - * process_prfcnt_interrupts - Process performance counter interrupts. + * process_prfcnt_interrupts() - Process performance counter interrupts. * * @kbdev: Instance of a GPU platform device that implements a CSF interface. * @glb_req: Global request register value. @@ -2790,7 +2807,7 @@ static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req, u } /** - * check_protm_enter_req_complete - Check if PROTM_ENTER request completed + * check_protm_enter_req_complete() - Check if PROTM_ENTER request completed * * @kbdev: Instance of a GPU platform device that implements a CSF interface. * @glb_req: Global request register value. @@ -2824,7 +2841,7 @@ static inline void check_protm_enter_req_complete(struct kbase_device *kbdev, u3 } /** - * process_protm_exit - Handle the protected mode exit interrupt + * process_protm_exit() - Handle the protected mode exit interrupt * * @kbdev: Instance of a GPU platform device that implements a CSF interface. * @glb_ack: Global acknowledge register value. 
@@ -2913,7 +2930,7 @@ static inline void process_tracked_info_for_protm(struct kbase_device *kbdev, if (!tock_triggered) { dev_dbg(kbdev->dev, "Group-%d on slot-%d start protm work\n", group->handle, group->csg_nr); - queue_work(group->kctx->csf.wq, &group->protm_event_work); + kbase_csf_scheduler_enqueue_protm_event_work(group); } } } @@ -2942,6 +2959,46 @@ static void order_job_irq_clear_with_iface_mem_read(void) dmb(osh); } +static const char *const glb_fatal_status_errors[GLB_FATAL_STATUS_VALUE_COUNT] = { + [GLB_FATAL_STATUS_VALUE_OK] = "OK", + [GLB_FATAL_STATUS_VALUE_ASSERT] = "Firmware assert triggered", + [GLB_FATAL_STATUS_VALUE_UNEXPECTED_EXCEPTION] = + "Hardware raised an exception firmware did not expect", + [GLB_FATAL_STATUS_VALUE_HANG] = "Firmware hangs and watchdog timer expired", +}; + +/** + * handle_glb_fatal_event() - Handle the GLB fatal event + * + * @kbdev: Instance of GPU device. + * @global_iface: CSF global interface + */ +static void handle_glb_fatal_event(struct kbase_device *kbdev, + const struct kbase_csf_global_iface *const global_iface) +{ + const char *error_string = NULL; + const u32 fatal_status = kbase_csf_firmware_global_output(global_iface, GLB_FATAL_STATUS); + + lockdep_assert_held(&kbdev->hwaccess_lock); + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + dev_warn(kbdev->dev, "MCU encountered unrecoverable error"); + + if (fatal_status < GLB_FATAL_STATUS_VALUE_COUNT) + error_string = glb_fatal_status_errors[fatal_status]; + else { + dev_err(kbdev->dev, "Invalid GLB_FATAL_STATUS (%u)", fatal_status); + return; + } + + if (fatal_status == GLB_FATAL_STATUS_VALUE_OK) + dev_err(kbdev->dev, "GLB_FATAL_STATUS(OK) must be set with proper reason"); + else { + dev_warn(kbdev->dev, "GLB_FATAL_STATUS: %s", error_string); + if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) + kbase_reset_gpu_locked(kbdev); + } +} + void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) { bool deferred_handling_glb_idle_irq = false; @@ -3016,6 +3073,9 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) deferred_handling_glb_idle_irq = true; } + if (glb_ack & GLB_ACK_FATAL_MASK) + handle_glb_fatal_event(kbdev, global_iface); + process_prfcnt_interrupts(kbdev, glb_req, glb_ack); kbase_csf_scheduler_spin_unlock(kbdev, flags); @@ -3077,6 +3137,11 @@ void kbase_csf_doorbell_mapping_term(struct kbase_device *kbdev) if (kbdev->csf.db_filp) { struct page *page = as_page(kbdev->csf.dummy_db_page); + /* This is a shared dummy sink page for avoiding potential segmentation fault + * to user-side library when a csi is off slot. Additionally, the call is on + * module unload path, so the page can be left uncleared before returning it + * back to kbdev memory pool. 
+ */ kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false); fput(kbdev->csf.db_filp); @@ -3108,26 +3173,27 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev) return 0; } -void kbase_csf_pending_gpuq_kicks_init(struct kbase_device *kbdev) +void kbase_csf_pending_gpuq_kick_queues_init(struct kbase_device *kbdev) { size_t i; - for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kicks); ++i) - INIT_LIST_HEAD(&kbdev->csf.pending_gpuq_kicks[i]); - spin_lock_init(&kbdev->csf.pending_gpuq_kicks_lock); + atomic_set(&kbdev->csf.pending_gpuq_kicks, false); + for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kick_queues); ++i) + INIT_LIST_HEAD(&kbdev->csf.pending_gpuq_kick_queues[i]); + spin_lock_init(&kbdev->csf.pending_gpuq_kick_queues_lock); } -void kbase_csf_pending_gpuq_kicks_term(struct kbase_device *kbdev) +void kbase_csf_pending_gpuq_kick_queues_term(struct kbase_device *kbdev) { size_t i; - spin_lock(&kbdev->csf.pending_gpuq_kicks_lock); - for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kicks); ++i) { - if (!list_empty(&kbdev->csf.pending_gpuq_kicks[i])) + spin_lock(&kbdev->csf.pending_gpuq_kick_queues_lock); + for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kick_queues); ++i) { + if (!list_empty(&kbdev->csf.pending_gpuq_kick_queues[i])) dev_warn(kbdev->dev, "Some GPU queue kicks for priority %zu were not handled", i); } - spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); + spin_unlock(&kbdev->csf.pending_gpuq_kick_queues_lock); } void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev) @@ -3135,6 +3201,11 @@ void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev) if (kbdev->csf.user_reg.filp) { struct page *page = as_page(kbdev->csf.user_reg.dummy_page); + /* This is a shared dummy page in place of the real USER Register page just + * before the GPU is powered down. Additionally, the call is on module unload + * path, so the page can be left uncleared before returning it back to kbdev + * memory pool. + */ kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false); fput(kbdev->csf.user_reg.filp); } @@ -3217,17 +3288,17 @@ void kbase_csf_process_queue_kick(struct kbase_queue *queue) if (err == -EBUSY) { retry_kick = true; - spin_lock(&kbdev->csf.pending_gpuq_kicks_lock); + spin_lock(&kbdev->csf.pending_gpuq_kick_queues_lock); if (list_empty(&queue->pending_kick_link)) { /* A failed queue kick shall be pushed to the * back of the queue to avoid potential abuse. 
*/ list_add_tail( &queue->pending_kick_link, - &kbdev->csf.pending_gpuq_kicks[queue->group_priority]); - spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); + &kbdev->csf.pending_gpuq_kick_queues[queue->group_priority]); + spin_unlock(&kbdev->csf.pending_gpuq_kick_queues_lock); } else { - spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); + spin_unlock(&kbdev->csf.pending_gpuq_kick_queues_lock); WARN_ON(atomic_read(&queue->pending_kick) == 0); } @@ -3250,3 +3321,27 @@ out_release_queue: WARN_ON(atomic_read(&queue->pending_kick) == 0); atomic_dec(&queue->pending_kick); } + +void kbase_csf_process_protm_event_request(struct kbase_queue_group *group) +{ + struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf; + int err = 0; + + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START, group, 0u); + + err = alloc_grp_protected_suspend_buffer_pages(group); + if (!err) { + kbase_csf_scheduler_group_protm_enter(group); + } else if (err == -ENOMEM && sbuf->alloc_retries <= PROTM_ALLOC_MAX_RETRIES) { + sbuf->alloc_retries++; + /* try again to allocate pages */ + kbase_csf_scheduler_enqueue_protm_event_work(group); + } else if (sbuf->alloc_retries >= PROTM_ALLOC_MAX_RETRIES || err != -ENOMEM) { + dev_err(group->kctx->kbdev->dev, + "Failed to allocate physical pages for Protected mode suspend buffer for the group %d of context %d_%d", + group->handle, group->kctx->tgid, group->kctx->id); + report_group_fatal_error(group); + } + + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END, group, 0u); +} diff --git a/mali_kbase/csf/mali_kbase_csf.h b/mali_kbase/csf/mali_kbase_csf.h index b2f6ab2..5661363 100644 --- a/mali_kbase/csf/mali_kbase_csf.h +++ b/mali_kbase/csf/mali_kbase_csf.h @@ -244,6 +244,19 @@ struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx, u8 group_handle); /** + * kbase_csf_queue_group_clear_faults - Re-enable CS Fault reporting. + * + * @kctx: Pointer to the kbase context within which the + * CS Faults for the queues has to be re-enabled. + * @clear_faults: Pointer to the structure which contains details of the + * queues for which the CS Fault reporting has to be re-enabled. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_queue_group_clear_faults(struct kbase_context *kctx, + struct kbase_ioctl_queue_group_clear_faults *clear_faults); + +/** * kbase_csf_queue_group_create - Create a GPU command queue group. * * @kctx: Pointer to the kbase context within which the @@ -379,20 +392,20 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev); void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev); /** - * kbase_csf_pending_gpuq_kicks_init - Initialize the data used for handling - * GPU queue kicks. + * kbase_csf_pending_gpuq_kick_queues_init - Initialize the data used for handling + * GPU queue kicks. * * @kbdev: Instance of a GPU platform device that implements a CSF interface. */ -void kbase_csf_pending_gpuq_kicks_init(struct kbase_device *kbdev); +void kbase_csf_pending_gpuq_kick_queues_init(struct kbase_device *kbdev); /** - * kbase_csf_pending_gpuq_kicks_term - De-initialize the data used for handling - * GPU queue kicks. + * kbase_csf_pending_gpuq_kick_queues_term - De-initialize the data used for handling + * GPU queue kicks. * * @kbdev: Instance of a GPU platform device that implements a CSF interface. 
*/ -void kbase_csf_pending_gpuq_kicks_term(struct kbase_device *kbdev); +void kbase_csf_pending_gpuq_kick_queues_term(struct kbase_device *kbdev); /** * kbase_csf_ring_csg_doorbell - ring the doorbell for a CSG interface. @@ -546,4 +559,13 @@ static inline u64 kbase_csf_ktrace_gpu_cycle_cnt(struct kbase_device *kbdev) */ void kbase_csf_process_queue_kick(struct kbase_queue *queue); +/** + * kbase_csf_process_protm_event_request - Handle protected mode switch request + * + * @group: The group to handle protected mode request + * + * Request to switch to protected mode. + */ +void kbase_csf_process_protm_event_request(struct kbase_queue_group *group); + #endif /* _KBASE_CSF_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_defs.h b/mali_kbase/csf/mali_kbase_csf_defs.h index fdebf55..d5587e3 100644 --- a/mali_kbase/csf/mali_kbase_csf_defs.h +++ b/mali_kbase/csf/mali_kbase_csf_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -268,6 +268,7 @@ enum kbase_queue_group_priority { * Shader, L2 and MCU state. * @CSF_GPU_RESET_TIMEOUT: Waiting timeout for GPU reset to complete. * @CSF_CSG_SUSPEND_TIMEOUT: Timeout given for a CSG to be suspended. + * @CSF_CSG_TERM_TIMEOUT: Timeout given for a CSG to be terminated. * @CSF_FIRMWARE_BOOT_TIMEOUT: Maximum time to wait for firmware to boot. * @CSF_FIRMWARE_PING_TIMEOUT: Maximum time to wait for firmware to respond * to a ping from KBase. @@ -290,6 +291,7 @@ enum kbase_timeout_selector { CSF_PM_TIMEOUT, CSF_GPU_RESET_TIMEOUT, CSF_CSG_SUSPEND_TIMEOUT, + CSF_CSG_TERM_TIMEOUT, CSF_FIRMWARE_BOOT_TIMEOUT, CSF_FIRMWARE_PING_TIMEOUT, CSF_SCHED_PROTM_PROGRESS_TIMEOUT, @@ -398,6 +400,7 @@ struct kbase_csf_notification { * @cs_error: Records information about the CS fatal event or * about CS fault event if dump on fault is enabled. * @cs_error_fatal: Flag to track if the CS fault or CS fatal event occurred. + * @clear_faults: Flag to track if the CS fault reporting is enabled for this queue * @extract_ofs: The current EXTRACT offset, this is only updated when handling * the GLB IDLE IRQ if the idle timeout value is non-0 in order * to help detect a queue's true idle status. @@ -441,6 +444,7 @@ struct kbase_queue { u64 cs_error_info; u32 cs_error; bool cs_error_fatal; + bool clear_faults; u64 extract_ofs; u64 saved_cmd_ptr; }; @@ -501,6 +505,8 @@ struct kbase_protected_suspend_buffer { * @compute_max: Maximum number of compute endpoints the group is * allowed to use. * @csi_handlers: Requested CSI exception handler flags for the group. + * @cs_fault_report_enable: Indicated if reporting of CS_FAULTs to + * userspace is enabled. * @tiler_mask: Mask of tiler endpoints the group is allowed to use. * @fragment_mask: Mask of fragment endpoints the group is allowed to use. * @compute_mask: Mask of compute endpoints the group is allowed to use. @@ -531,8 +537,13 @@ struct kbase_protected_suspend_buffer { * @bound_queues: Array of registered queues bound to this queue group. * @doorbell_nr: Index of the hardware doorbell page assigned to the * group. - * @protm_event_work: Work item corresponding to the protected mode entry - * event for this queue. + * @protm_event_work: List item corresponding to the protected mode entry + * event for this queue. 
This would be handled by + * kbase_csf_scheduler_kthread(). + * @pending_protm_event_work: Indicates that kbase_csf_scheduler_kthread() should + * handle PROTM request for this group. This would + * be set to false when the work is done. This is used + * mainly for synchronisation with group termination. * @protm_pending_bitmap: Bit array to keep a track of CSs that * have pending protected mode entry requests. * @error_fatal: An error of type BASE_GPU_QUEUE_GROUP_ERROR_FATAL to be @@ -569,7 +580,7 @@ struct kbase_queue_group { u8 compute_max; u8 csi_handlers; - + __u8 cs_fault_report_enable; u64 tiler_mask; u64 fragment_mask; u64 compute_mask; @@ -588,7 +599,8 @@ struct kbase_queue_group { struct kbase_queue *bound_queues[MAX_SUPPORTED_STREAMS_PER_GROUP]; int doorbell_nr; - struct work_struct protm_event_work; + struct list_head protm_event_work; + atomic_t pending_protm_event_work; DECLARE_BITMAP(protm_pending_bitmap, MAX_SUPPORTED_STREAMS_PER_GROUP); struct kbase_csf_notification error_fatal; @@ -625,6 +637,9 @@ struct kbase_queue_group { * @cmd_seq_num: The sequence number assigned to an enqueued command, * in incrementing order (older commands shall have a * smaller number). + * @kcpu_wq: Work queue to process KCPU commands for all queues in this + * context. This would be used if the context is not prioritised, + * otherwise it would be handled by kbase_csf_scheduler_kthread(). * @jit_lock: Lock to serialise JIT operations. * @jit_cmds_head: A list of the just-in-time memory commands, both * allocate & free, in submission order, protected @@ -640,6 +655,8 @@ struct kbase_csf_kcpu_queue_context { DECLARE_BITMAP(in_use, KBASEP_MAX_KCPU_QUEUES); atomic64_t cmd_seq_num; + struct workqueue_struct *kcpu_wq; + struct mutex jit_lock; struct list_head jit_cmds_head; struct list_head jit_blocked_queues; @@ -747,15 +764,7 @@ struct kbase_csf_ctx_heap_reclaim_info { * GPU command queues are idle and at least one of them * is blocked on a sync wait operation. * @num_idle_wait_grps: Length of the @idle_wait_groups list. - * @sync_update_wq_high_prio: high-priority work queue to process the - * SYNC_UPDATE events by sync_set / sync_add - * instruction execution on command streams bound to - * groups of @idle_wait_groups list. This WQ would - * be used if the context is prioritised. - * @sync_update_wq_normal_prio: similar to sync_update_wq_high_prio, but this - * WQ would be used if the context is not - * prioritised. - * @sync_update_work: Work item to process the SYNC_UPDATE events. + * @sync_update_work: List item to process the SYNC_UPDATE event. * @ngrp_to_schedule: Number of groups added for the context to the * 'groups_to_schedule' list of scheduler instance. * @heap_info: Heap reclaim information data of the kctx. As the @@ -768,9 +777,7 @@ struct kbase_csf_scheduler_context { u32 num_runnable_grps; struct list_head idle_wait_groups; u32 num_idle_wait_grps; - struct workqueue_struct *sync_update_wq_high_prio; - struct workqueue_struct *sync_update_wq_normal_prio; - struct work_struct sync_update_work; + struct list_head sync_update_work; u32 ngrp_to_schedule; struct kbase_csf_ctx_heap_reclaim_info heap_info; }; @@ -865,17 +872,16 @@ struct kbase_csf_user_reg_context { * @wq: Dedicated workqueue to process work items corresponding * to the OoM events raised for chunked tiler heaps being * used by GPU command queues, and progress timeout events. - * @kcpu_wq_high_prio: High-priority work queue to process KCPU commands for - * all queues in this context. 
This WQ would be used if - * the context is prioritised. - * @kcpu_wq_normal_prio: Similar to kcpu_wq_high_prio, but this WQ would be - * used if the context is not prioritised. * @link: Link to this csf context in the 'runnable_kctxs' list of * the scheduler instance * @sched: Object representing the scheduler's context * @cpu_queue: CPU queue information. Only be available when DEBUG_FS * is enabled. * @user_reg: Collective information to support mapping to USER Register page. + * @pending_sync_update: Indicates that kbase_csf_scheduler_kthread() should + * handle SYNC_UPDATE event for this context. This would + * be set to false when the work is done. This is used + * mainly for synchronisation with context termination. */ struct kbase_csf_context { struct list_head event_pages_head; @@ -888,12 +894,11 @@ struct kbase_csf_context { struct kbase_csf_event event; struct kbase_csf_tiler_heap_context tiler_heaps; struct workqueue_struct *wq; - struct workqueue_struct *kcpu_wq_high_prio; - struct workqueue_struct *kcpu_wq_normal_prio; struct list_head link; struct kbase_csf_scheduler_context sched; struct kbase_csf_cpu_queue_context cpu_queue; struct kbase_csf_user_reg_context user_reg; + atomic_t pending_sync_update; }; /** @@ -936,14 +941,15 @@ struct kbase_csf_csg_slot { * struct kbase_csf_sched_heap_reclaim_mgr - Object for managing tiler heap reclaim * kctx lists inside the CSF device's scheduler. * - * @heap_reclaim: Tiler heap reclaim shrinker object. + * @heap_reclaim: Defines Tiler heap reclaim shrinker object. * @ctx_lists: Array of kctx lists, size matching CSG defined priorities. The * lists track the kctxs attached to the reclaim manager. * @unused_pages: Estimated number of unused pages from the @ctxlist array. The * number is indicative for use with reclaim shrinker's count method. */ struct kbase_csf_sched_heap_reclaim_mgr { - struct shrinker heap_reclaim; + DEFINE_KBASE_SHRINKER heap_reclaim; + struct list_head ctx_lists[KBASE_QUEUE_GROUP_PRIORITY_COUNT]; atomic_t unused_pages; }; @@ -1042,10 +1048,29 @@ struct kbase_csf_mcu_shared_regions { * workqueue items (kernel-provided delayed_work * items do not use hrtimer and for some reason do * not provide sufficiently reliable periodicity). - * @pending_tick_work: Indicates that kbase_csf_scheduler_kthread() should perform - * a scheduling tick. - * @pending_tock_work: Indicates that kbase_csf_scheduler_kthread() should perform - * a scheduling tock. + * @pending_sync_update_works: Indicates that kbase_csf_scheduler_kthread() + * should handle SYNC_UPDATE events. + * @sync_update_work_ctxs_lock: Lock protecting the list of contexts that + * require handling SYNC_UPDATE events. + * @sync_update_work_ctxs: The list of contexts that require handling + * SYNC_UPDATE events. + * @pending_protm_event_works: Indicates that kbase_csf_scheduler_kthread() + * should handle PROTM requests. + * @protm_event_work_grps_lock: Lock protecting the list of groups that + * have requested protected mode. + * @protm_event_work_grps: The list of groups that have requested + * protected mode. + * @pending_kcpuq_works: Indicates that kbase_csf_scheduler_kthread() + * should process pending KCPU queue works. + * @kcpuq_work_queues_lock: Lock protecting the list of KCPU queues that + * need to be processed. + * @kcpuq_work_queues: The list of KCPU queue that need to be processed + * @pending_tick_work: Indicates that kbase_csf_scheduler_kthread() should + * perform a scheduling tick. 
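The pending_*_works counters, the per-kind list heads and their spinlocks documented above all follow the same producer pattern: put the object on the relevant list, mark it pending, bump the counter and wake kbase_csf_scheduler_kthread() through the completion it sleeps on. The real enqueue helpers (for example kbase_csf_scheduler_enqueue_sync_update_work()) are only referenced in this patch, not shown, so the fragment below is just a sketch of that pattern for the SYNC_UPDATE case, using the field names documented here and assuming an IRQ-safe lock.

/* Sketch of the producer side for SYNC_UPDATE events; the actual helper is
 * implemented elsewhere in this patch. */
static void enqueue_sync_update_work_sketch(struct kbase_context *kctx)
{
        struct kbase_csf_scheduler *scheduler = &kctx->kbdev->csf.scheduler;
        unsigned long flags;

        spin_lock_irqsave(&scheduler->sync_update_work_ctxs_lock, flags);
        /* Queue the context only once; pending_sync_update is cleared by the
         * kthread once the work has been handled. */
        if (list_empty(&kctx->csf.sched.sync_update_work)) {
                list_add_tail(&kctx->csf.sched.sync_update_work,
                              &scheduler->sync_update_work_ctxs);
                atomic_set(&kctx->csf.pending_sync_update, true);
                atomic_inc(&scheduler->pending_sync_update_works);
        }
        spin_unlock_irqrestore(&scheduler->sync_update_work_ctxs_lock, flags);

        /* Wake the scheduler kthread, which waits on kthread_signal. */
        complete(&scheduler->kthread_signal);
}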
+ * @pending_tock_work: Indicates that kbase_csf_scheduler_kthread() should + * perform a scheduling tock. + * @pending_gpu_idle_work: Indicates that kbase_csf_scheduler_kthread() should + * handle the GPU IDLE event. * @ping_work: Work item that would ping the firmware at regular * intervals, only if there is a single active CSG * slot, to check if firmware is alive and would @@ -1063,10 +1088,6 @@ struct kbase_csf_mcu_shared_regions { * This pointer being set doesn't necessarily indicates * that GPU is in protected mode, kbdev->protected_mode * needs to be checked for that. - * @idle_wq: Workqueue for executing GPU idle notification - * handler. - * @gpu_idle_work: Work item for facilitating the scheduler to bring - * the GPU to a low-power mode on becoming idle. * @fast_gpu_idle_handling: Indicates whether to relax many of the checks * normally done in the GPU idle worker. This is * set to true when handling the GLB IDLE IRQ if the @@ -1109,7 +1130,8 @@ struct kbase_csf_mcu_shared_regions { * thread when a queue needs attention. * @kthread_running: Whether the GPU queue submission thread should keep * executing. - * @gpuq_kthread: High-priority thread used to handle GPU queue + * @gpuq_kthread: Dedicated thread primarily used to handle + * latency-sensitive tasks such as GPU queue * submissions. */ struct kbase_csf_scheduler { @@ -1134,14 +1156,22 @@ struct kbase_csf_scheduler { unsigned long last_schedule; atomic_t timer_enabled; struct hrtimer tick_timer; + atomic_t pending_sync_update_works; + spinlock_t sync_update_work_ctxs_lock; + struct list_head sync_update_work_ctxs; + atomic_t pending_protm_event_works; + spinlock_t protm_event_work_grps_lock; + struct list_head protm_event_work_grps; + atomic_t pending_kcpuq_works; + spinlock_t kcpuq_work_queues_lock; + struct list_head kcpuq_work_queues; atomic_t pending_tick_work; atomic_t pending_tock_work; + atomic_t pending_gpu_idle_work; struct delayed_work ping_work; struct kbase_context *top_kctx; struct kbase_queue_group *top_grp; struct kbase_queue_group *active_protm_grp; - struct workqueue_struct *idle_wq; - struct work_struct gpu_idle_work; bool fast_gpu_idle_handling; atomic_t gpu_no_longer_idle; atomic_t non_idle_offslot_grps; @@ -1653,12 +1683,15 @@ struct kbase_csf_user_reg { * @dof: Structure for dump on fault. * @user_reg: Collective information to support the mapping to * USER Register page for user processes. - * @pending_gpuq_kicks: Lists of GPU queue that have been kicked but not - * yet processed, categorised by queue group's priority. - * @pending_gpuq_kicks_lock: Protect @pending_gpu_kicks and - * kbase_queue.pending_kick_link. + * @pending_gpuq_kicks: Indicates that kbase_csf_scheduler_kthread() + * should handle GPU queue kicks. + * @pending_gpuq_kick_queues: Lists of GPU queued that have been kicked but not + * yet processed, categorised by queue group's priority. + * @pending_gpuq_kick_queues_lock: Protect @pending_gpuq_kick_queues and + * kbase_queue.pending_kick_link. * @quirks_ext: Pointer to an allocated buffer containing the firmware * workarounds configuration. + * @pmode_sync_sem: RW Semaphore to prevent MMU operations during P.Mode entrance. 
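The new pmode_sync_sem replaces the earlier reliance on mmu_hw_mutex around protected-mode entry: the scheduler takes the semaphore for writing while it negotiates P.Mode entry with the firmware (see scheduler_group_check_protm_enter() later in this patch), and paths that update GPU mappings are expected to hold it for reading, which is what the kbase_gpu_vm_lock_with_pmode_sync() calls used by the KCPU import commands suggest. The sketch below shows the two sides; the shape of the read-side wrapper is an assumption, since its definition is outside this hunk.

/* Writer side, mirroring the down_write()/up_write() added to the scheduler. */
static void pmode_enter_writer_sketch(struct kbase_device *kbdev)
{
        down_write(&kbdev->csf.pmode_sync_sem);
        /* ... raise GLB_REQ_PROTM_ENTER and wait for the firmware ACK ... */
        up_write(&kbdev->csf.pmode_sync_sem);
}

/* Assumed reader side of kbase_gpu_vm_lock_with_pmode_sync(); the real helper
 * is not shown in this hunk, so this is only a guess at its shape. */
static void vm_lock_with_pmode_sync_sketch(struct kbase_context *kctx)
{
        down_read(&kctx->kbdev->csf.pmode_sync_sem);
        kbase_gpu_vm_lock(kctx);
}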
*/ struct kbase_csf_device { struct kbase_mmu_table mcu_mmu; @@ -1710,9 +1743,11 @@ struct kbase_csf_device { struct kbase_debug_coresight_device coresight; #endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ struct kbase_csf_user_reg user_reg; - struct list_head pending_gpuq_kicks[KBASE_QUEUE_GROUP_PRIORITY_COUNT]; - spinlock_t pending_gpuq_kicks_lock; + atomic_t pending_gpuq_kicks; + struct list_head pending_gpuq_kick_queues[KBASE_QUEUE_GROUP_PRIORITY_COUNT]; + spinlock_t pending_gpuq_kick_queues_lock; u32 *quirks_ext; + struct rw_semaphore pmode_sync_sem; }; /** diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.c b/mali_kbase/csf/mali_kbase_csf_firmware.c index aec6e65..20f9348 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -1550,7 +1550,6 @@ static bool global_request_complete(struct kbase_device *const kbdev, u32 const unsigned long flags; kbase_csf_scheduler_spin_lock(kbdev, &flags); - if ((kbase_csf_firmware_global_output(global_iface, GLB_ACK) & req_mask) == (kbase_csf_firmware_global_input_read(global_iface, GLB_REQ) & req_mask)) complete = true; @@ -1888,6 +1887,7 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work) { struct kbase_device *kbdev = container_of(work, struct kbase_device, csf.firmware_reload_work); + unsigned long flags; int err; dev_info(kbdev->dev, "reloading firmware"); @@ -1906,7 +1906,9 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work) return; /* Reboot the firmware */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_csf_firmware_enable_mcu(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } void kbase_csf_firmware_trigger_reload(struct kbase_device *kbdev) @@ -2253,8 +2255,9 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) kbdev->csf.glb_init_request_pending = true; + init_rwsem(&kbdev->csf.pmode_sync_sem); mutex_init(&kbdev->csf.reg_lock); - kbase_csf_pending_gpuq_kicks_init(kbdev); + kbase_csf_pending_gpuq_kick_queues_init(kbdev); kbdev->csf.fw = (struct kbase_csf_mcu_fw){ .data = NULL }; @@ -2263,7 +2266,7 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) void kbase_csf_firmware_early_term(struct kbase_device *kbdev) { - kbase_csf_pending_gpuq_kicks_term(kbdev); + kbase_csf_pending_gpuq_kick_queues_term(kbdev); mutex_destroy(&kbdev->csf.reg_lock); } @@ -2772,6 +2775,7 @@ int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int return wait_for_global_request_with_timeout(kbdev, GLB_REQ_PING_MASK, wait_timeout_ms); } + int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev, u64 const timeout) { const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; @@ -2810,8 +2814,6 @@ int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev) { int err; - lockdep_assert_held(&kbdev->mmu_hw_mutex); - err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK); if (!err) { @@ -2877,6 +2879,7 @@ void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev) { struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface; + lockdep_assert_held(&kbdev->hwaccess_lock); /* Clear the HALT bit before 
triggering the boot of MCU firmware */ kbase_csf_firmware_global_input_mask(iface, GLB_REQ, 0, GLB_REQ_HALT_MASK); @@ -3156,6 +3159,9 @@ void kbase_csf_firmware_mcu_shared_mapping_term(struct kbase_device *kbdev, } if (csf_mapping->phys) { + /* This is on module unload path, so the pages can be left uncleared before + * returning them back to kbdev memory pool. + */ kbase_mem_pool_free_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], csf_mapping->num_pages, csf_mapping->phys, false, false); } diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.h b/mali_kbase/csf/mali_kbase_csf_firmware.h index 9b6f153..4baf2b7 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware.h +++ b/mali_kbase/csf/mali_kbase_csf_firmware.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -618,6 +618,7 @@ void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev); bool kbase_csf_firmware_is_mcu_in_sleep(struct kbase_device *kbdev); #endif + /** * kbase_csf_firmware_trigger_reload() - Trigger the reboot of MCU firmware, for * the cold boot case firmware image would diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c index d08686f..030a1eb 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c @@ -367,10 +367,10 @@ int kbase_csf_firmware_cfg_fw_wa_init(struct kbase_device *kbdev) */ entry_count = of_property_count_u32_elems(kbdev->dev->of_node, "quirks-ext"); - if (entry_count == -EINVAL) + if (entry_count < 0) entry_count = of_property_count_u32_elems(kbdev->dev->of_node, "quirks_ext"); - if (entry_count == -EINVAL || entry_count == -ENODATA) + if (entry_count < 0) return 0; entry_bytes = (size_t)entry_count * sizeof(u32); diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c index d0599d6..a087388 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -857,11 +857,11 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work) container_of(work, struct kbase_device, csf.firmware_reload_work); unsigned long flags; + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); /* Reboot the firmware */ kbase_csf_firmware_enable_mcu(kbdev); /* Tell MCU state machine to transit to next state */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbdev->csf.firmware_reloaded = true; kbase_pm_update_state(kbdev); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -934,7 +934,7 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_n /* add the source flag */ reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET( reg_val_u32, (src_system_timestamp ? 
GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP : - GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER)); + GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER)); return reg_val_u32; } @@ -1118,15 +1118,16 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) INIT_WORK(&kbdev->csf.firmware_reload_work, kbase_csf_firmware_reload_worker); INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker); + init_rwsem(&kbdev->csf.pmode_sync_sem); mutex_init(&kbdev->csf.reg_lock); - kbase_csf_pending_gpuq_kicks_init(kbdev); + kbase_csf_pending_gpuq_kick_queues_init(kbdev); return 0; } void kbase_csf_firmware_early_term(struct kbase_device *kbdev) { - kbase_csf_pending_gpuq_kicks_term(kbdev); + kbase_csf_pending_gpuq_kick_queues_term(kbdev); mutex_destroy(&kbdev->csf.reg_lock); } @@ -1308,6 +1309,7 @@ int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int return wait_for_global_request(kbdev, GLB_REQ_PING_MASK); } + int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev, u64 const timeout) { const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; @@ -1370,6 +1372,8 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev) void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev) { + lockdep_assert_held(&kbdev->hwaccess_lock); + /* Trigger the boot of MCU firmware, Use the AUTO mode as * otherwise on fast reset, to exit protected mode, MCU will * not reboot by itself to enter normal mode. diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.c b/mali_kbase/csf/mali_kbase_csf_kcpu.c index 6c0c8d1..aa34d88 100644 --- a/mali_kbase/csf/mali_kbase_csf_kcpu.c +++ b/mali_kbase/csf/mali_kbase_csf_kcpu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -39,13 +39,7 @@ static DEFINE_SPINLOCK(kbase_csf_fence_lock); #endif -#ifdef CONFIG_MALI_FENCE_DEBUG #define FENCE_WAIT_TIMEOUT_MS 3000 -#endif - -static void kcpu_queue_process(struct kbase_kcpu_command_queue *kcpu_queue, bool drain_queue); - -static void kcpu_queue_process_worker(struct work_struct *data); static int kbase_kcpu_map_import_prepare(struct kbase_kcpu_command_queue *kcpu_queue, struct base_kcpu_command_import_info *import_info, @@ -445,6 +439,16 @@ static void kbase_kcpu_jit_allocate_finish(struct kbase_kcpu_command_queue *queu kfree(cmd->info.jit_alloc.info); } +static void enqueue_kcpuq_work(struct kbase_kcpu_command_queue *queue) +{ + struct kbase_context *const kctx = queue->kctx; + + if (!atomic_read(&kctx->prioritized)) + queue_work(kctx->csf.kcpu_queues.kcpu_wq, &queue->work); + else + kbase_csf_scheduler_enqueue_kcpuq_work(queue); +} + /** * kbase_kcpu_jit_retry_pending_allocs() - Retry blocked JIT_ALLOC commands * @@ -464,9 +468,7 @@ static void kbase_kcpu_jit_retry_pending_allocs(struct kbase_context *kctx) * kbase_csf_kcpu_queue_context.jit_lock . */ list_for_each_entry(blocked_queue, &kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked) - queue_work(atomic_read(&kctx->prioritized) ? 
kctx->csf.kcpu_wq_high_prio : - kctx->csf.kcpu_wq_normal_prio, - &blocked_queue->work); + enqueue_kcpuq_work(blocked_queue); } static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue, @@ -717,11 +719,8 @@ static int kbase_csf_queue_group_suspend_process(struct kbase_context *kctx, static enum kbase_csf_event_callback_action event_cqs_callback(void *param) { struct kbase_kcpu_command_queue *kcpu_queue = (struct kbase_kcpu_command_queue *)param; - struct kbase_context *kctx = kcpu_queue->kctx; - queue_work(atomic_read(&kctx->prioritized) ? kctx->csf.kcpu_wq_high_prio : - kctx->csf.kcpu_wq_normal_prio, - &kcpu_queue->work); + enqueue_kcpuq_work(kcpu_queue); return KBASE_CSF_EVENT_CALLBACK_KEEP; } @@ -1322,9 +1321,7 @@ static void kbase_csf_fence_wait_callback(struct dma_fence *fence, struct dma_fe fence->seqno); /* Resume kcpu command queue processing. */ - queue_work(atomic_read(&kctx->prioritized) ? kctx->csf.kcpu_wq_high_prio : - kctx->csf.kcpu_wq_normal_prio, - &kcpu_queue->work); + enqueue_kcpuq_work(kcpu_queue); } static void kbasep_kcpu_fence_wait_cancel(struct kbase_kcpu_command_queue *kcpu_queue, @@ -1360,7 +1357,6 @@ static void kbasep_kcpu_fence_wait_cancel(struct kbase_kcpu_command_queue *kcpu_ fence_info->fence = NULL; } -#ifdef CONFIG_MALI_FENCE_DEBUG /** * fence_timeout_callback() - Timeout callback function for fence-wait * @@ -1399,9 +1395,7 @@ static void fence_timeout_callback(struct timer_list *timer) kbase_sync_fence_info_get(fence, &info); if (info.status == 1) { - queue_work(atomic_read(&kctx->prioritized) ? kctx->csf.kcpu_wq_high_prio : - kctx->csf.kcpu_wq_normal_prio, - &kcpu_queue->work); + enqueue_kcpuq_work(kcpu_queue); } else if (info.status == 0) { dev_warn(kctx->kbdev->dev, "fence has not yet signalled in %ums", FENCE_WAIT_TIMEOUT_MS); @@ -1430,7 +1424,6 @@ static void fence_wait_timeout_start(struct kbase_kcpu_command_queue *cmd) { mod_timer(&cmd->fence_timeout, jiffies + msecs_to_jiffies(FENCE_WAIT_TIMEOUT_MS)); } -#endif /** * kbase_kcpu_fence_wait_process() - Process the kcpu fence wait command @@ -1469,9 +1462,8 @@ static int kbase_kcpu_fence_wait_process(struct kbase_kcpu_command_queue *kcpu_q fence_status = cb_err; if (cb_err == 0) { kcpu_queue->fence_wait_processed = true; -#ifdef CONFIG_MALI_FENCE_DEBUG - fence_wait_timeout_start(kcpu_queue); -#endif + if (IS_ENABLED(CONFIG_MALI_FENCE_DEBUG)) + fence_wait_timeout_start(kcpu_queue); } else if (cb_err == -ENOENT) { fence_status = dma_fence_get_status(fence); if (!fence_status) { @@ -1692,9 +1684,7 @@ static void fence_signal_timeout_cb(struct timer_list *timer) if (atomic_read(&kcpu_queue->fence_signal_pending_cnt) > 1) fence_signal_timeout_start(kcpu_queue); - queue_work(atomic_read(&kctx->prioritized) ? kctx->csf.kcpu_wq_high_prio : - kctx->csf.kcpu_wq_normal_prio, - &kcpu_queue->timeout_work); + queue_work(kctx->csf.kcpu_queues.kcpu_wq, &kcpu_queue->timeout_work); } } @@ -1973,7 +1963,7 @@ static void kcpu_queue_process_worker(struct work_struct *data) container_of(data, struct kbase_kcpu_command_queue, work); mutex_lock(&queue->lock); - kcpu_queue_process(queue, false); + kbase_csf_kcpu_queue_process(queue, false); mutex_unlock(&queue->lock); } @@ -2006,7 +1996,7 @@ static int delete_queue(struct kbase_context *kctx, u32 id) /* Drain the remaining work for this queue first and go past * all the waits. 
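Dropping the CONFIG_MALI_FENCE_DEBUG #ifdef guards around fence_timeout_callback() in favour of `if (IS_ENABLED(CONFIG_MALI_FENCE_DEBUG))` (used a little further down when arming the fence-wait timer) keeps the timer code compiled and type-checked in every configuration, while the compiler still discards the dead branch when the option is off. A minimal sketch of the generic pattern, with a placeholder option name:

#include <linux/kconfig.h>
#include <linux/timer.h>

/* CONFIG_EXAMPLE_OPTION is a placeholder; IS_ENABLED() expands to a
 * compile-time 0 or 1, so the branch is eliminated when the option is
 * disabled, but its body is still parsed and type-checked. */
static void arm_debug_timer_sketch(struct timer_list *timer, unsigned long expiry)
{
        if (IS_ENABLED(CONFIG_EXAMPLE_OPTION))
                mod_timer(timer, expiry);
}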
*/ - kcpu_queue_process(queue, true); + kbase_csf_kcpu_queue_process(queue, true); /* All commands should have been processed */ WARN_ON(queue->num_pending_cmds); @@ -2022,11 +2012,20 @@ static int delete_queue(struct kbase_context *kctx, u32 id) mutex_unlock(&queue->lock); cancel_work_sync(&queue->timeout_work); + + /* + * Drain a pending request to process this queue in + * kbase_csf_scheduler_kthread() if any. By this point the + * queue would be empty so this would be a no-op. + */ + kbase_csf_scheduler_wait_for_kthread_pending_work(kctx->kbdev, + &queue->pending_kick); + cancel_work_sync(&queue->work); mutex_destroy(&queue->lock); - kfree(queue); + vfree(queue); } else { dev_dbg(kctx->kbdev->dev, "Attempt to delete a non-existent KCPU queue"); mutex_unlock(&kctx->csf.kcpu_queues.lock); @@ -2079,7 +2078,7 @@ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END(struct kbase_device *kbde KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END(kbdev, queue); } -static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drain_queue) +void kbase_csf_kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drain_queue) { struct kbase_device *kbdev = queue->kctx->kbdev; bool process_next = true; @@ -2199,10 +2198,10 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drai KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START(kbdev, queue); - kbase_gpu_vm_lock(queue->kctx); + kbase_gpu_vm_lock_with_pmode_sync(queue->kctx); meta = kbase_sticky_resource_acquire(queue->kctx, cmd->info.import.gpu_va); - kbase_gpu_vm_unlock(queue->kctx); + kbase_gpu_vm_unlock_with_pmode_sync(queue->kctx); if (meta == NULL) { queue->has_error = true; @@ -2219,10 +2218,10 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drai KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START(kbdev, queue); - kbase_gpu_vm_lock(queue->kctx); + kbase_gpu_vm_lock_with_pmode_sync(queue->kctx); ret = kbase_sticky_resource_release(queue->kctx, NULL, cmd->info.import.gpu_va); - kbase_gpu_vm_unlock(queue->kctx); + kbase_gpu_vm_unlock_with_pmode_sync(queue->kctx); if (!ret) { queue->has_error = true; @@ -2240,10 +2239,10 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drai KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START(kbdev, queue); - kbase_gpu_vm_lock(queue->kctx); + kbase_gpu_vm_lock_with_pmode_sync(queue->kctx); ret = kbase_sticky_resource_release_force(queue->kctx, NULL, cmd->info.import.gpu_va); - kbase_gpu_vm_unlock(queue->kctx); + kbase_gpu_vm_unlock_with_pmode_sync(queue->kctx); if (!ret) { queue->has_error = true; @@ -2642,7 +2641,7 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, } queue->num_pending_cmds += enq->nr_commands; - kcpu_queue_process(queue, false); + kbase_csf_kcpu_queue_process(queue, false); } out: @@ -2653,23 +2652,14 @@ out: int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx) { - kctx->csf.kcpu_wq_high_prio = alloc_workqueue("mali_kcpu_wq_%i_high_prio", - WQ_UNBOUND | WQ_HIGHPRI, 0, kctx->tgid); - if (kctx->csf.kcpu_wq_high_prio == NULL) { + kctx->csf.kcpu_queues.kcpu_wq = + alloc_workqueue("mali_kcpu_wq_%i_%i", 0, 0, kctx->tgid, kctx->id); + if (kctx->csf.kcpu_queues.kcpu_wq == NULL) { dev_err(kctx->kbdev->dev, "Failed to initialize KCPU queue high-priority workqueue"); return -ENOMEM; } - kctx->csf.kcpu_wq_normal_prio = - alloc_workqueue("mali_kcpu_wq_%i_normal_prio", 0, 0, kctx->tgid); - if (kctx->csf.kcpu_wq_normal_prio == NULL) 
{ - dev_err(kctx->kbdev->dev, - "Failed to initialize KCPU queue normal-priority workqueue"); - destroy_workqueue(kctx->csf.kcpu_wq_high_prio); - return -ENOMEM; - } - mutex_init(&kctx->csf.kcpu_queues.lock); return 0; @@ -2688,8 +2678,7 @@ void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx) mutex_destroy(&kctx->csf.kcpu_queues.lock); - destroy_workqueue(kctx->csf.kcpu_wq_normal_prio); - destroy_workqueue(kctx->csf.kcpu_wq_high_prio); + destroy_workqueue(kctx->csf.kcpu_queues.kcpu_wq); } KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_context_term); @@ -2699,15 +2688,42 @@ int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx, return delete_queue(kctx, (u32)del->id); } +static struct kbase_kcpu_dma_fence_meta * +kbase_csf_kcpu_queue_metadata_new(struct kbase_context *kctx, u64 fence_context) +{ + int n; + struct kbase_kcpu_dma_fence_meta *metadata = kzalloc(sizeof(*metadata), GFP_KERNEL); + + if (!metadata) + goto early_ret; + + *metadata = (struct kbase_kcpu_dma_fence_meta){ + .kbdev = kctx->kbdev, + .kctx_id = kctx->id, + }; + + /* Please update MAX_TIMELINE_NAME macro when making changes to the string. */ + n = snprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%u-%d_%u-%llu-kcpu", + kctx->kbdev->id, kctx->tgid, kctx->id, fence_context); + if (WARN_ON(n >= MAX_TIMELINE_NAME)) { + kfree(metadata); + metadata = NULL; + goto early_ret; + } + + kbase_refcount_set(&metadata->refcount, 1); + +early_ret: + return metadata; +} +KBASE_ALLOW_ERROR_INJECTION_TEST_API(kbase_csf_kcpu_queue_metadata_new, ERRNO_NULL); + int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_ioctl_kcpu_queue_new *newq) { struct kbase_kcpu_command_queue *queue; + struct kbase_kcpu_dma_fence_meta *metadata; int idx; - int n; int ret = 0; -#if IS_ENABLED(CONFIG_SYNC_FILE) - struct kbase_kcpu_dma_fence_meta *metadata; -#endif /* The queue id is of u8 type and we use the index of the kcpu_queues * array as an id, so the number of elements in the array can't be * more than 256. 
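The factored-out kbase_csf_kcpu_queue_metadata_new() above relies on snprintf()'s return value to detect truncation of the timeline name: snprintf() returns the length the full string would have had, so a result greater than or equal to the buffer size means the name no longer fits in MAX_TIMELINE_NAME and the allocation is abandoned rather than exposing a clipped name. A standalone illustration of that check, with an arbitrary buffer size and sample values standing in for the device id, tgid, context id and fence context:

#include <stdio.h>

#define NAME_SIZE 16 /* stand-in for MAX_TIMELINE_NAME */

int main(void)
{
        char name[NAME_SIZE];
        /* snprintf() returns the untruncated length, so n >= sizeof(name)
         * signals that the output was clipped. */
        int n = snprintf(name, sizeof(name), "%u-%d_%u-%llu-kcpu",
                         0u, 1234, 7u, 42ULL);

        if (n >= (int)sizeof(name))
                printf("timeline name would be truncated (%d chars needed)\n", n);
        else
                printf("timeline name: %s\n", name);
        return 0;
}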
@@ -2727,54 +2743,48 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_ioctl_kcpu goto out; } - queue = kzalloc(sizeof(*queue), GFP_KERNEL); - + queue = vzalloc(sizeof(*queue)); if (!queue) { ret = -ENOMEM; goto out; } - bitmap_set(kctx->csf.kcpu_queues.in_use, (unsigned int)idx, 1); - kctx->csf.kcpu_queues.array[idx] = queue; - mutex_init(&queue->lock); - queue->kctx = kctx; - queue->start_offset = 0; - queue->num_pending_cmds = 0; + *queue = (struct kbase_kcpu_command_queue) + { + .kctx = kctx, .start_offset = 0, .num_pending_cmds = 0, .enqueue_failed = false, + .command_started = false, .has_error = false, .id = idx, #if IS_ENABLED(CONFIG_SYNC_FILE) - queue->fence_context = dma_fence_context_alloc(1); - queue->fence_seqno = 0; - queue->fence_wait_processed = false; + .fence_context = dma_fence_context_alloc(1), .fence_seqno = 0, + .fence_wait_processed = false, +#endif /* IS_ENABLED(CONFIG_SYNC_FILE) */ + }; - metadata = kzalloc(sizeof(*metadata), GFP_KERNEL); - if (!metadata) { - kfree(queue); - ret = -ENOMEM; - goto out; - } + mutex_init(&queue->lock); + INIT_WORK(&queue->work, kcpu_queue_process_worker); + INIT_LIST_HEAD(&queue->high_prio_work); + atomic_set(&queue->pending_kick, 0); + INIT_WORK(&queue->timeout_work, kcpu_queue_timeout_worker); + INIT_LIST_HEAD(&queue->jit_blocked); - metadata->kbdev = kctx->kbdev; - metadata->kctx_id = kctx->id; - n = snprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%u-%d_%u-%llu-kcpu", - kctx->kbdev->id, kctx->tgid, kctx->id, queue->fence_context); - if (WARN_ON(n >= MAX_TIMELINE_NAME)) { - kfree(queue); - kfree(metadata); - ret = -EINVAL; - goto out; + if (IS_ENABLED(CONFIG_SYNC_FILE)) { + metadata = kbase_csf_kcpu_queue_metadata_new(kctx, queue->fence_context); + if (!metadata) { + vfree(queue); + ret = -ENOMEM; + goto out; + } + + queue->metadata = metadata; + atomic_inc(&kctx->kbdev->live_fence_metadata); + atomic_set(&queue->fence_signal_pending_cnt, 0); + kbase_timer_setup(&queue->fence_signal_timeout, fence_signal_timeout_cb); } - kbase_refcount_set(&metadata->refcount, 1); - queue->metadata = metadata; - atomic_inc(&kctx->kbdev->live_fence_metadata); -#endif /* CONFIG_SYNC_FILE */ - queue->enqueue_failed = false; - queue->command_started = false; - INIT_LIST_HEAD(&queue->jit_blocked); - queue->has_error = false; - INIT_WORK(&queue->work, kcpu_queue_process_worker); - INIT_WORK(&queue->timeout_work, kcpu_queue_timeout_worker); - queue->id = idx; + if (IS_ENABLED(CONFIG_MALI_FENCE_DEBUG)) + kbase_timer_setup(&queue->fence_timeout, fence_timeout_callback); + bitmap_set(kctx->csf.kcpu_queues.in_use, (unsigned int)idx, 1); + kctx->csf.kcpu_queues.array[idx] = queue; newq->id = idx; /* Fire the tracepoint with the mutex held to enforce correct ordering @@ -2784,14 +2794,6 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_ioctl_kcpu queue->num_pending_cmds); KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_CREATE, queue, queue->fence_context, 0); -#ifdef CONFIG_MALI_FENCE_DEBUG - kbase_timer_setup(&queue->fence_timeout, fence_timeout_callback); -#endif - -#if IS_ENABLED(CONFIG_SYNC_FILE) - atomic_set(&queue->fence_signal_pending_cnt, 0); - kbase_timer_setup(&queue->fence_signal_timeout, fence_signal_timeout_cb); -#endif out: mutex_unlock(&kctx->csf.kcpu_queues.lock); diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.h b/mali_kbase/csf/mali_kbase_csf_kcpu.h index a19847e..291509b 100644 --- a/mali_kbase/csf/mali_kbase_csf_kcpu.h +++ b/mali_kbase/csf/mali_kbase_csf_kcpu.h @@ -243,7 +243,19 @@ struct 
kbase_kcpu_command { * @work: struct work_struct which contains a pointer to * the function which handles processing of kcpu * commands enqueued into a kcpu command queue; - * part of kernel API for processing workqueues + * part of kernel API for processing workqueues. + * This would be used if the context is not + * prioritised, otherwise it would be handled by + * kbase_csf_scheduler_kthread(). + * @high_prio_work: A counterpart to @work, this queue would be + * added to a list to be processed by + * kbase_csf_scheduler_kthread() if it is + * prioritised. + * @pending_kick: Indicates that kbase_csf_scheduler_kthread() + * should re-evaluate pending commands for this + * queue. This would be set to false when the work + * is done. This is used mainly for + * synchronisation with queue termination. * @timeout_work: struct work_struct which contains a pointer to the * function which handles post-timeout actions * queue when a fence signal timeout occurs. @@ -287,6 +299,8 @@ struct kbase_kcpu_command_queue { struct kbase_context *kctx; struct kbase_kcpu_command commands[KBASEP_KCPU_QUEUE_SIZE]; struct work_struct work; + struct list_head high_prio_work; + atomic_t pending_kick; struct work_struct timeout_work; u8 start_offset; u8 id; @@ -299,9 +313,7 @@ struct kbase_kcpu_command_queue { bool command_started; struct list_head jit_blocked; bool has_error; -#ifdef CONFIG_MALI_FENCE_DEBUG struct timer_list fence_timeout; -#endif /* CONFIG_MALI_FENCE_DEBUG */ #if IS_ENABLED(CONFIG_SYNC_FILE) struct kbase_kcpu_dma_fence_meta *metadata; #endif /* CONFIG_SYNC_FILE */ @@ -335,6 +347,18 @@ int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx, struct kbase_ioctl_kcpu_queue_delete *del); /** + * kbase_csf_kcpu_queue_process - Process pending KCPU queue commands + * + * @queue: The queue to process pending commands for + * @drain_queue: Whether to skip all blocking commands in the queue. + * This is expected to be set to true on queue + * termination. + * + * Return: None. + */ +void kbase_csf_kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drain_queue); + +/** * kbase_csf_kcpu_queue_enqueue - Enqueue a KCPU command into a KCPU command * queue. * diff --git a/mali_kbase/csf/mali_kbase_csf_registers.h b/mali_kbase/csf/mali_kbase_csf_registers.h index d01f307..9a7c6e4 100644 --- a/mali_kbase/csf/mali_kbase_csf_registers.h +++ b/mali_kbase/csf/mali_kbase_csf_registers.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -250,7 +250,7 @@ #define GLB_ACK 0x0000 /* () Global acknowledge */ #define GLB_DB_ACK 0x0008 /* () Global doorbell acknowledge */ -#define GLB_HALT_STATUS 0x0010 /* () Global halt status */ +#define GLB_FATAL_STATUS 0x0010 /* () Global fatal error status */ #define GLB_PRFCNT_STATUS 0x0014 /* () Performance counter status */ #define GLB_PRFCNT_INSERT 0x0018 /* () Performance counter buffer insert index */ #define GLB_DEBUG_FWUTF_RESULT GLB_DEBUG_ARG_OUT0 /* () Firmware debug test result */ @@ -1422,6 +1422,12 @@ #define GLB_REQ_PRFCNT_OVERFLOW_SET(reg_val, value) \ (((reg_val) & ~GLB_REQ_PRFCNT_OVERFLOW_MASK) | \ (((value) << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) & GLB_REQ_PRFCNT_OVERFLOW_MASK)) +#define GLB_ACK_FATAL_SHIFT GPU_U(27) +#define GLB_ACK_FATAL_MASK (GPU_U(0x1) << GLB_ACK_FATAL_SHIFT) +#define GLB_ACK_FATAL_GET(reg_val) (((reg_val)&GLB_ACK_FATAL_MASK) >> GLB_ACK_FATAL_SHIFT) +#define GLB_ACK_FATAL_SET(reg_val, value) \ + (~(~(reg_val) | GLB_ACK_FATAL_MASK) | \ + (((value) << GLB_ACK_FATAL_SHIFT) & GLB_ACK_FATAL_MASK)) #define GLB_REQ_DEBUG_CSF_REQ_SHIFT 30 #define GLB_REQ_DEBUG_CSF_REQ_MASK (0x1 << GLB_REQ_DEBUG_CSF_REQ_SHIFT) #define GLB_REQ_DEBUG_CSF_REQ_GET(reg_val) \ @@ -1822,6 +1828,20 @@ (((reg_val) & ~GLB_DEBUG_REQ_RUN_MODE_MASK) | \ (((value) << GLB_DEBUG_REQ_RUN_MODE_SHIFT) & GLB_DEBUG_REQ_RUN_MODE_MASK)) +/* GLB_FATAL_STATUS register */ +#define GLB_FATAL_STATUS_VALUE_SHIFT GPU_U(0) +#define GLB_FATAL_STATUS_VALUE_MASK (GPU_U(0xFFFFFFFF) << GLB_FATAL_STATUS_VALUE_SHIFT) +#define GLB_FATAL_STATUS_VALUE_GET(reg_val) \ + (((reg_val)&GLB_FATAL_STATUS_VALUE_MASK) >> GLB_FATAL_STATUS_VALUE_SHIFT) + +enum glb_fatal_status { + GLB_FATAL_STATUS_VALUE_OK, + GLB_FATAL_STATUS_VALUE_ASSERT, + GLB_FATAL_STATUS_VALUE_UNEXPECTED_EXCEPTION, + GLB_FATAL_STATUS_VALUE_HANG, + GLB_FATAL_STATUS_VALUE_COUNT +}; + /* GLB_DEBUG_ACK register */ #define GLB_DEBUG_ACK_DEBUG_RUN_SHIFT GPU_U(23) #define GLB_DEBUG_ACK_DEBUG_RUN_MASK (GPU_U(0x1) << GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) diff --git a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c index 240397e..b07cc96 100644 --- a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c +++ b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved. 
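The GLB_ACK_FATAL_SET() macro introduced above uses the `~(~(reg_val) | MASK)` form rather than the more common `(reg_val) & ~MASK`; the two are equivalent by De Morgan's law, so the macro still means "clear the field, then OR in the shifted value", matching the other *_SET helpers in this header. A small self-contained check of that identity for a single-bit field at position 27, with illustrative local names:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define FATAL_SHIFT 27u
#define FATAL_MASK (UINT32_C(1) << FATAL_SHIFT)
/* Same shape as GLB_ACK_FATAL_SET() in the patch. */
#define FATAL_SET(reg, val) \
        (~(~(reg) | FATAL_MASK) | (((val) << FATAL_SHIFT) & FATAL_MASK))

int main(void)
{
        uint32_t reg = 0xdeadbeef;

        /* De Morgan: ~(~reg | mask) == reg & ~mask, i.e. "clear the field". */
        assert((uint32_t)~(~reg | FATAL_MASK) == (reg & ~FATAL_MASK));

        printf("set  : 0x%08x\n", (uint32_t)FATAL_SET(reg, 1u));
        printf("clear: 0x%08x\n", (uint32_t)FATAL_SET(reg, 0u));
        return 0;
}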
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -224,8 +224,11 @@ static void kbase_csf_reset_end_hw_access(struct kbase_device *kbdev, int err_du static void kbase_csf_debug_dump_registers(struct kbase_device *kbdev) { + unsigned long flags; + kbase_io_history_dump(kbdev); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); dev_err(kbdev->dev, "Register state:"); dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x MCU_STATUS=0x%08x", kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_RAWSTAT)), @@ -251,6 +254,7 @@ static void kbase_csf_debug_dump_registers(struct kbase_device *kbdev) kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(TILER_CONFIG))); } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } /** @@ -396,6 +400,7 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev, bool firmware_ini */ if (likely(firmware_inited)) kbase_csf_scheduler_reset(kbdev); + cancel_work_sync(&kbdev->csf.firmware_reload_work); dev_dbg(kbdev->dev, "Disable GPU hardware counters.\n"); @@ -403,6 +408,7 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev, bool firmware_ini kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); ret = kbase_csf_reset_gpu_once(kbdev, firmware_inited, silent); + if (ret == SOFT_RESET_FAILED) { dev_err(kbdev->dev, "Soft-reset failed"); goto err; @@ -490,6 +496,13 @@ static void kbase_csf_reset_gpu_worker(struct work_struct *data) bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags) { +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbase_pm_is_gpu_lost(kbdev)) { + /* GPU access has been removed, reset will be done by Arbiter instead */ + return false; + } +#endif + if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR) kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface); diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c index 7fba656..5e215ca 100644 --- a/mali_kbase/csf/mali_kbase_csf_scheduler.c +++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,6 +36,7 @@ #include "mali_kbase_csf_tiler_heap_reclaim.h" #include "mali_kbase_csf_mcu_shared_reg.h" #include <linux/version_compat_defs.h> +#include <hwcnt/mali_kbase_hwcnt_context.h> #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) #include <mali_kbase_gpu_metrics.h> #include <csf/mali_kbase_csf_trace_buffer.h> @@ -84,7 +85,8 @@ scheduler_get_protm_enter_async_group(struct kbase_device *const kbdev, struct kbase_queue_group *const group); static struct kbase_queue_group *get_tock_top_group(struct kbase_csf_scheduler *const scheduler); static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev); -static int suspend_active_queue_groups(struct kbase_device *kbdev, unsigned long *slot_mask); +static int suspend_active_queue_groups(struct kbase_device *kbdev, unsigned long *slot_mask, + bool reset); static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, bool system_suspend); static void schedule_in_cycle(struct kbase_queue_group *group, bool force); static bool queue_group_scheduled_locked(struct kbase_queue_group *group); @@ -788,7 +790,8 @@ static void update_on_slot_queues_offsets(struct kbase_device *kbdev) static void enqueue_gpu_idle_work(struct kbase_csf_scheduler *const scheduler) { atomic_set(&scheduler->gpu_no_longer_idle, false); - queue_work(scheduler->idle_wq, &scheduler->gpu_idle_work); + atomic_inc(&scheduler->pending_gpu_idle_work); + complete(&scheduler->kthread_signal); } bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) @@ -800,7 +803,8 @@ bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->hwaccess_lock); lockdep_assert_held(&scheduler->interrupt_lock); - can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev); + can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev) && + !kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state); KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND, NULL, (((u64)can_suspend_on_idle) << 32)); @@ -2410,6 +2414,11 @@ static void cancel_tock_work(struct kbase_csf_scheduler *const scheduler) atomic_set(&scheduler->pending_tock_work, false); } +static void cancel_gpu_idle_work(struct kbase_csf_scheduler *const scheduler) +{ + atomic_set(&scheduler->pending_gpu_idle_work, false); +} + static void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, struct kbase_queue_group *group, enum kbase_csf_group_state run_state) @@ -3131,8 +3140,9 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, static int term_group_sync(struct kbase_queue_group *group) { struct kbase_device *kbdev = group->kctx->kbdev; - const unsigned int fw_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT); - long remaining = kbase_csf_timeout_in_jiffies(fw_timeout_ms); + const unsigned int group_term_timeout_ms = + kbase_get_timeout_ms(kbdev, CSF_CSG_TERM_TIMEOUT); + long remaining = kbase_csf_timeout_in_jiffies(group_term_timeout_ms); int err = 0; term_csg_slot(group); @@ -3148,7 +3158,7 @@ static int term_group_sync(struct kbase_queue_group *group) dev_warn( kbdev->dev, "[%llu] term request timeout (%d ms) for group %d of context %d_%d on slot %d", - kbase_backend_get_cycle_cnt(kbdev), fw_timeout_ms, group->handle, + kbase_backend_get_cycle_cnt(kbdev), group_term_timeout_ms, group->handle, group->kctx->tgid, 
group->kctx->id, group->csg_nr); if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS)) error_type = DF_PING_REQUEST_TIMEOUT; @@ -4138,7 +4148,7 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, * entry to protected mode happens with a memory region being locked and * the same region is then accessed by the GPU in protected mode. */ - mutex_lock(&kbdev->mmu_hw_mutex); + down_write(&kbdev->csf.pmode_sync_sem); spin_lock_irqsave(&scheduler->interrupt_lock, flags); /* Check if the previous transition to enter & exit the protected @@ -4204,7 +4214,7 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); err = kbase_csf_wait_protected_mode_enter(kbdev); - mutex_unlock(&kbdev->mmu_hw_mutex); + up_write(&kbdev->csf.pmode_sync_sem); if (err) schedule_actions_trigger_df( @@ -4219,7 +4229,7 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, } spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); - mutex_unlock(&kbdev->mmu_hw_mutex); + up_write(&kbdev->csf.pmode_sync_sem); } /** @@ -4797,8 +4807,9 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, bool s { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 }; + int ret; - int ret = suspend_active_queue_groups(kbdev, slot_mask); + ret = suspend_active_queue_groups(kbdev, slot_mask, false); if (unlikely(ret)) { const int csg_nr = ffs(slot_mask[0]) - 1; @@ -4988,14 +4999,14 @@ static bool scheduler_suspend_on_idle(struct kbase_device *kbdev) return true; } -static void gpu_idle_worker(struct work_struct *work) +static void gpu_idle_worker(struct kbase_device *kbdev) { - struct kbase_device *kbdev = - container_of(work, struct kbase_device, csf.scheduler.gpu_idle_work); struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; bool scheduler_is_idle_suspendable = false; bool all_groups_suspended = false; + WARN_ON_ONCE(atomic_read(&scheduler->pending_gpu_idle_work) == 0); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_START, NULL, 0u); #define __ENCODE_KTRACE_INFO(reset, idle, all_suspend) \ @@ -5005,7 +5016,7 @@ static void gpu_idle_worker(struct work_struct *work) dev_warn(kbdev->dev, "Quit idle for failing to prevent gpu reset.\n"); KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_END, NULL, __ENCODE_KTRACE_INFO(true, false, false)); - return; + goto exit; } kbase_debug_csf_fault_wait_completion(kbdev); mutex_lock(&scheduler->lock); @@ -5014,7 +5025,7 @@ static void gpu_idle_worker(struct work_struct *work) if (unlikely(scheduler->state == SCHED_BUSY)) { mutex_unlock(&scheduler->lock); kbase_reset_gpu_allow(kbdev); - return; + goto exit; } #endif @@ -5039,6 +5050,9 @@ static void gpu_idle_worker(struct work_struct *work) __ENCODE_KTRACE_INFO(false, scheduler_is_idle_suspendable, all_groups_suspended)); #undef __ENCODE_KTRACE_INFO + +exit: + atomic_dec(&scheduler->pending_gpu_idle_work); } static int scheduler_prepare(struct kbase_device *kbdev) @@ -5662,7 +5676,9 @@ exit_no_schedule_unlock: kbase_reset_gpu_allow(kbdev); } -static int suspend_active_queue_groups(struct kbase_device *kbdev, unsigned long *slot_mask) + +static int suspend_active_queue_groups(struct kbase_device *kbdev, unsigned long *slot_mask, + bool reset) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; u32 num_groups = kbdev->csf.global_iface.group_num; @@ -5675,12 +5691,12 @@ 
static int suspend_active_queue_groups(struct kbase_device *kbdev, unsigned long struct kbase_queue_group *group = scheduler->csg_slots[slot_num].resident_group; if (group) { - suspend_queue_group(group); + suspend_queue_group(group); set_bit(slot_num, slot_mask); } } - ret = wait_csg_slots_suspend(kbdev, slot_mask); + ret = wait_csg_slots_suspend(kbdev, slot_mask); return ret; } @@ -5693,7 +5709,7 @@ static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev) mutex_lock(&scheduler->lock); - ret = suspend_active_queue_groups(kbdev, slot_mask); + ret = suspend_active_queue_groups(kbdev, slot_mask, true); if (ret) { dev_warn( @@ -5830,9 +5846,9 @@ static void scheduler_inner_reset(struct kbase_device *kbdev) WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev)); /* Cancel any potential queued delayed work(s) */ - cancel_work_sync(&kbdev->csf.scheduler.gpu_idle_work); cancel_tick_work(scheduler); cancel_tock_work(scheduler); + cancel_gpu_idle_work(scheduler); cancel_delayed_work_sync(&scheduler->ping_work); mutex_lock(&scheduler->lock); @@ -5860,12 +5876,13 @@ static void scheduler_inner_reset(struct kbase_device *kbdev) void kbase_csf_scheduler_reset(struct kbase_device *kbdev) { struct kbase_context *kctx; - WARN_ON(!kbase_reset_gpu_is_active(kbdev)); KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET_START, NULL, 0u); - kbase_debug_csf_fault_wait_completion(kbdev); + if (kbase_reset_gpu_is_active(kbdev)) + kbase_debug_csf_fault_wait_completion(kbdev); + if (scheduler_handle_reset_in_protected_mode(kbdev) && !suspend_active_queue_groups_on_reset(kbdev)) { @@ -6453,8 +6470,8 @@ static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev) * check_group_sync_update_worker() - Check the sync wait condition for all the * blocked queue groups * - * @work: Pointer to the context-specific work item for evaluating the wait - * condition for all the queue groups in idle_wait_groups list. + * @kctx: The context to evaluate the wait condition for all the queue groups + * in idle_wait_groups list. * * This function checks the gpu queues of all the groups present in both * idle_wait_groups list of a context and all on slot idle groups (if GPU @@ -6464,27 +6481,14 @@ static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev) * runnable groups so that Scheduler can consider scheduling the group * in next tick or exit protected mode. */ -static void check_group_sync_update_worker(struct work_struct *work) +static void check_group_sync_update_worker(struct kbase_context *kctx) { - struct kbase_context *const kctx = - container_of(work, struct kbase_context, csf.sched.sync_update_work); struct kbase_device *const kbdev = kctx->kbdev; struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; bool sync_updated = false; mutex_lock(&scheduler->lock); -#if IS_ENABLED(CONFIG_DEBUG_FS) - if (unlikely(scheduler->state == SCHED_BUSY)) { - queue_work(atomic_read(&kctx->prioritized) ? 
- kctx->csf.sched.sync_update_wq_high_prio : - kctx->csf.sched.sync_update_wq_normal_prio, - &kctx->csf.sched.sync_update_work); - mutex_unlock(&scheduler->lock); - return; - } -#endif - KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START, kctx, 0u); if (kctx->csf.sched.num_idle_wait_grps != 0) { struct kbase_queue_group *group, *temp; @@ -6522,13 +6526,10 @@ static void check_group_sync_update_worker(struct work_struct *work) static enum kbase_csf_event_callback_action check_group_sync_update_cb(void *param) { struct kbase_context *const kctx = param; - struct workqueue_struct *wq = atomic_read(&kctx->prioritized) ? - kctx->csf.sched.sync_update_wq_high_prio : - kctx->csf.sched.sync_update_wq_normal_prio; KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_GROUP_SYNC_UPDATE_EVENT, kctx, 0u); - queue_work(wq, &kctx->csf.sched.sync_update_work); + kbase_csf_scheduler_enqueue_sync_update_work(kctx); return KBASE_CSF_EVENT_CALLBACK_KEEP; } @@ -6539,6 +6540,8 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) int err; struct kbase_device *kbdev = kctx->kbdev; + WARN_ON_ONCE(!kbdev->csf.scheduler.kthread_running); + #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) err = gpu_metrics_ctx_init(kctx); if (err) @@ -6551,25 +6554,7 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) INIT_LIST_HEAD(&kctx->csf.sched.idle_wait_groups); - kctx->csf.sched.sync_update_wq_high_prio = alloc_ordered_workqueue( - "mali_sync_wq_%i_high_prio", WQ_UNBOUND | WQ_HIGHPRI, kctx->tgid); - if (kctx->csf.sched.sync_update_wq_high_prio == NULL) { - dev_err(kbdev->dev, - "Failed to initialize scheduler context high-priority workqueue"); - err = -ENOMEM; - goto alloc_high_prio_wq_failed; - } - - kctx->csf.sched.sync_update_wq_normal_prio = - alloc_ordered_workqueue("mali_sync_wq_%i_normal_prio", 0, kctx->tgid); - if (kctx->csf.sched.sync_update_wq_normal_prio == NULL) { - dev_err(kbdev->dev, - "Failed to initialize scheduler context normal-priority workqueue"); - err = -ENOMEM; - goto alloc_normal_prio_wq_failed; - } - - INIT_WORK(&kctx->csf.sched.sync_update_work, check_group_sync_update_worker); + INIT_LIST_HEAD(&kctx->csf.sched.sync_update_work); kbase_csf_tiler_heap_reclaim_ctx_init(kctx); @@ -6583,10 +6568,6 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) return err; event_wait_add_failed: - destroy_workqueue(kctx->csf.sched.sync_update_wq_normal_prio); -alloc_normal_prio_wq_failed: - destroy_workqueue(kctx->csf.sched.sync_update_wq_high_prio); -alloc_high_prio_wq_failed: kbase_ctx_sched_remove_ctx(kctx); #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) gpu_metrics_ctx_term(kctx); @@ -6597,9 +6578,10 @@ alloc_high_prio_wq_failed: void kbase_csf_scheduler_context_term(struct kbase_context *kctx) { kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx); - cancel_work_sync(&kctx->csf.sched.sync_update_work); - destroy_workqueue(kctx->csf.sched.sync_update_wq_normal_prio); - destroy_workqueue(kctx->csf.sched.sync_update_wq_high_prio); + + /* Drain a pending SYNC_UPDATE work if any */ + kbase_csf_scheduler_wait_for_kthread_pending_work(kctx->kbdev, + &kctx->csf.pending_sync_update); kbase_ctx_sched_remove_ctx(kctx); #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) @@ -6607,53 +6589,157 @@ void kbase_csf_scheduler_context_term(struct kbase_context *kctx) #endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ } +static void handle_pending_sync_update_works(struct kbase_csf_scheduler *scheduler) +{ + struct kbase_context *sync_update_ctx; + 
+ if (atomic_cmpxchg(&scheduler->pending_sync_update_works, true, false) == false) + return; + + do { + unsigned long flags; + + spin_lock_irqsave(&scheduler->sync_update_work_ctxs_lock, flags); + sync_update_ctx = NULL; + if (!list_empty(&scheduler->sync_update_work_ctxs)) { + sync_update_ctx = list_first_entry(&scheduler->sync_update_work_ctxs, + struct kbase_context, + csf.sched.sync_update_work); + list_del_init(&sync_update_ctx->csf.sched.sync_update_work); + } + spin_unlock_irqrestore(&scheduler->sync_update_work_ctxs_lock, flags); + + if (sync_update_ctx != NULL) { + WARN_ON_ONCE(atomic_read(&sync_update_ctx->csf.pending_sync_update) == 0); + check_group_sync_update_worker(sync_update_ctx); + atomic_dec(&sync_update_ctx->csf.pending_sync_update); + } + } while (sync_update_ctx != NULL); +} + +static void handle_pending_protm_requests(struct kbase_csf_scheduler *scheduler) +{ + struct kbase_queue_group *protm_grp; + + if (atomic_cmpxchg(&scheduler->pending_protm_event_works, true, false) == false) + return; + + do { + unsigned long flags; + + spin_lock_irqsave(&scheduler->protm_event_work_grps_lock, flags); + protm_grp = NULL; + if (!list_empty(&scheduler->protm_event_work_grps)) { + protm_grp = list_first_entry(&scheduler->protm_event_work_grps, + struct kbase_queue_group, protm_event_work); + list_del_init(&protm_grp->protm_event_work); + } + spin_unlock_irqrestore(&scheduler->protm_event_work_grps_lock, flags); + + if (protm_grp != NULL) { + WARN_ON_ONCE(atomic_read(&protm_grp->pending_protm_event_work) == 0); + kbase_csf_process_protm_event_request(protm_grp); + atomic_dec(&protm_grp->pending_protm_event_work); + } + } while (protm_grp != NULL); +} + +static void handle_pending_kcpuq_commands(struct kbase_csf_scheduler *scheduler) +{ + struct kbase_kcpu_command_queue *kcpuq; + + if (atomic_cmpxchg(&scheduler->pending_kcpuq_works, true, false) == false) + return; + + do { + unsigned long flags; + + spin_lock_irqsave(&scheduler->kcpuq_work_queues_lock, flags); + kcpuq = NULL; + if (!list_empty(&scheduler->kcpuq_work_queues)) { + kcpuq = list_first_entry(&scheduler->kcpuq_work_queues, + struct kbase_kcpu_command_queue, high_prio_work); + list_del_init(&kcpuq->high_prio_work); + } + spin_unlock_irqrestore(&scheduler->kcpuq_work_queues_lock, flags); + + if (kcpuq != NULL) { + WARN_ON_ONCE(atomic_read(&kcpuq->pending_kick) == 0); + + mutex_lock(&kcpuq->lock); + kbase_csf_kcpu_queue_process(kcpuq, false); + mutex_unlock(&kcpuq->lock); + + atomic_dec(&kcpuq->pending_kick); + } + } while (kcpuq != NULL); +} + +static void handle_pending_queue_kicks(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + struct kbase_queue *queue; + + if (atomic_cmpxchg(&kbdev->csf.pending_gpuq_kicks, true, false) == false) + return; + + do { + u8 prio; + + spin_lock(&kbdev->csf.pending_gpuq_kick_queues_lock); + queue = NULL; + for (prio = 0; prio != KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++prio) { + if (!list_empty(&kbdev->csf.pending_gpuq_kick_queues[prio])) { + queue = list_first_entry(&kbdev->csf.pending_gpuq_kick_queues[prio], + struct kbase_queue, pending_kick_link); + list_del_init(&queue->pending_kick_link); + break; + } + } + spin_unlock(&kbdev->csf.pending_gpuq_kick_queues_lock); + + if (queue != NULL) { + WARN_ONCE( + prio != queue->group_priority, + "Queue %pK has priority %u but instead its kick was handled at priority %u", + (void *)queue, queue->group_priority, prio); + WARN_ON_ONCE(atomic_read(&queue->pending_kick) == 0); + + 
kbase_csf_process_queue_kick(queue); + + /* Perform a scheduling tock for high-priority queue groups if + * required. + */ + BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_REALTIME != 0); + BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_HIGH != 1); + if ((prio <= KBASE_QUEUE_GROUP_PRIORITY_HIGH) && + atomic_read(&scheduler->pending_tock_work)) + schedule_on_tock(kbdev); + } + } while (queue != NULL); +} + static int kbase_csf_scheduler_kthread(void *data) { struct kbase_device *const kbdev = data; struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; while (scheduler->kthread_running) { - struct kbase_queue *queue; - if (wait_for_completion_interruptible(&scheduler->kthread_signal) != 0) continue; reinit_completion(&scheduler->kthread_signal); - /* Iterate through queues with pending kicks */ - do { - u8 prio; - - spin_lock(&kbdev->csf.pending_gpuq_kicks_lock); - queue = NULL; - for (prio = 0; prio != KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++prio) { - if (!list_empty(&kbdev->csf.pending_gpuq_kicks[prio])) { - queue = list_first_entry( - &kbdev->csf.pending_gpuq_kicks[prio], - struct kbase_queue, pending_kick_link); - list_del_init(&queue->pending_kick_link); - break; - } - } - spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); - - if (queue != NULL) { - WARN_ONCE( - prio != queue->group_priority, - "Queue %pK has priority %hhu but instead its kick was handled at priority %hhu", - (void *)queue, queue->group_priority, prio); - - kbase_csf_process_queue_kick(queue); + /* + * The order in which these requests are handled is based on + * how they would influence each other's decisions. As a + * result, the tick & tock requests must be handled after all + * other requests, but before the GPU IDLE work. + */ - /* Perform a scheduling tock for high-priority queue groups if - * required. - */ - BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_REALTIME != 0); - BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_HIGH != 1); - if ((prio <= KBASE_QUEUE_GROUP_PRIORITY_HIGH) && - atomic_read(&scheduler->pending_tock_work)) - schedule_on_tock(kbdev); - } - } while (queue != NULL); + handle_pending_sync_update_works(scheduler); + handle_pending_protm_requests(scheduler); + handle_pending_kcpuq_commands(scheduler); + handle_pending_queue_kicks(kbdev); /* Check if we need to perform a scheduling tick/tock. A tick * event shall override a tock event but not vice-versa. 
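The enqueue helpers added elsewhere in this patch (kbase_csf_scheduler_enqueue_sync_update_work() and friends) and the handle_pending_*() drains above all follow one handoff pattern: a spinlock-protected list, a per-item pending counter, a per-scheduler boolean flag that collapses repeated enqueues into a single wakeup, and one completion that the kthread sleeps on. A minimal sketch of that pattern, using hypothetical names rather than the kbase symbols:

#include <linux/atomic.h>
#include <linux/completion.h>
#include <linux/list.h>
#include <linux/spinlock.h>

struct work_item {
	struct list_head link; /* empty while not queued */
	atomic_t pending;      /* observed by waiters */
};

struct drain_ctx {
	spinlock_t lock;
	struct list_head items;
	atomic_t any_pending;      /* one wakeup per burst of enqueues */
	struct completion signal;  /* what the kthread waits on */
};

/* Producer: queue an item (if not already queued) and wake the kthread once. */
static void enqueue_item(struct drain_ctx *d, struct work_item *it)
{
	unsigned long flags;

	spin_lock_irqsave(&d->lock, flags);
	if (list_empty(&it->link)) {
		list_add_tail(&it->link, &d->items);
		atomic_inc(&it->pending);
		if (atomic_cmpxchg(&d->any_pending, false, true) == false)
			complete(&d->signal);
	}
	spin_unlock_irqrestore(&d->lock, flags);
}

/* Consumer (kthread): pop one item at a time, process it with the lock
 * dropped, then release its pending count so waiters can make progress.
 */
static void drain_items(struct drain_ctx *d, void (*process)(struct work_item *))
{
	struct work_item *it;

	if (atomic_cmpxchg(&d->any_pending, true, false) == false)
		return;

	do {
		unsigned long flags;

		spin_lock_irqsave(&d->lock, flags);
		it = list_first_entry_or_null(&d->items, struct work_item, link);
		if (it)
			list_del_init(&it->link);
		spin_unlock_irqrestore(&d->lock, flags);

		if (it) {
			process(it);
			atomic_dec(&it->pending);
		}
	} while (it);
}

A waiter such as context termination can then nudge the kthread and sleep until the item's pending count reaches zero, which is what kbase_csf_scheduler_wait_for_kthread_pending_work() does via kbdev->csf.event_wait.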
@@ -6665,6 +6751,10 @@ static int kbase_csf_scheduler_kthread(void *data) schedule_on_tock(kbdev); } + /* Drain pending GPU idle works */ + while (atomic_read(&scheduler->pending_gpu_idle_work) > 0) + gpu_idle_worker(kbdev); + dev_dbg(kbdev->dev, "Waking up for event after a scheduling iteration."); wake_up_all(&kbdev->csf.event_wait); } @@ -6694,7 +6784,7 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev) scheduler->kthread_running = true; scheduler->gpuq_kthread = kthread_run(&kbase_csf_scheduler_kthread, kbdev, "mali-gpuq-kthread"); - if (!scheduler->gpuq_kthread) { + if (IS_ERR_OR_NULL(scheduler->gpuq_kthread)) { kfree(scheduler->csg_slots); scheduler->csg_slots = NULL; @@ -6734,12 +6824,6 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) atomic_set(&scheduler->timer_enabled, true); - scheduler->idle_wq = alloc_ordered_workqueue("csf_scheduler_gpu_idle_wq", WQ_HIGHPRI); - if (!scheduler->idle_wq) { - dev_err(kbdev->dev, "Failed to allocate GPU idle scheduler workqueue\n"); - return -ENOMEM; - } - INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor); mutex_init(&scheduler->lock); @@ -6757,20 +6841,30 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state); scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS; scheduler_doorbell_init(kbdev); - INIT_WORK(&scheduler->gpu_idle_work, gpu_idle_worker); hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); scheduler->tick_timer.function = tick_timer_callback; - kbase_csf_tiler_heap_reclaim_mgr_init(kbdev); + atomic_set(&scheduler->pending_sync_update_works, false); + spin_lock_init(&scheduler->sync_update_work_ctxs_lock); + INIT_LIST_HEAD(&scheduler->sync_update_work_ctxs); + atomic_set(&scheduler->pending_protm_event_works, false); + spin_lock_init(&scheduler->protm_event_work_grps_lock); + INIT_LIST_HEAD(&scheduler->protm_event_work_grps); + atomic_set(&scheduler->pending_kcpuq_works, false); + spin_lock_init(&scheduler->kcpuq_work_queues_lock); + INIT_LIST_HEAD(&scheduler->kcpuq_work_queues); + atomic_set(&scheduler->pending_tick_work, false); + atomic_set(&scheduler->pending_tock_work, false); + atomic_set(&scheduler->pending_gpu_idle_work, 0); - return 0; + return kbase_csf_tiler_heap_reclaim_mgr_init(kbdev); } void kbase_csf_scheduler_term(struct kbase_device *kbdev) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; - if (scheduler->gpuq_kthread) { + if (!IS_ERR_OR_NULL(scheduler->gpuq_kthread)) { scheduler->kthread_running = false; complete(&scheduler->kthread_signal); kthread_stop(scheduler->gpuq_kthread); @@ -6784,7 +6878,6 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev) * to be active at the time of Driver unload. 
*/ WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev)); - flush_work(&kbdev->csf.scheduler.gpu_idle_work); mutex_lock(&kbdev->csf.scheduler.lock); if (kbdev->csf.scheduler.state != SCHED_SUSPENDED) { @@ -6811,9 +6904,6 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev) void kbase_csf_scheduler_early_term(struct kbase_device *kbdev) { - if (kbdev->csf.scheduler.idle_wq) - destroy_workqueue(kbdev->csf.scheduler.idle_wq); - kbase_csf_tiler_heap_reclaim_mgr_term(kbdev); mutex_destroy(&kbdev->csf.scheduler.lock); } @@ -7096,6 +7186,65 @@ int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev) return 0; } +void kbase_csf_scheduler_enqueue_sync_update_work(struct kbase_context *kctx) +{ + struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler; + unsigned long flags; + + spin_lock_irqsave(&scheduler->sync_update_work_ctxs_lock, flags); + if (list_empty(&kctx->csf.sched.sync_update_work)) { + list_add_tail(&kctx->csf.sched.sync_update_work, &scheduler->sync_update_work_ctxs); + atomic_inc(&kctx->csf.pending_sync_update); + if (atomic_cmpxchg(&scheduler->pending_sync_update_works, false, true) == false) + complete(&scheduler->kthread_signal); + } + spin_unlock_irqrestore(&scheduler->sync_update_work_ctxs_lock, flags); +} + +void kbase_csf_scheduler_enqueue_protm_event_work(struct kbase_queue_group *group) +{ + struct kbase_context *const kctx = group->kctx; + struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler; + unsigned long flags; + + spin_lock_irqsave(&scheduler->protm_event_work_grps_lock, flags); + if (list_empty(&group->protm_event_work)) { + list_add_tail(&group->protm_event_work, &scheduler->protm_event_work_grps); + atomic_inc(&group->pending_protm_event_work); + if (atomic_cmpxchg(&scheduler->pending_protm_event_works, false, true) == false) + complete(&scheduler->kthread_signal); + } + spin_unlock_irqrestore(&scheduler->protm_event_work_grps_lock, flags); +} + +void kbase_csf_scheduler_enqueue_kcpuq_work(struct kbase_kcpu_command_queue *queue) +{ + struct kbase_csf_scheduler *const scheduler = &queue->kctx->kbdev->csf.scheduler; + unsigned long flags; + + spin_lock_irqsave(&scheduler->kcpuq_work_queues_lock, flags); + if (list_empty(&queue->high_prio_work)) { + list_add_tail(&queue->high_prio_work, &scheduler->kcpuq_work_queues); + atomic_inc(&queue->pending_kick); + if (atomic_cmpxchg(&scheduler->pending_kcpuq_works, false, true) == false) + complete(&scheduler->kthread_signal); + } + spin_unlock_irqrestore(&scheduler->kcpuq_work_queues_lock, flags); +} + +void kbase_csf_scheduler_wait_for_kthread_pending_work(struct kbase_device *kbdev, + atomic_t *pending) +{ + /* + * Signal kbase_csf_scheduler_kthread() to allow for the + * eventual completion of the current iteration. Once the work is + * done, the event_wait wait queue shall be signalled. + */ + + complete(&kbdev->csf.scheduler.kthread_signal); + wait_event(kbdev->csf.event_wait, atomic_read(pending) == 0); +} + void kbase_csf_scheduler_reval_idleness_post_sleep(struct kbase_device *kbdev) { u32 csg_nr; diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.h b/mali_kbase/csf/mali_kbase_csf_scheduler.h index 5047092..2200bdf 100644 --- a/mali_kbase/csf/mali_kbase_csf_scheduler.h +++ b/mali_kbase/csf/mali_kbase_csf_scheduler.h @@ -235,7 +235,8 @@ void kbase_csf_scheduler_early_term(struct kbase_device *kbdev); * No explicit re-initialization is done for CSG & CS interface I/O pages; * instead, that happens implicitly on firmware reload. 
* - * Should be called only after initiating the GPU reset. + * Should be called either after initiating the GPU reset or when MCU reset is + * expected to follow such as GPU_LOST case. */ void kbase_csf_scheduler_reset(struct kbase_device *kbdev); @@ -488,6 +489,48 @@ static inline bool kbase_csf_scheduler_all_csgs_idle(struct kbase_device *kbdev) } /** + * kbase_csf_scheduler_enqueue_sync_update_work() - Add a context to the list + * of contexts to handle + * SYNC_UPDATE events. + * + * @kctx: The context to handle SYNC_UPDATE event + * + * This function wakes up kbase_csf_scheduler_kthread() to handle pending + * SYNC_UPDATE events for all contexts. + */ +void kbase_csf_scheduler_enqueue_sync_update_work(struct kbase_context *kctx); + +/** + * kbase_csf_scheduler_enqueue_protm_event_work() - Add a group to the list + * of groups to handle + * PROTM requests. + * + * @group: The group to handle protected mode request + * + * This function wakes up kbase_csf_scheduler_kthread() to handle pending + * protected mode requests for all groups. + */ +void kbase_csf_scheduler_enqueue_protm_event_work(struct kbase_queue_group *group); + +/** + * kbase_csf_scheduler_enqueue_kcpuq_work() - Wake up kbase_csf_scheduler_kthread() to process + * pending commands for a KCPU queue. + * + * @queue: The queue to process pending commands for + */ +void kbase_csf_scheduler_enqueue_kcpuq_work(struct kbase_kcpu_command_queue *queue); + +/** + * kbase_csf_scheduler_wait_for_kthread_pending_work - Wait until a pending work has completed in + * kbase_csf_scheduler_kthread(). + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface + * @pending: The work to wait for + */ +void kbase_csf_scheduler_wait_for_kthread_pending_work(struct kbase_device *kbdev, + atomic_t *pending); + +/** * kbase_csf_scheduler_invoke_tick() - Invoke the scheduling tick * * @kbdev: Pointer to the device diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c index 2d148ee..51d665f 100644 --- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c +++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c @@ -218,7 +218,7 @@ static void remove_unlinked_chunk(struct kbase_context *kctx, if (WARN_ON(!list_empty(&chunk->link))) return; - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); kbase_vunmap(kctx, &chunk->map); /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT * regions), and so we must clear that flag too before freeing. 
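Note that the protected-mode synchronisation introduced earlier in this patch (down_write(&kbdev->csf.pmode_sync_sem) around protected-mode entry) pairs with the kbase_gpu_vm_lock_with_pmode_sync()/kbase_gpu_vm_unlock_with_pmode_sync() helpers used in the hunk above; their bodies are not part of this diff, but presumably they take the same semaphore on the read side so that region teardown cannot interleave with a protected-mode transition. A hedged sketch of that assumed shape:

/* Assumed read-side pairing - an illustrative sketch, not the kbase code. */
static inline void vm_lock_with_pmode_sync_sketch(struct kbase_context *kctx)
{
	down_read(&kctx->kbdev->csf.pmode_sync_sem);
	kbase_gpu_vm_lock(kctx);
}

static inline void vm_unlock_with_pmode_sync_sketch(struct kbase_context *kctx)
{
	kbase_gpu_vm_unlock(kctx);
	up_read(&kctx->kbdev->csf.pmode_sync_sem);
}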
@@ -231,7 +231,7 @@ static void remove_unlinked_chunk(struct kbase_context *kctx, chunk->region->flags &= ~KBASE_REG_DONT_NEED; #endif kbase_mem_free_region(kctx, chunk->region); - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); kfree(chunk); } @@ -1058,6 +1058,7 @@ static bool delete_chunk_physical_pages(struct kbase_csf_tiler_heap *heap, u64 c struct kbase_csf_tiler_heap_chunk *chunk = NULL; lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); + lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); chunk = find_chunk(heap, chunk_gpu_va); if (unlikely(!chunk)) { diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.c index 2fc19de..df4feb7 100644 --- a/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.c +++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.c @@ -331,8 +331,8 @@ static unsigned long kbase_csf_tiler_heap_reclaim_scan_free_pages(struct kbase_d static unsigned long kbase_csf_tiler_heap_reclaim_count_objects(struct shrinker *s, struct shrink_control *sc) { - struct kbase_device *kbdev = - container_of(s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim); + struct kbase_device *kbdev = KBASE_GET_KBASE_DATA_FROM_SHRINKER( + s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim); return kbase_csf_tiler_heap_reclaim_count_free_pages(kbdev, sc); } @@ -340,8 +340,8 @@ static unsigned long kbase_csf_tiler_heap_reclaim_count_objects(struct shrinker static unsigned long kbase_csf_tiler_heap_reclaim_scan_objects(struct shrinker *s, struct shrink_control *sc) { - struct kbase_device *kbdev = - container_of(s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim); + struct kbase_device *kbdev = KBASE_GET_KBASE_DATA_FROM_SHRINKER( + s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim); return kbase_csf_tiler_heap_reclaim_scan_free_pages(kbdev, sc); } @@ -352,11 +352,17 @@ void kbase_csf_tiler_heap_reclaim_ctx_init(struct kbase_context *kctx) INIT_LIST_HEAD(&kctx->csf.sched.heap_info.mgr_link); } -void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev) +int kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; - struct shrinker *reclaim = &scheduler->reclaim_mgr.heap_reclaim; u8 prio; + struct shrinker *reclaim; + + reclaim = + KBASE_INIT_RECLAIM(&(scheduler->reclaim_mgr), heap_reclaim, "mali-csf-tiler-heap"); + if (!reclaim) + return -ENOMEM; + KBASE_SET_RECLAIM(&(scheduler->reclaim_mgr), heap_reclaim, reclaim); for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_COUNT; prio++) @@ -367,13 +373,10 @@ void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev) reclaim->seeks = HEAP_SHRINKER_SEEKS; reclaim->batch = HEAP_SHRINKER_BATCH; -#if !defined(CONFIG_MALI_VECTOR_DUMP) -#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE - register_shrinker(reclaim); -#else - register_shrinker(reclaim, "mali-csf-tiler-heap"); -#endif -#endif + if (!IS_ENABLED(CONFIG_MALI_VECTOR_DUMP)) + KBASE_REGISTER_SHRINKER(reclaim, "mali-csf-tiler-heap", kbdev); + + return 0; } void kbase_csf_tiler_heap_reclaim_mgr_term(struct kbase_device *kbdev) @@ -381,9 +384,8 @@ void kbase_csf_tiler_heap_reclaim_mgr_term(struct kbase_device *kbdev) struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; u8 prio; -#if !defined(CONFIG_MALI_VECTOR_DUMP) - unregister_shrinker(&scheduler->reclaim_mgr.heap_reclaim); -#endif + if (!IS_ENABLED(CONFIG_MALI_VECTOR_DUMP)) + 
KBASE_UNREGISTER_SHRINKER(scheduler->reclaim_mgr.heap_reclaim); for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_COUNT; prio++) diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.h b/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.h index 7880de0..d41b7ba 100644 --- a/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.h +++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.h @@ -66,8 +66,10 @@ void kbase_csf_tiler_heap_reclaim_ctx_init(struct kbase_context *kctx); * @kbdev: Pointer to the device. * * This function must be called only when a kbase device is initialized. + * + * Return: 0 if issuing reclaim_mgr init was successful, otherwise an error code. */ -void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev); +int kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev); /** * kbase_csf_tiler_heap_reclaim_mgr_term - Termination call for the tiler heap reclaim manger. diff --git a/mali_kbase/csf/mali_kbase_csf_tl_reader.c b/mali_kbase/csf/mali_kbase_csf_tl_reader.c index 06163e5..4ee64e1 100644 --- a/mali_kbase/csf/mali_kbase_csf_tl_reader.c +++ b/mali_kbase/csf/mali_kbase_csf_tl_reader.c @@ -152,13 +152,22 @@ static bool tl_reader_overflow_check(struct kbase_csf_tl_reader *self, u16 event * * Reset the reader to the default state, i.e. set all the * mutable fields to zero. + * + * NOTE: this function expects the irq spinlock to be held. */ static void tl_reader_reset(struct kbase_csf_tl_reader *self) { + lockdep_assert_held(&self->read_lock); + self->got_first_event = false; self->is_active = false; self->expected_event_id = 0; self->tl_header.btc = 0; + + /* There might be data left in the trace buffer from the previous + * tracing session. We don't want it to leak into this session. + */ + kbase_csf_firmware_trace_buffer_discard_all(self->trace_buffer); } int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self) @@ -325,21 +334,16 @@ static int tl_reader_update_enable_bit(struct kbase_csf_tl_reader *self, bool va void kbase_csf_tl_reader_init(struct kbase_csf_tl_reader *self, struct kbase_tlstream *stream) { - self->timer_interval = KBASE_CSF_TL_READ_INTERVAL_DEFAULT; + *self = (struct kbase_csf_tl_reader){ + .timer_interval = KBASE_CSF_TL_READ_INTERVAL_DEFAULT, + .stream = stream, + .kbdev = NULL, /* This will be initialized by tl_reader_init_late() */ + .is_active = false, + }; kbase_timer_setup(&self->read_timer, kbasep_csf_tl_reader_read_callback); - self->stream = stream; - - /* This will be initialized by tl_reader_init_late() */ - self->kbdev = NULL; - self->trace_buffer = NULL; - self->tl_header.data = NULL; - self->tl_header.size = 0; - spin_lock_init(&self->read_lock); - - tl_reader_reset(self); } void kbase_csf_tl_reader_term(struct kbase_csf_tl_reader *self) @@ -349,13 +353,19 @@ void kbase_csf_tl_reader_term(struct kbase_csf_tl_reader *self) int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self, struct kbase_device *kbdev) { + unsigned long flags; int rcode; + spin_lock_irqsave(&self->read_lock, flags); + /* If already running, early exit. 
*/ - if (self->is_active) + if (self->is_active) { + spin_unlock_irqrestore(&self->read_lock, flags); return 0; + } if (tl_reader_init_late(self, kbdev)) { + spin_unlock_irqrestore(&self->read_lock, flags); #if IS_ENABLED(CONFIG_MALI_NO_MALI) dev_warn(kbdev->dev, "CSFFW timeline is not available for MALI_NO_MALI builds!"); return 0; @@ -367,6 +377,9 @@ int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self, struct kbase_dev tl_reader_reset(self); self->is_active = true; + + spin_unlock_irqrestore(&self->read_lock, flags); + /* Set bytes to copy to the header size. This is to trigger copying * of the header to the user space. */ diff --git a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c index 47a3d21..9c27a71 100644 --- a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c +++ b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c @@ -520,6 +520,14 @@ void kbase_csf_firmware_trace_buffer_discard(struct firmware_trace_buffer *trace } EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_discard); +void kbase_csf_firmware_trace_buffer_discard_all(struct firmware_trace_buffer *trace_buffer) +{ + if (WARN_ON(!trace_buffer)) + return; + + *(trace_buffer->cpu_va.extract_cpu_va) = *(trace_buffer->cpu_va.insert_cpu_va); +} + static void update_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, u64 mask) { unsigned int i; diff --git a/mali_kbase/csf/mali_kbase_csf_trace_buffer.h b/mali_kbase/csf/mali_kbase_csf_trace_buffer.h index 90dfcb2..35988ea 100644 --- a/mali_kbase/csf/mali_kbase_csf_trace_buffer.h +++ b/mali_kbase/csf/mali_kbase_csf_trace_buffer.h @@ -180,6 +180,15 @@ unsigned int kbase_csf_firmware_trace_buffer_read_data(struct firmware_trace_buf void kbase_csf_firmware_trace_buffer_discard(struct firmware_trace_buffer *trace_buffer); /** + * kbase_csf_firmware_trace_buffer_discard_all - Discard all data from a trace buffer + * + * @trace_buffer: Trace buffer handle + * + * Discard all the data in the trace buffer to make it empty. 
+ */ +void kbase_csf_firmware_trace_buffer_discard_all(struct firmware_trace_buffer *trace_buffer); + +/** * kbase_csf_firmware_trace_buffer_get_active_mask64 - Get trace buffer active mask * * @tb: Trace buffer handle diff --git a/mali_kbase/csf/mali_kbase_csf_util.c b/mali_kbase/csf/mali_kbase_csf_util.c index 7dc32a1..504379e 100644 --- a/mali_kbase/csf/mali_kbase_csf_util.c +++ b/mali_kbase/csf/mali_kbase_csf_util.c @@ -115,7 +115,7 @@ struct kbasep_printer *kbasep_printer_buffer_init(struct kbase_device *kbdev, if (kbpr) { if (kfifo_alloc(&kbpr->fifo, KBASEP_PRINTER_BUFFER_MAX_SIZE, GFP_KERNEL)) { - kfree(kbpr); + vfree(kbpr); return NULL; } kbpr->kbdev = kbdev; diff --git a/mali_kbase/device/backend/mali_kbase_device_csf.c b/mali_kbase/device/backend/mali_kbase_device_csf.c index 680c69d..33db85d 100644 --- a/mali_kbase/device/backend/mali_kbase_device_csf.c +++ b/mali_kbase/device/backend/mali_kbase_device_csf.c @@ -279,10 +279,8 @@ static const struct kbase_device_init dev_init[] = { { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" }, #else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ { kbase_get_irqs, NULL, "IRQ search failed" }, -#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ -#if !IS_ENABLED(CONFIG_MALI_NO_MALI) { registers_map, registers_unmap, "Register map failed" }, -#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */ +#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) { kbase_gpu_metrics_init, kbase_gpu_metrics_term, "GPU metrics initialization failed" }, #endif /* IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) */ diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c index ab9df01..c5959f3 100644 --- a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c +++ b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -173,6 +173,9 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) kbase_pm_power_changed(kbdev); } + if (val & MCU_STATUS_GPU_IRQ) + wake_up_all(&kbdev->csf.event_wait); + KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val); } KBASE_EXPORT_TEST_API(kbase_gpu_interrupt); diff --git a/mali_kbase/device/backend/mali_kbase_device_jm.c b/mali_kbase/device/backend/mali_kbase_device_jm.c index b2fd8bd..4acf2a5 100644 --- a/mali_kbase/device/backend/mali_kbase_device_jm.c +++ b/mali_kbase/device/backend/mali_kbase_device_jm.c @@ -217,10 +217,8 @@ static const struct kbase_device_init dev_init[] = { { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" }, #else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ { kbase_get_irqs, NULL, "IRQ search failed" }, -#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ -#if !IS_ENABLED(CONFIG_MALI_NO_MALI) { registers_map, registers_unmap, "Register map failed" }, -#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */ +#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) { kbase_gpu_metrics_init, kbase_gpu_metrics_term, "GPU metrics initialization failed" }, #endif /* IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) */ diff --git a/mali_kbase/device/mali_kbase_device.h b/mali_kbase/device/mali_kbase_device.h index 9cca6af..b58f0b5 100644 --- a/mali_kbase/device/mali_kbase_device.h +++ b/mali_kbase/device/mali_kbase_device.h @@ -58,6 +58,9 @@ void kbase_increment_device_id(void); * When a device file is opened for the first time, * load firmware and initialize hardware counter components. * + * It is safe for this function to be called multiple times without ill + * effects. Only the first call would be effective. + * * Return: 0 on success. An error code on failure. */ int kbase_device_firmware_init_once(struct kbase_device *kbdev); diff --git a/mali_kbase/device/mali_kbase_device_hw.c b/mali_kbase/device/mali_kbase_device_hw.c index da597af..8b20c0b 100644 --- a/mali_kbase/device/mali_kbase_device_hw.c +++ b/mali_kbase/device/mali_kbase_device_hw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -32,7 +32,7 @@ bool kbase_is_gpu_removed(struct kbase_device *kbdev) if (!IS_ENABLED(CONFIG_MALI_ARBITER_SUPPORT)) return false; - return (kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_ID)) == 0); + return (KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(GPU_ID)) == 0); } /** diff --git a/mali_kbase/hw_access/backend/mali_kbase_hw_access_real_hw.c b/mali_kbase/hw_access/backend/mali_kbase_hw_access_real_hw.c index f4afbf5..21a4fd1 100644 --- a/mali_kbase/hw_access/backend/mali_kbase_hw_access_real_hw.c +++ b/mali_kbase/hw_access/backend/mali_kbase_hw_access_real_hw.c @@ -24,12 +24,13 @@ #include <mali_kbase.h> #include <hw_access/mali_kbase_hw_access.h> +#include <linux/mali_hw_access.h> u64 kbase_reg_get_gpu_id(struct kbase_device *kbdev) { u32 val[2] = { 0 }; - val[0] = readl(kbdev->reg); + val[0] = mali_readl(kbdev->reg); return (u64)val[0] | ((u64)val[1] << 32); @@ -45,7 +46,7 @@ u32 kbase_reg_read32(struct kbase_device *kbdev, u32 reg_enum) KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_32_BIT))) return 0; - val = readl(kbdev->regmap.regs[reg_enum]); + val = mali_readl(kbdev->regmap.regs[reg_enum]); #if IS_ENABLED(CONFIG_DEBUG_FS) if (unlikely(kbdev->io_history.enabled)) @@ -69,8 +70,8 @@ u64 kbase_reg_read64(struct kbase_device *kbdev, u32 reg_enum) KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_64_BIT))) return 0; - val = (u64)readl(kbdev->regmap.regs[reg_enum]) | - ((u64)readl(kbdev->regmap.regs[reg_enum] + 4) << 32); + val = (u64)mali_readl(kbdev->regmap.regs[reg_enum]) | + ((u64)mali_readl(kbdev->regmap.regs[reg_enum] + 4) << 32); #if IS_ENABLED(CONFIG_DEBUG_FS) if (unlikely(kbdev->io_history.enabled)) { @@ -101,9 +102,9 @@ u64 kbase_reg_read64_coherent(struct kbase_device *kbdev, u32 reg_enum) return 0; do { - hi1 = readl(kbdev->regmap.regs[reg_enum] + 4); - lo = readl(kbdev->regmap.regs[reg_enum]); - hi2 = readl(kbdev->regmap.regs[reg_enum] + 4); + hi1 = mali_readl(kbdev->regmap.regs[reg_enum] + 4); + lo = mali_readl(kbdev->regmap.regs[reg_enum]); + hi2 = mali_readl(kbdev->regmap.regs[reg_enum] + 4); } while (hi1 != hi2); val = lo | (((u64)hi1) << 32); @@ -131,7 +132,7 @@ void kbase_reg_write32(struct kbase_device *kbdev, u32 reg_enum, u32 value) KBASE_REGMAP_PERM_WRITE | KBASE_REGMAP_WIDTH_32_BIT))) return; - writel(value, kbdev->regmap.regs[reg_enum]); + mali_writel(value, kbdev->regmap.regs[reg_enum]); #if IS_ENABLED(CONFIG_DEBUG_FS) if (unlikely(kbdev->io_history.enabled)) @@ -151,8 +152,8 @@ void kbase_reg_write64(struct kbase_device *kbdev, u32 reg_enum, u64 value) KBASE_REGMAP_PERM_WRITE | KBASE_REGMAP_WIDTH_64_BIT))) return; - writel(value & 0xFFFFFFFF, kbdev->regmap.regs[reg_enum]); - writel(value >> 32, kbdev->regmap.regs[reg_enum] + 4); + mali_writel(value & 0xFFFFFFFF, kbdev->regmap.regs[reg_enum]); + mali_writel(value >> 32, kbdev->regmap.regs[reg_enum] + 4); #if IS_ENABLED(CONFIG_DEBUG_FS) if (unlikely(kbdev->io_history.enabled)) { diff --git a/mali_kbase/hw_access/mali_kbase_hw_access_regmap.h b/mali_kbase/hw_access/mali_kbase_hw_access_regmap.h index 9bd646d..1ba2598 100644 --- a/mali_kbase/hw_access/mali_kbase_hw_access_regmap.h +++ b/mali_kbase/hw_access/mali_kbase_hw_access_regmap.h @@ -308,6 +308,16 @@ #define TC_CLOCK_GATE_OVERRIDE (1ul << 0) /* End TILER_CONFIG register */ +/* L2_FEATURES register */ +#define L2_FEATURES_CACHE_SIZE_SHIFT GPU_U(16) +#define L2_FEATURES_CACHE_SIZE_MASK (GPU_U(0xFF) << 
L2_FEATURES_CACHE_SIZE_SHIFT) +#define L2_FEATURES_CACHE_SIZE_GET(reg_val) \ + (((reg_val)&L2_FEATURES_CACHE_SIZE_MASK) >> L2_FEATURES_CACHE_SIZE_SHIFT) +#define L2_FEATURES_CACHE_SIZE_SET(reg_val, value) \ + (~(~(reg_val) | L2_FEATURES_CACHE_SIZE_MASK) | \ + (((value) << L2_FEATURES_CACHE_SIZE_SHIFT) & L2_FEATURES_CACHE_SIZE_MASK)) +/* End L2_FEATURES register */ + /* L2_CONFIG register */ #define L2_CONFIG_SIZE_SHIFT 16 #define L2_CONFIG_SIZE_MASK (0xFFul << L2_CONFIG_SIZE_SHIFT) diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c index d7911ae..d1290ca 100644 --- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c +++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,7 +21,6 @@ #include "hwcnt/backend/mali_kbase_hwcnt_backend_csf.h" #include "hwcnt/mali_kbase_hwcnt_gpu.h" -#include "hwcnt/mali_kbase_hwcnt_types.h" #include <linux/log2.h> #include <linux/kernel.h> @@ -255,7 +254,8 @@ struct kbase_hwcnt_csf_physical_layout { * @hwc_threshold_work: Worker for consuming available samples when * threshold interrupt raised. * @num_l2_slices: Current number of L2 slices allocated to the GPU. - * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU. + * @powered_shader_core_mask: The common mask between the debug_core_mask + * and the shader_present_bitmap. */ struct kbase_hwcnt_backend_csf { struct kbase_hwcnt_backend_csf_info *info; @@ -283,7 +283,7 @@ struct kbase_hwcnt_backend_csf { struct work_struct hwc_dump_work; struct work_struct hwc_threshold_work; size_t num_l2_slices; - u64 shader_present_bitmap; + u64 powered_shader_core_mask; }; static bool kbasep_hwcnt_backend_csf_backend_exists(struct kbase_hwcnt_backend_csf_info *csf_info) @@ -296,7 +296,7 @@ static bool kbasep_hwcnt_backend_csf_backend_exists(struct kbase_hwcnt_backend_c } void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_interface *iface, - size_t num_l2_slices, u64 shader_present_bitmap) + size_t num_l2_slices, u64 powered_shader_core_mask) { struct kbase_hwcnt_backend_csf_info *csf_info; @@ -313,12 +313,12 @@ void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_inte return; if (WARN_ON(num_l2_slices > csf_info->backend->phys_layout.mmu_l2_cnt) || - WARN_ON((shader_present_bitmap & csf_info->backend->phys_layout.shader_avail_mask) != - shader_present_bitmap)) + WARN_ON((powered_shader_core_mask & csf_info->backend->phys_layout.shader_avail_mask) != + powered_shader_core_mask)) return; csf_info->backend->num_l2_slices = num_l2_slices; - csf_info->backend->shader_present_bitmap = shader_present_bitmap; + csf_info->backend->powered_shader_core_mask = powered_shader_core_mask; } /** @@ -424,7 +424,7 @@ static void kbasep_hwcnt_backend_csf_init_layout( WARN_ON(!prfcnt_info); WARN_ON(!phys_layout); - shader_core_cnt = (size_t)fls64(prfcnt_info->core_mask); + shader_core_cnt = (size_t)fls64(prfcnt_info->sc_core_mask); values_per_block = prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES; fw_block_cnt = div_u64(prfcnt_info->prfcnt_fw_size, prfcnt_info->prfcnt_block_size); hw_block_cnt = 
div_u64(prfcnt_info->prfcnt_hw_size, prfcnt_info->prfcnt_block_size); @@ -445,7 +445,7 @@ static void kbasep_hwcnt_backend_csf_init_layout( .fw_block_cnt = fw_block_cnt, .hw_block_cnt = hw_block_cnt, .block_cnt = fw_block_cnt + hw_block_cnt, - .shader_avail_mask = prfcnt_info->core_mask, + .shader_avail_mask = prfcnt_info->sc_core_mask, .headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, .values_per_block = values_per_block, .counters_per_block = values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK, @@ -517,34 +517,21 @@ static void kbasep_hwcnt_backend_csf_update_user_sample(struct kbase_hwcnt_backe memset(backend_csf->block_states, 0, block_state_bytes); } -/** - * kbasep_hwcnt_backend_csf_update_block_state - Update block state of a block instance with - * information from a sample. - * @phys_layout: Physical memory layout information of HWC - * sample buffer. - * @enable_mask: Counter enable mask for the block whose state is being updated. - * @enable_state: The CSF backend internal enabled state. - * @exiting_protm: Whether or not the sample is taken when the GPU is exiting - * protected mode. - * @block_idx: Index of block within the ringbuffer. - * @block_state: Pointer to existing block state of the block whose state is being - * updated. - * @fw_in_protected_mode: Whether or not GPU is in protected mode during sampling. - */ -static void kbasep_hwcnt_backend_csf_update_block_state( - const struct kbase_hwcnt_csf_physical_layout *phys_layout, const u32 enable_mask, - enum kbase_hwcnt_backend_csf_enable_state enable_state, bool exiting_protm, - size_t block_idx, blk_stt_t *const block_state, bool fw_in_protected_mode) +void kbasep_hwcnt_backend_csf_update_block_state(struct kbase_hwcnt_backend_csf *backend, + const u32 enable_mask, bool exiting_protm, + size_t block_idx, blk_stt_t *const block_state, + bool fw_in_protected_mode) { + const struct kbase_hwcnt_csf_physical_layout *phys_layout = &backend->phys_layout; /* Offset of shader core blocks from the start of the HW blocks in the sample */ size_t shader_core_block_offset = - (size_t)(phys_layout->hw_block_cnt - phys_layout->shader_cnt); + (size_t)(phys_layout->block_cnt - phys_layout->shader_cnt); bool is_shader_core_block; - is_shader_core_block = block_idx >= shader_core_block_offset; + is_shader_core_block = (block_idx >= shader_core_block_offset); /* Set power bits for the block state for the block, for the sample */ - switch (enable_state) { + switch (backend->enable_state) { /* Disabled states */ case KBASE_HWCNT_BACKEND_CSF_DISABLED: case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED: @@ -592,21 +579,45 @@ static void kbasep_hwcnt_backend_csf_update_block_state( KBASE_HWCNT_STATE_NORMAL); else kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_NORMAL); + + /* powered_shader_core_mask stored in the backend is a combination of + * the shader present and the debug core mask, so explicit checking of the + * core mask is not required here. 
+ */ + if (is_shader_core_block) { + u64 current_shader_core = 1ULL << (block_idx - shader_core_block_offset); + + WARN_ON_ONCE(backend->phys_layout.shader_cnt > 64); + + if (current_shader_core & backend->info->backend->powered_shader_core_mask) + kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_AVAILABLE); + else if (current_shader_core & ~backend->info->backend->powered_shader_core_mask) + kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_UNAVAILABLE); + else + WARN_ON_ONCE(true); + } + else + kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_AVAILABLE); } -static void kbasep_hwcnt_backend_csf_accumulate_sample( - const struct kbase_hwcnt_csf_physical_layout *phys_layout, size_t dump_bytes, - u64 *accum_buf, const u32 *old_sample_buf, const u32 *new_sample_buf, - blk_stt_t *const block_states, bool clearing_samples, - enum kbase_hwcnt_backend_csf_enable_state enable_state, bool fw_in_protected_mode) +static void kbasep_hwcnt_backend_csf_accumulate_sample(struct kbase_hwcnt_backend_csf *backend, + const u32 *old_sample_buf, + const u32 *new_sample_buf) { + const struct kbase_hwcnt_csf_physical_layout *phys_layout = &backend->phys_layout; + const size_t dump_bytes = backend->info->prfcnt_info.dump_bytes; + const size_t values_per_block = phys_layout->values_per_block; + blk_stt_t *const block_states = backend->block_states; + const bool fw_in_protected_mode = backend->info->fw_in_protected_mode; + const bool clearing_samples = backend->info->prfcnt_info.clearing_samples; + u64 *accum_buf = backend->accum_buf; + size_t block_idx; const u32 *old_block = old_sample_buf; const u32 *new_block = new_sample_buf; u64 *acc_block = accum_buf; /* Flag to indicate whether current sample is exiting protected mode. */ bool exiting_protm = false; - const size_t values_per_block = phys_layout->values_per_block; /* The block pointers now point to the first HW block, which is always a CSHW/front-end * block. 
The counter enable mask for this block can be checked to determine whether this @@ -620,9 +631,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample( const u32 old_enable_mask = old_block[phys_layout->enable_mask_offset]; const u32 new_enable_mask = new_block[phys_layout->enable_mask_offset]; /* Update block state with information of the current sample */ - kbasep_hwcnt_backend_csf_update_block_state(phys_layout, new_enable_mask, - enable_state, exiting_protm, block_idx, - &block_states[block_idx], + kbasep_hwcnt_backend_csf_update_block_state(backend, new_enable_mask, exiting_protm, + block_idx, &block_states[block_idx], fw_in_protected_mode); if (!(new_enable_mask & HWCNT_BLOCK_EMPTY_SAMPLE)) { @@ -706,7 +716,6 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe u8 *cpu_dump_base = (u8 *)backend_csf->ring_buf_cpu_base; const size_t ring_buf_cnt = backend_csf->info->ring_buf_cnt; const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes; - bool clearing_samples = backend_csf->info->prfcnt_info.clearing_samples; u32 *old_sample_buf = backend_csf->old_sample_buf; u32 *new_sample_buf = old_sample_buf; const struct kbase_hwcnt_csf_physical_layout *phys_layout = &backend_csf->phys_layout; @@ -740,10 +749,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe const u32 buf_idx = raw_idx & (ring_buf_cnt - 1); new_sample_buf = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes]; - kbasep_hwcnt_backend_csf_accumulate_sample( - phys_layout, buf_dump_bytes, backend_csf->accum_buf, old_sample_buf, - new_sample_buf, backend_csf->block_states, clearing_samples, - backend_csf->enable_state, backend_csf->info->fw_in_protected_mode); + kbasep_hwcnt_backend_csf_accumulate_sample(backend_csf, old_sample_buf, + new_sample_buf); old_sample_buf = new_sample_buf; } @@ -1457,7 +1464,7 @@ static int kbasep_hwcnt_backend_csf_dump_get(struct kbase_hwcnt_backend *backend ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf, backend_csf->to_user_block_states, dst_enable_map, backend_csf->num_l2_slices, - backend_csf->shader_present_bitmap, accumulate); + backend_csf->powered_shader_core_mask, accumulate); /* If no error occurred (zero ret value), then update block state for all blocks in the * accumulation with the current sample's block state. @@ -2098,7 +2105,7 @@ int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface * gpu_info.has_fw_counters = csf_info->prfcnt_info.prfcnt_fw_size > 0; gpu_info.l2_count = csf_info->prfcnt_info.l2_count; gpu_info.csg_cnt = csf_info->prfcnt_info.csg_count; - gpu_info.core_mask = csf_info->prfcnt_info.core_mask; + gpu_info.sc_core_mask = csf_info->prfcnt_info.sc_core_mask; gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt; gpu_info.prfcnt_values_per_block = csf_info->prfcnt_info.prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES; diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h index 2487db2..1b4e16d 100644 --- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h +++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,8 +30,10 @@ #include "hwcnt/backend/mali_kbase_hwcnt_backend.h" #include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h" #include "hwcnt/mali_kbase_hwcnt_watchdog_if.h" +#include "hwcnt/mali_kbase_hwcnt_types.h" struct kbase_hwcnt_physical_enable_map; +struct kbase_hwcnt_backend_csf; /** * kbase_hwcnt_backend_csf_create() - Create a CSF hardware counter backend @@ -123,11 +125,12 @@ void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interfac * this function is called. * @iface: Non-NULL pointer to HWC backend interface. * @num_l2_slices: Current number of L2 slices allocated to the GPU. - * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU. + * @powered_shader_core_mask: The common mask between the debug_core_mask + * and the shader_present_bitmap. */ void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_interface *iface, size_t num_l2_slices, - uint64_t shader_present_bitmap); + uint64_t powered_shader_core_mask); /** kbasep_hwcnt_backend_csf_process_enable_map() - Process the enable_map to * guarantee headers are @@ -174,4 +177,21 @@ void kbase_hwcnt_backend_csf_on_prfcnt_enable(struct kbase_hwcnt_backend_interfa */ void kbase_hwcnt_backend_csf_on_prfcnt_disable(struct kbase_hwcnt_backend_interface *iface); +/** + * kbasep_hwcnt_backend_csf_update_block_state - Update block state of a block instance with + * information from a sample. + * @backend: CSF hardware counter backend. + * @enable_mask: Counter enable mask for the block whose state is being updated. + * @exiting_protm: Whether or not the sample is taken when the GPU is exiting + * protected mode. + * @block_idx: Index of block within the ringbuffer. + * @block_state: Pointer to existing block state of the block whose state is being + * updated. + * @fw_in_protected_mode: Whether or not GPU is in protected mode during sampling. + */ +void kbasep_hwcnt_backend_csf_update_block_state(struct kbase_hwcnt_backend_csf *backend, + const u32 enable_mask, bool exiting_protm, + size_t block_idx, blk_stt_t *const block_state, + bool fw_in_protected_mode); + #endif /* _KBASE_HWCNT_BACKEND_CSF_H_ */ diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h index 65bb965..4ee2c8a 100644 --- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h +++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h @@ -68,7 +68,7 @@ struct kbase_hwcnt_backend_csf_if_enable { * @prfcnt_block_size: Bytes of each performance counter block. * @l2_count: The MMU L2 cache count. * @csg_count: The total number of CSGs in the system - * @core_mask: Shader core mask. + * @sc_core_mask: Shader core mask. * @clk_cnt: Clock domain count in the system. * @clearing_samples: Indicates whether counters are cleared after each sample * is taken. 
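The effect of replacing the raw shader-present bitmap with powered_shader_core_mask (presumably the bitwise AND of shader_present and debug_core_mask, i.e. their common bits) on per-block state can be illustrated with hypothetical values:

/* Illustration only - hypothetical values, not taken from any GPU. */
u64 shader_present  = 0xF; /* cores 0..3 physically present        */
u64 debug_core_mask = 0x5; /* only cores 0 and 2 enabled for debug */
u64 powered_shader_core_mask = shader_present & debug_core_mask; /* 0x5 */

/* In kbasep_hwcnt_backend_csf_update_block_state(), a shader-core block whose
 * bit (1ULL << (block_idx - shader_core_block_offset)) falls inside
 * powered_shader_core_mask gets KBASE_HWCNT_STATE_AVAILABLE appended
 * (cores 0 and 2 here), while the others (cores 1 and 3) get
 * KBASE_HWCNT_STATE_UNAVAILABLE.
 */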
@@ -80,7 +80,7 @@ struct kbase_hwcnt_backend_csf_if_prfcnt_info { size_t prfcnt_block_size; size_t l2_count; u32 csg_count; - u64 core_mask; + u64 sc_core_mask; u8 clk_cnt; bool clearing_samples; }; diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c index 1b7a116..fe81ce1 100644 --- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c +++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -229,7 +229,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( *prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){ .l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS, - .core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1, + .sc_core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1, .prfcnt_hw_size = KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE, .prfcnt_fw_size = @@ -290,12 +290,13 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( .dump_bytes = fw_ctx->buf_bytes, .prfcnt_block_size = prfcnt_block_size, .l2_count = kbdev->gpu_props.num_l2_slices, - .core_mask = kbasep_hwcnt_backend_csf_core_mask(&kbdev->gpu_props), + .sc_core_mask = kbasep_hwcnt_backend_csf_core_mask(&kbdev->gpu_props), .csg_count = fw_block_count > 1 ? csg_count : 0, .clk_cnt = fw_ctx->clk_cnt, .clearing_samples = true, }; + /* Block size must be multiple of counter size. */ WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) != 0); /* Total size must be multiple of block size. */ @@ -513,10 +514,15 @@ kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_c fw_ring_buf->phys, fw_ring_buf->num_pages, fw_ring_buf->num_pages, MCU_AS_NR)); + /* Clear the dump ring_buf content to zeros */ + memset(fw_ring_buf->cpu_dump_base, 0, fw_ring_buf->num_pages * PAGE_SIZE); vunmap(fw_ring_buf->cpu_dump_base); + /* After zeroing, the ring_buf pages are dirty so need to pass the 'dirty' flag + * as true when freeing the pages to the Global pool. + */ kbase_mem_pool_free_pages(&fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - fw_ring_buf->num_pages, fw_ring_buf->phys, false, false); + fw_ring_buf->num_pages, fw_ring_buf->phys, true, false); kfree(fw_ring_buf->phys); diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c index 4df7dd4..8b337eb 100644 --- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c +++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c @@ -165,7 +165,7 @@ static int kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev, #endif info->l2_count = l2_count; - info->core_mask = core_mask; + info->sc_core_mask = core_mask; info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK; /* Determine the number of available clock domains. 
*/ @@ -186,7 +186,7 @@ static void kbasep_hwcnt_backend_jm_init_layout(const struct kbase_hwcnt_gpu_inf WARN_ON(!gpu_info); WARN_ON(!phys_layout); - shader_core_cnt = fls64(gpu_info->core_mask); + shader_core_cnt = fls64(gpu_info->sc_core_mask); *phys_layout = (struct kbase_hwcnt_jm_physical_layout){ .fe_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT, @@ -195,7 +195,7 @@ static void kbasep_hwcnt_backend_jm_init_layout(const struct kbase_hwcnt_gpu_inf .shader_cnt = shader_core_cnt, .block_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT + KBASE_HWCNT_V5_TILER_BLOCK_COUNT + gpu_info->l2_count + shader_core_cnt, - .shader_avail_mask = gpu_info->core_mask, + .shader_avail_mask = gpu_info->sc_core_mask, .headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, .values_per_block = gpu_info->prfcnt_values_per_block, .counters_per_block = @@ -384,14 +384,12 @@ kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend, enable = (struct kbase_instr_hwcnt_enable) { - .fe_bm = phys_enable_map.fe_bm, - .shader_bm = phys_enable_map.shader_bm, - .tiler_bm = phys_enable_map.tiler_bm, - .mmu_l2_bm = phys_enable_map.mmu_l2_bm, + .fe_bm = phys_enable_map.fe_bm, .shader_bm = phys_enable_map.shader_bm, + .tiler_bm = phys_enable_map.tiler_bm, .mmu_l2_bm = phys_enable_map.mmu_l2_bm, .counter_set = phys_counter_set, #if IS_ENABLED(CONFIG_MALI_NO_MALI) /* The dummy model needs the CPU mapping. */ - .dump_buffer = (uintptr_t)backend_jm->cpu_dump_va, + .dump_buffer = (uintptr_t)backend_jm->cpu_dump_va, #else .dump_buffer = backend_jm->gpu_dump_va, #endif /* CONFIG_MALI_NO_MALI */ @@ -411,7 +409,7 @@ kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend, backend_jm->debug_core_mask = kbase_pm_ca_get_debug_core_mask(kbdev); backend_jm->max_l2_slices = backend_jm->info->hwcnt_gpu_info.l2_count; - backend_jm->max_core_mask = backend_jm->info->hwcnt_gpu_info.core_mask; + backend_jm->max_core_mask = backend_jm->info->hwcnt_gpu_info.sc_core_mask; backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev); diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c index 5da5645..ffe8449 100644 --- a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c +++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -169,7 +169,7 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu /* Calculate number of block instances that aren't cores */ non_core_block_count = 2 + gpu_info->l2_count; /* Calculate number of block instances that are shader cores */ - sc_block_count = (size_t)fls64(gpu_info->core_mask); + sc_block_count = (size_t)fls64(gpu_info->sc_core_mask); /* Determine the total number of cores */ core_block_count = sc_block_count; @@ -277,7 +277,7 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu kbase_hwcnt_set_avail_mask(&desc.avail_mask, 0, 0); kbase_hwcnt_set_avail_mask_bits(&desc.avail_mask, 0, non_core_block_count, U64_MAX); kbase_hwcnt_set_avail_mask_bits(&desc.avail_mask, non_core_block_count, sc_block_count, - gpu_info->core_mask); + gpu_info->sc_core_mask); return kbase_hwcnt_metadata_create(&desc, metadata); @@ -294,7 +294,7 @@ static size_t kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_in { WARN_ON(!gpu_info); - return (2 + gpu_info->l2_count + (size_t)fls64(gpu_info->core_mask)) * + return (2 + gpu_info->l2_count + (size_t)fls64(gpu_info->sc_core_mask)) * gpu_info->prfcnt_values_per_block * KBASE_HWCNT_VALUE_HW_BYTES; } @@ -384,6 +384,7 @@ bool kbase_hwcnt_is_block_type_shader(const enum kbase_hwcnt_gpu_v5_block_type b return false; } + bool kbase_hwcnt_is_block_type_memsys(const enum kbase_hwcnt_gpu_v5_block_type blk_type) { if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS || @@ -466,9 +467,7 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, else hw_res_available = true; - /* - * Skip block if no values in the destination block are enabled. - */ + /* Skip block if no values in the destination block are enabled. */ if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) { u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst); const u64 *src_blk = dump_src + src_offset; @@ -592,7 +591,7 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, blk_stt_t *src_block_stt, const struct kbase_hwcnt_enable_map *dst_enable_map, - size_t num_l2_slices, u64 shader_present_bitmap, bool accumulate) + size_t num_l2_slices, u64 powered_shader_core_mask, bool accumulate) { const struct kbase_hwcnt_metadata *metadata; const u64 *dump_src = src; @@ -614,9 +613,7 @@ int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, blk_stt_t *dst_blk_stt = kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst); - /* - * Skip block if no values in the destination block are enabled. - */ + /* Skip block if no values in the destination block are enabled. */ if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) { u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst); const u64 *src_blk = dump_src + src_offset; diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h index 4339fdd..570aad7 100644 --- a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h +++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -169,7 +169,7 @@ enum kbase_hwcnt_physical_set { /** * struct kbase_hwcnt_gpu_info - Information about hwcnt blocks on the GPUs. * @l2_count: L2 cache count. - * @core_mask: Shader core mask. May be sparse. + * @sc_core_mask: Shader core mask. May be sparse. * @clk_cnt: Number of clock domains available. * @csg_cnt: Number of CSGs available. * @prfcnt_values_per_block: Total entries (header + counters) of performance @@ -178,7 +178,7 @@ enum kbase_hwcnt_physical_set { */ struct kbase_hwcnt_gpu_info { size_t l2_count; - u64 core_mask; + u64 sc_core_mask; u8 clk_cnt; u8 csg_cnt; size_t prfcnt_values_per_block; @@ -327,15 +327,16 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, * kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw * dump buffer in src into the dump buffer * abstraction in dst. - * @dst: Non-NULL pointer to destination dump buffer. - * @src: Non-NULL pointer to source raw dump buffer, of same length - * as dump_buf_bytes in the metadata of dst dump buffer. - * @src_block_stt: Non-NULL pointer to source block state buffer. - * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. - * @num_l2_slices: Current number of L2 slices allocated to the GPU. - * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU. - * @accumulate: True if counters in src should be accumulated into - * destination, rather than copied. + * @dst: Non-NULL pointer to destination dump buffer. + * @src: Non-NULL pointer to source raw dump buffer, of same length + * as dump_buf_bytes in the metadata of dst dump buffer. + * @src_block_stt: Non-NULL pointer to source block state buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * @num_l2_slices: Current number of L2 slices allocated to the GPU. + * @powered_shader_core_mask: The common mask between the debug_core_mask + * and the shader_present_bitmap. + * @accumulate: True if counters in src should be accumulated into + * destination, rather than copied. * * The dst and dst_enable_map MUST have been created from the same metadata as * returned from the call to kbase_hwcnt_csf_metadata_create as was used to get @@ -346,7 +347,7 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, blk_stt_t *src_block_stt, const struct kbase_hwcnt_enable_map *dst_enable_map, - size_t num_l2_slices, u64 shader_present_bitmap, bool accumulate); + size_t num_l2_slices, u64 powered_shader_core_mask, bool accumulate); /** * kbase_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block @@ -453,6 +454,7 @@ bool kbase_hwcnt_is_block_type_memsys(const enum kbase_hwcnt_gpu_v5_block_type b bool kbase_hwcnt_is_block_type_tiler(const enum kbase_hwcnt_gpu_v5_block_type blk_type); bool kbase_hwcnt_is_block_type_fe(const enum kbase_hwcnt_gpu_v5_block_type blk_type); + /** * kbase_hwcnt_gpu_enable_map_from_cm() - Builds enable map abstraction from * counter selection bitmasks. 
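The renamed @powered_shader_core_mask parameter documented above is the common mask between the debug core mask and the shader-present bitmap. A minimal sketch of deriving such a value is given below; the helper name and the standalone types are assumptions made only for this illustration and are not part of the patch.

#include <stdint.h>

/* Sketch only: intersect the user-requested debug core mask with the cores
 * physically present, mirroring the kerneldoc description of
 * @powered_shader_core_mask. uint64_t stands in for the kernel's u64.
 */
static uint64_t example_powered_shader_core_mask(uint64_t debug_core_mask,
                                                 uint64_t shader_present_bitmap)
{
    return debug_core_mask & shader_present_bitmap;
}

The result has a bit set only for shader cores that both exist in the hardware and are allowed by the debug core mask, which is what the updated kbase_hwcnt_csf_dump_get() parameter describes.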
diff --git a/mali_kbase/mali_base_hwconfig_features.h b/mali_kbase/mali_base_hwconfig_features.h index 1f32fc9..dd76be3 100644 --- a/mali_kbase/mali_base_hwconfig_features.h +++ b/mali_kbase/mali_base_hwconfig_features.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h index 4426bd7..d01977f 100644 --- a/mali_kbase/mali_base_hwconfig_issues.h +++ b/mali_kbase/mali_base_hwconfig_issues.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -462,22 +462,6 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p2 BASE_HW_ISSUE_END }; -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r1p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_2968_TTRX_3162, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_TTRX_3470, - BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_TITANHW_2710, - BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, - BASE_HW_ISSUE_END -}; - __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBAx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_TTRX_3083, @@ -512,7 +496,13 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGR __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = { BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, - BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END + BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END +}; + +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p1[] = { + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVAx[] = { diff --git a/mali_kbase/mali_kbase_caps.h b/mali_kbase/mali_kbase_caps.h index a92569d..c458ac1 100644 --- a/mali_kbase/mali_kbase_caps.h +++ b/mali_kbase/mali_kbase_caps.h @@ -33,15 +33,22 @@ * * @MALI_KBASE_CAP_SYSTEM_MONITOR: System Monitor * @MALI_KBASE_CAP_JIT_PRESSURE_LIMIT: JIT Pressure limit + * @MALI_KBASE_CAP_MEM_DONT_NEED: Not needed physical memory * @MALI_KBASE_CAP_MEM_GROW_ON_GPF: Memory grow on page fault * @MALI_KBASE_CAP_MEM_PROTECTED: Protected memory + * @MALI_KBASE_CAP_MEM_IMPORT_SYNC_ON_MAP_UNMAP: CPU cache maintenance required when + * imported GPU memory is mapped/unmapped + * @MALI_KBASE_CAP_MEM_KERNEL_SYNC: Kernel side cache sync ops required * @MALI_KBASE_NUM_CAPS: Delimiter */ 
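The capability list above is queried through per-capability inline helpers added later in this file's diff, each of which tests the user/kernel API version. A hedged usage sketch follows; the wrapper name is an assumption for illustration and mali_kbase_caps.h is assumed to be included.

/* Sketch only: a caller holding the API version negotiated at setup time
 * could gate new DONT_NEED handling on the capability check added here.
 */
static bool example_allow_dont_need(unsigned long api_version)
{
    return mali_kbase_supports_mem_dont_need(api_version);
}

Per the kbase_caps_table entries later in the patch, this check passes from API version 1.22 onward on CSF GPUs and from 11.40 onward on JM GPUs.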
enum mali_kbase_cap { MALI_KBASE_CAP_SYSTEM_MONITOR = 0, MALI_KBASE_CAP_JIT_PRESSURE_LIMIT, + MALI_KBASE_CAP_MEM_DONT_NEED, MALI_KBASE_CAP_MEM_GROW_ON_GPF, MALI_KBASE_CAP_MEM_PROTECTED, + MALI_KBASE_CAP_MEM_IMPORT_SYNC_ON_MAP_UNMAP, + MALI_KBASE_CAP_MEM_KERNEL_SYNC, MALI_KBASE_NUM_CAPS }; @@ -57,6 +64,11 @@ static inline bool mali_kbase_supports_jit_pressure_limit(unsigned long api_vers return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_JIT_PRESSURE_LIMIT); } +static inline bool mali_kbase_supports_mem_dont_need(unsigned long api_version) +{ + return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_DONT_NEED); +} + static inline bool mali_kbase_supports_mem_grow_on_gpf(unsigned long api_version) { return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_GROW_ON_GPF); @@ -67,4 +79,14 @@ static inline bool mali_kbase_supports_mem_protected(unsigned long api_version) return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_PROTECTED); } +static inline bool mali_kbase_supports_mem_import_sync_on_map_unmap(unsigned long api_version) +{ + return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_IMPORT_SYNC_ON_MAP_UNMAP); +} + +static inline bool mali_kbase_supports_mem_kernel_sync(unsigned long api_version) +{ + return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_KERNEL_SYNC); +} + #endif /* __KBASE_CAPS_H_ */ diff --git a/mali_kbase/mali_kbase_config_defaults.h b/mali_kbase/mali_kbase_config_defaults.h index 5ce06dd..0b983b4 100644 --- a/mali_kbase/mali_kbase_config_defaults.h +++ b/mali_kbase/mali_kbase_config_defaults.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -194,9 +194,22 @@ enum { */ #define CSF_CSG_SUSPEND_TIMEOUT_CYCLES (3100000000ull) +/* Waiting timeout in clock cycles for GPU suspend to complete. */ +#define CSF_GPU_SUSPEND_TIMEOUT_CYCLES (CSF_CSG_SUSPEND_TIMEOUT_CYCLES) + /* Waiting timeout in clock cycles for GPU reset to complete. */ #define CSF_GPU_RESET_TIMEOUT_CYCLES (CSF_CSG_SUSPEND_TIMEOUT_CYCLES * 2) +/* Waiting timeout in clock cycles for a CSG to be terminated. + * + * Based on 0.6s timeout at 100MHZ, scaled from 0.1s at 600Mhz GPU frequency + * which is the timeout defined in FW to wait for iterator to complete the + * transitioning to DISABLED state. + * More cycles (0.4s @ 100Mhz = 40000000) are added up to ensure that + * host timeout is always bigger than FW timeout. + */ +#define CSF_CSG_TERM_TIMEOUT_CYCLES (100000000) + /* Waiting timeout in clock cycles for GPU firmware to boot. * * Based on 250ms timeout at 100MHz, scaled from a 50MHz GPU system. diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c index 9ea5d74..dad363d 100644 --- a/mali_kbase/mali_kbase_core_linux.c +++ b/mali_kbase/mali_kbase_core_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -106,6 +106,7 @@ #include <linux/clk-provider.h> #include <linux/delay.h> #include <linux/log2.h> +#include <linux/mali_hw_access.h> #include <mali_kbase_config.h> @@ -151,13 +152,19 @@ static const struct mali_kbase_capability_def kbase_caps_table[MALI_KBASE_NUM_CA #if MALI_USE_CSF { 1, 0 }, /* SYSTEM_MONITOR */ { 1, 0 }, /* JIT_PRESSURE_LIMIT */ + { 1, 22 }, /* MEM_DONT_NEED */ { 1, 0 }, /* MEM_GROW_ON_GPF */ - { 1, 0 } /* MEM_PROTECTED */ + { 1, 0 }, /* MEM_PROTECTED */ + { 1, 26 }, /* MEM_IMPORT_SYNC_ON_MAP_UNMAP */ + { 1, 26 } /* MEM_KERNEL_SYNC */ #else { 11, 15 }, /* SYSTEM_MONITOR */ { 11, 25 }, /* JIT_PRESSURE_LIMIT */ + { 11, 40 }, /* MEM_DONT_NEED */ { 11, 2 }, /* MEM_GROW_ON_GPF */ - { 11, 2 } /* MEM_PROTECTED */ + { 11, 2 }, /* MEM_PROTECTED */ + { 11, 43 }, /* MEM_IMPORT_SYNC_ON_MAP_UNMAP */ + { 11, 43 } /* MEM_KERNEL_SYNC */ #endif }; @@ -1412,7 +1419,7 @@ static int kbase_api_sticky_resource_map(struct kbase_context *kctx, if (ret != 0) return -EFAULT; - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); for (i = 0; i < map->count; i++) { if (!kbase_sticky_resource_acquire(kctx, gpu_addr[i])) { @@ -1429,7 +1436,7 @@ static int kbase_api_sticky_resource_map(struct kbase_context *kctx, } } - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); return ret; } @@ -1449,7 +1456,7 @@ static int kbase_api_sticky_resource_unmap(struct kbase_context *kctx, if (ret != 0) return -EFAULT; - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); for (i = 0; i < unmap->count; i++) { if (!kbase_sticky_resource_release_force(kctx, NULL, gpu_addr[i])) { @@ -1458,7 +1465,7 @@ static int kbase_api_sticky_resource_unmap(struct kbase_context *kctx, } } - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); return ret; } @@ -1516,6 +1523,12 @@ static int kbasep_cs_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_c return kbase_csf_queue_kick(kctx, kick); } +static int kbasep_queue_group_clear_faults(struct kbase_context *kctx, + struct kbase_ioctl_queue_group_clear_faults *faults) +{ + return kbase_csf_queue_group_clear_faults(kctx, faults); +} + static int kbasep_cs_queue_group_create_1_6(struct kbase_context *kctx, union kbase_ioctl_cs_queue_group_create_1_6 *create) { @@ -1585,6 +1598,8 @@ static int kbasep_cs_queue_group_create_1_18(struct kbase_context *kctx, static int kbasep_cs_queue_group_create(struct kbase_context *kctx, union kbase_ioctl_cs_queue_group_create *create) { + /* create->in.reserved only present pre-TDRX configuration. 
*/ + if (create->in.reserved != 0) { dev_warn(kctx->kbdev->dev, "Invalid reserved field not 0 in queue group create\n"); return -EINVAL; @@ -2086,6 +2101,11 @@ static long kbase_kfile_ioctl(struct kbase_file *kfile, unsigned int cmd, unsign KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_KCPU_QUEUE_ENQUEUE, kbasep_kcpu_queue_enqueue, struct kbase_ioctl_kcpu_queue_enqueue, kctx); break; + case KBASE_IOCTL_QUEUE_GROUP_CLEAR_FAULTS: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_QUEUE_GROUP_CLEAR_FAULTS, + kbasep_queue_group_clear_faults, + struct kbase_ioctl_queue_group_clear_faults, kctx); + break; case KBASE_IOCTL_CS_TILER_HEAP_INIT: KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_TILER_HEAP_INIT, kbasep_cs_tiler_heap_init, union kbase_ioctl_cs_tiler_heap_init, kctx); @@ -2543,6 +2563,9 @@ static ssize_t core_mask_show(struct device *dev, struct device_attribute *attr, struct kbase_device *kbdev; unsigned long flags; ssize_t ret = 0; +#if !MALI_USE_CSF + size_t i; +#endif CSTD_UNUSED(attr); @@ -2561,154 +2584,191 @@ static ssize_t core_mask_show(struct device *dev, struct device_attribute *attr, ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), "Current in use core mask : 0x%llX\n", kbdev->pm.backend.shaders_avail); #else - ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), "Current core mask (JS0) : 0x%llX\n", - kbdev->pm.debug_core_mask[0]); - ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), "Current core mask (JS1) : 0x%llX\n", - kbdev->pm.debug_core_mask[1]); - ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), "Current core mask (JS2) : 0x%llX\n", - kbdev->pm.debug_core_mask[2]); + for (i = 0; i < BASE_JM_MAX_NR_SLOTS; i++) { + if (PAGE_SIZE < ret) + goto out_unlock; + + ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), + "Current core mask (JS%zu) : 0x%llX\n", i, + kbdev->pm.debug_core_mask[i]); + } #endif /* MALI_USE_CSF */ ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), "Available core mask : 0x%llX\n", kbdev->gpu_props.shader_present); - +#if !MALI_USE_CSF +out_unlock: +#endif spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return ret; } -/** - * core_mask_store - Store callback for the core_mask sysfs file. - * - * @dev: The device with sysfs file is for - * @attr: The attributes of the sysfs file - * @buf: The value written to the sysfs file - * @count: The number of bytes to write to the sysfs file - * - * This function is called when the core_mask sysfs file is written to. - * - * Return: @count if the function succeeded. An error code on failure. 
- */ -static ssize_t core_mask_store(struct device *dev, struct device_attribute *attr, const char *buf, - size_t count) -{ - struct kbase_device *kbdev; #if MALI_USE_CSF +struct kbase_core_mask { u64 new_core_mask; -#else - u64 new_core_mask[3]; - u64 group_core_mask; - int i; -#endif /* MALI_USE_CSF */ - - int items; - ssize_t err = (ssize_t)count; - unsigned long flags; - u64 shader_present; - - CSTD_UNUSED(attr); - - kbdev = to_kbase_device(dev); - - if (!kbdev) - return -ENODEV; - -#if MALI_USE_CSF - items = sscanf(buf, "%llx", &new_core_mask); +}; - if (items != 1) { - dev_err(kbdev->dev, "Couldn't process core mask write operation.\n" - "Use format <core_mask>\n"); - err = -EINVAL; - goto end; - } -#else - items = sscanf(buf, "%llx %llx %llx", &new_core_mask[0], &new_core_mask[1], - &new_core_mask[2]); +static int core_mask_parse(struct kbase_device *const kbdev, const char *const buf, + struct kbase_core_mask *const mask) +{ + int err = kstrtou64(buf, 0, &mask->new_core_mask); - if (items != 1 && items != 3) { - dev_err(kbdev->dev, "Couldn't process core mask write operation.\n" - "Use format <core_mask>\n" - "or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n"); - err = -EINVAL; - goto end; - } + if (err) + dev_err(kbdev->dev, "Couldn't process core mask write operation.\n"); - if (items == 1) - new_core_mask[1] = new_core_mask[2] = new_core_mask[0]; -#endif + return err; +} - mutex_lock(&kbdev->pm.lock); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); +static int core_mask_set(struct kbase_device *kbdev, struct kbase_core_mask *const new_mask) +{ + u64 new_core_mask = new_mask->new_core_mask; + u64 shader_present = kbdev->gpu_props.shader_present; - shader_present = kbdev->gpu_props.shader_present; + lockdep_assert_held(&kbdev->pm.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); -#if MALI_USE_CSF if ((new_core_mask & shader_present) != new_core_mask) { - dev_err(dev, + dev_err(kbdev->dev, "Invalid core mask 0x%llX: Includes non-existent cores (present = 0x%llX)", new_core_mask, shader_present); - err = -EINVAL; - goto unlock; + return -EINVAL; } else if (!(new_core_mask & shader_present & kbdev->pm.backend.ca_cores_enabled)) { - dev_err(dev, - "Invalid core mask 0x%llX: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX\n", + dev_err(kbdev->dev, + "Invalid core mask 0x%llX: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX)", new_core_mask, kbdev->gpu_props.shader_present, kbdev->pm.backend.ca_cores_enabled); - err = -EINVAL; - goto unlock; + return -EINVAL; } + if (kbdev->pm.debug_core_mask != new_core_mask) kbase_pm_set_debug_core_mask(kbdev, new_core_mask); + + return 0; +} #else - group_core_mask = kbdev->gpu_props.coherency_info.group.core_mask; +struct kbase_core_mask { + u64 new_core_mask[BASE_JM_MAX_NR_SLOTS]; +}; + +static int core_mask_parse(struct kbase_device *const kbdev, const char *const buf, + struct kbase_core_mask *const mask) +{ + int items; + + items = sscanf(buf, "%llx %llx %llx", &mask->new_core_mask[0], &mask->new_core_mask[1], + &mask->new_core_mask[2]); - for (i = 0; i < 3; ++i) { + if (items != 1 && items != BASE_JM_MAX_NR_SLOTS) { + dev_err(kbdev->dev, "Couldn't process core mask write operation.\n" + "Use format <core_mask>\n" + "or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n"); + return -EINVAL; + } + + /* If only one value was provided, set all other core masks equal to the value. 
*/ + if (items == 1) { + size_t i; + + for (i = 1; i < BASE_JM_MAX_NR_SLOTS; i++) + mask->new_core_mask[i] = mask->new_core_mask[0]; + } + + return 0; +} + +static int core_mask_set(struct kbase_device *kbdev, struct kbase_core_mask *const new_mask) +{ + u64 shader_present = kbdev->gpu_props.shader_present; + u64 group_core_mask = kbdev->gpu_props.coherency_info.group.core_mask; + u64 *new_core_mask = &new_mask->new_core_mask[0]; + size_t i; + + for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) { if ((new_core_mask[i] & shader_present) != new_core_mask[i]) { - dev_err(dev, - "Invalid core mask 0x%llX for JS %d: Includes non-existent cores (present = 0x%llX)", + dev_err(kbdev->dev, + "Invalid core mask 0x%llX for JS %zu: Includes non-existent cores (present = 0x%llX)", new_core_mask[i], i, shader_present); - err = -EINVAL; - goto unlock; + return -EINVAL; } else if (!(new_core_mask[i] & shader_present & kbdev->pm.backend.ca_cores_enabled)) { - dev_err(dev, - "Invalid core mask 0x%llX for JS %d: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX\n", + dev_err(kbdev->dev, + "Invalid core mask 0x%llX for JS %zu: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX)", new_core_mask[i], i, kbdev->gpu_props.shader_present, kbdev->pm.backend.ca_cores_enabled); - err = -EINVAL; - goto unlock; + return -EINVAL; } else if (!(new_core_mask[i] & group_core_mask)) { - dev_err(dev, - "Invalid core mask 0x%llX for JS %d: No intersection with group 0 core mask 0x%llX\n", + dev_err(kbdev->dev, + "Invalid core mask 0x%llX for JS %zu: No intersection with group 0 core mask 0x%llX", new_core_mask[i], i, group_core_mask); - err = -EINVAL; - goto unlock; + return -EINVAL; } else if (!(new_core_mask[i] & kbdev->gpu_props.curr_config.shader_present)) { - dev_err(dev, - "Invalid core mask 0x%llX for JS %d: No intersection with current core mask 0x%llX\n", + dev_err(kbdev->dev, + "Invalid core mask 0x%llX for JS %zu: No intersection with current core mask 0x%llX", new_core_mask[i], i, kbdev->gpu_props.curr_config.shader_present); - err = -EINVAL; - goto unlock; + return -EINVAL; } } - if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] || - kbdev->pm.debug_core_mask[1] != new_core_mask[1] || - kbdev->pm.debug_core_mask[2] != new_core_mask[2]) { - kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0], new_core_mask[1], - new_core_mask[2]); + for (i = 0; i < BASE_JM_MAX_NR_SLOTS; i++) { + if (kbdev->pm.debug_core_mask[i] != new_core_mask[i]) { + kbase_pm_set_debug_core_mask(kbdev, new_core_mask, BASE_JM_MAX_NR_SLOTS); + break; + } } -#endif /* MALI_USE_CSF */ -unlock: + return 0; +} + +#endif + +/** + * core_mask_store - Store callback for the core_mask sysfs file. + * + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes to write to the sysfs file + * + * This function is called when the core_mask sysfs file is written to. + * + * Return: @count if the function succeeded. An error code on failure. 
+ */ +static ssize_t core_mask_store(struct device *dev, struct device_attribute *attr, const char *buf, + size_t count) +{ + struct kbase_device *kbdev; + struct kbase_core_mask core_mask = {}; + + int err; + unsigned long flags; + + CSTD_UNUSED(attr); + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + err = core_mask_parse(kbdev, buf, &core_mask); + if (err) + return err; + + mutex_lock(&kbdev->pm.lock); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + err = core_mask_set(kbdev, &core_mask); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&kbdev->pm.lock); -end: - return err; + + if (err) + return err; + + return count; } /* @@ -3477,12 +3537,8 @@ int kbase_pm_gpu_freq_init(struct kbase_device *kbdev) /* convert found frequency to KHz */ found_freq /= 1000; - /* If lowest frequency in OPP table is still higher - * than the reference, then keep the reference frequency - * as the one to use for scaling . - */ - if (found_freq < lowest_freq_khz) - lowest_freq_khz = found_freq; + /* always use the lowest freqency from opp table */ + lowest_freq_khz = found_freq; } #else dev_err(kbdev->dev, "No operating-points-v2 node or operating-points property in DT"); @@ -4465,7 +4521,7 @@ static int kbase_common_reg_map(struct kbase_device *kbdev) goto out_region; } - kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size); + kbdev->reg = mali_ioremap(kbdev->reg_start, kbdev->reg_size); if (!kbdev->reg) { dev_err(kbdev->dev, "Can't remap register window\n"); err = -EINVAL; @@ -4483,7 +4539,7 @@ out_region: static void kbase_common_reg_unmap(struct kbase_device *const kbdev) { if (kbdev->reg) { - iounmap(kbdev->reg); + mali_iounmap(kbdev->reg); release_mem_region(kbdev->reg_start, kbdev->reg_size); kbdev->reg = NULL; kbdev->reg_start = 0; @@ -5086,6 +5142,7 @@ static struct dentry *init_debugfs(struct kbase_device *kbdev) return dentry; } + dentry = debugfs_ctx_defaults_init(kbdev); if (IS_ERR_OR_NULL(dentry)) return dentry; diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h index 30ea787..f3c8340 100644 --- a/mali_kbase/mali_kbase_defs.h +++ b/mali_kbase/mali_kbase_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -171,16 +171,11 @@ struct kbase_gpu_metrics { * * @link: Links the object in kbase_device::gpu_metrics::active_list * or kbase_device::gpu_metrics::inactive_list. - * @first_active_start_time: Records the time at which the application first became + * @active_start_time: Records the time at which the application first became * active in the current work period. - * @last_active_start_time: Records the time at which the application last became - * active in the current work period. - * @last_active_end_time: Records the time at which the application last became - * inactive in the current work period. - * @total_active: Tracks the time for which application has been active - * in the current work period. - * @prev_wp_active_end_time: Records the time at which the application last became - * inactive in the previous work period. 
+ * @active_end_time: Records the time at which the application last became + * inactive in the current work period, or the time of the end of + * previous work period if the application remained active. * @aid: Unique identifier for an application. * @kctx_count: Counter to keep a track of the number of Kbase contexts * created for an application. There may be multiple Kbase @@ -188,19 +183,14 @@ struct kbase_gpu_metrics { * metrics context. * @active_cnt: Counter that is updated every time the GPU activity starts * and ends in the current work period for an application. - * @flags: Flags to track the state of GPU metrics context. */ struct kbase_gpu_metrics_ctx { struct list_head link; - u64 first_active_start_time; - u64 last_active_start_time; - u64 last_active_end_time; - u64 total_active; - u64 prev_wp_active_end_time; + u64 active_start_time; + u64 active_end_time; unsigned int aid; unsigned int kctx_count; u8 active_cnt; - u8 flags; }; #endif @@ -548,7 +538,7 @@ struct kbase_mem_pool { u8 group_id; spinlock_t pool_lock; struct list_head page_list; - struct shrinker reclaim; + DEFINE_KBASE_SHRINKER reclaim; atomic_t isolation_in_progress_cnt; struct kbase_mem_pool *next_pool; @@ -839,8 +829,6 @@ struct kbase_mem_migrate { * @as_free: Bitpattern of free/available GPU address spaces. * @mmu_mask_change: Lock to serialize the access to MMU interrupt mask * register used in the handling of Bus & Page faults. - * @pagesize_2mb: Boolean to determine whether 2MiB page sizes are - * supported and used where possible. * @gpu_props: Object containing complete information about the * configuration/properties of GPU HW device in use. * @hw_issues_mask: List of SW workarounds for HW issues @@ -1141,8 +1129,6 @@ struct kbase_device { spinlock_t mmu_mask_change; - bool pagesize_2mb; - struct kbase_gpu_props gpu_props; unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; @@ -1986,7 +1972,8 @@ struct kbase_context { struct kbase_mem_pool_group mem_pools; - struct shrinker reclaim; + DEFINE_KBASE_SHRINKER reclaim; + struct list_head evict_list; atomic_t evict_nents; diff --git a/mali_kbase/mali_kbase_fence.h b/mali_kbase/mali_kbase_fence.h index 06690d4..d45a0fe 100644 --- a/mali_kbase/mali_kbase_fence.h +++ b/mali_kbase/mali_kbase_fence.h @@ -35,8 +35,37 @@ #include <linux/version_compat_defs.h> #if MALI_USE_CSF +/* Number of digits needed to express the max value of given unsigned type. + * + * Details: The number of digits needed to express the max value of given type is log10(t_max) + 1 + * sizeof(t) == log2(t_max)/8 + * log10(t_max) == log2(t_max) / log2(10) + * log2(t_max) == sizeof(type) * 8 + * 1/log2(10) is approx (1233 >> 12) + * Hence, number of digits for given type == log10(t_max) + 1 == sizeof(type) * 8 * (1233 >> 12) + 1 + */ +#define MAX_DIGITS_FOR_UNSIGNED_TYPE(t) ((((sizeof(t) * BITS_PER_BYTE) * 1233) >> 12) + 1) + +/* Number of digits needed to express the max value of given signed type, + * including the sign character, + */ +#define MAX_DIGITS_FOR_SIGNED_TYPE(t) (MAX_DIGITS_FOR_UNSIGNED_TYPE(t) + 1) + +/* Max number of characters for id member of kbase_device struct. */ +#define MAX_KBDEV_ID_LEN MAX_DIGITS_FOR_UNSIGNED_TYPE(u32) +/* Max number of characters for tgid member of kbase_context struct. */ +#define MAX_KCTX_TGID_LEN MAX_DIGITS_FOR_SIGNED_TYPE(pid_t) +/* Max number of characters for id member of kbase_context struct. 
*/ +#define MAX_KCTX_ID_LEN MAX_DIGITS_FOR_UNSIGNED_TYPE(u32) +/* Max number of characters for fence_context member of kbase_kcpu_command_queue struct. */ +#define MAX_KCTX_QUEUE_FENCE_CTX_LEN MAX_DIGITS_FOR_UNSIGNED_TYPE(u64) +/* Max number of characters for timeline name fixed format, including null character. */ +#define FIXED_FORMAT_LEN (9) + /* Maximum number of characters in DMA fence timeline name. */ -#define MAX_TIMELINE_NAME (32) +#define MAX_TIMELINE_NAME \ + (MAX_KBDEV_ID_LEN + MAX_KCTX_TGID_LEN + MAX_KCTX_ID_LEN + MAX_KCTX_QUEUE_FENCE_CTX_LEN + \ + FIXED_FORMAT_LEN) /** * struct kbase_kcpu_dma_fence_meta - Metadata structure for dma fence objects containing diff --git a/mali_kbase/mali_kbase_gpu_metrics.c b/mali_kbase/mali_kbase_gpu_metrics.c index d404c69..e5dcde9 100644 --- a/mali_kbase/mali_kbase_gpu_metrics.c +++ b/mali_kbase/mali_kbase_gpu_metrics.c @@ -29,46 +29,12 @@ #include <linux/module.h> #include <linux/slab.h> -/** - * enum gpu_metrics_ctx_flags - Flags for the GPU metrics context - * - * @ACTIVE_INTERVAL_IN_WP: Flag set when the application first becomes active in - * the current work period. - * - * @INSIDE_ACTIVE_LIST: Flag to track if object is in kbase_device::gpu_metrics::active_list - * - * All members need to be separate bits. This enum is intended for use in a - * bitmask where multiple values get OR-ed together. - */ -enum gpu_metrics_ctx_flags { - ACTIVE_INTERVAL_IN_WP = 1 << 0, - INSIDE_ACTIVE_LIST = 1 << 1, -}; - static unsigned long gpu_metrics_tp_emit_interval_ns = DEFAULT_GPU_METRICS_TP_EMIT_INTERVAL_NS; module_param(gpu_metrics_tp_emit_interval_ns, ulong, 0444); MODULE_PARM_DESC(gpu_metrics_tp_emit_interval_ns, "Time interval in nano seconds at which GPU metrics tracepoints are emitted"); -static inline bool gpu_metrics_ctx_flag(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, - enum gpu_metrics_ctx_flags flag) -{ - return (gpu_metrics_ctx->flags & flag); -} - -static inline void gpu_metrics_ctx_flag_set(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, - enum gpu_metrics_ctx_flags flag) -{ - gpu_metrics_ctx->flags |= flag; -} - -static inline void gpu_metrics_ctx_flag_clear(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, - enum gpu_metrics_ctx_flags flag) -{ - gpu_metrics_ctx->flags &= ~flag; -} - static inline void validate_tracepoint_data(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, u64 start_time, u64 end_time, u64 total_active) { @@ -82,43 +48,30 @@ static inline void validate_tracepoint_data(struct kbase_gpu_metrics_ctx *gpu_me WARN(total_active > (end_time - start_time), "total_active %llu > end_time %llu - start_time %llu for aid %u active_cnt %u", total_active, end_time, start_time, gpu_metrics_ctx->aid, gpu_metrics_ctx->active_cnt); - - WARN(gpu_metrics_ctx->prev_wp_active_end_time > start_time, - "prev_wp_active_end_time %llu > start_time %llu for aid %u active_cnt %u", - gpu_metrics_ctx->prev_wp_active_end_time, start_time, gpu_metrics_ctx->aid, - gpu_metrics_ctx->active_cnt); #endif } static void emit_tracepoint_for_active_gpu_metrics_ctx( struct kbase_device *kbdev, struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, u64 current_time) { - const u64 start_time = gpu_metrics_ctx->first_active_start_time; - u64 total_active = gpu_metrics_ctx->total_active; - u64 end_time; + const u64 start_time = gpu_metrics_ctx->active_start_time; + u64 total_active, end_time = current_time; /* Check if the GPU activity is currently ongoing */ if (gpu_metrics_ctx->active_cnt) { /* The following check is to handle the race on CSF GPUs that can happen between * 
the draining of trace buffer and FW emitting the ACT=1 event . */ - if (unlikely(current_time == gpu_metrics_ctx->last_active_start_time)) - current_time++; - end_time = current_time; - total_active += end_time - gpu_metrics_ctx->last_active_start_time; - - gpu_metrics_ctx->first_active_start_time = current_time; - gpu_metrics_ctx->last_active_start_time = current_time; - } else { - end_time = gpu_metrics_ctx->last_active_end_time; - gpu_metrics_ctx_flag_clear(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP); + if (unlikely(end_time == start_time)) + end_time++; + gpu_metrics_ctx->active_start_time = end_time; } + total_active = end_time - start_time; trace_gpu_work_period(kbdev->id, gpu_metrics_ctx->aid, start_time, end_time, total_active); validate_tracepoint_data(gpu_metrics_ctx, start_time, end_time, total_active); - gpu_metrics_ctx->prev_wp_active_end_time = end_time; - gpu_metrics_ctx->total_active = 0; + gpu_metrics_ctx->active_end_time = end_time; } void kbase_gpu_metrics_ctx_put(struct kbase_device *kbdev, @@ -131,7 +84,8 @@ void kbase_gpu_metrics_ctx_put(struct kbase_device *kbdev, if (gpu_metrics_ctx->kctx_count) return; - if (gpu_metrics_ctx_flag(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP)) + /* Generate a tracepoint if there's still activity */ + if (gpu_metrics_ctx->active_cnt) emit_tracepoint_for_active_gpu_metrics_ctx(kbdev, gpu_metrics_ctx, ktime_get_raw_ns()); @@ -166,12 +120,11 @@ struct kbase_gpu_metrics_ctx *kbase_gpu_metrics_ctx_get(struct kbase_device *kbd void kbase_gpu_metrics_ctx_init(struct kbase_device *kbdev, struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, unsigned int aid) { + gpu_metrics_ctx->active_start_time = 0; + gpu_metrics_ctx->active_end_time = 0; gpu_metrics_ctx->aid = aid; - gpu_metrics_ctx->total_active = 0; gpu_metrics_ctx->kctx_count = 1; gpu_metrics_ctx->active_cnt = 0; - gpu_metrics_ctx->prev_wp_active_end_time = 0; - gpu_metrics_ctx->flags = 0; list_add_tail(&gpu_metrics_ctx->link, &kbdev->gpu_metrics.inactive_list); } @@ -180,17 +133,9 @@ void kbase_gpu_metrics_ctx_start_activity(struct kbase_context *kctx, u64 timest struct kbase_gpu_metrics_ctx *gpu_metrics_ctx = kctx->gpu_metrics_ctx; gpu_metrics_ctx->active_cnt++; - if (gpu_metrics_ctx->active_cnt == 1) - gpu_metrics_ctx->last_active_start_time = timestamp_ns; - - if (!gpu_metrics_ctx_flag(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP)) { - gpu_metrics_ctx->first_active_start_time = timestamp_ns; - gpu_metrics_ctx_flag_set(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP); - } - - if (!gpu_metrics_ctx_flag(gpu_metrics_ctx, INSIDE_ACTIVE_LIST)) { + if (gpu_metrics_ctx->active_cnt == 1) { + gpu_metrics_ctx->active_start_time = timestamp_ns; list_move_tail(&gpu_metrics_ctx->link, &kctx->kbdev->gpu_metrics.active_list); - gpu_metrics_ctx_flag_set(gpu_metrics_ctx, INSIDE_ACTIVE_LIST); } } @@ -201,22 +146,22 @@ void kbase_gpu_metrics_ctx_end_activity(struct kbase_context *kctx, u64 timestam if (WARN_ON_ONCE(!gpu_metrics_ctx->active_cnt)) return; + /* Do not emit tracepoint if GPU activity still continues. 
*/ if (--gpu_metrics_ctx->active_cnt) return; - if (likely(timestamp_ns > gpu_metrics_ctx->last_active_start_time)) { - gpu_metrics_ctx->last_active_end_time = timestamp_ns; - gpu_metrics_ctx->total_active += - timestamp_ns - gpu_metrics_ctx->last_active_start_time; + if (likely(timestamp_ns > gpu_metrics_ctx->active_start_time)) { + emit_tracepoint_for_active_gpu_metrics_ctx(kctx->kbdev, gpu_metrics_ctx, + timestamp_ns); return; } /* Due to conversion from system timestamp to CPU timestamp (which involves rounding) * the value for start and end timestamp could come as same on CSF GPUs. */ - if (timestamp_ns == gpu_metrics_ctx->last_active_start_time) { - gpu_metrics_ctx->last_active_end_time = timestamp_ns + 1; - gpu_metrics_ctx->total_active += 1; + if (timestamp_ns == gpu_metrics_ctx->active_start_time) { + emit_tracepoint_for_active_gpu_metrics_ctx(kctx->kbdev, gpu_metrics_ctx, + timestamp_ns + 1); return; } @@ -224,12 +169,9 @@ void kbase_gpu_metrics_ctx_end_activity(struct kbase_context *kctx, u64 timestam * visible to the Kbase even though the system timestamp value sampled by FW was less than * the system timestamp value sampled by Kbase just before the draining of trace buffer. */ - if (gpu_metrics_ctx->last_active_start_time == gpu_metrics_ctx->first_active_start_time && - gpu_metrics_ctx->prev_wp_active_end_time == gpu_metrics_ctx->first_active_start_time) { - WARN_ON_ONCE(gpu_metrics_ctx->total_active); - gpu_metrics_ctx->last_active_end_time = - gpu_metrics_ctx->prev_wp_active_end_time + 1; - gpu_metrics_ctx->total_active = 1; + if (gpu_metrics_ctx->active_end_time == gpu_metrics_ctx->active_start_time) { + emit_tracepoint_for_active_gpu_metrics_ctx(kctx->kbdev, gpu_metrics_ctx, + gpu_metrics_ctx->active_end_time + 1); return; } @@ -242,15 +184,12 @@ void kbase_gpu_metrics_emit_tracepoint(struct kbase_device *kbdev, u64 ts) struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, *tmp; list_for_each_entry_safe(gpu_metrics_ctx, tmp, &gpu_metrics->active_list, link) { - if (!gpu_metrics_ctx_flag(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP)) { - WARN_ON(!gpu_metrics_ctx_flag(gpu_metrics_ctx, INSIDE_ACTIVE_LIST)); - WARN_ON(gpu_metrics_ctx->active_cnt); - list_move_tail(&gpu_metrics_ctx->link, &gpu_metrics->inactive_list); - gpu_metrics_ctx_flag_clear(gpu_metrics_ctx, INSIDE_ACTIVE_LIST); + if (gpu_metrics_ctx->active_cnt) { + emit_tracepoint_for_active_gpu_metrics_ctx(kbdev, gpu_metrics_ctx, ts); continue; } - emit_tracepoint_for_active_gpu_metrics_ctx(kbdev, gpu_metrics_ctx, ts); + list_move_tail(&gpu_metrics_ctx->link, &gpu_metrics->inactive_list); } } diff --git a/mali_kbase/mali_kbase_gpu_metrics.h b/mali_kbase/mali_kbase_gpu_metrics.h index c445dff..658cf1c 100644 --- a/mali_kbase/mali_kbase_gpu_metrics.h +++ b/mali_kbase/mali_kbase_gpu_metrics.h @@ -106,7 +106,7 @@ void kbase_gpu_metrics_ctx_init(struct kbase_device *kbdev, * @kctx: Pointer to the Kbase context contributing data to the GPU metrics context. * @timestamp_ns: CPU timestamp at which the GPU activity started. * - * The provided timestamp would be later used as the "start_time_ns" for the + * The provided timestamp is used as the "start_time_ns" for the * power/gpu_work_period tracepoint if this is the first GPU activity for the GPU * metrics context in the current work period. * @@ -122,9 +122,9 @@ void kbase_gpu_metrics_ctx_start_activity(struct kbase_context *kctx, u64 timest * @kctx: Pointer to the Kbase context contributing data to the GPU metrics context. * @timestamp_ns: CPU timestamp at which the GPU activity ended. 
* - * The provided timestamp would be later used as the "end_time_ns" for the - * power/gpu_work_period tracepoint if this is the last GPU activity for the GPU - * metrics context in the current work period. + * The provided timestamp is used as the "end_time_ns" for the power/gpu_work_period + * tracepoint if this is the last GPU activity for the GPU metrics context + * in the current work period. * * Note: The caller must appropriately serialize the call to this function with the * call to other GPU metrics functions declared in this file. @@ -138,8 +138,8 @@ void kbase_gpu_metrics_ctx_end_activity(struct kbase_context *kctx, u64 timestam * @kbdev: Pointer to the GPU device. * @ts: Timestamp at which the tracepoint is being emitted. * - * This function would loop through all the active GPU metrics contexts and emit a - * power/gpu_work_period tracepoint for them. + * This function would loop through all GPU metrics contexts in the active list and + * emit a power/gpu_work_period tracepoint if the GPU work in the context still active. * The GPU metrics context that is found to be inactive since the last tracepoint * was emitted would be moved to the inactive list. * The current work period would be considered as over and a new work period would diff --git a/mali_kbase/mali_kbase_gpuprops.c b/mali_kbase/mali_kbase_gpuprops.c index 3ac1c45..e6c31d4 100644 --- a/mali_kbase/mali_kbase_gpuprops.c +++ b/mali_kbase/mali_kbase_gpuprops.c @@ -357,6 +357,7 @@ enum l2_config_override_result { /** * kbase_read_l2_config_from_dt - Read L2 configuration * @kbdev: The kbase device for which to get the L2 configuration. + * @regdump: Pointer to struct kbase_gpuprops_regdump structure. * * Check for L2 configuration overrides in module parameters and device tree. * Override values in module parameters take priority over override values in @@ -366,9 +367,16 @@ enum l2_config_override_result { * overridden, L2_CONFIG_OVERRIDE_NONE if no overrides are provided. * L2_CONFIG_OVERRIDE_FAIL otherwise. */ -static enum l2_config_override_result kbase_read_l2_config_from_dt(struct kbase_device *const kbdev) +static enum l2_config_override_result +kbase_read_l2_config_from_dt(struct kbase_device *const kbdev, + struct kbasep_gpuprops_regdump *regdump) { struct device_node *np = kbdev->dev->of_node; + /* + * CACHE_SIZE bit fields in L2_FEATURES register, default value after the reset/powerup + * holds the maximum size of the cache that can be programmed in L2_CONFIG register. + */ + const u8 l2_size_max = L2_FEATURES_CACHE_SIZE_GET(regdump->l2_features); if (!np) return L2_CONFIG_OVERRIDE_NONE; @@ -378,8 +386,12 @@ static enum l2_config_override_result kbase_read_l2_config_from_dt(struct kbase_ else if (of_property_read_u8(np, "l2-size", &kbdev->l2_size_override)) kbdev->l2_size_override = 0; - if (kbdev->l2_size_override != 0 && kbdev->l2_size_override < OVERRIDE_L2_SIZE_MIN_LOG2) + if (kbdev->l2_size_override != 0 && (kbdev->l2_size_override < OVERRIDE_L2_SIZE_MIN_LOG2 || + kbdev->l2_size_override > l2_size_max)) { + dev_err(kbdev->dev, "Invalid Cache Size in %s", + override_l2_size ? "Module parameters" : "Device tree node"); return L2_CONFIG_OVERRIDE_FAIL; + } /* Check overriding value is supported, if not will result in * undefined behavior. 
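The new regdump parameter above lets the device-tree or module-parameter override be validated against the maximum cache size reported in L2_FEATURES. Assuming both values encode log2 of the L2 size in bytes (consistent with the OVERRIDE_L2_SIZE_MIN_LOG2 naming), a worked sketch of the range check is shown below; the function and parameter names are illustrative assumptions, not driver API.

#include <stdbool.h>
#include <stdint.h>

/* Sketch only: an override of 0 means "no override requested"; any other value
 * must lie between the minimum supported size and the hardware maximum taken
 * from L2_FEATURES. For example, an override of 18 requests 1 << 18 = 262144
 * bytes (256 KiB) and is accepted only if the hardware maximum allows it.
 */
static bool example_l2_size_override_ok(uint8_t override_log2, uint8_t min_log2,
                                        uint8_t hw_max_log2)
{
    if (override_log2 == 0)
        return true;
    return override_log2 >= min_log2 && override_log2 <= hw_max_log2;
}

This mirrors the patch's check, which reports L2_CONFIG_OVERRIDE_FAIL when a non-zero override falls below OVERRIDE_L2_SIZE_MIN_LOG2 or above the maximum read from the register dump.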
@@ -429,7 +441,7 @@ int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev) struct kbasep_gpuprops_regdump *regdump = &PRIV_DATA_REGDUMP(kbdev); /* Check for L2 cache size & hash overrides */ - switch (kbase_read_l2_config_from_dt(kbdev)) { + switch (kbase_read_l2_config_from_dt(kbdev, regdump)) { case L2_CONFIG_OVERRIDE_FAIL: err = -EIO; goto exit; diff --git a/mali_kbase/mali_kbase_gwt.c b/mali_kbase/mali_kbase_gwt.c index c92d54c..5e59bf6 100644 --- a/mali_kbase/mali_kbase_gwt.c +++ b/mali_kbase/mali_kbase_gwt.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,9 +30,10 @@ #include <linux/module.h> static inline void kbase_gpu_gwt_setup_page_permission(struct kbase_context *kctx, - unsigned long flag, struct rb_node *node) + unsigned long flag, + struct kbase_reg_zone *zone) { - struct rb_node *rbnode = node; + struct rb_node *rbnode = rb_first(&zone->reg_rbtree); while (rbnode) { struct kbase_va_region *reg; @@ -55,17 +56,15 @@ static inline void kbase_gpu_gwt_setup_page_permission(struct kbase_context *kct static void kbase_gpu_gwt_setup_pages(struct kbase_context *kctx, unsigned long flag) { - kbase_gpu_gwt_setup_page_permission(kctx, flag, - rb_first(&kctx->reg_zone[SAME_VA_ZONE].reg_rbtree)); - kbase_gpu_gwt_setup_page_permission(kctx, flag, - rb_first(&kctx->reg_zone[CUSTOM_VA_ZONE].reg_rbtree)); + kbase_gpu_gwt_setup_page_permission(kctx, flag, &kctx->reg_zone[SAME_VA_ZONE]); + kbase_gpu_gwt_setup_page_permission(kctx, flag, &kctx->reg_zone[CUSTOM_VA_ZONE]); } int kbase_gpu_gwt_start(struct kbase_context *kctx) { - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); if (kctx->gwt_enabled) { - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); return -EBUSY; } @@ -91,7 +90,7 @@ int kbase_gpu_gwt_start(struct kbase_context *kctx) kbase_gpu_gwt_setup_pages(kctx, ~KBASE_REG_GPU_WR); - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); return 0; } diff --git a/mali_kbase/mali_kbase_hw.c b/mali_kbase/mali_kbase_hw.c index 7d4200e..1fde75b 100644 --- a/mali_kbase/mali_kbase_hw.c +++ b/mali_kbase/mali_kbase_hw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -225,6 +225,8 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(struct kbase_dev { GPU_ID_PRODUCT_TVAX, { { GPU_ID_VERSION_MAKE(0, 0, 0), base_hw_issues_tVAx_r0p0 }, + { GPU_ID_VERSION_MAKE(0, 0, 5), base_hw_issues_tVAx_r0p0 }, + { GPU_ID_VERSION_MAKE(0, 1, 0), base_hw_issues_tVAx_r0p1 }, { U32_MAX, NULL } } }, { GPU_ID_PRODUCT_TTUX, @@ -334,6 +336,8 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(struct kbase_dev gpu_id->version_id = fallback_version; } } + + return issues; } diff --git a/mali_kbase/mali_kbase_hwaccess_pm.h b/mali_kbase/mali_kbase_hwaccess_pm.h index 7a0ea49..982547d 100644 --- a/mali_kbase/mali_kbase_hwaccess_pm.h +++ b/mali_kbase/mali_kbase_hwaccess_pm.h @@ -129,14 +129,14 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask) * kbase_pm_set_debug_core_mask - Set the debug core mask. * * @kbdev: The kbase device structure for the device (must be a valid pointer) - * @new_core_mask_js0: The core mask to use for job slot 0 - * @new_core_mask_js1: The core mask to use for job slot 1 - * @new_core_mask_js2: The core mask to use for job slot 2 + * @new_core_mask: The core mask to use, as an array where each element refers + * to a job slot. + * @new_core_mask_size: Number of elements in the core mask array. * * This determines which cores the power manager is allowed to use. */ -void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask_js0, - u64 new_core_mask_js1, u64 new_core_mask_js2); +void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 *new_core_mask, + size_t new_core_mask_size); #endif /* MALI_USE_CSF */ /** diff --git a/mali_kbase/mali_kbase_linux.h b/mali_kbase/mali_kbase_linux.h index 9195be3..cb55d4b 100644 --- a/mali_kbase/mali_kbase_linux.h +++ b/mali_kbase/mali_kbase_linux.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -35,8 +35,13 @@ #if IS_ENABLED(MALI_KERNEL_TEST_API) #define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func) +/* Note: due to the 2-layer macro translation, using the NULL _etype does not + * compile, and one workaround is to use ERRNO_NULL instead. + */ +#define KBASE_ALLOW_ERROR_INJECTION_TEST_API(func, etype) ALLOW_ERROR_INJECTION(func, etype) #else #define KBASE_EXPORT_TEST_API(func) +#define KBASE_ALLOW_ERROR_INJECTION_TEST_API(func, etype) #endif #define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func) diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c index ddf6ea3..71088dd 100644 --- a/mali_kbase/mali_kbase_mem.c +++ b/mali_kbase/mali_kbase_mem.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -46,6 +46,9 @@ #include <mali_kbase_trace_gpu_mem.h> #include <linux/version_compat_defs.h> +/* Static key used to determine if large pages are enabled or not */ +static DEFINE_STATIC_KEY_FALSE(large_pages_static_key); + #define VA_REGION_SLAB_NAME_PREFIX "va-region-slab-" #define VA_REGION_SLAB_NAME_SIZE (DEVNAME_SIZE + sizeof(VA_REGION_SLAB_NAME_PREFIX) + 1) @@ -143,20 +146,20 @@ MODULE_PARM_DESC(large_page_conf, "User override for large page usage on support static void kbasep_mem_page_size_init(struct kbase_device *kbdev) { if (!IS_ENABLED(CONFIG_LARGE_PAGE_SUPPORT)) { - kbdev->pagesize_2mb = false; dev_info(kbdev->dev, "Large page support was disabled at compile-time!"); return; } switch (large_page_conf) { case LARGE_PAGE_AUTO: { - kbdev->pagesize_2mb = kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC); + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC)) + static_branch_enable(&large_pages_static_key); dev_info(kbdev->dev, "Large page allocation set to %s after hardware feature check", - kbdev->pagesize_2mb ? "true" : "false"); + static_branch_unlikely(&large_pages_static_key) ? "true" : "false"); break; } case LARGE_PAGE_ON: { - kbdev->pagesize_2mb = true; + static_branch_enable(&large_pages_static_key); if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC)) dev_warn(kbdev->dev, "Enabling large page allocations on unsupporting GPU!"); @@ -165,12 +168,10 @@ static void kbasep_mem_page_size_init(struct kbase_device *kbdev) break; } case LARGE_PAGE_OFF: { - kbdev->pagesize_2mb = false; dev_info(kbdev->dev, "Large page allocation override: turned off\n"); break; } default: { - kbdev->pagesize_2mb = false; dev_info(kbdev->dev, "Invalid large page override, turning off large pages\n"); break; } @@ -180,12 +181,18 @@ static void kbasep_mem_page_size_init(struct kbase_device *kbdev) * so that userspace could read it to figure out the state of the configuration * if necessary. 
*/ - if (kbdev->pagesize_2mb) + if (static_branch_unlikely(&large_pages_static_key)) large_page_conf = LARGE_PAGE_ON; else large_page_conf = LARGE_PAGE_OFF; } +inline bool kbase_is_large_pages_enabled(void) +{ + return static_branch_unlikely(&large_pages_static_key); +} +KBASE_EXPORT_TEST_API(kbase_is_large_pages_enabled); + int kbase_mem_init(struct kbase_device *kbdev) { int err = 0; @@ -672,7 +679,9 @@ void kbase_sync_single(struct kbase_context *kctx, struct tagged_addr t_cpu_pa, dma_addr_t dma_addr; WARN_ON(!cpu_page); - WARN_ON((size_t)offset + size > PAGE_SIZE); + + if ((size_t)offset + size > PAGE_SIZE) + dev_warn(kctx->kbdev->dev, "Size and offset exceed page size"); dma_addr = kbase_dma_addr_from_tagged(t_cpu_pa) + (dma_addr_t)offset; @@ -942,7 +951,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) __func__); return -EINVAL; } - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); if (gpu_addr >= BASE_MEM_COOKIE_BASE && gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) { unsigned int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE); @@ -981,7 +990,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) } out_unlock: - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); return err; } @@ -1156,7 +1165,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pa /* Check if we have enough pages requested so we can allocate a large * page (512 * 4KB = 2MB ) */ - if (kbdev->pagesize_2mb && nr_left >= NUM_PAGES_IN_2MB_LARGE_PAGE) { + if (kbase_is_large_pages_enabled() && nr_left >= NUM_PAGES_IN_2MB_LARGE_PAGE) { size_t nr_lp = nr_left / NUM_PAGES_IN_2MB_LARGE_PAGE; res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.large[alloc->group_id], @@ -1307,6 +1316,7 @@ alloc_failed: invalid_request: return -ENOMEM; } +KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages_helper); static size_t free_partial_locked(struct kbase_context *kctx, struct kbase_mem_pool *pool, struct tagged_addr tp) @@ -1363,7 +1373,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(struct kbase_mem_phy_all kctx = alloc->imported.native.kctx; kbdev = kctx->kbdev; - if (!kbdev->pagesize_2mb) + if (!kbase_is_large_pages_enabled()) WARN_ON(pool->order); if (alloc->reg) { @@ -1386,7 +1396,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(struct kbase_mem_phy_all tp = alloc->pages + alloc->nents; new_pages = tp; - if (kbdev->pagesize_2mb && pool->order) { + if (kbase_is_large_pages_enabled() && pool->order) { size_t nr_lp = nr_left / NUM_PAGES_IN_2MB_LARGE_PAGE; res = kbase_mem_pool_alloc_pages_locked(pool, nr_lp * NUM_PAGES_IN_2MB_LARGE_PAGE, @@ -1503,7 +1513,7 @@ alloc_failed: struct tagged_addr *start_free = alloc->pages + alloc->nents; - if (kbdev->pagesize_2mb && pool->order) { + if (kbase_is_large_pages_enabled() && pool->order) { while (nr_pages_to_free) { if (is_huge_head(*start_free)) { kbase_mem_pool_free_pages_locked( @@ -1659,6 +1669,7 @@ int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pag return 0; } +KBASE_EXPORT_TEST_API(kbase_free_phy_pages_helper); void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool, struct tagged_addr *pages, @@ -2156,17 +2167,31 @@ void kbase_gpu_vm_lock(struct kbase_context *kctx) KBASE_DEBUG_ASSERT(kctx != NULL); mutex_lock(&kctx->reg_lock); } - KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock); +void kbase_gpu_vm_lock_with_pmode_sync(struct kbase_context *kctx) +{ +#if MALI_USE_CSF + down_read(&kctx->kbdev->csf.pmode_sync_sem); 
+#endif + kbase_gpu_vm_lock(kctx); +} + void kbase_gpu_vm_unlock(struct kbase_context *kctx) { KBASE_DEBUG_ASSERT(kctx != NULL); mutex_unlock(&kctx->reg_lock); } - KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock); +void kbase_gpu_vm_unlock_with_pmode_sync(struct kbase_context *kctx) +{ + kbase_gpu_vm_unlock(kctx); +#if MALI_USE_CSF + up_read(&kctx->kbdev->csf.pmode_sync_sem); +#endif +} + #if IS_ENABLED(CONFIG_DEBUG_FS) struct kbase_jit_debugfs_data { int (*func)(struct kbase_jit_debugfs_data *data); @@ -2708,7 +2733,7 @@ static int kbase_jit_grow(struct kbase_context *kctx, const struct base_jit_allo delta = info->commit_pages - reg->gpu_alloc->nents; pages_required = delta; - if (kctx->kbdev->pagesize_2mb && pages_required >= NUM_PAGES_IN_2MB_LARGE_PAGE) { + if (kbase_is_large_pages_enabled() && pages_required >= NUM_PAGES_IN_2MB_LARGE_PAGE) { pool = &kctx->mem_pools.large[kctx->jit_group_id]; /* Round up to number of 2 MB pages required */ pages_required += (NUM_PAGES_IN_2MB_LARGE_PAGE - 1); @@ -2746,10 +2771,10 @@ static int kbase_jit_grow(struct kbase_context *kctx, const struct base_jit_allo kbase_mem_pool_lock(pool); } - if (reg->gpu_alloc->nents > info->commit_pages) { + if (reg->gpu_alloc->nents >= info->commit_pages) { kbase_mem_pool_unlock(pool); spin_unlock(&kctx->mem_partials_lock); - dev_warn( + dev_info( kctx->kbdev->dev, "JIT alloc grown beyond the required number of initially required pages, this grow no longer needed."); goto done; @@ -2999,7 +3024,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, if (!jit_allow_allocate(kctx, info, ignore_pressure_limit)) return NULL; - if (kctx->kbdev->pagesize_2mb) { + if (kbase_is_large_pages_enabled()) { /* Preallocate memory for the sub-allocation structs */ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); @@ -3008,7 +3033,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, } } - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); mutex_lock(&kctx->jit_evict_lock); /* @@ -3086,7 +3111,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, kbase_jit_done_phys_increase(kctx, needed_pages); #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); if (ret) { /* @@ -3147,7 +3172,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ mutex_unlock(&kctx->jit_evict_lock); - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, info->extension, &flags, &gpu_addr, mmu_sync_info); @@ -3249,9 +3274,9 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) u64 delta = old_pages - new_size; if (delta) { - mutex_lock(&kctx->reg_lock); + kbase_gpu_vm_lock_with_pmode_sync(kctx); kbase_mem_shrink(kctx, reg, old_pages - delta); - mutex_unlock(&kctx->reg_lock); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); } } @@ -3356,8 +3381,7 @@ void kbase_jit_term(struct kbase_context *kctx) struct kbase_va_region *walker; /* Free all allocations for this context */ - - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); mutex_lock(&kctx->jit_evict_lock); /* Free all allocations from the pool */ while (!list_empty(&kctx->jit_pool_head)) { @@ -3398,7 +3422,7 @@ void kbase_jit_term(struct kbase_context *kctx) WARN_ON(kctx->jit_phys_pages_to_be_allocated); #endif 
mutex_unlock(&kctx->jit_evict_lock); - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); /* * Flush the freeing of allocations whose backing has been freed diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h index 6277814..d4c3aee 100644 --- a/mali_kbase/mali_kbase_mem.h +++ b/mali_kbase/mali_kbase_mem.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -1409,11 +1409,29 @@ int kbase_update_region_flags(struct kbase_context *kctx, struct kbase_va_region void kbase_gpu_vm_lock(struct kbase_context *kctx); /** + * kbase_gpu_vm_lock_with_pmode_sync() - Wrapper of kbase_gpu_vm_lock. + * @kctx: KBase context + * + * Same as kbase_gpu_vm_lock for JM GPU. + * Additionally acquire P.mode read-write semaphore for CSF GPU. + */ +void kbase_gpu_vm_lock_with_pmode_sync(struct kbase_context *kctx); + +/** * kbase_gpu_vm_unlock() - Release the per-context region list lock * @kctx: KBase context */ void kbase_gpu_vm_unlock(struct kbase_context *kctx); +/** + * kbase_gpu_vm_unlock_with_pmode_sync() - Wrapper of kbase_gpu_vm_unlock. + * @kctx: KBase context + * + * Same as kbase_gpu_vm_unlock for JM GPU. + * Additionally release P.mode read-write semaphore for CSF GPU. + */ +void kbase_gpu_vm_unlock_with_pmode_sync(struct kbase_context *kctx); + int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size); /** @@ -1651,7 +1669,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pa * * @prealloc_sa: Information about the partial allocation if the amount of memory requested * is not a multiple of 2MB. One instance of struct kbase_sub_alloc must be - * allocated by the caller if kbdev->pagesize_2mb is enabled. + * allocated by the caller if large pages are enabled. * * Allocates @nr_pages_requested and updates the alloc object. This function does not allocate new * pages from the kernel, and therefore will never trigger the OoM killer. Therefore, it can be @@ -1679,9 +1697,9 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pa * This ensures that the pool can be grown to the required size and that the allocation can * complete without another thread using the newly grown pages. * - * If kbdev->pagesize_2mb is enabled and the allocation is >= 2MB, then @pool must be one of the - * pools from alloc->imported.native.kctx->mem_pools.large[]. Otherwise it must be one of the - * mempools from alloc->imported.native.kctx->mem_pools.small[]. + * If large (2MiB) pages are enabled and the allocation is >= 2MiB, then @pool + * must be one of the pools from alloc->imported.native.kctx->mem_pools.large[]. Otherwise it + * must be one of the mempools from alloc->imported.native.kctx->mem_pools.small[]. * * @prealloc_sa is used to manage the non-2MB sub-allocation. It has to be pre-allocated because we * must not sleep (due to the usage of kmalloc()) whilst holding pool->pool_lock. 
@prealloc_sa @@ -2595,4 +2613,7 @@ static inline base_mem_alloc_flags kbase_mem_group_id_set(int id) { return BASE_MEM_GROUP_ID_SET(id); } + +bool kbase_is_large_pages_enabled(void); + #endif /* _KBASE_MEM_H_ */ diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c index 6838fba..7801441 100644 --- a/mali_kbase/mali_kbase_mem_linux.c +++ b/mali_kbase/mali_kbase_mem_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -46,6 +46,7 @@ #include <mali_kbase_caps.h> #include <mali_kbase_trace_gpu_mem.h> #include <mali_kbase_reset_gpu.h> +#include <linux/version_compat_defs.h> #if ((KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) || \ (KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE)) @@ -433,7 +434,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages } reg->initial_commit = commit_pages; - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); if (reg->flags & KBASE_REG_PERMANENT_KERNEL_MAPPING) { /* Permanent kernel mappings must happen as soon as @@ -443,7 +444,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages */ int err = kbase_phy_alloc_mapping_init(kctx, reg, va_pages, commit_pages); if (err < 0) { - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); goto no_kern_mapping; } } @@ -455,7 +456,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages /* Bind to a cookie */ if (bitmap_empty(kctx->cookies, BITS_PER_LONG)) { dev_err(dev, "No cookies available for allocation!"); - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); goto no_cookie; } /* return a cookie */ @@ -472,7 +473,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages } else /* we control the VA */ { size_t align = 1; - if (kctx->kbdev->pagesize_2mb) { + if (kbase_is_large_pages_enabled()) { /* If there's enough (> 33 bits) of GPU VA space, align to 2MB * boundaries. The similar condition is used for mapping from * the SAME_VA zone inside kbase_context_get_unmapped_area(). 
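
The pagesize_2mb checks replaced throughout this patch are routed through kbase_is_large_pages_enabled(), which is backed by a static key so hot allocation paths pay a runtime-patched branch instead of loading a per-device flag. A minimal sketch of that pattern, assuming the standard jump-label API; the example_* init hook is illustrative and not the driver's actual probe path:

#include <linux/jump_label.h>
#include <linux/types.h>

/* Disabled by default; flipped once at init when 2MiB pages are usable. */
static DEFINE_STATIC_KEY_FALSE(large_pages_example_key);

/* Illustrative init-time hook. */
static void example_enable_large_pages(void)
{
        static_branch_enable(&large_pages_example_key);
}

/* Hot-path query: compiles to a no-op/jump patched at runtime, so callers
 * avoid a memory load on every allocation.
 */
static inline bool example_large_pages_enabled(void)
{
        return static_branch_unlikely(&large_pages_example_key);
}
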
@@ -490,7 +491,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages } if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, align, mmu_sync_info) != 0) { dev_warn(dev, "Failed to map memory on GPU"); - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); goto no_mmap; } /* return real GPU VA */ @@ -508,7 +509,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages } #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); #if MALI_USE_CSF if (*flags & BASE_MEM_FIXABLE) @@ -596,8 +597,10 @@ int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query, u64 *co *out |= BASE_MEM_COHERENT_SYSTEM; if (KBASE_REG_SHARE_IN & reg->flags) *out |= BASE_MEM_COHERENT_LOCAL; - if (KBASE_REG_DONT_NEED & reg->flags) - *out |= BASE_MEM_DONT_NEED; + if (mali_kbase_supports_mem_dont_need(kctx->api_version)) { + if (KBASE_REG_DONT_NEED & reg->flags) + *out |= BASE_MEM_DONT_NEED; + } if (mali_kbase_supports_mem_grow_on_gpf(kctx->api_version)) { /* Prior to this version, this was known about by * user-side but we did not return them. Returning @@ -634,9 +637,19 @@ int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query, u64 *co else *out |= BASE_MEM_FIXABLE; } -#endif +#endif /* MALI_USE_CSF */ if (KBASE_REG_GPU_VA_SAME_4GB_PAGE & reg->flags) *out |= BASE_MEM_GPU_VA_SAME_4GB_PAGE; + if (mali_kbase_supports_mem_import_sync_on_map_unmap(kctx->api_version)) { + if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { + if (reg->gpu_alloc->imported.umm.need_sync) + *out |= BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP; + } + } + if (mali_kbase_supports_mem_kernel_sync(kctx->api_version)) { + if (unlikely(reg->cpu_alloc != reg->gpu_alloc)) + *out |= BASE_MEM_KERNEL_SYNC; + } *out |= kbase_mem_group_id_set(reg->cpu_alloc->group_id); @@ -667,7 +680,9 @@ out_unlock: static unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s, struct shrink_control *sc) { - struct kbase_context *kctx = container_of(s, struct kbase_context, reclaim); + struct kbase_context *kctx = + KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, struct kbase_context, reclaim); + int evict_nents = atomic_read(&kctx->evict_nents); unsigned long nr_freeable_items; @@ -717,8 +732,15 @@ static unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s struct kbase_mem_phy_alloc *tmp; unsigned long freed = 0; - kctx = container_of(s, struct kbase_context, reclaim); + kctx = KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, struct kbase_context, reclaim); +#if MALI_USE_CSF + if (!down_read_trylock(&kctx->kbdev->csf.pmode_sync_sem)) { + dev_warn(kctx->kbdev->dev, + "Can't shrink GPU memory when P.Mode entrance is in progress"); + return 0; + } +#endif mutex_lock(&kctx->jit_evict_lock); list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) { @@ -757,32 +779,36 @@ static unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s } mutex_unlock(&kctx->jit_evict_lock); - +#if MALI_USE_CSF + up_read(&kctx->kbdev->csf.pmode_sync_sem); +#endif return freed; } int kbase_mem_evictable_init(struct kbase_context *kctx) { + struct shrinker *reclaim; + INIT_LIST_HEAD(&kctx->evict_list); mutex_init(&kctx->jit_evict_lock); - kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects; - kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects; - kctx->reclaim.seeks = DEFAULT_SEEKS; - /* Kernel versions prior to 3.1 : - * struct shrinker does not define batch 
- */ -#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE - register_shrinker(&kctx->reclaim); -#else - register_shrinker(&kctx->reclaim, "mali-mem"); -#endif + reclaim = KBASE_INIT_RECLAIM(kctx, reclaim, "mali-mem"); + if (!reclaim) + return -ENOMEM; + KBASE_SET_RECLAIM(kctx, reclaim, reclaim); + + reclaim->count_objects = kbase_mem_evictable_reclaim_count_objects; + reclaim->scan_objects = kbase_mem_evictable_reclaim_scan_objects; + reclaim->seeks = DEFAULT_SEEKS; + + KBASE_REGISTER_SHRINKER(reclaim, "mali-mem", kctx); + return 0; } void kbase_mem_evictable_deinit(struct kbase_context *kctx) { - unregister_shrinker(&kctx->reclaim); + KBASE_UNREGISTER_SHRINKER(kctx->reclaim); } /** @@ -1058,7 +1084,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in /* Lock down the context, and find the region */ down_write(kbase_mem_get_process_mmap_lock()); - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); /* Validate the region */ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); @@ -1110,7 +1136,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in } out_unlock: - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); up_write(kbase_mem_get_process_mmap_lock()); return ret; @@ -1791,7 +1817,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nent if (!reg->gpu_alloc->imported.alias.aliased) goto no_aliased_array; - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); /* validate and add src handles */ for (i = 0; i < nents; i++) { @@ -1901,7 +1927,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nent reg->flags &= ~KBASE_REG_FREE; reg->flags &= ~KBASE_REG_GROWABLE; - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); return gpu_va; @@ -1912,7 +1938,7 @@ bad_handle: * them is handled by putting reg's allocs, so no rollback of those * actions is done here. */ - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); no_aliased_array: invalid_flags: kbase_mem_phy_alloc_put(reg->cpu_alloc); @@ -2013,7 +2039,7 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, if (!reg) goto no_reg; - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); /* mmap needed to setup VA? 
*/ if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) { @@ -2047,13 +2073,13 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, /* clear out private flags */ *flags &= ((1UL << BASE_MEM_FLAGS_NR_BITS) - 1); - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); return 0; no_gpu_va: no_cookie: - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); kbase_mem_phy_alloc_put(reg->cpu_alloc); kbase_mem_phy_alloc_put(reg->gpu_alloc); kfree(reg); @@ -2139,7 +2165,7 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) } down_write(kbase_mem_get_process_mmap_lock()); - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); /* Validate the region */ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); @@ -2247,7 +2273,7 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) } out_unlock: - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); if (read_locked) up_read(kbase_mem_get_process_mmap_lock()); else @@ -2271,11 +2297,16 @@ int kbase_mem_shrink(struct kbase_context *const kctx, struct kbase_va_region *c return -EINVAL; old_pages = kbase_reg_current_backed_size(reg); - if (WARN_ON(old_pages < new_pages)) + if (old_pages < new_pages) { + dev_warn( + kctx->kbdev->dev, + "Requested number of pages (%llu) is larger than the current number of pages (%llu)", + new_pages, old_pages); return -EINVAL; + } delta = old_pages - new_pages; - if (kctx->kbdev->pagesize_2mb) { + if (kbase_is_large_pages_enabled()) { struct tagged_addr *start_free = reg->gpu_alloc->pages + new_pages; /* Move the end of new commited range to a valid location. @@ -2329,7 +2360,7 @@ static void kbase_cpu_vm_close(struct vm_area_struct *vma) KBASE_DEBUG_ASSERT(map->kctx); KBASE_DEBUG_ASSERT(map->alloc); - kbase_gpu_vm_lock(map->kctx); + kbase_gpu_vm_lock_with_pmode_sync(map->kctx); if (map->free_on_close) { KBASE_DEBUG_ASSERT(kbase_bits_to_zone(map->region->flags) == SAME_VA_ZONE); @@ -2343,7 +2374,7 @@ static void kbase_cpu_vm_close(struct vm_area_struct *vma) list_del(&map->mappings_list); kbase_va_region_alloc_put(map->kctx, map->region); - kbase_gpu_vm_unlock(map->kctx); + kbase_gpu_vm_unlock_with_pmode_sync(map->kctx); kbase_mem_phy_alloc_put(map->alloc); kbase_file_dec_cpu_mapping_count(map->kctx->kfile); @@ -2746,7 +2777,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, struct vm_area_struct * goto out; } - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) { /* The non-mapped tracking helper page */ @@ -2781,11 +2812,11 @@ int kbase_context_mmap(struct kbase_context *const kctx, struct vm_area_struct * #endif /* defined(CONFIG_MALI_VECTOR_DUMP) */ #if MALI_USE_CSF case PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE): - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); err = kbase_csf_cpu_mmap_user_reg_page(kctx, vma); goto out; case PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE)... 
PFN_DOWN(BASE_MEM_COOKIE_BASE) - 1: { - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); mutex_lock(&kctx->csf.lock); err = kbase_csf_cpu_mmap_user_io_pages(kctx, vma); mutex_unlock(&kctx->csf.lock); @@ -2879,7 +2910,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, struct vm_area_struct * } #endif /* defined(CONFIG_MALI_VECTOR_DUMP) */ out_unlock: - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); out: if (err) dev_err(dev, "mmap failed %d\n", err); diff --git a/mali_kbase/mali_kbase_mem_linux.h b/mali_kbase/mali_kbase_mem_linux.h index 2866603..037bdfe 100644 --- a/mali_kbase/mali_kbase_mem_linux.h +++ b/mali_kbase/mali_kbase_mem_linux.h @@ -57,6 +57,8 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages /** * kbase_mem_query - Query properties of a GPU memory region * + * Note: Does not currently report the BASE_MEM_SAME_VA flag for any memory allocation. + * * @kctx: The kernel context * @gpu_addr: A GPU address contained within the memory region * @query: The type of query, from KBASE_MEM_QUERY_* flags, which could be diff --git a/mali_kbase/mali_kbase_mem_migrate.c b/mali_kbase/mali_kbase_mem_migrate.c index dbd340a..26ddeed 100644 --- a/mali_kbase/mali_kbase_mem_migrate.c +++ b/mali_kbase/mali_kbase_mem_migrate.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,6 +28,9 @@ #include <mali_kbase_mem_migrate.h> #include <mmu/mali_kbase_mmu.h> +/* Static key used to determine if page migration is enabled or not */ +static DEFINE_STATIC_KEY_FALSE(page_migration_static_key); + /* Global integer used to determine if module parameter value has been * provided and if page migration feature is enabled. * Feature is disabled on all platforms by default. @@ -50,15 +53,6 @@ MODULE_PARM_DESC(kbase_page_migration_enabled, KBASE_EXPORT_TEST_API(kbase_page_migration_enabled); -bool kbase_is_page_migration_enabled(void) -{ - /* Handle uninitialised int case */ - if (kbase_page_migration_enabled < 0) - return false; - return IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) && kbase_page_migration_enabled; -} -KBASE_EXPORT_SYMBOL(kbase_is_page_migration_enabled); - #if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) static const struct movable_operations movable_ops; #endif @@ -225,7 +219,7 @@ static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new * This blocks the CPU page fault handler from remapping pages. * Only MCU's mmut is device wide, i.e. no corresponding kctx. */ - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); ret = kbase_mmu_migrate_page( as_tagged(page_to_phys(old_page)), as_tagged(page_to_phys(new_page)), old_dma_addr, @@ -252,7 +246,7 @@ static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new dma_unmap_page(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); /* Page fault handler for CPU mapping unblocked. */ - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); return ret; } @@ -291,7 +285,7 @@ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct pa /* Lock context to protect access to array of pages in physical allocation. 
* This blocks the CPU page fault handler from remapping pages. */ - kbase_gpu_vm_lock(kctx); + kbase_gpu_vm_lock_with_pmode_sync(kctx); /* Unmap the old physical range. */ unmap_mapping_range(kctx->kfile->filp->f_inode->i_mapping, @@ -328,7 +322,7 @@ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct pa dma_unmap_page(kctx->kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); /* Page fault handler for CPU mapping unblocked. */ - kbase_gpu_vm_unlock(kctx); + kbase_gpu_vm_unlock_with_pmode_sync(kctx); return ret; } @@ -679,11 +673,15 @@ void kbase_mem_migrate_init(struct kbase_device *kbdev) * integer for a negative value to see if insmod parameter was * passed in at all (it will override the default negative value). */ - if (kbase_page_migration_enabled < 0) - kbase_page_migration_enabled = kbdev->pagesize_2mb ? 1 : 0; - else + if (kbase_page_migration_enabled < 0) { + if (kbase_is_large_pages_enabled()) + static_branch_enable(&page_migration_static_key); + } else { dev_info(kbdev->dev, "Page migration support explicitly %s at insmod.", kbase_page_migration_enabled ? "enabled" : "disabled"); + if (kbase_page_migration_enabled) + static_branch_enable(&page_migration_static_key); + } spin_lock_init(&mem_migrate->free_pages_lock); INIT_LIST_HEAD(&mem_migrate->free_pages_list); @@ -708,3 +706,9 @@ void kbase_mem_migrate_term(struct kbase_device *kbdev) iput(mem_migrate->inode); #endif } + +bool kbase_is_page_migration_enabled(void) +{ + return static_branch_unlikely(&page_migration_static_key); +} +KBASE_EXPORT_TEST_API(kbase_is_page_migration_enabled); diff --git a/mali_kbase/mali_kbase_mem_migrate.h b/mali_kbase/mali_kbase_mem_migrate.h index ece8734..70c3135 100644 --- a/mali_kbase/mali_kbase_mem_migrate.h +++ b/mali_kbase/mali_kbase_mem_migrate.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved. 
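
The kbase_mem_migrate_init() change above keeps the insmod parameter as a tri-state: a negative value means "not set", in which case page migration defaults to whatever large-page support implies, while an explicit 0/1 overrides that before the static key is enabled. A small self-contained sketch of that decision, using illustrative example_* names rather than the driver's own symbols:

#include <linux/module.h>
#include <linux/jump_label.h>

/* -1 = not given on the insmod command line, 0 = force off, 1 = force on. */
static int example_migration_param = -1;
module_param(example_migration_param, int, 0444);
MODULE_PARM_DESC(example_migration_param, "Enable page migration (default follows large-page support)");

static DEFINE_STATIC_KEY_FALSE(example_migration_key);

static void example_migration_init(bool large_pages_supported)
{
        bool enable;

        /* Fall back to the platform default only when no override was given. */
        if (example_migration_param < 0)
                enable = large_pages_supported;
        else
                enable = example_migration_param != 0;

        if (enable)
                static_branch_enable(&example_migration_key);
}
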
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/mali_kbase/mali_kbase_mem_pool.c b/mali_kbase/mali_kbase_mem_pool.c index cb862d5..5984730 100644 --- a/mali_kbase/mali_kbase_mem_pool.c +++ b/mali_kbase/mali_kbase_mem_pool.c @@ -480,7 +480,7 @@ static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s, CSTD_UNUSED(sc); - pool = container_of(s, struct kbase_mem_pool, reclaim); + pool = KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, struct kbase_mem_pool, reclaim); kbase_mem_pool_lock(pool); if (pool->dont_reclaim && !pool->dying) { @@ -502,7 +502,7 @@ static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s, struct kbase_mem_pool *pool; unsigned long freed; - pool = container_of(s, struct kbase_mem_pool, reclaim); + pool = KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, struct kbase_mem_pool, reclaim); kbase_mem_pool_lock(pool); if (pool->dont_reclaim && !pool->dying) { @@ -528,6 +528,8 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool, const struct kbase_mem_pool unsigned int order, int group_id, struct kbase_device *kbdev, struct kbase_mem_pool *next_pool) { + struct shrinker *reclaim; + if (WARN_ON(group_id < 0) || WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) { return -EINVAL; } @@ -544,18 +546,17 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool, const struct kbase_mem_pool spin_lock_init(&pool->pool_lock); INIT_LIST_HEAD(&pool->page_list); - pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects; - pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects; - pool->reclaim.seeks = DEFAULT_SEEKS; - /* Kernel versions prior to 3.1 : - * struct shrinker does not define batch - */ - pool->reclaim.batch = 0; -#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE - register_shrinker(&pool->reclaim); -#else - register_shrinker(&pool->reclaim, "mali-mem-pool"); -#endif + reclaim = KBASE_INIT_RECLAIM(pool, reclaim, "mali-mem-pool"); + if (!reclaim) + return -ENOMEM; + KBASE_SET_RECLAIM(pool, reclaim, reclaim); + + reclaim->count_objects = kbase_mem_pool_reclaim_count_objects; + reclaim->scan_objects = kbase_mem_pool_reclaim_scan_objects; + reclaim->seeks = DEFAULT_SEEKS; + reclaim->batch = 0; + + KBASE_REGISTER_SHRINKER(reclaim, "mali-mem-pool", pool); pool_dbg(pool, "initialized\n"); @@ -581,7 +582,7 @@ void kbase_mem_pool_term(struct kbase_mem_pool *pool) pool_dbg(pool, "terminate()\n"); - unregister_shrinker(&pool->reclaim); + KBASE_UNREGISTER_SHRINKER(pool->reclaim); kbase_mem_pool_lock(pool); pool->max_size = 0; diff --git a/mali_kbase/mali_kbase_native_mgm.c b/mali_kbase/mali_kbase_native_mgm.c index 5e3d1ee..d688509 100644 --- a/mali_kbase/mali_kbase_native_mgm.c +++ b/mali_kbase/mali_kbase_native_mgm.c @@ -121,44 +121,20 @@ static vm_fault_t kbase_native_mgm_vmf_insert_pfn_prot(struct memory_group_manag return vmf_insert_pfn_prot(vma, addr, pfn, pgprot); } -/** - * kbase_native_mgm_update_gpu_pte - Native method to modify a GPU page table - * entry - * - * @mgm_dev: The memory group manager the request is being made through. - * @group_id: A physical memory group ID, which must be valid but is not used. - * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. - * @mmu_level: The level of the MMU page table where the page is getting mapped. - * @pte: The prepared page table entry. - * - * This function simply returns the @pte without modification. - * - * Return: A GPU page table entry to be stored in a page table. 
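
The mem-pool shrinker rework above drops the direct register_shrinker() calls in favour of the KBASE_* compatibility wrappers, because from roughly kernel 6.7 the shrinker is heap-allocated and registered in two steps. A hedged sketch of what that newer upstream flow looks like on its own (older kernels keep the embedded struct shrinker and register_shrinker(); all example_* names are illustrative):

#include <linux/shrinker.h>
#include <linux/errno.h>

struct example_pool {
        struct shrinker *reclaim;
        /* ... pool state ... */
};

static unsigned long example_count(struct shrinker *s, struct shrink_control *sc)
{
        struct example_pool *pool = s->private_data;

        /* Return the number of reclaimable objects held by @pool. */
        return 0;
}

static unsigned long example_scan(struct shrinker *s, struct shrink_control *sc)
{
        /* Nothing freed in this sketch. */
        return SHRINK_STOP;
}

static int example_pool_register(struct example_pool *pool)
{
        pool->reclaim = shrinker_alloc(0, "example-pool");
        if (!pool->reclaim)
                return -ENOMEM;

        pool->reclaim->count_objects = example_count;
        pool->reclaim->scan_objects = example_scan;
        pool->reclaim->seeks = DEFAULT_SEEKS;
        pool->reclaim->private_data = pool;
        shrinker_register(pool->reclaim);
        return 0;
}

static void example_pool_unregister(struct example_pool *pool)
{
        shrinker_free(pool->reclaim);
}
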
- */ static u64 kbase_native_mgm_update_gpu_pte(struct memory_group_manager_device *mgm_dev, unsigned int group_id, int mmu_level, u64 pte) { - CSTD_UNUSED(mgm_dev); - CSTD_UNUSED(group_id); - CSTD_UNUSED(mmu_level); + if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + return pte; + + pte |= ((u64)group_id << PTE_PBHA_SHIFT) & PTE_PBHA_MASK; + + /* Address could be translated into a different bus address here */ + pte |= ((u64)1 << PTE_RES_BIT_MULTI_AS_SHIFT); return pte; } -/** - * kbase_native_mgm_pte_to_original_pte - Native method to undo changes done in - * kbase_native_mgm_update_gpu_pte() - * - * @mgm_dev: The memory group manager the request is being made through. - * @group_id: A physical memory group ID, which must be valid but is not used. - * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. - * @mmu_level: The level of the MMU page table where the page is getting mapped. - * @pte: The prepared page table entry. - * - * This function simply returns the @pte without modification. - * - * Return: A GPU page table entry to be stored in a page table. - */ static u64 kbase_native_mgm_pte_to_original_pte(struct memory_group_manager_device *mgm_dev, unsigned int group_id, int mmu_level, u64 pte) { @@ -166,6 +142,11 @@ static u64 kbase_native_mgm_pte_to_original_pte(struct memory_group_manager_devi CSTD_UNUSED(group_id); CSTD_UNUSED(mmu_level); + /* Undo the group ID modification */ + pte &= ~PTE_PBHA_MASK; + /* Undo the bit set */ + pte &= ~((u64)1 << PTE_RES_BIT_MULTI_AS_SHIFT); + return pte; } diff --git a/mali_kbase/mali_kbase_pbha.c b/mali_kbase/mali_kbase_pbha.c index 341ea90..c5b6fad 100644 --- a/mali_kbase/mali_kbase_pbha.c +++ b/mali_kbase/mali_kbase_pbha.c @@ -277,16 +277,16 @@ static int kbase_pbha_read_int_id_override_property(struct kbase_device *kbdev, static int kbase_pbha_read_propagate_bits_property(struct kbase_device *kbdev, const struct device_node *pbha_node) { - u32 bits = 0; + u8 bits = 0; int err; if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU)) return 0; - err = of_property_read_u32(pbha_node, "propagate-bits", &bits); + err = of_property_read_u8(pbha_node, "propagate-bits", &bits); if (err == -EINVAL) { - err = of_property_read_u32(pbha_node, "propagate_bits", &bits); + err = of_property_read_u8(pbha_node, "propagate_bits", &bits); } if (err < 0) { diff --git a/mali_kbase/mali_kbase_pbha_debugfs.c b/mali_kbase/mali_kbase_pbha_debugfs.c index f1d2794..8ab0d18 100644 --- a/mali_kbase/mali_kbase_pbha_debugfs.c +++ b/mali_kbase/mali_kbase_pbha_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved. 
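
The native memory-group manager above now encodes the group ID into the PBHA field of the page-table entry and sets the reserved multi-AS bit, with the inverse hook stripping both again. The same packing expressed as a stand-alone pair of helpers; the helper names are illustrative, the masks are the ones referenced by the patch and assumed to come from the memory_group_manager.h header:

#include <linux/types.h>
#include <linux/memory_group_manager.h>

/* Pack a memory-group ID into the PBHA bits of a GPU PTE and mark the
 * reserved bit, mirroring the update_gpu_pte hook above.
 */
static inline u64 example_pte_pack_group(u64 pte, unsigned int group_id)
{
        pte |= ((u64)group_id << PTE_PBHA_SHIFT) & PTE_PBHA_MASK;
        pte |= (u64)1 << PTE_RES_BIT_MULTI_AS_SHIFT;
        return pte;
}

/* Inverse of the above, as done by the pte_to_original_pte hook. */
static inline u64 example_pte_unpack_group(u64 pte)
{
        pte &= ~PTE_PBHA_MASK;
        pte &= ~((u64)1 << PTE_RES_BIT_MULTI_AS_SHIFT);
        return pte;
}
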
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -50,8 +50,8 @@ static int int_id_overrides_show(struct seq_file *sfile, void *data) #endif /* MALI_USE_CSF */ for (j = 0; j < sizeof(u32); ++j) { - u8 r_val; - u8 w_val; + u8 r_val = 0; + u8 w_val = 0; switch (j) { case 0: diff --git a/mali_kbase/mali_kbase_pm.c b/mali_kbase/mali_kbase_pm.c index ff71524..c17e302 100644 --- a/mali_kbase/mali_kbase_pm.c +++ b/mali_kbase/mali_kbase_pm.c @@ -200,19 +200,24 @@ int kbase_pm_driver_suspend(struct kbase_device *kbdev) mutex_unlock(&kbdev->pm.lock); #ifdef CONFIG_MALI_ARBITER_SUPPORT -#if !MALI_USE_CSF if (kbdev->arb.arb_if) { - unsigned int i; unsigned long flags; +#if MALI_USE_CSF + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_disjoint_state_up(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#else + unsigned int i; + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbdev->js_data.runpool_irq.submit_allowed = 0; kbase_disjoint_state_up(kbdev); for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) kbase_job_slot_softstop(kbdev, i, NULL); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#endif } -#endif /* !MALI_USE_CSF */ #endif /* CONFIG_MALI_ARBITER_SUPPORT */ /* From now on, the active count will drop towards zero. Sometimes, diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c index cc4de07..2783e04 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.c +++ b/mali_kbase/mmu/mali_kbase_mmu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -776,7 +776,7 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, struct kbase_va_reg return false; } - if (kctx->kbdev->pagesize_2mb && new_pages >= NUM_PAGES_IN_2MB_LARGE_PAGE) { + if (kbase_is_large_pages_enabled() && new_pages >= NUM_PAGES_IN_2MB_LARGE_PAGE) { root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id]; *grow_2mb_pool = true; } else { @@ -923,7 +923,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) int err; bool grown = false; size_t pages_to_grow; - bool grow_2mb_pool; + bool grow_2mb_pool = false; struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; int i; size_t current_backed_size; @@ -1093,7 +1093,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) } page_fault_retry: - if (kbdev->pagesize_2mb) { + if (kbase_is_large_pages_enabled()) { /* Preallocate (or re-allocate) memory for the sub-allocation structs if necessary */ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { if (!prealloc_sas[i]) { @@ -1180,10 +1180,14 @@ page_fault_retry: */ op_param.mmu_sync_info = mmu_sync_info; op_param.kctx_id = kctx->id; - /* Can safely skip the invalidate for all levels in case - * of duplicate page faults. + /* Usually it is safe to skip the MMU cache invalidate for all levels + * in case of duplicate page faults. But for the pathological scenario + * where the faulty VA gets mapped by the time page fault worker runs it + * becomes imperative to invalidate MMU cache for all levels, otherwise + * there is a possibility of repeated page faults on GPUs which supports + * fine grained MMU cache invalidation. 
*/ - op_param.flush_skip_levels = 0xF; + op_param.flush_skip_levels = 0x0; op_param.vpfn = fault_pfn; op_param.nr = 1; spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); @@ -1217,10 +1221,14 @@ page_fault_retry: /* See comment [1] about UNLOCK usage */ op_param.mmu_sync_info = mmu_sync_info; op_param.kctx_id = kctx->id; - /* Can safely skip the invalidate for all levels in case - * of duplicate page faults. + /* Usually it is safe to skip the MMU cache invalidate for all levels + * in case of duplicate page faults. But for the pathological scenario + * where the faulty VA gets mapped by the time page fault worker runs it + * becomes imperative to invalidate MMU cache for all levels, otherwise + * there is a possibility of repeated page faults on GPUs which supports + * fine grained MMU cache invalidation. */ - op_param.flush_skip_levels = 0xF; + op_param.flush_skip_levels = 0x0; op_param.vpfn = fault_pfn; op_param.nr = 1; spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); @@ -1382,7 +1390,7 @@ page_fault_retry: * Otherwise fail the allocation. */ if (pages_to_grow > 0) { - if (kbdev->pagesize_2mb && grow_2mb_pool) { + if (kbase_is_large_pages_enabled() && grow_2mb_pool) { /* Round page requirement up to nearest 2 MB */ struct kbase_mem_pool *const lp_mem_pool = &kctx->mem_pools.large[group_id]; @@ -1595,6 +1603,7 @@ static int mmu_get_lowest_valid_pgd(struct kbase_device *kbdev, struct kbase_mmu return err; } +KBASE_ALLOW_ERROR_INJECTION_TEST_API(mmu_get_lowest_valid_pgd, ERRNO); /* * On success, sets out_pgd to the PGD for the specified level of translation @@ -1700,8 +1709,16 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, mmu_mode->entries_invalidate(&page[idx], pcount); if (!num_of_valid_entries) { + mmu_mode->set_num_valid_entries(page, 0); + kbase_kunmap(p, page); + /* No CPU and GPU cache maintenance is done here as caller would do the + * complete flush of GPU cache and invalidation of TLB before the PGD + * page is freed. CPU cache flush would be done when the PGD page is + * returned to the memory pool. + */ + kbase_mmu_add_to_free_pgds_list(mmut, p); kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, @@ -1728,7 +1745,8 @@ next: * going to happen to these pages at this stage. They might return * movable once they are returned to a memory pool. 
*/ - if (kbase_is_page_migration_enabled() && !ignore_page_migration && phys) { + if (kbase_is_page_migration_enabled() && !ignore_page_migration && phys && + !is_huge(*phys) && !is_partial(*phys)) { const u64 num_pages = (to_vpfn - from_vpfn) / GPU_PAGES_PER_CPU_PAGE; u64 i; @@ -2525,6 +2543,7 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *m } KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); +KBASE_ALLOW_ERROR_INJECTION_TEST_API(kbase_mmu_insert_pages, ERRNO); int kbase_mmu_insert_pages_skip_status_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, @@ -2582,6 +2601,7 @@ int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_ return 0; } +KBASE_ALLOW_ERROR_INJECTION_TEST_API(kbase_mmu_insert_aliased_pages, ERRNO); void kbase_mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, int as_nr) { @@ -2720,15 +2740,25 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, kbdev->mmu_mode->entries_invalidate(¤t_page[index], 1); if (current_valid_entries == 1 && current_level != MIDGARD_MMU_LEVEL(0)) { + kbdev->mmu_mode->set_num_valid_entries(current_page, 0); + kbase_kunmap(p, current_page); - /* Ensure the cacheline containing the last valid entry - * of PGD is invalidated from the GPU cache, before the - * PGD page is freed. - */ - kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, - current_pgd + (index * sizeof(u64)), sizeof(u64), - flush_op); + /* Check if fine grained GPU cache maintenance is being used */ + if (flush_op == KBASE_MMU_OP_FLUSH_PT) { + /* Ensure the invalidated PTE is visible in memory right away */ + kbase_mmu_sync_pgd_cpu(kbdev, + kbase_dma_addr(p) + (index * sizeof(u64)), + sizeof(u64)); + /* Invalidate the GPU cache for the whole PGD page and not just for + * the cacheline containing the invalidated PTE, as the PGD page is + * going to be freed. There is an extremely remote possibility that + * other cachelines (containing all invalid PTEs) of PGD page are + * also present in the GPU cache. 
+ */ + kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, current_pgd, + 512 * sizeof(u64), KBASE_MMU_OP_FLUSH_PT); + } kbase_mmu_add_to_free_pgds_list(mmut, p); } else { @@ -2832,8 +2862,7 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase phys_addr_t pgd = mmut->pgd; struct page *p = phys_to_page(pgd); - if (count > nr) - count = nr; + count = MIN(nr, count); /* need to check if this is a 2MB page or a small page */ for (level = MIDGARD_MMU_TOPLEVEL; level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { @@ -2844,23 +2873,12 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase if (mmu_mode->ate_is_valid(page[index], level)) break; /* keep the mapping */ else if (!mmu_mode->pte_is_valid(page[index], level)) { - /* nothing here, advance */ - switch (level) { - case MIDGARD_MMU_LEVEL(0): - count = 134217728; - break; - case MIDGARD_MMU_LEVEL(1): - count = 262144; - break; - case MIDGARD_MMU_LEVEL(2): - count = 512; - break; - case MIDGARD_MMU_LEVEL(3): - count = 1; - break; - } - if (count > nr) - count = nr; + dev_warn(kbdev->dev, "Invalid PTE found @ level %d for VA %llx", + level, vpfn << PAGE_SHIFT); + /* nothing here, advance to the next PTE of the current level */ + count = (1 << ((3 - level) * 9)); + count -= (vpfn & (count - 1)); + count = MIN(nr, count); goto next; } next_pgd = mmu_mode->pte_to_phy_addr( @@ -2915,14 +2933,25 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase mmu_mode->entries_invalidate(&page[index], pcount); if (!num_of_valid_entries) { + mmu_mode->set_num_valid_entries(page, 0); + kbase_kunmap(p, page); - /* Ensure the cacheline(s) containing the last valid entries - * of PGD is invalidated from the GPU cache, before the - * PGD page is freed. - */ - kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), - pcount * sizeof(u64), flush_op); + /* Check if fine grained GPU cache maintenance is being used */ + if (flush_op == KBASE_MMU_OP_FLUSH_PT) { + /* Ensure the invalidated ATEs are visible in memory right away */ + kbase_mmu_sync_pgd_cpu(kbdev, + kbase_dma_addr(p) + (index * sizeof(u64)), + pcount * sizeof(u64)); + /* Invalidate the GPU cache for the whole PGD page and not just for + * the cachelines containing the invalidated ATEs, as the PGD page + * is going to be freed. There is an extremely remote possibility + * that other cachelines (containing all invalid ATEs) of PGD page + * are also present in the GPU cache. 
+ */ + kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, pgd, 512 * sizeof(u64), + KBASE_MMU_OP_FLUSH_PT); + } kbase_mmu_add_to_free_pgds_list(mmut, p); @@ -3069,6 +3098,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table return mmu_teardown_pages(kbdev, mmut, vpfn, phys, nr_phys_pages, nr_virt_pages, as_nr, false); } +KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); int kbase_mmu_teardown_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, struct tagged_addr *phys, size_t nr_phys_pages, diff --git a/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c b/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c index d19579d..e3ad78d 100644 --- a/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c +++ b/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c @@ -32,7 +32,7 @@ */ #define ENTRY_IS_ATE_L3 3ULL #define ENTRY_IS_ATE_L02 1ULL -#define ENTRY_IS_INVAL 2ULL +#define ENTRY_IS_INVAL 0ULL #define ENTRY_IS_PTE 3ULL #define ENTRY_ACCESS_RW (1ULL << 6) /* bits 6:7 */ diff --git a/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c b/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c index 7c92505..9e3f789 100644 --- a/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c +++ b/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/mali_kbase/tests/include/kutf/kutf_kprobe.h b/mali_kbase/tests/include/kutf/kutf_kprobe.h index f75cd77..cdcaa46 100644 --- a/mali_kbase/tests/include/kutf/kutf_kprobe.h +++ b/mali_kbase/tests/include/kutf/kutf_kprobe.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,6 +22,8 @@ #ifndef _KUTF_KPROBE_H_ #define _KUTF_KPROBE_H_ +struct dentry; + int kutf_kprobe_init(struct dentry *base_dir); void kutf_kprobe_exit(void); diff --git a/mali_kbase/thirdparty/mali_kbase_mmap.c b/mali_kbase/thirdparty/mali_kbase_mmap.c index 1592eab..9fad54d 100644 --- a/mali_kbase/thirdparty/mali_kbase_mmap.c +++ b/mali_kbase/thirdparty/mali_kbase_mmap.c @@ -20,18 +20,84 @@ * kbase_context_get_unmapped_area() interface. */ +#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) +/** + * move_mt_gap() - Search the maple tree for an existing gap of a particular size + * immediately before another pre-identified gap. + * @gap_start: Pre-identified gap starting address. + * @gap_end: Pre-identified gap ending address. + * @size: Size of the new gap needed before gap_start. + * + * This function will search the calling process' maple tree + * for another gap, one that is immediately preceding the pre-identified + * gap, for a specific size, and upon success it will decrement gap_end + * by the specified size, and replace gap_start with the new gap_start of + * the newly identified gap. + * + * Return: true if large enough preceding gap is found, false otherwise. 
+ */ +static bool move_mt_gap(unsigned long *gap_start, unsigned long *gap_end, unsigned long size) +{ + unsigned long new_gap_start, new_gap_end; + + MA_STATE(mas, ¤t->mm->mm_mt, 0, 0); + + if (*gap_end < size) + return false; + + /* Calculate the gap end for the new, resultant gap */ + new_gap_end = *gap_end - size; + + /* If the new gap_end (i.e. new VA start address) is larger than gap_start, than the + * pre-identified gap already has space to shrink to accommodate the decrease in + * gap_end. + */ + if (new_gap_end >= *gap_start) { + /* Pre-identified gap already has space - just patch gap_end to new + * lower value and exit. + */ + *gap_end = new_gap_end; + return true; + } + + /* Since the new VA start address (new_gap_end) is below the start of the pre-identified + * gap in the maple tree, see if there is a free gap directly before the existing gap, of + * the same size as the alignment shift, such that the effective gap found is "extended". + * This may be larger than needed but leaves the same distance between gap_end and gap_start + * that currently exists. + */ + new_gap_start = *gap_start - size; + if (mas_empty_area_rev(&mas, new_gap_start, *gap_start - 1, size)) { + /* There's no gap between the new start address needed and the + * current start address - so return false to find a new + * gap from the maple tree. + */ + return false; + } + /* Suitable gap found - replace gap_start and gap_end with new values. gap_start takes the + * value of the start of new gap found, which now correctly precedes gap_end, and gap_end + * takes on the new aligned value that has now been decremented by the requested size. + */ + *gap_start = mas.index; + *gap_end = new_gap_end; + return true; +} + /** * align_and_check() - Align the specified pointer to the provided alignment and - * check that it is still in range. - * @gap_end: Highest possible start address for allocation (end of gap in - * address space) - * @gap_start: Start address of current memory area / gap in address space - * @info: vm_unmapped_area_info structure passed to caller, containing - * alignment, length and limits for the allocation - * @is_shader_code: True if the allocation is for shader code (which has - * additional alignment requirements) - * @is_same_4gb_page: True if the allocation needs to reside completely within - * a 4GB chunk + * check that it is still in range. On kernel 6.1 onwards + * this function does not require that the initial requested + * gap is extended with the maximum size needed to guarantee + * an alignment. 
+ * @gap_end: Highest possible start address for allocation (end of gap in + * address space) + * @gap_start: Start address of current memory area / gap in address space + * @info: vm_unmapped_area_info structure passed to caller, containing + * alignment, length and limits for the allocation + * @is_shader_code: True if the allocation is for shader code (which has + * additional alignment requirements) + * @is_same_4gb_page: True if the allocation needs to reside completely within + * a 4GB chunk * * Return: true if gap_end is now aligned correctly and is still in range, * false otherwise @@ -40,9 +106,94 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, struct vm_unmapped_area_info *info, bool is_shader_code, bool is_same_4gb_page) { + unsigned long alignment_shift; + /* Compute highest gap address at the desired alignment */ - (*gap_end) -= info->length; - (*gap_end) -= (*gap_end - info->align_offset) & info->align_mask; + *gap_end -= info->length; + alignment_shift = (*gap_end - info->align_offset) & info->align_mask; + + /* Align desired start VA (gap_end) by calculated alignment shift amount */ + if (!move_mt_gap(&gap_start, gap_end, alignment_shift)) + return false; + /* Alignment is done so far - check for further alignment requirements */ + + if (is_shader_code) { + /* Shader code allocations must not start or end on a 4GB boundary */ + alignment_shift = info->align_offset ? info->align_offset : info->length; + if (0 == (*gap_end & BASE_MEM_MASK_4GB)) { + if (!move_mt_gap(&gap_start, gap_end, alignment_shift)) + return false; + } + if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB)) { + if (!move_mt_gap(&gap_start, gap_end, alignment_shift)) + return false; + } + + if (!(*gap_end & BASE_MEM_MASK_4GB) || + !((*gap_end + info->length) & BASE_MEM_MASK_4GB)) + return false; + } else if (is_same_4gb_page) { + unsigned long start = *gap_end; + unsigned long end = *gap_end + info->length; + unsigned long mask = ~((unsigned long)U32_MAX); + + /* Check if 4GB boundary is straddled */ + if ((start & mask) != ((end - 1) & mask)) { + unsigned long offset = end - (end & mask); + /* This is to ensure that alignment doesn't get + * disturbed in an attempt to prevent straddling at + * 4GB boundary. The GPU VA is aligned to 2MB when the + * allocation size is > 2MB and there is enough CPU & + * GPU virtual space. + */ + unsigned long rounded_offset = ALIGN(offset, info->align_mask + 1); + + if (!move_mt_gap(&gap_start, gap_end, rounded_offset)) + return false; + /* Re-calculate start and end values */ + start = *gap_end; + end = *gap_end + info->length; + + /* The preceding 4GB boundary shall not get straddled, + * even after accounting for the alignment, as the + * size of allocation is limited to 4GB and the initial + * start location was already aligned. + */ + WARN_ON((start & mask) != ((end - 1) & mask)); + } + } + + if ((*gap_end < info->low_limit) || (*gap_end < gap_start)) + return false; + + return true; +} +#else +/** + * align_and_check() - Align the specified pointer to the provided alignment and + * check that it is still in range. For Kernel versions below + * 6.1, it requires that the length of the alignment is already + * extended by a worst-case alignment mask. 
+ * @gap_end: Highest possible start address for allocation (end of gap in + * address space) + * @gap_start: Start address of current memory area / gap in address space + * @info: vm_unmapped_area_info structure passed to caller, containing + * alignment, length and limits for the allocation + * @is_shader_code: True if the allocation is for shader code (which has + * additional alignment requirements) + * @is_same_4gb_page: True if the allocation needs to reside completely within + * a 4GB chunk + * + * Return: true if gap_end is now aligned correctly and is still in range, + * false otherwise + */ +static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, + struct vm_unmapped_area_info *info, bool is_shader_code, + bool is_same_4gb_page) +{ + /* Compute highest gap address at the desired alignment */ + *gap_end -= info->length; + *gap_end -= (*gap_end - info->align_offset) & info->align_mask; if (is_shader_code) { /* Check for 4GB boundary */ @@ -73,6 +224,7 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, start -= rounded_offset; end -= rounded_offset; + /* Patch gap_end to use new starting address for VA region */ *gap_end = start; /* The preceding 4GB boundary shall not get straddled, @@ -89,6 +241,7 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, return true; } +#endif /** * kbase_unmapped_area_topdown() - allocates new areas top-down from @@ -218,31 +371,27 @@ check_current: } } #else - unsigned long length, high_limit, gap_start, gap_end; + unsigned long high_limit, gap_start, gap_end; MA_STATE(mas, ¤t->mm->mm_mt, 0, 0); - /* Adjust search length to account for worst case alignment overhead */ - length = info->length + info->align_mask; - if (length < info->length) - return -ENOMEM; /* * Adjust search limits by the desired length. * See implementation comment at top of unmapped_area(). */ gap_end = info->high_limit; - if (gap_end < length) + if (gap_end < info->length) return -ENOMEM; - high_limit = gap_end - length; + high_limit = gap_end - info->length; if (info->low_limit > high_limit) return -ENOMEM; while (true) { - if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, length)) + if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, info->length)) return -ENOMEM; gap_end = mas.last + 1; - gap_start = mas.min; + gap_start = mas.index; if (align_and_check(&gap_end, gap_start, info, is_shader_code, is_same_4gb_page)) return gap_end; diff --git a/mali_kbase/tl/mali_kbase_tracepoints.c b/mali_kbase/tl/mali_kbase_tracepoints.c index 7427358..34cabbd 100644 --- a/mali_kbase/tl/mali_kbase_tracepoints.c +++ b/mali_kbase/tl/mali_kbase_tracepoints.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/mali_kbase/tl/mali_kbase_tracepoints.h b/mali_kbase/tl/mali_kbase_tracepoints.h index f5b5b39..dd23f97 100644 --- a/mali_kbase/tl/mali_kbase_tracepoints.h +++ b/mali_kbase/tl/mali_kbase_tracepoints.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved. 
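
The kbase_unmapped_area_topdown() changes above stop padding the search length with the worst-case alignment mask and instead let align_and_check()/move_mt_gap() borrow space from a neighbouring maple-tree gap. The underlying primitive is mas_empty_area_rev(); below is a minimal sketch of a plain top-down gap lookup with it, assuming kernel >= 6.1, mmap_lock held by the caller, and none of the retry or alignment handling of the real code:

#include <linux/maple_tree.h>
#include <linux/mm_types.h>
#include <linux/errno.h>

/* Return the highest base address for a @length-byte mapping within
 * [low_limit, high_limit), or -ENOMEM if no gap is large enough.
 */
static unsigned long example_topdown_gap(struct mm_struct *mm, unsigned long low_limit,
                                         unsigned long high_limit, unsigned long length)
{
        MA_STATE(mas, &mm->mm_mt, 0, 0);

        if (high_limit < length || low_limit > high_limit - length)
                return -ENOMEM;

        /* Non-zero return means no free range of @length bytes was found. */
        if (mas_empty_area_rev(&mas, low_limit, high_limit - 1, length))
                return -ENOMEM;

        /* mas.index..mas.last is the gap; place the mapping at its top. */
        return (mas.last + 1) - length;
}
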
*
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
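
Taken together, the *_with_pmode_sync() wrappers and the down_read_trylock() in the reclaim path follow a standard rw-semaphore pattern: memory-management paths take the CSF pmode_sync_sem for read so that, by implication from the "P.Mode entrance is in progress" warning above, whatever serialises protected-mode entry can take it for write and exclude them all at once. A generic sketch of that pattern, with example_* names standing in for the driver's own:

#include <linux/rwsem.h>

static DECLARE_RWSEM(example_pmode_sync_sem);

/* Many concurrent memory operations may hold the read side together. */
static void example_mem_path(void)
{
        down_read(&example_pmode_sync_sem);
        /* ... walk or modify GPU memory state ... */
        up_read(&example_pmode_sync_sem);
}

/* A reclaim-style path can refuse to wait rather than stall a pending writer. */
static bool example_try_mem_path(void)
{
        if (!down_read_trylock(&example_pmode_sync_sem))
                return false;
        /* ... best-effort work ... */
        up_read(&example_pmode_sync_sem);
        return true;
}

/* The exclusive phase (protected-mode entry, by implication) blocks until
 * every reader has dropped the semaphore.
 */
static void example_exclusive_phase(void)
{
        down_write(&example_pmode_sync_sem);
        /* ... state change that must not race with the readers ... */
        up_write(&example_pmode_sync_sem);
}
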