83 files changed, 3226 insertions, 1619 deletions
diff --git a/common/include/linux/version_compat_defs.h b/common/include/linux/version_compat_defs.h index 335147c..c9b1f62 100644 --- a/common/include/linux/version_compat_defs.h +++ b/common/include/linux/version_compat_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -48,4 +48,32 @@ typedef unsigned int __poll_t; #endif +#if KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE +/* This is defined inside kbase for matching the default to kernel's + * mmap_min_addr, used inside file mali_kbase_mmap.c. + * Note: the value is set at compile time, matching a kernel's configuration + * value. It would not be able to track any runtime update of mmap_min_addr. + */ +#ifdef CONFIG_MMU +#define kbase_mmap_min_addr CONFIG_DEFAULT_MMAP_MIN_ADDR + +#ifdef CONFIG_LSM_MMAP_MIN_ADDR +#if (CONFIG_LSM_MMAP_MIN_ADDR > CONFIG_DEFAULT_MMAP_MIN_ADDR) +/* Replace the default definition with CONFIG_LSM_MMAP_MIN_ADDR */ +#undef kbase_mmap_min_addr +#define kbase_mmap_min_addr CONFIG_LSM_MMAP_MIN_ADDR +#pragma message "kbase_mmap_min_addr compiled to CONFIG_LSM_MMAP_MIN_ADDR, no runtime update!" +#endif /* (CONFIG_LSM_MMAP_MIN_ADDR > CONFIG_DEFAULT_MMAP_MIN_ADDR) */ +#endif /* CONFIG_LSM_MMAP_MIN_ADDR */ + +#if (kbase_mmap_min_addr == CONFIG_DEFAULT_MMAP_MIN_ADDR) +#pragma message "kbase_mmap_min_addr compiled to CONFIG_DEFAULT_MMAP_MIN_ADDR, no runtime update!" +#endif + +#else /* CONFIG_MMU */ +#define kbase_mmap_min_addr (0UL) +#pragma message "kbase_mmap_min_addr compiled to (0UL), no runtime update!" +#endif /* CONFIG_MMU */ +#endif /* KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE */ + #endif /* _VERSION_COMPAT_DEFS_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_linux.h b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_linux.h index 69bc44c..c83cedd 100644 --- a/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_linux.h +++ b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_linux.h @@ -27,12 +27,10 @@ #define _UAPI_KBASE_MODEL_LINUX_H_ /* Generic model IRQs */ -enum model_linux_irqs { - MODEL_LINUX_JOB_IRQ, - MODEL_LINUX_GPU_IRQ, - MODEL_LINUX_MMU_IRQ, - MODEL_LINUX_NONE_IRQ, - MODEL_LINUX_NUM_TYPE_IRQ -}; +#define MODEL_LINUX_JOB_IRQ (0x1 << 0) +#define MODEL_LINUX_GPU_IRQ (0x1 << 1) +#define MODEL_LINUX_MMU_IRQ (0x1 << 2) + +#define MODEL_LINUX_IRQ_MASK (MODEL_LINUX_JOB_IRQ | MODEL_LINUX_GPU_IRQ | MODEL_LINUX_MMU_IRQ) #endif /* _UAPI_KBASE_MODEL_LINUX_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h index ec8c02f..c18c6fc 100644 --- a/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h +++ b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h @@ -145,6 +145,9 @@ #define BASE_CSF_TILER_OOM_EXCEPTION_FLAG (1u << 0) #define BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK (BASE_CSF_TILER_OOM_EXCEPTION_FLAG) +/* Initial value for LATEST_FLUSH register */ +#define POWER_DOWN_LATEST_FLUSH_VALUE ((uint32_t)1) + /** * enum base_kcpu_command_type - Kernel CPU queue command type. 
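Note on the model IRQ change above: the identifiers are now one-hot bit flags rather than enum values, which lines up with the later hunk in this patch that makes gpu_device_raise_irq() take a plain u32. A minimal hypothetical call site (the real callers inside the dummy model are not part of this excerpt):

/* Hypothetical helper: raise the model's JOB IRQ, checking the bit is inside the defined mask. */
static void example_raise_job_irq(void *model)
{
	u32 irq = MODEL_LINUX_JOB_IRQ;

	if (irq & MODEL_LINUX_IRQ_MASK)
		gpu_device_raise_irq(model, irq);
}
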
* @BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: fence_signal, diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h index 642ca34..7c37cfc 100644 --- a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -79,11 +79,13 @@ * - prfcnt_block_metadata::block_idx gaps. * - PRFCNT_CONTROL_CMD_SAMPLE_ASYNC is removed. * 1.18: + * - Relax the requirement to create a mapping with BASE_MEM_MAP_TRACKING_HANDLE + * before allocating GPU memory for the context. * - CPU mappings of USER_BUFFER imported memory handles must be cached. */ #define BASE_UK_VERSION_MAJOR 1 -#define BASE_UK_VERSION_MINOR 17 +#define BASE_UK_VERSION_MINOR 18 /** * struct kbase_ioctl_version_check - Check version compatibility between diff --git a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h index 75ae6a1..0ca5d90 100644 --- a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h +++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h @@ -27,4 +27,15 @@ #define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE + (r)) #define STATUS 0x004 /* (RO) Status register */ +/* USER base address */ +#define USER_BASE 0x0010000 +#define USER_REG(r) (USER_BASE + (r)) + +/* USER register offsets */ +#define LATEST_FLUSH 0x0000 /* () Flush ID of latest clean-and-invalidate operation */ + +/* DOORBELLS base address */ +#define DOORBELLS_BASE 0x0080000 +#define DOORBELLS_REG(r) (DOORBELLS_BASE + (r)) + #endif /* _UAPI_KBASE_GPU_REGMAP_CSF_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h index 87f849d..9bfd6d2 100644 --- a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h +++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h @@ -43,4 +43,8 @@ #define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */ #define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */ +#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */ + +#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r)) + #endif /* _UAPI_KBASE_GPU_REGMAP_JM_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h index cdfcf8d..1f33167 100644 --- a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h +++ b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h @@ -36,6 +36,9 @@ #define GPU_ID 0x000 /* (RO) GPU and revision identifier */ +#define GPU_IRQ_CLEAR 0x024 /* (WO) */ +#define GPU_IRQ_STATUS 0x02C /* (RO) */ + #define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ #define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ @@ -62,6 +65,7 @@ #define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */ #define JOB_IRQ_MASK 0x008 /* Interrupt mask 
register */ +#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ /* MMU control registers */ @@ -70,6 +74,9 @@ #define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r)) #define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */ +#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */ +#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ +#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ #define MMU_AS0 0x400 /* Configuration registers for address space 0 */ diff --git a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h index 902d0ce..ac6affe 100644 --- a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -140,10 +140,12 @@ * - prfcnt_block_metadata::block_idx gaps. * - PRFCNT_CONTROL_CMD_SAMPLE_ASYNC is removed. * 11.38: + * - Relax the requirement to create a mapping with BASE_MEM_MAP_TRACKING_HANDLE + * before allocating GPU memory for the context. * - CPU mappings of USER_BUFFER imported memory handles must be cached. */ #define BASE_UK_VERSION_MAJOR 11 -#define BASE_UK_VERSION_MINOR 37 +#define BASE_UK_VERSION_MINOR 38 /** * struct kbase_ioctl_version_check - Check version compatibility between diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild index 32b4d37..73375f6 100644 --- a/mali_kbase/Kbuild +++ b/mali_kbase/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -69,7 +69,7 @@ endif # # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= '"r42p0-01eac0"' +MALI_RELEASE_NAME ?= '"r43p0-01eac0"' # Set up defaults if not defined by build system ifeq ($(CONFIG_MALI_DEBUG), y) MALI_UNIT_TEST = 1 diff --git a/mali_kbase/Kconfig b/mali_kbase/Kconfig index 1c5e1f8..3d5a14a 100644 --- a/mali_kbase/Kconfig +++ b/mali_kbase/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -115,21 +115,6 @@ config MALI_MIDGARD_ENABLE_TRACE Enables tracing in kbase. 
Trace log available through the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled -config MALI_FW_CORE_DUMP - bool "Enable support for FW core dump" - depends on MALI_MIDGARD && MALI_CSF_SUPPORT - default y - help - Adds ability to request firmware core dump through the "fw_core_dump" - debugfs file - - Example: - * To explicitly request core dump: - echo 1 > /sys/kernel/debug/mali0/fw_core_dump - * To output current core dump (after explicitly requesting a core dump, - or kernel driver reported an internal firmware error): - cat /sys/kernel/debug/mali0/fw_core_dump - config MALI_ARBITER_SUPPORT bool "Enable arbiter support for Mali" depends on MALI_MIDGARD && !MALI_CSF_SUPPORT @@ -181,7 +166,19 @@ menuconfig MALI_EXPERT if MALI_EXPERT -config MALI_2MB_ALLOC +config LARGE_PAGE_ALLOC_OVERRIDE + bool "Override default setting of 2MB pages" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + An override config for LARGE_PAGE_ALLOC config. + When LARGE_PAGE_ALLOC_OVERRIDE is Y, 2MB page allocation will be + enabled by LARGE_PAGE_ALLOC. When this is N, the feature will be + enabled when GPU HW satisfies requirements. + + If in doubt, say N + +config LARGE_PAGE_ALLOC bool "Attempt to allocate 2MB pages" depends on MALI_MIDGARD && MALI_EXPERT default n @@ -190,6 +187,10 @@ config MALI_2MB_ALLOC allocate 2MB pages from the kernel. This reduces TLB pressure and helps to prevent memory fragmentation. + Note this config applies only when LARGE_PAGE_ALLOC_OVERRIDE config + is enabled and enabling this on a GPU HW that does not satisfy + requirements can cause serious problem. + If in doubt, say N config MALI_MEMORY_FULLY_BACKED @@ -225,14 +226,6 @@ config MALI_ERROR_INJECT help Enables insertion of errors to test module failure and recovery mechanisms. -config MALI_GEM5_BUILD - bool "Enable build of Mali kernel driver for GEM5" - depends on MALI_MIDGARD && MALI_EXPERT - default n - help - This option is to do a Mali GEM5 build. - If unsure, say N. - comment "Debug options" depends on MALI_MIDGARD && MALI_EXPERT diff --git a/mali_kbase/Makefile b/mali_kbase/Makefile index 5d88b14..5b3e99b 100644 --- a/mali_kbase/Makefile +++ b/mali_kbase/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
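The help text for the two new large-page options above implies a simple decision: with LARGE_PAGE_ALLOC_OVERRIDE=y the driver follows LARGE_PAGE_ALLOC, otherwise it defers to what the GPU hardware supports. A rough sketch of that logic follows; kbase_hw_supports_large_pages() is a hypothetical stand-in, and the driver code that actually makes this decision is not shown in this excerpt:

/* Sketch only: how the two Kconfig options are expected to combine. */
static bool example_use_large_pages(struct kbase_device *kbdev)
{
	if (IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE))
		return IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC);

	/* No override: enable 2MB pages only when the GPU HW meets the requirements. */
	return kbase_hw_supports_large_pages(kbdev);
}
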
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -58,10 +58,7 @@ ifeq ($(CONFIG_MALI_MIDGARD),m) endif ifeq ($(CONFIG_MALI_CSF_SUPPORT), y) - CONFIG_MALI_FW_CORE_DUMP ?= y CONFIG_MALI_CORESIGHT ?= n - else - CONFIG_MALI_FW_CORE_DUMP ?= n endif # @@ -101,7 +98,8 @@ ifeq ($(CONFIG_MALI_MIDGARD),m) else # Prevent misuse when CONFIG_MALI_EXPERT=n CONFIG_MALI_CORESTACK = n - CONFIG_MALI_2MB_ALLOC = n + CONFIG_LARGE_PAGE_ALLOC_OVERRIDE = n + CONFIG_LARGE_PAGE_ALLOC = n CONFIG_MALI_PWRSOFT_765 = n CONFIG_MALI_MEMORY_FULLY_BACKED = n CONFIG_MALI_JOB_DUMP = n @@ -143,7 +141,6 @@ else CONFIG_MALI_KUTF_IRQ_TEST = n CONFIG_MALI_KUTF_CLK_RATE_TRACE = n CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n - CONFIG_MALI_FW_CORE_DUMP = n endif # All Mali CONFIG should be listed here @@ -155,14 +152,14 @@ CONFIGS := \ CONFIG_MALI_ARBITRATION \ CONFIG_MALI_PARTITION_MANAGER \ CONFIG_MALI_REAL_HW \ - CONFIG_MALI_GEM5_BUILD \ CONFIG_MALI_DEVFREQ \ CONFIG_MALI_MIDGARD_DVFS \ CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND \ CONFIG_MALI_DMA_BUF_LEGACY_COMPAT \ CONFIG_MALI_EXPERT \ CONFIG_MALI_CORESTACK \ - CONFIG_MALI_2MB_ALLOC \ + CONFIG_LARGE_PAGE_ALLOC_OVERRIDE \ + CONFIG_LARGE_PAGE_ALLOC \ CONFIG_MALI_PWRSOFT_765 \ CONFIG_MALI_MEMORY_FULLY_BACKED \ CONFIG_MALI_JOB_DUMP \ @@ -183,7 +180,6 @@ CONFIGS := \ CONFIG_MALI_KUTF_CLK_RATE_TRACE \ CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \ CONFIG_MALI_XEN \ - CONFIG_MALI_FW_CORE_DUMP \ CONFIG_MALI_CORESIGHT @@ -267,6 +263,12 @@ ifeq ($(CONFIG_GCOV_KERNEL),y) EXTRA_CFLAGS += -DGCOV_PROFILE=1 endif +ifeq ($(CONFIG_MALI_KCOV),y) + KBUILD_CFLAGS += $(call cc-option, -fsanitize-coverage=trace-cmp) + EXTRA_CFLAGS += -DKCOV=1 + EXTRA_CFLAGS += -DKCOV_ENABLE_COMPARISONS=1 +endif + all: $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules diff --git a/mali_kbase/Mconfig b/mali_kbase/Mconfig index d5b3067..f398d1a 100644 --- a/mali_kbase/Mconfig +++ b/mali_kbase/Mconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -117,21 +117,6 @@ config MALI_MIDGARD_ENABLE_TRACE Enables tracing in kbase. Trace log available through the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled -config MALI_FW_CORE_DUMP - bool "Enable support for FW core dump" - depends on MALI_MIDGARD && MALI_CSF_SUPPORT - default y - help - Adds ability to request firmware core dump through the "fw_core_dump" - debugfs file - - Example: - * To explicitly request core dump: - echo 1 > /sys/kernel/debug/mali0/fw_core_dump - * To output current core dump (after explicitly requesting a core dump, - or kernel driver reported an internal firmware error): - cat /sys/kernel/debug/mali0/fw_core_dump - config MALI_ARBITER_SUPPORT bool "Enable arbiter support for Mali" depends on MALI_MIDGARD && !MALI_CSF_SUPPORT @@ -250,14 +235,6 @@ config MALI_ERROR_INJECT depends on MALI_MIDGARD && MALI_EXPERT default y if !MALI_ERROR_INJECT_NONE -config MALI_GEM5_BUILD - bool "Enable build of Mali kernel driver for GEM5" - depends on MALI_MIDGARD && MALI_EXPERT - default n - help - This option is to do a Mali GEM5 build. - If unsure, say N. 
- config MALI_DEBUG bool "Enable debug build" depends on MALI_MIDGARD && MALI_EXPERT @@ -275,6 +252,14 @@ config MALI_GCOV_KERNEL coverage information. When built against a supporting kernel, the coverage information will be available via debugfs. +config MALI_KCOV + bool "Enable kcov coverage to support fuzzers" + depends on MALI_MIDGARD && MALI_DEBUG + default n + help + Choose this option to enable building with fuzzing-oriented + coverage, to improve the random test cases that are generated. + config MALI_FENCE_DEBUG bool "Enable debug sync fence usage" depends on MALI_MIDGARD && MALI_EXPERT diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c index 9a17494..7df2173 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -190,6 +190,27 @@ static u64 select_job_chain(struct kbase_jd_atom *katom) return jc; } +static inline bool kbasep_jm_wait_js_free(struct kbase_device *kbdev, unsigned int js, + struct kbase_context *kctx) +{ + const ktime_t wait_loop_start = ktime_get_raw(); + const s64 max_timeout = (s64)kbdev->js_data.js_free_wait_time_ms; + s64 diff = 0; + + /* wait for the JS_COMMAND_NEXT register to reach the given status value */ + do { + if (!kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT))) + return true; + + diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start)); + } while (diff < max_timeout); + + dev_err(kbdev->dev, "Timeout in waiting for job slot %u to become free for ctx %d_%u", js, + kctx->tgid, kctx->id); + + return false; +} + int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, unsigned int js) { struct kbase_context *kctx; @@ -203,8 +224,7 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, kctx = katom->kctx; /* Command register must be available */ - if (WARN(!kbasep_jm_is_js_free(kbdev, js, kctx), - "Attempting to assign to occupied slot %d in kctx %pK\n", js, (void *)kctx)) + if (!kbasep_jm_wait_js_free(kbdev, js, kctx)) return -EPERM; dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n", diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h index e4cff1f..bfd55a6 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2016, 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016, 2018-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -52,14 +52,6 @@ static inline char *kbasep_make_job_slot_string(unsigned int js, char *js_string } #endif -#if !MALI_USE_CSF -static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, unsigned int js, - struct kbase_context *kctx) -{ - return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT)); -} -#endif - /** * kbase_job_hw_submit() - Submit a job to the GPU * @kbdev: Device pointer diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c index 388b37f..7db2b35 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -1001,17 +1001,11 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) other_slots_busy(kbdev, js)) break; -#ifdef CONFIG_MALI_GEM5_BUILD - if (!kbasep_jm_is_js_free(kbdev, js, - katom[idx]->kctx)) - break; -#endif /* Check if this job needs the cycle counter * enabled before submission */ if (katom[idx]->core_req & BASE_JD_REQ_PERMON) - kbase_pm_request_gpu_cycle_counter_l2_is_on( - kbdev); + kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev); if (!kbase_job_hw_submit(kbdev, katom[idx], js)) { katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_SUBMITTED; @@ -1025,9 +1019,12 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) /* Inform platform at start/finish of atom */ kbasep_platform_event_atom_submit(katom[idx]); - } - else + } else { + if (katom[idx]->core_req & BASE_JD_REQ_PERMON) + kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + break; + } /* ***TRANSITION TO HIGHER STATE*** */ fallthrough; diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c index 9d5f15e..dd16fb2 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c +++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c @@ -2024,8 +2024,6 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER, counter_index, is_low_word); - } else if (addr == USER_REG(LATEST_FLUSH)) { - *value = 0; } #endif else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) { diff --git a/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c b/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c index 75b1e7e..f310cc7 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c +++ b/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2015, 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015, 2018-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,14 +25,17 @@ static struct kbase_error_atom *error_track_list; -unsigned int rand_seed; +#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM + +/** Kernel 6.1.0 has dropped prandom_u32(), use get_random_u32() */ +#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) +#define prandom_u32 get_random_u32 +#endif /*following error probability are set quite high in order to stress the driver*/ -unsigned int error_probability = 50; /* to be set between 0 and 100 */ +static unsigned int error_probability = 50; /* to be set between 0 and 100 */ /* probability to have multiple error give that there is an error */ -unsigned int multiple_error_probability = 50; - -#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM +static unsigned int multiple_error_probability = 50; /* all the error conditions supported by the model */ #define TOTAL_FAULTS 27 diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.c b/mali_kbase/backend/gpu/mali_kbase_model_linux.c index b37680d..e90e4df 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_linux.c +++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.c @@ -105,7 +105,7 @@ static void serve_mmu_irq(struct work_struct *work) kmem_cache_free(kbdev->irq_slab, data); } -void gpu_device_raise_irq(void *model, enum model_linux_irqs irq) +void gpu_device_raise_irq(void *model, u32 irq) { struct model_irq_data *data; struct kbase_device *kbdev = gpu_device_get_data(model); diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.h b/mali_kbase/backend/gpu/mali_kbase_model_linux.h index a24db17..4cf1235 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_linux.h +++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.h @@ -124,7 +124,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value); * * This hook is global to the model Linux framework. */ -void gpu_device_raise_irq(void *model, enum model_linux_irqs irq); +void gpu_device_raise_irq(void *model, u32 irq); /** * gpu_device_set_data() - Private model set data function. diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c index c51b133..0caf63e 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c @@ -2575,26 +2575,33 @@ void kbase_pm_disable_interrupts(struct kbase_device *kbdev) KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts); #if MALI_USE_CSF +/** + * update_user_reg_page_mapping - Update the mapping for USER Register page + * + * @kbdev: The kbase device structure for the device. + * + * This function must be called to unmap the dummy or real page from USER Register page + * mapping whenever GPU is powered up or down. The dummy or real page would get + * appropriately mapped in when Userspace reads the LATEST_FLUSH value. + */ static void update_user_reg_page_mapping(struct kbase_device *kbdev) { + struct kbase_context *kctx, *n; + lockdep_assert_held(&kbdev->pm.lock); mutex_lock(&kbdev->csf.reg_lock); - - /* Only if the mappings for USER page exist, update all PTEs associated to it */ - if (kbdev->csf.nr_user_page_mapped > 0) { - if (likely(kbdev->csf.mali_file_inode)) { - /* This would zap the pte corresponding to the mapping of User - * register page for all the Kbase contexts. 
- */ - unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping, - BASEP_MEM_CSF_USER_REG_PAGE_HANDLE, PAGE_SIZE, 1); - } else { - dev_err(kbdev->dev, - "Device file inode not exist even if USER page previously mapped"); - } + list_for_each_entry_safe(kctx, n, &kbdev->csf.user_reg.list, csf.user_reg.link) { + /* This would zap the PTE corresponding to the mapping of User + * Register page of the kbase context. The mapping will be reestablished + * when the context (user process) needs to access to the page. + */ + unmap_mapping_range(kbdev->csf.user_reg.filp->f_inode->i_mapping, + kctx->csf.user_reg.file_offset << PAGE_SHIFT, PAGE_SIZE, 1); + list_del_init(&kctx->csf.user_reg.link); + dev_dbg(kbdev->dev, "Updated USER Reg page mapping of ctx %d_%d", kctx->tgid, + kctx->id); } - mutex_unlock(&kbdev->csf.reg_lock); } #endif diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c index 5110e3d..7a4d662 100644 --- a/mali_kbase/backend/gpu/mali_kbase_time.c +++ b/mali_kbase/backend/gpu/mali_kbase_time.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,6 +22,8 @@ #include <mali_kbase.h> #include <mali_kbase_hwaccess_time.h> #if MALI_USE_CSF +#include <asm/arch_timer.h> +#include <linux/gcd.h> #include <csf/mali_kbase_csf_timeout.h> #endif #include <device/mali_kbase_device.h> @@ -121,20 +123,29 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, /* Only for debug messages, safe default in case it's mis-maintained */ const char *selector_str = "(unknown)"; - if (WARN(!kbdev->lowest_gpu_freq_khz, - "Lowest frequency uninitialized! Using reference frequency for scaling")) { + if (!kbdev->lowest_gpu_freq_khz) { + dev_dbg(kbdev->dev, + "Lowest frequency uninitialized! Using reference frequency for scaling"); freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ; } else { freq_khz = kbdev->lowest_gpu_freq_khz; } switch (selector) { + case MMU_AS_INACTIVE_WAIT_TIMEOUT: + selector_str = "MMU_AS_INACTIVE_WAIT_TIMEOUT"; + nr_cycles = MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES; + break; case KBASE_TIMEOUT_SELECTOR_COUNT: default: #if !MALI_USE_CSF WARN(1, "Invalid timeout selector used! Using default value"); nr_cycles = JM_DEFAULT_TIMEOUT_CYCLES; break; + case JM_DEFAULT_JS_FREE_TIMEOUT: + selector_str = "JM_DEFAULT_JS_FREE_TIMEOUT"; + nr_cycles = JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES; + break; #else /* Use Firmware timeout if invalid selection */ WARN(1, @@ -204,3 +215,65 @@ u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev) return lo | (((u64) hi1) << 32); } + +#if MALI_USE_CSF +u64 __maybe_unused kbase_backend_time_convert_gpu_to_cpu(struct kbase_device *kbdev, u64 gpu_ts) +{ + if (WARN_ON(!kbdev)) + return 0; + + return div64_u64(gpu_ts * kbdev->backend_time.multiplier, kbdev->backend_time.divisor) + + kbdev->backend_time.offset; +} + +/** + * get_cpu_gpu_time() - Get current CPU and GPU timestamps. + * + * @kbdev: Kbase device. + * @cpu_ts: Output CPU timestamp. + * @gpu_ts: Output GPU timestamp. + * @gpu_cycle: Output GPU cycle counts. 
+ */ +static void get_cpu_gpu_time(struct kbase_device *kbdev, u64 *cpu_ts, u64 *gpu_ts, u64 *gpu_cycle) +{ + struct timespec64 ts; + + kbase_backend_get_gpu_time(kbdev, gpu_cycle, gpu_ts, &ts); + + if (cpu_ts) + *cpu_ts = ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; +} +#endif + +int kbase_backend_time_init(struct kbase_device *kbdev) +{ +#if MALI_USE_CSF + u64 cpu_ts = 0; + u64 gpu_ts = 0; + u64 freq; + u64 common_factor; + + get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL); + freq = arch_timer_get_cntfrq(); + + if (!freq) { + dev_warn(kbdev->dev, "arch_timer_get_rate() is zero!"); + return -EINVAL; + } + + common_factor = gcd(NSEC_PER_SEC, freq); + + kbdev->backend_time.multiplier = div64_u64(NSEC_PER_SEC, common_factor); + kbdev->backend_time.divisor = div64_u64(freq, common_factor); + + if (!kbdev->backend_time.divisor) { + dev_warn(kbdev->dev, "CPU to GPU divisor is zero!"); + return -EINVAL; + } + + kbdev->backend_time.offset = cpu_ts - div64_u64(gpu_ts * kbdev->backend_time.multiplier, + kbdev->backend_time.divisor); +#endif + + return 0; +} diff --git a/mali_kbase/build.bp b/mali_kbase/build.bp index a563058..4f475ab 100644 --- a/mali_kbase/build.bp +++ b/mali_kbase/build.bp @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -62,8 +62,11 @@ bob_defaults { mali_dma_buf_legacy_compat: { kbuild_options: ["CONFIG_MALI_DMA_BUF_LEGACY_COMPAT=y"], }, + large_page_alloc_override: { + kbuild_options: ["CONFIG_LARGE_PAGE_ALLOC_OVERRIDE=y"], + }, large_page_alloc: { - kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"], + kbuild_options: ["CONFIG_LARGE_PAGE_ALLOC=y"], }, mali_memory_fully_backed: { kbuild_options: ["CONFIG_MALI_MEMORY_FULLY_BACKED=y"], @@ -86,9 +89,6 @@ bob_defaults { mali_error_inject: { kbuild_options: ["CONFIG_MALI_ERROR_INJECT=y"], }, - mali_gem5_build: { - kbuild_options: ["CONFIG_MALI_GEM5_BUILD=y"], - }, mali_debug: { kbuild_options: [ "CONFIG_MALI_DEBUG=y", @@ -137,9 +137,6 @@ bob_defaults { platform_is_fpga: { kbuild_options: ["CONFIG_MALI_IS_FPGA=y"], }, - mali_fw_core_dump: { - kbuild_options: ["CONFIG_MALI_FW_CORE_DUMP=y"], - }, mali_coresight: { kbuild_options: ["CONFIG_MALI_CORESIGHT=y"], }, @@ -194,6 +191,15 @@ bob_kernel_module { "platform/*/*.c", "platform/*/*.h", "platform/*/Kbuild", + "platform/*/*/*.c", + "platform/*/*/*.h", + "platform/*/*/Kbuild", + "platform/*/*/*.c", + "platform/*/*/*.h", + "platform/*/*/Kbuild", + "platform/*/*/*/*.c", + "platform/*/*/*/*.h", + "platform/*/*/*/Kbuild", "thirdparty/*.c", "thirdparty/Kbuild", "debug/*.c", diff --git a/mali_kbase/context/mali_kbase_context.c b/mali_kbase/context/mali_kbase_context.c index 792f724..b8036b8 100644 --- a/mali_kbase/context/mali_kbase_context.c +++ b/mali_kbase/context/mali_kbase_context.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
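To make the arithmetic in kbase_backend_time_init() above concrete: with an assumed arch timer frequency of 26 MHz (a value chosen only for illustration), gcd(1000000000, 26000000) = 2000000, so multiplier = 500 and divisor = 13, and kbase_backend_time_convert_gpu_to_cpu() then maps GPU ticks to CPU nanoseconds as gpu_ts * 500 / 13 + offset. A standalone sketch of the same computation:

/* Worked example of the multiplier/divisor/offset derivation, using an assumed
 * 26 MHz timer frequency; the real value comes from arch_timer_get_cntfrq().
 */
#include <linux/gcd.h>
#include <linux/math64.h>

static u64 example_gpu_to_cpu_ns(u64 gpu_ts, u64 cpu_ts_at_init, u64 gpu_ts_at_init)
{
	const u64 freq = 26000000;                                /* assumed Hz */
	const u64 common = gcd(1000000000ULL, freq);              /* 2000000 */
	const u64 multiplier = div64_u64(1000000000ULL, common);  /* 500 */
	const u64 divisor = div64_u64(freq, common);              /* 13 */
	/* Same expression the driver uses to anchor GPU time to CPU time at init. */
	const u64 offset =
		cpu_ts_at_init - div64_u64(gpu_ts_at_init * multiplier, divisor);

	return div64_u64(gpu_ts * multiplier, divisor) + offset;
}
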
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,6 +22,12 @@ /* * Base kernel context APIs */ +#include <linux/version.h> +#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE +#include <linux/sched/task.h> +#else +#include <linux/sched.h> +#endif #include <mali_kbase.h> #include <gpu/mali_kbase_gpu_regmap.h> @@ -129,13 +135,51 @@ int kbase_context_common_init(struct kbase_context *kctx) /* creating a context is considered a disjoint event */ kbase_disjoint_event(kctx->kbdev); - spin_lock_init(&kctx->mm_update_lock); kctx->process_mm = NULL; + kctx->task = NULL; atomic_set(&kctx->nonmapped_pages, 0); atomic_set(&kctx->permanent_mapped_pages, 0); kctx->tgid = current->tgid; kctx->pid = current->pid; + /* Check if this is a Userspace created context */ + if (likely(kctx->filp)) { + struct pid *pid_struct; + + rcu_read_lock(); + pid_struct = find_get_pid(kctx->tgid); + if (likely(pid_struct)) { + struct task_struct *task = pid_task(pid_struct, PIDTYPE_PID); + + if (likely(task)) { + /* Take a reference on the task to avoid slow lookup + * later on from the page allocation loop. + */ + get_task_struct(task); + kctx->task = task; + } else { + dev_err(kctx->kbdev->dev, + "Failed to get task pointer for %s/%d", + current->comm, current->pid); + err = -ESRCH; + } + + put_pid(pid_struct); + } else { + dev_err(kctx->kbdev->dev, + "Failed to get pid pointer for %s/%d", + current->comm, current->pid); + err = -ESRCH; + } + rcu_read_unlock(); + + if (unlikely(err)) + return err; + + kbase_mem_mmgrab(); + kctx->process_mm = current->mm; + } + atomic_set(&kctx->used_pages, 0); mutex_init(&kctx->reg_lock); @@ -168,13 +212,16 @@ int kbase_context_common_init(struct kbase_context *kctx) kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1; mutex_lock(&kctx->kbdev->kctx_list_lock); - err = kbase_insert_kctx_to_process(kctx); - if (err) - dev_err(kctx->kbdev->dev, - "(err:%d) failed to insert kctx to kbase_process\n", err); - mutex_unlock(&kctx->kbdev->kctx_list_lock); + if (err) { + dev_err(kctx->kbdev->dev, + "(err:%d) failed to insert kctx to kbase_process", err); + if (likely(kctx->filp)) { + mmdrop(kctx->process_mm); + put_task_struct(kctx->task); + } + } return err; } @@ -260,6 +307,11 @@ void kbase_context_common_term(struct kbase_context *kctx) kbase_remove_kctx_from_process(kctx); mutex_unlock(&kctx->kbdev->kctx_list_lock); + if (likely(kctx->filp)) { + mmdrop(kctx->process_mm); + put_task_struct(kctx->task); + } + KBASE_KTRACE_ADD(kctx->kbdev, CORE_CTX_DESTROY, kctx, 0u); } diff --git a/mali_kbase/context/mali_kbase_context.h b/mali_kbase/context/mali_kbase_context.h index a0c51c9..7c90e27 100644 --- a/mali_kbase/context/mali_kbase_context.h +++ b/mali_kbase/context/mali_kbase_context.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2017, 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -93,6 +93,19 @@ static inline bool kbase_ctx_flag(struct kbase_context *kctx, } /** + * kbase_ctx_compat_mode - Indicate whether a kbase context needs to operate + * in compatibility mode for 32-bit userspace. + * @kctx: kbase context + * + * Return: True if needs to maintain compatibility, False otherwise. 
+ */ +static inline bool kbase_ctx_compat_mode(struct kbase_context *kctx) +{ + return !IS_ENABLED(CONFIG_64BIT) || + (IS_ENABLED(CONFIG_64BIT) && kbase_ctx_flag(kctx, KCTX_COMPAT)); +} + +/** * kbase_ctx_flag_clear - Clear @flag on @kctx * @kctx: Pointer to kbase context * @flag: Flag to clear diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c index dbfcfde..88a3975 100644 --- a/mali_kbase/csf/mali_kbase_csf.c +++ b/mali_kbase/csf/mali_kbase_csf.c @@ -39,7 +39,9 @@ #define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK) #define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK) -#define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1) + +#define CS_RING_BUFFER_MAX_SIZE ((uint32_t)(1 << 31)) /* 2GiB */ +#define CS_RING_BUFFER_MIN_SIZE ((uint32_t)4096) #define PROTM_ALLOC_MAX_RETRIES ((u8)5) @@ -73,6 +75,38 @@ struct irq_idle_and_protm_track { s8 idle_slot; }; +/** + * kbasep_ctx_user_reg_page_mapping_term() - Terminate resources for USER Register Page. + * + * @kctx: Pointer to the kbase context + */ +static void kbasep_ctx_user_reg_page_mapping_term(struct kbase_context *kctx) +{ + struct kbase_device *kbdev = kctx->kbdev; + + if (unlikely(kctx->csf.user_reg.vma)) + dev_err(kbdev->dev, "VMA for USER Register page exist on termination of ctx %d_%d", + kctx->tgid, kctx->id); + if (WARN_ON_ONCE(!list_empty(&kctx->csf.user_reg.link))) + list_del_init(&kctx->csf.user_reg.link); +} + +/** + * kbasep_ctx_user_reg_page_mapping_init() - Initialize resources for USER Register Page. + * + * @kctx: Pointer to the kbase context + * + * @return: 0 on success. + */ +static int kbasep_ctx_user_reg_page_mapping_init(struct kbase_context *kctx) +{ + INIT_LIST_HEAD(&kctx->csf.user_reg.link); + kctx->csf.user_reg.vma = NULL; + kctx->csf.user_reg.file_offset = 0; + + return 0; +} + static void put_user_pages_mmap_handle(struct kbase_context *kctx, struct kbase_queue *queue) { @@ -262,7 +296,7 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, struct ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], KBASEP_NUM_CS_USER_IO_PAGES, - queue->phys, false); + queue->phys, false, kctx->task); if (ret != KBASEP_NUM_CS_USER_IO_PAGES) { /* Marking both the phys to zero for indicating there is no phys allocated */ queue->phys[0].tagged_addr = 0; @@ -288,11 +322,8 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, struct queue->db_file_offset = kbdev->csf.db_file_offsets; kbdev->csf.db_file_offsets += BASEP_QUEUE_NR_MMAP_USER_PAGES; -#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) - WARN(atomic_read(&queue->refcount) != 1, "Incorrect refcounting for queue object\n"); -#else - WARN(refcount_read(&queue->refcount) != 1, "Incorrect refcounting for queue object\n"); -#endif + WARN(kbase_refcount_read(&queue->refcount) != 1, + "Incorrect refcounting for queue object\n"); /* This is the second reference taken on the queue object and * would be dropped only when the IO mapping is removed either * explicitly by userspace or implicitly by kernel on process exit. 
@@ -364,21 +395,13 @@ static struct kbase_queue *find_queue(struct kbase_context *kctx, u64 base_addr) static void get_queue(struct kbase_queue *queue) { -#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) - WARN_ON(!atomic_inc_not_zero(&queue->refcount)); -#else - WARN_ON(!refcount_inc_not_zero(&queue->refcount)); -#endif + WARN_ON(!kbase_refcount_inc_not_zero(&queue->refcount)); } static void release_queue(struct kbase_queue *queue) { lockdep_assert_held(&queue->kctx->csf.lock); -#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) - if (atomic_dec_and_test(&queue->refcount)) { -#else - if (refcount_dec_and_test(&queue->refcount)) { -#endif + if (kbase_refcount_dec_and_test(&queue->refcount)) { /* The queue can't still be on the per context list. */ WARN_ON(!list_empty(&queue->link)); WARN_ON(queue->group); @@ -394,7 +417,7 @@ static void release_queue(struct kbase_queue *queue) * would free up the GPU queue memory. */ kbase_gpu_vm_lock(queue->kctx); - kbase_va_region_no_user_free_put(queue->kctx, queue->queue_reg); + kbase_va_region_no_user_free_dec(queue->queue_reg); kbase_gpu_vm_unlock(queue->kctx); kfree(queue); @@ -500,17 +523,16 @@ static int csf_queue_register_internal(struct kbase_context *kctx, queue->kctx = kctx; queue->base_addr = queue_addr; - queue->queue_reg = kbase_va_region_no_user_free_get(kctx, region); + + queue->queue_reg = region; + kbase_va_region_no_user_free_inc(region); + queue->size = (queue_size << PAGE_SHIFT); queue->csi_index = KBASEP_IF_NR_INVALID; queue->enabled = false; queue->priority = reg->priority; -#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) - atomic_set(&queue->refcount, 1); -#else - refcount_set(&queue->refcount, 1); -#endif + kbase_refcount_set(&queue->refcount, 1); queue->group = NULL; queue->bind_state = KBASE_CSF_QUEUE_UNBOUND; @@ -567,6 +589,13 @@ out: int kbase_csf_queue_register(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_register *reg) { + /* Validate the ring buffer configuration parameters */ + if (reg->buffer_size < CS_RING_BUFFER_MIN_SIZE || + reg->buffer_size > CS_RING_BUFFER_MAX_SIZE || + reg->buffer_size & (reg->buffer_size - 1) || !reg->buffer_gpu_addr || + reg->buffer_gpu_addr & ~PAGE_MASK) + return -EINVAL; + return csf_queue_register_internal(kctx, reg, NULL); } @@ -585,6 +614,13 @@ int kbase_csf_queue_register_ex(struct kbase_context *kctx, if (glb_version < kbase_csf_interface_version(1, 1, 0)) return -EINVAL; + /* Validate the ring buffer configuration parameters */ + if (reg->buffer_size < CS_RING_BUFFER_MIN_SIZE || + reg->buffer_size > CS_RING_BUFFER_MAX_SIZE || + reg->buffer_size & (reg->buffer_size - 1) || !reg->buffer_gpu_addr || + reg->buffer_gpu_addr & ~PAGE_MASK) + return -EINVAL; + /* Validate the cs_trace configuration parameters */ if (reg->ex_buffer_size && ((reg->ex_event_size > max_size) || @@ -904,6 +940,9 @@ static void unbind_stopped_queue(struct kbase_context *kctx, { lockdep_assert_held(&kctx->csf.lock); + if (WARN_ON(queue->csi_index < 0)) + return; + if (queue->bind_state != KBASE_CSF_QUEUE_UNBOUND) { unsigned long flags; @@ -917,6 +956,7 @@ static void unbind_stopped_queue(struct kbase_context *kctx, kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags); put_user_pages_mmap_handle(kctx, queue); + WARN_ON_ONCE(queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID); queue->bind_state = KBASE_CSF_QUEUE_UNBOUND; } } @@ -1094,7 +1134,7 @@ static int create_normal_suspend_buffer(struct kbase_context *const kctx, /* Get physical page for a normal suspend buffer */ err = 
kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages, - &s_buf->phy[0], false); + &s_buf->phy[0], false, kctx->task); if (err < 0) { kfree(s_buf->phy); @@ -1534,6 +1574,7 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx, } KBASE_EXPORT_TEST_API(kbase_csf_queue_group_terminate); +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST int kbase_csf_queue_group_suspend(struct kbase_context *kctx, struct kbase_suspend_copy_buffer *sus_buf, u8 group_handle) @@ -1564,6 +1605,7 @@ int kbase_csf_queue_group_suspend(struct kbase_context *kctx, return err; } +#endif void kbase_csf_add_group_fatal_error( struct kbase_queue_group *const group, @@ -1632,8 +1674,6 @@ int kbase_csf_ctx_init(struct kbase_context *kctx) kbase_csf_event_init(kctx); - kctx->csf.user_reg_vma = NULL; - /* Mark all the cookies as 'free' */ bitmap_fill(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE); @@ -1653,7 +1693,14 @@ int kbase_csf_ctx_init(struct kbase_context *kctx) mutex_init(&kctx->csf.lock); INIT_WORK(&kctx->csf.pending_submission_work, pending_submission_worker); - } else + + err = kbasep_ctx_user_reg_page_mapping_init(kctx); + + if (unlikely(err)) + kbase_csf_tiler_heap_context_term(kctx); + } + + if (unlikely(err)) kbase_csf_kcpu_queue_context_term(kctx); } @@ -1811,17 +1858,14 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) * only one reference left that was taken when queue was * registered. */ -#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) - WARN_ON(atomic_read(&queue->refcount) != 1); -#else - WARN_ON(refcount_read(&queue->refcount) != 1); -#endif + WARN_ON(kbase_refcount_read(&queue->refcount) != 1); list_del_init(&queue->link); release_queue(queue); } mutex_unlock(&kctx->csf.lock); + kbasep_ctx_user_reg_page_mapping_term(kctx); kbase_csf_tiler_heap_context_term(kctx); kbase_csf_kcpu_queue_context_term(kctx); kbase_csf_scheduler_context_term(kctx); @@ -2736,6 +2780,9 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c if ((req ^ ack) & CSG_REQ_IDLE_MASK) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE( + kbdev, kbdev->gpu_props.props.raw_props.gpu_id, csg_nr); + kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack, CSG_REQ_IDLE_MASK); @@ -3149,12 +3196,12 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev) struct file *filp; int ret; - filp = shmem_file_setup("mali csf", MAX_LFS_FILESIZE, VM_NORESERVE); + filp = shmem_file_setup("mali csf db", MAX_LFS_FILESIZE, VM_NORESERVE); if (IS_ERR(filp)) return PTR_ERR(filp); ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys, - false); + false, NULL); if (ret <= 0) { fput(filp); @@ -3170,29 +3217,34 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev) void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev) { - if (as_phys_addr_t(kbdev->csf.dummy_user_reg_page)) { - struct page *page = as_page(kbdev->csf.dummy_user_reg_page); + if (kbdev->csf.user_reg.filp) { + struct page *page = as_page(kbdev->csf.user_reg.dummy_page); - kbase_mem_pool_free( - &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, - false); + kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false); + fput(kbdev->csf.user_reg.filp); } } int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev) { struct tagged_addr phys; + struct file *filp; struct page *page; u32 *addr; - int ret; - kbdev->csf.dummy_user_reg_page = as_tagged(0); 
+ kbdev->csf.user_reg.filp = NULL; - ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys, - false); + filp = shmem_file_setup("mali csf user_reg", MAX_LFS_FILESIZE, VM_NORESERVE); + if (IS_ERR(filp)) { + dev_err(kbdev->dev, "failed to get an unlinked file for user_reg"); + return PTR_ERR(filp); + } - if (ret <= 0) - return ret; + if (kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys, + false, NULL) <= 0) { + fput(filp); + return -ENOMEM; + } page = as_page(phys); addr = kmap_atomic(page); @@ -3202,12 +3254,13 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev) */ addr[LATEST_FLUSH / sizeof(u32)] = POWER_DOWN_LATEST_FLUSH_VALUE; - kbase_sync_single_for_device(kbdev, kbase_dma_addr(page), sizeof(u32), + kbase_sync_single_for_device(kbdev, kbase_dma_addr(page) + LATEST_FLUSH, sizeof(u32), DMA_BIDIRECTIONAL); kunmap_atomic(addr); - kbdev->csf.dummy_user_reg_page = phys; - + kbdev->csf.user_reg.filp = filp; + kbdev->csf.user_reg.dummy_page = phys; + kbdev->csf.user_reg.file_offset = 0; return 0; } diff --git a/mali_kbase/csf/mali_kbase_csf.h b/mali_kbase/csf/mali_kbase_csf.h index 9fbc932..dd947dc 100644 --- a/mali_kbase/csf/mali_kbase_csf.h +++ b/mali_kbase/csf/mali_kbase_csf.h @@ -274,6 +274,7 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx, */ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group); +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST /** * kbase_csf_queue_group_suspend - Suspend a GPU command queue group * @@ -291,6 +292,7 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group); */ int kbase_csf_queue_group_suspend(struct kbase_context *kctx, struct kbase_suspend_copy_buffer *sus_buf, u8 group_handle); +#endif /** * kbase_csf_add_group_fatal_error - Report a fatal group error to userspace diff --git a/mali_kbase/csf/mali_kbase_csf_defs.h b/mali_kbase/csf/mali_kbase_csf_defs.h index f1af1b9..f09544c 100644 --- a/mali_kbase/csf/mali_kbase_csf_defs.h +++ b/mali_kbase/csf/mali_kbase_csf_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,6 +30,7 @@ #include <linux/wait.h> #include "mali_kbase_csf_firmware.h" +#include "mali_kbase_refcount_defs.h" #include "mali_kbase_csf_event.h" #include <uapi/gpu/arm/midgard/csf/mali_kbase_csf_errors_dumpfault.h> @@ -269,6 +270,8 @@ enum kbase_queue_group_priority { * @CSF_FIRMWARE_PING_TIMEOUT: Maximum time to wait for firmware to respond * to a ping from KBase. * @CSF_SCHED_PROTM_PROGRESS_TIMEOUT: Timeout used to prevent protected mode execution hang. + * @MMU_AS_INACTIVE_WAIT_TIMEOUT: Maximum waiting time in ms for the completion + * of a MMU operation * @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in * the enum. 
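Several hunks above replace the per-call-site KERNEL_VERSION checks on atomic_t/refcount_t with kbase_refcount_* helpers from the newly included mali_kbase_refcount_defs.h. That header is not part of this excerpt; a plausible shape for it, mirroring the conditionals the old call sites carried inline, would be:

/* Sketch of a kbase_refcount_t compatibility layer; assumed, not taken from the patch. */
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
#include <linux/atomic.h>
typedef atomic_t kbase_refcount_t;
#define kbase_refcount_set(r, v) atomic_set(r, v)
#define kbase_refcount_read(r) atomic_read(r)
#define kbase_refcount_inc_not_zero(r) atomic_inc_not_zero(r)
#define kbase_refcount_dec_and_test(r) atomic_dec_and_test(r)
#else
#include <linux/refcount.h>
typedef refcount_t kbase_refcount_t;
#define kbase_refcount_set(r, v) refcount_set(r, v)
#define kbase_refcount_read(r) refcount_read(r)
#define kbase_refcount_inc_not_zero(r) refcount_inc_not_zero(r)
#define kbase_refcount_dec_and_test(r) refcount_dec_and_test(r)
#endif
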
*/ @@ -280,6 +283,7 @@ enum kbase_timeout_selector { CSF_FIRMWARE_BOOT_TIMEOUT, CSF_FIRMWARE_PING_TIMEOUT, CSF_SCHED_PROTM_PROGRESS_TIMEOUT, + MMU_AS_INACTIVE_WAIT_TIMEOUT, /* Must be the last in the enum */ KBASE_TIMEOUT_SELECTOR_COUNT @@ -387,11 +391,7 @@ struct kbase_queue { int doorbell_nr; unsigned long db_file_offset; struct list_head link; -#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) - atomic_t refcount; -#else - refcount_t refcount; -#endif + kbase_refcount_t refcount; struct kbase_queue_group *group; struct kbase_va_region *queue_reg; struct work_struct oom_event_work; @@ -779,6 +779,23 @@ struct kbase_csf_event { }; /** + * struct kbase_csf_user_reg_context - Object containing members to manage the mapping + * of USER Register page for a context. + * + * @vma: Pointer to the VMA corresponding to the virtual mapping + * of the USER register page. + * @file_offset: File offset value that is assigned to userspace mapping + * of the USER Register page. It is in page units. + * @link: Links the context to the device list when mapping is pointing to + * either the dummy or the real Register page. + */ +struct kbase_csf_user_reg_context { + struct vm_area_struct *vma; + u32 file_offset; + struct list_head link; +}; + +/** * struct kbase_csf_context - Object representing CSF for a GPU address space. * * @event_pages_head: A list of pages allocated for the event memory used by @@ -816,13 +833,11 @@ struct kbase_csf_event { * used by GPU command queues, and progress timeout events. * @link: Link to this csf context in the 'runnable_kctxs' list of * the scheduler instance - * @user_reg_vma: Pointer to the vma corresponding to the virtual mapping - * of the USER register page. Currently used only for sanity - * checking. * @sched: Object representing the scheduler's context * @pending_submission_work: Work item to process pending kicked GPU command queues. * @cpu_queue: CPU queue information. Only be available when DEBUG_FS * is enabled. + * @user_reg: Collective information to support mapping to USER Register page. */ struct kbase_csf_context { struct list_head event_pages_head; @@ -837,12 +852,12 @@ struct kbase_csf_context { struct kbase_csf_tiler_heap_context tiler_heaps; struct workqueue_struct *wq; struct list_head link; - struct vm_area_struct *user_reg_vma; struct kbase_csf_scheduler_context sched; struct work_struct pending_submission_work; #if IS_ENABLED(CONFIG_DEBUG_FS) struct kbase_csf_cpu_queue_context cpu_queue; #endif + struct kbase_csf_user_reg_context user_reg; }; /** @@ -1427,6 +1442,37 @@ struct kbase_csf_dump_on_fault { #endif /* CONFIG_DEBUG_FS*/ /** + * struct kbase_csf_user_reg - Object containing members to manage the mapping + * of USER Register page for all contexts + * + * @dummy_page: Address of a dummy page that is mapped in place + * of the real USER Register page just before the GPU + * is powered down. The USER Register page is mapped + * in the address space of every process, that created + * a Base context, to enable the access to LATEST_FLUSH + * register from userspace. + * @filp: Pointer to a dummy file, that along with @file_offset, + * facilitates the use of unique file offset for the userspace mapping + * created for USER Register page. + * The userspace mapping is made to point to this file + * inside the mmap handler. 
+ * @file_offset: Counter that is incremented every time Userspace creates a mapping of + * USER Register page, to provide a unique file offset range for + * @filp file, so that the CPU PTE of the Userspace mapping can be zapped + * through the kernel function unmap_mapping_range(). + * It is incremented in page units. + * @list: Linked list to maintain user processes(contexts) + * having the mapping to USER Register page. + * It's protected by &kbase_csf_device.reg_lock. + */ +struct kbase_csf_user_reg { + struct tagged_addr dummy_page; + struct file *filp; + u32 file_offset; + struct list_head list; +}; + +/** * struct kbase_csf_device - Object representing CSF for an instance of GPU * platform device. * @@ -1463,20 +1509,6 @@ struct kbase_csf_dump_on_fault { * of the real Hw doorbell page for the active GPU * command queues after they are stopped or after the * GPU is powered down. - * @dummy_user_reg_page: Address of the dummy page that is mapped in place - * of the real User register page just before the GPU - * is powered down. The User register page is mapped - * in the address space of every process, that created - * a Base context, to enable the access to LATEST_FLUSH - * register from userspace. - * @nr_user_page_mapped: The number of clients using the mapping of USER page. - * This is used to maintain backward compatibility. - * It's protected by @reg_lock. - * @mali_file_inode: Pointer to the inode corresponding to mali device - * file. This is needed in order to switch to the - * @dummy_user_reg_page on GPU power down. - * All instances of the mali device file will point to - * the same inode. It's protected by @reg_lock. * @reg_lock: Lock to serialize the MCU firmware related actions * that affect all contexts such as allocation of * regions from shared interface area, assignment of @@ -1531,7 +1563,7 @@ struct kbase_csf_dump_on_fault { * the @p mcu_core_pwroff_dur_count as an update * to the latter is asynchronous. * @gpu_idle_hysteresis_us: Sysfs attribute for the idle hysteresis time - * window in unit of microseconds. The firmware does not + * window in unit of microseconds. The firmware does not * use it directly. * @gpu_idle_dur_count: The counterpart of the hysteresis time window in * interface required format, ready to be used @@ -1545,6 +1577,8 @@ struct kbase_csf_dump_on_fault { * @fw_core_dump: Contain members required for handling the firmware * core dump. * @dof: Structure for dump on fault. + * @user_reg: Collective information to support the mapping to + * USER Register page for user processes. */ struct kbase_csf_device { struct kbase_mmu_table mcu_mmu; @@ -1558,9 +1592,6 @@ struct kbase_csf_device { struct file *db_filp; u32 db_file_offsets; struct tagged_addr dummy_db_page; - struct tagged_addr dummy_user_reg_page; - u32 nr_user_page_mapped; - struct inode *mali_file_inode; struct mutex reg_lock; wait_queue_head_t event_wait; bool interrupt_received; @@ -1597,6 +1628,7 @@ struct kbase_csf_device { */ struct kbase_debug_coresight_device coresight; #endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + struct kbase_csf_user_reg user_reg; }; /** @@ -1613,6 +1645,10 @@ struct kbase_csf_device { * @bf_data: Data relating to Bus fault. * @gf_data: Data relating to GPU fault. * @current_setup: Stores the MMU configuration for this address space. + * @is_unresponsive: Flag to indicate MMU is not responding. + * Set if a MMU command isn't completed within + * &kbase_device:mmu_as_inactive_wait_time_ms. 
+ * Clear by kbase_ctx_sched_restore_all_as() after GPU reset completes. */ struct kbase_as { int number; @@ -1624,6 +1660,7 @@ struct kbase_as { struct kbase_fault bf_data; struct kbase_fault gf_data; struct kbase_mmu_setup current_setup; + bool is_unresponsive; }; #endif /* _KBASE_CSF_DEFS_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.c b/mali_kbase/csf/mali_kbase_csf_firmware.c index 4dc9de4..d69a4d4 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -201,8 +201,8 @@ static int setup_shared_iface_static_region(struct kbase_device *kbdev) if (!interface) return -EINVAL; - reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, - interface->num_pages_aligned, KBASE_REG_ZONE_MCU_SHARED); + reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, + interface->num_pages_aligned, KBASE_REG_ZONE_MCU_SHARED); if (reg) { mutex_lock(&kbdev->csf.reg_lock); ret = kbase_add_va_region_rbtree(kbdev, reg, @@ -296,19 +296,41 @@ static void boot_csf_firmware(struct kbase_device *kbdev) wait_for_firmware_boot(kbdev); } -static void wait_ready(struct kbase_device *kbdev) +/** + * wait_ready() - Wait for previously issued MMU command to complete. + * + * @kbdev: Kbase device to wait for a MMU command to complete. + * + * Reset GPU if the wait for previously issued command times out. + * + * Return: 0 on success, error code otherwise. + */ +static int wait_ready(struct kbase_device *kbdev) { - u32 max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; - u32 val; + const ktime_t wait_loop_start = ktime_get_raw(); + const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms; + s64 diff; - val = kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)); + do { + unsigned int i; - /* Wait for a while for the update command to take effect */ - while (--max_loops && (val & AS_STATUS_AS_ACTIVE)) - val = kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)); + for (i = 0; i < 1000; i++) { + /* Wait for the MMU status to indicate there is no active command */ + if (!(kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) & + AS_STATUS_AS_ACTIVE)) + return 0; + } + + diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start)); + } while (diff < mmu_as_inactive_wait_time_ms); - if (max_loops == 0) - dev_err(kbdev->dev, "AS_ACTIVE bit stuck, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n"); + dev_err(kbdev->dev, + "AS_ACTIVE bit stuck for MCU AS. 
Might be caused by unstable GPU clk/pwr or faulty system"); + + if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu_locked(kbdev); + + return -ETIMEDOUT; } static void unload_mmu_tables(struct kbase_device *kbdev) @@ -323,7 +345,7 @@ static void unload_mmu_tables(struct kbase_device *kbdev) mutex_unlock(&kbdev->mmu_hw_mutex); } -static void load_mmu_tables(struct kbase_device *kbdev) +static int load_mmu_tables(struct kbase_device *kbdev) { unsigned long irq_flags; @@ -334,7 +356,7 @@ static void load_mmu_tables(struct kbase_device *kbdev) mutex_unlock(&kbdev->mmu_hw_mutex); /* Wait for a while for the update command to take effect */ - wait_ready(kbdev); + return wait_ready(kbdev); } /** @@ -695,7 +717,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, ret = kbase_mem_pool_alloc_pages( kbase_mem_pool_group_select(kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page), - num_pages_aligned, phys, false); + num_pages_aligned, phys, false, NULL); ignore_page_migration = false; } } @@ -2240,6 +2262,7 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) INIT_LIST_HEAD(&kbdev->csf.firmware_config); INIT_LIST_HEAD(&kbdev->csf.firmware_timeline_metadata); INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list); + INIT_LIST_HEAD(&kbdev->csf.user_reg.list); INIT_WORK(&kbdev->csf.firmware_reload_work, kbase_csf_firmware_reload_worker); INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker); @@ -2403,7 +2426,9 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) kbase_pm_wait_for_l2_powered(kbdev); /* Load the MMU tables into the selected address space */ - load_mmu_tables(kbdev); + ret = load_mmu_tables(kbdev); + if (ret != 0) + goto err_out; boot_csf_firmware(kbdev); @@ -2445,9 +2470,8 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) goto err_out; } -#ifdef CONFIG_MALI_FW_CORE_DUMP - kbase_csf_firmware_core_dump_init(kbdev); -#endif + if (kbdev->csf.fw_core_dump.available) + kbase_csf_firmware_core_dump_init(kbdev); /* Firmware loaded successfully, ret = 0 */ KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_BOOT, NULL, @@ -3029,7 +3053,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( goto page_list_alloc_error; ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, - phys, false); + phys, false, NULL); if (ret <= 0) goto phys_mem_pool_alloc_error; @@ -3040,8 +3064,8 @@ int kbase_csf_firmware_mcu_shared_mapping_init( if (!cpu_addr) goto vmap_error; - va_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, - num_pages, KBASE_REG_ZONE_MCU_SHARED); + va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages, + KBASE_REG_ZONE_MCU_SHARED); if (!va_reg) goto va_region_alloc_error; diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c index 7976d90..37a7f21 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -1124,6 +1124,7 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces); INIT_LIST_HEAD(&kbdev->csf.firmware_config); INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list); + INIT_LIST_HEAD(&kbdev->csf.user_reg.list); INIT_WORK(&kbdev->csf.firmware_reload_work, kbase_csf_firmware_reload_worker); INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker); @@ -1569,7 +1570,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( goto page_list_alloc_error; ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, - phys, false); + phys, false, NULL); if (ret <= 0) goto phys_mem_pool_alloc_error; @@ -1580,8 +1581,8 @@ int kbase_csf_firmware_mcu_shared_mapping_init( if (!cpu_addr) goto vmap_error; - va_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, - num_pages, KBASE_REG_ZONE_MCU_SHARED); + va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages, + KBASE_REG_ZONE_MCU_SHARED); if (!va_reg) goto va_region_alloc_error; diff --git a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c index 42d19e1..7c14b8e 100644 --- a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c +++ b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -100,10 +100,10 @@ static void evict_heap_context(struct kbase_csf_heap_context_allocator *const ct lockdep_assert_held(&ctx_alloc->lock); - /* There is no need to take vm_lock here as the ctx_alloc region is no_user_free - * refcounted. The region and the backing page can't disappear whilst this - * function is executing. - * Flush type is passed as FLUSH_PT to CLN+INV L2 only. + /* There is no need to take vm_lock here as the ctx_alloc region is protected + * via a nonzero no_user_free_count. The region and the backing page can't + * disappear whilst this function is executing. Flush type is passed as FLUSH_PT + * to CLN+INV L2 only. */ kbase_mmu_flush_pa_range(kctx->kbdev, kctx, heap_context_pa, ctx_alloc->heap_context_size_aligned, @@ -181,14 +181,9 @@ void kbase_csf_heap_context_allocator_term( if (ctx_alloc->region) { kbase_gpu_vm_lock(kctx); - /* - * We can't enforce (nor check) the no_user_free refcount - * to be 0 here as other code regions can take such a reference. - * Anyway, this isn't an issue as the region will eventually - * be freed by the region tracker if its refcount didn't drop - * to 0. 
- */ - kbase_va_region_no_user_free_put(kctx, ctx_alloc->region); + WARN_ON(!kbase_va_region_is_no_user_free(ctx_alloc->region)); + + kbase_va_region_no_user_free_dec(ctx_alloc->region); kbase_mem_free_region(kctx, ctx_alloc->region); kbase_gpu_vm_unlock(kctx); } diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.c b/mali_kbase/csf/mali_kbase_csf_kcpu.c index 8c1fcdb..0797224 100644 --- a/mali_kbase/csf/mali_kbase_csf_kcpu.c +++ b/mali_kbase/csf/mali_kbase_csf_kcpu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -365,15 +365,16 @@ static int kbase_kcpu_jit_allocate_prepare( { struct kbase_context *const kctx = kcpu_queue->kctx; void __user *data = u64_to_user_ptr(alloc_info->info); - struct base_jit_alloc_info *info; + struct base_jit_alloc_info *info = NULL; u32 count = alloc_info->count; int ret = 0; u32 i; lockdep_assert_held(&kcpu_queue->lock); - if (!data || count > kcpu_queue->kctx->jit_max_allocations || - count > ARRAY_SIZE(kctx->jit_alloc)) { + if ((count == 0) || (count > ARRAY_SIZE(kctx->jit_alloc)) || + (count > kcpu_queue->kctx->jit_max_allocations) || (!data) || + !kbase_mem_allow_alloc(kctx)) { ret = -EINVAL; goto out; } @@ -610,6 +611,7 @@ out: return ret; } +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST static int kbase_csf_queue_group_suspend_prepare( struct kbase_kcpu_command_queue *kcpu_queue, struct base_kcpu_command_group_suspend_info *suspend_buf, @@ -681,8 +683,7 @@ static int kbase_csf_queue_group_suspend_prepare( (kbase_reg_current_backed_size(reg) < nr_pages) || !(reg->flags & KBASE_REG_CPU_WR) || (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) || - (kbase_is_region_shrinkable(reg)) || - (kbase_va_region_is_no_user_free(kctx, reg))) { + (kbase_is_region_shrinkable(reg)) || (kbase_va_region_is_no_user_free(reg))) { ret = -EINVAL; goto out_clean_pages; } @@ -726,6 +727,7 @@ static int kbase_csf_queue_group_suspend_process(struct kbase_context *kctx, { return kbase_csf_queue_group_suspend(kctx, sus_buf, group_handle); } +#endif static enum kbase_csf_event_callback_action event_cqs_callback(void *param) { @@ -1037,9 +1039,12 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev, queue->kctx, cqs_wait_operation->objs[i].addr, &mapping); u64 val = 0; - /* GPUCORE-28172 RDT to review */ - if (!queue->command_started) + if (!queue->command_started) { queue->command_started = true; + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START( + kbdev, queue); + } + if (!evt) { dev_warn(kbdev->dev, @@ -1089,7 +1094,8 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev, queue->has_error = true; } - /* GPUCORE-28172 RDT to review */ + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END( + kbdev, queue, *(u32 *)evt); queue->command_started = false; } @@ -1232,8 +1238,6 @@ static void kbase_kcpu_cqs_set_operation_process( evt = (uintptr_t)kbase_phy_alloc_mapping_get( queue->kctx, cqs_set_operation->objs[i].addr, &mapping); - /* GPUCORE-28172 RDT to review */ - if (!evt) { dev_warn(kbdev->dev, "Sync memory %llx already freed", cqs_set_operation->objs[i].addr); @@ -1258,7 +1262,8 @@ static void kbase_kcpu_cqs_set_operation_process( break; } - /* GPUCORE-28172 RDT to review */ + 
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION( + kbdev, queue, *(u32 *)evt ? 1 : 0); /* Always propagate errors */ *(u32 *)evt = queue->has_error; @@ -1622,11 +1627,7 @@ static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_q /* Set reference to KCPU metadata and increment refcount */ kcpu_fence->metadata = kcpu_queue->metadata; -#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) - WARN_ON(!atomic_inc_not_zero(&kcpu_fence->metadata->refcount)); -#else - WARN_ON(!refcount_inc_not_zero(&kcpu_fence->metadata->refcount)); -#endif + WARN_ON(!kbase_refcount_inc_not_zero(&kcpu_fence->metadata->refcount)); /* create a sync_file fd representing the fence */ *sync_file = sync_file_create(fence_out); @@ -2056,7 +2057,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, break; } - case BASE_KCPU_COMMAND_TYPE_JIT_FREE: + case BASE_KCPU_COMMAND_TYPE_JIT_FREE: { KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START(kbdev, queue); status = kbase_kcpu_jit_free_process(queue, cmd); @@ -2066,6 +2067,8 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END( kbdev, queue); break; + } +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: { struct kbase_suspend_copy_buffer *sus_buf = cmd->info.suspend_buf_copy.sus_buf; @@ -2082,24 +2085,25 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END( kbdev, queue, status); + } - if (!sus_buf->cpu_alloc) { - int i; + if (!sus_buf->cpu_alloc) { + int i; - for (i = 0; i < sus_buf->nr_pages; i++) - put_page(sus_buf->pages[i]); - } else { - kbase_mem_phy_alloc_kernel_unmapped( - sus_buf->cpu_alloc); - kbase_mem_phy_alloc_put( - sus_buf->cpu_alloc); - } + for (i = 0; i < sus_buf->nr_pages; i++) + put_page(sus_buf->pages[i]); + } else { + kbase_mem_phy_alloc_kernel_unmapped( + sus_buf->cpu_alloc); + kbase_mem_phy_alloc_put( + sus_buf->cpu_alloc); } kfree(sus_buf->pages); kfree(sus_buf); break; } +#endif default: dev_dbg(kbdev->dev, "Unrecognized command type"); @@ -2174,12 +2178,29 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND( } case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: { - /* GPUCORE-28172 RDT to review */ + const struct base_cqs_wait_operation_info *waits = + cmd->info.cqs_wait_operation.objs; + u32 inherit_err_flags = cmd->info.cqs_wait_operation.inherit_err_flags; + unsigned int i; + + for (i = 0; i < cmd->info.cqs_wait_operation.nr_objs; i++) { + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION( + kbdev, queue, waits[i].addr, waits[i].val, + waits[i].operation, waits[i].data_type, + (inherit_err_flags & ((uint32_t)1 << i)) ? 
1 : 0); + } break; } case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: { - /* GPUCORE-28172 RDT to review */ + const struct base_cqs_set_operation_info *sets = cmd->info.cqs_set_operation.objs; + unsigned int i; + + for (i = 0; i < cmd->info.cqs_set_operation.nr_objs; i++) { + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION( + kbdev, queue, sets[i].addr, sets[i].val, + sets[i].operation, sets[i].data_type); + } break; } case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: @@ -2226,11 +2247,13 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND( KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE(kbdev, queue); break; } +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND( kbdev, queue, cmd->info.suspend_buf_copy.sus_buf, cmd->info.suspend_buf_copy.group_handle); break; +#endif default: dev_dbg(kbdev->dev, "Unknown command type %u", cmd->type); break; @@ -2387,11 +2410,13 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, ret = kbase_kcpu_jit_free_prepare(queue, &command.info.jit_free, kcpu_cmd); break; +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: ret = kbase_csf_queue_group_suspend_prepare(queue, &command.info.suspend_buf_copy, kcpu_cmd); break; +#endif default: dev_dbg(queue->kctx->kbdev->dev, "Unknown command type %u", command.type); @@ -2467,6 +2492,7 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, { struct kbase_kcpu_command_queue *queue; int idx; + int n; int ret = 0; #if IS_ENABLED(CONFIG_SYNC_FILE) struct kbase_kcpu_dma_fence_meta *metadata; @@ -2519,6 +2545,7 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, metadata = kzalloc(sizeof(*metadata), GFP_KERNEL); if (!metadata) { + destroy_workqueue(queue->wq); kfree(queue); ret = -ENOMEM; goto out; @@ -2526,14 +2553,17 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, metadata->kbdev = kctx->kbdev; metadata->kctx_id = kctx->id; - snprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%d-%d_%d-%lld-kcpu", kctx->kbdev->id, - kctx->tgid, kctx->id, queue->fence_context); + n = snprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%d-%d_%d-%lld-kcpu", + kctx->kbdev->id, kctx->tgid, kctx->id, queue->fence_context); + if (WARN_ON(n >= MAX_TIMELINE_NAME)) { + destroy_workqueue(queue->wq); + kfree(queue); + kfree(metadata); + ret = -EINVAL; + goto out; + } -#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) - atomic_set(&metadata->refcount, 1); -#else - refcount_set(&metadata->refcount, 1); -#endif + kbase_refcount_set(&metadata->refcount, 1); queue->metadata = metadata; atomic_inc(&kctx->kbdev->live_fence_metadata); #endif /* CONFIG_SYNC_FILE */ diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.h b/mali_kbase/csf/mali_kbase_csf_kcpu.h index b8099fd..6d5145e 100644 --- a/mali_kbase/csf/mali_kbase_csf_kcpu.h +++ b/mali_kbase/csf/mali_kbase_csf_kcpu.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -186,6 +186,7 @@ struct kbase_suspend_copy_buffer { struct kbase_mem_phy_alloc *cpu_alloc; }; +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST /** * struct kbase_kcpu_command_group_suspend_info - structure which contains * suspend buffer data captured for a suspended queue group. @@ -198,6 +199,7 @@ struct kbase_kcpu_command_group_suspend_info { struct kbase_suspend_copy_buffer *sus_buf; u8 group_handle; }; +#endif /** @@ -232,7 +234,9 @@ struct kbase_kcpu_command { struct kbase_kcpu_command_import_info import; struct kbase_kcpu_command_jit_alloc_info jit_alloc; struct kbase_kcpu_command_jit_free_info jit_free; +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST struct kbase_kcpu_command_group_suspend_info suspend_buf_copy; +#endif } info; }; diff --git a/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c b/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c index 77e19db..4056a9d 100644 --- a/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c +++ b/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c @@ -613,7 +613,7 @@ static int shared_mcu_csg_reg_init(struct kbase_device *kbdev, int err, i; INIT_LIST_HEAD(&csg_reg->link); - reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, nr_csg_reg_pages, + reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, nr_csg_reg_pages, KBASE_REG_ZONE_MCU_SHARED); if (!reg) { @@ -668,16 +668,17 @@ fail_userio_pages_map_fail: while (i-- > 0) { vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages); kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, - KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR, true); + KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES, + MCU_AS_NR, true); } vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, - nr_susp_pages, MCU_AS_NR, true); + nr_susp_pages, nr_susp_pages, MCU_AS_NR, true); fail_pmod_map_fail: vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages); kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, - nr_susp_pages, MCU_AS_NR, true); + nr_susp_pages, nr_susp_pages, MCU_AS_NR, true); fail_susp_map_fail: mutex_lock(&kbdev->csf.reg_lock); kbase_remove_va_region(kbdev, reg); @@ -701,15 +702,16 @@ static void shared_mcu_csg_reg_term(struct kbase_device *kbdev, for (i = 0; i < nr_csis; i++) { vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages); kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, - KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR, true); + KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES, + MCU_AS_NR, true); } vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, - nr_susp_pages, MCU_AS_NR, true); + nr_susp_pages, nr_susp_pages, MCU_AS_NR, true); vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages); kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, - nr_susp_pages, MCU_AS_NR, true); + nr_susp_pages, nr_susp_pages, MCU_AS_NR, true); mutex_lock(&kbdev->csf.reg_lock); kbase_remove_va_region(kbdev, reg); @@ -738,7 +740,7 @@ int kbase_csf_mcu_shared_regs_data_init(struct kbase_device *kbdev) return -ENOMEM; if (kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, - &shared_regs->dummy_phys[0], false) <= 0) + &shared_regs->dummy_phys[0], 
false, NULL) <= 0) return -ENOMEM; shared_regs->dummy_phys_allocated = true; diff --git a/mali_kbase/csf/mali_kbase_csf_registers.h b/mali_kbase/csf/mali_kbase_csf_registers.h index 82389e5..b5bf7bb 100644 --- a/mali_kbase/csf/mali_kbase_csf_registers.h +++ b/mali_kbase/csf/mali_kbase_csf_registers.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,10 +31,6 @@ * Begin register sets */ -/* DOORBELLS base address */ -#define DOORBELLS_BASE 0x0080000 -#define DOORBELLS_REG(r) (DOORBELLS_BASE + (r)) - /* CS_KERNEL_INPUT_BLOCK base address */ #define CS_KERNEL_INPUT_BLOCK_BASE 0x0000 #define CS_KERNEL_INPUT_BLOCK_REG(r) (CS_KERNEL_INPUT_BLOCK_BASE + (r)) @@ -71,10 +67,6 @@ #define GLB_OUTPUT_BLOCK_BASE 0x0000 #define GLB_OUTPUT_BLOCK_REG(r) (GLB_OUTPUT_BLOCK_BASE + (r)) -/* USER base address */ -#define USER_BASE 0x0010000 -#define USER_REG(r) (USER_BASE + (r)) - /* End register sets */ /* @@ -267,9 +259,6 @@ #define GLB_DEBUG_ARG_OUT0 0x0FE0 #endif /* CONFIG_MALI_CORESIGHT */ -/* USER register offsets */ -#define LATEST_FLUSH 0x0000 /* () Flush ID of latest clean-and-invalidate operation */ - /* End register offsets */ /* CS_KERNEL_INPUT_BLOCK register set definitions */ @@ -728,6 +717,27 @@ #define CS_FAULT_EXCEPTION_TYPE_ADDR_RANGE_FAULT 0x5A #define CS_FAULT_EXCEPTION_TYPE_IMPRECISE_FAULT 0x5B #define CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT 0x69 +#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L0 0xC0 +#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L1 0xC1 +#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L2 0xC2 +#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L3 0xC3 +#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L4 0xC4 +#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_0 0xC8 +#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_1 0xC9 +#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_2 0xCA +#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_3 0xCB +#define CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_1 0xD9 +#define CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_2 0xDA +#define CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_3 0xDB +#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN 0xE0 +#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_0 0xE4 +#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_1 0xE5 +#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_2 0xE6 +#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_3 0xE7 +#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_0 0xE8 +#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_1 0xE9 +#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_2 0xEA +#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_3 0xEB /* End of CS_FAULT_EXCEPTION_TYPE values */ #define CS_FAULT_EXCEPTION_DATA_SHIFT 8 #define CS_FAULT_EXCEPTION_DATA_MASK (0xFFFFFF << CS_FAULT_EXCEPTION_DATA_SHIFT) diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c index 755df75..bbae94a 100644 --- a/mali_kbase/csf/mali_kbase_csf_scheduler.c +++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -1554,11 +1554,13 @@ static void program_cs(struct kbase_device *kbdev, WARN_ON(csi_index >= ginfo->stream_num)) return; - assign_user_doorbell_to_queue(kbdev, queue); - if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID) - return; + if (queue->enabled) { + assign_user_doorbell_to_queue(kbdev, queue); + if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID) + return; - WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr); + WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr); + } if (queue->enabled && queue_group_suspended_locked(group)) program_cs_extract_init(queue); @@ -1860,6 +1862,7 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend) unsigned long flags; struct kbase_csf_cmd_stream_group_info *ginfo = &global_iface->groups[slot]; + u32 halt_cmd = suspend ? CSG_REQ_STATE_SUSPEND : CSG_REQ_STATE_TERMINATE; @@ -1877,8 +1880,8 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend) csg_slot[slot].trigger_jiffies = jiffies; KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP_REQ, group, halt_cmd); - KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG( - kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot); + KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG( + kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot, suspend); } } @@ -3433,6 +3436,9 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) /* The on slot csg is now stopped */ clear_bit(i, slot_mask); + KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( + kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i); + if (likely(group)) { bool as_fault; /* Only do save/cleanup if the @@ -5071,6 +5077,9 @@ static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slo /* The on slot csg is now stopped */ clear_bit(i, slot_mask_local); + KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( + kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i); + group = scheduler->csg_slots[i].resident_group; if (likely(group)) { /* Only do save/cleanup if the @@ -5129,8 +5138,13 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev) if (all_addr_spaces_used) { for (i = 0; i != total_csg_slots; ++i) { - if (scheduler->csg_slots[i].resident_group != NULL) + if (scheduler->csg_slots[i].resident_group != NULL) { + if (WARN_ON(scheduler->csg_slots[i].resident_group->kctx->as_nr < + 0)) + continue; + as_usage[scheduler->csg_slots[i].resident_group->kctx->as_nr]++; + } } } @@ -5151,6 +5165,9 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev) (group->priority != BASE_QUEUE_GROUP_PRIORITY_REALTIME) && ((lru_idle_group == NULL) || (lru_idle_group->prepared_seq_num < group->prepared_seq_num))) { + if (WARN_ON(group->kctx->as_nr < 0)) + continue; + /* If all address spaces are used, we need to ensure the group does not * share the AS with other active CSGs. Or CSG would be freed without AS * and this optimization would not work. diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c index 14d8097..8072a8b 100644 --- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c +++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -228,11 +228,11 @@ static void remove_unlinked_chunk(struct kbase_context *kctx, kbase_vunmap(kctx, &chunk->map); /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT * regions), and so we must clear that flag too before freeing. - * For "no user free", we check that the refcount is 1 as it is a shrinkable region; + * For "no user free count", we check that the count is 1 as it is a shrinkable region; * no other code part within kbase can take a reference to it. */ - WARN_ON(chunk->region->no_user_free_refcnt > 1); - kbase_va_region_no_user_free_put(kctx, chunk->region); + WARN_ON(atomic_read(&chunk->region->no_user_free_count) > 1); + kbase_va_region_no_user_free_dec(chunk->region); #if !defined(CONFIG_MALI_VECTOR_DUMP) chunk->region->flags &= ~KBASE_REG_DONT_NEED; #endif @@ -315,8 +315,8 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context * * It should be fine and not a security risk if we let the region leak till * region tracker termination in such a case. */ - if (unlikely(chunk->region->no_user_free_refcnt > 1)) { - dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_refcnt > 1!\n"); + if (unlikely(atomic_read(&chunk->region->no_user_free_count) > 1)) { + dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_count > 1!\n"); goto unroll_region; } @@ -371,7 +371,7 @@ unroll_region: /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT * regions), and so we must clear that flag too before freeing. */ - kbase_va_region_no_user_free_put(kctx, chunk->region); + kbase_va_region_no_user_free_dec(chunk->region); #if !defined(CONFIG_MALI_VECTOR_DUMP) chunk->region->flags &= ~KBASE_REG_DONT_NEED; #endif @@ -531,7 +531,7 @@ static void delete_heap(struct kbase_csf_tiler_heap *heap) if (heap->buf_desc_reg) { kbase_vunmap(kctx, &heap->buf_desc_map); kbase_gpu_vm_lock(kctx); - kbase_va_region_no_user_free_put(kctx, heap->buf_desc_reg); + kbase_va_region_no_user_free_dec(heap->buf_desc_reg); kbase_gpu_vm_unlock(kctx); } @@ -741,7 +741,8 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_ */ heap->buf_desc_va = buf_desc_va; - heap->buf_desc_reg = kbase_va_region_no_user_free_get(kctx, buf_desc_reg); + heap->buf_desc_reg = buf_desc_reg; + kbase_va_region_no_user_free_inc(buf_desc_reg); vmap_ptr = kbase_vmap_reg(kctx, buf_desc_reg, buf_desc_va, TILER_BUF_DESC_SIZE, KBASE_REG_CPU_RD, &heap->buf_desc_map, @@ -834,7 +835,7 @@ heap_context_alloc_failed: buf_desc_vmap_failed: if (heap->buf_desc_reg) { kbase_gpu_vm_lock(kctx); - kbase_va_region_no_user_free_put(kctx, heap->buf_desc_reg); + kbase_va_region_no_user_free_dec(heap->buf_desc_reg); kbase_gpu_vm_unlock(kctx); } buf_desc_not_suitable: @@ -967,7 +968,12 @@ int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx, err = validate_allocation_request(heap, nr_in_flight, pending_frag_count); if (unlikely(err)) { - dev_err(kctx->kbdev->dev, + /* The allocation request can be legitimate, but be invoked on a heap + * that has already reached the maximum pre-configured capacity. This + * is useful debug information, but should not be treated as an error, + * since the request will be re-sent at a later point. 
+ */ + dev_dbg(kctx->kbdev->dev, "Not allocating new chunk for heap 0x%llX due to current heap state (err %d)", gpu_heap_va, err); mutex_unlock(&kctx->csf.tiler_heaps.lock); diff --git a/mali_kbase/csf/mali_kbase_csf_tl_reader.c b/mali_kbase/csf/mali_kbase_csf_tl_reader.c index 162b40f..910ba22 100644 --- a/mali_kbase/csf/mali_kbase_csf_tl_reader.c +++ b/mali_kbase/csf/mali_kbase_csf_tl_reader.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,9 +31,7 @@ #include "mali_kbase_pm.h" #include "mali_kbase_hwaccess_time.h" -#include <linux/gcd.h> #include <linux/math64.h> -#include <asm/arch_timer.h> #if IS_ENABLED(CONFIG_DEBUG_FS) #include "tl/mali_kbase_timeline_priv.h" @@ -98,81 +96,6 @@ void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev) #endif /** - * get_cpu_gpu_time() - Get current CPU and GPU timestamps. - * - * @kbdev: Kbase device. - * @cpu_ts: Output CPU timestamp. - * @gpu_ts: Output GPU timestamp. - * @gpu_cycle: Output GPU cycle counts. - */ -static void get_cpu_gpu_time( - struct kbase_device *kbdev, - u64 *cpu_ts, - u64 *gpu_ts, - u64 *gpu_cycle) -{ - struct timespec64 ts; - - kbase_pm_context_active(kbdev); - kbase_backend_get_gpu_time(kbdev, gpu_cycle, gpu_ts, &ts); - kbase_pm_context_idle(kbdev); - - if (cpu_ts) - *cpu_ts = ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; -} - - -/** - * kbase_ts_converter_init() - Initialize system timestamp converter. - * - * @self: System Timestamp Converter instance. - * @kbdev: Kbase device pointer - * - * Return: Zero on success, -1 otherwise. - */ -static int kbase_ts_converter_init( - struct kbase_ts_converter *self, - struct kbase_device *kbdev) -{ - u64 cpu_ts = 0; - u64 gpu_ts = 0; - u64 freq; - u64 common_factor; - - get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL); - freq = arch_timer_get_cntfrq(); - - if (!freq) { - dev_warn(kbdev->dev, "arch_timer_get_rate() is zero!"); - return -1; - } - - common_factor = gcd(NSEC_PER_SEC, freq); - - self->multiplier = div64_u64(NSEC_PER_SEC, common_factor); - self->divisor = div64_u64(freq, common_factor); - self->offset = - cpu_ts - div64_u64(gpu_ts * self->multiplier, self->divisor); - - return 0; -} - -/** - * kbase_ts_converter_convert() - Convert GPU timestamp to CPU timestamp. - * - * @self: System Timestamp Converter instance. - * @gpu_ts: System timestamp value to converter. - * - * Return: The CPU timestamp. - */ -static u64 __maybe_unused -kbase_ts_converter_convert(const struct kbase_ts_converter *self, u64 gpu_ts) -{ - return div64_u64(gpu_ts * self->multiplier, self->divisor) + - self->offset; -} - -/** * tl_reader_overflow_notify() - Emit stream overflow tracepoint. * * @self: CSFFW TL Reader instance. @@ -322,8 +245,8 @@ int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self) { struct kbase_csffw_tl_message *msg = (struct kbase_csffw_tl_message *) csffw_data_it; - msg->timestamp = kbase_ts_converter_convert(&self->ts_converter, - msg->timestamp); + msg->timestamp = + kbase_backend_time_convert_gpu_to_cpu(kbdev, msg->timestamp); } /* Copy the message out to the tl_stream. 
*/ @@ -397,9 +320,6 @@ static int tl_reader_init_late( return -1; } - if (kbase_ts_converter_init(&self->ts_converter, kbdev)) - return -1; - self->kbdev = kbdev; self->trace_buffer = tb; self->tl_header.data = hdr; diff --git a/mali_kbase/csf/mali_kbase_csf_tl_reader.h b/mali_kbase/csf/mali_kbase_csf_tl_reader.h index d554d56..12b285f 100644 --- a/mali_kbase/csf/mali_kbase_csf_tl_reader.h +++ b/mali_kbase/csf/mali_kbase_csf_tl_reader.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -40,37 +40,6 @@ struct kbase_tlstream; struct kbase_device; /** - * struct kbase_ts_converter - System timestamp to CPU timestamp converter state. - * - * @multiplier: Numerator of the converter's fraction. - * @divisor: Denominator of the converter's fraction. - * @offset: Converter's offset term. - * - * According to Generic timer spec, system timer: - * - Increments at a fixed frequency - * - Starts operating from zero - * - * Hence CPU time is a linear function of System Time. - * - * CPU_ts = alpha * SYS_ts + beta - * - * Where - * - alpha = 10^9/SYS_ts_freq - * - beta is calculated by two timer samples taken at the same time: - * beta = CPU_ts_s - SYS_ts_s * alpha - * - * Since alpha is a rational number, we minimizing possible - * rounding error by simplifying the ratio. Thus alpha is stored - * as a simple `multiplier / divisor` ratio. - * - */ -struct kbase_ts_converter { - u64 multiplier; - u64 divisor; - s64 offset; -}; - -/** * struct kbase_csf_tl_reader - CSFFW timeline reader state. * * @read_timer: Timer used for periodical tracebufer reading. @@ -106,7 +75,6 @@ struct kbase_csf_tl_reader { size_t size; size_t btc; } tl_header; - struct kbase_ts_converter ts_converter; bool got_first_event; bool is_active; diff --git a/mali_kbase/device/backend/mali_kbase_device_csf.c b/mali_kbase/device/backend/mali_kbase_device_csf.c index 217a056..492684f 100644 --- a/mali_kbase/device/backend/mali_kbase_device_csf.c +++ b/mali_kbase/device/backend/mali_kbase_device_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -123,6 +123,10 @@ static int kbase_backend_late_init(struct kbase_device *kbdev) if (err) goto fail_update_l2_features; + err = kbase_backend_time_init(kbdev); + if (err) + goto fail_update_l2_features; + init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait); kbase_pm_context_idle(kbdev); @@ -285,8 +289,10 @@ static const struct kbase_device_init dev_init[] = { "Dummy model initialization failed" }, #else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ { assign_irqs, NULL, "IRQ search failed" }, - { registers_map, registers_unmap, "Register map failed" }, #endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ +#if !IS_ENABLED(CONFIG_MALI_NO_MALI) + { registers_map, registers_unmap, "Register map failed" }, +#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */ { power_control_init, power_control_term, "Power control initialization failed" }, { kbase_device_io_history_init, kbase_device_io_history_term, "Register access history initialization failed" }, @@ -359,7 +365,6 @@ static void kbase_device_term_partial(struct kbase_device *kbdev, void kbase_device_term(struct kbase_device *kbdev) { - kbdev->csf.mali_file_inode = NULL; kbase_device_term_partial(kbdev, ARRAY_SIZE(dev_init)); kbase_mem_halt(kbdev); } diff --git a/mali_kbase/device/backend/mali_kbase_device_jm.c b/mali_kbase/device/backend/mali_kbase_device_jm.c index c104fa4..b46180f 100644 --- a/mali_kbase/device/backend/mali_kbase_device_jm.c +++ b/mali_kbase/device/backend/mali_kbase_device_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -100,6 +100,10 @@ static int kbase_backend_late_init(struct kbase_device *kbdev) if (err) goto fail_update_l2_features; + err = kbase_backend_time_init(kbdev); + if (err) + goto fail_update_l2_features; + init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait); /* Idle the GPU and/or cores, if the policy wants it to */ @@ -211,17 +215,19 @@ static void kbase_device_hwcnt_backend_jm_watchdog_term(struct kbase_device *kbd static const struct kbase_device_init dev_init[] = { #if !IS_ENABLED(CONFIG_MALI_REAL_HW) - { kbase_gpu_device_create, kbase_gpu_device_destroy, - "Dummy model initialization failed" }, + { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" }, #else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ { assign_irqs, NULL, "IRQ search failed" }, - { registers_map, registers_unmap, "Register map failed" }, #endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ +#if !IS_ENABLED(CONFIG_MALI_NO_MALI) + { registers_map, registers_unmap, "Register map failed" }, +#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */ { kbase_device_io_history_init, kbase_device_io_history_term, "Register access history initialization failed" }, { kbase_device_pm_init, kbase_device_pm_term, "Power management initialization failed" }, { kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" }, { kbase_device_populate_max_freq, NULL, "Populating max frequency failed" }, + { kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" }, { kbase_device_misc_init, kbase_device_misc_term, "Miscellaneous device initialization failed" }, { kbase_device_pcm_dev_init, kbase_device_pcm_dev_term, @@ -237,7 +243,6 @@ static const struct kbase_device_init dev_init[] = { "Timeline stream initialization failed" }, { kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term, "Clock rate trace manager initialization failed" }, - { kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" }, { kbase_instr_backend_init, kbase_instr_backend_term, "Instrumentation backend initialization failed" }, { kbase_device_hwcnt_watchdog_if_init, kbase_device_hwcnt_watchdog_if_term, diff --git a/mali_kbase/device/mali_kbase_device.c b/mali_kbase/device/mali_kbase_device.c index 4f5ac22..15839ae 100644 --- a/mali_kbase/device/mali_kbase_device.c +++ b/mali_kbase/device/mali_kbase_device.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -35,6 +35,7 @@ #include <mali_kbase.h> #include <mali_kbase_defs.h> #include <mali_kbase_hwaccess_instr.h> +#include <mali_kbase_hwaccess_time.h> #include <mali_kbase_hw.h> #include <mali_kbase_config_defaults.h> #include <linux/priority_control_manager.h> @@ -308,7 +309,8 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) #endif /* MALI_USE_CSF */ kbdev->mmu_mode = kbase_mmu_mode_get_aarch64(); - + kbdev->mmu_as_inactive_wait_time_ms = + kbase_get_timeout_ms(kbdev, MMU_AS_INACTIVE_WAIT_TIMEOUT); mutex_init(&kbdev->kctx_list_lock); INIT_LIST_HEAD(&kbdev->kctx_list); diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c index 15bfd03..60ba9be 100644 --- a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c +++ b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -105,6 +105,70 @@ const char *kbase_gpu_exception_name(u32 const exception_code) case GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_CACHEABILITY_FAULT: e = "GPU_CACHEABILITY_FAULT"; break; + /* MMU Fault */ + case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L0: + e = "TRANSLATION_FAULT at level 0"; + break; + case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L1: + e = "TRANSLATION_FAULT at level 1"; + break; + case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L2: + e = "TRANSLATION_FAULT at level 2"; + break; + case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L3: + e = "TRANSLATION_FAULT at level 3"; + break; + case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L4: + e = "TRANSLATION_FAULT"; + break; + case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_0: + e = "PERMISSION_FAULT at level 0"; + break; + case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_1: + e = "PERMISSION_FAULT at level 1"; + break; + case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_2: + e = "PERMISSION_FAULT at level 2"; + break; + case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_3: + e = "PERMISSION_FAULT at level 3"; + break; + case CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_1: + e = "ACCESS_FLAG at level 1"; + break; + case CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_2: + e = "ACCESS_FLAG at level 2"; + break; + case CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_3: + e = "ACCESS_FLAG at level 3"; + break; + case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN: + e = "ADDRESS_SIZE_FAULT_IN"; + break; + case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_0: + e = "ADDRESS_SIZE_FAULT_OUT_0 at level 0"; + break; + case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_1: + e = "ADDRESS_SIZE_FAULT_OUT_1 at level 1"; + break; + case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_2: + e = "ADDRESS_SIZE_FAULT_OUT_2 at level 2"; + break; + case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_3: + e = "ADDRESS_SIZE_FAULT_OUT_3 at level 3"; + break; + case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_0: + e = "MEMORY_ATTRIBUTE_FAULT_0 at level 0"; + break; + case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_1: + e = "MEMORY_ATTRIBUTE_FAULT_1 at level 1"; + break; + case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_2: + e = "MEMORY_ATTRIBUTE_FAULT_2 at level 
2"; + break; + case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_3: + e = "MEMORY_ATTRIBUTE_FAULT_3 at level 3"; + break; /* Any other exception code is unknown */ default: e = "UNKNOWN"; diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h index 380ec30..f86f493 100644 --- a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h +++ b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h @@ -108,7 +108,6 @@ #define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */ #define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. */ -#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */ #define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */ #define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */ #define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */ @@ -125,8 +124,6 @@ #define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */ #define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */ -#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r)) - #define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job slot n*/ #define JS_COMMAND 0x20 /* (WO) Command register for job slot n */ diff --git a/mali_kbase/gpu/mali_kbase_gpu_fault.h b/mali_kbase/gpu/mali_kbase_gpu_fault.h index 8b50a5d..6a937a5 100644 --- a/mali_kbase/gpu/mali_kbase_gpu_fault.h +++ b/mali_kbase/gpu/mali_kbase_gpu_fault.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,9 +27,9 @@ * * @exception_code: exception code * - * This function is called from the interrupt handler when a GPU fault occurs. + * This function is called by error handlers when GPU reports an error. 
* - * Return: name associated with the exception code + * Return: Error string associated with the exception code */ const char *kbase_gpu_exception_name(u32 exception_code); diff --git a/mali_kbase/gpu/mali_kbase_gpu_regmap.h b/mali_kbase/gpu/mali_kbase_gpu_regmap.h index 907a872..e51791f 100644 --- a/mali_kbase/gpu/mali_kbase_gpu_regmap.h +++ b/mali_kbase/gpu/mali_kbase_gpu_regmap.h @@ -51,9 +51,7 @@ #define MMU_FEATURES 0x014 /* (RO) MMU features */ #define AS_PRESENT 0x018 /* (RO) Address space slots present */ #define GPU_IRQ_RAWSTAT 0x020 /* (RW) */ -#define GPU_IRQ_CLEAR 0x024 /* (WO) */ #define GPU_IRQ_MASK 0x028 /* (RW) */ -#define GPU_IRQ_STATUS 0x02C /* (RO) */ #define GPU_COMMAND 0x030 /* (WO) */ #define GPU_STATUS 0x034 /* (RO) */ @@ -176,14 +174,9 @@ /* Job control registers */ #define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */ -#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ /* MMU control registers */ -#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */ -#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ -#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ - #define MMU_AS1 0x440 /* Configuration registers for address space 1 */ #define MMU_AS2 0x480 /* Configuration registers for address space 2 */ #define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */ diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c index 424a360..27acfc6 100644 --- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c +++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -289,6 +289,8 @@ kbasep_hwcnt_backend_csf_cc_initial_sample(struct kbase_hwcnt_backend_csf *backe u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS]; size_t clk; + memset(cycle_counts, 0, sizeof(cycle_counts)); + /* Read cycle count from CSF interface for both clock domains. */ backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts, clk_enable_map); @@ -308,6 +310,8 @@ static void kbasep_hwcnt_backend_csf_cc_update(struct kbase_hwcnt_backend_csf *b u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS]; size_t clk; + memset(cycle_counts, 0, sizeof(cycle_counts)); + backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts, @@ -558,7 +562,7 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe u32 insert_index_to_stop) { u32 raw_idx; - unsigned long flags; + unsigned long flags = 0UL; u8 *cpu_dump_base = (u8 *)backend_csf->ring_buf_cpu_base; const size_t ring_buf_cnt = backend_csf->info->ring_buf_cnt; const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes; @@ -639,7 +643,7 @@ static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info) { struct kbase_hwcnt_backend_csf_info *csf_info = info; struct kbase_hwcnt_backend_csf *backend_csf; - unsigned long flags; + unsigned long flags = 0UL; csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags); @@ -658,8 +662,8 @@ static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info) /* 3. 
dump state indicates no other dumping is in progress. */ ((backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) || (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED))) { - u32 extract_index; - u32 insert_index; + u32 extract_index = 0U; + u32 insert_index = 0U; /* Read the raw extract and insert indexes from the CSF interface. */ csf_info->csf_if->get_indexes(csf_info->csf_if->ctx, &extract_index, &insert_index); @@ -700,11 +704,11 @@ static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info) */ static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work) { - unsigned long flags; + unsigned long flags = 0ULL; struct kbase_hwcnt_backend_csf *backend_csf; u32 insert_index_to_acc; - u32 extract_index; - u32 insert_index; + u32 extract_index = 0U; + u32 insert_index = 0U; WARN_ON(!work); backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, hwc_dump_work); @@ -776,10 +780,10 @@ static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work) */ static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work) { - unsigned long flags; + unsigned long flags = 0ULL; struct kbase_hwcnt_backend_csf *backend_csf; - u32 extract_index; - u32 insert_index; + u32 extract_index = 0U; + u32 insert_index = 0U; WARN_ON(!work); @@ -920,7 +924,7 @@ static int kbasep_hwcnt_backend_csf_dump_enable(struct kbase_hwcnt_backend *back const struct kbase_hwcnt_enable_map *enable_map) { int errcode; - unsigned long flags; + unsigned long flags = 0UL; struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; if (!backend_csf) @@ -954,7 +958,7 @@ static void kbasep_hwcnt_backend_csf_wait_enable_transition_complete( /* CSF backend implementation of kbase_hwcnt_backend_dump_disable_fn */ static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) { - unsigned long flags; + unsigned long flags = 0UL; struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; bool do_disable = false; @@ -1050,7 +1054,7 @@ static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *ba static int kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend, u64 *dump_time_ns) { - unsigned long flags; + unsigned long flags = 0UL; struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; bool do_request = false; bool watchdog_dumping = false; @@ -1157,7 +1161,7 @@ static int kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *bac /* CSF backend implementation of kbase_hwcnt_backend_dump_wait_fn */ static int kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend) { - unsigned long flags; + unsigned long flags = 0UL; struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; int errcode; @@ -1365,7 +1369,7 @@ alloc_error: static int kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *info, struct kbase_hwcnt_backend **out_backend) { - unsigned long flags; + unsigned long flags = 0UL; struct kbase_hwcnt_backend_csf *backend_csf = NULL; struct kbase_hwcnt_backend_csf_info *csf_info = (struct kbase_hwcnt_backend_csf_info *)info; int errcode; @@ -1407,7 +1411,7 @@ static int kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info * /* CSF backend implementation of kbase_hwcnt_backend_term_fn */ static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend) { - unsigned long flags; + unsigned long flags = 0UL; struct 
kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; if (!backend) @@ -1619,7 +1623,7 @@ void kbase_hwcnt_backend_csf_protm_exited(struct kbase_hwcnt_backend_interface * void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_interface *iface) { - unsigned long flags; + unsigned long flags = 0UL; struct kbase_hwcnt_backend_csf_info *csf_info; csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; @@ -1639,7 +1643,7 @@ void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_i void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interface *iface) { - unsigned long flags; + unsigned long flags = 0UL; struct kbase_hwcnt_backend_csf_info *csf_info; struct kbase_hwcnt_backend_csf *backend_csf; diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c index 124fd4c..e4a963d 100644 --- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c +++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c @@ -329,7 +329,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( /* Get physical page for the buffer */ ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, - phys, false); + phys, false, NULL); if (ret != num_pages) goto phys_mem_pool_alloc_error; @@ -482,7 +482,8 @@ kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_c WARN_ON(kbase_mmu_teardown_pages(fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, fw_ring_buf->phys, - fw_ring_buf->num_pages, MCU_AS_NR, true)); + fw_ring_buf->num_pages, fw_ring_buf->num_pages, + MCU_AS_NR, true)); vunmap(fw_ring_buf->cpu_dump_base); diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt.c b/mali_kbase/hwcnt/mali_kbase_hwcnt.c index e724572..34deb5d 100644 --- a/mali_kbase/hwcnt/mali_kbase_hwcnt.c +++ b/mali_kbase/hwcnt/mali_kbase_hwcnt.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -362,7 +362,7 @@ static int kbasep_hwcnt_accumulator_dump(struct kbase_hwcnt_context *hctx, u64 * bool cur_map_any_enabled; struct kbase_hwcnt_enable_map *cur_map; bool new_map_any_enabled = false; - u64 dump_time_ns; + u64 dump_time_ns = 0; struct kbase_hwcnt_accumulator *accum; WARN_ON(!hctx); diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c index cd5a9bf..5a204ae 100644 --- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c +++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -455,16 +455,14 @@ static const struct kbase_ipa_group ipa_groups_def_tbax[] = { }, }; - -#define IPA_POWER_MODEL_OPS(gpu, init_token) \ - const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \ - .name = "mali-" #gpu "-power-model", \ - .init = kbase_ ## init_token ## _power_model_init, \ - .term = kbase_ipa_vinstr_common_model_term, \ - .get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \ - .reset_counter_data = kbase_ipa_vinstr_reset_data, \ - }; \ - KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops) +#define IPA_POWER_MODEL_OPS(gpu, init_token) \ + static const struct kbase_ipa_model_ops kbase_##gpu##_ipa_model_ops = { \ + .name = "mali-" #gpu "-power-model", \ + .init = kbase_##init_token##_power_model_init, \ + .term = kbase_ipa_vinstr_common_model_term, \ + .get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \ + .reset_counter_data = kbase_ipa_vinstr_reset_data, \ + } #define STANDARD_POWER_MODEL(gpu, reference_voltage) \ static int kbase_ ## gpu ## _power_model_init(\ diff --git a/mali_kbase/jm/mali_kbase_jm_defs.h b/mali_kbase/jm/mali_kbase_jm_defs.h index fe8995a..debc3ad 100644 --- a/mali_kbase/jm/mali_kbase_jm_defs.h +++ b/mali_kbase/jm/mali_kbase_jm_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -127,10 +127,17 @@ /** * enum kbase_timeout_selector - The choice of which timeout to get scaled * using the lowest GPU frequency. + * @MMU_AS_INACTIVE_WAIT_TIMEOUT: Maximum waiting time in ms for the completion + * of a MMU operation + * @JM_DEFAULT_JS_FREE_TIMEOUT: Maximum timeout to wait for JS_COMMAND_NEXT + * to be updated on HW side so a Job Slot is + * considered free. * @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in * the enum. */ enum kbase_timeout_selector { + MMU_AS_INACTIVE_WAIT_TIMEOUT, + JM_DEFAULT_JS_FREE_TIMEOUT, /* Must be the last in the enum */ KBASE_TIMEOUT_SELECTOR_COUNT @@ -852,6 +859,10 @@ struct jsctx_queue { * @pf_data: Data relating to Page fault. * @bf_data: Data relating to Bus fault. * @current_setup: Stores the MMU configuration for this address space. + * @is_unresponsive: Flag to indicate MMU is not responding. + * Set if a MMU command isn't completed within + * &kbase_device:mmu_as_inactive_wait_time_ms. + * Clear by kbase_ctx_sched_restore_all_as() after GPU reset completes. */ struct kbase_as { int number; @@ -861,6 +872,7 @@ struct kbase_as { struct kbase_fault pf_data; struct kbase_fault bf_data; struct kbase_mmu_setup current_setup; + bool is_unresponsive; }; #endif /* _KBASE_JM_DEFS_H_ */ diff --git a/mali_kbase/jm/mali_kbase_js_defs.h b/mali_kbase/jm/mali_kbase_js_defs.h index 924a685..2b93d3d 100644 --- a/mali_kbase/jm/mali_kbase_js_defs.h +++ b/mali_kbase/jm/mali_kbase_js_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -277,6 +277,7 @@ typedef u32 kbase_atom_ordering_flag_t; * @nr_contexts_runnable:Number of contexts that can either be pulled from or * arecurrently running * @soft_job_timeout_ms:Value for JS_SOFT_JOB_TIMEOUT + * @js_free_wait_time_ms: Maximum waiting time in ms for a Job Slot to be seen free. * @queue_mutex: Queue Lock, used to access the Policy's queue of contexts * independently of the Run Pool. * Of course, you don't need the Run Pool lock to access this. @@ -329,6 +330,8 @@ struct kbasep_js_device_data { u32 nr_contexts_pullable; atomic_t nr_contexts_runnable; atomic_t soft_job_timeout_ms; + u32 js_free_wait_time_ms; + struct mutex queue_mutex; /* * Run Pool mutex, for managing contexts within the runpool. diff --git a/mali_kbase/mali_base_hwconfig_features.h b/mali_kbase/mali_base_hwconfig_features.h index c6fea79..11aedef 100644 --- a/mali_kbase/mali_base_hwconfig_features.h +++ b/mali_kbase/mali_base_hwconfig_features.h @@ -40,6 +40,7 @@ enum base_hw_feature { BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, BASE_HW_FEATURE_CORE_FEATURES, BASE_HW_FEATURE_PBHA_HWU, + BASE_HW_FEATURE_LARGE_PAGE_ALLOC, BASE_HW_FEATURE_END }; diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h index 2dc0402..0fbdec0 100644 --- a/mali_kbase/mali_base_hwconfig_issues.h +++ b/mali_kbase/mali_base_hwconfig_issues.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -796,6 +796,19 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p2 BASE_HW_ISSUE_END }; +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p3[] = { + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_TURSEHW_2716, + BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_END +}; + __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTIx[] = { BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, diff --git a/mali_kbase/mali_kbase_config_defaults.h b/mali_kbase/mali_kbase_config_defaults.h index 5035ed5..96529a3 100644 --- a/mali_kbase/mali_kbase_config_defaults.h +++ b/mali_kbase/mali_kbase_config_defaults.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -221,6 +221,16 @@ enum { */ #define JM_DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */ +/* Default timeout in clock cycles to be used when checking if JS_COMMAND_NEXT + * is updated on HW side so a Job Slot is considered free. + * This timeout will only take effect on GPUs with low value for the minimum + * GPU clock frequency (<= 100MHz). + * + * Based on 1ms timeout at 100MHz. Will default to 0ms on GPUs with higher + * value for minimum GPU clock frequency. 
+ */ +#define JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES (100000) + #endif /* MALI_USE_CSF */ /* Default timeslice that a context is scheduled in for, in nanoseconds. @@ -257,5 +267,12 @@ enum { */ #define DEFAULT_IR_THRESHOLD (192) +/* Waiting time in clock cycles for the completion of an MMU operation. + * + * Ideally, 1.6M GPU cycles are required for the L2 cache (512KiB slice) flush. + * + * As a pessimistic value, 50M GPU cycles (> 30 times larger) is chosen. + * It corresponds to 0.5s for a GPU running at 100MHz. + */ +#define MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES ((u64)50 * 1024 * 1024) #endif /* _KBASE_CONFIG_DEFAULTS_H_ */ - diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c index a8f8791..4179091 100644 --- a/mali_kbase/mali_kbase_core_linux.c +++ b/mali_kbase/mali_kbase_core_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -1565,7 +1565,6 @@ static int kbasep_ioctl_cs_cpu_queue_dump(struct kbase_context *kctx, cpu_queue_info->size); } -#define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1) static int kbase_ioctl_read_user_page(struct kbase_context *kctx, union kbase_ioctl_read_user_page *user_page) { @@ -2051,6 +2050,7 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct kbase_ioctl_cs_cpu_queue_info, kctx); break; + /* This IOCTL will be kept for backward compatibility */ case KBASE_IOCTL_READ_USER_PAGE: KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_READ_USER_PAGE, kbase_ioctl_read_user_page, union kbase_ioctl_read_user_page, kctx); @@ -2217,7 +2217,10 @@ KBASE_EXPORT_TEST_API(kbase_event_wakeup); #if MALI_USE_CSF int kbase_event_pending(struct kbase_context *ctx) { - WARN_ON_ONCE(!ctx); + KBASE_DEBUG_ASSERT(ctx); + + if (unlikely(!ctx)) + return -EPERM; return (atomic_read(&ctx->event_count) != 0) || kbase_csf_event_error_pending(ctx) || @@ -2228,6 +2231,9 @@ int kbase_event_pending(struct kbase_context *ctx) { KBASE_DEBUG_ASSERT(ctx); + + if (unlikely(!ctx)) + return -EPERM; + return (atomic_read(&ctx->event_count) != 0) || (atomic_read(&ctx->event_closed) != 0); } @@ -4276,7 +4282,7 @@ void kbase_protected_mode_term(struct kbase_device *kbdev) kfree(kbdev->protected_dev); } -#if !IS_ENABLED(CONFIG_MALI_REAL_HW) +#if IS_ENABLED(CONFIG_MALI_NO_MALI) static int kbase_common_reg_map(struct kbase_device *kbdev) { return 0; @@ -4284,7 +4290,7 @@ static int kbase_common_reg_map(struct kbase_device *kbdev) static void kbase_common_reg_unmap(struct kbase_device * const kbdev) { } -#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ +#else /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */ static int kbase_common_reg_map(struct kbase_device *kbdev) { int err = 0; @@ -4320,7 +4326,7 @@ static void kbase_common_reg_unmap(struct kbase_device * const kbdev) kbdev->reg_size = 0; } } -#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ +#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */ int registers_map(struct kbase_device * const kbdev) { diff --git a/mali_kbase/mali_kbase_ctx_sched.c b/mali_kbase/mali_kbase_ctx_sched.c index beb2928..dc6feb9 100644 --- a/mali_kbase/mali_kbase_ctx_sched.c +++ b/mali_kbase/mali_kbase_ctx_sched.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved.
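Not part of the patch: the cycle-based timeouts above are scaled to milliseconds against the GPU's lowest clock frequency by kbase_get_timeout_ms() elsewhere in the driver. A minimal standalone sketch of that arithmetic, with the frequency supplied by the caller rather than read from the device (the helper name is invented for this illustration):

#include <stdint.h>

/* Convert a timeout expressed in GPU cycles to milliseconds for a clock
 * running at freq_khz, rounding up so that short timeouts do not truncate
 * to zero. At 100 MHz (100000 kHz): 100000 cycles -> 1 ms, and
 * 50 * 1024 * 1024 cycles -> 525 ms, i.e. roughly the 0.5s quoted above.
 */
static uint64_t timeout_cycles_to_ms(uint64_t cycles, uint64_t freq_khz)
{
	return (cycles + freq_khz - 1) / freq_khz;
}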
+ * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -242,6 +242,7 @@ void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) { struct kbase_context *kctx; + kbdev->as[i].is_unresponsive = false; #if MALI_USE_CSF if ((i == MCU_AS_NR) && kbdev->csf.firmware_inited) { kbase_mmu_update(kbdev, &kbdev->csf.mcu_mmu, diff --git a/mali_kbase/mali_kbase_debug_mem_allocs.c b/mali_kbase/mali_kbase_debug_mem_allocs.c index 598d8f5..418bb19 100644 --- a/mali_kbase/mali_kbase_debug_mem_allocs.c +++ b/mali_kbase/mali_kbase_debug_mem_allocs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,7 +26,7 @@ #include "mali_kbase_debug_mem_allocs.h" #include "mali_kbase.h" -#include <string.h> +#include <linux/string.h> #include <linux/list.h> #include <linux/file.h> diff --git a/mali_kbase/mali_kbase_debugfs_helper.c b/mali_kbase/mali_kbase_debugfs_helper.c index 4c1aa28..c846491 100644 --- a/mali_kbase/mali_kbase_debugfs_helper.c +++ b/mali_kbase/mali_kbase_debugfs_helper.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -90,11 +90,10 @@ set_attr_from_string(char *const buf, void *const array, size_t const nelems, int kbase_debugfs_string_validator(char *const buf) { - size_t index; int err = 0; char *ptr = buf; - for (index = 0; *ptr; ++index) { + while (*ptr) { unsigned long test_number; size_t len; diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h index e98ab45..6236f70 100644 --- a/mali_kbase/mali_kbase_defs.h +++ b/mali_kbase/mali_kbase_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -238,12 +238,25 @@ struct kbase_fault { bool protected_mode; }; +/** Maximum number of memory pages that should be allocated for the array + * of pointers to free PGDs. + * + * This number has been pre-calculated to deal with the maximum allocation + * size expressed by the default value of KBASE_MEM_ALLOC_MAX_SIZE. + * This is supposed to be enough for almost the entirety of MMU operations. + * Any size greater than KBASE_MEM_ALLOC_MAX_SIZE requires being broken down + * into multiple iterations, each dealing with at most KBASE_MEM_ALLOC_MAX_SIZE + * bytes. + * + * Please update this value if KBASE_MEM_ALLOC_MAX_SIZE changes. 
+ */ +#define MAX_PAGES_FOR_FREE_PGDS ((size_t)9) + +/* Maximum number of pointers to free PGDs */ +#define MAX_FREE_PGDS ((PAGE_SIZE / sizeof(struct page *)) * MAX_PAGES_FOR_FREE_PGDS) + /** * struct kbase_mmu_table - object representing a set of GPU page tables - * @mmu_teardown_pages: Array containing pointers to 3 separate pages, used - * to cache the entries of top (L0) & intermediate level - * page tables (L1 & L2) to avoid repeated calls to - * kmap_atomic() during the MMU teardown. * @mmu_lock: Lock to serialize the accesses made to multi level GPU * page tables * @pgd: Physical address of the page allocated for the top @@ -255,14 +268,40 @@ struct kbase_fault { * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). * @kctx: If this set of MMU tables belongs to a context then * this is a back-reference to the context, otherwise - * it is NULL + * it is NULL. + * @scratch_mem: Scratch memory used for MMU operations, which are + * serialized by the @mmu_lock. */ struct kbase_mmu_table { - u64 *mmu_teardown_pages[MIDGARD_MMU_BOTTOMLEVEL]; struct mutex mmu_lock; phys_addr_t pgd; u8 group_id; struct kbase_context *kctx; + union { + /** + * @teardown_pages: Scratch memory used for backup copies of whole + * PGD pages when tearing down levels upon + * termination of the MMU table. + */ + struct { + /** + * @levels: Array of PGD pages, large enough to copy one PGD + * for each level of the MMU table. + */ + u64 levels[MIDGARD_MMU_BOTTOMLEVEL][PAGE_SIZE / sizeof(u64)]; + } teardown_pages; + /** + * @free_pgds: Scratch memory used for insertion, update and teardown + * operations to store a temporary list of PGDs to be freed + * at the end of the operation. + */ + struct { + /** @pgds: Array of pointers to PGDs to free. */ + struct page *pgds[MAX_FREE_PGDS]; + /** @head_index: Index of first free element in the PGDs array. */ + size_t head_index; + } free_pgds; + } scratch_mem; }; /** @@ -286,6 +325,8 @@ struct kbase_reg_zone { #include "jm/mali_kbase_jm_defs.h" #endif +#include "mali_kbase_hwaccess_time.h" + static inline int kbase_as_has_bus_fault(struct kbase_as *as, struct kbase_fault *fault) { @@ -754,6 +795,8 @@ struct kbase_mem_migrate { * GPU adrress spaces assigned to them. * @mmu_mask_change: Lock to serialize the access to MMU interrupt mask * register used in the handling of Bus & Page faults. + * @pagesize_2mb: Boolean to determine whether 2MiB page sizes are + * supported and used where possible. * @gpu_props: Object containing complete information about the * configuration/properties of GPU HW device in use. * @hw_issues_mask: List of SW workarounds for HW issues @@ -799,6 +842,7 @@ struct kbase_mem_migrate { * GPU reset. * @lowest_gpu_freq_khz: Lowest frequency in KHz that the GPU can run at. Used * to calculate suitable timeouts for wait operations. + * @backend_time: Kbase backend time related attributes. * @cache_clean_in_progress: Set when a cache clean has been started, and * cleared when it has finished. This prevents multiple * cache cleans being done simultaneously. @@ -993,6 +1037,9 @@ struct kbase_mem_migrate { * KCPU queue. These structures may outlive kbase module * itself. Therefore, in such a case, a warning should be * be produced. + * @mmu_as_inactive_wait_time_ms: Maximum waiting time in ms for the completion of + * an MMU operation. + * @va_region_slab: kmem_cache (slab) for allocated kbase_va_region structures.
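As a worked example of the sizing above, assuming a 4 KiB PAGE_SIZE and 8-byte struct page pointers (typical for a 64-bit kernel, but an assumption here rather than something the patch states):

/* Illustrative arithmetic only:
 *   pointers per 4 KiB page : 4096 / 8 = 512
 *   MAX_FREE_PGDS           : 512 * 9  = 4608
 * so a single MMU operation can queue up to 4608 PGD pages in the
 * free_pgds scratch area for deferred freeing; larger operations are
 * broken into multiple iterations as described in the comment above.
 */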
*/ struct kbase_device { u32 hw_quirks_sc; @@ -1049,6 +1096,8 @@ struct kbase_device { spinlock_t mmu_mask_change; + bool pagesize_2mb; + struct kbase_gpu_props gpu_props; unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; @@ -1102,6 +1151,10 @@ struct kbase_device { u64 lowest_gpu_freq_khz; +#if MALI_USE_CSF + struct kbase_backend_time backend_time; +#endif + bool cache_clean_in_progress; u32 cache_clean_queued; wait_queue_head_t cache_clean_wait; @@ -1283,6 +1336,8 @@ struct kbase_device { #if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) atomic_t live_fence_metadata; #endif + u32 mmu_as_inactive_wait_time_ms; + struct kmem_cache *va_region_slab; }; /** @@ -1636,11 +1691,13 @@ struct kbase_sub_alloc { * is scheduled in and an atom is pulled from the context's per * slot runnable tree in JM GPU or GPU command queue * group is programmed on CSG slot in CSF GPU. - * @mm_update_lock: lock used for handling of special tracking page. * @process_mm: Pointer to the memory descriptor of the process which * created the context. Used for accounting the physical * pages used for GPU allocations, done for the context, - * to the memory consumed by the process. + * to the memory consumed by the process. A reference is taken + * on this descriptor for the Userspace created contexts so that + * Kbase can safely access it to update the memory usage counters. + * The reference is dropped on context termination. * @gpu_va_end: End address of the GPU va space (in 4KB page units) * @running_total_tiler_heap_nr_chunks: Running total of number of chunks in all * tiler heaps of the kbase context. @@ -1762,6 +1819,10 @@ struct kbase_sub_alloc { * @limited_core_mask: The mask that is applied to the affinity in case of atoms * marked with BASE_JD_REQ_LIMITED_CORE_MASK. * @platform_data: Pointer to platform specific per-context data. + * @task: Pointer to the task structure of the main thread of the process + * that created the Kbase context. It would be set only for the + * contexts created by the Userspace and not for the contexts + * created internally by the Kbase. * * A kernel base context is an entity among which the GPU is scheduled. * Each context has its own GPU address space. @@ -1849,8 +1910,7 @@ struct kbase_context { atomic_t refcount; - spinlock_t mm_update_lock; - struct mm_struct __rcu *process_mm; + struct mm_struct *process_mm; u64 gpu_va_end; #if MALI_USE_CSF u32 running_total_tiler_heap_nr_chunks; @@ -1913,6 +1973,8 @@ struct kbase_context { #if !MALI_USE_CSF void *platform_data; #endif + + struct task_struct *task; }; #ifdef CONFIG_MALI_CINSTR_GWT @@ -2015,5 +2077,4 @@ static inline u64 kbase_get_lock_region_min_size_log2(struct kbase_gpu_props con #define KBASE_AS_INACTIVE_MAX_LOOPS 100000000 /* Maximum number of loops polling the GPU PRFCNT_ACTIVE bit before we assume the GPU has hung */ #define KBASE_PRFCNT_ACTIVE_MAX_LOOPS 100000000 - #endif /* _KBASE_DEFS_H_ */ diff --git a/mali_kbase/mali_kbase_fence.h b/mali_kbase/mali_kbase_fence.h index 25986f6..f4507ac 100644 --- a/mali_kbase/mali_kbase_fence.h +++ b/mali_kbase/mali_kbase_fence.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -32,6 +32,7 @@ #include <linux/list.h> #include "mali_kbase_fence_defs.h" #include "mali_kbase.h" +#include "mali_kbase_refcount_defs.h" #if MALI_USE_CSF /* Maximum number of characters in DMA fence timeline name. */ @@ -49,11 +50,7 @@ * @timeline_name: String of timeline name for associated fence object. */ struct kbase_kcpu_dma_fence_meta { -#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) - atomic_t refcount; -#else - refcount_t refcount; -#endif + kbase_refcount_t refcount; struct kbase_device *kbdev; int kctx_id; char timeline_name[MAX_TIMELINE_NAME]; @@ -225,11 +222,7 @@ static inline struct kbase_kcpu_dma_fence *kbase_kcpu_dma_fence_get(struct dma_f static inline void kbase_kcpu_dma_fence_meta_put(struct kbase_kcpu_dma_fence_meta *metadata) { -#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) - if (atomic_dec_and_test(&metadata->refcount)) { -#else - if (refcount_dec_and_test(&metadata->refcount)) { -#endif + if (kbase_refcount_dec_and_test(&metadata->refcount)) { atomic_dec(&metadata->kbdev->live_fence_metadata); kfree(metadata); } diff --git a/mali_kbase/mali_kbase_gwt.c b/mali_kbase/mali_kbase_gwt.c index 16cccee..0eba889 100644 --- a/mali_kbase/mali_kbase_gwt.c +++ b/mali_kbase/mali_kbase_gwt.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -125,14 +125,17 @@ int kbase_gpu_gwt_stop(struct kbase_context *kctx) return 0; } - +#if (KERNEL_VERSION(5, 13, 0) <= LINUX_VERSION_CODE) +static int list_cmp_function(void *priv, const struct list_head *a, const struct list_head *b) +#else static int list_cmp_function(void *priv, struct list_head *a, struct list_head *b) +#endif { - struct kbasep_gwt_list_element *elementA = container_of(a, - struct kbasep_gwt_list_element, link); - struct kbasep_gwt_list_element *elementB = container_of(b, - struct kbasep_gwt_list_element, link); + const struct kbasep_gwt_list_element *elementA = + container_of(a, struct kbasep_gwt_list_element, link); + const struct kbasep_gwt_list_element *elementB = + container_of(b, struct kbasep_gwt_list_element, link); CSTD_UNUSED(priv); diff --git a/mali_kbase/mali_kbase_hw.c b/mali_kbase/mali_kbase_hw.c index c658fb7..b07327a 100644 --- a/mali_kbase/mali_kbase_hw.c +++ b/mali_kbase/mali_kbase_hw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. 
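The new kbase_refcount_* helpers come from mali_kbase_refcount_defs.h, which is not shown in this diff. Based only on the call sites above and the kernel-version check the old code used, a plausible sketch of that compatibility layer could look like the following; this is an assumption, not the actual header:

#include <linux/version.h>
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
#include <linux/atomic.h>
/* Older kernels: fall back to a plain atomic_t. */
typedef atomic_t kbase_refcount_t;
#define kbase_refcount_set(r, v) atomic_set(r, v)
#define kbase_refcount_read(r) atomic_read(r)
#define kbase_refcount_inc(r) atomic_inc(r)
#define kbase_refcount_dec_and_test(r) atomic_dec_and_test(r)
#else
#include <linux/refcount.h>
/* Newer kernels: use the saturating refcount_t API. */
typedef refcount_t kbase_refcount_t;
#define kbase_refcount_set(r, v) refcount_set(r, v)
#define kbase_refcount_read(r) refcount_read(r)
#define kbase_refcount_inc(r) refcount_inc(r)
#define kbase_refcount_dec_and_test(r) refcount_dec_and_test(r)
#endif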
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -232,6 +232,7 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 }, { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 }, { GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 }, + { GPU_ID2_VERSION_MAKE(1, 3, 0), base_hw_issues_tTUx_r1p3 }, { U32_MAX, NULL } } }, { GPU_ID2_PRODUCT_LTUX, @@ -239,6 +240,7 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 }, { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 }, { GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 }, + { GPU_ID2_VERSION_MAKE(1, 3, 0), base_hw_issues_tTUx_r1p3 }, { U32_MAX, NULL } } }, { GPU_ID2_PRODUCT_TTIX, @@ -303,21 +305,20 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( */ issues = fallback_issues; - dev_warn(kbdev->dev, - "GPU hardware issue table may need updating:\n" - "r%dp%d status %d is unknown; treating as r%dp%d status %d", - (gpu_id & GPU_ID2_VERSION_MAJOR) >> - GPU_ID2_VERSION_MAJOR_SHIFT, - (gpu_id & GPU_ID2_VERSION_MINOR) >> - GPU_ID2_VERSION_MINOR_SHIFT, - (gpu_id & GPU_ID2_VERSION_STATUS) >> - GPU_ID2_VERSION_STATUS_SHIFT, - (fallback_version & GPU_ID2_VERSION_MAJOR) >> - GPU_ID2_VERSION_MAJOR_SHIFT, - (fallback_version & GPU_ID2_VERSION_MINOR) >> - GPU_ID2_VERSION_MINOR_SHIFT, - (fallback_version & GPU_ID2_VERSION_STATUS) >> - GPU_ID2_VERSION_STATUS_SHIFT); + dev_notice(kbdev->dev, "r%dp%d status %d not found in HW issues table;\n", + (gpu_id & GPU_ID2_VERSION_MAJOR) >> GPU_ID2_VERSION_MAJOR_SHIFT, + (gpu_id & GPU_ID2_VERSION_MINOR) >> GPU_ID2_VERSION_MINOR_SHIFT, + (gpu_id & GPU_ID2_VERSION_STATUS) >> + GPU_ID2_VERSION_STATUS_SHIFT); + dev_notice(kbdev->dev, "falling back to closest match: r%dp%d status %d\n", + (fallback_version & GPU_ID2_VERSION_MAJOR) >> + GPU_ID2_VERSION_MAJOR_SHIFT, + (fallback_version & GPU_ID2_VERSION_MINOR) >> + GPU_ID2_VERSION_MINOR_SHIFT, + (fallback_version & GPU_ID2_VERSION_STATUS) >> + GPU_ID2_VERSION_STATUS_SHIFT); + dev_notice(kbdev->dev, + "Execution proceeding normally with fallback match\n"); gpu_id &= ~GPU_ID2_VERSION; gpu_id |= fallback_version; @@ -343,7 +344,7 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) issues = kbase_hw_get_issues_for_new_id(kbdev); if (issues == NULL) { dev_err(kbdev->dev, - "Unknown GPU ID %x", gpu_id); + "HW product - Unknown GPU ID %x", gpu_id); return -EINVAL; } @@ -407,7 +408,7 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) break; default: dev_err(kbdev->dev, - "Unknown GPU ID %x", gpu_id); + "HW issues - Unknown GPU ID %x", gpu_id); return -EINVAL; } } diff --git a/mali_kbase/mali_kbase_hwaccess_time.h b/mali_kbase/mali_kbase_hwaccess_time.h index 27e2cb7..ac2a26d 100644 --- a/mali_kbase/mali_kbase_hwaccess_time.h +++ b/mali_kbase/mali_kbase_hwaccess_time.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2018-2021, 2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,6 +22,49 @@ #ifndef _KBASE_BACKEND_TIME_H_ #define _KBASE_BACKEND_TIME_H_ +#if MALI_USE_CSF +/** + * struct kbase_backend_time - System timestamp attributes. + * + * @multiplier: Numerator of the converter's fraction. + * @divisor: Denominator of the converter's fraction. + * @offset: Converter's offset term. + * + * According to the Generic Timer spec, the system timer: + * - Increments at a fixed frequency + * - Starts operating from zero + * + * Hence CPU time is a linear function of System Time. + * + * CPU_ts = alpha * SYS_ts + beta + * + * Where + * - alpha = 10^9/SYS_ts_freq + * - beta is calculated from two timer samples taken at the same time: + * beta = CPU_ts_s - SYS_ts_s * alpha + * + * Since alpha is a rational number, we minimize the possible + * rounding error by simplifying the ratio. Thus alpha is stored + * as a simple `multiplier / divisor` ratio. + * + */ +struct kbase_backend_time { + u64 multiplier; + u64 divisor; + s64 offset; +}; + +/** + * kbase_backend_time_convert_gpu_to_cpu() - Convert GPU timestamp to CPU timestamp. + * + * @kbdev: Kbase device pointer + * @gpu_ts: System timestamp value to convert. + * + * Return: The CPU timestamp. + */ +u64 __maybe_unused kbase_backend_time_convert_gpu_to_cpu(struct kbase_device *kbdev, u64 gpu_ts); +#endif + /** * kbase_backend_get_gpu_time() - Get current GPU time * @kbdev: Device pointer @@ -46,9 +89,6 @@ void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, u64 *cycle_counter, u64 *system_time, struct timespec64 *ts); - -#endif /* _KBASE_BACKEND_TIME_H_ */ - /** * kbase_get_timeout_ms - Choose a timeout value to get a timeout scaled * GPU frequency, using a choice from @@ -70,3 +110,17 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, * Return: Snapshot of the GPU cycle count register. */ u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev); + +/** + * kbase_backend_time_init() - Initialize system timestamp converter. + * + * @kbdev: Kbase device pointer + * + * This function should only be called after the GPU is powered up and + * L2 cache power-up has been initiated. + * + * Return: Zero on success, error code otherwise. + */ +int kbase_backend_time_init(struct kbase_device *kbdev); + +#endif /* _KBASE_BACKEND_TIME_H_ */ diff --git a/mali_kbase/mali_kbase_js.c b/mali_kbase/mali_kbase_js.c index d623aca..6e803bd 100644 --- a/mali_kbase/mali_kbase_js.c +++ b/mali_kbase/mali_kbase_js.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
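To make the linear model above concrete, here is a minimal sketch of the conversion that the kbase_backend_time fields describe. The driver's real kbase_backend_time_convert_gpu_to_cpu() is only declared in this excerpt and may guard against overflow and a zero divisor differently; the function name below is invented for the illustration.

#include <linux/math64.h>

/* Illustrative only: CPU_ts = SYS_ts * (multiplier / divisor) + offset.
 * The wider intermediate that a production implementation would want
 * (gpu_ts * multiplier can exceed 64 bits) is ignored in this sketch.
 */
static u64 example_gpu_to_cpu_ts(const struct kbase_backend_time *t, u64 gpu_ts)
{
	return div64_u64(gpu_ts * t->multiplier, t->divisor) + t->offset;
}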
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,6 +34,7 @@ #include "mali_kbase_jm.h" #include "mali_kbase_hwaccess_jm.h" +#include <mali_kbase_hwaccess_time.h> #include <linux/priority_control_manager.h> /* @@ -531,6 +532,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING; jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS; atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT); + jsdd->js_free_wait_time_ms = kbase_get_timeout_ms(kbdev, JM_DEFAULT_JS_FREE_TIMEOUT); dev_dbg(kbdev->dev, "JS Config Attribs: "); dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u", @@ -555,6 +557,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) jsdd->ctx_timeslice_ns); dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i", atomic_read(&jsdd->soft_job_timeout_ms)); + dev_dbg(kbdev->dev, "\tjs_free_wait_time_ms:%u", jsdd->js_free_wait_time_ms); if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss && jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss && diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c index b18b1e2..1c94e9c 100644 --- a/mali_kbase/mali_kbase_mem.c +++ b/mali_kbase/mali_kbase_mem.c @@ -44,6 +44,9 @@ #include <mali_kbase_config_defaults.h> #include <mali_kbase_trace_gpu_mem.h> +#define VA_REGION_SLAB_NAME_PREFIX "va-region-slab-" +#define VA_REGION_SLAB_NAME_SIZE (DEVNAME_SIZE + sizeof(VA_REGION_SLAB_NAME_PREFIX) + 1) + #if MALI_JIT_PRESSURE_LIMIT_BASE /* @@ -92,10 +95,8 @@ static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx) #error "Unknown CPU VA width for this architecture" #endif -#if IS_ENABLED(CONFIG_64BIT) - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) + if (kbase_ctx_compat_mode(kctx)) cpu_va_bits = 32; -#endif return cpu_va_bits; } @@ -130,18 +131,14 @@ static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx, else { u64 same_va_end; -#if IS_ENABLED(CONFIG_64BIT) - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { -#endif /* CONFIG_64BIT */ + if (kbase_ctx_compat_mode(kctx)) { same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE; -#if IS_ENABLED(CONFIG_64BIT) } else { struct kbase_reg_zone *same_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_SAME_VA); same_va_end = kbase_reg_zone_end_pfn(same_va_zone); } -#endif /* CONFIG_64BIT */ if (gpu_pfn >= same_va_end) rbtree = &kctx->reg_rbtree_custom; @@ -383,6 +380,7 @@ void kbase_remove_va_region(struct kbase_device *kbdev, struct rb_node *rbnext; struct kbase_va_region *next = NULL; struct rb_root *reg_rbtree = NULL; + struct kbase_va_region *orig_reg = reg; int merged_front = 0; int merged_back = 0; @@ -447,9 +445,8 @@ void kbase_remove_va_region(struct kbase_device *kbdev, */ struct kbase_va_region *free_reg; - free_reg = kbase_alloc_free_region(reg_rbtree, - reg->start_pfn, reg->nr_pages, - reg->flags & KBASE_REG_ZONE_MASK); + free_reg = kbase_alloc_free_region(kbdev, reg_rbtree, reg->start_pfn, reg->nr_pages, + reg->flags & KBASE_REG_ZONE_MASK); if (!free_reg) { /* In case of failure, we cannot allocate a replacement * free region, so we will be left with a 'gap' in the @@ -480,6 +477,12 @@ void kbase_remove_va_region(struct kbase_device *kbdev, rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree); } + /* This operation is always safe because the function never frees + * the region. 
If the region has been merged to both front and back, + * then it's the previous region that is supposed to be freed. + */ + orig_reg->start_pfn = 0; + out: return; } @@ -490,6 +493,7 @@ KBASE_EXPORT_TEST_API(kbase_remove_va_region); * kbase_insert_va_region_nolock - Insert a VA region to the list, * replacing the existing one. * + * @kbdev: The kbase device * @new_reg: The new region to insert * @at_reg: The region to replace * @start_pfn: The Page Frame Number to insert at @@ -497,8 +501,10 @@ KBASE_EXPORT_TEST_API(kbase_remove_va_region); * * Return: 0 on success, error code otherwise. */ -static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg, - struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages) +static int kbase_insert_va_region_nolock(struct kbase_device *kbdev, + struct kbase_va_region *new_reg, + struct kbase_va_region *at_reg, u64 start_pfn, + size_t nr_pages) { struct rb_root *reg_rbtree = NULL; int err = 0; @@ -542,10 +548,9 @@ static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg, else { struct kbase_va_region *new_front_reg; - new_front_reg = kbase_alloc_free_region(reg_rbtree, - at_reg->start_pfn, - start_pfn - at_reg->start_pfn, - at_reg->flags & KBASE_REG_ZONE_MASK); + new_front_reg = kbase_alloc_free_region(kbdev, reg_rbtree, at_reg->start_pfn, + start_pfn - at_reg->start_pfn, + at_reg->flags & KBASE_REG_ZONE_MASK); if (new_front_reg) { at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages; @@ -682,8 +687,7 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev, goto exit; } - err = kbase_insert_va_region_nolock(reg, tmp, gpu_pfn, - nr_pages); + err = kbase_insert_va_region_nolock(kbdev, reg, tmp, gpu_pfn, nr_pages); if (err) { dev_warn(dev, "Failed to insert va region"); err = -ENOMEM; @@ -708,8 +712,7 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev, nr_pages, align_offset, align_mask, &start_pfn); if (tmp) { - err = kbase_insert_va_region_nolock(reg, tmp, - start_pfn, nr_pages); + err = kbase_insert_va_region_nolock(kbdev, reg, tmp, start_pfn, nr_pages); if (unlikely(err)) { dev_warn(dev, "Failed to insert region: 0x%08llx start_pfn, %zu nr_pages", start_pfn, nr_pages); @@ -847,7 +850,7 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) if (rbnode) { rb_erase(rbnode, rbtree); reg = rb_entry(rbnode, struct kbase_va_region, rblink); - WARN_ON(reg->va_refcnt != 1); + WARN_ON(kbase_refcount_read(®->va_refcnt) != 1); if (kbase_page_migration_enabled) kbase_gpu_munmap(kbase_reg_flags_to_kctx(reg), reg); /* Reset the start_pfn - as the rbtree is being @@ -933,9 +936,8 @@ int kbase_region_tracker_init(struct kbase_context *kctx) #endif /* all have SAME_VA */ - same_va_reg = - kbase_alloc_free_region(&kctx->reg_rbtree_same, same_va_base, - same_va_pages, KBASE_REG_ZONE_SAME_VA); + same_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, same_va_base, + same_va_pages, KBASE_REG_ZONE_SAME_VA); if (!same_va_reg) { err = -ENOMEM; @@ -944,10 +946,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx) kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_SAME_VA, same_va_base, same_va_pages); -#if IS_ENABLED(CONFIG_64BIT) - /* 32-bit clients have custom VA zones */ - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { -#endif + if (kbase_ctx_compat_mode(kctx)) { if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) { err = -EINVAL; goto fail_free_same_va; @@ -959,10 +958,9 @@ int kbase_region_tracker_init(struct kbase_context *kctx) if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + 
KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit) custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE; - custom_va_reg = kbase_alloc_free_region( - &kctx->reg_rbtree_custom, - KBASE_REG_ZONE_CUSTOM_VA_BASE, - custom_va_size, KBASE_REG_ZONE_CUSTOM_VA); + custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, + KBASE_REG_ZONE_CUSTOM_VA_BASE, + custom_va_size, KBASE_REG_ZONE_CUSTOM_VA); if (!custom_va_reg) { err = -ENOMEM; @@ -971,11 +969,9 @@ int kbase_region_tracker_init(struct kbase_context *kctx) kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_CUSTOM_VA_BASE, custom_va_size); -#if IS_ENABLED(CONFIG_64BIT) } else { custom_va_size = 0; } -#endif #if MALI_USE_CSF /* The position of EXEC_VA depends on whether the client is 32-bit or 64-bit. */ @@ -986,17 +982,15 @@ int kbase_region_tracker_init(struct kbase_context *kctx) */ fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_64; -#if IS_ENABLED(CONFIG_64BIT) - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { + if (kbase_ctx_compat_mode(kctx)) { exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_32; fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_32; } -#endif kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, exec_va_base, KBASE_REG_ZONE_EXEC_VA_SIZE); - exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec, exec_va_base, + exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_base, KBASE_REG_ZONE_EXEC_VA_SIZE, KBASE_REG_ZONE_EXEC_VA); if (!exec_va_reg) { @@ -1010,8 +1004,8 @@ int kbase_region_tracker_init(struct kbase_context *kctx) KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE); exec_fixed_va_reg = - kbase_alloc_free_region(&kctx->reg_rbtree_exec_fixed, exec_fixed_va_base, - KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE, + kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec_fixed, + exec_fixed_va_base, KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE, KBASE_REG_ZONE_EXEC_FIXED_VA); if (!exec_fixed_va_reg) { @@ -1024,7 +1018,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx) kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_FIXED_VA, fixed_va_base, fixed_va_pages); - fixed_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_fixed, fixed_va_base, + fixed_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_fixed, fixed_va_base, fixed_va_pages, KBASE_REG_ZONE_FIXED_VA); kctx->gpu_va_end = fixed_va_end; @@ -1163,7 +1157,6 @@ static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx) return false; } -#if IS_ENABLED(CONFIG_64BIT) static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, u64 jit_va_pages) { @@ -1212,9 +1205,8 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, * Create a custom VA zone at the end of the VA for allocations which * JIT can use so it doesn't have to allocate VA from the kernel. 
*/ - custom_va_reg = - kbase_alloc_free_region(&kctx->reg_rbtree_custom, jit_va_start, - jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA); + custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, jit_va_start, + jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA); /* * The context will be destroyed if we fail here so no point @@ -1231,7 +1223,6 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, kbase_region_tracker_insert(custom_va_reg); return 0; } -#endif int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, int max_allocations, int trim_level, int group_id, @@ -1272,10 +1263,8 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, goto exit_unlock; } -#if IS_ENABLED(CONFIG_64BIT) - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) + if (!kbase_ctx_compat_mode(kctx)) err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages); -#endif /* * Nothing to do for 32-bit clients, JIT uses the existing * custom VA zone. @@ -1351,17 +1340,14 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages goto exit_unlock; } -#if IS_ENABLED(CONFIG_64BIT) - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { -#endif + if (kbase_ctx_compat_mode(kctx)) { /* 32-bit client: take from CUSTOM_VA zone */ target_zone_bits = KBASE_REG_ZONE_CUSTOM_VA; -#if IS_ENABLED(CONFIG_64BIT) } else { /* 64-bit client: take from SAME_VA zone */ target_zone_bits = KBASE_REG_ZONE_SAME_VA; } -#endif + target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_bits); target_zone_base_addr = target_zone->base_pfn << PAGE_SHIFT; @@ -1389,10 +1375,8 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages /* Taken from the end of the target zone */ exec_va_start = kbase_reg_zone_end_pfn(target_zone) - exec_va_pages; - exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec, - exec_va_start, - exec_va_pages, - KBASE_REG_ZONE_EXEC_VA); + exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_start, + exec_va_pages, KBASE_REG_ZONE_EXEC_VA); if (!exec_va_reg) { err = -ENOMEM; goto exit_unlock; @@ -1435,10 +1419,9 @@ int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev) kbdev->csf.shared_reg_rbtree = RB_ROOT; - shared_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, - shared_reg_start_pfn, - shared_reg_size, - KBASE_REG_ZONE_MCU_SHARED); + shared_reg = + kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, shared_reg_start_pfn, + shared_reg_size, KBASE_REG_ZONE_MCU_SHARED); if (!shared_reg) return -ENOMEM; @@ -1447,10 +1430,30 @@ int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev) } #endif +static void kbasep_mem_page_size_init(struct kbase_device *kbdev) +{ +#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) +#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) + kbdev->pagesize_2mb = true; + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC) != 1) { + dev_warn( + kbdev->dev, + "2MB page is enabled by force while current GPU-HW doesn't meet the requirement to do so.\n"); + } +#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */ + kbdev->pagesize_2mb = false; +#endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */ +#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */ + /* Set it to the default based on which GPU is present */ + kbdev->pagesize_2mb = kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC); +#endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */ +} + int kbase_mem_init(struct kbase_device *kbdev) { int err = 0; 
struct kbasep_mem_device *memdev; + char va_region_slab_name[VA_REGION_SLAB_NAME_SIZE]; #if IS_ENABLED(CONFIG_OF) struct device_node *mgm_node = NULL; #endif @@ -1459,6 +1462,19 @@ int kbase_mem_init(struct kbase_device *kbdev) memdev = &kbdev->memdev; + kbasep_mem_page_size_init(kbdev); + + scnprintf(va_region_slab_name, VA_REGION_SLAB_NAME_SIZE, VA_REGION_SLAB_NAME_PREFIX "%s", + kbdev->devname); + + /* Initialize slab cache for kbase_va_regions */ + kbdev->va_region_slab = + kmem_cache_create(va_region_slab_name, sizeof(struct kbase_va_region), 0, 0, NULL); + if (kbdev->va_region_slab == NULL) { + dev_err(kbdev->dev, "Failed to create va_region_slab\n"); + return -ENOMEM; + } + kbase_mem_migrate_init(kbdev); kbase_mem_pool_group_config_set_max_size(&kbdev->mem_pool_defaults, KBASE_MEM_POOL_MAX_SIZE_KCTX); @@ -1550,6 +1566,9 @@ void kbase_mem_term(struct kbase_device *kbdev) kbase_mem_migrate_term(kbdev); + kmem_cache_destroy(kbdev->va_region_slab); + kbdev->va_region_slab = NULL; + WARN_ON(kbdev->total_gpu_pages); WARN_ON(!RB_EMPTY_ROOT(&kbdev->process_root)); WARN_ON(!RB_EMPTY_ROOT(&kbdev->dma_buf_root)); @@ -1563,6 +1582,7 @@ KBASE_EXPORT_TEST_API(kbase_mem_term); /** * kbase_alloc_free_region - Allocate a free region object. * + * @kbdev: kbase device * @rbtree: Backlink to the red-black tree of memory regions. * @start_pfn: The Page Frame Number in GPU virtual address space. * @nr_pages: The size of the region in pages. @@ -1575,8 +1595,8 @@ KBASE_EXPORT_TEST_API(kbase_mem_term); * * Return: pointer to the allocated region object on success, NULL otherwise. */ -struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, - u64 start_pfn, size_t nr_pages, int zone) +struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree, + u64 start_pfn, size_t nr_pages, int zone) { struct kbase_va_region *new_reg; @@ -1588,13 +1608,13 @@ struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, /* 64-bit address range is the max */ KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE)); - new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL); + new_reg = kmem_cache_zalloc(kbdev->va_region_slab, GFP_KERNEL); if (!new_reg) return NULL; - new_reg->va_refcnt = 1; - new_reg->no_user_free_refcnt = 0; + kbase_refcount_set(&new_reg->va_refcnt, 1); + atomic_set(&new_reg->no_user_free_count, 0); new_reg->cpu_alloc = NULL; /* no alloc bound yet */ new_reg->gpu_alloc = NULL; /* no alloc bound yet */ new_reg->rbtree = rbtree; @@ -1726,7 +1746,6 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, unsigned long gwt_mask = ~0; int group_id; struct kbase_mem_phy_alloc *alloc; - bool ignore_page_migration = false; #ifdef CONFIG_MALI_CINSTR_GWT if (kctx->gwt_enabled) @@ -1755,41 +1774,46 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased); for (i = 0; i < alloc->imported.alias.nents; i++) { if (alloc->imported.alias.aliased[i].alloc) { - err = kbase_mmu_insert_pages( + err = kbase_mmu_insert_aliased_pages( kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride), alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset, alloc->imported.alias.aliased[i].length, reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, - NULL, ignore_page_migration); + NULL); if (err) - goto bad_insert; + goto bad_aliased_insert; /* Note: mapping count is tracked at alias * creation time */ } else { - err = 
kbase_mmu_insert_single_page( - kctx, reg->start_pfn + i * stride, - kctx->aliasing_sink_page, + err = kbase_mmu_insert_single_aliased_page( + kctx, reg->start_pfn + i * stride, kctx->aliasing_sink_page, alloc->imported.alias.aliased[i].length, - (reg->flags & mask & gwt_mask) | attr, - group_id, mmu_sync_info); + (reg->flags & mask & gwt_mask) | attr, group_id, + mmu_sync_info); if (err) - goto bad_insert; + goto bad_aliased_insert; } } } else { if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM || - reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) - ignore_page_migration = true; - - err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - kbase_get_gpu_phy_pages(reg), - kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, group_id, - mmu_sync_info, reg, ignore_page_migration); + reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { + + err = kbase_mmu_insert_imported_pages( + kctx->kbdev, &kctx->mmu, reg->start_pfn, + kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), + reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, reg); + } else { + err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + kbase_get_gpu_phy_pages(reg), + kbase_reg_current_backed_size(reg), + reg->flags & gwt_mask, kctx->as_nr, group_id, + mmu_sync_info, reg, true); + } + if (err) goto bad_insert; kbase_mem_phy_alloc_gpu_mapped(alloc); @@ -1799,9 +1823,9 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, !WARN_ON(reg->nr_pages < reg->gpu_alloc->nents) && reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM && reg->gpu_alloc->imported.umm.current_mapping_usage_count) { - /* For padded imported dma-buf memory, map the dummy aliasing - * page from the end of the dma-buf pages, to the end of the - * region using a read only mapping. + /* For padded imported dma-buf or user-buf memory, map the dummy + * aliasing page from the end of the imported pages, to the end of + * the region using a read only mapping. * * Only map when it's imported dma-buf memory that is currently * mapped. @@ -1809,22 +1833,32 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, * Assume reg->gpu_alloc->nents is the number of actual pages * in the dma-buf memory. 
*/ - err = kbase_mmu_insert_single_page( - kctx, reg->start_pfn + reg->gpu_alloc->nents, - kctx->aliasing_sink_page, + err = kbase_mmu_insert_single_imported_page( + kctx, reg->start_pfn + reg->gpu_alloc->nents, kctx->aliasing_sink_page, reg->nr_pages - reg->gpu_alloc->nents, - (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, - KBASE_MEM_GROUP_SINK, mmu_sync_info); + (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, KBASE_MEM_GROUP_SINK, + mmu_sync_info); if (err) goto bad_insert; } return err; -bad_insert: - kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, - reg->nr_pages, kctx->as_nr, ignore_page_migration); +bad_aliased_insert: + while (i-- > 0) { + struct tagged_addr *phys_alloc = NULL; + u64 const stride = alloc->imported.alias.stride; + + if (alloc->imported.alias.aliased[i].alloc != NULL) + phys_alloc = alloc->imported.alias.aliased[i].alloc->pages + + alloc->imported.alias.aliased[i].offset; + kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride), + phys_alloc, alloc->imported.alias.aliased[i].length, + alloc->imported.alias.aliased[i].length, kctx->as_nr, + false); + } +bad_insert: kbase_remove_va_region(kctx->kbdev, reg); return err; @@ -1870,26 +1904,49 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * alloc->imported.alias.stride), phys_alloc, alloc->imported.alias.aliased[i].length, - kctx->as_nr, false); + alloc->imported.alias.aliased[i].length, kctx->as_nr, + false); if (WARN_ON_ONCE(err_loop)) err = err_loop; } } break; - case KBASE_MEM_TYPE_IMPORTED_UMM: - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, reg->nr_pages, kctx->as_nr, true); + case KBASE_MEM_TYPE_IMPORTED_UMM: { + size_t nr_phys_pages = reg->nr_pages; + size_t nr_virt_pages = reg->nr_pages; + /* If the region has import padding and falls under the threshold for + * issuing a partial GPU cache flush, we want to reduce the number of + * physical pages that get flushed. + + * This is symmetric with case of mapping the memory, which first maps + * each imported physical page to a separate virtual page, and then + * maps the single aliasing sink page to each of the virtual padding + * pages. 
+ */ + if (reg->flags & KBASE_REG_IMPORT_PAD) + nr_phys_pages = alloc->nents + 1; + + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + alloc->pages, nr_phys_pages, nr_virt_pages, + kctx->as_nr, true); + } break; - case KBASE_MEM_TYPE_IMPORTED_USER_BUF: - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, kbase_reg_current_backed_size(reg), - kctx->as_nr, true); + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { + size_t nr_reg_pages = kbase_reg_current_backed_size(reg); + + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + alloc->pages, nr_reg_pages, nr_reg_pages, + kctx->as_nr, true); + } break; - default: - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, kbase_reg_current_backed_size(reg), - kctx->as_nr, false); + default: { + size_t nr_reg_pages = kbase_reg_current_backed_size(reg); + + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + alloc->pages, nr_reg_pages, nr_reg_pages, + kctx->as_nr, false); + } break; } @@ -2214,7 +2271,7 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re __func__, (void *)reg, (void *)kctx); lockdep_assert_held(&kctx->reg_lock); - if (kbase_va_region_is_no_user_free(kctx, reg)) { + if (kbase_va_region_is_no_user_free(reg)) { dev_warn(kctx->kbdev->dev, "Attempt to free GPU memory whose freeing by user space is forbidden!\n"); return -EINVAL; } @@ -2435,7 +2492,7 @@ int kbase_update_region_flags(struct kbase_context *kctx, if (flags & BASEP_MEM_NO_USER_FREE) { kbase_gpu_vm_lock(kctx); - kbase_va_region_no_user_free_get(kctx, reg); + kbase_va_region_no_user_free_inc(reg); kbase_gpu_vm_unlock(kctx); } @@ -2489,15 +2546,14 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, tp = alloc->pages + alloc->nents; -#ifdef CONFIG_MALI_2MB_ALLOC /* Check if we have enough pages requested so we can allocate a large * page (512 * 4KB = 2MB ) */ - if (nr_left >= (SZ_2M / SZ_4K)) { + if (kbdev->pagesize_2mb && nr_left >= (SZ_2M / SZ_4K)) { int nr_lp = nr_left / (SZ_2M / SZ_4K); res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.large[alloc->group_id], - nr_lp * (SZ_2M / SZ_4K), tp, true); + nr_lp * (SZ_2M / SZ_4K), tp, true, kctx->task); if (res > 0) { nr_left -= res; @@ -2551,7 +2607,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, err = kbase_mem_pool_grow( &kctx->mem_pools.large[alloc->group_id], - 1); + 1, kctx->task); if (err) break; } while (1); @@ -2592,12 +2648,11 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, } } } -no_new_partial: -#endif +no_new_partial: if (nr_left) { res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[alloc->group_id], nr_left, - tp, false); + tp, false, kctx->task); if (res <= 0) goto alloc_failed; } @@ -2656,18 +2711,17 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( lockdep_assert_held(&pool->pool_lock); -#if !defined(CONFIG_MALI_2MB_ALLOC) - WARN_ON(pool->order); -#endif + kctx = alloc->imported.native.kctx; + kbdev = kctx->kbdev; + + if (!kbdev->pagesize_2mb) + WARN_ON(pool->order); if (alloc->reg) { if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents) goto invalid_request; } - kctx = alloc->imported.native.kctx; - kbdev = kctx->kbdev; - lockdep_assert_held(&kctx->mem_partials_lock); if (nr_pages_requested == 0) @@ -2686,8 +2740,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( tp = alloc->pages + alloc->nents; new_pages = tp; -#ifdef CONFIG_MALI_2MB_ALLOC - if 
(pool->order) { + if (kbdev->pagesize_2mb && pool->order) { int nr_lp = nr_left / (SZ_2M / SZ_4K); res = kbase_mem_pool_alloc_pages_locked(pool, @@ -2771,15 +2824,12 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( if (nr_left) goto alloc_failed; } else { -#endif res = kbase_mem_pool_alloc_pages_locked(pool, nr_left, tp); if (res <= 0) goto alloc_failed; -#ifdef CONFIG_MALI_2MB_ALLOC } -#endif KBASE_TLSTREAM_AUX_PAGESALLOC( kbdev, @@ -2800,8 +2850,7 @@ alloc_failed: struct tagged_addr *start_free = alloc->pages + alloc->nents; -#ifdef CONFIG_MALI_2MB_ALLOC - if (pool->order) { + if (kbdev->pagesize_2mb && pool->order) { while (nr_pages_to_free) { if (is_huge_head(*start_free)) { kbase_mem_pool_free_pages_locked( @@ -2819,15 +2868,12 @@ alloc_failed: } } } else { -#endif kbase_mem_pool_free_pages_locked(pool, nr_pages_to_free, start_free, false, /* not dirty */ true); /* return to pool */ -#ifdef CONFIG_MALI_2MB_ALLOC } -#endif } kbase_process_page_usage_dec(kctx, nr_pages_requested); @@ -3816,8 +3862,8 @@ static void kbase_jit_destroy_worker(struct work_struct *work) * by implementing "free on putting the last reference", * but only for JIT regions. */ - WARN_ON(reg->no_user_free_refcnt > 1); - kbase_va_region_no_user_free_put(kctx, reg); + WARN_ON(atomic_read(®->no_user_free_count) > 1); + kbase_va_region_no_user_free_dec(reg); kbase_mem_free_region(kctx, reg); kbase_gpu_vm_unlock(kctx); } while (1); @@ -4078,18 +4124,14 @@ static int kbase_jit_grow(struct kbase_context *kctx, delta = info->commit_pages - reg->gpu_alloc->nents; pages_required = delta; -#ifdef CONFIG_MALI_2MB_ALLOC - if (pages_required >= (SZ_2M / SZ_4K)) { + if (kctx->kbdev->pagesize_2mb && pages_required >= (SZ_2M / SZ_4K)) { pool = &kctx->mem_pools.large[kctx->jit_group_id]; /* Round up to number of 2 MB pages required */ pages_required += ((SZ_2M / SZ_4K) - 1); pages_required /= (SZ_2M / SZ_4K); } else { -#endif pool = &kctx->mem_pools.small[kctx->jit_group_id]; -#ifdef CONFIG_MALI_2MB_ALLOC } -#endif if (reg->cpu_alloc != reg->gpu_alloc) pages_required *= 2; @@ -4110,7 +4152,7 @@ static int kbase_jit_grow(struct kbase_context *kctx, spin_unlock(&kctx->mem_partials_lock); kbase_gpu_vm_unlock(kctx); - ret = kbase_mem_pool_grow(pool, pool_delta); + ret = kbase_mem_pool_grow(pool, pool_delta, kctx->task); kbase_gpu_vm_lock(kctx); if (ret) @@ -4374,14 +4416,14 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, if (!jit_allow_allocate(kctx, info, ignore_pressure_limit)) return NULL; -#ifdef CONFIG_MALI_2MB_ALLOC - /* Preallocate memory for the sub-allocation structs */ - for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { - prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); - if (!prealloc_sas[i]) - goto end; + if (kctx->kbdev->pagesize_2mb) { + /* Preallocate memory for the sub-allocation structs */ + for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { + prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); + if (!prealloc_sas[i]) + goto end; + } } -#endif kbase_gpu_vm_lock(kctx); mutex_lock(&kctx->jit_evict_lock); @@ -4561,7 +4603,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, /* Similarly to tiler heap init, there is a short window of time * where the (either recycled or newly allocated, in our case) region has - * "no user free" refcount incremented but is still missing the DONT_NEED flag, and + * "no user free" count incremented but is still missing the DONT_NEED flag, and * doesn't yet have the ACTIVE_JIT_ALLOC flag either. 
Temporarily leaking the * allocation is the least bad option that doesn't lead to a security issue down the * line (it will eventually be cleaned up during context termination). @@ -4570,9 +4612,9 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, * flags. */ kbase_gpu_vm_lock(kctx); - if (unlikely(reg->no_user_free_refcnt > 1)) { + if (unlikely(atomic_read(®->no_user_free_count) > 1)) { kbase_gpu_vm_unlock(kctx); - dev_err(kctx->kbdev->dev, "JIT region has no_user_free_refcnt > 1!\n"); + dev_err(kctx->kbdev->dev, "JIT region has no_user_free_count > 1!\n"); mutex_lock(&kctx->jit_evict_lock); list_move(®->jit_node, &kctx->jit_pool_head); @@ -4728,8 +4770,8 @@ bool kbase_jit_evict(struct kbase_context *kctx) * by implementing "free on putting the last reference", * but only for JIT regions. */ - WARN_ON(reg->no_user_free_refcnt > 1); - kbase_va_region_no_user_free_put(kctx, reg); + WARN_ON(atomic_read(®->no_user_free_count) > 1); + kbase_va_region_no_user_free_dec(reg); kbase_mem_free_region(kctx, reg); } @@ -4757,8 +4799,8 @@ void kbase_jit_term(struct kbase_context *kctx) * by implementing "free on putting the last reference", * but only for JIT regions. */ - WARN_ON(walker->no_user_free_refcnt > 1); - kbase_va_region_no_user_free_put(kctx, walker); + WARN_ON(atomic_read(&walker->no_user_free_count) > 1); + kbase_va_region_no_user_free_dec(walker); kbase_mem_free_region(kctx, walker); mutex_lock(&kctx->jit_evict_lock); } @@ -4776,8 +4818,8 @@ void kbase_jit_term(struct kbase_context *kctx) * by implementing "free on putting the last reference", * but only for JIT regions. */ - WARN_ON(walker->no_user_free_refcnt > 1); - kbase_va_region_no_user_free_put(kctx, walker); + WARN_ON(atomic_read(&walker->no_user_free_count) > 1); + kbase_va_region_no_user_free_dec(walker); kbase_mem_free_region(kctx, walker); mutex_lock(&kctx->jit_evict_lock); } @@ -5023,9 +5065,13 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, * region, otherwise the initial content of memory would be wrong. 
*/ for (i = 0; i < pinned_pages; i++) { - dma_addr_t dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, - DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); - + dma_addr_t dma_addr; +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL); +#else + dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); +#endif err = dma_mapping_error(dev, dma_addr); if (err) goto unwind; @@ -5041,9 +5087,10 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, gwt_mask = ~KBASE_REG_GPU_WR; #endif - err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa, - kbase_reg_current_backed_size(reg), reg->flags & gwt_mask, - kctx->as_nr, alloc->group_id, mmu_sync_info, NULL, true); + err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa, + kbase_reg_current_backed_size(reg), + reg->flags & gwt_mask, kctx->as_nr, alloc->group_id, + mmu_sync_info, NULL); if (err == 0) return 0; @@ -5064,8 +5111,12 @@ unwind: dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); +#else dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); +#endif } /* The user buffer could already have been previously pinned before @@ -5182,9 +5233,13 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem } /* Notice: use the original DMA address to unmap the whole memory page. */ +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE, + DMA_BIDIRECTIONAL); +#else dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); - +#endif if (writeable) set_page_dirty_lock(pages[i]); #if !MALI_USE_CSF @@ -5308,6 +5363,7 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_r kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, kbase_reg_current_backed_size(reg), + kbase_reg_current_backed_size(reg), kctx->as_nr, true); } diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h index 83872a1..02e5509 100644 --- a/mali_kbase/mali_kbase_mem.h +++ b/mali_kbase/mali_kbase_mem.h @@ -38,6 +38,7 @@ /* Required for kbase_mem_evictable_unmake */ #include "mali_kbase_mem_linux.h" #include "mali_kbase_mem_migrate.h" +#include "mali_kbase_refcount_defs.h" static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages); @@ -419,8 +420,8 @@ static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_m * @jit_usage_id: The last just-in-time memory usage ID for this region. * @jit_bin_id: The just-in-time memory bin this region came from. * @va_refcnt: Number of users of this region. Protected by reg_lock. - * @no_user_free_refcnt: Number of users that want to prevent the region from - * being freed by userspace. + * @no_user_free_count: Number of contexts that want to prevent the region + * from being freed by userspace. 
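The same kernel-version check is open-coded at each of the map/unmap call sites above. Purely as an illustration, and not something the patch adds, the check could be centralised in a pair of helpers such as the following (names invented for this sketch):

#include <linux/dma-mapping.h>
#include <linux/version.h>

/* Map one page for device access, skipping the CPU cache sync where the
 * kernel supports it; pre-4.10 kernels lack dma_map_page_attrs(), so fall
 * back to the syncing dma_map_page(), as the patch does.
 */
static inline dma_addr_t kbase_dma_map_page_no_sync(struct device *dev, struct page *p)
{
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
	return dma_map_page(dev, p, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
#else
	return dma_map_page_attrs(dev, p, 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
				  DMA_ATTR_SKIP_CPU_SYNC);
#endif
}

static inline void kbase_dma_unmap_page_no_sync(struct device *dev, dma_addr_t dma_addr)
{
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
	dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
#else
	dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
			     DMA_ATTR_SKIP_CPU_SYNC);
#endif
}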
* @heap_info_gpu_addr: Pointer to an object in GPU memory defining an end of * an allocated region * The object can be one of: @@ -681,8 +682,8 @@ struct kbase_va_region { size_t used_pages; #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ - int va_refcnt; - int no_user_free_refcnt; + kbase_refcount_t va_refcnt; + atomic_t no_user_free_count; }; /** @@ -759,15 +760,12 @@ static inline void kbase_region_refcnt_free(struct kbase_device *kbdev, static inline struct kbase_va_region *kbase_va_region_alloc_get( struct kbase_context *kctx, struct kbase_va_region *region) { - lockdep_assert_held(&kctx->reg_lock); - - WARN_ON(!region->va_refcnt); - WARN_ON(region->va_refcnt == INT_MAX); + WARN_ON(!kbase_refcount_read(®ion->va_refcnt)); + WARN_ON(kbase_refcount_read(®ion->va_refcnt) == INT_MAX); - /* non-atomic as kctx->reg_lock is held */ dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %pK\n", - region->va_refcnt, (void *)region); - region->va_refcnt++; + kbase_refcount_read(®ion->va_refcnt), (void *)region); + kbase_refcount_inc(®ion->va_refcnt); return region; } @@ -775,17 +773,14 @@ static inline struct kbase_va_region *kbase_va_region_alloc_get( static inline struct kbase_va_region *kbase_va_region_alloc_put( struct kbase_context *kctx, struct kbase_va_region *region) { - lockdep_assert_held(&kctx->reg_lock); - - WARN_ON(region->va_refcnt <= 0); + WARN_ON(kbase_refcount_read(®ion->va_refcnt) <= 0); WARN_ON(region->flags & KBASE_REG_FREE); - /* non-atomic as kctx->reg_lock is held */ - region->va_refcnt--; - dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %pK\n", - region->va_refcnt, (void *)region); - if (!region->va_refcnt) + if (kbase_refcount_dec_and_test(®ion->va_refcnt)) kbase_region_refcnt_free(kctx->kbdev, region); + else + dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %pK\n", + kbase_refcount_read(®ion->va_refcnt), (void *)region); return NULL; } @@ -799,58 +794,44 @@ static inline struct kbase_va_region *kbase_va_region_alloc_put( * Hence, callers cannot rely on this check alone to determine if a region might be shrunk * by any part of kbase. Instead they should use kbase_is_region_shrinkable(). * - * @kctx: Pointer to kbase context. * @region: Pointer to region. * * Return: true if userspace cannot free the region, false if userspace can free the region. */ -static inline bool kbase_va_region_is_no_user_free(struct kbase_context *kctx, - struct kbase_va_region *region) +static inline bool kbase_va_region_is_no_user_free(struct kbase_va_region *region) { - lockdep_assert_held(&kctx->reg_lock); - return region->no_user_free_refcnt > 0; + return atomic_read(®ion->no_user_free_count) > 0; } /** - * kbase_va_region_no_user_free_get - Increment "no user free" refcount for a region. + * kbase_va_region_no_user_free_inc - Increment "no user free" count for a region. * Calling this function will prevent the region to be shrunk by parts of kbase that - * don't own the region (as long as the refcount stays above zero). Refer to + * don't own the region (as long as the count stays above zero). Refer to * kbase_va_region_is_no_user_free() for more information. * - * @kctx: Pointer to kbase context. * @region: Pointer to region (not shrinkable). * * Return: the pointer to the region passed as argument. 
*/ -static inline struct kbase_va_region * -kbase_va_region_no_user_free_get(struct kbase_context *kctx, struct kbase_va_region *region) +static inline void kbase_va_region_no_user_free_inc(struct kbase_va_region *region) { - lockdep_assert_held(&kctx->reg_lock); - WARN_ON(kbase_is_region_shrinkable(region)); - WARN_ON(region->no_user_free_refcnt == INT_MAX); + WARN_ON(atomic_read(®ion->no_user_free_count) == INT_MAX); /* non-atomic as kctx->reg_lock is held */ - region->no_user_free_refcnt++; - - return region; + atomic_inc(®ion->no_user_free_count); } /** - * kbase_va_region_no_user_free_put - Decrement "no user free" refcount for a region. + * kbase_va_region_no_user_free_dec - Decrement "no user free" count for a region. * - * @kctx: Pointer to kbase context. * @region: Pointer to region (not shrinkable). */ -static inline void kbase_va_region_no_user_free_put(struct kbase_context *kctx, - struct kbase_va_region *region) +static inline void kbase_va_region_no_user_free_dec(struct kbase_va_region *region) { - lockdep_assert_held(&kctx->reg_lock); + WARN_ON(!kbase_va_region_is_no_user_free(region)); - WARN_ON(!kbase_va_region_is_no_user_free(kctx, region)); - - /* non-atomic as kctx->reg_lock is held */ - region->no_user_free_refcnt--; + atomic_dec(®ion->no_user_free_count); } /* Common functions */ @@ -1148,6 +1129,9 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, * @pages: Pointer to array where the physical address of the allocated * pages will be stored. * @partial_allowed: If fewer pages allocated is allowed + * @page_owner: Pointer to the task that created the Kbase context for which + * the pages are being allocated. It can be NULL if the pages + * won't be associated with any Kbase context. * * Like kbase_mem_pool_alloc() but optimized for allocating many pages. * @@ -1164,7 +1148,8 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead. */ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, - struct tagged_addr *pages, bool partial_allowed); + struct tagged_addr *pages, bool partial_allowed, + struct task_struct *page_owner); /** * kbase_mem_pool_alloc_pages_locked - Allocate pages from memory pool @@ -1276,13 +1261,17 @@ void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size); * kbase_mem_pool_grow - Grow the pool * @pool: Memory pool to grow * @nr_to_grow: Number of pages to add to the pool + * @page_owner: Pointer to the task that created the Kbase context for which + * the memory pool is being grown. It can be NULL if the pages + * to be allocated won't be associated with any Kbase context. * * Adds @nr_to_grow pages to the pool. Note that this may cause the pool to * become larger than the maximum size specified. 
* * Return: 0 on success, -ENOMEM if unable to allocate sufficent pages */ -int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow); +int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow, + struct task_struct *page_owner); /** * kbase_mem_pool_trim - Grow or shrink the pool to a new size @@ -1398,8 +1387,8 @@ struct kbase_va_region *kbase_region_tracker_find_region_base_address( struct kbase_va_region *kbase_find_region_base_address(struct rb_root *rbtree, u64 gpu_addr); -struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, - u64 start_pfn, size_t nr_pages, int zone); +struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree, + u64 start_pfn, size_t nr_pages, int zone); void kbase_free_alloced_region(struct kbase_va_region *reg); int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align); @@ -1410,6 +1399,32 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev, bool kbase_check_alloc_flags(unsigned long flags); bool kbase_check_import_flags(unsigned long flags); +static inline bool kbase_import_size_is_valid(struct kbase_device *kbdev, u64 va_pages) +{ + if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) { + dev_dbg( + kbdev->dev, + "Import attempted with va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!", + (unsigned long long)va_pages); + return false; + } + + return true; +} + +static inline bool kbase_alias_size_is_valid(struct kbase_device *kbdev, u64 va_pages) +{ + if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) { + dev_dbg( + kbdev->dev, + "Alias attempted with va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!", + (unsigned long long)va_pages); + return false; + } + + return true; +} + /** * kbase_check_alloc_sizes - check user space sizes parameters for an * allocation @@ -1737,7 +1752,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, * * @prealloc_sa: Information about the partial allocation if the amount of memory requested * is not a multiple of 2MB. One instance of struct kbase_sub_alloc must be - * allocated by the caller iff CONFIG_MALI_2MB_ALLOC is enabled. + * allocated by the caller if kbdev->pagesize_2mb is enabled. * * Allocates @nr_pages_requested and updates the alloc object. This function does not allocate new * pages from the kernel, and therefore will never trigger the OoM killer. Therefore, it can be @@ -1765,7 +1780,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, * This ensures that the pool can be grown to the required size and that the allocation can * complete without another thread using the newly grown pages. * - * If CONFIG_MALI_2MB_ALLOC is defined and the allocation is >= 2MB, then @pool must be one of the + * If kbdev->pagesize_2mb is enabled and the allocation is >= 2MB, then @pool must be one of the * pools from alloc->imported.native.kctx->mem_pools.large[]. Otherwise it must be one of the * mempools from alloc->imported.native.kctx->mem_pools.small[]. 
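The kbase_import_size_is_valid() and kbase_alias_size_is_valid() helpers added above are called at the top of the import and alias paths later in this patch (kbase_mem_from_umm, kbase_mem_from_user_buffer, kbase_mem_alias). A minimal sketch of that call order, assuming the usual kbase headers; example_import_guard() is a hypothetical name, not part of the patch:

/* Illustrative only: validate the requested size before any VA region is
 * reserved, mirroring what kbase_mem_from_umm() now does in this patch.
 */
static struct kbase_va_region *example_import_guard(struct kbase_context *kctx, u64 va_pages)
{
	/* Reject anything larger than KBASE_MEM_ALLOC_MAX_SIZE up front */
	if (!kbase_import_size_is_valid(kctx->kbdev, va_pages))
		return NULL;

	/* Only then reserve a region; note the new kbdev argument */
	return kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, va_pages,
				       KBASE_REG_ZONE_CUSTOM_VA);
}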
* @@ -2494,8 +2509,7 @@ kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits) * kbase_mem_allow_alloc - Check if allocation of GPU memory is allowed * @kctx: Pointer to kbase context * - * Don't allow the allocation of GPU memory until user space has set up the - * tracking page (which sets kctx->process_mm) or if the ioctl has been issued + * Don't allow the allocation of GPU memory if the ioctl has been issued * from the forked child process using the mali device file fd inherited from * the parent process. * @@ -2503,13 +2517,23 @@ kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits) */ static inline bool kbase_mem_allow_alloc(struct kbase_context *kctx) { - bool allow_alloc = true; - - rcu_read_lock(); - allow_alloc = (rcu_dereference(kctx->process_mm) == current->mm); - rcu_read_unlock(); + return (kctx->process_mm == current->mm); +} - return allow_alloc; +/** + * kbase_mem_mmgrab - Wrapper function to take reference on mm_struct of current process + */ +static inline void kbase_mem_mmgrab(void) +{ + /* This merely takes a reference on the memory descriptor structure + * i.e. mm_struct of current process and not on its address space and + * so won't block the freeing of address space on process exit. + */ +#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE + atomic_inc(¤t->mm->mm_count); +#else + mmgrab(current->mm); +#endif } /** diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c index e577452..7d30790 100644 --- a/mali_kbase/mali_kbase_mem_linux.c +++ b/mali_kbase/mali_kbase_mem_linux.c @@ -37,7 +37,7 @@ #include <linux/memory_group_manager.h> #include <linux/math64.h> #include <linux/migrate.h> - +#include <linux/version.h> #include <mali_kbase.h> #include <mali_kbase_mem_linux.h> #include <tl/mali_kbase_tracepoints.h> @@ -385,8 +385,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages zone = KBASE_REG_ZONE_CUSTOM_VA; } - reg = kbase_alloc_free_region(rbtree, PFN_DOWN(*gpu_va), - va_pages, zone); + reg = kbase_alloc_free_region(kctx->kbdev, rbtree, PFN_DOWN(*gpu_va), va_pages, zone); if (!reg) { dev_err(dev, "Failed to allocate free region"); @@ -481,22 +480,22 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages } else /* we control the VA */ { size_t align = 1; -#ifdef CONFIG_MALI_2MB_ALLOC - /* If there's enough (> 33 bits) of GPU VA space, align to 2MB - * boundaries. The similar condition is used for mapping from - * the SAME_VA zone inside kbase_context_get_unmapped_area(). - */ - if (kctx->kbdev->gpu_props.mmu.va_bits > 33) { - if (va_pages >= (SZ_2M / SZ_4K)) - align = (SZ_2M / SZ_4K); - } - if (*gpu_va) - align = 1; + if (kctx->kbdev->pagesize_2mb) { + /* If there's enough (> 33 bits) of GPU VA space, align to 2MB + * boundaries. The similar condition is used for mapping from + * the SAME_VA zone inside kbase_context_get_unmapped_area(). 
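The simplified kbase_mem_allow_alloc() above now only rejects a forked child that inherited the device fd, while the new kbase_mem_mmgrab() wrapper hides the pre-4.11 mm_count handling. A short sketch of how the two are combined, mirroring the user-buffer import hunk later in this patch; example_prepare_user_buf() is illustrative, and the matching mmdrop() on teardown is assumed rather than shown here:

/* Illustrative caller: refuse work from a forked child, then pin the
 * mm_struct (not the address space) for the lifetime of the mapping.
 */
static int example_prepare_user_buf(struct kbase_context *kctx,
				    struct kbase_mem_phy_alloc *alloc)
{
	if (!kbase_mem_allow_alloc(kctx))
		return -EINVAL;	/* current->mm differs from the mm that created the context */

	alloc->imported.user_buf.mm = current->mm;
	kbase_mem_mmgrab();	/* wraps mmgrab() or atomic_inc(&mm->mm_count) by kernel version */

	return 0;
}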
+ */ + if (kctx->kbdev->gpu_props.mmu.va_bits > 33) { + if (va_pages >= (SZ_2M / SZ_4K)) + align = (SZ_2M / SZ_4K); + } + if (*gpu_va) + align = 1; #if !MALI_USE_CSF - if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) - align = 1; + if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) + align = 1; #endif /* !MALI_USE_CSF */ -#endif /* CONFIG_MALI_2MB_ALLOC */ + } if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, align, mmu_sync_info) != 0) { dev_warn(dev, "Failed to map memory on GPU"); @@ -999,7 +998,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in * & GPU queue ringbuffer and none of them needs to be explicitly marked * as evictable by Userspace. */ - if (kbase_va_region_is_no_user_free(kctx, reg)) + if (kbase_va_region_is_no_user_free(reg)) goto out_unlock; /* Is the region being transitioning between not needed and needed? */ @@ -1319,10 +1318,11 @@ int kbase_mem_umm_map(struct kbase_context *kctx, gwt_mask = ~KBASE_REG_GPU_WR; #endif - err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - kbase_get_gpu_phy_pages(reg), - kbase_reg_current_backed_size(reg), reg->flags & gwt_mask, - kctx->as_nr, alloc->group_id, mmu_sync_info, NULL, true); + err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + kbase_get_gpu_phy_pages(reg), + kbase_reg_current_backed_size(reg), + reg->flags & gwt_mask, kctx->as_nr, alloc->group_id, + mmu_sync_info, NULL); if (err) goto bad_insert; @@ -1335,11 +1335,11 @@ int kbase_mem_umm_map(struct kbase_context *kctx, * Assume alloc->nents is the number of actual pages in the * dma-buf memory. */ - err = kbase_mmu_insert_single_page( - kctx, reg->start_pfn + alloc->nents, - kctx->aliasing_sink_page, reg->nr_pages - alloc->nents, - (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, - KBASE_MEM_GROUP_SINK, mmu_sync_info); + err = kbase_mmu_insert_single_imported_page( + kctx, reg->start_pfn + alloc->nents, kctx->aliasing_sink_page, + reg->nr_pages - alloc->nents, + (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, KBASE_MEM_GROUP_SINK, + mmu_sync_info); if (err) goto bad_pad_insert; } @@ -1348,7 +1348,7 @@ int kbase_mem_umm_map(struct kbase_context *kctx, bad_pad_insert: kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, - alloc->nents, kctx->as_nr, true); + alloc->nents, alloc->nents, kctx->as_nr, true); bad_insert: kbase_mem_umm_unmap_attachment(kctx, alloc); bad_map_attachment: @@ -1377,7 +1377,8 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx, int err; err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, reg->nr_pages, kctx->as_nr, true); + alloc->pages, reg->nr_pages, reg->nr_pages, + kctx->as_nr, true); WARN_ON(err); } @@ -1449,6 +1450,9 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, return NULL; } + if (!kbase_import_size_is_valid(kctx->kbdev, *va_pages)) + return NULL; + /* ignore SAME_VA */ *flags &= ~BASE_MEM_SAME_VA; @@ -1469,23 +1473,21 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) need_sync = true; -#if IS_ENABLED(CONFIG_64BIT) - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + if (!kbase_ctx_compat_mode(kctx)) { /* * 64-bit tasks require us to reserve VA on the CPU that we use * on the GPU. 
*/ shared_zone = true; } -#endif if (shared_zone) { *flags |= BASE_MEM_NEED_MMAP; - reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, - 0, *va_pages, KBASE_REG_ZONE_SAME_VA); + reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *va_pages, + KBASE_REG_ZONE_SAME_VA); } else { - reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom, - 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA); + reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *va_pages, + KBASE_REG_ZONE_CUSTOM_VA); } if (!reg) { @@ -1618,21 +1620,22 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( /* 64-bit address range is the max */ goto bad_size; + if (!kbase_import_size_is_valid(kctx->kbdev, *va_pages)) + goto bad_size; + /* SAME_VA generally not supported with imported memory (no known use cases) */ *flags &= ~BASE_MEM_SAME_VA; if (*flags & BASE_MEM_IMPORT_SHARED) shared_zone = true; -#if IS_ENABLED(CONFIG_64BIT) - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + if (!kbase_ctx_compat_mode(kctx)) { /* * 64-bit tasks require us to reserve VA on the CPU that we use * on the GPU. */ shared_zone = true; } -#endif if (shared_zone) { *flags |= BASE_MEM_NEED_MMAP; @@ -1641,7 +1644,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( } else rbtree = &kctx->reg_rbtree_custom; - reg = kbase_alloc_free_region(rbtree, 0, *va_pages, zone); + reg = kbase_alloc_free_region(kctx->kbdev, rbtree, 0, *va_pages, zone); if (!reg) goto no_region; @@ -1667,11 +1670,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( user_buf->address = address; user_buf->nr_pages = *va_pages; user_buf->mm = current->mm; -#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE - atomic_inc(¤t->mm->mm_count); -#else - mmgrab(current->mm); -#endif + kbase_mem_mmgrab(); if (reg->gpu_alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) user_buf->pages = vmalloc(*va_pages * sizeof(struct page *)); else @@ -1746,10 +1745,13 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( * region, otherwise the initial content of memory would be wrong. 
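The KERNEL_VERSION(4, 10, 0) guard around dma_map_page() versus dma_map_page_attrs() already appeared in the kbase_jd_user_buf_map()/unmap hunks earlier and is repeated again in the hunk that follows. If the repetition were to be factored out, a compatibility helper along these lines would do; this is a sketch only, with a hypothetical name, and the patch itself deliberately open-codes the #if at each call site:

/* Sketch of a possible compat helper; the patch assumes DMA_ATTR_SKIP_CPU_SYNC
 * and dma_map_page_attrs() are only usable from 4.10 onwards, hence the fallback.
 */
static inline dma_addr_t example_dma_map_page_no_sync(struct device *dev, struct page *p)
{
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
	return dma_map_page(dev, p, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
#else
	return dma_map_page_attrs(dev, p, 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
				  DMA_ATTR_SKIP_CPU_SYNC);
#endif
}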
*/ for (i = 0; i < faulted_pages; i++) { - dma_addr_t dma_addr = - dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC); - + dma_addr_t dma_addr; +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL); +#else + dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, + DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); +#endif if (dma_mapping_error(dev, dma_addr)) goto unwind_dma_map; @@ -1776,8 +1778,12 @@ unwind_dma_map: dma_addr_t dma_addr = user_buf->dma_addrs[i]; dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); +#else dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); +#endif } fault_mismatch: if (pages) { @@ -1853,22 +1859,19 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, /* calculate the number of pages this alias will cover */ *num_pages = nents * stride; -#if IS_ENABLED(CONFIG_64BIT) - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + if (!kbase_alias_size_is_valid(kctx->kbdev, *num_pages)) + goto bad_size; + + if (!kbase_ctx_compat_mode(kctx)) { /* 64-bit tasks must MMAP anyway, but not expose this address to * clients */ *flags |= BASE_MEM_NEED_MMAP; - reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, - *num_pages, - KBASE_REG_ZONE_SAME_VA); + reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *num_pages, + KBASE_REG_ZONE_SAME_VA); } else { -#else - if (1) { -#endif - reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom, - 0, *num_pages, - KBASE_REG_ZONE_CUSTOM_VA); + reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *num_pages, + KBASE_REG_ZONE_CUSTOM_VA); } if (!reg) @@ -1919,7 +1922,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, goto bad_handle; /* Not found/already free */ if (kbase_is_region_shrinkable(aliasing_reg)) goto bad_handle; /* Ephemeral region */ - if (kbase_va_region_is_no_user_free(kctx, aliasing_reg)) + if (kbase_va_region_is_no_user_free(aliasing_reg)) goto bad_handle; /* JIT regions can't be * aliased. 
NO_USER_FREE flag * covers the entire lifetime @@ -1974,8 +1977,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, } } -#if IS_ENABLED(CONFIG_64BIT) - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + if (!kbase_ctx_compat_mode(kctx)) { /* Bind to a cookie */ if (bitmap_empty(kctx->cookies, BITS_PER_LONG)) { dev_err(kctx->kbdev->dev, "No cookies available for allocation!"); @@ -1990,10 +1992,8 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, /* relocate to correct base */ gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE); gpu_va <<= PAGE_SHIFT; - } else /* we control the VA */ { -#else - if (1) { -#endif + } else { + /* we control the VA */ if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1, mmu_sync_info) != 0) { dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU"); @@ -2010,9 +2010,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, return gpu_va; -#if IS_ENABLED(CONFIG_64BIT) no_cookie: -#endif no_mmap: bad_handle: /* Marking the source allocs as not being mapped on the GPU and putting @@ -2227,7 +2225,7 @@ int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx, int ret = 0; ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + new_pages, - alloc->pages + new_pages, delta, kctx->as_nr, false); + alloc->pages + new_pages, delta, delta, kctx->as_nr, false); return ret; } @@ -2295,7 +2293,7 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) if (kbase_is_region_shrinkable(reg)) goto out_unlock; - if (kbase_va_region_is_no_user_free(kctx, reg)) + if (kbase_va_region_is_no_user_free(reg)) goto out_unlock; #ifdef CONFIG_MALI_MEMORY_FULLY_BACKED @@ -2398,18 +2396,19 @@ int kbase_mem_shrink(struct kbase_context *const kctx, kbase_free_phy_pages_helper(reg->cpu_alloc, delta); if (reg->cpu_alloc != reg->gpu_alloc) kbase_free_phy_pages_helper(reg->gpu_alloc, delta); -#ifdef CONFIG_MALI_2MB_ALLOC - if (kbase_reg_current_backed_size(reg) > new_pages) { - old_pages = new_pages; - new_pages = kbase_reg_current_backed_size(reg); - - /* Update GPU mapping. */ - err = kbase_mem_grow_gpu_mapping(kctx, reg, - new_pages, old_pages, CALLER_MMU_ASYNC); + + if (kctx->kbdev->pagesize_2mb) { + if (kbase_reg_current_backed_size(reg) > new_pages) { + old_pages = new_pages; + new_pages = kbase_reg_current_backed_size(reg); + + /* Update GPU mapping. 
*/ + err = kbase_mem_grow_gpu_mapping(kctx, reg, new_pages, old_pages, + CALLER_MMU_ASYNC); + } + } else { + WARN_ON(kbase_reg_current_backed_size(reg) != new_pages); } -#else - WARN_ON(kbase_reg_current_backed_size(reg) != new_pages); -#endif } return err; @@ -2707,8 +2706,8 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx, goto out; } - new_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, nr_pages, - KBASE_REG_ZONE_SAME_VA); + new_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, nr_pages, + KBASE_REG_ZONE_SAME_VA); if (!new_reg) { err = -ENOMEM; WARN_ON(1); @@ -3378,79 +3377,29 @@ static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value) void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages) { - struct mm_struct *mm; + struct mm_struct *mm = kctx->process_mm; - rcu_read_lock(); - mm = rcu_dereference(kctx->process_mm); - if (mm) { - atomic_add(pages, &kctx->nonmapped_pages); -#ifdef SPLIT_RSS_COUNTING - kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); -#else - spin_lock(&mm->page_table_lock); - kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); - spin_unlock(&mm->page_table_lock); -#endif - } - rcu_read_unlock(); -} - -static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx) -{ - int pages; - struct mm_struct *mm; - - spin_lock(&kctx->mm_update_lock); - mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock)); - if (!mm) { - spin_unlock(&kctx->mm_update_lock); + if (unlikely(!mm)) return; - } - rcu_assign_pointer(kctx->process_mm, NULL); - spin_unlock(&kctx->mm_update_lock); - synchronize_rcu(); - - pages = atomic_xchg(&kctx->nonmapped_pages, 0); + atomic_add(pages, &kctx->nonmapped_pages); #ifdef SPLIT_RSS_COUNTING - kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages); + kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); #else spin_lock(&mm->page_table_lock); - kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages); + kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); spin_unlock(&mm->page_table_lock); #endif } -static void kbase_special_vm_close(struct vm_area_struct *vma) -{ - struct kbase_context *kctx; - - kctx = vma->vm_private_data; - kbasep_os_process_page_usage_drain(kctx); -} - -static const struct vm_operations_struct kbase_vm_special_ops = { - .close = kbase_special_vm_close, -}; - static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma) { - /* check that this is the only tracking page */ - spin_lock(&kctx->mm_update_lock); - if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) { - spin_unlock(&kctx->mm_update_lock); - return -EFAULT; - } - - rcu_assign_pointer(kctx->process_mm, current->mm); - - spin_unlock(&kctx->mm_update_lock); + if (vma_pages(vma) != 1) + return -EINVAL; /* no real access */ vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC); vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO; - vma->vm_ops = &kbase_vm_special_ops; - vma->vm_private_data = kctx; return 0; } @@ -3723,23 +3672,27 @@ static void kbase_csf_user_reg_vm_open(struct vm_area_struct *vma) static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma) { struct kbase_context *kctx = vma->vm_private_data; + struct kbase_device *kbdev; - if (!kctx) { + if (unlikely(!kctx)) { pr_debug("Close function called for the unexpected mapping"); return; } - if (unlikely(!kctx->csf.user_reg_vma)) - dev_warn(kctx->kbdev->dev, "user_reg_vma pointer unexpectedly NULL"); 
+ kbdev = kctx->kbdev; - kctx->csf.user_reg_vma = NULL; + if (unlikely(!kctx->csf.user_reg.vma)) + dev_warn(kbdev->dev, "user_reg VMA pointer unexpectedly NULL for ctx %d_%d", + kctx->tgid, kctx->id); - mutex_lock(&kctx->kbdev->csf.reg_lock); - if (unlikely(kctx->kbdev->csf.nr_user_page_mapped == 0)) - dev_warn(kctx->kbdev->dev, "Unexpected value for the USER page mapping counter"); - else - kctx->kbdev->csf.nr_user_page_mapped--; - mutex_unlock(&kctx->kbdev->csf.reg_lock); + mutex_lock(&kbdev->csf.reg_lock); + list_del_init(&kctx->csf.user_reg.link); + mutex_unlock(&kbdev->csf.reg_lock); + + kctx->csf.user_reg.vma = NULL; + + /* Now as the VMA is closed, drop the reference on mali device file */ + fput(kctx->filp); } /** @@ -3784,10 +3737,11 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf) unsigned long flags; /* Few sanity checks up front */ - if (!kctx || (nr_pages != 1) || (vma != kctx->csf.user_reg_vma) || - (vma->vm_pgoff != PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE))) { - pr_warn("Unexpected CPU page fault on USER page mapping for process %s tgid %d pid %d\n", - current->comm, current->tgid, current->pid); + + if (!kctx || (nr_pages != 1) || (vma != kctx->csf.user_reg.vma) || - (vma->vm_pgoff != kctx->csf.user_reg.file_offset)) { + pr_err("Unexpected CPU page fault on USER page mapping for process %s tgid %d pid %d\n", + current->comm, current->tgid, current->pid); return VM_FAULT_SIGBUS; } @@ -3796,22 +3750,22 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf) pfn = PFN_DOWN(kbdev->reg_start + USER_BASE); mutex_lock(&kbdev->csf.reg_lock); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - /* Don't map in the actual register page if GPU is powered down. - * Always map in the dummy page in no mali builds. + /* Dummy page will be mapped during GPU off. + * + * In no mali builds, always map in the dummy page. */ -#if IS_ENABLED(CONFIG_MALI_NO_MALI) - pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_user_reg_page)); -#else - if (!kbdev->pm.backend.gpu_powered) - pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_user_reg_page)); -#endif + if (IS_ENABLED(CONFIG_MALI_NO_MALI) || !kbdev->pm.backend.gpu_powered) + pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.user_reg.dummy_page)); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + list_move_tail(&kctx->csf.user_reg.link, &kbdev->csf.user_reg.list); ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, KBASE_MEM_GROUP_CSF_FW, vma, vma->vm_start, pfn, vma->vm_page_prot); + mutex_unlock(&kbdev->csf.reg_lock); return ret; @@ -3824,20 +3778,6 @@ static const struct vm_operations_struct kbase_csf_user_reg_vm_ops = { .fault = kbase_csf_user_reg_vm_fault }; -/** - * kbase_csf_cpu_mmap_user_reg_page - Memory map method for USER page. - * - * @kctx: Pointer of the kernel context. - * @vma: Pointer to the struct containing the information about - * the userspace mapping of USER page. - * - * Return: 0 on success, error code otherwise. - * - * Note: - * New Base will request Kbase to read the LATEST_FLUSH of USER page on its behalf. - * But this function needs to be kept for backward-compatibility as old Base (<=1.12) - * will try to mmap USER page for direct access when it creates a base context. 
- */ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, struct vm_area_struct *vma) { @@ -3845,7 +3785,7 @@ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, struct kbase_device *kbdev = kctx->kbdev; /* Few sanity checks */ - if (kctx->csf.user_reg_vma) + if (kctx->csf.user_reg.vma) return -EBUSY; if (nr_pages != 1) @@ -3864,19 +3804,21 @@ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, */ vma->vm_flags |= VM_PFNMAP; - kctx->csf.user_reg_vma = vma; + kctx->csf.user_reg.vma = vma; mutex_lock(&kbdev->csf.reg_lock); - kbdev->csf.nr_user_page_mapped++; - - if (!kbdev->csf.mali_file_inode) - kbdev->csf.mali_file_inode = kctx->filp->f_inode; - - if (unlikely(kbdev->csf.mali_file_inode != kctx->filp->f_inode)) - dev_warn(kbdev->dev, "Device file inode pointer not same for all contexts"); - + kctx->csf.user_reg.file_offset = kbdev->csf.user_reg.file_offset++; mutex_unlock(&kbdev->csf.reg_lock); + /* Make VMA point to the special internal file, but don't drop the + * reference on mali device file (that would be done later when the + * VMA is closed). + */ + vma->vm_file = kctx->kbdev->csf.user_reg.filp; + get_file(vma->vm_file); + + /* Also adjust the vm_pgoff */ + vma->vm_pgoff = kctx->csf.user_reg.file_offset; vma->vm_ops = &kbase_csf_user_reg_vm_ops; vma->vm_private_data = kctx; diff --git a/mali_kbase/mali_kbase_mem_migrate.c b/mali_kbase/mali_kbase_mem_migrate.c index 9c4b0d9..1dc76d0 100644 --- a/mali_kbase/mali_kbase_mem_migrate.c +++ b/mali_kbase/mali_kbase_mem_migrate.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -60,6 +60,7 @@ bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_a lock_page(p); #if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) __SetPageMovable(p, &movable_ops); + page_md->status = PAGE_MOVABLE_SET(page_md->status); #else /* In some corner cases, the driver may attempt to allocate memory pages * even before the device file is open and the mapping for address space @@ -78,8 +79,10 @@ bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_a * is enabled and because the pages may always return to memory pools and * gain the movable property later on in their life cycle. 
*/ - if (kbdev->mem_migrate.inode && kbdev->mem_migrate.inode->i_mapping) + if (kbdev->mem_migrate.inode && kbdev->mem_migrate.inode->i_mapping) { __SetPageMovable(p, kbdev->mem_migrate.inode->i_mapping); + page_md->status = PAGE_MOVABLE_SET(page_md->status); + } #endif unlock_page(p); @@ -112,6 +115,7 @@ static void kbase_free_pages_worker(struct work_struct *work) container_of(work, struct kbase_mem_migrate, free_pages_work); struct kbase_device *kbdev = container_of(mem_migrate, struct kbase_device, mem_migrate); struct page *p, *tmp; + struct kbase_page_metadata *page_md; LIST_HEAD(free_list); spin_lock(&mem_migrate->free_pages_lock); @@ -123,8 +127,11 @@ static void kbase_free_pages_worker(struct work_struct *work) list_del_init(&p->lru); lock_page(p); - if (PageMovable(p)) + page_md = kbase_page_private(p); + if (IS_PAGE_MOVABLE(page_md->status)) { __ClearPageMovable(p); + page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); + } unlock_page(p); kbase_free_page_metadata(kbdev, p, &group_id); @@ -189,9 +196,12 @@ static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new #if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) __SetPageMovable(new_page, &movable_ops); + page_md->status = PAGE_MOVABLE_SET(page_md->status); #else - if (kbdev->mem_migrate.inode->i_mapping) + if (kbdev->mem_migrate.inode->i_mapping) { __SetPageMovable(new_page, kbdev->mem_migrate.inode->i_mapping); + page_md->status = PAGE_MOVABLE_SET(page_md->status); + } #endif SetPagePrivate(new_page); get_page(new_page); @@ -257,9 +267,12 @@ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct pa /* Set PG_movable to the new page. */ #if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) __SetPageMovable(new_page, &movable_ops); + page_md->status = PAGE_MOVABLE_SET(page_md->status); #else - if (kctx->kbdev->mem_migrate.inode->i_mapping) + if (kctx->kbdev->mem_migrate.inode->i_mapping) { __SetPageMovable(new_page, kctx->kbdev->mem_migrate.inode->i_mapping); + page_md->status = PAGE_MOVABLE_SET(page_md->status); + } #endif } else dma_unmap_page(kctx->kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); @@ -288,7 +301,7 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) CSTD_UNUSED(mode); - if (!PageMovable(p) || !page_md) + if (!page_md || !IS_PAGE_MOVABLE(page_md->status)) return false; if (!spin_trylock(&page_md->migrate_lock)) @@ -327,6 +340,7 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) case NOT_MOVABLE: /* Opportunistically clear the movable property for these pages */ __ClearPageMovable(p); + page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); break; default: /* State should always fall in one of the previous cases! @@ -401,7 +415,7 @@ static int kbase_page_migrate(struct page *new_page, struct page *old_page, enum #endif CSTD_UNUSED(mode); - if (!PageMovable(old_page) || !page_md) + if (!page_md || !IS_PAGE_MOVABLE(page_md->status)) return -EINVAL; if (!spin_trylock(&page_md->migrate_lock)) @@ -469,8 +483,10 @@ static int kbase_page_migrate(struct page *new_page, struct page *old_page, enum * error is returned are called putback on, which may not be what we * expect. 
*/ - if (err < 0 && err != -EAGAIN) + if (err < 0 && err != -EAGAIN) { __ClearPageMovable(old_page); + page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); + } return err; } @@ -542,6 +558,7 @@ static void kbase_page_putback(struct page *p) if (status_mem_pool || status_free_isolated_in_progress || status_free_pt_isolated_in_progress) { __ClearPageMovable(p); + page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); if (!WARN_ON_ONCE(!kbdev)) { struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; diff --git a/mali_kbase/mali_kbase_mem_migrate.h b/mali_kbase/mali_kbase_mem_migrate.h index 30d0803..76bbc99 100644 --- a/mali_kbase/mali_kbase_mem_migrate.h +++ b/mali_kbase/mali_kbase_mem_migrate.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,13 +23,22 @@ * DOC: Base kernel page migration implementation. */ -#define PAGE_STATUS_MASK ((u8)0x7F) +#define PAGE_STATUS_MASK ((u8)0x3F) #define PAGE_STATUS_GET(status) (status & PAGE_STATUS_MASK) #define PAGE_STATUS_SET(status, value) ((status & ~PAGE_STATUS_MASK) | (value & PAGE_STATUS_MASK)) + #define PAGE_ISOLATE_SHIFT (7) +#define PAGE_ISOLATE_MASK ((u8)1 << PAGE_ISOLATE_SHIFT) #define PAGE_ISOLATE_SET(status, value) \ - ((status & PAGE_STATUS_MASK) | (value << PAGE_ISOLATE_SHIFT)) -#define IS_PAGE_ISOLATED(status) ((bool)(status & ~PAGE_STATUS_MASK)) + ((status & ~PAGE_ISOLATE_MASK) | (value << PAGE_ISOLATE_SHIFT)) +#define IS_PAGE_ISOLATED(status) ((bool)(status & PAGE_ISOLATE_MASK)) + +#define PAGE_MOVABLE_SHIFT (6) +#define PAGE_MOVABLE_MASK ((u8)1 << PAGE_MOVABLE_SHIFT) +#define PAGE_MOVABLE_CLEAR(status) ((status) & ~PAGE_MOVABLE_MASK) +#define PAGE_MOVABLE_SET(status) (status | PAGE_MOVABLE_MASK) + +#define IS_PAGE_MOVABLE(status) ((bool)(status & PAGE_MOVABLE_MASK)) /* Global integer used to determine if module parameter value has been * provided and if page migration feature is enabled. diff --git a/mali_kbase/mali_kbase_mem_pool.c b/mali_kbase/mali_kbase_mem_pool.c index 75569cc..fa8f34d 100644 --- a/mali_kbase/mali_kbase_mem_pool.c +++ b/mali_kbase/mali_kbase_mem_pool.c @@ -28,6 +28,11 @@ #include <linux/shrinker.h> #include <linux/atomic.h> #include <linux/version.h> +#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE +#include <linux/sched/signal.h> +#else +#include <linux/signal.h> +#endif #define pool_dbg(pool, format, ...) \ dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format, \ @@ -39,6 +44,47 @@ #define NOT_DIRTY false #define NOT_RECLAIMED false +/** + * can_alloc_page() - Check if the current thread can allocate a physical page + * + * @pool: Pointer to the memory pool. + * @page_owner: Pointer to the task/process that created the Kbase context + * for which a page needs to be allocated. It can be NULL if + * the page won't be associated with Kbase context. + * @alloc_from_kthread: Flag indicating that the current thread is a kernel thread. + * + * This function checks if the current thread is a kernel thread and can make a + * request to kernel to allocate a physical page. If the kernel thread is allocating + * a page for the Kbase context and the process that created the context is exiting + * or is being killed, then there is no point in doing a page allocation. 
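The reworked masks in the mali_kbase_mem_migrate.h hunk above imply a single status byte carrying three fields: bits 5..0 hold the page state, bit 6 the movable flag (now tracked explicitly instead of relying on PageMovable()), and bit 7 the isolation flag. A small worked example of the new helpers, assuming a struct kbase_page_metadata whose status member is the u8 described by these masks:

/* Illustrative only: exercise the status-byte helpers on a page's metadata. */
static void example_status_bits(struct kbase_page_metadata *page_md)
{
	/* bits 5..0: state; bit 6: movable; bit 7: isolated */
	page_md->status = PAGE_STATUS_SET(page_md->status, PT_MAPPED);
	page_md->status = PAGE_MOVABLE_SET(page_md->status);	/* paired with __SetPageMovable() */

	WARN_ON(!IS_PAGE_MOVABLE(page_md->status));
	WARN_ON(IS_PAGE_ISOLATED(page_md->status));		/* bit 7 untouched here */

	page_md->status = PAGE_MOVABLE_CLEAR(page_md->status);	/* paired with __ClearPageMovable() */
}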
+ * + * The check done by the function is particularly helpful when the system is running + * low on memory. When a page is allocated from the context of a kernel thread, OoM + * killer doesn't consider the kernel thread for killing and kernel keeps retrying + * to allocate the page as long as the OoM killer is able to kill processes. + * The check allows kernel thread to quickly exit the page allocation loop once OoM + * killer has initiated the killing of @page_owner, thereby unblocking the context + * termination for @page_owner and freeing of GPU memory allocated by it. This helps + * in preventing the kernel panic and also limits the number of innocent processes + * that get killed. + * + * Return: true if the page can be allocated otherwise false. + */ +static inline bool can_alloc_page(struct kbase_mem_pool *pool, struct task_struct *page_owner, + const bool alloc_from_kthread) +{ + if (likely(!alloc_from_kthread || !page_owner)) + return true; + + if ((page_owner->flags & PF_EXITING) || fatal_signal_pending(page_owner)) { + dev_info(pool->kbdev->dev, "%s : Process %s/%d exiting", + __func__, page_owner->comm, task_pid_nr(page_owner)); + return false; + } + + return true; +} + static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool) { ssize_t max_size = kbase_mem_pool_max_size(pool); @@ -342,10 +388,12 @@ static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool, return nr_freed; } -int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow) +int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow, + struct task_struct *page_owner) { struct page *p; size_t i; + const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD); kbase_mem_pool_lock(pool); @@ -360,6 +408,9 @@ int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow) } kbase_mem_pool_unlock(pool); + if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread))) + return -ENOMEM; + p = kbase_mem_alloc_page(pool); if (!p) { kbase_mem_pool_lock(pool); @@ -392,7 +443,7 @@ void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size) if (new_size < cur_size) kbase_mem_pool_shrink(pool, cur_size - new_size); else if (new_size > cur_size) - err = kbase_mem_pool_grow(pool, new_size - cur_size); + err = kbase_mem_pool_grow(pool, new_size - cur_size, NULL); if (err) { size_t grown_size = kbase_mem_pool_size(pool); @@ -656,13 +707,15 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, } int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, - struct tagged_addr *pages, bool partial_allowed) + struct tagged_addr *pages, bool partial_allowed, + struct task_struct *page_owner) { struct page *p; size_t nr_from_pool; size_t i = 0; int err = -ENOMEM; size_t nr_pages_internal; + const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD); nr_pages_internal = nr_4k_pages / (1u << (pool->order)); @@ -697,7 +750,7 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, if (i != nr_4k_pages && pool->next_pool) { /* Allocate via next pool */ err = kbase_mem_pool_alloc_pages(pool->next_pool, nr_4k_pages - i, pages + i, - partial_allowed); + partial_allowed, page_owner); if (err < 0) goto err_rollback; @@ -706,6 +759,9 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, } else { /* Get any remaining pages from kernel */ while (i != nr_4k_pages) { + if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread))) + goto err_rollback; + p = 
kbase_mem_alloc_page(pool); if (!p) { if (partial_allowed) diff --git a/mali_kbase/mali_kbase_refcount_defs.h b/mali_kbase/mali_kbase_refcount_defs.h new file mode 100644 index 0000000..c517a2d --- /dev/null +++ b/mali_kbase/mali_kbase_refcount_defs.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_REFCOUNT_DEFS_H_ +#define _KBASE_REFCOUNT_DEFS_H_ + +/* + * The Refcount API is available from 4.11 onwards + * This file hides the compatibility issues with this for the rest the driver + */ + +#include <linux/version.h> +#include <linux/types.h> + +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) + +#define kbase_refcount_t atomic_t +#define kbase_refcount_read(x) atomic_read(x) +#define kbase_refcount_set(x, v) atomic_set(x, v) +#define kbase_refcount_dec_and_test(x) atomic_dec_and_test(x) +#define kbase_refcount_dec(x) atomic_dec(x) +#define kbase_refcount_inc_not_zero(x) atomic_inc_not_zero(x) +#define kbase_refcount_inc(x) atomic_inc(x) + +#else + +#include <linux/refcount.h> + +#define kbase_refcount_t refcount_t +#define kbase_refcount_read(x) refcount_read(x) +#define kbase_refcount_set(x, v) refcount_set(x, v) +#define kbase_refcount_dec_and_test(x) refcount_dec_and_test(x) +#define kbase_refcount_dec(x) refcount_dec(x) +#define kbase_refcount_inc_not_zero(x) refcount_inc_not_zero(x) +#define kbase_refcount_inc(x) refcount_inc(x) + +#endif /* (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) */ + +#endif /* _KBASE_REFCOUNT_DEFS_H_ */ diff --git a/mali_kbase/mali_kbase_softjobs.c b/mali_kbase/mali_kbase_softjobs.c index b64bbc1..f494a8f 100644 --- a/mali_kbase/mali_kbase_softjobs.c +++ b/mali_kbase/mali_kbase_softjobs.c @@ -943,6 +943,13 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) int ret; u32 i; + if (!kbase_mem_allow_alloc(kctx)) { + dev_dbg(kbdev->dev, "Invalid attempt to allocate JIT memory by %s/%d for ctx %d_%d", + current->comm, current->pid, kctx->tgid, kctx->id); + ret = -EINVAL; + goto fail; + } + /* For backwards compatibility, and to prevent reading more than 1 jit * info struct on jit version 1 */ diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c index 4a09265..4cac787 100644 --- a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c +++ b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
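The new mali_kbase_refcount_defs.h above is what allows struct kbase_va_region's va_refcnt (converted earlier in this patch) to be taken and released without holding reg_lock: on 4.11+ kernels the macros map onto refcount_t, otherwise onto plain atomic_t. A minimal usage sketch of the compatibility type, following the same get/put shape as kbase_va_region_alloc_get()/_put(); the struct and function names here are illustrative only:

/* Illustrative object using the compatibility refcount type. */
struct example_obj {
	kbase_refcount_t refcnt;
};

static void example_obj_init(struct example_obj *obj)
{
	kbase_refcount_set(&obj->refcnt, 1);
}

static struct example_obj *example_obj_get(struct example_obj *obj)
{
	WARN_ON(!kbase_refcount_read(&obj->refcnt));
	kbase_refcount_inc(&obj->refcnt);
	return obj;
}

static void example_obj_put(struct example_obj *obj)
{
	if (kbase_refcount_dec_and_test(&obj->refcnt))
		kfree(obj);	/* last reference dropped */
}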
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -150,17 +150,18 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, "true" : "false"; int as_no = as->number; unsigned long flags; + const uintptr_t fault_addr = fault->addr; /* terminal fault, print info about the fault */ dev_err(kbdev->dev, - "GPU bus fault in AS%d at PA 0x%016llX\n" + "GPU bus fault in AS%d at PA %pK\n" "PA_VALID: %s\n" "raw fault status: 0x%X\n" "exception type 0x%X: %s\n" "access type 0x%X: %s\n" "source id 0x%X\n" "pid: %d\n", - as_no, fault->addr, + as_no, (void *)fault_addr, addr_valid, status, exception_type, kbase_gpu_exception_name(exception_type), @@ -557,6 +558,7 @@ int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i) kbdev->as[i].bf_data.addr = 0ULL; kbdev->as[i].pf_data.addr = 0ULL; kbdev->as[i].gf_data.addr = 0ULL; + kbdev->as[i].is_unresponsive = false; kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", WQ_UNBOUND, 1, i); if (!kbdev->as[i].pf_wq) diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c index 83605c3..d716ce0 100644 --- a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c +++ b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -63,15 +63,16 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, u32 const exception_data = (status >> 8) & 0xFFFFFF; int const as_no = as->number; unsigned long flags; + const uintptr_t fault_addr = fault->addr; /* terminal fault, print info about the fault */ dev_err(kbdev->dev, - "GPU bus fault in AS%d at PA 0x%016llX\n" + "GPU bus fault in AS%d at PA %pK\n" "raw fault status: 0x%X\n" "exception type 0x%X: %s\n" "exception data 0x%X\n" "pid: %d\n", - as_no, fault->addr, + as_no, (void *)fault_addr, status, exception_type, kbase_gpu_exception_name(exception_type), exception_data, @@ -428,6 +429,7 @@ int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i) kbdev->as[i].number = i; kbdev->as[i].bf_data.addr = 0ULL; kbdev->as[i].pf_data.addr = 0ULL; + kbdev->as[i].is_unresponsive = false; kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%u", 0, 1, i); if (!kbdev->as[i].pf_wq) diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c index 41876ff..ea58381 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.c +++ b/mali_kbase/mmu/mali_kbase_mmu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -144,35 +144,21 @@ static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, siz enum kbase_mmu_op_type op) { u32 flush_op; - int ret; - - if (WARN_ON(kbdev == NULL)) - return; lockdep_assert_held(&kbdev->hwaccess_lock); /* Translate operation to command */ - if (op == KBASE_MMU_OP_FLUSH_PT) { + if (op == KBASE_MMU_OP_FLUSH_PT) flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2; - } else if (op == KBASE_MMU_OP_FLUSH_MEM) { + else if (op == KBASE_MMU_OP_FLUSH_MEM) flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC; - } else { + else { dev_warn(kbdev->dev, "Invalid flush request (op = %d)", op); return; } - ret = kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op); - - if (ret) { - /* Flush failed to complete, assume the GPU has hung and - * perform a reset to recover - */ - dev_err(kbdev->dev, - "Flush for physical address range did not complete. Issuing GPU soft-reset to recover"); - - if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) - kbase_reset_gpu(kbdev); - } + if (kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op)) + dev_err(kbdev->dev, "Flush for physical address range did not complete"); } #endif @@ -190,21 +176,15 @@ static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, siz static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr, const struct kbase_mmu_hw_op_param *op_param) { - int err = 0; unsigned long flags; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) { as_nr = kctx ? kctx->as_nr : as_nr; - err = kbase_mmu_hw_do_unlock(kbdev, &kbdev->as[as_nr], op_param); - } - - if (err) { - dev_err(kbdev->dev, - "Invalidate after GPU page table update did not complete. Issuing GPU soft-reset to recover"); - if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) - kbase_reset_gpu(kbdev); + if (kbase_mmu_hw_do_unlock(kbdev, &kbdev->as[as_nr], op_param)) + dev_err(kbdev->dev, + "Invalidate after GPU page table update did not complete"); } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -215,25 +195,14 @@ static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kct static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param) { - int err = 0; unsigned long flags; /* AS transaction begin */ mutex_lock(&kbdev->mmu_hw_mutex); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (kbdev->pm.backend.gpu_powered) - err = kbase_mmu_hw_do_flush_locked(kbdev, as, op_param); - - if (err) { - /* Flush failed to complete, assume the GPU has hung and - * perform a reset to recover. - */ - dev_err(kbdev->dev, "Flush for GPU page table update did not complete. 
Issuing GPU soft-reset to recover"); - - if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) - kbase_reset_gpu_locked(kbdev); - } + if (kbdev->pm.backend.gpu_powered && (kbase_mmu_hw_do_flush_locked(kbdev, as, op_param))) + dev_err(kbdev->dev, "Flush for GPU page table update did not complete"); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&kbdev->mmu_hw_mutex); @@ -308,7 +277,6 @@ static void mmu_flush_invalidate(struct kbase_device *kbdev, struct kbase_contex static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr, const struct kbase_mmu_hw_op_param *op_param) { - int err = 0; unsigned long flags; /* AS transaction begin */ @@ -317,19 +285,8 @@ static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) { as_nr = kctx ? kctx->as_nr : as_nr; - err = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[as_nr], - op_param); - } - - if (err) { - /* Flush failed to complete, assume the GPU has hung and - * perform a reset to recover. - */ - dev_err(kbdev->dev, - "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover"); - - if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) - kbase_reset_gpu(kbdev); + if (kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[as_nr], op_param)) + dev_err(kbdev->dev, "Flush for GPU page table update did not complete"); } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -405,13 +362,11 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kb * @level: The level of MMU page table. * @flush_op: The type of MMU flush operation to perform. * @dirty_pgds: Flags to track every level where a PGD has been updated. - * @free_pgds_list: Linked list of the page directory pages to free. */ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, phys_addr_t *pgds, u64 vpfn, int level, - enum kbase_mmu_op_type flush_op, u64 *dirty_pgds, - struct list_head *free_pgds_list); + enum kbase_mmu_op_type flush_op, u64 *dirty_pgds); static void kbase_mmu_account_freed_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) { @@ -485,14 +440,17 @@ static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_tabl phys_addr_t pgd) { struct page *p; + bool page_is_isolated = false; lockdep_assert_held(&mmut->mmu_lock); p = pfn_to_page(PFN_DOWN(pgd)); + page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p); - kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true); - - kbase_mmu_account_freed_pgd(kbdev, mmut); + if (likely(!page_is_isolated)) { + kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true); + kbase_mmu_account_freed_pgd(kbdev, mmut); + } } /** @@ -500,41 +458,42 @@ static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_tabl * * @kbdev: Device pointer. * @mmut: GPU MMU page table. - * @free_pgds_list: Linked list of the page directory pages to free. * * This function will call kbase_mmu_free_pgd() on each page directory page - * present in the @free_pgds_list. + * present in the list of free PGDs inside @mmut. * * The function is supposed to be called after the GPU cache and MMU TLB has * been invalidated post the teardown loop. + * + * The mmu_lock shall be held prior to calling the function. 
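The free-PGD bookkeeping described in the kernel-doc above (and implemented in the hunks just below) moves from an on-stack list_head to a fixed array held in mmut->scratch_mem.free_pgds, bounded by MAX_FREE_PGDS. A sketch of the intended call sequence under mmu_lock; example_teardown() is a hypothetical caller, standing in for the real teardown loop:

/* Illustrative only: record emptied PGDs, flush/invalidate, then free them in one pass. */
static void example_teardown(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
			     struct page *empty_pgd_page)
{
	lockdep_assert_held(&mmut->mmu_lock);

	kbase_mmu_reset_free_pgds_list(mmut);			/* start with an empty array */
	kbase_mmu_add_to_free_pgds_list(mmut, empty_pgd_page);	/* bounded by MAX_FREE_PGDS */

	/* ... GPU cache flush and MMU TLB invalidation happen here ... */

	kbase_mmu_free_pgds_list(kbdev, mmut);			/* frees pages and resets head_index */
}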
*/ -static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, - struct list_head *free_pgds_list) +static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) { - struct page *page, *next_page; + size_t i; - mutex_lock(&mmut->mmu_lock); + lockdep_assert_held(&mmut->mmu_lock); - list_for_each_entry_safe(page, next_page, free_pgds_list, lru) { - list_del_init(&page->lru); - kbase_mmu_free_pgd(kbdev, mmut, page_to_phys(page)); - } + for (i = 0; i < mmut->scratch_mem.free_pgds.head_index; i++) + kbase_mmu_free_pgd(kbdev, mmut, page_to_phys(mmut->scratch_mem.free_pgds.pgds[i])); - mutex_unlock(&mmut->mmu_lock); + mmut->scratch_mem.free_pgds.head_index = 0; } -static void kbase_mmu_add_to_free_pgds_list(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - struct page *p, struct list_head *free_pgds_list) +static void kbase_mmu_add_to_free_pgds_list(struct kbase_mmu_table *mmut, struct page *p) { - bool page_is_isolated = false; - lockdep_assert_held(&mmut->mmu_lock); - page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p); + if (WARN_ON_ONCE(mmut->scratch_mem.free_pgds.head_index > (MAX_FREE_PGDS - 1))) + return; + + mmut->scratch_mem.free_pgds.pgds[mmut->scratch_mem.free_pgds.head_index++] = p; +} + +static inline void kbase_mmu_reset_free_pgds_list(struct kbase_mmu_table *mmut) +{ + lockdep_assert_held(&mmut->mmu_lock); - if (likely(!page_is_isolated)) - list_add(&p->lru, free_pgds_list); + mmut->scratch_mem.free_pgds.head_index = 0; } /** @@ -627,6 +586,7 @@ static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev, */ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC; struct kbase_mmu_hw_op_param op_param; + int ret = 0; mutex_lock(&kbdev->mmu_hw_mutex); @@ -645,16 +605,20 @@ static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev, spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds); - kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, faulting_as, &op_param); + ret = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, faulting_as, &op_param); spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); } else { mmu_hw_operation_begin(kbdev); - kbase_mmu_hw_do_flush(kbdev, faulting_as, &op_param); + ret = kbase_mmu_hw_do_flush(kbdev, faulting_as, &op_param); mmu_hw_operation_end(kbdev); } mutex_unlock(&kbdev->mmu_hw_mutex); + if (ret) + dev_err(kbdev->dev, + "Flush for GPU page fault due to write access did not complete"); + kbase_mmu_hw_enable_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); } @@ -869,17 +833,13 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, return false; } -#ifdef CONFIG_MALI_2MB_ALLOC - if (new_pages >= (SZ_2M / SZ_4K)) { + if (kctx->kbdev->pagesize_2mb && new_pages >= (SZ_2M / SZ_4K)) { root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id]; *grow_2mb_pool = true; } else { -#endif root_pool = &kctx->mem_pools.small[region->gpu_alloc->group_id]; *grow_2mb_pool = false; -#ifdef CONFIG_MALI_2MB_ALLOC } -#endif if (region->gpu_alloc != region->cpu_alloc) new_pages *= 2; @@ -1128,22 +1088,22 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) } page_fault_retry: -#ifdef CONFIG_MALI_2MB_ALLOC - /* Preallocate (or re-allocate) memory for the sub-allocation structs if necessary */ - for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { - if (!prealloc_sas[i]) { - prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); - + if 
(kbdev->pagesize_2mb) { + /* Preallocate (or re-allocate) memory for the sub-allocation structs if necessary */ + for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { if (!prealloc_sas[i]) { - kbase_mmu_report_fault_and_kill( - kctx, faulting_as, - "Failed pre-allocating memory for sub-allocations' metadata", - fault); - goto fault_done; + prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); + + if (!prealloc_sas[i]) { + kbase_mmu_report_fault_and_kill( + kctx, faulting_as, + "Failed pre-allocating memory for sub-allocations' metadata", + fault); + goto fault_done; + } } } } -#endif /* CONFIG_MALI_2MB_ALLOC */ /* so we have a translation fault, * let's see if it is for growable memory @@ -1457,8 +1417,7 @@ page_fault_retry: * Otherwise fail the allocation. */ if (pages_to_grow > 0) { -#ifdef CONFIG_MALI_2MB_ALLOC - if (grow_2mb_pool) { + if (kbdev->pagesize_2mb && grow_2mb_pool) { /* Round page requirement up to nearest 2 MB */ struct kbase_mem_pool *const lp_mem_pool = &kctx->mem_pools.large[ @@ -1469,18 +1428,15 @@ page_fault_retry: >> lp_mem_pool->order; ret = kbase_mem_pool_grow(lp_mem_pool, - pages_to_grow); + pages_to_grow, kctx->task); } else { -#endif struct kbase_mem_pool *const mem_pool = &kctx->mem_pools.small[ region->gpu_alloc->group_id]; ret = kbase_mem_pool_grow(mem_pool, - pages_to_grow); -#ifdef CONFIG_MALI_2MB_ALLOC + pages_to_grow, kctx->task); } -#endif } if (ret < 0) { /* failed to extend, handle as a normal PF */ @@ -1570,15 +1526,24 @@ alloc_free: return KBASE_MMU_INVALID_PGD_ADDRESS; } -/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the - * new table from the pool if needed and possible +/** + * mmu_get_next_pgd() - Given PGD PFN for level N, return PGD PFN for level N+1 + * + * @kbdev: Device pointer. + * @mmut: GPU MMU page table. + * @pgd: Physical addresse of level N page directory. + * @vpfn: The virtual page frame number. + * @level: The level of MMU page table (N). + * + * Return: + * * 0 - OK + * * -EFAULT - level N+1 PGD does not exist + * * -EINVAL - kmap() failed for level N PGD PFN */ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, - phys_addr_t *pgd, u64 vpfn, int level, bool *newly_created_pgd, - u64 *dirty_pgds) + phys_addr_t *pgd, u64 vpfn, int level) { u64 *page; - u64 pgd_vpfn = vpfn; phys_addr_t target_pgd; struct page *p; @@ -1594,67 +1559,15 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * p = pfn_to_page(PFN_DOWN(*pgd)); page = kmap(p); if (page == NULL) { - dev_warn(kbdev->dev, "%s: kmap failure", __func__); + dev_err(kbdev->dev, "%s: kmap failure", __func__); return -EINVAL; } if (!kbdev->mmu_mode->pte_is_valid(page[vpfn], level)) { - unsigned int current_valid_entries; - u64 managed_pte; - - target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut); - if (target_pgd == KBASE_MMU_INVALID_PGD_ADDRESS) { - dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure", __func__); - kunmap(p); - return -ENOMEM; - } - - current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(page); - kbdev->mmu_mode->entry_set_pte(&managed_pte, target_pgd); - page[vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte( - kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, managed_pte); - kbdev->mmu_mode->set_num_valid_entries(page, current_valid_entries + 1); - - /* Rely on the caller to update the address space flags. 
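The large-page growth path earlier in this hunk rounds the 4 KiB page requirement up to whole 2 MiB pool entries using the pool order before calling kbase_mem_pool_grow(). A quick stand-alone check of that rounding; the order value 9 (2 MiB / 4 KiB) is an assumption standing in for lp_mem_pool->order.

#include <assert.h>
#include <stdio.h>

#define LP_POOL_ORDER 9u /* assumed: one large-pool entry spans 2^9 4 KiB pages */

/* Round a 4 KiB page requirement up to whole large-pool entries, mirroring the
 * rounding done before kbase_mem_pool_grow() in the page fault handler.
 */
static unsigned int round_up_to_pool_entries(unsigned int pages_4k)
{
	return (pages_4k + ((1u << LP_POOL_ORDER) - 1)) >> LP_POOL_ORDER;
}

int main(void)
{
	assert(round_up_to_pool_entries(1) == 1);   /* 1 page -> one 2 MiB entry      */
	assert(round_up_to_pool_entries(512) == 1); /* exactly 2 MiB                  */
	assert(round_up_to_pool_entries(513) == 2); /* just over 2 MiB -> two entries */
	printf("600 pages -> %u large-pool entries\n", round_up_to_pool_entries(600));
	return 0;
}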
*/ - if (newly_created_pgd && !*newly_created_pgd) { - *newly_created_pgd = true; - if (dirty_pgds) - *dirty_pgds |= 1ULL << level; - } - - /* A new valid entry is added to an existing PGD. Perform the - * invalidate operation for GPU cache as it could be having a - * cacheline that contains the entry (in an invalid form). - * Even if the parent PGD was newly created, invalidation of - * GPU cache is still needed. For explanation, please refer - * the comment in kbase_mmu_insert_pages_no_flush(). - */ - kbase_mmu_sync_pgd(kbdev, mmut->kctx, - *pgd + (vpfn * sizeof(u64)), - kbase_dma_addr(p) + (vpfn * sizeof(u64)), - sizeof(u64), KBASE_MMU_OP_FLUSH_PT); - - /* Update the new target_pgd page to its stable state */ - if (kbase_page_migration_enabled) { - struct kbase_page_metadata *page_md = - kbase_page_private(phys_to_page(target_pgd)); - - spin_lock(&page_md->migrate_lock); - - WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != ALLOCATE_IN_PROGRESS || - IS_PAGE_ISOLATED(page_md->status)); - - if (mmut->kctx) { - page_md->status = PAGE_STATUS_SET(page_md->status, PT_MAPPED); - page_md->data.pt_mapped.mmut = mmut; - page_md->data.pt_mapped.pgd_vpfn_level = - PGD_VPFN_LEVEL_SET(pgd_vpfn, level); - } else { - page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE); - } - - spin_unlock(&page_md->migrate_lock); - } + dev_dbg(kbdev->dev, "%s: invalid PTE at level %d vpfn 0x%llx", __func__, level, + vpfn); + kunmap(p); + return -EFAULT; } else { target_pgd = kbdev->mmu_mode->pte_to_phy_addr( kbdev->mgm_dev->ops.mgm_pte_to_original_pte( @@ -1667,12 +1580,69 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * return 0; } +/** + * mmu_get_lowest_valid_pgd() - Find a valid PGD at or closest to in_level + * + * @kbdev: Device pointer. + * @mmut: GPU MMU page table. + * @vpfn: The virtual page frame number. + * @in_level: The level of MMU page table (N). + * @out_level: Set to the level of the lowest valid PGD found on success. + * Invalid on error. + * @out_pgd: Set to the lowest valid PGD found on success. + * Invalid on error. + * + * Does a page table walk starting from top level (L0) to in_level to find a valid PGD at or + * closest to in_level + * + * Terminology: + * Level-0 = Top-level = highest + * Level-3 = Bottom-level = lowest + * + * Return: + * * 0 - OK + * * -EINVAL - kmap() failed during page table walk. + */ +static int mmu_get_lowest_valid_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + u64 vpfn, int in_level, int *out_level, phys_addr_t *out_pgd) +{ + phys_addr_t pgd; + int l; + int err = 0; + + lockdep_assert_held(&mmut->mmu_lock); + pgd = mmut->pgd; + + for (l = MIDGARD_MMU_TOPLEVEL; l < in_level; l++) { + err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l); + + /* Handle failure condition */ + if (err) { + dev_dbg(kbdev->dev, + "%s: mmu_get_next_pgd() failed to find a valid pgd at level %d", + __func__, l + 1); + break; + } + } + + *out_pgd = pgd; + *out_level = l; + + /* -EFAULT indicates that pgd param was valid but the next pgd entry at vpfn was invalid. + * This implies that we have found the lowest valid pgd. Reset the error code. 
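The walk above treats -EFAULT from mmu_get_next_pgd() as "no PGD below this point" rather than as a failure, which is how the lowest valid level is reported. A small user-space model of that control flow, with a fake presence map instead of real PTE checks:

#include <errno.h>
#include <stdio.h>

#define TOPLEVEL 0
#define BOTTOMLEVEL 3

/* Fake page-table presence map: valid[level] says whether a PGD exists at
 * that level for the VPFN being walked (stand-in for the real PTE checks).
 */
static const int valid[BOTTOMLEVEL + 1] = { 1, 1, 0, 0 };

/* Model of mmu_get_next_pgd(): -EFAULT when the next-level PGD is missing. */
static int get_next_pgd(int level)
{
	return valid[level + 1] ? 0 : -EFAULT;
}

/* Model of mmu_get_lowest_valid_pgd(): walk from the top level towards
 * in_level and stop at the deepest level that still has a valid PGD.
 */
static int get_lowest_valid_pgd(int in_level, int *out_level)
{
	int l, err = 0;

	for (l = TOPLEVEL; l < in_level; l++) {
		err = get_next_pgd(l);
		if (err)
			break;
	}
	*out_level = l;

	/* -EFAULT only means "nothing mapped below this level": the walk itself
	 * succeeded, so the caller sees 0 plus the lowest valid level.
	 */
	if (err == -EFAULT)
		err = 0;
	return err;
}

int main(void)
{
	int level;
	int err = get_lowest_valid_pgd(BOTTOMLEVEL, &level);

	printf("err=%d lowest valid level=%d\n", err, level); /* err=0, level=1 */
	return 0;
}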
+ */ + if (err == -EFAULT) + err = 0; + + return err; +} + /* - * Returns the PGD for the specified level of translation + * On success, sets out_pgd to the PGD for the specified level of translation + * Returns -EFAULT if a valid PGD is not found */ static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, - int level, phys_addr_t *out_pgd, bool *newly_created_pgd, - u64 *dirty_pgds) + int level, phys_addr_t *out_pgd) { phys_addr_t pgd; int l; @@ -1681,12 +1651,12 @@ static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_tab pgd = mmut->pgd; for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) { - int err = - mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l, newly_created_pgd, dirty_pgds); + int err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l); /* Handle failure condition */ if (err) { - dev_dbg(kbdev->dev, "%s: mmu_get_next_pgd failure at level %d", __func__, - l); + dev_err(kbdev->dev, + "%s: mmu_get_next_pgd() failed to find a valid pgd at level %d", + __func__, l + 1); return err; } } @@ -1696,17 +1666,9 @@ static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_tab return 0; } -static int mmu_get_bottom_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, - phys_addr_t *out_pgd, bool *newly_created_pgd, u64 *dirty_pgds) -{ - return mmu_get_pgd_at_level(kbdev, mmut, vpfn, MIDGARD_MMU_BOTTOMLEVEL, out_pgd, - newly_created_pgd, dirty_pgds); -} - static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 from_vpfn, u64 to_vpfn, u64 *dirty_pgds, - struct list_head *free_pgds_list, struct tagged_addr *phys, bool ignore_page_migration) { u64 vpfn = from_vpfn; @@ -1719,6 +1681,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, lockdep_assert_held(&mmut->mmu_lock); mmu_mode = kbdev->mmu_mode; + kbase_mmu_reset_free_pgds_list(mmut); while (vpfn < to_vpfn) { unsigned int idx = vpfn & 0x1FF; @@ -1779,11 +1742,10 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, if (!num_of_valid_entries) { kunmap(p); - kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, free_pgds_list); + kbase_mmu_add_to_free_pgds_list(mmut, p); kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, - KBASE_MMU_OP_NONE, dirty_pgds, - free_pgds_list); + KBASE_MMU_OP_NONE, dirty_pgds); vpfn += count; continue; } @@ -1863,34 +1825,209 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev, mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param); } -/* - * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn' +/** + * update_parent_pgds() - Updates the page table from bottom level towards + * the top level to insert a new ATE + * + * @kbdev: Device pointer. + * @mmut: GPU MMU page table. + * @cur_level: The level of MMU page table where the ATE needs to be added. + * The bottom PGD level. + * @insert_level: The level of MMU page table where the chain of newly allocated + * PGDs needs to be linked-in/inserted. + * The top-most PDG level to be updated. + * @insert_vpfn: The virtual page frame number for the ATE. + * @pgds_to_insert: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) that contains + * the physical addresses of newly allocated PGDs from index + * insert_level+1 to cur_level, and an existing PGD at index + * insert_level. 
+ * + * The newly allocated PGDs are linked from the bottom level up and inserted into the PGD + * at insert_level which already exists in the MMU Page Tables.Migration status is also + * updated for all the newly allocated PGD pages. + * + * Return: + * * 0 - OK + * * -EFAULT - level N+1 PGD does not exist + * * -EINVAL - kmap() failed for level N PGD PFN + */ +static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + int cur_level, int insert_level, u64 insert_vpfn, + phys_addr_t *pgds_to_insert) +{ + int pgd_index; + int err = 0; + + /* Add a PTE for the new PGD page at pgd_index into the parent PGD at (pgd_index-1) + * Loop runs from the bottom-most to the top-most level so that all entries in the chain + * are valid when they are inserted into the MMU Page table via the insert_level PGD. + */ + for (pgd_index = cur_level; pgd_index > insert_level; pgd_index--) { + int parent_index = pgd_index - 1; + phys_addr_t parent_pgd = pgds_to_insert[parent_index]; + unsigned int current_valid_entries; + u64 pte; + phys_addr_t target_pgd = pgds_to_insert[pgd_index]; + u64 parent_vpfn = (insert_vpfn >> ((3 - parent_index) * 9)) & 0x1FF; + struct page *parent_page = pfn_to_page(PFN_DOWN(parent_pgd)); + u64 *parent_page_va; + + if (WARN_ON_ONCE(target_pgd == KBASE_MMU_INVALID_PGD_ADDRESS)) { + err = -EFAULT; + goto failure_recovery; + } + + parent_page_va = kmap(parent_page); + if (unlikely(parent_page_va == NULL)) { + dev_err(kbdev->dev, "%s: kmap failure", __func__); + err = -EINVAL; + goto failure_recovery; + } + + current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(parent_page_va); + + kbdev->mmu_mode->entry_set_pte(&pte, target_pgd); + parent_page_va[parent_vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte( + kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, parent_index, pte); + kbdev->mmu_mode->set_num_valid_entries(parent_page_va, current_valid_entries + 1); + kunmap(parent_page); + + if (parent_index != insert_level) { + /* Newly allocated PGDs */ + kbase_mmu_sync_pgd_cpu( + kbdev, kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)), + sizeof(u64)); + } else { + /* A new valid entry is added to an existing PGD. Perform the + * invalidate operation for GPU cache as it could be having a + * cacheline that contains the entry (in an invalid form). + */ + kbase_mmu_sync_pgd( + kbdev, mmut->kctx, parent_pgd + (parent_vpfn * sizeof(u64)), + kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)), + sizeof(u64), KBASE_MMU_OP_FLUSH_PT); + } + + /* Update the new target_pgd page to its stable state */ + if (kbase_page_migration_enabled) { + struct kbase_page_metadata *page_md = + kbase_page_private(phys_to_page(target_pgd)); + + spin_lock(&page_md->migrate_lock); + + WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != ALLOCATE_IN_PROGRESS || + IS_PAGE_ISOLATED(page_md->status)); + + if (mmut->kctx) { + page_md->status = PAGE_STATUS_SET(page_md->status, PT_MAPPED); + page_md->data.pt_mapped.mmut = mmut; + page_md->data.pt_mapped.pgd_vpfn_level = + PGD_VPFN_LEVEL_SET(insert_vpfn, parent_index); + } else { + page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE); + } + + spin_unlock(&page_md->migrate_lock); + } + } + + return 0; + +failure_recovery: + /* Cleanup PTEs from PGDs. 
The Parent PGD in the loop above is just "PGD" here */ + for (; pgd_index < cur_level; pgd_index++) { + phys_addr_t pgd = pgds_to_insert[pgd_index]; + struct page *pgd_page = pfn_to_page(PFN_DOWN(pgd)); + u64 *pgd_page_va = kmap(pgd_page); + u64 vpfn = (insert_vpfn >> ((3 - pgd_index) * 9)) & 0x1FF; + + kbdev->mmu_mode->entries_invalidate(&pgd_page_va[vpfn], 1); + kunmap(pgd_page); + } + + return err; +} + +/** + * mmu_insert_alloc_pgds() - allocate memory for PGDs from level_low to + * level_high (inclusive) + * + * @kbdev: Device pointer. + * @mmut: GPU MMU page table. + * @level_low: The lower bound for the levels for which the PGD allocs are required + * @level_high: The higher bound for the levels for which the PGD allocs are required + * @new_pgds: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) to write the + * newly allocated PGD addresses to. + * + * Numerically, level_low < level_high, not to be confused with top level and + * bottom level concepts for MMU PGDs. They are only used as low and high bounds + * in an incrementing for-loop. + * + * Return: + * * 0 - OK + * * -ENOMEM - allocation failed for a PGD. */ -int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr phys, size_t nr, - unsigned long flags, int const group_id, - enum kbase_caller_mmu_sync_info mmu_sync_info) +static int mmu_insert_alloc_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + phys_addr_t *new_pgds, int level_low, int level_high) +{ + int err = 0; + int i; + + lockdep_assert_held(&mmut->mmu_lock); + + for (i = level_low; i <= level_high; i++) { + do { + new_pgds[i] = kbase_mmu_alloc_pgd(kbdev, mmut); + if (new_pgds[i] != KBASE_MMU_INVALID_PGD_ADDRESS) + break; + + mutex_unlock(&mmut->mmu_lock); + err = kbase_mem_pool_grow(&kbdev->mem_pools.small[mmut->group_id], + level_high, NULL); + mutex_lock(&mmut->mmu_lock); + if (err) { + dev_err(kbdev->dev, "%s: kbase_mem_pool_grow() returned error %d", + __func__, err); + + /* Free all PGDs allocated in previous successful iterations + * from (i-1) to level_low + */ + for (i = (i - 1); i >= level_low; i--) { + if (new_pgds[i] != KBASE_MMU_INVALID_PGD_ADDRESS) + kbase_mmu_free_pgd(kbdev, mmut, new_pgds[i]); + } + + return err; + } + } while (1); + } + + return 0; +} + +int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn, + struct tagged_addr phys, size_t nr, unsigned long flags, + int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, + bool ignore_page_migration) { phys_addr_t pgd; u64 *pgd_page; - /* In case the insert_single_page only partially completes - * we need to be able to recover - */ - bool recover_required = false; - u64 start_vpfn = vpfn; - size_t recover_count = 0; + u64 insert_vpfn = start_vpfn; size_t remain = nr; int err; struct kbase_device *kbdev; - enum kbase_mmu_op_type flush_op; u64 dirty_pgds = 0; - LIST_HEAD(free_pgds_list); + unsigned int i; + phys_addr_t new_pgds[MIDGARD_MMU_BOTTOMLEVEL + 1]; + enum kbase_mmu_op_type flush_op; + struct kbase_mmu_table *mmut = &kctx->mmu; + int l, cur_level, insert_level; if (WARN_ON(kctx == NULL)) return -EINVAL; /* 64-bit address range is the max */ - KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); + KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE)); kbdev = kctx->kbdev; @@ -1901,7 +2038,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, /* If page migration is enabled, pages involved in multiple GPU mappings * are always treated as not movable. 
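The mmu_insert_alloc_pgds() helper shown above retries each allocation after dropping the lock to grow the memory pool, and rolls back any PGDs already allocated if the pool cannot be grown. A user-space sketch of that drop-lock/grow/retry pattern, with a fake allocator and pool in place of kbase_mmu_alloc_pgd() and kbase_mem_pool_grow():

#include <errno.h>
#include <stdio.h>

#define INVALID_PGD 0ULL
#define NUM_LEVELS 4

static unsigned long long next_pgd = 0x1000; /* fake allocator cursor */
static int pool_refills_left = 1;            /* pretend the pool can be grown once */
static int pool_pages = 1;                   /* pages currently in the fake pool */

static unsigned long long alloc_pgd(void)
{
	if (pool_pages == 0)
		return INVALID_PGD;
	pool_pages--;
	return (next_pgd += 0x1000);
}

static int grow_pool(int pages)
{
	if (pool_refills_left-- <= 0)
		return -ENOMEM;
	pool_pages += pages;
	return 0;
}

/* Model of mmu_insert_alloc_pgds(): allocate one PGD per missing level, growing
 * the pool (with the real mmu_lock dropped) whenever allocation fails, and
 * rolling back earlier allocations if the pool cannot be grown.
 */
static int insert_alloc_pgds(unsigned long long *new_pgds, int level_low, int level_high)
{
	int i;

	for (i = level_low; i <= level_high; i++) {
		do {
			new_pgds[i] = alloc_pgd();
			if (new_pgds[i] != INVALID_PGD)
				break;

			/* The driver drops mmu_lock here, grows the pool, retakes it. */
			if (grow_pool(level_high)) {
				for (i = i - 1; i >= level_low; i--)
					printf("rollback PGD 0x%llx\n", new_pgds[i]);
				return -ENOMEM;
			}
		} while (1);
	}
	return 0;
}

int main(void)
{
	unsigned long long new_pgds[NUM_LEVELS] = { 0 };

	printf("ret=%d\n", insert_alloc_pgds(new_pgds, 1, 3));
	return 0;
}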
*/ - if (kbase_page_migration_enabled) { + if (kbase_page_migration_enabled && !ignore_page_migration) { struct page *phys_page = as_page(phys); struct kbase_page_metadata *page_md = kbase_page_private(phys_page); @@ -1912,12 +2049,11 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, } } - mutex_lock(&kctx->mmu.mmu_lock); + mutex_lock(&mmut->mmu_lock); while (remain) { - unsigned int i; - unsigned int index = vpfn & 0x1FF; - unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; + unsigned int vindex = insert_vpfn & 0x1FF; + unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex; struct page *p; register unsigned int num_of_valid_entries; bool newly_created_pgd = false; @@ -1925,64 +2061,61 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, if (count > remain) count = remain; + cur_level = MIDGARD_MMU_BOTTOMLEVEL; + insert_level = cur_level; + /* - * Repeatedly calling mmu_get_bottom_pgd() is clearly + * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly * suboptimal. We don't have to re-parse the whole tree * each time (just cache the l0-l2 sequence). * On the other hand, it's only a gain when we map more than * 256 pages at once (on average). Do we really care? */ - do { - err = mmu_get_bottom_pgd(kbdev, &kctx->mmu, vpfn, &pgd, &newly_created_pgd, - &dirty_pgds); - if (err != -ENOMEM) - break; - /* Fill the memory pool with enough pages for - * the page walk to succeed - */ - mutex_unlock(&kctx->mmu.mmu_lock); - err = kbase_mem_pool_grow( - &kbdev->mem_pools.small[ - kctx->mmu.group_id], - MIDGARD_MMU_BOTTOMLEVEL); - mutex_lock(&kctx->mmu.mmu_lock); - } while (!err); + /* insert_level < cur_level if there's no valid PGD for cur_level and insert_vpn */ + err = mmu_get_lowest_valid_pgd(kbdev, mmut, insert_vpfn, cur_level, &insert_level, + &pgd); + if (err) { - dev_warn(kbdev->dev, "%s: mmu_get_bottom_pgd failure", __func__); - if (recover_required) { - /* Invalidate the pages we have partially - * completed - */ - mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn, - start_vpfn + recover_count, - &dirty_pgds, &free_pgds_list, - NULL, true); - } + dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d", + __func__, err); goto fail_unlock; } + /* No valid pgd at cur_level */ + if (insert_level != cur_level) { + /* Allocate new pgds for all missing levels from the required level + * down to the lowest valid pgd at insert_level + */ + err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1), + cur_level); + if (err) + goto fail_unlock; + + newly_created_pgd = true; + + new_pgds[insert_level] = pgd; + + /* If we didn't find an existing valid pgd at cur_level, + * we've now allocated one. The ATE in the next step should + * be inserted in this newly allocated pgd. 
+ */ + pgd = new_pgds[cur_level]; + } + p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { - dev_warn(kbdev->dev, "%s: kmap failure", __func__); - if (recover_required) { - /* Invalidate the pages we have partially - * completed - */ - mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn, - start_vpfn + recover_count, - &dirty_pgds, &free_pgds_list, - NULL, true); - } + dev_err(kbdev->dev, "%s: kmap failure", __func__); err = -ENOMEM; - goto fail_unlock; + + goto fail_unlock_free_pgds; } num_of_valid_entries = kbdev->mmu_mode->get_num_valid_entries(pgd_page); for (i = 0; i < count; i++) { - unsigned int ofs = index + i; + unsigned int ofs = vindex + i; /* Fail if the current page is a valid ATE entry */ KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL)); @@ -1994,50 +2127,87 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, kbdev->mmu_mode->set_num_valid_entries( pgd_page, num_of_valid_entries + count); - vpfn += count; - remain -= count; - - if (count > 0 && !newly_created_pgd) - dirty_pgds |= 1ULL << MIDGARD_MMU_BOTTOMLEVEL; + dirty_pgds |= 1ULL << (newly_created_pgd ? insert_level : MIDGARD_MMU_BOTTOMLEVEL); /* MMU cache flush operation here will depend on whether bottom level * PGD is newly created or not. * - * If bottom level PGD is newly created then no cache maintenance is + * If bottom level PGD is newly created then no GPU cache maintenance is * required as the PGD will not exist in GPU cache. Otherwise GPU cache * maintenance is required for existing PGD. */ flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT; - kbase_mmu_sync_pgd(kbdev, kctx, pgd + (index * sizeof(u64)), - kbase_dma_addr(p) + (index * sizeof(u64)), count * sizeof(u64), + kbase_mmu_sync_pgd(kbdev, kctx, pgd + (vindex * sizeof(u64)), + kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64), flush_op); + if (newly_created_pgd) { + err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn, + new_pgds); + if (err) { + dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)", + __func__, err); + + kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count); + + kunmap(p); + goto fail_unlock_free_pgds; + } + } + + insert_vpfn += count; + remain -= count; kunmap(p); - /* We have started modifying the page table. 
- * If further pages need inserting and fail we need to undo what - * has already taken place - */ - recover_required = true; - recover_count += count; } - mutex_unlock(&kctx->mmu.mmu_lock); - mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds, - mmu_sync_info, false); + mutex_unlock(&mmut->mmu_lock); + + mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds, mmu_sync_info, + false); return 0; +fail_unlock_free_pgds: + /* Free the pgds allocated by us from insert_level+1 to bottom level */ + for (l = cur_level; l > insert_level; l--) + kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]); + fail_unlock: - mutex_unlock(&kctx->mmu.mmu_lock); + if (insert_vpfn != start_vpfn) { + /* Invalidate the pages we have partially completed */ + mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, &dirty_pgds, + NULL, true); + } - mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds, - mmu_sync_info, true); - kbase_mmu_free_pgds_list(kbdev, &kctx->mmu, &free_pgds_list); + mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds, mmu_sync_info, + true); + kbase_mmu_free_pgds_list(kbdev, mmut); + mutex_unlock(&mmut->mmu_lock); return err; } +int kbase_mmu_insert_single_imported_page(struct kbase_context *kctx, u64 vpfn, + struct tagged_addr phys, size_t nr, unsigned long flags, + int const group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info) +{ + /* The aliasing sink page has metadata and shall be moved to NOT_MOVABLE. */ + return kbase_mmu_insert_single_page(kctx, vpfn, phys, nr, flags, group_id, mmu_sync_info, + false); +} + +int kbase_mmu_insert_single_aliased_page(struct kbase_context *kctx, u64 vpfn, + struct tagged_addr phys, size_t nr, unsigned long flags, + int const group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info) +{ + /* The aliasing sink page has metadata and shall be moved to NOT_MOVABLE. */ + return kbase_mmu_insert_single_page(kctx, vpfn, phys, nr, flags, group_id, mmu_sync_info, + false); +} + static void kbase_mmu_progress_migration_on_insert(struct tagged_addr phys, struct kbase_va_region *reg, struct kbase_mmu_table *mmut, const u64 vpfn) @@ -2139,7 +2309,9 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu size_t remain = nr; int err; struct kbase_mmu_mode const *mmu_mode; - LIST_HEAD(free_pgds_list); + unsigned int i; + phys_addr_t new_pgds[MIDGARD_MMU_BOTTOMLEVEL + 1]; + int l, cur_level, insert_level; /* Note that 0 is a valid start_vpfn */ /* 64-bit address range is the max */ @@ -2154,13 +2326,12 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu mutex_lock(&mmut->mmu_lock); while (remain) { - unsigned int i; unsigned int vindex = insert_vpfn & 0x1FF; unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex; struct page *p; - int cur_level; register unsigned int num_of_valid_entries; bool newly_created_pgd = false; + enum kbase_mmu_op_type flush_op; if (count > remain) count = remain; @@ -2170,57 +2341,53 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu else cur_level = MIDGARD_MMU_BOTTOMLEVEL; + insert_level = cur_level; + /* - * Repeatedly calling mmu_get_pgd_at_level() is clearly + * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly * suboptimal. We don't have to re-parse the whole tree * each time (just cache the l0-l2 sequence). * On the other hand, it's only a gain when we map more than * 256 pages at once (on average). Do we really care? 
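The chunking in these insert loops follows from the page-table geometry: each PGD holds 512 entries (hence the 0x1FF masks) and each level consumes 9 bits of the virtual page frame number, the same arithmetic used by update_parent_pgds() earlier in this hunk. A quick stand-alone check of those per-level indices, assuming the 4-level, 9-bits-per-level layout implied by the (3 - level) * 9 shift:

#include <stdint.h>
#include <stdio.h>

#define PAGE_ENTRIES 512u /* entries per PGD, hence the 0x1FF masks */

/* Index of @vpfn within the PGD at @level, for a 4-level table where level 0
 * is the top and level 3 the bottom; same expression as the driver's
 * (vpfn >> ((3 - level) * 9)) & 0x1FF.
 */
static unsigned int pgd_index(uint64_t vpfn, int level)
{
	return (unsigned int)((vpfn >> ((3 - level) * 9)) & 0x1FF);
}

int main(void)
{
	const uint64_t vpfn = 0x12345678ULL;
	int level;

	for (level = 0; level <= 3; level++)
		printf("level %d index: %u\n", level, pgd_index(vpfn, level));

	/* One pass of the mapping loop can cover at most the remainder of the
	 * current bottom-level PGD, i.e. PAGE_ENTRIES - (vpfn & 0x1FF) entries.
	 */
	printf("entries left in bottom PGD: %u\n", PAGE_ENTRIES - pgd_index(vpfn, 3));
	return 0;
}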
*/ - do { - err = mmu_get_pgd_at_level(kbdev, mmut, insert_vpfn, cur_level, &pgd, - &newly_created_pgd, dirty_pgds); - if (err != -ENOMEM) - break; - /* Fill the memory pool with enough pages for - * the page walk to succeed - */ - mutex_unlock(&mmut->mmu_lock); - err = kbase_mem_pool_grow( - &kbdev->mem_pools.small[mmut->group_id], - cur_level); - mutex_lock(&mmut->mmu_lock); - } while (!err); + /* insert_level < cur_level if there's no valid PGD for cur_level and insert_vpn */ + err = mmu_get_lowest_valid_pgd(kbdev, mmut, insert_vpfn, cur_level, &insert_level, + &pgd); if (err) { - dev_warn(kbdev->dev, "%s: mmu_get_pgd_at_level failure", __func__); - if (insert_vpfn != start_vpfn) { - /* Invalidate the pages we have partially - * completed - */ - mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, - insert_vpfn, dirty_pgds, - &free_pgds_list, phys, - ignore_page_migration); - } + dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d", + __func__, err); goto fail_unlock; } + /* No valid pgd at cur_level */ + if (insert_level != cur_level) { + /* Allocate new pgds for all missing levels from the required level + * down to the lowest valid pgd at insert_level + */ + err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1), + cur_level); + if (err) + goto fail_unlock; + + newly_created_pgd = true; + + new_pgds[insert_level] = pgd; + + /* If we didn't find an existing valid pgd at cur_level, + * we've now allocated one. The ATE in the next step should + * be inserted in this newly allocated pgd. + */ + pgd = new_pgds[cur_level]; + } + p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { - dev_warn(kbdev->dev, "%s: kmap failure", __func__); - if (insert_vpfn != start_vpfn) { - /* Invalidate the pages we have partially - * completed - */ - mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, - insert_vpfn, dirty_pgds, - &free_pgds_list, phys, - ignore_page_migration); - } + dev_err(kbdev->dev, "%s: kmap failure", __func__); err = -ENOMEM; - goto fail_unlock; + + goto fail_unlock_free_pgds; } num_of_valid_entries = @@ -2262,34 +2429,39 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries); - if (dirty_pgds && !newly_created_pgd) - *dirty_pgds |= 1ULL << cur_level; - - phys += count; - insert_vpfn += count; - remain -= count; + if (dirty_pgds) + *dirty_pgds |= 1ULL << (newly_created_pgd ? insert_level : cur_level); - /* Even if mmu_get_pgd_at_level() allocated a new bottom level - * table page, the invalidation of L2 cache is still needed for - * for the valid entries written in that page. This is because a - * race can happen as soon as the entry of parent level table is - * updated to point to the page of bottom level table. - * GPU can try to access within the the same virtual range that - * is being mapped, before the valid entries of bottom level table - * page are flushed to the memory from the CPU's cache. And if that - * happens then the invalid entries from memory could get fetched - * into the L2 cache and so those entries won't be affected by the - * MMU TLB invalidation done by sending the UNLOCK command. - * If the memory is growable then this could result in unexpected - * page faults happening repeatedly, until the invalid entry is - * evicted from the L2 cache, as Driver would consider the page - * faults for mapped memory as duplicate and won't take any action - * effectively. 
+ /* MMU cache flush operation here will depend on whether bottom level + * PGD is newly created or not. + * + * If bottom level PGD is newly created then no GPU cache maintenance is + * required as the PGD will not exist in GPU cache. Otherwise GPU cache + * maintenance is required for existing PGD. */ + flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT; + kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (vindex * sizeof(u64)), kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64), - KBASE_MMU_OP_FLUSH_PT); + flush_op); + + if (newly_created_pgd) { + err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn, + new_pgds); + if (err) { + dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)", + __func__, err); + kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count); + + kunmap(p); + goto fail_unlock_free_pgds; + } + } + + phys += count; + insert_vpfn += count; + remain -= count; kunmap(p); } @@ -2297,12 +2469,22 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu return 0; +fail_unlock_free_pgds: + /* Free the pgds allocated by us from insert_level+1 to bottom level */ + for (l = cur_level; l > insert_level; l--) + kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]); + fail_unlock: - mutex_unlock(&mmut->mmu_lock); + if (insert_vpfn != start_vpfn) { + /* Invalidate the pages we have partially completed */ + mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, dirty_pgds, + phys, ignore_page_migration); + } mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds ? *dirty_pgds : 0xF, CALLER_MMU_ASYNC, true); - kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list); + kbase_mmu_free_pgds_list(kbdev, mmut); + mutex_unlock(&mmut->mmu_lock); return err; } @@ -2318,7 +2500,6 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *m { int err; u64 dirty_pgds = 0; - LIST_HEAD(free_pgds_list); /* Early out if there is nothing to do */ if (nr == 0) @@ -2336,58 +2517,56 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *m KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); -/** - * kbase_mmu_flush_noretain() - Flush and invalidate the GPU caches - * without retaining the kbase context. - * @kctx: The KBase context. - * @vpfn: The virtual page frame number to start the flush on. - * @nr: The number of pages to flush. - * - * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any - * other locking. - */ -static void kbase_mmu_flush_noretain(struct kbase_context *kctx, u64 vpfn, size_t nr) +int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + u64 vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int as_nr, int const group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info, + struct kbase_va_region *reg) { - struct kbase_device *kbdev = kctx->kbdev; int err; - /* Calls to this function are inherently asynchronous, with respect to - * MMU operations. + u64 dirty_pgds = 0; + + /* Early out if there is nothing to do */ + if (nr == 0) + return 0; + + /* Imported allocations don't have metadata and therefore always ignore the + * page migration logic. 
*/ - const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; - struct kbase_mmu_hw_op_param op_param; + err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, + &dirty_pgds, reg, true); + if (err) + return err; - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex); + mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false); + + return 0; +} + +int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + u64 vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int as_nr, int const group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info, + struct kbase_va_region *reg) +{ + int err; + u64 dirty_pgds = 0; /* Early out if there is nothing to do */ if (nr == 0) - return; + return 0; - /* flush L2 and unlock the VA (resumes the MMU) */ - op_param.vpfn = vpfn; - op_param.nr = nr; - op_param.op = KBASE_MMU_OP_FLUSH_MEM; - op_param.kctx_id = kctx->id; - op_param.mmu_sync_info = mmu_sync_info; - if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { - /* Value used to prevent skipping of any levels when flushing */ - op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF); - err = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[kctx->as_nr], - &op_param); - } else { - err = kbase_mmu_hw_do_flush_locked(kbdev, &kbdev->as[kctx->as_nr], - &op_param); - } + /* Memory aliases are always built on top of existing allocations, + * therefore the state of physical pages shall be updated. + */ + err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, + &dirty_pgds, reg, false); + if (err) + return err; - if (err) { - /* Flush failed to complete, assume the - * GPU has hung and perform a reset to recover - */ - dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover"); + mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false); - if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) - kbase_reset_gpu_locked(kbdev); - } + return 0; } void kbase_mmu_update(struct kbase_device *kbdev, @@ -2412,6 +2591,14 @@ void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr) void kbase_mmu_disable(struct kbase_context *kctx) { + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + struct kbase_device *kbdev = kctx->kbdev; + struct kbase_mmu_hw_op_param op_param = { 0 }; + int lock_err, flush_err; + /* ASSERT that the context has a valid as_nr, which is only the case * when it's scheduled in. * @@ -2422,16 +2609,49 @@ void kbase_mmu_disable(struct kbase_context *kctx) lockdep_assert_held(&kctx->kbdev->hwaccess_lock); lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex); - /* - * The address space is being disabled, drain all knowledge of it out - * from the caches as pages and page tables might be freed after this. - * - * The job scheduler code will already be holding the locks and context - * so just do the flush. 
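kbase_mmu_flush_noretain() goes away and kbase_mmu_disable(), in the hunk that follows, now locks the address space first, flushes L2/LSC only while the L2 is fully powered, disables the AS, then unlocks if the lock succeeded. A minimal sketch of that ordering with every hardware call stubbed out (the stub names are illustrative, not driver APIs):

#include <stdio.h>

enum l2_state { L2_OFF, L2_ON };

static int hw_lock_as(int as_nr) { printf("LOCK as %d\n", as_nr); return 0; }
static int hw_flush_l2_lsc(void) { printf("CACHE_CLN_INV_L2_LSC\n"); return 0; }
static void hw_disable_as(int as_nr) { printf("disable as %d\n", as_nr); }
static void hw_unlock_as(int as_nr) { printf("UNLOCK as %d\n", as_nr); }

/* Model of the reworked kbase_mmu_disable() ordering: lock the AS so nothing
 * can execute while it is torn down, flush caches only when the L2 is fully
 * on (otherwise nothing is running anyway), then disable and finally unlock.
 */
static void mmu_disable_model(int as_nr, enum l2_state l2)
{
	int lock_err = hw_lock_as(as_nr);

	if (l2 == L2_ON && hw_flush_l2_lsc())
		printf("flush failed for as %d\n", as_nr);

	hw_disable_as(as_nr);

	if (!lock_err)
		hw_unlock_as(as_nr);
}

int main(void)
{
	mmu_disable_model(3, L2_ON);
	return 0;
}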
+ op_param.vpfn = 0; + op_param.nr = ~0; + op_param.op = KBASE_MMU_OP_FLUSH_MEM; + op_param.kctx_id = kctx->id; + op_param.mmu_sync_info = mmu_sync_info; + +#if MALI_USE_CSF + /* 0xF value used to prevent skipping of any levels when flushing */ + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) + op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF); +#endif + + /* lock MMU to prevent existing jobs on GPU from executing while the AS is + * not yet disabled + */ + lock_err = kbase_mmu_hw_do_lock(kbdev, &kbdev->as[kctx->as_nr], &op_param); + if (lock_err) + dev_err(kbdev->dev, "Failed to lock AS %d for ctx %d_%d", kctx->as_nr, kctx->tgid, + kctx->id); + + /* Issue the flush command only when L2 cache is in stable power on state. + * Any other state for L2 cache implies that shader cores are powered off, + * which in turn implies there is no execution happening on the GPU. */ - kbase_mmu_flush_noretain(kctx, 0, ~0); + if (kbdev->pm.backend.l2_state == KBASE_L2_ON) { + flush_err = kbase_gpu_cache_flush_and_busy_wait(kbdev, + GPU_COMMAND_CACHE_CLN_INV_L2_LSC); + if (flush_err) + dev_err(kbdev->dev, + "Failed to flush GPU cache when disabling AS %d for ctx %d_%d", + kctx->as_nr, kctx->tgid, kctx->id); + } + kbdev->mmu_mode->disable_as(kbdev, kctx->as_nr); + + if (!lock_err) { + /* unlock the MMU to allow it to resume */ + lock_err = + kbase_mmu_hw_do_unlock_no_addr(kbdev, &kbdev->as[kctx->as_nr], &op_param); + if (lock_err) + dev_err(kbdev->dev, "Failed to unlock AS %d for ctx %d_%d", kctx->as_nr, + kctx->tgid, kctx->id); + } - kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr); #if !MALI_USE_CSF /* * JM GPUs has some L1 read only caches that need to be invalidated @@ -2439,7 +2659,7 @@ void kbase_mmu_disable(struct kbase_context *kctx) * the slot_rb tracking field so such invalidation is performed when * a new katom is executed on the affected slots. */ - kbase_backend_slot_kctx_purge_locked(kctx->kbdev, kctx); + kbase_backend_slot_kctx_purge_locked(kbdev, kctx); #endif } KBASE_EXPORT_TEST_API(kbase_mmu_disable); @@ -2447,8 +2667,7 @@ KBASE_EXPORT_TEST_API(kbase_mmu_disable); static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, phys_addr_t *pgds, u64 vpfn, int level, - enum kbase_mmu_op_type flush_op, u64 *dirty_pgds, - struct list_head *free_pgds_list) + enum kbase_mmu_op_type flush_op, u64 *dirty_pgds) { int current_level; @@ -2480,7 +2699,7 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, current_pgd + (index * sizeof(u64)), sizeof(u64), flush_op); - kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, free_pgds_list); + kbase_mmu_add_to_free_pgds_list(mmut, p); } else { current_valid_entries--; @@ -2500,13 +2719,14 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, /** * mmu_flush_invalidate_teardown_pages() - Perform flush operation after unmapping pages. * - * @kbdev: Pointer to kbase device. - * @kctx: Pointer to kbase context. - * @as_nr: Address space number, for GPU cache maintenance operations - * that happen outside a specific kbase context. - * @phys: Array of physical pages to flush. - * @op_param: Non-NULL pointer to struct containing information about the flush - * operation to perform. + * @kbdev: Pointer to kbase device. + * @kctx: Pointer to kbase context. + * @as_nr: Address space number, for GPU cache maintenance operations + * that happen outside a specific kbase context. + * @phys: Array of physical pages to flush. 
+ * @phys_page_nr: Number of physical pages to flush. + * @op_param: Non-NULL pointer to struct containing information about the flush + * operation to perform. * * This function will do one of three things: * 1. Invalidate the MMU caches, followed by a partial GPU cache flush of the @@ -2514,10 +2734,14 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, * 2. Perform a full GPU cache flush through the GPU_CONTROL interface if feature is * supported on GPU or, * 3. Perform a full GPU cache flush through the MMU_CONTROL interface. + * + * When performing a partial GPU cache flush, the number of physical + * pages does not have to be identical to the number of virtual pages on the MMU, + * to support a single physical address flush for an aliased page. */ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr, - struct tagged_addr *phys, + struct tagged_addr *phys, size_t phys_page_nr, struct kbase_mmu_hw_op_param *op_param) { if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) { @@ -2536,7 +2760,7 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, mmu_invalidate(kbdev, kctx, as_nr, op_param); - for (i = 0; !flush_done && i < op_param->nr; i++) { + for (i = 0; !flush_done && i < phys_page_nr; i++) { spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) mmu_flush_pa_range(kbdev, as_phys_addr_t(phys[i]), PAGE_SIZE, @@ -2549,76 +2773,15 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, #endif } -/** - * kbase_mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table - * - * @kbdev: Pointer to kbase device. - * @mmut: Pointer to GPU MMU page table. - * @vpfn: Start page frame number of the GPU virtual pages to unmap. - * @phys: Array of physical pages currently mapped to the virtual - * pages to unmap, or NULL. This is used for GPU cache maintenance - * and page migration support. - * @nr: Number of pages to unmap. - * @as_nr: Address space number, for GPU cache maintenance operations - * that happen outside a specific kbase context. - * @ignore_page_migration: Whether page migration metadata should be ignored. - * - * We actually discard the ATE and free the page table pages if no valid entries - * exist in PGD. - * - * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is - * currently scheduled into the runpool, and so potentially uses a lot of locks. - * These locks must be taken in the correct order with respect to others - * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more - * information. - * - * The @p phys pointer to physical pages is not necessary for unmapping virtual memory, - * but it is used for fine-grained GPU cache maintenance. If @p phys is NULL, - * GPU cache maintenance will be done as usual, that is invalidating the whole GPU caches - * instead of specific physical address ranges. - * - * Return: 0 on success, otherwise an error code. 
- */ -int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, int as_nr, - bool ignore_page_migration) +static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + u64 vpfn, size_t nr, u64 *dirty_pgds, + struct list_head *free_pgds_list, + enum kbase_mmu_op_type flush_op) { - const size_t requested_nr = nr; - u64 start_vpfn = vpfn; - enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE; - struct kbase_mmu_mode const *mmu_mode; - struct kbase_mmu_hw_op_param op_param; - int err = -EFAULT; - u64 dirty_pgds = 0; - LIST_HEAD(free_pgds_list); - - /* Calls to this function are inherently asynchronous, with respect to - * MMU operations. - */ - const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; - - if (nr == 0) { - /* early out if nothing to do */ - return 0; - } - - /* MMU cache flush strategy depends on the number of pages to unmap. In both cases - * the operation is invalidate but the granularity of cache maintenance may change - * according to the situation. - * - * If GPU control command operations are present and the number of pages is "small", - * then the optimal strategy is flushing on the physical address range of the pages - * which are affected by the operation. That implies both the PGDs which are modified - * or removed from the page table and the physical pages which are freed from memory. - * - * Otherwise, there's no alternative to invalidating the whole GPU cache. - */ - if (mmu_flush_cache_on_gpu_ctrl(kbdev) && phys && nr <= KBASE_PA_RANGE_THRESHOLD_NR_PAGES) - flush_op = KBASE_MMU_OP_FLUSH_PT; - - mutex_lock(&mmut->mmu_lock); + struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode; - mmu_mode = kbdev->mmu_mode; + lockdep_assert_held(&mmut->mmu_lock); + kbase_mmu_reset_free_pgds_list(mmut); while (nr) { unsigned int index = vpfn & 0x1FF; @@ -2703,7 +2866,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table } if (pcount > 0) - dirty_pgds |= 1ULL << level; + *dirty_pgds |= 1ULL << level; num_of_valid_entries = mmu_mode->get_num_valid_entries(page); if (WARN_ON_ONCE(num_of_valid_entries < pcount)) @@ -2725,11 +2888,10 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table pgd + (index * sizeof(u64)), pcount * sizeof(u64), flush_op); - kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, &free_pgds_list); + kbase_mmu_add_to_free_pgds_list(mmut, p); kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, - flush_op, &dirty_pgds, - &free_pgds_list); + flush_op, dirty_pgds); vpfn += count; nr -= count; @@ -2746,19 +2908,77 @@ next: vpfn += count; nr -= count; } - err = 0; out: + return 0; +} + +int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages, + int as_nr, bool ignore_page_migration) +{ + u64 start_vpfn = vpfn; + enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE; + struct kbase_mmu_hw_op_param op_param; + int err = -EFAULT; + u64 dirty_pgds = 0; + LIST_HEAD(free_pgds_list); + + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + + /* This function performs two operations: MMU maintenance and flushing + * the caches. 
To ensure internal consistency between the caches and the + * MMU, it does not make sense to be able to flush only the physical pages + * from the cache and keep the PTE, nor does it make sense to use this + * function to remove a PTE and keep the physical pages in the cache. + * + * However, we have legitimate cases where we can try to tear down a mapping + * with zero virtual and zero physical pages, so we must have the following + * behaviour: + * - if both physical and virtual page counts are zero, return early + * - if either physical and virtual page counts are zero, return early + * - if there are fewer physical pages than virtual pages, return -EINVAL + */ + if (unlikely(nr_virt_pages == 0 || nr_phys_pages == 0)) + return 0; + + if (unlikely(nr_virt_pages < nr_phys_pages)) + return -EINVAL; + + /* MMU cache flush strategy depends on the number of pages to unmap. In both cases + * the operation is invalidate but the granularity of cache maintenance may change + * according to the situation. + * + * If GPU control command operations are present and the number of pages is "small", + * then the optimal strategy is flushing on the physical address range of the pages + * which are affected by the operation. That implies both the PGDs which are modified + * or removed from the page table and the physical pages which are freed from memory. + * + * Otherwise, there's no alternative to invalidating the whole GPU cache. + */ + if (mmu_flush_cache_on_gpu_ctrl(kbdev) && phys && + nr_phys_pages <= KBASE_PA_RANGE_THRESHOLD_NR_PAGES) + flush_op = KBASE_MMU_OP_FLUSH_PT; + + mutex_lock(&mmut->mmu_lock); + + err = kbase_mmu_teardown_pgd_pages(kbdev, mmut, vpfn, nr_virt_pages, &dirty_pgds, + &free_pgds_list, flush_op); + /* Set up MMU operation parameters. See above about MMU cache flush strategy. */ op_param = (struct kbase_mmu_hw_op_param){ .vpfn = start_vpfn, - .nr = requested_nr, + .nr = nr_virt_pages, .mmu_sync_info = mmu_sync_info, .kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF, .op = (flush_op == KBASE_MMU_OP_FLUSH_PT) ? KBASE_MMU_OP_FLUSH_PT : KBASE_MMU_OP_FLUSH_MEM, .flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds), }; - mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, &op_param); + mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, nr_phys_pages, + &op_param); /* If page migration is enabled: the status of all physical pages involved * shall be updated, unless they are not movable. Their status shall be @@ -2766,15 +2986,14 @@ out: * requests to migrate the pages, if they have been isolated. 
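Following the code rather than the comment above, the early exits in kbase_mmu_teardown_pages() return 0 when either page count is zero and -EINVAL only when there are fewer virtual pages than physical pages (aliased mappings may legitimately cover more virtual than physical pages). A sketch of those checks plus the flush-strategy choice; the threshold value and function name here are stand-ins:

#include <errno.h>
#include <stddef.h>
#include <stdio.h>

#define PA_RANGE_THRESHOLD_NR_PAGES 32 /* stand-in for the driver's threshold */

enum flush_op { FLUSH_MEM, FLUSH_PT };

/* Model of the argument checks and flush-strategy selection at the top of
 * kbase_mmu_teardown_pages(): nothing to do if either count is zero, and a
 * teardown may cover more virtual pages than physical pages, never fewer.
 */
static int pick_teardown_flush(size_t nr_phys, size_t nr_virt, int have_phys_array,
			       int gpu_ctrl_flush, enum flush_op *op)
{
	if (nr_virt == 0 || nr_phys == 0)
		return 0;
	if (nr_virt < nr_phys)
		return -EINVAL;

	/* Small unmaps with a known physical page list can use targeted
	 * page-table flushes; everything else falls back to a full flush.
	 */
	if (gpu_ctrl_flush && have_phys_array && nr_phys <= PA_RANGE_THRESHOLD_NR_PAGES)
		*op = FLUSH_PT;
	else
		*op = FLUSH_MEM;
	return 0;
}

int main(void)
{
	enum flush_op op = FLUSH_MEM;
	int err = pick_teardown_flush(4, 16, 1, 1, &op);

	printf("err=%d op=%s\n", err, op == FLUSH_PT ? "FLUSH_PT" : "FLUSH_MEM");
	return 0;
}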
*/ if (kbase_page_migration_enabled && phys && !ignore_page_migration) - kbase_mmu_progress_migration_on_teardown(kbdev, phys, requested_nr); + kbase_mmu_progress_migration_on_teardown(kbdev, phys, nr_phys_pages); - mutex_unlock(&mmut->mmu_lock); + kbase_mmu_free_pgds_list(kbdev, mmut); - kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list); + mutex_unlock(&mmut->mmu_lock); return err; } - KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); /** @@ -2834,7 +3053,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kb if (is_huge(*phys) && (index == index_in_large_page(*phys))) cur_level = MIDGARD_MMU_LEVEL(2); - err = mmu_get_pgd_at_level(kbdev, mmut, vpfn, cur_level, &pgd, NULL, dirty_pgds); + err = mmu_get_pgd_at_level(kbdev, mmut, vpfn, cur_level, &pgd); if (WARN_ON(err)) goto fail_unlock; @@ -3119,9 +3338,9 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p } } - ret = mmu_get_pgd_at_level(kbdev, mmut, vpfn, level, &pgd, NULL, NULL); + ret = mmu_get_pgd_at_level(kbdev, mmut, vpfn, level, &pgd); if (ret) { - dev_warn(kbdev->dev, "%s: failed to find PGD for old page.", __func__); + dev_err(kbdev->dev, "%s: failed to find PGD for old page.", __func__); goto get_pgd_at_level_error; } @@ -3167,10 +3386,8 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p if (ret < 0) { mutex_unlock(&kbdev->mmu_hw_mutex); mutex_unlock(&kbdev->pm.lock); - dev_err(kbdev->dev, - "%s: failed to lock MMU region or flush GPU cache. Issuing GPU soft-reset to recover.", - __func__); - goto gpu_reset; + dev_err(kbdev->dev, "%s: failed to lock MMU region or flush GPU cache", __func__); + goto undo_mappings; } /* Copy memory content. @@ -3270,7 +3487,7 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p /* Checking the final migration transaction error state */ if (ret < 0) { dev_err(kbdev->dev, "%s: failed to unlock MMU region.", __func__); - goto gpu_reset; + goto undo_mappings; } /* Undertaking metadata transfer, while we are holding the mmu_lock */ @@ -3305,19 +3522,13 @@ new_page_map_error: old_page_map_error: return ret; -gpu_reset: - /* Unlock the MMU table before resetting the GPU and undo - * mappings. - */ +undo_mappings: + /* Unlock the MMU table and undo mappings. */ mutex_unlock(&mmut->mmu_lock); kunmap(phys_to_page(pgd)); kunmap(as_page(new_phys)); kunmap(as_page(old_phys)); - /* Reset the GPU because of an unrecoverable error in locking or flushing. */ - if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) - kbase_reset_gpu(kbdev); - return ret; } @@ -3329,7 +3540,6 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev; struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode; u64 *pgd_page_buffer = NULL; - bool page_is_isolated = false; struct page *p = phys_to_page(pgd); lockdep_assert_held(&mmut->mmu_lock); @@ -3342,7 +3552,7 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl /* Copy the page to our preallocated buffer so that we can minimize * kmap_atomic usage */ - pgd_page_buffer = mmut->mmu_teardown_pages[level]; + pgd_page_buffer = mmut->scratch_mem.teardown_pages.levels[level]; memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE); } @@ -3370,41 +3580,27 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl } } - /* Top level PGD page is excluded from migration process. 
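mmu_teardown_level() now copies each live PGD page into a per-level scratch buffer from mmut->scratch_mem before walking its entries, and kbase_mmu_init() (just after this point) adds compile-time checks that the scratch arrays cover every MMU level. The sketch below only illustrates that idea; the array dimension, constants and assertion text are assumptions, not the driver's exact definitions.

#include <assert.h>
#include <stdio.h>
#include <string.h>

#define MIDGARD_MMU_TOPLEVEL 0
#define MIDGARD_MMU_BOTTOMLEVEL 3
#define PAGE_SIZE 4096
#define NUM_TEARDOWN_LEVELS 4 /* stand-in for the scratch array dimension */

/* Per-level scratch buffers standing in for scratch_mem.teardown_pages.levels[]. */
static char teardown_scratch[NUM_TEARDOWN_LEVELS][PAGE_SIZE];

/* Compile-time guard in the spirit of the compiletime_assert() added to
 * kbase_mmu_init(): the scratch storage must cover every level walked.
 */
static_assert(NUM_TEARDOWN_LEVELS >= MIDGARD_MMU_BOTTOMLEVEL,
	      "Array of MMU levels is not large enough.");

int main(void)
{
	char fake_pgd_page[PAGE_SIZE] = { 0 };
	int level = MIDGARD_MMU_TOPLEVEL;

	/* mmu_teardown_level() copies the live PGD page into the scratch buffer
	 * for its level before walking its entries, so the recursion needs no
	 * allocations and keeps kmap usage bounded.
	 */
	memcpy(teardown_scratch[level], fake_pgd_page, PAGE_SIZE);
	printf("copied level %d PGD into scratch\n", level);
	return 0;
}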
*/ - if (level != MIDGARD_MMU_TOPLEVEL) - page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p); - - if (likely(!page_is_isolated)) - kbase_mmu_free_pgd(kbdev, mmut, pgd); + kbase_mmu_free_pgd(kbdev, mmut, pgd); } int kbase_mmu_init(struct kbase_device *const kbdev, struct kbase_mmu_table *const mmut, struct kbase_context *const kctx, int const group_id) { - int level; - if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) || WARN_ON(group_id < 0)) return -EINVAL; + compiletime_assert(KBASE_MEM_ALLOC_MAX_SIZE <= (((8ull << 30) >> PAGE_SHIFT)), + "List of free PGDs may not be large enough."); + compiletime_assert(MAX_PAGES_FOR_FREE_PGDS >= MIDGARD_MMU_BOTTOMLEVEL, + "Array of MMU levels is not large enough."); + mmut->group_id = group_id; mutex_init(&mmut->mmu_lock); mmut->kctx = kctx; mmut->pgd = KBASE_MMU_INVALID_PGD_ADDRESS; - /* Preallocate MMU depth of 3 pages for mmu_teardown_level to use */ - for (level = MIDGARD_MMU_TOPLEVEL; - level < MIDGARD_MMU_BOTTOMLEVEL; level++) { - mmut->mmu_teardown_pages[level] = - kmalloc(PAGE_SIZE, GFP_KERNEL); - - if (!mmut->mmu_teardown_pages[level]) { - kbase_mmu_term(kbdev, mmut); - return -ENOMEM; - } - } - /* We allocate pages into the kbdev memory pool, then * kbase_mmu_alloc_pgd will allocate out of that pool. This is done to * avoid allocations from the kernel happening with the lock held. @@ -3414,7 +3610,7 @@ int kbase_mmu_init(struct kbase_device *const kbdev, err = kbase_mem_pool_grow( &kbdev->mem_pools.small[mmut->group_id], - MIDGARD_MMU_BOTTOMLEVEL); + MIDGARD_MMU_BOTTOMLEVEL, kctx ? kctx->task : NULL); if (err) { kbase_mmu_term(kbdev, mmut); return -ENOMEM; @@ -3430,8 +3626,6 @@ int kbase_mmu_init(struct kbase_device *const kbdev, void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) { - int level; - WARN((mmut->kctx) && (mmut->kctx->as_nr != KBASEP_AS_NR_INVALID), "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before tearing down MMU tables", mmut->kctx->tgid, mmut->kctx->id); @@ -3445,13 +3639,6 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, 0); } - for (level = MIDGARD_MMU_TOPLEVEL; - level < MIDGARD_MMU_BOTTOMLEVEL; level++) { - if (!mmut->mmu_teardown_pages[level]) - break; - kfree(mmut->mmu_teardown_pages[level]); - } - mutex_destroy(&mmut->mmu_lock); } diff --git a/mali_kbase/mmu/mali_kbase_mmu.h b/mali_kbase/mmu/mali_kbase_mmu.h index 2b3e6c0..49b42e0 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.h +++ b/mali_kbase/mmu/mali_kbase_mmu.h @@ -152,21 +152,71 @@ u64 kbase_mmu_create_ate(struct kbase_device *kbdev, struct tagged_addr phy, unsigned long flags, int level, int group_id); int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, - const u64 start_vpfn, struct tagged_addr *phys, size_t nr, + u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int group_id, u64 *dirty_pgds, struct kbase_va_region *reg, bool ignore_page_migration); int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, struct kbase_va_region *reg, bool ignore_page_migration); -int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr phys, size_t nr, - unsigned long flags, int group_id, - enum kbase_caller_mmu_sync_info mmu_sync_info); +int 
kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + u64 vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int as_nr, int group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info, + struct kbase_va_region *reg); +int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + u64 vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int as_nr, int group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info, + struct kbase_va_region *reg); +int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, struct tagged_addr phys, + size_t nr, unsigned long flags, int group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info, + bool ignore_page_migration); +int kbase_mmu_insert_single_imported_page(struct kbase_context *kctx, u64 vpfn, + struct tagged_addr phys, size_t nr, unsigned long flags, + int group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info); +int kbase_mmu_insert_single_aliased_page(struct kbase_context *kctx, u64 vpfn, + struct tagged_addr phys, size_t nr, unsigned long flags, + int group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info); +/** + * kbase_mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table + * + * @kbdev: Pointer to kbase device. + * @mmut: Pointer to GPU MMU page table. + * @vpfn: Start page frame number of the GPU virtual pages to unmap. + * @phys: Array of physical pages currently mapped to the virtual + * pages to unmap, or NULL. This is used for GPU cache maintenance + * and page migration support. + * @nr_phys_pages: Number of physical pages to flush. + * @nr_virt_pages: Number of virtual pages whose PTEs should be destroyed. + * @as_nr: Address space number, for GPU cache maintenance operations + * that happen outside a specific kbase context. + * @ignore_page_migration: Whether page migration metadata should be ignored. + * + * We actually discard the ATE and free the page table pages if no valid entries + * exist in PGD. + * + * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is + * currently scheduled into the runpool, and so potentially uses a lot of locks. + * These locks must be taken in the correct order with respect to others + * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more + * information. + * + * The @p phys pointer to physical pages is not necessary for unmapping virtual memory, + * but it is used for fine-grained GPU cache maintenance. If @p phys is NULL, + * GPU cache maintenance will be done as usual, that is invalidating the whole GPU caches + * instead of specific physical address ranges. + * + * Return: 0 on success, otherwise an error code. + */ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, int as_nr, - bool ignore_page_migration); + struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages, + int as_nr, bool ignore_page_migration); + int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int const group_id); diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c index 68bc697..1a892dc 100644 --- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c +++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,6 +24,7 @@ #include <mali_kbase.h> #include <mali_kbase_ctx_sched.h> #include <mali_kbase_mem.h> +#include <mali_kbase_reset_gpu.h> #include <mmu/mali_kbase_mmu_hw.h> #include <tl/mali_kbase_tracepoints.h> #include <linux/delay.h> @@ -156,37 +157,60 @@ static int lock_region(struct kbase_gpu_props const *gpu_props, u64 *lockaddr, return 0; } -static int wait_ready(struct kbase_device *kbdev, - unsigned int as_nr) +/** + * wait_ready() - Wait for previously issued MMU command to complete. + * + * @kbdev: Kbase device to wait for a MMU command to complete. + * @as_nr: Address space to wait for a MMU command to complete. + * + * Reset GPU if the wait for previously issued command fails. + * + * Return: 0 on successful completion. negative error on failure. + */ +static int wait_ready(struct kbase_device *kbdev, unsigned int as_nr) { - u32 max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; + const ktime_t wait_loop_start = ktime_get_raw(); + const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms; + s64 diff; - /* Wait for the MMU status to indicate there is no active command. */ - while (--max_loops && - kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)) & - AS_STATUS_AS_ACTIVE) { - ; - } + if (unlikely(kbdev->as[as_nr].is_unresponsive)) + return -EBUSY; - if (WARN_ON_ONCE(max_loops == 0)) { - dev_err(kbdev->dev, - "AS_ACTIVE bit stuck for as %u, might be caused by slow/unstable GPU clock or possible faulty FPGA connector", - as_nr); - return -1; - } + do { + unsigned int i; - return 0; + for (i = 0; i < 1000; i++) { + /* Wait for the MMU status to indicate there is no active command */ + if (!(kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)) & + AS_STATUS_AS_ACTIVE)) + return 0; + } + + diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start)); + } while (diff < mmu_as_inactive_wait_time_ms); + + dev_err(kbdev->dev, + "AS_ACTIVE bit stuck for as %u. 
Might be caused by unstable GPU clk/pwr or faulty system", + as_nr); + kbdev->as[as_nr].is_unresponsive = true; + if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu_locked(kbdev); + + return -ETIMEDOUT; } static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd) { - int status; - /* write AS_COMMAND when MMU is ready to accept another command */ - status = wait_ready(kbdev, as_nr); - if (status == 0) + const int status = wait_ready(kbdev, as_nr); + + if (likely(status == 0)) kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd); - else { + else if (status == -EBUSY) { + dev_dbg(kbdev->dev, + "Skipped the wait for AS_ACTIVE bit for as %u, before sending MMU command %u", + as_nr, cmd); + } else { dev_err(kbdev->dev, "Wait for AS_ACTIVE bit failed for as %u, before sending MMU command %u", as_nr, cmd); @@ -259,17 +283,21 @@ static int apply_hw_issue_GPU2019_3901_wa(struct kbase_device *kbdev, u32 *mmu_c /* Wait for the LOCK MMU command to complete, issued by the caller */ ret = wait_ready(kbdev, as_nr); - if (ret) + if (unlikely(ret)) return ret; ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, GPU_COMMAND_CACHE_CLN_INV_LSC); - if (ret) + if (unlikely(ret)) return ret; ret = wait_cores_power_trans_complete(kbdev); - if (ret) + if (unlikely(ret)) { + if (kbase_prepare_to_reset_gpu_locked(kbdev, + RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu_locked(kbdev); return ret; + } /* As LSC is guaranteed to have been flushed we can use FLUSH_PT * MMU command to only flush the L2. @@ -397,12 +425,21 @@ static int mmu_hw_do_lock_no_wait(struct kbase_device *kbdev, struct kbase_as *a ret = mmu_hw_set_lock_addr(kbdev, as->number, lock_addr, op_param); - if (!ret) - write_cmd(kbdev, as->number, AS_COMMAND_LOCK); + if (likely(!ret)) + ret = write_cmd(kbdev, as->number, AS_COMMAND_LOCK); return ret; } +/** + * mmu_hw_do_lock - Issue LOCK command to the MMU and wait for its completion. + * + * @kbdev: Kbase device to issue the MMU operation on. + * @as: Address space to issue the MMU operation on. + * @op_param: Pointer to a struct containing information about the MMU operation. + * + * Return: 0 if issuing the LOCK command was successful, otherwise an error code. + */ static int mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param) { @@ -443,10 +480,10 @@ int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as * ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK); /* Wait for UNLOCK command to complete */ - if (!ret) + if (likely(!ret)) ret = wait_ready(kbdev, as->number); - if (!ret) { + if (likely(!ret)) { u64 lock_addr = 0x0; /* read MMU_AS_CONTROL.LOCKADDR register */ lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI)) @@ -478,6 +515,16 @@ int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as, return ret; } +/** + * mmu_hw_do_flush - Flush MMU and wait for its completion. + * + * @kbdev: Kbase device to issue the MMU operation on. + * @as: Address space to issue the MMU operation on. + * @op_param: Pointer to a struct containing information about the MMU operation. + * @hwaccess_locked: Flag to indicate if the lock has been held. + * + * Return: 0 if flushing MMU was successful, otherwise an error code. 
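+ *
+ * Illustrative call pattern only (an assumed sketch, not copied from an
+ * existing caller; op_param fields are omitted here):
+ *
+ *   struct kbase_mmu_hw_op_param op_param = { 0 };
+ *   int err = mmu_hw_do_flush(kbdev, &kbdev->as[as_nr], &op_param, false);
+ *
+ * A negative return (for example -ETIMEDOUT from wait_ready()) indicates the
+ * address space has been marked unresponsive and a GPU reset has been
+ * requested.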
+ */ static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param, bool hwaccess_locked) { @@ -508,12 +555,9 @@ static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, return ret; #if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_NO_MALI) - /* WA for the BASE_HW_ISSUE_GPU2019_3901. No runtime check is used here - * as the WA is applicable to all CSF GPUs where FLUSH_MEM/PT command is - * supported, and this function doesn't gets called for the GPUs where - * FLUSH_MEM/PT command is deprecated. - */ - if (mmu_cmd == AS_COMMAND_FLUSH_MEM) { + /* WA for the BASE_HW_ISSUE_GPU2019_3901. */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3901) && + mmu_cmd == AS_COMMAND_FLUSH_MEM) { if (!hwaccess_locked) { unsigned long flags = 0; @@ -529,12 +573,13 @@ static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, } #endif - write_cmd(kbdev, as->number, mmu_cmd); + ret = write_cmd(kbdev, as->number, mmu_cmd); /* Wait for the command to complete */ - ret = wait_ready(kbdev, as->number); + if (likely(!ret)) + ret = wait_ready(kbdev, as->number); - if (!ret) + if (likely(!ret)) mmu_command_instr(kbdev, op_param->kctx_id, mmu_cmd, lock_addr, op_param->mmu_sync_info); diff --git a/mali_kbase/tests/build.bp b/mali_kbase/tests/build.bp index 7abae23..5581ba9 100644 --- a/mali_kbase/tests/build.bp +++ b/mali_kbase/tests/build.bp @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -38,3 +38,9 @@ bob_defaults { kbuild_options: ["CONFIG_UNIT_TEST_KERNEL_MODULES=y"], }, } + +bob_defaults { + name: "kernel_unit_tests", + add_to_alias: ["unit_tests"], + srcs: [".*_unit_test/"], +} diff --git a/mali_kbase/thirdparty/mali_kbase_mmap.c b/mali_kbase/thirdparty/mali_kbase_mmap.c index 34d2223..1e636b9 100644 --- a/mali_kbase/thirdparty/mali_kbase_mmap.c +++ b/mali_kbase/thirdparty/mali_kbase_mmap.c @@ -10,6 +10,7 @@ */ #include "linux/mman.h" +#include <linux/version_compat_defs.h> #include <mali_kbase.h> /* mali_kbase_mmap.c @@ -90,7 +91,6 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, if ((*gap_end < info->low_limit) || (*gap_end < gap_start)) return false; - return true; } @@ -132,6 +132,7 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info *info, bool is_shader_code, bool is_same_4gb_page) { +#if (KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE) struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long length, low_limit, high_limit, gap_start, gap_end; @@ -225,7 +226,37 @@ check_current: } } } +#else + unsigned long length, high_limit, gap_start, gap_end; + + MA_STATE(mas, ¤t->mm->mm_mt, 0, 0); + /* Adjust search length to account for worst case alignment overhead */ + length = info->length + info->align_mask; + if (length < info->length) + return -ENOMEM; + + /* + * Adjust search limits by the desired length. + * See implementation comment at top of unmapped_area(). 
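+ *
+ * The loop below walks the current process's VMA maple tree top-down with
+ * mas_empty_area_rev(): each candidate gap of at least 'length' bytes is
+ * handed to align_and_check(), and the first gap accepted has its (possibly
+ * realigned) end address returned. If no suitable gap exists, -ENOMEM is
+ * returned.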
+ */ + gap_end = info->high_limit; + if (gap_end < length) + return -ENOMEM; + high_limit = gap_end - length; + if (info->low_limit > high_limit) + return -ENOMEM; + + while (true) { + if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, length)) + return -ENOMEM; + gap_end = mas.last + 1; + gap_start = mas.min; + + if (align_and_check(&gap_end, gap_start, info, is_shader_code, is_same_4gb_page)) + return gap_end; + } +#endif return -ENOMEM; } @@ -242,8 +273,13 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx, struct vm_unmapped_area_info info; unsigned long align_offset = 0; unsigned long align_mask = 0; +#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) + unsigned long high_limit = arch_get_mmap_base(addr, mm->mmap_base); + unsigned long low_limit = max_t(unsigned long, PAGE_SIZE, kbase_mmap_min_addr); +#else unsigned long high_limit = mm->mmap_base; unsigned long low_limit = PAGE_SIZE; +#endif int cpu_va_bits = BITS_PER_LONG; int gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size; @@ -270,6 +306,13 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx, struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get_nolock(kctx, KBASE_REG_ZONE_SAME_VA); u64 same_va_end_addr = kbase_reg_zone_end_pfn(zone) << PAGE_SHIFT; +#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) + const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags); + + /* requested length too big for entire address space */ + if (len > mmap_end - kbase_mmap_min_addr) + return -ENOMEM; +#endif /* err on fixed address */ if ((flags & MAP_FIXED) || addr) @@ -282,7 +325,7 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx, if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { high_limit = - min_t(unsigned long, mm->mmap_base, same_va_end_addr); + min_t(unsigned long, high_limit, same_va_end_addr); /* If there's enough (> 33 bits) of GPU VA space, align * to 2MB boundaries. @@ -359,9 +402,15 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx, if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base && high_limit < same_va_end_addr) { +#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) + /* Retry above TASK_UNMAPPED_BASE */ + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = min_t(u64, mmap_end, same_va_end_addr); +#else /* Retry above mmap_base */ info.low_limit = mm->mmap_base; info.high_limit = min_t(u64, TASK_SIZE, same_va_end_addr); +#endif ret = kbase_unmapped_area_topdown(&info, is_shader_code, is_same_4gb_page); diff --git a/mali_kbase/tl/mali_kbase_timeline.c b/mali_kbase/tl/mali_kbase_timeline.c index 09de3f0..20356d6 100644 --- a/mali_kbase/tl/mali_kbase_timeline.c +++ b/mali_kbase/tl/mali_kbase_timeline.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,8 +24,6 @@ #include "mali_kbase_tracepoints.h" #include <mali_kbase.h> -#include <mali_kbase_jm.h> - #include <linux/atomic.h> #include <linux/file.h> #include <linux/mutex.h> diff --git a/mali_kbase/tl/mali_kbase_timeline_io.c b/mali_kbase/tl/mali_kbase_timeline_io.c index 359d063..ae57006 100644 --- a/mali_kbase/tl/mali_kbase_timeline_io.c +++ b/mali_kbase/tl/mali_kbase_timeline_io.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -35,6 +35,47 @@ #include <uapi/linux/eventpoll.h> #endif +static int kbase_unprivileged_global_profiling; + +/** + * kbase_unprivileged_global_profiling_set - set permissions for unprivileged processes + * + * @val: String containing value to set. Only strings representing positive + * integers are accepted as valid; any non-positive integer (including 0) + * is rejected. + * @kp: Module parameter associated with this method. + * + * This method can only be used to enable permissions for unprivileged processes, + * if they are disabled: for this reason, the only values which are accepted are + * strings representing positive integers. Since it's impossible to disable + * permissions once they're set, any integer which is non-positive is rejected, + * including 0. + * + * Return: 0 if success, otherwise error code. + */ +static int kbase_unprivileged_global_profiling_set(const char *val, const struct kernel_param *kp) +{ + int new_val; + int ret = kstrtoint(val, 0, &new_val); + + if (ret == 0) { + if (new_val < 1) + return -EINVAL; + + kbase_unprivileged_global_profiling = 1; + } + + return ret; +} + +static const struct kernel_param_ops kbase_global_unprivileged_profiling_ops = { + .get = param_get_int, + .set = kbase_unprivileged_global_profiling_set, +}; + +module_param_cb(kbase_unprivileged_global_profiling, &kbase_global_unprivileged_profiling_ops, + &kbase_unprivileged_global_profiling, 0600); + /* The timeline stream file operations functions. 
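 *
 * Acquiring the timeline stream (kbase_timeline_io_acquire()) is gated by
 * timeline_is_permitted(): the caller must be perfmon-capable (CAP_SYS_ADMIN
 * on kernels before 5.8), unless the kbase_unprivileged_global_profiling
 * module parameter has been set to a positive value.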
*/ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, size_t size, loff_t *f_pos); @@ -43,6 +84,15 @@ static int kbasep_timeline_io_release(struct inode *inode, struct file *filp); static int kbasep_timeline_io_fsync(struct file *filp, loff_t start, loff_t end, int datasync); +static bool timeline_is_permitted(void) +{ +#if KERNEL_VERSION(5, 8, 0) <= LINUX_VERSION_CODE + return kbase_unprivileged_global_profiling || perfmon_capable(); +#else + return kbase_unprivileged_global_profiling || capable(CAP_SYS_ADMIN); +#endif +} + /** * kbasep_timeline_io_packet_pending - check timeline streams for pending * packets @@ -328,6 +378,9 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) }; int err; + if (!timeline_is_permitted()) + return -EPERM; + if (WARN_ON(!kbdev) || (flags & ~BASE_TLSTREAM_FLAGS_MASK)) return -EINVAL; @@ -371,7 +424,7 @@ void kbase_timeline_io_debugfs_init(struct kbase_device *const kbdev) if (WARN_ON(!kbdev) || WARN_ON(IS_ERR_OR_NULL(kbdev->mali_debugfs_directory))) return; - file = debugfs_create_file("tlstream", 0444, kbdev->mali_debugfs_directory, kbdev, + file = debugfs_create_file("tlstream", 0400, kbdev->mali_debugfs_directory, kbdev, &kbasep_tlstream_debugfs_fops); if (IS_ERR_OR_NULL(file)) diff --git a/mali_kbase/tl/mali_kbase_tracepoints.c b/mali_kbase/tl/mali_kbase_tracepoints.c index e8a74e9..f62c755 100644 --- a/mali_kbase/tl/mali_kbase_tracepoints.c +++ b/mali_kbase/tl/mali_kbase_tracepoints.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -87,7 +87,9 @@ enum tl_msg_id_obj { KBASE_TL_KBASE_GPUCMDQUEUE_KICK, KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, - KBASE_TL_KBASE_DEVICE_HALT_CSG, + KBASE_TL_KBASE_DEVICE_HALTING_CSG, + KBASE_TL_KBASE_DEVICE_SUSPEND_CSG, + KBASE_TL_KBASE_DEVICE_CSG_IDLE, KBASE_TL_KBASE_NEW_CTX, KBASE_TL_KBASE_DEL_CTX, KBASE_TL_KBASE_CTX_ASSIGN_AS, @@ -98,6 +100,8 @@ enum tl_msg_id_obj { KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT, KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT, KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION, KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT, KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT, KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE, @@ -116,6 +120,9 @@ enum tl_msg_id_obj { KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START, KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END, KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION, KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END, KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START, @@ -360,13 +367,21 @@ enum tl_msg_id_obj { TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, \ "CSG is programmed to a slot", \ "@IIIII", \ - "kbase_device_id,kernel_ctx_id,gpu_cmdq_grp_handle,kbase_device_csg_slot_index,kbase_device_csg_slot_resumed") \ + "kbase_device_id,kernel_ctx_id,gpu_cmdq_grp_handle,kbase_device_csg_slot_index,kbase_device_csg_slot_resuming") \ 
TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, \ "CSG is deprogrammed from a slot", \ "@II", \ "kbase_device_id,kbase_device_csg_slot_index") \ - TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_HALT_CSG, \ - "CSG is halted", \ + TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_HALTING_CSG, \ + "CSG is halting", \ + "@III", \ + "kbase_device_id,kbase_device_csg_slot_index,kbase_device_csg_slot_suspending") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_SUSPEND_CSG, \ + "CSG is suspended", \ + "@II", \ + "kbase_device_id,kbase_device_csg_slot_index") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_CSG_IDLE, \ + "KBase device is notified that CSG is idle.", \ "@II", \ "kbase_device_id,kbase_device_csg_slot_index") \ TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_CTX, \ @@ -404,11 +419,19 @@ enum tl_msg_id_obj { TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ "KCPU Queue enqueues Wait on Cross Queue Sync Object", \ "@pLII", \ - "kcpu_queue,cqs_obj_gpu_addr,cqs_obj_compare_value,cqs_obj_inherit_error") \ + "kcpu_queue,cqs_obj_gpu_addr,compare_value,inherit_error") \ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET, \ "KCPU Queue enqueues Set on Cross Queue Sync Object", \ "@pL", \ "kcpu_queue,cqs_obj_gpu_addr") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION, \ + "KCPU Queue enqueues Wait Operation on Cross Queue Sync Object", \ + "@pLLIII", \ + "kcpu_queue,cqs_obj_gpu_addr,compare_value,condition,data_type,inherit_error") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION, \ + "KCPU Queue enqueues Set Operation on Cross Queue Sync Object", \ + "@pLLII", \ + "kcpu_queue,cqs_obj_gpu_addr,value,operation,data_type") \ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT, \ "KCPU Queue enqueues Map Import", \ "@pL", \ @@ -481,6 +504,18 @@ enum tl_msg_id_obj { "KCPU Queue executes a Set on Cross Queue Sync Object", \ "@pI", \ "kcpu_queue,execute_error") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START, \ + "KCPU Queue starts a Wait Operation on Cross Queue Sync Object", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END, \ + "KCPU Queue ends a Wait Operation on Cross Queue Sync Object", \ + "@pI", \ + "kcpu_queue,execute_error") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION, \ + "KCPU Queue executes a Set Operation on Cross Queue Sync Object", \ + "@pI", \ + "kcpu_queue,execute_error") \ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, \ "KCPU Queue starts a Map Import", \ "@p", \ @@ -2130,7 +2165,7 @@ void __kbase_tlstream_tl_kbase_device_program_csg( u32 kernel_ctx_id, u32 gpu_cmdq_grp_handle, u32 kbase_device_csg_slot_index, - u32 kbase_device_csg_slot_resumed + u32 kbase_device_csg_slot_resuming ) { const u32 msg_id = KBASE_TL_KBASE_DEVICE_PROGRAM_CSG; @@ -2139,7 +2174,7 @@ void __kbase_tlstream_tl_kbase_device_program_csg( + sizeof(kernel_ctx_id) + sizeof(gpu_cmdq_grp_handle) + sizeof(kbase_device_csg_slot_index) - + sizeof(kbase_device_csg_slot_resumed) + + sizeof(kbase_device_csg_slot_resuming) ; char *buffer; unsigned long acq_flags; @@ -2158,7 +2193,7 @@ void __kbase_tlstream_tl_kbase_device_program_csg( pos = kbasep_serialize_bytes(buffer, pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index)); pos = kbasep_serialize_bytes(buffer, - pos, &kbase_device_csg_slot_resumed, sizeof(kbase_device_csg_slot_resumed)); + pos, &kbase_device_csg_slot_resuming, sizeof(kbase_device_csg_slot_resuming)); 
kbase_tlstream_msgbuf_release(stream, acq_flags); } @@ -2190,13 +2225,71 @@ void __kbase_tlstream_tl_kbase_device_deprogram_csg( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_kbase_device_halt_csg( +void __kbase_tlstream_tl_kbase_device_halting_csg( + struct kbase_tlstream *stream, + u32 kbase_device_id, + u32 kbase_device_csg_slot_index, + u32 kbase_device_csg_slot_suspending +) +{ + const u32 msg_id = KBASE_TL_KBASE_DEVICE_HALTING_CSG; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kbase_device_id) + + sizeof(kbase_device_csg_slot_index) + + sizeof(kbase_device_csg_slot_suspending) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_id, sizeof(kbase_device_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_csg_slot_suspending, sizeof(kbase_device_csg_slot_suspending)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_device_suspend_csg( + struct kbase_tlstream *stream, + u32 kbase_device_id, + u32 kbase_device_csg_slot_index +) +{ + const u32 msg_id = KBASE_TL_KBASE_DEVICE_SUSPEND_CSG; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kbase_device_id) + + sizeof(kbase_device_csg_slot_index) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_id, sizeof(kbase_device_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_device_csg_idle( struct kbase_tlstream *stream, u32 kbase_device_id, u32 kbase_device_csg_slot_index ) { - const u32 msg_id = KBASE_TL_KBASE_DEVICE_HALT_CSG; + const u32 msg_id = KBASE_TL_KBASE_DEVICE_CSG_IDLE; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kbase_device_id) + sizeof(kbase_device_csg_slot_index) @@ -2433,16 +2526,16 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait( struct kbase_tlstream *stream, const void *kcpu_queue, u64 cqs_obj_gpu_addr, - u32 cqs_obj_compare_value, - u32 cqs_obj_inherit_error + u32 compare_value, + u32 inherit_error ) { const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + sizeof(cqs_obj_gpu_addr) - + sizeof(cqs_obj_compare_value) - + sizeof(cqs_obj_inherit_error) + + sizeof(compare_value) + + sizeof(inherit_error) ; char *buffer; unsigned long acq_flags; @@ -2457,9 +2550,9 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait( pos = kbasep_serialize_bytes(buffer, pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr)); pos = kbasep_serialize_bytes(buffer, - pos, &cqs_obj_compare_value, sizeof(cqs_obj_compare_value)); + pos, &compare_value, sizeof(compare_value)); pos = kbasep_serialize_bytes(buffer, - pos, &cqs_obj_inherit_error, sizeof(cqs_obj_inherit_error)); + pos, &inherit_error, sizeof(inherit_error)); 
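+	/* Field values are serialized back-to-back after the message ID and
+	 * timestamp, in the same order as the argument list of the matching
+	 * TRACEPOINT_DESC entry.
+	 */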
kbase_tlstream_msgbuf_release(stream, acq_flags); } @@ -2491,6 +2584,88 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set( kbase_tlstream_msgbuf_release(stream, acq_flags); } +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait_operation( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 cqs_obj_gpu_addr, + u64 compare_value, + u32 condition, + u32 data_type, + u32 inherit_error +) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(cqs_obj_gpu_addr) + + sizeof(compare_value) + + sizeof(condition) + + sizeof(data_type) + + sizeof(inherit_error) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr)); + pos = kbasep_serialize_bytes(buffer, + pos, &compare_value, sizeof(compare_value)); + pos = kbasep_serialize_bytes(buffer, + pos, &condition, sizeof(condition)); + pos = kbasep_serialize_bytes(buffer, + pos, &data_type, sizeof(data_type)); + pos = kbasep_serialize_bytes(buffer, + pos, &inherit_error, sizeof(inherit_error)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set_operation( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 cqs_obj_gpu_addr, + u64 value, + u32 operation, + u32 data_type +) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(cqs_obj_gpu_addr) + + sizeof(value) + + sizeof(operation) + + sizeof(data_type) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr)); + pos = kbasep_serialize_bytes(buffer, + pos, &value, sizeof(value)); + pos = kbasep_serialize_bytes(buffer, + pos, &operation, sizeof(operation)); + pos = kbasep_serialize_bytes(buffer, + pos, &data_type, sizeof(data_type)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( struct kbase_tlstream *stream, const void *kcpu_queue, @@ -2981,6 +3156,83 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set( kbase_tlstream_msgbuf_release(stream, acq_flags); } +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_start( + struct kbase_tlstream *stream, + const void *kcpu_queue +) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, 
sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_end( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u32 execute_error +) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(execute_error) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &execute_error, sizeof(execute_error)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set_operation( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u32 execute_error +) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(execute_error) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &execute_error, sizeof(execute_error)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start( struct kbase_tlstream *stream, const void *kcpu_queue diff --git a/mali_kbase/tl/mali_kbase_tracepoints.h b/mali_kbase/tl/mali_kbase_tracepoints.h index 586fe67..f1f4761 100644 --- a/mali_kbase/tl/mali_kbase_tracepoints.h +++ b/mali_kbase/tl/mali_kbase_tracepoints.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -408,7 +408,7 @@ void __kbase_tlstream_tl_kbase_device_program_csg( u32 kernel_ctx_id, u32 gpu_cmdq_grp_handle, u32 kbase_device_csg_slot_index, - u32 kbase_device_csg_slot_resumed + u32 kbase_device_csg_slot_resuming ); void __kbase_tlstream_tl_kbase_device_deprogram_csg( @@ -417,7 +417,20 @@ void __kbase_tlstream_tl_kbase_device_deprogram_csg( u32 kbase_device_csg_slot_index ); -void __kbase_tlstream_tl_kbase_device_halt_csg( +void __kbase_tlstream_tl_kbase_device_halting_csg( + struct kbase_tlstream *stream, + u32 kbase_device_id, + u32 kbase_device_csg_slot_index, + u32 kbase_device_csg_slot_suspending +); + +void __kbase_tlstream_tl_kbase_device_suspend_csg( + struct kbase_tlstream *stream, + u32 kbase_device_id, + u32 kbase_device_csg_slot_index +); + +void __kbase_tlstream_tl_kbase_device_csg_idle( struct kbase_tlstream *stream, u32 kbase_device_id, u32 kbase_device_csg_slot_index @@ -474,8 +487,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait( struct kbase_tlstream *stream, const void *kcpu_queue, u64 cqs_obj_gpu_addr, - u32 cqs_obj_compare_value, - u32 cqs_obj_inherit_error + u32 compare_value, + u32 inherit_error ); void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set( @@ -484,6 +497,25 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set( u64 cqs_obj_gpu_addr ); +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait_operation( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 cqs_obj_gpu_addr, + u64 compare_value, + u32 condition, + u32 data_type, + u32 inherit_error +); + +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set_operation( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 cqs_obj_gpu_addr, + u64 value, + u32 operation, + u32 data_type +); + void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( struct kbase_tlstream *stream, const void *kcpu_queue, @@ -593,6 +625,23 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set( u32 execute_error ); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_start( + struct kbase_tlstream *stream, + const void *kcpu_queue +); + +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_end( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u32 execute_error +); + +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set_operation( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u32 execute_error +); + void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start( struct kbase_tlstream *stream, const void *kcpu_queue @@ -2026,7 +2075,7 @@ struct kbase_tlstream; * @kernel_ctx_id: Unique ID for the KBase Context * @gpu_cmdq_grp_handle: GPU Command Queue Group handle which will match userspace * @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed - * @kbase_device_csg_slot_resumed: Whether the csg is being resumed + * @kbase_device_csg_slot_resuming: Whether the csg is being resumed */ #if MALI_USE_CSF #define KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( \ @@ -2035,7 +2084,7 @@ struct kbase_tlstream; kernel_ctx_id, \ gpu_cmdq_grp_handle, \ kbase_device_csg_slot_index, \ - kbase_device_csg_slot_resumed \ + kbase_device_csg_slot_resuming \ ) \ do { \ int enabled = atomic_read(&kbdev->timeline_flags); \ @@ -2046,7 +2095,7 @@ struct kbase_tlstream; kernel_ctx_id, \ gpu_cmdq_grp_handle, \ kbase_device_csg_slot_index, \ - 
kbase_device_csg_slot_resumed \ + kbase_device_csg_slot_resuming \ ); \ } while (0) #else @@ -2056,7 +2105,7 @@ struct kbase_tlstream; kernel_ctx_id, \ gpu_cmdq_grp_handle, \ kbase_device_csg_slot_index, \ - kbase_device_csg_slot_resumed \ + kbase_device_csg_slot_resuming \ ) \ do { } while (0) #endif /* MALI_USE_CSF */ @@ -2066,7 +2115,7 @@ struct kbase_tlstream; * * @kbdev: Kbase device * @kbase_device_id: The ID of the physical hardware - * @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed + * @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG is being deprogrammed */ #if MALI_USE_CSF #define KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG( \ @@ -2093,14 +2142,49 @@ struct kbase_tlstream; #endif /* MALI_USE_CSF */ /** - * KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG - CSG is halted + * KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG - CSG is halting * * @kbdev: Kbase device * @kbase_device_id: The ID of the physical hardware - * @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed + * @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG is being halted + * @kbase_device_csg_slot_suspending: Whether the csg is being suspended + */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG( \ + kbdev, \ + kbase_device_id, \ + kbase_device_csg_slot_index, \ + kbase_device_csg_slot_suspending \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_device_halting_csg( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kbase_device_id, \ + kbase_device_csg_slot_index, \ + kbase_device_csg_slot_suspending \ + ); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG( \ + kbdev, \ + kbase_device_id, \ + kbase_device_csg_slot_index, \ + kbase_device_csg_slot_suspending \ + ) \ + do { } while (0) +#endif /* MALI_USE_CSF */ + +/** + * KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG - CSG is suspended + * + * @kbdev: Kbase device + * @kbase_device_id: The ID of the physical hardware + * @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG is being suspended */ #if MALI_USE_CSF -#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG( \ +#define KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( \ kbdev, \ kbase_device_id, \ kbase_device_csg_slot_index \ @@ -2108,14 +2192,45 @@ struct kbase_tlstream; do { \ int enabled = atomic_read(&kbdev->timeline_flags); \ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ - __kbase_tlstream_tl_kbase_device_halt_csg( \ + __kbase_tlstream_tl_kbase_device_suspend_csg( \ __TL_DISPATCH_STREAM(kbdev, obj), \ kbase_device_id, \ kbase_device_csg_slot_index \ ); \ } while (0) #else -#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG( \ +#define KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( \ + kbdev, \ + kbase_device_id, \ + kbase_device_csg_slot_index \ + ) \ + do { } while (0) +#endif /* MALI_USE_CSF */ + +/** + * KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE - KBase device is notified that CSG is idle. 
+ * + * @kbdev: Kbase device + * @kbase_device_id: The ID of the physical hardware + * @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG for which we are receiving an idle notification + */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE( \ + kbdev, \ + kbase_device_id, \ + kbase_device_csg_slot_index \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_device_csg_idle( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kbase_device_id, \ + kbase_device_csg_slot_index \ + ); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE( \ kbdev, \ kbase_device_id, \ kbase_device_csg_slot_index \ @@ -2373,16 +2488,16 @@ struct kbase_tlstream; * @kbdev: Kbase device * @kcpu_queue: KCPU queue * @cqs_obj_gpu_addr: CQS Object GPU pointer - * @cqs_obj_compare_value: Semaphore value that should be exceeded for the WAIT to pass - * @cqs_obj_inherit_error: Flag which indicates if the CQS object error state should be inherited by the queue + * @compare_value: Semaphore value that should be exceeded for the WAIT to pass + * @inherit_error: Flag which indicates if the CQS object error state should be inherited by the queue */ #if MALI_USE_CSF #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ kbdev, \ kcpu_queue, \ cqs_obj_gpu_addr, \ - cqs_obj_compare_value, \ - cqs_obj_inherit_error \ + compare_value, \ + inherit_error \ ) \ do { \ int enabled = atomic_read(&kbdev->timeline_flags); \ @@ -2391,8 +2506,8 @@ struct kbase_tlstream; __TL_DISPATCH_STREAM(kbdev, obj), \ kcpu_queue, \ cqs_obj_gpu_addr, \ - cqs_obj_compare_value, \ - cqs_obj_inherit_error \ + compare_value, \ + inherit_error \ ); \ } while (0) #else @@ -2400,8 +2515,8 @@ struct kbase_tlstream; kbdev, \ kcpu_queue, \ cqs_obj_gpu_addr, \ - cqs_obj_compare_value, \ - cqs_obj_inherit_error \ + compare_value, \ + inherit_error \ ) \ do { } while (0) #endif /* MALI_USE_CSF */ @@ -2438,6 +2553,96 @@ struct kbase_tlstream; #endif /* MALI_USE_CSF */ /** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION - KCPU Queue enqueues Wait Operation on Cross Queue Sync Object + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @cqs_obj_gpu_addr: CQS Object GPU pointer + * @compare_value: Value that should be compared to semaphore value for the WAIT to pass + * @condition: Condition for unblocking WAITs on Timeline Cross Queue Sync Object (e.g. 
greater than, less or equal) + * @data_type: Data type of a CQS Object's value + * @inherit_error: Flag which indicates if the CQS object error state should be inherited by the queue + */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION( \ + kbdev, \ + kcpu_queue, \ + cqs_obj_gpu_addr, \ + compare_value, \ + condition, \ + data_type, \ + inherit_error \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait_operation( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, \ + cqs_obj_gpu_addr, \ + compare_value, \ + condition, \ + data_type, \ + inherit_error \ + ); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION( \ + kbdev, \ + kcpu_queue, \ + cqs_obj_gpu_addr, \ + compare_value, \ + condition, \ + data_type, \ + inherit_error \ + ) \ + do { } while (0) +#endif /* MALI_USE_CSF */ + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION - KCPU Queue enqueues Set Operation on Cross Queue Sync Object + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @cqs_obj_gpu_addr: CQS Object GPU pointer + * @value: Value that will be set or added to semaphore + * @operation: Operation type performed on semaphore value (SET or ADD) + * @data_type: Data type of a CQS Object's value + */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION( \ + kbdev, \ + kcpu_queue, \ + cqs_obj_gpu_addr, \ + value, \ + operation, \ + data_type \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set_operation( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, \ + cqs_obj_gpu_addr, \ + value, \ + operation, \ + data_type \ + ); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION( \ + kbdev, \ + kcpu_queue, \ + cqs_obj_gpu_addr, \ + value, \ + operation, \ + data_type \ + ) \ + do { } while (0) +#endif /* MALI_USE_CSF */ + +/** * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT - KCPU Queue enqueues Map Import * * @kbdev: Kbase device @@ -3000,6 +3205,95 @@ struct kbase_tlstream; #endif /* MALI_USE_CSF */ /** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START - KCPU Queue starts a Wait Operation on Cross Queue Sync Object + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_start( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue \ + ); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) +#endif /* MALI_USE_CSF */ + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END - KCPU Queue ends a Wait Operation on Cross Queue Sync Object + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero + */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END( \ + kbdev, \ + kcpu_queue, \ + execute_error \ + ) \ + do { \ + 
int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_end( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, \ + execute_error \ + ); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END( \ + kbdev, \ + kcpu_queue, \ + execute_error \ + ) \ + do { } while (0) +#endif /* MALI_USE_CSF */ + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION - KCPU Queue executes a Set Operation on Cross Queue Sync Object + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero + */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION( \ + kbdev, \ + kcpu_queue, \ + execute_error \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set_operation( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, \ + execute_error \ + ); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION( \ + kbdev, \ + kcpu_queue, \ + execute_error \ + ) \ + do { } while (0) +#endif /* MALI_USE_CSF */ + +/** * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START - KCPU Queue starts a Map Import * * @kbdev: Kbase device |