author     Toby Sunrise <tobyrs@google.com>  2023-05-01 13:31:16 +0000
committer  Toby Sunrise <tobyrs@google.com>  2023-05-01 13:33:19 +0000
commit     bce5281a0408a175137c08dc93028e2a2c0fb69b (patch)
tree       edc640500ccdf781a123e7fae22fac9c44ddbe46 /mali_kbase
parent     f7a77046d77266482dedf54d134102e6031a7438 (diff)
download   gpu-bce5281a0408a175137c08dc93028e2a2c0fb69b.tar.gz
Mali Valhall Android DDK r43p0-01eac0 KMD
Provenance: 48a9c7e25986318c8475bc245de51e7bec2606e8 (ipdelivery/EAC/v_r43p0)
VX504X08X-BU-00000-r43p0-01eac0 - Valhall Android DDK
VX504X08X-BU-60000-r43p0-01eac0 - Valhall Android Document Bundle
VX504X08X-DC-11001-r43p0-01eac0 - Valhall Android DDK Software Errata
VX504X08X-SW-99006-r43p0-01eac0 - Valhall Android Renderscript AOSP parts

Change-Id: I5df1914eba386e0bf507d4951240e1744f666a29
Diffstat (limited to 'mali_kbase')
-rw-r--r--  mali_kbase/Kbuild | 4
-rw-r--r--  mali_kbase/Kconfig | 43
-rw-r--r--  mali_kbase/Makefile | 20
-rw-r--r--  mali_kbase/Mconfig | 33
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_hw.c | 26
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_internal.h | 10
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_rb.c | 17
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_dummy.c | 2
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_error_generator.c | 15
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_linux.c | 2
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_linux.h | 2
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_driver.c | 35
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_time.c | 79
-rw-r--r--  mali_kbase/build.bp | 22
-rw-r--r--  mali_kbase/context/mali_kbase_context.c | 66
-rw-r--r--  mali_kbase/context/mali_kbase_context.h | 15
-rw-r--r--  mali_kbase/csf/mali_kbase_csf.c | 151
-rw-r--r--  mali_kbase/csf/mali_kbase_csf.h | 2
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_defs.h | 93
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware.c | 68
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c | 9
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c | 21
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_kcpu.c | 104
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_kcpu.h | 6
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c | 18
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_registers.h | 34
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_scheduler.c | 33
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tiler_heap.c | 28
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tl_reader.c | 86
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tl_reader.h | 34
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_csf.c | 11
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_jm.c | 15
-rw-r--r--  mali_kbase/device/mali_kbase_device.c | 6
-rw-r--r--  mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c | 66
-rw-r--r--  mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h | 3
-rw-r--r--  mali_kbase/gpu/mali_kbase_gpu_fault.h | 6
-rw-r--r--  mali_kbase/gpu/mali_kbase_gpu_regmap.h | 7
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c | 42
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c | 5
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt.c | 4
-rw-r--r--  mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c | 20
-rw-r--r--  mali_kbase/jm/mali_kbase_jm_defs.h | 14
-rw-r--r--  mali_kbase/jm/mali_kbase_js_defs.h | 5
-rw-r--r--  mali_kbase/mali_base_hwconfig_features.h | 1
-rw-r--r--  mali_kbase/mali_base_hwconfig_issues.h | 15
-rw-r--r--  mali_kbase/mali_kbase_config_defaults.h | 21
-rw-r--r--  mali_kbase/mali_kbase_core_linux.c | 18
-rw-r--r--  mali_kbase/mali_kbase_ctx_sched.c | 3
-rw-r--r--  mali_kbase/mali_kbase_debug_mem_allocs.c | 4
-rw-r--r--  mali_kbase/mali_kbase_debugfs_helper.c | 5
-rw-r--r--  mali_kbase/mali_kbase_defs.h | 85
-rw-r--r--  mali_kbase/mali_kbase_fence.h | 15
-rw-r--r--  mali_kbase/mali_kbase_gwt.c | 15
-rw-r--r--  mali_kbase/mali_kbase_hw.c | 37
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_time.h | 62
-rw-r--r--  mali_kbase/mali_kbase_js.c | 5
-rw-r--r--  mali_kbase/mali_kbase_mem.c | 386
-rw-r--r--  mali_kbase/mali_kbase_mem.h | 140
-rw-r--r--  mali_kbase/mali_kbase_mem_linux.c | 316
-rw-r--r--  mali_kbase/mali_kbase_mem_migrate.c | 33
-rw-r--r--  mali_kbase/mali_kbase_mem_migrate.h | 17
-rw-r--r--  mali_kbase/mali_kbase_mem_pool.c | 64
-rw-r--r--  mali_kbase/mali_kbase_refcount_defs.h | 57
-rw-r--r--  mali_kbase/mali_kbase_softjobs.c | 7
-rw-r--r--  mali_kbase/mmu/backend/mali_kbase_mmu_csf.c | 8
-rw-r--r--  mali_kbase/mmu/backend/mali_kbase_mmu_jm.c | 8
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.c | 1229
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.h | 64
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_hw_direct.c | 121
-rw-r--r--  mali_kbase/tests/build.bp | 8
-rw-r--r--  mali_kbase/thirdparty/mali_kbase_mmap.c | 53
-rw-r--r--  mali_kbase/tl/mali_kbase_timeline.c | 4
-rw-r--r--  mali_kbase/tl/mali_kbase_timeline_io.c | 57
-rw-r--r--  mali_kbase/tl/mali_kbase_tracepoints.c | 286
-rw-r--r--  mali_kbase/tl/mali_kbase_tracepoints.h | 340
75 files changed, 3159 insertions, 1607 deletions
diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild
index 32b4d37..73375f6 100644
--- a/mali_kbase/Kbuild
+++ b/mali_kbase/Kbuild
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -69,7 +69,7 @@ endif
#
# Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= '"r42p0-01eac0"'
+MALI_RELEASE_NAME ?= '"r43p0-01eac0"'
# Set up defaults if not defined by build system
ifeq ($(CONFIG_MALI_DEBUG), y)
MALI_UNIT_TEST = 1
diff --git a/mali_kbase/Kconfig b/mali_kbase/Kconfig
index 1c5e1f8..3d5a14a 100644
--- a/mali_kbase/Kconfig
+++ b/mali_kbase/Kconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -115,21 +115,6 @@ config MALI_MIDGARD_ENABLE_TRACE
Enables tracing in kbase. Trace log available through
the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
-config MALI_FW_CORE_DUMP
- bool "Enable support for FW core dump"
- depends on MALI_MIDGARD && MALI_CSF_SUPPORT
- default y
- help
- Adds ability to request firmware core dump through the "fw_core_dump"
- debugfs file
-
- Example:
- * To explicitly request core dump:
- echo 1 > /sys/kernel/debug/mali0/fw_core_dump
- * To output current core dump (after explicitly requesting a core dump,
- or kernel driver reported an internal firmware error):
- cat /sys/kernel/debug/mali0/fw_core_dump
-
config MALI_ARBITER_SUPPORT
bool "Enable arbiter support for Mali"
depends on MALI_MIDGARD && !MALI_CSF_SUPPORT
@@ -181,7 +166,19 @@ menuconfig MALI_EXPERT
if MALI_EXPERT
-config MALI_2MB_ALLOC
+config LARGE_PAGE_ALLOC_OVERRIDE
+ bool "Override default setting of 2MB pages"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ default n
+ help
+ An override config for LARGE_PAGE_ALLOC config.
+ When LARGE_PAGE_ALLOC_OVERRIDE is Y, 2MB page allocation will be
+ enabled by LARGE_PAGE_ALLOC. When this is N, the feature will be
+ enabled when GPU HW satisfies requirements.
+
+ If in doubt, say N
+
+config LARGE_PAGE_ALLOC
bool "Attempt to allocate 2MB pages"
depends on MALI_MIDGARD && MALI_EXPERT
default n
@@ -190,6 +187,10 @@ config MALI_2MB_ALLOC
allocate 2MB pages from the kernel. This reduces TLB pressure and
helps to prevent memory fragmentation.
+ Note this config applies only when LARGE_PAGE_ALLOC_OVERRIDE config
+ is enabled and enabling this on a GPU HW that does not satisfy
+ requirements can cause serious problem.
+
If in doubt, say N
config MALI_MEMORY_FULLY_BACKED
@@ -225,14 +226,6 @@ config MALI_ERROR_INJECT
help
Enables insertion of errors to test module failure and recovery mechanisms.
-config MALI_GEM5_BUILD
- bool "Enable build of Mali kernel driver for GEM5"
- depends on MALI_MIDGARD && MALI_EXPERT
- default n
- help
- This option is to do a Mali GEM5 build.
- If unsure, say N.
-
comment "Debug options"
depends on MALI_MIDGARD && MALI_EXPERT
diff --git a/mali_kbase/Makefile b/mali_kbase/Makefile
index 5d88b14..5b3e99b 100644
--- a/mali_kbase/Makefile
+++ b/mali_kbase/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -58,10 +58,7 @@ ifeq ($(CONFIG_MALI_MIDGARD),m)
endif
ifeq ($(CONFIG_MALI_CSF_SUPPORT), y)
- CONFIG_MALI_FW_CORE_DUMP ?= y
CONFIG_MALI_CORESIGHT ?= n
- else
- CONFIG_MALI_FW_CORE_DUMP ?= n
endif
#
@@ -101,7 +98,8 @@ ifeq ($(CONFIG_MALI_MIDGARD),m)
else
# Prevent misuse when CONFIG_MALI_EXPERT=n
CONFIG_MALI_CORESTACK = n
- CONFIG_MALI_2MB_ALLOC = n
+ CONFIG_LARGE_PAGE_ALLOC_OVERRIDE = n
+ CONFIG_LARGE_PAGE_ALLOC = n
CONFIG_MALI_PWRSOFT_765 = n
CONFIG_MALI_MEMORY_FULLY_BACKED = n
CONFIG_MALI_JOB_DUMP = n
@@ -143,7 +141,6 @@ else
CONFIG_MALI_KUTF_IRQ_TEST = n
CONFIG_MALI_KUTF_CLK_RATE_TRACE = n
CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n
- CONFIG_MALI_FW_CORE_DUMP = n
endif
# All Mali CONFIG should be listed here
@@ -155,14 +152,14 @@ CONFIGS := \
CONFIG_MALI_ARBITRATION \
CONFIG_MALI_PARTITION_MANAGER \
CONFIG_MALI_REAL_HW \
- CONFIG_MALI_GEM5_BUILD \
CONFIG_MALI_DEVFREQ \
CONFIG_MALI_MIDGARD_DVFS \
CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND \
CONFIG_MALI_DMA_BUF_LEGACY_COMPAT \
CONFIG_MALI_EXPERT \
CONFIG_MALI_CORESTACK \
- CONFIG_MALI_2MB_ALLOC \
+ CONFIG_LARGE_PAGE_ALLOC_OVERRIDE \
+ CONFIG_LARGE_PAGE_ALLOC \
CONFIG_MALI_PWRSOFT_765 \
CONFIG_MALI_MEMORY_FULLY_BACKED \
CONFIG_MALI_JOB_DUMP \
@@ -183,7 +180,6 @@ CONFIGS := \
CONFIG_MALI_KUTF_CLK_RATE_TRACE \
CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \
CONFIG_MALI_XEN \
- CONFIG_MALI_FW_CORE_DUMP \
CONFIG_MALI_CORESIGHT
@@ -267,6 +263,12 @@ ifeq ($(CONFIG_GCOV_KERNEL),y)
EXTRA_CFLAGS += -DGCOV_PROFILE=1
endif
+ifeq ($(CONFIG_MALI_KCOV),y)
+ KBUILD_CFLAGS += $(call cc-option, -fsanitize-coverage=trace-cmp)
+ EXTRA_CFLAGS += -DKCOV=1
+ EXTRA_CFLAGS += -DKCOV_ENABLE_COMPARISONS=1
+endif
+
all:
$(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
diff --git a/mali_kbase/Mconfig b/mali_kbase/Mconfig
index d5b3067..f398d1a 100644
--- a/mali_kbase/Mconfig
+++ b/mali_kbase/Mconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -117,21 +117,6 @@ config MALI_MIDGARD_ENABLE_TRACE
Enables tracing in kbase. Trace log available through
the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
-config MALI_FW_CORE_DUMP
- bool "Enable support for FW core dump"
- depends on MALI_MIDGARD && MALI_CSF_SUPPORT
- default y
- help
- Adds ability to request firmware core dump through the "fw_core_dump"
- debugfs file
-
- Example:
- * To explicitly request core dump:
- echo 1 > /sys/kernel/debug/mali0/fw_core_dump
- * To output current core dump (after explicitly requesting a core dump,
- or kernel driver reported an internal firmware error):
- cat /sys/kernel/debug/mali0/fw_core_dump
-
config MALI_ARBITER_SUPPORT
bool "Enable arbiter support for Mali"
depends on MALI_MIDGARD && !MALI_CSF_SUPPORT
@@ -250,14 +235,6 @@ config MALI_ERROR_INJECT
depends on MALI_MIDGARD && MALI_EXPERT
default y if !MALI_ERROR_INJECT_NONE
-config MALI_GEM5_BUILD
- bool "Enable build of Mali kernel driver for GEM5"
- depends on MALI_MIDGARD && MALI_EXPERT
- default n
- help
- This option is to do a Mali GEM5 build.
- If unsure, say N.
-
config MALI_DEBUG
bool "Enable debug build"
depends on MALI_MIDGARD && MALI_EXPERT
@@ -275,6 +252,14 @@ config MALI_GCOV_KERNEL
coverage information. When built against a supporting kernel,
the coverage information will be available via debugfs.
+config MALI_KCOV
+ bool "Enable kcov coverage to support fuzzers"
+ depends on MALI_MIDGARD && MALI_DEBUG
+ default n
+ help
+ Choose this option to enable building with fuzzing-oriented
+ coverage, to improve the random test cases that are generated.
+
config MALI_FENCE_DEBUG
bool "Enable debug sync fence usage"
depends on MALI_MIDGARD && MALI_EXPERT
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
index 9a17494..7df2173 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -190,6 +190,27 @@ static u64 select_job_chain(struct kbase_jd_atom *katom)
return jc;
}
+static inline bool kbasep_jm_wait_js_free(struct kbase_device *kbdev, unsigned int js,
+ struct kbase_context *kctx)
+{
+ const ktime_t wait_loop_start = ktime_get_raw();
+ const s64 max_timeout = (s64)kbdev->js_data.js_free_wait_time_ms;
+ s64 diff = 0;
+
+ /* wait for the JS_COMMAND_NEXT register to reach the given status value */
+ do {
+ if (!kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT)))
+ return true;
+
+ diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start));
+ } while (diff < max_timeout);
+
+ dev_err(kbdev->dev, "Timeout in waiting for job slot %u to become free for ctx %d_%u", js,
+ kctx->tgid, kctx->id);
+
+ return false;
+}
+
int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, unsigned int js)
{
struct kbase_context *kctx;
@@ -203,8 +224,7 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
kctx = katom->kctx;
/* Command register must be available */
- if (WARN(!kbasep_jm_is_js_free(kbdev, js, kctx),
- "Attempting to assign to occupied slot %d in kctx %pK\n", js, (void *)kctx))
+ if (!kbasep_jm_wait_js_free(kbdev, js, kctx))
return -EPERM;
dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n",
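For reference, a minimal standalone sketch of the poll-with-deadline pattern that kbasep_jm_wait_js_free() introduces above: read a hardware status value until it reports idle or a millisecond budget expires, then let the caller report the timeout. read_hw_status() and now_ms() are hypothetical stand-ins for kbase_reg_read() and ktime_get_raw()/ktime_to_ms(); this is not kbase API.

#include <stdbool.h>
#include <stdint.h>

extern uint32_t read_hw_status(void); /* hypothetical register read */
extern int64_t now_ms(void);          /* hypothetical monotonic clock, in ms */

static bool wait_until_idle(int64_t timeout_ms)
{
	const int64_t start = now_ms();

	do {
		if (read_hw_status() == 0)
			return true;  /* slot became free, safe to submit */
	} while (now_ms() - start < timeout_ms);

	return false;                 /* caller logs the timeout and bails out */
}
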
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
index e4cff1f..bfd55a6 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2011-2016, 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016, 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -52,14 +52,6 @@ static inline char *kbasep_make_job_slot_string(unsigned int js, char *js_string
}
#endif
-#if !MALI_USE_CSF
-static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, unsigned int js,
- struct kbase_context *kctx)
-{
- return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT));
-}
-#endif
-
/**
* kbase_job_hw_submit() - Submit a job to the GPU
* @kbdev: Device pointer
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
index 388b37f..7db2b35 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1001,17 +1001,11 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
other_slots_busy(kbdev, js))
break;
-#ifdef CONFIG_MALI_GEM5_BUILD
- if (!kbasep_jm_is_js_free(kbdev, js,
- katom[idx]->kctx))
- break;
-#endif
/* Check if this job needs the cycle counter
* enabled before submission
*/
if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
- kbase_pm_request_gpu_cycle_counter_l2_is_on(
- kbdev);
+ kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev);
if (!kbase_job_hw_submit(kbdev, katom[idx], js)) {
katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_SUBMITTED;
@@ -1025,9 +1019,12 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
/* Inform platform at start/finish of atom */
kbasep_platform_event_atom_submit(katom[idx]);
- }
- else
+ } else {
+ if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
+ kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+
break;
+ }
/* ***TRANSITION TO HIGHER STATE*** */
fallthrough;
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
index 9d5f15e..dd16fb2 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
+++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
@@ -2024,8 +2024,6 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value)
*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER,
counter_index, is_low_word);
- } else if (addr == USER_REG(LATEST_FLUSH)) {
- *value = 0;
}
#endif
else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) {
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c b/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c
index 75b1e7e..f310cc7 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c
+++ b/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2015, 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -25,14 +25,17 @@
static struct kbase_error_atom *error_track_list;
-unsigned int rand_seed;
+#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM
+
+/** Kernel 6.1.0 has dropped prandom_u32(), use get_random_u32() */
+#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
+#define prandom_u32 get_random_u32
+#endif
/*following error probability are set quite high in order to stress the driver*/
-unsigned int error_probability = 50; /* to be set between 0 and 100 */
+static unsigned int error_probability = 50; /* to be set between 0 and 100 */
/* probability to have multiple error give that there is an error */
-unsigned int multiple_error_probability = 50;
-
-#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM
+static unsigned int multiple_error_probability = 50;
/* all the error conditions supported by the model */
#define TOTAL_FAULTS 27
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.c b/mali_kbase/backend/gpu/mali_kbase_model_linux.c
index b37680d..e90e4df 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_linux.c
+++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.c
@@ -105,7 +105,7 @@ static void serve_mmu_irq(struct work_struct *work)
kmem_cache_free(kbdev->irq_slab, data);
}
-void gpu_device_raise_irq(void *model, enum model_linux_irqs irq)
+void gpu_device_raise_irq(void *model, u32 irq)
{
struct model_irq_data *data;
struct kbase_device *kbdev = gpu_device_get_data(model);
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.h b/mali_kbase/backend/gpu/mali_kbase_model_linux.h
index a24db17..4cf1235 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_linux.h
+++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.h
@@ -124,7 +124,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value);
*
* This hook is global to the model Linux framework.
*/
-void gpu_device_raise_irq(void *model, enum model_linux_irqs irq);
+void gpu_device_raise_irq(void *model, u32 irq);
/**
* gpu_device_set_data() - Private model set data function.
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
index c51b133..0caf63e 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
@@ -2575,26 +2575,33 @@ void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts);
#if MALI_USE_CSF
+/**
+ * update_user_reg_page_mapping - Update the mapping for USER Register page
+ *
+ * @kbdev: The kbase device structure for the device.
+ *
+ * This function must be called to unmap the dummy or real page from USER Register page
+ * mapping whenever GPU is powered up or down. The dummy or real page would get
+ * appropriately mapped in when Userspace reads the LATEST_FLUSH value.
+ */
static void update_user_reg_page_mapping(struct kbase_device *kbdev)
{
+ struct kbase_context *kctx, *n;
+
lockdep_assert_held(&kbdev->pm.lock);
mutex_lock(&kbdev->csf.reg_lock);
-
- /* Only if the mappings for USER page exist, update all PTEs associated to it */
- if (kbdev->csf.nr_user_page_mapped > 0) {
- if (likely(kbdev->csf.mali_file_inode)) {
- /* This would zap the pte corresponding to the mapping of User
- * register page for all the Kbase contexts.
- */
- unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping,
- BASEP_MEM_CSF_USER_REG_PAGE_HANDLE, PAGE_SIZE, 1);
- } else {
- dev_err(kbdev->dev,
- "Device file inode not exist even if USER page previously mapped");
- }
+ list_for_each_entry_safe(kctx, n, &kbdev->csf.user_reg.list, csf.user_reg.link) {
+ /* This would zap the PTE corresponding to the mapping of User
+ * Register page of the kbase context. The mapping will be reestablished
+ * when the context (user process) needs to access to the page.
+ */
+ unmap_mapping_range(kbdev->csf.user_reg.filp->f_inode->i_mapping,
+ kctx->csf.user_reg.file_offset << PAGE_SHIFT, PAGE_SIZE, 1);
+ list_del_init(&kctx->csf.user_reg.link);
+ dev_dbg(kbdev->dev, "Updated USER Reg page mapping of ctx %d_%d", kctx->tgid,
+ kctx->id);
}
-
mutex_unlock(&kbdev->csf.reg_lock);
}
#endif
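The rework above replaces the single device-wide zap with a per-context zap keyed by each context's file_offset into a dedicated dummy file. A minimal sketch of that per-context invalidation, using only the fields described in the diff (filp and file_offset); the helper name is hypothetical:

#include <linux/fs.h>
#include <linux/mm.h>

/* Zap one context's CPU mapping of the USER Register page. Each context owns
 * a distinct page-sized offset range in the shared dummy file, so it can be
 * invalidated independently; the mapping is re-established on the next access.
 */
static void zap_user_reg_mapping(struct file *filp, u32 file_offset)
{
	unmap_mapping_range(filp->f_inode->i_mapping,
			    (loff_t)file_offset << PAGE_SHIFT, PAGE_SIZE, 1);
}
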
diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c
index 5110e3d..7a4d662 100644
--- a/mali_kbase/backend/gpu/mali_kbase_time.c
+++ b/mali_kbase/backend/gpu/mali_kbase_time.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,8 @@
#include <mali_kbase.h>
#include <mali_kbase_hwaccess_time.h>
#if MALI_USE_CSF
+#include <asm/arch_timer.h>
+#include <linux/gcd.h>
#include <csf/mali_kbase_csf_timeout.h>
#endif
#include <device/mali_kbase_device.h>
@@ -121,20 +123,29 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
/* Only for debug messages, safe default in case it's mis-maintained */
const char *selector_str = "(unknown)";
- if (WARN(!kbdev->lowest_gpu_freq_khz,
- "Lowest frequency uninitialized! Using reference frequency for scaling")) {
+ if (!kbdev->lowest_gpu_freq_khz) {
+ dev_dbg(kbdev->dev,
+ "Lowest frequency uninitialized! Using reference frequency for scaling");
freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ;
} else {
freq_khz = kbdev->lowest_gpu_freq_khz;
}
switch (selector) {
+ case MMU_AS_INACTIVE_WAIT_TIMEOUT:
+ selector_str = "MMU_AS_INACTIVE_WAIT_TIMEOUT";
+ nr_cycles = MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES;
+ break;
case KBASE_TIMEOUT_SELECTOR_COUNT:
default:
#if !MALI_USE_CSF
WARN(1, "Invalid timeout selector used! Using default value");
nr_cycles = JM_DEFAULT_TIMEOUT_CYCLES;
break;
+ case JM_DEFAULT_JS_FREE_TIMEOUT:
+ selector_str = "JM_DEFAULT_JS_FREE_TIMEOUT";
+ nr_cycles = JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES;
+ break;
#else
/* Use Firmware timeout if invalid selection */
WARN(1,
@@ -204,3 +215,65 @@ u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev)
return lo | (((u64) hi1) << 32);
}
+
+#if MALI_USE_CSF
+u64 __maybe_unused kbase_backend_time_convert_gpu_to_cpu(struct kbase_device *kbdev, u64 gpu_ts)
+{
+ if (WARN_ON(!kbdev))
+ return 0;
+
+ return div64_u64(gpu_ts * kbdev->backend_time.multiplier, kbdev->backend_time.divisor) +
+ kbdev->backend_time.offset;
+}
+
+/**
+ * get_cpu_gpu_time() - Get current CPU and GPU timestamps.
+ *
+ * @kbdev: Kbase device.
+ * @cpu_ts: Output CPU timestamp.
+ * @gpu_ts: Output GPU timestamp.
+ * @gpu_cycle: Output GPU cycle counts.
+ */
+static void get_cpu_gpu_time(struct kbase_device *kbdev, u64 *cpu_ts, u64 *gpu_ts, u64 *gpu_cycle)
+{
+ struct timespec64 ts;
+
+ kbase_backend_get_gpu_time(kbdev, gpu_cycle, gpu_ts, &ts);
+
+ if (cpu_ts)
+ *cpu_ts = ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
+}
+#endif
+
+int kbase_backend_time_init(struct kbase_device *kbdev)
+{
+#if MALI_USE_CSF
+ u64 cpu_ts = 0;
+ u64 gpu_ts = 0;
+ u64 freq;
+ u64 common_factor;
+
+ get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL);
+ freq = arch_timer_get_cntfrq();
+
+ if (!freq) {
+ dev_warn(kbdev->dev, "arch_timer_get_rate() is zero!");
+ return -EINVAL;
+ }
+
+ common_factor = gcd(NSEC_PER_SEC, freq);
+
+ kbdev->backend_time.multiplier = div64_u64(NSEC_PER_SEC, common_factor);
+ kbdev->backend_time.divisor = div64_u64(freq, common_factor);
+
+ if (!kbdev->backend_time.divisor) {
+ dev_warn(kbdev->dev, "CPU to GPU divisor is zero!");
+ return -EINVAL;
+ }
+
+ kbdev->backend_time.offset = cpu_ts - div64_u64(gpu_ts * kbdev->backend_time.multiplier,
+ kbdev->backend_time.divisor);
+#endif
+
+ return 0;
+}
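A minimal sketch of the nanosecond scaling that kbase_backend_time_init() sets up and kbase_backend_time_convert_gpu_to_cpu() applies above: reduce NSEC_PER_SEC and the counter frequency by their GCD to keep the 64-bit multiply small, then add the CPU/GPU offset captured at init. gcd_u64() is a local stand-in for the kernel's gcd(); the helper is illustrative, not kbase API.

#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

static uint64_t gcd_u64(uint64_t a, uint64_t b)
{
	while (b) {
		uint64_t t = a % b;

		a = b;
		b = t;
	}
	return a;
}

/* Convert a raw GPU timestamp (in counter ticks) to CPU nanoseconds. */
static uint64_t gpu_to_cpu_ns(uint64_t gpu_ts, uint64_t cnt_freq_hz,
			      uint64_t offset_ns)
{
	const uint64_t g = gcd_u64(NSEC_PER_SEC, cnt_freq_hz);
	const uint64_t multiplier = NSEC_PER_SEC / g;
	const uint64_t divisor = cnt_freq_hz / g;

	return gpu_ts * multiplier / divisor + offset_ns;
}
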
diff --git a/mali_kbase/build.bp b/mali_kbase/build.bp
index a563058..4f475ab 100644
--- a/mali_kbase/build.bp
+++ b/mali_kbase/build.bp
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -62,8 +62,11 @@ bob_defaults {
mali_dma_buf_legacy_compat: {
kbuild_options: ["CONFIG_MALI_DMA_BUF_LEGACY_COMPAT=y"],
},
+ large_page_alloc_override: {
+ kbuild_options: ["CONFIG_LARGE_PAGE_ALLOC_OVERRIDE=y"],
+ },
large_page_alloc: {
- kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"],
+ kbuild_options: ["CONFIG_LARGE_PAGE_ALLOC=y"],
},
mali_memory_fully_backed: {
kbuild_options: ["CONFIG_MALI_MEMORY_FULLY_BACKED=y"],
@@ -86,9 +89,6 @@ bob_defaults {
mali_error_inject: {
kbuild_options: ["CONFIG_MALI_ERROR_INJECT=y"],
},
- mali_gem5_build: {
- kbuild_options: ["CONFIG_MALI_GEM5_BUILD=y"],
- },
mali_debug: {
kbuild_options: [
"CONFIG_MALI_DEBUG=y",
@@ -137,9 +137,6 @@ bob_defaults {
platform_is_fpga: {
kbuild_options: ["CONFIG_MALI_IS_FPGA=y"],
},
- mali_fw_core_dump: {
- kbuild_options: ["CONFIG_MALI_FW_CORE_DUMP=y"],
- },
mali_coresight: {
kbuild_options: ["CONFIG_MALI_CORESIGHT=y"],
},
@@ -194,6 +191,15 @@ bob_kernel_module {
"platform/*/*.c",
"platform/*/*.h",
"platform/*/Kbuild",
+ "platform/*/*/*.c",
+ "platform/*/*/*.h",
+ "platform/*/*/Kbuild",
+ "platform/*/*/*.c",
+ "platform/*/*/*.h",
+ "platform/*/*/Kbuild",
+ "platform/*/*/*/*.c",
+ "platform/*/*/*/*.h",
+ "platform/*/*/*/Kbuild",
"thirdparty/*.c",
"thirdparty/Kbuild",
"debug/*.c",
diff --git a/mali_kbase/context/mali_kbase_context.c b/mali_kbase/context/mali_kbase_context.c
index 792f724..b8036b8 100644
--- a/mali_kbase/context/mali_kbase_context.c
+++ b/mali_kbase/context/mali_kbase_context.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,12 @@
/*
* Base kernel context APIs
*/
+#include <linux/version.h>
+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
+#include <linux/sched/task.h>
+#else
+#include <linux/sched.h>
+#endif
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
@@ -129,13 +135,51 @@ int kbase_context_common_init(struct kbase_context *kctx)
/* creating a context is considered a disjoint event */
kbase_disjoint_event(kctx->kbdev);
- spin_lock_init(&kctx->mm_update_lock);
kctx->process_mm = NULL;
+ kctx->task = NULL;
atomic_set(&kctx->nonmapped_pages, 0);
atomic_set(&kctx->permanent_mapped_pages, 0);
kctx->tgid = current->tgid;
kctx->pid = current->pid;
+ /* Check if this is a Userspace created context */
+ if (likely(kctx->filp)) {
+ struct pid *pid_struct;
+
+ rcu_read_lock();
+ pid_struct = find_get_pid(kctx->tgid);
+ if (likely(pid_struct)) {
+ struct task_struct *task = pid_task(pid_struct, PIDTYPE_PID);
+
+ if (likely(task)) {
+ /* Take a reference on the task to avoid slow lookup
+ * later on from the page allocation loop.
+ */
+ get_task_struct(task);
+ kctx->task = task;
+ } else {
+ dev_err(kctx->kbdev->dev,
+ "Failed to get task pointer for %s/%d",
+ current->comm, current->pid);
+ err = -ESRCH;
+ }
+
+ put_pid(pid_struct);
+ } else {
+ dev_err(kctx->kbdev->dev,
+ "Failed to get pid pointer for %s/%d",
+ current->comm, current->pid);
+ err = -ESRCH;
+ }
+ rcu_read_unlock();
+
+ if (unlikely(err))
+ return err;
+
+ kbase_mem_mmgrab();
+ kctx->process_mm = current->mm;
+ }
+
atomic_set(&kctx->used_pages, 0);
mutex_init(&kctx->reg_lock);
@@ -168,13 +212,16 @@ int kbase_context_common_init(struct kbase_context *kctx)
kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1;
mutex_lock(&kctx->kbdev->kctx_list_lock);
-
err = kbase_insert_kctx_to_process(kctx);
- if (err)
- dev_err(kctx->kbdev->dev,
- "(err:%d) failed to insert kctx to kbase_process\n", err);
-
mutex_unlock(&kctx->kbdev->kctx_list_lock);
+ if (err) {
+ dev_err(kctx->kbdev->dev,
+ "(err:%d) failed to insert kctx to kbase_process", err);
+ if (likely(kctx->filp)) {
+ mmdrop(kctx->process_mm);
+ put_task_struct(kctx->task);
+ }
+ }
return err;
}
@@ -260,6 +307,11 @@ void kbase_context_common_term(struct kbase_context *kctx)
kbase_remove_kctx_from_process(kctx);
mutex_unlock(&kctx->kbdev->kctx_list_lock);
+ if (likely(kctx->filp)) {
+ mmdrop(kctx->process_mm);
+ put_task_struct(kctx->task);
+ }
+
KBASE_KTRACE_ADD(kctx->kbdev, CORE_CTX_DESTROY, kctx, 0u);
}
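A standalone sketch of the lookup-and-pin pattern the context init above relies on: resolve the tgid to its task_struct under RCU and take a reference so the task can be reused later (for example from the page allocation path) without another lookup. The helper name is hypothetical; the caller must balance it with put_task_struct().

#include <linux/pid.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/task.h>

static struct task_struct *pin_task_by_tgid(pid_t tgid)
{
	struct pid *pid_struct;
	struct task_struct *task = NULL;

	rcu_read_lock();
	pid_struct = find_get_pid(tgid);
	if (pid_struct) {
		task = pid_task(pid_struct, PIDTYPE_PID);
		if (task)
			get_task_struct(task); /* held until put_task_struct() */
		put_pid(pid_struct);
	}
	rcu_read_unlock();

	return task; /* NULL if the thread group leader has already exited */
}
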
diff --git a/mali_kbase/context/mali_kbase_context.h b/mali_kbase/context/mali_kbase_context.h
index a0c51c9..7c90e27 100644
--- a/mali_kbase/context/mali_kbase_context.h
+++ b/mali_kbase/context/mali_kbase_context.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2011-2017, 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -93,6 +93,19 @@ static inline bool kbase_ctx_flag(struct kbase_context *kctx,
}
/**
+ * kbase_ctx_compat_mode - Indicate whether a kbase context needs to operate
+ * in compatibility mode for 32-bit userspace.
+ * @kctx: kbase context
+ *
+ * Return: True if needs to maintain compatibility, False otherwise.
+ */
+static inline bool kbase_ctx_compat_mode(struct kbase_context *kctx)
+{
+ return !IS_ENABLED(CONFIG_64BIT) ||
+ (IS_ENABLED(CONFIG_64BIT) && kbase_ctx_flag(kctx, KCTX_COMPAT));
+}
+
+/**
* kbase_ctx_flag_clear - Clear @flag on @kctx
* @kctx: Pointer to kbase context
* @flag: Flag to clear
diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c
index dbfcfde..88a3975 100644
--- a/mali_kbase/csf/mali_kbase_csf.c
+++ b/mali_kbase/csf/mali_kbase_csf.c
@@ -39,7 +39,9 @@
#define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK)
#define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK)
-#define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1)
+
+#define CS_RING_BUFFER_MAX_SIZE ((uint32_t)(1 << 31)) /* 2GiB */
+#define CS_RING_BUFFER_MIN_SIZE ((uint32_t)4096)
#define PROTM_ALLOC_MAX_RETRIES ((u8)5)
@@ -73,6 +75,38 @@ struct irq_idle_and_protm_track {
s8 idle_slot;
};
+/**
+ * kbasep_ctx_user_reg_page_mapping_term() - Terminate resources for USER Register Page.
+ *
+ * @kctx: Pointer to the kbase context
+ */
+static void kbasep_ctx_user_reg_page_mapping_term(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+
+ if (unlikely(kctx->csf.user_reg.vma))
+ dev_err(kbdev->dev, "VMA for USER Register page exist on termination of ctx %d_%d",
+ kctx->tgid, kctx->id);
+ if (WARN_ON_ONCE(!list_empty(&kctx->csf.user_reg.link)))
+ list_del_init(&kctx->csf.user_reg.link);
+}
+
+/**
+ * kbasep_ctx_user_reg_page_mapping_init() - Initialize resources for USER Register Page.
+ *
+ * @kctx: Pointer to the kbase context
+ *
+ * @return: 0 on success.
+ */
+static int kbasep_ctx_user_reg_page_mapping_init(struct kbase_context *kctx)
+{
+ INIT_LIST_HEAD(&kctx->csf.user_reg.link);
+ kctx->csf.user_reg.vma = NULL;
+ kctx->csf.user_reg.file_offset = 0;
+
+ return 0;
+}
+
static void put_user_pages_mmap_handle(struct kbase_context *kctx,
struct kbase_queue *queue)
{
@@ -262,7 +296,7 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, struct
ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
KBASEP_NUM_CS_USER_IO_PAGES,
- queue->phys, false);
+ queue->phys, false, kctx->task);
if (ret != KBASEP_NUM_CS_USER_IO_PAGES) {
/* Marking both the phys to zero for indicating there is no phys allocated */
queue->phys[0].tagged_addr = 0;
@@ -288,11 +322,8 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, struct
queue->db_file_offset = kbdev->csf.db_file_offsets;
kbdev->csf.db_file_offsets += BASEP_QUEUE_NR_MMAP_USER_PAGES;
-#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
- WARN(atomic_read(&queue->refcount) != 1, "Incorrect refcounting for queue object\n");
-#else
- WARN(refcount_read(&queue->refcount) != 1, "Incorrect refcounting for queue object\n");
-#endif
+ WARN(kbase_refcount_read(&queue->refcount) != 1,
+ "Incorrect refcounting for queue object\n");
/* This is the second reference taken on the queue object and
* would be dropped only when the IO mapping is removed either
* explicitly by userspace or implicitly by kernel on process exit.
@@ -364,21 +395,13 @@ static struct kbase_queue *find_queue(struct kbase_context *kctx, u64 base_addr)
static void get_queue(struct kbase_queue *queue)
{
-#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
- WARN_ON(!atomic_inc_not_zero(&queue->refcount));
-#else
- WARN_ON(!refcount_inc_not_zero(&queue->refcount));
-#endif
+ WARN_ON(!kbase_refcount_inc_not_zero(&queue->refcount));
}
static void release_queue(struct kbase_queue *queue)
{
lockdep_assert_held(&queue->kctx->csf.lock);
-#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
- if (atomic_dec_and_test(&queue->refcount)) {
-#else
- if (refcount_dec_and_test(&queue->refcount)) {
-#endif
+ if (kbase_refcount_dec_and_test(&queue->refcount)) {
/* The queue can't still be on the per context list. */
WARN_ON(!list_empty(&queue->link));
WARN_ON(queue->group);
@@ -394,7 +417,7 @@ static void release_queue(struct kbase_queue *queue)
* would free up the GPU queue memory.
*/
kbase_gpu_vm_lock(queue->kctx);
- kbase_va_region_no_user_free_put(queue->kctx, queue->queue_reg);
+ kbase_va_region_no_user_free_dec(queue->queue_reg);
kbase_gpu_vm_unlock(queue->kctx);
kfree(queue);
@@ -500,17 +523,16 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
queue->kctx = kctx;
queue->base_addr = queue_addr;
- queue->queue_reg = kbase_va_region_no_user_free_get(kctx, region);
+
+ queue->queue_reg = region;
+ kbase_va_region_no_user_free_inc(region);
+
queue->size = (queue_size << PAGE_SHIFT);
queue->csi_index = KBASEP_IF_NR_INVALID;
queue->enabled = false;
queue->priority = reg->priority;
-#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
- atomic_set(&queue->refcount, 1);
-#else
- refcount_set(&queue->refcount, 1);
-#endif
+ kbase_refcount_set(&queue->refcount, 1);
queue->group = NULL;
queue->bind_state = KBASE_CSF_QUEUE_UNBOUND;
@@ -567,6 +589,13 @@ out:
int kbase_csf_queue_register(struct kbase_context *kctx,
struct kbase_ioctl_cs_queue_register *reg)
{
+ /* Validate the ring buffer configuration parameters */
+ if (reg->buffer_size < CS_RING_BUFFER_MIN_SIZE ||
+ reg->buffer_size > CS_RING_BUFFER_MAX_SIZE ||
+ reg->buffer_size & (reg->buffer_size - 1) || !reg->buffer_gpu_addr ||
+ reg->buffer_gpu_addr & ~PAGE_MASK)
+ return -EINVAL;
+
return csf_queue_register_internal(kctx, reg, NULL);
}
@@ -585,6 +614,13 @@ int kbase_csf_queue_register_ex(struct kbase_context *kctx,
if (glb_version < kbase_csf_interface_version(1, 1, 0))
return -EINVAL;
+ /* Validate the ring buffer configuration parameters */
+ if (reg->buffer_size < CS_RING_BUFFER_MIN_SIZE ||
+ reg->buffer_size > CS_RING_BUFFER_MAX_SIZE ||
+ reg->buffer_size & (reg->buffer_size - 1) || !reg->buffer_gpu_addr ||
+ reg->buffer_gpu_addr & ~PAGE_MASK)
+ return -EINVAL;
+
/* Validate the cs_trace configuration parameters */
if (reg->ex_buffer_size &&
((reg->ex_event_size > max_size) ||
@@ -904,6 +940,9 @@ static void unbind_stopped_queue(struct kbase_context *kctx,
{
lockdep_assert_held(&kctx->csf.lock);
+ if (WARN_ON(queue->csi_index < 0))
+ return;
+
if (queue->bind_state != KBASE_CSF_QUEUE_UNBOUND) {
unsigned long flags;
@@ -917,6 +956,7 @@ static void unbind_stopped_queue(struct kbase_context *kctx,
kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags);
put_user_pages_mmap_handle(kctx, queue);
+ WARN_ON_ONCE(queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID);
queue->bind_state = KBASE_CSF_QUEUE_UNBOUND;
}
}
@@ -1094,7 +1134,7 @@ static int create_normal_suspend_buffer(struct kbase_context *const kctx,
/* Get physical page for a normal suspend buffer */
err = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages,
- &s_buf->phy[0], false);
+ &s_buf->phy[0], false, kctx->task);
if (err < 0) {
kfree(s_buf->phy);
@@ -1534,6 +1574,7 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
}
KBASE_EXPORT_TEST_API(kbase_csf_queue_group_terminate);
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
struct kbase_suspend_copy_buffer *sus_buf,
u8 group_handle)
@@ -1564,6 +1605,7 @@ int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
return err;
}
+#endif
void kbase_csf_add_group_fatal_error(
struct kbase_queue_group *const group,
@@ -1632,8 +1674,6 @@ int kbase_csf_ctx_init(struct kbase_context *kctx)
kbase_csf_event_init(kctx);
- kctx->csf.user_reg_vma = NULL;
-
/* Mark all the cookies as 'free' */
bitmap_fill(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE);
@@ -1653,7 +1693,14 @@ int kbase_csf_ctx_init(struct kbase_context *kctx)
mutex_init(&kctx->csf.lock);
INIT_WORK(&kctx->csf.pending_submission_work,
pending_submission_worker);
- } else
+
+ err = kbasep_ctx_user_reg_page_mapping_init(kctx);
+
+ if (unlikely(err))
+ kbase_csf_tiler_heap_context_term(kctx);
+ }
+
+ if (unlikely(err))
kbase_csf_kcpu_queue_context_term(kctx);
}
@@ -1811,17 +1858,14 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
* only one reference left that was taken when queue was
* registered.
*/
-#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
- WARN_ON(atomic_read(&queue->refcount) != 1);
-#else
- WARN_ON(refcount_read(&queue->refcount) != 1);
-#endif
+ WARN_ON(kbase_refcount_read(&queue->refcount) != 1);
list_del_init(&queue->link);
release_queue(queue);
}
mutex_unlock(&kctx->csf.lock);
+ kbasep_ctx_user_reg_page_mapping_term(kctx);
kbase_csf_tiler_heap_context_term(kctx);
kbase_csf_kcpu_queue_context_term(kctx);
kbase_csf_scheduler_context_term(kctx);
@@ -2736,6 +2780,9 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c
if ((req ^ ack) & CSG_REQ_IDLE_MASK) {
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE(
+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, csg_nr);
+
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack,
CSG_REQ_IDLE_MASK);
@@ -3149,12 +3196,12 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev)
struct file *filp;
int ret;
- filp = shmem_file_setup("mali csf", MAX_LFS_FILESIZE, VM_NORESERVE);
+ filp = shmem_file_setup("mali csf db", MAX_LFS_FILESIZE, VM_NORESERVE);
if (IS_ERR(filp))
return PTR_ERR(filp);
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
- false);
+ false, NULL);
if (ret <= 0) {
fput(filp);
@@ -3170,29 +3217,34 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev)
void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev)
{
- if (as_phys_addr_t(kbdev->csf.dummy_user_reg_page)) {
- struct page *page = as_page(kbdev->csf.dummy_user_reg_page);
+ if (kbdev->csf.user_reg.filp) {
+ struct page *page = as_page(kbdev->csf.user_reg.dummy_page);
- kbase_mem_pool_free(
- &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page,
- false);
+ kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false);
+ fput(kbdev->csf.user_reg.filp);
}
}
int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev)
{
struct tagged_addr phys;
+ struct file *filp;
struct page *page;
u32 *addr;
- int ret;
- kbdev->csf.dummy_user_reg_page = as_tagged(0);
+ kbdev->csf.user_reg.filp = NULL;
- ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
- false);
+ filp = shmem_file_setup("mali csf user_reg", MAX_LFS_FILESIZE, VM_NORESERVE);
+ if (IS_ERR(filp)) {
+ dev_err(kbdev->dev, "failed to get an unlinked file for user_reg");
+ return PTR_ERR(filp);
+ }
- if (ret <= 0)
- return ret;
+ if (kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
+ false, NULL) <= 0) {
+ fput(filp);
+ return -ENOMEM;
+ }
page = as_page(phys);
addr = kmap_atomic(page);
@@ -3202,12 +3254,13 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev)
*/
addr[LATEST_FLUSH / sizeof(u32)] = POWER_DOWN_LATEST_FLUSH_VALUE;
- kbase_sync_single_for_device(kbdev, kbase_dma_addr(page), sizeof(u32),
+ kbase_sync_single_for_device(kbdev, kbase_dma_addr(page) + LATEST_FLUSH, sizeof(u32),
DMA_BIDIRECTIONAL);
kunmap_atomic(addr);
- kbdev->csf.dummy_user_reg_page = phys;
-
+ kbdev->csf.user_reg.filp = filp;
+ kbdev->csf.user_reg.dummy_page = phys;
+ kbdev->csf.user_reg.file_offset = 0;
return 0;
}
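The queue-register paths above now reject malformed ring buffer parameters before any allocation happens. A minimal userspace-style sketch of the same checks, assuming 4 KiB pages; the constant names mirror CS_RING_BUFFER_MIN_SIZE/CS_RING_BUFFER_MAX_SIZE from the diff but the helper itself is illustrative:

#include <stdbool.h>
#include <stdint.h>

#define RB_MIN_SIZE 4096u        /* CS_RING_BUFFER_MIN_SIZE */
#define RB_MAX_SIZE (1u << 31)   /* CS_RING_BUFFER_MAX_SIZE, 2 GiB */
#define PAGE_ALIGN_MASK ((uint64_t)4096 - 1)

static bool ring_buffer_params_valid(uint32_t size, uint64_t gpu_addr)
{
	if (size < RB_MIN_SIZE || size > RB_MAX_SIZE)
		return false;
	if (size & (size - 1))                         /* must be a power of two */
		return false;
	if (!gpu_addr || (gpu_addr & PAGE_ALIGN_MASK)) /* must be page aligned */
		return false;
	return true;
}
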
diff --git a/mali_kbase/csf/mali_kbase_csf.h b/mali_kbase/csf/mali_kbase_csf.h
index 9fbc932..dd947dc 100644
--- a/mali_kbase/csf/mali_kbase_csf.h
+++ b/mali_kbase/csf/mali_kbase_csf.h
@@ -274,6 +274,7 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
*/
void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group);
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
/**
* kbase_csf_queue_group_suspend - Suspend a GPU command queue group
*
@@ -291,6 +292,7 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group);
*/
int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
struct kbase_suspend_copy_buffer *sus_buf, u8 group_handle);
+#endif
/**
* kbase_csf_add_group_fatal_error - Report a fatal group error to userspace
diff --git a/mali_kbase/csf/mali_kbase_csf_defs.h b/mali_kbase/csf/mali_kbase_csf_defs.h
index f1af1b9..f09544c 100644
--- a/mali_kbase/csf/mali_kbase_csf_defs.h
+++ b/mali_kbase/csf/mali_kbase_csf_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -30,6 +30,7 @@
#include <linux/wait.h>
#include "mali_kbase_csf_firmware.h"
+#include "mali_kbase_refcount_defs.h"
#include "mali_kbase_csf_event.h"
#include <uapi/gpu/arm/midgard/csf/mali_kbase_csf_errors_dumpfault.h>
@@ -269,6 +270,8 @@ enum kbase_queue_group_priority {
* @CSF_FIRMWARE_PING_TIMEOUT: Maximum time to wait for firmware to respond
* to a ping from KBase.
* @CSF_SCHED_PROTM_PROGRESS_TIMEOUT: Timeout used to prevent protected mode execution hang.
+ * @MMU_AS_INACTIVE_WAIT_TIMEOUT: Maximum waiting time in ms for the completion
+ * of a MMU operation
* @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in
* the enum.
*/
@@ -280,6 +283,7 @@ enum kbase_timeout_selector {
CSF_FIRMWARE_BOOT_TIMEOUT,
CSF_FIRMWARE_PING_TIMEOUT,
CSF_SCHED_PROTM_PROGRESS_TIMEOUT,
+ MMU_AS_INACTIVE_WAIT_TIMEOUT,
/* Must be the last in the enum */
KBASE_TIMEOUT_SELECTOR_COUNT
@@ -387,11 +391,7 @@ struct kbase_queue {
int doorbell_nr;
unsigned long db_file_offset;
struct list_head link;
-#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
- atomic_t refcount;
-#else
- refcount_t refcount;
-#endif
+ kbase_refcount_t refcount;
struct kbase_queue_group *group;
struct kbase_va_region *queue_reg;
struct work_struct oom_event_work;
@@ -779,6 +779,23 @@ struct kbase_csf_event {
};
/**
+ * struct kbase_csf_user_reg_context - Object containing members to manage the mapping
+ * of USER Register page for a context.
+ *
+ * @vma: Pointer to the VMA corresponding to the virtual mapping
+ * of the USER register page.
+ * @file_offset: File offset value that is assigned to userspace mapping
+ * of the USER Register page. It is in page units.
+ * @link: Links the context to the device list when mapping is pointing to
+ * either the dummy or the real Register page.
+ */
+struct kbase_csf_user_reg_context {
+ struct vm_area_struct *vma;
+ u32 file_offset;
+ struct list_head link;
+};
+
+/**
* struct kbase_csf_context - Object representing CSF for a GPU address space.
*
* @event_pages_head: A list of pages allocated for the event memory used by
@@ -816,13 +833,11 @@ struct kbase_csf_event {
* used by GPU command queues, and progress timeout events.
* @link: Link to this csf context in the 'runnable_kctxs' list of
* the scheduler instance
- * @user_reg_vma: Pointer to the vma corresponding to the virtual mapping
- * of the USER register page. Currently used only for sanity
- * checking.
* @sched: Object representing the scheduler's context
* @pending_submission_work: Work item to process pending kicked GPU command queues.
* @cpu_queue: CPU queue information. Only be available when DEBUG_FS
* is enabled.
+ * @user_reg: Collective information to support mapping to USER Register page.
*/
struct kbase_csf_context {
struct list_head event_pages_head;
@@ -837,12 +852,12 @@ struct kbase_csf_context {
struct kbase_csf_tiler_heap_context tiler_heaps;
struct workqueue_struct *wq;
struct list_head link;
- struct vm_area_struct *user_reg_vma;
struct kbase_csf_scheduler_context sched;
struct work_struct pending_submission_work;
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct kbase_csf_cpu_queue_context cpu_queue;
#endif
+ struct kbase_csf_user_reg_context user_reg;
};
/**
@@ -1427,6 +1442,37 @@ struct kbase_csf_dump_on_fault {
#endif /* CONFIG_DEBUG_FS*/
/**
+ * struct kbase_csf_user_reg - Object containing members to manage the mapping
+ * of USER Register page for all contexts
+ *
+ * @dummy_page: Address of a dummy page that is mapped in place
+ * of the real USER Register page just before the GPU
+ * is powered down. The USER Register page is mapped
+ * in the address space of every process, that created
+ * a Base context, to enable the access to LATEST_FLUSH
+ * register from userspace.
+ * @filp: Pointer to a dummy file, that along with @file_offset,
+ * facilitates the use of unique file offset for the userspace mapping
+ * created for USER Register page.
+ * The userspace mapping is made to point to this file
+ * inside the mmap handler.
+ * @file_offset: Counter that is incremented every time Userspace creates a mapping of
+ * USER Register page, to provide a unique file offset range for
+ * @filp file, so that the CPU PTE of the Userspace mapping can be zapped
+ * through the kernel function unmap_mapping_range().
+ * It is incremented in page units.
+ * @list: Linked list to maintain user processes(contexts)
+ * having the mapping to USER Register page.
+ * It's protected by &kbase_csf_device.reg_lock.
+ */
+struct kbase_csf_user_reg {
+ struct tagged_addr dummy_page;
+ struct file *filp;
+ u32 file_offset;
+ struct list_head list;
+};
+
+/**
* struct kbase_csf_device - Object representing CSF for an instance of GPU
* platform device.
*
@@ -1463,20 +1509,6 @@ struct kbase_csf_dump_on_fault {
* of the real Hw doorbell page for the active GPU
* command queues after they are stopped or after the
* GPU is powered down.
- * @dummy_user_reg_page: Address of the dummy page that is mapped in place
- * of the real User register page just before the GPU
- * is powered down. The User register page is mapped
- * in the address space of every process, that created
- * a Base context, to enable the access to LATEST_FLUSH
- * register from userspace.
- * @nr_user_page_mapped: The number of clients using the mapping of USER page.
- * This is used to maintain backward compatibility.
- * It's protected by @reg_lock.
- * @mali_file_inode: Pointer to the inode corresponding to mali device
- * file. This is needed in order to switch to the
- * @dummy_user_reg_page on GPU power down.
- * All instances of the mali device file will point to
- * the same inode. It's protected by @reg_lock.
* @reg_lock: Lock to serialize the MCU firmware related actions
* that affect all contexts such as allocation of
* regions from shared interface area, assignment of
@@ -1531,7 +1563,7 @@ struct kbase_csf_dump_on_fault {
* the @p mcu_core_pwroff_dur_count as an update
* to the latter is asynchronous.
* @gpu_idle_hysteresis_us: Sysfs attribute for the idle hysteresis time
- * window in unit of microseconds. The firmware does not
+ * window in unit of microseconds. The firmware does not
* use it directly.
* @gpu_idle_dur_count: The counterpart of the hysteresis time window in
* interface required format, ready to be used
@@ -1545,6 +1577,8 @@ struct kbase_csf_dump_on_fault {
* @fw_core_dump: Contain members required for handling the firmware
* core dump.
* @dof: Structure for dump on fault.
+ * @user_reg: Collective information to support the mapping to
+ * USER Register page for user processes.
*/
struct kbase_csf_device {
struct kbase_mmu_table mcu_mmu;
@@ -1558,9 +1592,6 @@ struct kbase_csf_device {
struct file *db_filp;
u32 db_file_offsets;
struct tagged_addr dummy_db_page;
- struct tagged_addr dummy_user_reg_page;
- u32 nr_user_page_mapped;
- struct inode *mali_file_inode;
struct mutex reg_lock;
wait_queue_head_t event_wait;
bool interrupt_received;
@@ -1597,6 +1628,7 @@ struct kbase_csf_device {
*/
struct kbase_debug_coresight_device coresight;
#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
+ struct kbase_csf_user_reg user_reg;
};
/**
@@ -1613,6 +1645,10 @@ struct kbase_csf_device {
* @bf_data: Data relating to Bus fault.
* @gf_data: Data relating to GPU fault.
* @current_setup: Stores the MMU configuration for this address space.
+ * @is_unresponsive: Flag to indicate MMU is not responding.
+ * Set if a MMU command isn't completed within
+ * &kbase_device:mmu_as_inactive_wait_time_ms.
+ * Clear by kbase_ctx_sched_restore_all_as() after GPU reset completes.
*/
struct kbase_as {
int number;
@@ -1624,6 +1660,7 @@ struct kbase_as {
struct kbase_fault bf_data;
struct kbase_fault gf_data;
struct kbase_mmu_setup current_setup;
+ bool is_unresponsive;
};
#endif /* _KBASE_CSF_DEFS_H_ */
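The kbase_refcount_t and kbase_refcount_*() names used above come from the new mali_kbase_refcount_defs.h (see the diffstat), whose body is not shown in this diff. Judging from the call sites, the shim plausibly reduces to a kernel-version switch between refcount_t and atomic_t along these lines; treat this as an assumption, not the actual header contents:

#include <linux/version.h>

#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE)
#include <linux/refcount.h>
typedef refcount_t kbase_refcount_t;
#define kbase_refcount_read(x)          refcount_read(x)
#define kbase_refcount_set(x, v)        refcount_set(x, v)
#define kbase_refcount_dec_and_test(x)  refcount_dec_and_test(x)
#define kbase_refcount_inc_not_zero(x)  refcount_inc_not_zero(x)
#else
#include <linux/atomic.h>
typedef atomic_t kbase_refcount_t;
#define kbase_refcount_read(x)          atomic_read(x)
#define kbase_refcount_set(x, v)        atomic_set(x, v)
#define kbase_refcount_dec_and_test(x)  atomic_dec_and_test(x)
#define kbase_refcount_inc_not_zero(x)  atomic_inc_not_zero(x)
#endif
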
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.c b/mali_kbase/csf/mali_kbase_csf_firmware.c
index 4dc9de4..d69a4d4 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -201,8 +201,8 @@ static int setup_shared_iface_static_region(struct kbase_device *kbdev)
if (!interface)
return -EINVAL;
- reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
- interface->num_pages_aligned, KBASE_REG_ZONE_MCU_SHARED);
+ reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0,
+ interface->num_pages_aligned, KBASE_REG_ZONE_MCU_SHARED);
if (reg) {
mutex_lock(&kbdev->csf.reg_lock);
ret = kbase_add_va_region_rbtree(kbdev, reg,
@@ -296,19 +296,41 @@ static void boot_csf_firmware(struct kbase_device *kbdev)
wait_for_firmware_boot(kbdev);
}
-static void wait_ready(struct kbase_device *kbdev)
+/**
+ * wait_ready() - Wait for previously issued MMU command to complete.
+ *
+ * @kbdev: Kbase device to wait for a MMU command to complete.
+ *
+ * Reset GPU if the wait for previously issued command times out.
+ *
+ * Return: 0 on success, error code otherwise.
+ */
+static int wait_ready(struct kbase_device *kbdev)
{
- u32 max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
- u32 val;
+ const ktime_t wait_loop_start = ktime_get_raw();
+ const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms;
+ s64 diff;
- val = kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS));
+ do {
+ unsigned int i;
- /* Wait for a while for the update command to take effect */
- while (--max_loops && (val & AS_STATUS_AS_ACTIVE))
- val = kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS));
+ for (i = 0; i < 1000; i++) {
+ /* Wait for the MMU status to indicate there is no active command */
+ if (!(kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) &
+ AS_STATUS_AS_ACTIVE))
+ return 0;
+ }
+
+ diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start));
+ } while (diff < mmu_as_inactive_wait_time_ms);
- if (max_loops == 0)
- dev_err(kbdev->dev, "AS_ACTIVE bit stuck, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n");
+ dev_err(kbdev->dev,
+ "AS_ACTIVE bit stuck for MCU AS. Might be caused by unstable GPU clk/pwr or faulty system");
+
+ if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+ kbase_reset_gpu_locked(kbdev);
+
+ return -ETIMEDOUT;
}
static void unload_mmu_tables(struct kbase_device *kbdev)
@@ -323,7 +345,7 @@ static void unload_mmu_tables(struct kbase_device *kbdev)
mutex_unlock(&kbdev->mmu_hw_mutex);
}
-static void load_mmu_tables(struct kbase_device *kbdev)
+static int load_mmu_tables(struct kbase_device *kbdev)
{
unsigned long irq_flags;
@@ -334,7 +356,7 @@ static void load_mmu_tables(struct kbase_device *kbdev)
mutex_unlock(&kbdev->mmu_hw_mutex);
/* Wait for a while for the update command to take effect */
- wait_ready(kbdev);
+ return wait_ready(kbdev);
}
/**
@@ -695,7 +717,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
ret = kbase_mem_pool_alloc_pages(
kbase_mem_pool_group_select(kbdev, KBASE_MEM_GROUP_CSF_FW,
is_small_page),
- num_pages_aligned, phys, false);
+ num_pages_aligned, phys, false, NULL);
ignore_page_migration = false;
}
}
@@ -2240,6 +2262,7 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
INIT_LIST_HEAD(&kbdev->csf.firmware_config);
INIT_LIST_HEAD(&kbdev->csf.firmware_timeline_metadata);
INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
+ INIT_LIST_HEAD(&kbdev->csf.user_reg.list);
INIT_WORK(&kbdev->csf.firmware_reload_work,
kbase_csf_firmware_reload_worker);
INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker);
@@ -2403,7 +2426,9 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
kbase_pm_wait_for_l2_powered(kbdev);
/* Load the MMU tables into the selected address space */
- load_mmu_tables(kbdev);
+ ret = load_mmu_tables(kbdev);
+ if (ret != 0)
+ goto err_out;
boot_csf_firmware(kbdev);
@@ -2445,9 +2470,8 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
goto err_out;
}
-#ifdef CONFIG_MALI_FW_CORE_DUMP
- kbase_csf_firmware_core_dump_init(kbdev);
-#endif
+ if (kbdev->csf.fw_core_dump.available)
+ kbase_csf_firmware_core_dump_init(kbdev);
/* Firmware loaded successfully, ret = 0 */
KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_BOOT, NULL,
@@ -3029,7 +3053,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
goto page_list_alloc_error;
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
- phys, false);
+ phys, false, NULL);
if (ret <= 0)
goto phys_mem_pool_alloc_error;
@@ -3040,8 +3064,8 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
if (!cpu_addr)
goto vmap_error;
- va_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
- num_pages, KBASE_REG_ZONE_MCU_SHARED);
+ va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages,
+ KBASE_REG_ZONE_MCU_SHARED);
if (!va_reg)
goto va_region_alloc_error;
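
The rewritten wait_ready() above swaps the old fixed iteration budget for a wall-clock bound, bursting register reads between comparatively expensive ktime reads and resetting the GPU on timeout. A minimal sketch of that bounded-poll shape, not part of the patch, with read_status() and STATUS_BUSY as hypothetical stand-ins for kbase_reg_read(..., AS_STATUS) and AS_STATUS_AS_ACTIVE, and timeout_ms playing the role of kbdev->mmu_as_inactive_wait_time_ms:

/* Illustrative only: assumes the kernel ktime/errno headers already used in the file above. */
static int poll_until_idle(struct kbase_device *kbdev, u32 timeout_ms)
{
	const ktime_t start = ktime_get_raw();

	do {
		unsigned int i;

		/* Burst of status reads between the pricier clock reads */
		for (i = 0; i < 1000; i++) {
			if (!(read_status(kbdev) & STATUS_BUSY)) /* hypothetical helpers */
				return 0;
		}
	} while (ktime_to_ms(ktime_sub(ktime_get_raw(), start)) < timeout_ms);

	return -ETIMEDOUT;
}
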
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
index 7976d90..37a7f21 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1124,6 +1124,7 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
INIT_LIST_HEAD(&kbdev->csf.firmware_config);
INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
+ INIT_LIST_HEAD(&kbdev->csf.user_reg.list);
INIT_WORK(&kbdev->csf.firmware_reload_work,
kbase_csf_firmware_reload_worker);
INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker);
@@ -1569,7 +1570,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
goto page_list_alloc_error;
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
- phys, false);
+ phys, false, NULL);
if (ret <= 0)
goto phys_mem_pool_alloc_error;
@@ -1580,8 +1581,8 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
if (!cpu_addr)
goto vmap_error;
- va_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
- num_pages, KBASE_REG_ZONE_MCU_SHARED);
+ va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages,
+ KBASE_REG_ZONE_MCU_SHARED);
if (!va_reg)
goto va_region_alloc_error;
diff --git a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c
index 42d19e1..7c14b8e 100644
--- a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c
+++ b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -100,10 +100,10 @@ static void evict_heap_context(struct kbase_csf_heap_context_allocator *const ct
lockdep_assert_held(&ctx_alloc->lock);
- /* There is no need to take vm_lock here as the ctx_alloc region is no_user_free
- * refcounted. The region and the backing page can't disappear whilst this
- * function is executing.
- * Flush type is passed as FLUSH_PT to CLN+INV L2 only.
+ /* There is no need to take vm_lock here as the ctx_alloc region is protected
+ * via a nonzero no_user_free_count. The region and the backing page can't
+ * disappear whilst this function is executing. Flush type is passed as FLUSH_PT
+ * to CLN+INV L2 only.
*/
kbase_mmu_flush_pa_range(kctx->kbdev, kctx,
heap_context_pa, ctx_alloc->heap_context_size_aligned,
@@ -181,14 +181,9 @@ void kbase_csf_heap_context_allocator_term(
if (ctx_alloc->region) {
kbase_gpu_vm_lock(kctx);
- /*
- * We can't enforce (nor check) the no_user_free refcount
- * to be 0 here as other code regions can take such a reference.
- * Anyway, this isn't an issue as the region will eventually
- * be freed by the region tracker if its refcount didn't drop
- * to 0.
- */
- kbase_va_region_no_user_free_put(kctx, ctx_alloc->region);
+ WARN_ON(!kbase_va_region_is_no_user_free(ctx_alloc->region));
+
+ kbase_va_region_no_user_free_dec(ctx_alloc->region);
kbase_mem_free_region(kctx, ctx_alloc->region);
kbase_gpu_vm_unlock(kctx);
}
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.c b/mali_kbase/csf/mali_kbase_csf_kcpu.c
index 8c1fcdb..0797224 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu.c
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -365,15 +365,16 @@ static int kbase_kcpu_jit_allocate_prepare(
{
struct kbase_context *const kctx = kcpu_queue->kctx;
void __user *data = u64_to_user_ptr(alloc_info->info);
- struct base_jit_alloc_info *info;
+ struct base_jit_alloc_info *info = NULL;
u32 count = alloc_info->count;
int ret = 0;
u32 i;
lockdep_assert_held(&kcpu_queue->lock);
- if (!data || count > kcpu_queue->kctx->jit_max_allocations ||
- count > ARRAY_SIZE(kctx->jit_alloc)) {
+ if ((count == 0) || (count > ARRAY_SIZE(kctx->jit_alloc)) ||
+ (count > kcpu_queue->kctx->jit_max_allocations) || (!data) ||
+ !kbase_mem_allow_alloc(kctx)) {
ret = -EINVAL;
goto out;
}
@@ -610,6 +611,7 @@ out:
return ret;
}
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
static int kbase_csf_queue_group_suspend_prepare(
struct kbase_kcpu_command_queue *kcpu_queue,
struct base_kcpu_command_group_suspend_info *suspend_buf,
@@ -681,8 +683,7 @@ static int kbase_csf_queue_group_suspend_prepare(
(kbase_reg_current_backed_size(reg) < nr_pages) ||
!(reg->flags & KBASE_REG_CPU_WR) ||
(reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) ||
- (kbase_is_region_shrinkable(reg)) ||
- (kbase_va_region_is_no_user_free(kctx, reg))) {
+ (kbase_is_region_shrinkable(reg)) || (kbase_va_region_is_no_user_free(reg))) {
ret = -EINVAL;
goto out_clean_pages;
}
@@ -726,6 +727,7 @@ static int kbase_csf_queue_group_suspend_process(struct kbase_context *kctx,
{
return kbase_csf_queue_group_suspend(kctx, sus_buf, group_handle);
}
+#endif
static enum kbase_csf_event_callback_action event_cqs_callback(void *param)
{
@@ -1037,9 +1039,12 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev,
queue->kctx, cqs_wait_operation->objs[i].addr, &mapping);
u64 val = 0;
- /* GPUCORE-28172 RDT to review */
- if (!queue->command_started)
+ if (!queue->command_started) {
queue->command_started = true;
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START(
+ kbdev, queue);
+ }
+
if (!evt) {
dev_warn(kbdev->dev,
@@ -1089,7 +1094,8 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev,
queue->has_error = true;
}
- /* GPUCORE-28172 RDT to review */
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END(
+ kbdev, queue, *(u32 *)evt);
queue->command_started = false;
}
@@ -1232,8 +1238,6 @@ static void kbase_kcpu_cqs_set_operation_process(
evt = (uintptr_t)kbase_phy_alloc_mapping_get(
queue->kctx, cqs_set_operation->objs[i].addr, &mapping);
- /* GPUCORE-28172 RDT to review */
-
if (!evt) {
dev_warn(kbdev->dev,
"Sync memory %llx already freed", cqs_set_operation->objs[i].addr);
@@ -1258,7 +1262,8 @@ static void kbase_kcpu_cqs_set_operation_process(
break;
}
- /* GPUCORE-28172 RDT to review */
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION(
+ kbdev, queue, *(u32 *)evt ? 1 : 0);
/* Always propagate errors */
*(u32 *)evt = queue->has_error;
@@ -1622,11 +1627,7 @@ static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_q
/* Set reference to KCPU metadata and increment refcount */
kcpu_fence->metadata = kcpu_queue->metadata;
-#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
- WARN_ON(!atomic_inc_not_zero(&kcpu_fence->metadata->refcount));
-#else
- WARN_ON(!refcount_inc_not_zero(&kcpu_fence->metadata->refcount));
-#endif
+ WARN_ON(!kbase_refcount_inc_not_zero(&kcpu_fence->metadata->refcount));
/* create a sync_file fd representing the fence */
*sync_file = sync_file_create(fence_out);
@@ -2056,7 +2057,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
break;
}
- case BASE_KCPU_COMMAND_TYPE_JIT_FREE:
+ case BASE_KCPU_COMMAND_TYPE_JIT_FREE: {
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START(kbdev, queue);
status = kbase_kcpu_jit_free_process(queue, cmd);
@@ -2066,6 +2067,8 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END(
kbdev, queue);
break;
+ }
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: {
struct kbase_suspend_copy_buffer *sus_buf =
cmd->info.suspend_buf_copy.sus_buf;
@@ -2082,24 +2085,25 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END(
kbdev, queue, status);
+ }
- if (!sus_buf->cpu_alloc) {
- int i;
+ if (!sus_buf->cpu_alloc) {
+ int i;
- for (i = 0; i < sus_buf->nr_pages; i++)
- put_page(sus_buf->pages[i]);
- } else {
- kbase_mem_phy_alloc_kernel_unmapped(
- sus_buf->cpu_alloc);
- kbase_mem_phy_alloc_put(
- sus_buf->cpu_alloc);
- }
+ for (i = 0; i < sus_buf->nr_pages; i++)
+ put_page(sus_buf->pages[i]);
+ } else {
+ kbase_mem_phy_alloc_kernel_unmapped(
+ sus_buf->cpu_alloc);
+ kbase_mem_phy_alloc_put(
+ sus_buf->cpu_alloc);
}
kfree(sus_buf->pages);
kfree(sus_buf);
break;
}
+#endif
default:
dev_dbg(kbdev->dev,
"Unrecognized command type");
@@ -2174,12 +2178,29 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
}
case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION:
{
- /* GPUCORE-28172 RDT to review */
+ const struct base_cqs_wait_operation_info *waits =
+ cmd->info.cqs_wait_operation.objs;
+ u32 inherit_err_flags = cmd->info.cqs_wait_operation.inherit_err_flags;
+ unsigned int i;
+
+ for (i = 0; i < cmd->info.cqs_wait_operation.nr_objs; i++) {
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION(
+ kbdev, queue, waits[i].addr, waits[i].val,
+ waits[i].operation, waits[i].data_type,
+ (inherit_err_flags & ((uint32_t)1 << i)) ? 1 : 0);
+ }
break;
}
case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION:
{
- /* GPUCORE-28172 RDT to review */
+ const struct base_cqs_set_operation_info *sets = cmd->info.cqs_set_operation.objs;
+ unsigned int i;
+
+ for (i = 0; i < cmd->info.cqs_set_operation.nr_objs; i++) {
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION(
+ kbdev, queue, sets[i].addr, sets[i].val,
+ sets[i].operation, sets[i].data_type);
+ }
break;
}
case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER:
@@ -2226,11 +2247,13 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE(kbdev, queue);
break;
}
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND:
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND(
kbdev, queue, cmd->info.suspend_buf_copy.sus_buf,
cmd->info.suspend_buf_copy.group_handle);
break;
+#endif
default:
dev_dbg(kbdev->dev, "Unknown command type %u", cmd->type);
break;
@@ -2387,11 +2410,13 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
ret = kbase_kcpu_jit_free_prepare(queue,
&command.info.jit_free, kcpu_cmd);
break;
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND:
ret = kbase_csf_queue_group_suspend_prepare(queue,
&command.info.suspend_buf_copy,
kcpu_cmd);
break;
+#endif
default:
dev_dbg(queue->kctx->kbdev->dev,
"Unknown command type %u", command.type);
@@ -2467,6 +2492,7 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
{
struct kbase_kcpu_command_queue *queue;
int idx;
+ int n;
int ret = 0;
#if IS_ENABLED(CONFIG_SYNC_FILE)
struct kbase_kcpu_dma_fence_meta *metadata;
@@ -2519,6 +2545,7 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
metadata = kzalloc(sizeof(*metadata), GFP_KERNEL);
if (!metadata) {
+ destroy_workqueue(queue->wq);
kfree(queue);
ret = -ENOMEM;
goto out;
@@ -2526,14 +2553,17 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
metadata->kbdev = kctx->kbdev;
metadata->kctx_id = kctx->id;
- snprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%d-%d_%d-%lld-kcpu", kctx->kbdev->id,
- kctx->tgid, kctx->id, queue->fence_context);
+ n = snprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%d-%d_%d-%lld-kcpu",
+ kctx->kbdev->id, kctx->tgid, kctx->id, queue->fence_context);
+ if (WARN_ON(n >= MAX_TIMELINE_NAME)) {
+ destroy_workqueue(queue->wq);
+ kfree(queue);
+ kfree(metadata);
+ ret = -EINVAL;
+ goto out;
+ }
-#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
- atomic_set(&metadata->refcount, 1);
-#else
- refcount_set(&metadata->refcount, 1);
-#endif
+ kbase_refcount_set(&metadata->refcount, 1);
queue->metadata = metadata;
atomic_inc(&kctx->kbdev->live_fence_metadata);
#endif /* CONFIG_SYNC_FILE */
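
The kbase_refcount_set()/kbase_refcount_inc_not_zero() helpers adopted in mali_kbase_csf_kcpu.c above fold the previous per-call-site kernel-version conditionals into one place. Their definitions live in a header outside this diff; a plausible sketch, assuming they merely select between atomic_t and refcount_t exactly as the removed inline #if blocks did:

/* Assumed shape only: the real wrappers are defined elsewhere in the driver. */
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
typedef atomic_t kbase_refcount_t;
#define kbase_refcount_set(r, v)       atomic_set(r, v)
#define kbase_refcount_inc_not_zero(r) atomic_inc_not_zero(r)
#else
typedef refcount_t kbase_refcount_t;
#define kbase_refcount_set(r, v)       refcount_set(r, v)
#define kbase_refcount_inc_not_zero(r) refcount_inc_not_zero(r)
#endif
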
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.h b/mali_kbase/csf/mali_kbase_csf_kcpu.h
index b8099fd..6d5145e 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu.h
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -186,6 +186,7 @@ struct kbase_suspend_copy_buffer {
struct kbase_mem_phy_alloc *cpu_alloc;
};
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
/**
* struct kbase_kcpu_command_group_suspend_info - structure which contains
* suspend buffer data captured for a suspended queue group.
@@ -198,6 +199,7 @@ struct kbase_kcpu_command_group_suspend_info {
struct kbase_suspend_copy_buffer *sus_buf;
u8 group_handle;
};
+#endif
/**
@@ -232,7 +234,9 @@ struct kbase_kcpu_command {
struct kbase_kcpu_command_import_info import;
struct kbase_kcpu_command_jit_alloc_info jit_alloc;
struct kbase_kcpu_command_jit_free_info jit_free;
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
struct kbase_kcpu_command_group_suspend_info suspend_buf_copy;
+#endif
} info;
};
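
The timeline-name change in mali_kbase_csf_kcpu.c above also starts acting on snprintf()'s return value, which is the length the formatted string needed; a result greater than or equal to the buffer size therefore signals truncation. A small illustration of the idiom, not taken from the patch:

/* Returns 0 if the formatted name fits in buf, -EINVAL if it was truncated. */
static int build_name(char *buf, size_t buf_sz, int dev_id, int tgid)
{
	int n = snprintf(buf, buf_sz, "%d-%d-kcpu", dev_id, tgid);

	if (n < 0 || (size_t)n >= buf_sz)
		return -EINVAL;

	return 0;
}
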
diff --git a/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c b/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c
index 77e19db..4056a9d 100644
--- a/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c
+++ b/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c
@@ -613,7 +613,7 @@ static int shared_mcu_csg_reg_init(struct kbase_device *kbdev,
int err, i;
INIT_LIST_HEAD(&csg_reg->link);
- reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, nr_csg_reg_pages,
+ reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, nr_csg_reg_pages,
KBASE_REG_ZONE_MCU_SHARED);
if (!reg) {
@@ -668,16 +668,17 @@ fail_userio_pages_map_fail:
while (i-- > 0) {
vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages);
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
- KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR, true);
+ KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES,
+ MCU_AS_NR, true);
}
vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages);
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
- nr_susp_pages, MCU_AS_NR, true);
+ nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
fail_pmod_map_fail:
vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages);
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
- nr_susp_pages, MCU_AS_NR, true);
+ nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
fail_susp_map_fail:
mutex_lock(&kbdev->csf.reg_lock);
kbase_remove_va_region(kbdev, reg);
@@ -701,15 +702,16 @@ static void shared_mcu_csg_reg_term(struct kbase_device *kbdev,
for (i = 0; i < nr_csis; i++) {
vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages);
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
- KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR, true);
+ KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES,
+ MCU_AS_NR, true);
}
vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages);
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
- nr_susp_pages, MCU_AS_NR, true);
+ nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages);
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
- nr_susp_pages, MCU_AS_NR, true);
+ nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
mutex_lock(&kbdev->csf.reg_lock);
kbase_remove_va_region(kbdev, reg);
@@ -738,7 +740,7 @@ int kbase_csf_mcu_shared_regs_data_init(struct kbase_device *kbdev)
return -ENOMEM;
if (kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1,
- &shared_regs->dummy_phys[0], false) <= 0)
+ &shared_regs->dummy_phys[0], false, NULL) <= 0)
return -ENOMEM;
shared_regs->dummy_phys_allocated = true;
diff --git a/mali_kbase/csf/mali_kbase_csf_registers.h b/mali_kbase/csf/mali_kbase_csf_registers.h
index 82389e5..b5bf7bb 100644
--- a/mali_kbase/csf/mali_kbase_csf_registers.h
+++ b/mali_kbase/csf/mali_kbase_csf_registers.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -31,10 +31,6 @@
* Begin register sets
*/
-/* DOORBELLS base address */
-#define DOORBELLS_BASE 0x0080000
-#define DOORBELLS_REG(r) (DOORBELLS_BASE + (r))
-
/* CS_KERNEL_INPUT_BLOCK base address */
#define CS_KERNEL_INPUT_BLOCK_BASE 0x0000
#define CS_KERNEL_INPUT_BLOCK_REG(r) (CS_KERNEL_INPUT_BLOCK_BASE + (r))
@@ -71,10 +67,6 @@
#define GLB_OUTPUT_BLOCK_BASE 0x0000
#define GLB_OUTPUT_BLOCK_REG(r) (GLB_OUTPUT_BLOCK_BASE + (r))
-/* USER base address */
-#define USER_BASE 0x0010000
-#define USER_REG(r) (USER_BASE + (r))
-
/* End register sets */
/*
@@ -267,9 +259,6 @@
#define GLB_DEBUG_ARG_OUT0 0x0FE0
#endif /* CONFIG_MALI_CORESIGHT */
-/* USER register offsets */
-#define LATEST_FLUSH 0x0000 /* () Flush ID of latest clean-and-invalidate operation */
-
/* End register offsets */
/* CS_KERNEL_INPUT_BLOCK register set definitions */
@@ -728,6 +717,27 @@
#define CS_FAULT_EXCEPTION_TYPE_ADDR_RANGE_FAULT 0x5A
#define CS_FAULT_EXCEPTION_TYPE_IMPRECISE_FAULT 0x5B
#define CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT 0x69
+#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L0 0xC0
+#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L1 0xC1
+#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L2 0xC2
+#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L3 0xC3
+#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L4 0xC4
+#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_0 0xC8
+#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_1 0xC9
+#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_2 0xCA
+#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_3 0xCB
+#define CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_1 0xD9
+#define CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_2 0xDA
+#define CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_3 0xDB
+#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN 0xE0
+#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_0 0xE4
+#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_1 0xE5
+#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_2 0xE6
+#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_3 0xE7
+#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_0 0xE8
+#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_1 0xE9
+#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_2 0xEA
+#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_3 0xEB
/* End of CS_FAULT_EXCEPTION_TYPE values */
#define CS_FAULT_EXCEPTION_DATA_SHIFT 8
#define CS_FAULT_EXCEPTION_DATA_MASK (0xFFFFFF << CS_FAULT_EXCEPTION_DATA_SHIFT)
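
The CS_FAULT exception type values added above sit alongside the data field whose shift and mask follow; a minimal sketch of splitting a raw CS_FAULT word into the two fields, assuming (as the 8-bit code values suggest) that the type occupies the low byte:

/* Illustrative only: decode_cs_fault() is not part of the patch. */
static void decode_cs_fault(u32 cs_fault, u32 *exception_type, u32 *exception_data)
{
	*exception_type = cs_fault & 0xFF; /* assumption: type is bits [7:0] */
	*exception_data = (cs_fault & CS_FAULT_EXCEPTION_DATA_MASK) >>
			  CS_FAULT_EXCEPTION_DATA_SHIFT;
}

The decoded type is what kbase_gpu_exception_name(), extended later in this patch, maps to strings such as "TRANSLATION_FAULT at level 2".
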
diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c
index 755df75..bbae94a 100644
--- a/mali_kbase/csf/mali_kbase_csf_scheduler.c
+++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1554,11 +1554,13 @@ static void program_cs(struct kbase_device *kbdev,
WARN_ON(csi_index >= ginfo->stream_num))
return;
- assign_user_doorbell_to_queue(kbdev, queue);
- if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
- return;
+ if (queue->enabled) {
+ assign_user_doorbell_to_queue(kbdev, queue);
+ if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
+ return;
- WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr);
+ WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr);
+ }
if (queue->enabled && queue_group_suspended_locked(group))
program_cs_extract_init(queue);
@@ -1860,6 +1862,7 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend)
unsigned long flags;
struct kbase_csf_cmd_stream_group_info *ginfo =
&global_iface->groups[slot];
+
u32 halt_cmd = suspend ? CSG_REQ_STATE_SUSPEND :
CSG_REQ_STATE_TERMINATE;
@@ -1877,8 +1880,8 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend)
csg_slot[slot].trigger_jiffies = jiffies;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP_REQ, group, halt_cmd);
- KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG(
- kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot);
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG(
+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot, suspend);
}
}
@@ -3433,6 +3436,9 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
/* The on slot csg is now stopped */
clear_bit(i, slot_mask);
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG(
+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i);
+
if (likely(group)) {
bool as_fault;
/* Only do save/cleanup if the
@@ -5071,6 +5077,9 @@ static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slo
/* The on slot csg is now stopped */
clear_bit(i, slot_mask_local);
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG(
+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i);
+
group = scheduler->csg_slots[i].resident_group;
if (likely(group)) {
/* Only do save/cleanup if the
@@ -5129,8 +5138,13 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev)
if (all_addr_spaces_used) {
for (i = 0; i != total_csg_slots; ++i) {
- if (scheduler->csg_slots[i].resident_group != NULL)
+ if (scheduler->csg_slots[i].resident_group != NULL) {
+ if (WARN_ON(scheduler->csg_slots[i].resident_group->kctx->as_nr <
+ 0))
+ continue;
+
as_usage[scheduler->csg_slots[i].resident_group->kctx->as_nr]++;
+ }
}
}
@@ -5151,6 +5165,9 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev)
(group->priority != BASE_QUEUE_GROUP_PRIORITY_REALTIME) &&
((lru_idle_group == NULL) ||
(lru_idle_group->prepared_seq_num < group->prepared_seq_num))) {
+ if (WARN_ON(group->kctx->as_nr < 0))
+ continue;
+
/* If all address spaces are used, we need to ensure the group does not
 * share the AS with other active CSGs. Otherwise the CSG would be freed without an AS
* and this optimization would not work.
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
index 14d8097..8072a8b 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -228,11 +228,11 @@ static void remove_unlinked_chunk(struct kbase_context *kctx,
kbase_vunmap(kctx, &chunk->map);
/* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT
* regions), and so we must clear that flag too before freeing.
- * For "no user free", we check that the refcount is 1 as it is a shrinkable region;
+ * For "no user free count", we check that the count is 1 as it is a shrinkable region;
* no other code part within kbase can take a reference to it.
*/
- WARN_ON(chunk->region->no_user_free_refcnt > 1);
- kbase_va_region_no_user_free_put(kctx, chunk->region);
+ WARN_ON(atomic_read(&chunk->region->no_user_free_count) > 1);
+ kbase_va_region_no_user_free_dec(chunk->region);
#if !defined(CONFIG_MALI_VECTOR_DUMP)
chunk->region->flags &= ~KBASE_REG_DONT_NEED;
#endif
@@ -315,8 +315,8 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context *
* It should be fine and not a security risk if we let the region leak till
* region tracker termination in such a case.
*/
- if (unlikely(chunk->region->no_user_free_refcnt > 1)) {
- dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_refcnt > 1!\n");
+ if (unlikely(atomic_read(&chunk->region->no_user_free_count) > 1)) {
+ dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_count > 1!\n");
goto unroll_region;
}
@@ -371,7 +371,7 @@ unroll_region:
/* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT
* regions), and so we must clear that flag too before freeing.
*/
- kbase_va_region_no_user_free_put(kctx, chunk->region);
+ kbase_va_region_no_user_free_dec(chunk->region);
#if !defined(CONFIG_MALI_VECTOR_DUMP)
chunk->region->flags &= ~KBASE_REG_DONT_NEED;
#endif
@@ -531,7 +531,7 @@ static void delete_heap(struct kbase_csf_tiler_heap *heap)
if (heap->buf_desc_reg) {
kbase_vunmap(kctx, &heap->buf_desc_map);
kbase_gpu_vm_lock(kctx);
- kbase_va_region_no_user_free_put(kctx, heap->buf_desc_reg);
+ kbase_va_region_no_user_free_dec(heap->buf_desc_reg);
kbase_gpu_vm_unlock(kctx);
}
@@ -741,7 +741,8 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_
*/
heap->buf_desc_va = buf_desc_va;
- heap->buf_desc_reg = kbase_va_region_no_user_free_get(kctx, buf_desc_reg);
+ heap->buf_desc_reg = buf_desc_reg;
+ kbase_va_region_no_user_free_inc(buf_desc_reg);
vmap_ptr = kbase_vmap_reg(kctx, buf_desc_reg, buf_desc_va, TILER_BUF_DESC_SIZE,
KBASE_REG_CPU_RD, &heap->buf_desc_map,
@@ -834,7 +835,7 @@ heap_context_alloc_failed:
buf_desc_vmap_failed:
if (heap->buf_desc_reg) {
kbase_gpu_vm_lock(kctx);
- kbase_va_region_no_user_free_put(kctx, heap->buf_desc_reg);
+ kbase_va_region_no_user_free_dec(heap->buf_desc_reg);
kbase_gpu_vm_unlock(kctx);
}
buf_desc_not_suitable:
@@ -967,7 +968,12 @@ int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
err = validate_allocation_request(heap, nr_in_flight, pending_frag_count);
if (unlikely(err)) {
- dev_err(kctx->kbdev->dev,
+ /* The allocation request can be legitimate, but may be invoked on a heap
+ * that has already reached the maximum pre-configured capacity. This
+ * is useful debug information, but should not be treated as an error,
+ * since the request will be re-sent at a later point.
+ */
+ dev_dbg(kctx->kbdev->dev,
"Not allocating new chunk for heap 0x%llX due to current heap state (err %d)",
gpu_heap_va, err);
mutex_unlock(&kctx->csf.tiler_heaps.lock);
diff --git a/mali_kbase/csf/mali_kbase_csf_tl_reader.c b/mali_kbase/csf/mali_kbase_csf_tl_reader.c
index 162b40f..910ba22 100644
--- a/mali_kbase/csf/mali_kbase_csf_tl_reader.c
+++ b/mali_kbase/csf/mali_kbase_csf_tl_reader.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -31,9 +31,7 @@
#include "mali_kbase_pm.h"
#include "mali_kbase_hwaccess_time.h"
-#include <linux/gcd.h>
#include <linux/math64.h>
-#include <asm/arch_timer.h>
#if IS_ENABLED(CONFIG_DEBUG_FS)
#include "tl/mali_kbase_timeline_priv.h"
@@ -98,81 +96,6 @@ void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev)
#endif
/**
- * get_cpu_gpu_time() - Get current CPU and GPU timestamps.
- *
- * @kbdev: Kbase device.
- * @cpu_ts: Output CPU timestamp.
- * @gpu_ts: Output GPU timestamp.
- * @gpu_cycle: Output GPU cycle counts.
- */
-static void get_cpu_gpu_time(
- struct kbase_device *kbdev,
- u64 *cpu_ts,
- u64 *gpu_ts,
- u64 *gpu_cycle)
-{
- struct timespec64 ts;
-
- kbase_pm_context_active(kbdev);
- kbase_backend_get_gpu_time(kbdev, gpu_cycle, gpu_ts, &ts);
- kbase_pm_context_idle(kbdev);
-
- if (cpu_ts)
- *cpu_ts = ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
-}
-
-
-/**
- * kbase_ts_converter_init() - Initialize system timestamp converter.
- *
- * @self: System Timestamp Converter instance.
- * @kbdev: Kbase device pointer
- *
- * Return: Zero on success, -1 otherwise.
- */
-static int kbase_ts_converter_init(
- struct kbase_ts_converter *self,
- struct kbase_device *kbdev)
-{
- u64 cpu_ts = 0;
- u64 gpu_ts = 0;
- u64 freq;
- u64 common_factor;
-
- get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL);
- freq = arch_timer_get_cntfrq();
-
- if (!freq) {
- dev_warn(kbdev->dev, "arch_timer_get_rate() is zero!");
- return -1;
- }
-
- common_factor = gcd(NSEC_PER_SEC, freq);
-
- self->multiplier = div64_u64(NSEC_PER_SEC, common_factor);
- self->divisor = div64_u64(freq, common_factor);
- self->offset =
- cpu_ts - div64_u64(gpu_ts * self->multiplier, self->divisor);
-
- return 0;
-}
-
-/**
- * kbase_ts_converter_convert() - Convert GPU timestamp to CPU timestamp.
- *
- * @self: System Timestamp Converter instance.
- * @gpu_ts: System timestamp value to converter.
- *
- * Return: The CPU timestamp.
- */
-static u64 __maybe_unused
-kbase_ts_converter_convert(const struct kbase_ts_converter *self, u64 gpu_ts)
-{
- return div64_u64(gpu_ts * self->multiplier, self->divisor) +
- self->offset;
-}
-
-/**
* tl_reader_overflow_notify() - Emit stream overflow tracepoint.
*
* @self: CSFFW TL Reader instance.
@@ -322,8 +245,8 @@ int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
{
struct kbase_csffw_tl_message *msg =
(struct kbase_csffw_tl_message *) csffw_data_it;
- msg->timestamp = kbase_ts_converter_convert(&self->ts_converter,
- msg->timestamp);
+ msg->timestamp =
+ kbase_backend_time_convert_gpu_to_cpu(kbdev, msg->timestamp);
}
/* Copy the message out to the tl_stream. */
@@ -397,9 +320,6 @@ static int tl_reader_init_late(
return -1;
}
- if (kbase_ts_converter_init(&self->ts_converter, kbdev))
- return -1;
-
self->kbdev = kbdev;
self->trace_buffer = tb;
self->tl_header.data = hdr;
diff --git a/mali_kbase/csf/mali_kbase_csf_tl_reader.h b/mali_kbase/csf/mali_kbase_csf_tl_reader.h
index d554d56..12b285f 100644
--- a/mali_kbase/csf/mali_kbase_csf_tl_reader.h
+++ b/mali_kbase/csf/mali_kbase_csf_tl_reader.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -40,37 +40,6 @@ struct kbase_tlstream;
struct kbase_device;
/**
- * struct kbase_ts_converter - System timestamp to CPU timestamp converter state.
- *
- * @multiplier: Numerator of the converter's fraction.
- * @divisor: Denominator of the converter's fraction.
- * @offset: Converter's offset term.
- *
- * According to Generic timer spec, system timer:
- * - Increments at a fixed frequency
- * - Starts operating from zero
- *
- * Hence CPU time is a linear function of System Time.
- *
- * CPU_ts = alpha * SYS_ts + beta
- *
- * Where
- * - alpha = 10^9/SYS_ts_freq
- * - beta is calculated by two timer samples taken at the same time:
- * beta = CPU_ts_s - SYS_ts_s * alpha
- *
- * Since alpha is a rational number, we minimizing possible
- * rounding error by simplifying the ratio. Thus alpha is stored
- * as a simple `multiplier / divisor` ratio.
- *
- */
-struct kbase_ts_converter {
- u64 multiplier;
- u64 divisor;
- s64 offset;
-};
-
-/**
* struct kbase_csf_tl_reader - CSFFW timeline reader state.
*
* @read_timer: Timer used for periodical tracebufer reading.
@@ -106,7 +75,6 @@ struct kbase_csf_tl_reader {
size_t size;
size_t btc;
} tl_header;
- struct kbase_ts_converter ts_converter;
bool got_first_event;
bool is_active;
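
To make the mapping that the removed kbase_ts_converter documented concrete (the same linear conversion is now supplied by kbase_backend_time_convert_gpu_to_cpu()): taking an illustrative 25 MHz system counter, alpha = 10^9 / 25 000 000 = 40, so after dividing by the gcd the ratio is stored as multiplier 40 and divisor 1, and a GPU timestamp converts as CPU_ts = 40 * SYS_ts + beta nanoseconds, where beta is fixed once from a simultaneously sampled CPU/GPU timestamp pair.
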
diff --git a/mali_kbase/device/backend/mali_kbase_device_csf.c b/mali_kbase/device/backend/mali_kbase_device_csf.c
index 217a056..492684f 100644
--- a/mali_kbase/device/backend/mali_kbase_device_csf.c
+++ b/mali_kbase/device/backend/mali_kbase_device_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -123,6 +123,10 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
if (err)
goto fail_update_l2_features;
+ err = kbase_backend_time_init(kbdev);
+ if (err)
+ goto fail_update_l2_features;
+
init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
kbase_pm_context_idle(kbdev);
@@ -285,8 +289,10 @@ static const struct kbase_device_init dev_init[] = {
"Dummy model initialization failed" },
#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
{ assign_irqs, NULL, "IRQ search failed" },
- { registers_map, registers_unmap, "Register map failed" },
#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
+#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
+ { registers_map, registers_unmap, "Register map failed" },
+#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
{ power_control_init, power_control_term, "Power control initialization failed" },
{ kbase_device_io_history_init, kbase_device_io_history_term,
"Register access history initialization failed" },
@@ -359,7 +365,6 @@ static void kbase_device_term_partial(struct kbase_device *kbdev,
void kbase_device_term(struct kbase_device *kbdev)
{
- kbdev->csf.mali_file_inode = NULL;
kbase_device_term_partial(kbdev, ARRAY_SIZE(dev_init));
kbase_mem_halt(kbdev);
}
diff --git a/mali_kbase/device/backend/mali_kbase_device_jm.c b/mali_kbase/device/backend/mali_kbase_device_jm.c
index c104fa4..b46180f 100644
--- a/mali_kbase/device/backend/mali_kbase_device_jm.c
+++ b/mali_kbase/device/backend/mali_kbase_device_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -100,6 +100,10 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
if (err)
goto fail_update_l2_features;
+ err = kbase_backend_time_init(kbdev);
+ if (err)
+ goto fail_update_l2_features;
+
init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
/* Idle the GPU and/or cores, if the policy wants it to */
@@ -211,17 +215,19 @@ static void kbase_device_hwcnt_backend_jm_watchdog_term(struct kbase_device *kbd
static const struct kbase_device_init dev_init[] = {
#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
- { kbase_gpu_device_create, kbase_gpu_device_destroy,
- "Dummy model initialization failed" },
+ { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" },
#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
{ assign_irqs, NULL, "IRQ search failed" },
- { registers_map, registers_unmap, "Register map failed" },
#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
+#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
+ { registers_map, registers_unmap, "Register map failed" },
+#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
{ kbase_device_io_history_init, kbase_device_io_history_term,
"Register access history initialization failed" },
{ kbase_device_pm_init, kbase_device_pm_term, "Power management initialization failed" },
{ kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" },
{ kbase_device_populate_max_freq, NULL, "Populating max frequency failed" },
+ { kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" },
{ kbase_device_misc_init, kbase_device_misc_term,
"Miscellaneous device initialization failed" },
{ kbase_device_pcm_dev_init, kbase_device_pcm_dev_term,
@@ -237,7 +243,6 @@ static const struct kbase_device_init dev_init[] = {
"Timeline stream initialization failed" },
{ kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term,
"Clock rate trace manager initialization failed" },
- { kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" },
{ kbase_instr_backend_init, kbase_instr_backend_term,
"Instrumentation backend initialization failed" },
{ kbase_device_hwcnt_watchdog_if_init, kbase_device_hwcnt_watchdog_if_term,
diff --git a/mali_kbase/device/mali_kbase_device.c b/mali_kbase/device/mali_kbase_device.c
index 4f5ac22..15839ae 100644
--- a/mali_kbase/device/mali_kbase_device.c
+++ b/mali_kbase/device/mali_kbase_device.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -35,6 +35,7 @@
#include <mali_kbase.h>
#include <mali_kbase_defs.h>
#include <mali_kbase_hwaccess_instr.h>
+#include <mali_kbase_hwaccess_time.h>
#include <mali_kbase_hw.h>
#include <mali_kbase_config_defaults.h>
#include <linux/priority_control_manager.h>
@@ -308,7 +309,8 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
#endif /* MALI_USE_CSF */
kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
-
+ kbdev->mmu_as_inactive_wait_time_ms =
+ kbase_get_timeout_ms(kbdev, MMU_AS_INACTIVE_WAIT_TIMEOUT);
mutex_init(&kbdev->kctx_list_lock);
INIT_LIST_HEAD(&kbdev->kctx_list);
diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c
index 15bfd03..60ba9be 100644
--- a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c
+++ b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -105,6 +105,70 @@ const char *kbase_gpu_exception_name(u32 const exception_code)
case GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_CACHEABILITY_FAULT:
e = "GPU_CACHEABILITY_FAULT";
break;
+ /* MMU Fault */
+ case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L0:
+ e = "TRANSLATION_FAULT at level 0";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L1:
+ e = "TRANSLATION_FAULT at level 1";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L2:
+ e = "TRANSLATION_FAULT at level 2";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L3:
+ e = "TRANSLATION_FAULT at level 3";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L4:
+ e = "TRANSLATION_FAULT";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_0:
+ e = "PERMISSION_FAULT at level 0";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_1:
+ e = "PERMISSION_FAULT at level 1";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_2:
+ e = "PERMISSION_FAULT at level 2";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_3:
+ e = "PERMISSION_FAULT at level 3";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_1:
+ e = "ACCESS_FLAG at level 1";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_2:
+ e = "ACCESS_FLAG at level 2";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_3:
+ e = "ACCESS_FLAG at level 3";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN:
+ e = "ADDRESS_SIZE_FAULT_IN";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_0:
+ e = "ADDRESS_SIZE_FAULT_OUT_0 at level 0";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_1:
+ e = "ADDRESS_SIZE_FAULT_OUT_1 at level 1";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_2:
+ e = "ADDRESS_SIZE_FAULT_OUT_2 at level 2";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_3:
+ e = "ADDRESS_SIZE_FAULT_OUT_3 at level 3";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_0:
+ e = "MEMORY_ATTRIBUTE_FAULT_0 at level 0";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_1:
+ e = "MEMORY_ATTRIBUTE_FAULT_1 at level 1";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_2:
+ e = "MEMORY_ATTRIBUTE_FAULT_2 at level 2";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_3:
+ e = "MEMORY_ATTRIBUTE_FAULT_3 at level 3";
+ break;
/* Any other exception code is unknown */
default:
e = "UNKNOWN";
diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h
index 380ec30..f86f493 100644
--- a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h
+++ b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h
@@ -108,7 +108,6 @@
#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. */
-#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */
#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */
#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */
#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */
@@ -125,8 +124,6 @@
#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */
#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */
-#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
-
#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job slot n*/
#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */
diff --git a/mali_kbase/gpu/mali_kbase_gpu_fault.h b/mali_kbase/gpu/mali_kbase_gpu_fault.h
index 8b50a5d..6a937a5 100644
--- a/mali_kbase/gpu/mali_kbase_gpu_fault.h
+++ b/mali_kbase/gpu/mali_kbase_gpu_fault.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,9 +27,9 @@
*
* @exception_code: exception code
*
- * This function is called from the interrupt handler when a GPU fault occurs.
+ * This function is called by error handlers when the GPU reports an error.
*
- * Return: name associated with the exception code
+ * Return: Error string associated with the exception code
*/
const char *kbase_gpu_exception_name(u32 exception_code);
diff --git a/mali_kbase/gpu/mali_kbase_gpu_regmap.h b/mali_kbase/gpu/mali_kbase_gpu_regmap.h
index 907a872..e51791f 100644
--- a/mali_kbase/gpu/mali_kbase_gpu_regmap.h
+++ b/mali_kbase/gpu/mali_kbase_gpu_regmap.h
@@ -51,9 +51,7 @@
#define MMU_FEATURES 0x014 /* (RO) MMU features */
#define AS_PRESENT 0x018 /* (RO) Address space slots present */
#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */
-#define GPU_IRQ_CLEAR 0x024 /* (WO) */
#define GPU_IRQ_MASK 0x028 /* (RW) */
-#define GPU_IRQ_STATUS 0x02C /* (RO) */
#define GPU_COMMAND 0x030 /* (WO) */
#define GPU_STATUS 0x034 /* (RO) */
@@ -176,14 +174,9 @@
/* Job control registers */
#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */
-#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */
/* MMU control registers */
-#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */
-#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */
-#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */
-
#define MMU_AS1 0x440 /* Configuration registers for address space 1 */
#define MMU_AS2 0x480 /* Configuration registers for address space 2 */
#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
index 424a360..27acfc6 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -289,6 +289,8 @@ kbasep_hwcnt_backend_csf_cc_initial_sample(struct kbase_hwcnt_backend_csf *backe
u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS];
size_t clk;
+ memset(cycle_counts, 0, sizeof(cycle_counts));
+
/* Read cycle count from CSF interface for both clock domains. */
backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts,
clk_enable_map);
@@ -308,6 +310,8 @@ static void kbasep_hwcnt_backend_csf_cc_update(struct kbase_hwcnt_backend_csf *b
u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS];
size_t clk;
+ memset(cycle_counts, 0, sizeof(cycle_counts));
+
backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx);
backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts,
@@ -558,7 +562,7 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe
u32 insert_index_to_stop)
{
u32 raw_idx;
- unsigned long flags;
+ unsigned long flags = 0UL;
u8 *cpu_dump_base = (u8 *)backend_csf->ring_buf_cpu_base;
const size_t ring_buf_cnt = backend_csf->info->ring_buf_cnt;
const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes;
@@ -639,7 +643,7 @@ static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info)
{
struct kbase_hwcnt_backend_csf_info *csf_info = info;
struct kbase_hwcnt_backend_csf *backend_csf;
- unsigned long flags;
+ unsigned long flags = 0UL;
csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags);
@@ -658,8 +662,8 @@ static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info)
/* 3. dump state indicates no other dumping is in progress. */
((backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) ||
(backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED))) {
- u32 extract_index;
- u32 insert_index;
+ u32 extract_index = 0U;
+ u32 insert_index = 0U;
/* Read the raw extract and insert indexes from the CSF interface. */
csf_info->csf_if->get_indexes(csf_info->csf_if->ctx, &extract_index, &insert_index);
@@ -700,11 +704,11 @@ static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info)
*/
static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work)
{
- unsigned long flags;
+ unsigned long flags = 0ULL;
struct kbase_hwcnt_backend_csf *backend_csf;
u32 insert_index_to_acc;
- u32 extract_index;
- u32 insert_index;
+ u32 extract_index = 0U;
+ u32 insert_index = 0U;
WARN_ON(!work);
backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, hwc_dump_work);
@@ -776,10 +780,10 @@ static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work)
*/
static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work)
{
- unsigned long flags;
+ unsigned long flags = 0ULL;
struct kbase_hwcnt_backend_csf *backend_csf;
- u32 extract_index;
- u32 insert_index;
+ u32 extract_index = 0U;
+ u32 insert_index = 0U;
WARN_ON(!work);
@@ -920,7 +924,7 @@ static int kbasep_hwcnt_backend_csf_dump_enable(struct kbase_hwcnt_backend *back
const struct kbase_hwcnt_enable_map *enable_map)
{
int errcode;
- unsigned long flags;
+ unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
if (!backend_csf)
@@ -954,7 +958,7 @@ static void kbasep_hwcnt_backend_csf_wait_enable_transition_complete(
/* CSF backend implementation of kbase_hwcnt_backend_dump_disable_fn */
static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
{
- unsigned long flags;
+ unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
bool do_disable = false;
@@ -1050,7 +1054,7 @@ static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *ba
static int kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
u64 *dump_time_ns)
{
- unsigned long flags;
+ unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
bool do_request = false;
bool watchdog_dumping = false;
@@ -1157,7 +1161,7 @@ static int kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *bac
/* CSF backend implementation of kbase_hwcnt_backend_dump_wait_fn */
static int kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend)
{
- unsigned long flags;
+ unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
int errcode;
@@ -1365,7 +1369,7 @@ alloc_error:
static int kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *info,
struct kbase_hwcnt_backend **out_backend)
{
- unsigned long flags;
+ unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = NULL;
struct kbase_hwcnt_backend_csf_info *csf_info = (struct kbase_hwcnt_backend_csf_info *)info;
int errcode;
@@ -1407,7 +1411,7 @@ static int kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *
/* CSF backend implementation of kbase_hwcnt_backend_term_fn */
static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend)
{
- unsigned long flags;
+ unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
if (!backend)
@@ -1619,7 +1623,7 @@ void kbase_hwcnt_backend_csf_protm_exited(struct kbase_hwcnt_backend_interface *
void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_interface *iface)
{
- unsigned long flags;
+ unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf_info *csf_info;
csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
@@ -1639,7 +1643,7 @@ void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_i
void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interface *iface)
{
- unsigned long flags;
+ unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf_info *csf_info;
struct kbase_hwcnt_backend_csf *backend_csf;
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
index 124fd4c..e4a963d 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
@@ -329,7 +329,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
/* Get physical page for the buffer */
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
- phys, false);
+ phys, false, NULL);
if (ret != num_pages)
goto phys_mem_pool_alloc_error;
@@ -482,7 +482,8 @@ kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_c
WARN_ON(kbase_mmu_teardown_pages(fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu,
gpu_va_base >> PAGE_SHIFT, fw_ring_buf->phys,
- fw_ring_buf->num_pages, MCU_AS_NR, true));
+ fw_ring_buf->num_pages, fw_ring_buf->num_pages,
+ MCU_AS_NR, true));
vunmap(fw_ring_buf->cpu_dump_base);
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt.c b/mali_kbase/hwcnt/mali_kbase_hwcnt.c
index e724572..34deb5d 100644
--- a/mali_kbase/hwcnt/mali_kbase_hwcnt.c
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -362,7 +362,7 @@ static int kbasep_hwcnt_accumulator_dump(struct kbase_hwcnt_context *hctx, u64 *
bool cur_map_any_enabled;
struct kbase_hwcnt_enable_map *cur_map;
bool new_map_any_enabled = false;
- u64 dump_time_ns;
+ u64 dump_time_ns = 0;
struct kbase_hwcnt_accumulator *accum;
WARN_ON(!hctx);
diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
index cd5a9bf..5a204ae 100644
--- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
+++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -455,16 +455,14 @@ static const struct kbase_ipa_group ipa_groups_def_tbax[] = {
},
};
-
-#define IPA_POWER_MODEL_OPS(gpu, init_token) \
- const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \
- .name = "mali-" #gpu "-power-model", \
- .init = kbase_ ## init_token ## _power_model_init, \
- .term = kbase_ipa_vinstr_common_model_term, \
- .get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \
- .reset_counter_data = kbase_ipa_vinstr_reset_data, \
- }; \
- KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops)
+#define IPA_POWER_MODEL_OPS(gpu, init_token) \
+ static const struct kbase_ipa_model_ops kbase_##gpu##_ipa_model_ops = { \
+ .name = "mali-" #gpu "-power-model", \
+ .init = kbase_##init_token##_power_model_init, \
+ .term = kbase_ipa_vinstr_common_model_term, \
+ .get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \
+ .reset_counter_data = kbase_ipa_vinstr_reset_data, \
+ }
#define STANDARD_POWER_MODEL(gpu, reference_voltage) \
static int kbase_ ## gpu ## _power_model_init(\
diff --git a/mali_kbase/jm/mali_kbase_jm_defs.h b/mali_kbase/jm/mali_kbase_jm_defs.h
index fe8995a..debc3ad 100644
--- a/mali_kbase/jm/mali_kbase_jm_defs.h
+++ b/mali_kbase/jm/mali_kbase_jm_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -127,10 +127,17 @@
/**
* enum kbase_timeout_selector - The choice of which timeout to get scaled
* using the lowest GPU frequency.
+ * @MMU_AS_INACTIVE_WAIT_TIMEOUT: Maximum waiting time in ms for the completion
+ * of an MMU operation.
+ * @JM_DEFAULT_JS_FREE_TIMEOUT: Maximum timeout to wait for JS_COMMAND_NEXT
+ * to be updated on the HW side so a Job Slot is
+ * considered free.
* @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in
* the enum.
*/
enum kbase_timeout_selector {
+ MMU_AS_INACTIVE_WAIT_TIMEOUT,
+ JM_DEFAULT_JS_FREE_TIMEOUT,
/* Must be the last in the enum */
KBASE_TIMEOUT_SELECTOR_COUNT
@@ -852,6 +859,10 @@ struct jsctx_queue {
* @pf_data: Data relating to Page fault.
* @bf_data: Data relating to Bus fault.
* @current_setup: Stores the MMU configuration for this address space.
+ * @is_unresponsive: Flag to indicate the MMU is not responding.
+ * Set if an MMU command isn't completed within
+ * &kbase_device.mmu_as_inactive_wait_time_ms.
+ * Cleared by kbase_ctx_sched_restore_all_as() after GPU reset completes.
*/
struct kbase_as {
int number;
@@ -861,6 +872,7 @@ struct kbase_as {
struct kbase_fault pf_data;
struct kbase_fault bf_data;
struct kbase_mmu_setup current_setup;
+ bool is_unresponsive;
};
#endif /* _KBASE_JM_DEFS_H_ */
diff --git a/mali_kbase/jm/mali_kbase_js_defs.h b/mali_kbase/jm/mali_kbase_js_defs.h
index 924a685..2b93d3d 100644
--- a/mali_kbase/jm/mali_kbase_js_defs.h
+++ b/mali_kbase/jm/mali_kbase_js_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2011-2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -277,6 +277,7 @@ typedef u32 kbase_atom_ordering_flag_t;
 * @nr_contexts_runnable:Number of contexts that can either be pulled from or
 *                       are currently running
* @soft_job_timeout_ms:Value for JS_SOFT_JOB_TIMEOUT
 * @js_free_wait_time_ms: Maximum waiting time in ms for a Job Slot to be seen as free.
* @queue_mutex: Queue Lock, used to access the Policy's queue of contexts
* independently of the Run Pool.
* Of course, you don't need the Run Pool lock to access this.
@@ -329,6 +330,8 @@ struct kbasep_js_device_data {
u32 nr_contexts_pullable;
atomic_t nr_contexts_runnable;
atomic_t soft_job_timeout_ms;
+ u32 js_free_wait_time_ms;
+
struct mutex queue_mutex;
/*
* Run Pool mutex, for managing contexts within the runpool.
diff --git a/mali_kbase/mali_base_hwconfig_features.h b/mali_kbase/mali_base_hwconfig_features.h
index c6fea79..11aedef 100644
--- a/mali_kbase/mali_base_hwconfig_features.h
+++ b/mali_kbase/mali_base_hwconfig_features.h
@@ -40,6 +40,7 @@ enum base_hw_feature {
BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
BASE_HW_FEATURE_CORE_FEATURES,
BASE_HW_FEATURE_PBHA_HWU,
+ BASE_HW_FEATURE_LARGE_PAGE_ALLOC,
BASE_HW_FEATURE_END
};
diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h
index 2dc0402..0fbdec0 100644
--- a/mali_kbase/mali_base_hwconfig_issues.h
+++ b/mali_kbase/mali_base_hwconfig_issues.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -796,6 +796,19 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p2
BASE_HW_ISSUE_END
};
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p3[] = {
+ BASE_HW_ISSUE_TSIX_2033,
+ BASE_HW_ISSUE_TTRX_1337,
+ BASE_HW_ISSUE_GPU2019_3878,
+ BASE_HW_ISSUE_TURSEHW_2716,
+ BASE_HW_ISSUE_GPU2019_3901,
+ BASE_HW_ISSUE_GPU2021PRO_290,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_TITANHW_2679,
+ BASE_HW_ISSUE_GPU2022PRO_148,
+ BASE_HW_ISSUE_END
+};
+
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTIx[] = {
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
diff --git a/mali_kbase/mali_kbase_config_defaults.h b/mali_kbase/mali_kbase_config_defaults.h
index 5035ed5..96529a3 100644
--- a/mali_kbase/mali_kbase_config_defaults.h
+++ b/mali_kbase/mali_kbase_config_defaults.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -221,6 +221,16 @@ enum {
*/
#define JM_DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
+/* Default timeout in clock cycles to be used when checking if JS_COMMAND_NEXT
+ * is updated on the HW side so that a Job Slot is considered free.
+ * This timeout only takes effect on GPUs with a low minimum GPU clock
+ * frequency (<= 100MHz).
+ *
+ * Based on a 1ms timeout at 100MHz. It will default to 0ms on GPUs with a
+ * higher minimum GPU clock frequency.
+ */
+#define JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES (100000)
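As a rough sketch only (the helper name below is hypothetical; the driver derives the actual value through kbase_get_timeout_ms()), a cycle budget like this scales to milliseconds by dividing by the lowest GPU clock frequency in kHz, since N kHz equals N cycles per millisecond:

#include <linux/math64.h>	/* div_u64() */

/* Hypothetical sketch: ms = cycles / freq_khz.
 * 100000 cycles at 100MHz (100000 kHz) -> 1ms; at 1GHz it rounds down to 0ms.
 */
static unsigned int sketch_cycles_to_ms(u64 timeout_cycles, u32 lowest_freq_khz)
{
	return (unsigned int)div_u64(timeout_cycles, lowest_freq_khz);
}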
+
#endif /* MALI_USE_CSF */
/* Default timeslice that a context is scheduled in for, in nanoseconds.
@@ -257,5 +267,12 @@ enum {
*/
#define DEFAULT_IR_THRESHOLD (192)
+/* Waiting time in clock cycles for the completion of an MMU operation.
+ *
+ * Ideally, 1.6M GPU cycles are required for the L2 cache (512KiB slice) flush.
+ *
+ * As a pessimistic value, 50M GPU cycles (more than 30 times larger) is chosen.
+ * It corresponds to 0.5s on a GPU running at 100MHz.
+ */
+#define MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES ((u64)50 * 1024 * 1024)
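For orientation, 50 * 1024 * 1024 = 52,428,800 cycles, which is roughly 0.52s at 100MHz and roughly 52ms at 1GHz, i.e. comfortably above the ~1.6M-cycle ideal case quoted above.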
#endif /* _KBASE_CONFIG_DEFAULTS_H_ */
-
diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c
index a8f8791..4179091 100644
--- a/mali_kbase/mali_kbase_core_linux.c
+++ b/mali_kbase/mali_kbase_core_linux.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1565,7 +1565,6 @@ static int kbasep_ioctl_cs_cpu_queue_dump(struct kbase_context *kctx,
cpu_queue_info->size);
}
-#define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1)
static int kbase_ioctl_read_user_page(struct kbase_context *kctx,
union kbase_ioctl_read_user_page *user_page)
{
@@ -2051,6 +2050,7 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
struct kbase_ioctl_cs_cpu_queue_info,
kctx);
break;
+ /* This IOCTL will be kept for backward compatibility */
case KBASE_IOCTL_READ_USER_PAGE:
KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_READ_USER_PAGE, kbase_ioctl_read_user_page,
union kbase_ioctl_read_user_page, kctx);
@@ -2217,7 +2217,10 @@ KBASE_EXPORT_TEST_API(kbase_event_wakeup);
#if MALI_USE_CSF
int kbase_event_pending(struct kbase_context *ctx)
{
- WARN_ON_ONCE(!ctx);
+ KBASE_DEBUG_ASSERT(ctx);
+
+ if (unlikely(!ctx))
+ return -EPERM;
return (atomic_read(&ctx->event_count) != 0) ||
kbase_csf_event_error_pending(ctx) ||
@@ -2228,6 +2231,9 @@ int kbase_event_pending(struct kbase_context *ctx)
{
KBASE_DEBUG_ASSERT(ctx);
+ if (unlikely(!ctx))
+ return -EPERM;
+
return (atomic_read(&ctx->event_count) != 0) ||
(atomic_read(&ctx->event_closed) != 0);
}
@@ -4276,7 +4282,7 @@ void kbase_protected_mode_term(struct kbase_device *kbdev)
kfree(kbdev->protected_dev);
}
-#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
static int kbase_common_reg_map(struct kbase_device *kbdev)
{
return 0;
@@ -4284,7 +4290,7 @@ static int kbase_common_reg_map(struct kbase_device *kbdev)
static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
{
}
-#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
+#else /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
static int kbase_common_reg_map(struct kbase_device *kbdev)
{
int err = 0;
@@ -4320,7 +4326,7 @@ static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
kbdev->reg_size = 0;
}
}
-#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
+#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
int registers_map(struct kbase_device * const kbdev)
{
diff --git a/mali_kbase/mali_kbase_ctx_sched.c b/mali_kbase/mali_kbase_ctx_sched.c
index beb2928..dc6feb9 100644
--- a/mali_kbase/mali_kbase_ctx_sched.c
+++ b/mali_kbase/mali_kbase_ctx_sched.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -242,6 +242,7 @@ void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev)
for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) {
struct kbase_context *kctx;
+ kbdev->as[i].is_unresponsive = false;
#if MALI_USE_CSF
if ((i == MCU_AS_NR) && kbdev->csf.firmware_inited) {
kbase_mmu_update(kbdev, &kbdev->csf.mcu_mmu,
diff --git a/mali_kbase/mali_kbase_debug_mem_allocs.c b/mali_kbase/mali_kbase_debug_mem_allocs.c
index 598d8f5..418bb19 100644
--- a/mali_kbase/mali_kbase_debug_mem_allocs.c
+++ b/mali_kbase/mali_kbase_debug_mem_allocs.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -26,7 +26,7 @@
#include "mali_kbase_debug_mem_allocs.h"
#include "mali_kbase.h"
-#include <string.h>
+#include <linux/string.h>
#include <linux/list.h>
#include <linux/file.h>
diff --git a/mali_kbase/mali_kbase_debugfs_helper.c b/mali_kbase/mali_kbase_debugfs_helper.c
index 4c1aa28..c846491 100644
--- a/mali_kbase/mali_kbase_debugfs_helper.c
+++ b/mali_kbase/mali_kbase_debugfs_helper.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -90,11 +90,10 @@ set_attr_from_string(char *const buf, void *const array, size_t const nelems,
int kbase_debugfs_string_validator(char *const buf)
{
- size_t index;
int err = 0;
char *ptr = buf;
- for (index = 0; *ptr; ++index) {
+ while (*ptr) {
unsigned long test_number;
size_t len;
diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h
index e98ab45..6236f70 100644
--- a/mali_kbase/mali_kbase_defs.h
+++ b/mali_kbase/mali_kbase_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -238,12 +238,25 @@ struct kbase_fault {
bool protected_mode;
};
+/** Maximum number of memory pages that should be allocated for the array
+ * of pointers to free PGDs.
+ *
+ * This number has been pre-calculated to deal with the maximum allocation
+ * size expressed by the default value of KBASE_MEM_ALLOC_MAX_SIZE.
+ * This should be enough for the vast majority of MMU operations.
+ * Any allocation larger than KBASE_MEM_ALLOC_MAX_SIZE must be broken down
+ * into multiple iterations, each dealing with at most KBASE_MEM_ALLOC_MAX_SIZE
+ * bytes.
+ *
+ * Please update this value if KBASE_MEM_ALLOC_MAX_SIZE changes.
+ */
+#define MAX_PAGES_FOR_FREE_PGDS ((size_t)9)
+
+/* Maximum number of pointers to free PGDs */
+#define MAX_FREE_PGDS ((PAGE_SIZE / sizeof(struct page *)) * MAX_PAGES_FOR_FREE_PGDS)
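As a worked example, assuming a 4KiB PAGE_SIZE and 8-byte pointers, PAGE_SIZE / sizeof(struct page *) gives 512 entries per page, so MAX_FREE_PGDS works out to 512 * 9 = 4608 PGD pointers that can be queued for freeing within a single operation.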
+
/**
* struct kbase_mmu_table - object representing a set of GPU page tables
- * @mmu_teardown_pages: Array containing pointers to 3 separate pages, used
- * to cache the entries of top (L0) & intermediate level
- * page tables (L1 & L2) to avoid repeated calls to
- * kmap_atomic() during the MMU teardown.
* @mmu_lock: Lock to serialize the accesses made to multi level GPU
* page tables
* @pgd: Physical address of the page allocated for the top
@@ -255,14 +268,40 @@ struct kbase_fault {
* Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
* @kctx: If this set of MMU tables belongs to a context then
* this is a back-reference to the context, otherwise
- * it is NULL
+ * it is NULL.
+ * @scratch_mem: Scratch memory used for MMU operations, which are
+ * serialized by the @mmu_lock.
*/
struct kbase_mmu_table {
- u64 *mmu_teardown_pages[MIDGARD_MMU_BOTTOMLEVEL];
struct mutex mmu_lock;
phys_addr_t pgd;
u8 group_id;
struct kbase_context *kctx;
+ union {
+ /**
+ * @teardown_pages: Scratch memory used for backup copies of whole
+ * PGD pages when tearing down levels upon
+ * termination of the MMU table.
+ */
+ struct {
+ /**
+ * @levels: Array of PGD pages, large enough to copy one PGD
+ * for each level of the MMU table.
+ */
+ u64 levels[MIDGARD_MMU_BOTTOMLEVEL][PAGE_SIZE / sizeof(u64)];
+ } teardown_pages;
+ /**
+		 * @free_pgds: Scratch memory used for insertion, update and teardown
+ * operations to store a temporary list of PGDs to be freed
+ * at the end of the operation.
+ */
+ struct {
+ /** @pgds: Array of pointers to PGDs to free. */
+ struct page *pgds[MAX_FREE_PGDS];
+ /** @head_index: Index of first free element in the PGDs array. */
+ size_t head_index;
+ } free_pgds;
+ } scratch_mem;
};
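Sizing note, assuming 4KiB pages and MIDGARD_MMU_BOTTOMLEVEL == 3: teardown_pages holds three full PGD-sized pages (12KiB), while free_pgds holds MAX_PAGES_FOR_FREE_PGDS (9) pages of pointers plus an index, so the union makes scratch_mem roughly 36KiB per kbase_mmu_table; sharing it between operations is safe because mmu_lock serializes them.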
/**
@@ -286,6 +325,8 @@ struct kbase_reg_zone {
#include "jm/mali_kbase_jm_defs.h"
#endif
+#include "mali_kbase_hwaccess_time.h"
+
static inline int kbase_as_has_bus_fault(struct kbase_as *as,
struct kbase_fault *fault)
{
@@ -754,6 +795,8 @@ struct kbase_mem_migrate {
 * GPU address spaces assigned to them.
* @mmu_mask_change: Lock to serialize the access to MMU interrupt mask
* register used in the handling of Bus & Page faults.
+ * @pagesize_2mb: Boolean to determine whether 2MiB page sizes are
+ * supported and used where possible.
* @gpu_props: Object containing complete information about the
* configuration/properties of GPU HW device in use.
* @hw_issues_mask: List of SW workarounds for HW issues
@@ -799,6 +842,7 @@ struct kbase_mem_migrate {
* GPU reset.
* @lowest_gpu_freq_khz: Lowest frequency in KHz that the GPU can run at. Used
* to calculate suitable timeouts for wait operations.
+ * @backend_time: Kbase backend time related attributes.
* @cache_clean_in_progress: Set when a cache clean has been started, and
* cleared when it has finished. This prevents multiple
* cache cleans being done simultaneously.
@@ -993,6 +1037,9 @@ struct kbase_mem_migrate {
* KCPU queue. These structures may outlive kbase module
* itself. Therefore, in such a case, a warning should be
* be produced.
+ * @mmu_as_inactive_wait_time_ms: Maximum waiting time in ms for the completion of
+ *                                an MMU operation.
+ * @va_region_slab: kmem_cache (slab) for allocated kbase_va_region structures.
*/
struct kbase_device {
u32 hw_quirks_sc;
@@ -1049,6 +1096,8 @@ struct kbase_device {
spinlock_t mmu_mask_change;
+ bool pagesize_2mb;
+
struct kbase_gpu_props gpu_props;
unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
@@ -1102,6 +1151,10 @@ struct kbase_device {
u64 lowest_gpu_freq_khz;
+#if MALI_USE_CSF
+ struct kbase_backend_time backend_time;
+#endif
+
bool cache_clean_in_progress;
u32 cache_clean_queued;
wait_queue_head_t cache_clean_wait;
@@ -1283,6 +1336,8 @@ struct kbase_device {
#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE)
atomic_t live_fence_metadata;
#endif
+ u32 mmu_as_inactive_wait_time_ms;
+ struct kmem_cache *va_region_slab;
};
/**
@@ -1636,11 +1691,13 @@ struct kbase_sub_alloc {
* is scheduled in and an atom is pulled from the context's per
* slot runnable tree in JM GPU or GPU command queue
* group is programmed on CSG slot in CSF GPU.
- * @mm_update_lock: lock used for handling of special tracking page.
* @process_mm: Pointer to the memory descriptor of the process which
* created the context. Used for accounting the physical
* pages used for GPU allocations, done for the context,
- * to the memory consumed by the process.
+ * to the memory consumed by the process. A reference is taken
+ *              on this descriptor for Userspace-created contexts so that
+ * Kbase can safely access it to update the memory usage counters.
+ * The reference is dropped on context termination.
* @gpu_va_end: End address of the GPU va space (in 4KB page units)
* @running_total_tiler_heap_nr_chunks: Running total of number of chunks in all
* tiler heaps of the kbase context.
@@ -1762,6 +1819,10 @@ struct kbase_sub_alloc {
* @limited_core_mask: The mask that is applied to the affinity in case of atoms
* marked with BASE_JD_REQ_LIMITED_CORE_MASK.
* @platform_data: Pointer to platform specific per-context data.
+ * @task: Pointer to the task structure of the main thread of the process
+ *        that created the Kbase context. It is set only for contexts
+ *        created by Userspace and not for contexts created internally
+ *        by Kbase.
*
* A kernel base context is an entity among which the GPU is scheduled.
* Each context has its own GPU address space.
@@ -1849,8 +1910,7 @@ struct kbase_context {
atomic_t refcount;
- spinlock_t mm_update_lock;
- struct mm_struct __rcu *process_mm;
+ struct mm_struct *process_mm;
u64 gpu_va_end;
#if MALI_USE_CSF
u32 running_total_tiler_heap_nr_chunks;
@@ -1913,6 +1973,8 @@ struct kbase_context {
#if !MALI_USE_CSF
void *platform_data;
#endif
+
+ struct task_struct *task;
};
#ifdef CONFIG_MALI_CINSTR_GWT
@@ -2015,5 +2077,4 @@ static inline u64 kbase_get_lock_region_min_size_log2(struct kbase_gpu_props con
#define KBASE_AS_INACTIVE_MAX_LOOPS 100000000
/* Maximum number of loops polling the GPU PRFCNT_ACTIVE bit before we assume the GPU has hung */
#define KBASE_PRFCNT_ACTIVE_MAX_LOOPS 100000000
-
#endif /* _KBASE_DEFS_H_ */
diff --git a/mali_kbase/mali_kbase_fence.h b/mali_kbase/mali_kbase_fence.h
index 25986f6..f4507ac 100644
--- a/mali_kbase/mali_kbase_fence.h
+++ b/mali_kbase/mali_kbase_fence.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2018, 2020-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -32,6 +32,7 @@
#include <linux/list.h>
#include "mali_kbase_fence_defs.h"
#include "mali_kbase.h"
+#include "mali_kbase_refcount_defs.h"
#if MALI_USE_CSF
/* Maximum number of characters in DMA fence timeline name. */
@@ -49,11 +50,7 @@
* @timeline_name: String of timeline name for associated fence object.
*/
struct kbase_kcpu_dma_fence_meta {
-#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
- atomic_t refcount;
-#else
- refcount_t refcount;
-#endif
+ kbase_refcount_t refcount;
struct kbase_device *kbdev;
int kctx_id;
char timeline_name[MAX_TIMELINE_NAME];
@@ -225,11 +222,7 @@ static inline struct kbase_kcpu_dma_fence *kbase_kcpu_dma_fence_get(struct dma_f
static inline void kbase_kcpu_dma_fence_meta_put(struct kbase_kcpu_dma_fence_meta *metadata)
{
-#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
- if (atomic_dec_and_test(&metadata->refcount)) {
-#else
- if (refcount_dec_and_test(&metadata->refcount)) {
-#endif
+ if (kbase_refcount_dec_and_test(&metadata->refcount)) {
atomic_dec(&metadata->kbdev->live_fence_metadata);
kfree(metadata);
}
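The kbase_refcount_* helpers used here come from the new mali_kbase_refcount_defs.h header, which is not part of this excerpt; a plausible sketch of such an abstraction (an assumption, not the shipped header) is:

#include <linux/version.h>
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
#include <linux/atomic.h>
typedef atomic_t kbase_refcount_t;
#define kbase_refcount_set(r, n)	atomic_set(r, n)
#define kbase_refcount_read(r)		atomic_read(r)
#define kbase_refcount_inc(r)		atomic_inc(r)
#define kbase_refcount_dec_and_test(r)	atomic_dec_and_test(r)
#else
#include <linux/refcount.h>
typedef refcount_t kbase_refcount_t;
#define kbase_refcount_set(r, n)	refcount_set(r, n)
#define kbase_refcount_read(r)		refcount_read(r)
#define kbase_refcount_inc(r)		refcount_inc(r)
#define kbase_refcount_dec_and_test(r)	refcount_dec_and_test(r)
#endif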
diff --git a/mali_kbase/mali_kbase_gwt.c b/mali_kbase/mali_kbase_gwt.c
index 16cccee..0eba889 100644
--- a/mali_kbase/mali_kbase_gwt.c
+++ b/mali_kbase/mali_kbase_gwt.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -125,14 +125,17 @@ int kbase_gpu_gwt_stop(struct kbase_context *kctx)
return 0;
}
-
+#if (KERNEL_VERSION(5, 13, 0) <= LINUX_VERSION_CODE)
+static int list_cmp_function(void *priv, const struct list_head *a, const struct list_head *b)
+#else
static int list_cmp_function(void *priv, struct list_head *a,
struct list_head *b)
+#endif
{
- struct kbasep_gwt_list_element *elementA = container_of(a,
- struct kbasep_gwt_list_element, link);
- struct kbasep_gwt_list_element *elementB = container_of(b,
- struct kbasep_gwt_list_element, link);
+ const struct kbasep_gwt_list_element *elementA =
+ container_of(a, struct kbasep_gwt_list_element, link);
+ const struct kbasep_gwt_list_element *elementB =
+ container_of(b, struct kbasep_gwt_list_element, link);
CSTD_UNUSED(priv);
diff --git a/mali_kbase/mali_kbase_hw.c b/mali_kbase/mali_kbase_hw.c
index c658fb7..b07327a 100644
--- a/mali_kbase/mali_kbase_hw.c
+++ b/mali_kbase/mali_kbase_hw.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -232,6 +232,7 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
{ GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 },
{ GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 },
{ GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 },
+ { GPU_ID2_VERSION_MAKE(1, 3, 0), base_hw_issues_tTUx_r1p3 },
{ U32_MAX, NULL } } },
{ GPU_ID2_PRODUCT_LTUX,
@@ -239,6 +240,7 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
{ GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 },
{ GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 },
{ GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 },
+ { GPU_ID2_VERSION_MAKE(1, 3, 0), base_hw_issues_tTUx_r1p3 },
{ U32_MAX, NULL } } },
{ GPU_ID2_PRODUCT_TTIX,
@@ -303,21 +305,20 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
*/
issues = fallback_issues;
- dev_warn(kbdev->dev,
- "GPU hardware issue table may need updating:\n"
- "r%dp%d status %d is unknown; treating as r%dp%d status %d",
- (gpu_id & GPU_ID2_VERSION_MAJOR) >>
- GPU_ID2_VERSION_MAJOR_SHIFT,
- (gpu_id & GPU_ID2_VERSION_MINOR) >>
- GPU_ID2_VERSION_MINOR_SHIFT,
- (gpu_id & GPU_ID2_VERSION_STATUS) >>
- GPU_ID2_VERSION_STATUS_SHIFT,
- (fallback_version & GPU_ID2_VERSION_MAJOR) >>
- GPU_ID2_VERSION_MAJOR_SHIFT,
- (fallback_version & GPU_ID2_VERSION_MINOR) >>
- GPU_ID2_VERSION_MINOR_SHIFT,
- (fallback_version & GPU_ID2_VERSION_STATUS) >>
- GPU_ID2_VERSION_STATUS_SHIFT);
+ dev_notice(kbdev->dev, "r%dp%d status %d not found in HW issues table;\n",
+ (gpu_id & GPU_ID2_VERSION_MAJOR) >> GPU_ID2_VERSION_MAJOR_SHIFT,
+ (gpu_id & GPU_ID2_VERSION_MINOR) >> GPU_ID2_VERSION_MINOR_SHIFT,
+ (gpu_id & GPU_ID2_VERSION_STATUS) >>
+ GPU_ID2_VERSION_STATUS_SHIFT);
+ dev_notice(kbdev->dev, "falling back to closest match: r%dp%d status %d\n",
+ (fallback_version & GPU_ID2_VERSION_MAJOR) >>
+ GPU_ID2_VERSION_MAJOR_SHIFT,
+ (fallback_version & GPU_ID2_VERSION_MINOR) >>
+ GPU_ID2_VERSION_MINOR_SHIFT,
+ (fallback_version & GPU_ID2_VERSION_STATUS) >>
+ GPU_ID2_VERSION_STATUS_SHIFT);
+ dev_notice(kbdev->dev,
+ "Execution proceeding normally with fallback match\n");
gpu_id &= ~GPU_ID2_VERSION;
gpu_id |= fallback_version;
@@ -343,7 +344,7 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
issues = kbase_hw_get_issues_for_new_id(kbdev);
if (issues == NULL) {
dev_err(kbdev->dev,
- "Unknown GPU ID %x", gpu_id);
+ "HW product - Unknown GPU ID %x", gpu_id);
return -EINVAL;
}
@@ -407,7 +408,7 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
break;
default:
dev_err(kbdev->dev,
- "Unknown GPU ID %x", gpu_id);
+ "HW issues - Unknown GPU ID %x", gpu_id);
return -EINVAL;
}
}
diff --git a/mali_kbase/mali_kbase_hwaccess_time.h b/mali_kbase/mali_kbase_hwaccess_time.h
index 27e2cb7..ac2a26d 100644
--- a/mali_kbase/mali_kbase_hwaccess_time.h
+++ b/mali_kbase/mali_kbase_hwaccess_time.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014, 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2018-2021, 2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,49 @@
#ifndef _KBASE_BACKEND_TIME_H_
#define _KBASE_BACKEND_TIME_H_
+#if MALI_USE_CSF
+/**
+ * struct kbase_backend_time - System timestamp attributes.
+ *
+ * @multiplier: Numerator of the converter's fraction.
+ * @divisor: Denominator of the converter's fraction.
+ * @offset: Converter's offset term.
+ *
+ * According to the Generic Timer specification, the system timer:
+ * - Increments at a fixed frequency
+ * - Starts operating from zero
+ *
+ * Hence CPU time is a linear function of System Time.
+ *
+ * CPU_ts = alpha * SYS_ts + beta
+ *
+ * Where
+ * - alpha = 10^9/SYS_ts_freq
+ * - beta is calculated by two timer samples taken at the same time:
+ * beta = CPU_ts_s - SYS_ts_s * alpha
+ *
+ * Since alpha is a rational number, we minimize the possible
+ * rounding error by simplifying the ratio. Thus alpha is stored
+ * as a simple `multiplier / divisor` ratio.
+ *
+ */
+struct kbase_backend_time {
+ u64 multiplier;
+ u64 divisor;
+ s64 offset;
+};
+
+/**
+ * kbase_backend_time_convert_gpu_to_cpu() - Convert GPU timestamp to CPU timestamp.
+ *
+ * @kbdev: Kbase device pointer
+ * @gpu_ts: System timestamp value to convert.
+ *
+ * Return: The CPU timestamp.
+ */
+u64 __maybe_unused kbase_backend_time_convert_gpu_to_cpu(struct kbase_device *kbdev, u64 gpu_ts);
+#endif
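A minimal sketch of the conversion described above, assuming the simple linear form and ignoring overflow handling (the in-tree implementation in the time backend may differ in detail):

#include <linux/math64.h>	/* div64_u64() */

/* CPU_ts = (SYS_ts * multiplier) / divisor + offset */
static inline u64 sketch_gpu_to_cpu_ts(const struct kbase_backend_time *t, u64 gpu_ts)
{
	return div64_u64(gpu_ts * t->multiplier, t->divisor) + t->offset;
}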
+
/**
* kbase_backend_get_gpu_time() - Get current GPU time
* @kbdev: Device pointer
@@ -46,9 +89,6 @@ void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev,
u64 *cycle_counter,
u64 *system_time,
struct timespec64 *ts);
-
-#endif /* _KBASE_BACKEND_TIME_H_ */
-
/**
* kbase_get_timeout_ms - Choose a timeout value to get a timeout scaled
* GPU frequency, using a choice from
@@ -70,3 +110,17 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
* Return: Snapshot of the GPU cycle count register.
*/
u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_time_init() - Initialize system timestamp converter.
+ *
+ * @kbdev: Kbase device pointer
+ *
+ * This function should only be called after the GPU is powered up and
+ * L2 cache power-up has been initiated.
+ *
+ * Return: Zero on success, error code otherwise.
+ */
+int kbase_backend_time_init(struct kbase_device *kbdev);
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/mali_kbase/mali_kbase_js.c b/mali_kbase/mali_kbase_js.c
index d623aca..6e803bd 100644
--- a/mali_kbase/mali_kbase_js.c
+++ b/mali_kbase/mali_kbase_js.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -34,6 +34,7 @@
#include "mali_kbase_jm.h"
#include "mali_kbase_hwaccess_jm.h"
+#include <mali_kbase_hwaccess_time.h>
#include <linux/priority_control_manager.h>
/*
@@ -531,6 +532,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev)
jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING;
jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS;
atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT);
+ jsdd->js_free_wait_time_ms = kbase_get_timeout_ms(kbdev, JM_DEFAULT_JS_FREE_TIMEOUT);
dev_dbg(kbdev->dev, "JS Config Attribs: ");
dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u",
@@ -555,6 +557,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev)
jsdd->ctx_timeslice_ns);
dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i",
atomic_read(&jsdd->soft_job_timeout_ms));
+ dev_dbg(kbdev->dev, "\tjs_free_wait_time_ms:%u", jsdd->js_free_wait_time_ms);
if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss &&
jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss &&
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index b18b1e2..1c94e9c 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -44,6 +44,9 @@
#include <mali_kbase_config_defaults.h>
#include <mali_kbase_trace_gpu_mem.h>
+#define VA_REGION_SLAB_NAME_PREFIX "va-region-slab-"
+#define VA_REGION_SLAB_NAME_SIZE (DEVNAME_SIZE + sizeof(VA_REGION_SLAB_NAME_PREFIX) + 1)
+
#if MALI_JIT_PRESSURE_LIMIT_BASE
/*
@@ -92,10 +95,8 @@ static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx)
#error "Unknown CPU VA width for this architecture"
#endif
-#if IS_ENABLED(CONFIG_64BIT)
- if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+ if (kbase_ctx_compat_mode(kctx))
cpu_va_bits = 32;
-#endif
return cpu_va_bits;
}
@@ -130,18 +131,14 @@ static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx,
else {
u64 same_va_end;
-#if IS_ENABLED(CONFIG_64BIT)
- if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
-#endif /* CONFIG_64BIT */
+ if (kbase_ctx_compat_mode(kctx)) {
same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE;
-#if IS_ENABLED(CONFIG_64BIT)
} else {
struct kbase_reg_zone *same_va_zone =
kbase_ctx_reg_zone_get(kctx,
KBASE_REG_ZONE_SAME_VA);
same_va_end = kbase_reg_zone_end_pfn(same_va_zone);
}
-#endif /* CONFIG_64BIT */
if (gpu_pfn >= same_va_end)
rbtree = &kctx->reg_rbtree_custom;
@@ -383,6 +380,7 @@ void kbase_remove_va_region(struct kbase_device *kbdev,
struct rb_node *rbnext;
struct kbase_va_region *next = NULL;
struct rb_root *reg_rbtree = NULL;
+ struct kbase_va_region *orig_reg = reg;
int merged_front = 0;
int merged_back = 0;
@@ -447,9 +445,8 @@ void kbase_remove_va_region(struct kbase_device *kbdev,
*/
struct kbase_va_region *free_reg;
- free_reg = kbase_alloc_free_region(reg_rbtree,
- reg->start_pfn, reg->nr_pages,
- reg->flags & KBASE_REG_ZONE_MASK);
+ free_reg = kbase_alloc_free_region(kbdev, reg_rbtree, reg->start_pfn, reg->nr_pages,
+ reg->flags & KBASE_REG_ZONE_MASK);
if (!free_reg) {
/* In case of failure, we cannot allocate a replacement
* free region, so we will be left with a 'gap' in the
@@ -480,6 +477,12 @@ void kbase_remove_va_region(struct kbase_device *kbdev,
rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree);
}
+ /* This operation is always safe because the function never frees
+ * the region. If the region has been merged to both front and back,
+ * then it's the previous region that is supposed to be freed.
+ */
+ orig_reg->start_pfn = 0;
+
out:
return;
}
@@ -490,6 +493,7 @@ KBASE_EXPORT_TEST_API(kbase_remove_va_region);
* kbase_insert_va_region_nolock - Insert a VA region to the list,
* replacing the existing one.
*
+ * @kbdev: The kbase device
* @new_reg: The new region to insert
* @at_reg: The region to replace
* @start_pfn: The Page Frame Number to insert at
@@ -497,8 +501,10 @@ KBASE_EXPORT_TEST_API(kbase_remove_va_region);
*
* Return: 0 on success, error code otherwise.
*/
-static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg,
- struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
+static int kbase_insert_va_region_nolock(struct kbase_device *kbdev,
+ struct kbase_va_region *new_reg,
+ struct kbase_va_region *at_reg, u64 start_pfn,
+ size_t nr_pages)
{
struct rb_root *reg_rbtree = NULL;
int err = 0;
@@ -542,10 +548,9 @@ static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg,
else {
struct kbase_va_region *new_front_reg;
- new_front_reg = kbase_alloc_free_region(reg_rbtree,
- at_reg->start_pfn,
- start_pfn - at_reg->start_pfn,
- at_reg->flags & KBASE_REG_ZONE_MASK);
+ new_front_reg = kbase_alloc_free_region(kbdev, reg_rbtree, at_reg->start_pfn,
+ start_pfn - at_reg->start_pfn,
+ at_reg->flags & KBASE_REG_ZONE_MASK);
if (new_front_reg) {
at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
@@ -682,8 +687,7 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
goto exit;
}
- err = kbase_insert_va_region_nolock(reg, tmp, gpu_pfn,
- nr_pages);
+ err = kbase_insert_va_region_nolock(kbdev, reg, tmp, gpu_pfn, nr_pages);
if (err) {
dev_warn(dev, "Failed to insert va region");
err = -ENOMEM;
@@ -708,8 +712,7 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
nr_pages, align_offset, align_mask,
&start_pfn);
if (tmp) {
- err = kbase_insert_va_region_nolock(reg, tmp,
- start_pfn, nr_pages);
+ err = kbase_insert_va_region_nolock(kbdev, reg, tmp, start_pfn, nr_pages);
if (unlikely(err)) {
dev_warn(dev, "Failed to insert region: 0x%08llx start_pfn, %zu nr_pages",
start_pfn, nr_pages);
@@ -847,7 +850,7 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
if (rbnode) {
rb_erase(rbnode, rbtree);
reg = rb_entry(rbnode, struct kbase_va_region, rblink);
- WARN_ON(reg->va_refcnt != 1);
+ WARN_ON(kbase_refcount_read(&reg->va_refcnt) != 1);
if (kbase_page_migration_enabled)
kbase_gpu_munmap(kbase_reg_flags_to_kctx(reg), reg);
/* Reset the start_pfn - as the rbtree is being
@@ -933,9 +936,8 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
#endif
/* all have SAME_VA */
- same_va_reg =
- kbase_alloc_free_region(&kctx->reg_rbtree_same, same_va_base,
- same_va_pages, KBASE_REG_ZONE_SAME_VA);
+ same_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, same_va_base,
+ same_va_pages, KBASE_REG_ZONE_SAME_VA);
if (!same_va_reg) {
err = -ENOMEM;
@@ -944,10 +946,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_SAME_VA, same_va_base,
same_va_pages);
-#if IS_ENABLED(CONFIG_64BIT)
- /* 32-bit clients have custom VA zones */
- if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
-#endif
+ if (kbase_ctx_compat_mode(kctx)) {
if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
err = -EINVAL;
goto fail_free_same_va;
@@ -959,10 +958,9 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
- custom_va_reg = kbase_alloc_free_region(
- &kctx->reg_rbtree_custom,
- KBASE_REG_ZONE_CUSTOM_VA_BASE,
- custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
+ custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom,
+ KBASE_REG_ZONE_CUSTOM_VA_BASE,
+ custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
if (!custom_va_reg) {
err = -ENOMEM;
@@ -971,11 +969,9 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA,
KBASE_REG_ZONE_CUSTOM_VA_BASE,
custom_va_size);
-#if IS_ENABLED(CONFIG_64BIT)
} else {
custom_va_size = 0;
}
-#endif
#if MALI_USE_CSF
/* The position of EXEC_VA depends on whether the client is 32-bit or 64-bit. */
@@ -986,17 +982,15 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
*/
fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_64;
-#if IS_ENABLED(CONFIG_64BIT)
- if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+ if (kbase_ctx_compat_mode(kctx)) {
exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_32;
fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_32;
}
-#endif
kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, exec_va_base,
KBASE_REG_ZONE_EXEC_VA_SIZE);
- exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec, exec_va_base,
+ exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_base,
KBASE_REG_ZONE_EXEC_VA_SIZE, KBASE_REG_ZONE_EXEC_VA);
if (!exec_va_reg) {
@@ -1010,8 +1004,8 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE);
exec_fixed_va_reg =
- kbase_alloc_free_region(&kctx->reg_rbtree_exec_fixed, exec_fixed_va_base,
- KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE,
+ kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec_fixed,
+ exec_fixed_va_base, KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE,
KBASE_REG_ZONE_EXEC_FIXED_VA);
if (!exec_fixed_va_reg) {
@@ -1024,7 +1018,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_FIXED_VA, fixed_va_base, fixed_va_pages);
- fixed_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_fixed, fixed_va_base,
+ fixed_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_fixed, fixed_va_base,
fixed_va_pages, KBASE_REG_ZONE_FIXED_VA);
kctx->gpu_va_end = fixed_va_end;
@@ -1163,7 +1157,6 @@ static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx)
return false;
}
-#if IS_ENABLED(CONFIG_64BIT)
static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
u64 jit_va_pages)
{
@@ -1212,9 +1205,8 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
* Create a custom VA zone at the end of the VA for allocations which
* JIT can use so it doesn't have to allocate VA from the kernel.
*/
- custom_va_reg =
- kbase_alloc_free_region(&kctx->reg_rbtree_custom, jit_va_start,
- jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+ custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, jit_va_start,
+ jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA);
/*
* The context will be destroyed if we fail here so no point
@@ -1231,7 +1223,6 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
kbase_region_tracker_insert(custom_va_reg);
return 0;
}
-#endif
int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
int max_allocations, int trim_level, int group_id,
@@ -1272,10 +1263,8 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
goto exit_unlock;
}
-#if IS_ENABLED(CONFIG_64BIT)
- if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
+ if (!kbase_ctx_compat_mode(kctx))
err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages);
-#endif
/*
* Nothing to do for 32-bit clients, JIT uses the existing
* custom VA zone.
@@ -1351,17 +1340,14 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages
goto exit_unlock;
}
-#if IS_ENABLED(CONFIG_64BIT)
- if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
-#endif
+ if (kbase_ctx_compat_mode(kctx)) {
/* 32-bit client: take from CUSTOM_VA zone */
target_zone_bits = KBASE_REG_ZONE_CUSTOM_VA;
-#if IS_ENABLED(CONFIG_64BIT)
} else {
/* 64-bit client: take from SAME_VA zone */
target_zone_bits = KBASE_REG_ZONE_SAME_VA;
}
-#endif
+
target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_bits);
target_zone_base_addr = target_zone->base_pfn << PAGE_SHIFT;
@@ -1389,10 +1375,8 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages
/* Taken from the end of the target zone */
exec_va_start = kbase_reg_zone_end_pfn(target_zone) - exec_va_pages;
- exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec,
- exec_va_start,
- exec_va_pages,
- KBASE_REG_ZONE_EXEC_VA);
+ exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_start,
+ exec_va_pages, KBASE_REG_ZONE_EXEC_VA);
if (!exec_va_reg) {
err = -ENOMEM;
goto exit_unlock;
@@ -1435,10 +1419,9 @@ int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev)
kbdev->csf.shared_reg_rbtree = RB_ROOT;
- shared_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree,
- shared_reg_start_pfn,
- shared_reg_size,
- KBASE_REG_ZONE_MCU_SHARED);
+ shared_reg =
+ kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, shared_reg_start_pfn,
+ shared_reg_size, KBASE_REG_ZONE_MCU_SHARED);
if (!shared_reg)
return -ENOMEM;
@@ -1447,10 +1430,30 @@ int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev)
}
#endif
+static void kbasep_mem_page_size_init(struct kbase_device *kbdev)
+{
+#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE)
+#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC)
+ kbdev->pagesize_2mb = true;
+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC) != 1) {
+		dev_warn(
+			kbdev->dev,
+			"2MB page allocation is force-enabled although the current GPU HW does not meet the requirement for it.\n");
+ }
+#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */
+ kbdev->pagesize_2mb = false;
+#endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */
+#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */
+ /* Set it to the default based on which GPU is present */
+ kbdev->pagesize_2mb = kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC);
+#endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */
+}
+
int kbase_mem_init(struct kbase_device *kbdev)
{
int err = 0;
struct kbasep_mem_device *memdev;
+ char va_region_slab_name[VA_REGION_SLAB_NAME_SIZE];
#if IS_ENABLED(CONFIG_OF)
struct device_node *mgm_node = NULL;
#endif
@@ -1459,6 +1462,19 @@ int kbase_mem_init(struct kbase_device *kbdev)
memdev = &kbdev->memdev;
+ kbasep_mem_page_size_init(kbdev);
+
+ scnprintf(va_region_slab_name, VA_REGION_SLAB_NAME_SIZE, VA_REGION_SLAB_NAME_PREFIX "%s",
+ kbdev->devname);
+
+ /* Initialize slab cache for kbase_va_regions */
+ kbdev->va_region_slab =
+ kmem_cache_create(va_region_slab_name, sizeof(struct kbase_va_region), 0, 0, NULL);
+ if (kbdev->va_region_slab == NULL) {
+ dev_err(kbdev->dev, "Failed to create va_region_slab\n");
+ return -ENOMEM;
+ }
+
kbase_mem_migrate_init(kbdev);
kbase_mem_pool_group_config_set_max_size(&kbdev->mem_pool_defaults,
KBASE_MEM_POOL_MAX_SIZE_KCTX);
@@ -1550,6 +1566,9 @@ void kbase_mem_term(struct kbase_device *kbdev)
kbase_mem_migrate_term(kbdev);
+ kmem_cache_destroy(kbdev->va_region_slab);
+ kbdev->va_region_slab = NULL;
+
WARN_ON(kbdev->total_gpu_pages);
WARN_ON(!RB_EMPTY_ROOT(&kbdev->process_root));
WARN_ON(!RB_EMPTY_ROOT(&kbdev->dma_buf_root));
@@ -1563,6 +1582,7 @@ KBASE_EXPORT_TEST_API(kbase_mem_term);
/**
* kbase_alloc_free_region - Allocate a free region object.
*
+ * @kbdev: kbase device
* @rbtree: Backlink to the red-black tree of memory regions.
* @start_pfn: The Page Frame Number in GPU virtual address space.
* @nr_pages: The size of the region in pages.
@@ -1575,8 +1595,8 @@ KBASE_EXPORT_TEST_API(kbase_mem_term);
*
* Return: pointer to the allocated region object on success, NULL otherwise.
*/
-struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
- u64 start_pfn, size_t nr_pages, int zone)
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree,
+ u64 start_pfn, size_t nr_pages, int zone)
{
struct kbase_va_region *new_reg;
@@ -1588,13 +1608,13 @@ struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
/* 64-bit address range is the max */
KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE));
- new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL);
+ new_reg = kmem_cache_zalloc(kbdev->va_region_slab, GFP_KERNEL);
if (!new_reg)
return NULL;
- new_reg->va_refcnt = 1;
- new_reg->no_user_free_refcnt = 0;
+ kbase_refcount_set(&new_reg->va_refcnt, 1);
+ atomic_set(&new_reg->no_user_free_count, 0);
new_reg->cpu_alloc = NULL; /* no alloc bound yet */
new_reg->gpu_alloc = NULL; /* no alloc bound yet */
new_reg->rbtree = rbtree;
@@ -1726,7 +1746,6 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
unsigned long gwt_mask = ~0;
int group_id;
struct kbase_mem_phy_alloc *alloc;
- bool ignore_page_migration = false;
#ifdef CONFIG_MALI_CINSTR_GWT
if (kctx->gwt_enabled)
@@ -1755,41 +1774,46 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
for (i = 0; i < alloc->imported.alias.nents; i++) {
if (alloc->imported.alias.aliased[i].alloc) {
- err = kbase_mmu_insert_pages(
+ err = kbase_mmu_insert_aliased_pages(
kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride),
alloc->imported.alias.aliased[i].alloc->pages +
alloc->imported.alias.aliased[i].offset,
alloc->imported.alias.aliased[i].length,
reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info,
- NULL, ignore_page_migration);
+ NULL);
if (err)
- goto bad_insert;
+ goto bad_aliased_insert;
/* Note: mapping count is tracked at alias
* creation time
*/
} else {
- err = kbase_mmu_insert_single_page(
- kctx, reg->start_pfn + i * stride,
- kctx->aliasing_sink_page,
+ err = kbase_mmu_insert_single_aliased_page(
+ kctx, reg->start_pfn + i * stride, kctx->aliasing_sink_page,
alloc->imported.alias.aliased[i].length,
- (reg->flags & mask & gwt_mask) | attr,
- group_id, mmu_sync_info);
+ (reg->flags & mask & gwt_mask) | attr, group_id,
+ mmu_sync_info);
if (err)
- goto bad_insert;
+ goto bad_aliased_insert;
}
}
} else {
if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM ||
- reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF)
- ignore_page_migration = true;
-
- err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
- kbase_get_gpu_phy_pages(reg),
- kbase_reg_current_backed_size(reg),
- reg->flags & gwt_mask, kctx->as_nr, group_id,
- mmu_sync_info, reg, ignore_page_migration);
+ reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
+
+ err = kbase_mmu_insert_imported_pages(
+ kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg),
+ reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, reg);
+ } else {
+ err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ kbase_get_gpu_phy_pages(reg),
+ kbase_reg_current_backed_size(reg),
+ reg->flags & gwt_mask, kctx->as_nr, group_id,
+ mmu_sync_info, reg, true);
+ }
+
if (err)
goto bad_insert;
kbase_mem_phy_alloc_gpu_mapped(alloc);
@@ -1799,9 +1823,9 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
!WARN_ON(reg->nr_pages < reg->gpu_alloc->nents) &&
reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM &&
reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
- /* For padded imported dma-buf memory, map the dummy aliasing
- * page from the end of the dma-buf pages, to the end of the
- * region using a read only mapping.
+ /* For padded imported dma-buf or user-buf memory, map the dummy
+ * aliasing page from the end of the imported pages, to the end of
+ * the region using a read only mapping.
*
* Only map when it's imported dma-buf memory that is currently
* mapped.
@@ -1809,22 +1833,32 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
* Assume reg->gpu_alloc->nents is the number of actual pages
* in the dma-buf memory.
*/
- err = kbase_mmu_insert_single_page(
- kctx, reg->start_pfn + reg->gpu_alloc->nents,
- kctx->aliasing_sink_page,
+ err = kbase_mmu_insert_single_imported_page(
+ kctx, reg->start_pfn + reg->gpu_alloc->nents, kctx->aliasing_sink_page,
reg->nr_pages - reg->gpu_alloc->nents,
- (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR,
- KBASE_MEM_GROUP_SINK, mmu_sync_info);
+ (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, KBASE_MEM_GROUP_SINK,
+ mmu_sync_info);
if (err)
goto bad_insert;
}
return err;
-bad_insert:
- kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages,
- reg->nr_pages, kctx->as_nr, ignore_page_migration);
+bad_aliased_insert:
+ while (i-- > 0) {
+ struct tagged_addr *phys_alloc = NULL;
+ u64 const stride = alloc->imported.alias.stride;
+
+ if (alloc->imported.alias.aliased[i].alloc != NULL)
+ phys_alloc = alloc->imported.alias.aliased[i].alloc->pages +
+ alloc->imported.alias.aliased[i].offset;
+ kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride),
+ phys_alloc, alloc->imported.alias.aliased[i].length,
+ alloc->imported.alias.aliased[i].length, kctx->as_nr,
+ false);
+ }
+bad_insert:
kbase_remove_va_region(kctx->kbdev, reg);
return err;
@@ -1870,26 +1904,49 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
kctx->kbdev, &kctx->mmu,
reg->start_pfn + (i * alloc->imported.alias.stride),
phys_alloc, alloc->imported.alias.aliased[i].length,
- kctx->as_nr, false);
+ alloc->imported.alias.aliased[i].length, kctx->as_nr,
+ false);
if (WARN_ON_ONCE(err_loop))
err = err_loop;
}
}
break;
- case KBASE_MEM_TYPE_IMPORTED_UMM:
- err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
- alloc->pages, reg->nr_pages, kctx->as_nr, true);
+ case KBASE_MEM_TYPE_IMPORTED_UMM: {
+ size_t nr_phys_pages = reg->nr_pages;
+ size_t nr_virt_pages = reg->nr_pages;
+ /* If the region has import padding and falls under the threshold for
+ * issuing a partial GPU cache flush, we want to reduce the number of
+ * physical pages that get flushed.
+		 *
+		 * This is symmetric with the case of mapping the memory, which first maps
+ * each imported physical page to a separate virtual page, and then
+ * maps the single aliasing sink page to each of the virtual padding
+ * pages.
+ */
+ if (reg->flags & KBASE_REG_IMPORT_PAD)
+ nr_phys_pages = alloc->nents + 1;
+
+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ alloc->pages, nr_phys_pages, nr_virt_pages,
+ kctx->as_nr, true);
+ }
break;
- case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
- err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
- alloc->pages, kbase_reg_current_backed_size(reg),
- kctx->as_nr, true);
+ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+ size_t nr_reg_pages = kbase_reg_current_backed_size(reg);
+
+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ alloc->pages, nr_reg_pages, nr_reg_pages,
+ kctx->as_nr, true);
+ }
break;
- default:
- err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
- alloc->pages, kbase_reg_current_backed_size(reg),
- kctx->as_nr, false);
+ default: {
+ size_t nr_reg_pages = kbase_reg_current_backed_size(reg);
+
+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ alloc->pages, nr_reg_pages, nr_reg_pages,
+ kctx->as_nr, false);
+ }
break;
}
@@ -2214,7 +2271,7 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re
__func__, (void *)reg, (void *)kctx);
lockdep_assert_held(&kctx->reg_lock);
- if (kbase_va_region_is_no_user_free(kctx, reg)) {
+ if (kbase_va_region_is_no_user_free(reg)) {
dev_warn(kctx->kbdev->dev, "Attempt to free GPU memory whose freeing by user space is forbidden!\n");
return -EINVAL;
}
@@ -2435,7 +2492,7 @@ int kbase_update_region_flags(struct kbase_context *kctx,
if (flags & BASEP_MEM_NO_USER_FREE) {
kbase_gpu_vm_lock(kctx);
- kbase_va_region_no_user_free_get(kctx, reg);
+ kbase_va_region_no_user_free_inc(reg);
kbase_gpu_vm_unlock(kctx);
}
@@ -2489,15 +2546,14 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
tp = alloc->pages + alloc->nents;
-#ifdef CONFIG_MALI_2MB_ALLOC
/* Check if we have enough pages requested so we can allocate a large
* page (512 * 4KB = 2MB )
*/
- if (nr_left >= (SZ_2M / SZ_4K)) {
+ if (kbdev->pagesize_2mb && nr_left >= (SZ_2M / SZ_4K)) {
int nr_lp = nr_left / (SZ_2M / SZ_4K);
res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.large[alloc->group_id],
- nr_lp * (SZ_2M / SZ_4K), tp, true);
+ nr_lp * (SZ_2M / SZ_4K), tp, true, kctx->task);
if (res > 0) {
nr_left -= res;
@@ -2551,7 +2607,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
err = kbase_mem_pool_grow(
&kctx->mem_pools.large[alloc->group_id],
- 1);
+ 1, kctx->task);
if (err)
break;
} while (1);
@@ -2592,12 +2648,11 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
}
}
}
-no_new_partial:
-#endif
+no_new_partial:
if (nr_left) {
res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[alloc->group_id], nr_left,
- tp, false);
+ tp, false, kctx->task);
if (res <= 0)
goto alloc_failed;
}
@@ -2656,18 +2711,17 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
lockdep_assert_held(&pool->pool_lock);
-#if !defined(CONFIG_MALI_2MB_ALLOC)
- WARN_ON(pool->order);
-#endif
+ kctx = alloc->imported.native.kctx;
+ kbdev = kctx->kbdev;
+
+ if (!kbdev->pagesize_2mb)
+ WARN_ON(pool->order);
if (alloc->reg) {
if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents)
goto invalid_request;
}
- kctx = alloc->imported.native.kctx;
- kbdev = kctx->kbdev;
-
lockdep_assert_held(&kctx->mem_partials_lock);
if (nr_pages_requested == 0)
@@ -2686,8 +2740,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
tp = alloc->pages + alloc->nents;
new_pages = tp;
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (pool->order) {
+ if (kbdev->pagesize_2mb && pool->order) {
int nr_lp = nr_left / (SZ_2M / SZ_4K);
res = kbase_mem_pool_alloc_pages_locked(pool,
@@ -2771,15 +2824,12 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
if (nr_left)
goto alloc_failed;
} else {
-#endif
res = kbase_mem_pool_alloc_pages_locked(pool,
nr_left,
tp);
if (res <= 0)
goto alloc_failed;
-#ifdef CONFIG_MALI_2MB_ALLOC
}
-#endif
KBASE_TLSTREAM_AUX_PAGESALLOC(
kbdev,
@@ -2800,8 +2850,7 @@ alloc_failed:
struct tagged_addr *start_free = alloc->pages + alloc->nents;
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (pool->order) {
+ if (kbdev->pagesize_2mb && pool->order) {
while (nr_pages_to_free) {
if (is_huge_head(*start_free)) {
kbase_mem_pool_free_pages_locked(
@@ -2819,15 +2868,12 @@ alloc_failed:
}
}
} else {
-#endif
kbase_mem_pool_free_pages_locked(pool,
nr_pages_to_free,
start_free,
false, /* not dirty */
true); /* return to pool */
-#ifdef CONFIG_MALI_2MB_ALLOC
}
-#endif
}
kbase_process_page_usage_dec(kctx, nr_pages_requested);
@@ -3816,8 +3862,8 @@ static void kbase_jit_destroy_worker(struct work_struct *work)
* by implementing "free on putting the last reference",
* but only for JIT regions.
*/
- WARN_ON(reg->no_user_free_refcnt > 1);
- kbase_va_region_no_user_free_put(kctx, reg);
+ WARN_ON(atomic_read(&reg->no_user_free_count) > 1);
+ kbase_va_region_no_user_free_dec(reg);
kbase_mem_free_region(kctx, reg);
kbase_gpu_vm_unlock(kctx);
} while (1);
@@ -4078,18 +4124,14 @@ static int kbase_jit_grow(struct kbase_context *kctx,
delta = info->commit_pages - reg->gpu_alloc->nents;
pages_required = delta;
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (pages_required >= (SZ_2M / SZ_4K)) {
+ if (kctx->kbdev->pagesize_2mb && pages_required >= (SZ_2M / SZ_4K)) {
pool = &kctx->mem_pools.large[kctx->jit_group_id];
/* Round up to number of 2 MB pages required */
pages_required += ((SZ_2M / SZ_4K) - 1);
pages_required /= (SZ_2M / SZ_4K);
} else {
-#endif
pool = &kctx->mem_pools.small[kctx->jit_group_id];
-#ifdef CONFIG_MALI_2MB_ALLOC
}
-#endif
if (reg->cpu_alloc != reg->gpu_alloc)
pages_required *= 2;
@@ -4110,7 +4152,7 @@ static int kbase_jit_grow(struct kbase_context *kctx,
spin_unlock(&kctx->mem_partials_lock);
kbase_gpu_vm_unlock(kctx);
- ret = kbase_mem_pool_grow(pool, pool_delta);
+ ret = kbase_mem_pool_grow(pool, pool_delta, kctx->task);
kbase_gpu_vm_lock(kctx);
if (ret)
@@ -4374,14 +4416,14 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
if (!jit_allow_allocate(kctx, info, ignore_pressure_limit))
return NULL;
-#ifdef CONFIG_MALI_2MB_ALLOC
- /* Preallocate memory for the sub-allocation structs */
- for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
- prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
- if (!prealloc_sas[i])
- goto end;
+ if (kctx->kbdev->pagesize_2mb) {
+ /* Preallocate memory for the sub-allocation structs */
+ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
+ prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
+ if (!prealloc_sas[i])
+ goto end;
+ }
}
-#endif
kbase_gpu_vm_lock(kctx);
mutex_lock(&kctx->jit_evict_lock);
@@ -4561,7 +4603,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
/* Similarly to tiler heap init, there is a short window of time
* where the (either recycled or newly allocated, in our case) region has
- * "no user free" refcount incremented but is still missing the DONT_NEED flag, and
+ * "no user free" count incremented but is still missing the DONT_NEED flag, and
* doesn't yet have the ACTIVE_JIT_ALLOC flag either. Temporarily leaking the
* allocation is the least bad option that doesn't lead to a security issue down the
* line (it will eventually be cleaned up during context termination).
@@ -4570,9 +4612,9 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
* flags.
*/
kbase_gpu_vm_lock(kctx);
- if (unlikely(reg->no_user_free_refcnt > 1)) {
+ if (unlikely(atomic_read(&reg->no_user_free_count) > 1)) {
kbase_gpu_vm_unlock(kctx);
- dev_err(kctx->kbdev->dev, "JIT region has no_user_free_refcnt > 1!\n");
+ dev_err(kctx->kbdev->dev, "JIT region has no_user_free_count > 1!\n");
mutex_lock(&kctx->jit_evict_lock);
list_move(&reg->jit_node, &kctx->jit_pool_head);
@@ -4728,8 +4770,8 @@ bool kbase_jit_evict(struct kbase_context *kctx)
* by implementing "free on putting the last reference",
* but only for JIT regions.
*/
- WARN_ON(reg->no_user_free_refcnt > 1);
- kbase_va_region_no_user_free_put(kctx, reg);
+ WARN_ON(atomic_read(&reg->no_user_free_count) > 1);
+ kbase_va_region_no_user_free_dec(reg);
kbase_mem_free_region(kctx, reg);
}
@@ -4757,8 +4799,8 @@ void kbase_jit_term(struct kbase_context *kctx)
* by implementing "free on putting the last reference",
* but only for JIT regions.
*/
- WARN_ON(walker->no_user_free_refcnt > 1);
- kbase_va_region_no_user_free_put(kctx, walker);
+ WARN_ON(atomic_read(&walker->no_user_free_count) > 1);
+ kbase_va_region_no_user_free_dec(walker);
kbase_mem_free_region(kctx, walker);
mutex_lock(&kctx->jit_evict_lock);
}
@@ -4776,8 +4818,8 @@ void kbase_jit_term(struct kbase_context *kctx)
* by implementing "free on putting the last reference",
* but only for JIT regions.
*/
- WARN_ON(walker->no_user_free_refcnt > 1);
- kbase_va_region_no_user_free_put(kctx, walker);
+ WARN_ON(atomic_read(&walker->no_user_free_count) > 1);
+ kbase_va_region_no_user_free_dec(walker);
kbase_mem_free_region(kctx, walker);
mutex_lock(&kctx->jit_evict_lock);
}
@@ -5023,9 +5065,13 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
* region, otherwise the initial content of memory would be wrong.
*/
for (i = 0; i < pinned_pages; i++) {
- dma_addr_t dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
- DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
-
+ dma_addr_t dma_addr;
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#else
+ dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
+#endif
err = dma_mapping_error(dev, dma_addr);
if (err)
goto unwind;
@@ -5041,9 +5087,10 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
gwt_mask = ~KBASE_REG_GPU_WR;
#endif
- err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa,
- kbase_reg_current_backed_size(reg), reg->flags & gwt_mask,
- kctx->as_nr, alloc->group_id, mmu_sync_info, NULL, true);
+ err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa,
+ kbase_reg_current_backed_size(reg),
+ reg->flags & gwt_mask, kctx->as_nr, alloc->group_id,
+ mmu_sync_info, NULL);
if (err == 0)
return 0;
@@ -5064,8 +5111,12 @@ unwind:
dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#else
dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
DMA_ATTR_SKIP_CPU_SYNC);
+#endif
}
/* The user buffer could already have been previously pinned before
@@ -5182,9 +5233,13 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
}
/* Notice: use the original DMA address to unmap the whole memory page. */
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+#else
dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i],
PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
-
+#endif
if (writeable)
set_page_dirty_lock(pages[i]);
#if !MALI_USE_CSF
@@ -5308,6 +5363,7 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_r
kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
alloc->pages,
kbase_reg_current_backed_size(reg),
+ kbase_reg_current_backed_size(reg),
kctx->as_nr, true);
}
diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h
index 83872a1..02e5509 100644
--- a/mali_kbase/mali_kbase_mem.h
+++ b/mali_kbase/mali_kbase_mem.h
@@ -38,6 +38,7 @@
/* Required for kbase_mem_evictable_unmake */
#include "mali_kbase_mem_linux.h"
#include "mali_kbase_mem_migrate.h"
+#include "mali_kbase_refcount_defs.h"
static inline void kbase_process_page_usage_inc(struct kbase_context *kctx,
int pages);
@@ -419,8 +420,8 @@ static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_m
* @jit_usage_id: The last just-in-time memory usage ID for this region.
* @jit_bin_id: The just-in-time memory bin this region came from.
* @va_refcnt: Number of users of this region. Protected by reg_lock.
- * @no_user_free_refcnt: Number of users that want to prevent the region from
- * being freed by userspace.
+ * @no_user_free_count: Number of contexts that want to prevent the region
+ * from being freed by userspace.
* @heap_info_gpu_addr: Pointer to an object in GPU memory defining an end of
* an allocated region
* The object can be one of:
@@ -681,8 +682,8 @@ struct kbase_va_region {
size_t used_pages;
#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
- int va_refcnt;
- int no_user_free_refcnt;
+ kbase_refcount_t va_refcnt;
+ atomic_t no_user_free_count;
};
/**
@@ -759,15 +760,12 @@ static inline void kbase_region_refcnt_free(struct kbase_device *kbdev,
static inline struct kbase_va_region *kbase_va_region_alloc_get(
struct kbase_context *kctx, struct kbase_va_region *region)
{
- lockdep_assert_held(&kctx->reg_lock);
-
- WARN_ON(!region->va_refcnt);
- WARN_ON(region->va_refcnt == INT_MAX);
+ WARN_ON(!kbase_refcount_read(&region->va_refcnt));
+ WARN_ON(kbase_refcount_read(&region->va_refcnt) == INT_MAX);
- /* non-atomic as kctx->reg_lock is held */
dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %pK\n",
- region->va_refcnt, (void *)region);
- region->va_refcnt++;
+ kbase_refcount_read(&region->va_refcnt), (void *)region);
+ kbase_refcount_inc(&region->va_refcnt);
return region;
}
@@ -775,17 +773,14 @@ static inline struct kbase_va_region *kbase_va_region_alloc_get(
static inline struct kbase_va_region *kbase_va_region_alloc_put(
struct kbase_context *kctx, struct kbase_va_region *region)
{
- lockdep_assert_held(&kctx->reg_lock);
-
- WARN_ON(region->va_refcnt <= 0);
+ WARN_ON(kbase_refcount_read(&region->va_refcnt) <= 0);
WARN_ON(region->flags & KBASE_REG_FREE);
- /* non-atomic as kctx->reg_lock is held */
- region->va_refcnt--;
- dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %pK\n",
- region->va_refcnt, (void *)region);
- if (!region->va_refcnt)
+ if (kbase_refcount_dec_and_test(&region->va_refcnt))
kbase_region_refcnt_free(kctx->kbdev, region);
+ else
+ dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %pK\n",
+ kbase_refcount_read(&region->va_refcnt), (void *)region);
return NULL;
}
@@ -799,58 +794,44 @@ static inline struct kbase_va_region *kbase_va_region_alloc_put(
* Hence, callers cannot rely on this check alone to determine if a region might be shrunk
* by any part of kbase. Instead they should use kbase_is_region_shrinkable().
*
- * @kctx: Pointer to kbase context.
* @region: Pointer to region.
*
* Return: true if userspace cannot free the region, false if userspace can free the region.
*/
-static inline bool kbase_va_region_is_no_user_free(struct kbase_context *kctx,
- struct kbase_va_region *region)
+static inline bool kbase_va_region_is_no_user_free(struct kbase_va_region *region)
{
- lockdep_assert_held(&kctx->reg_lock);
- return region->no_user_free_refcnt > 0;
+ return atomic_read(&region->no_user_free_count) > 0;
}
/**
- * kbase_va_region_no_user_free_get - Increment "no user free" refcount for a region.
+ * kbase_va_region_no_user_free_inc - Increment "no user free" count for a region.
 * Calling this function will prevent the region from being shrunk by parts of kbase that
- * don't own the region (as long as the refcount stays above zero). Refer to
+ * don't own the region (as long as the count stays above zero). Refer to
* kbase_va_region_is_no_user_free() for more information.
*
- * @kctx: Pointer to kbase context.
* @region: Pointer to region (not shrinkable).
*
* Return: the pointer to the region passed as argument.
*/
-static inline struct kbase_va_region *
-kbase_va_region_no_user_free_get(struct kbase_context *kctx, struct kbase_va_region *region)
+static inline void kbase_va_region_no_user_free_inc(struct kbase_va_region *region)
{
- lockdep_assert_held(&kctx->reg_lock);
-
WARN_ON(kbase_is_region_shrinkable(region));
- WARN_ON(region->no_user_free_refcnt == INT_MAX);
+ WARN_ON(atomic_read(&region->no_user_free_count) == INT_MAX);
/* non-atomic as kctx->reg_lock is held */
- region->no_user_free_refcnt++;
-
- return region;
+ atomic_inc(&region->no_user_free_count);
}
/**
- * kbase_va_region_no_user_free_put - Decrement "no user free" refcount for a region.
+ * kbase_va_region_no_user_free_dec - Decrement "no user free" count for a region.
*
- * @kctx: Pointer to kbase context.
* @region: Pointer to region (not shrinkable).
*/
-static inline void kbase_va_region_no_user_free_put(struct kbase_context *kctx,
- struct kbase_va_region *region)
+static inline void kbase_va_region_no_user_free_dec(struct kbase_va_region *region)
{
- lockdep_assert_held(&kctx->reg_lock);
+ WARN_ON(!kbase_va_region_is_no_user_free(region));
- WARN_ON(!kbase_va_region_is_no_user_free(kctx, region));
-
- /* non-atomic as kctx->reg_lock is held */
- region->no_user_free_refcnt--;
+ atomic_dec(&region->no_user_free_count);
}
/* Common functions */
@@ -1148,6 +1129,9 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
* @pages: Pointer to array where the physical address of the allocated
* pages will be stored.
* @partial_allowed: If fewer pages allocated is allowed
+ * @page_owner: Pointer to the task that created the Kbase context for which
+ * the pages are being allocated. It can be NULL if the pages
+ * won't be associated with any Kbase context.
*
* Like kbase_mem_pool_alloc() but optimized for allocating many pages.
*
@@ -1164,7 +1148,8 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
* this lock, it should use kbase_mem_pool_alloc_pages_locked() instead.
*/
int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
- struct tagged_addr *pages, bool partial_allowed);
+ struct tagged_addr *pages, bool partial_allowed,
+ struct task_struct *page_owner);
/**
* kbase_mem_pool_alloc_pages_locked - Allocate pages from memory pool
@@ -1276,13 +1261,17 @@ void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size);
* kbase_mem_pool_grow - Grow the pool
* @pool: Memory pool to grow
* @nr_to_grow: Number of pages to add to the pool
+ * @page_owner: Pointer to the task that created the Kbase context for which
+ * the memory pool is being grown. It can be NULL if the pages
+ * to be allocated won't be associated with any Kbase context.
*
* Adds @nr_to_grow pages to the pool. Note that this may cause the pool to
* become larger than the maximum size specified.
*
 * Return: 0 on success, -ENOMEM if unable to allocate sufficient pages
*/
-int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow);
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow,
+ struct task_struct *page_owner);
/**
* kbase_mem_pool_trim - Grow or shrink the pool to a new size
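For reference, a minimal sketch of how a caller supplies the new page_owner argument, assuming a pool that backs a Kbase context; the helper name is illustrative and not part of the driver. kctx->task is the task that created the context (as used by the kbase_jit_allocate() hunk earlier in this patch), and NULL is passed for pools that serve no particular context:

static int grow_ctx_pool_sketch(struct kbase_context *kctx,
				struct kbase_mem_pool *pool, size_t delta)
{
	/* Pass the creating task so a kernel worker can stop allocating if
	 * that process is exiting or being OoM-killed; device-internal pools
	 * would pass NULL here instead.
	 */
	return kbase_mem_pool_grow(pool, delta, kctx->task);
}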
@@ -1398,8 +1387,8 @@ struct kbase_va_region *kbase_region_tracker_find_region_base_address(
struct kbase_va_region *kbase_find_region_base_address(struct rb_root *rbtree,
u64 gpu_addr);
-struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
- u64 start_pfn, size_t nr_pages, int zone);
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree,
+ u64 start_pfn, size_t nr_pages, int zone);
void kbase_free_alloced_region(struct kbase_va_region *reg);
int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg,
u64 addr, size_t nr_pages, size_t align);
@@ -1410,6 +1399,32 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
bool kbase_check_alloc_flags(unsigned long flags);
bool kbase_check_import_flags(unsigned long flags);
+static inline bool kbase_import_size_is_valid(struct kbase_device *kbdev, u64 va_pages)
+{
+ if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) {
+ dev_dbg(
+ kbdev->dev,
+ "Import attempted with va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!",
+ (unsigned long long)va_pages);
+ return false;
+ }
+
+ return true;
+}
+
+static inline bool kbase_alias_size_is_valid(struct kbase_device *kbdev, u64 va_pages)
+{
+ if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) {
+ dev_dbg(
+ kbdev->dev,
+ "Alias attempted with va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!",
+ (unsigned long long)va_pages);
+ return false;
+ }
+
+ return true;
+}
+
/**
* kbase_check_alloc_sizes - check user space sizes parameters for an
* allocation
@@ -1737,7 +1752,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
*
* @prealloc_sa: Information about the partial allocation if the amount of memory requested
* is not a multiple of 2MB. One instance of struct kbase_sub_alloc must be
- * allocated by the caller iff CONFIG_MALI_2MB_ALLOC is enabled.
+ * allocated by the caller if kbdev->pagesize_2mb is enabled.
*
* Allocates @nr_pages_requested and updates the alloc object. This function does not allocate new
* pages from the kernel, and therefore will never trigger the OoM killer. Therefore, it can be
@@ -1765,7 +1780,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
* This ensures that the pool can be grown to the required size and that the allocation can
* complete without another thread using the newly grown pages.
*
- * If CONFIG_MALI_2MB_ALLOC is defined and the allocation is >= 2MB, then @pool must be one of the
+ * If kbdev->pagesize_2mb is enabled and the allocation is >= 2MB, then @pool must be one of the
* pools from alloc->imported.native.kctx->mem_pools.large[]. Otherwise it must be one of the
* mempools from alloc->imported.native.kctx->mem_pools.small[].
*
@@ -2494,8 +2509,7 @@ kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits)
* kbase_mem_allow_alloc - Check if allocation of GPU memory is allowed
* @kctx: Pointer to kbase context
*
- * Don't allow the allocation of GPU memory until user space has set up the
- * tracking page (which sets kctx->process_mm) or if the ioctl has been issued
+ * Don't allow the allocation of GPU memory if the ioctl has been issued
* from the forked child process using the mali device file fd inherited from
* the parent process.
*
@@ -2503,13 +2517,23 @@ kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits)
*/
static inline bool kbase_mem_allow_alloc(struct kbase_context *kctx)
{
- bool allow_alloc = true;
-
- rcu_read_lock();
- allow_alloc = (rcu_dereference(kctx->process_mm) == current->mm);
- rcu_read_unlock();
+ return (kctx->process_mm == current->mm);
+}
- return allow_alloc;
+/**
+ * kbase_mem_mmgrab - Wrapper function to take reference on mm_struct of current process
+ */
+static inline void kbase_mem_mmgrab(void)
+{
+ /* This merely takes a reference on the memory descriptor structure
+ * i.e. the mm_struct of the current process, and not on its address space,
+ * so it won't block the freeing of the address space on process exit.
+ */
+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
+ atomic_inc(&current->mm->mm_count);
+#else
+ mmgrab(current->mm);
+#endif
}
/**
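A hedged illustration of how the simplified process tracking fits together: kbase_mem_mmgrab() pins the mm_struct of the creating process, and kbase_mem_allow_alloc() later rejects ioctls issued from a forked child, whose current->mm differs from kctx->process_mm. The helper names below are illustrative; where the driver actually records process_mm (and later drops the reference, e.g. with mmdrop()) is not shown in this hunk.

static void record_owner_mm_sketch(struct kbase_context *kctx)
{
	/* Take a reference on the memory descriptor only, not on the address
	 * space, so process exit is not blocked.
	 */
	kbase_mem_mmgrab();
	kctx->process_mm = current->mm;
}

static bool may_alloc_sketch(struct kbase_context *kctx)
{
	/* A forked child inherits the device fd but runs with a different mm,
	 * so this comparison fails for it.
	 */
	return kbase_mem_allow_alloc(kctx);
}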
diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c
index e577452..7d30790 100644
--- a/mali_kbase/mali_kbase_mem_linux.c
+++ b/mali_kbase/mali_kbase_mem_linux.c
@@ -37,7 +37,7 @@
#include <linux/memory_group_manager.h>
#include <linux/math64.h>
#include <linux/migrate.h>
-
+#include <linux/version.h>
#include <mali_kbase.h>
#include <mali_kbase_mem_linux.h>
#include <tl/mali_kbase_tracepoints.h>
@@ -385,8 +385,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
zone = KBASE_REG_ZONE_CUSTOM_VA;
}
- reg = kbase_alloc_free_region(rbtree, PFN_DOWN(*gpu_va),
- va_pages, zone);
+ reg = kbase_alloc_free_region(kctx->kbdev, rbtree, PFN_DOWN(*gpu_va), va_pages, zone);
if (!reg) {
dev_err(dev, "Failed to allocate free region");
@@ -481,22 +480,22 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
} else /* we control the VA */ {
size_t align = 1;
-#ifdef CONFIG_MALI_2MB_ALLOC
- /* If there's enough (> 33 bits) of GPU VA space, align to 2MB
- * boundaries. The similar condition is used for mapping from
- * the SAME_VA zone inside kbase_context_get_unmapped_area().
- */
- if (kctx->kbdev->gpu_props.mmu.va_bits > 33) {
- if (va_pages >= (SZ_2M / SZ_4K))
- align = (SZ_2M / SZ_4K);
- }
- if (*gpu_va)
- align = 1;
+ if (kctx->kbdev->pagesize_2mb) {
+ /* If there's enough (> 33 bits) of GPU VA space, align to 2MB
+ * boundaries. The similar condition is used for mapping from
+ * the SAME_VA zone inside kbase_context_get_unmapped_area().
+ */
+ if (kctx->kbdev->gpu_props.mmu.va_bits > 33) {
+ if (va_pages >= (SZ_2M / SZ_4K))
+ align = (SZ_2M / SZ_4K);
+ }
+ if (*gpu_va)
+ align = 1;
#if !MALI_USE_CSF
- if (reg->flags & KBASE_REG_TILER_ALIGN_TOP)
- align = 1;
+ if (reg->flags & KBASE_REG_TILER_ALIGN_TOP)
+ align = 1;
#endif /* !MALI_USE_CSF */
-#endif /* CONFIG_MALI_2MB_ALLOC */
+ }
if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, align,
mmu_sync_info) != 0) {
dev_warn(dev, "Failed to map memory on GPU");
@@ -999,7 +998,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in
* & GPU queue ringbuffer and none of them needs to be explicitly marked
* as evictable by Userspace.
*/
- if (kbase_va_region_is_no_user_free(kctx, reg))
+ if (kbase_va_region_is_no_user_free(reg))
goto out_unlock;
/* Is the region being transitioning between not needed and needed? */
@@ -1319,10 +1318,11 @@ int kbase_mem_umm_map(struct kbase_context *kctx,
gwt_mask = ~KBASE_REG_GPU_WR;
#endif
- err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
- kbase_get_gpu_phy_pages(reg),
- kbase_reg_current_backed_size(reg), reg->flags & gwt_mask,
- kctx->as_nr, alloc->group_id, mmu_sync_info, NULL, true);
+ err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ kbase_get_gpu_phy_pages(reg),
+ kbase_reg_current_backed_size(reg),
+ reg->flags & gwt_mask, kctx->as_nr, alloc->group_id,
+ mmu_sync_info, NULL);
if (err)
goto bad_insert;
@@ -1335,11 +1335,11 @@ int kbase_mem_umm_map(struct kbase_context *kctx,
* Assume alloc->nents is the number of actual pages in the
* dma-buf memory.
*/
- err = kbase_mmu_insert_single_page(
- kctx, reg->start_pfn + alloc->nents,
- kctx->aliasing_sink_page, reg->nr_pages - alloc->nents,
- (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR,
- KBASE_MEM_GROUP_SINK, mmu_sync_info);
+ err = kbase_mmu_insert_single_imported_page(
+ kctx, reg->start_pfn + alloc->nents, kctx->aliasing_sink_page,
+ reg->nr_pages - alloc->nents,
+ (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, KBASE_MEM_GROUP_SINK,
+ mmu_sync_info);
if (err)
goto bad_pad_insert;
}
@@ -1348,7 +1348,7 @@ int kbase_mem_umm_map(struct kbase_context *kctx,
bad_pad_insert:
kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages,
- alloc->nents, kctx->as_nr, true);
+ alloc->nents, alloc->nents, kctx->as_nr, true);
bad_insert:
kbase_mem_umm_unmap_attachment(kctx, alloc);
bad_map_attachment:
@@ -1377,7 +1377,8 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx,
int err;
err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
- alloc->pages, reg->nr_pages, kctx->as_nr, true);
+ alloc->pages, reg->nr_pages, reg->nr_pages,
+ kctx->as_nr, true);
WARN_ON(err);
}
@@ -1449,6 +1450,9 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx,
return NULL;
}
+ if (!kbase_import_size_is_valid(kctx->kbdev, *va_pages))
+ return NULL;
+
/* ignore SAME_VA */
*flags &= ~BASE_MEM_SAME_VA;
@@ -1469,23 +1473,21 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx,
if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP)
need_sync = true;
-#if IS_ENABLED(CONFIG_64BIT)
- if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+ if (!kbase_ctx_compat_mode(kctx)) {
/*
* 64-bit tasks require us to reserve VA on the CPU that we use
* on the GPU.
*/
shared_zone = true;
}
-#endif
if (shared_zone) {
*flags |= BASE_MEM_NEED_MMAP;
- reg = kbase_alloc_free_region(&kctx->reg_rbtree_same,
- 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+ reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *va_pages,
+ KBASE_REG_ZONE_SAME_VA);
} else {
- reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
- 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+ reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *va_pages,
+ KBASE_REG_ZONE_CUSTOM_VA);
}
if (!reg) {
@@ -1618,21 +1620,22 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
/* 64-bit address range is the max */
goto bad_size;
+ if (!kbase_import_size_is_valid(kctx->kbdev, *va_pages))
+ goto bad_size;
+
/* SAME_VA generally not supported with imported memory (no known use cases) */
*flags &= ~BASE_MEM_SAME_VA;
if (*flags & BASE_MEM_IMPORT_SHARED)
shared_zone = true;
-#if IS_ENABLED(CONFIG_64BIT)
- if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+ if (!kbase_ctx_compat_mode(kctx)) {
/*
* 64-bit tasks require us to reserve VA on the CPU that we use
* on the GPU.
*/
shared_zone = true;
}
-#endif
if (shared_zone) {
*flags |= BASE_MEM_NEED_MMAP;
@@ -1641,7 +1644,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
} else
rbtree = &kctx->reg_rbtree_custom;
- reg = kbase_alloc_free_region(rbtree, 0, *va_pages, zone);
+ reg = kbase_alloc_free_region(kctx->kbdev, rbtree, 0, *va_pages, zone);
if (!reg)
goto no_region;
@@ -1667,11 +1670,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
user_buf->address = address;
user_buf->nr_pages = *va_pages;
user_buf->mm = current->mm;
-#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
- atomic_inc(&current->mm->mm_count);
-#else
- mmgrab(current->mm);
-#endif
+ kbase_mem_mmgrab();
if (reg->gpu_alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
user_buf->pages = vmalloc(*va_pages * sizeof(struct page *));
else
@@ -1746,10 +1745,13 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
* region, otherwise the initial content of memory would be wrong.
*/
for (i = 0; i < faulted_pages; i++) {
- dma_addr_t dma_addr =
- dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
- DMA_ATTR_SKIP_CPU_SYNC);
-
+ dma_addr_t dma_addr;
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#else
+ dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+#endif
if (dma_mapping_error(dev, dma_addr))
goto unwind_dma_map;
@@ -1776,8 +1778,12 @@ unwind_dma_map:
dma_addr_t dma_addr = user_buf->dma_addrs[i];
dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#else
dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
DMA_ATTR_SKIP_CPU_SYNC);
+#endif
}
fault_mismatch:
if (pages) {
@@ -1853,22 +1859,19 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
/* calculate the number of pages this alias will cover */
*num_pages = nents * stride;
-#if IS_ENABLED(CONFIG_64BIT)
- if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+ if (!kbase_alias_size_is_valid(kctx->kbdev, *num_pages))
+ goto bad_size;
+
+ if (!kbase_ctx_compat_mode(kctx)) {
/* 64-bit tasks must MMAP anyway, but not expose this address to
* clients
*/
*flags |= BASE_MEM_NEED_MMAP;
- reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0,
- *num_pages,
- KBASE_REG_ZONE_SAME_VA);
+ reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *num_pages,
+ KBASE_REG_ZONE_SAME_VA);
} else {
-#else
- if (1) {
-#endif
- reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
- 0, *num_pages,
- KBASE_REG_ZONE_CUSTOM_VA);
+ reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *num_pages,
+ KBASE_REG_ZONE_CUSTOM_VA);
}
if (!reg)
@@ -1919,7 +1922,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
goto bad_handle; /* Not found/already free */
if (kbase_is_region_shrinkable(aliasing_reg))
goto bad_handle; /* Ephemeral region */
- if (kbase_va_region_is_no_user_free(kctx, aliasing_reg))
+ if (kbase_va_region_is_no_user_free(aliasing_reg))
goto bad_handle; /* JIT regions can't be
* aliased. NO_USER_FREE flag
* covers the entire lifetime
@@ -1974,8 +1977,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
}
}
-#if IS_ENABLED(CONFIG_64BIT)
- if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+ if (!kbase_ctx_compat_mode(kctx)) {
/* Bind to a cookie */
if (bitmap_empty(kctx->cookies, BITS_PER_LONG)) {
dev_err(kctx->kbdev->dev, "No cookies available for allocation!");
@@ -1990,10 +1992,8 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
/* relocate to correct base */
gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
gpu_va <<= PAGE_SHIFT;
- } else /* we control the VA */ {
-#else
- if (1) {
-#endif
+ } else {
+ /* we control the VA */
if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1,
mmu_sync_info) != 0) {
dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU");
@@ -2010,9 +2010,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
return gpu_va;
-#if IS_ENABLED(CONFIG_64BIT)
no_cookie:
-#endif
no_mmap:
bad_handle:
/* Marking the source allocs as not being mapped on the GPU and putting
@@ -2227,7 +2225,7 @@ int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx,
int ret = 0;
ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + new_pages,
- alloc->pages + new_pages, delta, kctx->as_nr, false);
+ alloc->pages + new_pages, delta, delta, kctx->as_nr, false);
return ret;
}
@@ -2295,7 +2293,7 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages)
if (kbase_is_region_shrinkable(reg))
goto out_unlock;
- if (kbase_va_region_is_no_user_free(kctx, reg))
+ if (kbase_va_region_is_no_user_free(reg))
goto out_unlock;
#ifdef CONFIG_MALI_MEMORY_FULLY_BACKED
@@ -2398,18 +2396,19 @@ int kbase_mem_shrink(struct kbase_context *const kctx,
kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
if (reg->cpu_alloc != reg->gpu_alloc)
kbase_free_phy_pages_helper(reg->gpu_alloc, delta);
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (kbase_reg_current_backed_size(reg) > new_pages) {
- old_pages = new_pages;
- new_pages = kbase_reg_current_backed_size(reg);
-
- /* Update GPU mapping. */
- err = kbase_mem_grow_gpu_mapping(kctx, reg,
- new_pages, old_pages, CALLER_MMU_ASYNC);
+
+ if (kctx->kbdev->pagesize_2mb) {
+ if (kbase_reg_current_backed_size(reg) > new_pages) {
+ old_pages = new_pages;
+ new_pages = kbase_reg_current_backed_size(reg);
+
+ /* Update GPU mapping. */
+ err = kbase_mem_grow_gpu_mapping(kctx, reg, new_pages, old_pages,
+ CALLER_MMU_ASYNC);
+ }
+ } else {
+ WARN_ON(kbase_reg_current_backed_size(reg) != new_pages);
}
-#else
- WARN_ON(kbase_reg_current_backed_size(reg) != new_pages);
-#endif
}
return err;
@@ -2707,8 +2706,8 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx,
goto out;
}
- new_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, nr_pages,
- KBASE_REG_ZONE_SAME_VA);
+ new_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, nr_pages,
+ KBASE_REG_ZONE_SAME_VA);
if (!new_reg) {
err = -ENOMEM;
WARN_ON(1);
@@ -3378,79 +3377,29 @@ static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value)
void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
{
- struct mm_struct *mm;
+ struct mm_struct *mm = kctx->process_mm;
- rcu_read_lock();
- mm = rcu_dereference(kctx->process_mm);
- if (mm) {
- atomic_add(pages, &kctx->nonmapped_pages);
-#ifdef SPLIT_RSS_COUNTING
- kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
-#else
- spin_lock(&mm->page_table_lock);
- kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
- spin_unlock(&mm->page_table_lock);
-#endif
- }
- rcu_read_unlock();
-}
-
-static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
-{
- int pages;
- struct mm_struct *mm;
-
- spin_lock(&kctx->mm_update_lock);
- mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock));
- if (!mm) {
- spin_unlock(&kctx->mm_update_lock);
+ if (unlikely(!mm))
return;
- }
- rcu_assign_pointer(kctx->process_mm, NULL);
- spin_unlock(&kctx->mm_update_lock);
- synchronize_rcu();
-
- pages = atomic_xchg(&kctx->nonmapped_pages, 0);
+ atomic_add(pages, &kctx->nonmapped_pages);
#ifdef SPLIT_RSS_COUNTING
- kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages);
+ kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
#else
spin_lock(&mm->page_table_lock);
- kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages);
+ kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
spin_unlock(&mm->page_table_lock);
#endif
}
-static void kbase_special_vm_close(struct vm_area_struct *vma)
-{
- struct kbase_context *kctx;
-
- kctx = vma->vm_private_data;
- kbasep_os_process_page_usage_drain(kctx);
-}
-
-static const struct vm_operations_struct kbase_vm_special_ops = {
- .close = kbase_special_vm_close,
-};
-
static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
{
- /* check that this is the only tracking page */
- spin_lock(&kctx->mm_update_lock);
- if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) {
- spin_unlock(&kctx->mm_update_lock);
- return -EFAULT;
- }
-
- rcu_assign_pointer(kctx->process_mm, current->mm);
-
- spin_unlock(&kctx->mm_update_lock);
+ if (vma_pages(vma) != 1)
+ return -EINVAL;
/* no real access */
vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
- vma->vm_ops = &kbase_vm_special_ops;
- vma->vm_private_data = kctx;
return 0;
}
@@ -3723,23 +3672,27 @@ static void kbase_csf_user_reg_vm_open(struct vm_area_struct *vma)
static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma)
{
struct kbase_context *kctx = vma->vm_private_data;
+ struct kbase_device *kbdev;
- if (!kctx) {
+ if (unlikely(!kctx)) {
pr_debug("Close function called for the unexpected mapping");
return;
}
- if (unlikely(!kctx->csf.user_reg_vma))
- dev_warn(kctx->kbdev->dev, "user_reg_vma pointer unexpectedly NULL");
+ kbdev = kctx->kbdev;
- kctx->csf.user_reg_vma = NULL;
+ if (unlikely(!kctx->csf.user_reg.vma))
+ dev_warn(kbdev->dev, "user_reg VMA pointer unexpectedly NULL for ctx %d_%d",
+ kctx->tgid, kctx->id);
- mutex_lock(&kctx->kbdev->csf.reg_lock);
- if (unlikely(kctx->kbdev->csf.nr_user_page_mapped == 0))
- dev_warn(kctx->kbdev->dev, "Unexpected value for the USER page mapping counter");
- else
- kctx->kbdev->csf.nr_user_page_mapped--;
- mutex_unlock(&kctx->kbdev->csf.reg_lock);
+ mutex_lock(&kbdev->csf.reg_lock);
+ list_del_init(&kctx->csf.user_reg.link);
+ mutex_unlock(&kbdev->csf.reg_lock);
+
+ kctx->csf.user_reg.vma = NULL;
+
+ /* Now that the VMA is closed, drop the reference on the mali device file */
+ fput(kctx->filp);
}
/**
@@ -3784,10 +3737,11 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf)
unsigned long flags;
/* Few sanity checks up front */
- if (!kctx || (nr_pages != 1) || (vma != kctx->csf.user_reg_vma) ||
- (vma->vm_pgoff != PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE))) {
- pr_warn("Unexpected CPU page fault on USER page mapping for process %s tgid %d pid %d\n",
- current->comm, current->tgid, current->pid);
+
+ if (!kctx || (nr_pages != 1) || (vma != kctx->csf.user_reg.vma) ||
+ (vma->vm_pgoff != kctx->csf.user_reg.file_offset)) {
+ pr_err("Unexpected CPU page fault on USER page mapping for process %s tgid %d pid %d\n",
+ current->comm, current->tgid, current->pid);
return VM_FAULT_SIGBUS;
}
@@ -3796,22 +3750,22 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf)
pfn = PFN_DOWN(kbdev->reg_start + USER_BASE);
mutex_lock(&kbdev->csf.reg_lock);
+
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- /* Don't map in the actual register page if GPU is powered down.
- * Always map in the dummy page in no mali builds.
+ /* The dummy page will be mapped while the GPU is powered off.
+ *
+ * In no mali builds, the dummy page is always mapped.
*/
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
- pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_user_reg_page));
-#else
- if (!kbdev->pm.backend.gpu_powered)
- pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_user_reg_page));
-#endif
+ if (IS_ENABLED(CONFIG_MALI_NO_MALI) || !kbdev->pm.backend.gpu_powered)
+ pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.user_reg.dummy_page));
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ list_move_tail(&kctx->csf.user_reg.link, &kbdev->csf.user_reg.list);
ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev,
KBASE_MEM_GROUP_CSF_FW, vma,
vma->vm_start, pfn,
vma->vm_page_prot);
+
mutex_unlock(&kbdev->csf.reg_lock);
return ret;
@@ -3824,20 +3778,6 @@ static const struct vm_operations_struct kbase_csf_user_reg_vm_ops = {
.fault = kbase_csf_user_reg_vm_fault
};
-/**
- * kbase_csf_cpu_mmap_user_reg_page - Memory map method for USER page.
- *
- * @kctx: Pointer of the kernel context.
- * @vma: Pointer to the struct containing the information about
- * the userspace mapping of USER page.
- *
- * Return: 0 on success, error code otherwise.
- *
- * Note:
- * New Base will request Kbase to read the LATEST_FLUSH of USER page on its behalf.
- * But this function needs to be kept for backward-compatibility as old Base (<=1.12)
- * will try to mmap USER page for direct access when it creates a base context.
- */
static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx,
struct vm_area_struct *vma)
{
@@ -3845,7 +3785,7 @@ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx,
struct kbase_device *kbdev = kctx->kbdev;
/* Few sanity checks */
- if (kctx->csf.user_reg_vma)
+ if (kctx->csf.user_reg.vma)
return -EBUSY;
if (nr_pages != 1)
@@ -3864,19 +3804,21 @@ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx,
*/
vma->vm_flags |= VM_PFNMAP;
- kctx->csf.user_reg_vma = vma;
+ kctx->csf.user_reg.vma = vma;
mutex_lock(&kbdev->csf.reg_lock);
- kbdev->csf.nr_user_page_mapped++;
-
- if (!kbdev->csf.mali_file_inode)
- kbdev->csf.mali_file_inode = kctx->filp->f_inode;
-
- if (unlikely(kbdev->csf.mali_file_inode != kctx->filp->f_inode))
- dev_warn(kbdev->dev, "Device file inode pointer not same for all contexts");
-
+ kctx->csf.user_reg.file_offset = kbdev->csf.user_reg.file_offset++;
mutex_unlock(&kbdev->csf.reg_lock);
+ /* Make VMA point to the special internal file, but don't drop the
+ * reference on the mali device file (that would be done later when the
+ * VMA is closed).
+ */
+ vma->vm_file = kctx->kbdev->csf.user_reg.filp;
+ get_file(vma->vm_file);
+
+ /* Also adjust the vm_pgoff */
+ vma->vm_pgoff = kctx->csf.user_reg.file_offset;
vma->vm_ops = &kbase_csf_user_reg_vm_ops;
vma->vm_private_data = kctx;
diff --git a/mali_kbase/mali_kbase_mem_migrate.c b/mali_kbase/mali_kbase_mem_migrate.c
index 9c4b0d9..1dc76d0 100644
--- a/mali_kbase/mali_kbase_mem_migrate.c
+++ b/mali_kbase/mali_kbase_mem_migrate.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -60,6 +60,7 @@ bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_a
lock_page(p);
#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE)
__SetPageMovable(p, &movable_ops);
+ page_md->status = PAGE_MOVABLE_SET(page_md->status);
#else
/* In some corner cases, the driver may attempt to allocate memory pages
* even before the device file is open and the mapping for address space
@@ -78,8 +79,10 @@ bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_a
* is enabled and because the pages may always return to memory pools and
* gain the movable property later on in their life cycle.
*/
- if (kbdev->mem_migrate.inode && kbdev->mem_migrate.inode->i_mapping)
+ if (kbdev->mem_migrate.inode && kbdev->mem_migrate.inode->i_mapping) {
__SetPageMovable(p, kbdev->mem_migrate.inode->i_mapping);
+ page_md->status = PAGE_MOVABLE_SET(page_md->status);
+ }
#endif
unlock_page(p);
@@ -112,6 +115,7 @@ static void kbase_free_pages_worker(struct work_struct *work)
container_of(work, struct kbase_mem_migrate, free_pages_work);
struct kbase_device *kbdev = container_of(mem_migrate, struct kbase_device, mem_migrate);
struct page *p, *tmp;
+ struct kbase_page_metadata *page_md;
LIST_HEAD(free_list);
spin_lock(&mem_migrate->free_pages_lock);
@@ -123,8 +127,11 @@ static void kbase_free_pages_worker(struct work_struct *work)
list_del_init(&p->lru);
lock_page(p);
- if (PageMovable(p))
+ page_md = kbase_page_private(p);
+ if (IS_PAGE_MOVABLE(page_md->status)) {
__ClearPageMovable(p);
+ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status);
+ }
unlock_page(p);
kbase_free_page_metadata(kbdev, p, &group_id);
@@ -189,9 +196,12 @@ static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new
#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE)
__SetPageMovable(new_page, &movable_ops);
+ page_md->status = PAGE_MOVABLE_SET(page_md->status);
#else
- if (kbdev->mem_migrate.inode->i_mapping)
+ if (kbdev->mem_migrate.inode->i_mapping) {
__SetPageMovable(new_page, kbdev->mem_migrate.inode->i_mapping);
+ page_md->status = PAGE_MOVABLE_SET(page_md->status);
+ }
#endif
SetPagePrivate(new_page);
get_page(new_page);
@@ -257,9 +267,12 @@ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct pa
/* Set PG_movable to the new page. */
#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE)
__SetPageMovable(new_page, &movable_ops);
+ page_md->status = PAGE_MOVABLE_SET(page_md->status);
#else
- if (kctx->kbdev->mem_migrate.inode->i_mapping)
+ if (kctx->kbdev->mem_migrate.inode->i_mapping) {
__SetPageMovable(new_page, kctx->kbdev->mem_migrate.inode->i_mapping);
+ page_md->status = PAGE_MOVABLE_SET(page_md->status);
+ }
#endif
} else
dma_unmap_page(kctx->kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
@@ -288,7 +301,7 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode)
CSTD_UNUSED(mode);
- if (!PageMovable(p) || !page_md)
+ if (!page_md || !IS_PAGE_MOVABLE(page_md->status))
return false;
if (!spin_trylock(&page_md->migrate_lock))
@@ -327,6 +340,7 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode)
case NOT_MOVABLE:
/* Opportunistically clear the movable property for these pages */
__ClearPageMovable(p);
+ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status);
break;
default:
/* State should always fall in one of the previous cases!
@@ -401,7 +415,7 @@ static int kbase_page_migrate(struct page *new_page, struct page *old_page, enum
#endif
CSTD_UNUSED(mode);
- if (!PageMovable(old_page) || !page_md)
+ if (!page_md || !IS_PAGE_MOVABLE(page_md->status))
return -EINVAL;
if (!spin_trylock(&page_md->migrate_lock))
@@ -469,8 +483,10 @@ static int kbase_page_migrate(struct page *new_page, struct page *old_page, enum
* error is returned are called putback on, which may not be what we
* expect.
*/
- if (err < 0 && err != -EAGAIN)
+ if (err < 0 && err != -EAGAIN) {
__ClearPageMovable(old_page);
+ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status);
+ }
return err;
}
@@ -542,6 +558,7 @@ static void kbase_page_putback(struct page *p)
if (status_mem_pool || status_free_isolated_in_progress ||
status_free_pt_isolated_in_progress) {
__ClearPageMovable(p);
+ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status);
if (!WARN_ON_ONCE(!kbdev)) {
struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate;
diff --git a/mali_kbase/mali_kbase_mem_migrate.h b/mali_kbase/mali_kbase_mem_migrate.h
index 30d0803..76bbc99 100644
--- a/mali_kbase/mali_kbase_mem_migrate.h
+++ b/mali_kbase/mali_kbase_mem_migrate.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -23,13 +23,22 @@
* DOC: Base kernel page migration implementation.
*/
-#define PAGE_STATUS_MASK ((u8)0x7F)
+#define PAGE_STATUS_MASK ((u8)0x3F)
#define PAGE_STATUS_GET(status) (status & PAGE_STATUS_MASK)
#define PAGE_STATUS_SET(status, value) ((status & ~PAGE_STATUS_MASK) | (value & PAGE_STATUS_MASK))
+
#define PAGE_ISOLATE_SHIFT (7)
+#define PAGE_ISOLATE_MASK ((u8)1 << PAGE_ISOLATE_SHIFT)
#define PAGE_ISOLATE_SET(status, value) \
- ((status & PAGE_STATUS_MASK) | (value << PAGE_ISOLATE_SHIFT))
-#define IS_PAGE_ISOLATED(status) ((bool)(status & ~PAGE_STATUS_MASK))
+ ((status & ~PAGE_ISOLATE_MASK) | (value << PAGE_ISOLATE_SHIFT))
+#define IS_PAGE_ISOLATED(status) ((bool)(status & PAGE_ISOLATE_MASK))
+
+#define PAGE_MOVABLE_SHIFT (6)
+#define PAGE_MOVABLE_MASK ((u8)1 << PAGE_MOVABLE_SHIFT)
+#define PAGE_MOVABLE_CLEAR(status) ((status) & ~PAGE_MOVABLE_MASK)
+#define PAGE_MOVABLE_SET(status) (status | PAGE_MOVABLE_MASK)
+
+#define IS_PAGE_MOVABLE(status) ((bool)(status & PAGE_MOVABLE_MASK))
/* Global integer used to determine if module parameter value has been
* provided and if page migration feature is enabled.
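With PAGE_STATUS_MASK narrowed from 0x7F to 0x3F, the 8-bit page metadata status field now splits into a status code in bits 0-5, a MOVABLE flag in bit 6 and an ISOLATE flag in bit 7. A minimal sketch exercising the new accessors (the function itself is illustrative only):

static void page_status_bits_sketch(void)
{
	u8 status = 0;

	/* Mark the page movable, as done right after __SetPageMovable(). */
	status = PAGE_MOVABLE_SET(status);
	WARN_ON(!IS_PAGE_MOVABLE(status));
	WARN_ON(IS_PAGE_ISOLATED(status));

	/* Isolation (bit 7) is tracked independently of movability (bit 6). */
	status = PAGE_ISOLATE_SET(status, 1);
	WARN_ON(!IS_PAGE_ISOLATED(status));

	/* Clear movability again, e.g. around __ClearPageMovable(). */
	status = PAGE_MOVABLE_CLEAR(status);
	WARN_ON(IS_PAGE_MOVABLE(status));
}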
diff --git a/mali_kbase/mali_kbase_mem_pool.c b/mali_kbase/mali_kbase_mem_pool.c
index 75569cc..fa8f34d 100644
--- a/mali_kbase/mali_kbase_mem_pool.c
+++ b/mali_kbase/mali_kbase_mem_pool.c
@@ -28,6 +28,11 @@
#include <linux/shrinker.h>
#include <linux/atomic.h>
#include <linux/version.h>
+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
+#include <linux/sched/signal.h>
+#else
+#include <linux/signal.h>
+#endif
#define pool_dbg(pool, format, ...) \
dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format, \
@@ -39,6 +44,47 @@
#define NOT_DIRTY false
#define NOT_RECLAIMED false
+/**
+ * can_alloc_page() - Check if the current thread can allocate a physical page
+ *
+ * @pool: Pointer to the memory pool.
+ * @page_owner: Pointer to the task/process that created the Kbase context
+ * for which a page needs to be allocated. It can be NULL if
+ * the page won't be associated with any Kbase context.
+ * @alloc_from_kthread: Flag indicating that the current thread is a kernel thread.
+ *
+ * This function checks if the current thread is a kernel thread and can make a
+ * request to the kernel to allocate a physical page. If the kernel thread is allocating
+ * a page for the Kbase context and the process that created the context is exiting
+ * or is being killed, then there is no point in doing a page allocation.
+ *
+ * The check done by the function is particularly helpful when the system is running
+ * low on memory. When a page is allocated from the context of a kernel thread, the
+ * OoM killer doesn't consider the kernel thread for killing and the kernel keeps
+ * retrying the allocation as long as the OoM killer is able to kill other processes.
+ * The check allows the kernel thread to quickly exit the page allocation loop once the
+ * OoM killer has initiated the killing of @page_owner, thereby unblocking the context
+ * termination for @page_owner and the freeing of GPU memory allocated by it. This helps
+ * prevent a kernel panic and also limits the number of innocent processes
+ * that get killed.
+ *
+ * Return: true if the page can be allocated otherwise false.
+ */
+static inline bool can_alloc_page(struct kbase_mem_pool *pool, struct task_struct *page_owner,
+ const bool alloc_from_kthread)
+{
+ if (likely(!alloc_from_kthread || !page_owner))
+ return true;
+
+ if ((page_owner->flags & PF_EXITING) || fatal_signal_pending(page_owner)) {
+ dev_info(pool->kbdev->dev, "%s : Process %s/%d exiting",
+ __func__, page_owner->comm, task_pid_nr(page_owner));
+ return false;
+ }
+
+ return true;
+}
+
static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool)
{
ssize_t max_size = kbase_mem_pool_max_size(pool);
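The check is intended to be used right before falling back to the kernel allocator, as the updated kbase_mem_pool_grow() and kbase_mem_pool_alloc_pages() below do. A condensed sketch of that pattern (the function name is illustrative and the pool bookkeeping is elided):

static int grow_loop_sketch(struct kbase_mem_pool *pool,
			    struct task_struct *page_owner, size_t nr_to_grow)
{
	const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD);
	size_t i;

	for (i = 0; i < nr_to_grow; i++) {
		struct page *p;

		/* Bail out early if the owning process is already dying, rather
		 * than fighting the OoM killer for pages that will be freed anyway.
		 */
		if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread)))
			return -ENOMEM;

		p = kbase_mem_alloc_page(pool);
		if (!p)
			return -ENOMEM;

		/* ...add p to the pool, as kbase_mem_pool_grow() does... */
	}

	return 0;
}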
@@ -342,10 +388,12 @@ static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool,
return nr_freed;
}
-int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow)
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow,
+ struct task_struct *page_owner)
{
struct page *p;
size_t i;
+ const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD);
kbase_mem_pool_lock(pool);
@@ -360,6 +408,9 @@ int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow)
}
kbase_mem_pool_unlock(pool);
+ if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread)))
+ return -ENOMEM;
+
p = kbase_mem_alloc_page(pool);
if (!p) {
kbase_mem_pool_lock(pool);
@@ -392,7 +443,7 @@ void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size)
if (new_size < cur_size)
kbase_mem_pool_shrink(pool, cur_size - new_size);
else if (new_size > cur_size)
- err = kbase_mem_pool_grow(pool, new_size - cur_size);
+ err = kbase_mem_pool_grow(pool, new_size - cur_size, NULL);
if (err) {
size_t grown_size = kbase_mem_pool_size(pool);
@@ -656,13 +707,15 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
}
int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
- struct tagged_addr *pages, bool partial_allowed)
+ struct tagged_addr *pages, bool partial_allowed,
+ struct task_struct *page_owner)
{
struct page *p;
size_t nr_from_pool;
size_t i = 0;
int err = -ENOMEM;
size_t nr_pages_internal;
+ const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD);
nr_pages_internal = nr_4k_pages / (1u << (pool->order));
@@ -697,7 +750,7 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
if (i != nr_4k_pages && pool->next_pool) {
/* Allocate via next pool */
err = kbase_mem_pool_alloc_pages(pool->next_pool, nr_4k_pages - i, pages + i,
- partial_allowed);
+ partial_allowed, page_owner);
if (err < 0)
goto err_rollback;
@@ -706,6 +759,9 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
} else {
/* Get any remaining pages from kernel */
while (i != nr_4k_pages) {
+ if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread)))
+ goto err_rollback;
+
p = kbase_mem_alloc_page(pool);
if (!p) {
if (partial_allowed)
diff --git a/mali_kbase/mali_kbase_refcount_defs.h b/mali_kbase/mali_kbase_refcount_defs.h
new file mode 100644
index 0000000..c517a2d
--- /dev/null
+++ b/mali_kbase/mali_kbase_refcount_defs.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_REFCOUNT_DEFS_H_
+#define _KBASE_REFCOUNT_DEFS_H_
+
+/*
+ * The Refcount API is available from 4.11 onwards
+ * This file hides the resulting compatibility issues from the rest of the driver
+ */
+
+#include <linux/version.h>
+#include <linux/types.h>
+
+#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
+
+#define kbase_refcount_t atomic_t
+#define kbase_refcount_read(x) atomic_read(x)
+#define kbase_refcount_set(x, v) atomic_set(x, v)
+#define kbase_refcount_dec_and_test(x) atomic_dec_and_test(x)
+#define kbase_refcount_dec(x) atomic_dec(x)
+#define kbase_refcount_inc_not_zero(x) atomic_inc_not_zero(x)
+#define kbase_refcount_inc(x) atomic_inc(x)
+
+#else
+
+#include <linux/refcount.h>
+
+#define kbase_refcount_t refcount_t
+#define kbase_refcount_read(x) refcount_read(x)
+#define kbase_refcount_set(x, v) refcount_set(x, v)
+#define kbase_refcount_dec_and_test(x) refcount_dec_and_test(x)
+#define kbase_refcount_dec(x) refcount_dec(x)
+#define kbase_refcount_inc_not_zero(x) refcount_inc_not_zero(x)
+#define kbase_refcount_inc(x) refcount_inc(x)
+
+#endif /* (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) */
+
+#endif /* _KBASE_REFCOUNT_DEFS_H_ */
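A brief usage sketch of the wrapper, mirroring the get/put pattern that kbase_va_region_alloc_get()/kbase_va_region_alloc_put() adopt in mali_kbase_mem.h; the structure and functions here are illustrative, not driver code:

struct refcounted_obj_sketch {
	kbase_refcount_t refcnt;
};

static void obj_init_sketch(struct refcounted_obj_sketch *obj)
{
	kbase_refcount_set(&obj->refcnt, 1);
}

static struct refcounted_obj_sketch *obj_get_sketch(struct refcounted_obj_sketch *obj)
{
	kbase_refcount_inc(&obj->refcnt);
	return obj;
}

static void obj_put_sketch(struct refcounted_obj_sketch *obj)
{
	/* Resolves to atomic_dec_and_test() before 4.11 and to
	 * refcount_dec_and_test() afterwards; free only on the final put.
	 */
	if (kbase_refcount_dec_and_test(&obj->refcnt))
		kfree(obj);
}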
diff --git a/mali_kbase/mali_kbase_softjobs.c b/mali_kbase/mali_kbase_softjobs.c
index b64bbc1..f494a8f 100644
--- a/mali_kbase/mali_kbase_softjobs.c
+++ b/mali_kbase/mali_kbase_softjobs.c
@@ -943,6 +943,13 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
int ret;
u32 i;
+ if (!kbase_mem_allow_alloc(kctx)) {
+ dev_dbg(kbdev->dev, "Invalid attempt to allocate JIT memory by %s/%d for ctx %d_%d",
+ current->comm, current->pid, kctx->tgid, kctx->id);
+ ret = -EINVAL;
+ goto fail;
+ }
+
/* For backwards compatibility, and to prevent reading more than 1 jit
* info struct on jit version 1
*/
diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
index 4a09265..4cac787 100644
--- a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
+++ b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -150,17 +150,18 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
"true" : "false";
int as_no = as->number;
unsigned long flags;
+ const uintptr_t fault_addr = fault->addr;
/* terminal fault, print info about the fault */
dev_err(kbdev->dev,
- "GPU bus fault in AS%d at PA 0x%016llX\n"
+ "GPU bus fault in AS%d at PA %pK\n"
"PA_VALID: %s\n"
"raw fault status: 0x%X\n"
"exception type 0x%X: %s\n"
"access type 0x%X: %s\n"
"source id 0x%X\n"
"pid: %d\n",
- as_no, fault->addr,
+ as_no, (void *)fault_addr,
addr_valid,
status,
exception_type, kbase_gpu_exception_name(exception_type),
@@ -557,6 +558,7 @@ int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i)
kbdev->as[i].bf_data.addr = 0ULL;
kbdev->as[i].pf_data.addr = 0ULL;
kbdev->as[i].gf_data.addr = 0ULL;
+ kbdev->as[i].is_unresponsive = false;
kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", WQ_UNBOUND, 1, i);
if (!kbdev->as[i].pf_wq)
diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
index 83605c3..d716ce0 100644
--- a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
+++ b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -63,15 +63,16 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
u32 const exception_data = (status >> 8) & 0xFFFFFF;
int const as_no = as->number;
unsigned long flags;
+ const uintptr_t fault_addr = fault->addr;
/* terminal fault, print info about the fault */
dev_err(kbdev->dev,
- "GPU bus fault in AS%d at PA 0x%016llX\n"
+ "GPU bus fault in AS%d at PA %pK\n"
"raw fault status: 0x%X\n"
"exception type 0x%X: %s\n"
"exception data 0x%X\n"
"pid: %d\n",
- as_no, fault->addr,
+ as_no, (void *)fault_addr,
status,
exception_type, kbase_gpu_exception_name(exception_type),
exception_data,
@@ -428,6 +429,7 @@ int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i)
kbdev->as[i].number = i;
kbdev->as[i].bf_data.addr = 0ULL;
kbdev->as[i].pf_data.addr = 0ULL;
+ kbdev->as[i].is_unresponsive = false;
kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%u", 0, 1, i);
if (!kbdev->as[i].pf_wq)
diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c
index 41876ff..ea58381 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.c
+++ b/mali_kbase/mmu/mali_kbase_mmu.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -144,35 +144,21 @@ static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, siz
enum kbase_mmu_op_type op)
{
u32 flush_op;
- int ret;
-
- if (WARN_ON(kbdev == NULL))
- return;
lockdep_assert_held(&kbdev->hwaccess_lock);
/* Translate operation to command */
- if (op == KBASE_MMU_OP_FLUSH_PT) {
+ if (op == KBASE_MMU_OP_FLUSH_PT)
flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2;
- } else if (op == KBASE_MMU_OP_FLUSH_MEM) {
+ else if (op == KBASE_MMU_OP_FLUSH_MEM)
flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC;
- } else {
+ else {
dev_warn(kbdev->dev, "Invalid flush request (op = %d)", op);
return;
}
- ret = kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op);
-
- if (ret) {
- /* Flush failed to complete, assume the GPU has hung and
- * perform a reset to recover
- */
- dev_err(kbdev->dev,
- "Flush for physical address range did not complete. Issuing GPU soft-reset to recover");
-
- if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
- kbase_reset_gpu(kbdev);
- }
+ if (kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op))
+ dev_err(kbdev->dev, "Flush for physical address range did not complete");
}
#endif
@@ -190,21 +176,15 @@ static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, siz
static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr,
const struct kbase_mmu_hw_op_param *op_param)
{
- int err = 0;
unsigned long flags;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) {
as_nr = kctx ? kctx->as_nr : as_nr;
- err = kbase_mmu_hw_do_unlock(kbdev, &kbdev->as[as_nr], op_param);
- }
-
- if (err) {
- dev_err(kbdev->dev,
- "Invalidate after GPU page table update did not complete. Issuing GPU soft-reset to recover");
- if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
- kbase_reset_gpu(kbdev);
+ if (kbase_mmu_hw_do_unlock(kbdev, &kbdev->as[as_nr], op_param))
+ dev_err(kbdev->dev,
+ "Invalidate after GPU page table update did not complete");
}
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -215,25 +195,14 @@ static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kct
static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param)
{
- int err = 0;
unsigned long flags;
/* AS transaction begin */
mutex_lock(&kbdev->mmu_hw_mutex);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- if (kbdev->pm.backend.gpu_powered)
- err = kbase_mmu_hw_do_flush_locked(kbdev, as, op_param);
-
- if (err) {
- /* Flush failed to complete, assume the GPU has hung and
- * perform a reset to recover.
- */
- dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover");
-
- if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
- kbase_reset_gpu_locked(kbdev);
- }
+ if (kbdev->pm.backend.gpu_powered && (kbase_mmu_hw_do_flush_locked(kbdev, as, op_param)))
+ dev_err(kbdev->dev, "Flush for GPU page table update did not complete");
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
mutex_unlock(&kbdev->mmu_hw_mutex);
@@ -308,7 +277,6 @@ static void mmu_flush_invalidate(struct kbase_device *kbdev, struct kbase_contex
static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_context *kctx,
int as_nr, const struct kbase_mmu_hw_op_param *op_param)
{
- int err = 0;
unsigned long flags;
/* AS transaction begin */
@@ -317,19 +285,8 @@ static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct
if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) {
as_nr = kctx ? kctx->as_nr : as_nr;
- err = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[as_nr],
- op_param);
- }
-
- if (err) {
- /* Flush failed to complete, assume the GPU has hung and
- * perform a reset to recover.
- */
- dev_err(kbdev->dev,
- "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover");
-
- if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
- kbase_reset_gpu(kbdev);
+ if (kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[as_nr], op_param))
+ dev_err(kbdev->dev, "Flush for GPU page table update did not complete");
}
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -405,13 +362,11 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kb
* @level: The level of MMU page table.
* @flush_op: The type of MMU flush operation to perform.
* @dirty_pgds: Flags to track every level where a PGD has been updated.
- * @free_pgds_list: Linked list of the page directory pages to free.
*/
static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut, phys_addr_t *pgds,
u64 vpfn, int level,
- enum kbase_mmu_op_type flush_op, u64 *dirty_pgds,
- struct list_head *free_pgds_list);
+ enum kbase_mmu_op_type flush_op, u64 *dirty_pgds);
static void kbase_mmu_account_freed_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
{
@@ -485,14 +440,17 @@ static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_tabl
phys_addr_t pgd)
{
struct page *p;
+ bool page_is_isolated = false;
lockdep_assert_held(&mmut->mmu_lock);
p = pfn_to_page(PFN_DOWN(pgd));
+ page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p);
- kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true);
-
- kbase_mmu_account_freed_pgd(kbdev, mmut);
+ if (likely(!page_is_isolated)) {
+ kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true);
+ kbase_mmu_account_freed_pgd(kbdev, mmut);
+ }
}
/**
@@ -500,41 +458,42 @@ static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_tabl
*
* @kbdev: Device pointer.
* @mmut: GPU MMU page table.
- * @free_pgds_list: Linked list of the page directory pages to free.
*
* This function will call kbase_mmu_free_pgd() on each page directory page
- * present in the @free_pgds_list.
+ * present in the list of free PGDs inside @mmut.
*
* The function is supposed to be called after the GPU cache and MMU TLB has
* been invalidated post the teardown loop.
+ *
+ * The mmu_lock shall be held prior to calling the function.
*/
-static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
- struct list_head *free_pgds_list)
+static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
{
- struct page *page, *next_page;
+ size_t i;
- mutex_lock(&mmut->mmu_lock);
+ lockdep_assert_held(&mmut->mmu_lock);
- list_for_each_entry_safe(page, next_page, free_pgds_list, lru) {
- list_del_init(&page->lru);
- kbase_mmu_free_pgd(kbdev, mmut, page_to_phys(page));
- }
+ for (i = 0; i < mmut->scratch_mem.free_pgds.head_index; i++)
+ kbase_mmu_free_pgd(kbdev, mmut, page_to_phys(mmut->scratch_mem.free_pgds.pgds[i]));
- mutex_unlock(&mmut->mmu_lock);
+ mmut->scratch_mem.free_pgds.head_index = 0;
}
-static void kbase_mmu_add_to_free_pgds_list(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut,
- struct page *p, struct list_head *free_pgds_list)
+static void kbase_mmu_add_to_free_pgds_list(struct kbase_mmu_table *mmut, struct page *p)
{
- bool page_is_isolated = false;
-
lockdep_assert_held(&mmut->mmu_lock);
- page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p);
+ if (WARN_ON_ONCE(mmut->scratch_mem.free_pgds.head_index > (MAX_FREE_PGDS - 1)))
+ return;
+
+ mmut->scratch_mem.free_pgds.pgds[mmut->scratch_mem.free_pgds.head_index++] = p;
+}
+
+static inline void kbase_mmu_reset_free_pgds_list(struct kbase_mmu_table *mmut)
+{
+ lockdep_assert_held(&mmut->mmu_lock);
- if (likely(!page_is_isolated))
- list_add(&p->lru, free_pgds_list);
+ mmut->scratch_mem.free_pgds.head_index = 0;
}
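
The hunks above replace the free_pgds_list that callers used to pass around (a struct list_head of pages) with a small fixed-capacity array held in mmut->scratch_mem.free_pgds and protected by mmu_lock: teardown paths push PGD pages into it, and kbase_mmu_free_pgds_list() drains and resets it after the TLB invalidation. A minimal stand-alone C sketch of that lifecycle follows; the MAX_FREE_PGDS value, the struct layout and the function names below are illustrative assumptions, not the real kbase definitions.

#include <stdio.h>
#include <stddef.h>

#define MAX_FREE_PGDS 16 /* assumed capacity; the real bound lives in kbase headers */

struct free_pgds_model {
	unsigned long pgds[MAX_FREE_PGDS]; /* stands in for struct page * */
	size_t head_index;                 /* next slot to fill */
};

/* mirrors kbase_mmu_add_to_free_pgds_list(): drop the page if the array is full */
static void add_to_free_pgds(struct free_pgds_model *f, unsigned long pgd)
{
	if (f->head_index > MAX_FREE_PGDS - 1)
		return;
	f->pgds[f->head_index++] = pgd;
}

/* mirrors kbase_mmu_free_pgds_list(): free everything collected, then reset */
static void free_pgds(struct free_pgds_model *f)
{
	size_t i;

	for (i = 0; i < f->head_index; i++)
		printf("free PGD at %#lx\n", f->pgds[i]);
	f->head_index = 0;
}

int main(void)
{
	struct free_pgds_model f = { .head_index = 0 };

	add_to_free_pgds(&f, 0x1000);
	add_to_free_pgds(&f, 0x2000);
	free_pgds(&f); /* the driver holds mmu_lock around both steps */
	return 0;
}
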
/**
@@ -627,6 +586,7 @@ static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev,
*/
const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;
struct kbase_mmu_hw_op_param op_param;
+ int ret = 0;
mutex_lock(&kbdev->mmu_hw_mutex);
@@ -645,16 +605,20 @@ static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev,
spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
op_param.flush_skip_levels =
pgd_level_to_skip_flush(dirty_pgds);
- kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, faulting_as, &op_param);
+ ret = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, faulting_as, &op_param);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
} else {
mmu_hw_operation_begin(kbdev);
- kbase_mmu_hw_do_flush(kbdev, faulting_as, &op_param);
+ ret = kbase_mmu_hw_do_flush(kbdev, faulting_as, &op_param);
mmu_hw_operation_end(kbdev);
}
mutex_unlock(&kbdev->mmu_hw_mutex);
+ if (ret)
+ dev_err(kbdev->dev,
+ "Flush for GPU page fault due to write access did not complete");
+
kbase_mmu_hw_enable_fault(kbdev, faulting_as,
KBASE_MMU_FAULT_TYPE_PAGE);
}
@@ -869,17 +833,13 @@ static bool page_fault_try_alloc(struct kbase_context *kctx,
return false;
}
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (new_pages >= (SZ_2M / SZ_4K)) {
+ if (kctx->kbdev->pagesize_2mb && new_pages >= (SZ_2M / SZ_4K)) {
root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id];
*grow_2mb_pool = true;
} else {
-#endif
root_pool = &kctx->mem_pools.small[region->gpu_alloc->group_id];
*grow_2mb_pool = false;
-#ifdef CONFIG_MALI_2MB_ALLOC
}
-#endif
if (region->gpu_alloc != region->cpu_alloc)
new_pages *= 2;
@@ -1128,22 +1088,22 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
}
page_fault_retry:
-#ifdef CONFIG_MALI_2MB_ALLOC
- /* Preallocate (or re-allocate) memory for the sub-allocation structs if necessary */
- for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
- if (!prealloc_sas[i]) {
- prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
-
+ if (kbdev->pagesize_2mb) {
+ /* Preallocate (or re-allocate) memory for the sub-allocation structs if necessary */
+ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
if (!prealloc_sas[i]) {
- kbase_mmu_report_fault_and_kill(
- kctx, faulting_as,
- "Failed pre-allocating memory for sub-allocations' metadata",
- fault);
- goto fault_done;
+ prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
+
+ if (!prealloc_sas[i]) {
+ kbase_mmu_report_fault_and_kill(
+ kctx, faulting_as,
+ "Failed pre-allocating memory for sub-allocations' metadata",
+ fault);
+ goto fault_done;
+ }
}
}
}
-#endif /* CONFIG_MALI_2MB_ALLOC */
/* so we have a translation fault,
* let's see if it is for growable memory
@@ -1457,8 +1417,7 @@ page_fault_retry:
* Otherwise fail the allocation.
*/
if (pages_to_grow > 0) {
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (grow_2mb_pool) {
+ if (kbdev->pagesize_2mb && grow_2mb_pool) {
/* Round page requirement up to nearest 2 MB */
struct kbase_mem_pool *const lp_mem_pool =
&kctx->mem_pools.large[
@@ -1469,18 +1428,15 @@ page_fault_retry:
>> lp_mem_pool->order;
ret = kbase_mem_pool_grow(lp_mem_pool,
- pages_to_grow);
+ pages_to_grow, kctx->task);
} else {
-#endif
struct kbase_mem_pool *const mem_pool =
&kctx->mem_pools.small[
region->gpu_alloc->group_id];
ret = kbase_mem_pool_grow(mem_pool,
- pages_to_grow);
-#ifdef CONFIG_MALI_2MB_ALLOC
+ pages_to_grow, kctx->task);
}
-#endif
}
if (ret < 0) {
/* failed to extend, handle as a normal PF */
@@ -1570,15 +1526,24 @@ alloc_free:
return KBASE_MMU_INVALID_PGD_ADDRESS;
}
-/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the
- * new table from the pool if needed and possible
+/**
+ * mmu_get_next_pgd() - Given PGD PFN for level N, return PGD PFN for level N+1
+ *
+ * @kbdev: Device pointer.
+ * @mmut: GPU MMU page table.
+ * @pgd: Physical address of level N page directory.
+ * @vpfn: The virtual page frame number.
+ * @level: The level of MMU page table (N).
+ *
+ * Return:
+ * * 0 - OK
+ * * -EFAULT - level N+1 PGD does not exist
+ * * -EINVAL - kmap() failed for level N PGD PFN
*/
static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
- phys_addr_t *pgd, u64 vpfn, int level, bool *newly_created_pgd,
- u64 *dirty_pgds)
+ phys_addr_t *pgd, u64 vpfn, int level)
{
u64 *page;
- u64 pgd_vpfn = vpfn;
phys_addr_t target_pgd;
struct page *p;
@@ -1594,67 +1559,15 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *
p = pfn_to_page(PFN_DOWN(*pgd));
page = kmap(p);
if (page == NULL) {
- dev_warn(kbdev->dev, "%s: kmap failure", __func__);
+ dev_err(kbdev->dev, "%s: kmap failure", __func__);
return -EINVAL;
}
if (!kbdev->mmu_mode->pte_is_valid(page[vpfn], level)) {
- unsigned int current_valid_entries;
- u64 managed_pte;
-
- target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut);
- if (target_pgd == KBASE_MMU_INVALID_PGD_ADDRESS) {
- dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure", __func__);
- kunmap(p);
- return -ENOMEM;
- }
-
- current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(page);
- kbdev->mmu_mode->entry_set_pte(&managed_pte, target_pgd);
- page[vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte(
- kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, managed_pte);
- kbdev->mmu_mode->set_num_valid_entries(page, current_valid_entries + 1);
-
- /* Rely on the caller to update the address space flags. */
- if (newly_created_pgd && !*newly_created_pgd) {
- *newly_created_pgd = true;
- if (dirty_pgds)
- *dirty_pgds |= 1ULL << level;
- }
-
- /* A new valid entry is added to an existing PGD. Perform the
- * invalidate operation for GPU cache as it could be having a
- * cacheline that contains the entry (in an invalid form).
- * Even if the parent PGD was newly created, invalidation of
- * GPU cache is still needed. For explanation, please refer
- * the comment in kbase_mmu_insert_pages_no_flush().
- */
- kbase_mmu_sync_pgd(kbdev, mmut->kctx,
- *pgd + (vpfn * sizeof(u64)),
- kbase_dma_addr(p) + (vpfn * sizeof(u64)),
- sizeof(u64), KBASE_MMU_OP_FLUSH_PT);
-
- /* Update the new target_pgd page to its stable state */
- if (kbase_page_migration_enabled) {
- struct kbase_page_metadata *page_md =
- kbase_page_private(phys_to_page(target_pgd));
-
- spin_lock(&page_md->migrate_lock);
-
- WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != ALLOCATE_IN_PROGRESS ||
- IS_PAGE_ISOLATED(page_md->status));
-
- if (mmut->kctx) {
- page_md->status = PAGE_STATUS_SET(page_md->status, PT_MAPPED);
- page_md->data.pt_mapped.mmut = mmut;
- page_md->data.pt_mapped.pgd_vpfn_level =
- PGD_VPFN_LEVEL_SET(pgd_vpfn, level);
- } else {
- page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE);
- }
-
- spin_unlock(&page_md->migrate_lock);
- }
+ dev_dbg(kbdev->dev, "%s: invalid PTE at level %d vpfn 0x%llx", __func__, level,
+ vpfn);
+ kunmap(p);
+ return -EFAULT;
} else {
target_pgd = kbdev->mmu_mode->pte_to_phy_addr(
kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
@@ -1667,12 +1580,69 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *
return 0;
}
+/**
+ * mmu_get_lowest_valid_pgd() - Find a valid PGD at or closest to in_level
+ *
+ * @kbdev: Device pointer.
+ * @mmut: GPU MMU page table.
+ * @vpfn: The virtual page frame number.
+ * @in_level: The level of MMU page table (N).
+ * @out_level: Set to the level of the lowest valid PGD found on success.
+ * Invalid on error.
+ * @out_pgd: Set to the lowest valid PGD found on success.
+ * Invalid on error.
+ *
+ * Does a page table walk starting from top level (L0) to in_level to find a valid PGD at or
+ * closest to in_level
+ *
+ * Terminology:
+ * Level-0 = Top-level = highest
+ * Level-3 = Bottom-level = lowest
+ *
+ * Return:
+ * * 0 - OK
+ * * -EINVAL - kmap() failed during page table walk.
+ */
+static int mmu_get_lowest_valid_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, int in_level, int *out_level, phys_addr_t *out_pgd)
+{
+ phys_addr_t pgd;
+ int l;
+ int err = 0;
+
+ lockdep_assert_held(&mmut->mmu_lock);
+ pgd = mmut->pgd;
+
+ for (l = MIDGARD_MMU_TOPLEVEL; l < in_level; l++) {
+ err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l);
+
+ /* Handle failure condition */
+ if (err) {
+ dev_dbg(kbdev->dev,
+ "%s: mmu_get_next_pgd() failed to find a valid pgd at level %d",
+ __func__, l + 1);
+ break;
+ }
+ }
+
+ *out_pgd = pgd;
+ *out_level = l;
+
+ /* -EFAULT indicates that pgd param was valid but the next pgd entry at vpfn was invalid.
+ * This implies that we have found the lowest valid pgd. Reset the error code.
+ */
+ if (err == -EFAULT)
+ err = 0;
+
+ return err;
+}
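
mmu_get_lowest_valid_pgd() above walks from the top level towards in_level and treats the first missing entry as its stopping point: the PGD it last reached is the one that new child PGDs will later be linked into, which is why the -EFAULT from mmu_get_next_pgd() is converted back into success. The stand-alone sketch below models that walk with a toy 4-level table and the same 9-bits-per-level index derivation; the data structures and the example VPFN are illustrative only.

#include <stdio.h>
#include <stdint.h>

#define LEVELS 4
#define ENTRIES 512
#define NO_CHILD UINT64_MAX

/* child[level][index] is the id of the child table, or NO_CHILD */
static uint64_t child[LEVELS][ENTRIES];

/* same 9-bits-per-level slicing the driver uses: level 3 -> bits [8:0], level 0 -> bits [35:27] */
static unsigned int level_index(uint64_t vpfn, int level)
{
	return (unsigned int)((vpfn >> ((3 - level) * 9)) & 0x1FF);
}

/* walk from level 0 down towards in_level; stop at the first missing entry */
static int lowest_valid_level(uint64_t vpfn, int in_level)
{
	int l;

	for (l = 0; l < in_level; l++) {
		if (child[l][level_index(vpfn, l)] == NO_CHILD)
			break; /* the driver maps this to -EFAULT, then back to success */
	}
	return l; /* level of the lowest PGD that already exists */
}

int main(void)
{
	uint64_t vpfn = 0x12345;
	int l, i;

	for (l = 0; l < LEVELS; l++)
		for (i = 0; i < ENTRIES; i++)
			child[l][i] = NO_CHILD;

	/* only the level-0 entry covering vpfn points at an existing level-1 PGD */
	child[0][level_index(vpfn, 0)] = 1;

	printf("lowest valid PGD is at level %d\n", lowest_valid_level(vpfn, 3)); /* prints 1 */
	return 0;
}
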
+
/*
- * Returns the PGD for the specified level of translation
+ * On success, sets out_pgd to the PGD for the specified level of translation
+ * Returns -EFAULT if a valid PGD is not found
*/
static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
- int level, phys_addr_t *out_pgd, bool *newly_created_pgd,
- u64 *dirty_pgds)
+ int level, phys_addr_t *out_pgd)
{
phys_addr_t pgd;
int l;
@@ -1681,12 +1651,12 @@ static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_tab
pgd = mmut->pgd;
for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) {
- int err =
- mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l, newly_created_pgd, dirty_pgds);
+ int err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l);
/* Handle failure condition */
if (err) {
- dev_dbg(kbdev->dev, "%s: mmu_get_next_pgd failure at level %d", __func__,
- l);
+ dev_err(kbdev->dev,
+ "%s: mmu_get_next_pgd() failed to find a valid pgd at level %d",
+ __func__, l + 1);
return err;
}
}
@@ -1696,17 +1666,9 @@ static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_tab
return 0;
}
-static int mmu_get_bottom_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
- phys_addr_t *out_pgd, bool *newly_created_pgd, u64 *dirty_pgds)
-{
- return mmu_get_pgd_at_level(kbdev, mmut, vpfn, MIDGARD_MMU_BOTTOMLEVEL, out_pgd,
- newly_created_pgd, dirty_pgds);
-}
-
static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut, u64 from_vpfn,
u64 to_vpfn, u64 *dirty_pgds,
- struct list_head *free_pgds_list,
struct tagged_addr *phys, bool ignore_page_migration)
{
u64 vpfn = from_vpfn;
@@ -1719,6 +1681,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
lockdep_assert_held(&mmut->mmu_lock);
mmu_mode = kbdev->mmu_mode;
+ kbase_mmu_reset_free_pgds_list(mmut);
while (vpfn < to_vpfn) {
unsigned int idx = vpfn & 0x1FF;
@@ -1779,11 +1742,10 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
if (!num_of_valid_entries) {
kunmap(p);
- kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, free_pgds_list);
+ kbase_mmu_add_to_free_pgds_list(mmut, p);
kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level,
- KBASE_MMU_OP_NONE, dirty_pgds,
- free_pgds_list);
+ KBASE_MMU_OP_NONE, dirty_pgds);
vpfn += count;
continue;
}
@@ -1863,34 +1825,209 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev,
mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param);
}
-/*
- * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn'
+/**
+ * update_parent_pgds() - Updates the page table from bottom level towards
+ * the top level to insert a new ATE
+ *
+ * @kbdev: Device pointer.
+ * @mmut: GPU MMU page table.
+ * @cur_level: The level of MMU page table where the ATE needs to be added.
+ * The bottom PGD level.
+ * @insert_level: The level of MMU page table where the chain of newly allocated
+ * PGDs needs to be linked-in/inserted.
+ * The top-most PGD level to be updated.
+ * @insert_vpfn: The virtual page frame number for the ATE.
+ * @pgds_to_insert: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) that contains
+ * the physical addresses of newly allocated PGDs from index
+ * insert_level+1 to cur_level, and an existing PGD at index
+ * insert_level.
+ *
+ * The newly allocated PGDs are linked from the bottom level up and inserted into the PGD
+ * at insert_level, which already exists in the MMU Page Tables. Migration status is also
+ * updated for all the newly allocated PGD pages.
+ *
+ * Return:
+ * * 0 - OK
+ * * -EFAULT - a PGD in @pgds_to_insert is invalid
+ * * -EINVAL - kmap() failed for a parent PGD page
+ */
+static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ int cur_level, int insert_level, u64 insert_vpfn,
+ phys_addr_t *pgds_to_insert)
+{
+ int pgd_index;
+ int err = 0;
+
+ /* Add a PTE for the new PGD page at pgd_index into the parent PGD at (pgd_index-1)
+ * Loop runs from the bottom-most to the top-most level so that all entries in the chain
+ * are valid when they are inserted into the MMU Page table via the insert_level PGD.
+ */
+ for (pgd_index = cur_level; pgd_index > insert_level; pgd_index--) {
+ int parent_index = pgd_index - 1;
+ phys_addr_t parent_pgd = pgds_to_insert[parent_index];
+ unsigned int current_valid_entries;
+ u64 pte;
+ phys_addr_t target_pgd = pgds_to_insert[pgd_index];
+ u64 parent_vpfn = (insert_vpfn >> ((3 - parent_index) * 9)) & 0x1FF;
+ struct page *parent_page = pfn_to_page(PFN_DOWN(parent_pgd));
+ u64 *parent_page_va;
+
+ if (WARN_ON_ONCE(target_pgd == KBASE_MMU_INVALID_PGD_ADDRESS)) {
+ err = -EFAULT;
+ goto failure_recovery;
+ }
+
+ parent_page_va = kmap(parent_page);
+ if (unlikely(parent_page_va == NULL)) {
+ dev_err(kbdev->dev, "%s: kmap failure", __func__);
+ err = -EINVAL;
+ goto failure_recovery;
+ }
+
+ current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(parent_page_va);
+
+ kbdev->mmu_mode->entry_set_pte(&pte, target_pgd);
+ parent_page_va[parent_vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte(
+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, parent_index, pte);
+ kbdev->mmu_mode->set_num_valid_entries(parent_page_va, current_valid_entries + 1);
+ kunmap(parent_page);
+
+ if (parent_index != insert_level) {
+ /* Newly allocated PGDs */
+ kbase_mmu_sync_pgd_cpu(
+ kbdev, kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)),
+ sizeof(u64));
+ } else {
+ /* A new valid entry is added to an existing PGD. Perform the
+ * invalidate operation for GPU cache as it could be having a
+ * cacheline that contains the entry (in an invalid form).
+ */
+ kbase_mmu_sync_pgd(
+ kbdev, mmut->kctx, parent_pgd + (parent_vpfn * sizeof(u64)),
+ kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)),
+ sizeof(u64), KBASE_MMU_OP_FLUSH_PT);
+ }
+
+ /* Update the new target_pgd page to its stable state */
+ if (kbase_page_migration_enabled) {
+ struct kbase_page_metadata *page_md =
+ kbase_page_private(phys_to_page(target_pgd));
+
+ spin_lock(&page_md->migrate_lock);
+
+ WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != ALLOCATE_IN_PROGRESS ||
+ IS_PAGE_ISOLATED(page_md->status));
+
+ if (mmut->kctx) {
+ page_md->status = PAGE_STATUS_SET(page_md->status, PT_MAPPED);
+ page_md->data.pt_mapped.mmut = mmut;
+ page_md->data.pt_mapped.pgd_vpfn_level =
+ PGD_VPFN_LEVEL_SET(insert_vpfn, parent_index);
+ } else {
+ page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE);
+ }
+
+ spin_unlock(&page_md->migrate_lock);
+ }
+ }
+
+ return 0;
+
+failure_recovery:
+ /* Cleanup PTEs from PGDs. The Parent PGD in the loop above is just "PGD" here */
+ for (; pgd_index < cur_level; pgd_index++) {
+ phys_addr_t pgd = pgds_to_insert[pgd_index];
+ struct page *pgd_page = pfn_to_page(PFN_DOWN(pgd));
+ u64 *pgd_page_va = kmap(pgd_page);
+ u64 vpfn = (insert_vpfn >> ((3 - pgd_index) * 9)) & 0x1FF;
+
+ kbdev->mmu_mode->entries_invalidate(&pgd_page_va[vpfn], 1);
+ kunmap(pgd_page);
+ }
+
+ return err;
+}
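
update_parent_pgds() above writes the chain of newly allocated PGDs from the bottom level upwards, so the single write into the pre-existing PGD at insert_level publishes a chain that is already complete, and on failure it invalidates exactly the entries it had written. A small model of that ordering, with plain printfs standing in for the real page-table writes and cache maintenance:

#include <stdio.h>

#define BOTTOM_LEVEL 3

/* write the chain bottom-up; fail_at_level < 0 models the success path */
static int link_new_pgds(int cur_level, int insert_level, int fail_at_level)
{
	int level;

	for (level = cur_level; level > insert_level; level--) {
		int parent = level - 1;

		if (level == fail_at_level) {
			/* undo the entries written by the iterations already completed */
			for (; level < cur_level; level++)
				printf("invalidate entry written at level %d\n", level);
			return -1;
		}
		printf("write entry for level-%d PGD into level-%d PGD%s\n", level, parent,
		       parent == insert_level ? " (pre-existing, needs GPU cache flush)" : "");
	}
	return 0;
}

int main(void)
{
	/* existing PGD at level 1, new PGDs allocated for levels 2 and 3 */
	link_new_pgds(BOTTOM_LEVEL, 1, -1); /* success path */
	link_new_pgds(BOTTOM_LEVEL, 1, 2);  /* failure while linking the level-2 PGD */
	return 0;
}
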
+
+/**
+ * mmu_insert_alloc_pgds() - allocate memory for PGDs from level_low to
+ * level_high (inclusive)
+ *
+ * @kbdev: Device pointer.
+ * @mmut: GPU MMU page table.
+ * @level_low: The lower bound for the levels for which the PGD allocs are required
+ * @level_high: The higher bound for the levels for which the PGD allocs are required
+ * @new_pgds: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) to write the
+ * newly allocated PGD addresses to.
+ *
+ * Numerically, level_low <= level_high, not to be confused with top level and
+ * bottom level concepts for MMU PGDs. They are only used as low and high bounds
+ * in an incrementing for-loop.
+ *
+ * Return:
+ * * 0 - OK
+ * * -ENOMEM - allocation failed for a PGD.
*/
-int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
- struct tagged_addr phys, size_t nr,
- unsigned long flags, int const group_id,
- enum kbase_caller_mmu_sync_info mmu_sync_info)
+static int mmu_insert_alloc_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ phys_addr_t *new_pgds, int level_low, int level_high)
+{
+ int err = 0;
+ int i;
+
+ lockdep_assert_held(&mmut->mmu_lock);
+
+ for (i = level_low; i <= level_high; i++) {
+ do {
+ new_pgds[i] = kbase_mmu_alloc_pgd(kbdev, mmut);
+ if (new_pgds[i] != KBASE_MMU_INVALID_PGD_ADDRESS)
+ break;
+
+ mutex_unlock(&mmut->mmu_lock);
+ err = kbase_mem_pool_grow(&kbdev->mem_pools.small[mmut->group_id],
+ level_high, NULL);
+ mutex_lock(&mmut->mmu_lock);
+ if (err) {
+ dev_err(kbdev->dev, "%s: kbase_mem_pool_grow() returned error %d",
+ __func__, err);
+
+ /* Free all PGDs allocated in previous successful iterations
+ * from (i-1) to level_low
+ */
+ for (i = (i - 1); i >= level_low; i--) {
+ if (new_pgds[i] != KBASE_MMU_INVALID_PGD_ADDRESS)
+ kbase_mmu_free_pgd(kbdev, mmut, new_pgds[i]);
+ }
+
+ return err;
+ }
+ } while (1);
+ }
+
+ return 0;
+}
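
mmu_insert_alloc_pgds() above retries each PGD allocation after dropping mmu_lock, growing the small-page memory pool and retaking the lock, and frees any PGDs it already allocated if the grow fails. The retry-and-cleanup pattern is sketched below against a toy allocator; the pool size, the addresses and the helper names are assumptions made for the sketch.

#include <stdio.h>

#define INVALID_PGD 0UL

static unsigned long pool[8];
static int pool_count; /* pages currently available in the pool */

static unsigned long alloc_pgd(void)
{
	if (pool_count == 0)
		return INVALID_PGD;
	return pool[--pool_count];
}

static int grow_pool(int nr)
{
	int i;

	if (nr > 8)
		return -1; /* model a failed grow */
	for (i = 0; i < nr; i++)
		pool[i] = 0x1000UL * (unsigned long)(i + 1);
	pool_count = nr;
	return 0;
}

/* allocate one PGD per level in [level_low, level_high]; clean up on failure */
static int alloc_pgds(unsigned long *new_pgds, int level_low, int level_high)
{
	int i;

	for (i = level_low; i <= level_high; i++) {
		do {
			new_pgds[i] = alloc_pgd();
			if (new_pgds[i] != INVALID_PGD)
				break;
			/* the driver drops mmu_lock here, grows the pool, then retakes the lock */
			if (grow_pool(level_high)) {
				for (i = i - 1; i >= level_low; i--)
					if (new_pgds[i] != INVALID_PGD)
						printf("freeing PGD %#lx\n", new_pgds[i]);
				return -1;
			}
		} while (1);
	}
	return 0;
}

int main(void)
{
	unsigned long new_pgds[4] = { 0 };

	printf("result: %d\n", alloc_pgds(new_pgds, 2, 3));
	return 0;
}
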
+
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn,
+ struct tagged_addr phys, size_t nr, unsigned long flags,
+ int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info,
+ bool ignore_page_migration)
{
phys_addr_t pgd;
u64 *pgd_page;
- /* In case the insert_single_page only partially completes
- * we need to be able to recover
- */
- bool recover_required = false;
- u64 start_vpfn = vpfn;
- size_t recover_count = 0;
+ u64 insert_vpfn = start_vpfn;
size_t remain = nr;
int err;
struct kbase_device *kbdev;
- enum kbase_mmu_op_type flush_op;
u64 dirty_pgds = 0;
- LIST_HEAD(free_pgds_list);
+ unsigned int i;
+ phys_addr_t new_pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
+ enum kbase_mmu_op_type flush_op;
+ struct kbase_mmu_table *mmut = &kctx->mmu;
+ int l, cur_level, insert_level;
if (WARN_ON(kctx == NULL))
return -EINVAL;
/* 64-bit address range is the max */
- KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+ KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE));
kbdev = kctx->kbdev;
@@ -1901,7 +2038,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
/* If page migration is enabled, pages involved in multiple GPU mappings
* are always treated as not movable.
*/
- if (kbase_page_migration_enabled) {
+ if (kbase_page_migration_enabled && !ignore_page_migration) {
struct page *phys_page = as_page(phys);
struct kbase_page_metadata *page_md = kbase_page_private(phys_page);
@@ -1912,12 +2049,11 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
}
}
- mutex_lock(&kctx->mmu.mmu_lock);
+ mutex_lock(&mmut->mmu_lock);
while (remain) {
- unsigned int i;
- unsigned int index = vpfn & 0x1FF;
- unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+ unsigned int vindex = insert_vpfn & 0x1FF;
+ unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex;
struct page *p;
register unsigned int num_of_valid_entries;
bool newly_created_pgd = false;
@@ -1925,64 +2061,61 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
if (count > remain)
count = remain;
+ cur_level = MIDGARD_MMU_BOTTOMLEVEL;
+ insert_level = cur_level;
+
/*
- * Repeatedly calling mmu_get_bottom_pgd() is clearly
+ * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly
* suboptimal. We don't have to re-parse the whole tree
* each time (just cache the l0-l2 sequence).
* On the other hand, it's only a gain when we map more than
* 256 pages at once (on average). Do we really care?
*/
- do {
- err = mmu_get_bottom_pgd(kbdev, &kctx->mmu, vpfn, &pgd, &newly_created_pgd,
- &dirty_pgds);
- if (err != -ENOMEM)
- break;
- /* Fill the memory pool with enough pages for
- * the page walk to succeed
- */
- mutex_unlock(&kctx->mmu.mmu_lock);
- err = kbase_mem_pool_grow(
- &kbdev->mem_pools.small[
- kctx->mmu.group_id],
- MIDGARD_MMU_BOTTOMLEVEL);
- mutex_lock(&kctx->mmu.mmu_lock);
- } while (!err);
+ /* insert_level < cur_level if there's no valid PGD for cur_level and insert_vpfn */
+ err = mmu_get_lowest_valid_pgd(kbdev, mmut, insert_vpfn, cur_level, &insert_level,
+ &pgd);
+
if (err) {
- dev_warn(kbdev->dev, "%s: mmu_get_bottom_pgd failure", __func__);
- if (recover_required) {
- /* Invalidate the pages we have partially
- * completed
- */
- mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn,
- start_vpfn + recover_count,
- &dirty_pgds, &free_pgds_list,
- NULL, true);
- }
+ dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d",
+ __func__, err);
goto fail_unlock;
}
+ /* No valid pgd at cur_level */
+ if (insert_level != cur_level) {
+ /* Allocate new pgds for all missing levels from the required level
+ * down to the lowest valid pgd at insert_level
+ */
+ err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1),
+ cur_level);
+ if (err)
+ goto fail_unlock;
+
+ newly_created_pgd = true;
+
+ new_pgds[insert_level] = pgd;
+
+ /* If we didn't find an existing valid pgd at cur_level,
+ * we've now allocated one. The ATE in the next step should
+ * be inserted in this newly allocated pgd.
+ */
+ pgd = new_pgds[cur_level];
+ }
+
p = pfn_to_page(PFN_DOWN(pgd));
pgd_page = kmap(p);
if (!pgd_page) {
- dev_warn(kbdev->dev, "%s: kmap failure", __func__);
- if (recover_required) {
- /* Invalidate the pages we have partially
- * completed
- */
- mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn,
- start_vpfn + recover_count,
- &dirty_pgds, &free_pgds_list,
- NULL, true);
- }
+ dev_err(kbdev->dev, "%s: kmap failure", __func__);
err = -ENOMEM;
- goto fail_unlock;
+
+ goto fail_unlock_free_pgds;
}
num_of_valid_entries =
kbdev->mmu_mode->get_num_valid_entries(pgd_page);
for (i = 0; i < count; i++) {
- unsigned int ofs = index + i;
+ unsigned int ofs = vindex + i;
/* Fail if the current page is a valid ATE entry */
KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
@@ -1994,50 +2127,87 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
kbdev->mmu_mode->set_num_valid_entries(
pgd_page, num_of_valid_entries + count);
- vpfn += count;
- remain -= count;
-
- if (count > 0 && !newly_created_pgd)
- dirty_pgds |= 1ULL << MIDGARD_MMU_BOTTOMLEVEL;
+ dirty_pgds |= 1ULL << (newly_created_pgd ? insert_level : MIDGARD_MMU_BOTTOMLEVEL);
/* MMU cache flush operation here will depend on whether bottom level
* PGD is newly created or not.
*
- * If bottom level PGD is newly created then no cache maintenance is
+ * If bottom level PGD is newly created then no GPU cache maintenance is
* required as the PGD will not exist in GPU cache. Otherwise GPU cache
* maintenance is required for existing PGD.
*/
flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT;
- kbase_mmu_sync_pgd(kbdev, kctx, pgd + (index * sizeof(u64)),
- kbase_dma_addr(p) + (index * sizeof(u64)), count * sizeof(u64),
+ kbase_mmu_sync_pgd(kbdev, kctx, pgd + (vindex * sizeof(u64)),
+ kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64),
flush_op);
+ if (newly_created_pgd) {
+ err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn,
+ new_pgds);
+ if (err) {
+ dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)",
+ __func__, err);
+
+ kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count);
+
+ kunmap(p);
+ goto fail_unlock_free_pgds;
+ }
+ }
+
+ insert_vpfn += count;
+ remain -= count;
kunmap(p);
- /* We have started modifying the page table.
- * If further pages need inserting and fail we need to undo what
- * has already taken place
- */
- recover_required = true;
- recover_count += count;
}
- mutex_unlock(&kctx->mmu.mmu_lock);
- mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds,
- mmu_sync_info, false);
+ mutex_unlock(&mmut->mmu_lock);
+
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds, mmu_sync_info,
+ false);
return 0;
+fail_unlock_free_pgds:
+ /* Free the pgds allocated by us from insert_level+1 to bottom level */
+ for (l = cur_level; l > insert_level; l--)
+ kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
+
fail_unlock:
- mutex_unlock(&kctx->mmu.mmu_lock);
+ if (insert_vpfn != start_vpfn) {
+ /* Invalidate the pages we have partially completed */
+ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, &dirty_pgds,
+ NULL, true);
+ }
- mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds,
- mmu_sync_info, true);
- kbase_mmu_free_pgds_list(kbdev, &kctx->mmu, &free_pgds_list);
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds, mmu_sync_info,
+ true);
+ kbase_mmu_free_pgds_list(kbdev, mmut);
+ mutex_unlock(&mmut->mmu_lock);
return err;
}
+int kbase_mmu_insert_single_imported_page(struct kbase_context *kctx, u64 vpfn,
+ struct tagged_addr phys, size_t nr, unsigned long flags,
+ int const group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
+{
+ /* The aliasing sink page has metadata and shall be moved to NOT_MOVABLE. */
+ return kbase_mmu_insert_single_page(kctx, vpfn, phys, nr, flags, group_id, mmu_sync_info,
+ false);
+}
+
+int kbase_mmu_insert_single_aliased_page(struct kbase_context *kctx, u64 vpfn,
+ struct tagged_addr phys, size_t nr, unsigned long flags,
+ int const group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
+{
+ /* The aliasing sink page has metadata and shall be moved to NOT_MOVABLE. */
+ return kbase_mmu_insert_single_page(kctx, vpfn, phys, nr, flags, group_id, mmu_sync_info,
+ false);
+}
+
static void kbase_mmu_progress_migration_on_insert(struct tagged_addr phys,
struct kbase_va_region *reg,
struct kbase_mmu_table *mmut, const u64 vpfn)
@@ -2139,7 +2309,9 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
size_t remain = nr;
int err;
struct kbase_mmu_mode const *mmu_mode;
- LIST_HEAD(free_pgds_list);
+ unsigned int i;
+ phys_addr_t new_pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
+ int l, cur_level, insert_level;
/* Note that 0 is a valid start_vpfn */
/* 64-bit address range is the max */
@@ -2154,13 +2326,12 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
mutex_lock(&mmut->mmu_lock);
while (remain) {
- unsigned int i;
unsigned int vindex = insert_vpfn & 0x1FF;
unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex;
struct page *p;
- int cur_level;
register unsigned int num_of_valid_entries;
bool newly_created_pgd = false;
+ enum kbase_mmu_op_type flush_op;
if (count > remain)
count = remain;
@@ -2170,57 +2341,53 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
else
cur_level = MIDGARD_MMU_BOTTOMLEVEL;
+ insert_level = cur_level;
+
/*
- * Repeatedly calling mmu_get_pgd_at_level() is clearly
+ * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly
* suboptimal. We don't have to re-parse the whole tree
* each time (just cache the l0-l2 sequence).
* On the other hand, it's only a gain when we map more than
* 256 pages at once (on average). Do we really care?
*/
- do {
- err = mmu_get_pgd_at_level(kbdev, mmut, insert_vpfn, cur_level, &pgd,
- &newly_created_pgd, dirty_pgds);
- if (err != -ENOMEM)
- break;
- /* Fill the memory pool with enough pages for
- * the page walk to succeed
- */
- mutex_unlock(&mmut->mmu_lock);
- err = kbase_mem_pool_grow(
- &kbdev->mem_pools.small[mmut->group_id],
- cur_level);
- mutex_lock(&mmut->mmu_lock);
- } while (!err);
+ /* insert_level < cur_level if there's no valid PGD for cur_level and insert_vpfn */
+ err = mmu_get_lowest_valid_pgd(kbdev, mmut, insert_vpfn, cur_level, &insert_level,
+ &pgd);
if (err) {
- dev_warn(kbdev->dev, "%s: mmu_get_pgd_at_level failure", __func__);
- if (insert_vpfn != start_vpfn) {
- /* Invalidate the pages we have partially
- * completed
- */
- mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn,
- insert_vpfn, dirty_pgds,
- &free_pgds_list, phys,
- ignore_page_migration);
- }
+ dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d",
+ __func__, err);
goto fail_unlock;
}
+ /* No valid pgd at cur_level */
+ if (insert_level != cur_level) {
+ /* Allocate new pgds for all missing levels from the required level
+ * down to the lowest valid pgd at insert_level
+ */
+ err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1),
+ cur_level);
+ if (err)
+ goto fail_unlock;
+
+ newly_created_pgd = true;
+
+ new_pgds[insert_level] = pgd;
+
+ /* If we didn't find an existing valid pgd at cur_level,
+ * we've now allocated one. The ATE in the next step should
+ * be inserted in this newly allocated pgd.
+ */
+ pgd = new_pgds[cur_level];
+ }
+
p = pfn_to_page(PFN_DOWN(pgd));
pgd_page = kmap(p);
if (!pgd_page) {
- dev_warn(kbdev->dev, "%s: kmap failure", __func__);
- if (insert_vpfn != start_vpfn) {
- /* Invalidate the pages we have partially
- * completed
- */
- mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn,
- insert_vpfn, dirty_pgds,
- &free_pgds_list, phys,
- ignore_page_migration);
- }
+ dev_err(kbdev->dev, "%s: kmap failure", __func__);
err = -ENOMEM;
- goto fail_unlock;
+
+ goto fail_unlock_free_pgds;
}
num_of_valid_entries =
@@ -2262,34 +2429,39 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries);
- if (dirty_pgds && !newly_created_pgd)
- *dirty_pgds |= 1ULL << cur_level;
-
- phys += count;
- insert_vpfn += count;
- remain -= count;
+ if (dirty_pgds)
+ *dirty_pgds |= 1ULL << (newly_created_pgd ? insert_level : cur_level);
- /* Even if mmu_get_pgd_at_level() allocated a new bottom level
- * table page, the invalidation of L2 cache is still needed for
- * for the valid entries written in that page. This is because a
- * race can happen as soon as the entry of parent level table is
- * updated to point to the page of bottom level table.
- * GPU can try to access within the the same virtual range that
- * is being mapped, before the valid entries of bottom level table
- * page are flushed to the memory from the CPU's cache. And if that
- * happens then the invalid entries from memory could get fetched
- * into the L2 cache and so those entries won't be affected by the
- * MMU TLB invalidation done by sending the UNLOCK command.
- * If the memory is growable then this could result in unexpected
- * page faults happening repeatedly, until the invalid entry is
- * evicted from the L2 cache, as Driver would consider the page
- * faults for mapped memory as duplicate and won't take any action
- * effectively.
+ /* MMU cache flush operation here will depend on whether bottom level
+ * PGD is newly created or not.
+ *
+ * If bottom level PGD is newly created then no GPU cache maintenance is
+ * required as the PGD will not exist in GPU cache. Otherwise GPU cache
+ * maintenance is required for existing PGD.
*/
+ flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT;
+
kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (vindex * sizeof(u64)),
kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64),
- KBASE_MMU_OP_FLUSH_PT);
+ flush_op);
+
+ if (newly_created_pgd) {
+ err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn,
+ new_pgds);
+ if (err) {
+ dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)",
+ __func__, err);
+ kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count);
+
+ kunmap(p);
+ goto fail_unlock_free_pgds;
+ }
+ }
+
+ phys += count;
+ insert_vpfn += count;
+ remain -= count;
kunmap(p);
}
@@ -2297,12 +2469,22 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
return 0;
+fail_unlock_free_pgds:
+ /* Free the pgds allocated by us from insert_level+1 to bottom level */
+ for (l = cur_level; l > insert_level; l--)
+ kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
+
fail_unlock:
- mutex_unlock(&mmut->mmu_lock);
+ if (insert_vpfn != start_vpfn) {
+ /* Invalidate the pages we have partially completed */
+ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, dirty_pgds,
+ phys, ignore_page_migration);
+ }
mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr,
dirty_pgds ? *dirty_pgds : 0xF, CALLER_MMU_ASYNC, true);
- kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list);
+ kbase_mmu_free_pgds_list(kbdev, mmut);
+ mutex_unlock(&mmut->mmu_lock);
return err;
}
@@ -2318,7 +2500,6 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *m
{
int err;
u64 dirty_pgds = 0;
- LIST_HEAD(free_pgds_list);
/* Early out if there is nothing to do */
if (nr == 0)
@@ -2336,58 +2517,56 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *m
KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
-/**
- * kbase_mmu_flush_noretain() - Flush and invalidate the GPU caches
- * without retaining the kbase context.
- * @kctx: The KBase context.
- * @vpfn: The virtual page frame number to start the flush on.
- * @nr: The number of pages to flush.
- *
- * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any
- * other locking.
- */
-static void kbase_mmu_flush_noretain(struct kbase_context *kctx, u64 vpfn, size_t nr)
+int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int as_nr, int const group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
+ struct kbase_va_region *reg)
{
- struct kbase_device *kbdev = kctx->kbdev;
int err;
- /* Calls to this function are inherently asynchronous, with respect to
- * MMU operations.
+ u64 dirty_pgds = 0;
+
+ /* Early out if there is nothing to do */
+ if (nr == 0)
+ return 0;
+
+ /* Imported allocations don't have metadata and therefore always ignore the
+ * page migration logic.
*/
- const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
- struct kbase_mmu_hw_op_param op_param;
+ err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id,
+ &dirty_pgds, reg, true);
+ if (err)
+ return err;
- lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
- lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false);
+
+ return 0;
+}
+
+int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int as_nr, int const group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
+ struct kbase_va_region *reg)
+{
+ int err;
+ u64 dirty_pgds = 0;
/* Early out if there is nothing to do */
if (nr == 0)
- return;
+ return 0;
- /* flush L2 and unlock the VA (resumes the MMU) */
- op_param.vpfn = vpfn;
- op_param.nr = nr;
- op_param.op = KBASE_MMU_OP_FLUSH_MEM;
- op_param.kctx_id = kctx->id;
- op_param.mmu_sync_info = mmu_sync_info;
- if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
- /* Value used to prevent skipping of any levels when flushing */
- op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF);
- err = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[kctx->as_nr],
- &op_param);
- } else {
- err = kbase_mmu_hw_do_flush_locked(kbdev, &kbdev->as[kctx->as_nr],
- &op_param);
- }
+ /* Memory aliases are always built on top of existing allocations,
+ * therefore the state of physical pages shall be updated.
+ */
+ err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id,
+ &dirty_pgds, reg, false);
+ if (err)
+ return err;
- if (err) {
- /* Flush failed to complete, assume the
- * GPU has hung and perform a reset to recover
- */
- dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover");
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false);
- if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE))
- kbase_reset_gpu_locked(kbdev);
- }
+ return 0;
}
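
The two new batch-insert wrappers above differ only in the ignore_page_migration flag they pass down: imported allocations carry no page-migration metadata, so it is true, while aliased mappings are built on top of existing allocations whose metadata must be updated, so it is false. A minimal sketch of that dispatch, with placeholder types and stand-in functions rather than the real kbase API:

#include <stdbool.h>
#include <stdio.h>

enum mapping_kind { MAPPING_IMPORTED, MAPPING_ALIASED };

/* stand-in for kbase_mmu_insert_pages_no_flush(); only the flag matters here */
static int insert_pages(bool ignore_page_migration)
{
	printf("insert pages, %s page migration metadata\n",
	       ignore_page_migration ? "ignoring" : "updating");
	return 0;
}

static int insert_for(enum mapping_kind kind)
{
	/* imported allocations have no metadata; aliases sit on existing pages */
	return insert_pages(kind == MAPPING_IMPORTED);
}

int main(void)
{
	insert_for(MAPPING_IMPORTED);
	insert_for(MAPPING_ALIASED);
	return 0;
}
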
void kbase_mmu_update(struct kbase_device *kbdev,
@@ -2412,6 +2591,14 @@ void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr)
void kbase_mmu_disable(struct kbase_context *kctx)
{
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct kbase_mmu_hw_op_param op_param = { 0 };
+ int lock_err, flush_err;
+
/* ASSERT that the context has a valid as_nr, which is only the case
* when it's scheduled in.
*
@@ -2422,16 +2609,49 @@ void kbase_mmu_disable(struct kbase_context *kctx)
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
- /*
- * The address space is being disabled, drain all knowledge of it out
- * from the caches as pages and page tables might be freed after this.
- *
- * The job scheduler code will already be holding the locks and context
- * so just do the flush.
+ op_param.vpfn = 0;
+ op_param.nr = ~0;
+ op_param.op = KBASE_MMU_OP_FLUSH_MEM;
+ op_param.kctx_id = kctx->id;
+ op_param.mmu_sync_info = mmu_sync_info;
+
+#if MALI_USE_CSF
+ /* 0xF value used to prevent skipping of any levels when flushing */
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev))
+ op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF);
+#endif
+
+ /* lock MMU to prevent existing jobs on GPU from executing while the AS is
+ * not yet disabled
+ */
+ lock_err = kbase_mmu_hw_do_lock(kbdev, &kbdev->as[kctx->as_nr], &op_param);
+ if (lock_err)
+ dev_err(kbdev->dev, "Failed to lock AS %d for ctx %d_%d", kctx->as_nr, kctx->tgid,
+ kctx->id);
+
+ /* Issue the flush command only when L2 cache is in stable power on state.
+ * Any other state for L2 cache implies that shader cores are powered off,
+ * which in turn implies there is no execution happening on the GPU.
*/
- kbase_mmu_flush_noretain(kctx, 0, ~0);
+ if (kbdev->pm.backend.l2_state == KBASE_L2_ON) {
+ flush_err = kbase_gpu_cache_flush_and_busy_wait(kbdev,
+ GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
+ if (flush_err)
+ dev_err(kbdev->dev,
+ "Failed to flush GPU cache when disabling AS %d for ctx %d_%d",
+ kctx->as_nr, kctx->tgid, kctx->id);
+ }
+ kbdev->mmu_mode->disable_as(kbdev, kctx->as_nr);
+
+ if (!lock_err) {
+ /* unlock the MMU to allow it to resume */
+ lock_err =
+ kbase_mmu_hw_do_unlock_no_addr(kbdev, &kbdev->as[kctx->as_nr], &op_param);
+ if (lock_err)
+ dev_err(kbdev->dev, "Failed to unlock AS %d for ctx %d_%d", kctx->as_nr,
+ kctx->tgid, kctx->id);
+ }
- kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
#if !MALI_USE_CSF
/*
* JM GPUs has some L1 read only caches that need to be invalidated
@@ -2439,7 +2659,7 @@ void kbase_mmu_disable(struct kbase_context *kctx)
* the slot_rb tracking field so such invalidation is performed when
* a new katom is executed on the affected slots.
*/
- kbase_backend_slot_kctx_purge_locked(kctx->kbdev, kctx);
+ kbase_backend_slot_kctx_purge_locked(kbdev, kctx);
#endif
}
KBASE_EXPORT_TEST_API(kbase_mmu_disable);
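
The rewritten kbase_mmu_disable() above replaces the old flush-and-reset helper with an explicit sequence: lock the address space, flush L2+LSC only while the L2 is in a stable powered-on state, disable the AS, and unlock only if the lock itself succeeded. The control flow, with stand-in functions instead of the real MMU and cache-control calls, reduces to:

#include <stdio.h>

static int do_lock(void)      { return 0; } /* 0 on success */
static int flush_l2_lsc(void) { return 0; }
static void disable_as(void)  { puts("AS disabled"); }
static int do_unlock(void)    { return 0; }

static void disable_sequence(int l2_is_on)
{
	int lock_err = do_lock();

	if (lock_err)
		puts("failed to lock AS");

	/* flush only while the L2 cache is in a stable powered-on state */
	if (l2_is_on && flush_l2_lsc())
		puts("failed to flush GPU cache");

	disable_as();

	/* never unlock an AS we failed to lock */
	if (!lock_err && do_unlock())
		puts("failed to unlock AS");
}

int main(void)
{
	disable_sequence(1);
	return 0;
}
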
@@ -2447,8 +2667,7 @@ KBASE_EXPORT_TEST_API(kbase_mmu_disable);
static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut, phys_addr_t *pgds,
u64 vpfn, int level,
- enum kbase_mmu_op_type flush_op, u64 *dirty_pgds,
- struct list_head *free_pgds_list)
+ enum kbase_mmu_op_type flush_op, u64 *dirty_pgds)
{
int current_level;
@@ -2480,7 +2699,7 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
current_pgd + (index * sizeof(u64)),
sizeof(u64), flush_op);
- kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, free_pgds_list);
+ kbase_mmu_add_to_free_pgds_list(mmut, p);
} else {
current_valid_entries--;
@@ -2500,13 +2719,14 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
/**
* mmu_flush_invalidate_teardown_pages() - Perform flush operation after unmapping pages.
*
- * @kbdev: Pointer to kbase device.
- * @kctx: Pointer to kbase context.
- * @as_nr: Address space number, for GPU cache maintenance operations
- * that happen outside a specific kbase context.
- * @phys: Array of physical pages to flush.
- * @op_param: Non-NULL pointer to struct containing information about the flush
- * operation to perform.
+ * @kbdev: Pointer to kbase device.
+ * @kctx: Pointer to kbase context.
+ * @as_nr: Address space number, for GPU cache maintenance operations
+ * that happen outside a specific kbase context.
+ * @phys: Array of physical pages to flush.
+ * @phys_page_nr: Number of physical pages to flush.
+ * @op_param: Non-NULL pointer to struct containing information about the flush
+ * operation to perform.
*
* This function will do one of three things:
* 1. Invalidate the MMU caches, followed by a partial GPU cache flush of the
@@ -2514,10 +2734,14 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
* 2. Perform a full GPU cache flush through the GPU_CONTROL interface if feature is
* supported on GPU or,
* 3. Perform a full GPU cache flush through the MMU_CONTROL interface.
+ *
+ * When performing a partial GPU cache flush, the number of physical
+ * pages does not have to be identical to the number of virtual pages unmapped
+ * from the MMU; this supports flushing a single physical address for an aliased page.
*/
static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev,
struct kbase_context *kctx, int as_nr,
- struct tagged_addr *phys,
+ struct tagged_addr *phys, size_t phys_page_nr,
struct kbase_mmu_hw_op_param *op_param)
{
if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) {
@@ -2536,7 +2760,7 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev,
mmu_invalidate(kbdev, kctx, as_nr, op_param);
- for (i = 0; !flush_done && i < op_param->nr; i++) {
+ for (i = 0; !flush_done && i < phys_page_nr; i++) {
spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0))
mmu_flush_pa_range(kbdev, as_phys_addr_t(phys[i]), PAGE_SIZE,
@@ -2549,76 +2773,15 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev,
#endif
}
-/**
- * kbase_mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table
- *
- * @kbdev: Pointer to kbase device.
- * @mmut: Pointer to GPU MMU page table.
- * @vpfn: Start page frame number of the GPU virtual pages to unmap.
- * @phys: Array of physical pages currently mapped to the virtual
- * pages to unmap, or NULL. This is used for GPU cache maintenance
- * and page migration support.
- * @nr: Number of pages to unmap.
- * @as_nr: Address space number, for GPU cache maintenance operations
- * that happen outside a specific kbase context.
- * @ignore_page_migration: Whether page migration metadata should be ignored.
- *
- * We actually discard the ATE and free the page table pages if no valid entries
- * exist in PGD.
- *
- * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
- * currently scheduled into the runpool, and so potentially uses a lot of locks.
- * These locks must be taken in the correct order with respect to others
- * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
- * information.
- *
- * The @p phys pointer to physical pages is not necessary for unmapping virtual memory,
- * but it is used for fine-grained GPU cache maintenance. If @p phys is NULL,
- * GPU cache maintenance will be done as usual, that is invalidating the whole GPU caches
- * instead of specific physical address ranges.
- *
- * Return: 0 on success, otherwise an error code.
- */
-int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
- struct tagged_addr *phys, size_t nr, int as_nr,
- bool ignore_page_migration)
+static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, size_t nr, u64 *dirty_pgds,
+ struct list_head *free_pgds_list,
+ enum kbase_mmu_op_type flush_op)
{
- const size_t requested_nr = nr;
- u64 start_vpfn = vpfn;
- enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE;
- struct kbase_mmu_mode const *mmu_mode;
- struct kbase_mmu_hw_op_param op_param;
- int err = -EFAULT;
- u64 dirty_pgds = 0;
- LIST_HEAD(free_pgds_list);
-
- /* Calls to this function are inherently asynchronous, with respect to
- * MMU operations.
- */
- const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
-
- if (nr == 0) {
- /* early out if nothing to do */
- return 0;
- }
-
- /* MMU cache flush strategy depends on the number of pages to unmap. In both cases
- * the operation is invalidate but the granularity of cache maintenance may change
- * according to the situation.
- *
- * If GPU control command operations are present and the number of pages is "small",
- * then the optimal strategy is flushing on the physical address range of the pages
- * which are affected by the operation. That implies both the PGDs which are modified
- * or removed from the page table and the physical pages which are freed from memory.
- *
- * Otherwise, there's no alternative to invalidating the whole GPU cache.
- */
- if (mmu_flush_cache_on_gpu_ctrl(kbdev) && phys && nr <= KBASE_PA_RANGE_THRESHOLD_NR_PAGES)
- flush_op = KBASE_MMU_OP_FLUSH_PT;
-
- mutex_lock(&mmut->mmu_lock);
+ struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode;
- mmu_mode = kbdev->mmu_mode;
+ lockdep_assert_held(&mmut->mmu_lock);
+ kbase_mmu_reset_free_pgds_list(mmut);
while (nr) {
unsigned int index = vpfn & 0x1FF;
@@ -2703,7 +2866,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table
}
if (pcount > 0)
- dirty_pgds |= 1ULL << level;
+ *dirty_pgds |= 1ULL << level;
num_of_valid_entries = mmu_mode->get_num_valid_entries(page);
if (WARN_ON_ONCE(num_of_valid_entries < pcount))
@@ -2725,11 +2888,10 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table
pgd + (index * sizeof(u64)),
pcount * sizeof(u64), flush_op);
- kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, &free_pgds_list);
+ kbase_mmu_add_to_free_pgds_list(mmut, p);
kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level,
- flush_op, &dirty_pgds,
- &free_pgds_list);
+ flush_op, dirty_pgds);
vpfn += count;
nr -= count;
@@ -2746,19 +2908,77 @@ next:
vpfn += count;
nr -= count;
}
- err = 0;
out:
+ return 0;
+}
+
+int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
+ struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages,
+ int as_nr, bool ignore_page_migration)
+{
+ u64 start_vpfn = vpfn;
+ enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE;
+ struct kbase_mmu_hw_op_param op_param;
+ int err = -EFAULT;
+ u64 dirty_pgds = 0;
+ LIST_HEAD(free_pgds_list);
+
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
+ /* This function performs two operations: MMU maintenance and flushing
+ * the caches. To ensure internal consistency between the caches and the
+ * MMU, it does not make sense to be able to flush only the physical pages
+ * from the cache and keep the PTE, nor does it make sense to use this
+ * function to remove a PTE and keep the physical pages in the cache.
+ *
+ * However, we have legitimate cases where we can try to tear down a mapping
+ * with zero virtual and zero physical pages, so we must have the following
+ * behaviour:
+ * - if both physical and virtual page counts are zero, return early
+ * - if either the physical or the virtual page count is zero, return early
+ * - if there are fewer virtual pages than physical pages, return -EINVAL
+ */
+ if (unlikely(nr_virt_pages == 0 || nr_phys_pages == 0))
+ return 0;
+
+ if (unlikely(nr_virt_pages < nr_phys_pages))
+ return -EINVAL;
+
+ /* MMU cache flush strategy depends on the number of pages to unmap. In both cases
+ * the operation is invalidate but the granularity of cache maintenance may change
+ * according to the situation.
+ *
+ * If GPU control command operations are present and the number of pages is "small",
+ * then the optimal strategy is flushing on the physical address range of the pages
+ * which are affected by the operation. That implies both the PGDs which are modified
+ * or removed from the page table and the physical pages which are freed from memory.
+ *
+ * Otherwise, there's no alternative to invalidating the whole GPU cache.
+ */
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev) && phys &&
+ nr_phys_pages <= KBASE_PA_RANGE_THRESHOLD_NR_PAGES)
+ flush_op = KBASE_MMU_OP_FLUSH_PT;
+
+ mutex_lock(&mmut->mmu_lock);
+
+ err = kbase_mmu_teardown_pgd_pages(kbdev, mmut, vpfn, nr_virt_pages, &dirty_pgds,
+ &free_pgds_list, flush_op);
+
/* Set up MMU operation parameters. See above about MMU cache flush strategy. */
op_param = (struct kbase_mmu_hw_op_param){
.vpfn = start_vpfn,
- .nr = requested_nr,
+ .nr = nr_virt_pages,
.mmu_sync_info = mmu_sync_info,
.kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF,
.op = (flush_op == KBASE_MMU_OP_FLUSH_PT) ? KBASE_MMU_OP_FLUSH_PT :
KBASE_MMU_OP_FLUSH_MEM,
.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds),
};
- mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, &op_param);
+ mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, nr_phys_pages,
+ &op_param);
/* If page migration is enabled: the status of all physical pages involved
* shall be updated, unless they are not movable. Their status shall be
@@ -2766,15 +2986,14 @@ out:
* requests to migrate the pages, if they have been isolated.
*/
if (kbase_page_migration_enabled && phys && !ignore_page_migration)
- kbase_mmu_progress_migration_on_teardown(kbdev, phys, requested_nr);
+ kbase_mmu_progress_migration_on_teardown(kbdev, phys, nr_phys_pages);
- mutex_unlock(&mmut->mmu_lock);
+ kbase_mmu_free_pgds_list(kbdev, mmut);
- kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list);
+ mutex_unlock(&mmut->mmu_lock);
return err;
}
-
KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
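
Throughout these paths, dirty_pgds collects one bit per page-table level that was actually modified, and op_param.flush_skip_levels is then derived from it via pgd_level_to_skip_flush(). That helper is not part of this diff; the sketch below assumes it simply lets the flush skip every level whose bit is clear, which is consistent with the "0xF value used to prevent skipping of any levels" usage seen above.

#include <stdio.h>
#include <stdint.h>

/* assumed behaviour: levels not marked dirty may be skipped by the flush */
static unsigned int levels_to_skip(uint64_t dirty_pgds)
{
	return (unsigned int)(~dirty_pgds & 0xF);
}

int main(void)
{
	uint64_t dirty_pgds = 0;

	/* e.g. only a level-2 PGD and the bottom-level PGD were touched */
	dirty_pgds |= 1ULL << 2;
	dirty_pgds |= 1ULL << 3;

	printf("dirty mask 0x%llx -> skip mask 0x%x\n",
	       (unsigned long long)dirty_pgds, levels_to_skip(dirty_pgds));

	/* passing 0xF as the dirty mask yields a skip mask of 0: nothing is skipped */
	printf("0xF -> skip 0x%x\n", levels_to_skip(0xF));
	return 0;
}
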
/**
@@ -2834,7 +3053,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kb
if (is_huge(*phys) && (index == index_in_large_page(*phys)))
cur_level = MIDGARD_MMU_LEVEL(2);
- err = mmu_get_pgd_at_level(kbdev, mmut, vpfn, cur_level, &pgd, NULL, dirty_pgds);
+ err = mmu_get_pgd_at_level(kbdev, mmut, vpfn, cur_level, &pgd);
if (WARN_ON(err))
goto fail_unlock;
@@ -3119,9 +3338,9 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p
}
}
- ret = mmu_get_pgd_at_level(kbdev, mmut, vpfn, level, &pgd, NULL, NULL);
+ ret = mmu_get_pgd_at_level(kbdev, mmut, vpfn, level, &pgd);
if (ret) {
- dev_warn(kbdev->dev, "%s: failed to find PGD for old page.", __func__);
+ dev_err(kbdev->dev, "%s: failed to find PGD for old page.", __func__);
goto get_pgd_at_level_error;
}
@@ -3167,10 +3386,8 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p
if (ret < 0) {
mutex_unlock(&kbdev->mmu_hw_mutex);
mutex_unlock(&kbdev->pm.lock);
- dev_err(kbdev->dev,
- "%s: failed to lock MMU region or flush GPU cache. Issuing GPU soft-reset to recover.",
- __func__);
- goto gpu_reset;
+ dev_err(kbdev->dev, "%s: failed to lock MMU region or flush GPU cache", __func__);
+ goto undo_mappings;
}
/* Copy memory content.
@@ -3270,7 +3487,7 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p
/* Checking the final migration transaction error state */
if (ret < 0) {
dev_err(kbdev->dev, "%s: failed to unlock MMU region.", __func__);
- goto gpu_reset;
+ goto undo_mappings;
}
/* Undertaking metadata transfer, while we are holding the mmu_lock */
@@ -3305,19 +3522,13 @@ new_page_map_error:
old_page_map_error:
return ret;
-gpu_reset:
- /* Unlock the MMU table before resetting the GPU and undo
- * mappings.
- */
+undo_mappings:
+ /* Unlock the MMU table and undo mappings. */
mutex_unlock(&mmut->mmu_lock);
kunmap(phys_to_page(pgd));
kunmap(as_page(new_phys));
kunmap(as_page(old_phys));
- /* Reset the GPU because of an unrecoverable error in locking or flushing. */
- if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
- kbase_reset_gpu(kbdev);
-
return ret;
}
@@ -3329,7 +3540,6 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl
struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev;
struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode;
u64 *pgd_page_buffer = NULL;
- bool page_is_isolated = false;
struct page *p = phys_to_page(pgd);
lockdep_assert_held(&mmut->mmu_lock);
@@ -3342,7 +3552,7 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl
/* Copy the page to our preallocated buffer so that we can minimize
* kmap_atomic usage
*/
- pgd_page_buffer = mmut->mmu_teardown_pages[level];
+ pgd_page_buffer = mmut->scratch_mem.teardown_pages.levels[level];
memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
}
@@ -3370,41 +3580,27 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl
}
}
- /* Top level PGD page is excluded from migration process. */
- if (level != MIDGARD_MMU_TOPLEVEL)
- page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p);
-
- if (likely(!page_is_isolated))
- kbase_mmu_free_pgd(kbdev, mmut, pgd);
+ kbase_mmu_free_pgd(kbdev, mmut, pgd);
}
int kbase_mmu_init(struct kbase_device *const kbdev,
struct kbase_mmu_table *const mmut, struct kbase_context *const kctx,
int const group_id)
{
- int level;
-
if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) ||
WARN_ON(group_id < 0))
return -EINVAL;
+ compiletime_assert(KBASE_MEM_ALLOC_MAX_SIZE <= (((8ull << 30) >> PAGE_SHIFT)),
+ "List of free PGDs may not be large enough.");
+ compiletime_assert(MAX_PAGES_FOR_FREE_PGDS >= MIDGARD_MMU_BOTTOMLEVEL,
+ "Array of MMU levels is not large enough.");
+
mmut->group_id = group_id;
mutex_init(&mmut->mmu_lock);
mmut->kctx = kctx;
mmut->pgd = KBASE_MMU_INVALID_PGD_ADDRESS;
- /* Preallocate MMU depth of 3 pages for mmu_teardown_level to use */
- for (level = MIDGARD_MMU_TOPLEVEL;
- level < MIDGARD_MMU_BOTTOMLEVEL; level++) {
- mmut->mmu_teardown_pages[level] =
- kmalloc(PAGE_SIZE, GFP_KERNEL);
-
- if (!mmut->mmu_teardown_pages[level]) {
- kbase_mmu_term(kbdev, mmut);
- return -ENOMEM;
- }
- }
-
/* We allocate pages into the kbdev memory pool, then
* kbase_mmu_alloc_pgd will allocate out of that pool. This is done to
* avoid allocations from the kernel happening with the lock held.
@@ -3414,7 +3610,7 @@ int kbase_mmu_init(struct kbase_device *const kbdev,
err = kbase_mem_pool_grow(
&kbdev->mem_pools.small[mmut->group_id],
- MIDGARD_MMU_BOTTOMLEVEL);
+ MIDGARD_MMU_BOTTOMLEVEL, kctx ? kctx->task : NULL);
if (err) {
kbase_mmu_term(kbdev, mmut);
return -ENOMEM;
@@ -3430,8 +3626,6 @@ int kbase_mmu_init(struct kbase_device *const kbdev,
void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
{
- int level;
-
WARN((mmut->kctx) && (mmut->kctx->as_nr != KBASEP_AS_NR_INVALID),
"kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before tearing down MMU tables",
mmut->kctx->tgid, mmut->kctx->id);
@@ -3445,13 +3639,6 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, 0);
}
- for (level = MIDGARD_MMU_TOPLEVEL;
- level < MIDGARD_MMU_BOTTOMLEVEL; level++) {
- if (!mmut->mmu_teardown_pages[level])
- break;
- kfree(mmut->mmu_teardown_pages[level]);
- }
-
mutex_destroy(&mmut->mmu_lock);
}
diff --git a/mali_kbase/mmu/mali_kbase_mmu.h b/mali_kbase/mmu/mali_kbase_mmu.h
index 2b3e6c0..49b42e0 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.h
+++ b/mali_kbase/mmu/mali_kbase_mmu.h
@@ -152,21 +152,71 @@ u64 kbase_mmu_create_ate(struct kbase_device *kbdev,
struct tagged_addr phy, unsigned long flags, int level, int group_id);
int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
- const u64 start_vpfn, struct tagged_addr *phys, size_t nr,
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
unsigned long flags, int group_id, u64 *dirty_pgds,
struct kbase_va_region *reg, bool ignore_page_migration);
int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr,
int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info,
struct kbase_va_region *reg, bool ignore_page_migration);
-int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
- struct tagged_addr phys, size_t nr,
- unsigned long flags, int group_id,
- enum kbase_caller_mmu_sync_info mmu_sync_info);
+int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int as_nr, int group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
+ struct kbase_va_region *reg);
+int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int as_nr, int group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
+ struct kbase_va_region *reg);
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, struct tagged_addr phys,
+ size_t nr, unsigned long flags, int group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
+ bool ignore_page_migration);
+int kbase_mmu_insert_single_imported_page(struct kbase_context *kctx, u64 vpfn,
+ struct tagged_addr phys, size_t nr, unsigned long flags,
+ int group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info);
+int kbase_mmu_insert_single_aliased_page(struct kbase_context *kctx, u64 vpfn,
+ struct tagged_addr phys, size_t nr, unsigned long flags,
+ int group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info);
+/**
+ * kbase_mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table
+ *
+ * @kbdev: Pointer to kbase device.
+ * @mmut: Pointer to GPU MMU page table.
+ * @vpfn: Start page frame number of the GPU virtual pages to unmap.
+ * @phys: Array of physical pages currently mapped to the virtual
+ * pages to unmap, or NULL. This is used for GPU cache maintenance
+ * and page migration support.
+ * @nr_phys_pages: Number of physical pages to flush.
+ * @nr_virt_pages: Number of virtual pages whose PTEs should be destroyed.
+ * @as_nr: Address space number, for GPU cache maintenance operations
+ * that happen outside a specific kbase context.
+ * @ignore_page_migration: Whether page migration metadata should be ignored.
+ *
+ * We actually discard the ATE and free the page table pages if no valid entries
+ * exist in the PGD.
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ *
+ * The @p phys pointer to physical pages is not necessary for unmapping virtual memory,
+ * but it is used for fine-grained GPU cache maintenance. If @p phys is NULL, GPU cache
+ * maintenance is done as usual, that is, the whole GPU cache is invalidated instead of
+ * specific physical address ranges.
+ *
+ * Return: 0 on success, otherwise an error code.
+ */
int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
- struct tagged_addr *phys, size_t nr, int as_nr,
- bool ignore_page_migration);
+ struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages,
+ int as_nr, bool ignore_page_migration);
+
int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
struct tagged_addr *phys, size_t nr,
unsigned long flags, int const group_id);
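Editor's note: to make the cache-maintenance contract in the new kernel-doc for kbase_mmu_teardown_pages() above easier to follow, here is a minimal caller sketch. It is not part of the patch; the helper name and the use of kctx->mmu and kctx->as_nr are illustrative assumptions.

    /* Illustrative only: unmap nr pages starting at vpfn.
     * Passing the phys array lets the driver flush only those physical pages;
     * passing NULL instead falls back to invalidating the whole GPU cache.
     */
    static int example_unmap_region(struct kbase_device *kbdev, struct kbase_context *kctx,
                                    u64 vpfn, struct tagged_addr *phys, size_t nr)
    {
            return kbase_mmu_teardown_pages(kbdev, &kctx->mmu, vpfn, phys,
                                            nr /* nr_phys_pages */, nr /* nr_virt_pages */,
                                            kctx->as_nr, false /* ignore_page_migration */);
    }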
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
index 68bc697..1a892dc 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -24,6 +24,7 @@
#include <mali_kbase.h>
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_mem.h>
+#include <mali_kbase_reset_gpu.h>
#include <mmu/mali_kbase_mmu_hw.h>
#include <tl/mali_kbase_tracepoints.h>
#include <linux/delay.h>
@@ -156,37 +157,60 @@ static int lock_region(struct kbase_gpu_props const *gpu_props, u64 *lockaddr,
return 0;
}
-static int wait_ready(struct kbase_device *kbdev,
- unsigned int as_nr)
+/**
+ * wait_ready() - Wait for previously issued MMU command to complete.
+ *
+ * @kbdev: Kbase device on which to wait for the MMU command to complete.
+ * @as_nr: Address space on which to wait for the MMU command to complete.
+ *
+ * The GPU is reset if the wait for the previously issued command fails.
+ *
+ * Return: 0 on successful completion, negative error code on failure.
+ */
+static int wait_ready(struct kbase_device *kbdev, unsigned int as_nr)
{
- u32 max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+ const ktime_t wait_loop_start = ktime_get_raw();
+ const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms;
+ s64 diff;
- /* Wait for the MMU status to indicate there is no active command. */
- while (--max_loops &&
- kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)) &
- AS_STATUS_AS_ACTIVE) {
- ;
- }
+ if (unlikely(kbdev->as[as_nr].is_unresponsive))
+ return -EBUSY;
- if (WARN_ON_ONCE(max_loops == 0)) {
- dev_err(kbdev->dev,
- "AS_ACTIVE bit stuck for as %u, might be caused by slow/unstable GPU clock or possible faulty FPGA connector",
- as_nr);
- return -1;
- }
+ do {
+ unsigned int i;
- return 0;
+ for (i = 0; i < 1000; i++) {
+ /* Wait for the MMU status to indicate there is no active command */
+ if (!(kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)) &
+ AS_STATUS_AS_ACTIVE))
+ return 0;
+ }
+
+ diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start));
+ } while (diff < mmu_as_inactive_wait_time_ms);
+
+ dev_err(kbdev->dev,
+ "AS_ACTIVE bit stuck for as %u. Might be caused by unstable GPU clk/pwr or faulty system",
+ as_nr);
+ kbdev->as[as_nr].is_unresponsive = true;
+ if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+ kbase_reset_gpu_locked(kbdev);
+
+ return -ETIMEDOUT;
}
static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd)
{
- int status;
-
/* write AS_COMMAND when MMU is ready to accept another command */
- status = wait_ready(kbdev, as_nr);
- if (status == 0)
+ const int status = wait_ready(kbdev, as_nr);
+
+ if (likely(status == 0))
kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd);
- else {
+ else if (status == -EBUSY) {
+ dev_dbg(kbdev->dev,
+ "Skipped the wait for AS_ACTIVE bit for as %u, before sending MMU command %u",
+ as_nr, cmd);
+ } else {
dev_err(kbdev->dev,
"Wait for AS_ACTIVE bit failed for as %u, before sending MMU command %u",
as_nr, cmd);
@@ -259,17 +283,21 @@ static int apply_hw_issue_GPU2019_3901_wa(struct kbase_device *kbdev, u32 *mmu_c
/* Wait for the LOCK MMU command to complete, issued by the caller */
ret = wait_ready(kbdev, as_nr);
- if (ret)
+ if (unlikely(ret))
return ret;
ret = kbase_gpu_cache_flush_and_busy_wait(kbdev,
GPU_COMMAND_CACHE_CLN_INV_LSC);
- if (ret)
+ if (unlikely(ret))
return ret;
ret = wait_cores_power_trans_complete(kbdev);
- if (ret)
+ if (unlikely(ret)) {
+ if (kbase_prepare_to_reset_gpu_locked(kbdev,
+ RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+ kbase_reset_gpu_locked(kbdev);
return ret;
+ }
/* As LSC is guaranteed to have been flushed we can use FLUSH_PT
* MMU command to only flush the L2.
@@ -397,12 +425,21 @@ static int mmu_hw_do_lock_no_wait(struct kbase_device *kbdev, struct kbase_as *a
ret = mmu_hw_set_lock_addr(kbdev, as->number, lock_addr, op_param);
- if (!ret)
- write_cmd(kbdev, as->number, AS_COMMAND_LOCK);
+ if (likely(!ret))
+ ret = write_cmd(kbdev, as->number, AS_COMMAND_LOCK);
return ret;
}
+/**
+ * mmu_hw_do_lock - Issue LOCK command to the MMU and wait for its completion.
+ *
+ * @kbdev: Kbase device to issue the MMU operation on.
+ * @as: Address space to issue the MMU operation on.
+ * @op_param: Pointer to a struct containing information about the MMU operation.
+ *
+ * Return: 0 if issuing the LOCK command was successful, otherwise an error code.
+ */
static int mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param)
{
@@ -443,10 +480,10 @@ int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *
ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK);
/* Wait for UNLOCK command to complete */
- if (!ret)
+ if (likely(!ret))
ret = wait_ready(kbdev, as->number);
- if (!ret) {
+ if (likely(!ret)) {
u64 lock_addr = 0x0;
/* read MMU_AS_CONTROL.LOCKADDR register */
lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI))
@@ -478,6 +515,16 @@ int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as,
return ret;
}
+/**
+ * mmu_hw_do_flush - Flush MMU and wait for its completion.
+ *
+ * @kbdev: Kbase device to issue the MMU operation on.
+ * @as: Address space to issue the MMU operation on.
+ * @op_param: Pointer to a struct containing information about the MMU operation.
+ * @hwaccess_locked: Flag to indicate if the hwaccess lock is already held by the caller.
+ *
+ * Return: 0 if flushing MMU was successful, otherwise an error code.
+ */
static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param, bool hwaccess_locked)
{
@@ -508,12 +555,9 @@ static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
return ret;
#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_NO_MALI)
- /* WA for the BASE_HW_ISSUE_GPU2019_3901. No runtime check is used here
- * as the WA is applicable to all CSF GPUs where FLUSH_MEM/PT command is
- * supported, and this function doesn't gets called for the GPUs where
- * FLUSH_MEM/PT command is deprecated.
- */
- if (mmu_cmd == AS_COMMAND_FLUSH_MEM) {
+ /* WA for the BASE_HW_ISSUE_GPU2019_3901. */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3901) &&
+ mmu_cmd == AS_COMMAND_FLUSH_MEM) {
if (!hwaccess_locked) {
unsigned long flags = 0;
@@ -529,12 +573,13 @@ static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
}
#endif
- write_cmd(kbdev, as->number, mmu_cmd);
+ ret = write_cmd(kbdev, as->number, mmu_cmd);
/* Wait for the command to complete */
- ret = wait_ready(kbdev, as->number);
+ if (likely(!ret))
+ ret = wait_ready(kbdev, as->number);
- if (!ret)
+ if (likely(!ret))
mmu_command_instr(kbdev, op_param->kctx_id, mmu_cmd, lock_addr,
op_param->mmu_sync_info);
diff --git a/mali_kbase/tests/build.bp b/mali_kbase/tests/build.bp
index 7abae23..5581ba9 100644
--- a/mali_kbase/tests/build.bp
+++ b/mali_kbase/tests/build.bp
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -38,3 +38,9 @@ bob_defaults {
kbuild_options: ["CONFIG_UNIT_TEST_KERNEL_MODULES=y"],
},
}
+
+bob_defaults {
+ name: "kernel_unit_tests",
+ add_to_alias: ["unit_tests"],
+ srcs: [".*_unit_test/"],
+}
diff --git a/mali_kbase/thirdparty/mali_kbase_mmap.c b/mali_kbase/thirdparty/mali_kbase_mmap.c
index 34d2223..1e636b9 100644
--- a/mali_kbase/thirdparty/mali_kbase_mmap.c
+++ b/mali_kbase/thirdparty/mali_kbase_mmap.c
@@ -10,6 +10,7 @@
*/
#include "linux/mman.h"
+#include <linux/version_compat_defs.h>
#include <mali_kbase.h>
/* mali_kbase_mmap.c
@@ -90,7 +91,6 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
if ((*gap_end < info->low_limit) || (*gap_end < gap_start))
return false;
-
return true;
}
@@ -132,6 +132,7 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info
*info, bool is_shader_code, bool is_same_4gb_page)
{
+#if (KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE)
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long length, low_limit, high_limit, gap_start, gap_end;
@@ -225,7 +226,37 @@ check_current:
}
}
}
+#else
+ unsigned long length, high_limit, gap_start, gap_end;
+
+ MA_STATE(mas, &current->mm->mm_mt, 0, 0);
+ /* Adjust search length to account for worst case alignment overhead */
+ length = info->length + info->align_mask;
+ if (length < info->length)
+ return -ENOMEM;
+
+ /*
+ * Adjust search limits by the desired length.
+ * See implementation comment at top of unmapped_area().
+ */
+ gap_end = info->high_limit;
+ if (gap_end < length)
+ return -ENOMEM;
+ high_limit = gap_end - length;
+ if (info->low_limit > high_limit)
+ return -ENOMEM;
+
+ while (true) {
+ if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, length))
+ return -ENOMEM;
+ gap_end = mas.last + 1;
+ gap_start = mas.min;
+
+ if (align_and_check(&gap_end, gap_start, info, is_shader_code, is_same_4gb_page))
+ return gap_end;
+ }
+#endif
return -ENOMEM;
}
@@ -242,8 +273,13 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
struct vm_unmapped_area_info info;
unsigned long align_offset = 0;
unsigned long align_mask = 0;
+#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
+ unsigned long high_limit = arch_get_mmap_base(addr, mm->mmap_base);
+ unsigned long low_limit = max_t(unsigned long, PAGE_SIZE, kbase_mmap_min_addr);
+#else
unsigned long high_limit = mm->mmap_base;
unsigned long low_limit = PAGE_SIZE;
+#endif
int cpu_va_bits = BITS_PER_LONG;
int gpu_pc_bits =
kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
@@ -270,6 +306,13 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
struct kbase_reg_zone *zone =
kbase_ctx_reg_zone_get_nolock(kctx, KBASE_REG_ZONE_SAME_VA);
u64 same_va_end_addr = kbase_reg_zone_end_pfn(zone) << PAGE_SHIFT;
+#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
+ const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
+
+ /* requested length too big for entire address space */
+ if (len > mmap_end - kbase_mmap_min_addr)
+ return -ENOMEM;
+#endif
/* err on fixed address */
if ((flags & MAP_FIXED) || addr)
@@ -282,7 +325,7 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
high_limit =
- min_t(unsigned long, mm->mmap_base, same_va_end_addr);
+ min_t(unsigned long, high_limit, same_va_end_addr);
/* If there's enough (> 33 bits) of GPU VA space, align
* to 2MB boundaries.
@@ -359,9 +402,15 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base &&
high_limit < same_va_end_addr) {
+#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
+ /* Retry above TASK_UNMAPPED_BASE */
+ info.low_limit = TASK_UNMAPPED_BASE;
+ info.high_limit = min_t(u64, mmap_end, same_va_end_addr);
+#else
/* Retry above mmap_base */
info.low_limit = mm->mmap_base;
info.high_limit = min_t(u64, TASK_SIZE, same_va_end_addr);
+#endif
ret = kbase_unmapped_area_topdown(&info, is_shader_code,
is_same_4gb_page);
diff --git a/mali_kbase/tl/mali_kbase_timeline.c b/mali_kbase/tl/mali_kbase_timeline.c
index 09de3f0..20356d6 100644
--- a/mali_kbase/tl/mali_kbase_timeline.c
+++ b/mali_kbase/tl/mali_kbase_timeline.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -24,8 +24,6 @@
#include "mali_kbase_tracepoints.h"
#include <mali_kbase.h>
-#include <mali_kbase_jm.h>
-
#include <linux/atomic.h>
#include <linux/file.h>
#include <linux/mutex.h>
diff --git a/mali_kbase/tl/mali_kbase_timeline_io.c b/mali_kbase/tl/mali_kbase_timeline_io.c
index 359d063..ae57006 100644
--- a/mali_kbase/tl/mali_kbase_timeline_io.c
+++ b/mali_kbase/tl/mali_kbase_timeline_io.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -35,6 +35,47 @@
#include <uapi/linux/eventpoll.h>
#endif
+static int kbase_unprivileged_global_profiling;
+
+/**
+ * kbase_unprivileged_global_profiling_set - set permissions for unprivileged processes
+ *
+ * @val: String containing value to set. Only strings representing positive
+ * integers are accepted as valid; any non-positive integer (including 0)
+ * is rejected.
+ * @kp: Module parameter associated with this method.
+ *
+ * This method can only be used to enable permissions for unprivileged processes
+ * if they are currently disabled: for this reason, only strings representing
+ * positive integers are accepted. Since permissions cannot be disabled once
+ * they have been set, any non-positive integer (including 0) is rejected.
+ *
+ * Return: 0 on success, otherwise an error code.
+ */
+static int kbase_unprivileged_global_profiling_set(const char *val, const struct kernel_param *kp)
+{
+ int new_val;
+ int ret = kstrtoint(val, 0, &new_val);
+
+ if (ret == 0) {
+ if (new_val < 1)
+ return -EINVAL;
+
+ kbase_unprivileged_global_profiling = 1;
+ }
+
+ return ret;
+}
+
+static const struct kernel_param_ops kbase_global_unprivileged_profiling_ops = {
+ .get = param_get_int,
+ .set = kbase_unprivileged_global_profiling_set,
+};
+
+module_param_cb(kbase_unprivileged_global_profiling, &kbase_global_unprivileged_profiling_ops,
+ &kbase_unprivileged_global_profiling, 0600);
+
/* The timeline stream file operations functions. */
static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer,
size_t size, loff_t *f_pos);
@@ -43,6 +84,15 @@ static int kbasep_timeline_io_release(struct inode *inode, struct file *filp);
static int kbasep_timeline_io_fsync(struct file *filp, loff_t start, loff_t end,
int datasync);
+static bool timeline_is_permitted(void)
+{
+#if KERNEL_VERSION(5, 8, 0) <= LINUX_VERSION_CODE
+ return kbase_unprivileged_global_profiling || perfmon_capable();
+#else
+ return kbase_unprivileged_global_profiling || capable(CAP_SYS_ADMIN);
+#endif
+}
+
/**
* kbasep_timeline_io_packet_pending - check timeline streams for pending
* packets
@@ -328,6 +378,9 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags)
};
int err;
+ if (!timeline_is_permitted())
+ return -EPERM;
+
if (WARN_ON(!kbdev) || (flags & ~BASE_TLSTREAM_FLAGS_MASK))
return -EINVAL;
@@ -371,7 +424,7 @@ void kbase_timeline_io_debugfs_init(struct kbase_device *const kbdev)
if (WARN_ON(!kbdev) || WARN_ON(IS_ERR_OR_NULL(kbdev->mali_debugfs_directory)))
return;
- file = debugfs_create_file("tlstream", 0444, kbdev->mali_debugfs_directory, kbdev,
+ file = debugfs_create_file("tlstream", 0400, kbdev->mali_debugfs_directory, kbdev,
&kbasep_tlstream_debugfs_fops);
if (IS_ERR_OR_NULL(file))
diff --git a/mali_kbase/tl/mali_kbase_tracepoints.c b/mali_kbase/tl/mali_kbase_tracepoints.c
index e8a74e9..f62c755 100644
--- a/mali_kbase/tl/mali_kbase_tracepoints.c
+++ b/mali_kbase/tl/mali_kbase_tracepoints.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -87,7 +87,9 @@ enum tl_msg_id_obj {
KBASE_TL_KBASE_GPUCMDQUEUE_KICK,
KBASE_TL_KBASE_DEVICE_PROGRAM_CSG,
KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG,
- KBASE_TL_KBASE_DEVICE_HALT_CSG,
+ KBASE_TL_KBASE_DEVICE_HALTING_CSG,
+ KBASE_TL_KBASE_DEVICE_SUSPEND_CSG,
+ KBASE_TL_KBASE_DEVICE_CSG_IDLE,
KBASE_TL_KBASE_NEW_CTX,
KBASE_TL_KBASE_DEL_CTX,
KBASE_TL_KBASE_CTX_ASSIGN_AS,
@@ -98,6 +100,8 @@ enum tl_msg_id_obj {
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT,
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT,
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET,
+ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION,
+ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION,
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT,
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT,
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE,
@@ -116,6 +120,9 @@ enum tl_msg_id_obj {
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET,
+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START,
+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END,
+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START,
@@ -360,13 +367,21 @@ enum tl_msg_id_obj {
TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, \
"CSG is programmed to a slot", \
"@IIIII", \
- "kbase_device_id,kernel_ctx_id,gpu_cmdq_grp_handle,kbase_device_csg_slot_index,kbase_device_csg_slot_resumed") \
+ "kbase_device_id,kernel_ctx_id,gpu_cmdq_grp_handle,kbase_device_csg_slot_index,kbase_device_csg_slot_resuming") \
TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, \
"CSG is deprogrammed from a slot", \
"@II", \
"kbase_device_id,kbase_device_csg_slot_index") \
- TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_HALT_CSG, \
- "CSG is halted", \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_HALTING_CSG, \
+ "CSG is halting", \
+ "@III", \
+ "kbase_device_id,kbase_device_csg_slot_index,kbase_device_csg_slot_suspending") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_SUSPEND_CSG, \
+ "CSG is suspended", \
+ "@II", \
+ "kbase_device_id,kbase_device_csg_slot_index") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_CSG_IDLE, \
+ "KBase device is notified that CSG is idle.", \
"@II", \
"kbase_device_id,kbase_device_csg_slot_index") \
TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_CTX, \
@@ -404,11 +419,19 @@ enum tl_msg_id_obj {
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT, \
"KCPU Queue enqueues Wait on Cross Queue Sync Object", \
"@pLII", \
- "kcpu_queue,cqs_obj_gpu_addr,cqs_obj_compare_value,cqs_obj_inherit_error") \
+ "kcpu_queue,cqs_obj_gpu_addr,compare_value,inherit_error") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET, \
"KCPU Queue enqueues Set on Cross Queue Sync Object", \
"@pL", \
"kcpu_queue,cqs_obj_gpu_addr") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION, \
+ "KCPU Queue enqueues Wait Operation on Cross Queue Sync Object", \
+ "@pLLIII", \
+ "kcpu_queue,cqs_obj_gpu_addr,compare_value,condition,data_type,inherit_error") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION, \
+ "KCPU Queue enqueues Set Operation on Cross Queue Sync Object", \
+ "@pLLII", \
+ "kcpu_queue,cqs_obj_gpu_addr,value,operation,data_type") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT, \
"KCPU Queue enqueues Map Import", \
"@pL", \
@@ -481,6 +504,18 @@ enum tl_msg_id_obj {
"KCPU Queue executes a Set on Cross Queue Sync Object", \
"@pI", \
"kcpu_queue,execute_error") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START, \
+ "KCPU Queue starts a Wait Operation on Cross Queue Sync Object", \
+ "@p", \
+ "kcpu_queue") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END, \
+ "KCPU Queue ends a Wait Operation on Cross Queue Sync Object", \
+ "@pI", \
+ "kcpu_queue,execute_error") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION, \
+ "KCPU Queue executes a Set Operation on Cross Queue Sync Object", \
+ "@pI", \
+ "kcpu_queue,execute_error") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, \
"KCPU Queue starts a Map Import", \
"@p", \
@@ -2130,7 +2165,7 @@ void __kbase_tlstream_tl_kbase_device_program_csg(
u32 kernel_ctx_id,
u32 gpu_cmdq_grp_handle,
u32 kbase_device_csg_slot_index,
- u32 kbase_device_csg_slot_resumed
+ u32 kbase_device_csg_slot_resuming
)
{
const u32 msg_id = KBASE_TL_KBASE_DEVICE_PROGRAM_CSG;
@@ -2139,7 +2174,7 @@ void __kbase_tlstream_tl_kbase_device_program_csg(
+ sizeof(kernel_ctx_id)
+ sizeof(gpu_cmdq_grp_handle)
+ sizeof(kbase_device_csg_slot_index)
- + sizeof(kbase_device_csg_slot_resumed)
+ + sizeof(kbase_device_csg_slot_resuming)
;
char *buffer;
unsigned long acq_flags;
@@ -2158,7 +2193,7 @@ void __kbase_tlstream_tl_kbase_device_program_csg(
pos = kbasep_serialize_bytes(buffer,
pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index));
pos = kbasep_serialize_bytes(buffer,
- pos, &kbase_device_csg_slot_resumed, sizeof(kbase_device_csg_slot_resumed));
+ pos, &kbase_device_csg_slot_resuming, sizeof(kbase_device_csg_slot_resuming));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
@@ -2190,13 +2225,71 @@ void __kbase_tlstream_tl_kbase_device_deprogram_csg(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_tl_kbase_device_halt_csg(
+void __kbase_tlstream_tl_kbase_device_halting_csg(
+ struct kbase_tlstream *stream,
+ u32 kbase_device_id,
+ u32 kbase_device_csg_slot_index,
+ u32 kbase_device_csg_slot_suspending
+)
+{
+ const u32 msg_id = KBASE_TL_KBASE_DEVICE_HALTING_CSG;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kbase_device_id)
+ + sizeof(kbase_device_csg_slot_index)
+ + sizeof(kbase_device_csg_slot_suspending)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kbase_device_id, sizeof(kbase_device_id));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kbase_device_csg_slot_suspending, sizeof(kbase_device_csg_slot_suspending));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_kbase_device_suspend_csg(
+ struct kbase_tlstream *stream,
+ u32 kbase_device_id,
+ u32 kbase_device_csg_slot_index
+)
+{
+ const u32 msg_id = KBASE_TL_KBASE_DEVICE_SUSPEND_CSG;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kbase_device_id)
+ + sizeof(kbase_device_csg_slot_index)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kbase_device_id, sizeof(kbase_device_id));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_kbase_device_csg_idle(
struct kbase_tlstream *stream,
u32 kbase_device_id,
u32 kbase_device_csg_slot_index
)
{
- const u32 msg_id = KBASE_TL_KBASE_DEVICE_HALT_CSG;
+ const u32 msg_id = KBASE_TL_KBASE_DEVICE_CSG_IDLE;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kbase_device_id)
+ sizeof(kbase_device_csg_slot_index)
@@ -2433,16 +2526,16 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait(
struct kbase_tlstream *stream,
const void *kcpu_queue,
u64 cqs_obj_gpu_addr,
- u32 cqs_obj_compare_value,
- u32 cqs_obj_inherit_error
+ u32 compare_value,
+ u32 inherit_error
)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kcpu_queue)
+ sizeof(cqs_obj_gpu_addr)
- + sizeof(cqs_obj_compare_value)
- + sizeof(cqs_obj_inherit_error)
+ + sizeof(compare_value)
+ + sizeof(inherit_error)
;
char *buffer;
unsigned long acq_flags;
@@ -2457,9 +2550,9 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait(
pos = kbasep_serialize_bytes(buffer,
pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr));
pos = kbasep_serialize_bytes(buffer,
- pos, &cqs_obj_compare_value, sizeof(cqs_obj_compare_value));
+ pos, &compare_value, sizeof(compare_value));
pos = kbasep_serialize_bytes(buffer,
- pos, &cqs_obj_inherit_error, sizeof(cqs_obj_inherit_error));
+ pos, &inherit_error, sizeof(inherit_error));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
@@ -2491,6 +2584,88 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait_operation(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ u64 cqs_obj_gpu_addr,
+ u64 compare_value,
+ u32 condition,
+ u32 data_type,
+ u32 inherit_error
+)
+{
+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kcpu_queue)
+ + sizeof(cqs_obj_gpu_addr)
+ + sizeof(compare_value)
+ + sizeof(condition)
+ + sizeof(data_type)
+ + sizeof(inherit_error)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kcpu_queue, sizeof(kcpu_queue));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &compare_value, sizeof(compare_value));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &condition, sizeof(condition));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &data_type, sizeof(data_type));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &inherit_error, sizeof(inherit_error));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set_operation(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ u64 cqs_obj_gpu_addr,
+ u64 value,
+ u32 operation,
+ u32 data_type
+)
+{
+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kcpu_queue)
+ + sizeof(cqs_obj_gpu_addr)
+ + sizeof(value)
+ + sizeof(operation)
+ + sizeof(data_type)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kcpu_queue, sizeof(kcpu_queue));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &value, sizeof(value));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &operation, sizeof(operation));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &data_type, sizeof(data_type));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import(
struct kbase_tlstream *stream,
const void *kcpu_queue,
@@ -2981,6 +3156,83 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_start(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue
+)
+{
+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kcpu_queue)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kcpu_queue, sizeof(kcpu_queue));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_end(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ u32 execute_error
+)
+{
+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kcpu_queue)
+ + sizeof(execute_error)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kcpu_queue, sizeof(kcpu_queue));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &execute_error, sizeof(execute_error));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set_operation(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ u32 execute_error
+)
+{
+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kcpu_queue)
+ + sizeof(execute_error)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kcpu_queue, sizeof(kcpu_queue));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &execute_error, sizeof(execute_error));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start(
struct kbase_tlstream *stream,
const void *kcpu_queue
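Editor's note: as context for the new writer functions above, a minimal hypothetical call-site sketch (not part of the patch; the device-id expression and slot variable are placeholders) showing how they are normally reached through the corresponding KBASE_TLSTREAM_* macros declared in mali_kbase_tracepoints.h:

    /* Illustrative only: report that the CSG on a given slot has been suspended.
     * The macro checks kbdev->timeline_flags for BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS
     * before serializing the event into the object stream.
     */
    static void example_report_csg_suspend(struct kbase_device *kbdev, u32 csg_slot_index)
    {
            u32 device_id = kbdev->id; /* placeholder for the physical device identifier */

            KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG(kbdev, device_id, csg_slot_index);
    }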
diff --git a/mali_kbase/tl/mali_kbase_tracepoints.h b/mali_kbase/tl/mali_kbase_tracepoints.h
index 586fe67..f1f4761 100644
--- a/mali_kbase/tl/mali_kbase_tracepoints.h
+++ b/mali_kbase/tl/mali_kbase_tracepoints.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -408,7 +408,7 @@ void __kbase_tlstream_tl_kbase_device_program_csg(
u32 kernel_ctx_id,
u32 gpu_cmdq_grp_handle,
u32 kbase_device_csg_slot_index,
- u32 kbase_device_csg_slot_resumed
+ u32 kbase_device_csg_slot_resuming
);
void __kbase_tlstream_tl_kbase_device_deprogram_csg(
@@ -417,7 +417,20 @@ void __kbase_tlstream_tl_kbase_device_deprogram_csg(
u32 kbase_device_csg_slot_index
);
-void __kbase_tlstream_tl_kbase_device_halt_csg(
+void __kbase_tlstream_tl_kbase_device_halting_csg(
+ struct kbase_tlstream *stream,
+ u32 kbase_device_id,
+ u32 kbase_device_csg_slot_index,
+ u32 kbase_device_csg_slot_suspending
+);
+
+void __kbase_tlstream_tl_kbase_device_suspend_csg(
+ struct kbase_tlstream *stream,
+ u32 kbase_device_id,
+ u32 kbase_device_csg_slot_index
+);
+
+void __kbase_tlstream_tl_kbase_device_csg_idle(
struct kbase_tlstream *stream,
u32 kbase_device_id,
u32 kbase_device_csg_slot_index
@@ -474,8 +487,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait(
struct kbase_tlstream *stream,
const void *kcpu_queue,
u64 cqs_obj_gpu_addr,
- u32 cqs_obj_compare_value,
- u32 cqs_obj_inherit_error
+ u32 compare_value,
+ u32 inherit_error
);
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set(
@@ -484,6 +497,25 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set(
u64 cqs_obj_gpu_addr
);
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait_operation(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ u64 cqs_obj_gpu_addr,
+ u64 compare_value,
+ u32 condition,
+ u32 data_type,
+ u32 inherit_error
+);
+
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set_operation(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ u64 cqs_obj_gpu_addr,
+ u64 value,
+ u32 operation,
+ u32 data_type
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import(
struct kbase_tlstream *stream,
const void *kcpu_queue,
@@ -593,6 +625,23 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set(
u32 execute_error
);
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_start(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue
+);
+
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_end(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ u32 execute_error
+);
+
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set_operation(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ u32 execute_error
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start(
struct kbase_tlstream *stream,
const void *kcpu_queue
@@ -2026,7 +2075,7 @@ struct kbase_tlstream;
* @kernel_ctx_id: Unique ID for the KBase Context
* @gpu_cmdq_grp_handle: GPU Command Queue Group handle which will match userspace
* @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed
- * @kbase_device_csg_slot_resumed: Whether the csg is being resumed
+ * @kbase_device_csg_slot_resuming: Whether the csg is being resumed
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( \
@@ -2035,7 +2084,7 @@ struct kbase_tlstream;
kernel_ctx_id, \
gpu_cmdq_grp_handle, \
kbase_device_csg_slot_index, \
- kbase_device_csg_slot_resumed \
+ kbase_device_csg_slot_resuming \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
@@ -2046,7 +2095,7 @@ struct kbase_tlstream;
kernel_ctx_id, \
gpu_cmdq_grp_handle, \
kbase_device_csg_slot_index, \
- kbase_device_csg_slot_resumed \
+ kbase_device_csg_slot_resuming \
); \
} while (0)
#else
@@ -2056,7 +2105,7 @@ struct kbase_tlstream;
kernel_ctx_id, \
gpu_cmdq_grp_handle, \
kbase_device_csg_slot_index, \
- kbase_device_csg_slot_resumed \
+ kbase_device_csg_slot_resuming \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
@@ -2066,7 +2115,7 @@ struct kbase_tlstream;
*
* @kbdev: Kbase device
* @kbase_device_id: The ID of the physical hardware
- * @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed
+ * @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG is being deprogrammed
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG( \
@@ -2093,14 +2142,49 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG - CSG is halted
+ * KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG - CSG is halting
*
* @kbdev: Kbase device
* @kbase_device_id: The ID of the physical hardware
- * @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed
+ * @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG is being halted
+ * @kbase_device_csg_slot_suspending: Whether the csg is being suspended
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG( \
+ kbdev, \
+ kbase_device_id, \
+ kbase_device_csg_slot_index, \
+ kbase_device_csg_slot_suspending \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_device_halting_csg( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kbase_device_id, \
+ kbase_device_csg_slot_index, \
+ kbase_device_csg_slot_suspending \
+ ); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG( \
+ kbdev, \
+ kbase_device_id, \
+ kbase_device_csg_slot_index, \
+ kbase_device_csg_slot_suspending \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG - CSG is suspended
+ *
+ * @kbdev: Kbase device
+ * @kbase_device_id: The ID of the physical hardware
+ * @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG is being suspended
*/
#if MALI_USE_CSF
-#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG( \
+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( \
kbdev, \
kbase_device_id, \
kbase_device_csg_slot_index \
@@ -2108,14 +2192,45 @@ struct kbase_tlstream;
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
- __kbase_tlstream_tl_kbase_device_halt_csg( \
+ __kbase_tlstream_tl_kbase_device_suspend_csg( \
__TL_DISPATCH_STREAM(kbdev, obj), \
kbase_device_id, \
kbase_device_csg_slot_index \
); \
} while (0)
#else
-#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG( \
+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( \
+ kbdev, \
+ kbase_device_id, \
+ kbase_device_csg_slot_index \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE - KBase device is notified that CSG is idle.
+ *
+ * @kbdev: Kbase device
+ * @kbase_device_id: The ID of the physical hardware
+ * @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG has been reported idle
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE( \
+ kbdev, \
+ kbase_device_id, \
+ kbase_device_csg_slot_index \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_device_csg_idle( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kbase_device_id, \
+ kbase_device_csg_slot_index \
+ ); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE( \
kbdev, \
kbase_device_id, \
kbase_device_csg_slot_index \
@@ -2373,16 +2488,16 @@ struct kbase_tlstream;
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
* @cqs_obj_gpu_addr: CQS Object GPU pointer
- * @cqs_obj_compare_value: Semaphore value that should be exceeded for the WAIT to pass
- * @cqs_obj_inherit_error: Flag which indicates if the CQS object error state should be inherited by the queue
+ * @compare_value: Semaphore value that should be exceeded for the WAIT to pass
+ * @inherit_error: Flag which indicates if the CQS object error state should be inherited by the queue
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT( \
kbdev, \
kcpu_queue, \
cqs_obj_gpu_addr, \
- cqs_obj_compare_value, \
- cqs_obj_inherit_error \
+ compare_value, \
+ inherit_error \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
@@ -2391,8 +2506,8 @@ struct kbase_tlstream;
__TL_DISPATCH_STREAM(kbdev, obj), \
kcpu_queue, \
cqs_obj_gpu_addr, \
- cqs_obj_compare_value, \
- cqs_obj_inherit_error \
+ compare_value, \
+ inherit_error \
); \
} while (0)
#else
@@ -2400,8 +2515,8 @@ struct kbase_tlstream;
kbdev, \
kcpu_queue, \
cqs_obj_gpu_addr, \
- cqs_obj_compare_value, \
- cqs_obj_inherit_error \
+ compare_value, \
+ inherit_error \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
@@ -2438,6 +2553,96 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION - KCPU Queue enqueues Wait Operation on Cross Queue Sync Object
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ * @cqs_obj_gpu_addr: CQS Object GPU pointer
+ * @compare_value: Value that should be compared to semaphore value for the WAIT to pass
+ * @condition: Condition for unblocking WAITs on Timeline Cross Queue Sync Object (e.g. greater than, less than or equal)
+ * @data_type: Data type of a CQS Object's value
+ * @inherit_error: Flag which indicates if the CQS object error state should be inherited by the queue
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION( \
+ kbdev, \
+ kcpu_queue, \
+ cqs_obj_gpu_addr, \
+ compare_value, \
+ condition, \
+ data_type, \
+ inherit_error \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait_operation( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kcpu_queue, \
+ cqs_obj_gpu_addr, \
+ compare_value, \
+ condition, \
+ data_type, \
+ inherit_error \
+ ); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION( \
+ kbdev, \
+ kcpu_queue, \
+ cqs_obj_gpu_addr, \
+ compare_value, \
+ condition, \
+ data_type, \
+ inherit_error \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION - KCPU Queue enqueues Set Operation on Cross Queue Sync Object
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ * @cqs_obj_gpu_addr: CQS Object GPU pointer
+ * @value: Value that will be set or added to semaphore
+ * @operation: Operation type performed on semaphore value (SET or ADD)
+ * @data_type: Data type of a CQS Object's value
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION( \
+ kbdev, \
+ kcpu_queue, \
+ cqs_obj_gpu_addr, \
+ value, \
+ operation, \
+ data_type \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set_operation( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kcpu_queue, \
+ cqs_obj_gpu_addr, \
+ value, \
+ operation, \
+ data_type \
+ ); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION( \
+ kbdev, \
+ kcpu_queue, \
+ cqs_obj_gpu_addr, \
+ value, \
+ operation, \
+ data_type \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
* KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT - KCPU Queue enqueues Map Import
*
* @kbdev: Kbase device
@@ -3000,6 +3205,95 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START - KCPU Queue starts a Wait Operation on Cross Queue Sync Object
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START( \
+ kbdev, \
+ kcpu_queue \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_start( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kcpu_queue \
+ ); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START( \
+ kbdev, \
+ kcpu_queue \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END - KCPU Queue ends a Wait Operation on Cross Queue Sync Object
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END( \
+ kbdev, \
+ kcpu_queue, \
+ execute_error \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_end( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kcpu_queue, \
+ execute_error \
+ ); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END( \
+ kbdev, \
+ kcpu_queue, \
+ execute_error \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION - KCPU Queue executes a Set Operation on Cross Queue Sync Object
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION( \
+ kbdev, \
+ kcpu_queue, \
+ execute_error \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set_operation( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kcpu_queue, \
+ execute_error \
+ ); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION( \
+ kbdev, \
+ kcpu_queue, \
+ execute_error \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
* KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START - KCPU Queue starts a Map Import
*
* @kbdev: Kbase device