-rw-r--r--  common/include/linux/version_compat_defs.h  31
-rw-r--r--  common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h  11
-rw-r--r--  common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h  240
-rw-r--r--  common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h  80
-rw-r--r--  common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h  30
-rw-r--r--  common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h  6
-rw-r--r--  common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h  260
-rw-r--r--  common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h  4
-rw-r--r--  common/include/uapi/gpu/arm/midgard/mali_base_common_kernel.h  231
-rw-r--r--  common/include/uapi/gpu/arm/midgard/mali_base_kernel.h  70
-rw-r--r--  common/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h  5
-rw-r--r--  common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h  5
-rw-r--r--  common/include/uapi/gpu/arm/midgard/mali_uk.h  70
-rw-r--r--  mali_kbase/Kbuild  7
-rw-r--r--  mali_kbase/Kconfig  14
-rw-r--r--  mali_kbase/Makefile  27
-rw-r--r--  mali_kbase/Mconfig  19
-rw-r--r--  mali_kbase/arbiter/mali_kbase_arbiter_pm.c  9
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c  45
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h  16
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_devfreq.c  73
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c  21
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_instr_backend.c  69
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_hw.c  67
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_internal.h  9
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_rb.c  102
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_dummy.c  319
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_dummy.h  61
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_backend.c  9
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_ca.c  29
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_defs.h  10
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_driver.c  271
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_internal.h  33
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_metrics.c  124
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_policy.c  4
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_time.c  56
-rw-r--r--  mali_kbase/build.bp  10
-rw-r--r--  mali_kbase/context/backend/mali_kbase_context_csf.c  4
-rw-r--r--  mali_kbase/context/backend/mali_kbase_context_jm.c  2
-rw-r--r--  mali_kbase/context/mali_kbase_context.c  4
-rw-r--r--  mali_kbase/csf/Kbuild  6
-rw-r--r--  mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c  65
-rw-r--r--  mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.h  29
-rw-r--r--  mali_kbase/csf/mali_kbase_csf.c  195
-rw-r--r--  mali_kbase/csf/mali_kbase_csf.h  6
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_csg_debugfs.c  57
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_defs.h  203
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_event.c  2
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_event.h  4
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware.c  342
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware.h  67
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware_cfg.c  19
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware_cfg.h  8
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware_log.c  294
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware_log.h  56
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c  222
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c  10
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_kcpu.c  310
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_kcpu.h  19
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_kcpu_debugfs.c  8
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_registers.h  96
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_reset_gpu.c  68
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_scheduler.c  959
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_scheduler.h  64
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tiler_heap.c  462
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tiler_heap.h  35
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tiler_heap_def.h  73
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tl_reader.c  11
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_trace_buffer.c  157
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_trace_buffer.h  28
-rw-r--r--  mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_csf.h  146
-rw-r--r--  mali_kbase/debug/backend/mali_kbase_debug_ktrace_csf.c  8
-rw-r--r--  mali_kbase/debug/backend/mali_kbase_debug_ktrace_defs_csf.h  4
-rw-r--r--  mali_kbase/debug/backend/mali_kbase_debug_ktrace_jm.c  5
-rw-r--r--  mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_csf.h  144
-rw-r--r--  mali_kbase/debug/mali_kbase_debug_ktrace.c  8
-rw-r--r--  mali_kbase/debug/mali_kbase_debug_ktrace.h  14
-rw-r--r--  mali_kbase/debug/mali_kbase_debug_ktrace_defs.h  6
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_csf.c  69
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_hw_csf.c  4
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_hw_jm.c  18
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_jm.c  17
-rw-r--r--  mali_kbase/device/mali_kbase_device.c  36
-rw-r--r--  mali_kbase/device/mali_kbase_device.h  20
-rw-r--r--  mali_kbase/device/mali_kbase_device_hw.c  40
-rw-r--r--  mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c  5
-rw-r--r--  mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h  159
-rw-r--r--  mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h  19
-rw-r--r--  mali_kbase/gpu/mali_kbase_gpu_regmap.h  16
-rw-r--r--  mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_csf.c  2
-rw-r--r--  mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.c  4
-rw-r--r--  mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h  8
-rw-r--r--  mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c  39
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa.c  6
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa_debugfs.c  7
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa_simple.c  7
-rw-r--r--  mali_kbase/jm/mali_kbase_jm_defs.h  13
-rw-r--r--  mali_kbase/jm/mali_kbase_jm_js.h  42
-rw-r--r--  mali_kbase/jm/mali_kbase_js_defs.h  2
-rw-r--r--  mali_kbase/mali_base_hwconfig_features.h  7
-rw-r--r--  mali_kbase/mali_base_hwconfig_issues.h  37
-rw-r--r--  mali_kbase/mali_kbase.h  41
-rw-r--r--  mali_kbase/mali_kbase_config_defaults.h  44
-rw-r--r--  mali_kbase/mali_kbase_core_linux.c  417
-rw-r--r--  mali_kbase/mali_kbase_cs_experimental.h  8
-rw-r--r--  mali_kbase/mali_kbase_ctx_sched.c  20
-rw-r--r--  mali_kbase/mali_kbase_debug.h  6
-rw-r--r--  mali_kbase/mali_kbase_debug_job_fault.c  5
-rw-r--r--  mali_kbase/mali_kbase_debug_mem_zones.c  116
-rw-r--r--  mali_kbase/mali_kbase_debug_mem_zones.h  39
-rw-r--r--  mali_kbase/mali_kbase_defs.h  13
-rw-r--r--  mali_kbase/mali_kbase_dma_fence.c  6
-rw-r--r--  mali_kbase/mali_kbase_dma_fence.h  4
-rw-r--r--  mali_kbase/mali_kbase_dvfs_debugfs.c  6
-rw-r--r--  mali_kbase/mali_kbase_fence.h  12
-rw-r--r--  mali_kbase/mali_kbase_fence_ops.c  4
-rw-r--r--  mali_kbase/mali_kbase_gpuprops.c  220
-rw-r--r--  mali_kbase/mali_kbase_hw.c  13
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_jm.h  2
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_pm.h  4
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_csf.c  79
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_csf_if.h  12
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c  84
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_jm.c  16
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_jm_watchdog.c  8
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_gpu.c  90
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_gpu.h  39
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_gpu_narrow.c  4
-rw-r--r--  mali_kbase/mali_kbase_jd.c  55
-rw-r--r--  mali_kbase/mali_kbase_jd_debugfs.c  20
-rw-r--r--  mali_kbase/mali_kbase_kinstr_jm.c  15
-rw-r--r--  mali_kbase/mali_kbase_kinstr_jm.h  4
-rw-r--r--  mali_kbase/mali_kbase_kinstr_prfcnt.c  113
-rw-r--r--  mali_kbase/mali_kbase_kinstr_prfcnt.h  4
-rw-r--r--  mali_kbase/mali_kbase_mem.c  114
-rw-r--r--  mali_kbase/mali_kbase_mem.h  6
-rw-r--r--  mali_kbase/mali_kbase_mem_linux.c  218
-rw-r--r--  mali_kbase/mali_kbase_mem_linux.h  13
-rw-r--r--  mali_kbase/mali_kbase_mem_pool_debugfs.c  8
-rw-r--r--  mali_kbase/mali_kbase_mem_pool_group.h  6
-rw-r--r--  mali_kbase/mali_kbase_mem_profile_debugfs.c  4
-rw-r--r--  mali_kbase/mali_kbase_pbha_debugfs.c  8
-rw-r--r--  mali_kbase/mali_kbase_pbha_debugfs.h  4
-rw-r--r--  mali_kbase/mali_kbase_platform_fake.c  5
-rw-r--r--  mali_kbase/mali_kbase_pm.c  12
-rw-r--r--  mali_kbase/mali_kbase_regs_history_debugfs.c  8
-rw-r--r--  mali_kbase/mali_kbase_smc.h  4
-rw-r--r--  mali_kbase/mali_kbase_softjobs.c  28
-rw-r--r--  mali_kbase/mali_kbase_sync_android.c  2
-rw-r--r--  mali_kbase/mali_kbase_sync_file.c  9
-rw-r--r--  mali_kbase/mali_kbase_vinstr.c  16
-rw-r--r--  mali_kbase/mali_malisw.h  2
-rw-r--r--  mali_kbase/mmu/backend/mali_kbase_mmu_csf.c  6
-rw-r--r--  mali_kbase/mmu/backend/mali_kbase_mmu_jm.c  4
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.c  866
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.h  6
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_hw.h  43
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_hw_direct.c  257
-rw-r--r--  mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c  18
-rw-r--r--  mali_kbase/platform/meson/Kbuild  23
-rw-r--r--  mali_kbase/platform/meson/mali_kbase_config_meson.c  53
-rw-r--r--  mali_kbase/platform/meson/mali_kbase_config_platform.h  45
-rw-r--r--  mali_kbase/platform/meson/mali_kbase_runtime_pm.c  265
-rw-r--r--  mali_kbase/platform/pixel/pixel_gpu_sscd.c  2
-rw-r--r--  mali_kbase/tests/Kbuild  3
-rw-r--r--  mali_kbase/tests/Kconfig  14
-rw-r--r--  mali_kbase/tests/Mconfig  14
-rw-r--r--  mali_kbase/tests/kutf/kutf_helpers_user.c  4
-rw-r--r--  mali_kbase/tests/kutf/kutf_suite.c  35
-rw-r--r--  mali_kbase/tests/kutf/kutf_utils.c  4
-rw-r--r--  mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c  12
-rw-r--r--  mali_kbase/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c  2
-rw-r--r--  mali_kbase/tests/mali_kutf_mgm_integration_test/Kbuild  25
-rw-r--r--  mali_kbase/tests/mali_kutf_mgm_integration_test/build.bp  41
-rw-r--r--  mali_kbase/tests/mali_kutf_mgm_integration_test/mali_kutf_mgm_integration_test_main.c  210
-rw-r--r--  mali_kbase/tl/mali_kbase_timeline.c  166
-rw-r--r--  mali_kbase/tl/mali_kbase_timeline.h  8
-rw-r--r--  mali_kbase/tl/mali_kbase_timeline_io.c  136
-rw-r--r--  mali_kbase/tl/mali_kbase_timeline_priv.h  27
-rw-r--r--  mali_kbase/tl/mali_kbase_tracepoints.c  4
-rw-r--r--  mali_kbase/tl/mali_kbase_tracepoints.h  4
-rw-r--r--  mali_pixel/memory_group_manager.c  17
182 files changed, 7735 insertions, 3813 deletions
diff --git a/common/include/linux/version_compat_defs.h b/common/include/linux/version_compat_defs.h
new file mode 100644
index 0000000..a8e0874
--- /dev/null
+++ b/common/include/linux/version_compat_defs.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _VERSION_COMPAT_DEFS_H_
+#define _VERSION_COMPAT_DEFS_H_
+
+#include <linux/version.h>
+
+#if KERNEL_VERSION(4, 16, 0) >= LINUX_VERSION_CODE
+typedef unsigned int __poll_t;
+#endif
+
+#endif /* _VERSION_COMPAT_DEFS_H_ */
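
For context, this compat header exists so that code using the __poll_t return type builds on kernels that predate its introduction. A minimal sketch of a consumer follows; the wait queue, readiness flag and function name are hypothetical, and the header is assumed to be on the driver's include path.

#include <linux/poll.h>
#include <linux/wait.h>
#include <linux/version_compat_defs.h>

/* Hypothetical poll handler: on older kernels __poll_t comes from the compat
 * typedef above, on newer ones from <linux/types.h>, so the same code builds
 * everywhere.
 */
static DECLARE_WAIT_QUEUE_HEAD(example_waitq);	/* assumed wait queue */
static bool example_data_ready;			/* assumed readiness flag */

static __poll_t example_poll(struct file *filp, poll_table *wait)
{
	__poll_t mask = 0;

	poll_wait(filp, &example_waitq, wait);
	if (example_data_ready)
		mask |= EPOLLIN | EPOLLRDNORM;

	return mask;
}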
diff --git a/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h
index 9d677ca..613eb1f 100644
--- a/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h
+++ b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -43,11 +43,18 @@
(KBASE_DUMMY_MODEL_VALUES_PER_BLOCK * sizeof(__u32))
#define KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS 8
#define KBASE_DUMMY_MODEL_MAX_SHADER_CORES 32
-#define KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS \
+#define KBASE_DUMMY_MODEL_MAX_FIRMWARE_BLOCKS 0
+#define KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS \
(1 + 1 + KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS + KBASE_DUMMY_MODEL_MAX_SHADER_CORES)
+#define KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS \
+ (KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS + KBASE_DUMMY_MODEL_MAX_FIRMWARE_BLOCKS)
#define KBASE_DUMMY_MODEL_COUNTER_TOTAL \
(KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * \
KBASE_DUMMY_MODEL_COUNTER_PER_CORE)
+#define KBASE_DUMMY_MODEL_MAX_VALUES_PER_SAMPLE \
+ (KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * KBASE_DUMMY_MODEL_VALUES_PER_BLOCK)
+#define KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE \
+ (KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE)
#define DUMMY_IMPLEMENTATION_SHADER_PRESENT (0xFull)
#define DUMMY_IMPLEMENTATION_TILER_PRESENT (0x1ull)
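
The new maximum-size macros are plain products of the block count (1 front-end + 1 tiler + 8 memsys + 32 shader blocks = 42 hardware blocks, plus 0 firmware blocks here) and the per-block geometry. A rough host-side sketch of sizing a sample buffer from them, assuming the definitions above are visible:

#include <stdint.h>
#include <stdlib.h>

/* Illustrative helper (not from the patch): one full dummy-model sample holds
 * KBASE_DUMMY_MODEL_MAX_VALUES_PER_SAMPLE 32-bit counter values, i.e.
 * KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE bytes.
 */
static uint32_t *alloc_dummy_model_sample(void)
{
	return calloc(KBASE_DUMMY_MODEL_MAX_VALUES_PER_SAMPLE, sizeof(uint32_t));
}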
diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h
index 7f7b9dd..3b02350 100644
--- a/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h
+++ b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h
@@ -23,99 +23,16 @@
#define _UAPI_BASE_CSF_KERNEL_H_
#include <linux/types.h>
+#include "../mali_base_common_kernel.h"
-/* Memory allocation, access/hint flags.
+/* Memory allocation, access/hint flags & mask specific to CSF GPU.
*
* See base_mem_alloc_flags.
*/
-/* IN */
-/* Read access CPU side
- */
-#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0)
-
-/* Write access CPU side
- */
-#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1)
-
-/* Read access GPU side
- */
-#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2)
-
-/* Write access GPU side
- */
-#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3)
-
-/* Execute allowed on the GPU side
- */
-#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4)
-
-/* Will be permanently mapped in kernel space.
- * Flag is only allowed on allocations originating from kbase.
- */
-#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5)
-
-/* The allocation will completely reside within the same 4GB chunk in the GPU
- * virtual space.
- * Since this flag is primarily required only for the TLS memory which will
- * not be used to contain executable code and also not used for Tiler heap,
- * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags.
- */
-#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6)
-
-/* Userspace is not allowed to free this memory.
- * Flag is only allowed on allocations originating from kbase.
- */
-#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7)
-
/* Must be FIXED memory. */
#define BASE_MEM_FIXED ((base_mem_alloc_flags)1 << 8)
-/* Grow backing store on GPU Page Fault
- */
-#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9)
-
-/* Page coherence Outer shareable, if available
- */
-#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10)
-
-/* Page coherence Inner shareable
- */
-#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11)
-
-/* IN/OUT */
-/* Should be cached on the CPU, returned if actually cached
- */
-#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12)
-
-/* IN/OUT */
-/* Must have same VA on both the GPU and the CPU
- */
-#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13)
-
-/* OUT */
-/* Must call mmap to acquire a GPU address for the alloc
- */
-#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14)
-
-/* IN */
-/* Page coherence Outer shareable, required.
- */
-#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15)
-
-/* Protected memory
- */
-#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16)
-
-/* Not needed physical memory
- */
-#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17)
-
-/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the
- * addresses to be the same
- */
-#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18)
-
/* CSF event memory
*
* If Outer shareable coherence is not specified or not available, then on
@@ -131,46 +48,15 @@
#define BASE_MEM_RESERVED_BIT_20 ((base_mem_alloc_flags)1 << 20)
-/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu
- * mode. Some components within the GPU might only be able to access memory
- * that is GPU cacheable. Refer to the specific GPU implementation for more
- * details. The 3 shareability flags will be ignored for GPU uncached memory.
- * If used while importing USER_BUFFER type memory, then the import will fail
- * if the memory is not aligned to GPU and CPU cache line width.
- */
-#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21)
-
-/*
- * Bits [22:25] for group_id (0~15).
- *
- * base_mem_group_id_set() should be used to pack a memory group ID into a
- * base_mem_alloc_flags value instead of accessing the bits directly.
- * base_mem_group_id_get() should be used to extract the memory group ID from
- * a base_mem_alloc_flags value.
- */
-#define BASEP_MEM_GROUP_ID_SHIFT 22
-#define BASE_MEM_GROUP_ID_MASK \
- ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT)
-
-/* Must do CPU cache maintenance when imported memory is mapped/unmapped
- * on GPU. Currently applicable to dma-buf type only.
- */
-#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26)
-
-/* OUT */
-/* Kernel side cache sync ops required */
-#define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28)
/* Must be FIXABLE memory: its GPU VA will be determined at a later point,
* at which time it will be at a fixed GPU VA.
*/
#define BASE_MEM_FIXABLE ((base_mem_alloc_flags)1 << 29)
-/* Number of bits used as flags for base memory management
- *
- * Must be kept in sync with the base_mem_alloc_flags flags
+/* Note that the number of bits used for base_mem_alloc_flags
+ * must be less than BASE_MEM_FLAGS_NR_BITS !!!
*/
-#define BASE_MEM_FLAGS_NR_BITS 30
/* A mask of all the flags which are only valid for allocations within kbase,
* and may not be passed from user space.
@@ -178,62 +64,23 @@
#define BASEP_MEM_FLAGS_KERNEL_ONLY \
(BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE)
-/* A mask for all output bits, excluding IN/OUT bits.
- */
-#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
-
-/* A mask for all input bits, including IN/OUT bits.
- */
-#define BASE_MEM_FLAGS_INPUT_MASK \
- (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
-
/* A mask of all currently reserved flags
*/
#define BASE_MEM_FLAGS_RESERVED BASE_MEM_RESERVED_BIT_20
-#define BASEP_MEM_INVALID_HANDLE (0ul)
-#define BASE_MEM_MMU_DUMP_HANDLE (1ul << LOCAL_PAGE_SHIFT)
-#define BASE_MEM_TRACE_BUFFER_HANDLE (2ul << LOCAL_PAGE_SHIFT)
-#define BASE_MEM_MAP_TRACKING_HANDLE (3ul << LOCAL_PAGE_SHIFT)
-#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ul << LOCAL_PAGE_SHIFT)
-/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */
+/* Special base mem handles specific to CSF.
+ */
#define BASEP_MEM_CSF_USER_REG_PAGE_HANDLE (47ul << LOCAL_PAGE_SHIFT)
#define BASEP_MEM_CSF_USER_IO_PAGES_HANDLE (48ul << LOCAL_PAGE_SHIFT)
-#define BASE_MEM_COOKIE_BASE (64ul << LOCAL_PAGE_SHIFT)
-#define BASE_MEM_FIRST_FREE_ADDRESS \
- ((BITS_PER_LONG << LOCAL_PAGE_SHIFT) + BASE_MEM_COOKIE_BASE)
#define KBASE_CSF_NUM_USER_IO_PAGES_HANDLE \
((BASE_MEM_COOKIE_BASE - BASEP_MEM_CSF_USER_IO_PAGES_HANDLE) >> \
LOCAL_PAGE_SHIFT)
-/**
- * Valid set of just-in-time memory allocation flags
- */
+/* Valid set of just-in-time memory allocation flags */
#define BASE_JIT_ALLOC_VALID_FLAGS ((__u8)0)
-/* Flags to pass to ::base_context_init.
- * Flags can be ORed together to enable multiple things.
- *
- * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
- * not collide with them.
- */
-typedef __u32 base_context_create_flags;
-
-/* No flags set */
-#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0)
-
-/* Base context is embedded in a cctx object (flag used for CINSTR
- * software counter macros)
- */
-#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0)
-
-/* Base context is a 'System Monitor' context for Hardware counters.
- *
- * One important side effect of this is that job submission is disabled.
- */
-#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \
- ((base_context_create_flags)1 << 1)
+/* flags for base context specific to CSF */
/* Base context creates a CSF event notification thread.
*
@@ -242,22 +89,6 @@ typedef __u32 base_context_create_flags;
*/
#define BASE_CONTEXT_CSF_EVENT_THREAD ((base_context_create_flags)1 << 2)
-/* Bit-shift used to encode a memory group ID in base_context_create_flags
- */
-#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3)
-
-/* Bitmask used to encode a memory group ID in base_context_create_flags
- */
-#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \
- ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)
-
-/* Bitpattern describing the base_context_create_flags that can be
- * passed to the kernel
- */
-#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \
- (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \
- BASEP_CONTEXT_MMU_GROUP_ID_MASK)
-
/* Bitpattern describing the ::base_context_create_flags that can be
* passed to base_context_init()
*/
@@ -266,15 +97,7 @@ typedef __u32 base_context_create_flags;
BASE_CONTEXT_CSF_EVENT_THREAD | \
BASEP_CONTEXT_CREATE_KERNEL_FLAGS)
-/* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
- * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST)
- */
-#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0)
-
-/* Indicate that job dumping is enabled. This could affect certain timers
- * to account for the performance impact.
- */
-#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1)
+/* Flags for base tracepoint specific to CSF */
/* Enable KBase tracepoints for CSF builds */
#define BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS (1 << 2)
@@ -306,6 +129,10 @@ typedef __u32 base_context_create_flags;
*/
#define BASEP_KCPU_CQS_MAX_NUM_OBJS ((size_t)32)
+/* CSF CSI EXCEPTION_HANDLER_FLAGS */
+#define BASE_CSF_TILER_OOM_EXCEPTION_FLAG (1u << 0)
+#define BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK (BASE_CSF_TILER_OOM_EXCEPTION_FLAG)
+
/**
* enum base_kcpu_command_type - Kernel CPU queue command type.
* @BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: fence_signal,
@@ -725,4 +552,45 @@ struct base_csf_notification {
} payload;
};
+/**
+ * struct mali_base_gpu_core_props - GPU core props info
+ *
+ * @product_id: Product specific value.
+ * @version_status: Status of the GPU release. No defined values, but starts at
+ * 0 and increases by one for each release status (alpha, beta, EAC, etc.).
+ * 4 bit values (0-15).
+ * @minor_revision: Minor release number of the GPU. "P" part of an "RnPn"
+ * release number.
+ * 8 bit values (0-255).
+ * @major_revision: Major release number of the GPU. "R" part of an "RnPn"
+ * release number.
+ * 4 bit values (0-15).
+ * @padding: padding to align to 8-byte
+ * @gpu_freq_khz_max: The maximum GPU frequency. Reported to applications by
+ * clGetDeviceInfo()
+ * @log2_program_counter_size: Size of the shader program counter, in bits.
+ * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU. This
+ * is a bitpattern where a set bit indicates that the format is supported.
+ * Before using a texture format, it is recommended that the corresponding
+ * bit be checked.
+ * @gpu_available_memory_size: Theoretical maximum memory available to the GPU.
+ * It is unlikely that a client will be able to allocate all of this memory
+ * for their own purposes, but this at least provides an upper bound on the
+ * memory available to the GPU.
+ * This is required for OpenCL's clGetDeviceInfo() call when
+ * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
+ * client will not be expecting to allocate anywhere near this value.
+ */
+struct mali_base_gpu_core_props {
+ __u32 product_id;
+ __u16 version_status;
+ __u16 minor_revision;
+ __u16 major_revision;
+ __u16 padding;
+ __u32 gpu_freq_khz_max;
+ __u32 log2_program_counter_size;
+ __u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+ __u64 gpu_available_memory_size;
+};
+
#endif /* _UAPI_BASE_CSF_KERNEL_H_ */
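
The texture_features documentation above recommends checking a format's bit before use. A small sketch of such a check, written against the struct as defined here; the bit-numbering convention of fmt_bit across the TEXTURE_FEATURES_x words is an assumption for illustration.

#include <stdbool.h>

static bool gpu_supports_texture_format(const struct mali_base_gpu_core_props *props,
					unsigned int fmt_bit)
{
	unsigned int reg = fmt_bit / 32;	/* which TEXTURE_FEATURES_x word */
	unsigned int bit = fmt_bit % 32;	/* bit inside that word */

	if (reg >= BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS)
		return false;

	return (props->texture_features[reg] >> bit) & 1u;
}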
diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h
index 1794ddc..cbb7310 100644
--- a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h
+++ b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h
@@ -56,10 +56,18 @@
* - Added new Base memory allocation interface
* 1.10:
* - First release of new HW performance counters interface.
+ * 1.11:
+ * - Dummy model (no mali) backend will now clear HWC values after each sample
+ * 1.12:
+ * - Added support for incremental rendering flag in CSG create call
+ * 1.13:
+ * - Added ioctl to query a register of USER page.
+ * 1.14:
+ * - Added support for passing down the buffer descriptor VA in tiler heap init
*/
#define BASE_UK_VERSION_MAJOR 1
-#define BASE_UK_VERSION_MINOR 10
+#define BASE_UK_VERSION_MINOR 14
/**
* struct kbase_ioctl_version_check - Check version compatibility between
@@ -245,6 +253,9 @@ union kbase_ioctl_cs_queue_group_create_1_6 {
* allowed to use.
* @in.compute_max: Maximum number of compute endpoints the group is allowed
* to use.
+ * @in.csi_handlers: Flags to signal that the application intends to use CSI
+ * exception handlers in some linear buffers to deal with
+ * the given exception types.
* @in.padding: Currently unused, must be zero
* @out: Output parameters
* @out.group_handle: Handle of a newly created queue group.
@@ -261,9 +272,10 @@ union kbase_ioctl_cs_queue_group_create {
__u8 tiler_max;
__u8 fragment_max;
__u8 compute_max;
- __u8 padding[3];
+ __u8 csi_handlers;
+ __u8 padding[2];
/**
- * @reserved: Reserved
+ * @in.reserved: Reserved
*/
__u64 reserved;
} in;
@@ -353,6 +365,7 @@ struct kbase_ioctl_kcpu_queue_enqueue {
* allowed.
* @in.group_id: Group ID to be used for physical allocations.
* @in.padding: Padding
+ * @in.buf_desc_va: Buffer descriptor GPU VA for tiler heap reclaims.
* @out: Output parameters
* @out.gpu_heap_va: GPU VA (virtual address) of Heap context that was set up
* for the heap.
@@ -368,6 +381,7 @@ union kbase_ioctl_cs_tiler_heap_init {
__u16 target_in_flight;
__u8 group_id;
__u8 padding;
+ __u64 buf_desc_va;
} in;
struct {
__u64 gpu_heap_va;
@@ -379,6 +393,43 @@ union kbase_ioctl_cs_tiler_heap_init {
_IOWR(KBASE_IOCTL_TYPE, 48, union kbase_ioctl_cs_tiler_heap_init)
/**
+ * union kbase_ioctl_cs_tiler_heap_init_1_13 - Initialize chunked tiler memory heap,
+ * earlier version upto 1.13
+ * @in: Input parameters
+ * @in.chunk_size: Size of each chunk.
+ * @in.initial_chunks: Initial number of chunks that heap will be created with.
+ * @in.max_chunks: Maximum number of chunks that the heap is allowed to use.
+ * @in.target_in_flight: Number of render-passes that the driver should attempt to
+ * keep in flight for which allocation of new chunks is
+ * allowed.
+ * @in.group_id: Group ID to be used for physical allocations.
+ * @in.padding: Padding
+ * @out: Output parameters
+ * @out.gpu_heap_va: GPU VA (virtual address) of Heap context that was set up
+ * for the heap.
+ * @out.first_chunk_va: GPU VA of the first chunk allocated for the heap,
+ * actually points to the header of heap chunk and not to
+ * the low address of free memory in the chunk.
+ */
+union kbase_ioctl_cs_tiler_heap_init_1_13 {
+ struct {
+ __u32 chunk_size;
+ __u32 initial_chunks;
+ __u32 max_chunks;
+ __u16 target_in_flight;
+ __u8 group_id;
+ __u8 padding;
+ } in;
+ struct {
+ __u64 gpu_heap_va;
+ __u64 first_chunk_va;
+ } out;
+};
+
+#define KBASE_IOCTL_CS_TILER_HEAP_INIT_1_13 \
+ _IOWR(KBASE_IOCTL_TYPE, 48, union kbase_ioctl_cs_tiler_heap_init_1_13)
+
+/**
* struct kbase_ioctl_cs_tiler_heap_term - Terminate a chunked tiler heap
* instance
*
@@ -479,6 +530,29 @@ union kbase_ioctl_mem_alloc_ex {
#define KBASE_IOCTL_MEM_ALLOC_EX _IOWR(KBASE_IOCTL_TYPE, 59, union kbase_ioctl_mem_alloc_ex)
+/**
+ * union kbase_ioctl_read_user_page - Read a register of USER page
+ *
+ * @in: Input parameters.
+ * @in.offset: Register offset in USER page.
+ * @in.padding: Padding to round up to a multiple of 8 bytes, must be zero.
+ * @out: Output parameters.
+ * @out.val_lo: Value of 32bit register or the 1st half of 64bit register to be read.
+ * @out.val_hi: Value of the 2nd half of 64bit register to be read.
+ */
+union kbase_ioctl_read_user_page {
+ struct {
+ __u32 offset;
+ __u32 padding;
+ } in;
+ struct {
+ __u32 val_lo;
+ __u32 val_hi;
+ } out;
+};
+
+#define KBASE_IOCTL_READ_USER_PAGE _IOWR(KBASE_IOCTL_TYPE, 60, union kbase_ioctl_read_user_page)
+
/***************
* test ioctls *
***************/
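
A user-space sketch of driving the new KBASE_IOCTL_READ_USER_PAGE ioctl and recombining the two 32-bit halves. Only the union layout and the ioctl number come from the header above; the include line and the assumption that fd is an already-open kbase device descriptor (for example from open("/dev/mali0", O_RDWR)) are illustrative.

#include <stdint.h>
#include <sys/ioctl.h>

#include "mali_kbase_ioctl.h"	/* assumed include; pulls in the CSF ioctl header when MALI_USE_CSF=1 */

/* Returns 0 on success; for a 32-bit register, val_hi reads back as 0. */
static int read_user_page_reg(int fd, uint32_t offset, uint64_t *out)
{
	union kbase_ioctl_read_user_page args;

	args.in.offset = offset;
	args.in.padding = 0;	/* must be zero */

	if (ioctl(fd, KBASE_IOCTL_READ_USER_PAGE, &args) < 0)
		return -1;

	*out = ((uint64_t)args.out.val_hi << 32) | args.out.val_lo;
	return 0;
}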
diff --git a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h
new file mode 100644
index 0000000..75ae6a1
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_GPU_REGMAP_CSF_H_
+#define _UAPI_KBASE_GPU_REGMAP_CSF_H_
+
+/* IPA control registers */
+#define IPA_CONTROL_BASE 0x40000
+#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE + (r))
+#define STATUS 0x004 /* (RO) Status register */
+
+#endif /* _UAPI_KBASE_GPU_REGMAP_CSF_H_ */
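
These macros only add a relative offset to the IPA control block base, so the absolute offset of STATUS is 0x40000 + 0x004 = 0x40004. A driver-side sketch of a read through the usual register accessor; kbase_reg_read() and its types live in the kernel-side driver headers and are an assumption here, not part of this uapi header.

/* Illustrative only */
static u32 ipa_control_read_status(struct kbase_device *kbdev)
{
	/* IPA_CONTROL_REG(STATUS) expands to 0x40000 + 0x004 */
	return kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS));
}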
diff --git a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h
index deca665..ebe3b3e 100644
--- a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h
+++ b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,7 +22,9 @@
#ifndef _UAPI_KBASE_GPU_REGMAP_H_
#define _UAPI_KBASE_GPU_REGMAP_H_
-#if !MALI_USE_CSF
+#if MALI_USE_CSF
+#include "backend/mali_kbase_gpu_regmap_csf.h"
+#else
#include "backend/mali_kbase_gpu_regmap_jm.h"
#endif /* !MALI_USE_CSF */
diff --git a/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h b/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h
index b63575e..ae43908 100644
--- a/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h
+++ b/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h
@@ -23,100 +23,16 @@
#define _UAPI_BASE_JM_KERNEL_H_
#include <linux/types.h>
+#include "../mali_base_common_kernel.h"
-/* Memory allocation, access/hint flags.
+/* Memory allocation, access/hint flags & mask specific to JM GPU.
*
* See base_mem_alloc_flags.
*/
-/* IN */
-/* Read access CPU side
- */
-#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0)
-
-/* Write access CPU side
- */
-#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1)
-
-/* Read access GPU side
- */
-#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2)
-
-/* Write access GPU side
- */
-#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3)
-
-/* Execute allowed on the GPU side
- */
-#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4)
-
-/* Will be permanently mapped in kernel space.
- * Flag is only allowed on allocations originating from kbase.
- */
-#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5)
-
-/* The allocation will completely reside within the same 4GB chunk in the GPU
- * virtual space.
- * Since this flag is primarily required only for the TLS memory which will
- * not be used to contain executable code and also not used for Tiler heap,
- * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags.
- */
-#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6)
-
-/* Userspace is not allowed to free this memory.
- * Flag is only allowed on allocations originating from kbase.
- */
-#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7)
-
-/* Used as BASE_MEM_FIXED in other backends
- */
+/* Used as BASE_MEM_FIXED in other backends */
#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8)
-/* Grow backing store on GPU Page Fault
- */
-#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9)
-
-/* Page coherence Outer shareable, if available
- */
-#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10)
-
-/* Page coherence Inner shareable
- */
-#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11)
-
-/* IN/OUT */
-/* Should be cached on the CPU, returned if actually cached
- */
-#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12)
-
-/* IN/OUT */
-/* Must have same VA on both the GPU and the CPU
- */
-#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13)
-
-/* OUT */
-/* Must call mmap to acquire a GPU address for the allocation
- */
-#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14)
-
-/* IN */
-/* Page coherence Outer shareable, required.
- */
-#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15)
-
-/* Protected memory
- */
-#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16)
-
-/* Not needed physical memory
- */
-#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17)
-
-/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the
- * addresses to be the same
- */
-#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18)
-
/**
* BASE_MEM_RESERVED_BIT_19 - Bit 19 is reserved.
*
@@ -131,47 +47,15 @@
*/
#define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20)
-/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu
- * mode. Some components within the GPU might only be able to access memory
- * that is GPU cacheable. Refer to the specific GPU implementation for more
- * details. The 3 shareability flags will be ignored for GPU uncached memory.
- * If used while importing USER_BUFFER type memory, then the import will fail
- * if the memory is not aligned to GPU and CPU cache line width.
- */
-#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21)
-
-/*
- * Bits [22:25] for group_id (0~15).
- *
- * base_mem_group_id_set() should be used to pack a memory group ID into a
- * base_mem_alloc_flags value instead of accessing the bits directly.
- * base_mem_group_id_get() should be used to extract the memory group ID from
- * a base_mem_alloc_flags value.
- */
-#define BASEP_MEM_GROUP_ID_SHIFT 22
-#define BASE_MEM_GROUP_ID_MASK \
- ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT)
-
-/* Must do CPU cache maintenance when imported memory is mapped/unmapped
- * on GPU. Currently applicable to dma-buf type only.
- */
-#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26)
-
/* Use the GPU VA chosen by the kernel client */
#define BASE_MEM_FLAG_MAP_FIXED ((base_mem_alloc_flags)1 << 27)
-/* OUT */
-/* Kernel side cache sync ops required */
-#define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28)
-
/* Force trimming of JIT allocations when creating a new allocation */
#define BASEP_MEM_PERFORM_JIT_TRIM ((base_mem_alloc_flags)1 << 29)
-/* Number of bits used as flags for base memory management
- *
- * Must be kept in sync with the base_mem_alloc_flags flags
+/* Note that the number of bits used for base_mem_alloc_flags
+ * must be less than BASE_MEM_FLAGS_NR_BITS !!!
*/
-#define BASE_MEM_FLAGS_NR_BITS 30
/* A mask of all the flags which are only valid for allocations within kbase,
* and may not be passed from user space.
@@ -180,29 +64,11 @@
(BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | \
BASE_MEM_FLAG_MAP_FIXED | BASEP_MEM_PERFORM_JIT_TRIM)
-/* A mask for all output bits, excluding IN/OUT bits.
- */
-#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
-
-/* A mask for all input bits, including IN/OUT bits.
- */
-#define BASE_MEM_FLAGS_INPUT_MASK \
- (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
-
/* A mask of all currently reserved flags
*/
#define BASE_MEM_FLAGS_RESERVED \
(BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_19)
-#define BASEP_MEM_INVALID_HANDLE (0ul)
-#define BASE_MEM_MMU_DUMP_HANDLE (1ul << LOCAL_PAGE_SHIFT)
-#define BASE_MEM_TRACE_BUFFER_HANDLE (2ul << LOCAL_PAGE_SHIFT)
-#define BASE_MEM_MAP_TRACKING_HANDLE (3ul << LOCAL_PAGE_SHIFT)
-#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ul << LOCAL_PAGE_SHIFT)
-/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */
-#define BASE_MEM_COOKIE_BASE (64ul << LOCAL_PAGE_SHIFT)
-#define BASE_MEM_FIRST_FREE_ADDRESS \
- ((BITS_PER_LONG << LOCAL_PAGE_SHIFT) + BASE_MEM_COOKIE_BASE)
/* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the
* initial commit is aligned to 'extension' pages, where 'extension' must be a power
@@ -227,47 +93,6 @@
#define BASE_JIT_ALLOC_VALID_FLAGS \
(BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP | BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE)
-/**
- * typedef base_context_create_flags - Flags to pass to ::base_context_init.
- *
- * Flags can be ORed together to enable multiple things.
- *
- * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
- * not collide with them.
- */
-typedef __u32 base_context_create_flags;
-
-/* No flags set */
-#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0)
-
-/* Base context is embedded in a cctx object (flag used for CINSTR
- * software counter macros)
- */
-#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0)
-
-/* Base context is a 'System Monitor' context for Hardware counters.
- *
- * One important side effect of this is that job submission is disabled.
- */
-#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \
- ((base_context_create_flags)1 << 1)
-
-/* Bit-shift used to encode a memory group ID in base_context_create_flags
- */
-#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3)
-
-/* Bitmask used to encode a memory group ID in base_context_create_flags
- */
-#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \
- ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)
-
-/* Bitpattern describing the base_context_create_flags that can be
- * passed to the kernel
- */
-#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \
- (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \
- BASEP_CONTEXT_MMU_GROUP_ID_MASK)
-
/* Bitpattern describing the ::base_context_create_flags that can be
* passed to base_context_init()
*/
@@ -287,16 +112,7 @@ typedef __u32 base_context_create_flags;
#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED \
((base_context_create_flags)(1 << 31))
-/* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
- * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST)
- */
-#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0)
-
-/* Indicate that job dumping is enabled. This could affect certain timers
- * to account for the performance impact.
- */
-#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1)
-
+/* Flags for base tracepoint specific to JM */
#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \
BASE_TLSTREAM_JOB_DUMPING_ENABLED)
/*
@@ -509,9 +325,6 @@ typedef __u32 base_jd_core_req;
* takes priority
*
* This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms.
- *
- * If the core availability policy is keeping the required core group turned
- * off, then the job will fail with a BASE_JD_EVENT_PM_EVENT error code.
*/
#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11)
@@ -1019,11 +832,6 @@ enum {
* BASE_JD_EVENT_JOB_CONFIG_FAULT, or if the
* platform doesn't support the feature specified in
* the atom.
- * @BASE_JD_EVENT_PM_EVENT: TODO: remove as it's not used
- * @BASE_JD_EVENT_TIMED_OUT: TODO: remove as it's not used
- * @BASE_JD_EVENT_BAG_INVALID: TODO: remove as it's not used
- * @BASE_JD_EVENT_PROGRESS_REPORT: TODO: remove as it's not used
- * @BASE_JD_EVENT_BAG_DONE: TODO: remove as it's not used
* @BASE_JD_EVENT_DRV_TERMINATED: this is a special event generated to indicate
* to userspace that the KBase context has been
* destroyed and Base should stop listening for
@@ -1118,17 +926,10 @@ enum base_jd_event_code {
/* SW defined exceptions */
BASE_JD_EVENT_MEM_GROWTH_FAILED =
BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000,
- BASE_JD_EVENT_TIMED_OUT =
- BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001,
BASE_JD_EVENT_JOB_CANCELLED =
BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002,
BASE_JD_EVENT_JOB_INVALID =
BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003,
- BASE_JD_EVENT_PM_EVENT =
- BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004,
-
- BASE_JD_EVENT_BAG_INVALID =
- BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003,
BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT |
BASE_JD_SW_EVENT_RESERVED | 0x3FF,
@@ -1136,10 +937,6 @@ enum base_jd_event_code {
BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT |
BASE_JD_SW_EVENT_SUCCESS | 0x000,
- BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT |
- BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000,
- BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS |
- BASE_JD_SW_EVENT_BAG | 0x000,
BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT |
BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000,
@@ -1206,4 +1003,49 @@ struct base_dump_cpu_gpu_counters {
__u8 padding[36];
};
+/**
+ * struct mali_base_gpu_core_props - GPU core props info
+ *
+ * @product_id: Product specific value.
+ * @version_status: Status of the GPU release. No defined values, but starts at
+ * 0 and increases by one for each release status (alpha, beta, EAC, etc.).
+ * 4 bit values (0-15).
+ * @minor_revision: Minor release number of the GPU. "P" part of an "RnPn"
+ * release number.
+ * 8 bit values (0-255).
+ * @major_revision: Major release number of the GPU. "R" part of an "RnPn"
+ * release number.
+ * 4 bit values (0-15).
+ * @padding: padding to align to 8-byte
+ * @gpu_freq_khz_max: The maximum GPU frequency. Reported to applications by
+ * clGetDeviceInfo()
+ * @log2_program_counter_size: Size of the shader program counter, in bits.
+ * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU. This
+ * is a bitpattern where a set bit indicates that the format is supported.
+ * Before using a texture format, it is recommended that the corresponding
+ * bit be checked.
+ * @gpu_available_memory_size: Theoretical maximum memory available to the GPU.
+ * It is unlikely that a client will be able to allocate all of this memory
+ * for their own purposes, but this at least provides an upper bound on the
+ * memory available to the GPU.
+ * This is required for OpenCL's clGetDeviceInfo() call when
+ * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
+ * client will not be expecting to allocate anywhere near this value.
+ * @num_exec_engines: The number of execution engines. Only valid for tGOX
+ * (Bifrost) GPUs, where GPU_HAS_REG_CORE_FEATURES is defined. Otherwise,
+ * this is always 0.
+ */
+struct mali_base_gpu_core_props {
+ __u32 product_id;
+ __u16 version_status;
+ __u16 minor_revision;
+ __u16 major_revision;
+ __u16 padding;
+ __u32 gpu_freq_khz_max;
+ __u32 log2_program_counter_size;
+ __u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+ __u64 gpu_available_memory_size;
+ __u8 num_exec_engines;
+};
+
#endif /* _UAPI_BASE_JM_KERNEL_H_ */
diff --git a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h
index 215f12d..20d931a 100644
--- a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h
+++ b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h
@@ -125,9 +125,11 @@
* - Removed Kernel legacy HWC interface
* 11.34:
* - First release of new HW performance counters interface.
+ * 11.35:
+ * - Dummy model (no mali) backend will now clear HWC values after each sample
*/
#define BASE_UK_VERSION_MAJOR 11
-#define BASE_UK_VERSION_MINOR 34
+#define BASE_UK_VERSION_MINOR 35
/**
* struct kbase_ioctl_version_check - Check version compatibility between
diff --git a/common/include/uapi/gpu/arm/midgard/mali_base_common_kernel.h b/common/include/uapi/gpu/arm/midgard/mali_base_common_kernel.h
new file mode 100644
index 0000000..f837814
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/mali_base_common_kernel.h
@@ -0,0 +1,231 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_BASE_COMMON_KERNEL_H_
+#define _UAPI_BASE_COMMON_KERNEL_H_
+
+#include <linux/types.h>
+
+struct base_mem_handle {
+ struct {
+ __u64 handle;
+ } basep;
+};
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
+
+/* Memory allocation, access/hint flags & mask.
+ *
+ * See base_mem_alloc_flags.
+ */
+
+/* IN */
+/* Read access CPU side
+ */
+#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0)
+
+/* Write access CPU side
+ */
+#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1)
+
+/* Read access GPU side
+ */
+#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2)
+
+/* Write access GPU side
+ */
+#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3)
+
+/* Execute allowed on the GPU side
+ */
+#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4)
+
+/* Will be permanently mapped in kernel space.
+ * Flag is only allowed on allocations originating from kbase.
+ */
+#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5)
+
+/* The allocation will completely reside within the same 4GB chunk in the GPU
+ * virtual space.
+ * Since this flag is primarily required only for the TLS memory which will
+ * not be used to contain executable code and also not used for Tiler heap,
+ * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags.
+ */
+#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6)
+
+/* Userspace is not allowed to free this memory.
+ * Flag is only allowed on allocations originating from kbase.
+ */
+#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7)
+
+/* Grow backing store on GPU Page Fault
+ */
+#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9)
+
+/* Page coherence Outer shareable, if available
+ */
+#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10)
+
+/* Page coherence Inner shareable
+ */
+#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11)
+
+/* IN/OUT */
+/* Should be cached on the CPU, returned if actually cached
+ */
+#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12)
+
+/* IN/OUT */
+/* Must have same VA on both the GPU and the CPU
+ */
+#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13)
+
+/* OUT */
+/* Must call mmap to acquire a GPU address for the allocation
+ */
+#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14)
+
+/* IN */
+/* Page coherence Outer shareable, required.
+ */
+#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15)
+
+/* Protected memory
+ */
+#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16)
+
+/* Not needed physical memory
+ */
+#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17)
+
+/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the
+ * addresses to be the same
+ */
+#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18)
+
+/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu
+ * mode. Some components within the GPU might only be able to access memory
+ * that is GPU cacheable. Refer to the specific GPU implementation for more
+ * details. The 3 shareability flags will be ignored for GPU uncached memory.
+ * If used while importing USER_BUFFER type memory, then the import will fail
+ * if the memory is not aligned to GPU and CPU cache line width.
+ */
+#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21)
+
+/*
+ * Bits [22:25] for group_id (0~15).
+ *
+ * base_mem_group_id_set() should be used to pack a memory group ID into a
+ * base_mem_alloc_flags value instead of accessing the bits directly.
+ * base_mem_group_id_get() should be used to extract the memory group ID from
+ * a base_mem_alloc_flags value.
+ */
+#define BASEP_MEM_GROUP_ID_SHIFT 22
+#define BASE_MEM_GROUP_ID_MASK ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT)
+
+/* Must do CPU cache maintenance when imported memory is mapped/unmapped
+ * on GPU. Currently applicable to dma-buf type only.
+ */
+#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26)
+
+/* OUT */
+/* Kernel side cache sync ops required */
+#define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28)
+
+/* Number of bits used as flags for base memory management
+ *
+ * Must be kept in sync with the base_mem_alloc_flags flags
+ */
+#define BASE_MEM_FLAGS_NR_BITS 30
+
+/* A mask for all output bits, excluding IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
+
+/* A mask for all input bits, including IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_INPUT_MASK \
+ (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
+
+/* Special base mem handles.
+ */
+#define BASEP_MEM_INVALID_HANDLE (0ul)
+#define BASE_MEM_MMU_DUMP_HANDLE (1ul << LOCAL_PAGE_SHIFT)
+#define BASE_MEM_TRACE_BUFFER_HANDLE (2ul << LOCAL_PAGE_SHIFT)
+#define BASE_MEM_MAP_TRACKING_HANDLE (3ul << LOCAL_PAGE_SHIFT)
+#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ul << LOCAL_PAGE_SHIFT)
+/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */
+#define BASE_MEM_COOKIE_BASE (64ul << LOCAL_PAGE_SHIFT)
+#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << LOCAL_PAGE_SHIFT) + BASE_MEM_COOKIE_BASE)
+
+/* Flags to pass to ::base_context_init.
+ * Flags can be ORed together to enable multiple things.
+ *
+ * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
+ * not collide with them.
+ */
+typedef __u32 base_context_create_flags;
+
+/* Flags for base context */
+
+/* No flags set */
+#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0)
+
+/* Base context is embedded in a cctx object (flag used for CINSTR
+ * software counter macros)
+ */
+#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0)
+
+/* Base context is a 'System Monitor' context for Hardware counters.
+ *
+ * One important side effect of this is that job submission is disabled.
+ */
+#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED ((base_context_create_flags)1 << 1)
+
+/* Bit-shift used to encode a memory group ID in base_context_create_flags
+ */
+#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3)
+
+/* Bitmask used to encode a memory group ID in base_context_create_flags
+ */
+#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \
+ ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)
+
+/* Bitpattern describing the base_context_create_flags that can be
+ * passed to the kernel
+ */
+#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \
+ (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | BASEP_CONTEXT_MMU_GROUP_ID_MASK)
+
+/* Flags for base tracepoint
+ */
+
+/* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
+ * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST)
+ */
+#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0)
+
+/* Indicate that job dumping is enabled. This could affect certain timers
+ * to account for the performance impact.
+ */
+#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1)
+
+#endif /* _UAPI_BASE_COMMON_KERNEL_H_ */
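
A short sketch of what packing and extracting the group ID with the shift/mask pair above amounts to. The real helpers the comment refers to, base_mem_group_id_set() and base_mem_group_id_get(), play this role; the base_mem_alloc_flags typedef (__u64, from mali_base_kernel.h) is assumed to be in scope.

/* Hypothetical equivalents of the pack/extract helpers mentioned above. */
static inline base_mem_alloc_flags example_group_id_pack(base_mem_alloc_flags flags,
							 unsigned int group_id)
{
	/* group_id must fit in bits [22:25], i.e. 0..15 */
	return (flags & ~BASE_MEM_GROUP_ID_MASK) |
	       (((base_mem_alloc_flags)group_id << BASEP_MEM_GROUP_ID_SHIFT) &
		BASE_MEM_GROUP_ID_MASK);
}

static inline unsigned int example_group_id_unpack(base_mem_alloc_flags flags)
{
	return (unsigned int)((flags & BASE_MEM_GROUP_ID_MASK) >> BASEP_MEM_GROUP_ID_SHIFT);
}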
diff --git a/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h b/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h
index f3ffb36..6adbd81 100644
--- a/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h
+++ b/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h
@@ -27,19 +27,10 @@
#define _UAPI_BASE_KERNEL_H_
#include <linux/types.h>
-
-struct base_mem_handle {
- struct {
- __u64 handle;
- } basep;
-};
-
#include "mali_base_mem_priv.h"
#include "gpu/mali_kbase_gpu_id.h"
#include "gpu/mali_kbase_gpu_coherency.h"
-#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
-
#define BASE_MAX_COHERENT_GROUPS 16
#if defined(PAGE_MASK) && defined(PAGE_SHIFT)
@@ -458,49 +449,6 @@ struct base_jd_debug_copy_buffer {
* 16 coherent groups, since core groups are typically 4 cores.
*/
-/**
- * struct mali_base_gpu_core_props - GPU core props info
- *
- * @product_id: Product specific value.
- * @version_status: Status of the GPU release. No defined values, but starts at
- * 0 and increases by one for each release status (alpha, beta, EAC, etc.).
- * 4 bit values (0-15).
- * @minor_revision: Minor release number of the GPU. "P" part of an "RnPn"
- * release number.
- * 8 bit values (0-255).
- * @major_revision: Major release number of the GPU. "R" part of an "RnPn"
- * release number.
- * 4 bit values (0-15).
- * @padding: padding to allign to 8-byte
- * @gpu_freq_khz_max: The maximum GPU frequency. Reported to applications by
- * clGetDeviceInfo()
- * @log2_program_counter_size: Size of the shader program counter, in bits.
- * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU. This
- * is a bitpattern where a set bit indicates that the format is supported.
- * Before using a texture format, it is recommended that the corresponding
- * bit be checked.
- * @gpu_available_memory_size: Theoretical maximum memory available to the GPU.
- * It is unlikely that a client will be able to allocate all of this memory
- * for their own purposes, but this at least provides an upper bound on the
- * memory available to the GPU.
- * This is required for OpenCL's clGetDeviceInfo() call when
- * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
- * client will not be expecting to allocate anywhere near this value.
- * @num_exec_engines: The number of execution engines.
- */
-struct mali_base_gpu_core_props {
- __u32 product_id;
- __u16 version_status;
- __u16 minor_revision;
- __u16 major_revision;
- __u16 padding;
- __u32 gpu_freq_khz_max;
- __u32 log2_program_counter_size;
- __u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
- __u64 gpu_available_memory_size;
- __u8 num_exec_engines;
-};
-
/*
* More information is possible - but associativity and bus width are not
* required by upper-level apis.
@@ -531,7 +479,7 @@ struct mali_base_gpu_tiler_props {
* field.
* @impl_tech: 0 = Not specified, 1 = Silicon, 2 = FPGA,
* 3 = SW Model/Emulation
- * @padding: padding to allign to 8-byte
+ * @padding: padding to align to 8-byte
* @tls_alloc: Number of threads per core that TLS must be
* allocated for
*/
@@ -551,7 +499,7 @@ struct mali_base_gpu_thread_props {
* struct mali_base_gpu_coherent_group - descriptor for a coherent group
* @core_mask: Core restriction mask required for the group
* @num_cores: Number of cores in the group
- * @padding: padding to allign to 8-byte
+ * @padding: padding to align to 8-byte
*
* \c core_mask exposes all cores in that coherent group, and \c num_cores
* provides a cached population-count for that mask.
@@ -581,7 +529,7 @@ struct mali_base_gpu_coherent_group {
* are in the group[] member. Use num_groups instead.
* @coherency: Coherency features of the memory, accessed by gpu_mem_features
* methods
- * @padding: padding to allign to 8-byte
+ * @padding: padding to align to 8-byte
* @group: Descriptors of coherent groups
*
* Note that the sizes of the members could be reduced. However, the \c group
@@ -599,6 +547,12 @@ struct mali_base_gpu_coherent_group_info {
struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
};
+#if MALI_USE_CSF
+#include "csf/mali_base_csf_kernel.h"
+#else
+#include "jm/mali_base_jm_kernel.h"
+#endif
+
/**
* struct gpu_raw_gpu_props - A complete description of the GPU's Hardware
* Configuration Discovery registers.
@@ -696,12 +650,6 @@ struct base_gpu_props {
struct mali_base_gpu_coherent_group_info coherency_info;
};
-#if MALI_USE_CSF
-#include "csf/mali_base_csf_kernel.h"
-#else
-#include "jm/mali_base_jm_kernel.h"
-#endif
-
#define BASE_MEM_GROUP_ID_GET(flags) \
((flags & BASE_MEM_GROUP_ID_MASK) >> BASEP_MEM_GROUP_ID_SHIFT)
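A note on the macro above: BASE_MEM_GROUP_ID_GET() simply isolates the memory-group field from a base memory-flags word. The following standalone sketch illustrates the same mask/shift extraction; the DEMO_* field width, mask and shift values are assumptions for illustration only, the authoritative values live in the UAPI headers (BASE_MEM_GROUP_ID_MASK / BASEP_MEM_GROUP_ID_SHIFT).

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins only: the real mask/shift come from the UAPI
 * headers, not from this sketch.
 */
#define DEMO_MEM_GROUP_ID_SHIFT 22
#define DEMO_MEM_GROUP_ID_MASK  (0xFULL << DEMO_MEM_GROUP_ID_SHIFT)

/* Extract the group-id field from a 64-bit memory-flags word. */
static unsigned int demo_mem_group_id_get(uint64_t flags)
{
    return (unsigned int)((flags & DEMO_MEM_GROUP_ID_MASK) >>
                          DEMO_MEM_GROUP_ID_SHIFT);
}

int main(void)
{
    uint64_t flags = (uint64_t)3 << DEMO_MEM_GROUP_ID_SHIFT;

    printf("group id = %u\n", demo_mem_group_id_get(flags)); /* prints 3 */
    return 0;
}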
diff --git a/common/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h b/common/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h
index 304a334..70f5b09 100644
--- a/common/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h
+++ b/common/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2015, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2015, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -23,8 +23,7 @@
#define _UAPI_BASE_MEM_PRIV_H_
#include <linux/types.h>
-
-#include "mali_base_kernel.h"
+#include "mali_base_common_kernel.h"
#define BASE_SYNCSET_OP_MSYNC (1U << 0)
#define BASE_SYNCSET_OP_CSYNC (1U << 1)
diff --git a/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h b/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h
index d1d5f3d..73d54e9 100644
--- a/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h
+++ b/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h
@@ -563,7 +563,8 @@ union kbase_ioctl_mem_find_gpu_start_and_offset {
_IO(KBASE_IOCTL_TYPE, 34)
/**
- * union kbase_ioctl_gwt_dump - Used to collect all GPU write fault addresses.
+ * union kbase_ioctl_cinstr_gwt_dump - Used to collect all GPU write fault
+ * addresses.
* @in: Input parameters
* @in.addr_buffer: Address of buffer to hold addresses of gpu modified areas.
* @in.size_buffer: Address of buffer to hold size of modified areas (in pages)
@@ -683,7 +684,7 @@ struct kbase_ioctl_kinstr_prfcnt_enum_info {
_IOWR(KBASE_IOCTL_TYPE, 56, struct kbase_ioctl_kinstr_prfcnt_enum_info)
/**
- * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader
+ * struct kbase_ioctl_kinstr_prfcnt_setup - Setup HWC dumper/reader
* @in: input parameters.
* @in.request_item_count: Number of requests in the requests array.
* @in.request_item_size: Size in bytes of each request in the requests array.
diff --git a/common/include/uapi/gpu/arm/midgard/mali_uk.h b/common/include/uapi/gpu/arm/midgard/mali_uk.h
deleted file mode 100644
index 78946f6..0000000
--- a/common/include/uapi/gpu/arm/midgard/mali_uk.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- *
- * (C) COPYRIGHT 2010, 2012-2015, 2018, 2020-2022 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-/**
- * DOC: Types and definitions that are common across OSs for both the user
- * and kernel side of the User-Kernel interface.
- */
-
-#ifndef _UAPI_UK_H_
-#define _UAPI_UK_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/**
- * DOC: uk_api User-Kernel Interface API
- *
- * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device
- * drivers developed as part of the Midgard DDK. Currently that includes the Base driver.
- *
- * It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent
- * kernel-side API (UKK) via an OS-specific communication mechanism.
- *
- * This API is internal to the Midgard DDK and is not exposed to any applications.
- *
- */
-
-/**
- * enum uk_client_id - These are identifiers for kernel-side drivers
- * implementing a UK interface, aka UKK clients.
- * @UK_CLIENT_MALI_T600_BASE: Value used to identify the Base driver UK client.
- * @UK_CLIENT_COUNT: The number of uk clients supported. This must be
- * the last member of the enum
- *
- * The UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this
- * identifier to select a UKK client to the uku_open() function.
- *
- * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id
- * enumeration and the uku_open() implemenation for the various OS ports need to be updated to
- * provide a mapping of the identifier to the OS specific device name.
- *
- */
-enum uk_client_id {
- UK_CLIENT_MALI_T600_BASE,
- UK_CLIENT_COUNT
-};
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-#endif /* _UAPI_UK_H_ */
diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild
index ca02444..81b6e93 100644
--- a/mali_kbase/Kbuild
+++ b/mali_kbase/Kbuild
@@ -71,7 +71,7 @@ endif
#
# Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= '"r36p0-01eac0"'
+MALI_RELEASE_NAME ?= '"r38p1-01eac0"'
# We are building for Pixel
CONFIG_MALI_PLATFORM_NAME="pixel"
@@ -116,7 +116,7 @@ endif
#
# Experimental features must default to disabled, e.g.:
# MALI_EXPERIMENTAL_FEATURE ?= 0
-MALI_INCREMENTAL_RENDERING ?= 0
+MALI_INCREMENTAL_RENDERING_JM ?= 0
#
# ccflags
@@ -129,7 +129,7 @@ ccflags-y += \
-DMALI_COVERAGE=$(MALI_COVERAGE) \
-DMALI_RELEASE_NAME=$(MALI_RELEASE_NAME) \
-DMALI_JIT_PRESSURE_LIMIT_BASE=$(MALI_JIT_PRESSURE_LIMIT_BASE) \
- -DMALI_INCREMENTAL_RENDERING=$(MALI_INCREMENTAL_RENDERING) \
+ -DMALI_INCREMENTAL_RENDERING_JM=$(MALI_INCREMENTAL_RENDERING_JM) \
-DMALI_PLATFORM_DIR=$(MALI_PLATFORM_DIR)
ifeq ($(KBUILD_EXTMOD),)
@@ -187,6 +187,7 @@ mali_kbase-y := \
mali_kbase_mem_profile_debugfs.o \
mali_kbase_disjoint_events.o \
mali_kbase_debug_mem_view.o \
+ mali_kbase_debug_mem_zones.o \
mali_kbase_smc.o \
mali_kbase_mem_pool.o \
mali_kbase_mem_pool_debugfs.o \
diff --git a/mali_kbase/Kconfig b/mali_kbase/Kconfig
index ef16a7d..de27ae4 100644
--- a/mali_kbase/Kconfig
+++ b/mali_kbase/Kconfig
@@ -217,6 +217,20 @@ config MALI_GEM5_BUILD
comment "Debug options"
depends on MALI_MIDGARD && MALI_EXPERT
+config MALI_FW_CORE_DUMP
+ bool "Enable support for FW core dump"
+ depends on MALI_MIDGARD && MALI_EXPERT && MALI_CSF_SUPPORT
+ default n
+ help
+ Adds the ability to request a firmware core dump.
+
+ Example:
+ * To explicitly request core dump:
+ echo 1 >/sys/kernel/debug/mali0/fw_core_dump
+ * To output the current core dump (after explicitly requesting a core dump,
+ or after the kernel driver has reported an internal firmware error):
+ cat /sys/kernel/debug/mali0/fw_core_dump
+
config MALI_DEBUG
bool "Enable debug build"
depends on MALI_MIDGARD && MALI_EXPERT
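The MALI_FW_CORE_DUMP help text above describes the debugfs interface only in shell terms. A minimal userspace C sketch of the same request/read sequence is shown below; the debugfs path is taken from the help text, everything else (names, buffer size) is illustrative.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define FW_CORE_DUMP_PATH "/sys/kernel/debug/mali0/fw_core_dump"

int main(void)
{
    char buf[4096];
    ssize_t n;
    int fd;

    /* Request a new firmware core dump ("echo 1 > fw_core_dump"). */
    fd = open(FW_CORE_DUMP_PATH, O_WRONLY);
    if (fd < 0) {
        perror("open fw_core_dump for write");
        return 1;
    }
    if (write(fd, "1", 1) != 1) {
        perror("request core dump");
        close(fd);
        return 1;
    }
    close(fd);

    /* Read the dump back out ("cat fw_core_dump"). */
    fd = open(FW_CORE_DUMP_PATH, O_RDONLY);
    if (fd < 0) {
        perror("open fw_core_dump for read");
        return 1;
    }
    while ((n = read(fd, buf, sizeof(buf))) > 0)
        fwrite(buf, 1, (size_t)n, stdout);
    close(fd);
    return 0;
}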
diff --git a/mali_kbase/Makefile b/mali_kbase/Makefile
index f64f568..d8522fc 100644
--- a/mali_kbase/Makefile
+++ b/mali_kbase/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -149,16 +149,19 @@ ifeq ($(CONFIG_MALI_MIDGARD),m)
ifeq ($(CONFIG_MALI_KUTF), y)
CONFIG_MALI_KUTF_IRQ_TEST ?= y
CONFIG_MALI_KUTF_CLK_RATE_TRACE ?= y
+ CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST ?= y
else
# Prevent misuse when CONFIG_MALI_KUTF=n
CONFIG_MALI_KUTF_IRQ_TEST = n
CONFIG_MALI_KUTF_CLK_RATE_TRACE = n
+ CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n
endif
else
# Prevent misuse when CONFIG_MALI_DEBUG=n
CONFIG_MALI_KUTF = n
CONFIG_MALI_KUTF_IRQ_TEST = n
CONFIG_MALI_KUTF_CLK_RATE_TRACE = n
+ CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n
endif
else
# Prevent misuse when CONFIG_MALI_MIDGARD=n
@@ -168,6 +171,7 @@ else
CONFIG_MALI_KUTF = n
CONFIG_MALI_KUTF_IRQ_TEST = n
CONFIG_MALI_KUTF_CLK_RATE_TRACE = n
+ CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n
endif
# All Mali CONFIG should be listed here
@@ -207,6 +211,7 @@ CONFIGS := \
CONFIG_MALI_KUTF \
CONFIG_MALI_KUTF_IRQ_TEST \
CONFIG_MALI_KUTF_CLK_RATE_TRACE \
+ CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \
CONFIG_MALI_XEN
# Pixel integration CONFIG options
@@ -247,6 +252,26 @@ EXTRA_CFLAGS += -DCONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME)
# KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
#
+# The following were added to align with W=1 in scripts/Makefile.extrawarn
+# from the Linux source tree
+KBUILD_CFLAGS += -Wall -Werror
+KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter
+KBUILD_CFLAGS += -Wmissing-declarations
+KBUILD_CFLAGS += -Wmissing-format-attribute
+KBUILD_CFLAGS += -Wmissing-prototypes
+KBUILD_CFLAGS += -Wold-style-definition
+KBUILD_CFLAGS += -Wmissing-include-dirs
+KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable)
+KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable)
+KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned)
+KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation)
+# The following turn off the warnings enabled by -Wextra
+KBUILD_CFLAGS += -Wno-missing-field-initializers
+KBUILD_CFLAGS += -Wno-sign-compare
+KBUILD_CFLAGS += -Wno-type-limits
+
+KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1
+
all:
$(MAKE) -C $(KDIR) M=$(M) W=1 $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
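The extra KBUILD_CFLAGS above mirror what W=1 enables via scripts/Makefile.extrawarn. As a quick illustration of the kind of issue these flags catch (this example is not taken from the driver), -Wmissing-prototypes warns on any externally visible function defined without a prior declaration:

/* demo.c -- compile with: gcc -c -Wall -Wextra -Wmissing-prototypes demo.c */

/* Warns: no previous prototype for 'helper'. Either make it static or
 * declare it in a header shared by the definition and its callers.
 */
int helper(int x)
{
    return x * 2;
}

/* No warning: internal linkage, so no separate prototype is expected. */
static int local_helper(int x)
{
    return x + 1;
}

int main(void)
{
    return helper(local_helper(1));
}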
diff --git a/mali_kbase/Mconfig b/mali_kbase/Mconfig
index f76d68b..d03322c 100644
--- a/mali_kbase/Mconfig
+++ b/mali_kbase/Mconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -65,8 +65,7 @@ config MALI_CSF_SUPPORT
config MALI_DEVFREQ
bool "Enable devfreq support for Mali"
depends on MALI_MIDGARD
- default y if PLATFORM_JUNO
- default y if PLATFORM_CUSTOM
+ default y
help
Support devfreq for Mali.
@@ -192,6 +191,20 @@ config MALI_CORESTACK
If unsure, say N.
+config MALI_FW_CORE_DUMP
+ bool "Enable support for FW core dump"
+ depends on MALI_MIDGARD && MALI_EXPERT && MALI_CSF_SUPPORT
+ default n
+ help
+ Adds the ability to request a firmware core dump.
+
+ Example:
+ * To explicitly request core dump:
+ echo 1 >/sys/kernel/debug/mali0/fw_core_dump
+ * To output the current core dump (after explicitly requesting a core dump,
+ or after the kernel driver has reported an internal firmware error):
+ cat /sys/kernel/debug/mali0/fw_core_dump
+
choice
prompt "Error injection level"
depends on MALI_MIDGARD && MALI_EXPERT
diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_pm.c b/mali_kbase/arbiter/mali_kbase_arbiter_pm.c
index d813a04..667552c 100644
--- a/mali_kbase/arbiter/mali_kbase_arbiter_pm.c
+++ b/mali_kbase/arbiter/mali_kbase_arbiter_pm.c
@@ -955,7 +955,6 @@ static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(
int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev,
enum kbase_pm_suspend_handler suspend_handler)
{
- struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
int res = 0;
@@ -1008,11 +1007,9 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev,
/* Need to synchronously wait for GPU assignment */
atomic_inc(&kbdev->pm.gpu_users_waiting);
mutex_unlock(&arb_vm_state->vm_state_lock);
- mutex_unlock(&kbdev->pm.lock);
- mutex_unlock(&js_devdata->runpool_mutex);
+ kbase_pm_unlock(kbdev);
kbase_arbiter_pm_vm_wait_gpu_assignment(kbdev);
- mutex_lock(&js_devdata->runpool_mutex);
- mutex_lock(&kbdev->pm.lock);
+ kbase_pm_lock(kbdev);
mutex_lock(&arb_vm_state->vm_state_lock);
atomic_dec(&kbdev->pm.gpu_users_waiting);
}
@@ -1111,7 +1108,7 @@ static int arb_gpu_clk_notifier_register(struct kbase_device *kbdev,
}
/**
- * gpu_clk_notifier_unregister() - Unregister clock rate change notifier
+ * arb_gpu_clk_notifier_unregister() - Unregister clock rate change notifier
* @kbdev: kbase_device pointer
* @gpu_clk_handle: Handle unique to the enumerated GPU clock
* @nb: notifier block containing the callback function pointer
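The arbiter hunk above replaces the open-coded runpool_mutex/pm.lock pair with kbase_pm_lock()/kbase_pm_unlock() helpers defined elsewhere in the driver. A plausible sketch of what such wrappers look like, assuming they simply bundle the two mutexes taken by the removed code, follows; the real definitions in the driver (and the full struct kbase_device layout they rely on) are authoritative.

#include <linux/mutex.h>

/* Hypothetical sketch only: bundles the locks the old open-coded sequence
 * took, to show the acquire/release ordering. Meant to live inside the
 * driver where struct kbase_device is fully defined.
 */
static inline void demo_pm_lock(struct kbase_device *kbdev)
{
    mutex_lock(&kbdev->js_data.runpool_mutex);
    mutex_lock(&kbdev->pm.lock);
}

static inline void demo_pm_unlock(struct kbase_device *kbdev)
{
    /* Release in the reverse order of acquisition. */
    mutex_unlock(&kbdev->pm.lock);
    mutex_unlock(&kbdev->js_data.runpool_mutex);
}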
diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
index d6b9750..ddd03ca 100644
--- a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
+++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -72,49 +72,6 @@ get_clk_rate_trace_callbacks(__maybe_unused struct kbase_device *kbdev)
return callbacks;
}
-int kbase_lowest_gpu_freq_init(struct kbase_device *kbdev)
-{
- /* Uses default reference frequency defined in below macro */
- u64 lowest_freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ;
-
- /* Only check lowest frequency in cases when OPPs are used and
- * present in the device tree.
- */
-#ifdef CONFIG_PM_OPP
- struct dev_pm_opp *opp_ptr;
- unsigned long found_freq = 0;
-
- /* find lowest frequency OPP */
- opp_ptr = dev_pm_opp_find_freq_ceil(kbdev->dev, &found_freq);
- if (IS_ERR(opp_ptr)) {
- dev_err(kbdev->dev,
- "No OPPs found in device tree! Scaling timeouts using %llu kHz",
- (unsigned long long)lowest_freq_khz);
- } else {
-#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
- dev_pm_opp_put(opp_ptr); /* decrease OPP refcount */
-#endif
- /* convert found frequency to KHz */
- found_freq /= 1000;
-
- /* If lowest frequency in OPP table is still higher
- * than the reference, then keep the reference frequency
- * as the one to use for scaling .
- */
- if (found_freq < lowest_freq_khz)
- lowest_freq_khz = found_freq;
- }
-#else
- dev_err(kbdev->dev,
- "No operating-points-v2 node or operating-points property in DT");
-#endif
-
- kbdev->lowest_gpu_freq_khz = lowest_freq_khz;
- dev_dbg(kbdev->dev, "Lowest frequency identified is %llu kHz",
- kbdev->lowest_gpu_freq_khz);
- return 0;
-}
-
static int gpu_clk_rate_change_notifier(struct notifier_block *nb,
unsigned long event, void *data)
{
diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h
index a6ee959..35b3b8d 100644
--- a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h
+++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -61,20 +61,6 @@ struct kbase_clk_data {
int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev);
/**
- * kbase_init_lowest_gpu_freq() - Find the lowest frequency that the GPU can
- * run as using the device tree, and save this
- * within kbdev.
- * @kbdev: Pointer to kbase device.
- *
- * This function could be called from kbase_clk_rate_trace_manager_init,
- * but is left separate as it can be called as soon as
- * dev_pm_opp_of_add_table() has been called to initialize the OPP table.
- *
- * Return: 0 in any case.
- */
-int kbase_lowest_gpu_freq_init(struct kbase_device *kbdev);
-
-/**
* kbase_clk_rate_trace_manager_term - Terminate GPU clock rate trace manager.
*
* @kbdev: Device pointer
diff --git a/mali_kbase/backend/gpu/mali_kbase_devfreq.c b/mali_kbase/backend/gpu/mali_kbase_devfreq.c
index 00b32b9..09c1863 100644
--- a/mali_kbase/backend/gpu/mali_kbase_devfreq.c
+++ b/mali_kbase/backend/gpu/mali_kbase_devfreq.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -57,7 +57,7 @@ static unsigned long get_voltage(struct kbase_device *kbdev, unsigned long freq)
opp = dev_pm_opp_find_freq_exact(kbdev->dev, freq, true);
if (IS_ERR_OR_NULL(opp))
- dev_err(kbdev->dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
+ dev_err(kbdev->dev, "Failed to get opp (%d)\n", PTR_ERR_OR_ZERO(opp));
else {
voltage = dev_pm_opp_get_voltage(opp);
#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
@@ -133,8 +133,8 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
rcu_read_unlock();
#endif
if (IS_ERR_OR_NULL(opp)) {
- dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
- return PTR_ERR(opp);
+ dev_err(dev, "Failed to get opp (%d)\n", PTR_ERR_OR_ZERO(opp));
+ return IS_ERR(opp) ? PTR_ERR(opp) : -ENODEV;
}
#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
dev_pm_opp_put(opp);
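The change above matters because IS_ERR_OR_NULL() is true for both ERR_PTR()-encoded pointers and plain NULL, while PTR_ERR(NULL) evaluates to 0; the old code could therefore log garbage and return 0 ("success") on failure. A minimal kernel-style sketch of the corrected pattern, using a hypothetical demo_lookup() in place of the OPP call, is:

#include <linux/err.h>
#include <linux/errno.h>
#include <linux/printk.h>

struct demo_object;

/* Hypothetical lookup that, like an API checked defensively with
 * IS_ERR_OR_NULL(), may hand back an ERR_PTR()-encoded error or NULL.
 */
struct demo_object *demo_lookup(void);

static int demo_get(void)
{
    struct demo_object *obj = demo_lookup();

    if (IS_ERR_OR_NULL(obj)) {
        /* PTR_ERR(NULL) is 0, i.e. "success"; PTR_ERR_OR_ZERO() makes the
         * NULL case explicit, and -ENODEV is substituted so the caller
         * still sees an error code.
         */
        pr_err("lookup failed (%d)\n", (int)PTR_ERR_OR_ZERO(obj));
        return IS_ERR(obj) ? (int)PTR_ERR(obj) : -ENODEV;
    }

    /* ... use obj ... */
    return 0;
}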
@@ -317,6 +317,7 @@ static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
dp->max_state = i;
+
/* Have the lowest clock as suspend clock.
* It may be overridden by 'opp-mali-errata-1485982'.
*/
@@ -636,6 +637,7 @@ int kbase_devfreq_init(struct kbase_device *kbdev)
struct devfreq_dev_profile *dp;
int err;
unsigned int i;
+ bool free_devfreq_freq_table = true;
if (kbdev->nr_clocks == 0) {
dev_err(kbdev->dev, "Clock not available for devfreq\n");
@@ -669,32 +671,35 @@ int kbase_devfreq_init(struct kbase_device *kbdev)
dp->freq_table[0] / 1000;
}
- err = kbase_devfreq_init_core_mask_table(kbdev);
+#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL)
+ err = kbase_ipa_init(kbdev);
if (err) {
- kbase_devfreq_term_freq_table(kbdev);
- return err;
+ dev_err(kbdev->dev, "IPA initialization failed");
+ goto ipa_init_failed;
}
+#endif
+
+ err = kbase_devfreq_init_core_mask_table(kbdev);
+ if (err)
+ goto init_core_mask_table_failed;
kbdev->devfreq = devfreq_add_device(kbdev->dev, dp,
"simple_ondemand", NULL);
if (IS_ERR(kbdev->devfreq)) {
err = PTR_ERR(kbdev->devfreq);
kbdev->devfreq = NULL;
- kbase_devfreq_term_core_mask_table(kbdev);
- kbase_devfreq_term_freq_table(kbdev);
- dev_err(kbdev->dev, "Fail to add devfreq device(%d)\n", err);
- return err;
+ dev_err(kbdev->dev, "Fail to add devfreq device(%d)", err);
+ goto devfreq_add_dev_failed;
}
+ /* Explicit free of freq table isn't needed after devfreq_add_device() */
+ free_devfreq_freq_table = false;
+
/* Initialize devfreq suspend/resume workqueue */
err = kbase_devfreq_work_init(kbdev);
if (err) {
- if (devfreq_remove_device(kbdev->devfreq))
- dev_err(kbdev->dev, "Fail to rm devfreq\n");
- kbdev->devfreq = NULL;
- kbase_devfreq_term_core_mask_table(kbdev);
- dev_err(kbdev->dev, "Fail to init devfreq workqueue\n");
- return err;
+ dev_err(kbdev->dev, "Fail to init devfreq workqueue");
+ goto devfreq_work_init_failed;
}
/* devfreq_add_device only copies a few of kbdev->dev's fields, so
@@ -705,26 +710,20 @@ int kbase_devfreq_init(struct kbase_device *kbdev)
err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq);
if (err) {
dev_err(kbdev->dev,
- "Failed to register OPP notifier (%d)\n", err);
+ "Failed to register OPP notifier (%d)", err);
goto opp_notifier_failed;
}
#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL)
- err = kbase_ipa_init(kbdev);
- if (err) {
- dev_err(kbdev->dev, "IPA initialization failed\n");
- goto ipa_init_failed;
- }
-
kbdev->devfreq_cooling = of_devfreq_cooling_register_power(
kbdev->dev->of_node,
kbdev->devfreq,
&kbase_ipa_power_model_ops);
if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) {
- err = PTR_ERR(kbdev->devfreq_cooling);
+ err = PTR_ERR_OR_ZERO(kbdev->devfreq_cooling);
dev_err(kbdev->dev,
- "Failed to register cooling device (%d)\n",
- err);
+ "Failed to register cooling device (%d)", err);
+ err = err == 0 ? -ENODEV : err;
goto cooling_reg_failed;
}
#endif
@@ -733,21 +732,29 @@ int kbase_devfreq_init(struct kbase_device *kbdev)
#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL)
cooling_reg_failed:
- kbase_ipa_term(kbdev);
-ipa_init_failed:
devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
#endif /* CONFIG_DEVFREQ_THERMAL */
opp_notifier_failed:
kbase_devfreq_work_term(kbdev);
+devfreq_work_init_failed:
if (devfreq_remove_device(kbdev->devfreq))
- dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+ dev_err(kbdev->dev, "Failed to terminate devfreq (%d)", err);
kbdev->devfreq = NULL;
+devfreq_add_dev_failed:
kbase_devfreq_term_core_mask_table(kbdev);
+init_core_mask_table_failed:
+#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL)
+ kbase_ipa_term(kbdev);
+ipa_init_failed:
+#endif
+ if (free_devfreq_freq_table)
+ kbase_devfreq_term_freq_table(kbdev);
+
return err;
}
@@ -760,8 +767,6 @@ void kbase_devfreq_term(struct kbase_device *kbdev)
#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL)
if (kbdev->devfreq_cooling)
devfreq_cooling_unregister(kbdev->devfreq_cooling);
-
- kbase_ipa_term(kbdev);
#endif
devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
@@ -775,4 +780,8 @@ void kbase_devfreq_term(struct kbase_device *kbdev)
kbdev->devfreq = NULL;
kbase_devfreq_term_core_mask_table(kbdev);
+
+#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL)
+ kbase_ipa_term(kbdev);
+#endif
}
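The reworked kbase_devfreq_init() above converts the repeated inline cleanup into a single goto-based unwind path, with labels ordered so each failure point releases exactly what was set up before it (plus the free_devfreq_freq_table flag for the one resource whose ownership transfers to devfreq_add_device()). A generic sketch of that unwind pattern, with hypothetical setup/teardown steps, is:

/* Hypothetical steps, used only to illustrate the label ordering. */
int demo_setup_a(void);
int demo_setup_b(void);
int demo_setup_c(void);
void demo_teardown_a(void);
void demo_teardown_b(void);

static int demo_init(void)
{
    int err;

    err = demo_setup_a();
    if (err)
        goto setup_a_failed;

    err = demo_setup_b();
    if (err)
        goto setup_b_failed;

    err = demo_setup_c();
    if (err)
        goto setup_c_failed;

    return 0;

    /* Labels appear in reverse order of setup, so execution falls through
     * and releases everything acquired before the failing step.
     */
setup_c_failed:
    demo_teardown_b();
setup_b_failed:
    demo_teardown_a();
setup_a_failed:
    return err;
}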
diff --git a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c
index 0ea14bc..10e92ec 100644
--- a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -40,19 +40,7 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev,
registers.l2_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(L2_FEATURES));
- registers.core_features = 0;
-#if !MALI_USE_CSF
- /* TGOx */
- registers.core_features = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(CORE_FEATURES));
-#else /* !MALI_USE_CSF */
- if (!(((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) ==
- GPU_ID2_PRODUCT_TDUX) ||
- ((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) ==
- GPU_ID2_PRODUCT_TODX)))
- registers.core_features =
- kbase_reg_read(kbdev, GPU_CONTROL_REG(CORE_FEATURES));
-#endif /* MALI_USE_CSF */
+
registers.tiler_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(TILER_FEATURES));
registers.mem_features = kbase_reg_read(kbdev,
@@ -170,6 +158,11 @@ int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
regdump->coherency_features = coherency_features;
+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CORE_FEATURES))
+ regdump->core_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(CORE_FEATURES));
+ else
+ regdump->core_features = 0;
+
kbase_pm_register_access_disable(kbdev);
return error;
diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
index 0ece571..b89b917 100644
--- a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -29,6 +29,20 @@
#include <device/mali_kbase_device.h>
#include <backend/gpu/mali_kbase_instr_internal.h>
+static int wait_prfcnt_ready(struct kbase_device *kbdev)
+{
+ u32 loops;
+
+ for (loops = 0; loops < KBASE_PRFCNT_ACTIVE_MAX_LOOPS; loops++) {
+ const u32 prfcnt_active = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) &
+ GPU_STATUS_PRFCNT_ACTIVE;
+ if (!prfcnt_active)
+ return 0;
+ }
+
+ dev_err(kbdev->dev, "PRFCNT_ACTIVE bit stuck\n");
+ return -EBUSY;
+}
int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
struct kbase_context *kctx,
@@ -43,20 +57,20 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
/* alignment failure */
if ((enable->dump_buffer == 0ULL) || (enable->dump_buffer & (2048 - 1)))
- goto out_err;
+ return err;
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
/* Instrumentation is already enabled */
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
- goto out_err;
+ return err;
}
if (kbase_is_gpu_removed(kbdev)) {
/* GPU has been removed by Arbiter */
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
- goto out_err;
+ return err;
}
/* Enable interrupt */
@@ -81,9 +95,19 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT;
#endif
+ /* Wait until prfcnt config register can be written */
+ err = wait_prfcnt_ready(kbdev);
+ if (err)
+ return err;
+
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
prfcnt_config | PRFCNT_CONFIG_MODE_OFF);
+ /* Wait until prfcnt is disabled before writing configuration registers */
+ err = wait_prfcnt_ready(kbdev);
+ if (err)
+ return err;
+
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
enable->dump_buffer & 0xFFFFFFFF);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
@@ -111,12 +135,8 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
- err = 0;
-
dev_dbg(kbdev->dev, "HW counters dumping set-up for context %pK", kctx);
- return err;
- out_err:
- return err;
+ return 0;
}
static void kbasep_instr_hwc_disable_hw_prfcnt(struct kbase_device *kbdev)
@@ -135,7 +155,10 @@ static void kbasep_instr_hwc_disable_hw_prfcnt(struct kbase_device *kbdev)
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~PRFCNT_SAMPLE_COMPLETED);
- /* Disable the counters */
+ /* Wait until prfcnt config register can be written, then disable the counters.
+ * Return value is ignored as we are disabling anyway.
+ */
+ wait_prfcnt_ready(kbdev);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0);
kbdev->hwcnt.kctx = NULL;
@@ -146,7 +169,6 @@ static void kbasep_instr_hwc_disable_hw_prfcnt(struct kbase_device *kbdev)
int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
{
unsigned long flags, pm_flags;
- int err = -EINVAL;
struct kbase_device *kbdev = kctx->kbdev;
while (1) {
@@ -167,14 +189,14 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
/* Instrumentation is not enabled */
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
- return err;
+ return -EINVAL;
}
if (kbdev->hwcnt.kctx != kctx) {
/* Instrumentation has been setup for another context */
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
- return err;
+ return -EINVAL;
}
if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
@@ -233,6 +255,11 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
*/
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;
+ /* Wait until prfcnt is ready to request dump */
+ err = wait_prfcnt_ready(kbdev);
+ if (err)
+ goto unlock;
+
/* Reconfigure the dump address */
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
kbdev->hwcnt.addr & 0xFFFFFFFF);
@@ -248,11 +275,8 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
dev_dbg(kbdev->dev, "HW counters dumping done for context %pK", kctx);
- err = 0;
-
unlock:
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
-
return err;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);
@@ -346,21 +370,24 @@ int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
*/
if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
KBASE_INSTR_STATE_IDLE)
- goto out;
+ goto unlock;
if (kbase_is_gpu_removed(kbdev)) {
/* GPU has been removed by Arbiter */
- goto out;
+ goto unlock;
}
+ /* Wait until prfcnt is ready to clear */
+ err = wait_prfcnt_ready(kbdev);
+ if (err)
+ goto unlock;
+
/* Clear the counters */
KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, 0);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
GPU_COMMAND_PRFCNT_CLEAR);
- err = 0;
-
-out:
+unlock:
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
return err;
}
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
index 32bdf72..20905f7 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
@@ -191,9 +191,7 @@ static u64 select_job_chain(struct kbase_jd_atom *katom)
return jc;
}
-void kbase_job_hw_submit(struct kbase_device *kbdev,
- struct kbase_jd_atom *katom,
- int js)
+int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js)
{
struct kbase_context *kctx;
u32 cfg;
@@ -202,13 +200,13 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
struct slot_rb *ptr_slot_rb = &kbdev->hwaccess.backend.slot_rb[js];
lockdep_assert_held(&kbdev->hwaccess_lock);
- KBASE_DEBUG_ASSERT(kbdev);
- KBASE_DEBUG_ASSERT(katom);
kctx = katom->kctx;
/* Command register must be available */
- KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx));
+ if (WARN(!kbasep_jm_is_js_free(kbdev, js, kctx),
+ "Attempting to assign to occupied slot %d in kctx %pK\n", js, (void *)kctx))
+ return -EPERM;
dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n",
jc_head, (void *)katom);
@@ -281,7 +279,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
/* Write an approximate start timestamp.
* It's approximate because there might be a job in the HEAD register.
*/
- katom->start_timestamp = ktime_get();
+ katom->start_timestamp = ktime_get_raw();
/* GO ! */
dev_dbg(kbdev->dev, "JS: Submitting atom %pK from ctx %pK to js[%d] with head=0x%llx",
@@ -329,6 +327,8 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
JS_COMMAND_START);
+
+ return 0;
}
/**
@@ -393,11 +393,9 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
lockdep_assert_held(&kbdev->hwaccess_lock);
- KBASE_DEBUG_ASSERT(kbdev);
-
KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ, NULL, NULL, 0, done);
- end_timestamp = ktime_get();
+ end_timestamp = ktime_get_raw();
while (done) {
u32 failed = done >> 16;
@@ -409,7 +407,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
* numbered interrupts before the higher numbered ones.
*/
i = ffs(finished) - 1;
- KBASE_DEBUG_ASSERT(i >= 0);
+ if (WARN(i < 0, "%s: called without receiving any interrupts\n", __func__))
+ break;
do {
int nr_done;
@@ -590,7 +589,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
failed = done >> 16;
finished = (done & 0xFFFF) | failed;
if (done)
- end_timestamp = ktime_get();
+ end_timestamp = ktime_get_raw();
} while (finished & (1 << i));
kbasep_job_slot_update_head_start_timestamp(kbdev, i,
@@ -619,7 +618,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
u64 job_in_head_before;
u32 status_reg_after;
- KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK)));
+ WARN_ON(action & (~JS_COMMAND_MASK));
/* Check the head pointer */
job_in_head_before = ((u64) kbase_reg_read(kbdev,
@@ -697,7 +696,8 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, head, head->jc, js);
break;
default:
- BUG();
+ WARN(1, "Unknown action %d on atom %pK in kctx %pK\n", action,
+ (void *)target_katom, (void *)target_katom->kctx);
break;
}
} else {
@@ -726,7 +726,8 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL, 0, js);
break;
default:
- BUG();
+ WARN(1, "Unknown action %d on atom %pK in kctx %pK\n", action,
+ (void *)target_katom, (void *)target_katom->kctx);
break;
}
}
@@ -752,9 +753,7 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
int i;
bool stop_sent = false;
- KBASE_DEBUG_ASSERT(kctx != NULL);
kbdev = kctx->kbdev;
- KBASE_DEBUG_ASSERT(kbdev != NULL);
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -934,7 +933,11 @@ void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n",
target_katom, sw_flags, js);
- KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK));
+ if (sw_flags & JS_COMMAND_MASK) {
+ WARN(true, "Atom %pK in kctx %pK received non-NOP flags %d\n", (void *)target_katom,
+ target_katom ? (void *)target_katom->kctx : NULL, sw_flags);
+ sw_flags &= ~((u32)JS_COMMAND_MASK);
+ }
kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom,
JS_COMMAND_SOFT_STOP | sw_flags);
}
@@ -1052,17 +1055,14 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
{
unsigned long flags;
struct kbase_device *kbdev;
- ktime_t end_timestamp = ktime_get();
+ ktime_t end_timestamp = ktime_get_raw();
struct kbasep_js_device_data *js_devdata;
bool silent = false;
u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
- KBASE_DEBUG_ASSERT(data);
-
kbdev = container_of(data, struct kbase_device,
hwaccess.backend.reset_work);
- KBASE_DEBUG_ASSERT(kbdev);
js_devdata = &kbdev->js_data;
if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
@@ -1097,7 +1097,7 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
return;
}
- KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false);
+ WARN(kbdev->irq_reset_flush, "%s: GPU reset already in flight\n", __func__);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
spin_lock(&kbdev->mmu_mask_change);
@@ -1138,7 +1138,8 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
mutex_lock(&kbdev->pm.lock);
/* We hold the pm lock, so there ought to be a current policy */
- KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
+ if (unlikely(!kbdev->pm.backend.pm_current_policy))
+ dev_warn(kbdev->dev, "No power policy set!");
/* All slot have been soft-stopped and we've waited
* SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we
@@ -1235,8 +1236,6 @@ static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer)
struct kbase_device *kbdev = container_of(timer, struct kbase_device,
hwaccess.backend.reset_timer);
- KBASE_DEBUG_ASSERT(kbdev);
-
/* Reset still pending? */
if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) ==
@@ -1257,8 +1256,6 @@ static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
int i;
int pending_jobs = 0;
- KBASE_DEBUG_ASSERT(kbdev);
-
/* Count the number of jobs */
for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i);
@@ -1316,8 +1313,6 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev,
{
int i;
- KBASE_DEBUG_ASSERT(kbdev);
-
#ifdef CONFIG_MALI_ARBITER_SUPPORT
if (kbase_pm_is_gpu_lost(kbdev)) {
/* GPU access has been removed, reset will be done by
@@ -1371,13 +1366,11 @@ KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);
*/
void kbase_reset_gpu(struct kbase_device *kbdev)
{
- KBASE_DEBUG_ASSERT(kbdev);
-
/* Note this is an assert/atomic_set because it is a software issue for
* a race to be occurring here
*/
- KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
- KBASE_RESET_GPU_PREPARED);
+ if (WARN_ON(atomic_read(&kbdev->hwaccess.backend.reset_gpu) != KBASE_RESET_GPU_PREPARED))
+ return;
atomic_set(&kbdev->hwaccess.backend.reset_gpu,
KBASE_RESET_GPU_COMMITTED);
@@ -1395,13 +1388,11 @@ KBASE_EXPORT_TEST_API(kbase_reset_gpu);
void kbase_reset_gpu_locked(struct kbase_device *kbdev)
{
- KBASE_DEBUG_ASSERT(kbdev);
-
/* Note this is an assert/atomic_set because it is a software issue for
* a race to be occurring here
*/
- KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
- KBASE_RESET_GPU_PREPARED);
+ if (WARN_ON(atomic_read(&kbdev->hwaccess.backend.reset_gpu) != KBASE_RESET_GPU_PREPARED))
+ return;
atomic_set(&kbdev->hwaccess.backend.reset_gpu,
KBASE_RESET_GPU_COMMITTED);
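Several hunks above switch job timestamps from ktime_get() to ktime_get_raw(), i.e. from the adjustable monotonic clock to the raw monotonic clock, so NTP rate corrections cannot skew measured job durations. A minimal sketch of taking an interval on the raw clock (demo_elapsed_ns() and its callback are illustrative, not driver code):

#include <linux/ktime.h>
#include <linux/types.h>

/* Sketch: measure an interval with the raw monotonic clock, which is not
 * subject to NTP rate adjustment (unlike ktime_get()).
 */
static s64 demo_elapsed_ns(void (*work)(void))
{
    ktime_t start = ktime_get_raw();

    work();

    return ktime_to_ns(ktime_sub(ktime_get_raw(), start));
}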
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
index 1039e85..1ebb843 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2011-2016, 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016, 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -76,7 +76,6 @@ static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
}
#endif
-
/**
* kbase_job_hw_submit() - Submit a job to the GPU
* @kbdev: Device pointer
@@ -88,10 +87,10 @@ static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
*
* The following locking conditions are made on the caller:
* - it must hold the hwaccess_lock
+ *
+ * Return: 0 if the job was successfully submitted to hardware, an error otherwise.
*/
-void kbase_job_hw_submit(struct kbase_device *kbdev,
- struct kbase_jd_atom *katom,
- int js);
+int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js);
#if !MALI_USE_CSF
/**
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
index 48d1de8..4fe8046 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
@@ -346,16 +346,35 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev,
katom->protected_state.exit !=
KBASE_ATOM_EXIT_PROTECTED_CHECK)
kbdev->protected_mode_transition = false;
+
+ /* If the atom is at KBASE_ATOM_ENTER_PROTECTED_HWCNT state, it means
+ * one of two events prevented it from progressing to the next state and
+ * ultimately reach protected mode:
+ * - hwcnts were enabled, and the atom had to schedule a worker to
+ * disable them.
+ * - the hwcnts were already disabled, but some other error occurred.
+ * In the first case, if the worker has not yet completed
+ * (kbdev->protected_mode_hwcnt_disabled == false), we need to re-enable
+ * them and signal to the worker they have already been enabled
+ */
+ if (kbase_jd_katom_is_protected(katom) &&
+ (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_HWCNT)) {
+ kbdev->protected_mode_hwcnt_desired = true;
+ if (kbdev->protected_mode_hwcnt_disabled) {
+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+ kbdev->protected_mode_hwcnt_disabled = false;
+ }
+ }
+
/* If the atom has suspended hwcnt but has not yet entered
* protected mode, then resume hwcnt now. If the GPU is now in
* protected mode then hwcnt will be resumed by GPU reset so
* don't resume it here.
*/
if (kbase_jd_katom_is_protected(katom) &&
- ((katom->protected_state.enter ==
- KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) ||
- (katom->protected_state.enter ==
- KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY))) {
+ ((katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) ||
+ (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY) ||
+ (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_FINISHED))) {
WARN_ON(!kbdev->protected_mode_hwcnt_disabled);
kbdev->protected_mode_hwcnt_desired = true;
if (kbdev->protected_mode_hwcnt_disabled) {
@@ -506,17 +525,14 @@ static int kbase_jm_protected_entry(struct kbase_device *kbdev,
KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev);
if (err) {
/*
- * Failed to switch into protected mode, resume
- * GPU hwcnt and fail atom.
+ * Failed to switch into protected mode.
+ *
+ * At this point we expect:
+ * katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION &&
+ * katom->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_FINISHED
+ * ==>
+ * kbdev->protected_mode_hwcnt_disabled = false
*/
- WARN_ON(!kbdev->protected_mode_hwcnt_disabled);
- kbdev->protected_mode_hwcnt_desired = true;
- if (kbdev->protected_mode_hwcnt_disabled) {
- kbase_hwcnt_context_enable(
- kbdev->hwcnt_gpu_ctx);
- kbdev->protected_mode_hwcnt_disabled = false;
- }
-
katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
/*
@@ -536,12 +552,9 @@ static int kbase_jm_protected_entry(struct kbase_device *kbdev,
/*
* Protected mode sanity checks.
*/
- KBASE_DEBUG_ASSERT_MSG(
- kbase_jd_katom_is_protected(katom[idx]) ==
- kbase_gpu_in_protected_mode(kbdev),
- "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
- kbase_jd_katom_is_protected(katom[idx]),
- kbase_gpu_in_protected_mode(kbdev));
+ WARN(kbase_jd_katom_is_protected(katom[idx]) != kbase_gpu_in_protected_mode(kbdev),
+ "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+ kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev));
katom[idx]->gpu_rb_state =
KBASE_ATOM_GPU_RB_READY;
@@ -951,18 +964,6 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
cores_ready = kbase_pm_cores_requested(kbdev,
true);
- if (katom[idx]->event_code ==
- BASE_JD_EVENT_PM_EVENT) {
- KBASE_KTRACE_ADD_JM_SLOT_INFO(
- kbdev, JM_MARK_FOR_RETURN_TO_JS,
- katom[idx]->kctx, katom[idx],
- katom[idx]->jc, js,
- katom[idx]->event_code);
- katom[idx]->gpu_rb_state =
- KBASE_ATOM_GPU_RB_RETURN_TO_JS;
- break;
- }
-
if (!cores_ready)
break;
@@ -1011,9 +1012,10 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
kbase_pm_request_gpu_cycle_counter_l2_is_on(
kbdev);
- kbase_job_hw_submit(kbdev, katom[idx], js);
- katom[idx]->gpu_rb_state =
- KBASE_ATOM_GPU_RB_SUBMITTED;
+ if (!kbase_job_hw_submit(kbdev, katom[idx], js))
+ katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_SUBMITTED;
+ else
+ break;
kbasep_platform_event_work_begin(katom[idx]);
@@ -1346,11 +1348,9 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
} else {
char js_string[16];
- trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
- js_string,
- sizeof(js_string)),
- ktime_to_ns(ktime_get()), 0, 0,
- 0);
+ trace_gpu_sched_switch(kbasep_make_job_slot_string(js, js_string,
+ sizeof(js_string)),
+ ktime_to_ns(ktime_get_raw()), 0, 0, 0);
}
}
#endif
@@ -1406,14 +1406,14 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
if (katom->protected_state.exit ==
KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) {
/* protected mode sanity checks */
- KBASE_DEBUG_ASSERT_MSG(
- kbase_jd_katom_is_protected(katom) == kbase_gpu_in_protected_mode(kbdev),
- "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
- kbase_jd_katom_is_protected(katom), kbase_gpu_in_protected_mode(kbdev));
- KBASE_DEBUG_ASSERT_MSG(
- (kbase_jd_katom_is_protected(katom) && js == 0) ||
- !kbase_jd_katom_is_protected(katom),
- "Protected atom on JS%d not supported", js);
+ WARN(kbase_jd_katom_is_protected(katom) !=
+ kbase_gpu_in_protected_mode(kbdev),
+ "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+ kbase_jd_katom_is_protected(katom),
+ kbase_gpu_in_protected_mode(kbdev));
+ WARN(!(kbase_jd_katom_is_protected(katom) && js == 0) &&
+ kbase_jd_katom_is_protected(katom),
+ "Protected atom on JS%d not supported", js);
}
if ((katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) &&
!kbase_ctx_flag(katom->kctx, KCTX_DYING))
@@ -1804,11 +1804,9 @@ void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
base_jd_core_req core_req)
{
if (!kbdev->pm.active_count) {
- mutex_lock(&kbdev->js_data.runpool_mutex);
- mutex_lock(&kbdev->pm.lock);
+ kbase_pm_lock(kbdev);
kbase_pm_update_active(kbdev);
- mutex_unlock(&kbdev->pm.lock);
- mutex_unlock(&kbdev->js_data.runpool_mutex);
+ kbase_pm_unlock(kbdev);
}
}
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
index 603ffcf..961a951 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
+++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -80,6 +80,7 @@ static bool ipa_control_timer_enabled;
#endif
#define LO_MASK(M) ((M) & 0xFFFFFFFF)
+#define HI_MASK(M) ((M) & 0xFFFFFFFF00000000)
static u32 get_implementation_register(u32 reg)
{
@@ -104,20 +105,15 @@ static u32 get_implementation_register(u32 reg)
}
struct {
+ spinlock_t access_lock;
+#if !MALI_USE_CSF
unsigned long prfcnt_base;
+#endif /* !MALI_USE_CSF */
u32 *prfcnt_base_cpu;
- struct kbase_device *kbdev;
- struct tagged_addr *pages;
- size_t page_count;
u32 time;
- struct {
- u32 jm;
- u32 tiler;
- u32 l2;
- u32 shader;
- } prfcnt_en;
+ struct gpu_model_prfcnt_en prfcnt_en;
u64 l2_present;
u64 shader_present;
@@ -181,7 +177,9 @@ struct control_reg_values_t {
struct dummy_model_t {
int reset_completed;
int reset_completed_mask;
+#if !MALI_USE_CSF
int prfcnt_sample_completed;
+#endif /* !MALI_USE_CSF */
int power_changed_mask; /* 2bits: _ALL,_SINGLE */
int power_changed; /* 1bit */
bool clean_caches_completed;
@@ -464,6 +462,7 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type,
u32 event_index;
u64 value = 0;
u32 core;
+ unsigned long flags;
if (WARN_ON(core_type >= KBASE_IPA_CORE_TYPE_NUM))
return 0;
@@ -487,6 +486,8 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type,
event_index -= 4;
+ spin_lock_irqsave(&performance_counters.access_lock, flags);
+
switch (core_type) {
case KBASE_IPA_CORE_TYPE_CSHW:
core_count = 1;
@@ -514,28 +515,46 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type,
event_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE;
}
+ spin_unlock_irqrestore(&performance_counters.access_lock, flags);
+
if (is_low_word)
return (value & U32_MAX);
else
return (value >> 32);
}
+#endif /* MALI_USE_CSF */
-void gpu_model_clear_prfcnt_values(void)
+/**
+ * gpu_model_clear_prfcnt_values_nolock - Clear performance counter values
+ *
+ * Sets all performance counter values to zero. The performance counter access
+ * lock must be held when calling this function.
+ */
+static void gpu_model_clear_prfcnt_values_nolock(void)
{
- memset(performance_counters.cshw_counters, 0,
- sizeof(performance_counters.cshw_counters));
-
- memset(performance_counters.tiler_counters, 0,
- sizeof(performance_counters.tiler_counters));
-
- memset(performance_counters.l2_counters, 0,
- sizeof(performance_counters.l2_counters));
-
+ lockdep_assert_held(&performance_counters.access_lock);
+#if !MALI_USE_CSF
+ memset(performance_counters.jm_counters, 0, sizeof(performance_counters.jm_counters));
+#else
+ memset(performance_counters.cshw_counters, 0, sizeof(performance_counters.cshw_counters));
+#endif /* !MALI_USE_CSF */
+ memset(performance_counters.tiler_counters, 0, sizeof(performance_counters.tiler_counters));
+ memset(performance_counters.l2_counters, 0, sizeof(performance_counters.l2_counters));
memset(performance_counters.shader_counters, 0,
sizeof(performance_counters.shader_counters));
}
+
+#if MALI_USE_CSF
+void gpu_model_clear_prfcnt_values(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&performance_counters.access_lock, flags);
+ gpu_model_clear_prfcnt_values_nolock();
+ spin_unlock_irqrestore(&performance_counters.access_lock, flags);
+}
KBASE_EXPORT_TEST_API(gpu_model_clear_prfcnt_values);
-#endif
+#endif /* MALI_USE_CSF */
/**
* gpu_model_dump_prfcnt_blocks() - Dump performance counter values to buffer
@@ -545,17 +564,20 @@ KBASE_EXPORT_TEST_API(gpu_model_clear_prfcnt_values);
* @block_count: Number of blocks to dump
* @prfcnt_enable_mask: Counter enable mask
* @blocks_present: Available blocks bit mask
+ *
+ * The performance counter access lock must be held before calling this
+ * function.
*/
-static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index,
- u32 block_count,
- u32 prfcnt_enable_mask,
- u64 blocks_present)
+static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index, u32 block_count,
+ u32 prfcnt_enable_mask, u64 blocks_present)
{
u32 block_idx, counter;
u32 counter_value = 0;
u32 *prfcnt_base;
u32 index = 0;
+ lockdep_assert_held(&performance_counters.access_lock);
+
prfcnt_base = performance_counters.prfcnt_base_cpu;
for (block_idx = 0; block_idx < block_count; block_idx++) {
@@ -594,35 +616,18 @@ static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index,
}
}
-/**
- * gpu_model_sync_dummy_prfcnt() - Synchronize dumped performance counter values
- *
- * Used to ensure counter values are not lost if cache invalidation is performed
- * prior to reading.
- */
-static void gpu_model_sync_dummy_prfcnt(void)
-{
- int i;
- struct page *pg;
-
- for (i = 0; i < performance_counters.page_count; i++) {
- pg = as_page(performance_counters.pages[i]);
- kbase_sync_single_for_device(performance_counters.kbdev,
- kbase_dma_addr(pg), PAGE_SIZE,
- DMA_BIDIRECTIONAL);
- }
-}
-
-static void midgard_model_dump_prfcnt(void)
+static void gpu_model_dump_nolock(void)
{
u32 index = 0;
+ lockdep_assert_held(&performance_counters.access_lock);
+
#if !MALI_USE_CSF
- gpu_model_dump_prfcnt_blocks(performance_counters.jm_counters, &index,
- 1, 0xffffffff, 0x1);
+ gpu_model_dump_prfcnt_blocks(performance_counters.jm_counters, &index, 1,
+ performance_counters.prfcnt_en.fe, 0x1);
#else
- gpu_model_dump_prfcnt_blocks(performance_counters.cshw_counters, &index,
- 1, 0xffffffff, 0x1);
+ gpu_model_dump_prfcnt_blocks(performance_counters.cshw_counters, &index, 1,
+ performance_counters.prfcnt_en.fe, 0x1);
#endif /* !MALI_USE_CSF */
gpu_model_dump_prfcnt_blocks(performance_counters.tiler_counters,
&index, 1,
@@ -637,12 +642,48 @@ static void midgard_model_dump_prfcnt(void)
performance_counters.prfcnt_en.shader,
performance_counters.shader_present);
- gpu_model_sync_dummy_prfcnt();
+ /* Counter values are cleared after each dump */
+ gpu_model_clear_prfcnt_values_nolock();
/* simulate a 'long' time between samples */
performance_counters.time += 10;
}
+#if !MALI_USE_CSF
+static void midgard_model_dump_prfcnt(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&performance_counters.access_lock, flags);
+ gpu_model_dump_nolock();
+ spin_unlock_irqrestore(&performance_counters.access_lock, flags);
+}
+#else
+void gpu_model_prfcnt_dump_request(u32 *sample_buf, struct gpu_model_prfcnt_en enable_maps)
+{
+ unsigned long flags;
+
+ if (WARN_ON(!sample_buf))
+ return;
+
+ spin_lock_irqsave(&performance_counters.access_lock, flags);
+ performance_counters.prfcnt_base_cpu = sample_buf;
+ performance_counters.prfcnt_en = enable_maps;
+ gpu_model_dump_nolock();
+ spin_unlock_irqrestore(&performance_counters.access_lock, flags);
+}
+
+void gpu_model_glb_request_job_irq(void *model)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&hw_error_status.access_lock, flags);
+ hw_error_status.job_irq_status |= JOB_IRQ_GLOBAL_IF;
+ spin_unlock_irqrestore(&hw_error_status.access_lock, flags);
+ gpu_device_raise_irq(model, GPU_DUMMY_JOB_IRQ);
+}
+#endif /* !MALI_USE_CSF */
+
static void init_register_statuses(struct dummy_model_t *dummy)
{
int i;
@@ -673,6 +714,8 @@ static void init_register_statuses(struct dummy_model_t *dummy)
static void update_register_statuses(struct dummy_model_t *dummy, int job_slot)
{
+ lockdep_assert_held(&hw_error_status.access_lock);
+
if (hw_error_status.errors_mask & IS_A_JOB_ERROR) {
if (job_slot == hw_error_status.current_job_slot) {
#if !MALI_USE_CSF
@@ -922,6 +965,7 @@ static void update_job_irq_js_state(struct dummy_model_t *dummy, int mask)
{
int i;
+ lockdep_assert_held(&hw_error_status.access_lock);
pr_debug("%s", "Updating the JS_ACTIVE register");
for (i = 0; i < NUM_SLOTS; i++) {
@@ -990,6 +1034,9 @@ void *midgard_model_create(const void *config)
{
struct dummy_model_t *dummy = NULL;
+ spin_lock_init(&hw_error_status.access_lock);
+ spin_lock_init(&performance_counters.access_lock);
+
dummy = kzalloc(sizeof(*dummy), GFP_KERNEL);
if (dummy) {
@@ -1009,14 +1056,18 @@ static void midgard_model_get_outputs(void *h)
{
struct dummy_model_t *dummy = (struct dummy_model_t *)h;
+ lockdep_assert_held(&hw_error_status.access_lock);
+
if (hw_error_status.job_irq_status)
gpu_device_raise_irq(dummy, GPU_DUMMY_JOB_IRQ);
if ((dummy->power_changed && dummy->power_changed_mask) ||
(dummy->reset_completed & dummy->reset_completed_mask) ||
hw_error_status.gpu_error_irq ||
- (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled) ||
- dummy->prfcnt_sample_completed)
+#if !MALI_USE_CSF
+ dummy->prfcnt_sample_completed ||
+#endif
+ (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled))
gpu_device_raise_irq(dummy, GPU_DUMMY_GPU_IRQ);
if (hw_error_status.mmu_irq_rawstat & hw_error_status.mmu_irq_mask)
@@ -1028,6 +1079,8 @@ static void midgard_model_update(void *h)
struct dummy_model_t *dummy = (struct dummy_model_t *)h;
int i;
+ lockdep_assert_held(&hw_error_status.access_lock);
+
for (i = 0; i < NUM_SLOTS; i++) {
if (!dummy->slots[i].job_active)
continue;
@@ -1074,6 +1127,8 @@ static void invalidate_active_jobs(struct dummy_model_t *dummy)
{
int i;
+ lockdep_assert_held(&hw_error_status.access_lock);
+
for (i = 0; i < NUM_SLOTS; i++) {
if (dummy->slots[i].job_active) {
hw_error_status.job_irq_rawstat |= (1 << (16 + i));
@@ -1085,7 +1140,11 @@ static void invalidate_active_jobs(struct dummy_model_t *dummy)
u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
{
+ unsigned long flags;
struct dummy_model_t *dummy = (struct dummy_model_t *)h;
+
+ spin_lock_irqsave(&hw_error_status.access_lock, flags);
+
#if !MALI_USE_CSF
if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) &&
(addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) {
@@ -1188,9 +1247,10 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
if (value & (1 << 17))
dummy->clean_caches_completed = false;
- if (value & (1 << 16))
+#if !MALI_USE_CSF
+ if (value & PRFCNT_SAMPLE_COMPLETED)
dummy->prfcnt_sample_completed = 0;
-
+#endif /* !MALI_USE_CSF */
/*update error status */
hw_error_status.gpu_error_irq &= ~(value);
} else if (addr == GPU_CONTROL_REG(GPU_COMMAND)) {
@@ -1214,9 +1274,11 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
pr_debug("clean caches requested");
dummy->clean_caches_completed = true;
break;
+#if !MALI_USE_CSF
case GPU_COMMAND_PRFCNT_SAMPLE:
midgard_model_dump_prfcnt();
dummy->prfcnt_sample_completed = 1;
+#endif /* !MALI_USE_CSF */
default:
break;
}
@@ -1346,20 +1408,24 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
mem_addr_space, addr, value);
break;
}
- } else if (addr >= GPU_CONTROL_REG(PRFCNT_BASE_LO) &&
- addr <= GPU_CONTROL_REG(PRFCNT_MMU_L2_EN)) {
+ } else {
switch (addr) {
+#if !MALI_USE_CSF
case PRFCNT_BASE_LO:
- performance_counters.prfcnt_base |= value;
+ performance_counters.prfcnt_base =
+ HI_MASK(performance_counters.prfcnt_base) | value;
+ performance_counters.prfcnt_base_cpu =
+ (u32 *)(uintptr_t)performance_counters.prfcnt_base;
break;
case PRFCNT_BASE_HI:
- performance_counters.prfcnt_base |= ((u64) value) << 32;
+ performance_counters.prfcnt_base =
+ LO_MASK(performance_counters.prfcnt_base) | (((u64)value) << 32);
+ performance_counters.prfcnt_base_cpu =
+ (u32 *)(uintptr_t)performance_counters.prfcnt_base;
break;
-#if !MALI_USE_CSF
case PRFCNT_JM_EN:
- performance_counters.prfcnt_en.jm = value;
+ performance_counters.prfcnt_en.fe = value;
break;
-#endif /* !MALI_USE_CSF */
case PRFCNT_SHADER_EN:
performance_counters.prfcnt_en.shader = value;
break;
@@ -1369,9 +1435,7 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
case PRFCNT_MMU_L2_EN:
performance_counters.prfcnt_en.l2 = value;
break;
- }
- } else {
- switch (addr) {
+#endif /* !MALI_USE_CSF */
case TILER_PWRON_LO:
dummy->power_on |= (value & 1) << 1;
/* Also ensure L2 is powered on */
@@ -1416,6 +1480,7 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
case PWR_OVERRIDE0:
#if !MALI_USE_CSF
case JM_CONFIG:
+ case PRFCNT_CONFIG:
#else /* !MALI_USE_CSF */
case CSF_CONFIG:
#endif /* !MALI_USE_CSF */
@@ -1434,13 +1499,18 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
midgard_model_update(dummy);
midgard_model_get_outputs(dummy);
+ spin_unlock_irqrestore(&hw_error_status.access_lock, flags);
return 1;
}
u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
{
+ unsigned long flags;
struct dummy_model_t *dummy = (struct dummy_model_t *)h;
+
+ spin_lock_irqsave(&hw_error_status.access_lock, flags);
+
*value = 0; /* 0 by default */
#if !MALI_USE_CSF
if (addr == JOB_CONTROL_REG(JOB_IRQ_JS_STATE)) {
@@ -1475,24 +1545,31 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
#endif /* !MALI_USE_CSF */
else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) {
*value = (dummy->reset_completed_mask << 8) |
- (dummy->power_changed_mask << 9) | (1 << 7) | 1;
+ ((dummy->clean_caches_completed_irq_enabled ? 1u : 0u) << 17) |
+ (dummy->power_changed_mask << 9) | (1 << 7) | 1;
pr_debug("GPU_IRQ_MASK read %x", *value);
} else if (addr == GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) {
*value = (dummy->power_changed << 9) | (dummy->power_changed << 10) |
(dummy->reset_completed << 8) |
+#if !MALI_USE_CSF
+ (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) |
+#endif /* !MALI_USE_CSF */
((dummy->clean_caches_completed ? 1u : 0u) << 17) |
- (dummy->prfcnt_sample_completed << 16) | hw_error_status.gpu_error_irq;
+ hw_error_status.gpu_error_irq;
pr_debug("GPU_IRQ_RAWSTAT read %x", *value);
} else if (addr == GPU_CONTROL_REG(GPU_IRQ_STATUS)) {
*value = ((dummy->power_changed && (dummy->power_changed_mask & 0x1)) << 9) |
((dummy->power_changed && (dummy->power_changed_mask & 0x2)) << 10) |
((dummy->reset_completed & dummy->reset_completed_mask) << 8) |
+#if !MALI_USE_CSF
+ (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) |
+#endif /* !MALI_USE_CSF */
(((dummy->clean_caches_completed &&
dummy->clean_caches_completed_irq_enabled) ?
1u :
0u)
<< 17) |
- (dummy->prfcnt_sample_completed << 16) | hw_error_status.gpu_error_irq;
+ hw_error_status.gpu_error_irq;
pr_debug("GPU_IRQ_STAT read %x", *value);
} else if (addr == GPU_CONTROL_REG(GPU_STATUS)) {
*value = 0;
@@ -1827,6 +1904,8 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER,
counter_index, is_low_word);
+ } else if (addr == USER_REG(LATEST_FLUSH)) {
+ *value = 0;
}
#endif
else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) {
@@ -1840,18 +1919,20 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
*value = 0;
}
+ spin_unlock_irqrestore(&hw_error_status.access_lock, flags);
CSTD_UNUSED(dummy);
return 1;
}
-static u32 set_user_sample_core_type(u64 *counters,
- u32 *usr_data_start, u32 usr_data_offset,
- u32 usr_data_size, u32 core_count)
+static u32 set_user_sample_core_type(u64 *counters, u32 *usr_data_start, u32 usr_data_offset,
+ u32 usr_data_size, u32 core_count)
{
u32 sample_size;
u32 *usr_data = NULL;
+ lockdep_assert_held(&performance_counters.access_lock);
+
sample_size =
core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u32);
@@ -1866,11 +1947,7 @@ static u32 set_user_sample_core_type(u64 *counters,
u32 i;
for (i = 0; i < loop_cnt; i++) {
- if (copy_from_user(&counters[i], &usr_data[i],
- sizeof(u32))) {
- model_error_log(KBASE_CORE, "Unable to set counter sample 2");
- break;
- }
+ counters[i] = usr_data[i];
}
}
@@ -1884,6 +1961,8 @@ static u32 set_kernel_sample_core_type(u64 *counters,
u32 sample_size;
u64 *usr_data = NULL;
+ lockdep_assert_held(&performance_counters.access_lock);
+
sample_size =
core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u64);
@@ -1900,49 +1979,70 @@ static u32 set_kernel_sample_core_type(u64 *counters,
}
/* Counter values injected through ioctl are of 32 bits */
-void gpu_model_set_dummy_prfcnt_sample(u32 *usr_data, u32 usr_data_size)
+int gpu_model_set_dummy_prfcnt_user_sample(u32 __user *data, u32 size)
{
+ unsigned long flags;
+ u32 *user_data;
u32 offset = 0;
+ if (data == NULL || size == 0 || size > KBASE_DUMMY_MODEL_COUNTER_TOTAL * sizeof(u32))
+ return -EINVAL;
+
+	/* copy_from_user() might sleep, so it can't be called while holding a
+	 * spinlock. Allocate a temporary buffer for the user data and copy into
+	 * it before taking the lock.
+	 */
+ user_data = kmalloc(size, GFP_KERNEL);
+ if (!user_data)
+ return -ENOMEM;
+
+ if (copy_from_user(user_data, data, size)) {
+ model_error_log(KBASE_CORE, "Unable to copy prfcnt data from userspace");
+ kfree(user_data);
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&performance_counters.access_lock, flags);
#if !MALI_USE_CSF
- offset = set_user_sample_core_type(performance_counters.jm_counters,
- usr_data, offset, usr_data_size, 1);
+ offset = set_user_sample_core_type(performance_counters.jm_counters, user_data, offset,
+ size, 1);
#else
- offset = set_user_sample_core_type(performance_counters.cshw_counters,
- usr_data, offset, usr_data_size, 1);
+ offset = set_user_sample_core_type(performance_counters.cshw_counters, user_data, offset,
+ size, 1);
#endif /* !MALI_USE_CSF */
- offset = set_user_sample_core_type(performance_counters.tiler_counters,
- usr_data, offset, usr_data_size,
- hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT));
- offset = set_user_sample_core_type(performance_counters.l2_counters,
- usr_data, offset, usr_data_size,
- KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS);
- offset = set_user_sample_core_type(performance_counters.shader_counters,
- usr_data, offset, usr_data_size,
- KBASE_DUMMY_MODEL_MAX_SHADER_CORES);
+ offset = set_user_sample_core_type(performance_counters.tiler_counters, user_data, offset,
+ size, hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT));
+ offset = set_user_sample_core_type(performance_counters.l2_counters, user_data, offset,
+ size, KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS);
+ offset = set_user_sample_core_type(performance_counters.shader_counters, user_data, offset,
+ size, KBASE_DUMMY_MODEL_MAX_SHADER_CORES);
+ spin_unlock_irqrestore(&performance_counters.access_lock, flags);
+
+ kfree(user_data);
+ return 0;
}
/* Counter values injected through kutf are of 64 bits */
-void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *usr_data, u32 usr_data_size)
+void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *data, u32 size)
{
+ unsigned long flags;
u32 offset = 0;
+ spin_lock_irqsave(&performance_counters.access_lock, flags);
#if !MALI_USE_CSF
- offset = set_kernel_sample_core_type(performance_counters.jm_counters,
- usr_data, offset, usr_data_size, 1);
+ offset = set_kernel_sample_core_type(performance_counters.jm_counters, data, offset, size,
+ 1);
#else
- offset = set_kernel_sample_core_type(performance_counters.cshw_counters,
- usr_data, offset, usr_data_size, 1);
+ offset = set_kernel_sample_core_type(performance_counters.cshw_counters, data, offset, size,
+ 1);
#endif /* !MALI_USE_CSF */
- offset = set_kernel_sample_core_type(performance_counters.tiler_counters,
- usr_data, offset, usr_data_size,
- hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT));
- offset = set_kernel_sample_core_type(performance_counters.l2_counters,
- usr_data, offset, usr_data_size,
- hweight64(performance_counters.l2_present));
- offset = set_kernel_sample_core_type(performance_counters.shader_counters,
- usr_data, offset, usr_data_size,
- hweight64(performance_counters.shader_present));
+ offset = set_kernel_sample_core_type(performance_counters.tiler_counters, data, offset,
+ size, hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT));
+ offset = set_kernel_sample_core_type(performance_counters.l2_counters, data, offset, size,
+ hweight64(performance_counters.l2_present));
+ offset = set_kernel_sample_core_type(performance_counters.shader_counters, data, offset,
+ size, hweight64(performance_counters.shader_present));
+ spin_unlock_irqrestore(&performance_counters.access_lock, flags);
}
KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_kernel_sample);
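The rework above follows a common kernel pattern: copy_from_user() may fault and sleep, so the user data is staged into a kmalloc'd buffer first and the spinlock is only held around the non-sleeping update. A minimal sketch of that pattern, independent of the dummy model (the function and parameter names here are illustrative, not part of the driver; needs <linux/slab.h>, <linux/uaccess.h>, <linux/spinlock.h>, <linux/string.h>):

/* Sketch only: stage user data outside the lock, then publish it under the lock. */
static int example_set_values_from_user(const u32 __user *user_ptr, size_t count,
					spinlock_t *lock, u32 *dest)
{
	unsigned long flags;
	u32 *tmp;

	tmp = kmalloc_array(count, sizeof(*tmp), GFP_KERNEL);
	if (!tmp)
		return -ENOMEM;

	if (copy_from_user(tmp, user_ptr, count * sizeof(*tmp))) {
		kfree(tmp);
		return -EFAULT;
	}

	spin_lock_irqsave(lock, flags);	/* no sleeping allowed from here */
	memcpy(dest, tmp, count * sizeof(*tmp));
	spin_unlock_irqrestore(lock, flags);

	kfree(tmp);
	return 0;
}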
@@ -1977,21 +2077,12 @@ void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev,
}
KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_cores);
-void gpu_model_set_dummy_prfcnt_base_cpu(u32 *base, struct kbase_device *kbdev,
- struct tagged_addr *pages,
- size_t page_count)
-{
- performance_counters.prfcnt_base_cpu = base;
- performance_counters.kbdev = kbdev;
- performance_counters.pages = pages;
- performance_counters.page_count = page_count;
-}
-
int gpu_model_control(void *model,
struct kbase_model_control_params *params)
{
struct dummy_model_t *dummy = (struct dummy_model_t *)model;
int i;
+ unsigned long flags;
if (params->command == KBASE_MC_DISABLE_JOBS) {
for (i = 0; i < NUM_SLOTS; i++)
@@ -2000,8 +2091,10 @@ int gpu_model_control(void *model,
return -EINVAL;
}
+ spin_lock_irqsave(&hw_error_status.access_lock, flags);
midgard_model_update(dummy);
midgard_model_get_outputs(dummy);
+ spin_unlock_irqrestore(&hw_error_status.access_lock, flags);
return 0;
}
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.h b/mali_kbase/backend/gpu/mali_kbase_model_dummy.h
index 87690f4..8eaf1b0 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.h
+++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.h
@@ -116,6 +116,8 @@ struct kbase_error_atom {
/*struct to track the system error state*/
struct error_status_t {
+ spinlock_t access_lock;
+
u32 errors_mask;
u32 mmu_table_level;
int faulty_mmu_as;
@@ -138,6 +140,20 @@ struct error_status_t {
u64 as_transtab[NUM_MMU_AS];
};
+/**
+ * struct gpu_model_prfcnt_en - Performance counter enable masks
+ * @fe: Enable mask for front-end block
+ * @tiler: Enable mask for tiler block
+ * @l2: Enable mask for L2/Memory system blocks
+ * @shader: Enable mask for shader core blocks
+ */
+struct gpu_model_prfcnt_en {
+ u32 fe;
+ u32 tiler;
+ u32 l2;
+ u32 shader;
+};
+
void *midgard_model_create(const void *config);
void midgard_model_destroy(void *h);
u8 midgard_model_write_reg(void *h, u32 addr, u32 value);
@@ -148,18 +164,53 @@ int job_atom_inject_error(struct kbase_error_params *params);
int gpu_model_control(void *h,
struct kbase_model_control_params *params);
-void gpu_model_set_dummy_prfcnt_sample(u32 *usr_data, u32 usr_data_size);
-void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *usr_data, u32 usr_data_size);
+/**
+ * gpu_model_set_dummy_prfcnt_user_sample() - Set performance counter values
+ * @data: Userspace pointer to array of counter values
+ * @size: Size of counter value array
+ *
+ * Counter values set by this function will be used for one sample dump only
+ * after which counters will be cleared back to zero.
+ *
+ * Return: 0 on success, else error code.
+ */
+int gpu_model_set_dummy_prfcnt_user_sample(u32 __user *data, u32 size);
+
+/**
+ * gpu_model_set_dummy_prfcnt_kernel_sample() - Set performance counter values
+ * @data: Pointer to array of counter values
+ * @size: Size of counter value array
+ *
+ * Counter values set by this function will be used for one sample dump only
+ * after which counters will be cleared back to zero.
+ */
+void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *data, u32 size);
+
void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev,
u64 *l2_present, u64 *shader_present);
void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev,
u64 l2_present, u64 shader_present);
-void gpu_model_set_dummy_prfcnt_base_cpu(u32 *base, struct kbase_device *kbdev,
- struct tagged_addr *pages,
- size_t page_count);
+
/* Clear the counter values array maintained by the dummy model */
void gpu_model_clear_prfcnt_values(void);
+#if MALI_USE_CSF
+/**
+ * gpu_model_prfcnt_dump_request() - Request performance counter sample dump.
+ * @sample_buf: Pointer to KBASE_DUMMY_MODEL_MAX_VALUES_PER_SAMPLE sized array
+ * in which to store dumped performance counter values.
+ * @enable_maps: Physical enable maps for performance counter blocks.
+ */
+void gpu_model_prfcnt_dump_request(uint32_t *sample_buf, struct gpu_model_prfcnt_en enable_maps);
+
+/**
+ * gpu_model_glb_request_job_irq() - Trigger job interrupt with global request
+ * flag set.
+ * @model: Model pointer returned by midgard_model_create().
+ */
+void gpu_model_glb_request_job_irq(void *model);
+#endif /* MALI_USE_CSF */
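For CSF builds, a caller of the new dump-request API would fill a struct gpu_model_prfcnt_en with the physical enable masks and pass it by value along with a sample buffer. A hedged sketch (the all-ones masks are illustrative, and it assumes KBASE_DUMMY_MODEL_MAX_VALUES_PER_SAMPLE is visible; real code would allocate the buffer rather than place it on the stack):

	/* Illustrative only: request one dump from the dummy model. */
	u32 sample_buf[KBASE_DUMMY_MODEL_MAX_VALUES_PER_SAMPLE];
	struct gpu_model_prfcnt_en en = {
		.fe = 0xffffffff,	/* enable all front-end counters */
		.tiler = 0xffffffff,
		.l2 = 0xffffffff,
		.shader = 0xffffffff,
	};

	gpu_model_prfcnt_dump_request(sample_buf, en);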
+
enum gpu_dummy_irq {
GPU_DUMMY_JOB_IRQ,
GPU_DUMMY_GPU_IRQ,
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
index 3d92251..fcf98b0 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
@@ -437,8 +437,7 @@ static void kbase_pm_l2_clock_slow(struct kbase_device *kbdev)
return;
/* Stop the metrics gathering framework */
- if (kbase_pm_metrics_is_active(kbdev))
- kbase_pm_metrics_stop(kbdev);
+ kbase_pm_metrics_stop(kbdev);
/* Keep the current freq to restore it upon resume */
kbdev->previous_frequency = clk_get_rate(clk);
@@ -880,7 +879,7 @@ void kbase_pm_power_changed(struct kbase_device *kbdev)
kbase_pm_update_state(kbdev);
#if !MALI_USE_CSF
- kbase_backend_slot_update(kbdev);
+ kbase_backend_slot_update(kbdev);
#endif /* !MALI_USE_CSF */
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -990,7 +989,7 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev)
{
unsigned long flags;
- ktime_t end_timestamp = ktime_get();
+ ktime_t end_timestamp = ktime_get_raw();
struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
if (!kbdev->arb.arb_if)
@@ -1065,6 +1064,7 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev)
lockdep_assert_held(&kbdev->csf.scheduler.lock);
lockdep_assert_held(&kbdev->pm.lock);
+#ifdef CONFIG_MALI_DEBUG
/* In case of no active CSG on slot, powering up L2 could be skipped and
* proceed directly to suspend GPU.
* ToDo: firmware has to be reloaded after wake-up as no halt command
@@ -1074,6 +1074,7 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev)
dev_info(
kbdev->dev,
"No active CSGs. Can skip the power up of L2 and go for suspension directly");
+#endif
ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev);
if (ret) {
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
index 7d14be9..a4d7168 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -92,29 +92,10 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
* for those cores to get powered down
*/
if ((core_mask & old_core_mask) != old_core_mask) {
- bool can_wait;
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- can_wait = kbdev->pm.backend.gpu_ready && kbase_pm_is_mcu_desired(kbdev);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
- /* This check is ideally not required, the wait function can
- * deal with the GPU power down. But it has been added to
- * address the scenario where down-scaling request comes from
- * the platform specific code soon after the GPU power down
- * and at the time same time application thread tries to
- * power up the GPU (on the flush of GPU queue).
- * The platform specific @ref callback_power_on that gets
- * invoked on power up does not return until down-scaling
- * request is complete. The check mitigates the race caused by
- * the problem in platform specific code.
- */
- if (likely(can_wait)) {
- if (kbase_pm_wait_for_desired_state(kbdev)) {
- dev_warn(kbdev->dev,
- "Wait for update of core_mask from %llx to %llx failed",
- old_core_mask, core_mask);
- }
+ if (kbase_pm_wait_for_cores_down_scale(kbdev)) {
+ dev_warn(kbdev->dev,
+ "Wait for update of core_mask from %llx to %llx failed",
+ old_core_mask, core_mask);
}
}
#endif
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
index a249b1e..66ca0b6 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -136,7 +136,7 @@ struct kbasep_pm_metrics {
* or removed from a GPU slot.
* @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device.
* @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot.
- * @lock: spinlock protecting the kbasep_pm_metrics_data structure
+ * @lock: spinlock protecting the kbasep_pm_metrics_state structure
* @platform_data: pointer to data controlled by platform specific code
* @kbdev: pointer to kbase device for which metrics are collected
* @values: The current values of the power management metrics. The
@@ -145,7 +145,7 @@ struct kbasep_pm_metrics {
* @initialized: tracks whether metrics_state has been initialized or not.
* @timer: timer to regularly make DVFS decisions based on the power
* management metrics.
- * @timer_active: boolean indicating @timer is running
+ * @timer_state: atomic indicating current @timer state, on, off, or stopped.
* @dvfs_last: values of the PM metrics from the last DVFS tick
* @dvfs_diff: different between the current and previous PM metrics.
*/
@@ -169,7 +169,7 @@ struct kbasep_pm_metrics_state {
#ifdef CONFIG_MALI_MIDGARD_DVFS
bool initialized;
struct hrtimer timer;
- bool timer_active;
+ atomic_t timer_state;
struct kbasep_pm_metrics dvfs_last;
struct kbasep_pm_metrics dvfs_diff;
#endif
@@ -572,7 +572,7 @@ struct kbase_pm_backend_data {
};
#if MALI_USE_CSF
-/* CSF PM flag, signaling that the MCU CORE should be kept on */
+/* CSF PM flag, signaling that the MCU shader Core should be kept on */
#define CSF_DYNAMIC_PM_CORE_KEEP_ON (1 << 0)
/* CSF PM flag, signaling no scheduler suspension on idle groups */
#define CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE (1 << 1)
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
index 52e228c..aab07c9 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -656,6 +656,38 @@ static void kbase_pm_enable_mcu_db_notification(struct kbase_device *kbdev)
val &= ~MCU_CNTRL_DOORBELL_DISABLE_MASK;
kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), val);
}
+
+/**
+ * wait_mcu_as_inactive - Wait for AS used by MCU FW to get configured
+ *
+ * @kbdev: Pointer to the device.
+ *
+ * This function is called to wait for the AS used by MCU FW to get configured
+ * before DB notification on MCU is enabled, as a workaround for HW issue.
+ */
+static void wait_mcu_as_inactive(struct kbase_device *kbdev)
+{
+ unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TURSEHW_2716))
+ return;
+
+ /* Wait for the AS_ACTIVE_INT bit to become 0 for the AS used by MCU FW */
+ while (--max_loops &&
+ kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) &
+ AS_STATUS_AS_ACTIVE_INT)
+ ;
+
+ if (!WARN_ON_ONCE(max_loops == 0))
+ return;
+
+ dev_err(kbdev->dev, "AS_ACTIVE_INT bit stuck for AS %d used by MCU FW", MCU_AS_NR);
+
+ if (kbase_prepare_to_reset_gpu(kbdev, 0))
+ kbase_reset_gpu(kbdev);
+}
#endif
/**
@@ -665,10 +697,10 @@ static void kbase_pm_enable_mcu_db_notification(struct kbase_device *kbdev)
* @kbdev: Pointer to the device
* @enable: boolean indicating to enable interrupts or not
*
- * The POWER_CHANGED_ALL and POWER_CHANGED_SINGLE interrupts can be disabled
- * after L2 has been turned on when FW is controlling the power for the shader
- * cores. Correspondingly, the interrupts can be re-enabled after the MCU has
- * been disabled before the power down of L2.
+ * The POWER_CHANGED_ALL interrupt can be disabled after L2 has been turned on
+ * when FW is controlling the power for the shader cores. Correspondingly, the
+ * interrupts can be re-enabled after the MCU has been disabled before the
+ * power down of L2.
*/
static void kbasep_pm_toggle_power_interrupt(struct kbase_device *kbdev, bool enable)
{
@@ -679,15 +711,15 @@ static void kbasep_pm_toggle_power_interrupt(struct kbase_device *kbdev, bool en
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
- (void)enable;
/* For IFPO, we require the POWER_CHANGED_ALL interrupt to be always on */
- irq_mask |= POWER_CHANGED_ALL | POWER_CHANGED_SINGLE;
-#else
- if (enable)
- irq_mask |= POWER_CHANGED_ALL | POWER_CHANGED_SINGLE;
- else
- irq_mask &= ~(POWER_CHANGED_ALL | POWER_CHANGED_SINGLE);
-#endif /* CONFIG_MALI_HOST_CONTROLS_SC_RAILS */
+ enable = true;
+#endif
+ if (enable) {
+ irq_mask |= POWER_CHANGED_ALL;
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), POWER_CHANGED_ALL);
+ } else {
+ irq_mask &= ~POWER_CHANGED_ALL;
+ }
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask);
}
@@ -921,7 +953,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
case KBASE_MCU_ON_PEND_HALT:
if (kbase_csf_firmware_mcu_halted(kbdev)) {
- KBASE_KTRACE_ADD(kbdev, MCU_HALTED, NULL,
+ KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_MCU_HALTED, NULL,
kbase_csf_ktrace_gpu_cycle_cnt(kbdev));
if (kbdev->csf.firmware_hctl_core_pwr)
backend->mcu_state =
@@ -968,7 +1000,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
case KBASE_MCU_ON_PEND_SLEEP:
if (kbase_csf_firmware_is_mcu_in_sleep(kbdev)) {
- KBASE_KTRACE_ADD(kbdev, MCU_IN_SLEEP, NULL,
+ KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_MCU_SLEEP, NULL,
kbase_csf_ktrace_gpu_cycle_cnt(kbdev));
backend->mcu_state = KBASE_MCU_IN_SLEEP;
kbase_pm_enable_db_mirror_interrupt(kbdev);
@@ -984,6 +1016,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
case KBASE_MCU_IN_SLEEP:
if (kbase_pm_is_mcu_desired(kbdev) &&
backend->l2_state == KBASE_L2_ON) {
+ wait_mcu_as_inactive(kbdev);
KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP(
kbdev, kbase_backend_get_cycle_cnt(kbdev));
kbase_pm_enable_mcu_db_notification(kbdev);
@@ -994,6 +1027,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
if (!kbdev->csf.firmware_hctl_core_pwr)
kbasep_pm_toggle_power_interrupt(kbdev, false);
backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE;
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
}
break;
#endif
@@ -1120,13 +1154,24 @@ static bool can_power_down_l2(struct kbase_device *kbdev)
#endif
}
+static bool need_tiler_control(struct kbase_device *kbdev)
+{
+#if MALI_USE_CSF
+	return kbase_pm_no_mcu_core_pwroff(kbdev);
+#else
+ return true;
+#endif
+}
+
static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
{
struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
u64 l2_present = kbdev->gpu_props.curr_config.l2_present;
-#if !MALI_USE_CSF
u64 tiler_present = kbdev->gpu_props.props.raw_props.tiler_present;
-#endif
+ bool l2_power_up_done;
enum kbase_l2_core_state prev_state;
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -1137,24 +1182,18 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
KBASE_PM_CORE_L2);
u64 l2_ready = kbase_pm_get_ready_cores(kbdev,
KBASE_PM_CORE_L2);
-
-#if !MALI_USE_CSF
- u64 tiler_trans = kbase_pm_get_trans_cores(kbdev,
- KBASE_PM_CORE_TILER);
- u64 tiler_ready = kbase_pm_get_ready_cores(kbdev,
- KBASE_PM_CORE_TILER);
-#endif
+#ifdef CONFIG_MALI_ARBITER_SUPPORT
+ u64 tiler_trans = kbase_pm_get_trans_cores(
+ kbdev, KBASE_PM_CORE_TILER);
+ u64 tiler_ready = kbase_pm_get_ready_cores(
+ kbdev, KBASE_PM_CORE_TILER);
/*
* kbase_pm_get_ready_cores and kbase_pm_get_trans_cores
* are vulnerable to corruption if gpu is lost
*/
if (kbase_is_gpu_removed(kbdev)
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
|| kbase_pm_is_gpu_lost(kbdev)) {
-#else
- ) {
-#endif
backend->shaders_state =
KBASE_SHADERS_OFF_CORESTACK_OFF;
backend->hwcnt_desired = false;
@@ -1177,32 +1216,45 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
}
break;
}
+#endif /* CONFIG_MALI_ARBITER_SUPPORT */
/* mask off ready from trans in case transitions finished
* between the register reads
*/
l2_trans &= ~l2_ready;
-#if !MALI_USE_CSF
- tiler_trans &= ~tiler_ready;
-#endif
+
prev_state = backend->l2_state;
switch (backend->l2_state) {
case KBASE_L2_OFF:
if (kbase_pm_is_l2_desired(kbdev)) {
+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
+ /* Enable HW timer of IPA control before
+ * L2 cache is powered-up.
+ */
+ kbase_ipa_control_handle_gpu_sleep_exit(kbdev);
+#endif
/*
* Set the desired config for L2 before
* powering it on
*/
kbase_pm_l2_config_override(kbdev);
kbase_pbha_write_settings(kbdev);
-#if !MALI_USE_CSF
- /* L2 is required, power on. Powering on the
- * tiler will also power the first L2 cache.
- */
- kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER,
- tiler_present, ACTION_PWRON);
+ /* If Host is controlling the power for shader
+ * cores, then it also needs to control the
+ * power for Tiler.
+ * Powering on the tiler will also power the
+ * L2 cache.
+ */
+ if (need_tiler_control(kbdev)) {
+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, tiler_present,
+ ACTION_PWRON);
+ } else {
+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_present,
+ ACTION_PWRON);
+ }
+#if !MALI_USE_CSF
/* If we have more than one L2 cache then we
* must power them on explicitly.
*/
@@ -1212,30 +1264,36 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
ACTION_PWRON);
/* Clear backend slot submission kctx */
kbase_pm_l2_clear_backend_slot_submit_kctx(kbdev);
-#else
- /* With CSF firmware, Host driver doesn't need to
- * handle power management with both shader and tiler cores.
- * The CSF firmware will power up the cores appropriately.
- * So only power the l2 cache explicitly.
- */
- kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2,
- l2_present, ACTION_PWRON);
#endif
backend->l2_state = KBASE_L2_PEND_ON;
}
break;
case KBASE_L2_PEND_ON:
-#if !MALI_USE_CSF
- if (!l2_trans && l2_ready == l2_present && !tiler_trans
- && tiler_ready == tiler_present) {
- KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL,
- tiler_ready);
-#else
+ l2_power_up_done = false;
if (!l2_trans && l2_ready == l2_present) {
- KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL,
- l2_ready);
-#endif
+ if (need_tiler_control(kbdev)) {
+#ifndef CONFIG_MALI_ARBITER_SUPPORT
+ u64 tiler_trans = kbase_pm_get_trans_cores(
+ kbdev, KBASE_PM_CORE_TILER);
+ u64 tiler_ready = kbase_pm_get_ready_cores(
+ kbdev, KBASE_PM_CORE_TILER);
+#endif
+
+ tiler_trans &= ~tiler_ready;
+ if (!tiler_trans && tiler_ready == tiler_present) {
+ KBASE_KTRACE_ADD(kbdev,
+ PM_CORES_CHANGE_AVAILABLE_TILER,
+ NULL, tiler_ready);
+ l2_power_up_done = true;
+ }
+ } else {
+ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL,
+ l2_ready);
+ l2_power_up_done = true;
+ }
+ }
+ if (l2_power_up_done) {
/*
* Ensure snoops are enabled after L2 is powered
* up. Note that kbase keeps track of the snoop
@@ -1431,12 +1489,26 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
/* We only need to check the L2 here - if the L2
* is off then the tiler is definitely also off.
*/
- if (!l2_trans && !l2_ready)
+ if (!l2_trans && !l2_ready) {
+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
+ /* Allow clock gating within the GPU and prevent it
+ * from being seen as active during sleep.
+ */
+ kbase_ipa_control_handle_gpu_sleep_enter(kbdev);
+#endif
/* L2 is now powered off */
backend->l2_state = KBASE_L2_OFF;
+ }
} else {
- if (!kbdev->cache_clean_in_progress)
+ if (!kbdev->cache_clean_in_progress) {
+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
+ /* Allow clock gating within the GPU and prevent it
+ * from being seen as active during sleep.
+ */
+ kbase_ipa_control_handle_gpu_sleep_enter(kbdev);
+#endif
backend->l2_state = KBASE_L2_OFF;
+ }
}
break;
@@ -2293,12 +2365,14 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev)
/* Wait for cores */
#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE
- remaining = wait_event_killable_timeout(
+ remaining = wait_event_killable_timeout(kbdev->pm.backend.gpu_in_desired_state_wait,
+ kbase_pm_is_in_desired_state_with_l2_powered(kbdev),
+ timeout);
#else
remaining = wait_event_timeout(
-#endif
kbdev->pm.backend.gpu_in_desired_state_wait,
kbase_pm_is_in_desired_state_with_l2_powered(kbdev), timeout);
+#endif
if (!remaining) {
kbase_pm_timed_out(kbdev);
@@ -2353,6 +2427,66 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev)
}
KBASE_EXPORT_TEST_API(kbase_pm_wait_for_desired_state);
+#if MALI_USE_CSF
+/**
+ * core_mask_update_done - Check if downscaling of shader cores is done
+ *
+ * @kbdev: The kbase device structure for the device.
+ *
+ * This function checks if the downscaling of cores is effectively complete.
+ *
+ * Return: true if the downscale is done.
+ */
+static bool core_mask_update_done(struct kbase_device *kbdev)
+{
+ bool update_done = false;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	/* If the MCU is in the stable ON state then the downscale request has
+	 * completed.
+	 * If the MCU is not active then all cores are off, so the downscale
+	 * request can also be considered complete.
+	 */
+ if ((kbdev->pm.backend.mcu_state == KBASE_MCU_ON) ||
+ kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state))
+ update_done = true;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ return update_done;
+}
+
+int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev)
+{
+ long timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT));
+ long remaining;
+ int err = 0;
+
+ /* Wait for core mask update to complete */
+#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE
+ remaining = wait_event_killable_timeout(
+ kbdev->pm.backend.gpu_in_desired_state_wait,
+ core_mask_update_done(kbdev), timeout);
+#else
+ remaining = wait_event_timeout(
+ kbdev->pm.backend.gpu_in_desired_state_wait,
+ core_mask_update_done(kbdev), timeout);
+#endif
+
+ if (!remaining) {
+ kbase_pm_timed_out(kbdev);
+ err = -ETIMEDOUT;
+ } else if (remaining < 0) {
+ dev_info(
+ kbdev->dev,
+ "Wait for cores down scaling got interrupted");
+ err = (int)remaining;
+ }
+
+ return err;
+}
+#endif
+
void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
{
unsigned long flags;
@@ -2416,14 +2550,21 @@ static void update_user_reg_page_mapping(struct kbase_device *kbdev)
lockdep_assert_held(&kbdev->pm.lock);
mutex_lock(&kbdev->csf.reg_lock);
- if (kbdev->csf.mali_file_inode) {
- /* This would zap the pte corresponding to the mapping of User
- * register page for all the Kbase contexts.
- */
- unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping,
- BASEP_MEM_CSF_USER_REG_PAGE_HANDLE,
- PAGE_SIZE, 1);
+
+	/* Update all PTEs associated with the USER page, but only if a mapping for it exists */
+ if (kbdev->csf.nr_user_page_mapped > 0) {
+ if (likely(kbdev->csf.mali_file_inode)) {
+ /* This would zap the pte corresponding to the mapping of User
+ * register page for all the Kbase contexts.
+ */
+ unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping,
+ BASEP_MEM_CSF_USER_REG_PAGE_HANDLE, PAGE_SIZE, 1);
+ } else {
+ dev_err(kbdev->dev,
+				"Device file inode does not exist even though the USER page was previously mapped");
+ }
}
+
mutex_unlock(&kbdev->csf.reg_lock);
}
#endif
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
index 68ded7d..cd5a6a3 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
@@ -269,6 +269,37 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
*/
int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev);
+#if MALI_USE_CSF
+/**
+ * kbase_pm_wait_for_cores_down_scale - Wait for the downscaling of shader cores
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This function can be called to ensure that the downscaling of cores is
+ * effectively complete and it would be safe to lower the voltage.
+ * The function assumes that caller had exercised the MCU state machine for the
+ * downscale request through the kbase_pm_update_state() function.
+ *
+ * This function needs to be used by the caller to safely wait for the completion
+ * of downscale request, instead of kbase_pm_wait_for_desired_state().
+ * A downscale request triggers a state change in the MCU state machine, so once
+ * the MCU reaches the stable ON state it can be inferred that downscaling is
+ * complete. However, the wake-up of the waiting thread can be delayed by a few
+ * milliseconds, by which time the power-down transition (which follows the
+ * completion of the downscale request) may already have started.
+ * Another wake-up signal is sent when the power-down transition completes, but
+ * again the power-up transition may have begun by the time the thread wakes up.
+ * That power-up transition can then block inside the platform specific
+ * callback_power_on() function, because the thread that called into Kbase (from
+ * the platform specific code) to perform the downscaling is still waiting for
+ * the downscale request to complete.
+ *
+ * Return: 0 on success, or a negative error code on failure (-ETIMEDOUT on timeout).
+ */
+int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev);
+#endif
+
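A caller is expected to request the new core mask and run the PM state machine first, and only then block in this wait; the kbase_devfreq_set_core_mask() change earlier in this patch is the canonical user, and the calling pattern looks like the following (old_core_mask and core_mask come from that caller's context):

	/* After requesting a smaller core mask and updating the PM state
	 * machine, wait until it is safe to lower the voltage.
	 */
	if (kbase_pm_wait_for_cores_down_scale(kbdev)) {
		dev_warn(kbdev->dev,
			 "Wait for update of core_mask from %llx to %llx failed",
			 old_core_mask, core_mask);
	}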
/**
* kbase_pm_update_dynamic_cores_onoff - Update the L2 and shader power state
* machines after changing shader core
@@ -800,7 +831,7 @@ bool kbase_pm_no_runnables_sched_suspendable(struct kbase_device *kbdev)
/**
* kbase_pm_no_mcu_core_pwroff - Check whether the PM is required to keep the
- * MCU core powered in accordance to the active
+ *				 MCU shader core powered in accordance with the active
* power management policy
*
* @kbdev: Device pointer
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c
index f85b466..2df6804 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -24,6 +24,7 @@
*/
#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
#include <mali_kbase_pm.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
@@ -48,27 +49,51 @@
#define GPU_ACTIVE_SCALING_FACTOR ((u64)1E9)
#endif
+/*
+ * Possible state transitions
+ * ON -> ON | OFF | STOPPED
+ * STOPPED -> ON | OFF
+ * OFF -> ON
+ *
+ *
+ * ┌─e─┐┌────────────f─────────────┐
+ * │ v│ v
+ * └───ON ──a──> STOPPED ──b──> OFF
+ * ^^ │ │
+ * │└──────c─────┘ │
+ * │ │
+ * └─────────────d─────────────┘
+ *
+ * Transition effects:
+ * a. None
+ * b. Timer expires without restart
+ * c. Timer is not stopped, timer period is unaffected
+ * d. Timer must be restarted
+ * e. Callback is executed and the timer is restarted
+ * f. Timer is cancelled, or the callback is waited on if currently executing. This is called during
+ * tear-down and should not be subject to a race from an OFF->ON transition
+ */
+enum dvfs_metric_timer_state { TIMER_OFF, TIMER_STOPPED, TIMER_ON };
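The functions that follow realise these transitions with a single atomic_t instead of a spinlock-protected bool. For reference, a condensed sketch of the three operations (not the driver's actual functions; the 100 ms period is illustrative):

/* Condensed sketch of how the atomic state realises transitions a-e above. */
static atomic_t timer_state = ATOMIC_INIT(TIMER_OFF);

static void metrics_start_sketch(struct hrtimer *t, ktime_t period)
{
	/* c: STOPPED->ON leaves the running timer alone; d: OFF->ON re-arms it */
	if (atomic_xchg(&timer_state, TIMER_ON) == TIMER_OFF)
		hrtimer_start(t, period, HRTIMER_MODE_REL);
}

static void metrics_stop_sketch(void)
{
	/* a: only demote a running timer; an OFF timer stays OFF */
	atomic_cmpxchg(&timer_state, TIMER_ON, TIMER_STOPPED);
}

static enum hrtimer_restart metrics_callback_sketch(struct hrtimer *t)
{
	/* b: a STOPPED timer drops to OFF and is not restarted */
	if (atomic_cmpxchg(&timer_state, TIMER_STOPPED, TIMER_OFF) != TIMER_ON)
		return HRTIMER_NORESTART;

	/* e: still ON - do the periodic work here, then re-arm the timer */
	hrtimer_forward_now(t, ms_to_ktime(100));
	return HRTIMER_RESTART;
}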
+
#ifdef CONFIG_MALI_MIDGARD_DVFS
static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
{
- unsigned long flags;
struct kbasep_pm_metrics_state *metrics;
- KBASE_DEBUG_ASSERT(timer != NULL);
+ if (WARN_ON(!timer))
+ return HRTIMER_NORESTART;
metrics = container_of(timer, struct kbasep_pm_metrics_state, timer);
- kbase_pm_get_dvfs_action(metrics->kbdev);
- spin_lock_irqsave(&metrics->lock, flags);
+	/* Transition (b) to fully off if the timer was stopped; don't restart the timer in this case */
+ if (atomic_cmpxchg(&metrics->timer_state, TIMER_STOPPED, TIMER_OFF) != TIMER_ON)
+ return HRTIMER_NORESTART;
- if (metrics->timer_active)
- hrtimer_start(timer,
- HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period),
- HRTIMER_MODE_REL);
-
- spin_unlock_irqrestore(&metrics->lock, flags);
+ kbase_pm_get_dvfs_action(metrics->kbdev);
- return HRTIMER_NORESTART;
+ /* Set the new expiration time and restart (transition e) */
+ hrtimer_forward_now(timer, HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period));
+ return HRTIMER_RESTART;
}
#endif /* CONFIG_MALI_MIDGARD_DVFS */
@@ -83,7 +108,7 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev)
KBASE_DEBUG_ASSERT(kbdev != NULL);
kbdev->pm.backend.metrics.kbdev = kbdev;
- kbdev->pm.backend.metrics.time_period_start = ktime_get();
+ kbdev->pm.backend.metrics.time_period_start = ktime_get_raw();
kbdev->pm.backend.metrics.values.time_busy = 0;
kbdev->pm.backend.metrics.values.time_idle = 0;
kbdev->pm.backend.metrics.values.time_in_protm = 0;
@@ -111,7 +136,7 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev)
#else
KBASE_DEBUG_ASSERT(kbdev != NULL);
kbdev->pm.backend.metrics.kbdev = kbdev;
- kbdev->pm.backend.metrics.time_period_start = ktime_get();
+ kbdev->pm.backend.metrics.time_period_start = ktime_get_raw();
kbdev->pm.backend.metrics.gpu_active = false;
kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
@@ -134,6 +159,7 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev)
HRTIMER_MODE_REL);
kbdev->pm.backend.metrics.timer.function = dvfs_callback;
kbdev->pm.backend.metrics.initialized = true;
+ atomic_set(&kbdev->pm.backend.metrics.timer_state, TIMER_OFF);
kbase_pm_metrics_start(kbdev);
#endif /* CONFIG_MALI_MIDGARD_DVFS */
@@ -152,16 +178,12 @@ KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init);
void kbasep_pm_metrics_term(struct kbase_device *kbdev)
{
#ifdef CONFIG_MALI_MIDGARD_DVFS
- unsigned long flags;
-
KBASE_DEBUG_ASSERT(kbdev != NULL);
- spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
- kbdev->pm.backend.metrics.timer_active = false;
- spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
-
- hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
+ /* Cancel the timer, and block if the callback is currently executing (transition f) */
kbdev->pm.backend.metrics.initialized = false;
+ atomic_set(&kbdev->pm.backend.metrics.timer_state, TIMER_OFF);
+ hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
#endif /* CONFIG_MALI_MIDGARD_DVFS */
#if MALI_USE_CSF
@@ -199,7 +221,7 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev)
* elapsed time. The lock taken inside kbase_ipa_control_query()
* function can cause lot of variation.
*/
- now = ktime_get();
+ now = ktime_get_raw();
if (err) {
dev_err(kbdev->dev,
@@ -231,12 +253,14 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev)
* time.
*/
if (!kbdev->pm.backend.metrics.skip_gpu_active_sanity_check) {
- /* Use a margin value that is approximately 1% of the time
- * difference.
+ /* The margin is scaled to allow for the worst-case
+ * scenario where the samples are maximally separated,
+ * plus a small offset for sampling errors.
*/
- u64 margin_ns = diff_ns >> 6;
+ u64 const MARGIN_NS =
+ IPA_CONTROL_TIMER_DEFAULT_VALUE_MS * NSEC_PER_MSEC * 3 / 2;
- if (gpu_active_counter > (diff_ns + margin_ns)) {
+ if (gpu_active_counter > (diff_ns + MARGIN_NS)) {
dev_info(
kbdev->dev,
"GPU activity takes longer than time interval: %llu ns > %llu ns",
@@ -331,7 +355,7 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
#if MALI_USE_CSF
kbase_pm_get_dvfs_utilisation_calc(kbdev);
#else
- kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get());
+ kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get_raw());
#endif
memset(diff, 0, sizeof(*diff));
@@ -396,57 +420,33 @@ void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
{
- bool isactive;
- unsigned long flags;
-
KBASE_DEBUG_ASSERT(kbdev != NULL);
- spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
- isactive = kbdev->pm.backend.metrics.timer_active;
- spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
-
- return isactive;
+ return atomic_read(&kbdev->pm.backend.metrics.timer_state) == TIMER_ON;
}
KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active);
void kbase_pm_metrics_start(struct kbase_device *kbdev)
{
- unsigned long flags;
- bool update = true;
+ struct kbasep_pm_metrics_state *metrics = &kbdev->pm.backend.metrics;
- if (unlikely(!kbdev->pm.backend.metrics.initialized))
+ if (unlikely(!metrics->initialized))
return;
- spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
- if (!kbdev->pm.backend.metrics.timer_active)
- kbdev->pm.backend.metrics.timer_active = true;
- else
- update = false;
- spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
-
- if (update)
- hrtimer_start(&kbdev->pm.backend.metrics.timer,
- HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
- HRTIMER_MODE_REL);
+ /* Transition to ON, from a stopped state (transition c) */
+ if (atomic_xchg(&metrics->timer_state, TIMER_ON) == TIMER_OFF)
+		/* Start the timer only if it's been fully stopped (transition d) */
+ hrtimer_start(&metrics->timer, HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
+ HRTIMER_MODE_REL);
}
void kbase_pm_metrics_stop(struct kbase_device *kbdev)
{
- unsigned long flags;
- bool update = true;
-
if (unlikely(!kbdev->pm.backend.metrics.initialized))
return;
- spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
- if (kbdev->pm.backend.metrics.timer_active)
- kbdev->pm.backend.metrics.timer_active = false;
- else
- update = false;
- spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
-
- if (update)
- hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
+	/* Timer is stopped if it's currently on (transition a) */
+ atomic_cmpxchg(&kbdev->pm.backend.metrics.timer_state, TIMER_ON, TIMER_STOPPED);
}
@@ -512,7 +512,7 @@ void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
if (!timestamp) {
- now = ktime_get();
+ now = ktime_get_raw();
timestamp = &now;
}
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
index 5f16434..deeb1b5 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
@@ -310,7 +310,7 @@ void kbase_pm_set_policy(struct kbase_device *kbdev,
mutex_lock(&kbdev->pm.backend.policy_change_lock);
if (kbase_reset_gpu_prevent_and_wait(kbdev)) {
- dev_warn(kbdev->dev, "Set PM policy failed to prevent gpu reset");
+ dev_warn(kbdev->dev, "Set PM policy failing to prevent gpu reset");
reset_op_prevented = false;
}
@@ -332,7 +332,7 @@ void kbase_pm_set_policy(struct kbase_device *kbdev,
* the always_on policy, reflected by the CSF_DYNAMIC_PM_CORE_KEEP_ON
* flag bit.
*/
- sched_suspend = kbdev->csf.firmware_inited && reset_op_prevented &&
+ sched_suspend = reset_op_prevented &&
(CSF_DYNAMIC_PM_CORE_KEEP_ON &
(new_policy_csf_pm_sched_flags | kbdev->pm.backend.csf_pm_sched_flags));
diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c
index a83206a..5110e3d 100644
--- a/mali_kbase/backend/gpu/mali_kbase_time.c
+++ b/mali_kbase/backend/gpu/mali_kbase_time.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2016, 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -21,6 +21,9 @@
#include <mali_kbase.h>
#include <mali_kbase_hwaccess_time.h>
+#if MALI_USE_CSF
+#include <csf/mali_kbase_csf_timeout.h>
+#endif
#include <device/mali_kbase_device.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <mali_kbase_config_defaults.h>
@@ -113,13 +116,17 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
*/
u64 timeout, nr_cycles = 0;
- /* Default value to mean 'no cap' */
- u64 timeout_cap = U64_MAX;
- u64 freq_khz = kbdev->lowest_gpu_freq_khz;
+ u64 freq_khz;
+
/* Only for debug messages, safe default in case it's mis-maintained */
const char *selector_str = "(unknown)";
- WARN_ON(!freq_khz);
+ if (WARN(!kbdev->lowest_gpu_freq_khz,
+ "Lowest frequency uninitialized! Using reference frequency for scaling")) {
+ freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ;
+ } else {
+ freq_khz = kbdev->lowest_gpu_freq_khz;
+ }
switch (selector) {
case KBASE_TIMEOUT_SELECTOR_COUNT:
@@ -135,16 +142,15 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
fallthrough;
case CSF_FIRMWARE_TIMEOUT:
selector_str = "CSF_FIRMWARE_TIMEOUT";
- nr_cycles = CSF_FIRMWARE_TIMEOUT_CYCLES;
- /* Setup a cap on CSF FW timeout to FIRMWARE_PING_INTERVAL_MS,
- * if calculated timeout exceeds it. This should be adapted to
- * a direct timeout comparison once the
- * FIRMWARE_PING_INTERVAL_MS option is added to this timeout
- * function. A compile-time check such as BUILD_BUG_ON can also
- * be done once the firmware ping interval in cycles becomes
- * available as a macro.
+		/* No FW timeout may be longer than the FW ping interval, after
+		 * which the firmware_aliveness_monitor will be triggered and may
+		 * restart the GPU if the FW is unresponsive.
+		 */
- timeout_cap = FIRMWARE_PING_INTERVAL_MS;
+ nr_cycles = min(CSF_FIRMWARE_PING_TIMEOUT_CYCLES, CSF_FIRMWARE_TIMEOUT_CYCLES);
+
+ if (nr_cycles == CSF_FIRMWARE_PING_TIMEOUT_CYCLES)
+ dev_warn(kbdev->dev, "Capping %s to CSF_FIRMWARE_PING_TIMEOUT\n",
+ selector_str);
break;
case CSF_PM_TIMEOUT:
selector_str = "CSF_PM_TIMEOUT";
@@ -154,21 +160,33 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
selector_str = "CSF_GPU_RESET_TIMEOUT";
nr_cycles = CSF_GPU_RESET_TIMEOUT_CYCLES;
break;
+ case CSF_CSG_SUSPEND_TIMEOUT:
+ selector_str = "CSF_CSG_SUSPEND_TIMEOUT";
+ nr_cycles = CSF_CSG_SUSPEND_TIMEOUT_CYCLES;
+ break;
+ case CSF_FIRMWARE_BOOT_TIMEOUT:
+ selector_str = "CSF_FIRMWARE_BOOT_TIMEOUT";
+ nr_cycles = CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES;
+ break;
+ case CSF_FIRMWARE_PING_TIMEOUT:
+ selector_str = "CSF_FIRMWARE_PING_TIMEOUT";
+ nr_cycles = CSF_FIRMWARE_PING_TIMEOUT_CYCLES;
+ break;
+ case CSF_SCHED_PROTM_PROGRESS_TIMEOUT:
+ selector_str = "CSF_SCHED_PROTM_PROGRESS_TIMEOUT";
+ nr_cycles = kbase_csf_timeout_get(kbdev);
+ break;
#endif
}
timeout = div_u64(nr_cycles, freq_khz);
- if (timeout > timeout_cap) {
- dev_dbg(kbdev->dev, "Capped %s %llu to %llu", selector_str,
- (unsigned long long)timeout, (unsigned long long)timeout_cap);
- timeout = timeout_cap;
- }
if (WARN(timeout > UINT_MAX,
"Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms",
(unsigned long long)timeout, selector_str, (unsigned long long)freq_khz))
timeout = UINT_MAX;
return (unsigned int)timeout;
}
+KBASE_EXPORT_TEST_API(kbase_get_timeout_ms);
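Since the frequency is expressed in kHz, i.e. cycles per millisecond, the div_u64() above converts a cycle budget directly into milliseconds. A worked example with illustrative numbers (not constants taken from the driver):

	/* Illustrative check of the scaling only. */
	u64 nr_cycles = 5000000000ull;	/* hypothetical cycle budget */
	u32 freq_khz = 100000;		/* hypothetical 100 MHz lowest GPU frequency */
	u64 timeout_ms = div_u64(nr_cycles, freq_khz);	/* == 50000 ms */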
u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev)
{
diff --git a/mali_kbase/build.bp b/mali_kbase/build.bp
index 04768fe..96aa329 100644
--- a/mali_kbase/build.bp
+++ b/mali_kbase/build.bp
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -139,6 +139,12 @@ bob_defaults {
mali_host_controls_sc_rails: {
kbuild_options: ["CONFIG_MALI_HOST_CONTROLS_SC_RAILS=y"],
},
+ platform_is_fpga: {
+ kbuild_options: ["CONFIG_MALI_IS_FPGA=y"],
+ },
+ mali_fw_core_dump: {
+ kbuild_options: ["CONFIG_MALI_FW_CORE_DUMP=y"],
+ },
kbuild_options: [
"CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}",
"MALI_CUSTOMER_RELEASE={{.release}}",
@@ -159,7 +165,7 @@ bob_defaults {
// is an umbrella feature that would be open for inappropriate use
// (catch-all for experimental CS code without separating it into
// different features).
- "MALI_INCREMENTAL_RENDERING={{.incremental_rendering}}",
+ "MALI_INCREMENTAL_RENDERING_JM={{.incremental_rendering_jm}}",
"MALI_GPU_TIMESTAMP_CORRECTION={{.gpu_timestamp_correction}}",
"MALI_BASE_CSF_PERFORMANCE_TESTS={{.base_csf_performance_tests}}",
"MALI_GPU_TIMESTAMP_INTERPOLATION={{.gpu_timestamp_interpolation}}",
diff --git a/mali_kbase/context/backend/mali_kbase_context_csf.c b/mali_kbase/context/backend/mali_kbase_context_csf.c
index 34504f7..201349c 100644
--- a/mali_kbase/context/backend/mali_kbase_context_csf.c
+++ b/mali_kbase/context/backend/mali_kbase_context_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -39,11 +39,13 @@
#include <csf/mali_kbase_csf_tiler_heap_debugfs.h>
#include <csf/mali_kbase_csf_cpu_queue_debugfs.h>
#include <mali_kbase_debug_mem_view.h>
+#include <mali_kbase_debug_mem_zones.h>
#include <mali_kbase_mem_pool_debugfs.h>
void kbase_context_debugfs_init(struct kbase_context *const kctx)
{
kbase_debug_mem_view_init(kctx);
+ kbase_debug_mem_zones_init(kctx);
kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx);
kbase_jit_debugfs_init(kctx);
kbase_csf_queue_group_debugfs_init(kctx);
diff --git a/mali_kbase/context/backend/mali_kbase_context_jm.c b/mali_kbase/context/backend/mali_kbase_context_jm.c
index 74402ec..4091fb7 100644
--- a/mali_kbase/context/backend/mali_kbase_context_jm.c
+++ b/mali_kbase/context/backend/mali_kbase_context_jm.c
@@ -36,11 +36,13 @@
#if IS_ENABLED(CONFIG_DEBUG_FS)
#include <mali_kbase_debug_mem_view.h>
+#include <mali_kbase_debug_mem_zones.h>
#include <mali_kbase_mem_pool_debugfs.h>
void kbase_context_debugfs_init(struct kbase_context *const kctx)
{
kbase_debug_mem_view_init(kctx);
+ kbase_debug_mem_zones_init(kctx);
kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx);
kbase_jit_debugfs_init(kctx);
kbasep_jd_debugfs_ctx_init(kctx);
diff --git a/mali_kbase/context/mali_kbase_context.c b/mali_kbase/context/mali_kbase_context.c
index c7d7585..95bd641 100644
--- a/mali_kbase/context/mali_kbase_context.c
+++ b/mali_kbase/context/mali_kbase_context.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -286,7 +286,9 @@ static void kbase_remove_kctx_from_process(struct kbase_context *kctx)
/* Add checks, so that the terminating process Should not
* hold any gpu_memory.
*/
+ spin_lock(&kctx->kbdev->gpu_mem_usage_lock);
WARN_ON(kprcs->total_gpu_pages);
+ spin_unlock(&kctx->kbdev->gpu_mem_usage_lock);
WARN_ON(!RB_EMPTY_ROOT(&kprcs->dma_buf_root));
kobject_del(&kprcs->kobj);
kobject_put(&kprcs->kobj);
diff --git a/mali_kbase/csf/Kbuild b/mali_kbase/csf/Kbuild
index 29983fb..11672a1 100644
--- a/mali_kbase/csf/Kbuild
+++ b/mali_kbase/csf/Kbuild
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -34,12 +34,14 @@ mali_kbase-y += \
csf/mali_kbase_csf_protected_memory.o \
csf/mali_kbase_csf_tiler_heap_debugfs.o \
csf/mali_kbase_csf_cpu_queue_debugfs.o \
- csf/mali_kbase_csf_event.o
+ csf/mali_kbase_csf_event.o \
+ csf/mali_kbase_csf_firmware_log.o
mali_kbase-$(CONFIG_MALI_REAL_HW) += csf/mali_kbase_csf_firmware.o
mali_kbase-$(CONFIG_MALI_NO_MALI) += csf/mali_kbase_csf_firmware_no_mali.o
+
ifeq ($(KBUILD_EXTMOD),)
# in-tree
-include $(src)/csf/ipa_control/Kbuild
diff --git a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c
index a56b689..ccdc48c 100644
--- a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c
+++ b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,6 +20,7 @@
*/
#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
#include "mali_kbase_csf_ipa_control.h"
@@ -44,19 +45,9 @@
#define COMMAND_RESET_ACK ((u32)5)
/*
- * Default value for the TIMER register of the IPA Control interface,
- * expressed in milliseconds.
- *
- * The chosen value is a trade off between two requirements: the IPA Control
- * interface should sample counters with a resolution in the order of
- * milliseconds, while keeping GPU overhead as limited as possible.
- */
-#define TIMER_DEFAULT_VALUE_MS ((u32)10) /* 10 milliseconds */
-
-/*
* Number of timer events per second.
*/
-#define TIMER_EVENTS_PER_SECOND ((u32)1000 / TIMER_DEFAULT_VALUE_MS)
+#define TIMER_EVENTS_PER_SECOND ((u32)1000 / IPA_CONTROL_TIMER_DEFAULT_VALUE_MS)
/*
* Maximum number of loops polling the GPU before we assume the GPU has hung.
@@ -602,9 +593,10 @@ int kbase_ipa_control_register(
*/
for (session_idx = 0; session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS;
session_idx++) {
- session = &ipa_ctrl->sessions[session_idx];
- if (!session->active)
+ if (!ipa_ctrl->sessions[session_idx].active) {
+ session = &ipa_ctrl->sessions[session_idx];
break;
+ }
}
if (!session) {
@@ -659,7 +651,7 @@ int kbase_ipa_control_register(
/* Reports to this client for GPU time spent in protected mode
* should begin from the point of registration.
*/
- session->last_query_time = ktime_get_ns();
+ session->last_query_time = ktime_get_raw_ns();
/* Initially, no time has been spent in protected mode */
session->protm_time = 0;
@@ -829,7 +821,7 @@ int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client,
}
if (protected_time) {
- u64 time_now = ktime_get_ns();
+ u64 time_now = ktime_get_raw_ns();
/* This is the amount of protected-mode time spent prior to
* the current protm period.
@@ -973,6 +965,43 @@ void kbase_ipa_control_handle_gpu_reset_post(struct kbase_device *kbdev)
}
KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_reset_post);
+#ifdef KBASE_PM_RUNTIME
+void kbase_ipa_control_handle_gpu_sleep_enter(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ if (kbdev->pm.backend.mcu_state == KBASE_MCU_IN_SLEEP) {
+ /* GPU Sleep is treated as a power down */
+ kbase_ipa_control_handle_gpu_power_off(kbdev);
+
+		/* The SELECT_CSHW register needs to be cleared to prevent any
+		 * IPA control message from being sent to the top level GPU HWCNT.
+		 */
+ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_LO), 0);
+ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_HI), 0);
+
+ /* No need to issue the APPLY command here */
+ }
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_sleep_enter);
+
+void kbase_ipa_control_handle_gpu_sleep_exit(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ if (kbdev->pm.backend.mcu_state == KBASE_MCU_IN_SLEEP) {
+		/* To keep things simple, exit from GPU Sleep is currently
+		 * treated as a power-on event where all 4 SELECT registers
+		 * are reconfigured. Strictly, only the SELECT_CSHW register
+		 * needs to be reconfigured on exit from sleep.
+		 */
+ kbase_ipa_control_handle_gpu_power_on(kbdev);
+ }
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_sleep_exit);
+#endif
+
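+/* Minimal usage sketch (hypothetical caller, not the actual kbase PM code
+ * path): both handlers expect hwaccess_lock to be held, and the exit handler
+ * is only meaningful after the enter handler has run for the same sleep cycle.
+ */
+static void example_gpu_sleep_cycle(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	/* MCU has just transitioned to KBASE_MCU_IN_SLEEP */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_ipa_control_handle_gpu_sleep_enter(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* ... GPU remains in sleep until new work arrives ... */
+
+	/* About to power the L2 back up and wake the MCU */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_ipa_control_handle_gpu_sleep_exit(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}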
#if MALI_UNIT_TEST
void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev,
u32 clk_index, u32 clk_rate_hz)
@@ -992,14 +1021,14 @@ void kbase_ipa_control_protm_entered(struct kbase_device *kbdev)
struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
lockdep_assert_held(&kbdev->hwaccess_lock);
- ipa_ctrl->protm_start = ktime_get_ns();
+ ipa_ctrl->protm_start = ktime_get_raw_ns();
}
void kbase_ipa_control_protm_exited(struct kbase_device *kbdev)
{
struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
size_t i;
- u64 time_now = ktime_get_ns();
+ u64 time_now = ktime_get_raw_ns();
u32 status;
lockdep_assert_held(&kbdev->hwaccess_lock);
diff --git a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.h b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.h
index 0469c48..69ff897 100644
--- a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.h
+++ b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -198,6 +198,33 @@ void kbase_ipa_control_handle_gpu_reset_pre(struct kbase_device *kbdev);
*/
void kbase_ipa_control_handle_gpu_reset_post(struct kbase_device *kbdev);
+#ifdef KBASE_PM_RUNTIME
+/**
+ * kbase_ipa_control_handle_gpu_sleep_enter - Handle the pre GPU Sleep event
+ *
+ * @kbdev: Pointer to kbase device.
+ *
+ * This function is called after the MCU has been put into the sleep state and
+ * the L2 cache has been powered down. The top-level part of the GPU is still
+ * powered up when this function is called.
+ */
+void kbase_ipa_control_handle_gpu_sleep_enter(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_control_handle_gpu_sleep_exit - Handle the post GPU Sleep event
+ *
+ * @kbdev: Pointer to kbase device.
+ *
+ * This function is called when the L2 cache needs to be powered up and the MCU
+ * can exit the sleep state. The top-level part of the GPU is powered up when
+ * this function is called.
+ *
+ * This function must be called only if kbase_ipa_control_handle_gpu_sleep_enter()
+ * was called previously.
+ */
+void kbase_ipa_control_handle_gpu_sleep_exit(struct kbase_device *kbdev);
+#endif
+
#if MALI_UNIT_TEST
/**
* kbase_ipa_control_rate_change_notify_test - Notify GPU rate change
diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c
index 2678baf..12ab66f 100644
--- a/mali_kbase/csf/mali_kbase_csf.c
+++ b/mali_kbase/csf/mali_kbase_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -35,7 +35,7 @@
#include <mali_kbase_hwaccess_time.h>
#include "mali_kbase_csf_event.h"
#include <mali_linux_trace.h>
-
+#include <linux/protected_memory_allocator.h>
#define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK)
#define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK)
@@ -61,7 +61,7 @@ const u8 kbasep_csf_relative_to_queue_group_priority[KBASE_QUEUE_GROUP_PRIORITY_
*
* @protm_grp: Possibly schedulable group that requested protected mode in the interrupt.
* If NULL, no such case observed in the tracked interrupt case.
- * @idle_seq: The highest priority group that notified idle. If no such instnace in the
+ * @idle_seq: The highest priority group that notified idle. If no such instance in the
* interrupt case, marked with the largest field value: U32_MAX.
* @idle_slot: The slot number if @p idle_seq is valid in the given tracking case.
*/
@@ -131,13 +131,13 @@ static int get_user_pages_mmap_handle(struct kbase_context *kctx,
return 0;
}
-static void gpu_munmap_user_io_pages(struct kbase_context *kctx,
- struct kbase_va_region *reg)
+static void gpu_munmap_user_io_pages(struct kbase_context *kctx, struct kbase_va_region *reg,
+ struct tagged_addr *phys)
{
size_t num_pages = 2;
- kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
- reg->start_pfn, num_pages, MCU_AS_NR);
+ kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, reg->start_pfn, phys,
+ num_pages, MCU_AS_NR);
WARN_ON(reg->flags & KBASE_REG_FREE);
@@ -178,12 +178,6 @@ static int gpu_mmap_user_io_pages(struct kbase_device *kbdev,
*/
const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
-#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \
- ((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \
- (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE)))
- mem_flags |=
- KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
-#else
if (kbdev->system_coherency == COHERENCY_NONE) {
mem_flags |=
KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
@@ -191,7 +185,6 @@ static int gpu_mmap_user_io_pages(struct kbase_device *kbdev,
mem_flags |= KBASE_REG_SHARE_BOTH |
KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED);
}
-#endif
mutex_lock(&kbdev->csf.reg_lock);
ret = kbase_add_va_region_rbtree(kbdev, reg, 0, num_pages, 1);
@@ -220,8 +213,7 @@ static int gpu_mmap_user_io_pages(struct kbase_device *kbdev,
return 0;
bad_insert_output_page:
- kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu,
- reg->start_pfn, 1, MCU_AS_NR);
+ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn, phys, 1, MCU_AS_NR);
bad_insert:
mutex_lock(&kbdev->csf.reg_lock);
kbase_remove_va_region(kbdev, reg);
@@ -250,6 +242,8 @@ static int kernel_map_user_io_pages(struct kbase_context *kctx,
{
struct page *page_list[2];
pgprot_t cpu_map_prot;
+ unsigned long flags;
+ char *user_io_addr;
int ret = 0;
size_t i;
@@ -264,27 +258,25 @@ static int kernel_map_user_io_pages(struct kbase_context *kctx,
/* The pages are mapped to Userspace also, so use the same mapping
* attributes as used inside the CPU page fault handler.
*/
-#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \
- ((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \
- (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE)))
- cpu_map_prot = pgprot_device(PAGE_KERNEL);
-#else
if (kctx->kbdev->system_coherency == COHERENCY_NONE)
cpu_map_prot = pgprot_writecombine(PAGE_KERNEL);
else
cpu_map_prot = PAGE_KERNEL;
-#endif
for (i = 0; i < ARRAY_SIZE(page_list); i++)
page_list[i] = as_page(queue->phys[i]);
- queue->user_io_addr = vmap(page_list, ARRAY_SIZE(page_list), VM_MAP, cpu_map_prot);
+ user_io_addr = vmap(page_list, ARRAY_SIZE(page_list), VM_MAP, cpu_map_prot);
- if (!queue->user_io_addr)
+ if (!user_io_addr)
ret = -ENOMEM;
else
atomic_add(ARRAY_SIZE(page_list), &kctx->permanent_mapped_pages);
+ kbase_csf_scheduler_spin_lock(kctx->kbdev, &flags);
+ queue->user_io_addr = user_io_addr;
+ kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags);
+
unlock:
kbase_gpu_vm_unlock(kctx);
return ret;
@@ -321,7 +313,7 @@ static void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx,
{
const size_t num_pages = 2;
- gpu_munmap_user_io_pages(kctx, queue->reg);
+ gpu_munmap_user_io_pages(kctx, queue->reg, &queue->phys[0]);
kernel_unmap_user_io_pages(kctx, queue);
kbase_mem_pool_free_pages(
@@ -820,8 +812,8 @@ static void pending_submission_worker(struct kthread_work *work)
if (!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND)
dev_dbg(kbdev->dev, "queue is not bound to a group");
- else
- WARN_ON(kbase_csf_scheduler_queue_start(queue));
+ else if (kbase_csf_scheduler_queue_start(queue))
+ dev_dbg(kbdev->dev, "Failed to start queue");
}
}
@@ -954,7 +946,7 @@ static void unbind_stopped_queue(struct kbase_context *kctx,
kbase_csf_scheduler_spin_lock(kctx->kbdev, &flags);
bitmap_clear(queue->group->protm_pending_bitmap,
queue->csi_index, 1);
- KBASE_KTRACE_ADD_CSF_GRP_Q(kctx->kbdev, PROTM_PENDING_CLEAR,
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kctx->kbdev, CSI_PROTM_PEND_CLEAR,
queue->group, queue, queue->group->protm_pending_bitmap[0]);
queue->group->bound_queues[queue->csi_index] = NULL;
queue->group = NULL;
@@ -1364,10 +1356,13 @@ static int create_queue_group(struct kbase_context *const kctx,
group->tiler_max = create->in.tiler_max;
group->fragment_max = create->in.fragment_max;
group->compute_max = create->in.compute_max;
+ group->csi_handlers = create->in.csi_handlers;
group->priority = kbase_csf_priority_queue_group_priority_to_relative(
kbase_csf_priority_check(kctx->kbdev, create->in.priority));
group->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
group->faulted = false;
+ group->cs_unrecoverable = false;
+ group->reevaluate_idle_status = false;
group->group_uid = generate_group_uid();
@@ -1411,6 +1406,14 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx,
const u32 tiler_count = hweight64(create->in.tiler_mask);
const u32 fragment_count = hweight64(create->in.fragment_mask);
const u32 compute_count = hweight64(create->in.compute_mask);
+ size_t i;
+
+ for (i = 0; i < sizeof(create->in.padding); i++) {
+ if (create->in.padding[i] != 0) {
+ dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n");
+ return -EINVAL;
+ }
+ }
rt_mutex_lock(&kctx->csf.lock);
@@ -1429,6 +1432,10 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx,
"No CSG has at least %d CSs",
create->in.cs_min);
err = -EINVAL;
+ } else if (create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK) {
+ dev_warn(kctx->kbdev->dev, "Unknown exception handler flags set: %u",
+ create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK);
+ err = -EINVAL;
} else if (create->in.reserved) {
dev_warn(kctx->kbdev->dev, "Reserved field was set to non-0");
err = -EINVAL;
@@ -1467,9 +1474,8 @@ static void term_normal_suspend_buffer(struct kbase_context *const kctx,
lockdep_assert_held(&kctx->csf.lock);
- WARN_ON(kbase_mmu_teardown_pages(
- kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
- s_buf->reg->start_pfn, nr_pages, MCU_AS_NR));
+ WARN_ON(kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
+ s_buf->reg->start_pfn, s_buf->phy, nr_pages, MCU_AS_NR));
WARN_ON(s_buf->reg->flags & KBASE_REG_FREE);
@@ -1499,10 +1505,16 @@ static void term_protected_suspend_buffer(struct kbase_device *const kbdev,
{
const size_t nr_pages =
PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+ struct tagged_addr *phys = kmalloc(sizeof(*phys) * nr_pages, GFP_KERNEL);
+ size_t i = 0;
- WARN_ON(kbase_mmu_teardown_pages(
- kbdev, &kbdev->csf.mcu_mmu,
- s_buf->reg->start_pfn, nr_pages, MCU_AS_NR));
+ for (i = 0; phys && i < nr_pages; i++)
+ phys[i] = as_tagged(s_buf->pma[i]->pa);
+
+ WARN_ON(kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, s_buf->reg->start_pfn, phys,
+ nr_pages, MCU_AS_NR));
+
+ kfree(phys);
WARN_ON(s_buf->reg->flags & KBASE_REG_FREE);
@@ -1732,7 +1744,6 @@ void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev,
int kbase_csf_ctx_init(struct kbase_context *kctx)
{
- struct kbase_device *kbdev = kctx->kbdev;
int err = -ENOMEM;
INIT_LIST_HEAD(&kctx->csf.queue_list);
@@ -1741,19 +1752,6 @@ int kbase_csf_ctx_init(struct kbase_context *kctx)
kbase_csf_event_init(kctx);
kctx->csf.user_reg_vma = NULL;
- mutex_lock(&kbdev->pm.lock);
- /* The inode information for /dev/malixx file is not available at the
- * time of device probe as the inode is created when the device node
- * is created by udevd (through mknod).
- */
- if (kctx->filp) {
- if (!kbdev->csf.mali_file_inode)
- kbdev->csf.mali_file_inode = kctx->filp->f_inode;
-
- /* inode is unique for a file */
- WARN_ON(kbdev->csf.mali_file_inode != kctx->filp->f_inode);
- }
- mutex_unlock(&kbdev->pm.lock);
/* Mark all the cookies as 'free' */
bitmap_fill(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE);
@@ -1763,14 +1761,9 @@ int kbase_csf_ctx_init(struct kbase_context *kctx)
if (unlikely(!kctx->csf.wq))
goto out;
- kthread_init_worker(&kctx->csf.pending_submission_worker);
- kctx->csf.pending_sub_worker_thread = kbase_create_realtime_thread(
- kctx->kbdev,
- kthread_worker_fn,
- &kctx->csf.pending_submission_worker,
- "mali_submit");
-
- if (IS_ERR(kctx->csf.pending_sub_worker_thread)) {
+ err = kbase_create_realtime_thread(kctx->kbdev, kthread_worker_fn,
+ &kctx->csf.pending_submission_worker, "mali_submit");
+ if (err) {
dev_err(kctx->kbdev->dev, "error initializing pending submission worker thread");
goto out_err_kthread;
}
@@ -1798,7 +1791,7 @@ out_err_tiler_heap_context:
out_err_kcpu_queue_context:
kbase_csf_scheduler_context_term(kctx);
out_err_scheduler_context:
- kthread_stop(kctx->csf.pending_sub_worker_thread);
+ kbase_destroy_kworker_stack(&kctx->csf.pending_submission_worker);
out_err_kthread:
destroy_workqueue(kctx->csf.wq);
out:
@@ -1957,8 +1950,7 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
rt_mutex_unlock(&kctx->csf.lock);
- kthread_flush_worker(&kctx->csf.pending_submission_worker);
- kthread_stop(kctx->csf.pending_sub_worker_thread);
+ kbase_destroy_kworker_stack(&kctx->csf.pending_submission_worker);
kbase_csf_tiler_heap_context_term(kctx);
kbase_csf_kcpu_queue_context_term(kctx);
@@ -1972,7 +1964,7 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
* handle_oom_event - Handle the OoM event generated by the firmware for the
* CSI.
*
- * @kctx: Pointer to the kbase context in which the tiler heap was initialized.
+ * @group: Pointer to the CSG group the oom-event belongs to.
* @stream: Pointer to the structure containing info provided by the firmware
* about the CSI.
*
@@ -1987,9 +1979,10 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
* Return: 0 if successfully handled the request, otherwise a negative error
* code on failure.
*/
-static int handle_oom_event(struct kbase_context *const kctx,
- struct kbase_csf_cmd_stream_info const *const stream)
+static int handle_oom_event(struct kbase_queue_group *const group,
+ struct kbase_csf_cmd_stream_info const *const stream)
{
+ struct kbase_context *const kctx = group->kctx;
u64 gpu_heap_va =
kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_LO) |
((u64)kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_HI) << 32);
@@ -2016,12 +2009,18 @@ static int handle_oom_event(struct kbase_context *const kctx,
err = kbase_csf_tiler_heap_alloc_new_chunk(kctx,
gpu_heap_va, renderpasses_in_flight, pending_frag_count, &new_chunk_ptr);
- /* It is okay to acknowledge with a NULL chunk (firmware will then wait
- * for the fragment jobs to complete and release chunks)
- */
- if (err == -EBUSY)
+ if ((group->csi_handlers & BASE_CSF_TILER_OOM_EXCEPTION_FLAG) &&
+ (pending_frag_count == 0) && (err == -ENOMEM || err == -EBUSY)) {
+ /* The group allows incremental rendering, trigger it */
new_chunk_ptr = 0;
- else if (err)
+ dev_dbg(kctx->kbdev->dev, "Group-%d (slot-%d) enter incremental render\n",
+ group->handle, group->csg_nr);
+ } else if (err == -EBUSY) {
+ /* Acknowledge with a NULL chunk (firmware will then wait for
+ * the fragment jobs to complete and release chunks)
+ */
+ new_chunk_ptr = 0;
+ } else if (err)
return err;
kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_START_LO,
@@ -2136,7 +2135,7 @@ static void kbase_queue_oom_event(struct kbase_queue *const queue)
if (cs_oom_ack == cs_oom_req)
goto unlock;
- err = handle_oom_event(kctx, stream);
+ err = handle_oom_event(group, stream);
kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_oom_ack,
@@ -2273,7 +2272,7 @@ static void protm_event_worker(struct work_struct *data)
struct kbase_queue_group *const group =
container_of(data, struct kbase_queue_group, protm_event_work);
- KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_BEGIN,
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START,
group, 0u);
kbase_csf_scheduler_group_protm_enter(group);
KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END,
@@ -2440,6 +2439,11 @@ handle_fatal_event(struct kbase_queue *const queue,
CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR) {
queue_work(system_wq, &kbdev->csf.fw_error_work);
} else {
+ if (cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE) {
+ queue->group->cs_unrecoverable = true;
+ if (kbase_prepare_to_reset_gpu(queue->kctx->kbdev, RESET_FLAGS_NONE))
+ kbase_reset_gpu(queue->kctx->kbdev);
+ }
get_queue(queue);
queue->cs_fatal = cs_fatal;
queue->cs_fatal_info = cs_fatal_info;
@@ -2493,8 +2497,9 @@ static void handle_queue_exception_event(struct kbase_queue *const queue,
* @ginfo: The CSG interface provided by the firmware.
* @irqreq: CSG's IRQ request bitmask (one bit per CS).
* @irqack: CSG's IRQ acknowledge bitmask (one bit per CS).
- * @track: Pointer that tracks the highest idle CSG and the newly possible viable
- * protcted mode requesting group, in current IRQ context.
+ * @track: Pointer that tracks the highest scanout priority idle CSG
+ * and any newly potentially viable protected mode requesting
+ * CSG in current IRQ context.
*
* If the interrupt request bitmask differs from the acknowledge bitmask
* then the firmware is notifying the host of an event concerning those
@@ -2537,7 +2542,8 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
if ((cs_req & CS_REQ_EXCEPTION_MASK) ^
(cs_ack & CS_ACK_EXCEPTION_MASK)) {
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_FAULT_INTERRUPT, group, queue, cs_req ^ cs_ack);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT,
+ group, queue, cs_req ^ cs_ack);
handle_queue_exception_event(queue, cs_req, cs_ack);
}
@@ -2549,16 +2555,18 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
u32 const cs_req_remain = cs_req & ~CS_REQ_EXCEPTION_MASK;
u32 const cs_ack_remain = cs_ack & ~CS_ACK_EXCEPTION_MASK;
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_IGNORED_INTERRUPTS_GROUP_SUSPEND,
- group, queue, cs_req_remain ^ cs_ack_remain);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev,
+ CSI_INTERRUPT_GROUP_SUSPENDS_IGNORED,
+ group, queue,
+ cs_req_remain ^ cs_ack_remain);
continue;
}
if (((cs_req & CS_REQ_TILER_OOM_MASK) ^
(cs_ack & CS_ACK_TILER_OOM_MASK))) {
get_queue(queue);
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_TILER_OOM_INTERRUPT, group, queue,
- cs_req ^ cs_ack);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_TILER_OOM,
+ group, queue, cs_req ^ cs_ack);
if (WARN_ON(!queue_work(wq, &queue->oom_event_work))) {
/* The work item shall not have been
* already queued, there can be only
@@ -2571,8 +2579,8 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
if ((cs_req & CS_REQ_PROTM_PEND_MASK) ^
(cs_ack & CS_ACK_PROTM_PEND_MASK)) {
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_INTERRUPT, group, queue,
- cs_req ^ cs_ack);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_PROTM_PEND,
+ group, queue, cs_req ^ cs_ack);
dev_dbg(kbdev->dev,
"Protected mode entry request for queue on csi %d bound to group-%d on slot %d",
@@ -2580,7 +2588,7 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
group->csg_nr);
bitmap_set(group->protm_pending_bitmap, i, 1);
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, PROTM_PENDING_SET, group, queue,
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_SET, group, queue,
group->protm_pending_bitmap[0]);
protm_pend = true;
}
@@ -2611,7 +2619,7 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @csg_nr: CSG number.
* @track: Pointer that tracks the highest idle CSG and the newly possible viable
- * protcted mode requesting group, in current IRQ context.
+ * protected mode requesting group, in current IRQ context.
*
* Handles interrupts for a CSG and for CSs within it.
*
@@ -2634,7 +2642,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c
if (WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num))
return;
- KBASE_KTRACE_ADD(kbdev, CSG_INTERRUPT_PROCESS, NULL, csg_nr);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr);
ginfo = &kbdev->csf.global_iface.groups[csg_nr];
req = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ);
@@ -2674,7 +2682,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c
kbase_csf_firmware_csg_input_mask(ginfo,
CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SYNC_UPDATE_INTERRUPT, group, req ^ ack);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_SYNC_UPDATE, group, req ^ ack);
/* SYNC_UPDATE events shall invalidate GPU idle event */
atomic_set(&kbdev->csf.scheduler.gpu_no_longer_idle, true);
@@ -2691,7 +2699,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c
set_bit(csg_nr, scheduler->csg_slots_idle_mask);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, group,
scheduler->csg_slots_idle_mask[0]);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_IDLE_INTERRUPT, group, req ^ ack);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_IDLE, group, req ^ ack);
dev_dbg(kbdev->dev, "Idle notification received for Group %u on slot %d\n",
group->handle, csg_nr);
@@ -2699,7 +2707,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c
/* If there are non-idle CSGs waiting for a slot, fire
* a tock for a replacement.
*/
- kthread_mod_delayed_work(&scheduler->csf_worker, &scheduler->tock_work, 0);
+ kbase_csf_scheduler_invoke_tock(kbdev);
}
if (group->scan_seq_num < track->idle_seq) {
@@ -2712,7 +2720,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack,
CSG_REQ_PROGRESS_TIMER_EVENT_MASK);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_PROGRESS_TIMER_INTERRUPT,
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROGRESS_TIMER_EVENT,
group, req ^ ack);
dev_info(kbdev->dev,
"[%llu] Iterator PROGRESS_TIMER timeout notification received for group %u of ctx %d_%d on slot %d\n",
@@ -2874,7 +2882,7 @@ static inline void process_protm_exit(struct kbase_device *kbdev, u32 glb_ack)
GLB_REQ_PROTM_EXIT_MASK);
if (likely(scheduler->active_protm_grp)) {
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_EXIT_PROTM,
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_EXIT,
scheduler->active_protm_grp, 0u);
scheduler->active_protm_grp = NULL;
} else {
@@ -2898,19 +2906,22 @@ static inline void process_tracked_info_for_protm(struct kbase_device *kbdev,
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+ if (likely(current_protm_pending_seq == KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID))
+ return;
+
/* Handle protm from the tracked information */
if (track->idle_seq < current_protm_pending_seq) {
/* If the protm enter was prevented due to groups priority, then fire a tock
* for the scheduler to re-examine the case.
- */
+ */
dev_dbg(kbdev->dev, "Attempt pending protm from idle slot %d\n", track->idle_slot);
- kthread_mod_delayed_work(&scheduler->csf_worker, &scheduler->tock_work, 0);
+ kbase_csf_scheduler_invoke_tock(kbdev);
} else if (group) {
u32 i, num_groups = kbdev->csf.global_iface.group_num;
struct kbase_queue_group *grp;
bool tock_triggered = false;
- /* A new protem request, and track->idle_seq is not sufficient, check across
+ /* A new protm request, and track->idle_seq is not sufficient, check across
* previously notified idle CSGs in the current tick/tock cycle.
*/
for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) {
@@ -2927,7 +2938,7 @@ static inline void process_tracked_info_for_protm(struct kbase_device *kbdev,
tock_triggered = true;
dev_dbg(kbdev->dev,
"Attempt new protm from tick/tock idle slot %d\n", i);
- kthread_mod_delayed_work(&scheduler->csf_worker, &scheduler->tock_work, 0);
+ kbase_csf_scheduler_invoke_tock(kbdev);
break;
}
}
@@ -2975,7 +2986,7 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
lockdep_assert_held(&kbdev->hwaccess_lock);
- KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT, NULL, val);
+ KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_START, NULL, val);
kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
order_job_irq_clear_with_iface_mem_read();
@@ -3010,7 +3021,7 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
global_iface, GLB_REQ);
glb_ack = kbase_csf_firmware_global_output(
global_iface, GLB_ACK);
- KBASE_KTRACE_ADD(kbdev, GLB_REQ_ACQ, NULL, glb_req ^ glb_ack);
+ KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_GLB_REQ_ACK, NULL, glb_req ^ glb_ack);
check_protm_enter_req_complete(kbdev, glb_req, glb_ack);
diff --git a/mali_kbase/csf/mali_kbase_csf.h b/mali_kbase/csf/mali_kbase_csf.h
index f689205..0b87f50 100644
--- a/mali_kbase/csf/mali_kbase_csf.h
+++ b/mali_kbase/csf/mali_kbase_csf.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -45,8 +45,6 @@
*/
#define KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID (U32_MAX)
-#define FIRMWARE_PING_INTERVAL_MS (12000) /* 12 seconds */
-
/* 60ms optimizes power while minimizing latency impact for UI test cases. */
#define FIRMWARE_IDLE_HYSTERESIS_TIME_MS (60)
#define MALI_HOST_CONTROLS_SC_RAILS_IDLE_TIMER_US (600)
@@ -162,7 +160,7 @@ int kbase_csf_queue_bind(struct kbase_context *kctx,
* resources allocated for this queue if there
* are any.
*
- * @queue: Pointer to queue to be unbound.
+ * @queue: Pointer to queue to be unbound.
* @process_exit: Flag to indicate if process exit is happening.
*/
void kbase_csf_queue_unbind(struct kbase_queue *queue, bool process_exit);
diff --git a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c
index 6b1186e..e598f8b 100644
--- a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c
+++ b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -23,11 +23,11 @@
#include <mali_kbase.h>
#include <linux/seq_file.h>
#include <linux/delay.h>
-#include <csf/mali_kbase_csf_trace_buffer.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#if IS_ENABLED(CONFIG_DEBUG_FS)
#include "mali_kbase_csf_tl_reader.h"
+#include <linux/version_compat_defs.h>
#define MAX_SCHED_STATE_STRING_LEN (16)
static const char *scheduler_state_to_string(struct kbase_device *kbdev,
@@ -77,16 +77,32 @@ static const char *blocked_reason_to_string(u32 reason_id)
return cs_blocked_reason[reason_id];
}
+static bool sb_source_supported(u32 glb_version)
+{
+ bool supported = false;
+
+ if (((GLB_VERSION_MAJOR_GET(glb_version) == 3) &&
+ (GLB_VERSION_MINOR_GET(glb_version) >= 5)) ||
+ ((GLB_VERSION_MAJOR_GET(glb_version) == 2) &&
+ (GLB_VERSION_MINOR_GET(glb_version) >= 6)) ||
+ ((GLB_VERSION_MAJOR_GET(glb_version) == 1) &&
+ (GLB_VERSION_MINOR_GET(glb_version) >= 3)))
+ supported = true;
+
+ return supported;
+}
+
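+/* Illustrative checks (assuming the GLB_VERSION_{MAJOR,MINOR}_GET helpers
+ * unpack major from bits 31:24 and minor from bits 23:16 of GLB_VERSION):
+ *
+ *   sb_source_supported((1u << 24) | (3u << 16))  -> true  (interface 1.3)
+ *   sb_source_supported((2u << 24) | (5u << 16))  -> false (interface 2.5)
+ *   sb_source_supported((3u << 24) | (6u << 16))  -> true  (interface 3.6)
+ */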
static void kbasep_csf_scheduler_dump_active_queue_cs_status_wait(
- struct seq_file *file, u32 wait_status, u32 wait_sync_value,
- u64 wait_sync_live_value, u64 wait_sync_pointer, u32 sb_status,
- u32 blocked_reason)
+ struct seq_file *file, u32 glb_version, u32 wait_status, u32 wait_sync_value,
+ u64 wait_sync_live_value, u64 wait_sync_pointer, u32 sb_status, u32 blocked_reason)
{
#define WAITING "Waiting"
#define NOT_WAITING "Not waiting"
seq_printf(file, "SB_MASK: %d\n",
CS_STATUS_WAIT_SB_MASK_GET(wait_status));
+ if (sb_source_supported(glb_version))
+ seq_printf(file, "SB_SOURCE: %d\n", CS_STATUS_WAIT_SB_SOURCE_GET(wait_status));
seq_printf(file, "PROGRESS_WAIT: %s\n",
CS_STATUS_WAIT_PROGRESS_WAIT_GET(wait_status) ?
WAITING : NOT_WAITING);
@@ -156,10 +172,13 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
struct kbase_vmap_struct *mapping;
u64 *evt;
u64 wait_sync_live_value;
+ u32 glb_version;
if (!queue)
return;
+ glb_version = queue->kctx->kbdev->csf.global_iface.version;
+
if (WARN_ON(queue->csi_index == KBASEP_IF_NR_INVALID ||
!queue->group))
return;
@@ -200,9 +219,8 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
}
kbasep_csf_scheduler_dump_active_queue_cs_status_wait(
- file, wait_status, wait_sync_value,
- wait_sync_live_value, wait_sync_pointer,
- sb_status, blocked_reason);
+ file, glb_version, wait_status, wait_sync_value,
+ wait_sync_live_value, wait_sync_pointer, sb_status, blocked_reason);
}
} else {
struct kbase_device const *const kbdev =
@@ -257,9 +275,8 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
}
kbasep_csf_scheduler_dump_active_queue_cs_status_wait(
- file, wait_status, wait_sync_value,
- wait_sync_live_value, wait_sync_pointer, sb_status,
- blocked_reason);
+ file, glb_version, wait_status, wait_sync_value, wait_sync_live_value,
+ wait_sync_pointer, sb_status, blocked_reason);
/* Dealing with cs_trace */
if (kbase_csf_scheduler_queue_has_trace(queue))
kbasep_csf_scheduler_dump_active_cs_trace(file, stream);
@@ -500,11 +517,7 @@ static const struct file_operations kbasep_csf_queue_group_debugfs_fops = {
void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx)
{
struct dentry *file;
-#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
const mode_t mode = 0444;
-#else
- const mode_t mode = 0400;
-#endif
if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry)))
return;
@@ -556,14 +569,11 @@ static int kbasep_csf_debugfs_scheduling_timer_kick_set(
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(kbasep_csf_debugfs_scheduling_timer_enabled_fops,
- &kbasep_csf_debugfs_scheduling_timer_enabled_get,
- &kbasep_csf_debugfs_scheduling_timer_enabled_set,
- "%llu\n");
-DEFINE_SIMPLE_ATTRIBUTE(kbasep_csf_debugfs_scheduling_timer_kick_fops,
- NULL,
- &kbasep_csf_debugfs_scheduling_timer_kick_set,
- "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(kbasep_csf_debugfs_scheduling_timer_enabled_fops,
+ &kbasep_csf_debugfs_scheduling_timer_enabled_get,
+ &kbasep_csf_debugfs_scheduling_timer_enabled_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(kbasep_csf_debugfs_scheduling_timer_kick_fops, NULL,
+ &kbasep_csf_debugfs_scheduling_timer_kick_set, "%llu\n");
/**
* kbase_csf_debugfs_scheduler_state_get() - Get the state of scheduler.
@@ -671,7 +681,6 @@ void kbase_csf_debugfs_init(struct kbase_device *kbdev)
&kbasep_csf_debugfs_scheduler_state_fops);
kbase_csf_tl_reader_debugfs_init(kbdev);
- kbase_csf_firmware_trace_buffer_debugfs_init(kbdev);
}
#else
diff --git a/mali_kbase/csf/mali_kbase_csf_defs.h b/mali_kbase/csf/mali_kbase_csf_defs.h
index d65f729..836b558 100644
--- a/mali_kbase/csf/mali_kbase_csf_defs.h
+++ b/mali_kbase/csf/mali_kbase_csf_defs.h
@@ -55,7 +55,7 @@
#define CSF_FIRMWARE_ENTRY_ZERO (1ul << 31)
/**
- * enum kbase_csf_bind_state - bind state of the queue
+ * enum kbase_csf_queue_bind_state - bind state of the queue
*
* @KBASE_CSF_QUEUE_UNBOUND: Set when the queue is registered or when the link
* between queue and the group to which it was bound or being bound is removed.
@@ -259,6 +259,11 @@ enum kbase_queue_group_priority {
* @CSF_PM_TIMEOUT: Timeout for GPU Power Management to reach the desired
* Shader, L2 and MCU state.
* @CSF_GPU_RESET_TIMEOUT: Waiting timeout for GPU reset to complete.
+ * @CSF_CSG_SUSPEND_TIMEOUT: Timeout given for all active CSGs to be suspended.
+ * @CSF_FIRMWARE_BOOT_TIMEOUT: Maximum time to wait for firmware to boot.
+ * @CSF_FIRMWARE_PING_TIMEOUT: Maximum time to wait for firmware to respond
+ * to a ping from KBase.
+ * @CSF_SCHED_PROTM_PROGRESS_TIMEOUT: Timeout used to prevent protected mode execution hang.
* @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in
* the enum.
*/
@@ -266,6 +271,10 @@ enum kbase_timeout_selector {
CSF_FIRMWARE_TIMEOUT,
CSF_PM_TIMEOUT,
CSF_GPU_RESET_TIMEOUT,
+ CSF_CSG_SUSPEND_TIMEOUT,
+ CSF_FIRMWARE_BOOT_TIMEOUT,
+ CSF_FIRMWARE_PING_TIMEOUT,
+ CSF_SCHED_PROTM_PROGRESS_TIMEOUT,
/* Must be the last in the enum */
KBASE_TIMEOUT_SELECTOR_COUNT
@@ -446,6 +455,7 @@ struct kbase_protected_suspend_buffer {
* allowed to use.
* @compute_max: Maximum number of compute endpoints the group is
* allowed to use.
+ * @csi_handlers: Requested CSI exception handler flags for the group.
* @tiler_mask: Mask of tiler endpoints the group is allowed to use.
* @fragment_mask: Mask of fragment endpoints the group is allowed to use.
* @compute_mask: Mask of compute endpoints the group is allowed to use.
@@ -467,6 +477,12 @@ struct kbase_protected_suspend_buffer {
* @faulted: Indicates that a GPU fault occurred for the queue group.
* This flag persists until the fault has been queued to be
* reported to userspace.
+ * @cs_unrecoverable: Flag to unblock the thread waiting for CSG termination in
+ *                    case of CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE.
+ * @reevaluate_idle_status: Flag set when work is submitted for the normal group
+ *                    or it becomes unblocked during protected mode. The flag
+ *                    helps the scheduler confirm whether the group actually
+ *                    became non-idle or not.
* @bound_queues: Array of registered queues bound to this queue group.
* @doorbell_nr: Index of the hardware doorbell page assigned to the
* group.
@@ -494,6 +510,7 @@ struct kbase_queue_group {
u8 tiler_max;
u8 fragment_max;
u8 compute_max;
+ u8 csi_handlers;
u64 tiler_mask;
u64 fragment_mask;
@@ -507,6 +524,8 @@ struct kbase_queue_group {
u32 prepared_seq_num;
u32 scan_seq_num;
bool faulted;
+ bool cs_unrecoverable;
+ bool reevaluate_idle_status;
struct kbase_queue *bound_queues[MAX_SUPPORTED_STREAMS_PER_GROUP];
@@ -529,12 +548,10 @@ struct kbase_queue_group {
* @lock: Lock preventing concurrent access to @array and the @in_use bitmap.
* @array: Array of pointers to kernel CPU command queues.
* @in_use: Bitmap which indicates which kernel CPU command queues are in use.
- * @csf_kcpu_worker: Dedicated worker for processing kernel CPU command
- * queues.
- * @csf_kcpu_thread: The kthread used to process kernel CPU command queues.
* @num_cmds: The number of commands that have been enqueued across
* all the KCPU command queues. This could be used as a
* timestamp to determine the command's enqueueing time.
+ * @jit_lock: Lock protecting jit_cmds_head and jit_blocked_queues.
* @jit_cmds_head: A list of the just-in-time memory commands, both
* allocate & free, in submission order, protected
* by kbase_csf_kcpu_queue_context.lock.
@@ -547,10 +564,8 @@ struct kbase_csf_kcpu_queue_context {
struct mutex lock;
struct kbase_kcpu_command_queue *array[KBASEP_MAX_KCPU_QUEUES];
DECLARE_BITMAP(in_use, KBASEP_MAX_KCPU_QUEUES);
- struct kthread_worker csf_kcpu_worker;
- struct task_struct *csf_kcpu_thread;
- u64 num_cmds;
-
+ atomic64_t num_cmds;
+ spinlock_t jit_lock;
struct list_head jit_cmds_head;
struct list_head jit_blocked_queues;
};
@@ -608,6 +623,8 @@ struct kbase_csf_heap_context_allocator {
* @ctx_alloc: Allocator for heap context structures.
* @nr_of_heaps: Total number of tiler heaps that were added during the
* life time of the context.
+ * @est_count_pages: Estimated number of potentially freeable pages from all
+ *                   the heaps on the @list.
*
* This contains all of the CSF state relating to chunked tiler heaps for one
* @kbase_context. It is not the same as a heap context structure allocated by
@@ -618,30 +635,66 @@ struct kbase_csf_tiler_heap_context {
struct list_head list;
struct kbase_csf_heap_context_allocator ctx_alloc;
u64 nr_of_heaps;
+ atomic_t est_count_pages;
+};
+
+#define CSF_CTX_RECLAIM_CANDI_FLAG (1ul << 0)
+#define CSF_CTX_RECLAIM_SCAN_FLAG (1ul << 1)
+/**
+ * struct kbase_kctx_heap_info - Object representing the data section of a kctx
+ *                               for tiler heap reclaim manager
+ * @mgr_link:        Link for hooking up to the heap reclaim manager's kctx lists
+ * @attach_jiffies: jiffies when the kctx is attached to the reclaim manager.
+ * @nr_scan_pages:   Refined estimate of the number of freeable pages from the
+ *                   kctx, made after all its CSGs are off slot and have gone
+ *                   through the freeable-page counting process. This field is
+ *                   updated when the kctx is moved to the reclaim manager's
+ *                   pending scan (freeing) action list, after the counting.
+ * @nr_est_pages:    Estimated number of pages of the kctx when all its CSGs are
+ *                   off slot. This is a nominal value used for estimating the
+ *                   page count available from the kctx while it is on the
+ *                   reclaim manager's candidate list, waiting to be counted.
+ * @flags:           Flags reflecting the kctx's internal state in relation to
+ *                   the scheduler's heap reclaim manager.
+ * @on_slot_grps: Number of on-slot groups from this kctx. In principle, if a
+ * kctx has groups on-slot, the scheduler will detach it from
+ * the tiler heap reclaim manager, i.e. no tiler heap memory
+ * reclaiming operations on the kctx.
+ */
+struct kbase_kctx_heap_info {
+ struct list_head mgr_link;
+ unsigned long attach_jiffies;
+ u32 nr_scan_pages;
+ u32 nr_est_pages;
+ u16 flags;
+ u8 on_slot_grps;
};
/**
* struct kbase_csf_scheduler_context - Object representing the scheduler's
* context for a GPU address space.
*
- * @runnable_groups: Lists of runnable GPU command queue groups in the kctx,
- * one per queue group relative-priority level.
- * @num_runnable_grps: Total number of runnable groups across all priority
- * levels in @runnable_groups.
- * @idle_wait_groups: A list of GPU command queue groups in which all enabled
- * GPU command queues are idle and at least one of them
- * is blocked on a sync wait operation.
- * @num_idle_wait_grps: Length of the @idle_wait_groups list.
- * @sync_update_worker: Dedicated workqueue to process work items corresponding
- * to the sync_update events by sync_set/sync_add
- * instruction execution on CSs bound to groups
- * of @idle_wait_groups list.
- * @sync_update_worker_thread: Task struct for @csf_worker.
- * @sync_update_work: work item to process the sync_update events by
- * sync_set / sync_add instruction execution on command
- * streams bound to groups of @idle_wait_groups list.
- * @ngrp_to_schedule: Number of groups added for the context to the
- * 'groups_to_schedule' list of scheduler instance.
+ * @runnable_groups: Lists of runnable GPU command queue groups in the kctx,
+ * one per queue group relative-priority level.
+ * @num_runnable_grps: Total number of runnable groups across all priority
+ * levels in @runnable_groups.
+ * @idle_wait_groups: A list of GPU command queue groups in which all enabled
+ * GPU command queues are idle and at least one of them
+ * is blocked on a sync wait operation.
+ * @num_idle_wait_grps: Length of the @idle_wait_groups list.
+ * @sync_update_worker: Dedicated workqueue to process work items corresponding
+ * to the sync_update events by sync_set/sync_add
+ * instruction execution on CSs bound to groups
+ * of @idle_wait_groups list.
+ * @sync_update_work: work item to process the sync_update events by
+ * sync_set / sync_add instruction execution on command
+ * streams bound to groups of @idle_wait_groups list.
+ * @ngrp_to_schedule: Number of groups added for the context to the
+ * 'groups_to_schedule' list of scheduler instance.
+ * @heap_info: Heap reclaim information data of the kctx. As the
+ * reclaim action needs to be coordinated with the scheduler
+ * operations, the data is placed inside the scheduler's
+ * context object for this linkage.
*/
struct kbase_csf_scheduler_context {
struct list_head runnable_groups[KBASE_QUEUE_GROUP_PRIORITY_COUNT];
@@ -649,9 +702,9 @@ struct kbase_csf_scheduler_context {
struct list_head idle_wait_groups;
u32 num_idle_wait_grps;
struct kthread_worker sync_update_worker;
- struct task_struct *sync_update_worker_thread;
struct kthread_work sync_update_work;
u32 ngrp_to_schedule;
+ struct kbase_kctx_heap_info heap_info;
};
/**
@@ -735,7 +788,6 @@ struct kbase_csf_event {
* @sched: Object representing the scheduler's context
* @pending_submission_worker: Worker for the pending submission work item
* @pending_submission_work: Work item to process pending kicked GPU command queues.
- * @pending_sub_work_thread: task_struct for @pending_submission_worker
* @cpu_queue: CPU queue information. Only be available when DEBUG_FS
* is enabled.
*/
@@ -756,7 +808,6 @@ struct kbase_csf_context {
struct kbase_csf_scheduler_context sched;
struct kthread_worker pending_submission_worker;
struct kthread_work pending_submission_work;
- struct task_struct *pending_sub_worker_thread;
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct kbase_csf_cpu_queue_context cpu_queue;
#endif
@@ -799,6 +850,25 @@ struct kbase_csf_csg_slot {
};
/**
+ * struct kbase_csf_sched_heap_reclaim_mgr - Object for managing tiler heap reclaim
+ * kctx lists inside the CSF device's scheduler.
+ *
+ * @candidate_ctxs:  List of kctxs that have all their CSGs off slot. Candidates
+ *                   are ready for reclaim count examination.
+ * @scan_list_ctxs:  List of counted kctxs, ready for reclaim scan operations.
+ * @est_cand_pages:  Estimated number of pages, based on chunks that could be
+ *                   freed from the candidate list. The number is increased by an
+ *                   estimate when a candidate is added, and decreased by the same
+ *                   amount when it is removed.
+ * @mgr_scan_pages:  Number of pages freeable in the scan list, device wide.
+ */
+struct kbase_csf_sched_heap_reclaim_mgr {
+ struct list_head candidate_ctxs;
+ struct list_head scan_list_ctxs;
+ atomic_t est_cand_pages;
+ atomic_t mgr_scan_pages;
+};
+
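+/* Sketch of the intended flow through the reclaim manager lists, as described
+ * by the field documentation above (the scheduler code driving it is outside
+ * this hunk):
+ *
+ *   all CSGs of a kctx go off slot
+ *     -> kctx added to @candidate_ctxs, @est_cand_pages += kctx's nr_est_pages
+ *   the kctx's freeable pages are counted
+ *     -> kctx moved to @scan_list_ctxs, @mgr_scan_pages += kctx's nr_scan_pages
+ *   a CSG of the kctx goes back on slot
+ *     -> kctx detached from the manager and the counters decreased accordingly
+ */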
+/**
* struct kbase_csf_scheduler - Object representing the scheduler used for
* CSF for an instance of GPU platform device.
* @lock: Lock to serialize the scheduler operations and
@@ -862,7 +932,6 @@ struct kbase_csf_csg_slot {
* then it will only perform scheduling under the
* influence of external factors e.g., IRQs, IOCTLs.
* @csf_worker: Dedicated kthread_worker to execute the @tick_work.
- * @csf_worker_thread: Task struct for @csf_worker.
* @tick_timer: High-resolution timer employed to schedule tick
* workqueue items (kernel-provided delayed_work
* items do not use hrtimer and for some reason do
@@ -871,6 +940,8 @@ struct kbase_csf_csg_slot {
* operation to implement timeslice-based scheduling.
* @tock_work: Work item that would perform the schedule on tock
* operation to implement the asynchronous scheduling.
+ * @pending_tock_work: Indicates that the tock work item should re-execute
+ * once it's finished instead of going back to sleep.
* @ping_work: Work item that would ping the firmware at regular
* intervals, only if there is a single active CSG
* slot, to check if firmware is alive and would
@@ -880,8 +951,6 @@ struct kbase_csf_csg_slot {
* @top_grp.
* @top_grp: Pointer to queue group inside @groups_to_schedule
* list that was assigned the highest slot priority.
- * @tock_pending_request: A "tock" request is pending: a group that is not
- * currently on the GPU demands to be scheduled.
* @active_protm_grp: Indicates if firmware has been permitted to let GPU
* enter protected mode with the given group. On exit
* from protected mode the pointer is reset to NULL.
@@ -939,6 +1008,8 @@ struct kbase_csf_csg_slot {
* is disabled on FW side. It is set for the power
* policy where the power managment of shader cores
* needs to be done by the Host.
+ * @protm_enter_time: GPU protected mode enter time.
+ * @reclaim_mgr: CSGs tiler heap manager object.
*/
struct kbase_csf_scheduler {
struct mutex lock;
@@ -962,14 +1033,13 @@ struct kbase_csf_scheduler {
unsigned long last_schedule;
bool timer_enabled;
struct kthread_worker csf_worker;
- struct task_struct *csf_worker_thread;
struct hrtimer tick_timer;
struct kthread_work tick_work;
struct kthread_delayed_work tock_work;
+ atomic_t pending_tock_work;
struct delayed_work ping_work;
struct kbase_context *top_ctx;
struct kbase_queue_group *top_grp;
- bool tock_pending_request;
struct kbase_queue_group *active_protm_grp;
struct delayed_work gpu_idle_work;
struct workqueue_struct *idle_wq;
@@ -986,6 +1056,8 @@ struct kbase_csf_scheduler {
bool gpu_idle_work_pending;
#endif
bool gpu_idle_fw_timer_enabled;
+ ktime_t protm_enter_time;
+ struct kbase_csf_sched_heap_reclaim_mgr reclaim_mgr;
};
/*
@@ -1206,6 +1278,57 @@ struct kbase_csf_hwcnt {
bool enable_pending;
};
+/*
+ * struct kbase_csf_mcu_fw - Object containing device loaded MCU firmware data.
+ *
+ * @size:                      Loaded firmware data size. Meaningful only when
+ *                             the other field @data is not NULL.
+ * @data:                      Pointer to the device retained firmware data. If
+ *                             NULL, the data is not loaded yet or loading failed.
+ */
+struct kbase_csf_mcu_fw {
+ size_t size;
+ u8 *data;
+};
+
+/*
+ * Firmware log polling period.
+ */
+#define KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS 25
+
+/**
+ * enum kbase_csf_firmware_log_mode - Firmware log operating mode
+ *
+ * @KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL: Manual mode, firmware log can be read
+ * manually by the userspace (and it will also be dumped automatically into
+ * dmesg on GPU reset).
+ *
+ * @KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT: Automatic printing mode, firmware log
+ * will be periodically emptied into dmesg, manual reading through debugfs is
+ * disabled.
+ */
+enum kbase_csf_firmware_log_mode {
+ KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL,
+ KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT
+};
+
+/**
+ * struct kbase_csf_firmware_log - Object containing members for handling firmware log.
+ *
+ * @mode: Firmware log operating mode.
+ * @busy: Indicating whether a firmware log operation is in progress.
+ * @poll_work: Work item that would poll firmware log buffer
+ * at regular intervals to perform any periodic
+ * activities required by current log mode.
+ * @dump_buf: Buffer used for dumping the log.
+ */
+struct kbase_csf_firmware_log {
+ enum kbase_csf_firmware_log_mode mode;
+ atomic_t busy;
+ struct delayed_work poll_work;
+ u8 *dump_buf;
+};
+
/**
* struct kbase_csf_device - Object representing CSF for an instance of GPU
* platform device.
@@ -1249,11 +1372,14 @@ struct kbase_csf_hwcnt {
* in the address space of every process, that created
* a Base context, to enable the access to LATEST_FLUSH
* register from userspace.
+ * @nr_user_page_mapped: The number of clients using the mapping of USER page.
+ * This is used to maintain backward compatibility.
+ * It's protected by @reg_lock.
* @mali_file_inode: Pointer to the inode corresponding to mali device
* file. This is needed in order to switch to the
* @dummy_user_reg_page on GPU power down.
* All instances of the mali device file will point to
- * the same inode.
+ * the same inode. It's protected by @reg_lock.
* @reg_lock: Lock to serialize the MCU firmware related actions
* that affect all contexts such as allocation of
* regions from shared interface area, assignment of
@@ -1318,6 +1444,9 @@ struct kbase_csf_hwcnt {
* for any request sent to the firmware.
* @hwcnt: Contain members required for handling the dump of
* HW counters.
+ * @fw: Copy of the loaded MCU firmware image.
+ * @fw_log: Contain members required for handling firmware log.
+ * @tiler_heap_reclaim: Tiler heap reclaim shrinker object.
*/
struct kbase_csf_device {
struct kbase_mmu_table mcu_mmu;
@@ -1332,6 +1461,7 @@ struct kbase_csf_device {
u32 db_file_offsets;
struct tagged_addr dummy_db_page;
struct tagged_addr dummy_user_reg_page;
+ u32 nr_user_page_mapped;
struct inode *mali_file_inode;
struct mutex reg_lock;
wait_queue_head_t event_wait;
@@ -1358,6 +1488,9 @@ struct kbase_csf_device {
u32 gpu_idle_dur_count;
unsigned int fw_timeout_ms;
struct kbase_csf_hwcnt hwcnt;
+ struct kbase_csf_mcu_fw fw;
+ struct kbase_csf_firmware_log fw_log;
+ struct shrinker tiler_heap_reclaim;
};
/**
diff --git a/mali_kbase/csf/mali_kbase_csf_event.c b/mali_kbase/csf/mali_kbase_csf_event.c
index e336658..52a6b10 100644
--- a/mali_kbase/csf/mali_kbase_csf_event.c
+++ b/mali_kbase/csf/mali_kbase_csf_event.c
@@ -102,7 +102,7 @@ static void sync_update_notify_gpu(struct kbase_context *kctx)
if (can_notify_gpu) {
kbase_csf_ring_doorbell(kctx->kbdev, CSF_KERNEL_DOORBELL_NR);
- KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT_NOTIFY_GPU, kctx, 0u);
+ KBASE_KTRACE_ADD(kctx->kbdev, CSF_SYNC_UPDATE_NOTIFY_GPU_EVENT, kctx, 0u);
}
spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
diff --git a/mali_kbase/csf/mali_kbase_csf_event.h b/mali_kbase/csf/mali_kbase_csf_event.h
index 4c853b5..52122a9 100644
--- a/mali_kbase/csf/mali_kbase_csf_event.h
+++ b/mali_kbase/csf/mali_kbase_csf_event.h
@@ -30,8 +30,8 @@ struct kbase_csf_event;
enum kbase_csf_event_callback_action;
/**
- * kbase_csf_event_callback_action - type for callback functions to be
- * called upon CSF events.
+ * kbase_csf_event_callback - type for callback functions to be
+ * called upon CSF events.
* @param: Generic parameter to pass to the callback function.
*
* This is the type of callback functions that can be registered
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.c b/mali_kbase/csf/mali_kbase_csf_firmware.c
index da89d73..b5e3f0c 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware.c
@@ -21,6 +21,7 @@
#include "mali_kbase.h"
#include "mali_kbase_csf_firmware_cfg.h"
+#include "mali_kbase_csf_firmware_log.h"
#include "mali_kbase_csf_trace_buffer.h"
#include "mali_kbase_csf_timeout.h"
#include "mali_kbase_mem.h"
@@ -44,11 +45,13 @@
#include <linux/mman.h>
#include <linux/string.h>
#include <linux/mutex.h>
+#include <linux/ctype.h>
#if (KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE)
#include <linux/set_memory.h>
#endif
#include <mmu/mali_kbase_mmu.h>
#include <asm/arch_timer.h>
+#include <linux/delay.h>
#define MALI_MAX_FIRMWARE_NAME_LEN ((size_t)20)
@@ -57,7 +60,7 @@ module_param_string(fw_name, fw_name, sizeof(fw_name), 0644);
MODULE_PARM_DESC(fw_name, "firmware image");
/* The waiting time for firmware to boot */
-static unsigned int csf_firmware_boot_timeout_ms = 500;
+static unsigned int csf_firmware_boot_timeout_ms;
module_param(csf_firmware_boot_timeout_ms, uint, 0444);
MODULE_PARM_DESC(csf_firmware_boot_timeout_ms,
"Maximum time to wait for firmware to boot.");
@@ -93,6 +96,7 @@ MODULE_PARM_DESC(fw_debug,
#define CSF_FIRMWARE_ENTRY_TYPE_FUTF_TEST (2)
#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3)
#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4)
+#define CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA (6)
#define CSF_FIRMWARE_CACHE_MODE_NONE (0ul << 3)
#define CSF_FIRMWARE_CACHE_MODE_CACHED (1ul << 3)
@@ -103,12 +107,18 @@ MODULE_PARM_DESC(fw_debug,
#define TL_METADATA_ENTRY_NAME_OFFSET (0x8)
+#define BUILD_INFO_METADATA_SIZE_OFFSET (0x4)
+#define BUILD_INFO_GIT_SHA_LEN (40U)
+#define BUILD_INFO_GIT_DIRTY_LEN (1U)
+#define BUILD_INFO_GIT_SHA_PATTERN "git_sha: "
+
#define CSF_MAX_FW_STOP_LOOPS (100000)
#define CSF_GLB_REQ_CFG_MASK \
(GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \
GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK)
+
static inline u32 input_page_read(const u32 *const input, const u32 offset)
{
WARN_ON(offset % sizeof(u32));
@@ -249,10 +259,15 @@ static void stop_csf_firmware(struct kbase_device *kbdev)
static void wait_for_firmware_boot(struct kbase_device *kbdev)
{
- const long wait_timeout =
- kbase_csf_timeout_in_jiffies(csf_firmware_boot_timeout_ms);
+ long wait_timeout;
long remaining;
+ if (!csf_firmware_boot_timeout_ms)
+ csf_firmware_boot_timeout_ms =
+ kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_BOOT_TIMEOUT);
+
+ wait_timeout = kbase_csf_timeout_in_jiffies(csf_firmware_boot_timeout_ms);
+
/* Firmware will generate a global interface interrupt once booting
* is complete
*/
@@ -429,24 +444,17 @@ static int reload_fw_image(struct kbase_device *kbdev)
{
const u32 magic = FIRMWARE_HEADER_MAGIC;
struct kbase_csf_firmware_interface *interface;
- const struct firmware *firmware;
+ struct kbase_csf_mcu_fw *const mcu_fw = &kbdev->csf.fw;
int ret = 0;
- if (request_firmware(&firmware, fw_name, kbdev->dev) != 0) {
- dev_err(kbdev->dev,
- "Failed to reload firmware image '%s'\n",
- fw_name);
- return -ENOENT;
- }
-
- /* Do couple of basic sanity checks */
- if (firmware->size < FIRMWARE_HEADER_LENGTH) {
- dev_err(kbdev->dev, "Firmware image unexpectedly too small\n");
+ if (WARN_ON(mcu_fw->data == NULL)) {
+ dev_err(kbdev->dev, "Firmware image copy not loaded\n");
ret = -EINVAL;
goto out;
}
- if (memcmp(firmware->data, &magic, sizeof(magic)) != 0) {
+ /* Do a basic sanity check on MAGIC signature */
+ if (memcmp(mcu_fw->data, &magic, sizeof(magic)) != 0) {
dev_err(kbdev->dev, "Incorrect magic value, firmware image could have been corrupted\n");
ret = -EINVAL;
goto out;
@@ -461,16 +469,14 @@ static int reload_fw_image(struct kbase_device *kbdev)
continue;
}
- load_fw_image_section(kbdev, firmware->data, interface->phys,
- interface->num_pages, interface->flags,
- interface->data_start, interface->data_end);
+ load_fw_image_section(kbdev, mcu_fw->data, interface->phys, interface->num_pages,
+ interface->flags, interface->data_start, interface->data_end);
}
kbdev->csf.firmware_full_reload_needed = false;
kbase_csf_firmware_reload_trace_buffers_data(kbdev);
out:
- release_firmware(firmware);
return ret;
}
@@ -540,8 +546,8 @@ static inline bool entry_find_large_page_to_reuse(
* Return: 0 if successful, negative error code on failure
*/
static int parse_memory_setup_entry(struct kbase_device *kbdev,
- const struct firmware *fw,
- const u32 *entry, unsigned int size)
+ const struct kbase_csf_mcu_fw *const fw, const u32 *entry,
+ unsigned int size)
{
int ret = 0;
const u32 flags = entry[0];
@@ -773,7 +779,8 @@ out:
* @size: Size (in bytes) of the section
*/
static int parse_timeline_metadata_entry(struct kbase_device *kbdev,
- const struct firmware *fw, const u32 *entry, unsigned int size)
+ const struct kbase_csf_mcu_fw *const fw, const u32 *entry,
+ unsigned int size)
{
const u32 data_start = entry[0];
const u32 data_size = entry[1];
@@ -816,6 +823,57 @@ static int parse_timeline_metadata_entry(struct kbase_device *kbdev,
}
/**
+ * parse_build_info_metadata_entry() - Process a "build info metadata" section
+ * @kbdev: Kbase device structure
+ * @fw: Firmware image containing the section
+ * @entry: Pointer to the section
+ * @size: Size (in bytes) of the section
+ *
+ * This prints the git SHA of the firmware on firmware load.
+ *
+ * Return: 0 if successful, negative error code on failure
+ */
+static int parse_build_info_metadata_entry(struct kbase_device *kbdev,
+ const struct kbase_csf_mcu_fw *const fw,
+ const u32 *entry, unsigned int size)
+{
+ const u32 meta_start_addr = entry[0];
+ char *ptr = NULL;
+ size_t sha_pattern_len = strlen(BUILD_INFO_GIT_SHA_PATTERN);
+
+ /* Only print git SHA to avoid releasing sensitive information */
+ ptr = strstr(fw->data + meta_start_addr, BUILD_INFO_GIT_SHA_PATTERN);
+ /* Check that we won't overrun the found string */
+ if (ptr &&
+ strlen(ptr) >= BUILD_INFO_GIT_SHA_LEN + BUILD_INFO_GIT_DIRTY_LEN + sha_pattern_len) {
+ char git_sha[BUILD_INFO_GIT_SHA_LEN + BUILD_INFO_GIT_DIRTY_LEN + 1];
+ int i = 0;
+
+ /* Move ptr to start of SHA */
+ ptr += sha_pattern_len;
+ for (i = 0; i < BUILD_INFO_GIT_SHA_LEN; i++) {
+ /* Ensure that the SHA is made up of hex digits */
+ if (!isxdigit(ptr[i]))
+ break;
+
+ git_sha[i] = ptr[i];
+ }
+
+ /* Check if the next char indicates git SHA is dirty */
+ if (ptr[i] == ' ' || ptr[i] == '+') {
+ git_sha[i] = ptr[i];
+ i++;
+ }
+ git_sha[i] = '\0';
+
+ dev_info(kbdev->dev, "Mali firmware git_sha: %s\n", git_sha);
+ } else
+ dev_info(kbdev->dev, "Mali firmware git_sha not found or invalid\n");
+
+ return 0;
+}
+
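+/* Example of the metadata payload this parser looks for (hypothetical content;
+ * only the "git_sha: " pattern, its 40 hex digits and an optional trailing
+ * ' ' or '+' dirty marker are consumed):
+ *
+ *   "... git_sha: 0123456789abcdef0123456789abcdef01234567+ ..."
+ *
+ * which would be reported as:
+ *
+ *   Mali firmware git_sha: 0123456789abcdef0123456789abcdef01234567+
+ */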
+/**
* load_firmware_entry() - Process an entry from a firmware image
*
* @kbdev: Kbase device
@@ -831,9 +889,8 @@ static int parse_timeline_metadata_entry(struct kbase_device *kbdev,
*
* Return: 0 if successful, negative error code on failure
*/
-static int load_firmware_entry(struct kbase_device *kbdev,
- const struct firmware *fw,
- u32 offset, u32 header)
+static int load_firmware_entry(struct kbase_device *kbdev, const struct kbase_csf_mcu_fw *const fw,
+ u32 offset, u32 header)
{
const unsigned int type = entry_type(header);
unsigned int size = entry_size(header);
@@ -895,6 +952,13 @@ static int load_firmware_entry(struct kbase_device *kbdev,
return -EINVAL;
}
return parse_timeline_metadata_entry(kbdev, fw, entry, size);
+ case CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA:
+ if (size < BUILD_INFO_METADATA_SIZE_OFFSET + sizeof(*entry)) {
+ dev_err(kbdev->dev, "Build info metadata entry too short (size=%u)\n",
+ size);
+ return -EINVAL;
+ }
+ return parse_build_info_metadata_entry(kbdev, fw, entry, size);
}
if (!optional) {
@@ -1298,6 +1362,26 @@ u32 kbase_csf_firmware_global_output(
KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_output);
/**
+ * csf_doorbell_offset() - Calculate the offset to the CSF host doorbell
+ * @doorbell_nr: Doorbell number
+ *
+ * Return: CSF host register offset for the specified doorbell number.
+ */
+static u32 csf_doorbell_offset(int doorbell_nr)
+{
+ WARN_ON(doorbell_nr < 0);
+ WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL);
+
+ return CSF_HW_DOORBELL_PAGE_OFFSET + (doorbell_nr * CSF_HW_DOORBELL_PAGE_SIZE);
+}
+
+void kbase_csf_ring_doorbell(struct kbase_device *kbdev, int doorbell_nr)
+{
+ kbase_reg_write(kbdev, csf_doorbell_offset(doorbell_nr), (u32)1);
+}
+EXPORT_SYMBOL(kbase_csf_ring_doorbell);
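
For reference, the doorbell offset is a simple page-per-doorbell calculation. A small stand-alone sketch with illustrative constants (the real CSF_HW_DOORBELL_PAGE_OFFSET/SIZE values come from the CSF register map and are only assumed here):

#include <stdio.h>
#include <stdint.h>

/* Illustrative values only -- not the driver's actual register-map constants */
#define DOORBELL_PAGE_OFFSET 0x80000u
#define DOORBELL_PAGE_SIZE 0x10000u

static uint32_t doorbell_offset(int doorbell_nr)
{
	/* One page per doorbell, so the offset advances by a page per index */
	return DOORBELL_PAGE_OFFSET + (uint32_t)doorbell_nr * DOORBELL_PAGE_SIZE;
}

int main(void)
{
	int nr;

	for (nr = 0; nr < 4; nr++)
		printf("doorbell %d -> offset 0x%x\n", nr, (unsigned int)doorbell_offset(nr));
	return 0;
}
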
+
+/**
* handle_internal_firmware_fatal - Handler for CS internal firmware fault.
*
* @kbdev: Pointer to kbase device
@@ -1479,6 +1563,7 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
kbdev->csf.gpu_idle_dur_count);
}
+
static void global_init(struct kbase_device *const kbdev, u64 core_mask)
{
u32 const ack_irq_mask =
@@ -1660,7 +1745,7 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev)
if (version != kbdev->csf.global_iface.version)
dev_err(kbdev->dev, "Version check failed in firmware reboot.");
- KBASE_KTRACE_ADD(kbdev, FIRMWARE_REBOOT, NULL, 0u);
+ KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_REBOOT, NULL, 0u);
/* Tell MCU state machine to transit to next state */
kbdev->csf.firmware_reloaded = true;
@@ -1694,8 +1779,9 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_u
 		dev_warn(kbdev->dev, "No GPU clock, unexpected integration issue!");
spin_unlock(&kbdev->pm.clk_rtm.lock);
- dev_info(kbdev->dev, "Can't get the timestamp frequency, "
- "use cycle counter format with firmware idle hysteresis!");
+ dev_info(
+ kbdev->dev,
+ "Can't get the timestamp frequency, use cycle counter format with firmware idle hysteresis!");
}
/* Formula for dur_val = ((dur_us/MICROSECONDS_PER_SECOND) * freq_HZ) >> 10) */
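
The same formula is reused below for the shader-core power-off count. As a rough illustration of the conversion, here is a stand-alone sketch assuming a 100 MHz counter source; it multiplies before dividing to keep integer precision, which may differ slightly from the driver's exact order of operations:

#include <stdio.h>
#include <stdint.h>

/* dur_val = ((dur_us / 1e6) * freq_HZ) >> 10, done in integer arithmetic with
 * the multiplication first so sub-second durations are not truncated to zero.
 */
static uint32_t dur_us_to_count(uint64_t dur_us, uint64_t freq_hz)
{
	return (uint32_t)(((dur_us * freq_hz) / 1000000ull) >> 10);
}

int main(void)
{
	const uint64_t freq_hz = 100000000ull; /* illustrative 100 MHz source */

	/* 2000 us at 100 MHz is 200000 ticks; >> 10 gives 195 */
	printf("2000 us  -> count %u\n", (unsigned int)dur_us_to_count(2000, freq_hz));
	/* 10000 us is 1000000 ticks; >> 10 gives 976 */
	printf("10000 us -> count %u\n", (unsigned int)dur_us_to_count(10000, freq_hz));
	return 0;
}
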
@@ -1827,8 +1913,9 @@ static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u3
dev_warn(kbdev->dev, "No GPU clock, unexpected integration issue!");
spin_unlock(&kbdev->pm.clk_rtm.lock);
- dev_info(kbdev->dev, "Can't get the timestamp frequency, "
- "use cycle counter with MCU Core Poweroff timer!");
+ dev_info(
+ kbdev->dev,
+ "Can't get the timestamp frequency, use cycle counter with MCU shader Core Poweroff timer!");
}
/* Formula for dur_val = ((dur_us/1e6) * freq_HZ) >> 10) */
@@ -1852,7 +1939,14 @@ static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u3
u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev)
{
- return kbdev->csf.mcu_core_pwroff_dur_us;
+ u32 pwroff;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ pwroff = kbdev->csf.mcu_core_pwroff_dur_us;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ return pwroff;
}
u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur)
@@ -1865,7 +1959,7 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32
kbdev->csf.mcu_core_pwroff_dur_count = pwroff;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
- dev_dbg(kbdev->dev, "MCU Core Poweroff input update: 0x%.8x", pwroff);
+ dev_dbg(kbdev->dev, "MCU shader Core Poweroff input update: 0x%.8x", pwroff);
return pwroff;
}
@@ -1947,11 +2041,28 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
kbdev->csf.fw_timeout_ms =
kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT);
+ INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
+ INIT_LIST_HEAD(&kbdev->csf.firmware_config);
+ INIT_LIST_HEAD(&kbdev->csf.firmware_timeline_metadata);
+ INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
+ INIT_WORK(&kbdev->csf.firmware_reload_work,
+ kbase_csf_firmware_reload_worker);
+ INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker);
+ INIT_WORK(&kbdev->csf.coredump_work, coredump_worker);
+
+ mutex_init(&kbdev->csf.reg_lock);
+
+ kbdev->csf.fw = (struct kbase_csf_mcu_fw){ .data = NULL };
+
+ return 0;
+}
+
+int kbase_csf_firmware_late_init(struct kbase_device *kbdev)
+{
kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
#ifdef KBASE_PM_RUNTIME
if (kbase_pm_gpu_sleep_allowed(kbdev))
- kbdev->csf.gpu_idle_hysteresis_ms /=
- FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
+ kbdev->csf.gpu_idle_hysteresis_ms /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
#endif
WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
@@ -1959,7 +2070,7 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
kbdev, MALI_HOST_CONTROLS_SC_RAILS_IDLE_TIMER_US);
- /* Set to the lowest posssible value for FW to immediately write
+ /* Set to the lowest possible value for FW to immediately write
* to the power off register to disable the cores.
*/
kbdev->csf.mcu_core_pwroff_dur_count = 1;
@@ -1971,23 +2082,13 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US);
#endif
- INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
- INIT_LIST_HEAD(&kbdev->csf.firmware_config);
- INIT_LIST_HEAD(&kbdev->csf.firmware_timeline_metadata);
- INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
- INIT_WORK(&kbdev->csf.firmware_reload_work,
- kbase_csf_firmware_reload_worker);
- INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker);
- INIT_WORK(&kbdev->csf.coredump_work, coredump_worker);
-
- mutex_init(&kbdev->csf.reg_lock);
-
return 0;
}
-int kbase_csf_firmware_init(struct kbase_device *kbdev)
+int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
{
- const struct firmware *firmware;
+ const struct firmware *firmware = NULL;
+ struct kbase_csf_mcu_fw *const mcu_fw = &kbdev->csf.fw;
const u32 magic = FIRMWARE_HEADER_MAGIC;
u8 version_major, version_minor;
u32 version_hash;
@@ -2014,7 +2115,7 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
if (ret != 0) {
dev_err(kbdev->dev,
"Failed to setup the rb tree for managing shared interface segment\n");
- goto error;
+ goto err_out;
}
if (request_firmware(&firmware, fw_name, kbdev->dev) != 0) {
@@ -2022,43 +2123,59 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
"Failed to load firmware image '%s'\n",
fw_name);
ret = -ENOENT;
- goto error;
+ } else {
+ /* Try to save a copy and then release the loaded firmware image */
+ mcu_fw->size = firmware->size;
+ mcu_fw->data = vmalloc((unsigned long)mcu_fw->size);
+
+ if (mcu_fw->data == NULL) {
+ ret = -ENOMEM;
+ } else {
+ memcpy(mcu_fw->data, firmware->data, mcu_fw->size);
+ dev_dbg(kbdev->dev, "Firmware image (%zu-bytes) retained in csf.fw\n",
+ mcu_fw->size);
+ }
+
+ release_firmware(firmware);
}
- if (firmware->size < FIRMWARE_HEADER_LENGTH) {
+	/* If there was an error in loading or saving the image, branch to the error exit */
+ if (ret)
+ goto err_out;
+
+ if (mcu_fw->size < FIRMWARE_HEADER_LENGTH) {
dev_err(kbdev->dev, "Firmware too small\n");
ret = -EINVAL;
- goto error;
+ goto err_out;
}
- if (memcmp(firmware->data, &magic, sizeof(magic)) != 0) {
+ if (memcmp(mcu_fw->data, &magic, sizeof(magic)) != 0) {
dev_err(kbdev->dev, "Incorrect firmware magic\n");
ret = -EINVAL;
- goto error;
+ goto err_out;
}
- version_minor = firmware->data[4];
- version_major = firmware->data[5];
+ version_minor = mcu_fw->data[4];
+ version_major = mcu_fw->data[5];
if (version_major != FIRMWARE_HEADER_VERSION) {
dev_err(kbdev->dev,
"Firmware header version %d.%d not understood\n",
version_major, version_minor);
ret = -EINVAL;
- goto error;
+ goto err_out;
}
- memcpy(&version_hash, &firmware->data[8], sizeof(version_hash));
+ memcpy(&version_hash, &mcu_fw->data[8], sizeof(version_hash));
dev_notice(kbdev->dev, "Loading Mali firmware 0x%x", version_hash);
- memcpy(&entry_end_offset, &firmware->data[0x10],
- sizeof(entry_end_offset));
+ memcpy(&entry_end_offset, &mcu_fw->data[0x10], sizeof(entry_end_offset));
- if (entry_end_offset > firmware->size) {
+ if (entry_end_offset > mcu_fw->size) {
dev_err(kbdev->dev, "Firmware image is truncated\n");
ret = -EINVAL;
- goto error;
+ goto err_out;
}
entry_offset = FIRMWARE_HEADER_LENGTH;
@@ -2066,15 +2183,14 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
u32 header;
unsigned int size;
- memcpy(&header, &firmware->data[entry_offset], sizeof(header));
+ memcpy(&header, &mcu_fw->data[entry_offset], sizeof(header));
size = entry_size(header);
- ret = load_firmware_entry(kbdev, firmware, entry_offset,
- header);
+ ret = load_firmware_entry(kbdev, mcu_fw, entry_offset, header);
if (ret != 0) {
dev_err(kbdev->dev, "Failed to load firmware image\n");
- goto error;
+ goto err_out;
}
entry_offset += size;
}
@@ -2082,25 +2198,25 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
if (!kbdev->csf.shared_interface) {
dev_err(kbdev->dev, "Shared interface region not found\n");
ret = -EINVAL;
- goto error;
+ goto err_out;
} else {
ret = setup_shared_iface_static_region(kbdev);
if (ret != 0) {
dev_err(kbdev->dev, "Failed to insert a region for shared iface entry parsed from fw image\n");
- goto error;
+ goto err_out;
}
}
ret = kbase_csf_firmware_trace_buffers_init(kbdev);
if (ret != 0) {
dev_err(kbdev->dev, "Failed to initialize trace buffers\n");
- goto error;
+ goto err_out;
}
ret = kbasep_platform_fw_config_init(kbdev);
if (ret != 0) {
dev_err(kbdev->dev, "Failed to perform platform specific FW configuration");
- goto error;
+ goto err_out;
}
/* Make sure L2 cache is powered up */
@@ -2113,50 +2229,54 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
ret = parse_capabilities(kbdev);
if (ret != 0)
- goto error;
+ goto err_out;
ret = kbase_csf_doorbell_mapping_init(kbdev);
if (ret != 0)
- goto error;
+ goto err_out;
ret = kbase_csf_scheduler_init(kbdev);
if (ret != 0)
- goto error;
+ goto err_out;
ret = kbase_csf_setup_dummy_user_reg_page(kbdev);
if (ret != 0)
- goto error;
+ goto err_out;
ret = kbase_csf_timeout_init(kbdev);
if (ret != 0)
- goto error;
+ goto err_out;
ret = global_init_on_boot(kbdev);
if (ret != 0)
- goto error;
+ goto err_out;
ret = kbase_csf_firmware_cfg_init(kbdev);
if (ret != 0)
- goto error;
+ goto err_out;
ret = kbase_device_csf_iterator_trace_init(kbdev);
if (ret != 0)
- goto error;
+ goto err_out;
- /* Firmware loaded successfully */
- release_firmware(firmware);
- KBASE_KTRACE_ADD(kbdev, FIRMWARE_BOOT, NULL,
+ ret = kbase_csf_firmware_log_init(kbdev);
+ if (ret != 0) {
+ dev_err(kbdev->dev, "Failed to initialize FW trace (err %d)", ret);
+ goto err_out;
+ }
+
+ /* Firmware loaded successfully, ret = 0 */
+ KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_BOOT, NULL,
(((u64)version_hash) << 32) |
(((u64)version_major) << 8) | version_minor);
return 0;
-error:
- kbase_csf_firmware_term(kbdev);
- release_firmware(firmware);
+err_out:
+ kbase_csf_firmware_unload_term(kbdev);
return ret;
}
-void kbase_csf_firmware_term(struct kbase_device *kbdev)
+void kbase_csf_firmware_unload_term(struct kbase_device *kbdev)
{
unsigned long flags;
int ret = 0;
@@ -2167,6 +2287,8 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
WARN(ret, "failed to wait for GPU reset");
+ kbase_csf_firmware_log_term(kbdev);
+
kbase_csf_firmware_cfg_term(kbdev);
kbase_csf_timeout_term(kbdev);
@@ -2242,6 +2364,13 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
kfree(metadata);
}
+ if (kbdev->csf.fw.data) {
+ /* Free the copy of the firmware image */
+ vfree(kbdev->csf.fw.data);
+ kbdev->csf.fw.data = NULL;
+ dev_dbg(kbdev->dev, "Free retained image csf.fw (%zu-bytes)\n", kbdev->csf.fw.size);
+ }
+
/* This will also free up the region allocated for the shared interface
* entry parsed from the firmware image.
*/
@@ -2344,10 +2473,46 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev)
void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
{
- int err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK);
+ int err;
+
+ lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
+ err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK);
+
+ if (!err) {
+#define WAIT_TIMEOUT 5000 /* 5000 iterations of 10us delay = 50ms timeout */
+#define DELAY_TIME_IN_US 10
+ const int max_iterations = WAIT_TIMEOUT;
+ int loop;
+
+ /* Wait for the GPU to actually enter protected mode */
+ for (loop = 0; loop < max_iterations; loop++) {
+ unsigned long flags;
+ bool pmode_exited;
+
+ if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) &
+ GPU_STATUS_PROTECTED_MODE_ACTIVE)
+ break;
+
+ /* Check if GPU already exited the protected mode */
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ pmode_exited =
+ !kbase_csf_scheduler_protected_mode_in_use(kbdev);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ if (pmode_exited)
+ break;
+
+ udelay(DELAY_TIME_IN_US);
+ }
+
+ if (loop == max_iterations) {
+ dev_err(kbdev->dev, "Timeout for actual pmode entry after PROTM_ENTER ack");
+ err = -ETIMEDOUT;
+ }
+ }
if (err) {
- if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
kbase_reset_gpu(kbdev);
}
@@ -2555,7 +2720,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
gpu_map_prot =
KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
cpu_map_prot = pgprot_writecombine(cpu_map_prot);
- };
+ }
phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL);
if (!phys)
@@ -2656,3 +2821,4 @@ void kbase_csf_firmware_mcu_shared_mapping_term(
vunmap(csf_mapping->cpu_addr);
kfree(csf_mapping->phys);
}
+
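
The firmware init path is now split into early/late/load/unload stages rather than a single init/term pair. Below is a hedged sketch of the probe-time ordering this implies; the stub functions only model the sequencing, and the actual call sites live in the device-init code, which is not part of this patch:

#include <stdio.h>

/* Stand-ins for the driver entry points; each returns 0 on success */
static int firmware_early_init(void)   { puts("early_init:  lists, locks, work items"); return 0; }
static int firmware_late_init(void)    { puts("late_init:   idle/power-off timer counts"); return 0; }
static int firmware_load_init(void)    { puts("load_init:   request firmware, parse entries"); return 0; }
static void firmware_unload_term(void) { puts("unload_term: free retained image, tear down"); }

int main(void)
{
	/* Assumed probe ordering: early -> (backend late init) -> late -> load */
	if (firmware_early_init())
		return 1;
	if (firmware_late_init())
		return 1;
	if (firmware_load_init())
		return 1; /* load_init already unwinds via unload_term on its error path */

	/* ... device runs ... */

	firmware_unload_term();
	return 0;
}
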
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.h b/mali_kbase/csf/mali_kbase_csf_firmware.h
index 74bae39..edb1563 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware.h
+++ b/mali_kbase/csf/mali_kbase_csf_firmware.h
@@ -246,6 +246,7 @@ void kbase_csf_firmware_csg_input_mask(
u32 kbase_csf_firmware_csg_output(
const struct kbase_csf_cmd_stream_group_info *info, u32 offset);
+
/**
* struct kbase_csf_global_iface - Global CSF interface
* provided by the firmware.
@@ -324,24 +325,13 @@ u32 kbase_csf_firmware_global_input_read(
u32 kbase_csf_firmware_global_output(
const struct kbase_csf_global_iface *iface, u32 offset);
-/* Calculate the offset to the Hw doorbell page corresponding to the
- * doorbell number.
+/**
+ * kbase_csf_ring_doorbell() - Ring the doorbell
+ *
+ * @kbdev: An instance of the GPU platform device
+ * @doorbell_nr: Index of the HW doorbell page
*/
-static u32 csf_doorbell_offset(int doorbell_nr)
-{
- WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL);
-
- return CSF_HW_DOORBELL_PAGE_OFFSET +
- (doorbell_nr * CSF_HW_DOORBELL_PAGE_SIZE);
-}
-
-static inline void kbase_csf_ring_doorbell(struct kbase_device *kbdev,
- int doorbell_nr)
-{
- WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL);
-
- kbase_reg_write(kbdev, csf_doorbell_offset(doorbell_nr), (u32)1);
-}
+void kbase_csf_ring_doorbell(struct kbase_device *kbdev, int doorbell_nr);
/**
* kbase_csf_read_firmware_memory - Read a value in a GPU address
@@ -374,7 +364,7 @@ void kbase_csf_update_firmware_memory(struct kbase_device *kbdev,
u32 gpu_addr, u32 value);
/**
- * kbase_csf_firmware_early_init() - Early initializatin for the firmware.
+ * kbase_csf_firmware_early_init() - Early initialization for the firmware.
* @kbdev: Kbase device
*
* Initialize resources related to the firmware. Must be called at kbase probe.
@@ -384,22 +374,33 @@ void kbase_csf_update_firmware_memory(struct kbase_device *kbdev,
int kbase_csf_firmware_early_init(struct kbase_device *kbdev);
/**
- * kbase_csf_firmware_init() - Load the firmware for the CSF MCU
+ * kbase_csf_firmware_late_init() - Late initialization for the firmware.
+ * @kbdev: Kbase device
+ *
+ * Initialize resources related to the firmware. Must be called after the
+ * backend late init is done, and must be used at probe time only.
+ *
+ * Return: 0 if successful, negative error code on failure
+ */
+int kbase_csf_firmware_late_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_firmware_load_init() - Load the firmware for the CSF MCU
* @kbdev: Kbase device
*
* Request the firmware from user space and load it into memory.
*
* Return: 0 if successful, negative error code on failure
*/
-int kbase_csf_firmware_init(struct kbase_device *kbdev);
+int kbase_csf_firmware_load_init(struct kbase_device *kbdev);
/**
- * kbase_csf_firmware_term() - Unload the firmware
+ * kbase_csf_firmware_unload_term() - Unload the firmware
* @kbdev: Kbase device
*
- * Frees the memory allocated by kbase_csf_firmware_init()
+ * Frees the memory allocated by kbase_csf_firmware_load_init()
*/
-void kbase_csf_firmware_term(struct kbase_device *kbdev);
+void kbase_csf_firmware_unload_term(struct kbase_device *kbdev);
/**
* kbase_csf_firmware_ping - Send the ping request to firmware.
@@ -454,8 +455,8 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev);
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
- * This function needs to be called after kbase_csf_wait_protected_mode_enter()
- * to wait for the protected mode entry to complete. GPU reset is triggered if
+ * This function needs to be called after kbase_csf_enter_protected_mode() to
+ * wait for the GPU to actually enter protected mode. GPU reset is triggered if
* the wait is unsuccessful.
*/
void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev);
@@ -523,9 +524,9 @@ bool kbase_csf_firmware_is_mcu_in_sleep(struct kbase_device *kbdev);
#endif
/**
- * kbase_trigger_firmware_reload - Trigger the reboot of MCU firmware, for the
- * cold boot case firmware image would be
- * reloaded from filesystem into memory.
+ * kbase_csf_firmware_trigger_reload() - Trigger the reboot of MCU firmware; in
+ *                                        the cold boot case the firmware image
+ *                                        would be reloaded from the filesystem
+ *                                        into memory.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
@@ -738,18 +739,18 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev);
u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur);
/**
- * kbase_csf_firmware_get_mcu_core_pwroff_time - Get the MCU core power-off
+ * kbase_csf_firmware_get_mcu_core_pwroff_time - Get the MCU shader Core power-off
* time value
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
- * Return: the internally recorded MCU core power-off (nominal) value. The unit
+ * Return: the internally recorded MCU shader Core power-off (nominal) timeout
+ * value, in microseconds.
*/
u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev);
/**
- * kbase_csf_firmware_set_mcu_core_pwroff_time - Set the MCU core power-off
+ * kbase_csf_firmware_set_mcu_core_pwroff_time - Set the MCU shader Core power-off
* time value
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
@@ -766,7 +767,7 @@ u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev);
* returned value is the source configuration flag, and it is set to '1'
* when CYCLE_COUNTER alternative source is used.
*
- * The configured MCU core power-off timer will only have effect when the host
+ * The configured MCU shader Core power-off timer will only have effect when the host
* driver has delegated the shader cores' power management to MCU.
*
* Return: the actual internal core power-off timer value in register defined
@@ -805,4 +806,6 @@ static inline u32 kbase_csf_interface_version(u32 major, u32 minor, u32 patch)
* Return: 0 if success, or negative error code on failure.
*/
int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev);
+
+
#endif
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c
index b270c6e..ef8f328 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,7 @@
#include <mali_kbase.h>
#include "mali_kbase_csf_firmware_cfg.h"
#include <mali_kbase_reset_gpu.h>
+#include <linux/version.h>
#if CONFIG_SYSFS
#define CSF_FIRMWARE_CFG_SYSFS_DIR_NAME "firmware_config"
@@ -209,11 +210,18 @@ static struct attribute *fw_cfg_attrs[] = {
&fw_cfg_attr_cur,
NULL,
};
+#if (KERNEL_VERSION(5, 2, 0) <= LINUX_VERSION_CODE)
+ATTRIBUTE_GROUPS(fw_cfg);
+#endif
static struct kobj_type fw_cfg_kobj_type = {
.release = &fw_cfg_kobj_release,
.sysfs_ops = &fw_cfg_ops,
+#if (KERNEL_VERSION(5, 2, 0) <= LINUX_VERSION_CODE)
+ .default_groups = fw_cfg_groups,
+#else
.default_attrs = fw_cfg_attrs,
+#endif
};
int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev)
@@ -273,9 +281,8 @@ void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev)
}
int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev,
- const struct firmware *fw,
- const u32 *entry,
- unsigned int size, bool updatable)
+ const struct kbase_csf_mcu_fw *const fw,
+ const u32 *entry, unsigned int size, bool updatable)
{
const char *name = (char *)&entry[3];
struct firmware_config *config;
@@ -336,8 +343,8 @@ void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev)
}
int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev,
- const struct firmware *fw,
- const u32 *entry, unsigned int size)
+ const struct kbase_csf_mcu_fw *const fw,
+ const u32 *entry, unsigned int size)
{
return 0;
}
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h
index edf62ed..770fedb 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -67,10 +67,8 @@ void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev);
* Return: 0 if successful, negative error code on failure
*/
int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev,
- const struct firmware *fw,
- const u32 *entry,
- unsigned int size,
- bool updatable);
+ const struct kbase_csf_mcu_fw *const fw,
+ const u32 *entry, unsigned int size, bool updatable);
/**
* kbase_csf_firmware_cfg_find_config_address() - Get a FW config option address
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_log.c b/mali_kbase/csf/mali_kbase_csf_firmware_log.c
new file mode 100644
index 0000000..bfcc6c8
--- /dev/null
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_log.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include <mali_kbase.h>
+#include <csf/mali_kbase_csf_firmware_log.h>
+#include <csf/mali_kbase_csf_trace_buffer.h>
+#include <linux/debugfs.h>
+#include <linux/string.h>
+#include <linux/workqueue.h>
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int kbase_csf_firmware_log_enable_mask_read(void *data, u64 *val)
+{
+ struct kbase_device *kbdev = (struct kbase_device *)data;
+ struct firmware_trace_buffer *tb =
+ kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);
+
+ if (tb == NULL) {
+ dev_err(kbdev->dev, "Couldn't get the firmware trace buffer");
+ return -EIO;
+ }
+	/* The enabled traces are limited to 64 bits here, which is regarded as practical */
+ *val = kbase_csf_firmware_trace_buffer_get_active_mask64(tb);
+ return 0;
+}
+
+static int kbase_csf_firmware_log_enable_mask_write(void *data, u64 val)
+{
+ struct kbase_device *kbdev = (struct kbase_device *)data;
+ struct firmware_trace_buffer *tb =
+ kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);
+ u64 new_mask;
+ unsigned int enable_bits_count;
+
+ if (tb == NULL) {
+ dev_err(kbdev->dev, "Couldn't get the firmware trace buffer");
+ return -EIO;
+ }
+
+ /* Ignore unsupported types */
+ enable_bits_count = kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count(tb);
+ if (enable_bits_count > 64) {
+ dev_dbg(kbdev->dev, "Limit enabled bits count from %u to 64", enable_bits_count);
+ enable_bits_count = 64;
+ }
+	new_mask = (enable_bits_count < 64) ? (val & ((1ULL << enable_bits_count) - 1)) : val;
+
+ if (new_mask != kbase_csf_firmware_trace_buffer_get_active_mask64(tb))
+ return kbase_csf_firmware_trace_buffer_set_active_mask64(tb, new_mask);
+ else
+ return 0;
+}
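
The clamp above caps the writable mask at 64 trace-enable bits; building the corresponding all-ones mask needs care, because shifting a 64-bit value by 64 is undefined in C. A small sketch of a safe way to form the mask:

#include <stdio.h>
#include <stdint.h>

/* Mask of the low 'bits' bits, without ever shifting a 64-bit value by 64 */
static uint64_t low_bits_mask(unsigned int bits)
{
	return (bits < 64) ? ((1ull << bits) - 1) : UINT64_MAX;
}

int main(void)
{
	const uint64_t requested = UINT64_MAX; /* user asks for everything */

	/* e.g. the firmware reports 20 trace-enable bits: bits 20..63 are dropped */
	printf("20 bits: 0x%llx\n", (unsigned long long)(requested & low_bits_mask(20)));
	printf("64 bits: 0x%llx\n", (unsigned long long)(requested & low_bits_mask(64)));
	return 0;
}
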
+
+static int kbasep_csf_firmware_log_debugfs_open(struct inode *in, struct file *file)
+{
+ struct kbase_device *kbdev = in->i_private;
+
+ file->private_data = kbdev;
+ dev_dbg(kbdev->dev, "Opened firmware trace buffer dump debugfs file");
+
+ return 0;
+}
+
+static ssize_t kbasep_csf_firmware_log_debugfs_read(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ struct kbase_device *kbdev = file->private_data;
+ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
+ unsigned int n_read;
+ unsigned long not_copied;
+ /* Limit reads to the kernel dump buffer size */
+ size_t mem = MIN(size, FIRMWARE_LOG_DUMP_BUF_SIZE);
+ int ret;
+
+ struct firmware_trace_buffer *tb =
+ kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);
+
+ if (tb == NULL) {
+ dev_err(kbdev->dev, "Couldn't get the firmware trace buffer");
+ return -EIO;
+ }
+
+ if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0)
+ return -EBUSY;
+
+ /* Reading from userspace is only allowed in manual mode */
+ if (fw_log->mode != KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ n_read = kbase_csf_firmware_trace_buffer_read_data(tb, fw_log->dump_buf, mem);
+
+ /* Do the copy, if we have obtained some trace data */
+ not_copied = (n_read) ? copy_to_user(buf, fw_log->dump_buf, n_read) : 0;
+
+ if (not_copied) {
+ dev_err(kbdev->dev, "Couldn't copy trace buffer data to user space buffer");
+ ret = -EFAULT;
+ goto out;
+ }
+
+ *ppos += n_read;
+ ret = n_read;
+
+out:
+ atomic_set(&fw_log->busy, 0);
+ return ret;
+}
+
+static int kbase_csf_firmware_log_mode_read(void *data, u64 *val)
+{
+ struct kbase_device *kbdev = (struct kbase_device *)data;
+ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
+
+ *val = fw_log->mode;
+ return 0;
+}
+
+static int kbase_csf_firmware_log_mode_write(void *data, u64 val)
+{
+ struct kbase_device *kbdev = (struct kbase_device *)data;
+ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
+ int ret = 0;
+
+ if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0)
+ return -EBUSY;
+
+ if (val == fw_log->mode)
+ goto out;
+
+ switch (val) {
+ case KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL:
+ cancel_delayed_work_sync(&fw_log->poll_work);
+ break;
+ case KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT:
+ schedule_delayed_work(&fw_log->poll_work,
+ msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS));
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+
+ fw_log->mode = val;
+
+out:
+ atomic_set(&fw_log->busy, 0);
+ return ret;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_enable_mask_fops,
+ kbase_csf_firmware_log_enable_mask_read,
+ kbase_csf_firmware_log_enable_mask_write, "%llx\n");
+
+static const struct file_operations kbasep_csf_firmware_log_debugfs_fops = {
+ .owner = THIS_MODULE,
+ .open = kbasep_csf_firmware_log_debugfs_open,
+ .read = kbasep_csf_firmware_log_debugfs_read,
+ .llseek = no_llseek,
+};
+
+DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_mode_fops, kbase_csf_firmware_log_mode_read,
+ kbase_csf_firmware_log_mode_write, "%llu\n");
+
+#endif /* CONFIG_DEBUG_FS */
+
+static void kbase_csf_firmware_log_poll(struct work_struct *work)
+{
+ struct kbase_device *kbdev =
+ container_of(work, struct kbase_device, csf.fw_log.poll_work.work);
+ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
+
+ schedule_delayed_work(&fw_log->poll_work,
+ msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS));
+
+ kbase_csf_firmware_log_dump_buffer(kbdev);
+}
+
+int kbase_csf_firmware_log_init(struct kbase_device *kbdev)
+{
+ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
+
+ /* Add one byte for null-termination */
+ fw_log->dump_buf = kmalloc(FIRMWARE_LOG_DUMP_BUF_SIZE + 1, GFP_KERNEL);
+ if (fw_log->dump_buf == NULL)
+ return -ENOMEM;
+
+ /* Ensure null-termination for all strings */
+ fw_log->dump_buf[FIRMWARE_LOG_DUMP_BUF_SIZE] = 0;
+
+ fw_log->mode = KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL;
+
+ atomic_set(&fw_log->busy, 0);
+ INIT_DEFERRABLE_WORK(&fw_log->poll_work, kbase_csf_firmware_log_poll);
+
+#if defined(CONFIG_DEBUG_FS)
+ debugfs_create_file("fw_trace_enable_mask", 0644, kbdev->mali_debugfs_directory, kbdev,
+ &kbase_csf_firmware_log_enable_mask_fops);
+ debugfs_create_file("fw_traces", 0444, kbdev->mali_debugfs_directory, kbdev,
+ &kbasep_csf_firmware_log_debugfs_fops);
+ debugfs_create_file("fw_trace_mode", 0644, kbdev->mali_debugfs_directory, kbdev,
+ &kbase_csf_firmware_log_mode_fops);
+#endif /* CONFIG_DEBUG_FS */
+
+ return 0;
+}
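
In manual mode the log is drained by reading the fw_traces debugfs file. A user-space sketch of such a reader follows; the path assumes debugfs is mounted at /sys/kernel/debug and that the Mali debugfs directory is named mali0, which may differ per platform:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* Assumed location -- depends on where the Mali debugfs directory is created */
	const char *path = "/sys/kernel/debug/mali0/fw_traces";
	char buf[4096];
	size_t n;
	FILE *f = fopen(path, "rb");

	if (!f) {
		perror("fopen");
		return EXIT_FAILURE;
	}

	/* Each read drains up to one dump buffer's worth of trace data */
	while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
		fwrite(buf, 1, n, stdout);

	fclose(f);
	return EXIT_SUCCESS;
}
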
+
+void kbase_csf_firmware_log_term(struct kbase_device *kbdev)
+{
+ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
+
+ cancel_delayed_work_sync(&fw_log->poll_work);
+ kfree(fw_log->dump_buf);
+}
+
+void kbase_csf_firmware_log_dump_buffer(struct kbase_device *kbdev)
+{
+ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
+ u8 *buf = fw_log->dump_buf, *p, *pnewline, *pend, *pendbuf;
+ unsigned int read_size, remaining_size;
+ struct firmware_trace_buffer *tb =
+ kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);
+
+ if (tb == NULL) {
+ dev_dbg(kbdev->dev, "Can't get the trace buffer, firmware trace dump skipped");
+ return;
+ }
+
+ if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0)
+ return;
+
+ /* FW should only print complete messages, so there's no need to handle
+ * partial messages over multiple invocations of this function
+ */
+
+ p = buf;
+ pendbuf = &buf[FIRMWARE_LOG_DUMP_BUF_SIZE];
+
+ while ((read_size = kbase_csf_firmware_trace_buffer_read_data(tb, p, pendbuf - p))) {
+ pend = p + read_size;
+ p = buf;
+
+ while (p < pend && (pnewline = memchr(p, '\n', pend - p))) {
+ /* Null-terminate the string */
+ *pnewline = 0;
+
+ dev_err(kbdev->dev, "FW> %s", p);
+
+ p = pnewline + 1;
+ }
+
+ remaining_size = pend - p;
+
+ if (!remaining_size) {
+ p = buf;
+ } else if (remaining_size < FIRMWARE_LOG_DUMP_BUF_SIZE) {
+ /* Copy unfinished string to the start of the buffer */
+ memmove(buf, p, remaining_size);
+ p = &buf[remaining_size];
+ } else {
+ /* Print abnormally long string without newlines */
+ dev_err(kbdev->dev, "FW> %s", buf);
+ p = buf;
+ }
+ }
+
+ if (p != buf) {
+ /* Null-terminate and print last unfinished string */
+ *p = 0;
+ dev_err(kbdev->dev, "FW> %s", buf);
+ }
+
+ atomic_set(&fw_log->busy, 0);
+}
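
The dump loop above splits whatever the trace buffer returns into newline-terminated messages and carries an unfinished tail over to the next read. A self-contained sketch of that splitting logic on a plain byte buffer:

#include <stdio.h>
#include <string.h>

/* Print every '\n'-terminated message in buf[0..len) and return how many
 * trailing bytes belong to an unfinished message (moved to the buffer start).
 */
static size_t emit_lines(char *buf, size_t len)
{
	char *p = buf, *end = buf + len, *nl;

	while (p < end && (nl = memchr(p, '\n', (size_t)(end - p)))) {
		*nl = '\0';
		printf("FW> %s\n", p);
		p = nl + 1;
	}

	/* Carry the unfinished tail over so the next read appends to it */
	memmove(buf, p, (size_t)(end - p));
	return (size_t)(end - p);
}

int main(void)
{
	char buf[64];
	size_t carried;

	/* First chunk ends mid-message ... */
	carried = emit_lines(buf, (size_t)snprintf(buf, sizeof(buf), "boot ok\nGLB_ACK=0x3\npartial"));
	/* ... the second chunk completes it */
	carried += (size_t)snprintf(buf + carried, sizeof(buf) - carried, " message\n");
	emit_lines(buf, carried);
	return 0;
}
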
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_log.h b/mali_kbase/csf/mali_kbase_csf_firmware_log.h
new file mode 100644
index 0000000..6655f6f
--- /dev/null
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_log.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_CSF_FIRMWARE_LOG_H_
+#define _KBASE_CSF_FIRMWARE_LOG_H_
+
+#include <mali_kbase.h>
+
+/*
+ * Firmware log dumping buffer size.
+ */
+#define FIRMWARE_LOG_DUMP_BUF_SIZE PAGE_SIZE
+
+/**
+ * kbase_csf_firmware_log_init - Initialize firmware log handling.
+ *
+ * @kbdev: Pointer to the Kbase device
+ *
+ * Return: The initialization error code.
+ */
+int kbase_csf_firmware_log_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_firmware_log_term - Terminate firmware log handling.
+ *
+ * @kbdev: Pointer to the Kbase device
+ */
+void kbase_csf_firmware_log_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_firmware_log_dump_buffer - Read remaining data in the firmware log
+ * buffer and print it to dmesg.
+ *
+ * @kbdev: Pointer to the Kbase device
+ */
+void kbase_csf_firmware_log_dump_buffer(struct kbase_device *kbdev);
+
+#endif /* _KBASE_CSF_FIRMWARE_LOG_H_ */
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
index 8a961a7..d03cf73 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -32,6 +32,7 @@
#include "mali_kbase_csf_scheduler.h"
#include "mmu/mali_kbase_mmu.h"
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
+#include <backend/gpu/mali_kbase_model_dummy.h>
#include <linux/list.h>
#include <linux/slab.h>
@@ -103,6 +104,7 @@ struct dummy_firmware_interface {
(GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \
GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK)
+
static inline u32 input_page_read(const u32 *const input, const u32 offset)
{
WARN_ON(offset % sizeof(u32));
@@ -227,7 +229,8 @@ static int invent_capabilities(struct kbase_device *kbdev)
iface->version = 1;
iface->kbdev = kbdev;
iface->features = 0;
- iface->prfcnt_size = 64;
+ iface->prfcnt_size =
+ GLB_PRFCNT_SIZE_HARDWARE_SIZE_SET(0, KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE);
if (iface->version >= kbase_csf_interface_version(1, 1, 0)) {
/* update rate=1, max event size = 1<<8 = 256 */
@@ -371,37 +374,6 @@ u32 kbase_csf_firmware_csg_output(
}
KBASE_EXPORT_TEST_API(kbase_csf_firmware_csg_output);
-static void
-csf_firmware_prfcnt_process(const struct kbase_csf_global_iface *const iface,
- const u32 glb_req)
-{
- struct kbase_device *kbdev = iface->kbdev;
- u32 glb_ack = output_page_read(iface->output, GLB_ACK);
- /* If the value of GLB_REQ.PRFCNT_SAMPLE is different from the value of
- * GLB_ACK.PRFCNT_SAMPLE, the CSF will sample the performance counters.
- */
- if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_SAMPLE_MASK) {
- /* NO_MALI only uses the first buffer in the ring buffer. */
- input_page_write(iface->input, GLB_PRFCNT_EXTRACT, 0);
- output_page_write(iface->output, GLB_PRFCNT_INSERT, 1);
- kbase_reg_write(kbdev, GPU_COMMAND, GPU_COMMAND_PRFCNT_SAMPLE);
- }
-
- /* Propagate enable masks to model if request to enable. */
- if (glb_req & GLB_REQ_PRFCNT_ENABLE_MASK) {
- u32 tiler_en, l2_en, sc_en;
-
- tiler_en = input_page_read(iface->input, GLB_PRFCNT_TILER_EN);
- l2_en = input_page_read(iface->input, GLB_PRFCNT_MMU_L2_EN);
- sc_en = input_page_read(iface->input, GLB_PRFCNT_SHADER_EN);
-
- /* NO_MALI platform enabled all CSHW counters by default. */
- kbase_reg_write(kbdev, PRFCNT_TILER_EN, tiler_en);
- kbase_reg_write(kbdev, PRFCNT_MMU_L2_EN, l2_en);
- kbase_reg_write(kbdev, PRFCNT_SHADER_EN, sc_en);
- }
-}
-
void kbase_csf_firmware_global_input(
const struct kbase_csf_global_iface *const iface, const u32 offset,
const u32 value)
@@ -412,9 +384,17 @@ void kbase_csf_firmware_global_input(
input_page_write(iface->input, offset, value);
if (offset == GLB_REQ) {
- csf_firmware_prfcnt_process(iface, value);
- /* NO_MALI: Immediately acknowledge requests */
- output_page_write(iface->output, GLB_ACK, value);
+ /* NO_MALI: Immediately acknowledge requests - except for PRFCNT_ENABLE
+ * and PRFCNT_SAMPLE. These will be processed along with the
+ * corresponding performance counter registers when the global doorbell
+ * is rung in order to emulate the performance counter sampling behavior
+ * of the real firmware.
+ */
+ const u32 ack = output_page_read(iface->output, GLB_ACK);
+ const u32 req_mask = ~(GLB_REQ_PRFCNT_ENABLE_MASK | GLB_REQ_PRFCNT_SAMPLE_MASK);
+ const u32 toggled = (value ^ ack) & req_mask;
+
+ output_page_write(iface->output, GLB_ACK, ack ^ toggled);
}
}
KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input);
@@ -455,6 +435,99 @@ u32 kbase_csf_firmware_global_output(
KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_output);
/**
+ * csf_doorbell_prfcnt() - Process CSF performance counter doorbell request
+ *
+ * @kbdev: An instance of the GPU platform device
+ */
+static void csf_doorbell_prfcnt(struct kbase_device *kbdev)
+{
+ struct kbase_csf_global_iface *iface;
+ u32 req;
+ u32 ack;
+ u32 extract_index;
+
+ if (WARN_ON(!kbdev))
+ return;
+
+ iface = &kbdev->csf.global_iface;
+
+ req = input_page_read(iface->input, GLB_REQ);
+ ack = output_page_read(iface->output, GLB_ACK);
+ extract_index = input_page_read(iface->input, GLB_PRFCNT_EXTRACT);
+
+ /* Process enable bit toggle */
+ if ((req ^ ack) & GLB_REQ_PRFCNT_ENABLE_MASK) {
+ if (req & GLB_REQ_PRFCNT_ENABLE_MASK) {
+ /* Reset insert index to zero on enable bit set */
+ output_page_write(iface->output, GLB_PRFCNT_INSERT, 0);
+ WARN_ON(extract_index != 0);
+ }
+ ack ^= GLB_REQ_PRFCNT_ENABLE_MASK;
+ }
+
+ /* Process sample request */
+ if ((req ^ ack) & GLB_REQ_PRFCNT_SAMPLE_MASK) {
+ const u32 ring_size = GLB_PRFCNT_CONFIG_SIZE_GET(
+ input_page_read(iface->input, GLB_PRFCNT_CONFIG));
+ u32 insert_index = output_page_read(iface->output, GLB_PRFCNT_INSERT);
+
+ const bool prev_overflow = (req ^ ack) & GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK;
+ const bool prev_threshold = (req ^ ack) & GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK;
+
+ /* If ringbuffer is full toggle PRFCNT_OVERFLOW and skip sample */
+ if (insert_index - extract_index >= ring_size) {
+ WARN_ON(insert_index - extract_index > ring_size);
+ if (!prev_overflow)
+ ack ^= GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK;
+ } else {
+ struct gpu_model_prfcnt_en enable_maps = {
+ .fe = input_page_read(iface->input, GLB_PRFCNT_CSF_EN),
+ .tiler = input_page_read(iface->input, GLB_PRFCNT_TILER_EN),
+ .l2 = input_page_read(iface->input, GLB_PRFCNT_MMU_L2_EN),
+ .shader = input_page_read(iface->input, GLB_PRFCNT_SHADER_EN),
+ };
+
+ const u64 prfcnt_base =
+ input_page_read(iface->input, GLB_PRFCNT_BASE_LO) +
+ ((u64)input_page_read(iface->input, GLB_PRFCNT_BASE_HI) << 32);
+
+ u32 *sample_base = (u32 *)(uintptr_t)prfcnt_base +
+ (KBASE_DUMMY_MODEL_MAX_VALUES_PER_SAMPLE *
+ (insert_index % ring_size));
+
+ /* trigger sample dump in the dummy model */
+ gpu_model_prfcnt_dump_request(sample_base, enable_maps);
+
+ /* increment insert index and toggle PRFCNT_SAMPLE bit in ACK */
+ output_page_write(iface->output, GLB_PRFCNT_INSERT, ++insert_index);
+ ack ^= GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK;
+ }
+
+ /* When the ringbuffer reaches 50% capacity toggle PRFCNT_THRESHOLD */
+ if (!prev_threshold && (insert_index - extract_index >= (ring_size / 2)))
+ ack ^= GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK;
+ }
+
+ /* Update GLB_ACK */
+ output_page_write(iface->output, GLB_ACK, ack);
+}
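
The overflow and threshold checks above work on free-running 32-bit insert/extract indices, so ring occupancy is obtained by unsigned subtraction rather than by comparing raw values. A small sketch of that arithmetic, including the wrap-around case:

#include <stdio.h>
#include <stdint.h>

/* Occupancy of a ring with free-running indices; correct even across u32 wrap */
static uint32_t occupancy(uint32_t insert, uint32_t extract)
{
	return insert - extract;
}

int main(void)
{
	const uint32_t ring_size = 8;

	/* Normal case: 5 samples produced, 2 consumed -> 3 in flight */
	printf("occupancy %u, full=%d\n", (unsigned int)occupancy(5, 2),
	       occupancy(5, 2) >= ring_size);

	/* Wrapped case: insert has wrapped past UINT32_MAX, extract has not yet */
	printf("occupancy %u, full=%d\n", (unsigned int)occupancy(3, 0xfffffffdu),
	       occupancy(3, 0xfffffffdu) >= ring_size);
	return 0;
}
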
+
+void kbase_csf_ring_doorbell(struct kbase_device *kbdev, int doorbell_nr)
+{
+ WARN_ON(doorbell_nr < 0);
+ WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL);
+
+ if (WARN_ON(!kbdev))
+ return;
+
+ if (doorbell_nr == CSF_KERNEL_DOORBELL_NR) {
+ csf_doorbell_prfcnt(kbdev);
+ gpu_model_glb_request_job_irq(kbdev->model);
+ }
+}
+EXPORT_SYMBOL(kbase_csf_ring_doorbell);
+
+/**
* handle_internal_firmware_fatal - Handler for CS internal firmware fault.
*
* @kbdev: Pointer to kbase device
@@ -631,17 +704,16 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
kbdev->csf.gpu_idle_dur_count);
}
+
static void global_init(struct kbase_device *const kbdev, u64 core_mask)
{
- u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK |
- GLB_ACK_IRQ_MASK_PING_MASK |
- GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK |
- GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
- GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
- GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK |
- GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK |
- GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK |
- GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK;
+ u32 const ack_irq_mask =
+ GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK | GLB_ACK_IRQ_MASK_PING_MASK |
+ GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
+ GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
+ GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK |
+ GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK |
+ 0;
const struct kbase_csf_global_iface *const global_iface =
&kbdev->csf.global_iface;
@@ -797,8 +869,9 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_m
 		dev_warn(kbdev->dev, "No GPU clock, unexpected integration issue!");
spin_unlock(&kbdev->pm.clk_rtm.lock);
- dev_info(kbdev->dev, "Can't get the timestamp frequency, "
- "use cycle counter format with firmware idle hysteresis!");
+ dev_info(
+ kbdev->dev,
+ "Can't get the timestamp frequency, use cycle counter format with firmware idle hysteresis!");
}
/* Formula for dur_val = ((dur_ms/1000) * freq_HZ) >> 10) */
@@ -914,8 +987,9 @@ static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u3
dev_warn(kbdev->dev, "No GPU clock, unexpected integration issue!");
spin_unlock(&kbdev->pm.clk_rtm.lock);
- dev_info(kbdev->dev, "Can't get the timestamp frequency, "
- "use cycle counter with MCU Core Poweroff timer!");
+ dev_info(
+ kbdev->dev,
+ "Can't get the timestamp frequency, use cycle counter with MCU shader Core Poweroff timer!");
}
/* Formula for dur_val = ((dur_us/1e6) * freq_HZ) >> 10) */
@@ -939,7 +1013,14 @@ static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u3
u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev)
{
- return kbdev->csf.mcu_core_pwroff_dur_us;
+ u32 pwroff;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ pwroff = kbdev->csf.mcu_core_pwroff_dur_us;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ return pwroff;
}
u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur)
@@ -952,7 +1033,7 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32
kbdev->csf.mcu_core_pwroff_dur_count = pwroff;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
- dev_dbg(kbdev->dev, "MCU Core Poweroff input update: 0x%.8x", pwroff);
+ dev_dbg(kbdev->dev, "MCU shader Core Poweroff input update: 0x%.8x", pwroff);
return pwroff;
}
@@ -965,16 +1046,6 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
kbdev->csf.fw_timeout_ms =
kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT);
- kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
-#ifdef KBASE_PM_RUNTIME
- if (kbase_pm_gpu_sleep_allowed(kbdev))
- kbdev->csf.gpu_idle_hysteresis_ms /=
- FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
-#endif
- WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
- kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
- kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
-
INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
INIT_LIST_HEAD(&kbdev->csf.firmware_config);
INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
@@ -987,7 +1058,21 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
return 0;
}
-int kbase_csf_firmware_init(struct kbase_device *kbdev)
+int kbase_csf_firmware_late_init(struct kbase_device *kbdev)
+{
+ kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
+#ifdef KBASE_PM_RUNTIME
+ if (kbase_pm_gpu_sleep_allowed(kbdev))
+ kbdev->csf.gpu_idle_hysteresis_ms /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
+#endif
+ WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
+ kbdev->csf.gpu_idle_dur_count =
+ convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
+
+ return 0;
+}
+
+int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
{
int ret;
@@ -1053,11 +1138,11 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
return 0;
error:
- kbase_csf_firmware_term(kbdev);
+ kbase_csf_firmware_unload_term(kbdev);
return ret;
}
-void kbase_csf_firmware_term(struct kbase_device *kbdev)
+void kbase_csf_firmware_unload_term(struct kbase_device *kbdev)
{
cancel_work_sync(&kbdev->csf.fw_error_work);
@@ -1392,7 +1477,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
gpu_map_prot =
KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
cpu_map_prot = pgprot_writecombine(cpu_map_prot);
- };
+ }
phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL);
if (!phys)
@@ -1430,9 +1515,9 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
gpu_map_properties &= (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR);
gpu_map_properties |= gpu_map_prot;
- ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu,
- va_reg->start_pfn, &phys[0], num_pages,
- gpu_map_properties, KBASE_MEM_GROUP_CSF_FW);
+ ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn,
+ &phys[0], num_pages, gpu_map_properties,
+ KBASE_MEM_GROUP_CSF_FW, NULL);
if (ret)
goto mmu_insert_pages_error;
@@ -1493,3 +1578,4 @@ void kbase_csf_firmware_mcu_shared_mapping_term(
vunmap(csf_mapping->cpu_addr);
kfree(csf_mapping->phys);
}
+
diff --git a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c
index 4b3931f..1876d50 100644
--- a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c
+++ b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -154,8 +154,8 @@ u64 kbase_csf_heap_context_allocator_alloc(
struct kbase_csf_heap_context_allocator *const ctx_alloc)
{
struct kbase_context *const kctx = ctx_alloc->kctx;
- u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
- BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE;
+ u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR |
+ BASEP_MEM_NO_USER_FREE | BASE_MEM_PROT_CPU_RD;
u64 nr_pages = PFN_UP(HEAP_CTX_REGION_SIZE);
u64 heap_gpu_va = 0;
@@ -164,10 +164,6 @@ u64 kbase_csf_heap_context_allocator_alloc(
*/
const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
-#ifdef CONFIG_MALI_VECTOR_DUMP
- flags |= BASE_MEM_PROT_CPU_RD;
-#endif
-
mutex_lock(&ctx_alloc->lock);
/* If the pool of heap contexts wasn't already allocated then
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.c b/mali_kbase/csf/mali_kbase_csf_kcpu.c
index 2991060..1321d06 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu.c
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu.c
@@ -33,6 +33,10 @@
static DEFINE_SPINLOCK(kbase_csf_fence_lock);
#endif
+#ifdef CONFIG_MALI_FENCE_DEBUG
+#define FENCE_WAIT_TIMEOUT_MS 3000
+#endif
+
static void kcpu_queue_process(struct kbase_kcpu_command_queue *kcpu_queue,
bool drain_queue);
@@ -51,7 +55,7 @@ static int kbase_kcpu_map_import_prepare(
long i;
int ret = 0;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&kcpu_queue->lock);
/* Take the processes mmap lock */
down_read(kbase_mem_get_process_mmap_lock());
@@ -110,7 +114,7 @@ static int kbase_kcpu_unmap_import_prepare_internal(
struct kbase_va_region *reg;
int ret = 0;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&kcpu_queue->lock);
kbase_gpu_vm_lock(kctx);
@@ -178,7 +182,9 @@ static void kbase_jit_add_to_pending_alloc_list(
&kctx->csf.kcpu_queues.jit_blocked_queues;
struct kbase_kcpu_command_queue *blocked_queue;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&queue->lock);
+
+ spin_lock(&kctx->csf.kcpu_queues.jit_lock);
list_for_each_entry(blocked_queue,
&kctx->csf.kcpu_queues.jit_blocked_queues,
@@ -194,6 +200,8 @@ static void kbase_jit_add_to_pending_alloc_list(
}
list_add_tail(&queue->jit_blocked, target_list_head);
+
+ spin_unlock(&kctx->csf.kcpu_queues.jit_lock);
}
/**
@@ -223,10 +231,12 @@ static int kbase_kcpu_jit_allocate_process(
u32 i;
int ret;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&queue->lock);
if (alloc_info->blocked) {
+ spin_lock(&kctx->csf.kcpu_queues.jit_lock);
list_del(&queue->jit_blocked);
+ spin_unlock(&kctx->csf.kcpu_queues.jit_lock);
alloc_info->blocked = false;
}
@@ -250,6 +260,7 @@ static int kbase_kcpu_jit_allocate_process(
bool can_block = false;
struct kbase_kcpu_command const *jit_cmd;
+ spin_lock(&kctx->csf.kcpu_queues.jit_lock);
list_for_each_entry(jit_cmd, &kctx->csf.kcpu_queues.jit_cmds_head, info.jit_alloc.node) {
if (jit_cmd == cmd)
break;
@@ -268,6 +279,7 @@ static int kbase_kcpu_jit_allocate_process(
}
}
}
+ spin_unlock(&kctx->csf.kcpu_queues.jit_lock);
if (!can_block) {
/*
@@ -350,7 +362,7 @@ static int kbase_kcpu_jit_allocate_prepare(
int ret = 0;
u32 i;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&kcpu_queue->lock);
if (!data || count > kcpu_queue->kctx->jit_max_allocations ||
count > ARRAY_SIZE(kctx->jit_alloc)) {
@@ -388,8 +400,10 @@ static int kbase_kcpu_jit_allocate_prepare(
}
current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_ALLOC;
+ spin_lock(&kctx->csf.kcpu_queues.jit_lock);
list_add_tail(&current_command->info.jit_alloc.node,
&kctx->csf.kcpu_queues.jit_cmds_head);
+ spin_unlock(&kctx->csf.kcpu_queues.jit_lock);
current_command->info.jit_alloc.info = info;
current_command->info.jit_alloc.count = count;
current_command->info.jit_alloc.blocked = false;
@@ -411,7 +425,9 @@ static void kbase_kcpu_jit_allocate_finish(
struct kbase_kcpu_command_queue *queue,
struct kbase_kcpu_command *cmd)
{
- lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&queue->lock);
+
+ spin_lock(&queue->kctx->csf.kcpu_queues.jit_lock);
/* Remove this command from the jit_cmds_head list */
list_del(&cmd->info.jit_alloc.node);
@@ -425,6 +441,8 @@ static void kbase_kcpu_jit_allocate_finish(
cmd->info.jit_alloc.blocked = false;
}
+ spin_unlock(&queue->kctx->csf.kcpu_queues.jit_lock);
+
kfree(cmd->info.jit_alloc.info);
}
@@ -437,8 +455,6 @@ static void kbase_kcpu_jit_retry_pending_allocs(struct kbase_context *kctx)
{
struct kbase_kcpu_command_queue *blocked_queue;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
-
/*
* Reschedule all queues blocked by JIT_ALLOC commands.
* NOTE: This code traverses the list of blocked queues directly. It
@@ -446,10 +462,10 @@ static void kbase_kcpu_jit_retry_pending_allocs(struct kbase_context *kctx)
* time. This precondition is true since we're holding the
* kbase_csf_kcpu_queue_context.lock .
*/
- list_for_each_entry(blocked_queue,
- &kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked)
- kthread_queue_work(&kctx->csf.kcpu_queues.csf_kcpu_worker,
- &blocked_queue->work);
+ spin_lock(&kctx->csf.kcpu_queues.jit_lock);
+ list_for_each_entry(blocked_queue, &kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked)
+ kthread_queue_work(&blocked_queue->csf_kcpu_worker, &blocked_queue->work);
+ spin_unlock(&kctx->csf.kcpu_queues.jit_lock);
}
static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue,
@@ -466,7 +482,7 @@ static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue,
if (WARN_ON(!ids))
return -EINVAL;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&queue->lock);
KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END(queue->kctx->kbdev,
queue);
@@ -498,16 +514,18 @@ static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue,
queue->kctx->kbdev, queue, item_err, pages_used);
}
- /* Free the list of ids */
- kfree(ids);
-
/*
* Remove this command from the jit_cmds_head list and retry pending
* allocations.
*/
+ spin_lock(&kctx->csf.kcpu_queues.jit_lock);
list_del(&cmd->info.jit_free.node);
+ spin_unlock(&kctx->csf.kcpu_queues.jit_lock);
kbase_kcpu_jit_retry_pending_allocs(kctx);
+ /* Free the list of ids */
+ kfree(ids);
+
return rc;
}
@@ -523,7 +541,7 @@ static int kbase_kcpu_jit_free_prepare(
int ret;
u32 i;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&kcpu_queue->lock);
/* Sanity checks */
if (!count || count > ARRAY_SIZE(kctx->jit_alloc)) {
@@ -569,8 +587,10 @@ static int kbase_kcpu_jit_free_prepare(
}
current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_FREE;
+ spin_lock(&kctx->csf.kcpu_queues.jit_lock);
list_add_tail(&current_command->info.jit_free.node,
&kctx->csf.kcpu_queues.jit_cmds_head);
+ spin_unlock(&kctx->csf.kcpu_queues.jit_lock);
current_command->info.jit_free.ids = ids;
current_command->info.jit_free.count = count;
@@ -598,7 +618,7 @@ static int kbase_csf_queue_group_suspend_prepare(
int pinned_pages = 0, ret = 0;
struct kbase_va_region *reg;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&kcpu_queue->lock);
if (suspend_buf->size < csg_suspend_buf_size)
return -EINVAL;
@@ -700,10 +720,8 @@ static enum kbase_csf_event_callback_action event_cqs_callback(void *param)
{
struct kbase_kcpu_command_queue *kcpu_queue =
(struct kbase_kcpu_command_queue *)param;
- struct kbase_context *const kctx = kcpu_queue->kctx;
- kthread_queue_work(&kctx->csf.kcpu_queues.csf_kcpu_worker,
- &kcpu_queue->work);
+ kthread_queue_work(&kcpu_queue->csf_kcpu_worker, &kcpu_queue->work);
return KBASE_CSF_EVENT_CALLBACK_KEEP;
}
@@ -733,7 +751,7 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
{
u32 i;
- lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&queue->lock);
if (WARN_ON(!cqs_wait->objs))
return -EINVAL;
@@ -750,7 +768,7 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START(kbdev,
queue);
queue->command_started = true;
- KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_WAIT_START,
+ KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_WAIT_START,
queue, cqs_wait->nr_objs, 0);
}
@@ -772,7 +790,7 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
error = true;
}
- KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_WAIT_END,
+ KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_WAIT_END,
queue, cqs_wait->objs[i].addr,
error);
@@ -801,7 +819,7 @@ static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue,
struct base_cqs_wait_info *objs;
unsigned int nr_objs = cqs_wait_info->nr_objs;
- lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&queue->lock);
if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
return -EINVAL;
@@ -855,7 +873,7 @@ static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev,
{
unsigned int i;
- lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&queue->lock);
if (WARN_ON(!cqs_set->objs))
return;
@@ -879,7 +897,7 @@ static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev,
evt[BASEP_EVENT_VAL_INDEX]++;
kbase_phy_alloc_mapping_put(queue->kctx, mapping);
- KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_SET,
+ KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_SET,
queue, cqs_set->objs[i].addr,
evt[BASEP_EVENT_ERR_INDEX]);
}
@@ -896,11 +914,10 @@ static int kbase_kcpu_cqs_set_prepare(
struct base_kcpu_command_cqs_set_info *cqs_set_info,
struct kbase_kcpu_command *current_command)
{
- struct kbase_context *const kctx = kcpu_queue->kctx;
struct base_cqs_set *objs;
unsigned int nr_objs = cqs_set_info->nr_objs;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&kcpu_queue->lock);
if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
return -EINVAL;
@@ -950,7 +967,7 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev,
{
u32 i;
- lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&queue->lock);
if (WARN_ON(!cqs_wait_operation->objs))
return -EINVAL;
@@ -1037,7 +1054,7 @@ static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue
struct base_cqs_wait_operation_info *objs;
unsigned int nr_objs = cqs_wait_operation_info->nr_objs;
- lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&queue->lock);
if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
return -EINVAL;
@@ -1092,7 +1109,7 @@ static void kbase_kcpu_cqs_set_operation_process(
{
unsigned int i;
- lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&queue->lock);
if (WARN_ON(!cqs_set_operation->objs))
return;
@@ -1159,11 +1176,10 @@ static int kbase_kcpu_cqs_set_operation_prepare(
struct base_kcpu_command_cqs_set_operation_info *cqs_set_operation_info,
struct kbase_kcpu_command *current_command)
{
- struct kbase_context *const kctx = kcpu_queue->kctx;
struct base_cqs_set_operation_info *objs;
unsigned int nr_objs = cqs_set_operation_info->nr_objs;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&kcpu_queue->lock);
if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
return -EINVAL;
@@ -1202,12 +1218,15 @@ static void kbase_csf_fence_wait_callback(struct dma_fence *fence,
struct kbase_kcpu_command_queue *kcpu_queue = fence_info->kcpu_queue;
struct kbase_context *const kctx = kcpu_queue->kctx;
- KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, FENCE_WAIT_END, kcpu_queue,
+#ifdef CONFIG_MALI_FENCE_DEBUG
+ /* The fence has been signalled. Deactivate the fence-wait timeout timer. */
+ del_timer(&kcpu_queue->fence_timeout);
+#endif
+ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, kcpu_queue,
fence->context, fence->seqno);
/* Resume kcpu command queue processing. */
- kthread_queue_work(&kctx->csf.kcpu_queues.csf_kcpu_worker,
- &kcpu_queue->work);
+ kthread_queue_work(&kcpu_queue->csf_kcpu_worker, &kcpu_queue->work);
}
static void kbase_kcpu_fence_wait_cancel(
@@ -1216,7 +1235,7 @@ static void kbase_kcpu_fence_wait_cancel(
{
struct kbase_context *const kctx = kcpu_queue->kctx;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&kcpu_queue->lock);
if (WARN_ON(!fence_info->fence))
return;
@@ -1225,8 +1244,15 @@ static void kbase_kcpu_fence_wait_cancel(
bool removed = dma_fence_remove_callback(fence_info->fence,
&fence_info->fence_cb);
+#ifdef CONFIG_MALI_FENCE_DEBUG
+ /* Fence-wait cancelled or fence signalled. In the latter case
+ * the timer would already have been deactivated inside
+ * kbase_csf_fence_wait_callback().
+ */
+ del_timer_sync(&kcpu_queue->fence_timeout);
+#endif
if (removed)
- KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, FENCE_WAIT_END,
+ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END,
kcpu_queue, fence_info->fence->context,
fence_info->fence->seqno);
}
@@ -1238,6 +1264,80 @@ static void kbase_kcpu_fence_wait_cancel(
fence_info->fence = NULL;
}
+#ifdef CONFIG_MALI_FENCE_DEBUG
+/**
+ * fence_timeout_callback() - Timeout callback function for fence-wait
+ *
+ * @timer: Timer struct
+ *
+ * Context and seqno of the timed-out fence will be displayed in dmesg.
+ * If the fence has already been signalled, a work item will be enqueued to
+ * process the fence-wait without displaying debugging information.
+ */
+static void fence_timeout_callback(struct timer_list *timer)
+{
+ struct kbase_kcpu_command_queue *kcpu_queue =
+ container_of(timer, struct kbase_kcpu_command_queue, fence_timeout);
+ struct kbase_context *const kctx = kcpu_queue->kctx;
+ struct kbase_kcpu_command *cmd = &kcpu_queue->commands[kcpu_queue->start_offset];
+ struct kbase_kcpu_command_fence_info *fence_info;
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ struct fence *fence;
+#else
+ struct dma_fence *fence;
+#endif
+ struct kbase_sync_fence_info info;
+
+ if (cmd->type != BASE_KCPU_COMMAND_TYPE_FENCE_WAIT) {
+ dev_err(kctx->kbdev->dev,
+ "%s: Unexpected command type %d in ctx:%d_%d kcpu queue:%u", __func__,
+ cmd->type, kctx->tgid, kctx->id, kcpu_queue->id);
+ return;
+ }
+
+ fence_info = &cmd->info.fence;
+
+ fence = kbase_fence_get(fence_info);
+ if (!fence) {
+ dev_err(kctx->kbdev->dev, "no fence found in ctx:%d_%d kcpu queue:%u", kctx->tgid,
+ kctx->id, kcpu_queue->id);
+ return;
+ }
+
+ kbase_sync_fence_info_get(fence, &info);
+
+ if (info.status == 1) {
+ kthread_queue_work(&kcpu_queue->csf_kcpu_worker, &kcpu_queue->work);
+ } else if (info.status == 0) {
+ dev_warn(kctx->kbdev->dev, "fence has not been signalled in %ums",
+ FENCE_WAIT_TIMEOUT_MS);
+ dev_warn(kctx->kbdev->dev,
+ "ctx:%d_%d kcpu queue:%u still waiting for fence[%pK] context#seqno:%s",
+ kctx->tgid, kctx->id, kcpu_queue->id, fence, info.name);
+ } else {
+ dev_warn(kctx->kbdev->dev, "fence has an error");
+ dev_warn(kctx->kbdev->dev,
+ "ctx:%d_%d kcpu queue:%u faulty fence[%pK] context#seqno:%s error(%d)",
+ kctx->tgid, kctx->id, kcpu_queue->id, fence, info.name, info.status);
+ }
+
+ kbase_fence_put(fence);
+}
+
+/**
+ * fence_timeout_start() - Start a timer to check fence-wait timeout
+ *
+ * @cmd: KCPU command queue
+ *
+ * Activate a timer to check whether a fence-wait command in the queue
+ * gets completed within FENCE_WAIT_TIMEOUT_MS
+ */
+static void fence_timeout_start(struct kbase_kcpu_command_queue *cmd)
+{
+ mod_timer(&cmd->fence_timeout, jiffies + msecs_to_jiffies(FENCE_WAIT_TIMEOUT_MS));
+}
+#endif
+
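
Editor's note: the CONFIG_MALI_FENCE_DEBUG additions above arm a per-queue watchdog when a fence wait is installed (fence_timeout_start(), called further down from kbase_kcpu_fence_wait_process()) and delete it again when the fence signals or the wait is cancelled. A minimal sketch of that watchdog pattern, using the upstream timer API directly; the driver itself goes through its kbase_timer_setup() compat wrapper, and the timeout value below is only illustrative:

/* Minimal sketch, not the driver's code: a fence-wait watchdog built on
 * the stock <linux/timer.h> API.
 */
#include <linux/timer.h>
#include <linux/jiffies.h>
#include <linux/printk.h>

#define EXAMPLE_WAIT_TIMEOUT_MS 3000    /* illustrative value only */

struct example_waiter {
        struct timer_list watchdog;
};

static void example_watchdog_expired(struct timer_list *timer)
{
        struct example_waiter *w = from_timer(w, timer, watchdog);

        /* Runs in timer (softirq) context: only report here, never block. */
        pr_warn("example: wait did not complete within %ums\n",
                EXAMPLE_WAIT_TIMEOUT_MS);
        (void)w;
}

static void example_waiter_init(struct example_waiter *w)
{
        timer_setup(&w->watchdog, example_watchdog_expired, 0);
}

static void example_wait_begin(struct example_waiter *w)
{
        /* Arm (or re-arm) the watchdog when the wait starts. */
        mod_timer(&w->watchdog,
                  jiffies + msecs_to_jiffies(EXAMPLE_WAIT_TIMEOUT_MS));
}

static void example_wait_end(struct example_waiter *w)
{
        /* Completion or cancellation path: make sure the callback has
         * finished before the waiter can be freed.
         */
        del_timer_sync(&w->watchdog);
}
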
/**
* kbase_kcpu_fence_wait_process() - Process the kcpu fence wait command
*
@@ -1257,8 +1357,9 @@ static int kbase_kcpu_fence_wait_process(
#else
struct dma_fence *fence;
#endif
+ struct kbase_context *const kctx = kcpu_queue->kctx;
- lockdep_assert_held(&kcpu_queue->kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&kcpu_queue->lock);
if (WARN_ON(!fence_info->fence))
return -EINVAL;
@@ -1272,14 +1373,26 @@ static int kbase_kcpu_fence_wait_process(
&fence_info->fence_cb,
kbase_csf_fence_wait_callback);
- KBASE_KTRACE_ADD_CSF_KCPU(kcpu_queue->kctx->kbdev,
- FENCE_WAIT_START, kcpu_queue,
+ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev,
+ KCPU_FENCE_WAIT_START, kcpu_queue,
fence->context, fence->seqno);
fence_status = cb_err;
- if (cb_err == 0)
+ if (cb_err == 0) {
kcpu_queue->fence_wait_processed = true;
- else if (cb_err == -ENOENT)
+#ifdef CONFIG_MALI_FENCE_DEBUG
+ fence_timeout_start(kcpu_queue);
+#endif
+ } else if (cb_err == -ENOENT) {
fence_status = dma_fence_get_status(fence);
+ if (!fence_status) {
+ struct kbase_sync_fence_info info;
+
+ kbase_sync_fence_info_get(fence, &info);
+ dev_warn(kctx->kbdev->dev,
+ "Unexpected status for fence %s of ctx:%d_%d kcpu queue:%u",
+ info.name, kctx->tgid, kctx->id, kcpu_queue->id);
+ }
+ }
}
/*
@@ -1302,7 +1415,6 @@ static int kbase_kcpu_fence_wait_prepare(
struct base_kcpu_command_fence_info *fence_info,
struct kbase_kcpu_command *current_command)
{
- struct kbase_context *const kctx = kcpu_queue->kctx;
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
struct fence *fence_in;
#else
@@ -1310,7 +1422,7 @@ static int kbase_kcpu_fence_wait_prepare(
#endif
struct base_fence fence;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&kcpu_queue->lock);
if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence),
sizeof(fence)))
@@ -1324,7 +1436,6 @@ static int kbase_kcpu_fence_wait_prepare(
current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_WAIT;
current_command->info.fence.fence = fence_in;
current_command->info.fence.kcpu_queue = kcpu_queue;
-
return 0;
}
@@ -1341,14 +1452,16 @@ static int kbase_kcpu_fence_signal_process(
ret = dma_fence_signal(fence_info->fence);
if (unlikely(ret < 0)) {
- dev_warn(kctx->kbdev->dev,
- "fence_signal() failed with %d\n", ret);
+ dev_warn(kctx->kbdev->dev, "dma_fence(%d) has been signalled already\n", ret);
+ /* Treated as a success */
+ ret = 0;
}
- KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, FENCE_SIGNAL, kcpu_queue,
+ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_SIGNAL, kcpu_queue,
fence_info->fence->context,
fence_info->fence->seqno);
+ /* dma_fence refcount needs to be decreased to release it. */
dma_fence_put(fence_info->fence);
fence_info->fence = NULL;
@@ -1360,7 +1473,6 @@ static int kbase_kcpu_fence_signal_prepare(
struct base_kcpu_command_fence_info *fence_info,
struct kbase_kcpu_command *current_command)
{
- struct kbase_context *const kctx = kcpu_queue->kctx;
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
struct fence *fence_out;
#else
@@ -1371,7 +1483,7 @@ static int kbase_kcpu_fence_signal_prepare(
int ret = 0;
int fd;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&kcpu_queue->lock);
if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence),
sizeof(fence)))
@@ -1399,9 +1511,6 @@ static int kbase_kcpu_fence_signal_prepare(
/* create a sync_file fd representing the fence */
sync_file = sync_file_create(fence_out);
if (!sync_file) {
-#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE)
- dma_fence_put(fence_out);
-#endif
ret = -ENOMEM;
goto file_create_fail;
}
@@ -1433,8 +1542,16 @@ static int kbase_kcpu_fence_signal_prepare(
fd_flags_fail:
fput(sync_file->file);
file_create_fail:
+ /*
+ * Upon failure, the dma_fence refcount that was increased by
+ * dma_fence_get() or sync_file_create() needs to be decreased
+ * to release the fence.
+ */
dma_fence_put(fence_out);
+ current_command->info.fence.fence = NULL;
+ kfree(fence_out);
+
return ret;
}
#endif /* CONFIG_SYNC_FILE */
@@ -1444,11 +1561,9 @@ static void kcpu_queue_process_worker(struct kthread_work *data)
struct kbase_kcpu_command_queue *queue = container_of(data,
struct kbase_kcpu_command_queue, work);
- mutex_lock(&queue->kctx->csf.kcpu_queues.lock);
-
+ mutex_lock(&queue->lock);
kcpu_queue_process(queue, false);
-
- mutex_unlock(&queue->kctx->csf.kcpu_queues.lock);
+ mutex_unlock(&queue->lock);
}
static int delete_queue(struct kbase_context *kctx, u32 id)
@@ -1461,9 +1576,20 @@ static int delete_queue(struct kbase_context *kctx, u32 id)
struct kbase_kcpu_command_queue *queue =
kctx->csf.kcpu_queues.array[id];
- KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_DESTROY,
+ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_DELETE,
queue, queue->num_pending_cmds, queue->cqs_wait_count);
+ /* Disassociate the queue from the system to prevent further
+ * submissions. Draining pending commands would be acceptable
+ * even if a new queue is created using the same ID.
+ */
+ kctx->csf.kcpu_queues.array[id] = NULL;
+ bitmap_clear(kctx->csf.kcpu_queues.in_use, id, 1);
+
+ mutex_unlock(&kctx->csf.kcpu_queues.lock);
+
+ mutex_lock(&queue->lock);
+
/* Drain the remaining work for this queue first and go past
* all the waits.
*/
@@ -1475,17 +1601,16 @@ static int delete_queue(struct kbase_context *kctx, u32 id)
/* All CQS wait commands should have been cleaned up */
WARN_ON(queue->cqs_wait_count);
- kctx->csf.kcpu_queues.array[id] = NULL;
- bitmap_clear(kctx->csf.kcpu_queues.in_use, id, 1);
-
/* Fire the tracepoint with the mutex held to enforce correct
* ordering with the summary stream.
*/
KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE(kctx->kbdev, queue);
- mutex_unlock(&kctx->csf.kcpu_queues.lock);
+ mutex_unlock(&queue->lock);
+
+ kbase_destroy_kworker_stack(&queue->csf_kcpu_worker);
- kthread_cancel_work_sync(&queue->work);
+ mutex_destroy(&queue->lock);
kfree(queue);
} else {
@@ -1552,7 +1677,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
bool process_next = true;
size_t i;
- lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&queue->lock);
for (i = 0; i != queue->num_pending_cmds; ++i) {
struct kbase_kcpu_command *cmd =
@@ -1971,13 +2096,13 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
}
mutex_lock(&kctx->csf.kcpu_queues.lock);
+ queue = kctx->csf.kcpu_queues.array[enq->id];
+ mutex_unlock(&kctx->csf.kcpu_queues.lock);
- if (!kctx->csf.kcpu_queues.array[enq->id]) {
- ret = -EINVAL;
- goto out;
- }
+ if (queue == NULL)
+ return -EINVAL;
- queue = kctx->csf.kcpu_queues.array[enq->id];
+ mutex_lock(&queue->lock);
if (kcpu_queue_get_space(queue) < enq->nr_commands) {
ret = -EBUSY;
@@ -1992,7 +2117,7 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
* for the possibility to roll back.
*/
- for (i = 0; (i != enq->nr_commands) && !ret; ++i, ++kctx->csf.kcpu_queues.num_cmds) {
+ for (i = 0; (i != enq->nr_commands) && !ret; ++i) {
struct kbase_kcpu_command *kcpu_cmd =
&queue->commands[(u8)(queue->start_offset + queue->num_pending_cmds + i)];
struct base_kcpu_command command;
@@ -2015,7 +2140,7 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
}
}
- kcpu_cmd->enqueue_ts = kctx->csf.kcpu_queues.num_cmds;
+ kcpu_cmd->enqueue_ts = atomic64_read(&kctx->csf.kcpu_queues.num_cmds);
switch (command.type) {
case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:
#if IS_ENABLED(CONFIG_SYNC_FILE)
@@ -2086,6 +2211,8 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
ret = -EINVAL;
break;
}
+
+ atomic64_inc(&kctx->csf.kcpu_queues.num_cmds);
}
if (!ret) {
@@ -2102,15 +2229,14 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
}
queue->num_pending_cmds += enq->nr_commands;
- kthread_queue_work(&kctx->csf.kcpu_queues.csf_kcpu_worker,
- &queue->work);
+ kthread_queue_work(&queue->csf_kcpu_worker, &queue->work);
} else {
/* Roll back the number of enqueued commands */
- kctx->csf.kcpu_queues.num_cmds -= i;
+ atomic64_sub(i, &kctx->csf.kcpu_queues.num_cmds);
}
out:
- mutex_unlock(&kctx->csf.kcpu_queues.lock);
+ mutex_unlock(&queue->lock);
return ret;
}
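
Editor's note: the enqueue hunk above narrows kctx->csf.kcpu_queues.lock to the array lookup, moves all further work under the new per-queue mutex, and turns num_cmds into an atomic64 so it no longer needs the context-wide lock. A minimal sketch of that lookup-then-lock shape, with illustrative names only (queue lifetime across the unlock is handled by delete_queue() in the driver and is out of scope here):

/* Sketch of the lookup-then-lock shape adopted above; names are
 * illustrative and do not come from the driver.
 */
#include <linux/atomic.h>
#include <linux/errno.h>
#include <linux/mutex.h>

#define EX_MAX_QUEUES 256

struct ex_queue {
        struct mutex lock;              /* protects this queue only */
        unsigned int pending;
};

struct ex_context {
        struct mutex table_lock;        /* protects the lookup table only */
        struct ex_queue *queues[EX_MAX_QUEUES];
        atomic64_t num_cmds;            /* global ordering counter, lock-free */
};

static int ex_enqueue(struct ex_context *ctx, unsigned int id)
{
        struct ex_queue *q;

        if (id >= EX_MAX_QUEUES)
                return -EINVAL;

        /* Hold the table lock only long enough to resolve the queue. */
        mutex_lock(&ctx->table_lock);
        q = ctx->queues[id];
        mutex_unlock(&ctx->table_lock);

        if (!q)
                return -EINVAL;

        /* Per-queue state is protected by the queue's own mutex, so slow
         * work on one queue no longer blocks the whole context.
         */
        mutex_lock(&q->lock);
        q->pending++;
        atomic64_inc(&ctx->num_cmds);
        mutex_unlock(&q->lock);

        return 0;
}
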
@@ -2124,20 +2250,9 @@ int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx)
for (idx = 0; idx < KBASEP_MAX_KCPU_QUEUES; ++idx)
kctx->csf.kcpu_queues.array[idx] = NULL;
- kthread_init_worker(&kctx->csf.kcpu_queues.csf_kcpu_worker);
- kctx->csf.kcpu_queues.csf_kcpu_thread = kbase_create_realtime_thread(
- kctx->kbdev,
- kthread_worker_fn,
- &kctx->csf.kcpu_queues.csf_kcpu_worker,
- "mali_kbase_csf_kcpu");
-
- if (IS_ERR(kctx->csf.kcpu_queues.csf_kcpu_thread)) {
- return -ENOMEM;
- }
-
mutex_init(&kctx->csf.kcpu_queues.lock);
- kctx->csf.kcpu_queues.num_cmds = 0;
+ atomic64_set(&kctx->csf.kcpu_queues.num_cmds, 0);
return 0;
}
@@ -2155,9 +2270,6 @@ void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx)
(void)delete_queue(kctx, id);
}
- kthread_flush_worker(&kctx->csf.kcpu_queues.csf_kcpu_worker);
- kthread_stop(kctx->csf.kcpu_queues.csf_kcpu_thread);
-
mutex_destroy(&kctx->csf.kcpu_queues.lock);
}
@@ -2201,8 +2313,17 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
goto out;
}
+ ret = kbase_create_realtime_thread(
+ kctx->kbdev, kthread_worker_fn, &queue->csf_kcpu_worker, "mali_kbase_csf_kcpu_%i", idx);
+
+ if (ret) {
+ kfree(queue);
+ goto out;
+ }
+
bitmap_set(kctx->csf.kcpu_queues.in_use, idx, 1);
kctx->csf.kcpu_queues.array[idx] = queue;
+ mutex_init(&queue->lock);
queue->kctx = kctx;
queue->start_offset = 0;
queue->num_pending_cmds = 0;
@@ -2226,8 +2347,11 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE(kctx->kbdev, queue, queue->id, kctx->id,
queue->num_pending_cmds);
- KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_NEW, queue,
+ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_CREATE, queue,
queue->fence_context, 0);
+#ifdef CONFIG_MALI_FENCE_DEBUG
+ kbase_timer_setup(&queue->fence_timeout, fence_timeout_callback);
+#endif
out:
mutex_unlock(&kctx->csf.kcpu_queues.lock);
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.h b/mali_kbase/csf/mali_kbase_csf_kcpu.h
index 417a096..f982f56 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu.h
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu.h
@@ -47,9 +47,9 @@ struct kbase_kcpu_command_import_info {
* struct kbase_kcpu_command_fence_info - Structure which holds information
* about the fence object enqueued in the kcpu command queue
*
- * @fence_cb: Fence callback
- * @fence: Fence
- * @kcpu_queue: kcpu command queue
+ * @fence_cb: Fence callback
+ * @fence: Fence
+ * @kcpu_queue: kcpu command queue
*/
struct kbase_kcpu_command_fence_info {
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
@@ -184,7 +184,7 @@ struct kbase_suspend_copy_buffer {
};
/**
- * struct base_kcpu_command_group_suspend - structure which contains
+ * struct kbase_kcpu_command_group_suspend_info - structure which contains
* suspend buffer data captured for a suspended queue group.
*
* @sus_buf: Pointer to the structure which contains details of the
@@ -198,7 +198,7 @@ struct kbase_kcpu_command_group_suspend_info {
/**
- * struct kbase_cpu_command - Command which is to be part of the kernel
+ * struct kbase_kcpu_command - Command which is to be part of the kernel
* command queue
*
* @type: Type of the command.
@@ -236,9 +236,12 @@ struct kbase_kcpu_command {
/**
* struct kbase_kcpu_command_queue - a command queue executed by the kernel
*
+ * @lock: Lock to protect accesses to this queue.
* @kctx: The context to which this command queue belongs.
* @commands: Array of commands which have been successfully
* enqueued to this command queue.
+ * @csf_kcpu_worker: Dedicated worker for processing kernel CPU command
+ * queues.
* @work: struct work_struct which contains a pointer to
* the function which handles processing of kcpu
* commands enqueued into a kcpu command queue;
@@ -271,10 +274,13 @@ struct kbase_kcpu_command {
* or without errors since last cleaned.
* @jit_blocked: Used to keep track of command queues blocked
* by a pending JIT allocation command.
+ * @fence_timeout: Timer used to detect the fence wait timeout.
*/
struct kbase_kcpu_command_queue {
+ struct mutex lock;
struct kbase_context *kctx;
struct kbase_kcpu_command commands[KBASEP_KCPU_QUEUE_SIZE];
+ struct kthread_worker csf_kcpu_worker;
struct kthread_work work;
u8 start_offset;
u8 id;
@@ -287,6 +293,9 @@ struct kbase_kcpu_command_queue {
bool command_started;
struct list_head jit_blocked;
bool has_error;
+#ifdef CONFIG_MALI_FENCE_DEBUG
+ struct timer_list fence_timeout;
+#endif /* CONFIG_MALI_FENCE_DEBUG */
};
/**
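
Editor's note: the header changes above embed the lock, the kthread worker and the fence-timeout timer in struct kbase_kcpu_command_queue, giving each KCPU queue its own worker thread instead of one shared per context. A rough sketch of that embedded-worker lifecycle using only the stock kthread API; the driver instead creates the thread through kbase_create_realtime_thread() and tears it down with kbase_destroy_kworker_stack(), which are assumed here to wrap comparable init/run/stop steps:

/* Sketch of an embedded per-object kthread_worker using the upstream
 * kthread API only; names are illustrative.
 */
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/kthread.h>

struct ex_kcpu_queue {
        struct kthread_worker worker;   /* embedded, one per queue */
        struct task_struct *thread;
        struct kthread_work work;
};

static void ex_queue_process(struct kthread_work *work)
{
        struct ex_kcpu_queue *q = container_of(work, struct ex_kcpu_queue, work);

        /* Process pending commands for this queue only. */
        (void)q;
}

static int ex_queue_init(struct ex_kcpu_queue *q, int id)
{
        kthread_init_worker(&q->worker);
        kthread_init_work(&q->work, ex_queue_process);

        q->thread = kthread_run(kthread_worker_fn, &q->worker,
                                "ex_kcpu_%d", id);
        return PTR_ERR_OR_ZERO(q->thread);
}

static void ex_queue_kick(struct ex_kcpu_queue *q)
{
        /* Counterpart of kthread_queue_work(&queue->csf_kcpu_worker, ...) */
        kthread_queue_work(&q->worker, &q->work);
}

static void ex_queue_term(struct ex_kcpu_queue *q)
{
        kthread_flush_worker(&q->worker);
        kthread_stop(q->thread);
}
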
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu_debugfs.c b/mali_kbase/csf/mali_kbase_csf_kcpu_debugfs.c
index 0a2cde0..fa87777 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu_debugfs.c
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu_debugfs.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -30,7 +30,7 @@
#if IS_ENABLED(CONFIG_DEBUG_FS)
/**
- * kbasep_csf_kcpu_debugfs_print_queue() - Print additional info for KCPU
+ * kbasep_csf_kcpu_debugfs_print_cqs_waits() - Print additional info for KCPU
* queues blocked on CQS wait commands.
*
* @file: The seq_file to print to
@@ -167,11 +167,7 @@ static const struct file_operations kbasep_csf_kcpu_debugfs_fops = {
void kbase_csf_kcpu_debugfs_init(struct kbase_context *kctx)
{
struct dentry *file;
-#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
const mode_t mode = 0444;
-#else
- const mode_t mode = 0400;
-#endif
if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry)))
return;
diff --git a/mali_kbase/csf/mali_kbase_csf_registers.h b/mali_kbase/csf/mali_kbase_csf_registers.h
index 99de444..6cbb4f0 100644
--- a/mali_kbase/csf/mali_kbase_csf_registers.h
+++ b/mali_kbase/csf/mali_kbase_csf_registers.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -387,7 +387,7 @@
/* CS_BASE register */
#define CS_BASE_POINTER_SHIFT 0
-#define CS_BASE_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_BASE_POINTER_SHIFT)
+#define CS_BASE_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_BASE_POINTER_SHIFT)
#define CS_BASE_POINTER_GET(reg_val) (((reg_val)&CS_BASE_POINTER_MASK) >> CS_BASE_POINTER_SHIFT)
#define CS_BASE_POINTER_SET(reg_val, value) \
(((reg_val) & ~CS_BASE_POINTER_MASK) | (((value) << CS_BASE_POINTER_SHIFT) & CS_BASE_POINTER_MASK))
@@ -401,7 +401,8 @@
/* CS_TILER_HEAP_START register */
#define CS_TILER_HEAP_START_POINTER_SHIFT 0
-#define CS_TILER_HEAP_START_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_TILER_HEAP_START_POINTER_SHIFT)
+#define CS_TILER_HEAP_START_POINTER_MASK \
+ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_TILER_HEAP_START_POINTER_SHIFT)
#define CS_TILER_HEAP_START_POINTER_GET(reg_val) \
(((reg_val)&CS_TILER_HEAP_START_POINTER_MASK) >> CS_TILER_HEAP_START_POINTER_SHIFT)
#define CS_TILER_HEAP_START_POINTER_SET(reg_val, value) \
@@ -412,7 +413,8 @@
/* CS_TILER_HEAP_END register */
#define CS_TILER_HEAP_END_POINTER_SHIFT 0
-#define CS_TILER_HEAP_END_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_TILER_HEAP_END_POINTER_SHIFT)
+#define CS_TILER_HEAP_END_POINTER_MASK \
+ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_TILER_HEAP_END_POINTER_SHIFT)
#define CS_TILER_HEAP_END_POINTER_GET(reg_val) \
(((reg_val)&CS_TILER_HEAP_END_POINTER_MASK) >> CS_TILER_HEAP_END_POINTER_SHIFT)
#define CS_TILER_HEAP_END_POINTER_SET(reg_val, value) \
@@ -423,7 +425,7 @@
/* CS_USER_INPUT register */
#define CS_USER_INPUT_POINTER_SHIFT 0
-#define CS_USER_INPUT_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_USER_INPUT_POINTER_SHIFT)
+#define CS_USER_INPUT_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_USER_INPUT_POINTER_SHIFT)
#define CS_USER_INPUT_POINTER_GET(reg_val) (((reg_val)&CS_USER_INPUT_POINTER_MASK) >> CS_USER_INPUT_POINTER_SHIFT)
#define CS_USER_INPUT_POINTER_SET(reg_val, value) \
(((reg_val) & ~CS_USER_INPUT_POINTER_MASK) | \
@@ -431,7 +433,7 @@
/* CS_USER_OUTPUT register */
#define CS_USER_OUTPUT_POINTER_SHIFT 0
-#define CS_USER_OUTPUT_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_USER_OUTPUT_POINTER_SHIFT)
+#define CS_USER_OUTPUT_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_USER_OUTPUT_POINTER_SHIFT)
#define CS_USER_OUTPUT_POINTER_GET(reg_val) (((reg_val)&CS_USER_OUTPUT_POINTER_MASK) >> CS_USER_OUTPUT_POINTER_SHIFT)
#define CS_USER_OUTPUT_POINTER_SET(reg_val, value) \
(((reg_val) & ~CS_USER_OUTPUT_POINTER_MASK) | \
@@ -470,7 +472,8 @@
/* CS_INSTR_BUFFER_BASE register */
#define CS_INSTR_BUFFER_BASE_POINTER_SHIFT (0)
-#define CS_INSTR_BUFFER_BASE_POINTER_MASK ((u64)0xFFFFFFFFFFFFFFFF << CS_INSTR_BUFFER_BASE_POINTER_SHIFT)
+#define CS_INSTR_BUFFER_BASE_POINTER_MASK \
+ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_INSTR_BUFFER_BASE_POINTER_SHIFT)
#define CS_INSTR_BUFFER_BASE_POINTER_GET(reg_val) \
(((reg_val)&CS_INSTR_BUFFER_BASE_POINTER_MASK) >> CS_INSTR_BUFFER_BASE_POINTER_SHIFT)
#define CS_INSTR_BUFFER_BASE_POINTER_SET(reg_val, value) \
@@ -479,8 +482,8 @@
/* CS_INSTR_BUFFER_OFFSET_POINTER register */
#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT (0)
-#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK \
- (((u64)0xFFFFFFFFFFFFFFFF) << CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT)
+#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK \
+ ((GPU_ULL(0xFFFFFFFFFFFFFFFF)) << CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT)
#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_GET(reg_val) \
(((reg_val)&CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK) >> CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT)
#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SET(reg_val, value) \
@@ -529,7 +532,8 @@
/* CS_STATUS_CMD_PTR register */
#define CS_STATUS_CMD_PTR_POINTER_SHIFT 0
-#define CS_STATUS_CMD_PTR_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_STATUS_CMD_PTR_POINTER_SHIFT)
+#define CS_STATUS_CMD_PTR_POINTER_MASK \
+ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_STATUS_CMD_PTR_POINTER_SHIFT)
#define CS_STATUS_CMD_PTR_POINTER_GET(reg_val) \
(((reg_val)&CS_STATUS_CMD_PTR_POINTER_MASK) >> CS_STATUS_CMD_PTR_POINTER_SHIFT)
#define CS_STATUS_CMD_PTR_POINTER_SET(reg_val, value) \
@@ -543,6 +547,13 @@
#define CS_STATUS_WAIT_SB_MASK_SET(reg_val, value) \
(((reg_val) & ~CS_STATUS_WAIT_SB_MASK_MASK) | \
(((value) << CS_STATUS_WAIT_SB_MASK_SHIFT) & CS_STATUS_WAIT_SB_MASK_MASK))
+#define CS_STATUS_WAIT_SB_SOURCE_SHIFT 16
+#define CS_STATUS_WAIT_SB_SOURCE_MASK (0xF << CS_STATUS_WAIT_SB_SOURCE_SHIFT)
+#define CS_STATUS_WAIT_SB_SOURCE_GET(reg_val) \
+ (((reg_val)&CS_STATUS_WAIT_SB_SOURCE_MASK) >> CS_STATUS_WAIT_SB_SOURCE_SHIFT)
+#define CS_STATUS_WAIT_SB_SOURCE_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_WAIT_SB_SOURCE_MASK) | \
+ (((value) << CS_STATUS_WAIT_SB_SOURCE_SHIFT) & CS_STATUS_WAIT_SB_SOURCE_MASK))
#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT 24
#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK (0xF << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT)
#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(reg_val) \
@@ -608,7 +619,8 @@
/* CS_STATUS_WAIT_SYNC_POINTER register */
#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT 0
-#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT)
+#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK \
+ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT)
#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_GET(reg_val) \
(((reg_val)&CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK) >> CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT)
#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SET(reg_val, value) \
@@ -694,6 +706,7 @@
(((value) << CS_FATAL_EXCEPTION_TYPE_SHIFT) & CS_FATAL_EXCEPTION_TYPE_MASK))
/* CS_FATAL_EXCEPTION_TYPE values */
#define CS_FATAL_EXCEPTION_TYPE_CS_CONFIG_FAULT 0x40
+#define CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE 0x41
#define CS_FATAL_EXCEPTION_TYPE_CS_ENDPOINT_FAULT 0x44
#define CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT 0x48
#define CS_FATAL_EXCEPTION_TYPE_CS_INVALID_INSTRUCTION 0x49
@@ -709,7 +722,8 @@
/* CS_FAULT_INFO register */
#define CS_FAULT_INFO_EXCEPTION_DATA_SHIFT 0
-#define CS_FAULT_INFO_EXCEPTION_DATA_MASK (0xFFFFFFFFFFFFFFFF << CS_FAULT_INFO_EXCEPTION_DATA_SHIFT)
+#define CS_FAULT_INFO_EXCEPTION_DATA_MASK \
+ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_FAULT_INFO_EXCEPTION_DATA_SHIFT)
#define CS_FAULT_INFO_EXCEPTION_DATA_GET(reg_val) \
(((reg_val)&CS_FAULT_INFO_EXCEPTION_DATA_MASK) >> CS_FAULT_INFO_EXCEPTION_DATA_SHIFT)
#define CS_FAULT_INFO_EXCEPTION_DATA_SET(reg_val, value) \
@@ -718,7 +732,8 @@
/* CS_FATAL_INFO register */
#define CS_FATAL_INFO_EXCEPTION_DATA_SHIFT 0
-#define CS_FATAL_INFO_EXCEPTION_DATA_MASK (0xFFFFFFFFFFFFFFFF << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT)
+#define CS_FATAL_INFO_EXCEPTION_DATA_MASK \
+ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT)
#define CS_FATAL_INFO_EXCEPTION_DATA_GET(reg_val) \
(((reg_val)&CS_FATAL_INFO_EXCEPTION_DATA_MASK) >> CS_FATAL_INFO_EXCEPTION_DATA_SHIFT)
#define CS_FATAL_INFO_EXCEPTION_DATA_SET(reg_val, value) \
@@ -750,7 +765,7 @@
/* CS_HEAP_ADDRESS register */
#define CS_HEAP_ADDRESS_POINTER_SHIFT 0
-#define CS_HEAP_ADDRESS_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_HEAP_ADDRESS_POINTER_SHIFT)
+#define CS_HEAP_ADDRESS_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_HEAP_ADDRESS_POINTER_SHIFT)
#define CS_HEAP_ADDRESS_POINTER_GET(reg_val) (((reg_val)&CS_HEAP_ADDRESS_POINTER_MASK) >> CS_HEAP_ADDRESS_POINTER_SHIFT)
#define CS_HEAP_ADDRESS_POINTER_SET(reg_val, value) \
(((reg_val) & ~CS_HEAP_ADDRESS_POINTER_MASK) | \
@@ -761,14 +776,14 @@
/* CS_INSERT register */
#define CS_INSERT_VALUE_SHIFT 0
-#define CS_INSERT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_INSERT_VALUE_SHIFT)
+#define CS_INSERT_VALUE_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_INSERT_VALUE_SHIFT)
#define CS_INSERT_VALUE_GET(reg_val) (((reg_val)&CS_INSERT_VALUE_MASK) >> CS_INSERT_VALUE_SHIFT)
#define CS_INSERT_VALUE_SET(reg_val, value) \
(((reg_val) & ~CS_INSERT_VALUE_MASK) | (((value) << CS_INSERT_VALUE_SHIFT) & CS_INSERT_VALUE_MASK))
/* CS_EXTRACT_INIT register */
#define CS_EXTRACT_INIT_VALUE_SHIFT 0
-#define CS_EXTRACT_INIT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_EXTRACT_INIT_VALUE_SHIFT)
+#define CS_EXTRACT_INIT_VALUE_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_EXTRACT_INIT_VALUE_SHIFT)
#define CS_EXTRACT_INIT_VALUE_GET(reg_val) (((reg_val)&CS_EXTRACT_INIT_VALUE_MASK) >> CS_EXTRACT_INIT_VALUE_SHIFT)
#define CS_EXTRACT_INIT_VALUE_SET(reg_val, value) \
(((reg_val) & ~CS_EXTRACT_INIT_VALUE_MASK) | \
@@ -779,7 +794,7 @@
/* CS_EXTRACT register */
#define CS_EXTRACT_VALUE_SHIFT 0
-#define CS_EXTRACT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_EXTRACT_VALUE_SHIFT)
+#define CS_EXTRACT_VALUE_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_EXTRACT_VALUE_SHIFT)
#define CS_EXTRACT_VALUE_GET(reg_val) (((reg_val)&CS_EXTRACT_VALUE_MASK) >> CS_EXTRACT_VALUE_SHIFT)
#define CS_EXTRACT_VALUE_SET(reg_val, value) \
(((reg_val) & ~CS_EXTRACT_VALUE_MASK) | (((value) << CS_EXTRACT_VALUE_SHIFT) & CS_EXTRACT_VALUE_MASK))
@@ -932,7 +947,7 @@
/* CSG_SUSPEND_BUF register */
#define CSG_SUSPEND_BUF_POINTER_SHIFT 0
-#define CSG_SUSPEND_BUF_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CSG_SUSPEND_BUF_POINTER_SHIFT)
+#define CSG_SUSPEND_BUF_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CSG_SUSPEND_BUF_POINTER_SHIFT)
#define CSG_SUSPEND_BUF_POINTER_GET(reg_val) (((reg_val)&CSG_SUSPEND_BUF_POINTER_MASK) >> CSG_SUSPEND_BUF_POINTER_SHIFT)
#define CSG_SUSPEND_BUF_POINTER_SET(reg_val, value) \
(((reg_val) & ~CSG_SUSPEND_BUF_POINTER_MASK) | \
@@ -940,7 +955,8 @@
/* CSG_PROTM_SUSPEND_BUF register */
#define CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT 0
-#define CSG_PROTM_SUSPEND_BUF_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT)
+#define CSG_PROTM_SUSPEND_BUF_POINTER_MASK \
+ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT)
#define CSG_PROTM_SUSPEND_BUF_POINTER_GET(reg_val) \
(((reg_val)&CSG_PROTM_SUSPEND_BUF_POINTER_MASK) >> CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT)
#define CSG_PROTM_SUSPEND_BUF_POINTER_SET(reg_val, value) \
@@ -1408,7 +1424,7 @@
/* GLB_ALLOC_EN register */
#define GLB_ALLOC_EN_MASK_SHIFT 0
-#define GLB_ALLOC_EN_MASK_MASK (0xFFFFFFFFFFFFFFFF << GLB_ALLOC_EN_MASK_SHIFT)
+#define GLB_ALLOC_EN_MASK_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << GLB_ALLOC_EN_MASK_SHIFT)
#define GLB_ALLOC_EN_MASK_GET(reg_val) (((reg_val)&GLB_ALLOC_EN_MASK_MASK) >> GLB_ALLOC_EN_MASK_SHIFT)
#define GLB_ALLOC_EN_MASK_SET(reg_val, value) \
(((reg_val) & ~GLB_ALLOC_EN_MASK_MASK) | (((value) << GLB_ALLOC_EN_MASK_SHIFT) & GLB_ALLOC_EN_MASK_MASK))
@@ -1521,4 +1537,44 @@
(((value) << GLB_REQ_ITER_TRACE_ENABLE_SHIFT) & \
GLB_REQ_ITER_TRACE_ENABLE_MASK))
+/* GLB_PRFCNT_CONFIG register */
+#define GLB_PRFCNT_CONFIG_SIZE_SHIFT (0)
+#define GLB_PRFCNT_CONFIG_SIZE_MASK (0xFF << GLB_PRFCNT_CONFIG_SIZE_SHIFT)
+#define GLB_PRFCNT_CONFIG_SIZE_GET(reg_val) \
+ (((reg_val)&GLB_PRFCNT_CONFIG_SIZE_MASK) >> GLB_PRFCNT_CONFIG_SIZE_SHIFT)
+#define GLB_PRFCNT_CONFIG_SIZE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_PRFCNT_CONFIG_SIZE_MASK) | \
+ (((value) << GLB_PRFCNT_CONFIG_SIZE_SHIFT) & GLB_PRFCNT_CONFIG_SIZE_MASK))
+#define GLB_PRFCNT_CONFIG_SET_SELECT_SHIFT GPU_U(8)
+#define GLB_PRFCNT_CONFIG_SET_SELECT_MASK (GPU_U(0x3) << GLB_PRFCNT_CONFIG_SET_SELECT_SHIFT)
+#define GLB_PRFCNT_CONFIG_SET_SELECT_GET(reg_val) \
+ (((reg_val)&GLB_PRFCNT_CONFIG_SET_SELECT_MASK) >> GLB_PRFCNT_CONFIG_SET_SELECT_SHIFT)
+#define GLB_PRFCNT_CONFIG_SET_SELECT_SET(reg_val, value) \
+ (((reg_val) & ~GLB_PRFCNT_CONFIG_SET_SELECT_MASK) | \
+ (((value) << GLB_PRFCNT_CONFIG_SET_SELECT_SHIFT) & GLB_PRFCNT_CONFIG_SET_SELECT_MASK))
+
+/* GLB_PRFCNT_SIZE register */
+#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_SET_MOD(value) ((value) >> 8)
+#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET_MOD(value) ((value) << 8)
+#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_SHIFT GPU_U(0)
+#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_MASK (GPU_U(0xFFFF) << GLB_PRFCNT_SIZE_HARDWARE_SIZE_SHIFT)
+#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET(reg_val) \
+ (GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET_MOD(((reg_val)&GLB_PRFCNT_SIZE_HARDWARE_SIZE_MASK) >> \
+ GLB_PRFCNT_SIZE_HARDWARE_SIZE_SHIFT))
+#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_PRFCNT_SIZE_HARDWARE_SIZE_MASK) | \
+ ((GLB_PRFCNT_SIZE_HARDWARE_SIZE_SET_MOD(value) << GLB_PRFCNT_SIZE_HARDWARE_SIZE_SHIFT) & \
+ GLB_PRFCNT_SIZE_HARDWARE_SIZE_MASK))
+#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET_MOD(value) ((value) >> 8)
+#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET_MOD(value) ((value) << 8)
+#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT GPU_U(16)
+#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK (GPU_U(0xFFFF) << GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT)
+#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET(reg_val) \
+ (GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET_MOD(((reg_val)&GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK) >> \
+ GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT))
+#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK) | \
+ ((GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET_MOD(value) << GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT) & \
+ GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK))
+
#endif /* _KBASE_CSF_REGISTERS_H_ */
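
Editor's note: the register-map hunks above wrap every all-ones 64-bit constant in GPU_ULL() (with GPU_U() for the narrower fields), presumably to make the 64-bit width of each mask explicit and uniform instead of relying on bare literals or ad-hoc (u64) casts. A small self-contained illustration of how such GET/SET field macros are used; GPU_ULL() is stubbed locally since its real definition lives elsewhere in the driver headers:

/* Stand-alone illustration of the GET/SET field-macro pattern. GPU_ULL()
 * below is a local stand-in assumed to force an unsigned 64-bit constant.
 */
#include <stdint.h>
#include <stdio.h>

#define GPU_ULL(x) x##ull       /* assumption: mirrors the driver's macro */

#define EX_POINTER_SHIFT 0
#define EX_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << EX_POINTER_SHIFT)
#define EX_POINTER_GET(reg_val) \
        (((reg_val) & EX_POINTER_MASK) >> EX_POINTER_SHIFT)
#define EX_POINTER_SET(reg_val, value) \
        (((reg_val) & ~EX_POINTER_MASK) | \
         (((value) << EX_POINTER_SHIFT) & EX_POINTER_MASK))

int main(void)
{
        uint64_t reg = 0;

        /* Write the field, then read it back through the accessors. */
        reg = EX_POINTER_SET(reg, 0x1234abcd0000ull);
        printf("pointer field = 0x%llx\n",
               (unsigned long long)EX_POINTER_GET(reg));
        return 0;
}
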
diff --git a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
index 1c5dbc9..108e734 100644
--- a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
+++ b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -29,7 +29,7 @@
#include <csf/mali_kbase_csf_trace_buffer.h>
#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
#include <mali_kbase_reset_gpu.h>
-#include <linux/string.h>
+#include <csf/mali_kbase_csf_firmware_log.h>
enum kbasep_soft_reset_status {
RESET_SUCCESS = 0,
@@ -257,68 +257,6 @@ static void kbase_csf_debug_dump_registers(struct kbase_device *kbdev)
kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG)));
}
-static void kbase_csf_dump_firmware_trace_buffer(struct kbase_device *kbdev)
-{
- u8 *buf, *p, *pnewline, *pend, *pendbuf;
- unsigned int read_size, remaining_size;
- struct firmware_trace_buffer *tb =
- kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME);
-
- if (tb == NULL) {
- dev_dbg(kbdev->dev, "Can't get the trace buffer, firmware trace dump skipped");
- return;
- }
-
- buf = kmalloc(PAGE_SIZE + 1, GFP_KERNEL);
- if (buf == NULL) {
- dev_err(kbdev->dev, "Short of memory, firmware trace dump skipped");
- return;
- }
-
- buf[PAGE_SIZE] = 0;
-
- p = buf;
- pendbuf = &buf[PAGE_SIZE];
-
- dev_err(kbdev->dev, "Firmware trace buffer dump:");
- while ((read_size = kbase_csf_firmware_trace_buffer_read_data(tb, p,
- pendbuf - p))) {
- pend = p + read_size;
- p = buf;
-
- while (p < pend && (pnewline = memchr(p, '\n', pend - p))) {
- /* Null-terminate the string */
- *pnewline = 0;
-
- dev_err(kbdev->dev, "FW> %s", p);
-
- p = pnewline + 1;
- }
-
- remaining_size = pend - p;
-
- if (!remaining_size) {
- p = buf;
- } else if (remaining_size < PAGE_SIZE) {
- /* Copy unfinished string to the start of the buffer */
- memmove(buf, p, remaining_size);
- p = &buf[remaining_size];
- } else {
- /* Print abnormal page-long string without newlines */
- dev_err(kbdev->dev, "FW> %s", buf);
- p = buf;
- }
- }
-
- if (p != buf) {
- /* Null-terminate and print last unfinished string */
- *p = 0;
- dev_err(kbdev->dev, "FW> %s", buf);
- }
-
- kfree(buf);
-}
-
/**
* kbase_csf_hwcnt_on_reset_error() - Sets HWCNT to appropriate state in the
* event of an error during GPU reset.
@@ -389,7 +327,7 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic
if (!silent) {
kbase_csf_debug_dump_registers(kbdev);
if (likely(firmware_inited))
- kbase_csf_dump_firmware_trace_buffer(kbdev);
+ kbase_csf_firmware_log_dump_buffer(kbdev);
}
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
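
Editor's note: the hunk above drops the local kbase_csf_dump_firmware_trace_buffer() and has the reset path call kbase_csf_firmware_log_dump_buffer() instead, so the dump logic apparently now lives in the firmware log module. For reference, a compact sketch of the chunked line-splitting technique the removed code used; ex_read() stands in for the trace-buffer read callback, and unlike the original (which used memchr() with explicit lengths) this sketch assumes plain text without embedded NUL bytes:

/* Sketch only: read into a fixed buffer, print each complete line, and
 * carry any partial line over to the next read.
 */
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/string.h>

#define EX_BUF_SIZE 4096

static void ex_dump_lines(size_t (*ex_read)(void *dst, size_t max))
{
        char *buf, *p, *nl;
        size_t used = 0, got;

        buf = kmalloc(EX_BUF_SIZE + 1, GFP_KERNEL);
        if (!buf)
                return;

        while ((got = ex_read(buf + used, EX_BUF_SIZE - used)) > 0) {
                used += got;
                buf[used] = '\0';

                /* Print every complete '\n'-terminated line. */
                p = buf;
                while ((nl = strchr(p, '\n')) != NULL) {
                        *nl = '\0';
                        pr_info("FW> %s\n", p);
                        p = nl + 1;
                }

                /* Carry any unterminated tail to the front of the buffer. */
                used = strlen(p);
                memmove(buf, p, used + 1);

                if (used == EX_BUF_SIZE) {
                        /* Abnormal buffer-long run without a newline. */
                        pr_info("FW> %s\n", buf);
                        used = 0;
                }
        }

        if (used)
                pr_info("FW> %s\n", buf);

        kfree(buf);
}
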
diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c
index 9924ab5..905923a 100644
--- a/mali_kbase/csf/mali_kbase_csf_scheduler.c
+++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c
@@ -33,6 +33,7 @@
#include <uapi/gpu/arm/midgard/mali_base_kernel.h>
#include <mali_kbase_hwaccess_time.h>
#include <trace/events/power.h>
+#include "mali_kbase_csf_tiler_heap.h"
/* Value to indicate that a queue group is not groups_to_schedule list */
#define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX)
@@ -85,6 +86,21 @@
/* A GPU address space slot is reserved for MCU. */
#define NUM_RESERVED_AS_SLOTS (1)
+/* Heap deferral time in ms from a CSG suspend before it is included in the reclaim scan
+ * list. The value corresponds to realtime priority CSGs. Other priorities derive their
+ * deferral time from this value, with the realtime case having the longest delay.
+ */
+#define HEAP_RECLAIM_PRIO_DEFERRAL_MS (1000)
+
+/* Additional heap deferral time in ms if a suspended CSG is in the WAIT_SYNC state */
+#define HEAP_RECLAIM_WAIT_SYNC_DEFERRAL_MS (200)
+
+/* Tiler heap reclaim count size for limiting a count run length */
+#define HEAP_RECLAIM_COUNT_BATCH_SIZE (HEAP_SHRINKER_BATCH << 6)
+
+/* Tiler heap reclaim scan (free) method size for limiting a scan run length */
+#define HEAP_RECLAIM_SCAN_BATCH_SIZE (HEAP_SHRINKER_BATCH << 7)
+
static int scheduler_group_schedule(struct kbase_queue_group *group);
static void remove_group_from_idle_wait(struct kbase_queue_group *const group);
static
@@ -298,7 +314,8 @@ static int force_scheduler_to_exit_sleep(struct kbase_device *kbdev)
goto out;
}
- if (suspend_active_groups_on_powerdown(kbdev, true))
+ ret = suspend_active_groups_on_powerdown(kbdev, true);
+ if (ret)
goto out;
kbase_pm_lock(kbdev);
@@ -346,7 +363,7 @@ static enum hrtimer_restart tick_timer_callback(struct hrtimer *timer)
struct kbase_device *kbdev = container_of(timer, struct kbase_device,
csf.scheduler.tick_timer);
- kbase_csf_scheduler_advance_tick(kbdev);
+ kbase_csf_scheduler_tick_advance(kbdev);
return HRTIMER_NORESTART;
}
@@ -553,7 +570,7 @@ static void update_on_slot_queues_offsets(struct kbase_device *kbdev)
for (j = 0; j < max_streams; ++j) {
struct kbase_queue *const queue = group->bound_queues[j];
- if (queue) {
+ if (queue && queue->user_io_addr) {
u64 const *const output_addr =
(u64 const *)(queue->user_io_addr + PAGE_SIZE);
@@ -589,7 +606,7 @@ bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev)
non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps);
can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev);
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_CAN_IDLE, NULL,
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND, NULL,
((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32));
if (!non_idle_offslot_grps) {
@@ -614,7 +631,7 @@ bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev)
#endif
} else {
/* Advance the scheduling tick to get the non-idle suspended groups loaded soon */
- kbase_csf_scheduler_advance_tick_nolock(kbdev);
+ kbase_csf_scheduler_tick_advance_nolock(kbdev);
}
return ack_gpu_idle_event;
@@ -686,6 +703,12 @@ static bool on_slot_group_idle_locked(struct kbase_queue_group *group)
return (group->run_state == KBASE_CSF_GROUP_IDLE);
}
+static bool can_schedule_idle_group(struct kbase_queue_group *group)
+{
+ return (on_slot_group_idle_locked(group) ||
+ (group->priority == KBASE_QUEUE_GROUP_PRIORITY_REALTIME));
+}
+
static bool queue_group_scheduled(struct kbase_queue_group *group)
{
return (group->run_state != KBASE_CSF_GROUP_INACTIVE &&
@@ -701,34 +724,39 @@ static bool queue_group_scheduled_locked(struct kbase_queue_group *group)
}
/**
- * scheduler_wait_protm_quit() - Wait for GPU to exit protected mode.
+ * scheduler_protm_wait_quit() - Wait for GPU to exit protected mode.
*
* @kbdev: Pointer to the GPU device
*
* This function waits for the GPU to exit protected mode which is confirmed
* when active_protm_grp is set to NULL.
+ *
+ * Return: true on success, false otherwise.
*/
-static void scheduler_wait_protm_quit(struct kbase_device *kbdev)
+static bool scheduler_protm_wait_quit(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
long wt = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
long remaining;
+ bool success = true;
lockdep_assert_held(&scheduler->lock);
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_WAIT_PROTM_QUIT, NULL,
- jiffies_to_msecs(wt));
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_PROTM_WAIT_QUIT_START, NULL, jiffies_to_msecs(wt));
remaining = wait_event_timeout(kbdev->csf.event_wait,
!kbase_csf_scheduler_protected_mode_in_use(kbdev), wt);
- if (!remaining)
+ if (!remaining) {
dev_warn(kbdev->dev, "[%llu] Timeout (%d ms), protm_quit wait skipped",
kbase_backend_get_cycle_cnt(kbdev),
kbdev->csf.fw_timeout_ms);
+ success = false;
+ }
+
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_PROTM_WAIT_QUIT_END, NULL, jiffies_to_msecs(remaining));
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_WAIT_PROTM_QUIT_DONE, NULL,
- jiffies_to_msecs(remaining));
+ return success;
}
/**
@@ -738,13 +766,39 @@ static void scheduler_wait_protm_quit(struct kbase_device *kbdev)
*
* This function sends a ping request to the firmware and waits for the GPU
* to exit protected mode.
+ *
+ * If the GPU does not exit protected mode, it is considered to have hung
+ * and a GPU reset is then triggered.
*/
static void scheduler_force_protm_exit(struct kbase_device *kbdev)
{
+ unsigned long flags;
+
lockdep_assert_held(&kbdev->csf.scheduler.lock);
kbase_csf_firmware_ping(kbdev);
- scheduler_wait_protm_quit(kbdev);
+
+ if (scheduler_protm_wait_quit(kbdev))
+ return;
+
+ dev_err(kbdev->dev, "Possible GPU hang in Protected mode");
+
+ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
+ if (kbdev->csf.scheduler.active_protm_grp) {
+ dev_err(kbdev->dev,
+ "Group-%d of context %d_%d ran in protected mode for too long on slot %d",
+ kbdev->csf.scheduler.active_protm_grp->handle,
+ kbdev->csf.scheduler.active_protm_grp->kctx->tgid,
+ kbdev->csf.scheduler.active_protm_grp->kctx->id,
+ kbdev->csf.scheduler.active_protm_grp->csg_nr);
+ }
+ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
+
+ /* The GPU could be stuck in Protected mode. To prevent a hang,
+ * a GPU reset is performed.
+ */
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
+ kbase_reset_gpu(kbdev);
}
/**
@@ -1116,8 +1170,8 @@ static void update_idle_suspended_group_state(struct kbase_queue_group *group)
return;
new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps);
- KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
- group, new_val);
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group,
+ new_val);
}
int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group)
@@ -1246,11 +1300,10 @@ static int halt_stream_sync(struct kbase_queue *queue)
kbase_csf_firmware_cs_input_mask(stream, CS_REQ, CS_REQ_STATE_STOP,
CS_REQ_STATE_MASK);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQ, group, queue, 0u);
kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, true);
spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQ, group, queue, 0u);
-
/* Timed wait */
remaining = wait_event_timeout(kbdev->csf.event_wait,
(CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK))
@@ -1321,8 +1374,7 @@ static int sched_halt_stream(struct kbase_queue *queue)
long remaining;
int slot;
int err = 0;
- const u32 group_schedule_timeout =
- 20 * kbdev->csf.scheduler.csg_scheduling_period_ms;
+ const u32 group_schedule_timeout = kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT);
if (WARN_ON(!group))
return -EINVAL;
@@ -1736,8 +1788,8 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue)
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_START, group, queue,
group->run_state);
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_STATUS_WAIT, queue->group,
- queue, queue->status_wait);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group, queue,
+ queue->status_wait);
if (group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED) {
err = -EIO;
@@ -1789,9 +1841,9 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue)
start_stream_sync(queue);
}
}
- queue_delayed_work(system_long_wq,
- &kbdev->csf.scheduler.ping_work,
- msecs_to_jiffies(FIRMWARE_PING_INTERVAL_MS));
+ queue_delayed_work(system_long_wq, &kbdev->csf.scheduler.ping_work,
+ msecs_to_jiffies(kbase_get_timeout_ms(
+ kbdev, CSF_FIRMWARE_PING_TIMEOUT)));
}
}
@@ -1826,7 +1878,8 @@ static enum kbase_csf_csg_slot_state update_csg_slot_status(
slot_state = CSG_SLOT_RUNNING;
atomic_set(&csg_slot->state, slot_state);
csg_slot->trigger_jiffies = jiffies;
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STARTED, csg_slot->resident_group, state);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_RUNNING, csg_slot->resident_group,
+ state);
dev_dbg(kbdev->dev, "Group %u running on slot %d\n",
csg_slot->resident_group->handle, slot);
}
@@ -1942,7 +1995,7 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend)
flags);
atomic_set(&csg_slot[slot].state, CSG_SLOT_DOWN2STOP);
csg_slot[slot].trigger_jiffies = jiffies;
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP, group, halt_cmd);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP_REQ, group, halt_cmd);
KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG(
kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot);
@@ -1985,10 +2038,10 @@ static bool evaluate_sync_update(struct kbase_queue *queue)
sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr,
&mapping);
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE, queue->group,
- queue, queue->sync_ptr);
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_BLOCKED_REASON,
- queue->group, queue, queue->blocked_reason);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVAL_START, queue->group, queue,
+ queue->sync_ptr);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_BLOCKED_REASON, queue->group, queue,
+ queue->blocked_reason);
if (!sync_ptr) {
dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX already freed",
@@ -2003,11 +2056,11 @@ static bool evaluate_sync_update(struct kbase_queue *queue)
(sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE));
sync_current_val = READ_ONCE(*sync_ptr);
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_CURRENT_VAL, queue->group,
- queue, sync_current_val);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_CUR_VAL, queue->group, queue,
+ sync_current_val);
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_TEST_VAL, queue->group,
- queue, queue->sync_value);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_TEST_VAL, queue->group, queue,
+ queue->sync_value);
if (((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) &&
(sync_current_val > queue->sync_value)) ||
@@ -2024,8 +2077,7 @@ static bool evaluate_sync_update(struct kbase_queue *queue)
kbase_phy_alloc_mapping_put(queue->kctx, mapping);
out:
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVALUATED,
- queue->group, queue, updated);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVAL_END, queue->group, queue, updated);
return updated;
}
@@ -2059,8 +2111,8 @@ bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo,
queue->saved_cmd_ptr = cmd_ptr;
#endif
- KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_STATUS_WAIT,
- queue->group, queue, status);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group,
+ queue, status);
if (CS_STATUS_WAIT_SYNC_WAIT_GET(status)) {
queue->status_wait = status;
@@ -2114,12 +2166,10 @@ static void schedule_in_cycle(struct kbase_queue_group *group, bool force)
* of work needs to be enforced in situation such as entering into
* protected mode).
*/
- if ((likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) &&
- !scheduler->tock_pending_request) {
- scheduler->tock_pending_request = true;
+ if (likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) {
dev_dbg(kbdev->dev, "Kicking async for group %d\n",
group->handle);
- kthread_mod_delayed_work(&scheduler->csf_worker, &scheduler->tock_work, 0);
+ kbase_csf_scheduler_invoke_tock(kbdev);
}
}
@@ -2146,7 +2196,7 @@ void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler,
list_add_tail(&group->link,
&kctx->csf.sched.runnable_groups[group->priority]);
kctx->csf.sched.num_runnable_grps++;
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_INSERT_RUNNABLE, group,
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_INSERT, group,
kctx->csf.sched.num_runnable_grps);
/* Add the kctx if not yet in runnable kctxs */
@@ -2154,7 +2204,7 @@ void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler,
/* First runnable csg, adds to the runnable_kctxs */
INIT_LIST_HEAD(&kctx->csf.link);
list_add_tail(&kctx->csf.link, &scheduler->runnable_kctxs);
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_INSERT_RUNNABLE, kctx, 0u);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_INSERT, kctx, 0u);
}
scheduler->total_runnable_grps++;
@@ -2211,7 +2261,7 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler,
if (kbase_prepare_to_reset_gpu(kctx->kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kctx->kbdev);
- KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_EXIT_PROTM,
+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_PROTM_EXIT,
scheduler->active_protm_grp, 0u);
scheduler->active_protm_grp = NULL;
}
@@ -2241,13 +2291,12 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler,
}
kctx->csf.sched.num_runnable_grps--;
- KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_REMOVE_RUNNABLE, group,
+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_RUNNABLE_REMOVE, group,
kctx->csf.sched.num_runnable_grps);
new_head_grp = (!list_empty(list)) ?
list_first_entry(list, struct kbase_queue_group, link) :
NULL;
- KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_HEAD_RUNNABLE, new_head_grp,
- 0u);
+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_RUNNABLE_HEAD, new_head_grp, 0u);
if (kctx->csf.sched.num_runnable_grps == 0) {
struct kbase_context *new_head_kctx;
@@ -2256,13 +2305,11 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler,
list_del_init(&kctx->csf.link);
if (scheduler->top_ctx == kctx)
scheduler->top_ctx = NULL;
- KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_REMOVE_RUNNABLE, kctx,
- 0u);
+ KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_RUNNABLE_KCTX_REMOVE, kctx, 0u);
new_head_kctx = (!list_empty(kctx_list)) ?
list_first_entry(kctx_list, struct kbase_context, csf.link) :
NULL;
- KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_HEAD_RUNNABLE,
- new_head_kctx, 0u);
+ KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_RUNNABLE_KCTX_HEAD, new_head_kctx, 0u);
}
WARN_ON(scheduler->total_runnable_grps == 0);
@@ -2289,7 +2336,7 @@ static void insert_group_to_idle_wait(struct kbase_queue_group *const group)
list_add_tail(&group->link, &kctx->csf.sched.idle_wait_groups);
kctx->csf.sched.num_idle_wait_grps++;
- KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_INSERT_IDLE_WAIT, group,
+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_INSERT, group,
kctx->csf.sched.num_idle_wait_grps);
group->run_state = KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC;
dev_dbg(kctx->kbdev->dev,
@@ -2310,13 +2357,12 @@ static void remove_group_from_idle_wait(struct kbase_queue_group *const group)
list_del_init(&group->link);
WARN_ON(kctx->csf.sched.num_idle_wait_grps == 0);
kctx->csf.sched.num_idle_wait_grps--;
- KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_REMOVE_IDLE_WAIT, group,
+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_REMOVE, group,
kctx->csf.sched.num_idle_wait_grps);
new_head_grp = (!list_empty(list)) ?
list_first_entry(list, struct kbase_queue_group, link) :
NULL;
- KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_HEAD_IDLE_WAIT,
- new_head_grp, 0u);
+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_HEAD, new_head_grp, 0u);
group->run_state = KBASE_CSF_GROUP_INACTIVE;
}
@@ -2342,8 +2388,7 @@ static void update_offslot_non_idle_cnt_for_faulty_grp(struct kbase_queue_group
if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) {
int new_val =
atomic_dec_return(&scheduler->non_idle_offslot_grps);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
- group, new_val);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val);
}
}
@@ -2359,8 +2404,7 @@ static void update_offslot_non_idle_cnt_for_onslot_grp(struct kbase_queue_group
if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) {
int new_val =
atomic_dec_return(&scheduler->non_idle_offslot_grps);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
- group, new_val);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val);
}
}
@@ -2380,15 +2424,15 @@ static void update_offslot_non_idle_cnt_on_grp_suspend(
if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) {
int new_val = atomic_inc_return(
&scheduler->non_idle_offslot_grps);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
- group, new_val);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC,
+ group, new_val);
}
} else {
if (group->run_state != KBASE_CSF_GROUP_SUSPENDED) {
int new_val = atomic_dec_return(
&scheduler->non_idle_offslot_grps);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
- group, new_val);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC,
+ group, new_val);
}
}
} else {
@@ -2396,8 +2440,8 @@ static void update_offslot_non_idle_cnt_on_grp_suspend(
if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) {
int new_val = atomic_inc_return(
&scheduler->non_idle_offslot_grps);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
- group, new_val);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group,
+ new_val);
}
}
}
@@ -2436,6 +2480,145 @@ static bool confirm_cmd_buf_empty(struct kbase_queue const *queue)
return cs_idle;
}
+static void detach_from_sched_reclaim_mgr(struct kbase_context *kctx)
+{
+ struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler;
+ struct kbase_kctx_heap_info *heap_info = &kctx->csf.sched.heap_info;
+
+ lockdep_assert_held(&scheduler->lock);
+
+ if (!list_empty(&heap_info->mgr_link)) {
+ WARN_ON(!heap_info->flags);
+ list_del_init(&heap_info->mgr_link);
+
+ if (heap_info->flags & CSF_CTX_RECLAIM_CANDI_FLAG)
+ WARN_ON(atomic_sub_return(heap_info->nr_est_pages,
+ &scheduler->reclaim_mgr.est_cand_pages) < 0);
+ if (heap_info->flags & CSF_CTX_RECLAIM_SCAN_FLAG)
+ WARN_ON(atomic_sub_return(heap_info->nr_scan_pages,
+ &scheduler->reclaim_mgr.mgr_scan_pages) < 0);
+
+ dev_dbg(kctx->kbdev->dev, "Reclaim_mgr_detach: ctx_%d_%d, flags = 0x%x\n",
+ kctx->tgid, kctx->id, heap_info->flags);
+ /* Clear on detaching */
+ heap_info->nr_est_pages = 0;
+ heap_info->nr_scan_pages = 0;
+ heap_info->flags = 0;
+ }
+}
+
+static void attach_to_sched_reclaim_mgr(struct kbase_context *kctx)
+{
+ struct kbase_kctx_heap_info *const heap_info = &kctx->csf.sched.heap_info;
+ struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler;
+
+ lockdep_assert_held(&scheduler->lock);
+
+ if (WARN_ON(!list_empty(&heap_info->mgr_link)))
+ list_del_init(&heap_info->mgr_link);
+
+ list_add_tail(&heap_info->mgr_link, &scheduler->reclaim_mgr.candidate_ctxs);
+
+ /* Read the kctx's tiler heap estimate of pages; this decouples it from
+ * subsequent updates on the kctx's tiler heap side. The value remains static
+ * for as long as this kctx stays on the reclaim manager's candidate_ctxs list.
+ */
+ heap_info->nr_est_pages = (u32)atomic_read(&kctx->csf.tiler_heaps.est_count_pages);
+ atomic_add(heap_info->nr_est_pages, &scheduler->reclaim_mgr.est_cand_pages);
+
+ heap_info->attach_jiffies = jiffies;
+ heap_info->flags = CSF_CTX_RECLAIM_CANDI_FLAG;
+
+ dev_dbg(kctx->kbdev->dev, "Reclaim_mgr_attach: ctx_%d_%d, est_count_pages = %u\n",
+ kctx->tgid, kctx->id, heap_info->nr_est_pages);
+}
+
+static void update_kctx_heap_info_on_grp_on_slot(struct kbase_queue_group *group)
+{
+ struct kbase_context *kctx = group->kctx;
+ struct kbase_kctx_heap_info *heap_info = &kctx->csf.sched.heap_info;
+
+ lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock);
+
+ heap_info->on_slot_grps++;
+ /* If the kctx transitioned on-slot CSGs: 0 => 1, detach the kctx scheduler->reclaim_mgr */
+ if (heap_info->on_slot_grps == 1) {
+ dev_dbg(kctx->kbdev->dev,
+ "CSG_%d_%d_%d on-slot, remove kctx from reclaim manager\n",
+ group->kctx->tgid, group->kctx->id, group->handle);
+
+ detach_from_sched_reclaim_mgr(kctx);
+ }
+}
+
+static void update_kctx_heap_info_on_grp_evict(struct kbase_queue_group *group)
+{
+ struct kbase_context *kctx = group->kctx;
+ struct kbase_kctx_heap_info *const heap_info = &kctx->csf.sched.heap_info;
+ struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler;
+ const u32 num_groups = kctx->kbdev->csf.global_iface.group_num;
+ u32 on_slot_grps = 0;
+ u32 i;
+
+ lockdep_assert_held(&scheduler->lock);
+
+ /* Group eviction from the scheduler is a bit more complex, but far less
+ * frequent in operation. Take the opportunity to actually count the
+ * on-slot CSGs of the given kctx, for robustness and clearer code logic.
+ */
+ for_each_set_bit(i, scheduler->csg_inuse_bitmap, num_groups) {
+ struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i];
+ struct kbase_queue_group *grp = csg_slot->resident_group;
+
+ if (unlikely(!grp))
+ continue;
+
+ if (grp->kctx == kctx)
+ on_slot_grps++;
+ }
+
+ heap_info->on_slot_grps = on_slot_grps;
+
+ /* If the kctx has no other CSGs on-slot, handle the heap reclaim related actions */
+ if (!heap_info->on_slot_grps) {
+ if (kctx->csf.sched.num_runnable_grps || kctx->csf.sched.num_idle_wait_grps) {
+ /* The kctx has other operational CSGs, attach it if not yet done */
+ if (list_empty(&heap_info->mgr_link)) {
+ dev_dbg(kctx->kbdev->dev,
+ "CSG_%d_%d_%d evict, add kctx to reclaim manager\n",
+ group->kctx->tgid, group->kctx->id, group->handle);
+
+ attach_to_sched_reclaim_mgr(kctx);
+ }
+ } else {
+ /* The kctx is a zombie after the group eviction, drop it out */
+ dev_dbg(kctx->kbdev->dev,
+ "CSG_%d_%d_%d evict leading to zombie kctx, detach from reclaim manager\n",
+ group->kctx->tgid, group->kctx->id, group->handle);
+
+ detach_from_sched_reclaim_mgr(kctx);
+ }
+ }
+}
+
+static void update_kctx_heap_info_on_grp_suspend(struct kbase_queue_group *group)
+{
+ struct kbase_context *kctx = group->kctx;
+ struct kbase_kctx_heap_info *heap_info = &kctx->csf.sched.heap_info;
+
+ lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock);
+
+ if (!WARN_ON(heap_info->on_slot_grps == 0))
+ heap_info->on_slot_grps--;
+ /* If the kctx has no CSGs on-slot, attach it to scheduler's reclaim manager */
+ if (heap_info->on_slot_grps == 0) {
+ dev_dbg(kctx->kbdev->dev, "CSG_%d_%d_%d off-slot, add kctx to reclaim manager\n",
+ group->kctx->tgid, group->kctx->id, group->handle);
+
+ attach_to_sched_reclaim_mgr(kctx);
+ }
+}
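For reference, the membership rule these hooks maintain can be restated compactly. The sketch below is illustrative only and not part of the patch; it simply mirrors the decision the eviction path above takes once a kctx has no CSGs left on slot.

/* Illustrative sketch, not part of the patch: mirrors the decision in
 * update_kctx_heap_info_on_grp_evict() once on_slot_grps has reached zero.
 */
static void example_reclaim_membership(struct kbase_context *kctx)
{
	struct kbase_kctx_heap_info *heap_info = &kctx->csf.sched.heap_info;

	if (kctx->csf.sched.num_runnable_grps || kctx->csf.sched.num_idle_wait_grps) {
		/* Still operational while off-slot: make it a reclaim candidate */
		if (list_empty(&heap_info->mgr_link))
			attach_to_sched_reclaim_mgr(kctx);
	} else {
		/* Zombie kctx: drop it from the reclaim manager */
		detach_from_sched_reclaim_mgr(kctx);
	}
}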
+
static void save_csg_slot(struct kbase_queue_group *group)
{
struct kbase_device *kbdev = group->kctx->kbdev;
@@ -2506,6 +2689,7 @@ static void save_csg_slot(struct kbase_queue_group *group)
}
update_offslot_non_idle_cnt_on_grp_suspend(group);
+ update_kctx_heap_info_on_grp_suspend(group);
}
}
@@ -2640,7 +2824,7 @@ static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio)
group->handle, group->kctx->tgid, group->kctx->id, slot,
prev_prio, prio);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_PRIO_UPDATE, group, prev_prio);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_PRIO_UPDATE, group, prev_prio);
set_bit(slot, kbdev->csf.scheduler.csg_slots_prio_update);
}
@@ -2790,12 +2974,13 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
dev_dbg(kbdev->dev, "Starting group %d of context %d_%d on slot %d with priority %u\n",
group->handle, kctx->tgid, kctx->id, slot, prio);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_START, group,
- (((u64)ep_cfg) << 32) |
- ((((u32)kctx->as_nr) & 0xF) << 16) |
- (state & (CSG_REQ_STATE_MASK >> CS_REQ_STATE_SHIFT)));
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_START_REQ, group,
+ (((u64)ep_cfg) << 32) | ((((u32)kctx->as_nr) & 0xF) << 16) |
+ (state & (CSG_REQ_STATE_MASK >> CS_REQ_STATE_SHIFT)));
kbasep_platform_event_work_begin(group);
+ /* Update the heap reclaim manager */
+ update_kctx_heap_info_on_grp_on_slot(group);
/* Programming a slot consumes a group from scanout */
update_offslot_non_idle_cnt_for_onslot_grp(group);
@@ -2835,8 +3020,8 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault,
group->run_state == KBASE_CSF_GROUP_RUNNABLE)) {
int new_val = atomic_dec_return(
&scheduler->non_idle_offslot_grps);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
- group, new_val);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group,
+ new_val);
}
for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
@@ -2860,14 +3045,16 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault,
if (fault)
group->run_state = KBASE_CSF_GROUP_FAULT_EVICTED;
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_EVICT_SCHED, group,
- (((u64)scheduler->total_runnable_grps) << 32) |
- ((u32)group->run_state));
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_EVICT, group,
+ (((u64)scheduler->total_runnable_grps) << 32) |
+ ((u32)group->run_state));
dev_dbg(kbdev->dev, "group %d exited scheduler, num_runnable_grps %d\n",
group->handle, scheduler->total_runnable_grps);
/* Notify a group has been evicted */
wake_up_all(&kbdev->csf.event_wait);
}
+
+ update_kctx_heap_info_on_grp_evict(group);
}
static int term_group_sync(struct kbase_queue_group *group)
@@ -2879,7 +3066,8 @@ static int term_group_sync(struct kbase_queue_group *group)
term_csg_slot(group);
remaining = wait_event_timeout(kbdev->csf.event_wait,
- csg_slot_stopped_locked(kbdev, group->csg_nr), remaining);
+ group->cs_unrecoverable || csg_slot_stopped_locked(kbdev, group->csg_nr),
+ remaining);
if (!remaining) {
dev_warn(kbdev->dev, "[%llu] term request timeout (%d ms) for group %d of context %d_%d on slot %d",
@@ -2900,6 +3088,7 @@ void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group)
{
struct kbase_device *kbdev = group->kctx->kbdev;
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ bool wait_for_termination = true;
bool on_slot;
kbase_reset_gpu_assert_failed_or_prevented(kbdev);
@@ -2914,39 +3103,28 @@ void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group)
#ifdef KBASE_PM_RUNTIME
/* If the queue group is on slot and Scheduler is in SLEEPING state,
- * then we need to wait here for Scheduler to exit the sleep state
- * (i.e. wait for the runtime suspend or power down of GPU). This would
- * be better than aborting the power down. The group will be suspended
- * anyways on power down, so won't have to send the CSG termination
- * request to FW.
+ * then we need to wake up the Scheduler to exit the sleep state rather
+ * than waiting for the runtime suspend or power down of the GPU.
+ * The group termination is usually triggered in the context of the
+ * Application thread, and it has been seen that certain Apps can destroy
+ * groups at random points, not necessarily when the App is exiting.
*/
if (on_slot && (scheduler->state == SCHED_SLEEPING)) {
- if (wait_for_scheduler_to_exit_sleep(kbdev)) {
+ scheduler_wakeup(kbdev, true);
+
+ /* Wait for MCU firmware to start running */
+ if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
dev_warn(
kbdev->dev,
- "Wait for scheduler to exit sleep state timedout when terminating group %d of context %d_%d on slot %d",
+ "[%llu] Wait for MCU active failed when terminating group %d of context %d_%d on slot %d",
+ kbase_backend_get_cycle_cnt(kbdev),
group->handle, group->kctx->tgid,
group->kctx->id, group->csg_nr);
-
- scheduler_wakeup(kbdev, true);
-
- /* Wait for MCU firmware to start running */
- if (kbase_csf_scheduler_wait_mcu_active(kbdev))
- dev_warn(
- kbdev->dev,
- "[%llu] Wait for MCU active failed when terminating group %d of context %d_%d on slot %d",
- kbase_backend_get_cycle_cnt(kbdev),
- group->handle, group->kctx->tgid,
- group->kctx->id, group->csg_nr);
+ /* No point in waiting for CSG termination if MCU didn't
+ * become active.
+ */
+ wait_for_termination = false;
}
-
- /* Check the group state again as scheduler lock would have been
- * released when waiting for the exit from SLEEPING state.
- */
- if (!queue_group_scheduled_locked(group))
- goto unlock;
-
- on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
}
#endif
if (!on_slot) {
@@ -2954,7 +3132,11 @@ void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group)
} else {
bool as_faulty;
- term_group_sync(group);
+ if (likely(wait_for_termination))
+ term_group_sync(group);
+ else
+ term_csg_slot(group);
+
/* Treat the CSG as terminated */
as_faulty = cleanup_csg_slot(group);
/* remove from the scheduler list */
@@ -3013,6 +3195,8 @@ static int scheduler_group_schedule(struct kbase_queue_group *group)
if (protm_grp && protm_grp != group) {
clear_bit((unsigned int)group->csg_nr,
scheduler->csg_slots_idle_mask);
+ /* Request an update to confirm the inferred condition. */
+ group->reevaluate_idle_status = true;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
scheduler->csg_slots_idle_mask[0]);
}
@@ -3039,8 +3223,7 @@ static int scheduler_group_schedule(struct kbase_queue_group *group)
/* A new group into the scheduler */
new_val = atomic_inc_return(
&kbdev->csf.scheduler.non_idle_offslot_grps);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
- group, new_val);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, new_val);
}
/* Since a group has become active now, check if GPU needs to be
@@ -3706,7 +3889,7 @@ void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev,
*/
WARN_ON(!kbase_reset_gpu_is_active(kbdev));
- KBASE_KTRACE_ADD(kbdev, EVICT_CTX_SLOTS, kctx, 0u);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_EVICT_CTX_SLOTS_START, kctx, 0u);
for (slot = 0; slot < num_groups; slot++) {
group = kbdev->csf.scheduler.csg_slots[slot].resident_group;
if (group && group->kctx == kctx) {
@@ -3783,8 +3966,8 @@ static bool scheduler_slot_protm_ack(struct kbase_device *const kbdev,
struct kbase_queue *queue = group->bound_queues[i];
clear_bit(i, group->protm_pending_bitmap);
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, PROTM_PENDING_CLEAR, group,
- queue, group->protm_pending_bitmap[0]);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_CLEAR, group, queue,
+ group->protm_pending_bitmap[0]);
if (!WARN_ON(!queue) && queue->enabled) {
struct kbase_csf_cmd_stream_info *stream =
@@ -3820,6 +4003,42 @@ static bool scheduler_slot_protm_ack(struct kbase_device *const kbdev,
}
/**
+ * protm_enter_set_next_pending_seq - Update the scheduler's
+ * tick_protm_pending_seq field to the sequence number of the next
+ * available on-slot protm-pending CSG.
+ *
+ * @kbdev: Pointer to the GPU device.
+ *
+ * If applicable, the function updates the scheduler's tick_protm_pending_seq
+ * field from the next available on-slot protm pending CSG. If not, the field
+ * is set to KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID.
+ */
+static void protm_enter_set_next_pending_seq(struct kbase_device *const kbdev)
+{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ u32 num_groups = kbdev->csf.global_iface.group_num;
+ u32 num_csis = kbdev->csf.global_iface.groups[0].stream_num;
+ DECLARE_BITMAP(active_csgs, MAX_SUPPORTED_CSGS) = { 0 };
+ u32 i;
+
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+ bitmap_xor(active_csgs, scheduler->csg_slots_idle_mask, scheduler->csg_inuse_bitmap,
+ num_groups);
+ /* Reset the tick's pending protm seq number to invalid initially */
+ scheduler->tick_protm_pending_seq = KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID;
+ for_each_set_bit(i, active_csgs, num_groups) {
+ struct kbase_queue_group *group = scheduler->csg_slots[i].resident_group;
+
+ /* Set to the next pending protm group's scan_seq_number */
+ if ((group != scheduler->active_protm_grp) &&
+ (!bitmap_empty(group->protm_pending_bitmap, num_csis)) &&
+ (group->scan_seq_num < scheduler->tick_protm_pending_seq))
+ scheduler->tick_protm_pending_seq = group->scan_seq_num;
+ }
+}
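Note that the bitmap_xor() above selects the active (non-idle) in-use slots; this appears to rely on csg_slots_idle_mask being maintained as a subset of csg_inuse_bitmap, in which case it is equivalent to the more direct and-not form. The sketch below is illustrative only and not part of the patch.

/* Illustrative only: with idle_mask a subset of inuse_bitmap,
 * inuse XOR idle == inuse AND-NOT idle.
 */
static void example_active_csgs(struct kbase_csf_scheduler *scheduler,
				unsigned long *active_csgs, u32 num_groups)
{
	bitmap_andnot(active_csgs, scheduler->csg_inuse_bitmap,
		      scheduler->csg_slots_idle_mask, num_groups);
}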
+
+/**
* scheduler_group_check_protm_enter - Request the given group to be evaluated
* for triggering the protected mode.
*
@@ -3842,6 +4061,12 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
lockdep_assert_held(&scheduler->lock);
+ /* This lock is taken to prevent the issuing of MMU commands during the
+ * transition to protected mode. This helps avoid the scenario where the
+ * entry to protected mode happens with a memory region being locked and
+ * the same region is then accessed by the GPU in protected mode.
+ */
+ mutex_lock(&kbdev->mmu_hw_mutex);
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
/* Check if the previous transition to enter & exit the protected
@@ -3849,8 +4074,7 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
*/
protm_in_use = kbase_csf_scheduler_protected_mode_in_use(kbdev) ||
kbdev->protected_mode;
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_CHECK_PROTM_ENTER, input_grp,
- protm_in_use);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER_CHECK, input_grp, protm_in_use);
/* Firmware samples the PROTM_PEND ACK bit for CSs when
* Host sends PROTM_ENTER global request. So if PROTM_PEND ACK bit
@@ -3890,22 +4114,62 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
/* Switch to protected mode */
scheduler->active_protm_grp = input_grp;
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_ENTER_PROTM,
- input_grp, 0u);
- /* Reset the tick's pending protm seq number */
- scheduler->tick_protm_pending_seq =
- KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER, input_grp,
+ 0u);
kbase_csf_enter_protected_mode(kbdev);
+ /* Set the pending protm seq number to the next one */
+ protm_enter_set_next_pending_seq(kbdev);
+
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
kbase_csf_wait_protected_mode_enter(kbdev);
+ mutex_unlock(&kbdev->mmu_hw_mutex);
+
+ scheduler->protm_enter_time = ktime_get_raw();
+
return;
}
}
}
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+ mutex_unlock(&kbdev->mmu_hw_mutex);
+}
+
+/**
+ * scheduler_check_pmode_progress - Check if protected mode execution is progressing
+ *
+ * @kbdev: Pointer to the GPU device.
+ *
+ * This function is called when the GPU is in protected mode.
+ *
+ * It will check if the time spent in protected mode is less
+ * than CSF_SCHED_PROTM_PROGRESS_TIMEOUT. If not, a PROTM_EXIT
+ * request is sent to the FW.
+ */
+static void scheduler_check_pmode_progress(struct kbase_device *kbdev)
+{
+ u64 protm_spent_time_ms;
+ u64 protm_progress_timeout =
+ kbase_get_timeout_ms(kbdev, CSF_SCHED_PROTM_PROGRESS_TIMEOUT);
+ s64 diff_ms_signed =
+ ktime_ms_delta(ktime_get_raw(), kbdev->csf.scheduler.protm_enter_time);
+
+ if (diff_ms_signed < 0)
+ return;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ protm_spent_time_ms = (u64)diff_ms_signed;
+ if (protm_spent_time_ms < protm_progress_timeout)
+ return;
+
+ dev_dbg(kbdev->dev, "Protected mode progress timeout: %llu >= %llu",
+ protm_spent_time_ms, protm_progress_timeout);
+
+ /* Prompt the FW to exit protected mode */
+ scheduler_force_protm_exit(kbdev);
}
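The check above is the usual monotonic-clock timeout pattern: record a raw timestamp when protected mode is entered (protm_enter_time, set in scheduler_group_check_protm_enter()) and compare the millisecond delta against the timeout on each scheduling pass. A generic sketch of the same pattern, for illustration only:

/* Illustrative only: has 'timeout_ms' elapsed since the 'since' timestamp? */
static bool example_timeout_elapsed(ktime_t since, u64 timeout_ms)
{
	s64 diff_ms = ktime_ms_delta(ktime_get_raw(), since);

	/* A negative delta means 'since' lies in the future; treat as not elapsed */
	return diff_ms >= 0 && (u64)diff_ms >= timeout_ms;
}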
static void scheduler_apply(struct kbase_device *kbdev)
@@ -4021,7 +4285,7 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev,
}
if (queue_group_idle_locked(group)) {
- if (on_slot_group_idle_locked(group))
+ if (can_schedule_idle_group(group))
list_add_tail(&group->link_to_schedule,
&scheduler->idle_groups_to_schedule);
continue;
@@ -4107,10 +4371,9 @@ static void scheduler_rotate_groups(struct kbase_device *kbdev)
new_head_grp = (!list_empty(list)) ?
list_first_entry(list, struct kbase_queue_group, link) :
NULL;
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_ROTATE_RUNNABLE,
- top_grp, top_ctx->csf.sched.num_runnable_grps);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_HEAD_RUNNABLE,
- new_head_grp, 0u);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_ROTATE, top_grp,
+ top_ctx->csf.sched.num_runnable_grps);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_HEAD, new_head_grp, 0u);
dev_dbg(kbdev->dev,
"groups rotated for a context, num_runnable_groups: %u\n",
scheduler->top_ctx->csf.sched.num_runnable_grps);
@@ -4141,13 +4404,12 @@ static void scheduler_rotate_ctxs(struct kbase_device *kbdev)
struct kbase_context *new_head_kctx;
list_move_tail(&pos->csf.link, list);
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_ROTATE_RUNNABLE, pos,
- 0u);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_ROTATE, pos, 0u);
new_head_kctx = (!list_empty(list)) ?
list_first_entry(list, struct kbase_context, csf.link) :
NULL;
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_HEAD_RUNNABLE,
- new_head_kctx, 0u);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_HEAD, new_head_kctx,
+ 0u);
dev_dbg(kbdev->dev, "contexts rotated\n");
}
}
@@ -4162,12 +4424,17 @@ static void scheduler_rotate_ctxs(struct kbase_device *kbdev)
* @kbdev: Pointer to the GPU device.
* @csg_bitmap: Bitmap of the CSG slots for which
* the status update request completed successfully.
- * @failed_csg_bitmap: Bitmap of the CSG slots for which
+ * @failed_csg_bitmap: Bitmap of the idle CSG slots for which
* the status update request timed out.
*
* This function sends a CSG status update request for all the CSG slots
- * present in the bitmap scheduler->csg_slots_idle_mask and wait for the
- * request to complete.
+ * present in the bitmap scheduler->csg_slots_idle_mask. Additionally, if
+ * the group's 'reevaluate_idle_status' field is set, the nominally non-idle
+ * slots are also included in the status update for a confirmation of their
+ * status. The function waits for the status update request to complete and
+ * returns the bitmap of slots whose update completed, along with the bitmap
+ * of any idle-flagged slots that timed out.
+ *
* The bits set in the scheduler->csg_slots_idle_mask bitmap are cleared by
* this function.
*/
@@ -4179,6 +4446,7 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
struct kbase_csf_global_iface *const global_iface =
&kbdev->csf.global_iface;
unsigned long flags, i;
+ u32 active_chk = 0;
lockdep_assert_held(&scheduler->lock);
@@ -4190,6 +4458,7 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
struct kbase_csf_cmd_stream_group_info *const ginfo =
&global_iface->groups[i];
u32 csg_req;
+ bool idle_flag;
if (WARN_ON(!group)) {
clear_bit(i, scheduler->csg_inuse_bitmap);
@@ -4197,30 +4466,47 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
continue;
}
- if (test_bit(i, scheduler->csg_slots_idle_mask)) {
- clear_bit(i, scheduler->csg_slots_idle_mask);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
- scheduler->csg_slots_idle_mask[0]);
+ idle_flag = test_bit(i, scheduler->csg_slots_idle_mask);
+ if (idle_flag || group->reevaluate_idle_status) {
+ if (idle_flag) {
+#ifdef CONFIG_MALI_DEBUG
+ if (!bitmap_empty(group->protm_pending_bitmap,
+ ginfo->stream_num)) {
+ dev_warn(kbdev->dev,
+ "Idle bit set for group %d of ctx %d_%d on slot %d with pending protm execution",
+ group->handle, group->kctx->tgid,
+ group->kctx->id, (int)i);
+ }
+#endif
+ clear_bit(i, scheduler->csg_slots_idle_mask);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
+ scheduler->csg_slots_idle_mask[0]);
+ } else {
+ /* Updates include slots for which reevaluation is needed.
+ * The extra included slots are tracked in active_chk.
+ * For protm pending slots, their active status is assured,
+ * so there is no need to request an update.
+ */
+ active_chk |= BIT(i);
+ group->reevaluate_idle_status = false;
+ }
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_UPDATE_IDLE_SLOT_REQ, group, i);
-
csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
csg_req ^= CSG_REQ_STATUS_UPDATE_MASK;
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
CSG_REQ_STATUS_UPDATE_MASK);
- set_bit(i, csg_bitmap);
- } else if (group->run_state == KBASE_CSF_GROUP_IDLE) {
- /* In interrupt context, some previously 'nominal' idle
- * on-slot group could have been de-idled. Its idle flag may
- * have been cleared, mark the correct run_state for the next
- * tick/tock cycle here in the scheduler process context.
+ /* Track the slot update requests in csg_bitmap.
+ * Note: if the scheduler requested an extended update, the resulting
+ * csg_bitmap is the union of idle_flags and active_chk; otherwise it
+ * is identical to idle_flags.
*/
+ set_bit(i, csg_bitmap);
+ } else {
group->run_state = KBASE_CSF_GROUP_RUNNABLE;
}
}
- /* All the idle flags transferred to csg_bitmap, check its empty here */
- WARN_ON(!bitmap_empty(scheduler->csg_slots_idle_mask, num_groups));
/* The groups are aggregated into a single kernel doorbell request */
if (!bitmap_empty(csg_bitmap, num_groups)) {
@@ -4243,9 +4529,19 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
/* Store the bitmap of timed out slots */
bitmap_copy(failed_csg_bitmap, csg_bitmap, num_groups);
csg_bitmap[0] = ~csg_bitmap[0] & db_slots;
+
+ /* Mask off any failed bits contributed by the active slots, so that the
+ * failed bitmap reported back to the caller contains only bits that came
+ * from the idle flags. This way, an idle-flagged slot whose update failed
+ * is still treated as 'idle' (an informed guess, as the update did not
+ * reach a conclusive result), while an active slot whose update failed is
+ * still treated as 'non-idle'. This gives graceful handling of the
+ * unexpected timeout condition.
+ */
+ failed_csg_bitmap[0] &= ~active_chk;
+
} else {
- KBASE_KTRACE_ADD(kbdev, SLOTS_STATUS_UPDATE_ACK, NULL,
- db_slots);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_UPDATE_IDLE_SLOTS_ACK, NULL, db_slots);
csg_bitmap[0] = db_slots;
}
} else {
@@ -4326,8 +4622,7 @@ static void scheduler_scan_idle_groups(struct kbase_device *kbdev)
list_for_each_entry_safe(group, n, &scheduler->idle_groups_to_schedule,
link_to_schedule) {
-
- WARN_ON(!on_slot_group_idle_locked(group));
+ WARN_ON(!can_schedule_idle_group(group));
if (!scheduler->ngrp_to_schedule) {
/* keep the top csg's origin */
@@ -4462,7 +4757,7 @@ static bool all_on_slot_groups_remained_idle(struct kbase_device *kbdev)
u64 const *output_addr;
u64 cur_extract_ofs;
- if (!queue)
+ if (!queue || !queue->user_io_addr)
continue;
output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE);
@@ -4569,7 +4864,7 @@ static bool scheduler_suspend_on_idle(struct kbase_device *kbdev)
atomic_read(
&kbdev->csf.scheduler.non_idle_offslot_grps));
/* Bring forward the next tick */
- kbase_csf_scheduler_advance_tick(kbdev);
+ kbase_csf_scheduler_tick_advance(kbdev);
return false;
}
@@ -4592,14 +4887,14 @@ static void gpu_idle_worker(struct work_struct *work)
bool scheduler_is_idle_suspendable = false;
bool all_groups_suspended = false;
- KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_BEGIN, NULL, 0u);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_START, NULL, 0u);
#define __ENCODE_KTRACE_INFO(reset, idle, all_suspend) \
(((u32)reset) | (((u32)idle) << 4) | (((u32)all_suspend) << 8))
if (kbase_reset_gpu_try_prevent(kbdev)) {
dev_warn(kbdev->dev, "Quit idle for failing to prevent gpu reset.\n");
- KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_END, NULL,
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_END, NULL,
__ENCODE_KTRACE_INFO(true, false, false));
return;
}
@@ -4614,11 +4909,11 @@ static void gpu_idle_worker(struct work_struct *work)
scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev);
if (scheduler_is_idle_suspendable) {
- KBASE_KTRACE_ADD(kbdev, GPU_IDLE_HANDLING_START, NULL,
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_HANDLING_START, NULL,
kbase_csf_ktrace_gpu_cycle_cnt(kbdev));
#ifdef KBASE_PM_RUNTIME
if (kbase_pm_gpu_sleep_allowed(kbdev) &&
- scheduler->total_runnable_grps)
+ kbase_csf_scheduler_get_nr_active_csgs(kbdev))
scheduler_sleep_on_idle(kbdev);
else
#endif
@@ -4630,9 +4925,8 @@ unlock:
#endif
mutex_unlock(&scheduler->lock);
kbase_reset_gpu_allow(kbdev);
- KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_END, NULL,
- __ENCODE_KTRACE_INFO(false,
- scheduler_is_idle_suspendable,
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_END, NULL,
+ __ENCODE_KTRACE_INFO(false, scheduler_is_idle_suspendable,
all_groups_suspended));
#undef __ENCODE_KTRACE_INFO
}
@@ -4925,7 +5219,7 @@ static int scheduler_prepare(struct kbase_device *kbdev)
*/
atomic_set(&scheduler->non_idle_offslot_grps,
scheduler->non_idle_scanout_grps);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, NULL,
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, NULL,
scheduler->non_idle_scanout_grps);
/* Adds those idle but runnable groups to the scanout list */
@@ -5123,8 +5417,12 @@ redo_local_tock:
dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d",
protm_grp->handle);
new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
- protm_grp, new_val);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, protm_grp,
+ new_val);
+
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+
+ scheduler_check_pmode_progress(kbdev);
} else if (scheduler->top_grp) {
if (protm_grp)
dev_dbg(kbdev->dev, "Scheduler drop protm exec: group-%d",
@@ -5178,11 +5476,9 @@ redo_local_tock:
goto redo_local_tock;
}
}
-
- return;
+ } else {
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
}
-
- spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
}
/**
@@ -5236,14 +5532,11 @@ static bool can_skip_scheduling(struct kbase_device *kbdev)
static void schedule_on_tock(struct kthread_work *work)
{
- struct kbase_device *kbdev = container_of(work, struct kbase_device,
- csf.scheduler.tock_work.work);
+ struct kbase_device *kbdev =
+ container_of(work, struct kbase_device, csf.scheduler.tock_work.work);
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
int err;
- /* Tock work item is serviced */
- scheduler->tock_pending_request = false;
-
err = kbase_reset_gpu_try_prevent(kbdev);
/* Regardless of whether reset failed or is currently happening, exit
* early
@@ -5259,8 +5552,9 @@ static void schedule_on_tock(struct kthread_work *work)
scheduler->state = SCHED_BUSY;
/* Undertaking schedule action steps */
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK, NULL, 0u);
- schedule_actions(kbdev, false);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_START, NULL, 0u);
+ while (atomic_cmpxchg(&scheduler->pending_tock_work, true, false) == true)
+ schedule_actions(kbdev, false);
/* Record time information on a non-skipped tock */
scheduler->last_schedule = jiffies;
@@ -5284,8 +5578,8 @@ exit_no_schedule_unlock:
static void schedule_on_tick(struct kthread_work *work)
{
- struct kbase_device *kbdev = container_of(work, struct kbase_device,
- csf.scheduler.tick_work);
+ struct kbase_device *kbdev =
+ container_of(work, struct kbase_device, csf.scheduler.tick_work);
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
int err = kbase_reset_gpu_try_prevent(kbdev);
@@ -5304,8 +5598,7 @@ static void schedule_on_tick(struct kthread_work *work)
scheduler->state = SCHED_BUSY;
/* Undertaking schedule action steps */
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK, NULL,
- scheduler->total_runnable_grps);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_START, NULL, scheduler->total_runnable_grps);
schedule_actions(kbdev, true);
/* Record time information */
@@ -5566,8 +5859,7 @@ static bool scheduler_handle_reset_in_protected_mode(struct kbase_device *kbdev)
* anyways.
*/
new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
- group, new_val);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, new_val);
}
unlock:
@@ -5575,10 +5867,15 @@ unlock:
return suspend_on_slot_groups;
}
+static void cancel_tick_work(struct kbase_csf_scheduler *const scheduler)
+{
+ kthread_cancel_work_sync(&scheduler->tick_work);
+}
+
static void cancel_tock_work(struct kbase_csf_scheduler *const scheduler)
{
+ atomic_set(&scheduler->pending_tock_work, false);
kthread_cancel_delayed_work_sync(&scheduler->tock_work);
- scheduler->tock_pending_request = false;
}
static void scheduler_inner_reset(struct kbase_device *kbdev)
@@ -5592,7 +5889,7 @@ static void scheduler_inner_reset(struct kbase_device *kbdev)
/* Cancel any potential queued delayed work(s) */
cancel_delayed_work_sync(&scheduler->gpu_idle_work);
cancel_tick_timer(kbdev);
- kthread_cancel_work_sync(&scheduler->tick_work);
+ cancel_tick_work(scheduler);
cancel_tock_work(scheduler);
cancel_delayed_work_sync(&scheduler->ping_work);
@@ -5601,8 +5898,8 @@ static void scheduler_inner_reset(struct kbase_device *kbdev)
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS);
if (scheduler->active_protm_grp)
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_EXIT_PROTM,
- scheduler->active_protm_grp, 0u);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_EXIT, scheduler->active_protm_grp,
+ 0u);
scheduler->active_protm_grp = NULL;
memset(kbdev->csf.scheduler.csg_slots, 0,
num_groups * sizeof(struct kbase_csf_csg_slot));
@@ -5625,7 +5922,7 @@ void kbase_csf_scheduler_reset(struct kbase_device *kbdev)
WARN_ON(!kbase_reset_gpu_is_active(kbdev));
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET, NULL, 0u);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET_START, NULL, 0u);
if (scheduler_handle_reset_in_protected_mode(kbdev) &&
!suspend_active_queue_groups_on_reset(kbdev)) {
@@ -5727,9 +6024,9 @@ static void firmware_aliveness_monitor(struct work_struct *work)
kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
kbase_reset_gpu(kbdev);
} else if (kbase_csf_scheduler_get_nr_active_csgs(kbdev) == 1) {
- queue_delayed_work(system_long_wq,
- &kbdev->csf.scheduler.ping_work,
- msecs_to_jiffies(FIRMWARE_PING_INTERVAL_MS));
+ queue_delayed_work(
+ system_long_wq, &kbdev->csf.scheduler.ping_work,
+ msecs_to_jiffies(kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_PING_TIMEOUT)));
}
kbase_pm_context_idle(kbdev);
@@ -6036,7 +6333,7 @@ static bool check_sync_update_for_on_slot_group(
stream, CS_STATUS_WAIT);
unsigned long flags;
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_STATUS_WAIT,
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS,
queue->group, queue, status);
if (!CS_STATUS_WAIT_SYNC_WAIT_GET(status))
@@ -6080,6 +6377,10 @@ static bool check_sync_update_for_on_slot_group(
scheduler->csg_slots_idle_mask[0]);
spin_unlock_irqrestore(
&scheduler->interrupt_lock, flags);
+ /* Request the scheduler to confirm the condition inferred
+ * here inside the protected mode.
+ */
+ group->reevaluate_idle_status = true;
group->run_state = KBASE_CSF_GROUP_RUNNABLE;
}
@@ -6176,11 +6477,6 @@ static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev)
continue;
if (check_sync_update_for_on_slot_group(group)) {
- /* As sync update has been performed for an on-slot
- * group, when MCU is in sleep state, ring the doorbell
- * so that FW can re-evaluate the SYNC_WAIT on wakeup.
- */
- kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
scheduler_wakeup(kbdev, true);
return;
}
@@ -6234,7 +6530,7 @@ static void check_group_sync_update_worker(struct kthread_work *work)
mutex_lock(&scheduler->lock);
- KBASE_KTRACE_ADD(kbdev, GROUP_SYNC_UPDATE_WORKER_BEGIN, kctx, 0u);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START, kctx, 0u);
if (kctx->csf.sched.num_idle_wait_grps != 0) {
struct kbase_queue_group *group, *temp;
@@ -6281,7 +6577,7 @@ static void check_group_sync_update_worker(struct kthread_work *work)
check_sync_update_after_sc_power_down(kbdev);
#endif
- KBASE_KTRACE_ADD(kbdev, GROUP_SYNC_UPDATE_WORKER_END, kctx, 0u);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_END, kctx, 0u);
mutex_unlock(&scheduler->lock);
}
@@ -6291,7 +6587,8 @@ enum kbase_csf_event_callback_action check_group_sync_update_cb(void *param)
{
struct kbase_context *const kctx = param;
- KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT, kctx, 0u);
+ KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_GROUP_SYNC_UPDATE_EVENT, kctx, 0u);
+
kthread_queue_work(&kctx->csf.sched.sync_update_worker,
&kctx->csf.sched.sync_update_work);
@@ -6313,15 +6610,12 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx)
kctx->csf.sched.num_idle_wait_grps = 0;
kctx->csf.sched.ngrp_to_schedule = 0;
- kthread_init_worker(&kctx->csf.sched.sync_update_worker);
- kctx->csf.sched.sync_update_worker_thread = kbase_create_realtime_thread(
- kctx->kbdev,
- kthread_worker_fn,
- &kctx->csf.sched.sync_update_worker,
- "mali_kbase_csf_sync_update");
- if (IS_ERR(kctx->csf.sched.sync_update_worker_thread)) {
+ err = kbase_create_realtime_thread(kctx->kbdev, kthread_worker_fn,
+ &kctx->csf.sched.sync_update_worker,
+ "mali_kbase_csf_sync_update");
+ if (err) {
dev_err(kctx->kbdev->dev,
- "Failed to initialize scheduler context workqueue");
+ "Failed to initialize scheduler context kworker");
return -ENOMEM;
}
@@ -6333,10 +6627,13 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx)
if (err) {
dev_err(kctx->kbdev->dev,
"Failed to register a sync update callback");
- kthread_flush_worker(&kctx->csf.sched.sync_update_worker);
- kthread_stop(kctx->csf.sched.sync_update_worker_thread);
+ kbase_destroy_kworker_stack(&kctx->csf.sched.sync_update_worker);
}
+ /* Per-kctx heap_info object initialization */
+ memset(&kctx->csf.sched.heap_info, 0, sizeof(struct kbase_kctx_heap_info));
+ INIT_LIST_HEAD(&kctx->csf.sched.heap_info.mgr_link);
+
return err;
}
@@ -6344,8 +6641,7 @@ void kbase_csf_scheduler_context_term(struct kbase_context *kctx)
{
kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx);
kthread_cancel_work_sync(&kctx->csf.sched.sync_update_work);
- kthread_flush_worker(&kctx->csf.sched.sync_update_worker);
- kthread_stop(kctx->csf.sched.sync_update_worker_thread);
+ kbase_destroy_kworker_stack(&kctx->csf.sched.sync_update_worker);
}
int kbase_csf_scheduler_init(struct kbase_device *kbdev)
@@ -6367,21 +6663,33 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev)
return 0;
}
+static void scheduler_init_heap_reclaim_mgr(struct kbase_csf_scheduler *const scheduler)
+{
+ INIT_LIST_HEAD(&scheduler->reclaim_mgr.candidate_ctxs);
+ INIT_LIST_HEAD(&scheduler->reclaim_mgr.scan_list_ctxs);
+ atomic_set(&scheduler->reclaim_mgr.est_cand_pages, 0);
+ atomic_set(&scheduler->reclaim_mgr.mgr_scan_pages, 0);
+}
+
+static void scheduler_term_heap_reclaim_mgr(struct kbase_csf_scheduler *const scheduler)
+{
+ WARN_ON(!list_empty(&scheduler->reclaim_mgr.candidate_ctxs));
+ WARN_ON(!list_empty(&scheduler->reclaim_mgr.scan_list_ctxs));
+ WARN_ON(atomic_read(&scheduler->reclaim_mgr.est_cand_pages));
+ WARN_ON(atomic_read(&scheduler->reclaim_mgr.mgr_scan_pages));
+}
+
int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
{
+ int err;
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
scheduler->timer_enabled = true;
- kthread_init_worker(&scheduler->csf_worker);
- scheduler->csf_worker_thread = kbase_create_realtime_thread(
- kbdev,
- kthread_worker_fn,
- &scheduler->csf_worker,
- "csf_scheduler");
-
- if (!scheduler->csf_worker_thread) {
- dev_err(kbdev->dev, "Failed to allocate scheduler workqueue\n");
+ err = kbase_create_realtime_thread(kbdev, kthread_worker_fn, &scheduler->csf_worker,
+ "csf_scheduler");
+ if (err) {
+ dev_err(kbdev->dev, "Failed to allocate scheduler kworker\n");
return -ENOMEM;
}
scheduler->idle_wq = alloc_ordered_workqueue(
@@ -6389,13 +6697,13 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
if (!scheduler->idle_wq) {
dev_err(kbdev->dev,
"Failed to allocate GPU idle scheduler workqueue\n");
- kthread_flush_worker(&kbdev->csf.scheduler.csf_worker);
- kthread_stop(kbdev->csf.scheduler.csf_worker_thread);;
+ kbase_destroy_kworker_stack(&kbdev->csf.scheduler.csf_worker);
return -ENOMEM;
}
kthread_init_work(&scheduler->tick_work, schedule_on_tick);
kthread_init_delayed_work(&scheduler->tock_work, schedule_on_tock);
+ atomic_set(&scheduler->pending_tock_work, false);
INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor);
@@ -6417,7 +6725,6 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
scheduler->top_ctx = NULL;
scheduler->top_grp = NULL;
scheduler->last_schedule = 0;
- scheduler->tock_pending_request = false;
scheduler->active_protm_grp = NULL;
scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS;
scheduler_doorbell_init(kbdev);
@@ -6436,6 +6743,9 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
scheduler->tick_timer.function = tick_timer_callback;
scheduler->tick_timer_active = false;
+ scheduler_init_heap_reclaim_mgr(scheduler);
+ kbase_csf_tiler_heap_register_shrinker(kbdev);
+
return 0;
}
@@ -6466,7 +6776,7 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev)
mutex_unlock(&kbdev->csf.scheduler.lock);
cancel_delayed_work_sync(&kbdev->csf.scheduler.ping_work);
cancel_tick_timer(kbdev);
- kthread_cancel_work_sync(&kbdev->csf.scheduler.tick_work);
+ cancel_tick_work(&kbdev->csf.scheduler);
cancel_tock_work(&kbdev->csf.scheduler);
mutex_destroy(&kbdev->csf.scheduler.lock);
kfree(kbdev->csf.scheduler.csg_slots);
@@ -6478,10 +6788,11 @@ void kbase_csf_scheduler_early_term(struct kbase_device *kbdev)
{
if (kbdev->csf.scheduler.idle_wq)
destroy_workqueue(kbdev->csf.scheduler.idle_wq);
- if (kbdev->csf.scheduler.csf_worker_thread) {
- kthread_flush_worker(&kbdev->csf.scheduler.csf_worker);
- kthread_stop(kbdev->csf.scheduler.csf_worker_thread);
- }
+ if (kbdev->csf.scheduler.csf_worker.task)
+ kbase_destroy_kworker_stack(&kbdev->csf.scheduler.csf_worker);
+
+ kbase_csf_tiler_heap_unregister_shrinker(kbdev);
+ scheduler_term_heap_reclaim_mgr(&kbdev->csf.scheduler);
}
/**
@@ -6546,13 +6857,12 @@ void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev,
if (currently_enabled && !enable) {
scheduler->timer_enabled = false;
cancel_tick_timer(kbdev);
- kthread_cancel_delayed_work_sync(&scheduler->tock_work);
- scheduler->tock_pending_request = false;
mutex_unlock(&scheduler->lock);
/* The non-sync version to cancel the normal work item is not
* available, so need to drop the lock before cancellation.
*/
- kthread_cancel_work_sync(&scheduler->tick_work);
+ cancel_tick_work(scheduler);
+ cancel_tock_work(scheduler);
return;
}
@@ -6624,7 +6934,7 @@ int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
/* Cancel any potential queued delayed work(s) */
- kthread_cancel_work_sync(&scheduler->tick_work);
+ cancel_tick_work(scheduler);
cancel_tock_work(scheduler);
result = kbase_reset_gpu_prevent_and_wait(kbdev);
@@ -6804,3 +7114,204 @@ void kbase_csf_scheduler_force_wakeup(struct kbase_device *kbdev)
scheduler_wakeup(kbdev, true);
mutex_unlock(&scheduler->lock);
}
+
+static bool defer_count_unused_heap_pages(struct kbase_context *kctx)
+{
+ struct kbase_kctx_heap_info *info = &kctx->csf.sched.heap_info;
+ u32 prio, shift;
+ unsigned long ms;
+
+ for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_LOW;
+ prio++) {
+ if (!list_empty(&kctx->csf.sched.runnable_groups[prio]))
+ break;
+ }
+
+ shift = (prio == KBASE_QUEUE_GROUP_PRIORITY_REALTIME) ? 0 : prio + 1;
+ /* Delay time from priority */
+ ms = HEAP_RECLAIM_PRIO_DEFERRAL_MS >> shift;
+
+ WARN_ON(!(info->flags & CSF_CTX_RECLAIM_CANDI_FLAG));
+
+ if (kctx->csf.sched.num_idle_wait_grps)
+ ms += HEAP_RECLAIM_WAIT_SYNC_DEFERRAL_MS;
+
+ return time_before(jiffies, info->attach_jiffies + msecs_to_jiffies(ms));
+}
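The deferral window above shrinks with the priority of the kctx's highest-priority runnable group: shift is 0 for realtime and prio + 1 otherwise, so lower-priority contexts become countable for reclaim sooner. A hypothetical worked example follows; the real HEAP_RECLAIM_*_DEFERRAL_MS values are defined outside this hunk, so a 1000 ms base and the priority ordering implied by the loop above are assumptions.

/* Hypothetical numbers, for illustration only:
 *   REALTIME (prio 0): shift 0 -> 1000 ms >> 0 = 1000 ms deferral
 *   HIGH     (prio 1): shift 2 -> 1000 ms >> 2 =  250 ms deferral
 *   MEDIUM   (prio 2): shift 3 -> 1000 ms >> 3 =  125 ms deferral
 * plus HEAP_RECLAIM_WAIT_SYNC_DEFERRAL_MS when idle-wait groups exist.
 */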
+
+static unsigned long
+reclaim_count_candidates_heap_pages(struct kbase_device *kbdev, unsigned long freed_pages,
+ struct kbase_csf_tiler_heap_shrink_control *shrink_ctrl)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ struct kbase_csf_sched_heap_reclaim_mgr *const mgr = &scheduler->reclaim_mgr;
+ struct kbase_kctx_heap_info *info, *tmp;
+ unsigned long count = 0;
+ u32 cnt_ctxs = 0;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ list_for_each_entry_safe(info, tmp, &mgr->candidate_ctxs, mgr_link) {
+ struct kbase_context *kctx =
+ container_of(info, struct kbase_context, csf.sched.heap_info);
+
+ /* If the kctx has not yet exhausted its deferral time, keep it as a candidate */
+ if (defer_count_unused_heap_pages(kctx))
+ continue;
+
+ /* Count the freeable pages of the kctx */
+ info->nr_scan_pages = shrink_ctrl->count_cb(kctx);
+
+ dev_dbg(kctx->kbdev->dev, "kctx_%d_%d heap pages count : %u\n", kctx->tgid,
+ kctx->id, info->nr_scan_pages);
+ cnt_ctxs++;
+
+ /* The kctx is either moved to the pages freeable kctx list, or removed
+ * from the manager if no pages are available for reclaim.
+ */
+ if (info->nr_scan_pages) {
+ /* Move the kctx to the scan_list inside the manager */
+ list_move_tail(&info->mgr_link, &mgr->scan_list_ctxs);
+ WARN_ON(atomic_sub_return(info->nr_est_pages, &mgr->est_cand_pages) < 0);
+ atomic_add(info->nr_scan_pages, &mgr->mgr_scan_pages);
+ info->flags = CSF_CTX_RECLAIM_SCAN_FLAG;
+ count += info->nr_scan_pages;
+ } else
+ detach_from_sched_reclaim_mgr(kctx);
+
+ /* Combine with the pages already freed by the shrinker's scan method to
+ * determine whether enough has been counted, so the scheduler lock is not
+ * held for too long.
+ */
+ if ((freed_pages + count) > HEAP_RECLAIM_COUNT_BATCH_SIZE)
+ break;
+ }
+
+ dev_dbg(kbdev->dev,
+ "Reclaim CSF count unused heap pages: %lu (processed kctxs: %u, from_scan: %lu)\n",
+ count, cnt_ctxs, freed_pages);
+
+ return count;
+}
+
+static unsigned long
+reclaim_free_counted_heap_pages(struct kbase_device *kbdev,
+ struct kbase_csf_tiler_heap_shrink_control *shrink_ctrl)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ struct kbase_csf_sched_heap_reclaim_mgr *const mgr = &scheduler->reclaim_mgr;
+ unsigned long freed = 0;
+ u32 cnt_ctxs = 0;
+ struct kbase_kctx_heap_info *info, *tmp;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+ if (WARN_ON(!shrink_ctrl->scan_cb))
+ return 0;
+
+ list_for_each_entry_safe(info, tmp, &mgr->scan_list_ctxs, mgr_link) {
+ struct kbase_context *kctx =
+ container_of(info, struct kbase_context, csf.sched.heap_info);
+ /* Attempt freeing all the counted heap pages from the kctx */
+ u32 n = shrink_ctrl->scan_cb(kctx, info->nr_scan_pages);
+
+ /* The free is attempted on all the counted heap pages. If the kctx has
+ * all its counted heap pages freed, or it cannot offer any more, drop
+ * it from the reclaim manager; otherwise leave it in. If the
+ * kctx changes its state (i.e. some CSGs become on-slot), the
+ * scheduler will pull it out.
+ */
+ if (n >= info->nr_scan_pages || n == 0)
+ detach_from_sched_reclaim_mgr(kctx);
+ else
+ info->nr_scan_pages -= n;
+
+ freed += n;
+ cnt_ctxs++;
+
+ /* Enough has been freed; break to avoid holding the lock for too long */
+ if (freed >= HEAP_RECLAIM_SCAN_BATCH_SIZE)
+ break;
+ }
+
+ dev_dbg(kbdev->dev, "Reclaim CSF heap freed pages: %lu (processed kctxs: %u)\n", freed,
+ cnt_ctxs);
+
+ return freed;
+}
+
+unsigned long
+kbase_csf_scheduler_count_free_heap_pages(struct kbase_device *kbdev,
+ struct kbase_csf_tiler_heap_shrink_control *shrink_ctrl)
+{
+ struct kbase_csf_sched_heap_reclaim_mgr *mgr = &kbdev->csf.scheduler.reclaim_mgr;
+
+ unsigned long scan_count = atomic_read(&mgr->mgr_scan_pages);
+ unsigned long est_count = atomic_read(&mgr->est_cand_pages);
+ unsigned long total;
+ bool counted = false;
+
+ if (mutex_trylock(&kbdev->csf.scheduler.lock)) {
+ reclaim_count_candidates_heap_pages(kbdev, 0, shrink_ctrl);
+ mutex_unlock(&kbdev->csf.scheduler.lock);
+ counted = true;
+ scan_count = atomic_read(&mgr->mgr_scan_pages);
+ /* We've processed the candidates, so override the estimate with 0 */
+ est_count = 0;
+ }
+
+ total = scan_count + est_count;
+ dev_dbg(kbdev->dev, "Reclaim count unused pages: %lu (scan: %lu, extra_est: %lu, counted: %d)\n",
+ total, scan_count, est_count, counted);
+
+ return total;
+}
+
+unsigned long
+kbase_csf_scheduler_scan_free_heap_pages(struct kbase_device *kbdev,
+ struct kbase_csf_tiler_heap_shrink_control *shrink_ctrl)
+{
+ struct kbase_csf_sched_heap_reclaim_mgr *mgr = &kbdev->csf.scheduler.reclaim_mgr;
+ struct shrink_control *sc = shrink_ctrl->sc;
+ unsigned long freed = 0;
+ unsigned long count = 0;
+ unsigned long avail = 0;
+
+ /* If the Scheduler is busy, return 0 */
+ if (!mutex_trylock(&kbdev->csf.scheduler.lock)) {
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+
+ /* Wait for roughly 2 ms */
+ wait_event_timeout(kbdev->csf.event_wait, (scheduler->state != SCHED_BUSY),
+ msecs_to_jiffies(2));
+ if (!mutex_trylock(&kbdev->csf.scheduler.lock)) {
+ dev_dbg(kbdev->dev,
+ "Reclaim scan sees device busy (freed: 0, number to scan: %lu)\n",
+ sc->nr_to_scan);
+ return 0;
+ }
+ }
+
+ avail = atomic_read(&mgr->mgr_scan_pages);
+ if (avail) {
+ freed = reclaim_free_counted_heap_pages(kbdev, shrink_ctrl);
+ if (freed < sc->nr_to_scan && atomic_read(&mgr->est_cand_pages))
+ count = reclaim_count_candidates_heap_pages(kbdev, freed, shrink_ctrl);
+ } else {
+ count = reclaim_count_candidates_heap_pages(kbdev, freed, shrink_ctrl);
+ }
+
+ /* If a count was done in this call, attempt the reclaim free again */
+ if (count)
+ freed += reclaim_free_counted_heap_pages(kbdev, shrink_ctrl);
+
+ mutex_unlock(&kbdev->csf.scheduler.lock);
+
+ dev_info(kbdev->dev,
+ "Reclaim scan freed pages: %lu (avail: %lu, extra: %lu, number to scan: %lu)\n",
+ freed, avail, count, sc->nr_to_scan);
+
+ /* With no availability and no new extra count, return STOP */
+ if (!avail && !count)
+ return SHRINK_STOP;
+ else
+ return freed;
+}
diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.h b/mali_kbase/csf/mali_kbase_csf_scheduler.h
index 7c39415..358d18a 100644
--- a/mali_kbase/csf/mali_kbase_csf_scheduler.h
+++ b/mali_kbase/csf/mali_kbase_csf_scheduler.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -24,6 +24,7 @@
#include "mali_kbase_csf.h"
#include "mali_kbase_csf_event.h"
+#include "mali_kbase_csf_tiler_heap_def.h"
/**
* kbase_csf_scheduler_queue_start() - Enable the running of GPU command queue
@@ -472,7 +473,7 @@ static inline bool kbase_csf_scheduler_all_csgs_idle(struct kbase_device *kbdev)
}
/**
- * kbase_csf_scheduler_advance_tick_nolock() - Advance the scheduling tick
+ * kbase_csf_scheduler_tick_advance_nolock() - Advance the scheduling tick
*
* @kbdev: Pointer to the device
*
@@ -482,23 +483,23 @@ static inline bool kbase_csf_scheduler_all_csgs_idle(struct kbase_device *kbdev)
* The caller must hold the interrupt lock.
*/
static inline void
-kbase_csf_scheduler_advance_tick_nolock(struct kbase_device *kbdev)
+kbase_csf_scheduler_tick_advance_nolock(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
lockdep_assert_held(&scheduler->interrupt_lock);
if (scheduler->tick_timer_active) {
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_ADVANCE_TICK, NULL, 0u);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_ADVANCE, NULL, 0u);
scheduler->tick_timer_active = false;
kthread_queue_work(&scheduler->csf_worker, &scheduler->tick_work);
} else {
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_NOADVANCE_TICK, NULL, 0u);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_NOADVANCE, NULL, 0u);
}
}
/**
- * kbase_csf_scheduler_advance_tick() - Advance the scheduling tick
+ * kbase_csf_scheduler_tick_advance() - Advance the scheduling tick
*
* @kbdev: Pointer to the device
*
@@ -506,13 +507,13 @@ kbase_csf_scheduler_advance_tick_nolock(struct kbase_device *kbdev)
* immediate execution, but only if the tick hrtimer is active. If the timer
* is inactive then the tick work item is already in flight.
*/
-static inline void kbase_csf_scheduler_advance_tick(struct kbase_device *kbdev)
+static inline void kbase_csf_scheduler_tick_advance(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
unsigned long flags;
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
- kbase_csf_scheduler_advance_tick_nolock(kbdev);
+ kbase_csf_scheduler_tick_advance_nolock(kbdev);
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
}
@@ -537,6 +538,22 @@ static inline void kbase_csf_scheduler_invoke_tick(struct kbase_device *kbdev)
}
/**
+ * kbase_csf_scheduler_invoke_tock() - Invoke the scheduling tock
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function will queue the scheduling tock work item for immediate
+ * execution.
+ */
+static inline void kbase_csf_scheduler_invoke_tock(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+
+ if (atomic_cmpxchg(&scheduler->pending_tock_work, false, true) == false)
+ kthread_mod_delayed_work(&scheduler->csf_worker, &scheduler->tock_work, 0);
+}
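This invoke helper pairs with the loop added to schedule_on_tock() earlier in this patch: the cmpxchg means that, of several back-to-back tock requests, only the first queues the work, while the worker re-runs schedule_actions() until it has consumed the flag. A generic sketch of this request-coalescing pattern is shown below; it is illustrative only and not the driver's API.

/* Illustrative only: coalesce many requests into one worker pass. */
static atomic_t example_pending = ATOMIC_INIT(0);

static void example_request(struct kthread_worker *worker,
			    struct kthread_delayed_work *dwork)
{
	/* Only the false -> true transition queues new work */
	if (atomic_cmpxchg(&example_pending, false, true) == false)
		kthread_mod_delayed_work(worker, dwork, 0);
}

static void example_worker(struct kthread_work *work)
{
	/* Keep going while requests arrived since the last pass */
	while (atomic_cmpxchg(&example_pending, true, false) == true) {
		/* one pass of the coalesced work */
	}
}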
+
+/**
* kbase_csf_scheduler_queue_has_trace() - report whether the queue has been
* configured to operate with the
* cs_trace feature.
@@ -674,4 +691,35 @@ bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev);
void turn_on_sc_power_rails(struct kbase_device *kbdev);
#endif
+/* Forward declaration */
+struct kbase_csf_tiler_heap_shrink_control;
+
+/**
+ * kbase_csf_scheduler_count_free_heap_pages() - Undertake shrinker reclaim count action
+ *
+ * @kbdev: Pointer to the device
+ * @shrink_ctrl: Pointer to the kbase CSF shrink control object.
+ *
+ * This function is called from CSF tiler heap memory shrinker reclaim 'count_objects' operation.
+ *
+ * Return: number of potentially freeable tiler heap pages.
+ */
+unsigned long
+kbase_csf_scheduler_count_free_heap_pages(struct kbase_device *kbdev,
+ struct kbase_csf_tiler_heap_shrink_control *shrink_ctrl);
+
+/**
+ * kbase_csf_scheduler_scan_free_heap_pages() - Undertake shrinker reclaim scan action
+ *
+ * @kbdev: Pointer to the device
+ * @shrink_ctrl: Pointer to the kbase CSF shrink control object.
+ *
+ * This function is called from CSF tiler heap memory shrinker reclaim 'scan_objects' operation.
+ *
+ * Return: number of actually freed tiler heap pages.
+ */
+unsigned long
+kbase_csf_scheduler_scan_free_heap_pages(struct kbase_device *kbdev,
+ struct kbase_csf_tiler_heap_shrink_control *shrink_ctrl);
+
#endif /* _KBASE_CSF_SCHEDULER_H_ */
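For context, these two hooks are intended to back a memory shrinker's count_objects/scan_objects callbacks, registered via kbase_csf_tiler_heap_register_shrinker() during scheduler early init. A hedged sketch of that wiring is below; the helper get_kbdev_from_shrinker() and the count_cb/scan_cb members are assumptions inferred from how shrink_ctrl is used in the scheduler code, and the exact shrinker registration API differs across kernel versions.

/* Illustrative only -- not the driver's actual shrinker implementation. */
static unsigned long example_heap_count_objects(struct shrinker *s,
						struct shrink_control *sc)
{
	/* get_kbdev_from_shrinker() is hypothetical, e.g. a container_of() */
	struct kbase_device *kbdev = get_kbdev_from_shrinker(s);
	struct kbase_csf_tiler_heap_shrink_control shrink_ctrl = {
		.sc = sc,
		/* .count_cb / .scan_cb would be the per-kctx tiler heap helpers */
	};

	return kbase_csf_scheduler_count_free_heap_pages(kbdev, &shrink_ctrl);
}

static unsigned long example_heap_scan_objects(struct shrinker *s,
					       struct shrink_control *sc)
{
	struct kbase_device *kbdev = get_kbdev_from_shrinker(s);
	struct kbase_csf_tiler_heap_shrink_control shrink_ctrl = { .sc = sc };

	return kbase_csf_scheduler_scan_free_heap_pages(kbdev, &shrink_ctrl);
}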
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
index 85babf9..b0d3825 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -179,9 +179,8 @@ static int create_chunk(struct kbase_csf_tiler_heap *const heap,
int err = 0;
struct kbase_context *const kctx = heap->kctx;
u64 nr_pages = PFN_UP(heap->chunk_size);
- u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
- BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE |
- BASE_MEM_COHERENT_LOCAL;
+ u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR |
+ BASEP_MEM_NO_USER_FREE | BASE_MEM_COHERENT_LOCAL | BASE_MEM_PROT_CPU_RD;
struct kbase_csf_tiler_heap_chunk *chunk = NULL;
/* Calls to this function are inherently synchronous, with respect to
@@ -191,10 +190,6 @@ static int create_chunk(struct kbase_csf_tiler_heap *const heap,
flags |= kbase_mem_group_id_set(kctx->jit_group_id);
-#if defined(CONFIG_MALI_DEBUG) || defined(CONFIG_MALI_VECTOR_DUMP)
- flags |= BASE_MEM_PROT_CPU_RD;
-#endif
-
chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
if (unlikely(!chunk)) {
dev_err(kctx->kbdev->dev,
@@ -234,26 +229,39 @@ static int create_chunk(struct kbase_csf_tiler_heap *const heap,
return err;
}
+static void mark_free_mem_bypassing_pool(struct kbase_va_region *reg)
+{
+ if (WARN_ON(reg->gpu_alloc == NULL))
+ return;
+
+ reg->gpu_alloc->evicted = reg->gpu_alloc->nents;
+ kbase_mem_evictable_mark_reclaim(reg->gpu_alloc);
+}
+
/**
* delete_chunk - Delete a tiler heap chunk
*
* @heap: Pointer to the tiler heap for which @chunk was allocated.
* @chunk: Pointer to a chunk to be deleted.
+ * @reclaim: Indicates whether the deletion is due to shrinker reclaim.
*
* This function frees a tiler heap chunk previously allocated by @create_chunk
* and removes it from the list of chunks associated with the heap.
*
* WARNING: The deleted chunk is not unlinked from the list of chunks used by
* the GPU, therefore it is only safe to use this function when
- * deleting a heap.
+ * deleting a heap, or under reclaim operations when the relevant CSGs
+ * are off-slot for the given kctx.
*/
static void delete_chunk(struct kbase_csf_tiler_heap *const heap,
- struct kbase_csf_tiler_heap_chunk *const chunk)
+ struct kbase_csf_tiler_heap_chunk *const chunk, bool reclaim)
{
struct kbase_context *const kctx = heap->kctx;
kbase_gpu_vm_lock(kctx);
chunk->region->flags &= ~KBASE_REG_NO_USER_FREE;
+ if (reclaim)
+ mark_free_mem_bypassing_pool(chunk->region);
kbase_mem_free_region(kctx, chunk->region);
kbase_gpu_vm_unlock(kctx);
list_del(&chunk->link);
@@ -277,7 +285,7 @@ static void delete_all_chunks(struct kbase_csf_tiler_heap *heap)
struct kbase_csf_tiler_heap_chunk *chunk = list_entry(
entry, struct kbase_csf_tiler_heap_chunk, link);
- delete_chunk(heap, chunk);
+ delete_chunk(heap, chunk, false);
}
}
@@ -334,12 +342,19 @@ static void delete_heap(struct kbase_csf_tiler_heap *heap)
heap->gpu_va);
list_del(&heap->link);
+ atomic_sub(PFN_UP(heap->chunk_size), &kctx->csf.tiler_heaps.est_count_pages);
WARN_ON(heap->chunk_count);
KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id,
heap->heap_id, 0, 0, heap->max_chunks, heap->chunk_size, 0,
heap->target_in_flight, 0);
+ if (heap->buf_desc_va) {
+ kbase_gpu_vm_lock(kctx);
+ heap->buf_desc_reg->flags &= ~KBASE_REG_NO_USER_FREE;
+ kbase_gpu_vm_unlock(kctx);
+ }
+
kfree(heap);
}
@@ -385,6 +400,7 @@ int kbase_csf_tiler_heap_context_init(struct kbase_context *const kctx)
INIT_LIST_HEAD(&kctx->csf.tiler_heaps.list);
mutex_init(&kctx->csf.tiler_heaps.lock);
+ atomic_set(&kctx->csf.tiler_heaps.est_count_pages, 0);
dev_dbg(kctx->kbdev->dev, "Initialized a context for tiler heaps\n");
@@ -405,25 +421,27 @@ void kbase_csf_tiler_heap_context_term(struct kbase_context *const kctx)
delete_heap(heap);
}
+ WARN_ON(atomic_read(&kctx->csf.tiler_heaps.est_count_pages) != 0);
mutex_unlock(&kctx->csf.tiler_heaps.lock);
mutex_destroy(&kctx->csf.tiler_heaps.lock);
kbase_csf_heap_context_allocator_term(&kctx->csf.tiler_heaps.ctx_alloc);
}
-int kbase_csf_tiler_heap_init(struct kbase_context *const kctx,
- u32 const chunk_size, u32 const initial_chunks, u32 const max_chunks,
- u16 const target_in_flight, u64 *const heap_gpu_va,
- u64 *const first_chunk_va)
+int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_size,
+ u32 const initial_chunks, u32 const max_chunks,
+ u16 const target_in_flight, u64 const buf_desc_va,
+ u64 *const heap_gpu_va, u64 *const first_chunk_va)
{
int err = 0;
struct kbase_csf_tiler_heap *heap = NULL;
struct kbase_csf_heap_context_allocator *const ctx_alloc =
&kctx->csf.tiler_heaps.ctx_alloc;
+ struct kbase_va_region *reg = NULL;
dev_dbg(kctx->kbdev->dev,
- "Creating a tiler heap with %u chunks (limit: %u) of size %u\n",
- initial_chunks, max_chunks, chunk_size);
+ "Creating a tiler heap with %u chunks (limit: %u) of size %u, buf_desc_va: 0x%llx",
+ initial_chunks, max_chunks, chunk_size, buf_desc_va);
if (!kbase_mem_allow_alloc(kctx))
return -EINVAL;
@@ -443,17 +461,35 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx,
if (target_in_flight == 0)
return -EINVAL;
+ /* Check the buffer descriptor virtual address */
+ if (buf_desc_va) {
+ kbase_gpu_vm_lock(kctx);
+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, buf_desc_va);
+ if (kbase_is_region_invalid_or_free(reg) || !(reg->flags & KBASE_REG_CPU_RD) ||
+ (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)) {
+ kbase_gpu_vm_unlock(kctx);
+ return -EINVAL;
+ }
+
+ reg->flags |= KBASE_REG_NO_USER_FREE;
+ kbase_gpu_vm_unlock(kctx);
+ }
+
heap = kzalloc(sizeof(*heap), GFP_KERNEL);
if (unlikely(!heap)) {
- dev_err(kctx->kbdev->dev,
- "No kernel memory for a new tiler heap\n");
- return -ENOMEM;
+ dev_err(kctx->kbdev->dev, "No kernel memory for a new tiler heap");
+ err = -ENOMEM;
+ goto err_out;
}
heap->kctx = kctx;
heap->chunk_size = chunk_size;
heap->max_chunks = max_chunks;
heap->target_in_flight = target_in_flight;
+ heap->buf_desc_va = buf_desc_va;
+ heap->buf_desc_reg = reg;
+ heap->desc_chk_flags = 0;
+ heap->desc_chk_cnt = 0;
INIT_LIST_HEAD(&heap->chunks_list);
heap->gpu_va = kbase_csf_heap_context_allocator_alloc(ctx_alloc);
@@ -468,9 +504,7 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx,
kbase_csf_heap_context_allocator_free(ctx_alloc, heap->gpu_va);
}
- if (unlikely(err)) {
- kfree(heap);
- } else {
+ if (likely(!err)) {
struct kbase_csf_tiler_heap_chunk const *chunk = list_first_entry(
&heap->chunks_list, struct kbase_csf_tiler_heap_chunk, link);
@@ -494,16 +528,27 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx,
kctx->kbdev, kctx->id, heap->heap_id, chunk->gpu_va);
}
#endif
+ kctx->running_total_tiler_heap_nr_chunks += heap->chunk_count;
+ kctx->running_total_tiler_heap_memory += (u64)heap->chunk_size * heap->chunk_count;
+ if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory)
+ kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory;
- dev_dbg(kctx->kbdev->dev, "Created tiler heap 0x%llX\n", heap->gpu_va);
+ /* Assume at least one reclaimable chunk per heap for the (estimated) count */
+ atomic_add(PFN_UP(heap->chunk_size), &kctx->csf.tiler_heaps.est_count_pages);
+ dev_dbg(kctx->kbdev->dev,
+ "Created tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d",
+ heap->gpu_va, buf_desc_va, kctx->tgid, kctx->id);
mutex_unlock(&kctx->csf.tiler_heaps.lock);
- kctx->running_total_tiler_heap_nr_chunks += heap->chunk_count;
- kctx->running_total_tiler_heap_memory +=
- heap->chunk_size * heap->chunk_count;
- if (kctx->running_total_tiler_heap_memory >
- kctx->peak_total_tiler_heap_memory)
- kctx->peak_total_tiler_heap_memory =
- kctx->running_total_tiler_heap_memory;
+
+ return 0;
+ }
+
+err_out:
+ kfree(heap);
+ if (buf_desc_va) {
+ kbase_gpu_vm_lock(kctx);
+ reg->flags &= ~KBASE_REG_NO_USER_FREE;
+ kbase_gpu_vm_unlock(kctx);
}
return err;
}
@@ -526,7 +571,6 @@ int kbase_csf_tiler_heap_term(struct kbase_context *const kctx,
} else
err = -EINVAL;
- mutex_unlock(&kctx->csf.tiler_heaps.lock);
if (likely(kctx->running_total_tiler_heap_memory >= heap_size))
kctx->running_total_tiler_heap_memory -= heap_size;
else
@@ -537,6 +581,11 @@ int kbase_csf_tiler_heap_term(struct kbase_context *const kctx,
else
dev_warn(kctx->kbdev->dev,
"Running total tiler chunk count lower than expected!");
+ if (!err)
+ dev_dbg(kctx->kbdev->dev,
+ "Terminated tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d",
+ heap->gpu_va, heap->buf_desc_va, kctx->tgid, kctx->id);
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
return err;
}
@@ -637,3 +686,352 @@ int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
return err;
}
+
+static bool delete_chunk_from_gpu_va(struct kbase_csf_tiler_heap *heap, u64 chunk_gpu_va,
+ u64 *hdr_val)
+{
+ struct kbase_context *kctx = heap->kctx;
+ struct kbase_csf_tiler_heap_chunk *chunk;
+
+ list_for_each_entry(chunk, &heap->chunks_list, link) {
+ struct kbase_vmap_struct map;
+ u64 *chunk_hdr;
+
+ if (chunk->gpu_va != chunk_gpu_va)
+ continue;
+ /* Found it, extract the next chunk header before deleting it */
+ chunk_hdr = kbase_vmap_prot(kctx, chunk_gpu_va, sizeof(*chunk_hdr),
+ KBASE_REG_CPU_RD, &map);
+
+ if (unlikely(!chunk_hdr)) {
+ dev_warn(
+ kctx->kbdev->dev,
+ "Failed to map tiler heap(0x%llX) chunk(0x%llX) for reclaim extract next header",
+ heap->gpu_va, chunk_gpu_va);
+ return false;
+ }
+
+ *hdr_val = *chunk_hdr;
+ kbase_vunmap(kctx, &map);
+
+ dev_dbg(kctx->kbdev->dev,
+ "Scan reclaim delete chunk(0x%llx) in heap(0x%llx), header value(0x%llX)",
+ chunk_gpu_va, heap->gpu_va, *hdr_val);
+ delete_chunk(heap, chunk, true);
+
+ return true;
+ }
+
+ dev_warn(kctx->kbdev->dev,
+ "Failed to find tiler heap(0x%llX) chunk(0x%llX) for reclaim-delete", heap->gpu_va,
+ chunk_gpu_va);
+ return false;
+}
+
+static bool heap_buffer_descriptor_checked(struct kbase_csf_tiler_heap *const heap)
+{
+ return heap->desc_chk_flags & HEAP_BUF_DESCRIPTOR_CHECKED;
+}
+
+static void sanity_check_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap,
+ struct kbase_csf_gpu_buffer_heap *desc)
+{
+ u64 ptr_addr = desc->pointer & CHUNK_ADDR_MASK;
+
+ lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
+
+ if (ptr_addr) {
+ struct kbase_csf_tiler_heap_chunk *chunk;
+
+ /* desc->pointer must be a chunk in the given heap */
+ list_for_each_entry(chunk, &heap->chunks_list, link) {
+ if (chunk->gpu_va == ptr_addr) {
+ dev_dbg(heap->kctx->kbdev->dev,
+ "Buffer descriptor 0x%llX sanity check ok, HW reclaim allowed",
+ heap->buf_desc_va);
+
+ heap->desc_chk_flags = HEAP_BUF_DESCRIPTOR_CHECKED;
+ return;
+ }
+ }
+ }
+ /* If there is no match, defer the check to next time */
+ dev_dbg(heap->kctx->kbdev->dev, "Buffer descriptor 0x%llX runtime sanity check deferred",
+ heap->buf_desc_va);
+}
+
+static bool can_read_hw_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap, u64 *ptr_u64)
+{
+ struct kbase_context *kctx = heap->kctx;
+ bool checked = false;
+
+ lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
+
+ /* Initialize the descriptor pointer value to 0 */
+ *ptr_u64 = 0;
+
+ if (heap_buffer_descriptor_checked(heap))
+ return true;
+
+ /* The buffer descriptor given at heap creation is only a hint, so sanity check it at runtime */
+ if (heap->buf_desc_va) {
+ struct kbase_vmap_struct map;
+ struct kbase_csf_gpu_buffer_heap *desc = kbase_vmap_prot(
+ kctx, heap->buf_desc_va, sizeof(*desc), KBASE_REG_CPU_RD, &map);
+
+ if (unlikely(!desc)) {
+ dev_warn_once(kctx->kbdev->dev,
+ "Sanity check: buffer descriptor 0x%llX map failed",
+ heap->buf_desc_va);
+ goto out;
+ }
+
+ sanity_check_gpu_buffer_heap(heap, desc);
+ checked = heap_buffer_descriptor_checked(heap);
+ if (checked)
+ *ptr_u64 = desc->pointer & CHUNK_ADDR_MASK;
+
+ kbase_vunmap(kctx, &map);
+ }
+
+out:
+ return checked;
+}
+
+static u32 delete_hoarded_chunks(struct kbase_csf_tiler_heap *heap)
+{
+ u32 freed = 0;
+ u64 gpu_va = 0;
+ struct kbase_context *kctx = heap->kctx;
+
+ lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
+
+ if (can_read_hw_gpu_buffer_heap(heap, &gpu_va)) {
+ u64 chunk_hdr_val;
+ u64 *hw_hdr;
+ struct kbase_vmap_struct map;
+
+ if (!gpu_va) {
+ struct kbase_csf_gpu_buffer_heap *desc = kbase_vmap_prot(
+ kctx, heap->buf_desc_va, sizeof(*desc), KBASE_REG_CPU_RD, &map);
+
+ if (unlikely(!desc)) {
+ dev_warn(
+ kctx->kbdev->dev,
+ "Failed to map Buffer descriptor 0x%llX for HW reclaim scan",
+ heap->buf_desc_va);
+ goto out;
+ }
+
+ gpu_va = desc->pointer & CHUNK_ADDR_MASK;
+ kbase_vunmap(kctx, &map);
+
+ if (!gpu_va) {
+ dev_dbg(kctx->kbdev->dev,
+ "Buffer descriptor 0x%llX has no chunks (NULL) for reclaim scan",
+ heap->buf_desc_va);
+ goto out;
+ }
+ }
+
+ /* Map the HW chunk header here with RD/WR for likely update */
+ hw_hdr = kbase_vmap_prot(kctx, gpu_va, sizeof(*hw_hdr),
+ KBASE_REG_CPU_RD | KBASE_REG_CPU_WR, &map);
+ if (unlikely(!hw_hdr)) {
+ dev_warn(kctx->kbdev->dev,
+ "Failed to map HW chnker header 0x%llX for HW reclaim scan",
+ gpu_va);
+ goto out;
+ }
+
+ /* Move on to the next chunk's information */
+ chunk_hdr_val = *hw_hdr;
+ gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
+
+ while (gpu_va && heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) {
+ bool success = delete_chunk_from_gpu_va(heap, gpu_va, &chunk_hdr_val);
+
+ if (!success)
+ break;
+
+ freed++;
+ /* On success, chunk_hdr_val is updated, extract the next chunk address */
+ gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
+ }
+
+ /* Update the existing hardware chunk header, after reclaim deletion of chunks */
+ *hw_hdr = chunk_hdr_val;
+ kbase_vunmap(kctx, &map);
+ dev_dbg(heap->kctx->kbdev->dev,
+ "HW reclaim scan freed chunks: %u, set hw_hdr[0]: 0x%llX", freed,
+ chunk_hdr_val);
+ } else
+ dev_dbg(kctx->kbdev->dev,
+ "Skip HW reclaim scan, (disabled: buffer descriptor 0x%llX)",
+ heap->buf_desc_va);
+
+out:
+ return freed;
+}
+
+static u64 delete_unused_chunk_pages(struct kbase_csf_tiler_heap *heap)
+{
+ u32 freed_chunks = 0;
+ u64 freed_pages = 0;
+ u64 gpu_va;
+ u64 chunk_hdr_val;
+ struct kbase_context *kctx = heap->kctx;
+ unsigned long prot = KBASE_REG_CPU_RD | KBASE_REG_CPU_WR;
+ struct kbase_vmap_struct map;
+ u64 *ctx_ptr;
+
+ lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
+
+ ctx_ptr = kbase_vmap_prot(kctx, heap->gpu_va, sizeof(*ctx_ptr), prot, &map);
+ if (unlikely(!ctx_ptr)) {
+ dev_dbg(kctx->kbdev->dev,
+ "Failed to map tiler heap context 0x%llX for reclaim_scan", heap->gpu_va);
+ goto out;
+ }
+
+ /* Extract the first chunk address from the context's free_list_head */
+ chunk_hdr_val = *ctx_ptr;
+ gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
+
+ while (gpu_va) {
+ u64 hdr_val;
+ bool success = delete_chunk_from_gpu_va(heap, gpu_va, &hdr_val);
+
+ if (!success)
+ break;
+
+ freed_chunks++;
+ chunk_hdr_val = hdr_val;
+ /* extract the next chunk address */
+ gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
+ }
+
+ /* Write the post-scan deletion state back to the context header */
+ *ctx_ptr = chunk_hdr_val;
+ kbase_vunmap(kctx, &map);
+
+ /* Try to scan the HW hoarded list of unused chunks */
+ freed_chunks += delete_hoarded_chunks(heap);
+ freed_pages = freed_chunks * PFN_UP(heap->chunk_size);
+ dev_dbg(heap->kctx->kbdev->dev,
+ "Scan reclaim freed chunks/pages %u/%llu, set heap-ctx_u64[0]: 0x%llX",
+ freed_chunks, freed_pages, chunk_hdr_val);
+
+ /* Update context tiler heaps memory usage */
+ kctx->running_total_tiler_heap_memory -= freed_pages << PAGE_SHIFT;
+ kctx->running_total_tiler_heap_nr_chunks -= freed_chunks;
+out:
+ return freed_pages;
+}
+
+static u32 scan_kctx_unused_heap_pages_cb(struct kbase_context *kctx, u32 to_free)
+{
+ u64 freed = 0;
+ struct kbase_csf_tiler_heap *heap;
+
+ mutex_lock(&kctx->csf.tiler_heaps.lock);
+
+ list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link) {
+ freed += delete_unused_chunk_pages(heap);
+ /* If freed enough, then stop here */
+ if (freed >= to_free)
+ break;
+ }
+
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
+ /* The scan should never exceed 4G pages, but cap it to keep the logic well defined */
+ if (WARN_ON(unlikely(freed > U32_MAX)))
+ return U32_MAX;
+ else
+ return (u32)freed;
+}
+
+static u64 count_unused_heap_pages(struct kbase_csf_tiler_heap *heap)
+{
+ u32 chunk_cnt = 0;
+ u64 page_cnt = 0;
+
+ lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
+
+ /* The count here is an informed estimate that avoids the costly mapping/unmapping of a
+ * full chunk-list walk. The downside is that it is a less reliable guide, for later scan
+ * (free) calls on this heap, of what is actually freeable.
+ */
+ if (heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) {
+ chunk_cnt = heap->chunk_count - HEAP_SHRINK_STOP_LIMIT;
+ page_cnt = chunk_cnt * PFN_UP(heap->chunk_size);
+ }
+
+ dev_dbg(heap->kctx->kbdev->dev,
+ "Reclaim count chunks/pages %u/%llu (estimated), heap_va: 0x%llX", chunk_cnt,
+ page_cnt, heap->gpu_va);
+
+ return page_cnt;
+}
+
+static u32 count_kctx_unused_heap_pages_cb(struct kbase_context *kctx)
+{
+ u64 page_cnt = 0;
+ struct kbase_csf_tiler_heap *heap;
+
+ mutex_lock(&kctx->csf.tiler_heaps.lock);
+
+ list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link)
+ page_cnt += count_unused_heap_pages(heap);
+
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
+
+ /* The count should never exceed 4G pages, but cap it to keep the logic well defined */
+ if (WARN_ON(unlikely(page_cnt > U32_MAX)))
+ return U32_MAX;
+ else
+ return (u32)page_cnt;
+}
+
+static unsigned long kbase_csf_tiler_heap_reclaim_count_objects(struct shrinker *s,
+ struct shrink_control *sc)
+{
+ struct kbase_device *kbdev = container_of(s, struct kbase_device, csf.tiler_heap_reclaim);
+ struct kbase_csf_tiler_heap_shrink_control shrink_ctrl = {
+ .sc = sc,
+ .count_cb = count_kctx_unused_heap_pages_cb,
+ .scan_cb = scan_kctx_unused_heap_pages_cb,
+ };
+
+ return kbase_csf_scheduler_count_free_heap_pages(kbdev, &shrink_ctrl);
+}
+
+static unsigned long kbase_csf_tiler_heap_reclaim_scan_objects(struct shrinker *s,
+ struct shrink_control *sc)
+{
+ struct kbase_device *kbdev = container_of(s, struct kbase_device, csf.tiler_heap_reclaim);
+ struct kbase_csf_tiler_heap_shrink_control shrink_ctrl = {
+ .sc = sc,
+ .count_cb = count_kctx_unused_heap_pages_cb,
+ .scan_cb = scan_kctx_unused_heap_pages_cb,
+ };
+
+ return kbase_csf_scheduler_scan_free_heap_pages(kbdev, &shrink_ctrl);
+}
+
+void kbase_csf_tiler_heap_register_shrinker(struct kbase_device *kbdev)
+{
+ struct shrinker *reclaim = &kbdev->csf.tiler_heap_reclaim;
+
+ reclaim->count_objects = kbase_csf_tiler_heap_reclaim_count_objects;
+ reclaim->scan_objects = kbase_csf_tiler_heap_reclaim_scan_objects;
+ reclaim->seeks = HEAP_SHRINKER_SEEKS;
+ reclaim->batch = HEAP_SHRINKER_BATCH;
+
+ register_shrinker(reclaim);
+}
+
+void kbase_csf_tiler_heap_unregister_shrinker(struct kbase_device *kbdev)
+{
+ unregister_shrinker(&kbdev->csf.tiler_heap_reclaim);
+}
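
The reclaim scan above treats the first 64 bits of each chunk (and of the heap context) as a link whose address bits, selected by CHUNK_ADDR_MASK, give the next free chunk's GPU VA. A minimal, self-contained sketch of that list-truncation pattern follows; the typedefs, the mask value and the read_hdr()/free_chunk() helpers are illustrative stand-ins for the driver's vmap and delete routines, not its actual API.

#include <stdint.h>

typedef uint64_t u64;
typedef uint32_t u32;

/* Illustrative mask: keep only the address bits of a chunk header link. */
#define EXAMPLE_CHUNK_ADDR_MASK 0xfffffffffffff000ULL

/* Walk the free list headed by *head_hdr, deleting chunks until only
 * stop_limit remain, then write the surviving link value back, mirroring
 * delete_unused_chunk_pages()/delete_hoarded_chunks() above.
 */
static u32 truncate_free_list(u64 *head_hdr, u32 *chunk_count, u32 stop_limit,
			      u64 (*read_hdr)(u64 gpu_va),
			      void (*free_chunk)(u64 gpu_va))
{
	u32 freed = 0;
	u64 hdr = *head_hdr;
	u64 gpu_va = hdr & EXAMPLE_CHUNK_ADDR_MASK;

	while (gpu_va && *chunk_count > stop_limit) {
		hdr = read_hdr(gpu_va);	/* capture the link before deletion */
		free_chunk(gpu_va);
		(*chunk_count)--;
		freed++;
		gpu_va = hdr & EXAMPLE_CHUNK_ADDR_MASK;
	}

	*head_hdr = hdr;	/* surviving chain (address bits are 0 when fully drained) */
	return freed;
}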
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.h b/mali_kbase/csf/mali_kbase_csf_tiler_heap.h
index 4031ad4..da60c59 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.h
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -23,7 +23,6 @@
#define _KBASE_CSF_TILER_HEAP_H_
#include <mali_kbase.h>
-
/**
* kbase_csf_tiler_heap_context_init - Initialize the tiler heaps context for a
* GPU address space
@@ -58,6 +57,12 @@ void kbase_csf_tiler_heap_context_term(struct kbase_context *kctx);
* @target_in_flight: Number of render-passes that the driver should attempt to
* keep in flight for which allocation of new chunks is
* allowed. Must not be zero.
+ * @buf_desc_va: Buffer descriptor GPU virtual address. This is a hint
+ * indicating that the caller intends to reclaim tiler heap
+ * chunks hoarded by the hardware while the associated shader
+ * activities are suspended and the CSGs are off slot. Set it
+ * to 0 if such reclaiming is not desired.
* @gpu_heap_va: Where to store the GPU virtual address of the context that was
* set up for the tiler heap.
* @first_chunk_va: Where to store the GPU virtual address of the first chunk
@@ -66,13 +71,12 @@ void kbase_csf_tiler_heap_context_term(struct kbase_context *kctx);
*
* Return: 0 if successful or a negative error code on failure.
*/
-int kbase_csf_tiler_heap_init(struct kbase_context *kctx,
- u32 chunk_size, u32 initial_chunks, u32 max_chunks,
- u16 target_in_flight, u64 *gpu_heap_va,
- u64 *first_chunk_va);
+int kbase_csf_tiler_heap_init(struct kbase_context *kctx, u32 chunk_size, u32 initial_chunks,
+ u32 max_chunks, u16 target_in_flight, u64 const buf_desc_va,
+ u64 *gpu_heap_va, u64 *first_chunk_va);
/**
- * kbasep_cs_tiler_heap_term - Terminate a chunked tiler memory heap.
+ * kbase_csf_tiler_heap_term - Terminate a chunked tiler memory heap.
*
* @kctx: Pointer to the kbase context in which the tiler heap was initialized.
* @gpu_heap_va: The GPU virtual address of the context that was set up for the
@@ -112,4 +116,21 @@ int kbase_csf_tiler_heap_term(struct kbase_context *kctx, u64 gpu_heap_va);
*/
int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr);
+
+/**
+ * kbase_csf_tiler_heap_register_shrinker - Register shrinker for tiler heap.
+ *
+ * @kbdev: Pointer to the device.
+ *
+ */
+void kbase_csf_tiler_heap_register_shrinker(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_tiler_heap_unregister_shrinker - Unregister shrinker for tiler heap on device
+ * shut down.
+ *
+ * @kbdev: Pointer to the device.
+ *
+ */
+void kbase_csf_tiler_heap_unregister_shrinker(struct kbase_device *kbdev);
#endif
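
For context, a minimal usage sketch of the extended interface, with the new buf_desc_va argument in sixth position. The wrapper function below is hypothetical and the size/count values are illustrative only; they are not taken from the driver.

/* Hypothetical helper, shown only to illustrate the updated signature. */
static int example_create_and_destroy_heap(struct kbase_context *kctx, u64 buf_desc_gpu_va)
{
	u64 heap_va, first_chunk_va;
	/* chunk_size / initial_chunks / max_chunks / target_in_flight are illustrative.
	 * Passing 0 for buf_desc_va keeps the previous behaviour (no HW chunk reclaim).
	 */
	int err = kbase_csf_tiler_heap_init(kctx, 1 << 21, 2, 16, 4,
					    buf_desc_gpu_va,
					    &heap_va, &first_chunk_va);

	if (err)
		return err;

	/* ... the heap would normally be used here ... */

	return kbase_csf_tiler_heap_term(kctx, heap_va);
}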
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap_def.h b/mali_kbase/csf/mali_kbase_csf_tiler_heap_def.h
index 2c006d9..70dbb6c 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap_def.h
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap_def.h
@@ -56,6 +56,15 @@
((CHUNK_HDR_NEXT_ADDR_MASK >> CHUNK_HDR_NEXT_ADDR_POS) << \
CHUNK_HDR_NEXT_ADDR_ENCODE_SHIFT)
+/* Tiler heap shrink stop limit for maintaining a minimum number of chunks */
+#define HEAP_SHRINK_STOP_LIMIT (1)
+
+/* Tiler heap shrinker seek value, needs to be higher than jit and memory pools */
+#define HEAP_SHRINKER_SEEKS (DEFAULT_SEEKS + 2)
+
+/* Tiler heap shrinker batch value */
+#define HEAP_SHRINKER_BATCH (512)
+
/**
* struct kbase_csf_tiler_heap_chunk - A tiler heap chunk managed by the kernel
*
@@ -78,6 +87,8 @@ struct kbase_csf_tiler_heap_chunk {
u64 gpu_va;
};
+#define HEAP_BUF_DESCRIPTOR_CHECKED (1 << 0)
+
/**
* struct kbase_csf_tiler_heap - A tiler heap managed by the kernel
*
@@ -85,6 +96,16 @@ struct kbase_csf_tiler_heap_chunk {
* associated.
* @link: Link to this heap in a list of tiler heaps belonging to
* the @kbase_csf_tiler_heap_context.
+ * @chunks_list: Linked list of allocated chunks.
+ * @gpu_va: The GPU virtual address of the heap context structure that
+ * was allocated for the firmware. This is also used to
+ * uniquely identify the heap.
+ * @heap_id: Unique id representing the heap, assigned during heap
+ * initialization.
+ * @buf_desc_va: Buffer descriptor GPU VA. Can be 0 for backward compatibility
+ * with earlier versions of the base interface.
+ * @buf_desc_reg: Pointer to the VA region that covers the provided buffer
+ * descriptor memory object pointed to by buf_desc_va.
* @chunk_size: Size of each chunk, in bytes. Must be page-aligned.
* @chunk_count: The number of chunks currently allocated. Must not be
* zero or greater than @max_chunks.
@@ -93,22 +114,56 @@ struct kbase_csf_tiler_heap_chunk {
* @target_in_flight: Number of render-passes that the driver should attempt
* to keep in flight for which allocation of new chunks is
* allowed. Must not be zero.
- * @gpu_va: The GPU virtual address of the heap context structure that
- * was allocated for the firmware. This is also used to
- * uniquely identify the heap.
- * @heap_id: Unique id representing the heap, assigned during heap
- * initialization.
- * @chunks_list: Linked list of allocated chunks.
+ * @desc_chk_flags: Runtime sanity check flags on heap chunk reclaim.
+ * @desc_chk_cnt: Counter for providing a deferral gap if runtime sanity check
+ * needs to be retried later.
*/
struct kbase_csf_tiler_heap {
struct kbase_context *kctx;
struct list_head link;
+ struct list_head chunks_list;
+ u64 gpu_va;
+ u64 heap_id;
+ u64 buf_desc_va;
+ struct kbase_va_region *buf_desc_reg;
u32 chunk_size;
u32 chunk_count;
u32 max_chunks;
u16 target_in_flight;
- u64 gpu_va;
- u64 heap_id;
- struct list_head chunks_list;
+ u8 desc_chk_flags;
+ u8 desc_chk_cnt;
+};
+
+/**
+ * struct kbase_csf_gpu_buffer_heap - A gpu buffer object specific to tiler heap
+ *
+ * @cdsbp_0: Descriptor_type and buffer_type
+ * @size: The size of the current heap chunk
+ * @pointer: Pointer to the current heap chunk
+ * @low_pointer: Pointer to low end of current heap chunk
+ * @high_pointer: Pointer to high end of current heap chunk
+ */
+struct kbase_csf_gpu_buffer_heap {
+ u32 cdsbp_0;
+ u32 size;
+ u64 pointer;
+ u64 low_pointer;
+ u64 high_pointer;
+} __packed;
+
+/**
+ * struct kbase_csf_tiler_heap_shrink_control - Kbase wrapper object around the
+ * kernel shrink_control
+ *
+ * @sc: Pointer to the shrinker control object passed to the reclaim callback.
+ * @count_cb: Function pointer for counting a context's reclaimable tiler heap pages.
+ * @scan_cb: Function pointer for scanning (freeing) a context's unused tiler heap pages.
+ */
+
+struct kbase_csf_tiler_heap_shrink_control {
+ struct shrink_control *sc;
+ u32 (*count_cb)(struct kbase_context *kctx);
+ u32 (*scan_cb)(struct kbase_context *kctx, u32 pages);
};
+
#endif /* !_KBASE_CSF_TILER_HEAP_DEF_H_ */
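
The shrink_control wrapper above lets the scheduler drive per-context counting and scanning without knowing tiler heap internals. A rough sketch of how the two callbacks are presumably consumed is given below; the context-list iteration, its list-link field name and the absence of locking are placeholders, since the real aggregation lives in the scheduler code outside this patch.

/* Illustrative only: sum reclaimable pages over a list of contexts via the
 * count callback; the scan callback would be driven the same way, with the
 * page budget taken from shrink_ctrl->sc->nr_to_scan.
 */
static unsigned long example_count_all_contexts(struct list_head *kctx_list,
		struct kbase_csf_tiler_heap_shrink_control *shrink_ctrl)
{
	struct kbase_context *kctx;
	unsigned long total = 0;

	list_for_each_entry(kctx, kctx_list, kctx_list_link) /* link name is a placeholder */
		total += shrink_ctrl->count_cb(kctx);

	return total;
}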
diff --git a/mali_kbase/csf/mali_kbase_csf_tl_reader.c b/mali_kbase/csf/mali_kbase_csf_tl_reader.c
index f40be8f..27677ba 100644
--- a/mali_kbase/csf/mali_kbase_csf_tl_reader.c
+++ b/mali_kbase/csf/mali_kbase_csf_tl_reader.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -38,10 +38,7 @@
#if IS_ENABLED(CONFIG_DEBUG_FS)
#include "tl/mali_kbase_timeline_priv.h"
#include <linux/debugfs.h>
-
-#if (KERNEL_VERSION(4, 7, 0) > LINUX_VERSION_CODE)
-#define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE
-#endif
+#include <linux/version_compat_defs.h>
#endif
/* Name of the CSFFW timeline tracebuffer. */
@@ -301,7 +298,7 @@ int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
dev_warn(
kbdev->dev,
"Unable to parse CSFFW tracebuffer event header.");
- ret = -EBUSY;
+ ret = -EBUSY;
break;
}
@@ -322,7 +319,7 @@ int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
dev_warn(kbdev->dev,
"event_id: %u, can't read with event_size: %u.",
event_id, event_size);
- ret = -EBUSY;
+ ret = -EBUSY;
break;
}
diff --git a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
index c6b89f5..9ce6776 100644
--- a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
+++ b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,12 +28,7 @@
#include <linux/list.h>
#include <linux/mman.h>
-
-#if IS_ENABLED(CONFIG_DEBUG_FS)
-#if (KERNEL_VERSION(4, 7, 0) > LINUX_VERSION_CODE)
-#define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE
-#endif
-#endif
+#include <linux/version_compat_defs.h>
/**
* struct firmware_trace_buffer - Trace Buffer within the MCU firmware
@@ -127,9 +122,9 @@ static const struct firmware_trace_buffer_data trace_buffer_data[] = {
#endif
#ifdef CONFIG_MALI_PIXEL_GPU_SSCD
/* Enable all the logs */
- { FW_TRACE_BUF_NAME, { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }, FW_TRACE_BUF_NR_PAGES },
+ { FIRMWARE_LOG_BUF_NAME, { 0xFFFFFFFF }, FW_TRACE_BUF_NR_PAGES },
#else
- { FW_TRACE_BUF_NAME, { 0 }, FW_TRACE_BUF_NR_PAGES },
+ { FIRMWARE_LOG_BUF_NAME, { 0 }, FW_TRACE_BUF_NR_PAGES },
#endif /* CONFIG_MALI_PIXEL_GPU_SSCD */
{ "benchmark", { 0 }, 2 },
{ "timeline", { 0 }, KBASE_CSF_TL_BUFFER_NR_PAGES },
@@ -517,10 +512,16 @@ unsigned int kbase_csf_firmware_trace_buffer_read_data(
}
EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_read_data);
-#if IS_ENABLED(CONFIG_DEBUG_FS)
+static void update_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, u64 mask)
+{
+ unsigned int i;
+
+ for (i = 0; i < tb->trace_enable_entry_count; i++)
+ kbasep_csf_firmware_trace_buffer_update_trace_enable_bit(tb, i, (mask >> i) & 1);
+}
#define U32_BITS 32
-static u64 get_trace_buffer_active_mask64(struct firmware_trace_buffer *tb)
+u64 kbase_csf_firmware_trace_buffer_get_active_mask64(struct firmware_trace_buffer *tb)
{
u64 active_mask = tb->trace_enable_init_mask[0];
@@ -530,18 +531,7 @@ static u64 get_trace_buffer_active_mask64(struct firmware_trace_buffer *tb)
return active_mask;
}
-static void update_trace_buffer_active_mask64(struct firmware_trace_buffer *tb,
- u64 mask)
-{
- unsigned int i;
-
- for (i = 0; i < tb->trace_enable_entry_count; i++)
- kbasep_csf_firmware_trace_buffer_update_trace_enable_bit(
- tb, i, (mask >> i) & 1);
-}
-
-static int set_trace_buffer_active_mask64(struct firmware_trace_buffer *tb,
- u64 mask)
+int kbase_csf_firmware_trace_buffer_set_active_mask64(struct firmware_trace_buffer *tb, u64 mask)
{
struct kbase_device *kbdev = tb->kbdev;
unsigned long flags;
@@ -569,124 +559,3 @@ static int set_trace_buffer_active_mask64(struct firmware_trace_buffer *tb,
return err;
}
-
-static int kbase_csf_firmware_trace_enable_mask_read(void *data, u64 *val)
-{
- struct kbase_device *kbdev = (struct kbase_device *)data;
- struct firmware_trace_buffer *tb =
- kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME);
-
- if (tb == NULL) {
- dev_err(kbdev->dev, "Couldn't get the firmware trace buffer");
- return -EIO;
- }
- /* The enabled traces limited to u64 here, regarded practical */
- *val = get_trace_buffer_active_mask64(tb);
- return 0;
-}
-
-static int kbase_csf_firmware_trace_enable_mask_write(void *data, u64 val)
-{
- struct kbase_device *kbdev = (struct kbase_device *)data;
- struct firmware_trace_buffer *tb =
- kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME);
- u64 new_mask;
- unsigned int enable_bits_count;
-
- if (tb == NULL) {
- dev_err(kbdev->dev, "Couldn't get the firmware trace buffer");
- return -EIO;
- }
-
- /* Ignore unsupported types */
- enable_bits_count =
- kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count(tb);
- if (enable_bits_count > 64) {
- dev_dbg(kbdev->dev, "Limit enabled bits count from %u to 64",
- enable_bits_count);
- enable_bits_count = 64;
- }
- new_mask = val & ((1 << enable_bits_count) - 1);
-
- if (new_mask != get_trace_buffer_active_mask64(tb))
- return set_trace_buffer_active_mask64(tb, new_mask);
- else
- return 0;
-}
-
-static int kbasep_csf_firmware_trace_debugfs_open(struct inode *in,
- struct file *file)
-{
- struct kbase_device *kbdev = in->i_private;
-
- file->private_data = kbdev;
- dev_dbg(kbdev->dev, "Opened firmware trace buffer dump debugfs file");
-
- return 0;
-}
-
-static ssize_t kbasep_csf_firmware_trace_debugfs_read(struct file *file,
- char __user *buf, size_t size, loff_t *ppos)
-{
- struct kbase_device *kbdev = file->private_data;
- u8 *pbyte;
- unsigned int n_read;
- unsigned long not_copied;
- /* Limit the kernel buffer to no more than two pages */
- size_t mem = MIN(size, 2 * PAGE_SIZE);
- unsigned long flags;
-
- struct firmware_trace_buffer *tb =
- kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME);
-
- if (tb == NULL) {
- dev_err(kbdev->dev, "Couldn't get the firmware trace buffer");
- return -EIO;
- }
-
- pbyte = kmalloc(mem, GFP_KERNEL);
- if (pbyte == NULL) {
- dev_err(kbdev->dev, "Couldn't allocate memory for trace buffer dump");
- return -ENOMEM;
- }
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- n_read = kbase_csf_firmware_trace_buffer_read_data(tb, pbyte, mem);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
- /* Do the copy, if we have obtained some trace data */
- not_copied = (n_read) ? copy_to_user(buf, pbyte, n_read) : 0;
- kfree(pbyte);
-
- if (!not_copied) {
- *ppos += n_read;
- return n_read;
- }
-
- dev_err(kbdev->dev, "Couldn't copy trace buffer data to user space buffer");
- return -EFAULT;
-}
-
-
-DEFINE_SIMPLE_ATTRIBUTE(kbase_csf_firmware_trace_enable_mask_fops,
- kbase_csf_firmware_trace_enable_mask_read,
- kbase_csf_firmware_trace_enable_mask_write, "%llx\n");
-
-static const struct file_operations kbasep_csf_firmware_trace_debugfs_fops = {
- .owner = THIS_MODULE,
- .open = kbasep_csf_firmware_trace_debugfs_open,
- .read = kbasep_csf_firmware_trace_debugfs_read,
- .llseek = no_llseek,
-};
-
-void kbase_csf_firmware_trace_buffer_debugfs_init(struct kbase_device *kbdev)
-{
- debugfs_create_file("fw_trace_enable_mask", 0644,
- kbdev->mali_debugfs_directory, kbdev,
- &kbase_csf_firmware_trace_enable_mask_fops);
-
- debugfs_create_file("fw_traces", 0444,
- kbdev->mali_debugfs_directory, kbdev,
- &kbasep_csf_firmware_trace_debugfs_fops);
-}
-#endif /* CONFIG_DEBUG_FS */
diff --git a/mali_kbase/csf/mali_kbase_csf_trace_buffer.h b/mali_kbase/csf/mali_kbase_csf_trace_buffer.h
index 6c3907c..037dc22 100644
--- a/mali_kbase/csf/mali_kbase_csf_trace_buffer.h
+++ b/mali_kbase/csf/mali_kbase_csf_trace_buffer.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -25,7 +25,7 @@
#include <linux/types.h>
#define CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX (4)
-#define FW_TRACE_BUF_NAME "fwlog"
+#define FIRMWARE_LOG_BUF_NAME "fwlog"
#define FW_TRACE_BUF_NR_PAGES 4
/* Forward declarations */
@@ -59,7 +59,7 @@ struct kbase_device;
int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev);
/**
- * kbase_csf_firmware_trace_buffer_term - Terminate trace buffers
+ * kbase_csf_firmware_trace_buffers_term - Terminate trace buffers
*
* @kbdev: Device pointer
*/
@@ -166,15 +166,23 @@ bool kbase_csf_firmware_trace_buffer_is_empty(
unsigned int kbase_csf_firmware_trace_buffer_read_data(
struct firmware_trace_buffer *trace_buffer, u8 *data, unsigned int num_bytes);
-#if IS_ENABLED(CONFIG_DEBUG_FS)
/**
- * kbase_csf_fw_trace_buffer_debugfs_init() - Add debugfs entries for setting
- * enable mask and dumping the binary
- * firmware trace buffer
+ * kbase_csf_firmware_trace_buffer_get_active_mask64 - Get trace buffer active mask
*
- * @kbdev: Pointer to the device
+ * @tb: Trace buffer handle
+ *
+ * Return: Trace buffer active mask.
+ */
+u64 kbase_csf_firmware_trace_buffer_get_active_mask64(struct firmware_trace_buffer *tb);
+
+/**
+ * kbase_csf_firmware_trace_buffer_set_active_mask64 - Set trace buffer active mask
+ *
+ * @tb: Trace buffer handle
+ * @mask: New active mask
+ *
+ * Return: 0 if successful, negative error code on failure.
*/
-void kbase_csf_firmware_trace_buffer_debugfs_init(struct kbase_device *kbdev);
-#endif /* CONFIG_DEBUG_FS */
+int kbase_csf_firmware_trace_buffer_set_active_mask64(struct firmware_trace_buffer *tb, u64 mask);
#endif /* _KBASE_CSF_TRACE_BUFFER_H_ */
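
With the debugfs plumbing removed from this file, the exported get/set helpers become the way to read and adjust the firmware log enable mask. A small usage sketch, assuming a caller that already holds the device pointer; the wrapper function name and the mask value are illustrative.

/* Illustrative only: turn on the two lowest trace points of the "fwlog" buffer. */
static int example_enable_low_fwlog_bits(struct kbase_device *kbdev)
{
	struct firmware_trace_buffer *tb =
		kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);
	u64 mask;

	if (!tb)
		return -EIO;

	mask = kbase_csf_firmware_trace_buffer_get_active_mask64(tb);
	return kbase_csf_firmware_trace_buffer_set_active_mask64(tb, mask | 0x3);
}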
diff --git a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_csf.h b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_csf.h
index 2506ce1..9e4da9f 100644
--- a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_csf.h
+++ b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_csf.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -42,67 +42,67 @@ int dummy_array[] = {
/*
* Generic CSF events
*/
- KBASE_KTRACE_CODE_MAKE_CODE(EVICT_CTX_SLOTS),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_EVICT_CTX_SLOTS_START),
/* info_val[0:7] == fw version_minor
* info_val[15:8] == fw version_major
* info_val[63:32] == fw version_hash
*/
- KBASE_KTRACE_CODE_MAKE_CODE(FIRMWARE_BOOT),
- KBASE_KTRACE_CODE_MAKE_CODE(FIRMWARE_REBOOT),
- KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_BOOT),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_REBOOT),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_START),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_END),
/* info_val == total number of runnable groups across all kctxs */
- KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_START),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_END),
- KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RESET),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RESET_START),
/* info_val = timeout in ms */
- KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_WAIT_PROTM_QUIT),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_WAIT_QUIT_START),
/* info_val = remaining ms timeout, or 0 if timedout */
- KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_WAIT_PROTM_QUIT_DONE),
- KBASE_KTRACE_CODE_MAKE_CODE(SYNC_UPDATE_EVENT),
- KBASE_KTRACE_CODE_MAKE_CODE(SYNC_UPDATE_EVENT_NOTIFY_GPU),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_WAIT_QUIT_END),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GROUP_SYNC_UPDATE_EVENT),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_SYNC_UPDATE_NOTIFY_GPU_EVENT),
/* info_val = JOB_IRQ_STATUS */
- KBASE_KTRACE_CODE_MAKE_CODE(CSF_INTERRUPT),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_INTERRUPT_START),
/* info_val = JOB_IRQ_STATUS */
KBASE_KTRACE_CODE_MAKE_CODE(CSF_INTERRUPT_END),
/* info_val = JOB_IRQ_STATUS */
- KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_PROCESS),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_PROCESS_START),
/* info_val = GLB_REQ ^ GLB_ACQ */
- KBASE_KTRACE_CODE_MAKE_CODE(GLB_REQ_ACQ),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_INTERRUPT_GLB_REQ_ACK),
/* info_val[31:0] = num non idle offslot groups
* info_val[32] = scheduler can suspend on idle
*/
- KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_CAN_IDLE),
- KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_ADVANCE_TICK),
- KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NOADVANCE_TICK),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_ADVANCE),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_NOADVANCE),
/* kctx is added to the back of the list */
- KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_INSERT_RUNNABLE),
- KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_REMOVE_RUNNABLE),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RUNNABLE_KCTX_INSERT),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RUNNABLE_KCTX_REMOVE),
/* kctx is moved to the back of the list */
- KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_ROTATE_RUNNABLE),
- KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_HEAD_RUNNABLE),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RUNNABLE_KCTX_ROTATE),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RUNNABLE_KCTX_HEAD),
- KBASE_KTRACE_CODE_MAKE_CODE(IDLE_WORKER_BEGIN),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_WORKER_START),
/* 4-bit encoding of boolean values (ease of reading as hex values)
*
* info_val[3:0] = was reset active/failed to be prevented
* info_val[7:4] = whether scheduler was both idle and suspendable
* info_val[11:8] = whether all groups were suspended
*/
- KBASE_KTRACE_CODE_MAKE_CODE(IDLE_WORKER_END),
- KBASE_KTRACE_CODE_MAKE_CODE(GROUP_SYNC_UPDATE_WORKER_BEGIN),
- KBASE_KTRACE_CODE_MAKE_CODE(GROUP_SYNC_UPDATE_WORKER_END),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_WORKER_END),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_END),
/* info_val = bitmask of slots that gave an ACK for STATUS_UPDATE */
- KBASE_KTRACE_CODE_MAKE_CODE(SLOTS_STATUS_UPDATE_ACK),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_UPDATE_IDLE_SLOTS_ACK),
/* info_val[63:0] = GPU cycle counter, used mainly for benchmarking
* purpose.
*/
- KBASE_KTRACE_CODE_MAKE_CODE(GPU_IDLE_HANDLING_START),
- KBASE_KTRACE_CODE_MAKE_CODE(MCU_HALTED),
- KBASE_KTRACE_CODE_MAKE_CODE(MCU_IN_SLEEP),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_WORKER_HANDLING_START),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_MCU_HALTED),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_MCU_SLEEP),
/*
* Group events
@@ -111,17 +111,17 @@ int dummy_array[] = {
* info_val[19:16] == as_nr
* info_val[63:32] == endpoint config (max number of endpoints allowed)
*/
- KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_START),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_START_REQ),
/* info_val == CSG_REQ state issued */
- KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STOP),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STOP_REQ),
/* info_val == CSG_ACK state */
- KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STARTED),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_RUNNING),
/* info_val == CSG_ACK state */
KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STOPPED),
/* info_val == slot cleaned */
KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_CLEANED),
/* info_val = slot requesting STATUS_UPDATE */
- KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STATUS_UPDATE),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_UPDATE_IDLE_SLOT_REQ),
/* info_val = scheduler's new csg_slots_idle_mask[0]
* group->csg_nr indicates which bit was set
*/
@@ -133,13 +133,13 @@ int dummy_array[] = {
*/
KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_IDLE_CLEAR),
/* info_val == previous priority */
- KBASE_KTRACE_CODE_MAKE_CODE(CSG_PRIO_UPDATE),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_PRIO_UPDATE),
/* info_val == CSG_REQ ^ CSG_ACK */
- KBASE_KTRACE_CODE_MAKE_CODE(CSG_SYNC_UPDATE_INTERRUPT),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_SYNC_UPDATE),
/* info_val == CSG_REQ ^ CSG_ACK */
- KBASE_KTRACE_CODE_MAKE_CODE(CSG_IDLE_INTERRUPT),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_IDLE),
/* info_val == CSG_REQ ^ CSG_ACK */
- KBASE_KTRACE_CODE_MAKE_CODE(CSG_PROGRESS_TIMER_INTERRUPT),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_PROGRESS_TIMER_EVENT),
/* info_val[31:0] == CSG_REQ ^ CSG_ACQ
* info_val[63:32] == CSG_IRQ_REQ ^ CSG_IRQ_ACK
*/
@@ -152,34 +152,34 @@ int dummy_array[] = {
/* info_val[31:0] == new run state of the evicted group
* info_val[63:32] == number of runnable groups
*/
- KBASE_KTRACE_CODE_MAKE_CODE(GROUP_EVICT_SCHED),
+ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_EVICT),
/* info_val == new num_runnable_grps
* group is added to the back of the list for its priority level
*/
- KBASE_KTRACE_CODE_MAKE_CODE(GROUP_INSERT_RUNNABLE),
+ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_RUNNABLE_INSERT),
/* info_val == new num_runnable_grps
*/
- KBASE_KTRACE_CODE_MAKE_CODE(GROUP_REMOVE_RUNNABLE),
+ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_RUNNABLE_REMOVE),
/* info_val == num_runnable_grps
* group is moved to the back of the list for its priority level
*/
- KBASE_KTRACE_CODE_MAKE_CODE(GROUP_ROTATE_RUNNABLE),
- KBASE_KTRACE_CODE_MAKE_CODE(GROUP_HEAD_RUNNABLE),
+ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_RUNNABLE_ROTATE),
+ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_RUNNABLE_HEAD),
/* info_val == new num_idle_wait_grps
* group is added to the back of the list
*/
- KBASE_KTRACE_CODE_MAKE_CODE(GROUP_INSERT_IDLE_WAIT),
+ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_IDLE_WAIT_INSERT),
/* info_val == new num_idle_wait_grps
* group is added to the back of the list
*/
- KBASE_KTRACE_CODE_MAKE_CODE(GROUP_REMOVE_IDLE_WAIT),
- KBASE_KTRACE_CODE_MAKE_CODE(GROUP_HEAD_IDLE_WAIT),
+ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_IDLE_WAIT_REMOVE),
+ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_IDLE_WAIT_HEAD),
/* info_val == is scheduler running with protected mode tasks */
- KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_CHECK_PROTM_ENTER),
- KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_ENTER_PROTM),
- KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_EXIT_PROTM),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_ENTER_CHECK),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_ENTER),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_EXIT),
/* info_val[31:0] == number of GPU address space slots in use
* info_val[63:32] == number of runnable groups
*/
@@ -187,11 +187,11 @@ int dummy_array[] = {
/* info_val == new count of off-slot non-idle groups
* no group indicates it was set rather than incremented
*/
- KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_INC),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_GRP_INC),
/* info_val == new count of off-slot non-idle groups */
- KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_DEC),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC),
- KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_BEGIN),
+ KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_START),
KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_END),
/*
@@ -201,42 +201,42 @@ int dummy_array[] = {
KBASE_KTRACE_CODE_MAKE_CODE(CSI_START),
/* info_val == queue->enabled before stop */
KBASE_KTRACE_CODE_MAKE_CODE(CSI_STOP),
- KBASE_KTRACE_CODE_MAKE_CODE(CSI_STOP_REQUESTED),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSI_STOP_REQ),
/* info_val == CS_REQ ^ CS_ACK that were not processed due to the group
* being suspended
*/
- KBASE_KTRACE_CODE_MAKE_CODE(CSI_IGNORED_INTERRUPTS_GROUP_SUSPEND),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSI_INTERRUPT_GROUP_SUSPENDS_IGNORED),
/* info_val == CS_REQ ^ CS_ACK */
- KBASE_KTRACE_CODE_MAKE_CODE(CSI_FAULT_INTERRUPT),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSI_INTERRUPT_FAULT),
/* info_val == CS_REQ ^ CS_ACK */
- KBASE_KTRACE_CODE_MAKE_CODE(CSI_TILER_OOM_INTERRUPT),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSI_INTERRUPT_TILER_OOM),
/* info_val == CS_REQ ^ CS_ACK */
- KBASE_KTRACE_CODE_MAKE_CODE(CSI_PROTM_PEND_INTERRUPT),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSI_INTERRUPT_PROTM_PEND),
/* info_val == CS_ACK_PROTM_PEND ^ CS_REQ_PROTM_PEND */
KBASE_KTRACE_CODE_MAKE_CODE(CSI_PROTM_ACK),
/* info_val == group->run_State (for group the queue is bound to) */
KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_START),
KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_STOP),
/* info_val == contents of CS_STATUS_WAIT_SYNC_POINTER */
- KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE),
+ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_EVAL_START),
/* info_val == bool for result of the evaluation */
- KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_EVALUATED),
+ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_EVAL_END),
/* info_val == contents of CS_STATUS_WAIT */
- KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_STATUS_WAIT),
+ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_WAIT_STATUS),
/* info_val == current sync value pointed to by queue->sync_ptr */
- KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_CURRENT_VAL),
+ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_CUR_VAL),
/* info_val == current value of CS_STATUS_WAIT_SYNC_VALUE */
- KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_TEST_VAL),
+ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_TEST_VAL),
/* info_val == current value of CS_STATUS_BLOCKED_REASON */
- KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_BLOCKED_REASON),
+ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_BLOCKED_REASON),
/* info_val = group's new protm_pending_bitmap[0]
* queue->csi_index indicates which bit was set
*/
- KBASE_KTRACE_CODE_MAKE_CODE(PROTM_PENDING_SET),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSI_PROTM_PEND_SET),
/* info_val = group's new protm_pending_bitmap[0]
* queue->csi_index indicates which bit was cleared
*/
- KBASE_KTRACE_CODE_MAKE_CODE(PROTM_PENDING_CLEAR),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSI_PROTM_PEND_CLEAR),
/*
* KCPU queue events
@@ -244,42 +244,42 @@ int dummy_array[] = {
/* KTrace info_val == KCPU queue fence context
* KCPU extra_info_val == N/A.
*/
- KBASE_KTRACE_CODE_MAKE_CODE(KCPU_QUEUE_NEW),
+ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_QUEUE_CREATE),
/* KTrace info_val == Number of pending commands in KCPU queue when
* it is destroyed.
* KCPU extra_info_val == Number of CQS wait operations present in
* the KCPU queue when it is destroyed.
*/
- KBASE_KTRACE_CODE_MAKE_CODE(KCPU_QUEUE_DESTROY),
+ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_QUEUE_DELETE),
/* KTrace info_val == CQS event memory address
* KCPU extra_info_val == Upper 32 bits of event memory, i.e. contents
* of error field.
*/
- KBASE_KTRACE_CODE_MAKE_CODE(CQS_SET),
+ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_CQS_SET),
/* KTrace info_val == Number of CQS objects to be waited upon
* KCPU extra_info_val == N/A.
*/
- KBASE_KTRACE_CODE_MAKE_CODE(CQS_WAIT_START),
+ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_CQS_WAIT_START),
/* KTrace info_val == CQS event memory address
* KCPU extra_info_val == 1 if CQS was signaled with an error and queue
* inherited the error, otherwise 0.
*/
- KBASE_KTRACE_CODE_MAKE_CODE(CQS_WAIT_END),
+ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_CQS_WAIT_END),
/* KTrace info_val == Fence context
* KCPU extra_info_val == Fence seqno.
*/
- KBASE_KTRACE_CODE_MAKE_CODE(FENCE_SIGNAL),
+ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_FENCE_SIGNAL),
/* KTrace info_val == Fence context
* KCPU extra_info_val == Fence seqno.
*/
- KBASE_KTRACE_CODE_MAKE_CODE(FENCE_WAIT_START),
+ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_FENCE_WAIT_START),
/* KTrace info_val == Fence context
* KCPU extra_info_val == Fence seqno.
*/
- KBASE_KTRACE_CODE_MAKE_CODE(FENCE_WAIT_END),
+ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_FENCE_WAIT_END),
#if 0 /* Dummy section to avoid breaking formatting */
};
#endif
-/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+ /* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
diff --git a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_csf.c b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_csf.c
index 824ca4b..cff6f89 100644
--- a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_csf.c
+++ b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -98,6 +98,9 @@ void kbasep_ktrace_add_csf(struct kbase_device *kbdev,
struct kbase_ktrace_msg *trace_msg;
struct kbase_context *kctx = NULL;
+ if (unlikely(!kbasep_ktrace_initialized(&kbdev->ktrace)))
+ return;
+
spin_lock_irqsave(&kbdev->ktrace.lock, irqflags);
/* Reserve and update indices */
@@ -165,6 +168,9 @@ void kbasep_ktrace_add_csf_kcpu(struct kbase_device *kbdev,
struct kbase_ktrace_msg *trace_msg;
struct kbase_context *kctx = queue->kctx;
+ if (unlikely(!kbasep_ktrace_initialized(&kbdev->ktrace)))
+ return;
+
spin_lock_irqsave(&kbdev->ktrace.lock, irqflags);
/* Reserve and update indices */
diff --git a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_defs_csf.h b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_defs_csf.h
index 7f32cd2..1896e10 100644
--- a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_defs_csf.h
+++ b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_defs_csf.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -47,7 +47,7 @@
* 1.3:
* Add a lot of extra new traces. Tweak some existing scheduler related traces
* to contain extra information / happen at slightly different times.
- * SCHEDULER_EXIT_PROTM now has group information
+ * SCHEDULER_PROTM_EXIT now has group information
*/
#define KBASE_KTRACE_VERSION_MAJOR 1
#define KBASE_KTRACE_VERSION_MINOR 3
diff --git a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_jm.c b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_jm.c
index 05d1677..6597a15 100644
--- a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_jm.c
+++ b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -80,6 +80,9 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev,
unsigned long irqflags;
struct kbase_ktrace_msg *trace_msg;
+ if (unlikely(!kbasep_ktrace_initialized(&kbdev->ktrace)))
+ return;
+
spin_lock_irqsave(&kbdev->ktrace.lock, irqflags);
/* Reserve and update indices */
diff --git a/mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_csf.h b/mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_csf.h
index 9ee7f81..86e81e5 100644
--- a/mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_csf.h
+++ b/mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_csf.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -30,37 +30,36 @@
/*
* Generic CSF events - using the common DEFINE_MALI_ADD_EVENT
*/
-DEFINE_MALI_ADD_EVENT(EVICT_CTX_SLOTS);
-DEFINE_MALI_ADD_EVENT(FIRMWARE_BOOT);
-DEFINE_MALI_ADD_EVENT(FIRMWARE_REBOOT);
-DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK);
+DEFINE_MALI_ADD_EVENT(SCHEDULER_EVICT_CTX_SLOTS_START);
+DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_BOOT);
+DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_REBOOT);
+DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_START);
DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_END);
-DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK);
+DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_START);
DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_END);
-DEFINE_MALI_ADD_EVENT(SCHEDULER_RESET);
-DEFINE_MALI_ADD_EVENT(SCHEDULER_WAIT_PROTM_QUIT);
-DEFINE_MALI_ADD_EVENT(SCHEDULER_WAIT_PROTM_QUIT_DONE);
-DEFINE_MALI_ADD_EVENT(SYNC_UPDATE_EVENT);
-DEFINE_MALI_ADD_EVENT(SYNC_UPDATE_EVENT_NOTIFY_GPU);
-DEFINE_MALI_ADD_EVENT(CSF_INTERRUPT);
+DEFINE_MALI_ADD_EVENT(SCHEDULER_RESET_START);
+DEFINE_MALI_ADD_EVENT(SCHEDULER_PROTM_WAIT_QUIT_START);
+DEFINE_MALI_ADD_EVENT(SCHEDULER_PROTM_WAIT_QUIT_END);
+DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_EVENT);
+DEFINE_MALI_ADD_EVENT(CSF_SYNC_UPDATE_NOTIFY_GPU_EVENT);
+DEFINE_MALI_ADD_EVENT(CSF_INTERRUPT_START);
DEFINE_MALI_ADD_EVENT(CSF_INTERRUPT_END);
-DEFINE_MALI_ADD_EVENT(CSG_INTERRUPT_PROCESS);
-DEFINE_MALI_ADD_EVENT(GLB_REQ_ACQ);
-DEFINE_MALI_ADD_EVENT(SCHEDULER_CAN_IDLE);
-DEFINE_MALI_ADD_EVENT(SCHEDULER_ADVANCE_TICK);
-DEFINE_MALI_ADD_EVENT(SCHEDULER_NOADVANCE_TICK);
-DEFINE_MALI_ADD_EVENT(SCHEDULER_INSERT_RUNNABLE);
-DEFINE_MALI_ADD_EVENT(SCHEDULER_REMOVE_RUNNABLE);
-DEFINE_MALI_ADD_EVENT(SCHEDULER_ROTATE_RUNNABLE);
-DEFINE_MALI_ADD_EVENT(SCHEDULER_HEAD_RUNNABLE);
-DEFINE_MALI_ADD_EVENT(IDLE_WORKER_BEGIN);
-DEFINE_MALI_ADD_EVENT(IDLE_WORKER_END);
-DEFINE_MALI_ADD_EVENT(GROUP_SYNC_UPDATE_WORKER_BEGIN);
-DEFINE_MALI_ADD_EVENT(GROUP_SYNC_UPDATE_WORKER_END);
-DEFINE_MALI_ADD_EVENT(SLOTS_STATUS_UPDATE_ACK);
-DEFINE_MALI_ADD_EVENT(GPU_IDLE_HANDLING_START);
-DEFINE_MALI_ADD_EVENT(MCU_HALTED);
-DEFINE_MALI_ADD_EVENT(MCU_IN_SLEEP);
+DEFINE_MALI_ADD_EVENT(CSF_INTERRUPT_GLB_REQ_ACK);
+DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND);
+DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_ADVANCE);
+DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_NOADVANCE);
+DEFINE_MALI_ADD_EVENT(SCHEDULER_RUNNABLE_KCTX_INSERT);
+DEFINE_MALI_ADD_EVENT(SCHEDULER_RUNNABLE_KCTX_REMOVE);
+DEFINE_MALI_ADD_EVENT(SCHEDULER_RUNNABLE_KCTX_ROTATE);
+DEFINE_MALI_ADD_EVENT(SCHEDULER_RUNNABLE_KCTX_HEAD);
+DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_WORKER_START);
+DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_WORKER_END);
+DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START);
+DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_END);
+DEFINE_MALI_ADD_EVENT(SCHEDULER_UPDATE_IDLE_SLOTS_ACK);
+DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_WORKER_HANDLING_START);
+DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_MCU_HALTED);
+DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_MCU_SLEEP);
DECLARE_EVENT_CLASS(mali_csf_grp_q_template,
TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group,
@@ -130,37 +129,38 @@ DECLARE_EVENT_CLASS(mali_csf_grp_q_template,
__entry->kctx_tgid, __entry->kctx_id, __entry->group_handle, \
__entry->csg_nr, __entry->slot_prio, __entry->info_val))
-DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_START);
-DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STOP);
-DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STARTED);
+DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_START_REQ);
+DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STOP_REQ);
+DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_RUNNING);
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STOPPED);
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_CLEANED);
-DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STATUS_UPDATE);
+DEFINE_MALI_CSF_GRP_EVENT(CSG_UPDATE_IDLE_SLOT_REQ);
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_IDLE_SET);
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_IDLE_CLEAR);
-DEFINE_MALI_CSF_GRP_EVENT(CSG_PRIO_UPDATE);
-DEFINE_MALI_CSF_GRP_EVENT(CSG_SYNC_UPDATE_INTERRUPT);
-DEFINE_MALI_CSF_GRP_EVENT(CSG_IDLE_INTERRUPT);
-DEFINE_MALI_CSF_GRP_EVENT(CSG_PROGRESS_TIMER_INTERRUPT);
+DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_PRIO_UPDATE);
+DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_SYNC_UPDATE);
+DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_IDLE);
+DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_PROGRESS_TIMER_EVENT);
+DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_PROCESS_START);
DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_PROCESS_END);
DEFINE_MALI_CSF_GRP_EVENT(GROUP_SYNC_UPDATE_DONE);
DEFINE_MALI_CSF_GRP_EVENT(GROUP_DESCHEDULE);
DEFINE_MALI_CSF_GRP_EVENT(GROUP_SCHEDULE);
-DEFINE_MALI_CSF_GRP_EVENT(GROUP_EVICT_SCHED);
-DEFINE_MALI_CSF_GRP_EVENT(GROUP_INSERT_RUNNABLE);
-DEFINE_MALI_CSF_GRP_EVENT(GROUP_REMOVE_RUNNABLE);
-DEFINE_MALI_CSF_GRP_EVENT(GROUP_ROTATE_RUNNABLE);
-DEFINE_MALI_CSF_GRP_EVENT(GROUP_HEAD_RUNNABLE);
-DEFINE_MALI_CSF_GRP_EVENT(GROUP_INSERT_IDLE_WAIT);
-DEFINE_MALI_CSF_GRP_EVENT(GROUP_REMOVE_IDLE_WAIT);
-DEFINE_MALI_CSF_GRP_EVENT(GROUP_HEAD_IDLE_WAIT);
-DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_CHECK_PROTM_ENTER);
-DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_ENTER_PROTM);
-DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_EXIT_PROTM);
+DEFINE_MALI_CSF_GRP_EVENT(GROUP_EVICT);
+DEFINE_MALI_CSF_GRP_EVENT(GROUP_RUNNABLE_INSERT);
+DEFINE_MALI_CSF_GRP_EVENT(GROUP_RUNNABLE_REMOVE);
+DEFINE_MALI_CSF_GRP_EVENT(GROUP_RUNNABLE_ROTATE);
+DEFINE_MALI_CSF_GRP_EVENT(GROUP_RUNNABLE_HEAD);
+DEFINE_MALI_CSF_GRP_EVENT(GROUP_IDLE_WAIT_INSERT);
+DEFINE_MALI_CSF_GRP_EVENT(GROUP_IDLE_WAIT_REMOVE);
+DEFINE_MALI_CSF_GRP_EVENT(GROUP_IDLE_WAIT_HEAD);
+DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_PROTM_ENTER_CHECK);
+DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_PROTM_ENTER);
+DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_PROTM_EXIT);
DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_TOP_GRP);
-DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_INC);
-DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_DEC);
-DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_BEGIN);
+DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_GRP_INC);
+DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC);
+DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_START);
DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_END);
#undef DEFINE_MALI_CSF_GRP_EVENT
@@ -176,22 +176,22 @@ DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_END);
DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_START);
DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_STOP);
-DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_STOP_REQUESTED);
-DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_IGNORED_INTERRUPTS_GROUP_SUSPEND);
-DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_FAULT_INTERRUPT);
-DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_TILER_OOM_INTERRUPT);
-DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_PROTM_PEND_INTERRUPT);
+DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_STOP_REQ);
+DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_INTERRUPT_GROUP_SUSPENDS_IGNORED);
+DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_INTERRUPT_FAULT);
+DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_INTERRUPT_TILER_OOM);
+DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_INTERRUPT_PROTM_PEND);
DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_PROTM_ACK);
DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_START);
DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_STOP);
-DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE);
-DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_EVALUATED);
-DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_STATUS_WAIT);
-DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_CURRENT_VAL);
-DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_TEST_VAL);
-DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_BLOCKED_REASON);
-DEFINE_MALI_CSF_GRP_Q_EVENT(PROTM_PENDING_SET);
-DEFINE_MALI_CSF_GRP_Q_EVENT(PROTM_PENDING_CLEAR);
+DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_EVAL_START);
+DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_EVAL_END);
+DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_WAIT_STATUS);
+DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_CUR_VAL);
+DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_TEST_VAL);
+DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_BLOCKED_REASON);
+DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_PROTM_PEND_SET);
+DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_PROTM_PEND_CLEAR);
#undef DEFINE_MALI_CSF_GRP_Q_EVENT
@@ -230,14 +230,14 @@ DECLARE_EVENT_CLASS(mali_csf_kcpu_queue_template,
u64 info_val1, u64 info_val2), \
TP_ARGS(queue, info_val1, info_val2))
-DEFINE_MALI_CSF_KCPU_EVENT(KCPU_QUEUE_NEW);
-DEFINE_MALI_CSF_KCPU_EVENT(KCPU_QUEUE_DESTROY);
-DEFINE_MALI_CSF_KCPU_EVENT(CQS_SET);
-DEFINE_MALI_CSF_KCPU_EVENT(CQS_WAIT_START);
-DEFINE_MALI_CSF_KCPU_EVENT(CQS_WAIT_END);
-DEFINE_MALI_CSF_KCPU_EVENT(FENCE_SIGNAL);
-DEFINE_MALI_CSF_KCPU_EVENT(FENCE_WAIT_START);
-DEFINE_MALI_CSF_KCPU_EVENT(FENCE_WAIT_END);
+DEFINE_MALI_CSF_KCPU_EVENT(KCPU_QUEUE_CREATE);
+DEFINE_MALI_CSF_KCPU_EVENT(KCPU_QUEUE_DELETE);
+DEFINE_MALI_CSF_KCPU_EVENT(KCPU_CQS_SET);
+DEFINE_MALI_CSF_KCPU_EVENT(KCPU_CQS_WAIT_START);
+DEFINE_MALI_CSF_KCPU_EVENT(KCPU_CQS_WAIT_END);
+DEFINE_MALI_CSF_KCPU_EVENT(KCPU_FENCE_SIGNAL);
+DEFINE_MALI_CSF_KCPU_EVENT(KCPU_FENCE_WAIT_START);
+DEFINE_MALI_CSF_KCPU_EVENT(KCPU_FENCE_WAIT_END);
#undef DEFINE_MALI_CSF_KCPU_EVENT
diff --git a/mali_kbase/debug/mali_kbase_debug_ktrace.c b/mali_kbase/debug/mali_kbase_debug_ktrace.c
index 9bf8610..f521b47 100644
--- a/mali_kbase/debug/mali_kbase_debug_ktrace.c
+++ b/mali_kbase/debug/mali_kbase_debug_ktrace.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,13 +27,13 @@ int kbase_ktrace_init(struct kbase_device *kbdev)
#if KBASE_KTRACE_TARGET_RBUF
struct kbase_ktrace_msg *rbuf;
+ spin_lock_init(&kbdev->ktrace.lock);
rbuf = kmalloc_array(KBASE_KTRACE_SIZE, sizeof(*rbuf), GFP_KERNEL);
if (!rbuf)
return -EINVAL;
kbdev->ktrace.rbuf = rbuf;
- spin_lock_init(&kbdev->ktrace.lock);
#endif /* KBASE_KTRACE_TARGET_RBUF */
return 0;
}
@@ -42,6 +42,7 @@ void kbase_ktrace_term(struct kbase_device *kbdev)
{
#if KBASE_KTRACE_TARGET_RBUF
kfree(kbdev->ktrace.rbuf);
+ kbdev->ktrace.rbuf = NULL;
#endif /* KBASE_KTRACE_TARGET_RBUF */
}
@@ -183,6 +184,9 @@ void kbasep_ktrace_add(struct kbase_device *kbdev, enum kbase_ktrace_code code,
unsigned long irqflags;
struct kbase_ktrace_msg *trace_msg;
+ if (unlikely(!kbasep_ktrace_initialized(&kbdev->ktrace)))
+ return;
+
WARN_ON((flags & ~KBASE_KTRACE_FLAG_COMMON_ALL));
spin_lock_irqsave(&kbdev->ktrace.lock, irqflags);
diff --git a/mali_kbase/debug/mali_kbase_debug_ktrace.h b/mali_kbase/debug/mali_kbase_debug_ktrace.h
index f1e6d3d..31a15a0 100644
--- a/mali_kbase/debug/mali_kbase_debug_ktrace.h
+++ b/mali_kbase/debug/mali_kbase_debug_ktrace.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -82,6 +82,18 @@ void kbase_ktrace_debugfs_init(struct kbase_device *kbdev);
*/
#if KBASE_KTRACE_TARGET_RBUF
/**
+ * kbasep_ktrace_initialized - Check whether kbase ktrace is initialized
+ *
+ * @ktrace: ktrace of kbase device.
+ *
+ * Return: true if ktrace has been initialized.
+ */
+static inline bool kbasep_ktrace_initialized(struct kbase_ktrace *ktrace)
+{
+ return ktrace->rbuf != NULL;
+}
+
+/**
* kbasep_ktrace_add - internal function to add trace to the ringbuffer.
* @kbdev: kbase device
* @code: ktrace code
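
The ktrace hunks above do two related things: spin_lock_init() now runs before the ring-buffer allocation, and kbasep_ktrace_add() bails out early through the new kbasep_ktrace_initialized() check, so traces issued before init completes (or after it fails) are dropped instead of dereferencing a NULL buffer. A minimal userspace sketch of that pattern, with hypothetical names and a pthread mutex standing in for the spinlock:

#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>
#include <pthread.h>

/* Hypothetical trace buffer; rbuf doubles as the "initialized" flag. */
struct trace_buf {
    pthread_mutex_t lock;
    unsigned int *rbuf;     /* stays NULL until init succeeds */
    size_t next;
    size_t size;
};

/* Mirrors kbasep_ktrace_initialized(). */
static bool trace_buf_initialized(const struct trace_buf *tb)
{
    return tb->rbuf != NULL;
}

static int trace_buf_init(struct trace_buf *tb, size_t size)
{
    /* Lock first, allocation second: callers racing with init can
     * always take the lock, and a failed allocation leaves rbuf NULL. */
    pthread_mutex_init(&tb->lock, NULL);
    tb->rbuf = calloc(size, sizeof(*tb->rbuf));
    if (!tb->rbuf)
        return -1;
    tb->size = size;
    tb->next = 0;
    return 0;
}

static void trace_buf_add(struct trace_buf *tb, unsigned int code)
{
    if (!trace_buf_initialized(tb))
        return;                 /* drop traces issued before init */
    pthread_mutex_lock(&tb->lock);
    tb->rbuf[tb->next] = code;
    tb->next = (tb->next + 1) % tb->size;
    pthread_mutex_unlock(&tb->lock);
}

int main(void)
{
    struct trace_buf tb = { .rbuf = NULL };

    trace_buf_add(&tb, 1);      /* silently dropped: not initialized */
    if (trace_buf_init(&tb, 64) == 0)
        trace_buf_add(&tb, 2);  /* recorded */
    free(tb.rbuf);
    return 0;
}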
diff --git a/mali_kbase/debug/mali_kbase_debug_ktrace_defs.h b/mali_kbase/debug/mali_kbase_debug_ktrace_defs.h
index 4694b78..8d9e11e 100644
--- a/mali_kbase/debug/mali_kbase_debug_ktrace_defs.h
+++ b/mali_kbase/debug/mali_kbase_debug_ktrace_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -138,8 +138,8 @@ enum kbase_ktrace_code {
};
/**
- * struct kbase_ktrace - object representing a trace message added to trace
- * buffer trace_rbuf in &kbase_device
+ * struct kbase_ktrace_msg - object representing a trace message added to trace
+ * buffer trace_rbuf in &kbase_device
* @timestamp: CPU timestamp at which the trace message was added.
* @thread_id: id of the thread in the context of which trace message was
* added.
diff --git a/mali_kbase/device/backend/mali_kbase_device_csf.c b/mali_kbase/device/backend/mali_kbase_device_csf.c
index 5325658..51abad0 100644
--- a/mali_kbase/device/backend/mali_kbase_device_csf.c
+++ b/mali_kbase/device/backend/mali_kbase_device_csf.c
@@ -43,6 +43,7 @@
#include <mali_kbase_hwcnt_virtualizer.h>
#include <mali_kbase_kinstr_prfcnt.h>
#include <mali_kbase_vinstr.h>
+#include <tl/mali_kbase_timeline.h>
/**
* kbase_device_firmware_hwcnt_term - Terminate CSF firmware and HWC
@@ -60,7 +61,7 @@ static void kbase_device_firmware_hwcnt_term(struct kbase_device *kbdev)
kbase_vinstr_term(kbdev->vinstr_ctx);
kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt);
kbase_hwcnt_backend_csf_metadata_term(&kbdev->hwcnt_gpu_iface);
- kbase_csf_firmware_term(kbdev);
+ kbase_csf_firmware_unload_term(kbdev);
}
}
@@ -191,7 +192,7 @@ static int kbase_csf_early_init(struct kbase_device *kbdev)
}
/**
- * kbase_csf_early_init - Early termination for firmware & scheduler.
+ * kbase_csf_early_term() - Early termination for firmware & scheduler.
* @kbdev: Device pointer
*/
static void kbase_csf_early_term(struct kbase_device *kbdev)
@@ -200,6 +201,19 @@ static void kbase_csf_early_term(struct kbase_device *kbdev)
}
/**
+ * kbase_csf_late_init - late initialization for firmware.
+ * @kbdev: Device pointer
+ *
+ * Return: 0 on success, error code otherwise.
+ */
+static int kbase_csf_late_init(struct kbase_device *kbdev)
+{
+ int err = kbase_csf_firmware_late_init(kbdev);
+
+ return err;
+}
+
+/**
* kbase_device_hwcnt_watchdog_if_init - Create hardware counter watchdog
* interface.
* @kbdev: Device pointer
@@ -269,59 +283,46 @@ static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev)
static const struct kbase_device_init dev_init[] = {
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
- { kbase_gpu_device_create, kbase_gpu_device_destroy,
- "Dummy model initialization failed" },
+ { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" },
#else
{ assign_irqs, NULL, "IRQ search failed" },
{ registers_map, registers_unmap, "Register map failed" },
#endif
- { power_control_init, power_control_term,
- "Power control initialization failed" },
+ { power_control_init, power_control_term, "Power control initialization failed" },
{ kbase_device_io_history_init, kbase_device_io_history_term,
"Register access history initialization failed" },
- { kbase_device_early_init, kbase_device_early_term,
- "Early device initialization failed" },
- { kbase_device_populate_max_freq, NULL,
- "Populating max frequency failed" },
+ { kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" },
+ { kbase_device_populate_max_freq, NULL, "Populating max frequency failed" },
+ { kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" },
{ kbase_device_misc_init, kbase_device_misc_term,
"Miscellaneous device initialization failed" },
{ kbase_device_pcm_dev_init, kbase_device_pcm_dev_term,
"Priority control manager initialization failed" },
- { kbase_ctx_sched_init, kbase_ctx_sched_term,
- "Context scheduler initialization failed" },
- { kbase_mem_init, kbase_mem_term,
- "Memory subsystem initialization failed" },
+ { kbase_ctx_sched_init, kbase_ctx_sched_term, "Context scheduler initialization failed" },
+ { kbase_mem_init, kbase_mem_term, "Memory subsystem initialization failed" },
{ kbase_csf_protected_memory_init, kbase_csf_protected_memory_term,
"Protected memory allocator initialization failed" },
{ kbase_device_coherency_init, NULL, "Device coherency init failed" },
{ kbase_protected_mode_init, kbase_protected_mode_term,
"Protected mode subsystem initialization failed" },
- { kbase_device_list_init, kbase_device_list_term,
- "Device list setup failed" },
+ { kbase_device_list_init, kbase_device_list_term, "Device list setup failed" },
{ kbase_device_timeline_init, kbase_device_timeline_term,
"Timeline stream initialization failed" },
{ kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term,
"Clock rate trace manager initialization failed" },
- { kbase_lowest_gpu_freq_init, NULL,
- "Lowest freq initialization failed" },
- { kbase_device_hwcnt_watchdog_if_init,
- kbase_device_hwcnt_watchdog_if_term,
+ { kbase_device_hwcnt_watchdog_if_init, kbase_device_hwcnt_watchdog_if_term,
"GPU hwcnt backend watchdog interface creation failed" },
- { kbase_device_hwcnt_backend_csf_if_init,
- kbase_device_hwcnt_backend_csf_if_term,
+ { kbase_device_hwcnt_backend_csf_if_init, kbase_device_hwcnt_backend_csf_if_term,
"GPU hwcnt backend CSF interface creation failed" },
- { kbase_device_hwcnt_backend_csf_init,
- kbase_device_hwcnt_backend_csf_term,
+ { kbase_device_hwcnt_backend_csf_init, kbase_device_hwcnt_backend_csf_term,
"GPU hwcnt backend creation failed" },
{ kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term,
"GPU hwcnt context initialization failed" },
- { kbase_backend_late_init, kbase_backend_late_term,
- "Late backend initialization failed" },
- { kbase_csf_early_init, kbase_csf_early_term,
- "Early CSF initialization failed" },
+ { kbase_csf_early_init, kbase_csf_early_term, "Early CSF initialization failed" },
+ { kbase_backend_late_init, kbase_backend_late_term, "Late backend initialization failed" },
+ { kbase_csf_late_init, NULL, "Late CSF initialization failed" },
{ NULL, kbase_device_firmware_hwcnt_term, NULL },
- { kbase_device_debugfs_init, kbase_device_debugfs_term,
- "DebugFS initialization failed" },
+ { kbase_device_debugfs_init, kbase_device_debugfs_term, "DebugFS initialization failed" },
/* Sysfs init needs to happen before registering the device with
* misc_register(), otherwise it causes a race condition between
* registering the device and a uevent event being generated for
@@ -339,8 +340,7 @@ static const struct kbase_device_init dev_init[] = {
"Misc device registration failed" },
{ kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer,
"GPU property population failed" },
- { kbase_device_late_init, kbase_device_late_term,
- "Late device initialization failed" },
+ { kbase_device_late_init, kbase_device_late_term, "Late device initialization failed" },
};
static void kbase_device_term_partial(struct kbase_device *kbdev,
@@ -468,7 +468,7 @@ static int kbase_csf_firmware_deferred_init(struct kbase_device *kbdev)
lockdep_assert_held(&kbdev->fw_load_lock);
- err = kbase_csf_firmware_init(kbdev);
+ err = kbase_csf_firmware_load_init(kbdev);
if (!err) {
unsigned long flags;
@@ -498,11 +498,12 @@ int kbase_device_firmware_init_once(struct kbase_device *kbdev)
ret = kbase_device_hwcnt_csf_deferred_init(kbdev);
if (ret) {
- kbase_csf_firmware_term(kbdev);
+ kbase_csf_firmware_unload_term(kbdev);
goto out;
}
kbase_csf_debugfs_init(kbdev);
+ kbase_timeline_io_debugfs_init(kbdev);
out:
kbase_pm_context_idle(kbdev);
}
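
The reflowed dev_init[] table keeps the driver's one-entry-per-stage convention: each row pairs an init function with its matching terminate function and an error message, and ordering matters (kbase_csf_early_init now runs before kbase_backend_late_init, with kbase_csf_late_init added after it). An illustrative sketch of how such a table is typically driven, under the assumption that entries are walked in order and torn down in reverse on failure, as kbase_device_term_partial() suggests; the stage names are hypothetical:

#include <stdio.h>

struct dev_stage {
    int (*init)(void);
    void (*term)(void);
    const char *err_msg;
};

static int stage_a_init(void)  { puts("A init"); return 0; }
static void stage_a_term(void) { puts("A term"); }
static int stage_b_init(void)  { puts("B init"); return -1; } /* fails */
static void stage_b_term(void) { puts("B term"); }

static const struct dev_stage stages[] = {
    { stage_a_init, stage_a_term, "Stage A initialization failed" },
    { stage_b_init, stage_b_term, "Stage B initialization failed" },
};

#define NUM_STAGES (int)(sizeof(stages) / sizeof(stages[0]))

/* Tear down, in reverse order, only the stages that completed. */
static void term_partial(int nr_done)
{
    while (nr_done-- > 0)
        if (stages[nr_done].term)
            stages[nr_done].term();
}

int main(void)
{
    int i;

    for (i = 0; i < NUM_STAGES; i++) {
        int err = stages[i].init ? stages[i].init() : 0;

        if (err) {
            fprintf(stderr, "%s (%d)\n", stages[i].err_msg, err);
            term_partial(i);
            return 1;
        }
    }
    term_partial(NUM_STAGES);
    return 0;
}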
diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
index 1e914d0..fcd0c50 100644
--- a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
+++ b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -177,7 +177,9 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
dev_dbg(kbdev->dev, "Doorbell mirror interrupt received");
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+#ifdef CONFIG_MALI_DEBUG
WARN_ON(!kbase_csf_scheduler_get_nr_active_csgs(kbdev));
+#endif
kbase_pm_disable_db_mirror_interrupt(kbdev);
kbdev->pm.backend.exit_gpu_sleep_mode = true;
kbase_csf_scheduler_invoke_tick(kbdev);
diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_jm.c b/mali_kbase/device/backend/mali_kbase_device_hw_jm.c
index ff57cf6..e6f0197 100644
--- a/mali_kbase/device/backend/mali_kbase_device_hw_jm.c
+++ b/mali_kbase/device/backend/mali_kbase_device_hw_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -63,9 +63,6 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
if (val & RESET_COMPLETED)
kbase_pm_reset_done(kbdev);
- if (val & PRFCNT_SAMPLE_COMPLETED)
- kbase_instr_hwcnt_sample_done(kbdev);
-
/* Defer clearing CLEAN_CACHES_COMPLETED to kbase_clean_caches_done.
* We need to acquire hwaccess_lock to avoid a race condition with
* kbase_gpu_cache_flush_and_busy_wait
@@ -73,6 +70,13 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val & ~CLEAN_CACHES_COMPLETED);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val & ~CLEAN_CACHES_COMPLETED);
+ /* kbase_instr_hwcnt_sample_done frees the HWCNT pipeline to request another
+ * sample. Therefore this must be called after clearing the IRQ to avoid a
+ * race between clearing and the next sample raising the IRQ again.
+ */
+ if (val & PRFCNT_SAMPLE_COMPLETED)
+ kbase_instr_hwcnt_sample_done(kbdev);
+
/* kbase_pm_check_transitions (called by kbase_pm_power_changed) must
* be called after the IRQ has been cleared. This is because it might
* trigger further power transitions and we don't want to miss the
@@ -105,8 +109,7 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value)
{
- KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
- KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+ WARN_ON(!kbdev->pm.backend.gpu_powered);
writel(value, kbdev->reg + offset);
@@ -123,8 +126,7 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
{
u32 val;
- KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
- KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+ WARN_ON(!kbdev->pm.backend.gpu_powered);
val = readl(kbdev->reg + offset);
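
The JM interrupt hunk moves PRFCNT_SAMPLE_COMPLETED handling to after the GPU_IRQ_CLEAR write, for the reason spelled out in the new comment: completing the sample frees the HWCNT pipeline, which can let the next sample raise the IRQ again immediately, and that new edge must not be wiped out by a late clear. A small standalone sketch of the clear-then-handle ordering, with a fake register variable in place of the real GPU_IRQ registers:

#include <stdint.h>
#include <stdio.h>

/* Illustration only: fake raw status register. */
static uint32_t irq_rawstat;

#define PRFCNT_SAMPLE_COMPLETED_BIT (1u << 16)

static void sample_done(void)
{
    /* In the driver this frees the HWCNT pipeline, so a new sample may
     * be requested (and a new IRQ raised) as soon as it returns. */
    printf("sample consumed\n");
}

static void gpu_irq_handler(uint32_t val)
{
    /* 1. Acknowledge first: clear the bits we are about to handle. */
    irq_rawstat &= ~val;

    /* 2. Only then run handlers that can re-trigger the interrupt.
     * With the order swapped, a sample raised between handling and
     * clearing would be acknowledged away and never serviced. */
    if (val & PRFCNT_SAMPLE_COMPLETED_BIT)
        sample_done();
}

int main(void)
{
    irq_rawstat = PRFCNT_SAMPLE_COMPLETED_BIT;
    gpu_irq_handler(irq_rawstat);
    return 0;
}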
diff --git a/mali_kbase/device/backend/mali_kbase_device_jm.c b/mali_kbase/device/backend/mali_kbase_device_jm.c
index 260afef..9287d73 100644
--- a/mali_kbase/device/backend/mali_kbase_device_jm.c
+++ b/mali_kbase/device/backend/mali_kbase_device_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -241,7 +241,7 @@ static const struct kbase_device_init dev_init[] = {
"Timeline stream initialization failed" },
{ kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term,
"Clock rate trace manager initialization failed" },
- { kbase_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" },
+ { kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" },
{ kbase_instr_backend_init, kbase_instr_backend_term,
"Instrumentation backend initialization failed" },
{ kbase_device_hwcnt_watchdog_if_init, kbase_device_hwcnt_watchdog_if_term,
@@ -326,20 +326,19 @@ int kbase_device_init(struct kbase_device *kbdev)
if (err)
return err;
- kthread_init_worker(&kbdev->job_done_worker);
- kbdev->job_done_worker_thread = kbase_create_realtime_thread(kbdev,
+ err = kbase_create_realtime_thread(kbdev,
kthread_worker_fn, &kbdev->job_done_worker, "mali_jd_thread");
- if (IS_ERR(kbdev->job_done_worker_thread))
- return PTR_ERR(kbdev->job_done_worker_thread);
+ if (err)
+ return err;
err = kbase_pm_apc_init(kbdev);
if (err)
return err;
kthread_init_worker(&kbdev->event_worker);
- kbdev->event_worker_thread = kthread_run(kthread_worker_fn,
- &kbdev->event_worker, "mali_event_thread");
- if (IS_ERR(kbdev->event_worker_thread)) {
+ kbdev->event_worker.task =
+ kthread_run(kthread_worker_fn, &kbdev->event_worker, "mali_event_thread");
+ if (IS_ERR(kbdev->event_worker.task)) {
err = -ENOMEM;
}
diff --git a/mali_kbase/device/mali_kbase_device.c b/mali_kbase/device/mali_kbase_device.c
index c123010..9571830 100644
--- a/mali_kbase/device/mali_kbase_device.c
+++ b/mali_kbase/device/mali_kbase_device.c
@@ -279,9 +279,7 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
goto dma_set_mask_failed;
- /* There is no limit for Mali, so set to max. We only do this if dma_parms
- * is already allocated by the platform.
- */
+ /* There is no limit for Mali, so set to max. */
if (kbdev->dev->dma_parms)
err = dma_set_max_seg_size(kbdev->dev, UINT_MAX);
if (err)
@@ -293,12 +291,9 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
if (err)
goto dma_set_mask_failed;
- err = kbase_ktrace_init(kbdev);
- if (err)
- goto term_as;
err = kbase_pbha_read_dtb(kbdev);
if (err)
- goto term_ktrace;
+ goto term_as;
init_waitqueue_head(&kbdev->cache_clean_wait);
@@ -308,7 +303,11 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD;
- kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS;
+#if MALI_USE_CSF
+ kbdev->reset_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT);
+#else
+ kbdev->reset_timeout_ms = JM_DEFAULT_RESET_TIMEOUT_MS;
+#endif /* MALI_USE_CSF */
kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
@@ -326,8 +325,6 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
}
return 0;
-term_ktrace:
- kbase_ktrace_term(kbdev);
term_as:
kbase_device_all_as_term(kbdev);
dma_set_mask_failed:
@@ -344,9 +341,6 @@ void kbase_device_misc_term(struct kbase_device *kbdev)
#if KBASE_KTRACE_ENABLE
kbase_debug_assert_register_hook(NULL, NULL);
#endif
-
- kbase_ktrace_term(kbdev);
-
kbase_device_all_as_term(kbdev);
@@ -484,10 +478,14 @@ int kbase_device_early_init(struct kbase_device *kbdev)
{
int err;
+ err = kbase_ktrace_init(kbdev);
+ if (err)
+ return err;
+
err = kbasep_platform_device_init(kbdev);
if (err)
- return err;
+ goto ktrace_term;
err = kbase_pm_runtime_init(kbdev);
if (err)
@@ -501,7 +499,12 @@ int kbase_device_early_init(struct kbase_device *kbdev)
/* Ensure we can access the GPU registers */
kbase_pm_register_access_enable(kbdev);
- /* Find out GPU properties based on the GPU feature registers */
+ /*
+ * Find out GPU properties based on the GPU feature registers.
+ * Note that this does not populate the few properties that depend on
+ * hw_features being initialized. Those are set by kbase_gpuprops_set_features
+ * soon after this in the init process.
+ */
kbase_gpuprops_set(kbdev);
/* We're done accessing the GPU registers for now. */
@@ -524,6 +527,8 @@ fail_interrupts:
kbase_pm_runtime_term(kbdev);
fail_runtime_pm:
kbasep_platform_device_term(kbdev);
+ktrace_term:
+ kbase_ktrace_term(kbdev);
return err;
}
@@ -540,6 +545,7 @@ void kbase_device_early_term(struct kbase_device *kbdev)
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
kbase_pm_runtime_term(kbdev);
kbasep_platform_device_term(kbdev);
+ kbase_ktrace_term(kbdev);
}
int kbase_device_late_init(struct kbase_device *kbdev)
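
kbase_ktrace_init() moves from the miscellaneous-init path into kbase_device_early_init(), which gains a matching ktrace_term unwind label, and kbase_device_early_term() now calls kbase_ktrace_term() last. The error handling is the standard goto-based cleanup ladder; a compact illustration with hypothetical stage functions:

#include <stdio.h>

static int ktrace_init(void)    { puts("ktrace init");   return 0; }
static void ktrace_term(void)   { puts("ktrace term");   }
static int platform_init(void)  { puts("platform init"); return -1; } /* fails */
static void platform_term(void) { puts("platform term"); }
static int runtime_init(void)   { puts("runtime init");  return 0; }

static int device_early_init(void)
{
    int err;

    err = ktrace_init();
    if (err)
        return err;

    err = platform_init();
    if (err)
        goto term_ktrace;       /* unwind only what already succeeded */

    err = runtime_init();
    if (err)
        goto term_platform;

    return 0;

term_platform:
    platform_term();
term_ktrace:
    ktrace_term();
    return err;
}

int main(void)
{
    return device_early_init() ? 1 : 0;
}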
diff --git a/mali_kbase/device/mali_kbase_device.h b/mali_kbase/device/mali_kbase_device.h
index 5ff970a..6706a61 100644
--- a/mali_kbase/device/mali_kbase_device.h
+++ b/mali_kbase/device/mali_kbase_device.h
@@ -39,7 +39,7 @@ const struct list_head *kbase_device_get_list(void);
void kbase_device_put_list(const struct list_head *dev_list);
/**
- * Kbase_increment_device_id - increment device id.
+ * kbase_increment_device_id - increment device id.
*
* Used to increment device id on successful initialization of the device.
*/
@@ -116,6 +116,22 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset);
bool kbase_is_gpu_removed(struct kbase_device *kbdev);
/**
+ * kbase_gpu_cache_flush_pa_range_and_busy_wait() - Start a cache physical range flush
+ * and busy wait
+ *
+ * @kbdev: kbase device to issue the MMU operation on.
+ * @phys: Starting address of the physical range to start the operation on.
+ * @nr_bytes: Number of bytes to work on.
+ * @flush_op: Flush command register value to be sent to HW
+ *
+ * Issue a cache flush physical range command, then busy wait an irq status.
+ * This function will clear FLUSH_PA_RANGE_COMPLETED irq mask bit
+ * and busy-wait the rawstat register.
+ *
+ * Return: 0 if successful or a negative error code on failure.
+ */
+#define kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op) (0)
+/**
* kbase_gpu_cache_flush_and_busy_wait - Start a cache flush and busy wait
* @kbdev: Kbase device
* @flush_op: Flush command register value to be sent to HW
@@ -188,7 +204,7 @@ int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev,
void kbase_gpu_cache_clean_wait_complete(struct kbase_device *kbdev);
/**
- * kbase_clean_caches_done - Issue preiously queued cache clean request or
+ * kbase_clean_caches_done - Issue previously queued cache clean request or
* wake up the requester that issued cache clean.
* @kbdev: Kbase device
*
diff --git a/mali_kbase/device/mali_kbase_device_hw.c b/mali_kbase/device/mali_kbase_device_hw.c
index 249d5f8..4e03e44 100644
--- a/mali_kbase/device/mali_kbase_device_hw.c
+++ b/mali_kbase/device/mali_kbase_device_hw.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2016, 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,6 +27,9 @@
#include <mali_kbase_reset_gpu.h>
#include <mmu/mali_kbase_mmu.h>
+#define U64_LO_MASK ((1ULL << 32) - 1)
+#define U64_HI_MASK (~U64_LO_MASK)
+
#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
bool kbase_is_gpu_removed(struct kbase_device *kbdev)
{
@@ -38,8 +41,9 @@ bool kbase_is_gpu_removed(struct kbase_device *kbdev)
}
#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
-static int busy_wait_cache_clean_irq(struct kbase_device *kbdev)
+static int busy_wait_on_irq(struct kbase_device *kbdev, u32 irq_bit)
{
+ char *irq_flag_name;
/* Previously MMU-AS command was used for L2 cache flush on page-table update.
* And we're using the same max-loops count for GPU command, because amount of
* L2 cache flush overhead are same between them.
@@ -48,28 +52,42 @@ static int busy_wait_cache_clean_irq(struct kbase_device *kbdev)
/* Wait for the GPU cache clean operation to complete */
while (--max_loops &&
- !(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) &
- CLEAN_CACHES_COMPLETED)) {
+ !(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & irq_bit)) {
;
}
/* reset gpu if time-out occurred */
if (max_loops == 0) {
+ switch (irq_bit) {
+ case CLEAN_CACHES_COMPLETED:
+ irq_flag_name = "CLEAN_CACHES_COMPLETED";
+ break;
+ case FLUSH_PA_RANGE_COMPLETED:
+ irq_flag_name = "FLUSH_PA_RANGE_COMPLETED";
+ break;
+ default:
+ irq_flag_name = "UNKNOWN";
+ break;
+ }
+
dev_err(kbdev->dev,
- "CLEAN_CACHES_COMPLETED bit stuck, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n");
+ "Stuck waiting on %s bit, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n",
+ irq_flag_name);
+
if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu_locked(kbdev);
return -EBUSY;
}
- /* Clear the interrupt CLEAN_CACHES_COMPLETED bit. */
- KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, CLEAN_CACHES_COMPLETED);
- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
- CLEAN_CACHES_COMPLETED);
+ /* Clear the interrupt bit. */
+ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, irq_bit);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), irq_bit);
return 0;
}
+#define kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op) (0)
+
int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
u32 flush_op)
{
@@ -97,7 +115,7 @@ int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
irq_mask & ~CLEAN_CACHES_COMPLETED);
/* busy wait irq status to be enabled */
- ret = busy_wait_cache_clean_irq(kbdev);
+ ret = busy_wait_on_irq(kbdev, (u32)CLEAN_CACHES_COMPLETED);
if (ret)
return ret;
@@ -118,7 +136,7 @@ int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op);
/* 3. Busy-wait irq status to be enabled. */
- ret = busy_wait_cache_clean_irq(kbdev);
+ ret = busy_wait_on_irq(kbdev, (u32)CLEAN_CACHES_COMPLETED);
if (ret)
return ret;
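
busy_wait_cache_clean_irq() becomes the more general busy_wait_on_irq(): poll GPU_IRQ_RAWSTAT for a caller-supplied bit under a bounded loop count, report (and reset the GPU) on timeout, and acknowledge the bit once it is seen. A self-contained sketch of that shape, with a fake register variable in place of the real register access:

#include <stdint.h>
#include <stdio.h>

/* Fake register standing in for GPU_IRQ_RAWSTAT; illustration only. */
static uint32_t irq_rawstat;

static uint32_t reg_read_rawstat(void)      { return irq_rawstat; }
static void reg_write_irq_clear(uint32_t v) { irq_rawstat &= ~v; }

/* Bounded busy-wait on one IRQ status bit, then acknowledge it. */
static int busy_wait_on_irq(uint32_t irq_bit, unsigned int max_loops)
{
    while (--max_loops && !(reg_read_rawstat() & irq_bit))
        ;

    if (max_loops == 0) {
        fprintf(stderr, "Stuck waiting on 0x%x\n", irq_bit);
        return -1;              /* the driver schedules a GPU reset here */
    }

    reg_write_irq_clear(irq_bit);
    return 0;
}

int main(void)
{
    irq_rawstat = 1u << 17;     /* pretend CLEAN_CACHES_COMPLETED is set */
    return busy_wait_on_irq(1u << 17, 100000) ? 1 : 0;
}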
diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c
index 893a335..15bfd03 100644
--- a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c
+++ b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -86,6 +86,9 @@ const char *kbase_gpu_exception_name(u32 const exception_code)
case CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR:
e = "FIRMWARE_INTERNAL_ERROR";
break;
+ case CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE:
+ e = "CS_UNRECOVERABLE";
+ break;
case CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT:
e = "RESOURCE_EVICTION_TIMEOUT";
break;
diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h
index f6945b3..6ef61ce 100644
--- a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h
+++ b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -35,10 +35,7 @@
#define MCU_SUBSYSTEM_BASE 0x20000
/* IPA control registers */
-#define IPA_CONTROL_BASE 0x40000
-#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE+(r))
#define COMMAND 0x000 /* (WO) Command register */
-#define STATUS 0x004 /* (RO) Status register */
#define TIMER 0x008 /* (RW) Timer control register */
#define SELECT_CSHW_LO 0x010 /* (RW) Counter select for CS hardware, low word */
@@ -68,6 +65,8 @@
#define VALUE_SHADER_REG_LO(n) (VALUE_SHADER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
#define VALUE_SHADER_REG_HI(n) (VALUE_SHADER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
+#define AS_STATUS_AS_ACTIVE_INT 0x2
+
/* Set to implementation defined, outer caching */
#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
/* Set to write back memory, outer caching */
@@ -125,42 +124,18 @@
#define MCU_STATUS_HALTED (1 << 1)
-#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory
- * region base address, low word
- */
-#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory
- * region base address, high word
- */
-#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter
- * configuration
- */
-
-#define PRFCNT_CSHW_EN 0x06C /* (RW) Performance counter
- * enable for CS Hardware
- */
-
-#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable
- * flags for shader cores
- */
-#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable
- * flags for tiler
- */
-#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable
- * flags for MMU/L2 cache
- */
-
/* JOB IRQ flags */
#define JOB_IRQ_GLOBAL_IF (1 << 31) /* Global interface interrupt received */
/* GPU_COMMAND codes */
#define GPU_COMMAND_CODE_NOP 0x00 /* No operation, nothing happens */
#define GPU_COMMAND_CODE_RESET 0x01 /* Reset the GPU */
-#define GPU_COMMAND_CODE_PRFCNT 0x02 /* Clear or sample performance counters */
#define GPU_COMMAND_CODE_TIME 0x03 /* Configure time sources */
#define GPU_COMMAND_CODE_FLUSH_CACHES 0x04 /* Flush caches */
#define GPU_COMMAND_CODE_SET_PROTECTED_MODE 0x05 /* Places the GPU in protected mode */
#define GPU_COMMAND_CODE_FINISH_HALT 0x06 /* Halt CSF */
#define GPU_COMMAND_CODE_CLEAR_FAULT 0x07 /* Clear GPU_FAULTSTATUS and GPU_FAULTADDRESS, TODX */
+#define GPU_COMMAND_CODE_FLUSH_PA_RANGE 0x08 /* Flush the GPU caches for a physical range, TITX */
/* GPU_COMMAND_RESET payloads */
@@ -179,27 +154,34 @@
*/
#define GPU_COMMAND_RESET_PAYLOAD_HARD_RESET 0x02
-/* GPU_COMMAND_PRFCNT payloads */
-#define GPU_COMMAND_PRFCNT_PAYLOAD_SAMPLE 0x01 /* Sample performance counters */
-#define GPU_COMMAND_PRFCNT_PAYLOAD_CLEAR 0x02 /* Clear performance counters */
-
/* GPU_COMMAND_TIME payloads */
#define GPU_COMMAND_TIME_DISABLE 0x00 /* Disable cycle counter */
#define GPU_COMMAND_TIME_ENABLE 0x01 /* Enable cycle counter */
/* GPU_COMMAND_FLUSH_CACHES payloads bits for L2 caches */
-#define GPU_COMMAND_FLUSH_PAYLOAD_L2_NONE 0x000 /* No flush */
-#define GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN 0x001 /* CLN only */
-#define GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE 0x003 /* CLN + INV */
+#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_NONE 0x000 /* No flush */
+#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN 0x001 /* CLN only */
+#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE 0x003 /* CLN + INV */
/* GPU_COMMAND_FLUSH_CACHES payloads bits for Load-store caches */
-#define GPU_COMMAND_FLUSH_PAYLOAD_LSC_NONE 0x000 /* No flush */
-#define GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN 0x010 /* CLN only */
-#define GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN_INVALIDATE 0x030 /* CLN + INV */
+#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_NONE 0x000 /* No flush */
+#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN 0x010 /* CLN only */
+#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE 0x030 /* CLN + INV */
/* GPU_COMMAND_FLUSH_CACHES payloads bits for Other caches */
-#define GPU_COMMAND_FLUSH_PAYLOAD_OTHER_NONE 0x000 /* No flush */
-#define GPU_COMMAND_FLUSH_PAYLOAD_OTHER_INVALIDATE 0x200 /* INV only */
+#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE 0x000 /* No flush */
+#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_INVALIDATE 0x200 /* INV only */
+
+/* GPU_COMMAND_FLUSH_PA_RANGE payload bits for flush modes */
+#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_NONE 0x00 /* No flush */
+#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN 0x01 /* CLN only */
+#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_INVALIDATE 0x02 /* INV only */
+#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE 0x03 /* CLN + INV */
+
+/* GPU_COMMAND_FLUSH_PA_RANGE payload bits for which caches should be the target of the command */
+#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_OTHER_CACHE 0x10 /* Other caches */
+#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_LSC_CACHE 0x20 /* Load-store caches */
+#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE 0x40 /* L2 caches */
/* GPU_COMMAND command + payload */
#define GPU_COMMAND_CODE_PAYLOAD(opcode, payload) \
@@ -218,14 +200,6 @@
#define GPU_COMMAND_HARD_RESET \
GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_HARD_RESET)
-/* Clear all performance counters, setting them all to zero. */
-#define GPU_COMMAND_PRFCNT_CLEAR \
- GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_PRFCNT, GPU_COMMAND_PRFCNT_PAYLOAD_CLEAR)
-
-/* Sample all performance counters, writing them out to memory */
-#define GPU_COMMAND_PRFCNT_SAMPLE \
- GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_PRFCNT, GPU_COMMAND_PRFCNT_PAYLOAD_SAMPLE)
-
/* Starts the cycle counter, and system timestamp propagation */
#define GPU_COMMAND_CYCLE_COUNT_START \
GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_ENABLE)
@@ -235,28 +209,53 @@
GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_DISABLE)
/* Clean and invalidate L2 cache (Equivalent to FLUSH_PT) */
-#define GPU_COMMAND_CACHE_CLN_INV_L2 \
- GPU_COMMAND_CODE_PAYLOAD( \
- GPU_COMMAND_CODE_FLUSH_CACHES, \
- (GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE | \
- GPU_COMMAND_FLUSH_PAYLOAD_LSC_NONE | \
- GPU_COMMAND_FLUSH_PAYLOAD_OTHER_NONE))
+#define GPU_COMMAND_CACHE_CLN_INV_L2 \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \
+ (GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE | \
+ GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_NONE | \
+ GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE))
/* Clean and invalidate L2 and LSC caches (Equivalent to FLUSH_MEM) */
-#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC \
- GPU_COMMAND_CODE_PAYLOAD( \
- GPU_COMMAND_CODE_FLUSH_CACHES, \
- (GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE | \
- GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN_INVALIDATE | \
- GPU_COMMAND_FLUSH_PAYLOAD_OTHER_NONE))
+#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \
+ (GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE | \
+ GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE | \
+ GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE))
/* Clean and invalidate L2, LSC, and Other caches */
-#define GPU_COMMAND_CACHE_CLN_INV_FULL \
- GPU_COMMAND_CODE_PAYLOAD( \
- GPU_COMMAND_CODE_FLUSH_CACHES, \
- (GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE | \
- GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN_INVALIDATE | \
- GPU_COMMAND_FLUSH_PAYLOAD_OTHER_INVALIDATE))
+#define GPU_COMMAND_CACHE_CLN_INV_FULL \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \
+ (GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE | \
+ GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE | \
+ GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_INVALIDATE))
+
+/* Clean and invalidate only LSC cache */
+#define GPU_COMMAND_CACHE_CLN_INV_LSC \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \
+ (GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_NONE | \
+ GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE | \
+ GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE))
+
+/* Clean and invalidate physical range L2 cache (equivalent to FLUSH_PT) */
+#define GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2 \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_PA_RANGE, \
+ (GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE | \
+ GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE))
+
+/* Clean and invalidate physical range L2 and LSC cache (equivalent to FLUSH_MEM) */
+#define GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_PA_RANGE, \
+ (GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE | \
+ GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_LSC_CACHE | \
+ GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE))
+
+/* Clean and invalidate physical range L2, LSC and Other caches */
+#define GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_FULL \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_PA_RANGE, \
+ (GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE | \
+ GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_OTHER_CACHE | \
+ GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_LSC_CACHE | \
+ GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE))
/* Merge cache flush commands */
#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) ((cmd1) | (cmd2))
@@ -337,14 +336,16 @@
(((value) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) & GPU_FAULTSTATUS_ADDRESS_VALID_MASK))
/* IRQ flags */
-#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */
-#define GPU_PROTECTED_FAULT (1 << 1) /* A GPU fault has occurred in protected mode */
-#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */
-#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */
-#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */
-#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */
-#define DOORBELL_MIRROR (1 << 18) /* Mirrors the doorbell interrupt line to the CPU */
-#define MCU_STATUS_GPU_IRQ (1 << 19) /* MCU requires attention */
+#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */
+#define GPU_PROTECTED_FAULT (1 << 1) /* A GPU fault has occurred in protected mode */
+#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */
+#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */
+#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */
+#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */
+#define DOORBELL_MIRROR (1 << 18) /* Mirrors the doorbell interrupt line to the CPU */
+#define MCU_STATUS_GPU_IRQ (1 << 19) /* MCU requires attention */
+#define FLUSH_PA_RANGE_COMPLETED \
+ (1 << 20) /* Set when a physical range cache clean operation has completed. */
/*
* In Debug build,
@@ -362,7 +363,11 @@
#define GPU_IRQ_REG_COMMON (GPU_FAULT | GPU_PROTECTED_FAULT | RESET_COMPLETED \
| POWER_CHANGED_ALL | MCU_STATUS_GPU_IRQ)
-/* GPU_CONTROL_MCU.GPU_IRQ_RAWSTAT */
-#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when performance count sample has completed */
+/* GPU_FEATURES register */
+#define GPU_FEATURES_RAY_TRACING_SHIFT GPU_U(2)
+#define GPU_FEATURES_RAY_TRACING_MASK (GPU_U(0x1) << GPU_FEATURES_RAY_TRACING_SHIFT)
+#define GPU_FEATURES_RAY_TRACING_GET(reg_val) \
+ (((reg_val)&GPU_FEATURES_RAY_TRACING_MASK) >> GPU_FEATURES_RAY_TRACING_SHIFT)
+/* End of GPU_FEATURES register */
#endif /* _KBASE_GPU_REGMAP_CSF_H_ */
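
The CSF regmap changes drop the PRFCNT command and register definitions, rename the FLUSH_CACHES payload bits, and add FLUSH_PA_RANGE payload bits plus a GPU_FEATURES ray-tracing field. The composed command macros all reduce to opcode-plus-payload words. The sketch below reuses the payload values from the hunks; the 8-bit opcode / shifted payload split in CMD() is an assumed layout standing in for GPU_COMMAND_CODE_PAYLOAD(), whose body is not shown here:

#include <stdint.h>
#include <stdio.h>

#define GPU_COMMAND_CODE_FLUSH_PA_RANGE              0x08u
#define FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE 0x03u
#define FLUSH_PA_RANGE_PAYLOAD_LSC_CACHE             0x20u
#define FLUSH_PA_RANGE_PAYLOAD_L2_CACHE              0x40u

/* Assumed command layout: opcode in the low byte, payload shifted above it. */
#define CMD(opcode, payload) (((opcode) & 0xFFu) | (((payload) & 0xFFu) << 8))

/* Decode helper in the same shape as GPU_FEATURES_RAY_TRACING_GET. */
#define RAY_TRACING_SHIFT 2u
#define RAY_TRACING_MASK  (0x1u << RAY_TRACING_SHIFT)
#define RAY_TRACING_GET(reg_val) (((reg_val) & RAY_TRACING_MASK) >> RAY_TRACING_SHIFT)

int main(void)
{
    /* Equivalent in spirit to GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC. */
    uint32_t cmd = CMD(GPU_COMMAND_CODE_FLUSH_PA_RANGE,
                       FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE |
                       FLUSH_PA_RANGE_PAYLOAD_LSC_CACHE |
                       FLUSH_PA_RANGE_PAYLOAD_L2_CACHE);
    uint32_t gpu_features = 0x4u;   /* bit 2 set: ray tracing present */

    printf("command word 0x%08x, ray tracing %u\n",
           (unsigned int)cmd, (unsigned int)RAY_TRACING_GET(gpu_features));
    return 0;
}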
diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h
index d1cd8fc..c349f4b 100644
--- a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h
+++ b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -262,19 +262,22 @@
#define GPU_COMMAND_CACHE_CLN_INV_L2 GPU_COMMAND_CLEAN_INV_CACHES
#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC GPU_COMMAND_CLEAN_INV_CACHES
#define GPU_COMMAND_CACHE_CLN_INV_FULL GPU_COMMAND_CLEAN_INV_CACHES
+#define GPU_COMMAND_CACHE_CLN_INV_LSC GPU_COMMAND_CLEAN_INV_CACHES
/* Merge cache flush commands */
#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) \
((cmd1) > (cmd2) ? (cmd1) : (cmd2))
/* IRQ flags */
-#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */
-#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */
-#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */
-#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */
-#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */
-#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */
-#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */
+#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */
+#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */
+#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */
+#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */
+#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */
+#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */
+#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */
+#define FLUSH_PA_RANGE_COMPLETED \
+ (1 << 20) /* Set when a physical range cache clean operation has completed. */
/*
* In Debug build,
diff --git a/mali_kbase/gpu/mali_kbase_gpu_regmap.h b/mali_kbase/gpu/mali_kbase_gpu_regmap.h
index 1d2a49b..1f4e5f0 100644
--- a/mali_kbase/gpu/mali_kbase_gpu_regmap.h
+++ b/mali_kbase/gpu/mali_kbase_gpu_regmap.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -34,8 +34,12 @@
/* GPU_U definition */
#ifdef __ASSEMBLER__
#define GPU_U(x) x
+#define GPU_UL(x) x
+#define GPU_ULL(x) x
#else
#define GPU_U(x) x##u
+#define GPU_UL(x) x##ul
+#define GPU_ULL(x) x##ull
#endif /* __ASSEMBLER__ */
/* Begin Register Offsets */
@@ -96,6 +100,7 @@
#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
+
#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */
#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */
@@ -355,8 +360,8 @@
(((value) << AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) & \
AS_LOCKADDR_LOCKADDR_SIZE_MASK))
#define AS_LOCKADDR_LOCKADDR_BASE_SHIFT GPU_U(12)
-#define AS_LOCKADDR_LOCKADDR_BASE_MASK \
- (GPU_U(0xFFFFFFFFFFFFF) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT)
+#define AS_LOCKADDR_LOCKADDR_BASE_MASK \
+ (GPU_ULL(0xFFFFFFFFFFFFF) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT)
#define AS_LOCKADDR_LOCKADDR_BASE_GET(reg_val) \
(((reg_val)&AS_LOCKADDR_LOCKADDR_BASE_MASK) >> \
AS_LOCKADDR_LOCKADDR_BASE_SHIFT)
@@ -364,6 +369,11 @@
(((reg_val) & ~AS_LOCKADDR_LOCKADDR_BASE_MASK) | \
(((value) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT) & \
AS_LOCKADDR_LOCKADDR_BASE_MASK))
+#define AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT (6)
+#define AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK ((0xF) << AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT)
+#define AS_LOCKADDR_FLUSH_SKIP_LEVELS_SET(reg_val, value) \
+ (((reg_val) & ~AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK) | \
+ ((value << AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT) & AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK))
/* GPU_STATUS values */
#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */
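
Besides the new GPU_UL()/GPU_ULL() suffix helpers (presumably so wide constants such as the 52-bit LOCKADDR_BASE mask are formed in a 64-bit type before shifting), this file adds AS_LOCKADDR_FLUSH_SKIP_LEVELS_SET(), a routine read-modify-write field update. A compilable sketch of that macro in isolation, using the field layout from the hunk:

#include <stdint.h>
#include <stdio.h>

/* 4-bit FLUSH_SKIP_LEVELS field starting at bit 6, as in the hunk above. */
#define FLUSH_SKIP_LEVELS_SHIFT 6
#define FLUSH_SKIP_LEVELS_MASK  (0xFu << FLUSH_SKIP_LEVELS_SHIFT)

/* Clear the field, then OR in the new value masked to the field width. */
#define FLUSH_SKIP_LEVELS_SET(reg_val, value) \
    (((reg_val) & ~FLUSH_SKIP_LEVELS_MASK) | \
     (((uint32_t)(value) << FLUSH_SKIP_LEVELS_SHIFT) & FLUSH_SKIP_LEVELS_MASK))

int main(void)
{
    uint32_t lockaddr = 0xFFFFFFFFu;            /* arbitrary prior contents */

    lockaddr = FLUSH_SKIP_LEVELS_SET(lockaddr, 0x5);
    printf("lockaddr = 0x%08x\n", (unsigned int)lockaddr);
    return 0;
}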
diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_csf.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_csf.c
index 81dc56b..60b061e 100644
--- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_csf.c
+++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_csf.c
@@ -281,7 +281,7 @@ int kbase_ipa_counter_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
if (WARN_ON(ret))
return ret;
- now = ktime_get();
+ now = ktime_get_raw();
diff = ktime_sub(now, kbdev->ipa.last_sample_time);
diff_ms = ktime_to_ms(diff);
diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.c
index e240117..34515a9 100644
--- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.c
+++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -31,7 +31,7 @@
#define DEFAULT_MIN_SAMPLE_CYCLES 10000
/**
- * read_hwcnt() - read a counter value
+ * kbase_ipa_read_hwcnt() - read a counter value
* @model_data: pointer to model data
* @offset: offset, in bytes, into vinstr buffer
*
diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h
index e1718c6..4479a4b 100644
--- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h
+++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2017-2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -83,7 +83,7 @@ struct kbase_ipa_model_vinstr_data {
};
/**
- * struct ipa_group - represents a single IPA group
+ * struct kbase_ipa_group - represents a single IPA group
* @name: name of the IPA group
* @default_value: default value of coefficient for IPA group.
* Coefficients are interpreted as fractions where the
@@ -152,7 +152,7 @@ s64 kbase_ipa_single_counter(
s32 coeff, u32 counter);
/**
- * attach_vinstr() - attach a vinstr_buffer to an IPA model.
+ * kbase_ipa_attach_vinstr() - attach a vinstr_buffer to an IPA model.
* @model_data: pointer to model data
*
* Attach a vinstr_buffer to an IPA model. The vinstr_buffer
@@ -164,7 +164,7 @@ s64 kbase_ipa_single_counter(
int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data);
/**
- * detach_vinstr() - detach a vinstr_buffer from an IPA model.
+ * kbase_ipa_detach_vinstr() - detach a vinstr_buffer from an IPA model.
* @model_data: pointer to model data
*
* Detach a vinstr_buffer from an IPA model.
diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
index f11be0d..eaa2258 100644
--- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
+++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -59,9 +59,11 @@
#define SC_BEATS_WR_TIB (KBASE_IPA_NR_BYTES_PER_CNT * 62)
/**
- * get_jm_counter() - get performance counter offset inside the Job Manager block
+ * kbase_g7x_power_model_get_jm_counter() - get performance counter offset
+ * inside the Job Manager block
* @model_data: pointer to GPU model data.
- * @counter_block_offset: offset in bytes of the performance counter inside the Job Manager block.
+ * @counter_block_offset: offset in bytes of the performance counter inside
+ * the Job Manager block.
*
* Return: Block offset in bytes of the required performance counter.
*/
@@ -72,9 +74,11 @@ static u32 kbase_g7x_power_model_get_jm_counter(struct kbase_ipa_model_vinstr_da
}
/**
- * get_memsys_counter() - get performance counter offset inside the Memory System block
+ * kbase_g7x_power_model_get_memsys_counter() - get performance counter offset
+ * inside the Memory System block
* @model_data: pointer to GPU model data.
- * @counter_block_offset: offset in bytes of the performance counter inside the (first) Memory System block.
+ * @counter_block_offset: offset in bytes of the performance counter inside
+ * the (first) Memory System block.
*
* Return: Block offset in bytes of the required performance counter.
*/
@@ -88,9 +92,11 @@ static u32 kbase_g7x_power_model_get_memsys_counter(struct kbase_ipa_model_vinst
}
/**
- * get_sc_counter() - get performance counter offset inside the Shader Cores block
+ * kbase_g7x_power_model_get_sc_counter() - get performance counter offset
+ * inside the Shader Cores block
* @model_data: pointer to GPU model data.
- * @counter_block_offset: offset in bytes of the performance counter inside the (first) Shader Cores block.
+ * @counter_block_offset: offset in bytes of the performance counter inside
+ * the (first) Shader Cores block.
*
* Return: Block offset in bytes of the required performance counter.
*/
@@ -110,10 +116,12 @@ static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_da
}
/**
- * memsys_single_counter() - calculate energy for a single Memory System performance counter.
+ * kbase_g7x_sum_all_memsys_blocks() - calculate energy for a single Memory
+ * System performance counter.
* @model_data: pointer to GPU model data.
* @coeff: default value of coefficient for IPA group.
- * @counter_block_offset: offset in bytes of the counter inside the block it belongs to.
+ * @counter_block_offset: offset in bytes of the counter inside the block it
+ * belongs to.
*
* Return: Energy estimation for a single Memory System performance counter.
*/
@@ -130,12 +138,15 @@ static s64 kbase_g7x_sum_all_memsys_blocks(
}
/**
- * sum_all_shader_cores() - calculate energy for a Shader Cores performance counter for all cores.
+ * kbase_g7x_sum_all_shader_cores() - calculate energy for a Shader Cores
+ * performance counter for all cores.
* @model_data: pointer to GPU model data.
* @coeff: default value of coefficient for IPA group.
- * @counter_block_offset: offset in bytes of the counter inside the block it belongs to.
+ * @counter_block_offset: offset in bytes of the counter inside the block it
+ * belongs to.
*
- * Return: Energy estimation for a Shader Cores performance counter for all cores.
+ * Return: Energy estimation for a Shader Cores performance counter for all
+ * cores.
*/
static s64 kbase_g7x_sum_all_shader_cores(
struct kbase_ipa_model_vinstr_data *model_data,
@@ -150,7 +161,7 @@ static s64 kbase_g7x_sum_all_shader_cores(
}
/**
- * jm_single_counter() - calculate energy for a single Job Manager performance counter.
+ * kbase_g7x_jm_single_counter() - calculate energy for a single Job Manager performance counter.
* @model_data: pointer to GPU model data.
* @coeff: default value of coefficient for IPA group.
* @counter_block_offset: offset in bytes of the counter inside the block it belongs to.
@@ -170,7 +181,7 @@ static s64 kbase_g7x_jm_single_counter(
}
/**
- * get_active_cycles() - return the GPU_ACTIVE counter
+ * kbase_g7x_get_active_cycles() - return the GPU_ACTIVE counter
* @model_data: pointer to GPU model data.
*
* Return: the number of cycles the GPU was active during the counter sampling
diff --git a/mali_kbase/ipa/mali_kbase_ipa.c b/mali_kbase/ipa/mali_kbase_ipa.c
index 428e68b..8b8bbd1 100644
--- a/mali_kbase/ipa/mali_kbase_ipa.c
+++ b/mali_kbase/ipa/mali_kbase_ipa.c
@@ -324,7 +324,7 @@ int kbase_ipa_init(struct kbase_device *kbdev)
kbdev->ipa.configured_model = default_model;
}
- kbdev->ipa.last_sample_time = ktime_get();
+ kbdev->ipa.last_sample_time = ktime_get_raw();
end:
if (err)
@@ -750,7 +750,7 @@ void kbase_ipa_reset_data(struct kbase_device *kbdev)
mutex_lock(&kbdev->ipa.lock);
- now = ktime_get();
+ now = ktime_get_raw();
diff = ktime_sub(now, kbdev->ipa.last_sample_time);
elapsed_time = ktime_to_ms(diff);
@@ -765,7 +765,7 @@ void kbase_ipa_reset_data(struct kbase_device *kbdev)
if (model != kbdev->ipa.fallback_model)
model->ops->reset_counter_data(model);
- kbdev->ipa.last_sample_time = ktime_get();
+ kbdev->ipa.last_sample_time = ktime_get_raw();
}
mutex_unlock(&kbdev->ipa.lock);
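
The IPA code switches its timestamps from ktime_get() to ktime_get_raw(), i.e. from the NTP-adjusted monotonic clock to the raw one, so the elapsed-time deltas used to scale counter data are not skewed by clock-rate corrections. A userspace analogue of the same idea using CLOCK_MONOTONIC_RAW (Linux-specific):

#include <stdio.h>
#include <time.h>

/* Milliseconds between two raw-monotonic timestamps. */
static long long elapsed_ms(const struct timespec *a, const struct timespec *b)
{
    return (b->tv_sec - a->tv_sec) * 1000LL +
           (b->tv_nsec - a->tv_nsec) / 1000000LL;
}

int main(void)
{
    struct timespec start, now;

    clock_gettime(CLOCK_MONOTONIC_RAW, &start);
    /* ... sample counters here ... */
    clock_gettime(CLOCK_MONOTONIC_RAW, &now);

    printf("diff_ms = %lld\n", elapsed_ms(&start, &now));
    return 0;
}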
diff --git a/mali_kbase/ipa/mali_kbase_ipa_debugfs.c b/mali_kbase/ipa/mali_kbase_ipa_debugfs.c
index d554fff..a8523a7 100644
--- a/mali_kbase/ipa/mali_kbase_ipa_debugfs.c
+++ b/mali_kbase/ipa/mali_kbase_ipa_debugfs.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,6 +20,7 @@
*/
#include <linux/debugfs.h>
+#include <linux/version_compat_defs.h>
#include <linux/list.h>
#include <linux/mutex.h>
@@ -27,10 +28,6 @@
#include "mali_kbase_ipa.h"
#include "mali_kbase_ipa_debugfs.h"
-#if (KERNEL_VERSION(4, 7, 0) > LINUX_VERSION_CODE)
-#define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE
-#endif
-
struct kbase_ipa_model_param {
char *name;
union {
diff --git a/mali_kbase/ipa/mali_kbase_ipa_simple.c b/mali_kbase/ipa/mali_kbase_ipa_simple.c
index fadae7d..f748144 100644
--- a/mali_kbase/ipa/mali_kbase_ipa_simple.c
+++ b/mali_kbase/ipa/mali_kbase_ipa_simple.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2016-2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -321,8 +321,9 @@ static int kbase_simple_power_model_recalculate(struct kbase_ipa_model *model)
mutex_lock(&model->kbdev->ipa.lock);
if (IS_ERR_OR_NULL(tz)) {
- pr_warn_ratelimited("Error %ld getting thermal zone \'%s\', not yet ready?\n",
- PTR_ERR(tz), tz_name);
+ pr_warn_ratelimited(
+ "Error %d getting thermal zone \'%s\', not yet ready?\n",
+ PTR_ERR_OR_ZERO(tz), tz_name);
return -EPROBE_DEFER;
}
diff --git a/mali_kbase/jm/mali_kbase_jm_defs.h b/mali_kbase/jm/mali_kbase_jm_defs.h
index c9b9ea0..66cf323 100644
--- a/mali_kbase/jm/mali_kbase_jm_defs.h
+++ b/mali_kbase/jm/mali_kbase_jm_defs.h
@@ -194,8 +194,6 @@ struct kbase_jd_atom_dependency {
static inline const struct kbase_jd_atom *
kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
{
- KBASE_DEBUG_ASSERT(dep != NULL);
-
return (const struct kbase_jd_atom *)(dep->atom);
}
@@ -209,8 +207,6 @@ kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
static inline u8 kbase_jd_katom_dep_type(
const struct kbase_jd_atom_dependency *dep)
{
- KBASE_DEBUG_ASSERT(dep != NULL);
-
return dep->dep_type;
}
@@ -227,8 +223,6 @@ static inline void kbase_jd_katom_dep_set(
{
struct kbase_jd_atom_dependency *dep;
- KBASE_DEBUG_ASSERT(const_dep != NULL);
-
dep = (struct kbase_jd_atom_dependency *)const_dep;
dep->atom = a;
@@ -245,8 +239,6 @@ static inline void kbase_jd_katom_dep_clear(
{
struct kbase_jd_atom_dependency *dep;
- KBASE_DEBUG_ASSERT(const_dep != NULL);
-
dep = (struct kbase_jd_atom_dependency *)const_dep;
dep->atom = NULL;
@@ -504,7 +496,6 @@ enum kbase_atom_exit_protected_state {
* BASE_JD_REQ_START_RENDERPASS set in its core requirements
* with an atom that has BASE_JD_REQ_END_RENDERPASS set.
* @jc_fragment: Set of GPU fragment job chains
- * @retry_count: TODO: Not used,to be removed
*/
struct kbase_jd_atom {
struct kthread_work work;
@@ -615,8 +606,6 @@ struct kbase_jd_atom {
u32 atom_flags;
- int retry_count;
-
enum kbase_atom_gpu_rb_state gpu_rb_state;
bool need_cache_flush_cores_retained;
@@ -660,7 +649,7 @@ static inline bool kbase_jd_katom_is_protected(
}
/**
- * kbase_atom_is_younger - query if one atom is younger by age than another
+ * kbase_jd_atom_is_younger - query if one atom is younger by age than another
*
* @katom_a: the first atom
* @katom_b: the second atom
diff --git a/mali_kbase/jm/mali_kbase_jm_js.h b/mali_kbase/jm/mali_kbase_jm_js.h
index 74d02f5..d03bcc0 100644
--- a/mali_kbase/jm/mali_kbase_jm_js.h
+++ b/mali_kbase/jm/mali_kbase_jm_js.h
@@ -29,6 +29,8 @@
#include "mali_kbase_js_ctx_attr.h"
+#define JS_MAX_RUNNING_JOBS 8
+
/**
* kbasep_js_devdata_init - Initialize the Job Scheduler
* @kbdev: The kbase_device to operate on
@@ -618,7 +620,7 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *katom);
void kbase_js_sched(struct kbase_device *kbdev, int js_mask);
/**
- * kbase_jd_zap_context - Attempt to deschedule a context that is being
+ * kbase_js_zap_context - Attempt to deschedule a context that is being
* destroyed
* @kctx: Context pointer
*
@@ -705,8 +707,10 @@ static inline bool kbasep_js_is_submit_allowed(
bool is_allowed;
/* Ensure context really is scheduled in */
- KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
- KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+ if (WARN((kctx->as_nr == KBASEP_AS_NR_INVALID) || !kbase_ctx_flag(kctx, KCTX_SCHEDULED),
+ "%s: kctx %pK has assigned AS %d and context flag %d\n", __func__, (void *)kctx,
+ kctx->as_nr, atomic_read(&kctx->flags)))
+ return false;
test_bit = (u16) (1u << kctx->as_nr);
@@ -733,8 +737,10 @@ static inline void kbasep_js_set_submit_allowed(
u16 set_bit;
/* Ensure context really is scheduled in */
- KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
- KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+ if (WARN((kctx->as_nr == KBASEP_AS_NR_INVALID) || !kbase_ctx_flag(kctx, KCTX_SCHEDULED),
+ "%s: kctx %pK has assigned AS %d and context flag %d\n", __func__, (void *)kctx,
+ kctx->as_nr, atomic_read(&kctx->flags)))
+ return;
set_bit = (u16) (1u << kctx->as_nr);
@@ -763,8 +769,10 @@ static inline void kbasep_js_clear_submit_allowed(
u16 clear_mask;
/* Ensure context really is scheduled in */
- KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
- KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+ if (WARN((kctx->as_nr == KBASEP_AS_NR_INVALID) || !kbase_ctx_flag(kctx, KCTX_SCHEDULED),
+ "%s: kctx %pK has assigned AS %d and context flag %d\n", __func__, (void *)kctx,
+ kctx->as_nr, atomic_read(&kctx->flags)))
+ return;
clear_bit = (u16) (1u << kctx->as_nr);
clear_mask = ~clear_bit;
@@ -798,7 +806,7 @@ static inline void kbasep_js_atom_retained_state_init_invalid(
* @retained_state: where to copy
* @katom: where to copy from
*
- * Copy atom state that can be made available after jd_done_nolock() is called
+ * Copy atom state that can be made available after kbase_jd_done_nolock() is called
* on that atom.
*/
static inline void kbasep_js_atom_retained_state_copy(
@@ -872,9 +880,6 @@ static inline void kbase_js_runpool_inc_context_count(
struct kbasep_js_device_data *js_devdata;
struct kbasep_js_kctx_info *js_kctx_info;
- KBASE_DEBUG_ASSERT(kbdev != NULL);
- KBASE_DEBUG_ASSERT(kctx != NULL);
-
js_devdata = &kbdev->js_data;
js_kctx_info = &kctx->jctx.sched_info;
@@ -882,13 +887,12 @@ static inline void kbase_js_runpool_inc_context_count(
lockdep_assert_held(&js_devdata->runpool_mutex);
/* Track total contexts */
- KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX);
+ WARN_ON_ONCE(js_devdata->nr_all_contexts_running >= JS_MAX_RUNNING_JOBS);
++(js_devdata->nr_all_contexts_running);
if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
/* Track contexts that can submit jobs */
- KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running <
- S8_MAX);
+ WARN_ON_ONCE(js_devdata->nr_user_contexts_running >= JS_MAX_RUNNING_JOBS);
++(js_devdata->nr_user_contexts_running);
}
}
@@ -909,9 +913,6 @@ static inline void kbase_js_runpool_dec_context_count(
struct kbasep_js_device_data *js_devdata;
struct kbasep_js_kctx_info *js_kctx_info;
- KBASE_DEBUG_ASSERT(kbdev != NULL);
- KBASE_DEBUG_ASSERT(kctx != NULL);
-
js_devdata = &kbdev->js_data;
js_kctx_info = &kctx->jctx.sched_info;
@@ -920,12 +921,12 @@ static inline void kbase_js_runpool_dec_context_count(
/* Track total contexts */
--(js_devdata->nr_all_contexts_running);
- KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0);
+ WARN_ON_ONCE(js_devdata->nr_all_contexts_running < 0);
if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
/* Track contexts that can submit jobs */
--(js_devdata->nr_user_contexts_running);
- KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0);
+ WARN_ON_ONCE(js_devdata->nr_user_contexts_running < 0);
}
}
@@ -984,6 +985,7 @@ static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio)
* kbasep_js_sched_prio_to_atom_prio - Convert relative scheduler priority
* to atom priority (base_jd_prio).
*
+ * @kbdev: Device pointer
* @sched_prio: Relative scheduler priority to translate.
*
* This function will convert relative scheduler priority back into base_jd_prio
@@ -999,7 +1001,7 @@ static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio)
* 0..BASE_JD_NR_PRIO_LEVELS-1. On failure: BASE_JD_PRIO_INVALID.
*/
static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(struct kbase_device *kbdev,
- int sched_prio)
+ int sched_prio)
{
if (likely(sched_prio >= 0 && sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT))
return kbasep_js_relative_priority_to_atom[sched_prio];
diff --git a/mali_kbase/jm/mali_kbase_js_defs.h b/mali_kbase/jm/mali_kbase_js_defs.h
index c5cb9ea..15576fb 100644
--- a/mali_kbase/jm/mali_kbase_js_defs.h
+++ b/mali_kbase/jm/mali_kbase_js_defs.h
@@ -387,7 +387,7 @@ struct kbasep_js_kctx_info {
* @sched_priority: priority
* @device_nr: Core group atom was executed on
*
- * Subset of atom state that can be available after jd_done_nolock() is called
+ * Subset of atom state that can be available after kbase_jd_done_nolock() is called
* on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
* because the original atom could disappear.
*/
diff --git a/mali_kbase/mali_base_hwconfig_features.h b/mali_kbase/mali_base_hwconfig_features.h
index a713681..3669f7e 100644
--- a/mali_kbase/mali_base_hwconfig_features.h
+++ b/mali_kbase/mali_base_hwconfig_features.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -38,6 +38,7 @@ enum base_hw_feature {
BASE_HW_FEATURE_ASN_HASH,
BASE_HW_FEATURE_GPU_SLEEP,
BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
+ BASE_HW_FEATURE_CORE_FEATURES,
BASE_HW_FEATURE_END
};
@@ -87,6 +88,7 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tGOx[
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_TLS_HASHING,
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
+ BASE_HW_FEATURE_CORE_FEATURES,
BASE_HW_FEATURE_END
};
@@ -151,6 +153,7 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tGRx[
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_L2_CONFIG,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+ BASE_HW_FEATURE_CORE_FEATURES,
BASE_HW_FEATURE_END
};
@@ -159,6 +162,7 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tVAx[
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_L2_CONFIG,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+ BASE_HW_FEATURE_CORE_FEATURES,
BASE_HW_FEATURE_END
};
@@ -169,6 +173,7 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tTUx[
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
BASE_HW_FEATURE_ASN_HASH,
BASE_HW_FEATURE_GPU_SLEEP,
+ BASE_HW_FEATURE_CORE_FEATURES,
BASE_HW_FEATURE_END
};
diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h
index 8766a6d..3917301 100644
--- a/mali_kbase/mali_base_hwconfig_issues.h
+++ b/mali_kbase/mali_base_hwconfig_issues.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -61,6 +61,9 @@ enum base_hw_issue {
BASE_HW_ISSUE_GPU2019_3212,
BASE_HW_ISSUE_TURSEHW_1997,
BASE_HW_ISSUE_GPU2019_3878,
+ BASE_HW_ISSUE_TURSEHW_2716,
+ BASE_HW_ISSUE_GPU2019_3901,
+ BASE_HW_ISSUE_GPU2021PRO_290,
BASE_HW_ISSUE_END
};
@@ -598,6 +601,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tODx_r0p0
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3212,
BASE_HW_ISSUE_GPU2019_3878,
+ BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_END
};
@@ -608,6 +612,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tOD
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3212,
BASE_HW_ISSUE_GPU2019_3878,
+ BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_END
};
@@ -616,6 +621,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
+ BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_END
};
@@ -625,6 +631,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGR
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
+ BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_END
};
@@ -633,6 +640,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
+ BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_END
};
@@ -642,6 +650,19 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVA
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
+ BASE_HW_ISSUE_GPU2019_3901,
+ BASE_HW_ISSUE_END
+};
+
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = {
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_TSIX_2033,
+ BASE_HW_ISSUE_TTRX_1337,
+ BASE_HW_ISSUE_TURSEHW_1997,
+ BASE_HW_ISSUE_GPU2019_3878,
+ BASE_HW_ISSUE_TURSEHW_2716,
+ BASE_HW_ISSUE_GPU2019_3901,
+ BASE_HW_ISSUE_GPU2021PRO_290,
BASE_HW_ISSUE_END
};
@@ -651,23 +672,31 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTU
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
+ BASE_HW_ISSUE_TURSEHW_2716,
+ BASE_HW_ISSUE_GPU2019_3901,
+ BASE_HW_ISSUE_GPU2021PRO_290,
BASE_HW_ISSUE_END
};
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
- BASE_HW_ISSUE_TURSEHW_1997,
BASE_HW_ISSUE_GPU2019_3878,
+ BASE_HW_ISSUE_TURSEHW_2716,
+ BASE_HW_ISSUE_GPU2019_3901,
+ BASE_HW_ISSUE_GPU2021PRO_290,
BASE_HW_ISSUE_END
};
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p1[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
+ BASE_HW_ISSUE_TURSEHW_2716,
+ BASE_HW_ISSUE_GPU2019_3901,
+ BASE_HW_ISSUE_GPU2021PRO_290,
BASE_HW_ISSUE_END
};
diff --git a/mali_kbase/mali_kbase.h b/mali_kbase/mali_kbase.h
index 53ee51e..b04cf94 100644
--- a/mali_kbase/mali_kbase.h
+++ b/mali_kbase/mali_kbase.h
@@ -87,14 +87,7 @@
#if MALI_USE_CSF
#include "csf/mali_kbase_csf.h"
-#endif
-
-#ifndef u64_to_user_ptr
-/* Introduced in Linux v4.6 */
-#define u64_to_user_ptr(x) ((void __user *)(uintptr_t)x)
-#endif
-#if MALI_USE_CSF
/* Physical memory group ID for CSF user I/O.
*/
#define KBASE_MEM_GROUP_CSF_IO BASE_MEM_GROUP_DEFAULT
@@ -266,7 +259,7 @@ void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
void kbase_jd_zap_context(struct kbase_context *kctx);
/*
- * jd_done_nolock - Perform the necessary handling of an atom that has completed
+ * kbase_jd_done_nolock - Perform the necessary handling of an atom that has completed
* the execution.
*
* @katom: Pointer to the atom that completed the execution
@@ -282,7 +275,7 @@ void kbase_jd_zap_context(struct kbase_context *kctx);
*
* The caller must hold the kbase_jd_context.lock.
*/
-bool jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately);
+bool kbase_jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately);
void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom);
@@ -559,6 +552,21 @@ static inline bool kbase_pm_is_active(struct kbase_device *kbdev)
}
/**
+ * kbase_pm_lowest_gpu_freq_init() - Find the lowest frequency that the GPU can
+ * run at using the device tree, and save this
+ * within kbdev.
+ * @kbdev: Pointer to kbase device.
+ *
+ * This function could be called from kbase_clk_rate_trace_manager_init,
+ * but is left separate as it can be called as soon as
+ * dev_pm_opp_of_add_table() has been called to initialize the OPP table,
+ * which occurs in power_control_init().
+ *
+ * Return: 0 in any case.
+ */
+int kbase_pm_lowest_gpu_freq_init(struct kbase_device *kbdev);
+
+/**
* kbase_pm_metrics_start - Start the utilization metrics timer
* @kbdev: Pointer to the kbase device for which to start the utilization
* metrics calculation thread.
@@ -807,16 +815,23 @@ void kbase_device_pcm_dev_term(struct kbase_device *const kbdev);
*
* @kbdev: the kbase device
* @threadfn: the function the realtime thread will execute
- * @data: pointer to the thread's data
+ * @worker: pointer to the thread's kworker
* @namefmt: a name for the thread.
*
* Creates a realtime kthread with priority &KBASE_RT_THREAD_PRIO and restricted
* to cores defined by &KBASE_RT_THREAD_CPUMASK_MIN and &KBASE_RT_THREAD_CPUMASK_MAX.
*
- * Return: A valid &struct task_struct pointer on success, or an ERR_PTR on failure.
+ * Return: Zero on success, or a negative error code on failure.
+ */
+int kbase_create_realtime_thread(struct kbase_device *kbdev,
+ int (*threadfn)(void *data), struct kthread_worker *worker, const char namefmt[], ...);
+
+/**
+ * kbase_destroy_kworker_stack - Destroy a kthread_worker and its thread on the stack
+ *
+ * @worker: pointer to the thread's kworker
*/
-struct task_struct * kbase_create_realtime_thread(struct kbase_device *kbdev,
- int (*threadfn)(void *data), void *data, const char namefmt[]);
+void kbase_destroy_kworker_stack(struct kthread_worker *worker);
#if !defined(UINT64_MAX)
#define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
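With this reshaped API, callers own a struct kthread_worker (typically embedded in struct kbase_device) rather than a bare task_struct, and queue kthread_work items to it; teardown goes through kbase_destroy_kworker_stack(). A hedged sketch of the intended call pattern, using an invented work handler and worker/work variables purely for illustration:

    #include <linux/kthread.h>

    static void example_work_fn(struct kthread_work *work)
    {
            /* ... handle one queued item ... */
    }

    static int example_setup(struct kbase_device *kbdev,
                             struct kthread_worker *worker,
                             struct kthread_work *work)
    {
            /* Initialises *worker, spawns and binds the realtime thread,
             * and links worker->task for later teardown.
             */
            int err = kbase_create_realtime_thread(kbdev, kthread_worker_fn,
                                                   worker, "mali_example_worker");
            if (err)
                    return err;

            kthread_init_work(work, example_work_fn);
            kthread_queue_work(worker, work);
            return 0;
    }

    static void example_teardown(struct kthread_worker *worker)
    {
            /* Flushes pending work, stops the thread, then checks the
             * work list is empty.
             */
            kbase_destroy_kworker_stack(worker);
    }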
diff --git a/mali_kbase/mali_kbase_config_defaults.h b/mali_kbase/mali_kbase_config_defaults.h
index 18e40b5..60fe2ce 100644
--- a/mali_kbase/mali_kbase_config_defaults.h
+++ b/mali_kbase/mali_kbase_config_defaults.h
@@ -89,6 +89,18 @@ enum {
KBASE_3BIT_AID_4 = 0x7
};
+#if MALI_USE_CSF
+/*
+ * Default value for the TIMER register of the IPA Control interface,
+ * expressed in milliseconds.
+ *
+ * The chosen value is a trade-off between two requirements: the IPA Control
+ * interface should sample counters with a resolution on the order of
+ * milliseconds, while keeping GPU overhead as limited as possible.
+ */
+#define IPA_CONTROL_TIMER_DEFAULT_VALUE_MS ((u32)10) /* 10 milliseconds */
+#endif /* MALI_USE_CSF */
+
/* Default period for DVFS sampling (can be overridden by platform header) */
#ifndef DEFAULT_PM_DVFS_PERIOD
#define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
@@ -158,11 +170,6 @@ enum {
*/
#define DEFAULT_JS_RESET_TICKS_DUMPING (15020) /* 1502s */
-/* Default number of milliseconds given for other jobs on the GPU to be
- * soft-stopped when the GPU needs to be reset.
- */
-#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
-
/* Nominal reference frequency that was used to obtain all following
* <...>_TIMEOUT_CYCLES macros, in kHz.
*
@@ -180,7 +187,7 @@ enum {
* Based on 75000ms timeout at nominal 100MHz, as is required for Android - based
* on scaling from a 50MHz GPU system.
*/
-#define CSF_FIRMWARE_TIMEOUT_CYCLES (7500000000)
+#define CSF_FIRMWARE_TIMEOUT_CYCLES (7500000000ull)
/* Timeout in clock cycles for GPU Power Management to reach the desired
* Shader, L2 and MCU state.
@@ -191,10 +198,28 @@ enum {
/* Waiting timeout in clock cycles for GPU reset to complete.
*
- * Based on 2500ms timeout at 100MHz, scaled from a 50MHz GPU system.
+ * Based on 2500ms timeout at 100MHz, scaled from a 50MHz GPU system
*/
#define CSF_GPU_RESET_TIMEOUT_CYCLES (250000000)
+/* Waiting timeout in clock cycles for all active CSGs to be suspended.
+ *
+ * Based on 1500ms timeout at 100MHz, scaled from a 50MHz GPU system.
+ */
+#define CSF_CSG_SUSPEND_TIMEOUT_CYCLES (150000000)
+
+/* Waiting timeout in clock cycles for GPU firmware to boot.
+ *
+ * Based on 250ms timeout at 100MHz, scaled from a 50MHz GPU system.
+ */
+#define CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES (25000000)
+
+/* Waiting timeout for a ping request to be acknowledged, in clock cycles.
+ *
+ * Based on 6000ms timeout at 100MHz, scaled from a 50MHz GPU system.
+ */
+#define CSF_FIRMWARE_PING_TIMEOUT_CYCLES (600000000ull)
+
#else /* MALI_USE_CSF */
/* A default timeout in clock cycles to be used when an invalid timeout
@@ -202,6 +227,11 @@ enum {
*/
#define JM_DEFAULT_TIMEOUT_CYCLES (150000000)
+/* Default number of milliseconds given for other jobs on the GPU to be
+ * soft-stopped when the GPU needs to be reset.
+ */
+#define JM_DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
+
#endif /* MALI_USE_CSF */
/* Default timeslice that a context is scheduled in for, in nanoseconds.
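The *_TIMEOUT_CYCLES values above are cycle counts chosen so that, at the nominal 100 MHz reference clock, they reproduce the millisecond figures quoted in their comments; at runtime they are divided by the actual GPU frequency. A sketch of that conversion, assuming the frequency is given in kHz (one kHz is one cycle per millisecond):

    #include <linux/math64.h>

    /* Illustrative only: scale a cycle-based timeout to milliseconds. */
    static inline u64 timeout_cycles_to_ms(u64 timeout_cycles, u64 freq_khz)
    {
            return div64_u64(timeout_cycles, freq_khz);
    }

    /* e.g. 7500000000ull cycles / 100000 kHz (100 MHz) == 75000 ms,
     * matching the comment above CSF_FIRMWARE_TIMEOUT_CYCLES.
     */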
diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c
index bf7d524..a16dbad 100644
--- a/mali_kbase/mali_kbase_core_linux.c
+++ b/mali_kbase/mali_kbase_core_linux.c
@@ -99,6 +99,7 @@
#include <linux/compat.h> /* is_compat_task/in_compat_syscall */
#include <linux/mman.h>
#include <linux/version.h>
+#include <linux/version_compat_defs.h>
#include <mali_kbase_hw.h>
#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
#include <mali_kbase_sync.h>
@@ -171,6 +172,11 @@ static const struct mali_kbase_capability_def kbase_caps_table[MALI_KBASE_NUM_CA
#endif
};
+#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE)
+/* Mutex to synchronize the probe of multiple kbase instances */
+static struct mutex kbase_probe_mutex;
+#endif
+
/**
* mali_kbase_supports_cap - Query whether a kbase capability is supported
*
@@ -199,10 +205,14 @@ bool mali_kbase_supports_cap(unsigned long api_version, enum mali_kbase_cap cap)
return supported;
}
-struct task_struct *kbase_create_realtime_thread(struct kbase_device *kbdev,
- int (*threadfn)(void *data), void *data, const char namefmt[])
+int kbase_create_realtime_thread(struct kbase_device *kbdev,
+ int (*threadfn)(void *data), struct kthread_worker *worker, const char namefmt[], ...)
{
+ struct task_struct *task;
unsigned int i;
+ va_list args;
+ char name_buf[128];
+ int len;
cpumask_t mask = { CPU_BITS_NONE };
@@ -210,24 +220,51 @@ struct task_struct *kbase_create_realtime_thread(struct kbase_device *kbdev,
.sched_priority = KBASE_RT_THREAD_PRIO,
};
- struct task_struct *ret = kthread_create(kthread_worker_fn, data, namefmt);
+ kthread_init_worker(worker);
+
+ /* Construct the thread name */
+ va_start(args, namefmt);
+ len = vsnprintf(name_buf, sizeof(name_buf), namefmt, args);
+ va_end(args);
+ if (len + 1 > sizeof(name_buf)) {
+ dev_warn(kbdev->dev, "RT thread name truncated to %s", name_buf);
+ }
+
+ task = kthread_create(kthread_worker_fn, worker, name_buf);
- if (!IS_ERR(ret)) {
+ if (!IS_ERR(task)) {
for (i = KBASE_RT_THREAD_CPUMASK_MIN; i <= KBASE_RT_THREAD_CPUMASK_MAX ; i++)
cpumask_set_cpu(i, &mask);
- kthread_bind_mask(ret, &mask);
+ kthread_bind_mask(task, &mask);
- wake_up_process(ret);
+ /* Link the worker and the thread */
+ worker->task = task;
+ wake_up_process(task);
- if (sched_setscheduler_nocheck(ret, SCHED_FIFO, &param))
- dev_warn(kbdev->dev, "%s not set to RT prio", namefmt);
+ if (sched_setscheduler_nocheck(task, SCHED_FIFO, &param))
+ dev_warn(kbdev->dev, "%s not set to RT prio", name_buf);
else
dev_dbg(kbdev->dev, "%s set to RT prio: %i",
- namefmt, param.sched_priority);
+ name_buf, param.sched_priority);
+ } else {
+ return PTR_ERR(task);
}
- return ret;
+ return 0;
+}
+
+void kbase_destroy_kworker_stack(struct kthread_worker *worker)
+{
+ struct task_struct *task;
+
+ task = worker->task;
+ if (WARN_ON(!task))
+ return;
+
+ kthread_flush_worker(worker);
+ kthread_stop(task);
+ WARN_ON(!list_empty(&worker->work_list));
}
/**
@@ -341,10 +378,9 @@ static int kbase_file_create_kctx(struct kbase_file *kfile,
*
* @kfile: A device file created by kbase_file_new()
*
- * This function returns an error code (encoded with ERR_PTR) if no context
- * has been created for the given @kfile. This makes it safe to use in
- * circumstances where the order of initialization cannot be enforced, but
- * only if the caller checks the return value.
+ * This function returns NULL if no context has been created for the given @kfile.
+ * This makes it safe to use in circumstances where the order of initialization
+ * cannot be enforced, but only if the caller checks the return value.
*
* Return: Address of the kernel base context associated with the @kfile, or
* NULL if no context exists.
@@ -532,27 +568,6 @@ void kbase_release_device(struct kbase_device *kbdev)
EXPORT_SYMBOL(kbase_release_device);
#if IS_ENABLED(CONFIG_DEBUG_FS)
-#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && \
- !(KERNEL_VERSION(4, 4, 28) <= LINUX_VERSION_CODE && \
- KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE)
-/*
- * Older versions, before v4.6, of the kernel doesn't have
- * kstrtobool_from_user(), except longterm 4.4.y which had it added in 4.4.28
- */
-static int kstrtobool_from_user(const char __user *s, size_t count, bool *res)
-{
- char buf[4];
-
- count = min(count, sizeof(buf) - 1);
-
- if (copy_from_user(buf, s, count))
- return -EFAULT;
- buf[count] = '\0';
-
- return strtobool(buf, res);
-}
-#endif
-
static ssize_t write_ctx_infinite_cache(struct file *f, const char __user *ubuf, size_t size, loff_t *off)
{
struct kbase_context *kctx = f->private_data;
@@ -664,13 +679,8 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile,
kbdev = kfile->kbdev;
-#if (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE)
kctx = kbase_create_context(kbdev, in_compat_syscall(),
flags, kfile->api_version, kfile->filp);
-#else
- kctx = kbase_create_context(kbdev, is_compat_task(),
- flags, kfile->api_version, kfile->filp);
-#endif /* (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE) */
/* if bad flags, will stay stuck in setup mode */
if (!kctx)
@@ -691,16 +701,8 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile,
/* we don't treat this as a fail - just warn about it */
dev_warn(kbdev->dev, "couldn't create debugfs dir for kctx\n");
} else {
-#if (KERNEL_VERSION(4, 7, 0) > LINUX_VERSION_CODE)
- /* prevent unprivileged use of debug file system
- * in old kernel version
- */
- debugfs_create_file("infinite_cache", 0600, kctx->kctx_dentry,
- kctx, &kbase_infinite_cache_fops);
-#else
debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry,
kctx, &kbase_infinite_cache_fops);
-#endif
debugfs_create_file("force_same_va", 0600, kctx->kctx_dentry,
kctx, &kbase_force_same_va_fops);
@@ -1046,9 +1048,9 @@ static int kbase_api_get_cpu_gpu_timeinfo(struct kbase_context *kctx,
union kbase_ioctl_get_cpu_gpu_timeinfo *timeinfo)
{
u32 flags = timeinfo->in.request_flags;
- struct timespec64 ts;
- u64 timestamp;
- u64 cycle_cnt;
+ struct timespec64 ts = { 0 };
+ u64 timestamp = 0;
+ u64 cycle_cnt = 0;
kbase_pm_context_active(kctx->kbdev);
@@ -1077,11 +1079,7 @@ static int kbase_api_get_cpu_gpu_timeinfo(struct kbase_context *kctx,
static int kbase_api_hwcnt_set(struct kbase_context *kctx,
struct kbase_ioctl_hwcnt_values *values)
{
- gpu_model_set_dummy_prfcnt_sample(
- (u32 __user *)(uintptr_t)values->data,
- values->size);
-
- return 0;
+ return gpu_model_set_dummy_prfcnt_user_sample(u64_to_user_ptr(values->data), values->size);
}
#endif /* CONFIG_MALI_NO_MALI */
@@ -1569,9 +1567,22 @@ static int kbasep_cs_tiler_heap_init(struct kbase_context *kctx,
kctx->jit_group_id = heap_init->in.group_id;
return kbase_csf_tiler_heap_init(kctx, heap_init->in.chunk_size,
- heap_init->in.initial_chunks, heap_init->in.max_chunks,
- heap_init->in.target_in_flight,
- &heap_init->out.gpu_heap_va, &heap_init->out.first_chunk_va);
+ heap_init->in.initial_chunks, heap_init->in.max_chunks,
+ heap_init->in.target_in_flight, heap_init->in.buf_desc_va,
+ &heap_init->out.gpu_heap_va,
+ &heap_init->out.first_chunk_va);
+}
+
+static int kbasep_cs_tiler_heap_init_1_13(struct kbase_context *kctx,
+ union kbase_ioctl_cs_tiler_heap_init_1_13 *heap_init)
+{
+ kctx->jit_group_id = heap_init->in.group_id;
+
+ return kbase_csf_tiler_heap_init(kctx, heap_init->in.chunk_size,
+ heap_init->in.initial_chunks, heap_init->in.max_chunks,
+ heap_init->in.target_in_flight, 0,
+ &heap_init->out.gpu_heap_va,
+ &heap_init->out.first_chunk_va);
}
static int kbasep_cs_tiler_heap_term(struct kbase_context *kctx,
@@ -1653,6 +1664,31 @@ static int kbasep_ioctl_cs_cpu_queue_dump(struct kbase_context *kctx,
cpu_queue_info->size);
}
+#define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1)
+static int kbase_ioctl_read_user_page(struct kbase_context *kctx,
+ union kbase_ioctl_read_user_page *user_page)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+ unsigned long flags;
+
+ /* As of now, only LATEST_FLUSH is supported */
+ if (unlikely(user_page->in.offset != LATEST_FLUSH))
+ return -EINVAL;
+
+ /* Validating padding that must be zero */
+ if (unlikely(user_page->in.padding != 0))
+ return -EINVAL;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ if (!kbdev->pm.backend.gpu_powered)
+ user_page->out.val_lo = POWER_DOWN_LATEST_FLUSH_VALUE;
+ else
+ user_page->out.val_lo = kbase_reg_read(kbdev, USER_REG(LATEST_FLUSH));
+ user_page->out.val_hi = 0;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ return 0;
+}
#endif /* MALI_USE_CSF */
static int kbasep_ioctl_context_priority_check(struct kbase_context *kctx,
@@ -2110,6 +2146,11 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
union kbase_ioctl_cs_tiler_heap_init,
kctx);
break;
+ case KBASE_IOCTL_CS_TILER_HEAP_INIT_1_13:
+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_TILER_HEAP_INIT_1_13,
+ kbasep_cs_tiler_heap_init_1_13,
+ union kbase_ioctl_cs_tiler_heap_init_1_13, kctx);
+ break;
case KBASE_IOCTL_CS_TILER_HEAP_TERM:
KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_TILER_HEAP_TERM,
kbasep_cs_tiler_heap_term,
@@ -2128,6 +2169,10 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
struct kbase_ioctl_cs_cpu_queue_info,
kctx);
break;
+ case KBASE_IOCTL_READ_USER_PAGE:
+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_READ_USER_PAGE, kbase_ioctl_read_user_page,
+ union kbase_ioctl_read_user_page, kctx);
+ break;
#endif /* MALI_USE_CSF */
#if MALI_UNIT_TEST
case KBASE_IOCTL_TLSTREAM_STATS:
@@ -2251,18 +2296,28 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof
}
#endif /* MALI_USE_CSF */
-static unsigned int kbase_poll(struct file *filp, poll_table *wait)
+static __poll_t kbase_poll(struct file *filp, poll_table *wait)
{
struct kbase_file *const kfile = filp->private_data;
struct kbase_context *const kctx =
kbase_file_get_kctx_if_setup_complete(kfile);
- if (unlikely(!kctx))
+ if (unlikely(!kctx)) {
+#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE)
return POLLERR;
+#else
+ return EPOLLERR;
+#endif
+ }
poll_wait(filp, &kctx->event_queue, wait);
- if (kbase_event_pending(kctx))
+ if (kbase_event_pending(kctx)) {
+#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE)
return POLLIN | POLLRDNORM;
+#else
+ return EPOLLIN | EPOLLRDNORM;
+#endif
+ }
return 0;
}
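kbase_poll() now returns __poll_t and picks between the legacy POLL* values and the EPOLL* constants by kernel version. The same split could be centralised in a pair of compatibility macros; the names below are invented for illustration and are not part of the driver:

    #if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE)
    #define KBASE_POLL_ERROR        POLLERR
    #define KBASE_POLL_READABLE     (POLLIN | POLLRDNORM)
    #else
    #define KBASE_POLL_ERROR        EPOLLERR
    #define KBASE_POLL_READABLE     (EPOLLIN | EPOLLRDNORM)
    #endif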
@@ -3267,22 +3322,20 @@ static ssize_t gpuinfo_show(struct device *dev,
.name = "Mali-G510" },
{ .id = GPU_ID2_PRODUCT_TVAX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-G310" },
- { .id = GPU_ID2_PRODUCT_TTUX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
- .name = "Mali-TTUX" },
- { .id = GPU_ID2_PRODUCT_LTUX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
- .name = "Mali-LTUX" },
};
const char *product_name = "(Unknown Mali GPU)";
struct kbase_device *kbdev;
u32 gpu_id;
unsigned int product_id, product_id_mask;
unsigned int i;
+ struct kbase_gpu_props *gpu_props;
kbdev = to_kbase_device(dev);
if (!kbdev)
return -ENODEV;
- gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+ gpu_props = &kbdev->gpu_props;
+ gpu_id = gpu_props->props.raw_props.gpu_id;
product_id = gpu_id >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT;
product_id_mask = GPU_ID2_PRODUCT_MODEL >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT;
@@ -3296,6 +3349,32 @@ static ssize_t gpuinfo_show(struct device *dev,
}
}
+#if MALI_USE_CSF
+ if ((product_id & product_id_mask) ==
+ ((GPU_ID2_PRODUCT_TTUX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT) & product_id_mask)) {
+ const bool rt_supported =
+ GPU_FEATURES_RAY_TRACING_GET(gpu_props->props.raw_props.gpu_features);
+ const u8 nr_cores = gpu_props->num_cores;
+
+ /* Mali-G715-Immortalis if 10 < number of cores with ray tracing supported.
+ * Mali-G715 if 10 < number of cores without ray tracing supported.
+ * Mali-G715 if 7 <= number of cores <= 10 regardless of ray tracing.
+ * Mali-G615 if number of cores < 7.
+ */
+ if ((nr_cores > 10) && rt_supported)
+ product_name = "Mali-G715-Immortalis";
+ else if (nr_cores >= 7)
+ product_name = "Mali-G715";
+
+ if (nr_cores < 7) {
+ dev_warn(kbdev->dev, "nr_cores(%u) GPU ID must be G615", nr_cores);
+ product_name = "Mali-G615";
+ } else
+ dev_dbg(kbdev->dev, "GPU ID_Name: %s, nr_cores(%u)\n", product_name,
+ nr_cores);
+ }
+#endif /* MALI_USE_CSF */
+
return scnprintf(buf, PAGE_SIZE, "%s %d cores r%dp%d 0x%04X\n", product_name,
kbdev->gpu_props.num_cores,
(gpu_id & GPU_ID_VERSION_MAJOR) >> KBASE_GPU_ID_VERSION_MAJOR_SHIFT,
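The tTUx branch above derives the marketing name from the shader core count and the ray-tracing feature bit instead of a lookup-table entry. The rule in the comment can be restated as a small pure function (a hypothetical helper, shown only to summarise the decision):

    /* Illustrative restatement of the naming rule, not driver code. */
    static const char *ttux_product_name(unsigned int nr_cores, bool rt_supported)
    {
            if (nr_cores > 10 && rt_supported)
                    return "Mali-G715-Immortalis";
            if (nr_cores >= 7)
                    return "Mali-G715";
            return "Mali-G615";
    }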
@@ -3368,6 +3447,46 @@ static ssize_t dvfs_period_show(struct device *dev,
static DEVICE_ATTR_RW(dvfs_period);
+int kbase_pm_lowest_gpu_freq_init(struct kbase_device *kbdev)
+{
+ /* Uses default reference frequency defined in below macro */
+ u64 lowest_freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ;
+
+ /* Only check lowest frequency in cases when OPPs are used and
+ * present in the device tree.
+ */
+#ifdef CONFIG_PM_OPP
+ struct dev_pm_opp *opp_ptr;
+ unsigned long found_freq = 0;
+
+ /* find lowest frequency OPP */
+ opp_ptr = dev_pm_opp_find_freq_ceil(kbdev->dev, &found_freq);
+ if (IS_ERR(opp_ptr)) {
+ dev_err(kbdev->dev, "No OPPs found in device tree! Scaling timeouts using %llu kHz",
+ (unsigned long long)lowest_freq_khz);
+ } else {
+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
+ dev_pm_opp_put(opp_ptr); /* decrease OPP refcount */
+#endif
+ /* convert found frequency to kHz */
+ found_freq /= 1000;
+
+ /* If lowest frequency in OPP table is still higher
+ * than the reference, then keep the reference frequency
+ * as the one to use for scaling.
+ */
+ if (found_freq < lowest_freq_khz)
+ lowest_freq_khz = found_freq;
+ }
+#else
+ dev_err(kbdev->dev, "No operating-points-v2 node or operating-points property in DT");
+#endif
+
+ kbdev->lowest_gpu_freq_khz = lowest_freq_khz;
+ dev_dbg(kbdev->dev, "Lowest frequency identified is %llu kHz", kbdev->lowest_gpu_freq_khz);
+ return 0;
+}
+
/**
* pm_poweroff_store - Store callback for the pm_poweroff sysfs file.
* @dev: The device with sysfs file is for
@@ -4533,7 +4652,7 @@ int power_control_init(struct kbase_device *kbdev)
}
}
if (err == -EPROBE_DEFER) {
- while ((i > 0) && (i < BASE_MAX_NR_CLOCKS_REGULATORS))
+ while (i > 0)
regulator_put(kbdev->regulators[--i]);
return err;
}
@@ -4570,7 +4689,7 @@ int power_control_init(struct kbase_device *kbdev)
}
}
if (err == -EPROBE_DEFER) {
- while ((i > 0) && (i < BASE_MAX_NR_CLOCKS_REGULATORS)) {
+ while (i > 0) {
clk_disable_unprepare(kbdev->clocks[--i]);
clk_put(kbdev->clocks[i]);
}
@@ -4591,6 +4710,11 @@ int power_control_init(struct kbase_device *kbdev)
if (kbdev->nr_regulators > 0) {
kbdev->opp_table = dev_pm_opp_set_regulators(kbdev->dev,
regulator_names, BASE_MAX_NR_CLOCKS_REGULATORS);
+
+ if (IS_ERR_OR_NULL(kbdev->opp_table)) {
+ err = PTR_ERR(kbdev->opp_table);
+ goto regulators_probe_defer;
+ }
}
#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */
err = dev_pm_opp_of_add_table(kbdev->dev);
@@ -4598,6 +4722,20 @@ int power_control_init(struct kbase_device *kbdev)
#endif /* CONFIG_PM_OPP */
return 0;
+#if defined(CONFIG_PM_OPP) && \
+ ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && defined(CONFIG_REGULATOR))
+regulators_probe_defer:
+ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
+ if (kbdev->clocks[i]) {
+ if (__clk_is_enabled(kbdev->clocks[i]))
+ clk_disable_unprepare(kbdev->clocks[i]);
+ clk_put(kbdev->clocks[i]);
+ kbdev->clocks[i] = NULL;
+ } else
+ break;
+ }
+#endif
+
clocks_probe_defer:
#if defined(CONFIG_REGULATOR)
for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++)
@@ -4657,18 +4795,18 @@ static int type##_quirks_set(void *data, u64 val) \
kbdev = (struct kbase_device *)data; \
kbdev->hw_quirks_##type = (u32)val; \
trigger_reset(kbdev); \
- return 0;\
+ return 0; \
} \
\
static int type##_quirks_get(void *data, u64 *val) \
{ \
- struct kbase_device *kbdev;\
- kbdev = (struct kbase_device *)data;\
- *val = kbdev->hw_quirks_##type;\
- return 0;\
+ struct kbase_device *kbdev; \
+ kbdev = (struct kbase_device *)data; \
+ *val = kbdev->hw_quirks_##type; \
+ return 0; \
} \
-DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\
- type##_quirks_set, "%llu\n")
+DEFINE_DEBUGFS_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get, \
+ type##_quirks_set, "%llu\n")
MAKE_QUIRK_ACCESSORS(sc);
MAKE_QUIRK_ACCESSORS(tiler);
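DEFINE_DEBUGFS_ATTRIBUTE generates read/write fops that take a debugfs file reference around each access, and is meant to be paired with debugfs_create_file_unsafe() so the heavier proxy fops installed by debugfs_create_file() can be skipped. A hedged sketch of such a registration; the file name here is invented and the driver's actual registration site is elsewhere:

    /* Illustrative only: pairing the generated fops with the _unsafe helper. */
    debugfs_create_file_unsafe("quirks_sc", 0644, kbdev->mali_debugfs_directory,
                               kbdev, &fops_sc_quirks);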
@@ -4698,8 +4836,7 @@ static int kbase_device_debugfs_reset_write(void *data, u64 wait_for_reset)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(fops_trigger_reset,
- NULL, &kbase_device_debugfs_reset_write, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_trigger_reset, NULL, &kbase_device_debugfs_reset_write, "%llu\n");
/**
* debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read
@@ -4790,12 +4927,7 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev)
/* prevent unprivileged use of debug file system
* in old kernel version
*/
-#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
- /* only for newer kernel version debug file system is safe */
const mode_t mode = 0644;
-#else
- const mode_t mode = 0600;
-#endif
kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname,
NULL);
@@ -4897,9 +5029,11 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev)
kbdev->mali_debugfs_directory, kbdev,
&kbasep_serialize_jobs_debugfs_fops);
+ kbase_timeline_io_debugfs_init(kbdev);
#endif
kbase_dvfs_status_debugfs_init(kbdev);
+
return 0;
out:
@@ -5096,10 +5230,11 @@ static ssize_t fw_timeout_store(struct device *dev,
ret = kstrtouint(buf, 0, &fw_timeout);
if (ret || fw_timeout == 0) {
- dev_err(kbdev->dev, "%s\n%s\n%u",
- "Couldn't process fw_timeout write operation.",
- "Use format 'fw_timeout_ms', and fw_timeout_ms > 0",
- FIRMWARE_PING_INTERVAL_MS);
+ dev_err(kbdev->dev,
+ "Couldn't process fw_timeout write operation.\n"
+ "Use format 'fw_timeout_ms', and fw_timeout_ms > 0\n"
+ "Default fw_timeout: %u",
+ kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_PING_TIMEOUT));
return -EINVAL;
}
@@ -5203,6 +5338,66 @@ static ssize_t idle_hysteresis_time_show(struct device *dev,
}
static DEVICE_ATTR_RW(idle_hysteresis_time);
+
+/**
+ * mcu_shader_pwroff_timeout_show - Get the MCU shader Core power-off time value.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer for the sysfs file contents
+ *
+ * Get the internally recorded MCU shader Core power-off (nominal) timeout value.
+ * The unit of the value is in micro-seconds.
+ *
+ * Return: The number of bytes output to @buf if the
+ * function succeeded, or a negative value on failure.
+ */
+static ssize_t mcu_shader_pwroff_timeout_show(struct device *dev, struct device_attribute *attr,
+ char *const buf)
+{
+ struct kbase_device *kbdev = dev_get_drvdata(dev);
+ u32 pwroff;
+
+ if (!kbdev)
+ return -ENODEV;
+
+ pwroff = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev);
+ return scnprintf(buf, PAGE_SIZE, "%u\n", pwroff);
+}
+
+/**
+ * mcu_shader_pwroff_timeout_store - Set the MCU shader core power-off time value.
+ *
+ * @dev: The device with sysfs file is for
+ * @attr: The attributes of the sysfs file
+ * @buf: The value written to the sysfs file
+ * @count: The number of bytes to write to the sysfs file
+ *
+ * The duration value (unit: micro-seconds) for configuring the MCU Shader Core
+ * timer, when the shader cores' power transitions are delegated to the
+ * MCU (normal operational mode).
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t mcu_shader_pwroff_timeout_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct kbase_device *kbdev = dev_get_drvdata(dev);
+ u32 dur;
+
+ if (!kbdev)
+ return -ENODEV;
+
+ if (kstrtouint(buf, 0, &dur))
+ return -EINVAL;
+
+ kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, dur);
+
+ return count;
+}
+
+static DEVICE_ATTR_RW(mcu_shader_pwroff_timeout);
+
#endif /* MALI_USE_CSF */
static struct attribute *kbase_scheduling_attrs[] = {
@@ -5263,6 +5458,7 @@ static struct attribute *kbase_attrs[] = {
&dev_attr_csg_scheduling_period.attr,
&dev_attr_fw_timeout.attr,
&dev_attr_idle_hysteresis_time.attr,
+ &dev_attr_mcu_shader_pwroff_timeout.attr,
#endif /* !MALI_USE_CSF */
&dev_attr_power_policy.attr,
&dev_attr_core_mask.attr,
@@ -5401,7 +5597,9 @@ static int kbase_platform_device_probe(struct platform_device *pdev)
kbdev->dev = &pdev->dev;
dev_set_drvdata(kbdev->dev, kbdev);
-
+#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE)
+ mutex_lock(&kbase_probe_mutex);
+#endif
err = kbase_device_init(kbdev);
if (err) {
@@ -5413,10 +5611,16 @@ static int kbase_platform_device_probe(struct platform_device *pdev)
dev_set_drvdata(kbdev->dev, NULL);
kbase_device_free(kbdev);
+#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE)
+ mutex_unlock(&kbase_probe_mutex);
+#endif
} else {
dev_info(kbdev->dev,
"Probed as %s\n", dev_name(kbdev->mdev.this_device));
kbase_increment_device_id();
+#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE)
+ mutex_unlock(&kbase_probe_mutex);
+#endif
#ifdef CONFIG_MALI_ARBITER_SUPPORT
mutex_lock(&kbdev->pm.lock);
kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_INITIALIZED_EVT);
@@ -5488,13 +5692,8 @@ static int kbase_device_resume(struct device *dev)
#ifdef CONFIG_MALI_DEVFREQ
dev_dbg(dev, "Callback %s\n", __func__);
- if (kbdev->devfreq) {
- mutex_lock(&kbdev->pm.lock);
- if (kbdev->pm.active_count > 0)
- kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_RESUME);
- mutex_unlock(&kbdev->pm.lock);
- flush_workqueue(kbdev->devfreq_queue.workq);
- }
+ if (kbdev->devfreq)
+ kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_RESUME);
#endif
return 0;
}
@@ -5649,41 +5848,43 @@ static struct platform_driver kbase_platform_driver = {
},
};
-/*
- * The driver will not provide a shortcut to create the Mali platform device
- * anymore when using Device Tree.
- */
-#if IS_ENABLED(CONFIG_OF)
+#if (KERNEL_VERSION(5, 3, 0) > LINUX_VERSION_CODE) && IS_ENABLED(CONFIG_OF)
module_platform_driver(kbase_platform_driver);
#else
-
static int __init kbase_driver_init(void)
{
int ret;
+#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE)
+ mutex_init(&kbase_probe_mutex);
+#endif
+
+#ifndef CONFIG_OF
ret = kbase_platform_register();
if (ret)
return ret;
-
+#endif
ret = platform_driver_register(&kbase_platform_driver);
-
- if (ret)
+#ifndef CONFIG_OF
+ if (ret) {
kbase_platform_unregister();
-
+ return ret;
+ }
+#endif
return ret;
}
static void __exit kbase_driver_exit(void)
{
platform_driver_unregister(&kbase_platform_driver);
+#ifndef CONFIG_OF
kbase_platform_unregister();
+#endif
}
module_init(kbase_driver_init);
module_exit(kbase_driver_exit);
-
-#endif /* CONFIG_OF */
-
+#endif
MODULE_LICENSE("GPL");
MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \
__stringify(BASE_UK_VERSION_MAJOR) "." \
diff --git a/mali_kbase/mali_kbase_cs_experimental.h b/mali_kbase/mali_kbase_cs_experimental.h
index 4dc09e4..7e885ca 100644
--- a/mali_kbase/mali_kbase_cs_experimental.h
+++ b/mali_kbase/mali_kbase_cs_experimental.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -30,9 +30,9 @@
*/
static inline void mali_kbase_print_cs_experimental(void)
{
-#if MALI_INCREMENTAL_RENDERING
- pr_info("mali_kbase: INCREMENTAL_RENDERING (experimental) enabled");
-#endif /* MALI_INCREMENTAL_RENDERING */
+#if MALI_INCREMENTAL_RENDERING_JM
+ pr_info("mali_kbase: INCREMENTAL_RENDERING_JM (experimental) enabled");
+#endif /* MALI_INCREMENTAL_RENDERING_JM */
}
#endif /* _KBASE_CS_EXPERIMENTAL_H_ */
diff --git a/mali_kbase/mali_kbase_ctx_sched.c b/mali_kbase/mali_kbase_ctx_sched.c
index 53da266..66149f9 100644
--- a/mali_kbase/mali_kbase_ctx_sched.c
+++ b/mali_kbase/mali_kbase_ctx_sched.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -23,7 +23,9 @@
#include <mali_kbase_defs.h>
#include "mali_kbase_ctx_sched.h"
#include "tl/mali_kbase_tracepoints.h"
-#if !MALI_USE_CSF
+#if MALI_USE_CSF
+#include "mali_kbase_reset_gpu.h"
+#else
#include <mali_kbase_hwaccess_jm.h>
#endif
@@ -152,7 +154,19 @@ void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx)
struct kbase_device *const kbdev = kctx->kbdev;
lockdep_assert_held(&kbdev->hwaccess_lock);
- WARN_ON(atomic_read(&kctx->refcount) == 0);
+#if MALI_USE_CSF
+ /* We expect the context to be active when this function is called,
+ * except for the case where a page fault is reported for it during
+ * the GPU reset sequence, in which case we can expect the refcount
+ * to be 0.
+ */
+ WARN_ON(!atomic_read(&kctx->refcount) && !kbase_reset_gpu_is_active(kbdev));
+#else
+ /* We expect the context to be active (and thus refcount should be non-zero)
+ * when this function is called
+ */
+ WARN_ON(!atomic_read(&kctx->refcount));
+#endif
WARN_ON(kctx->as_nr == KBASEP_AS_NR_INVALID);
WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx);
diff --git a/mali_kbase/mali_kbase_debug.h b/mali_kbase/mali_kbase_debug.h
index 10a3c85..c43d15d 100644
--- a/mali_kbase/mali_kbase_debug.h
+++ b/mali_kbase/mali_kbase_debug.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2012-2015, 2017, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2015, 2017, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -65,7 +65,7 @@ struct kbasep_debug_assert_cb {
#endif
/**
- * KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) - (Private) system printing
+ * KBASEP_DEBUG_ASSERT_OUT() - (Private) system printing
* function associated to the @ref KBASE_DEBUG_ASSERT_MSG event.
* @trace: location in the code from where the message is printed
* @function: function from where the message is printed
@@ -125,7 +125,7 @@ struct kbasep_debug_assert_cb {
#endif /* KBASE_DEBUG_DISABLE_ASSERTS */
/**
- * KBASE_DEBUG_CODE( X ) - Executes the code inside the macro only in debug mode
+ * KBASE_DEBUG_CODE() - Executes the code inside the macro only in debug mode
* @X: Code to compile only in debug mode.
*/
#ifdef CONFIG_MALI_DEBUG
diff --git a/mali_kbase/mali_kbase_debug_job_fault.c b/mali_kbase/mali_kbase_debug_job_fault.c
index 4f021b3..d6518b4 100644
--- a/mali_kbase/mali_kbase_debug_job_fault.c
+++ b/mali_kbase/mali_kbase_debug_job_fault.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016, 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -87,8 +87,7 @@ static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx)
static int wait_for_job_fault(struct kbase_device *kbdev)
{
-#if KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE && \
- KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE
+#if KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE
int ret = wait_event_interruptible_timeout(kbdev->job_fault_wq,
kbase_is_job_fault_event_pending(kbdev),
msecs_to_jiffies(2000));
diff --git a/mali_kbase/mali_kbase_debug_mem_zones.c b/mali_kbase/mali_kbase_debug_mem_zones.c
new file mode 100644
index 0000000..1f8db32
--- /dev/null
+++ b/mali_kbase/mali_kbase_debug_mem_zones.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Debugfs interface to dump information about GPU_VA memory zones
+ */
+
+#include "mali_kbase_debug_mem_zones.h"
+#include "mali_kbase.h"
+
+#include <linux/list.h>
+#include <linux/file.h>
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+
+/**
+ * debug_mem_zones_show - Show information about GPU_VA memory zones
+ * @sfile: The debugfs entry
+ * @data: Data associated with the entry
+ *
+ * This function is called to get the contents of the @c mem_zones debugfs file.
+ * This lists the start address and size (in pages) of each initialized memory
+ * zone within GPU_VA memory.
+ *
+ * Return:
+ * 0 if the zone information was successfully printed to the debugfs file
+ * -1 if an error was encountered
+ */
+static int debug_mem_zones_show(struct seq_file *sfile, void *data)
+{
+ struct kbase_context *const kctx = sfile->private;
+ size_t i;
+
+ const char *zone_names[KBASE_REG_ZONE_MAX] = {
+ "SAME_VA",
+ "CUSTOM_VA",
+ "EXEC_VA"
+#if MALI_USE_CSF
+ ,
+ "MCU_SHARED_VA",
+ "EXEC_FIXED_VA",
+ "FIXED_VA"
+#endif
+ };
+
+ kbase_gpu_vm_lock(kctx);
+
+ for (i = 0; i < KBASE_REG_ZONE_MAX; i++) {
+ struct kbase_reg_zone *reg_zone = &kctx->reg_zone[i];
+
+ if (reg_zone->base_pfn) {
+ seq_printf(sfile, "%15s %zu 0x%.16llx 0x%.16llx\n", zone_names[i], i,
+ reg_zone->base_pfn, reg_zone->va_size_pages);
+ }
+ }
+
+ kbase_gpu_vm_unlock(kctx);
+ return 0;
+}
+
+/*
+ * File operations related to debugfs entry for mem_zones
+ */
+static int debug_mem_zones_open(struct inode *in, struct file *file)
+{
+ return single_open(file, debug_mem_zones_show, in->i_private);
+}
+
+static const struct file_operations kbase_debug_mem_zones_fops = {
+ .owner = THIS_MODULE,
+ .open = debug_mem_zones_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+/*
+ * Initialize debugfs entry for mem_zones
+ */
+void kbase_debug_mem_zones_init(struct kbase_context *const kctx)
+{
+ /* Caller already ensures this, but we keep the pattern for
+ * maintenance safety.
+ */
+ if (WARN_ON(!kctx) || WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry)))
+ return;
+
+ debugfs_create_file("mem_zones", 0400, kctx->kctx_dentry, kctx,
+ &kbase_debug_mem_zones_fops);
+}
+#else
+/*
+ * Stub functions for when debugfs is disabled
+ */
+void kbase_debug_mem_zones_init(struct kbase_context *const kctx)
+{
+}
+#endif
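Given the seq_printf() format above, each line of the resulting mem_zones file carries the zone name, its index, the zone base PFN and the zone size in pages (both in hex), and only zones with a non-zero base_pfn are listed. Reading it from user space needs nothing beyond a plain file read; the debugfs path below is indicative only:

    #include <stdio.h>

    int main(void)
    {
            char line[256];
            /* Path is an assumption: <debugfs>/mali0/ctx/<tgid>_<id>/mem_zones */
            FILE *f = fopen("/sys/kernel/debug/mali0/ctx/1234_5/mem_zones", "r");

            if (!f)
                    return 1;
            /* Each line: "<zone name> <index> 0x<base pfn> 0x<pages>" */
            while (fgets(line, sizeof(line), f))
                    fputs(line, stdout);
            fclose(f);
            return 0;
    }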
diff --git a/mali_kbase/mali_kbase_debug_mem_zones.h b/mali_kbase/mali_kbase_debug_mem_zones.h
new file mode 100644
index 0000000..acf349b
--- /dev/null
+++ b/mali_kbase/mali_kbase_debug_mem_zones.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_DEBUG_MEM_ZONES_H
+#define _KBASE_DEBUG_MEM_ZONES_H
+
+#include <mali_kbase.h>
+
+/**
+ * kbase_debug_mem_zones_init() - Initialize the mem_zones debugfs file
+ * @kctx: Pointer to kernel base context
+ *
+ * This function creates a "mem_zones" file which can be used to determine the
+ * address ranges of GPU memory zones, in the GPU Virtual-Address space.
+ *
+ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the
+ * parent directory.
+ */
+void kbase_debug_mem_zones_init(struct kbase_context *kctx);
+
+#endif
diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h
index df373cb..6c4e3e8 100644
--- a/mali_kbase/mali_kbase_defs.h
+++ b/mali_kbase/mali_kbase_defs.h
@@ -978,11 +978,8 @@ struct kbase_process {
* @total_gpu_pages for both native and dma-buf imported
* allocations.
* @job_done_worker: Worker for job_done work.
- * @job_done_worker_thread: Thread for job_done work.
* @event_worker: Worker for event work.
- * @event_worker_thread: Thread for event work.
* @apc.worker: Worker for async power control work.
- * @apc.thread: Thread for async power control work.
* @apc.power_on_work: Work struct for powering on the GPU.
* @apc.power_off_work: Work struct for powering off the GPU.
* @apc.end_ts: The latest end timestamp to power off the GPU.
@@ -1189,11 +1186,8 @@ struct kbase_device {
#endif
bool poweroff_pending;
-#if (KERNEL_VERSION(4, 4, 0) <= LINUX_VERSION_CODE)
bool infinite_cache_active_default;
-#else
- u32 infinite_cache_active_default;
-#endif
+
struct kbase_mem_pool_group_config mem_pool_defaults;
u32 current_gpu_coherency_mode;
@@ -1242,9 +1236,7 @@ struct kbase_device {
struct kbasep_js_device_data js_data;
struct kthread_worker job_done_worker;
- struct task_struct *job_done_worker_thread;
struct kthread_worker event_worker;
- struct task_struct *event_worker_thread;
/* See KBASE_JS_*_PRIORITY_MODE for details. */
u32 js_ctx_scheduling_mode;
@@ -1260,7 +1252,6 @@ struct kbase_device {
struct {
struct kthread_worker worker;
- struct task_struct *thread;
struct kthread_work power_on_work;
struct kthread_work power_off_work;
ktime_t end_ts;
@@ -2042,5 +2033,7 @@ static inline u64 kbase_get_lock_region_min_size_log2(struct kbase_gpu_props con
#define KBASE_CLEAN_CACHE_MAX_LOOPS 100000
/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
#define KBASE_AS_INACTIVE_MAX_LOOPS 100000
+/* Maximum number of loops polling the GPU PRFCNT_ACTIVE bit before we assume the GPU has hung */
+#define KBASE_PRFCNT_ACTIVE_MAX_LOOPS 100000000
#endif /* _KBASE_DEFS_H_ */
diff --git a/mali_kbase/mali_kbase_dma_fence.c b/mali_kbase/mali_kbase_dma_fence.c
index c4129ff..ca3863f 100644
--- a/mali_kbase/mali_kbase_dma_fence.c
+++ b/mali_kbase/mali_kbase_dma_fence.c
@@ -161,7 +161,7 @@ kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom)
if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
/* Wait was cancelled - zap the atom */
katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
- if (jd_done_nolock(katom, true))
+ if (kbase_jd_done_nolock(katom, true))
kbase_js_sched_all(katom->kctx->kbdev);
}
}
@@ -193,10 +193,10 @@ kbase_dma_fence_work(struct work_struct *pwork)
kbase_fence_free_callbacks(katom);
/*
* Queue atom on GPU, unless it has already completed due to a failing
- * dependency. Run jd_done_nolock() on the katom if it is completed.
+ * dependency. Run kbase_jd_done_nolock() on the katom if it is completed.
*/
if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED))
- jd_done_nolock(katom, true);
+ kbase_jd_done_nolock(katom, true);
else
kbase_jd_dep_clear_locked(katom);
diff --git a/mali_kbase/mali_kbase_dma_fence.h b/mali_kbase/mali_kbase_dma_fence.h
index be69118..53effbc 100644
--- a/mali_kbase/mali_kbase_dma_fence.h
+++ b/mali_kbase/mali_kbase_dma_fence.h
@@ -88,7 +88,7 @@ int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
struct kbase_dma_fence_resv_info *info);
/**
- * kbase_dma_fence_cancel_ctx() - Cancel all dma-fences blocked atoms on kctx
+ * kbase_dma_fence_cancel_all_atoms() - Cancel all dma-fences blocked atoms on kctx
* @kctx: Pointer to kbase context
*
* This function will cancel and clean up all katoms on @kctx that is waiting
@@ -105,7 +105,7 @@ void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx);
* This function cancels all dma-buf fence callbacks on @katom, but does not
* cancel the katom itself.
*
- * The caller is responsible for ensuring that jd_done_nolock is called on
+ * The caller is responsible for ensuring that kbase_jd_done_nolock is called on
* @katom.
*
* Locking: jctx.lock must be held when calling this function.
diff --git a/mali_kbase/mali_kbase_dvfs_debugfs.c b/mali_kbase/mali_kbase_dvfs_debugfs.c
index 1e584de..e4cb716 100644
--- a/mali_kbase/mali_kbase_dvfs_debugfs.c
+++ b/mali_kbase/mali_kbase_dvfs_debugfs.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -68,11 +68,7 @@ static const struct file_operations kbasep_dvfs_utilization_debugfs_fops = {
void kbase_dvfs_status_debugfs_init(struct kbase_device *kbdev)
{
struct dentry *file;
-#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
const mode_t mode = 0444;
-#else
- const mode_t mode = 0400;
-#endif
if (WARN_ON(!kbdev || IS_ERR_OR_NULL(kbdev->mali_debugfs_directory)))
return;
diff --git a/mali_kbase/mali_kbase_fence.h b/mali_kbase/mali_kbase_fence.h
index 2842280..0f9b73a 100644
--- a/mali_kbase/mali_kbase_fence.h
+++ b/mali_kbase/mali_kbase_fence.h
@@ -104,7 +104,7 @@ static inline void kbase_fence_out_remove(struct kbase_jd_atom *katom)
#if defined(CONFIG_SYNC_FILE)
/**
- * kbase_fence_out_remove() - Removes the input fence from atom
+ * kbase_fence_in_remove() - Removes the input fence from atom
* @katom: Atom to remove input fence for
*
* This will also release the reference to this fence which the atom keeps
@@ -272,6 +272,16 @@ bool kbase_fence_free_callbacks(struct kbase_jd_atom *katom);
#endif /* !MALI_USE_CSF */
/**
+ * kbase_fence_get() - Retrieve fence for a KCPUQ fence command.
+ * @fence_info: KCPUQ fence command
+ *
+ * A ref will be taken for the fence, so use @kbase_fence_put() to release it
+ *
+ * Return: The fence, or NULL if there is no fence for KCPUQ fence command
+ */
+#define kbase_fence_get(fence_info) dma_fence_get((fence_info)->fence)
+
+/**
* kbase_fence_put() - Releases a reference to a fence
* @fence: Fence to release reference for.
*/
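kbase_fence_get() is a thin wrapper around dma_fence_get() on the fence stored in the KCPU-queue fence command, so each successful get must be balanced with kbase_fence_put(). A minimal sketch of the expected pairing, assuming fence_info is a valid KCPUQ fence command:

    /* Illustrative only: balanced get/put around use of the fence. */
    struct dma_fence *fence = kbase_fence_get(fence_info);

    if (fence) {
            /* ... inspect or wait on the fence ... */
            kbase_fence_put(fence);
    }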
diff --git a/mali_kbase/mali_kbase_fence_ops.c b/mali_kbase/mali_kbase_fence_ops.c
index 14ddf03..be14155 100644
--- a/mali_kbase/mali_kbase_fence_ops.c
+++ b/mali_kbase/mali_kbase_fence_ops.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -69,9 +69,11 @@ kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size)
}
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+extern const struct fence_ops kbase_fence_ops; /* silence checker warning */
const struct fence_ops kbase_fence_ops = {
.wait = fence_default_wait,
#else
+extern const struct dma_fence_ops kbase_fence_ops; /* silence checker warning */
const struct dma_fence_ops kbase_fence_ops = {
.wait = dma_fence_default_wait,
#endif
diff --git a/mali_kbase/mali_kbase_gpuprops.c b/mali_kbase/mali_kbase_gpuprops.c
index 5e490b6..c5ed338 100644
--- a/mali_kbase/mali_kbase_gpuprops.c
+++ b/mali_kbase/mali_kbase_gpuprops.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -198,7 +198,6 @@ static int kbase_gpuprops_get_props(struct base_gpu_props * const gpu_props,
gpu_props->raw_props.mem_features = regdump.mem_features;
gpu_props->raw_props.mmu_features = regdump.mmu_features;
gpu_props->raw_props.l2_features = regdump.l2_features;
- gpu_props->raw_props.core_features = regdump.core_features;
gpu_props->raw_props.as_present = regdump.as_present;
gpu_props->raw_props.js_present = regdump.js_present;
@@ -326,9 +325,6 @@ static void kbase_gpuprops_calculate_props(
totalram_pages() << PAGE_SHIFT;
#endif
- gpu_props->core_props.num_exec_engines =
- KBASE_UBFX32(gpu_props->raw_props.core_features, 0, 4);
-
for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];
@@ -507,6 +503,21 @@ int kbase_gpuprops_set_features(struct kbase_device *kbdev)
if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_GROUP_SPLIT))
gpu_props->thread_props.max_thread_group_split = 0;
+ /*
+ * The CORE_FEATURES register has different meanings depending on GPU.
+ * On tGOx, bits[3:0] encode num_exec_engines.
+ * On CSF GPUs, bits[7:0] are instead an enumeration that needs to be
+ * parsed.
+ * GPUs like tTIx have additional fields like LSC_SIZE that are
+ * otherwise reserved/RAZ on older GPUs.
+ */
+ gpu_props->raw_props.core_features = regdump.core_features;
+
+#if !MALI_USE_CSF
+ gpu_props->core_props.num_exec_engines =
+ KBASE_UBFX32(gpu_props->raw_props.core_features, 0, 4);
+#endif
+
return err;
}
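KBASE_UBFX32(reg, offset, size) performs an unsigned bitfield extract, so the non-CSF path above reads num_exec_engines from CORE_FEATURES bits [3:0]. The extraction is equivalent to the following restatement (for clarity only, not the driver's macro definition; valid for size < 32):

    /* Illustrative equivalent of extracting 'size' bits starting at 'offset'. */
    static inline u32 ubfx32(u32 value, unsigned int offset, unsigned int size)
    {
            return (value >> offset) & ((1u << size) - 1u);
    }

    /* num_exec_engines == ubfx32(core_features, 0, 4)   -> bits [3:0] */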
@@ -694,94 +705,102 @@ static struct {
#define PROP(name, member) \
{KBASE_GPUPROP_ ## name, offsetof(struct base_gpu_props, member), \
sizeof(((struct base_gpu_props *)0)->member)}
- PROP(PRODUCT_ID, core_props.product_id),
- PROP(VERSION_STATUS, core_props.version_status),
- PROP(MINOR_REVISION, core_props.minor_revision),
- PROP(MAJOR_REVISION, core_props.major_revision),
- PROP(GPU_FREQ_KHZ_MAX, core_props.gpu_freq_khz_max),
- PROP(LOG2_PROGRAM_COUNTER_SIZE, core_props.log2_program_counter_size),
- PROP(TEXTURE_FEATURES_0, core_props.texture_features[0]),
- PROP(TEXTURE_FEATURES_1, core_props.texture_features[1]),
- PROP(TEXTURE_FEATURES_2, core_props.texture_features[2]),
- PROP(TEXTURE_FEATURES_3, core_props.texture_features[3]),
- PROP(GPU_AVAILABLE_MEMORY_SIZE, core_props.gpu_available_memory_size),
- PROP(NUM_EXEC_ENGINES, core_props.num_exec_engines),
-
- PROP(L2_LOG2_LINE_SIZE, l2_props.log2_line_size),
- PROP(L2_LOG2_CACHE_SIZE, l2_props.log2_cache_size),
- PROP(L2_NUM_L2_SLICES, l2_props.num_l2_slices),
-
- PROP(TILER_BIN_SIZE_BYTES, tiler_props.bin_size_bytes),
- PROP(TILER_MAX_ACTIVE_LEVELS, tiler_props.max_active_levels),
-
- PROP(MAX_THREADS, thread_props.max_threads),
- PROP(MAX_WORKGROUP_SIZE, thread_props.max_workgroup_size),
- PROP(MAX_BARRIER_SIZE, thread_props.max_barrier_size),
- PROP(MAX_REGISTERS, thread_props.max_registers),
- PROP(MAX_TASK_QUEUE, thread_props.max_task_queue),
- PROP(MAX_THREAD_GROUP_SPLIT, thread_props.max_thread_group_split),
- PROP(IMPL_TECH, thread_props.impl_tech),
- PROP(TLS_ALLOC, thread_props.tls_alloc),
-
- PROP(RAW_SHADER_PRESENT, raw_props.shader_present),
- PROP(RAW_TILER_PRESENT, raw_props.tiler_present),
- PROP(RAW_L2_PRESENT, raw_props.l2_present),
- PROP(RAW_STACK_PRESENT, raw_props.stack_present),
- PROP(RAW_L2_FEATURES, raw_props.l2_features),
- PROP(RAW_CORE_FEATURES, raw_props.core_features),
- PROP(RAW_MEM_FEATURES, raw_props.mem_features),
- PROP(RAW_MMU_FEATURES, raw_props.mmu_features),
- PROP(RAW_AS_PRESENT, raw_props.as_present),
- PROP(RAW_JS_PRESENT, raw_props.js_present),
- PROP(RAW_JS_FEATURES_0, raw_props.js_features[0]),
- PROP(RAW_JS_FEATURES_1, raw_props.js_features[1]),
- PROP(RAW_JS_FEATURES_2, raw_props.js_features[2]),
- PROP(RAW_JS_FEATURES_3, raw_props.js_features[3]),
- PROP(RAW_JS_FEATURES_4, raw_props.js_features[4]),
- PROP(RAW_JS_FEATURES_5, raw_props.js_features[5]),
- PROP(RAW_JS_FEATURES_6, raw_props.js_features[6]),
- PROP(RAW_JS_FEATURES_7, raw_props.js_features[7]),
- PROP(RAW_JS_FEATURES_8, raw_props.js_features[8]),
- PROP(RAW_JS_FEATURES_9, raw_props.js_features[9]),
- PROP(RAW_JS_FEATURES_10, raw_props.js_features[10]),
- PROP(RAW_JS_FEATURES_11, raw_props.js_features[11]),
- PROP(RAW_JS_FEATURES_12, raw_props.js_features[12]),
- PROP(RAW_JS_FEATURES_13, raw_props.js_features[13]),
- PROP(RAW_JS_FEATURES_14, raw_props.js_features[14]),
- PROP(RAW_JS_FEATURES_15, raw_props.js_features[15]),
- PROP(RAW_TILER_FEATURES, raw_props.tiler_features),
- PROP(RAW_TEXTURE_FEATURES_0, raw_props.texture_features[0]),
- PROP(RAW_TEXTURE_FEATURES_1, raw_props.texture_features[1]),
- PROP(RAW_TEXTURE_FEATURES_2, raw_props.texture_features[2]),
- PROP(RAW_TEXTURE_FEATURES_3, raw_props.texture_features[3]),
- PROP(RAW_GPU_ID, raw_props.gpu_id),
- PROP(RAW_THREAD_MAX_THREADS, raw_props.thread_max_threads),
- PROP(RAW_THREAD_MAX_WORKGROUP_SIZE,
- raw_props.thread_max_workgroup_size),
+#define BACKWARDS_COMPAT_PROP(name, type) \
+ { \
+ KBASE_GPUPROP_##name, SIZE_MAX, sizeof(type) \
+ }
+ PROP(PRODUCT_ID, core_props.product_id),
+ PROP(VERSION_STATUS, core_props.version_status),
+ PROP(MINOR_REVISION, core_props.minor_revision),
+ PROP(MAJOR_REVISION, core_props.major_revision),
+ PROP(GPU_FREQ_KHZ_MAX, core_props.gpu_freq_khz_max),
+ PROP(LOG2_PROGRAM_COUNTER_SIZE, core_props.log2_program_counter_size),
+ PROP(TEXTURE_FEATURES_0, core_props.texture_features[0]),
+ PROP(TEXTURE_FEATURES_1, core_props.texture_features[1]),
+ PROP(TEXTURE_FEATURES_2, core_props.texture_features[2]),
+ PROP(TEXTURE_FEATURES_3, core_props.texture_features[3]),
+ PROP(GPU_AVAILABLE_MEMORY_SIZE, core_props.gpu_available_memory_size),
+
+#if MALI_USE_CSF
+ BACKWARDS_COMPAT_PROP(NUM_EXEC_ENGINES, u8),
+#else
+ PROP(NUM_EXEC_ENGINES, core_props.num_exec_engines),
+#endif
+
+ PROP(L2_LOG2_LINE_SIZE, l2_props.log2_line_size),
+ PROP(L2_LOG2_CACHE_SIZE, l2_props.log2_cache_size),
+ PROP(L2_NUM_L2_SLICES, l2_props.num_l2_slices),
+
+ PROP(TILER_BIN_SIZE_BYTES, tiler_props.bin_size_bytes),
+ PROP(TILER_MAX_ACTIVE_LEVELS, tiler_props.max_active_levels),
+
+ PROP(MAX_THREADS, thread_props.max_threads),
+ PROP(MAX_WORKGROUP_SIZE, thread_props.max_workgroup_size),
+ PROP(MAX_BARRIER_SIZE, thread_props.max_barrier_size),
+ PROP(MAX_REGISTERS, thread_props.max_registers),
+ PROP(MAX_TASK_QUEUE, thread_props.max_task_queue),
+ PROP(MAX_THREAD_GROUP_SPLIT, thread_props.max_thread_group_split),
+ PROP(IMPL_TECH, thread_props.impl_tech),
+ PROP(TLS_ALLOC, thread_props.tls_alloc),
+
+ PROP(RAW_SHADER_PRESENT, raw_props.shader_present),
+ PROP(RAW_TILER_PRESENT, raw_props.tiler_present),
+ PROP(RAW_L2_PRESENT, raw_props.l2_present),
+ PROP(RAW_STACK_PRESENT, raw_props.stack_present),
+ PROP(RAW_L2_FEATURES, raw_props.l2_features),
+ PROP(RAW_CORE_FEATURES, raw_props.core_features),
+ PROP(RAW_MEM_FEATURES, raw_props.mem_features),
+ PROP(RAW_MMU_FEATURES, raw_props.mmu_features),
+ PROP(RAW_AS_PRESENT, raw_props.as_present),
+ PROP(RAW_JS_PRESENT, raw_props.js_present),
+ PROP(RAW_JS_FEATURES_0, raw_props.js_features[0]),
+ PROP(RAW_JS_FEATURES_1, raw_props.js_features[1]),
+ PROP(RAW_JS_FEATURES_2, raw_props.js_features[2]),
+ PROP(RAW_JS_FEATURES_3, raw_props.js_features[3]),
+ PROP(RAW_JS_FEATURES_4, raw_props.js_features[4]),
+ PROP(RAW_JS_FEATURES_5, raw_props.js_features[5]),
+ PROP(RAW_JS_FEATURES_6, raw_props.js_features[6]),
+ PROP(RAW_JS_FEATURES_7, raw_props.js_features[7]),
+ PROP(RAW_JS_FEATURES_8, raw_props.js_features[8]),
+ PROP(RAW_JS_FEATURES_9, raw_props.js_features[9]),
+ PROP(RAW_JS_FEATURES_10, raw_props.js_features[10]),
+ PROP(RAW_JS_FEATURES_11, raw_props.js_features[11]),
+ PROP(RAW_JS_FEATURES_12, raw_props.js_features[12]),
+ PROP(RAW_JS_FEATURES_13, raw_props.js_features[13]),
+ PROP(RAW_JS_FEATURES_14, raw_props.js_features[14]),
+ PROP(RAW_JS_FEATURES_15, raw_props.js_features[15]),
+ PROP(RAW_TILER_FEATURES, raw_props.tiler_features),
+ PROP(RAW_TEXTURE_FEATURES_0, raw_props.texture_features[0]),
+ PROP(RAW_TEXTURE_FEATURES_1, raw_props.texture_features[1]),
+ PROP(RAW_TEXTURE_FEATURES_2, raw_props.texture_features[2]),
+ PROP(RAW_TEXTURE_FEATURES_3, raw_props.texture_features[3]),
+ PROP(RAW_GPU_ID, raw_props.gpu_id),
+ PROP(RAW_THREAD_MAX_THREADS, raw_props.thread_max_threads),
+ PROP(RAW_THREAD_MAX_WORKGROUP_SIZE, raw_props.thread_max_workgroup_size),
PROP(RAW_THREAD_MAX_BARRIER_SIZE, raw_props.thread_max_barrier_size),
- PROP(RAW_THREAD_FEATURES, raw_props.thread_features),
- PROP(RAW_COHERENCY_MODE, raw_props.coherency_mode),
- PROP(RAW_THREAD_TLS_ALLOC, raw_props.thread_tls_alloc),
- PROP(RAW_GPU_FEATURES, raw_props.gpu_features),
- PROP(COHERENCY_NUM_GROUPS, coherency_info.num_groups),
- PROP(COHERENCY_NUM_CORE_GROUPS, coherency_info.num_core_groups),
- PROP(COHERENCY_COHERENCY, coherency_info.coherency),
- PROP(COHERENCY_GROUP_0, coherency_info.group[0].core_mask),
- PROP(COHERENCY_GROUP_1, coherency_info.group[1].core_mask),
- PROP(COHERENCY_GROUP_2, coherency_info.group[2].core_mask),
- PROP(COHERENCY_GROUP_3, coherency_info.group[3].core_mask),
- PROP(COHERENCY_GROUP_4, coherency_info.group[4].core_mask),
- PROP(COHERENCY_GROUP_5, coherency_info.group[5].core_mask),
- PROP(COHERENCY_GROUP_6, coherency_info.group[6].core_mask),
- PROP(COHERENCY_GROUP_7, coherency_info.group[7].core_mask),
- PROP(COHERENCY_GROUP_8, coherency_info.group[8].core_mask),
- PROP(COHERENCY_GROUP_9, coherency_info.group[9].core_mask),
- PROP(COHERENCY_GROUP_10, coherency_info.group[10].core_mask),
- PROP(COHERENCY_GROUP_11, coherency_info.group[11].core_mask),
- PROP(COHERENCY_GROUP_12, coherency_info.group[12].core_mask),
- PROP(COHERENCY_GROUP_13, coherency_info.group[13].core_mask),
- PROP(COHERENCY_GROUP_14, coherency_info.group[14].core_mask),
- PROP(COHERENCY_GROUP_15, coherency_info.group[15].core_mask),
+ PROP(RAW_THREAD_FEATURES, raw_props.thread_features),
+ PROP(RAW_COHERENCY_MODE, raw_props.coherency_mode),
+ PROP(RAW_THREAD_TLS_ALLOC, raw_props.thread_tls_alloc),
+ PROP(RAW_GPU_FEATURES, raw_props.gpu_features),
+ PROP(COHERENCY_NUM_GROUPS, coherency_info.num_groups),
+ PROP(COHERENCY_NUM_CORE_GROUPS, coherency_info.num_core_groups),
+ PROP(COHERENCY_COHERENCY, coherency_info.coherency),
+ PROP(COHERENCY_GROUP_0, coherency_info.group[0].core_mask),
+ PROP(COHERENCY_GROUP_1, coherency_info.group[1].core_mask),
+ PROP(COHERENCY_GROUP_2, coherency_info.group[2].core_mask),
+ PROP(COHERENCY_GROUP_3, coherency_info.group[3].core_mask),
+ PROP(COHERENCY_GROUP_4, coherency_info.group[4].core_mask),
+ PROP(COHERENCY_GROUP_5, coherency_info.group[5].core_mask),
+ PROP(COHERENCY_GROUP_6, coherency_info.group[6].core_mask),
+ PROP(COHERENCY_GROUP_7, coherency_info.group[7].core_mask),
+ PROP(COHERENCY_GROUP_8, coherency_info.group[8].core_mask),
+ PROP(COHERENCY_GROUP_9, coherency_info.group[9].core_mask),
+ PROP(COHERENCY_GROUP_10, coherency_info.group[10].core_mask),
+ PROP(COHERENCY_GROUP_11, coherency_info.group[11].core_mask),
+ PROP(COHERENCY_GROUP_12, coherency_info.group[12].core_mask),
+ PROP(COHERENCY_GROUP_13, coherency_info.group[13].core_mask),
+ PROP(COHERENCY_GROUP_14, coherency_info.group[14].core_mask),
+ PROP(COHERENCY_GROUP_15, coherency_info.group[15].core_mask),
#undef PROP
};
@@ -818,7 +837,14 @@ int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev)
for (i = 0; i < count; i++) {
u32 type = gpu_property_mapping[i].type;
u8 type_size;
- void *field = ((u8 *)props) + gpu_property_mapping[i].offset;
+ const size_t offset = gpu_property_mapping[i].offset;
+ const u64 dummy_backwards_compat_value = (u64)0;
+ const void *field;
+
+ if (likely(offset < sizeof(struct base_gpu_props)))
+ field = ((const u8 *)props) + offset;
+ else
+ field = &dummy_backwards_compat_value;
switch (gpu_property_mapping[i].size) {
case 1:
@@ -844,16 +870,16 @@ int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev)
switch (type_size) {
case KBASE_GPUPROP_VALUE_SIZE_U8:
- WRITE_U8(*((u8 *)field));
+ WRITE_U8(*((const u8 *)field));
break;
case KBASE_GPUPROP_VALUE_SIZE_U16:
- WRITE_U16(*((u16 *)field));
+ WRITE_U16(*((const u16 *)field));
break;
case KBASE_GPUPROP_VALUE_SIZE_U32:
- WRITE_U32(*((u32 *)field));
+ WRITE_U32(*((const u32 *)field));
break;
case KBASE_GPUPROP_VALUE_SIZE_U64:
- WRITE_U64(*((u64 *)field));
+ WRITE_U64(*((const u64 *)field));
break;
default: /* Cannot be reached */
WARN_ON(1);
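The BACKWARDS_COMPAT_PROP entries above carry SIZE_MAX as their offset, and the user-buffer loop routes such entries to a zero value instead of a real struct field. A minimal sketch of that lookup pattern, using simplified, assumed types rather than the driver's own:

#include <stddef.h>
#include <stdint.h>

struct prop_entry {
	uint32_t type;
	size_t offset; /* SIZE_MAX marks a backwards-compatibility-only property */
	size_t size;
};

/* Return a pointer to the property's storage, or to a benign zero value
 * when the property has no storage in the properties structure.
 */
static const void *prop_field(const void *props, size_t props_size,
			      const struct prop_entry *entry)
{
	static const uint64_t zero_value; /* zero-initialised dummy */

	if (entry->offset < props_size)
		return (const uint8_t *)props + entry->offset;

	return &zero_value;
}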
diff --git a/mali_kbase/mali_kbase_hw.c b/mali_kbase/mali_kbase_hw.c
index 75e4aaf..f205617 100644
--- a/mali_kbase/mali_kbase_hw.c
+++ b/mali_kbase/mali_kbase_hw.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -137,8 +137,7 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
static const struct base_hw_product base_hw_products[] = {
{ GPU_ID2_PRODUCT_TMIX,
- { { GPU_ID2_VERSION_MAKE(0, 0, 1),
- base_hw_issues_tMIx_r0p0_05dev0 },
+ { { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tMIx_r0p0_05dev0 },
{ GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0 },
{ GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1 },
{ U32_MAX /* sentinel value */, NULL } } },
@@ -233,11 +232,15 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
{ GPU_ID2_PRODUCT_TTUX,
{ { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 },
{ GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 },
+ { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 },
+ { GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p1 },
{ U32_MAX, NULL } } },
{ GPU_ID2_PRODUCT_LTUX,
{ { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 },
{ GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 },
+ { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 },
+ { GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p1 },
{ U32_MAX, NULL } } },
};
@@ -294,12 +297,8 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
*/
issues = fallback_issues;
-#if MALI_CUSTOMER_RELEASE
dev_warn(kbdev->dev,
"GPU hardware issue table may need updating:\n"
-#else
- dev_info(kbdev->dev,
-#endif
"r%dp%d status %d is unknown; treating as r%dp%d status %d",
(gpu_id & GPU_ID2_VERSION_MAJOR) >>
GPU_ID2_VERSION_MAJOR_SHIFT,
diff --git a/mali_kbase/mali_kbase_hwaccess_jm.h b/mali_kbase/mali_kbase_hwaccess_jm.h
index 95d7624..124a6d6 100644
--- a/mali_kbase/mali_kbase_hwaccess_jm.h
+++ b/mali_kbase/mali_kbase_hwaccess_jm.h
@@ -236,7 +236,7 @@ void kbase_backend_timeouts_changed(struct kbase_device *kbdev);
int kbase_backend_slot_free(struct kbase_device *kbdev, int js);
/**
- * kbase_job_check_enter_disjoint - potentially leave disjoint state
+ * kbase_job_check_leave_disjoint - potentially leave disjoint state
* @kbdev: kbase device
* @target_katom: atom which is finishing
*
diff --git a/mali_kbase/mali_kbase_hwaccess_pm.h b/mali_kbase/mali_kbase_hwaccess_pm.h
index 1c153c4..effb2ff 100644
--- a/mali_kbase/mali_kbase_hwaccess_pm.h
+++ b/mali_kbase/mali_kbase_hwaccess_pm.h
@@ -209,7 +209,7 @@ int kbase_pm_list_policies(struct kbase_device *kbdev,
const struct kbase_pm_policy * const **list);
/**
- * kbase_protected_most_enable - Enable protected mode
+ * kbase_pm_protected_mode_enable() - Enable protected mode
*
* @kbdev: Address of the instance of a GPU platform device.
*
@@ -218,7 +218,7 @@ int kbase_pm_list_policies(struct kbase_device *kbdev,
int kbase_pm_protected_mode_enable(struct kbase_device *kbdev);
/**
- * kbase_protected_mode_disable - Disable protected mode
+ * kbase_pm_protected_mode_disable() - Disable protected mode
*
* @kbdev: Address of the instance of a GPU platform device.
*
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf.c b/mali_kbase/mali_kbase_hwcnt_backend_csf.c
index c42f2a0..99e8be7 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_csf.c
+++ b/mali_kbase/mali_kbase_hwcnt_backend_csf.c
@@ -36,8 +36,13 @@
#define BASE_MAX_NR_CLOCKS_REGULATORS 2
#endif
+#if IS_ENABLED(CONFIG_MALI_IS_FPGA) && !IS_ENABLED(CONFIG_MALI_NO_MALI)
+/* Backend watch dog timer interval in milliseconds: 18 seconds. */
+#define HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS ((u32)18000)
+#else
/* Backend watch dog timer interval in milliseconds: 1 second. */
#define HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS ((u32)1000)
+#endif /* IS_FPGA && !NO_MALI */
/**
* enum kbase_hwcnt_backend_csf_dump_state - HWC CSF backend dumping states.
@@ -168,23 +173,29 @@ struct kbase_hwcnt_backend_csf_info {
/**
* struct kbase_hwcnt_csf_physical_layout - HWC sample memory physical layout
* information.
+ * @hw_block_cnt: Total number of hardware counter blocks. The hw counter blocks are
+ * sub-categorized into 4 classes: front-end, tiler, memory system, and shader.
+ * hw_block_cnt = fe_cnt + tiler_cnt + mmu_l2_cnt + shader_cnt.
* @fe_cnt: Front end block count.
* @tiler_cnt: Tiler block count.
- * @mmu_l2_cnt: Memory system(MMU and L2 cache) block count.
+ * @mmu_l2_cnt: Memory system (MMU and L2 cache) block count.
* @shader_cnt: Shader Core block count.
- * @block_cnt: Total block count (sum of all other block counts).
+ * @fw_block_cnt: Total number of firmware counter blocks.
+ * @block_cnt: Total block count (sum of all counter blocks: hw_block_cnt + fw_block_cnt).
* @shader_avail_mask: Bitmap of all shader cores in the system.
* @enable_mask_offset: Offset in array elements of enable mask in each block
* starting from the beginning of block.
- * @headers_per_block: Header size per block.
- * @counters_per_block: Counters size per block.
- * @values_per_block: Total size per block.
+ * @headers_per_block: For any block, the number of counters designated as the block's header.
+ * @counters_per_block: For any block, the number of counters designated as the block's payload.
+ * @values_per_block: For any block, the number of counters in total (header + payload).
*/
struct kbase_hwcnt_csf_physical_layout {
+ u8 hw_block_cnt;
u8 fe_cnt;
u8 tiler_cnt;
u8 mmu_l2_cnt;
u8 shader_cnt;
+ u8 fw_block_cnt;
u8 block_cnt;
u64 shader_avail_mask;
size_t enable_mask_offset;
@@ -361,29 +372,38 @@ static void kbasep_hwcnt_backend_csf_init_layout(
const struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info,
struct kbase_hwcnt_csf_physical_layout *phys_layout)
{
- u8 shader_core_cnt;
+ size_t shader_core_cnt;
size_t values_per_block;
+ size_t fw_blocks_count;
+ size_t hw_blocks_count;
WARN_ON(!prfcnt_info);
WARN_ON(!phys_layout);
shader_core_cnt = fls64(prfcnt_info->core_mask);
- values_per_block =
- prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES;
+ values_per_block = prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES;
+ fw_blocks_count = div_u64(prfcnt_info->prfcnt_fw_size, prfcnt_info->prfcnt_block_size);
+ hw_blocks_count = div_u64(prfcnt_info->prfcnt_hw_size, prfcnt_info->prfcnt_block_size);
+
+	/* Check that the number of hardware counter blocks reported by the GPU matches the
+	 * legacy guesswork we have done in the past.
+ */
+ WARN_ON(hw_blocks_count != KBASE_HWCNT_V5_FE_BLOCK_COUNT +
+ KBASE_HWCNT_V5_TILER_BLOCK_COUNT +
+ prfcnt_info->l2_count + shader_core_cnt);
*phys_layout = (struct kbase_hwcnt_csf_physical_layout){
.fe_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT,
.tiler_cnt = KBASE_HWCNT_V5_TILER_BLOCK_COUNT,
.mmu_l2_cnt = prfcnt_info->l2_count,
.shader_cnt = shader_core_cnt,
- .block_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT +
- KBASE_HWCNT_V5_TILER_BLOCK_COUNT +
- prfcnt_info->l2_count + shader_core_cnt,
+ .fw_block_cnt = fw_blocks_count,
+ .hw_block_cnt = hw_blocks_count,
+ .block_cnt = fw_blocks_count + hw_blocks_count,
.shader_avail_mask = prfcnt_info->core_mask,
.headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
.values_per_block = values_per_block,
- .counters_per_block =
- values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ .counters_per_block = values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
.enable_mask_offset = KBASE_HWCNT_V5_PRFCNT_EN_HEADER,
};
}
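To make the accounting above concrete, a small worked example with assumed sizes (a 2048-byte block, 4-byte counters, one firmware block, one L2 slice and eight shader cores; the figures are illustrative, not values read from hardware):

#include <stddef.h>
#include <stdio.h>

int main(void)
{
	const size_t block_bytes = 2048;           /* assumed per-block dump size */
	const size_t counter_bytes = 4;            /* assumed bytes per counter value */
	const size_t prfcnt_fw_size = 1 * block_bytes;
	const size_t prfcnt_hw_size = (1 + 1 + 1 + 8) * block_bytes; /* FE + tiler + L2 + cores */

	size_t values_per_block = block_bytes / counter_bytes;  /* 512 */
	size_t fw_blocks = prfcnt_fw_size / block_bytes;        /* 1 */
	size_t hw_blocks = prfcnt_hw_size / block_bytes;        /* 11 */

	printf("values/block=%zu fw_blocks=%zu hw_blocks=%zu block_cnt=%zu\n",
	       values_per_block, fw_blocks, hw_blocks, fw_blocks + hw_blocks);
	return 0;
}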
@@ -458,7 +478,15 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample(
u64 *acc_block = accum_buf;
const size_t values_per_block = phys_layout->values_per_block;
- for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) {
+ /* Performance counter blocks for firmware are stored before blocks for hardware.
+	 * We skip over the firmware's performance counter blocks (counter dumping is not
+ * supported for firmware blocks, only hardware ones).
+ */
+ old_block += values_per_block * phys_layout->fw_block_cnt;
+ new_block += values_per_block * phys_layout->fw_block_cnt;
+
+ for (block_idx = phys_layout->fw_block_cnt; block_idx < phys_layout->block_cnt;
+ block_idx++) {
const u32 old_enable_mask =
old_block[phys_layout->enable_mask_offset];
const u32 new_enable_mask =
@@ -546,8 +574,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample(
old_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
WARN_ON(new_block !=
new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
- WARN_ON(acc_block !=
- accum_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
+ WARN_ON(acc_block != accum_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES) -
+ (values_per_block * phys_layout->fw_block_cnt));
(void)dump_bytes;
}
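The firmware-block skip above is plain pointer arithmetic over a flat array of equally sized blocks; a self-contained sketch of the same walk, with assumed element types:

#include <stddef.h>
#include <stdint.h>

/* Accumulate only the hardware blocks of a sample laid out as
 * [fw block 0 .. fw block N-1][hw block 0 .. hw block M-1],
 * each block holding values_per_block 32-bit values.
 */
static void accumulate_hw_blocks(const uint32_t *sample, size_t values_per_block,
				 size_t fw_block_cnt, size_t block_cnt, uint64_t *accum)
{
	const uint32_t *blk = sample + values_per_block * fw_block_cnt;
	size_t blk_idx, i;

	for (blk_idx = fw_block_cnt; blk_idx < block_cnt; blk_idx++) {
		for (i = 0; i < values_per_block; i++)
			accum[(blk_idx - fw_block_cnt) * values_per_block + i] += blk[i];
		blk += values_per_block;
	}
}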
@@ -562,7 +590,7 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(
const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes;
bool clearing_samples = backend_csf->info->prfcnt_info.clearing_samples;
u32 *old_sample_buf = backend_csf->old_sample_buf;
- u32 *new_sample_buf;
+ u32 *new_sample_buf = old_sample_buf;
if (extract_index_to_start == insert_index_to_stop)
/* No samples to accumulate. Early out. */
@@ -1434,7 +1462,6 @@ kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info,
*out_backend = backend_csf;
return 0;
- destroy_workqueue(backend_csf->hwc_dump_workq);
err_alloc_workqueue:
backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx,
backend_csf->ring_buf);
@@ -1938,7 +1965,6 @@ void kbase_hwcnt_backend_csf_on_prfcnt_disable(
int kbase_hwcnt_backend_csf_metadata_init(
struct kbase_hwcnt_backend_interface *iface)
{
- int errcode;
struct kbase_hwcnt_backend_csf_info *csf_info;
struct kbase_hwcnt_gpu_info gpu_info;
@@ -1964,19 +1990,8 @@ int kbase_hwcnt_backend_csf_metadata_init(
gpu_info.prfcnt_values_per_block =
csf_info->prfcnt_info.prfcnt_block_size /
KBASE_HWCNT_VALUE_HW_BYTES;
- errcode = kbase_hwcnt_csf_metadata_create(
- &gpu_info, csf_info->counter_set, &csf_info->metadata);
- if (errcode)
- return errcode;
-
- /*
- * Dump abstraction size should be exactly twice the size and layout as
- * the physical dump size since 64-bit per value used in metadata.
- */
- WARN_ON(csf_info->prfcnt_info.dump_bytes * 2 !=
- csf_info->metadata->dump_buf_bytes);
-
- return 0;
+ return kbase_hwcnt_csf_metadata_create(&gpu_info, csf_info->counter_set,
+ &csf_info->metadata);
}
void kbase_hwcnt_backend_csf_metadata_term(
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf_if.h b/mali_kbase/mali_kbase_hwcnt_backend_csf_if.h
index 9c4fef5..24b26c2 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_csf_if.h
+++ b/mali_kbase/mali_kbase_hwcnt_backend_csf_if.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -55,8 +55,12 @@ struct kbase_hwcnt_backend_csf_if_enable {
/**
* struct kbase_hwcnt_backend_csf_if_prfcnt_info - Performance counter
* information.
+ * @prfcnt_hw_size: Total length in bytes of all the hardware counter data. The hardware
+ * counters are sub-divided into 4 classes: front-end, shader, tiler, and
+ * memory system (l2 cache + MMU).
+ * @prfcnt_fw_size: Total length in bytes of all the firmware counter data.
* @dump_bytes: Bytes of GPU memory required to perform a performance
- * counter dump.
+ * counter dump. dump_bytes = prfcnt_hw_size + prfcnt_fw_size.
* @prfcnt_block_size: Bytes of each performance counter block.
* @l2_count: The MMU L2 cache count.
* @core_mask: Shader core mask.
@@ -65,6 +69,8 @@ struct kbase_hwcnt_backend_csf_if_enable {
* is taken.
*/
struct kbase_hwcnt_backend_csf_if_prfcnt_info {
+ size_t prfcnt_hw_size;
+ size_t prfcnt_fw_size;
size_t dump_bytes;
size_t prfcnt_block_size;
size_t l2_count;
@@ -273,8 +279,6 @@ typedef void kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn(
* @timestamp_ns: Function ptr to get the current CSF interface
* timestamp.
* @dump_enable: Function ptr to enable dumping.
- * @dump_enable_nolock: Function ptr to enable dumping while the
- * backend-specific spinlock is already held.
* @dump_disable: Function ptr to disable dumping.
* @dump_request: Function ptr to request a dump.
* @get_indexes: Function ptr to get extract and insert indexes of the
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c
index 15ffbfa..bc1d719 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c
+++ b/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -221,30 +221,29 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info)
{
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
- size_t dummy_model_blk_count;
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
- prfcnt_info->l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
- prfcnt_info->core_mask =
- (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
- /* 1 FE block + 1 Tiler block + l2_count blocks + shader_core blocks */
- dummy_model_blk_count =
- 2 + prfcnt_info->l2_count + fls64(prfcnt_info->core_mask);
- prfcnt_info->dump_bytes =
- dummy_model_blk_count * KBASE_DUMMY_MODEL_BLOCK_SIZE;
- prfcnt_info->prfcnt_block_size =
- KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK *
- KBASE_HWCNT_VALUE_HW_BYTES;
- prfcnt_info->clk_cnt = 1;
- prfcnt_info->clearing_samples = true;
+ *prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){
+ .l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS,
+ .core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1,
+ .prfcnt_hw_size =
+ KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE,
+ .prfcnt_fw_size =
+ KBASE_DUMMY_MODEL_MAX_FIRMWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE,
+ .dump_bytes = KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE,
+ .prfcnt_block_size = KBASE_DUMMY_MODEL_BLOCK_SIZE,
+ .clk_cnt = 1,
+ .clearing_samples = true,
+ };
+
fw_ctx->buf_bytes = prfcnt_info->dump_bytes;
#else
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
struct kbase_device *kbdev;
u32 prfcnt_size;
- u32 prfcnt_hw_size = 0;
- u32 prfcnt_fw_size = 0;
+ u32 prfcnt_hw_size;
+ u32 prfcnt_fw_size;
u32 prfcnt_block_size = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK *
KBASE_HWCNT_VALUE_HW_BYTES;
@@ -254,8 +253,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
kbdev = fw_ctx->kbdev;
prfcnt_size = kbdev->csf.global_iface.prfcnt_size;
- prfcnt_hw_size = (prfcnt_size & 0xFF) << 8;
- prfcnt_fw_size = (prfcnt_size >> 16) << 8;
+ prfcnt_hw_size = GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET(prfcnt_size);
+ prfcnt_fw_size = GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET(prfcnt_size);
fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size;
/* Read the block size if the GPU has the register PRFCNT_FEATURES
@@ -269,14 +268,16 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
<< 8;
}
- prfcnt_info->dump_bytes = fw_ctx->buf_bytes;
- prfcnt_info->prfcnt_block_size = prfcnt_block_size;
- prfcnt_info->l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices;
- prfcnt_info->core_mask =
- kbdev->gpu_props.props.coherency_info.group[0].core_mask;
-
- prfcnt_info->clk_cnt = fw_ctx->clk_cnt;
- prfcnt_info->clearing_samples = true;
+ *prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){
+ .prfcnt_hw_size = prfcnt_hw_size,
+ .prfcnt_fw_size = prfcnt_fw_size,
+ .dump_bytes = fw_ctx->buf_bytes,
+ .prfcnt_block_size = prfcnt_block_size,
+ .l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices,
+ .core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask,
+ .clk_cnt = fw_ctx->clk_cnt,
+ .clearing_samples = true,
+ };
/* Block size must be multiple of counter size. */
WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) !=
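The removed open-coded shifts suggest GLB_PRFCNT_SIZE packs both sizes in 256-byte units; purely as an illustration of what the new *_GET accessors abstract (the authoritative field positions and widths live in the driver's register map headers), a generic field decode with that assumed granularity looks like:

#include <stdint.h>

/* Decode one size field from a packed register: extract the field, then
 * scale from 256-byte units to bytes (assumed granularity).
 */
static inline uint32_t prfcnt_size_field_bytes(uint32_t reg, unsigned int shift, uint32_t mask)
{
	return ((reg >> shift) & mask) << 8;
}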
@@ -368,7 +369,11 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
kfree(page_list);
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ fw_ring_buf->gpu_dump_base = (uintptr_t)cpu_addr;
+#else
fw_ring_buf->gpu_dump_base = gpu_va_base;
+#endif /* CONFIG_MALI_NO_MALI */
fw_ring_buf->cpu_dump_base = cpu_addr;
fw_ring_buf->phys = phys;
fw_ring_buf->num_pages = num_pages;
@@ -379,12 +384,6 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
*out_ring_buf =
(struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf;
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
- /* The dummy model needs the CPU mapping. */
- gpu_model_set_dummy_prfcnt_base_cpu(fw_ring_buf->cpu_dump_base, kbdev,
- phys, num_pages);
-#endif /* CONFIG_MALI_NO_MALI */
-
return 0;
mmu_insert_failed:
@@ -422,6 +421,14 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(
WARN_ON(!ctx);
WARN_ON(!ring_buf);
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+	/* When using the dummy backend, syncing the ring buffer is unnecessary as
+	 * the ring buffer is only accessed by the CPU. Syncing may also cause data
+	 * loss due to cache invalidation, so return early.
+ */
+ return;
+#endif /* CONFIG_MALI_NO_MALI */
+
/* The index arguments for this function form an inclusive, exclusive
* range.
* However, when masking back to the available buffers we will make this
@@ -500,10 +507,9 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(
if (fw_ring_buf->phys) {
u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START;
- WARN_ON(kbase_mmu_teardown_pages(
- fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu,
- gpu_va_base >> PAGE_SHIFT, fw_ring_buf->num_pages,
- MCU_AS_NR));
+ WARN_ON(kbase_mmu_teardown_pages(fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu,
+ gpu_va_base >> PAGE_SHIFT, fw_ring_buf->phys,
+ fw_ring_buf->num_pages, MCU_AS_NR));
vunmap(fw_ring_buf->cpu_dump_base);
@@ -540,8 +546,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_enable(
global_iface = &kbdev->csf.global_iface;
/* Configure */
- prfcnt_config = fw_ring_buf->buf_count;
- prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT;
+ prfcnt_config = GLB_PRFCNT_CONFIG_SIZE_SET(0, fw_ring_buf->buf_count);
+ prfcnt_config = GLB_PRFCNT_CONFIG_SET_SELECT_SET(prfcnt_config, enable->counter_set);
/* Configure the ring buffer base address */
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID,
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_jm.c b/mali_kbase/mali_kbase_hwcnt_backend_jm.c
index e418212..98019e7 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_jm.c
+++ b/mali_kbase/mali_kbase_hwcnt_backend_jm.c
@@ -414,7 +414,12 @@ static int kbasep_hwcnt_backend_jm_dump_enable_nolock(
enable.tiler_bm = phys_enable_map.tiler_bm;
enable.mmu_l2_bm = phys_enable_map.mmu_l2_bm;
enable.counter_set = phys_counter_set;
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ /* The dummy model needs the CPU mapping. */
+ enable.dump_buffer = (uintptr_t)backend_jm->cpu_dump_va;
+#else
enable.dump_buffer = backend_jm->gpu_dump_va;
+#endif /* CONFIG_MALI_NO_MALI */
enable.dump_buffer_bytes = backend_jm->info->dump_bytes;
timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend);
@@ -733,9 +738,6 @@ static int kbasep_hwcnt_backend_jm_create(
int errcode;
struct kbase_device *kbdev;
struct kbase_hwcnt_backend_jm *backend = NULL;
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
- size_t page_count;
-#endif
WARN_ON(!info);
WARN_ON(!out_backend);
@@ -775,14 +777,6 @@ static int kbasep_hwcnt_backend_jm_create(
kbase_ccswe_init(&backend->ccswe_shader_cores);
backend->rate_listener.notify = kbasep_hwcnt_backend_jm_on_freq_change;
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
- /* The dummy model needs the CPU mapping. */
- page_count = PFN_UP(info->dump_bytes);
- gpu_model_set_dummy_prfcnt_base_cpu(backend->cpu_dump_va, kbdev,
- backend->vmap->cpu_pages,
- page_count);
-#endif /* CONFIG_MALI_NO_MALI */
-
*out_backend = backend;
return 0;
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_jm_watchdog.c b/mali_kbase/mali_kbase_hwcnt_backend_jm_watchdog.c
index cdf3cd9..3d786ca 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_jm_watchdog.c
+++ b/mali_kbase/mali_kbase_hwcnt_backend_jm_watchdog.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,7 +27,13 @@
#include <mali_kbase_hwcnt_backend.h>
#include <mali_kbase_hwcnt_watchdog_if.h>
+#if IS_ENABLED(CONFIG_MALI_IS_FPGA) && !IS_ENABLED(CONFIG_MALI_NO_MALI)
+/* Backend watch dog timer interval in milliseconds: 18 seconds. */
+static const u32 hwcnt_backend_watchdog_timer_interval_ms = 18000;
+#else
+/* Backend watch dog timer interval in milliseconds: 1 second. */
static const u32 hwcnt_backend_watchdog_timer_interval_ms = 1000;
+#endif /* IS_FPGA && !NO_MALI */
/*
* IDLE_BUFFER_EMPTY -> USER_DUMPING_BUFFER_EMPTY on dump_request.
diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.c b/mali_kbase/mali_kbase_hwcnt_gpu.c
index 752d096..5f5c36f 100644
--- a/mali_kbase/mali_kbase_hwcnt_gpu.c
+++ b/mali_kbase/mali_kbase_hwcnt_gpu.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,7 +22,6 @@
#include "mali_kbase_hwcnt_gpu.h"
#include "mali_kbase_hwcnt_types.h"
-#include <linux/bug.h>
#include <linux/err.h>
/** enum enable_map_idx - index into a block enable map that spans multiple u64 array elements
@@ -44,13 +43,13 @@ static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set,
if (is_csf)
*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2;
else
- *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED;
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED;
break;
case KBASE_HWCNT_SET_TERTIARY:
if (is_csf)
*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3;
else
- *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED;
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED;
break;
default:
WARN_ON(true);
@@ -66,7 +65,7 @@ static void kbasep_get_tiler_block_type(u64 *dst,
break;
case KBASE_HWCNT_SET_SECONDARY:
case KBASE_HWCNT_SET_TERTIARY:
- *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED;
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED;
break;
default:
WARN_ON(true);
@@ -87,7 +86,7 @@ static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set,
if (is_csf)
*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3;
else
- *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED;
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED;
break;
default:
WARN_ON(true);
@@ -105,7 +104,7 @@ static void kbasep_get_memsys_block_type(u64 *dst,
*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2;
break;
case KBASE_HWCNT_SET_TERTIARY:
- *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED;
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED;
break;
default:
WARN_ON(true);
@@ -320,7 +319,8 @@ static bool is_block_type_shader(
if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC ||
blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 ||
- blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3)
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3 ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED)
is_shader = true;
return is_shader;
@@ -335,7 +335,8 @@ static bool is_block_type_l2_cache(
switch (grp_type) {
case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS ||
- blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2)
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED)
is_l2_cache = true;
break;
default:
@@ -383,6 +384,8 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
const bool is_l2_cache = is_block_type_l2_cache(
kbase_hwcnt_metadata_group_type(metadata, grp),
blk_type);
+ const bool is_undefined = kbase_hwcnt_is_block_type_undefined(
+ kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
bool hw_res_available = true;
/*
@@ -414,8 +417,23 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
dst, grp, blk, blk_inst);
const u64 *src_blk = dump_src + src_offset;
+ bool blk_powered;
+
+ if (!is_shader_core) {
+ /* Under the current PM system, counters will
+				 * only be enabled after all non-shader-core
+ * blocks are powered up.
+ */
+ blk_powered = true;
+ } else {
+ /* Check the PM core mask to see if the shader
+ * core is powered up.
+ */
+ blk_powered = core_mask & 1;
+ }
- if ((!is_shader_core || (core_mask & 1)) && hw_res_available) {
+ if (blk_powered && !is_undefined && hw_res_available) {
+ /* Only powered and defined blocks have valid data. */
if (accumulate) {
kbase_hwcnt_dump_buffer_block_accumulate(
dst_blk, src_blk, hdr_cnt,
@@ -425,9 +443,18 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
dst_blk, src_blk,
(hdr_cnt + ctr_cnt));
}
- } else if (!accumulate) {
- kbase_hwcnt_dump_buffer_block_zero(
- dst_blk, (hdr_cnt + ctr_cnt));
+ } else {
+				/* The block is unpowered, undefined, or its HW
+				 * resource is unavailable, yet the user has enabled
+				 * counter collection for it. We must not propagate
+				 * garbage data.
+				 */
+ if (accumulate) {
+ /* No-op to preserve existing values */
+ } else {
+ /* src is garbage, so zero the dst */
+ kbase_hwcnt_dump_buffer_block_zero(dst_blk,
+ (hdr_cnt + ctr_cnt));
+ }
}
}
@@ -462,6 +489,9 @@ int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
const size_t ctr_cnt =
kbase_hwcnt_metadata_block_counters_count(metadata, grp,
blk);
+ const uint64_t blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
+ const bool is_undefined = kbase_hwcnt_is_block_type_undefined(
+ kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
/*
* Skip block if no values in the destination block are enabled.
@@ -472,12 +502,26 @@ int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
dst, grp, blk, blk_inst);
const u64 *src_blk = dump_src + src_offset;
- if (accumulate) {
- kbase_hwcnt_dump_buffer_block_accumulate(
- dst_blk, src_blk, hdr_cnt, ctr_cnt);
+ if (!is_undefined) {
+ if (accumulate) {
+ kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk,
+ hdr_cnt, ctr_cnt);
+ } else {
+ kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk,
+ (hdr_cnt + ctr_cnt));
+ }
} else {
- kbase_hwcnt_dump_buffer_block_copy(
- dst_blk, src_blk, (hdr_cnt + ctr_cnt));
+			/* The block is undefined, yet the user has enabled
+			 * counter collection for it. We must not propagate
+			 * garbage data.
+			 */
+ if (accumulate) {
+ /* No-op to preserve existing values */
+ } else {
+ /* src is garbage, so zero the dst */
+ kbase_hwcnt_dump_buffer_block_zero(dst_blk,
+ (hdr_cnt + ctr_cnt));
+ }
}
}
@@ -564,7 +608,10 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
break;
switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED:
/* Nothing to do in this case. */
break;
case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
@@ -664,7 +711,10 @@ void kbase_hwcnt_gpu_enable_map_from_physical(
break;
switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED:
/* Nothing to do in this case. */
break;
case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.h b/mali_kbase/mali_kbase_hwcnt_gpu.h
index 648f85f..f890d45 100644
--- a/mali_kbase/mali_kbase_hwcnt_gpu.h
+++ b/mali_kbase/mali_kbase_hwcnt_gpu.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,7 @@
#ifndef _KBASE_HWCNT_GPU_H_
#define _KBASE_HWCNT_GPU_H_
+#include <linux/bug.h>
#include <linux/types.h>
struct kbase_device;
@@ -60,33 +61,40 @@ enum kbase_hwcnt_gpu_group_type {
/**
* enum kbase_hwcnt_gpu_v5_block_type - GPU V5 hardware counter block types,
* used to identify metadata blocks.
- * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED: Undefined block (e.g. if a
- * counter set that a block
- * doesn't support is used).
* @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: Front End block (Job manager
* or CSF HW).
* @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: Secondary Front End block (Job
* manager or CSF HW).
* @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: Tertiary Front End block (Job
* manager or CSF HW).
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: Undefined Front End block
+ * (e.g. if a counter set that
+ * a block doesn't support is
+ * used).
* @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: Tiler block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: Undefined Tiler block.
* @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: Shader Core block.
* @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: Secondary Shader Core block.
* @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: Tertiary Shader Core block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: Undefined Shader Core block.
* @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: Memsys block.
* @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: Secondary Memsys block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: Undefined Memsys block.
*/
enum kbase_hwcnt_gpu_v5_block_type {
- KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED,
KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE,
KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2,
KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED,
KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED,
KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC,
KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2,
KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED,
KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS,
KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED,
};
/**
@@ -188,6 +196,27 @@ struct kbase_hwcnt_curr_config {
};
/**
+ * kbase_hwcnt_is_block_type_undefined() - Check if a block type is undefined.
+ *
+ * @grp_type: Hardware counter group type.
+ * @blk_type: Hardware counter block type.
+ *
+ * Return: true if the block type is undefined, else false.
+ */
+static inline bool kbase_hwcnt_is_block_type_undefined(const uint64_t grp_type,
+ const uint64_t blk_type)
+{
+ /* Warn on unknown group type */
+ if (WARN_ON(grp_type != KBASE_HWCNT_GPU_GROUP_TYPE_V5))
+ return false;
+
+ return (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED);
+}
+
+/**
* kbase_hwcnt_jm_metadata_create() - Create hardware counter metadata for the
* JM GPUs.
* @info: Non-NULL pointer to info struct.
diff --git a/mali_kbase/mali_kbase_hwcnt_gpu_narrow.c b/mali_kbase/mali_kbase_hwcnt_gpu_narrow.c
index e2caa1c..2a1cde7 100644
--- a/mali_kbase/mali_kbase_hwcnt_gpu_narrow.c
+++ b/mali_kbase/mali_kbase_hwcnt_gpu_narrow.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -161,7 +161,7 @@ void kbase_hwcnt_dump_buffer_narrow_free(
return;
kfree(dump_buf_narrow->dump_buf);
- *dump_buf_narrow = (struct kbase_hwcnt_dump_buffer_narrow){ 0 };
+ *dump_buf_narrow = (struct kbase_hwcnt_dump_buffer_narrow){ NULL };
}
int kbase_hwcnt_dump_buffer_narrow_array_alloc(
diff --git a/mali_kbase/mali_kbase_jd.c b/mali_kbase/mali_kbase_jd.c
index 79c8ebb..8667819 100644
--- a/mali_kbase/mali_kbase_jd.c
+++ b/mali_kbase/mali_kbase_jd.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -82,7 +82,7 @@ static void jd_mark_atom_complete(struct kbase_jd_atom *katom)
* Returns whether the JS needs a reschedule.
*
* Note that the caller must also check the atom status and
- * if it is KBASE_JD_ATOM_STATE_COMPLETED must call jd_done_nolock
+ * if it is KBASE_JD_ATOM_STATE_COMPLETED must call kbase_jd_done_nolock
*/
static bool jd_run_atom(struct kbase_jd_atom *katom)
{
@@ -148,7 +148,7 @@ void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom)
if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
/* The atom has already finished */
- resched |= jd_done_nolock(katom, true);
+ resched |= kbase_jd_done_nolock(katom, true);
}
if (resched)
@@ -778,7 +778,7 @@ static void jd_mark_simple_gfx_frame_atoms(struct kbase_jd_atom *katom)
*
* The caller must hold the kbase_jd_context.lock.
*/
-bool jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately)
+bool kbase_jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately)
{
struct kbase_context *kctx = katom->kctx;
struct list_head completed_jobs;
@@ -786,6 +786,8 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately)
bool need_to_try_schedule_context = false;
int i;
+ lockdep_assert_held(&kctx->jctx.lock);
+
KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_START(kctx->kbdev, katom);
INIT_LIST_HEAD(&completed_jobs);
@@ -918,7 +920,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately)
return need_to_try_schedule_context;
}
-KBASE_EXPORT_TEST_API(jd_done_nolock);
+KBASE_EXPORT_TEST_API(kbase_jd_done_nolock);
#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS)
enum {
@@ -1026,7 +1028,6 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
katom->jobslot = user_atom->jobslot;
katom->seq_nr = user_atom->seq_nr;
katom->atom_flags = 0;
- katom->retry_count = 0;
katom->need_cache_flush_cores_retained = 0;
katom->pre_dep = NULL;
katom->post_dep = NULL;
@@ -1087,7 +1088,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
* dependencies.
*/
jd_trace_atom_submit(kctx, katom, NULL);
- return jd_done_nolock(katom, true);
+ return kbase_jd_done_nolock(katom, true);
}
}
}
@@ -1151,7 +1152,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
if (err >= 0)
kbase_finish_soft_job(katom);
}
- return jd_done_nolock(katom, true);
+ return kbase_jd_done_nolock(katom, true);
}
katom->will_fail_event_code = katom->event_code;
@@ -1177,7 +1178,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
/* Create a new atom. */
jd_trace_atom_submit(kctx, katom, &katom->sched_priority);
-#if !MALI_INCREMENTAL_RENDERING
+#if !MALI_INCREMENTAL_RENDERING_JM
/* Reject atoms for incremental rendering if not supported */
if (katom->core_req &
(BASE_JD_REQ_START_RENDERPASS|BASE_JD_REQ_END_RENDERPASS)) {
@@ -1185,9 +1186,9 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
"Rejecting atom with unsupported core_req 0x%x\n",
katom->core_req);
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
- return jd_done_nolock(katom, true);
+ return kbase_jd_done_nolock(katom, true);
}
-#endif /* !MALI_INCREMENTAL_RENDERING */
+#endif /* !MALI_INCREMENTAL_RENDERING_JM */
if (katom->core_req & BASE_JD_REQ_END_RENDERPASS) {
WARN_ON(katom->jc != 0);
@@ -1199,7 +1200,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
*/
dev_err(kctx->kbdev->dev, "Rejecting atom with jc = NULL\n");
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
- return jd_done_nolock(katom, true);
+ return kbase_jd_done_nolock(katom, true);
}
/* Reject atoms with an invalid device_nr */
@@ -1209,7 +1210,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
"Rejecting atom with invalid device_nr %d\n",
katom->device_nr);
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
- return jd_done_nolock(katom, true);
+ return kbase_jd_done_nolock(katom, true);
}
/* Reject atoms with invalid core requirements */
@@ -1219,7 +1220,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
"Rejecting atom with invalid core requirements\n");
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE;
- return jd_done_nolock(katom, true);
+ return kbase_jd_done_nolock(katom, true);
}
/* Reject soft-job atom of certain types from accessing external resources */
@@ -1230,7 +1231,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
dev_err(kctx->kbdev->dev,
"Rejecting soft-job atom accessing external resources\n");
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
- return jd_done_nolock(katom, true);
+ return kbase_jd_done_nolock(katom, true);
}
if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
@@ -1238,7 +1239,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
if (kbase_jd_pre_external_resources(katom, user_atom) != 0) {
/* setup failed (no access, bad resource, unknown resource types, etc.) */
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
- return jd_done_nolock(katom, true);
+ return kbase_jd_done_nolock(katom, true);
}
}
@@ -1249,7 +1250,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
* JIT IDs - atom is invalid.
*/
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
- return jd_done_nolock(katom, true);
+ return kbase_jd_done_nolock(katom, true);
}
#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
@@ -1263,13 +1264,13 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) {
if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) {
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
- return jd_done_nolock(katom, true);
+ return kbase_jd_done_nolock(katom, true);
}
} else {
/* Soft-job */
if (kbase_prepare_soft_job(katom) != 0) {
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
- return jd_done_nolock(katom, true);
+ return kbase_jd_done_nolock(katom, true);
}
}
@@ -1293,7 +1294,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
if (kbase_process_soft_job(katom) == 0) {
kbase_finish_soft_job(katom);
- return jd_done_nolock(katom, true);
+ return kbase_jd_done_nolock(katom, true);
}
return false;
}
@@ -1323,7 +1324,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
}
/* This is a pure dependency. Resolve it immediately */
- return jd_done_nolock(katom, true);
+ return kbase_jd_done_nolock(katom, true);
}
int kbase_jd_submit(struct kbase_context *kctx,
@@ -1580,8 +1581,8 @@ void kbase_jd_done_worker(struct kthread_work *data)
kbasep_js_remove_job(kbdev, kctx, katom);
rt_mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
rt_mutex_unlock(&js_devdata->queue_mutex);
- /* jd_done_nolock() requires the jsctx_mutex lock to be dropped */
- jd_done_nolock(katom, false);
+ /* kbase_jd_done_nolock() requires the jsctx_mutex lock to be dropped */
+ kbase_jd_done_nolock(katom, false);
/* katom may have been freed now, do not use! */
@@ -1647,7 +1648,7 @@ void kbase_jd_done_worker(struct kthread_work *data)
kbase_js_sched_all(kbdev);
if (!atomic_dec_return(&kctx->work_count)) {
- /* If worker now idle then post all events that jd_done_nolock()
+ /* If worker now idle then post all events that kbase_jd_done_nolock()
* has queued
*/
rt_mutex_lock(&jctx->lock);
@@ -1693,6 +1694,7 @@ static void jd_cancel_worker(struct kthread_work *data)
struct kbase_jd_context *jctx;
struct kbase_context *kctx;
struct kbasep_js_kctx_info *js_kctx_info;
+ bool need_to_try_schedule_context;
bool attr_state_changed;
struct kbase_device *kbdev;
@@ -1720,12 +1722,13 @@ static void jd_cancel_worker(struct kthread_work *data)
rt_mutex_lock(&jctx->lock);
- jd_done_nolock(katom, true);
+ need_to_try_schedule_context = kbase_jd_done_nolock(katom, true);
/* Because we're zapping, we're not adding any more jobs to this ctx, so no need to
* schedule the context. There's also no need for the jsctx_mutex to have been taken
* around this too.
*/
KBASE_DEBUG_ASSERT(!need_to_try_schedule_context);
+ CSTD_UNUSED(need_to_try_schedule_context);
/* katom may have been freed now, do not use! */
rt_mutex_unlock(&jctx->lock);
@@ -1764,6 +1767,8 @@ void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr,
kbdev = kctx->kbdev;
KBASE_DEBUG_ASSERT(kbdev);
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT)
katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
diff --git a/mali_kbase/mali_kbase_jd_debugfs.c b/mali_kbase/mali_kbase_jd_debugfs.c
index f9b41d5..0d6230d 100644
--- a/mali_kbase/mali_kbase_jd_debugfs.c
+++ b/mali_kbase/mali_kbase_jd_debugfs.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -72,9 +72,7 @@ static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom,
#endif
seq_printf(sfile,
-#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE)
- "Sd(%u#%u: %s) ",
-#elif (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE)
+#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE)
"Sd(%llu#%u: %s) ",
#else
"Sd(%llu#%llu: %s) ",
@@ -93,9 +91,7 @@ static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom,
#endif
seq_printf(sfile,
-#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE)
- "Wd(%u#%u: %s) ",
-#elif (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE)
+#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE)
"Wd(%llu#%u: %s) ",
#else
"Wd(%llu#%llu: %s) ",
@@ -164,7 +160,7 @@ static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data)
BASE_UK_VERSION_MINOR);
/* Print table heading */
- seq_puts(sfile, " ID, Core req, St, CR, Predeps, Start time, Additional info...\n");
+ seq_puts(sfile, " ID, Core req, St, Predeps, Start time, Additional info...\n");
atoms = kctx->jctx.atoms;
/* General atom states */
@@ -184,8 +180,8 @@ static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data)
* it is valid
*/
if (ktime_to_ns(atom->start_timestamp))
- start_timestamp = ktime_to_ns(
- ktime_sub(ktime_get(), atom->start_timestamp));
+ start_timestamp =
+ ktime_to_ns(ktime_sub(ktime_get_raw(), atom->start_timestamp));
kbasep_jd_debugfs_atom_deps(deps, atom);
@@ -230,11 +226,7 @@ static const struct file_operations kbasep_jd_debugfs_atoms_fops = {
void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx)
{
-#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
const mode_t mode = 0444;
-#else
- const mode_t mode = 0400;
-#endif
/* Caller already ensures this, but we keep the pattern for
* maintenance safety.
diff --git a/mali_kbase/mali_kbase_kinstr_jm.c b/mali_kbase/mali_kbase_kinstr_jm.c
index 84efbb3..34ba196 100644
--- a/mali_kbase/mali_kbase_kinstr_jm.c
+++ b/mali_kbase/mali_kbase_kinstr_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -45,6 +45,7 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/version.h>
+#include <linux/version_compat_defs.h>
#include <linux/wait.h>
/* Define static_assert().
@@ -60,10 +61,6 @@
#define __static_assert(e, msg, ...) _Static_assert(e, msg)
#endif
-#if KERNEL_VERSION(4, 16, 0) >= LINUX_VERSION_CODE
-typedef unsigned int __poll_t;
-#endif
-
#ifndef ENOTSUP
#define ENOTSUP EOPNOTSUPP
#endif
@@ -637,11 +634,11 @@ static __poll_t reader_poll(struct file *const file,
struct reader_changes *changes;
if (unlikely(!file || !wait))
- return -EINVAL;
+ return (__poll_t)-EINVAL;
reader = file->private_data;
if (unlikely(!reader))
- return -EBADF;
+ return (__poll_t)-EBADF;
changes = &reader->changes;
@@ -666,7 +663,7 @@ static const struct file_operations file_operations = {
static const size_t kbase_kinstr_jm_readers_max = 16;
/**
- * kbasep_kinstr_jm_release() - Invoked when the reference count is dropped
+ * kbase_kinstr_jm_release() - Invoked when the reference count is dropped
* @ref: the context reference count
*/
static void kbase_kinstr_jm_release(struct kref *const ref)
@@ -737,7 +734,7 @@ static int kbase_kinstr_jm_readers_add(struct kbase_kinstr_jm *const ctx,
}
/**
- * readers_del() - Deletes a reader from the list of readers
+ * kbase_kinstr_jm_readers_del() - Deletes a reader from the list of readers
* @ctx: the instrumentation context
* @reader: the reader to delete
*/
diff --git a/mali_kbase/mali_kbase_kinstr_jm.h b/mali_kbase/mali_kbase_kinstr_jm.h
index 2c904e5..84fabac 100644
--- a/mali_kbase/mali_kbase_kinstr_jm.h
+++ b/mali_kbase/mali_kbase_kinstr_jm.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -71,8 +71,6 @@
#else
/* empty wrapper macros for userspace */
#define static_branch_unlikely(key) (1)
-#define KERNEL_VERSION(a, b, c) (0)
-#define LINUX_VERSION_CODE (1)
#endif /* __KERNEL__ */
/* Forward declarations */
diff --git a/mali_kbase/mali_kbase_kinstr_prfcnt.c b/mali_kbase/mali_kbase_kinstr_prfcnt.c
index afc008b..b7c8a16 100644
--- a/mali_kbase/mali_kbase_kinstr_prfcnt.c
+++ b/mali_kbase/mali_kbase_kinstr_prfcnt.c
@@ -36,6 +36,7 @@
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/slab.h>
+#include <linux/version_compat_defs.h>
#include <linux/workqueue.h>
/* The minimum allowed interval between dumps, in nanoseconds
@@ -87,16 +88,13 @@ struct kbase_kinstr_prfcnt_sample {
/**
* struct kbase_kinstr_prfcnt_sample_array - Array of sample data.
- * @page_addr: Address of allocated pages. A single allocation is used
+ * @user_buf: Address of allocated userspace buffer. A single allocation is used
* for all Dump Buffers in the array.
- * @page_order: The allocation order of the pages, the order is on a
- * logarithmic scale.
* @sample_count: Number of allocated samples.
* @samples: Non-NULL pointer to the array of Dump Buffers.
*/
struct kbase_kinstr_prfcnt_sample_array {
- u64 page_addr;
- unsigned int page_order;
+ u8 *user_buf;
size_t sample_count;
struct kbase_kinstr_prfcnt_sample *samples;
};
@@ -229,25 +227,19 @@ static struct prfcnt_enum_item kinstr_prfcnt_supported_requests[] = {
* Return: POLLIN if data can be read without blocking, 0 if data can not be
* read without blocking, else error code.
*/
-#if KERNEL_VERSION(4, 16, 0) >= LINUX_VERSION_CODE
-static unsigned int
-kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp,
- struct poll_table_struct *wait)
-#else
static __poll_t
kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp,
struct poll_table_struct *wait)
-#endif
{
struct kbase_kinstr_prfcnt_client *cli;
if (!filp || !wait)
- return -EINVAL;
+ return (__poll_t)-EINVAL;
cli = filp->private_data;
if (!cli)
- return -EINVAL;
+ return (__poll_t)-EINVAL;
poll_wait(filp, &cli->waitq, wait);
@@ -392,7 +384,10 @@ kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(u64 type)
block_type = PRFCNT_BLOCK_TYPE_MEMORY;
break;
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED:
default:
block_type = PRFCNT_BLOCK_TYPE_RESERVED;
break;
@@ -429,7 +424,7 @@ static
int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *enable_map,
struct kbase_hwcnt_dump_buffer *dst,
struct prfcnt_metadata **block_meta_base,
- u64 base_addr, u8 counter_set)
+ u8 *base_addr, u8 counter_set)
{
size_t grp, blk, blk_inst;
struct prfcnt_metadata **ptr_md = block_meta_base;
@@ -440,7 +435,7 @@ int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *ena
metadata = dst->metadata;
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
- u64 *dst_blk;
+ u8 *dst_blk;
/* Skip unavailable or non-enabled blocks */
if (kbase_kinstr_is_block_type_reserved(metadata, grp, blk) ||
@@ -448,7 +443,7 @@ int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *ena
!kbase_hwcnt_enable_map_block_enabled(enable_map, grp, blk, blk_inst))
continue;
- dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ dst_blk = (u8 *)kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
(*ptr_md)->hdr.item_type = PRFCNT_SAMPLE_META_TYPE_BLOCK;
(*ptr_md)->hdr.item_version = PRFCNT_READER_API_VERSION;
(*ptr_md)->u.block_md.block_type =
@@ -458,7 +453,7 @@ int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *ena
(*ptr_md)->u.block_md.block_idx = (u8)blk_inst;
(*ptr_md)->u.block_md.set = counter_set;
(*ptr_md)->u.block_md.block_state = BLOCK_STATE_UNKNOWN;
- (*ptr_md)->u.block_md.values_offset = (u32)((u64)(uintptr_t)dst_blk - base_addr);
+ (*ptr_md)->u.block_md.values_offset = (u32)(dst_blk - base_addr);
/* update the buf meta data block pointer to next item */
(*ptr_md)++;
@@ -504,7 +499,7 @@ static void kbasep_kinstr_prfcnt_set_sample_metadata(
/* Dealing with counter blocks */
ptr_md++;
if (WARN_ON(kbasep_kinstr_prfcnt_set_block_meta_items(&cli->enable_map, dump_buf, &ptr_md,
- cli->sample_arr.page_addr,
+ cli->sample_arr.user_buf,
cli->config.counter_set)))
return;
@@ -1017,12 +1012,8 @@ kbasep_kinstr_prfcnt_get_sample(struct kbase_kinstr_prfcnt_client *cli,
}
read_idx %= cli->sample_arr.sample_count;
- sample_offset_bytes =
- (u64)(uintptr_t)cli->sample_arr.samples[read_idx].sample_meta -
- (u64)(uintptr_t)cli->sample_arr.page_addr;
- sample_meta =
- (struct prfcnt_metadata *)cli->sample_arr.samples[read_idx]
- .sample_meta;
+ sample_meta = cli->sample_arr.samples[read_idx].sample_meta;
+ sample_offset_bytes = (u8 *)sample_meta - cli->sample_arr.user_buf;
/* Verify that a valid sample has been dumped in the read_idx.
* There are situations where this may not be the case,
@@ -1067,8 +1058,7 @@ kbasep_kinstr_prfcnt_put_sample(struct kbase_kinstr_prfcnt_client *cli,
read_idx %= cli->sample_arr.sample_count;
sample_offset_bytes =
- (u64)(uintptr_t)cli->sample_arr.samples[read_idx].sample_meta -
- (u64)(uintptr_t)cli->sample_arr.page_addr;
+ (u8 *)cli->sample_arr.samples[read_idx].sample_meta - cli->sample_arr.user_buf;
if (sample_access->sample_offset_bytes != sample_offset_bytes) {
err = -EINVAL;
@@ -1160,40 +1150,15 @@ static int kbasep_kinstr_prfcnt_hwcnt_reader_mmap(struct file *filp,
struct vm_area_struct *vma)
{
struct kbase_kinstr_prfcnt_client *cli;
- unsigned long vm_size, size, addr, pfn, offset;
if (!filp || !vma)
return -EINVAL;
- cli = filp->private_data;
+ cli = filp->private_data;
if (!cli)
return -EINVAL;
- vm_size = vma->vm_end - vma->vm_start;
-
- /* The mapping is allowed to span the entirety of the page allocation,
- * not just the chunk where the dump buffers are allocated.
- * This accommodates the corner case where the combined size of the
- * dump buffers is smaller than a single page.
- * This does not pose a security risk as the pages are zeroed on
- * allocation, and anything out of bounds of the dump buffers is never
- * written to.
- */
- size = (1ull << cli->sample_arr.page_order) * PAGE_SIZE;
-
- if (vma->vm_pgoff > (size >> PAGE_SHIFT))
- return -EINVAL;
-
- offset = vma->vm_pgoff << PAGE_SHIFT;
-
- if (vm_size > size - offset)
- return -EINVAL;
-
- addr = __pa(cli->sample_arr.page_addr + offset);
- pfn = addr >> PAGE_SHIFT;
-
- return remap_pfn_range(vma, vma->vm_start, pfn, vm_size,
- vma->vm_page_prot);
+ return remap_vmalloc_range(vma, cli->sample_arr.user_buf, 0);
}
static void kbasep_kinstr_prfcnt_sample_array_free(
@@ -1202,8 +1167,8 @@ static void kbasep_kinstr_prfcnt_sample_array_free(
if (!sample_arr)
return;
- kfree((void *)sample_arr->samples);
- kfree((void *)(size_t)sample_arr->page_addr);
+ kfree(sample_arr->samples);
+ vfree(sample_arr->user_buf);
memset(sample_arr, 0, sizeof(*sample_arr));
}
@@ -1443,8 +1408,6 @@ void kbase_kinstr_prfcnt_term(struct kbase_kinstr_prfcnt_context *kinstr_ctx)
if (!kinstr_ctx)
return;
- cancel_work_sync(&kinstr_ctx->dump_work);
-
/* Non-zero client count implies client leak */
if (WARN_ON(kinstr_ctx->client_count > 0)) {
struct kbase_kinstr_prfcnt_client *pos, *n;
@@ -1456,6 +1419,8 @@ void kbase_kinstr_prfcnt_term(struct kbase_kinstr_prfcnt_context *kinstr_ctx)
}
}
+ cancel_work_sync(&kinstr_ctx->dump_work);
+
WARN_ON(kinstr_ctx->client_count > 0);
kfree(kinstr_ctx);
}
@@ -1530,8 +1495,6 @@ static int kbasep_kinstr_prfcnt_sample_array_alloc(struct kbase_kinstr_prfcnt_cl
struct kbase_kinstr_prfcnt_sample_array *sample_arr = &cli->sample_arr;
struct kbase_kinstr_prfcnt_sample *samples;
size_t sample_idx;
- u64 addr;
- unsigned int order;
size_t dump_buf_bytes;
size_t clk_cnt_buf_bytes;
size_t sample_meta_bytes;
@@ -1554,16 +1517,13 @@ static int kbasep_kinstr_prfcnt_sample_array_alloc(struct kbase_kinstr_prfcnt_cl
if (!samples)
return -ENOMEM;
- order = get_order(sample_size * buffer_count);
- addr = (u64)(uintptr_t)kzalloc(sample_size * buffer_count, GFP_KERNEL);
+ sample_arr->user_buf = vmalloc_user(sample_size * buffer_count);
- if (!addr) {
- kfree((void *)samples);
+ if (!sample_arr->user_buf) {
+ kfree(samples);
return -ENOMEM;
}
- sample_arr->page_addr = addr;
- sample_arr->page_order = order;
sample_arr->sample_count = buffer_count;
sample_arr->samples = samples;
@@ -1577,12 +1537,11 @@ static int kbasep_kinstr_prfcnt_sample_array_alloc(struct kbase_kinstr_prfcnt_cl
/* Internal layout in a sample buffer: [sample metadata, dump_buf, clk_cnt_buf]. */
samples[sample_idx].dump_buf.metadata = metadata;
samples[sample_idx].sample_meta =
- (struct prfcnt_metadata *)(uintptr_t)(
- addr + sample_meta_offset);
+ (struct prfcnt_metadata *)(sample_arr->user_buf + sample_meta_offset);
samples[sample_idx].dump_buf.dump_buf =
- (u64 *)(uintptr_t)(addr + dump_buf_offset);
+ (u64 *)(sample_arr->user_buf + dump_buf_offset);
samples[sample_idx].dump_buf.clk_cnt_buf =
- (u64 *)(uintptr_t)(addr + clk_cnt_buf_offset);
+ (u64 *)(sample_arr->user_buf + clk_cnt_buf_offset);
}
return 0;
@@ -2033,7 +1992,6 @@ static int kbasep_kinstr_prfcnt_enum_info_count(
struct kbase_kinstr_prfcnt_context *kinstr_ctx,
struct kbase_ioctl_kinstr_prfcnt_enum_info *enum_info)
{
- int err = 0;
uint32_t count = 0;
size_t block_info_count = 0;
const struct kbase_hwcnt_metadata *metadata;
@@ -2054,7 +2012,7 @@ static int kbasep_kinstr_prfcnt_enum_info_count(
enum_info->info_item_size = sizeof(struct prfcnt_enum_item);
kinstr_ctx->info_item_count = count;
- return err;
+ return 0;
}
static int kbasep_kinstr_prfcnt_enum_info_list(
@@ -2167,15 +2125,10 @@ int kbase_kinstr_prfcnt_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx,
}
bytes = item_count * sizeof(*req_arr);
- req_arr = kmalloc(bytes, GFP_KERNEL);
+ req_arr = memdup_user(u64_to_user_ptr(setup->in.requests_ptr), bytes);
- if (!req_arr)
- return -ENOMEM;
-
- if (copy_from_user(req_arr, u64_to_user_ptr(setup->in.requests_ptr), bytes)) {
- err = -EFAULT;
- goto free_buf;
- }
+ if (IS_ERR(req_arr))
+ return PTR_ERR(req_arr);
err = kbasep_kinstr_prfcnt_client_create(kinstr_ctx, setup, &cli, req_arr);
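
For context, a minimal sketch of the two allocation patterns this file moves to: a vmalloc_user() buffer handed to userspace via remap_vmalloc_range(), and memdup_user() replacing the kmalloc()/copy_from_user() pair. The example_client type and field names are illustrative, not the driver's:

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/string.h>	/* memdup_user() */
#include <linux/vmalloc.h>

struct example_client {			/* hypothetical client state */
	void *user_buf;			/* allocated with vmalloc_user(size) */
};

static int example_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct example_client *cli = filp->private_data;

	if (!cli)
		return -EINVAL;

	/* remap_vmalloc_range() validates the requested size/offset itself,
	 * so no manual pfn/offset arithmetic is needed here.
	 */
	return remap_vmalloc_range(vma, cli->user_buf, 0);
}

static void *example_copy_requests(const void __user *uptr, size_t bytes)
{
	/* Returns the copied buffer or an ERR_PTR() on failure */
	return memdup_user(uptr, bytes);
}
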
diff --git a/mali_kbase/mali_kbase_kinstr_prfcnt.h b/mali_kbase/mali_kbase_kinstr_prfcnt.h
index ec42ce0..e834926 100644
--- a/mali_kbase/mali_kbase_kinstr_prfcnt.h
+++ b/mali_kbase/mali_kbase_kinstr_prfcnt.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -124,7 +124,7 @@ size_t kbasep_kinstr_prfcnt_get_sample_md_count(const struct kbase_hwcnt_metadat
int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *enable_map,
struct kbase_hwcnt_dump_buffer *dst,
struct prfcnt_metadata **block_meta_base,
- u64 base_addr, u8 counter_set);
+ u8 *base_addr, u8 counter_set);
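
With the base address now passed as a byte pointer, offsets into the sample buffer reduce to plain pointer arithmetic; a trivial sketch with illustrative names:

static u32 example_values_offset(const u8 *dst_blk, const u8 *base_addr)
{
	/* Byte offset of a block within the sample buffer */
	return (u32)(dst_blk - base_addr);
}
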
/**
* kbasep_kinstr_prfcnt_client_create() - Create a kinstr_prfcnt client.
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index 989ce1e..fcbaf2b 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -1803,9 +1803,8 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
return err;
bad_insert:
- kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
- reg->start_pfn, reg->nr_pages,
- kctx->as_nr);
+ kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages,
+ reg->nr_pages, kctx->as_nr);
kbase_remove_va_region(kctx->kbdev, reg);
@@ -1820,6 +1819,7 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
{
int err = 0;
+ struct kbase_mem_phy_alloc *alloc;
if (reg->start_pfn == 0)
return 0;
@@ -1827,11 +1827,12 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
if (!reg->gpu_alloc)
return -EINVAL;
+ alloc = reg->gpu_alloc;
+
/* Tear down GPU page tables, depending on memory type. */
- switch (reg->gpu_alloc->type) {
+ switch (alloc->type) {
case KBASE_MEM_TYPE_ALIAS: {
size_t i = 0;
- struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
/* Due to the way the number of valid PTEs and ATEs are tracked
* currently, only the GPU virtual range that is backed & mapped
@@ -1843,9 +1844,8 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
if (alloc->imported.alias.aliased[i].alloc) {
int err_loop = kbase_mmu_teardown_pages(
kctx->kbdev, &kctx->mmu,
- reg->start_pfn +
- (i *
- alloc->imported.alias.stride),
+ reg->start_pfn + (i * alloc->imported.alias.stride),
+ alloc->pages + (i * alloc->imported.alias.stride),
alloc->imported.alias.aliased[i].length,
kctx->as_nr);
if (WARN_ON_ONCE(err_loop))
@@ -1855,32 +1855,32 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
}
break;
case KBASE_MEM_TYPE_IMPORTED_UMM:
- err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
- reg->start_pfn, reg->nr_pages, kctx->as_nr);
+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ alloc->pages, reg->nr_pages, kctx->as_nr);
break;
default:
- err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
- reg->start_pfn, kbase_reg_current_backed_size(reg),
- kctx->as_nr);
+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ alloc->pages, kbase_reg_current_backed_size(reg),
+ kctx->as_nr);
break;
}
/* Update tracking, and other cleanup, depending on memory type. */
- switch (reg->gpu_alloc->type) {
+ switch (alloc->type) {
case KBASE_MEM_TYPE_ALIAS:
/* We mark the source allocs as unmapped from the GPU when
* putting reg's allocs
*/
break;
case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
- struct kbase_alloc_import_user_buf *user_buf = &reg->gpu_alloc->imported.user_buf;
+ struct kbase_alloc_import_user_buf *user_buf = &alloc->imported.user_buf;
if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) {
user_buf->current_mapping_usage_count &= ~PINNED_ON_IMPORT;
/* The allocation could still have active mappings. */
if (user_buf->current_mapping_usage_count == 0) {
- kbase_jd_user_buf_unmap(kctx, reg->gpu_alloc, reg,
+ kbase_jd_user_buf_unmap(kctx, alloc, reg,
(reg->flags &
(KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)));
}
@@ -3422,7 +3422,7 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
}
/**
- * Acquire the per-context region list lock
+ * kbase_gpu_vm_lock() - Acquire the per-context region list lock
* @kctx: KBase context
*/
void kbase_gpu_vm_lock(struct kbase_context *kctx)
@@ -3434,7 +3434,7 @@ void kbase_gpu_vm_lock(struct kbase_context *kctx)
KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock);
/**
- * Release the per-context region list lock
+ * kbase_gpu_vm_unlock() - Release the per-context region list lock
* @kctx: KBase context
*/
void kbase_gpu_vm_unlock(struct kbase_context *kctx)
@@ -3672,12 +3672,7 @@ void kbase_jit_debugfs_init(struct kbase_context *kctx)
/* prevent unprivileged use of debug file system
* in old kernel version
*/
-#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
- /* only for newer kernel version debug file system is safe */
const mode_t mode = 0444;
-#else
- const mode_t mode = 0400;
-#endif
/* Caller already ensures this, but we keep the pattern for
* maintenance safety.
@@ -3766,6 +3761,7 @@ int kbase_jit_init(struct kbase_context *kctx)
INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker);
#if MALI_USE_CSF
+ spin_lock_init(&kctx->csf.kcpu_queues.jit_lock);
INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_cmds_head);
INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_blocked_queues);
#else /* !MALI_USE_CSF */
@@ -4203,9 +4199,7 @@ static bool jit_allow_allocate(struct kbase_context *kctx,
const struct base_jit_alloc_info *info,
bool ignore_pressure_limit)
{
-#if MALI_USE_CSF
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
-#else
+#if !MALI_USE_CSF
lockdep_assert_held(&kctx->jctx.lock);
#endif
@@ -4298,9 +4292,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
*/
const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;
-#if MALI_USE_CSF
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
-#else
+#if !MALI_USE_CSF
lockdep_assert_held(&kctx->jctx.lock);
#endif
@@ -4813,18 +4805,7 @@ int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx,
write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR);
-#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE
- pinned_pages = get_user_pages(NULL, mm, address, alloc->imported.user_buf.nr_pages,
-#if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \
-KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
- write ? FOLL_WRITE : 0, pages, NULL);
-#else
- write, 0, pages, NULL);
-#endif
-#elif KERNEL_VERSION(4, 9, 0) > LINUX_VERSION_CODE
- pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages,
- write, 0, pages, NULL);
-#elif KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE
+#if KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE
pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages,
write ? FOLL_WRITE : 0, pages, NULL);
#elif KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE
@@ -4860,11 +4841,11 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
struct kbase_mem_phy_alloc *alloc;
struct page **pages;
struct tagged_addr *pa;
- long i;
+ long i, dma_mapped_pages;
unsigned long address;
struct device *dev;
- unsigned long offset;
- unsigned long local_size;
+ unsigned long offset_within_page;
+ unsigned long remaining_size;
unsigned long gwt_mask = ~0;
/* Calls to this function are inherently asynchronous, with respect to
* MMU operations.
@@ -4884,17 +4865,16 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
pinned_pages = alloc->nents;
pages = alloc->imported.user_buf.pages;
dev = kctx->kbdev->dev;
- offset = address & ~PAGE_MASK;
- local_size = alloc->imported.user_buf.size;
+ offset_within_page = address & ~PAGE_MASK;
+ remaining_size = alloc->imported.user_buf.size;
for (i = 0; i < pinned_pages; i++) {
- dma_addr_t dma_addr;
- unsigned long min;
-
- min = MIN(PAGE_SIZE - offset, local_size);
- dma_addr = dma_map_page(dev, pages[i],
- offset, min,
+ unsigned long map_size =
+ MIN(PAGE_SIZE - offset_within_page, remaining_size);
+ dma_addr_t dma_addr = dma_map_page(dev, pages[i],
+ offset_within_page, map_size,
DMA_BIDIRECTIONAL);
+
err = dma_mapping_error(dev, dma_addr);
if (err)
goto unwind;
@@ -4902,8 +4882,8 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
alloc->imported.user_buf.dma_addrs[i] = dma_addr;
pa[i] = as_tagged(page_to_phys(pages[i]));
- local_size -= min;
- offset = 0;
+ remaining_size -= map_size;
+ offset_within_page = 0;
}
#ifdef CONFIG_MALI_CINSTR_GWT
@@ -4921,10 +4901,19 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
/* fall down */
unwind:
alloc->nents = 0;
- while (i--) {
+ offset_within_page = address & ~PAGE_MASK;
+ remaining_size = alloc->imported.user_buf.size;
+ dma_mapped_pages = i;
+ /* Run the unmap loop in the same order as map loop */
+ for (i = 0; i < dma_mapped_pages; i++) {
+ unsigned long unmap_size =
+ MIN(PAGE_SIZE - offset_within_page, remaining_size);
+
dma_unmap_page(kctx->kbdev->dev,
alloc->imported.user_buf.dma_addrs[i],
- PAGE_SIZE, DMA_BIDIRECTIONAL);
+ unmap_size, DMA_BIDIRECTIONAL);
+ remaining_size -= unmap_size;
+ offset_within_page = 0;
}
/* The user buffer could already have been previously pinned before
@@ -4950,7 +4939,8 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
{
long i;
struct page **pages;
- unsigned long size = alloc->imported.user_buf.size;
+ unsigned long offset_within_page = alloc->imported.user_buf.address & ~PAGE_MASK;
+ unsigned long remaining_size = alloc->imported.user_buf.size;
lockdep_assert_held(&kctx->reg_lock);
@@ -4964,11 +4954,11 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
#endif
for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
- unsigned long local_size;
+ unsigned long unmap_size =
+ MIN(remaining_size, PAGE_SIZE - offset_within_page);
dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
- local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
- dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
+ dma_unmap_page(kctx->kbdev->dev, dma_addr, unmap_size,
DMA_BIDIRECTIONAL);
if (writeable)
set_page_dirty_lock(pages[i]);
@@ -4977,7 +4967,8 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
pages[i] = NULL;
#endif
- size -= local_size;
+ remaining_size -= unmap_size;
+ offset_within_page = 0;
}
#if !MALI_USE_CSF
alloc->nents = 0;
@@ -5089,6 +5080,7 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_r
if (!kbase_is_region_invalid_or_free(reg)) {
kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ alloc->pages,
kbase_reg_current_backed_size(reg),
kctx->as_nr);
}
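
A sketch of the map/unwind pattern these hunks converge on: the error path walks forward over the pages that were actually mapped and recomputes the same per-page size the map loop used, rather than unmapping PAGE_SIZE unconditionally. All names are illustrative and this is not the driver's code:

#include <linux/dma-mapping.h>
#include <linux/kernel.h>
#include <linux/mm.h>

static int example_dma_map_user_pages(struct device *dev, struct page **pages,
				      long nr_pages, unsigned long addr,
				      unsigned long total_size,
				      dma_addr_t *dma_addrs)
{
	unsigned long offset_within_page = addr & ~PAGE_MASK;
	unsigned long remaining_size = total_size;
	long i, mapped;

	for (i = 0; i < nr_pages; i++) {
		unsigned long map_size = min(PAGE_SIZE - offset_within_page, remaining_size);
		dma_addr_t dma_addr = dma_map_page(dev, pages[i], offset_within_page,
						   map_size, DMA_BIDIRECTIONAL);

		if (dma_mapping_error(dev, dma_addr))
			goto unwind;

		dma_addrs[i] = dma_addr;
		remaining_size -= map_size;
		offset_within_page = 0;
	}
	return 0;

unwind:
	/* Unmap in the same order and with the same sizes as the map loop */
	mapped = i;
	offset_within_page = addr & ~PAGE_MASK;
	remaining_size = total_size;
	for (i = 0; i < mapped; i++) {
		unsigned long unmap_size = min(PAGE_SIZE - offset_within_page, remaining_size);

		dma_unmap_page(dev, dma_addrs[i], unmap_size, DMA_BIDIRECTIONAL);
		remaining_size -= unmap_size;
		offset_within_page = 0;
	}
	return -ENOMEM;
}
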
diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h
index 1c7169b..2013d38 100644
--- a/mali_kbase/mali_kbase_mem.h
+++ b/mali_kbase/mali_kbase_mem.h
@@ -1735,8 +1735,8 @@ void kbase_jit_report_update_pressure(struct kbase_context *kctx,
unsigned int flags);
/**
- * jit_trim_necessary_pages() - calculate and trim the least pages possible to
- * satisfy a new JIT allocation
+ * kbase_jit_trim_necessary_pages() - calculate and trim the least pages
+ * possible to satisfy a new JIT allocation
*
* @kctx: Pointer to the kbase context
* @needed_pages: Number of JIT physical pages by which trimming is requested.
@@ -1983,7 +1983,7 @@ static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool)
}
/**
- * kbase_mem_pool_lock - Release a memory pool
+ * kbase_mem_pool_unlock - Release a memory pool
* @pool: Memory pool to lock
*/
static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool)
diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c
index 327b7dc..c0ee10c 100644
--- a/mali_kbase/mali_kbase_mem_linux.c
+++ b/mali_kbase/mali_kbase_mem_linux.c
@@ -31,13 +31,11 @@
#include <linux/fs.h>
#include <linux/version.h>
#include <linux/dma-mapping.h>
-#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE)
-#include <linux/dma-attrs.h>
-#endif /* LINUX_VERSION_CODE < 4.8.0 */
#include <linux/dma-buf.h>
#include <linux/shrinker.h>
#include <linux/cache.h>
#include <linux/memory_group_manager.h>
+#include <linux/math64.h>
#include <mali_kbase.h>
#include <mali_kbase_mem_linux.h>
@@ -84,10 +82,8 @@
#define IR_THRESHOLD_STEPS (256u)
#if MALI_USE_CSF
-static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx,
- struct vm_area_struct *vma);
-static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx,
- struct vm_area_struct *vma);
+static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, struct vm_area_struct *vma);
+static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, struct vm_area_struct *vma);
#endif
static int kbase_vmap_phy_pages(struct kbase_context *kctx,
@@ -115,6 +111,7 @@ static bool is_process_exiting(struct vm_area_struct *vma)
*/
if (atomic_read(&vma->vm_mm->mm_users))
return false;
+
return true;
}
@@ -1120,19 +1117,7 @@ int kbase_mem_do_sync_imported(struct kbase_context *kctx,
ret = 0;
}
#else
- /* Though the below version check could be superfluous depending upon the version condition
- * used for enabling KBASE_MEM_ION_SYNC_WORKAROUND, we still keep this check here to allow
- * ease of modification for non-ION systems or systems where ION has been patched.
- */
-#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS)
- dma_buf_end_cpu_access(dma_buf,
- 0, dma_buf->size,
- dir);
- ret = 0;
-#else
- ret = dma_buf_end_cpu_access(dma_buf,
- dir);
-#endif
+ ret = dma_buf_end_cpu_access(dma_buf, dir);
#endif /* KBASE_MEM_ION_SYNC_WORKAROUND */
break;
case KBASE_SYNC_TO_CPU:
@@ -1149,11 +1134,7 @@ int kbase_mem_do_sync_imported(struct kbase_context *kctx,
ret = 0;
}
#else
- ret = dma_buf_begin_cpu_access(dma_buf,
-#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS)
- 0, dma_buf->size,
-#endif
- dir);
+ ret = dma_buf_begin_cpu_access(dma_buf, dir);
#endif /* KBASE_MEM_ION_SYNC_WORKAROUND */
break;
}
@@ -1329,11 +1310,8 @@ int kbase_mem_umm_map(struct kbase_context *kctx,
return 0;
bad_pad_insert:
- kbase_mmu_teardown_pages(kctx->kbdev,
- &kctx->mmu,
- reg->start_pfn,
- alloc->nents,
- kctx->as_nr);
+ kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages,
+ alloc->nents, kctx->as_nr);
bad_insert:
kbase_mem_umm_unmap_attachment(kctx, alloc);
bad_map_attachment:
@@ -1361,11 +1339,8 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx,
if (!kbase_is_region_invalid_or_free(reg) && reg->gpu_alloc == alloc) {
int err;
- err = kbase_mmu_teardown_pages(kctx->kbdev,
- &kctx->mmu,
- reg->start_pfn,
- reg->nr_pages,
- kctx->as_nr);
+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ alloc->pages, reg->nr_pages, kctx->as_nr);
WARN_ON(err);
}
@@ -1558,13 +1533,15 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
struct kbase_context *kctx, unsigned long address,
unsigned long size, u64 *va_pages, u64 *flags)
{
- long i;
+ long i, dma_mapped_pages;
struct kbase_va_region *reg;
struct rb_root *rbtree;
long faulted_pages;
int zone = KBASE_REG_ZONE_CUSTOM_VA;
bool shared_zone = false;
u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev);
+ unsigned long offset_within_page;
+ unsigned long remaining_size;
struct kbase_alloc_import_user_buf *user_buf;
struct page **pages = NULL;
int write;
@@ -1683,18 +1660,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR);
-#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE
- faulted_pages = get_user_pages(current, current->mm, address, *va_pages,
-#if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \
-KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
- write ? FOLL_WRITE : 0, pages, NULL);
-#else
- write, 0, pages, NULL);
-#endif
-#elif KERNEL_VERSION(4, 9, 0) > LINUX_VERSION_CODE
- faulted_pages = get_user_pages(address, *va_pages,
- write, 0, pages, NULL);
-#elif KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE
+#if KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE
faulted_pages = get_user_pages(address, *va_pages,
write ? FOLL_WRITE : 0, pages, NULL);
#else
@@ -1727,29 +1693,27 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
if (pages) {
struct device *dev = kctx->kbdev->dev;
- unsigned long local_size = user_buf->size;
- unsigned long offset = user_buf->address & ~PAGE_MASK;
struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg);
/* Top bit signifies that this was pinned on import */
user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT;
+ offset_within_page = user_buf->address & ~PAGE_MASK;
+ remaining_size = user_buf->size;
for (i = 0; i < faulted_pages; i++) {
- dma_addr_t dma_addr;
- unsigned long min;
+ unsigned long map_size =
+ MIN(PAGE_SIZE - offset_within_page, remaining_size);
+ dma_addr_t dma_addr = dma_map_page(dev, pages[i],
+ offset_within_page, map_size, DMA_BIDIRECTIONAL);
- min = MIN(PAGE_SIZE - offset, local_size);
- dma_addr = dma_map_page(dev, pages[i],
- offset, min,
- DMA_BIDIRECTIONAL);
if (dma_mapping_error(dev, dma_addr))
goto unwind_dma_map;
user_buf->dma_addrs[i] = dma_addr;
pa[i] = as_tagged(page_to_phys(pages[i]));
- local_size -= min;
- offset = 0;
+ remaining_size -= map_size;
+ offset_within_page = 0;
}
reg->gpu_alloc->nents = faulted_pages;
@@ -1758,10 +1722,19 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
return reg;
unwind_dma_map:
- while (i--) {
+ offset_within_page = user_buf->address & ~PAGE_MASK;
+ remaining_size = user_buf->size;
+ dma_mapped_pages = i;
+ /* Run the unmap loop in the same order as map loop */
+ for (i = 0; i < dma_mapped_pages; i++) {
+ unsigned long unmap_size =
+ MIN(PAGE_SIZE - offset_within_page, remaining_size);
+
dma_unmap_page(kctx->kbdev->dev,
user_buf->dma_addrs[i],
- PAGE_SIZE, DMA_BIDIRECTIONAL);
+ unmap_size, DMA_BIDIRECTIONAL);
+ remaining_size -= unmap_size;
+ offset_within_page = 0;
}
fault_mismatch:
if (pages) {
@@ -1793,6 +1766,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
u64 gpu_va;
size_t i;
bool coherent;
+ uint64_t max_stride;
/* Calls to this function are inherently asynchronous, with respect to
* MMU operations.
@@ -1825,7 +1799,9 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
if (!nents)
goto bad_nents;
- if (stride > U64_MAX / nents)
+ max_stride = div64_u64(U64_MAX, nents);
+
+ if (stride > max_stride)
goto bad_size;
if ((nents * stride) > (U64_MAX / PAGE_SIZE))
@@ -2217,10 +2193,11 @@ static int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx,
u64 const new_pages, u64 const old_pages)
{
u64 delta = old_pages - new_pages;
+ struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
int ret = 0;
- ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
- reg->start_pfn + new_pages, delta, kctx->as_nr);
+ ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + new_pages,
+ alloc->pages + new_pages, delta, kctx->as_nr);
return ret;
}
@@ -3434,13 +3411,6 @@ static vm_fault_t kbase_csf_user_io_pages_vm_fault(struct vm_fault *vmf)
/* Always map the doorbell page as uncached */
doorbell_pgprot = pgprot_device(vma->vm_page_prot);
-#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \
- ((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \
- (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE)))
- vma->vm_page_prot = doorbell_pgprot;
- input_page_pgprot = doorbell_pgprot;
- output_page_pgprot = doorbell_pgprot;
-#else
if (kbdev->system_coherency == COHERENCY_NONE) {
input_page_pgprot = pgprot_writecombine(vma->vm_page_prot);
output_page_pgprot = pgprot_writecombine(vma->vm_page_prot);
@@ -3448,7 +3418,6 @@ static vm_fault_t kbase_csf_user_io_pages_vm_fault(struct vm_fault *vmf)
input_page_pgprot = vma->vm_page_prot;
output_page_pgprot = vma->vm_page_prot;
}
-#endif
doorbell_cpu_addr = vma->vm_start;
@@ -3572,13 +3541,71 @@ map_failed:
return err;
}
+/**
+ * kbase_csf_user_reg_vm_open - VMA open function for the USER page
+ *
+ * @vma: Pointer to the struct containing information about
+ * the userspace mapping of USER page.
+ * Note:
+ * This function isn't expected to be called. If it is called (i.e. on mremap),
+ * private_data is set to NULL so that the close() and fault() handlers can detect it.
+ */
+static void kbase_csf_user_reg_vm_open(struct vm_area_struct *vma)
+{
+ pr_debug("Unexpected call to the open method for USER register mapping");
+ vma->vm_private_data = NULL;
+}
+
+/**
+ * kbase_csf_user_reg_vm_close - VMA close function for the USER page
+ *
+ * @vma: Pointer to the struct containing information about
+ * the userspace mapping of USER page.
+ */
static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma)
{
struct kbase_context *kctx = vma->vm_private_data;
- WARN_ON(!kctx->csf.user_reg_vma);
+ if (!kctx) {
+ pr_debug("Close function called for the unexpected mapping");
+		pr_debug("Close function called for an unexpected mapping");
+ }
+
+ if (unlikely(!kctx->csf.user_reg_vma))
+ dev_warn(kctx->kbdev->dev, "user_reg_vma pointer unexpectedly NULL");
kctx->csf.user_reg_vma = NULL;
+
+ mutex_lock(&kctx->kbdev->csf.reg_lock);
+ if (unlikely(kctx->kbdev->csf.nr_user_page_mapped == 0))
+ dev_warn(kctx->kbdev->dev, "Unexpected value for the USER page mapping counter");
+ else
+ kctx->kbdev->csf.nr_user_page_mapped--;
+ mutex_unlock(&kctx->kbdev->csf.reg_lock);
+}
+
+/**
+ * kbase_csf_user_reg_vm_mremap - VMA mremap function for the USER page
+ *
+ * @vma: Pointer to the struct containing information about
+ * the userspace mapping of USER page.
+ *
+ * Return: -EINVAL
+ *
+ * Note:
+ * User space must not attempt to mremap the USER page mapping.
+ * This function returns an error to fail any such attempt.
+ */
+static int
+#if ((KERNEL_VERSION(5, 13, 0) <= LINUX_VERSION_CODE) || \
+ (KERNEL_VERSION(5, 11, 0) > LINUX_VERSION_CODE))
+kbase_csf_user_reg_vm_mremap(struct vm_area_struct *vma)
+#else
+kbase_csf_user_reg_vm_mremap(struct vm_area_struct *vma, unsigned long flags)
+#endif
+{
+ pr_debug("Unexpected call to mremap method for USER page mapping vma\n");
+ return -EINVAL;
}
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
@@ -3591,19 +3618,24 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf)
struct vm_area_struct *vma = vmf->vma;
#endif
struct kbase_context *kctx = vma->vm_private_data;
- struct kbase_device *kbdev = kctx->kbdev;
- struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev;
- unsigned long pfn = PFN_DOWN(kbdev->reg_start + USER_BASE);
+ struct kbase_device *kbdev;
+ struct memory_group_manager_device *mgm_dev;
+ unsigned long pfn;
size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start);
vm_fault_t ret = VM_FAULT_SIGBUS;
unsigned long flags;
/* Few sanity checks up front */
- if (WARN_ON(nr_pages != 1) ||
- WARN_ON(vma != kctx->csf.user_reg_vma) ||
- WARN_ON(vma->vm_pgoff !=
- PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE)))
+ if (!kctx || (nr_pages != 1) || (vma != kctx->csf.user_reg_vma) ||
+ (vma->vm_pgoff != PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE))) {
+ pr_warn("Unexpected CPU page fault on USER page mapping for process %s tgid %d pid %d\n",
+ current->comm, current->tgid, current->pid);
return VM_FAULT_SIGBUS;
+ }
+
+ kbdev = kctx->kbdev;
+ mgm_dev = kbdev->mgm_dev;
+ pfn = PFN_DOWN(kbdev->reg_start + USER_BASE);
mutex_lock(&kbdev->csf.reg_lock);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -3628,14 +3660,31 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf)
}
static const struct vm_operations_struct kbase_csf_user_reg_vm_ops = {
+ .open = kbase_csf_user_reg_vm_open,
.close = kbase_csf_user_reg_vm_close,
+ .mremap = kbase_csf_user_reg_vm_mremap,
.fault = kbase_csf_user_reg_vm_fault
};
+/**
+ * kbase_csf_cpu_mmap_user_reg_page - Memory map method for USER page.
+ *
+ * @kctx: Pointer of the kernel context.
+ * @vma: Pointer to the struct containing the information about
+ * the userspace mapping of USER page.
+ *
+ * Return: 0 on success, error code otherwise.
+ *
+ * Note:
+ * Newer Base versions ask Kbase to read the LATEST_FLUSH value of the USER page
+ * on their behalf, but this function is kept for backward compatibility because
+ * old Base (<= 1.12) still mmaps the USER page for direct access when it creates
+ * a base context.
+ */
static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx,
struct vm_area_struct *vma)
{
size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start);
+ struct kbase_device *kbdev = kctx->kbdev;
/* Few sanity checks */
if (kctx->csf.user_reg_vma)
@@ -3659,6 +3708,17 @@ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx,
kctx->csf.user_reg_vma = vma;
+ mutex_lock(&kbdev->csf.reg_lock);
+ kbdev->csf.nr_user_page_mapped++;
+
+ if (!kbdev->csf.mali_file_inode)
+ kbdev->csf.mali_file_inode = kctx->filp->f_inode;
+
+ if (unlikely(kbdev->csf.mali_file_inode != kctx->filp->f_inode))
+ dev_warn(kbdev->dev, "Device file inode pointer not same for all contexts");
+
+ mutex_unlock(&kbdev->csf.reg_lock);
+
vma->vm_ops = &kbase_csf_user_reg_vm_ops;
vma->vm_private_data = kctx;
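
Among the smaller changes in this file, the alias stride bound is now computed with div64_u64() so the 64-bit division is also safe on 32-bit kernels. A minimal sketch under that assumption (helper name is illustrative):

#include <linux/kernel.h>
#include <linux/math64.h>
#include <linux/types.h>

static bool example_stride_fits(u64 stride, u64 nents)
{
	/* nents is assumed non-zero (checked by the caller) */
	u64 max_stride = div64_u64(U64_MAX, nents);

	return stride <= max_stride;
}
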
diff --git a/mali_kbase/mali_kbase_mem_linux.h b/mali_kbase/mali_kbase_mem_linux.h
index 1f6877a..5e5d991 100644
--- a/mali_kbase/mali_kbase_mem_linux.h
+++ b/mali_kbase/mali_kbase_mem_linux.h
@@ -439,18 +439,7 @@ u32 kbase_get_cache_line_alignment(struct kbase_device *kbdev);
static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma,
unsigned long addr, unsigned long pfn, pgprot_t pgprot)
{
- int err;
-
-#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \
- ((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \
- (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE)))
- if (pgprot_val(pgprot) != pgprot_val(vma->vm_page_prot))
- return VM_FAULT_SIGBUS;
-
- err = vm_insert_pfn(vma, addr, pfn);
-#else
- err = vm_insert_pfn_prot(vma, addr, pfn, pgprot);
-#endif
+ int err = vm_insert_pfn_prot(vma, addr, pfn, pgprot);
if (unlikely(err == -ENOMEM))
return VM_FAULT_OOM;
diff --git a/mali_kbase/mali_kbase_mem_pool_debugfs.c b/mali_kbase/mali_kbase_mem_pool_debugfs.c
index cfb43b0..3b1b2ba 100644
--- a/mali_kbase/mali_kbase_mem_pool_debugfs.c
+++ b/mali_kbase/mali_kbase_mem_pool_debugfs.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -168,13 +168,7 @@ static const struct file_operations kbase_mem_pool_debugfs_max_size_fops = {
void kbase_mem_pool_debugfs_init(struct dentry *parent,
struct kbase_context *kctx)
{
- /* prevent unprivileged use of debug file in old kernel version */
-#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
- /* only for newer kernel version debug file system is safe */
const mode_t mode = 0644;
-#else
- const mode_t mode = 0600;
-#endif
debugfs_create_file("mem_pool_size", mode, parent,
&kctx->mem_pools.small, &kbase_mem_pool_debugfs_fops);
diff --git a/mali_kbase/mali_kbase_mem_pool_group.h b/mali_kbase/mali_kbase_mem_pool_group.h
index c50ffdb..f97f47d 100644
--- a/mali_kbase/mali_kbase_mem_pool_group.h
+++ b/mali_kbase/mali_kbase_mem_pool_group.h
@@ -49,8 +49,8 @@ static inline struct kbase_mem_pool *kbase_mem_pool_group_select(
}
/**
- * kbase_mem_pool_group_config_init - Set the initial configuration for a
- * set of memory pools
+ * kbase_mem_pool_group_config_set_max_size - Set the initial configuration for
+ * a set of memory pools
*
* @configs: Initial configuration for the set of memory pools
* @max_size: Maximum number of free 4 KiB pages each pool can hold
@@ -92,7 +92,7 @@ int kbase_mem_pool_group_init(struct kbase_mem_pool_group *mem_pools,
struct kbase_mem_pool_group *next_pools);
/**
- * kbase_mem_pool_group_term - Mark a set of memory pools as dying
+ * kbase_mem_pool_group_mark_dying - Mark a set of memory pools as dying
*
* @mem_pools: Set of memory pools to mark
*
diff --git a/mali_kbase/mali_kbase_mem_profile_debugfs.c b/mali_kbase/mali_kbase_mem_profile_debugfs.c
index 92ab1b8..9317023 100644
--- a/mali_kbase/mali_kbase_mem_profile_debugfs.c
+++ b/mali_kbase/mali_kbase_mem_profile_debugfs.c
@@ -69,11 +69,7 @@ static const struct file_operations kbasep_mem_profile_debugfs_fops = {
int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
size_t size)
{
-#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
const mode_t mode = 0444;
-#else
- const mode_t mode = 0400;
-#endif
int err = 0;
mutex_lock(&kctx->mem_profile_lock);
diff --git a/mali_kbase/mali_kbase_pbha_debugfs.c b/mali_kbase/mali_kbase_pbha_debugfs.c
index 47eab63..4130dd6 100644
--- a/mali_kbase/mali_kbase_pbha_debugfs.c
+++ b/mali_kbase/mali_kbase_pbha_debugfs.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -120,14 +120,10 @@ static const struct file_operations pbha_int_id_overrides_fops = {
void kbase_pbha_debugfs_init(struct kbase_device *kbdev)
{
if (kbasep_pbha_supported(kbdev)) {
-#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
- /* only for newer kernel version debug file system is safe */
const mode_t mode = 0644;
-#else
- const mode_t mode = 0600;
-#endif
struct dentry *debugfs_pbha_dir = debugfs_create_dir(
"pbha", kbdev->mali_debugfs_directory);
+
if (IS_ERR_OR_NULL(debugfs_pbha_dir)) {
dev_err(kbdev->dev,
"Couldn't create mali debugfs page-based hardware attributes directory\n");
diff --git a/mali_kbase/mali_kbase_pbha_debugfs.h b/mali_kbase/mali_kbase_pbha_debugfs.h
index 3f477b4..508ecdf 100644
--- a/mali_kbase/mali_kbase_pbha_debugfs.h
+++ b/mali_kbase/mali_kbase_pbha_debugfs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -25,7 +25,7 @@
#include <mali_kbase.h>
/**
- * kbasep_pbha_debugfs_init - Initialize pbha debugfs directory
+ * kbase_pbha_debugfs_init - Initialize pbha debugfs directory
*
* @kbdev: Device pointer
*/
diff --git a/mali_kbase/mali_kbase_platform_fake.c b/mali_kbase/mali_kbase_platform_fake.c
index bf525ed..761a636 100644
--- a/mali_kbase/mali_kbase_platform_fake.c
+++ b/mali_kbase/mali_kbase_platform_fake.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2011-2014, 2016-2017, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2014, 2016-2017, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -39,7 +39,8 @@ static struct platform_device *mali_device;
#ifndef CONFIG_OF
/**
- * Convert data in struct kbase_io_resources struct to Linux-specific resources
+ * kbasep_config_parse_io_resources - Convert data in struct kbase_io_resources
+ * struct to Linux-specific resources
* @io_resources: Input IO resource data
* @linux_resources: Pointer to output array of Linux resource structures
*
diff --git a/mali_kbase/mali_kbase_pm.c b/mali_kbase/mali_kbase_pm.c
index de2422c..1545f3e 100644
--- a/mali_kbase/mali_kbase_pm.c
+++ b/mali_kbase/mali_kbase_pm.c
@@ -462,11 +462,12 @@ static enum hrtimer_restart kbase_pm_apc_timer_callback(struct hrtimer *timer)
int kbase_pm_apc_init(struct kbase_device *kbdev)
{
- kthread_init_worker(&kbdev->apc.worker);
- kbdev->apc.thread = kbase_create_realtime_thread(kbdev,
+ int ret;
+
+ ret = kbase_create_realtime_thread(kbdev,
kthread_worker_fn, &kbdev->apc.worker, "mali_apc_thread");
- if (IS_ERR(kbdev->apc.thread))
- return PTR_ERR(kbdev->apc.thread);
+ if (ret)
+ return ret;
/*
* We initialize power off and power on work on init as they will each
@@ -486,6 +487,5 @@ int kbase_pm_apc_init(struct kbase_device *kbdev)
void kbase_pm_apc_term(struct kbase_device *kbdev)
{
hrtimer_cancel(&kbdev->apc.timer);
- kthread_flush_worker(&kbdev->apc.worker);
- kthread_stop(kbdev->apc.thread);
+ kbase_destroy_kworker_stack(&kbdev->apc.worker);
}
diff --git a/mali_kbase/mali_kbase_regs_history_debugfs.c b/mali_kbase/mali_kbase_regs_history_debugfs.c
index f8dec6b..c19b4a3 100644
--- a/mali_kbase/mali_kbase_regs_history_debugfs.c
+++ b/mali_kbase/mali_kbase_regs_history_debugfs.c
@@ -25,6 +25,7 @@
#if defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_NO_MALI)
#include <linux/debugfs.h>
+#include <linux/version_compat_defs.h>
/**
* kbase_io_history_resize - resize the register access history buffer.
@@ -158,11 +159,8 @@ static int regs_history_size_set(void *data, u64 val)
return kbase_io_history_resize(h, (u16)val);
}
-
-DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops,
- regs_history_size_get,
- regs_history_size_set,
- "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(regs_history_size_fops, regs_history_size_get, regs_history_size_set,
+ "%llu\n");
/**
* regs_history_show - show callback for the register access history file.
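
For reference, a minimal sketch of the DEFINE_DEBUGFS_ATTRIBUTE() pattern the file switches to; it generates the same get/set plumbing as DEFINE_SIMPLE_ATTRIBUTE() but uses debugfs-aware open/release helpers. The names below are illustrative:

#include <linux/debugfs.h>

static u64 example_value;

static int example_get(void *data, u64 *val)
{
	*val = example_value;
	return 0;
}

static int example_set(void *data, u64 val)
{
	example_value = val;
	return 0;
}

DEFINE_DEBUGFS_ATTRIBUTE(example_fops, example_get, example_set, "%llu\n");

/* Registered as usual, e.g.:
 * debugfs_create_file("example", 0644, parent, NULL, &example_fops);
 */
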
diff --git a/mali_kbase/mali_kbase_smc.h b/mali_kbase/mali_kbase_smc.h
index 91eb9ee..40a3483 100644
--- a/mali_kbase/mali_kbase_smc.h
+++ b/mali_kbase/mali_kbase_smc.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2015, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -49,7 +49,7 @@
u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2);
/**
- * kbase_invoke_smc_fid - Perform a secure monitor call
+ * kbase_invoke_smc - Perform a secure monitor call
* @oen: Owning Entity number (SIP, STD etc).
* @function_number: The function number within the OEN.
* @smc64: use SMC64 calling convention instead of SMC32.
diff --git a/mali_kbase/mali_kbase_softjobs.c b/mali_kbase/mali_kbase_softjobs.c
index ae3b9ad..665bc09 100644
--- a/mali_kbase/mali_kbase_softjobs.c
+++ b/mali_kbase/mali_kbase_softjobs.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -75,7 +75,7 @@ static void kbasep_add_waiting_with_timeout(struct kbase_jd_atom *katom)
/* Record the start time of this atom so we could cancel it at
* the right time.
*/
- katom->start_timestamp = ktime_get();
+ katom->start_timestamp = ktime_get_raw();
/* Add the atom to the waiting list before the timer is
* (re)started to make sure that it gets processed.
@@ -215,7 +215,7 @@ void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom)
rt_mutex_lock(&kctx->jctx.lock);
kbasep_remove_waiting_soft_job(katom);
kbase_finish_soft_job(katom);
- if (jd_done_nolock(katom, true))
+ if (kbase_jd_done_nolock(katom, true))
kbase_js_sched_all(kctx->kbdev);
rt_mutex_unlock(&kctx->jctx.lock);
}
@@ -229,7 +229,7 @@ static void kbasep_soft_event_complete_job(struct kthread_work *work)
int resched;
rt_mutex_lock(&kctx->jctx.lock);
- resched = jd_done_nolock(katom, true);
+ resched = kbase_jd_done_nolock(katom, true);
rt_mutex_unlock(&kctx->jctx.lock);
if (resched)
@@ -390,7 +390,7 @@ void kbasep_soft_job_timeout_worker(struct timer_list *timer)
soft_job_timeout);
u32 timeout_ms = (u32)atomic_read(
&kctx->kbdev->js_data.soft_job_timeout_ms);
- ktime_t cur_time = ktime_get();
+ ktime_t cur_time = ktime_get_raw();
bool restarting = false;
unsigned long lflags;
struct list_head *entry, *tmp;
@@ -500,7 +500,7 @@ out:
static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom)
{
katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
- if (jd_done_nolock(katom, true))
+ if (kbase_jd_done_nolock(katom, true))
kbase_js_sched_all(katom->kctx->kbdev);
}
@@ -812,11 +812,7 @@ int kbase_mem_copy_from_extres(struct kbase_context *kctx,
dma_to_copy = min(dma_buf->size,
(size_t)(buf_data->nr_extres_pages * PAGE_SIZE));
- ret = dma_buf_begin_cpu_access(dma_buf,
-#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS)
- 0, dma_to_copy,
-#endif
- DMA_FROM_DEVICE);
+ ret = dma_buf_begin_cpu_access(dma_buf, DMA_FROM_DEVICE);
if (ret)
goto out_unlock;
@@ -843,11 +839,7 @@ int kbase_mem_copy_from_extres(struct kbase_context *kctx,
break;
}
}
- dma_buf_end_cpu_access(dma_buf,
-#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS)
- 0, dma_to_copy,
-#endif
- DMA_FROM_DEVICE);
+ dma_buf_end_cpu_access(dma_buf, DMA_FROM_DEVICE);
break;
}
default:
@@ -1357,7 +1349,7 @@ static void kbasep_jit_finish_worker(struct kthread_work *work)
rt_mutex_lock(&kctx->jctx.lock);
kbase_finish_soft_job(katom);
- resched = jd_done_nolock(katom, true);
+ resched = kbase_jd_done_nolock(katom, true);
rt_mutex_unlock(&kctx->jctx.lock);
if (resched)
@@ -1798,7 +1790,7 @@ void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev)
if (kbase_process_soft_job(katom_iter) == 0) {
kbase_finish_soft_job(katom_iter);
- resched |= jd_done_nolock(katom_iter, true);
+ resched |= kbase_jd_done_nolock(katom_iter, true);
#ifdef CONFIG_MALI_ARBITER_SUPPORT
atomic_dec(&kbdev->pm.gpu_users_waiting);
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
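
The soft-job timeout path now stamps atoms with ktime_get_raw(), i.e. the raw monotonic clock that is not rate-adjusted by NTP, so elapsed-time comparisons stay consistent. A small sketch of that kind of check, with illustrative names:

#include <linux/ktime.h>

static bool example_timed_out(ktime_t start, u32 timeout_ms)
{
	s64 elapsed_ms = ktime_ms_delta(ktime_get_raw(), start);

	return elapsed_ms >= (s64)timeout_ms;
}
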
diff --git a/mali_kbase/mali_kbase_sync_android.c b/mali_kbase/mali_kbase_sync_android.c
index fa17877..ae6e669 100644
--- a/mali_kbase/mali_kbase_sync_android.c
+++ b/mali_kbase/mali_kbase_sync_android.c
@@ -441,7 +441,7 @@ void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom)
kbasep_remove_waiting_soft_job(katom);
kbase_finish_soft_job(katom);
- if (jd_done_nolock(katom, true))
+ if (kbase_jd_done_nolock(katom, true))
kbase_js_sched_all(katom->kctx->kbdev);
}
diff --git a/mali_kbase/mali_kbase_sync_file.c b/mali_kbase/mali_kbase_sync_file.c
index 1462a6b..649a862 100644
--- a/mali_kbase/mali_kbase_sync_file.c
+++ b/mali_kbase/mali_kbase_sync_file.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -251,7 +251,7 @@ void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom)
kbasep_remove_waiting_soft_job(katom);
kbase_finish_soft_job(katom);
- if (jd_done_nolock(katom, true))
+ if (kbase_jd_done_nolock(katom, true))
kbase_js_sched_all(katom->kctx->kbdev);
}
@@ -298,10 +298,7 @@ void kbase_sync_fence_info_get(struct dma_fence *fence,
info->status = 0; /* still active (unsignaled) */
}
-#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE)
- scnprintf(info->name, sizeof(info->name), "%u#%u",
- fence->context, fence->seqno);
-#elif (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE)
+#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE)
scnprintf(info->name, sizeof(info->name), "%llu#%u",
fence->context, fence->seqno);
#else
diff --git a/mali_kbase/mali_kbase_vinstr.c b/mali_kbase/mali_kbase_vinstr.c
index d7a6c98..e9f843b 100644
--- a/mali_kbase/mali_kbase_vinstr.c
+++ b/mali_kbase/mali_kbase_vinstr.c
@@ -38,6 +38,7 @@
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/slab.h>
+#include <linux/version_compat_defs.h>
#include <linux/workqueue.h>
/* Hwcnt reader API version */
@@ -113,9 +114,7 @@ struct kbase_vinstr_client {
wait_queue_head_t waitq;
};
-static unsigned int kbasep_vinstr_hwcnt_reader_poll(
- struct file *filp,
- poll_table *wait);
+static __poll_t kbasep_vinstr_hwcnt_reader_poll(struct file *filp, poll_table *wait);
static long kbasep_vinstr_hwcnt_reader_ioctl(
struct file *filp,
@@ -517,8 +516,6 @@ void kbase_vinstr_term(struct kbase_vinstr_context *vctx)
if (!vctx)
return;
- cancel_work_sync(&vctx->dump_work);
-
/* Non-zero client count implies client leak */
if (WARN_ON(vctx->client_count != 0)) {
struct kbase_vinstr_client *pos, *n;
@@ -530,6 +527,7 @@ void kbase_vinstr_term(struct kbase_vinstr_context *vctx)
}
}
+ cancel_work_sync(&vctx->dump_work);
kbase_hwcnt_gpu_metadata_narrow_destroy(vctx->metadata_user);
WARN_ON(vctx->client_count != 0);
@@ -1039,18 +1037,16 @@ static long kbasep_vinstr_hwcnt_reader_ioctl(
* Return: POLLIN if data can be read without blocking, 0 if data can not be
* read without blocking, else error code.
*/
-static unsigned int kbasep_vinstr_hwcnt_reader_poll(
- struct file *filp,
- poll_table *wait)
+static __poll_t kbasep_vinstr_hwcnt_reader_poll(struct file *filp, poll_table *wait)
{
struct kbase_vinstr_client *cli;
if (!filp || !wait)
- return -EINVAL;
+ return (__poll_t)-EINVAL;
cli = filp->private_data;
if (!cli)
- return -EINVAL;
+ return (__poll_t)-EINVAL;
poll_wait(filp, &cli->waitq, wait);
if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli))
diff --git a/mali_kbase/mali_malisw.h b/mali_kbase/mali_malisw.h
index fc8dcbc..d25c29f 100644
--- a/mali_kbase/mali_malisw.h
+++ b/mali_kbase/mali_malisw.h
@@ -19,7 +19,7 @@
*
*/
-/**
+/*
* Kernel-wide include for common macros and types.
*/
diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
index c9ba3fc..04f5cdf 100644
--- a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
+++ b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -152,8 +152,8 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
/* terminal fault, print info about the fault */
dev_err(kbdev->dev,
- "GPU bus fault in AS%d at VA 0x%016llX\n"
- "VA_VALID: %s\n"
+ "GPU bus fault in AS%d at PA 0x%016llX\n"
+ "PA_VALID: %s\n"
"raw fault status: 0x%X\n"
"exception type 0x%X: %s\n"
"access type 0x%X: %s\n"
diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
index fad5554..3130b33 100644
--- a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
+++ b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -66,7 +66,7 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
/* terminal fault, print info about the fault */
dev_err(kbdev->dev,
- "GPU bus fault in AS%d at VA 0x%016llX\n"
+ "GPU bus fault in AS%d at PA 0x%016llX\n"
"raw fault status: 0x%X\n"
"exception type 0x%X: %s\n"
"exception data 0x%X\n"
diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c
index fbdb7a9..c98d830 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.c
+++ b/mali_kbase/mmu/mali_kbase_mmu.c
@@ -49,8 +49,25 @@
#include <mali_kbase_trace_gpu_mem.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
+/* Threshold used to decide whether to flush full caches or just a physical range */
+#define KBASE_PA_RANGE_THRESHOLD_NR_PAGES 20
#define MGM_DEFAULT_PTE_GROUP (0)
+/* Macro to convert updated PGDs to flags indicating which levels to skip in the flush */
+#define pgd_level_to_skip_flush(dirty_pgds) (~(dirty_pgds) & 0xF)
+
+/* Small wrapper function to factor out GPU-dependent context releasing */
+static void release_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+#if MALI_USE_CSF
+ CSTD_UNUSED(kbdev);
+ kbase_ctx_sched_release_ctx_lock(kctx);
+#else /* MALI_USE_CSF */
+ kbasep_js_runpool_release_ctx(kbdev, kctx);
+#endif /* MALI_USE_CSF */
+}
+
static void mmu_hw_operation_begin(struct kbase_device *kbdev)
{
#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
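
The new pgd_level_to_skip_flush() macro added above turns the bitmask of updated page-table levels into the inverse "skip" mask consumed by the flush; a worked example with illustrative values:

/* dirty_pgds has one bit per page-table level (4 levels -> 0xF mask) */
#define example_pgd_level_to_skip_flush(dirty_pgds) (~(dirty_pgds) & 0xF)

/* e.g. dirty_pgds = 0xC (levels 2 and 3 were updated)
 *      skip mask  = ~0xC & 0xF = 0x3 (levels 0 and 1 untouched, so skip them)
 */
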
@@ -110,94 +127,66 @@ static bool mmu_flush_cache_on_gpu_ctrl(struct kbase_device *kbdev)
return arch_maj_cur > 11;
}
-/* Small wrapper function to factor out GPU-dependent context releasing */
-static void release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
-{
-#if MALI_USE_CSF
- CSTD_UNUSED(kbdev);
- kbase_ctx_sched_release_ctx_lock(kctx);
-#else /* MALI_USE_CSF */
- kbasep_js_runpool_release_ctx(kbdev, kctx);
-#endif /* MALI_USE_CSF */
-}
-
/**
- * mmu_flush_invalidate_on_gpu_ctrl() - Flush and invalidate the GPU caches
- * through GPU_CONTROL interface.
- * @kbdev: kbase device to issue the MMU operation on.
- * @as: address space to issue the MMU operation on.
- * @op_param: parameters for the operation.
- *
- * This wrapper function alternates AS_COMMAND_FLUSH_PT and AS_COMMAND_FLUSH_MEM
- * to equivalent GPU_CONTROL command FLUSH_CACHES.
- * The function first issue LOCK to MMU-AS with kbase_mmu_hw_do_operation().
- * And issues cache-flush with kbase_gpu_cache_flush_and_busy_wait() function
- * then issue UNLOCK to MMU-AS with kbase_mmu_hw_do_operation().
+ * mmu_invalidate() - Perform an invalidate operation on MMU caches.
+ * @kbdev: The Kbase device.
+ * @kctx: The Kbase context.
+ * @as_nr: GPU address space number for which invalidate is required.
+ * @op_param: Non-NULL pointer to struct containing information about the MMU
+ * operation to perform.
*
- * Return: Zero if the operation was successful, non-zero otherwise.
+ * Perform an MMU invalidate operation on a particular address space
+ * by issuing an UNLOCK command.
*/
-static int
-mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev,
- struct kbase_as *as,
- struct kbase_mmu_hw_op_param *op_param)
+static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr,
+ const struct kbase_mmu_hw_op_param *op_param)
{
- u32 flush_op;
- int ret, ret2;
-
- if (WARN_ON(kbdev == NULL) ||
- WARN_ON(as == NULL) ||
- WARN_ON(op_param == NULL))
- return -EINVAL;
+ int err = 0;
+ unsigned long flags;
- lockdep_assert_held(&kbdev->hwaccess_lock);
- lockdep_assert_held(&kbdev->mmu_hw_mutex);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- /* Translate operation to command */
- if (op_param->op == KBASE_MMU_OP_FLUSH_PT) {
- flush_op = GPU_COMMAND_CACHE_CLN_INV_L2;
- } else if (op_param->op == KBASE_MMU_OP_FLUSH_MEM) {
- flush_op = GPU_COMMAND_CACHE_CLN_INV_L2_LSC;
- } else {
- dev_warn(kbdev->dev, "Invalid flush request (op = %d)\n",
- op_param->op);
- return -EINVAL;
+ if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) {
+ as_nr = kctx ? kctx->as_nr : as_nr;
+ err = kbase_mmu_hw_do_unlock(kbdev, &kbdev->as[as_nr], op_param);
}
- /* 1. Issue MMU_AS_CONTROL.COMMAND.LOCK operation. */
- op_param->op = KBASE_MMU_OP_LOCK;
- ret = kbase_mmu_hw_do_flush_locked(kbdev, as, op_param);
- if (ret)
- return ret;
-
- /* 2. Issue GPU_CONTROL.COMMAND.FLUSH_CACHES operation */
- ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, flush_op);
-
- /* 3. Issue MMU_AS_CONTROL.COMMAND.UNLOCK operation. */
- op_param->op = KBASE_MMU_OP_UNLOCK;
- ret2 = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, op_param);
+ if (err) {
+ dev_err(kbdev->dev,
+ "Invalidate after GPU page table update did not complete. Issuing GPU soft-reset to recover");
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+ kbase_reset_gpu(kbdev);
+ }
- return ret ?: ret2;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
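+/* Minimal usage sketch for mmu_invalidate() (hypothetical caller, shown only
+ * for illustration; real callers fill op_param the same way as
+ * mmu_flush_invalidate_insert_pages() further below):
+ *
+ *	struct kbase_mmu_hw_op_param op_param = {
+ *		.vpfn = vpfn,
+ *		.nr = nr,
+ *		.op = KBASE_MMU_OP_FLUSH_PT,
+ *		.kctx_id = kctx ? kctx->id : 0xFFFFFFFF,
+ *		.mmu_sync_info = CALLER_MMU_ASYNC,
+ *		.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds),
+ *	};
+ *	mmu_invalidate(kbdev, kctx, as_nr, &op_param);
+ */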
+/* Perform a flush/invalidate on a particular address space
+ */
static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param)
{
int err = 0;
unsigned long flags;
+
/* AS transaction begin */
mutex_lock(&kbdev->mmu_hw_mutex);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
if (kbdev->pm.backend.gpu_powered)
err = kbase_mmu_hw_do_flush_locked(kbdev, as, op_param);
+
if (err) {
/* Flush failed to complete, assume the GPU has hung and
* perform a reset to recover.
*/
- dev_err(kbdev->dev,
- "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover");
- if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
- kbase_reset_gpu_locked(kbdev);
+ dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover");
+
+ if (kbase_prepare_to_reset_gpu(
+ kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+ kbase_reset_gpu(kbdev);
}
+
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
mutex_unlock(&kbdev->mmu_hw_mutex);
/* AS transaction end */
@@ -221,14 +210,19 @@ static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as
* If operation is set to KBASE_MMU_OP_FLUSH_MEM then this function will issue
* a cache flush + invalidate to the L2 and GPU Load/Store caches as well as
* invalidating the TLBs.
+ *
+ * If operation is set to KBASE_MMU_OP_UNLOCK then this function will only
+ * invalidate the MMU caches and TLBs.
*/
static void mmu_flush_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr,
const struct kbase_mmu_hw_op_param *op_param)
{
bool ctx_is_in_runpool;
+
/* Early out if there is nothing to do */
if (op_param->nr == 0)
return;
+
/* If no context is provided then MMU operation is performed on address
* space which does not belong to user space context. Otherwise, retain
* refcount to context provided and release after flush operation.
@@ -243,49 +237,67 @@ static void mmu_flush_invalidate(struct kbase_device *kbdev, struct kbase_contex
#else
ctx_is_in_runpool = kbase_ctx_sched_inc_refcount_if_as_valid(kctx);
#endif /* !MALI_USE_CSF */
+
if (ctx_is_in_runpool) {
KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr], op_param);
+
release_ctx(kbdev, kctx);
}
}
}
/**
- * mmu_flush_invalidate_teardown_pages() - Perform flush operation after unmapping pages.
- *
- * @kbdev: Pointer to kbase device.
- * @kctx: Pointer to kbase context.
- * @as_nr: Address space number, for GPU cache maintenance operations
- * that happen outside a specific kbase context.
- * @op_param: Non-NULL pointer to struct containing information about the flush
- * operation to perform.
+ * mmu_flush_invalidate_on_gpu_ctrl() - Perform a flush operation on GPU caches via
+ * the GPU_CONTROL interface
+ * @kbdev: The Kbase device.
+ * @kctx: The Kbase context.
+ * @as_nr: GPU address space number for which flush + invalidate is required.
+ * @op_param: Non-NULL pointer to struct containing information about the MMU
+ * operation to perform.
*
- * This function will do one of three things:
- * 1. Invalidate the MMU caches, followed by a partial GPU cache flush of the
- * individual pages that were unmapped if feature is supported on GPU.
- * 2. Perform a full GPU cache flush through the GPU_CONTROL interface if feature is
- * supported on GPU or,
- * 3. Perform a full GPU cache flush through the MMU_CONTROL interface.
+ * Perform a flush/invalidate on a particular address space via the GPU_CONTROL
+ * interface.
*/
-static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev,
- struct kbase_context *kctx, int as_nr,
- struct kbase_mmu_hw_op_param *op_param)
+static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_context *kctx,
+ int as_nr, const struct kbase_mmu_hw_op_param *op_param)
{
- /* Full cache flush through the MMU_COMMAND */
- mmu_flush_invalidate(kbdev, kctx, as_nr, op_param);
+ int err = 0;
+ unsigned long flags;
+
+ /* AS transaction begin */
+ mutex_lock(&kbdev->mmu_hw_mutex);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) {
+ as_nr = kctx ? kctx->as_nr : as_nr;
+ err = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[as_nr],
+ op_param);
+ }
+
+ if (err) {
+ /* Flush failed to complete, assume the GPU has hung and
+ * perform a reset to recover.
+ */
+ dev_err(kbdev->dev,
+ "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
+
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+ kbase_reset_gpu(kbdev);
+ }
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ mutex_unlock(&kbdev->mmu_hw_mutex);
}
-/**
- * kbase_mmu_sync_pgd() - sync page directory to memory when needed.
- * @kbdev: Device pointer.
- * @handle: Address of DMA region.
- * @size: Size of the region to sync.
- *
- * This should be called after each page directory update.
- */
-static void kbase_mmu_sync_pgd(struct kbase_device *kbdev,
- dma_addr_t handle, size_t size)
+static void kbase_mmu_sync_pgd_gpu(struct kbase_device *kbdev, struct kbase_context *kctx,
+ phys_addr_t phys, size_t size,
+ enum kbase_mmu_op_type flush_op)
+{
+}
+
+static void kbase_mmu_sync_pgd_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size)
{
/* In non-coherent system, ensure the GPU can read
* the pages from memory
@@ -295,6 +307,34 @@ static void kbase_mmu_sync_pgd(struct kbase_device *kbdev,
DMA_TO_DEVICE);
}
+/**
+ * kbase_mmu_sync_pgd() - sync page directory to memory when needed.
+ * @kbdev: Device pointer.
+ * @kctx: Context pointer.
+ * @phys: Starting physical address of the destination region.
+ * @handle: Address of DMA region.
+ * @size: Size of the region to sync.
+ * @flush_op: MMU cache flush operation to perform on the physical address
+ * range, if GPU control is available.
+ *
+ * This function is called whenever the association between a virtual address
+ * range and a physical address range changes, because a mapping is created or
+ * destroyed.
+ * One of the effects of this operation is performing an MMU cache flush
+ * operation only on the physical address range affected by this function, if
+ * GPU control is available.
+ *
+ * This should be called after each page directory update.
+ */
+static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, struct kbase_context *kctx,
+ phys_addr_t phys, dma_addr_t handle, size_t size,
+ enum kbase_mmu_op_type flush_op)
+{
+
+ kbase_mmu_sync_pgd_cpu(kbdev, handle, size);
+ kbase_mmu_sync_pgd_gpu(kbdev, kctx, phys, size, flush_op);
+}
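+/* Typical call pattern (illustrative, mirroring the insert/teardown paths
+ * further below): after writing 'count' PTEs starting at 'index' within the
+ * PGD page 'p', only that sub-range is synced, and the GPU-side flush is
+ * skipped when the PGD has just been allocated:
+ *
+ *	flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT;
+ *	kbase_mmu_sync_pgd(kbdev, kctx, pgd + (index * sizeof(u64)),
+ *			   kbase_dma_addr(p) + (index * sizeof(u64)),
+ *			   count * sizeof(u64), flush_op);
+ */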
+
/*
* Definitions:
* - PGD: Page Directory.
@@ -305,8 +345,8 @@ static void kbase_mmu_sync_pgd(struct kbase_device *kbdev,
*/
static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
- struct tagged_addr *phys, size_t nr,
- unsigned long flags, int group_id);
+ struct tagged_addr *phys, size_t nr, unsigned long flags,
+ int group_id, u64 *dirty_pgds);
/**
* kbase_mmu_update_and_free_parent_pgds() - Update number of valid entries and
@@ -317,14 +357,15 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
* @pgds: Physical addresses of page directories to be freed.
* @vpfn: The virtual page frame number.
* @level: The level of MMU page table.
+ * @flush_op: The type of MMU flush operation to perform.
* @dirty_pgds: Flags to track every level where a PGD has been updated.
* @free_pgds_list: Linked list of the page directory pages to free.
*/
static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut, phys_addr_t *pgds,
- u64 vpfn, int level, u64 *dirty_pgds,
+ u64 vpfn, int level,
+ enum kbase_mmu_op_type flush_op, u64 *dirty_pgds,
struct list_head *free_pgds_list);
-
/**
* kbase_mmu_free_pgd() - Free memory of the page directory
*
@@ -470,8 +511,10 @@ static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev,
static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev,
struct kbase_as *faulting_as,
u64 start_pfn, size_t nr,
- u32 kctx_id)
+ u32 kctx_id, u64 dirty_pgds)
{
+ int err;
+
/* Calls to this function are inherently synchronous, with respect to
* MMU operations.
*/
@@ -484,22 +527,23 @@ static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev,
KBASE_MMU_FAULT_TYPE_PAGE);
/* flush L2 and unlock the VA (resumes the MMU) */
- op_param = (struct kbase_mmu_hw_op_param){
- .vpfn = start_pfn,
- .nr = nr,
- .op = KBASE_MMU_OP_FLUSH_PT,
- .kctx_id = kctx_id,
- .mmu_sync_info = mmu_sync_info,
- };
+ op_param.vpfn = start_pfn;
+ op_param.nr = nr;
+ op_param.op = KBASE_MMU_OP_FLUSH_PT;
+ op_param.kctx_id = kctx_id;
+ op_param.mmu_sync_info = mmu_sync_info;
if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
unsigned long irq_flags;
spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
- mmu_flush_invalidate_on_gpu_ctrl(kbdev, faulting_as, &op_param);
+ op_param.flush_skip_levels =
+ pgd_level_to_skip_flush(dirty_pgds);
+ err = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, faulting_as,
+ &op_param);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
} else {
mmu_hw_operation_begin(kbdev);
- kbase_mmu_hw_do_flush_locked(kbdev, faulting_as, &op_param);
+ err = kbase_mmu_hw_do_flush(kbdev, faulting_as, &op_param);
mmu_hw_operation_end(kbdev);
}
@@ -539,6 +583,7 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx,
u64 fault_pfn, pfn_offset;
int ret;
int as_no;
+ u64 dirty_pgds = 0;
as_no = faulting_as->number;
kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
@@ -597,12 +642,11 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx,
}
/* Now make this faulting page writable to GPU. */
- ret = kbase_mmu_update_pages_no_flush(kctx, fault_pfn,
- fault_phys_addr,
- 1, region->flags, region->gpu_alloc->group_id);
+ ret = kbase_mmu_update_pages_no_flush(kctx, fault_pfn, fault_phys_addr, 1, region->flags,
+ region->gpu_alloc->group_id, &dirty_pgds);
kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, fault_pfn, 1,
- kctx->id);
+ kctx->id, dirty_pgds);
kbase_gpu_vm_unlock(kctx);
}
@@ -837,7 +881,6 @@ static bool page_fault_try_alloc(struct kbase_context *kctx,
return true;
}
-
void kbase_mmu_page_fault_worker(struct work_struct *data)
{
u64 fault_pfn;
@@ -1052,16 +1095,29 @@ page_fault_retry:
* transaction (which should cause the other page fault to be
* raised again).
*/
- op_param = (struct kbase_mmu_hw_op_param){
- .vpfn = 0,
- .nr = 0,
- .op = KBASE_MMU_OP_UNLOCK,
- .kctx_id = kctx->id,
- .mmu_sync_info = mmu_sync_info,
- };
- mmu_hw_operation_begin(kbdev);
- kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as, &op_param);
- mmu_hw_operation_end(kbdev);
+ op_param.mmu_sync_info = mmu_sync_info;
+ op_param.kctx_id = kctx->id;
+ if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) {
+ mmu_hw_operation_begin(kbdev);
+ err = kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as,
+ &op_param);
+ mmu_hw_operation_end(kbdev);
+ } else {
+ /* Can safely skip the invalidate for all levels in case
+ * of duplicate page faults.
+ */
+ op_param.flush_skip_levels = 0xF;
+ op_param.vpfn = fault_pfn;
+ op_param.nr = 1;
+ err = kbase_mmu_hw_do_unlock(kbdev, faulting_as,
+ &op_param);
+ }
+
+ if (err) {
+ dev_err(kbdev->dev,
+ "Invalidation for MMU did not complete on handling page fault @ 0x%llx",
+ fault->addr);
+ }
mutex_unlock(&kbdev->mmu_hw_mutex);
@@ -1089,16 +1145,29 @@ page_fault_retry:
KBASE_MMU_FAULT_TYPE_PAGE);
/* See comment [1] about UNLOCK usage */
- op_param = (struct kbase_mmu_hw_op_param){
- .vpfn = 0,
- .nr = 0,
- .op = KBASE_MMU_OP_UNLOCK,
- .kctx_id = kctx->id,
- .mmu_sync_info = mmu_sync_info,
- };
- mmu_hw_operation_begin(kbdev);
- kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as, &op_param);
- mmu_hw_operation_end(kbdev);
+ op_param.mmu_sync_info = mmu_sync_info;
+ op_param.kctx_id = kctx->id;
+ if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) {
+ mmu_hw_operation_begin(kbdev);
+ err = kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as,
+ &op_param);
+ mmu_hw_operation_end(kbdev);
+ } else {
+ /* Can safely skip the invalidate for all levels in case
+ * of duplicate page faults.
+ */
+ op_param.flush_skip_levels = 0xF;
+ op_param.vpfn = fault_pfn;
+ op_param.nr = 1;
+ err = kbase_mmu_hw_do_unlock(kbdev, faulting_as,
+ &op_param);
+ }
+
+ if (err) {
+ dev_err(kbdev->dev,
+ "Invalidation for MMU did not complete on handling page fault @ 0x%llx",
+ fault->addr);
+ }
mutex_unlock(&kbdev->mmu_hw_mutex);
@@ -1164,7 +1233,7 @@ page_fault_retry:
(u64)new_pages);
trace_mali_mmu_page_fault_grow(region, fault, new_pages);
-#if MALI_INCREMENTAL_RENDERING
+#if MALI_INCREMENTAL_RENDERING_JM
/* Switch to incremental rendering if we have nearly run out of
* memory in a JIT memory allocation.
*/
@@ -1200,24 +1269,22 @@ page_fault_retry:
kbase_mmu_hw_clear_fault(kbdev, faulting_as,
KBASE_MMU_FAULT_TYPE_PAGE);
- /* flush L2 and unlock the VA (resumes the MMU) */
- op_param = (struct kbase_mmu_hw_op_param){
- .vpfn = fault->addr >> PAGE_SHIFT,
- .nr = new_pages,
- .op = KBASE_MMU_OP_FLUSH_PT,
- .kctx_id = kctx->id,
- .mmu_sync_info = mmu_sync_info,
- };
+ op_param.vpfn = region->start_pfn + pfn_offset;
+ op_param.nr = new_pages;
+ op_param.op = KBASE_MMU_OP_FLUSH_PT;
+ op_param.kctx_id = kctx->id;
+ op_param.mmu_sync_info = mmu_sync_info;
if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
- unsigned long irq_flags;
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
- err = mmu_flush_invalidate_on_gpu_ctrl(kbdev, faulting_as,
- &op_param);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+ /* Unlock to invalidate the TLB (and resume the MMU) */
+ op_param.flush_skip_levels =
+ pgd_level_to_skip_flush(dirty_pgds);
+ err = kbase_mmu_hw_do_unlock(kbdev, faulting_as,
+ &op_param);
} else {
+ /* flush L2 and unlock the VA (resumes the MMU) */
mmu_hw_operation_begin(kbdev);
- err = kbase_mmu_hw_do_flush_locked(kbdev, faulting_as, &op_param);
+ err = kbase_mmu_hw_do_flush(kbdev, faulting_as,
+ &op_param);
mmu_hw_operation_end(kbdev);
}
@@ -1335,15 +1402,18 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev,
{
u64 *page;
struct page *p;
+ phys_addr_t pgd;
p = kbase_mem_pool_alloc(&kbdev->mem_pools.small[mmut->group_id]);
if (!p)
- return 0;
+ return KBASE_MMU_INVALID_PGD_ADDRESS;
page = kmap(p);
if (page == NULL)
goto alloc_free;
+ pgd = page_to_phys(p);
+
/* If the MMU tables belong to a context then account the memory usage
* to that context, otherwise the MMU tables are device wide and are
* only accounted to the device.
@@ -1366,23 +1436,26 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev,
kbdev->mmu_mode->entries_invalidate(page, KBASE_MMU_PAGE_ENTRIES);
- kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE);
+	/* As this page is newly created, there is no content to clean or
+	 * invalidate in the GPU caches.
+ */
+ kbase_mmu_sync_pgd_cpu(kbdev, kbase_dma_addr(p), PAGE_SIZE);
kunmap(p);
- return page_to_phys(p);
+ return pgd;
alloc_free:
kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, false);
- return 0;
+ return KBASE_MMU_INVALID_PGD_ADDRESS;
}
/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the
* new table from the pool if needed and possible
*/
-static int mmu_get_next_pgd(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut,
- phys_addr_t *pgd, u64 vpfn, int level)
+static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ phys_addr_t *pgd, u64 vpfn, int level, bool *newly_created_pgd,
+ u64 *dirty_pgds)
{
u64 *page;
phys_addr_t target_pgd;
@@ -1406,15 +1479,13 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev,
return -EINVAL;
}
- target_pgd = kbdev->mmu_mode->pte_to_phy_addr(
- kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
- kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[vpfn]));
-
- if (!target_pgd) {
+ if (!kbdev->mmu_mode->pte_is_valid(page[vpfn], level)) {
+ enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE;
unsigned int current_valid_entries;
u64 managed_pte;
+
target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut);
- if (!target_pgd) {
+ if (target_pgd == KBASE_MMU_INVALID_PGD_ADDRESS) {
dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure\n",
__func__);
kunmap(p);
@@ -1427,8 +1498,30 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev,
kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, managed_pte);
kbdev->mmu_mode->set_num_valid_entries(page, current_valid_entries + 1);
- kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE);
/* Rely on the caller to update the address space flags. */
+ if (newly_created_pgd && !*newly_created_pgd) {
+ *newly_created_pgd = true;
+			/* If code reaches here, we know the parent PGD of the target
+			 * PGD was not newly created and must be flushed.
+ */
+ flush_op = KBASE_MMU_OP_FLUSH_PT;
+
+ if (dirty_pgds)
+ *dirty_pgds |= 1ULL << level;
+ }
+
+ /* A new valid entry is added to an existing PGD. Perform the
+		 * invalidate operation on the GPU cache, as it may be holding a
+		 * cacheline that contains the entry (in its old, invalid form).
+ */
+ kbase_mmu_sync_pgd(kbdev, mmut->kctx,
+ *pgd + (vpfn * sizeof(u64)),
+ kbase_dma_addr(p) + (vpfn * sizeof(u64)),
+ sizeof(u64), flush_op);
+ } else {
+ target_pgd = kbdev->mmu_mode->pte_to_phy_addr(
+ kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[vpfn]));
}
kunmap(p);
@@ -1440,11 +1533,9 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev,
/*
* Returns the PGD for the specified level of translation
*/
-static int mmu_get_pgd_at_level(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut,
- u64 vpfn,
- int level,
- phys_addr_t *out_pgd)
+static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
+ int level, phys_addr_t *out_pgd, bool *newly_created_pgd,
+ u64 *dirty_pgds)
{
phys_addr_t pgd;
int l;
@@ -1453,7 +1544,8 @@ static int mmu_get_pgd_at_level(struct kbase_device *kbdev,
pgd = mmut->pgd;
for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) {
- int err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l);
+ int err =
+ mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l, newly_created_pgd, dirty_pgds);
/* Handle failure condition */
if (err) {
dev_dbg(kbdev->dev,
@@ -1468,13 +1560,11 @@ static int mmu_get_pgd_at_level(struct kbase_device *kbdev,
return 0;
}
-static int mmu_get_bottom_pgd(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut,
- u64 vpfn,
- phys_addr_t *out_pgd)
+static int mmu_get_bottom_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
+ phys_addr_t *out_pgd, bool *newly_created_pgd, u64 *dirty_pgds)
{
- return mmu_get_pgd_at_level(kbdev, mmut, vpfn, MIDGARD_MMU_BOTTOMLEVEL,
- out_pgd);
+ return mmu_get_pgd_at_level(kbdev, mmut, vpfn, MIDGARD_MMU_BOTTOMLEVEL, out_pgd,
+ newly_created_pgd, dirty_pgds);
}
static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
@@ -1538,6 +1628,9 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
goto next;
}
+ if (dirty_pgds && pcount > 0)
+ *dirty_pgds |= 1ULL << level;
+
num_of_valid_entries = mmu_mode->get_num_valid_entries(page);
if (WARN_ON_ONCE(num_of_valid_entries < pcount))
num_of_valid_entries = 0;
@@ -1553,19 +1646,21 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
list_add(&p->lru, free_pgds_list);
kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level,
- dirty_pgds, free_pgds_list);
-
+ KBASE_MMU_OP_NONE, dirty_pgds,
+ free_pgds_list);
vpfn += count;
continue;
}
mmu_mode->set_num_valid_entries(page, num_of_valid_entries);
-
- kbase_mmu_sync_pgd(kbdev,
- kbase_dma_addr(phys_to_page(pgd)) + sizeof(u64) * idx,
- sizeof(u64) * pcount);
- kunmap(phys_to_page(pgd));
+ /* MMU cache flush strategy is NONE because GPU cache maintenance is
+ * going to be done by the caller
+ */
+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (idx * sizeof(u64)),
+ kbase_dma_addr(p) + sizeof(u64) * idx, sizeof(u64) * pcount,
+ KBASE_MMU_OP_NONE);
+ kunmap(p);
next:
vpfn += count;
}
@@ -1584,6 +1679,7 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev,
op_param.op = KBASE_MMU_OP_FLUSH_PT;
op_param.mmu_sync_info = mmu_sync_info;
op_param.kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF;
+ op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds);
#if MALI_USE_CSF
as_nr = mmut->kctx ? mmut->kctx->as_nr : MCU_AS_NR;
@@ -1591,7 +1687,18 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev,
WARN_ON(!mmut->kctx);
#endif
- mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param);
+ /* MMU cache flush strategy depends on whether GPU control commands for
+ * flushing physical address ranges are supported. The new physical pages
+	 * are not present in GPU caches and therefore don't need any cache
+ * maintenance, but PGDs in the page table may or may not be created anew.
+ *
+ * Operations that affect the whole GPU cache shall only be done if it's
+ * impossible to update physical ranges.
+ */
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev))
+ mmu_invalidate(kbdev, mmut->kctx, as_nr, &op_param);
+ else
+ mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param);
}
/*
@@ -1613,6 +1720,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
size_t remain = nr;
int err;
struct kbase_device *kbdev;
+ enum kbase_mmu_op_type flush_op;
u64 dirty_pgds = 0;
LIST_HEAD(free_pgds_list);
@@ -1636,6 +1744,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
struct page *p;
register unsigned int num_of_valid_entries;
+ bool newly_created_pgd = false;
if (count > remain)
count = remain;
@@ -1648,8 +1757,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
* 256 pages at once (on average). Do we really care?
*/
do {
- err = mmu_get_bottom_pgd(kbdev, &kctx->mmu,
- vpfn, &pgd);
+ err = mmu_get_bottom_pgd(kbdev, &kctx->mmu, vpfn, &pgd, &newly_created_pgd,
+ &dirty_pgds);
if (err != -ENOMEM)
break;
/* Fill the memory pool with enough pages for
@@ -1669,7 +1778,6 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
/* Invalidate the pages we have partially
* completed
*/
-
mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn,
start_vpfn + recover_count,
&dirty_pgds, &free_pgds_list);
@@ -1712,9 +1820,21 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
vpfn += count;
remain -= count;
- kbase_mmu_sync_pgd(kbdev,
- kbase_dma_addr(p) + (index * sizeof(u64)),
- count * sizeof(u64));
+ if (count > 0 && !newly_created_pgd)
+ dirty_pgds |= 1ULL << MIDGARD_MMU_BOTTOMLEVEL;
+
+ /* MMU cache flush operation here will depend on whether bottom level
+ * PGD is newly created or not.
+ *
+ * If bottom level PGD is newly created then no cache maintenance is
+ * required as the PGD will not exist in GPU cache. Otherwise GPU cache
+ * maintenance is required for existing PGD.
+ */
+ flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT;
+
+ kbase_mmu_sync_pgd(kbdev, kctx, pgd + (index * sizeof(u64)),
+ kbase_dma_addr(p) + (index * sizeof(u64)), count * sizeof(u64),
+ flush_op);
kunmap(p);
/* We have started modifying the page table.
@@ -1737,7 +1857,6 @@ fail_unlock:
mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds,
mmu_sync_info);
kbase_mmu_free_pgds_list(kbdev, &kctx->mmu, &free_pgds_list);
-
return err;
}
@@ -1783,6 +1902,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
struct page *p;
int cur_level;
register unsigned int num_of_valid_entries;
+ enum kbase_mmu_op_type flush_op;
+ bool newly_created_pgd = false;
if (count > remain)
count = remain;
@@ -1800,8 +1921,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
* 256 pages at once (on average). Do we really care?
*/
do {
- err = mmu_get_pgd_at_level(kbdev, mmut, insert_vpfn,
- cur_level, &pgd);
+ err = mmu_get_pgd_at_level(kbdev, mmut, insert_vpfn, cur_level, &pgd,
+ &newly_created_pgd, dirty_pgds);
if (err != -ENOMEM)
break;
/* Fill the memory pool with enough pages for
@@ -1815,8 +1936,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
} while (!err);
if (err) {
- dev_warn(kbdev->dev,
- "%s: mmu_get_bottom_pgd failure\n", __func__);
+ dev_warn(kbdev->dev, "%s: mmu_get_pgd_at_level failure\n", __func__);
if (insert_vpfn != start_vpfn) {
/* Invalidate the pages we have partially
* completed
@@ -1837,7 +1957,6 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
/* Invalidate the pages we have partially
* completed
*/
-
mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn,
insert_vpfn, dirty_pgds,
&free_pgds_list);
@@ -1877,13 +1996,28 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries);
+ if (dirty_pgds && count > 0 && !newly_created_pgd)
+ *dirty_pgds |= 1ULL << cur_level;
+
phys += count;
insert_vpfn += count;
remain -= count;
- kbase_mmu_sync_pgd(kbdev,
- kbase_dma_addr(p) + (vindex * sizeof(u64)),
- count * sizeof(u64));
+ /* For the most part, the creation of a new virtual memory mapping does
+		 * not require cache flush operations, because the operation results
+		 * in the creation of new memory pages which are not present in GPU
+		 * caches. Therefore the default operation is NONE.
+ *
+ * However, it is quite common for the mapping to start and/or finish
+ * at an already existing PGD. Moreover, the PTEs modified are not
+ * necessarily aligned with GPU cache lines. Therefore, GPU cache
+ * maintenance is required for existing PGDs.
+ */
+ flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT;
+
+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (vindex * sizeof(u64)),
+ kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64),
+ flush_op);
kunmap(p);
}
@@ -1916,9 +2050,12 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev,
u64 dirty_pgds = 0;
LIST_HEAD(free_pgds_list);
+ /* Early out if there is nothing to do */
+ if (nr == 0)
+ return 0;
+
err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id,
&dirty_pgds);
-
if (err)
return err;
@@ -1930,7 +2067,7 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev,
KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
/**
- * kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches
+ * kbase_mmu_flush_noretain() - Flush and invalidate the GPU caches
* without retaining the kbase context.
* @kctx: The KBase context.
* @vpfn: The virtual page frame number to start the flush on.
@@ -1939,17 +2076,15 @@ KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
* As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any
* other locking.
*/
-static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx,
- u64 vpfn, size_t nr)
+static void kbase_mmu_flush_noretain(struct kbase_context *kctx, u64 vpfn, size_t nr)
{
struct kbase_device *kbdev = kctx->kbdev;
- struct kbase_mmu_hw_op_param op_param;
int err;
-
/* Calls to this function are inherently asynchronous, with respect to
* MMU operations.
*/
const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+ struct kbase_mmu_hw_op_param op_param;
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
@@ -1959,155 +2094,32 @@ static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx,
return;
/* flush L2 and unlock the VA (resumes the MMU) */
- op_param = (struct kbase_mmu_hw_op_param){
- .vpfn = vpfn,
- .nr = nr,
- .op = KBASE_MMU_OP_FLUSH_MEM,
- .kctx_id = kctx->id,
- .mmu_sync_info = mmu_sync_info,
- };
-
+ op_param.vpfn = vpfn;
+ op_param.nr = nr;
+ op_param.op = KBASE_MMU_OP_FLUSH_MEM;
+ op_param.kctx_id = kctx->id;
+ op_param.mmu_sync_info = mmu_sync_info;
if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
- err = mmu_flush_invalidate_on_gpu_ctrl(
- kbdev, &kbdev->as[kctx->as_nr], &op_param);
+ /* Value used to prevent skipping of any levels when flushing */
+ op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF);
+ err = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[kctx->as_nr],
+ &op_param);
} else {
- err = kbase_mmu_hw_do_flush_locked(kbdev, &kbdev->as[kctx->as_nr], &op_param);
+ err = kbase_mmu_hw_do_flush_locked(kbdev, &kbdev->as[kctx->as_nr],
+ &op_param);
}
if (err) {
/* Flush failed to complete, assume the
* GPU has hung and perform a reset to recover
*/
- dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
+ dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover");
if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu_locked(kbdev);
}
}
-/* Perform a flush/invalidate on a particular address space
- */
-static void
-kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as,
- u64 vpfn, size_t nr, bool sync, u32 kctx_id,
- enum kbase_caller_mmu_sync_info mmu_sync_info)
-{
- int err;
- bool gpu_powered;
- unsigned long flags;
- struct kbase_mmu_hw_op_param op_param;
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- gpu_powered = kbdev->pm.backend.gpu_powered;
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
- /* GPU is off so there's no need to perform flush/invalidate.
- * But even if GPU is not actually powered down, after gpu_powered flag
- * was set to false, it is still safe to skip the flush/invalidate.
- * The TLB invalidation will anyways be performed due to AS_COMMAND_UPDATE
- * which is sent when address spaces are restored after gpu_powered flag
- * is set to true. Flushing of L2 cache is certainly not required as L2
- * cache is definitely off if gpu_powered is false.
- */
- if (!gpu_powered)
- return;
-
- if (kbase_pm_context_active_handle_suspend(kbdev,
- KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
- /* GPU has just been powered off due to system suspend.
- * So again, no need to perform flush/invalidate.
- */
- return;
- }
-
- /*
- * Taking a pm reference does not guarantee that the GPU has finished powering up.
- * It's possible that the power up has been deferred until after a scheduled power down.
- * We must wait here for the L2 to be powered up, and holding a pm reference guarantees that
- * it will not be powered down afterwards.
- */
- err = kbase_pm_wait_for_l2_powered(kbdev);
- if (err) {
- dev_err(kbdev->dev, "Wait for L2 power up failed, skipping MMU command");
- /* Drop the pm ref */
- goto idle;
- }
-
- /* AS transaction begin */
- mutex_lock(&kbdev->mmu_hw_mutex);
-
- op_param = (struct kbase_mmu_hw_op_param){
- .vpfn = vpfn,
- .nr = nr,
- .kctx_id = kctx_id,
- .mmu_sync_info = mmu_sync_info,
- };
-
- if (sync)
- op_param.op = KBASE_MMU_OP_FLUSH_MEM;
- else
- op_param.op = KBASE_MMU_OP_FLUSH_PT;
-
- if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- err = mmu_flush_invalidate_on_gpu_ctrl(kbdev, as, &op_param);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
- } else {
- mmu_hw_operation_begin(kbdev);
- err = kbase_mmu_hw_do_flush_locked(kbdev, as, &op_param);
- mmu_hw_operation_end(kbdev);
- }
-
- if (err) {
- /* Flush failed to complete, assume the GPU has hung and
- * perform a reset to recover
- */
- dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
-
- if (kbase_prepare_to_reset_gpu(
- kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
- kbase_reset_gpu(kbdev);
- }
-
- mutex_unlock(&kbdev->mmu_hw_mutex);
- /* AS transaction end */
-
-idle:
- kbase_pm_context_idle(kbdev);
-}
-
-static void
-kbase_mmu_flush_invalidate(struct kbase_context *kctx, u64 vpfn, size_t nr,
- bool sync,
- enum kbase_caller_mmu_sync_info mmu_sync_info)
-{
- struct kbase_device *kbdev;
- bool ctx_is_in_runpool;
-
- /* Early out if there is nothing to do */
- if (nr == 0)
- return;
-
- kbdev = kctx->kbdev;
-#if !MALI_USE_CSF
- rt_mutex_lock(&kbdev->js_data.queue_mutex);
- ctx_is_in_runpool = kbase_ctx_sched_inc_refcount(kctx);
- rt_mutex_unlock(&kbdev->js_data.queue_mutex);
-#else
- ctx_is_in_runpool = kbase_ctx_sched_inc_refcount_if_as_valid(kctx);
-#endif /* !MALI_USE_CSF */
-
- if (ctx_is_in_runpool) {
- KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
-
- kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr],
- vpfn, nr, sync, kctx->id,
- mmu_sync_info);
-
- release_ctx(kbdev, kctx);
- }
-}
-
void kbase_mmu_update(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut,
int as_nr)
@@ -2147,7 +2159,7 @@ void kbase_mmu_disable(struct kbase_context *kctx)
* The job scheduler code will already be holding the locks and context
* so just do the flush.
*/
- kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0);
+ kbase_mmu_flush_noretain(kctx, 0, ~0);
kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
#if !MALI_USE_CSF
@@ -2164,7 +2176,8 @@ KBASE_EXPORT_TEST_API(kbase_mmu_disable);
static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut, phys_addr_t *pgds,
- u64 vpfn, int level, u64 *dirty_pgds,
+ u64 vpfn, int level,
+ enum kbase_mmu_op_type flush_op, u64 *dirty_pgds,
struct list_head *free_pgds_list)
{
int current_level;
@@ -2180,11 +2193,23 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
kbdev->mmu_mode->get_num_valid_entries(current_page);
int index = (vpfn >> ((3 - current_level) * 9)) & 0x1FF;
+ /* We need to track every level that needs updating */
+ if (dirty_pgds)
+ *dirty_pgds |= 1ULL << current_level;
+
kbdev->mmu_mode->entries_invalidate(&current_page[index], 1);
if (current_valid_entries == 1 &&
current_level != MIDGARD_MMU_LEVEL(0)) {
kunmap(p);
+ /* Ensure the cacheline containing the last valid entry
+ * of PGD is invalidated from the GPU cache, before the
+ * PGD page is freed.
+ */
+ kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx,
+ current_pgd + (index * sizeof(u64)),
+ sizeof(u64), flush_op);
+
list_add(&p->lru, free_pgds_list);
} else {
current_valid_entries--;
@@ -2193,14 +2218,62 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
current_page, current_valid_entries);
kunmap(p);
- kbase_mmu_sync_pgd(kbdev,
- kbase_dma_addr(p) + (index * sizeof(u64)), sizeof(u64));
+
+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, current_pgd + (index * sizeof(u64)),
+ kbase_dma_addr(p) + (index * sizeof(u64)), sizeof(u64),
+ flush_op);
break;
}
}
}
-/*
+/**
+ * mmu_flush_invalidate_teardown_pages() - Perform flush operation after unmapping pages.
+ *
+ * @kbdev: Pointer to kbase device.
+ * @kctx: Pointer to kbase context.
+ * @as_nr: Address space number, for GPU cache maintenance operations
+ * that happen outside a specific kbase context.
+ * @phys: Array of physical pages to flush.
+ * @op_param: Non-NULL pointer to struct containing information about the flush
+ * operation to perform.
+ *
+ * This function will do one of three things:
+ * 1. Invalidate the MMU caches, followed by a partial GPU cache flush of the
+ * individual pages that were unmapped if feature is supported on GPU.
+ * 2. Perform a full GPU cache flush through the GPU_CONTROL interface if feature is
+ * supported on GPU or,
+ * 3. Perform a full GPU cache flush through the MMU_CONTROL interface.
+ */
+static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev,
+ struct kbase_context *kctx, int as_nr,
+ struct tagged_addr *phys,
+ struct kbase_mmu_hw_op_param *op_param)
+{
+
+ if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) {
+ mmu_flush_invalidate(kbdev, kctx, as_nr, op_param);
+ return;
+ } else if (op_param->op == KBASE_MMU_OP_FLUSH_MEM) {
+ mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, as_nr, op_param);
+ return;
+ }
+
+}
+
+/**
+ * kbase_mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table
+ *
+ * @kbdev: Pointer to kbase device.
+ * @mmut: Pointer to GPU MMU page table.
+ * @vpfn: Start page frame number of the GPU virtual pages to unmap.
+ * @phys: Array of physical pages currently mapped to the virtual
+ * pages to unmap, or NULL. This is only used for GPU cache
+ * maintenance.
+ * @nr: Number of pages to unmap.
+ * @as_nr: Address space number, for GPU cache maintenance operations
+ * that happen outside a specific kbase context.
+ *
* We actually discard the ATE and free the page table pages if no valid entries
* exist in PGD.
*
@@ -2209,14 +2282,22 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
* These locks must be taken in the correct order with respect to others
* already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
* information.
+ *
+ * The @p phys pointer to physical pages is not necessary for unmapping virtual memory,
+ * but it is used for fine-grained GPU cache maintenance. If @p phys is NULL,
+ * GPU cache maintenance will be done as usual, that is, by invalidating the GPU caches as a whole
+ * instead of specific physical address ranges.
+ *
+ * Return: 0 on success, otherwise an error code.
*/
-int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut, u64 vpfn, size_t nr, int as_nr)
+int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
+ struct tagged_addr *phys, size_t nr, int as_nr)
{
u64 start_vpfn = vpfn;
size_t requested_nr = nr;
- struct kbase_mmu_hw_op_param op_param;
+ enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE;
struct kbase_mmu_mode const *mmu_mode;
+ struct kbase_mmu_hw_op_param op_param;
int err = -EFAULT;
u64 dirty_pgds = 0;
LIST_HEAD(free_pgds_list);
@@ -2230,6 +2311,19 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
/* early out if nothing to do */
return 0;
}
+	/* MMU cache flush strategy depends on the number of pages to unmap. In either case
+	 * the operation is an invalidate, but the granularity of cache maintenance may
+	 * change according to the situation.
+ *
+ * If GPU control command operations are present and the number of pages is "small",
+ * then the optimal strategy is flushing on the physical address range of the pages
+ * which are affected by the operation. That implies both the PGDs which are modified
+ * or removed from the page table and the physical pages which are freed from memory.
+ *
+ * Otherwise, there's no alternative to invalidating the whole GPU cache.
+ */
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev) && phys && nr <= KBASE_PA_RANGE_THRESHOLD_NR_PAGES)
+ flush_op = KBASE_MMU_OP_FLUSH_PT;
if (!rt_mutex_trylock(&mmut->mmu_lock)) {
/*
@@ -2329,6 +2423,9 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
continue;
}
+ if (pcount > 0)
+ dirty_pgds |= 1ULL << level;
+
num_of_valid_entries = mmu_mode->get_num_valid_entries(page);
if (WARN_ON_ONCE(num_of_valid_entries < pcount))
num_of_valid_entries = 0;
@@ -2341,10 +2438,19 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
if (!num_of_valid_entries) {
kunmap(p);
+ /* Ensure the cacheline(s) containing the last valid entries
+ * of PGD is invalidated from the GPU cache, before the
+ * PGD page is freed.
+ */
+ kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx,
+ pgd + (index * sizeof(u64)),
+ pcount * sizeof(u64), flush_op);
+
list_add(&p->lru, &free_pgds_list);
kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level,
- &dirty_pgds, &free_pgds_list);
+ flush_op, &dirty_pgds,
+ &free_pgds_list);
vpfn += count;
nr -= count;
@@ -2353,11 +2459,9 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
mmu_mode->set_num_valid_entries(page, num_of_valid_entries);
-
- kbase_mmu_sync_pgd(
- kbdev, kbase_dma_addr(p) + (index * sizeof(u64)),
- pcount * sizeof(u64));
-
+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)),
+ kbase_dma_addr(p) + (index * sizeof(u64)), pcount * sizeof(u64),
+ flush_op);
next:
kunmap(p);
vpfn += count;
@@ -2369,13 +2473,14 @@ out:
/* Set up MMU operation parameters. See above about MMU cache flush strategy. */
op_param = (struct kbase_mmu_hw_op_param){
.vpfn = start_vpfn,
- .mmu_sync_info = mmu_sync_info,
.nr = requested_nr,
+ .mmu_sync_info = mmu_sync_info,
.kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF,
- .op = KBASE_MMU_OP_FLUSH_MEM,
+ .op = (flush_op == KBASE_MMU_OP_FLUSH_PT) ? KBASE_MMU_OP_FLUSH_PT :
+ KBASE_MMU_OP_FLUSH_MEM,
+ .flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds),
};
-
- mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, &op_param);
+ mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, &op_param);
kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list);
@@ -2397,6 +2502,7 @@ KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
* @flags: Flags
* @group_id: The physical memory group in which the page was allocated.
* Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ * @dirty_pgds: Flags to track every level where a PGD has been updated.
*
* This will update page table entries that already exist on the GPU based on
* the new flags that are passed (the physical pages pointed to by the page
@@ -2409,8 +2515,8 @@ KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
* successfully, otherwise an error code.
*/
static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
- struct tagged_addr *phys, size_t nr,
- unsigned long flags, int const group_id)
+ struct tagged_addr *phys, size_t nr, unsigned long flags,
+ int const group_id, u64 *dirty_pgds)
{
phys_addr_t pgd;
u64 *pgd_page;
@@ -2444,7 +2550,8 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
if (is_huge(*phys) && (index == index_in_large_page(*phys)))
cur_level = MIDGARD_MMU_LEVEL(2);
- err = mmu_get_pgd_at_level(kbdev, &kctx->mmu, vpfn, cur_level, &pgd);
+ err = mmu_get_pgd_at_level(kbdev, &kctx->mmu, vpfn, cur_level, &pgd, NULL,
+ dirty_pgds);
if (WARN_ON(err))
goto fail_unlock;
@@ -2471,9 +2578,9 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
pgd_page[level_index] = kbase_mmu_create_ate(kbdev,
*target_phys, flags, MIDGARD_MMU_LEVEL(2),
group_id);
- kbase_mmu_sync_pgd(kbdev,
- kbase_dma_addr(p) + (level_index * sizeof(u64)),
- sizeof(u64));
+ kbase_mmu_sync_pgd(kbdev, kctx, pgd + (level_index * sizeof(u64)),
+ kbase_dma_addr(p) + (level_index * sizeof(u64)),
+ sizeof(u64), KBASE_MMU_OP_NONE);
} else {
for (i = 0; i < count; i++) {
#ifdef CONFIG_MALI_DEBUG
@@ -2485,14 +2592,21 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
phys[i], flags, MIDGARD_MMU_BOTTOMLEVEL,
group_id);
}
- kbase_mmu_sync_pgd(kbdev,
- kbase_dma_addr(p) + (index * sizeof(u64)),
- count * sizeof(u64));
+
+ /* MMU cache flush strategy is NONE because GPU cache maintenance
+ * will be done by the caller.
+ */
+ kbase_mmu_sync_pgd(kbdev, kctx, pgd + (index * sizeof(u64)),
+ kbase_dma_addr(p) + (index * sizeof(u64)),
+ count * sizeof(u64), KBASE_MMU_OP_NONE);
}
kbdev->mmu_mode->set_num_valid_entries(pgd_page,
num_of_valid_entries);
+ if (dirty_pgds && count > 0)
+ *dirty_pgds |= 1ULL << cur_level;
+
phys += count;
vpfn += count;
nr -= count;
@@ -2513,15 +2627,29 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
unsigned long flags, int const group_id)
{
int err;
+ struct kbase_mmu_hw_op_param op_param;
+ u64 dirty_pgds = 0;
/* Calls to this function are inherently asynchronous, with respect to
* MMU operations.
*/
const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
- err = kbase_mmu_update_pages_no_flush(kctx, vpfn, phys, nr, flags,
- group_id);
- kbase_mmu_flush_invalidate(kctx, vpfn, nr, true, mmu_sync_info);
+ err = kbase_mmu_update_pages_no_flush(kctx, vpfn, phys, nr, flags, group_id, &dirty_pgds);
+
+ op_param = (const struct kbase_mmu_hw_op_param){
+ .vpfn = vpfn,
+ .nr = nr,
+ .op = KBASE_MMU_OP_FLUSH_MEM,
+ .kctx_id = kctx->id,
+ .mmu_sync_info = mmu_sync_info,
+ .flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds),
+ };
+
+ if (mmu_flush_cache_on_gpu_ctrl(kctx->kbdev))
+ mmu_flush_invalidate_on_gpu_ctrl(kctx->kbdev, kctx, kctx->as_nr, &op_param);
+ else
+ mmu_flush_invalidate(kctx->kbdev, kctx, kctx->as_nr, &op_param);
return err;
}
@@ -2583,7 +2711,7 @@ int kbase_mmu_init(struct kbase_device *const kbdev,
mmut->group_id = group_id;
rt_mutex_init(&mmut->mmu_lock);
mmut->kctx = kctx;
- mmut->pgd = 0;
+ mmut->pgd = KBASE_MMU_INVALID_PGD_ADDRESS;
/* Preallocate MMU depth of 3 pages for mmu_teardown_level to use */
for (level = MIDGARD_MMU_TOPLEVEL;
@@ -2601,7 +2729,7 @@ int kbase_mmu_init(struct kbase_device *const kbdev,
* kbase_mmu_alloc_pgd will allocate out of that pool. This is done to
* avoid allocations from the kernel happening with the lock held.
*/
- while (!mmut->pgd) {
+ while (mmut->pgd == KBASE_MMU_INVALID_PGD_ADDRESS) {
int err;
err = kbase_mem_pool_grow(
@@ -2624,7 +2752,7 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
{
int level;
- if (mmut->pgd) {
+ if (mmut->pgd != KBASE_MMU_INVALID_PGD_ADDRESS) {
rt_mutex_lock(&mmut->mmu_lock);
mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL);
rt_mutex_unlock(&mmut->mmu_lock);
diff --git a/mali_kbase/mmu/mali_kbase_mmu.h b/mali_kbase/mmu/mali_kbase_mmu.h
index 1c8e8b0..5330306 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.h
+++ b/mali_kbase/mmu/mali_kbase_mmu.h
@@ -25,6 +25,7 @@
#include <uapi/gpu/arm/midgard/mali_base_kernel.h>
#define KBASE_MMU_PAGE_ENTRIES 512
+#define KBASE_MMU_INVALID_PGD_ADDRESS (~(phys_addr_t)0)
struct kbase_context;
struct kbase_mmu_table;
@@ -142,9 +143,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
unsigned long flags, int group_id,
enum kbase_caller_mmu_sync_info mmu_sync_info);
-int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut, u64 vpfn,
- size_t nr, int as_nr);
+int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
+ struct tagged_addr *phys, size_t nr, int as_nr);
int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
struct tagged_addr *phys, size_t nr,
unsigned long flags, int const group_id);
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw.h b/mali_kbase/mmu/mali_kbase_mmu_hw.h
index 4f73380..438dd5e 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw.h
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw.h
@@ -75,12 +75,14 @@ enum kbase_mmu_op_type {
};
/**
- * struct kbase_mmu_hw_op_param - parameters for kbase_mmu_hw_do_operation()
- * @vpfn: MMU Virtual Page Frame Number to start the operation on.
- * @nr: Number of pages to work on.
- * @op: Operation type (written to ASn_COMMAND).
- * @kctx_id: Kernel context ID for MMU command tracepoint
- * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops.
+ * struct kbase_mmu_hw_op_param - parameters for kbase_mmu_hw_do_* functions
+ * @vpfn: MMU Virtual Page Frame Number to start the operation on.
+ * @nr: Number of pages to work on.
+ * @op: Operation type (written to ASn_COMMAND).
+ * @kctx_id: Kernel context ID for MMU command tracepoint.
+ * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops.
+ * @flush_skip_levels: Page table levels to skip flushing. (Only
+ * applicable if GPU supports feature)
*/
struct kbase_mmu_hw_op_param {
u64 vpfn;
@@ -88,6 +90,7 @@ struct kbase_mmu_hw_op_param {
enum kbase_mmu_op_type op;
u32 kctx_id;
enum kbase_caller_mmu_sync_info mmu_sync_info;
+ u64 flush_skip_levels;
};
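+/* Example (illustrative only): @flush_skip_levels is a bitmask where a set
+ * bit N means page table level N is skipped. Callers in mali_kbase_mmu.c
+ * derive it from the levels they actually modified, e.g.
+ *
+ *	op_param.flush_skip_levels = 0x3;  skip levels 0 and 1
+ *	op_param.flush_skip_levels = 0xF;  skip every level (duplicate page faults)
+ */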
/**
@@ -111,13 +114,11 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev,
* @op_param: Pointer to struct containing information about the MMU
* operation to perform.
*
- * This function should be called for GPU where GPU command is used to flush
- * the cache(s) instead of MMU command.
- *
* Return: 0 if issuing the command was successful, otherwise an error code.
*/
int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param);
+
/**
* kbase_mmu_hw_do_unlock - Issue UNLOCK command to the MMU and wait for it
* to complete before returning.
@@ -144,10 +145,13 @@ int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as,
* GPUs where MMU command to flush the cache(s) is deprecated.
* mmu_hw_mutex needs to be held when calling this function.
*
- * Return: 0 if the operation was successful, non-zero otherwise.
+ * Context: Acquires the hwaccess_lock, expects the caller to hold the mmu_hw_mutex
+ *
+ * Return: Zero if the operation was successful, non-zero otherwise.
*/
int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param);
+
/**
* kbase_mmu_hw_do_flush_locked - Issue a flush operation to the MMU.
*
@@ -162,12 +166,29 @@ int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
* Both mmu_hw_mutex and hwaccess_lock need to be held when calling this
* function.
*
- * Return: 0 if the operation was successful, non-zero otherwise.
+ * Return: Zero if the operation was successful, non-zero otherwise.
*/
int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param);
/**
+ * kbase_mmu_hw_do_flush_on_gpu_ctrl - Issue a flush operation to the MMU.
+ *
+ * @kbdev: Kbase device to issue the MMU operation on.
+ * @as: Address space to issue the MMU operation on.
+ * @op_param: Pointer to struct containing information about the MMU
+ * operation to perform.
+ *
+ * Issue a flush operation on the address space as per the information
+ * specified inside @op_param. GPU command is used to flush the cache(s)
+ * instead of the MMU command.
+ *
+ * Return: Zero if the operation was successful, non-zero otherwise.
+ */
+int kbase_mmu_hw_do_flush_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_as *as,
+ const struct kbase_mmu_hw_op_param *op_param);
+
+/**
* kbase_mmu_hw_clear_fault - Clear a fault that has been previously reported by
* the MMU.
* @kbdev: kbase device to clear the fault from.
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
index cf89c0e..1a6157a 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
@@ -26,12 +26,17 @@
#include <mali_kbase_mem.h>
#include <mmu/mali_kbase_mmu_hw.h>
#include <tl/mali_kbase_tracepoints.h>
+#include <linux/delay.h>
+
/**
* lock_region() - Generate lockaddr to lock memory region in MMU
- * @gpu_props: GPU properties for finding the MMU lock region size
- * @lockaddr: Address and size of memory region to lock
- * @op_param: Pointer to a struct containing information about the MMU operation.
+ *
+ * @gpu_props: GPU properties for finding the MMU lock region size.
+ * @lockaddr: Address and size of memory region to lock.
+ * @op_param: Pointer to a struct containing the starting page frame number of
+ * the region to lock, the number of pages to lock and page table
+ * levels to skip when flushing (if supported).
*
* The lockaddr value is a combination of the starting address and
* the size of the region that encompasses all the memory pages to lock.
@@ -62,13 +67,13 @@
*
* Return: 0 if success, or an error code on failure.
*/
-
static int lock_region(struct kbase_gpu_props const *gpu_props, u64 *lockaddr,
const struct kbase_mmu_hw_op_param *op_param)
{
const u64 lockaddr_base = op_param->vpfn << PAGE_SHIFT;
const u64 lockaddr_end = ((op_param->vpfn + op_param->nr) << PAGE_SHIFT) - 1;
u64 lockaddr_size_log2;
+
if (op_param->nr == 0)
return -EINVAL;
@@ -121,14 +126,13 @@ static int lock_region(struct kbase_gpu_props const *gpu_props, u64 *lockaddr,
*/
*lockaddr = lockaddr_base & ~((1ull << lockaddr_size_log2) - 1);
*lockaddr |= lockaddr_size_log2 - 1;
-
return 0;
}
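+/* Worked example for lock_region() (illustrative; assumes 4 KiB pages and a
+ * hardware minimum lock region size of no more than 1 MiB): locking 256 pages
+ * starting at vpfn == 0x1000 gives lockaddr_base == 0x1000000 and
+ * lockaddr_end == 0x10FFFFF, which fits a naturally aligned 2^20 byte region,
+ * so lockaddr_size_log2 == 20 and *lockaddr == 0x1000000 | (20 - 1) == 0x1000013.
+ */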
static int wait_ready(struct kbase_device *kbdev,
unsigned int as_nr)
{
- unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+ u32 max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
/* Wait for the MMU status to indicate there is no active command. */
while (--max_loops &&
@@ -167,6 +171,100 @@ static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd)
return status;
}
+#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_NO_MALI)
+static int wait_cores_power_trans_complete(struct kbase_device *kbdev)
+{
+#define WAIT_TIMEOUT 1000 /* 1ms timeout */
+#define DELAY_TIME_IN_US 1
+ const int max_iterations = WAIT_TIMEOUT;
+ int loop;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ for (loop = 0; loop < max_iterations; loop++) {
+ u32 lo =
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_PWRTRANS_LO));
+ u32 hi =
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_PWRTRANS_HI));
+
+ if (!lo && !hi)
+ break;
+
+ udelay(DELAY_TIME_IN_US);
+ }
+
+ if (loop == max_iterations) {
+ dev_warn(kbdev->dev, "SHADER_PWRTRANS set for too long");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+/**
+ * apply_hw_issue_GPU2019_3901_wa - Apply WA for the HW issue GPU2019_3901
+ *
+ * @kbdev: Kbase device to issue the MMU operation on.
+ * @mmu_cmd:          Pointer to the variable containing the value of the MMU command
+ * that needs to be sent to flush the L2 cache and do an
+ * implicit unlock.
+ * @as_nr: Address space number for which MMU command needs to be
+ * sent.
+ * @hwaccess_locked: Flag to indicate if hwaccess_lock is held by the caller.
+ *
+ * This function ensures that the flush of the LSC is not missed for pages that
+ * were unmapped from the GPU, which could otherwise happen due to the power-down
+ * transition of shader cores.
+ *
+ * Return: 0 if the WA was successfully applied, non-zero otherwise.
+ */
+static int apply_hw_issue_GPU2019_3901_wa(struct kbase_device *kbdev,
+ u32 *mmu_cmd, unsigned int as_nr, bool hwaccess_locked)
+{
+ unsigned long flags = 0;
+ int ret = 0;
+
+ if (!hwaccess_locked)
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ /* Check if L2 is OFF. The cores also must be OFF if L2 is not up, so
+ * the workaround can be safely skipped.
+ */
+ if (kbdev->pm.backend.l2_state != KBASE_L2_OFF) {
+ if (*mmu_cmd != AS_COMMAND_FLUSH_MEM) {
+ dev_warn(kbdev->dev,
+ "Unexpected mmu command received");
+ ret = -EINVAL;
+ goto unlock;
+ }
+
+ /* Wait for the LOCK MMU command to complete, issued by the caller */
+ ret = wait_ready(kbdev, as_nr);
+ if (ret)
+ goto unlock;
+
+ ret = kbase_gpu_cache_flush_and_busy_wait(kbdev,
+ GPU_COMMAND_CACHE_CLN_INV_LSC);
+ if (ret)
+ goto unlock;
+
+ ret = wait_cores_power_trans_complete(kbdev);
+ if (ret)
+ goto unlock;
+
+ /* As LSC is guaranteed to have been flushed we can use FLUSH_PT
+ * MMU command to only flush the L2.
+ */
+ *mmu_cmd = AS_COMMAND_FLUSH_PT;
+ }
+
+unlock:
+ if (!hwaccess_locked)
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ return ret;
+}
+#endif
+
void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as)
{
struct kbase_mmu_setup *current_setup = &as->current_setup;
@@ -224,6 +322,27 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as)
#endif
}
+/**
+ * mmu_command_instr - Record an MMU command for instrumentation purposes.
+ *
+ * @kbdev: Kbase device used to issue MMU operation on.
+ * @kctx_id: Kernel context ID for MMU command tracepoint.
+ * @cmd: Command issued to the MMU.
+ * @lock_addr: Address of memory region locked for the operation.
+ * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops.
+ */
+static void mmu_command_instr(struct kbase_device *kbdev, u32 kctx_id, u32 cmd, u64 lock_addr,
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
+{
+ u64 lock_addr_base = AS_LOCKADDR_LOCKADDR_BASE_GET(lock_addr);
+ u32 lock_addr_size = AS_LOCKADDR_LOCKADDR_SIZE_GET(lock_addr);
+
+ bool is_mmu_synchronous = (mmu_sync_info == CALLER_MMU_SYNC);
+
+ KBASE_TLSTREAM_AUX_MMU_COMMAND(kbdev, kctx_id, cmd, is_mmu_synchronous, lock_addr_base,
+ lock_addr_size);
+}
+
/* Helper function to program the LOCKADDR register before LOCK/UNLOCK command
* is issued.
*/
@@ -231,7 +350,9 @@ static int mmu_hw_set_lock_addr(struct kbase_device *kbdev, int as_nr, u64 *lock
const struct kbase_mmu_hw_op_param *op_param)
{
int ret;
+
ret = lock_region(&kbdev->gpu_props, lock_addr, op_param);
+
if (!ret) {
/* Set the region that needs to be updated */
kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_LOCKADDR_LO),
@@ -241,6 +362,7 @@ static int mmu_hw_set_lock_addr(struct kbase_device *kbdev, int as_nr, u64 *lock
}
return ret;
}
+
/**
* mmu_hw_do_lock_no_wait - Issue LOCK command to the MMU and return without
 * waiting for its completion.
@@ -256,74 +378,191 @@ static int mmu_hw_do_lock_no_wait(struct kbase_device *kbdev, struct kbase_as *a
const struct kbase_mmu_hw_op_param *op_param)
{
int ret;
+
ret = mmu_hw_set_lock_addr(kbdev, as->number, lock_addr, op_param);
+
if (!ret)
write_cmd(kbdev, as->number, AS_COMMAND_LOCK);
+
return ret;
}
+
+static int mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as,
+ const struct kbase_mmu_hw_op_param *op_param)
+{
+ int ret;
+ u64 lock_addr = 0x0;
+
+ if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL))
+ return -EINVAL;
+
+ ret = mmu_hw_do_lock_no_wait(kbdev, as, &lock_addr, op_param);
+
+ if (!ret)
+ ret = wait_ready(kbdev, as->number);
+
+ if (!ret)
+ mmu_command_instr(kbdev, op_param->kctx_id, AS_COMMAND_LOCK, lock_addr,
+ op_param->mmu_sync_info);
+ else
+ dev_err(kbdev->dev, "AS_ACTIVE bit stuck after sending LOCK command");
+
+ return ret;
+}
+
int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param)
{
int ret = 0;
+
if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL))
return -EINVAL;
+
ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK);
+
/* Wait for UNLOCK command to complete */
if (!ret)
ret = wait_ready(kbdev, as->number);
+
+ if (!ret) {
+ u64 lock_addr = 0x0;
+ /* read MMU_AS_CONTROL.LOCKADDR register */
+ lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI))
+ << 32;
+ lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO));
+
+ mmu_command_instr(kbdev, op_param->kctx_id, AS_COMMAND_UNLOCK,
+ lock_addr, op_param->mmu_sync_info);
+ }
+
return ret;
}
+
int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param)
{
int ret = 0;
u64 lock_addr = 0x0;
+
if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL))
return -EINVAL;
+
ret = mmu_hw_set_lock_addr(kbdev, as->number, &lock_addr, op_param);
+
if (!ret)
- ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, op_param);
+ ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as,
+ op_param);
+
return ret;
}
+
static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
- const struct kbase_mmu_hw_op_param *op_param, bool hwaccess_locked)
+ const struct kbase_mmu_hw_op_param *op_param, bool hwaccess_locked)
{
int ret;
u64 lock_addr = 0x0;
u32 mmu_cmd = AS_COMMAND_FLUSH_MEM;
+
if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL))
return -EINVAL;
+
/* MMU operations can be either FLUSH_PT or FLUSH_MEM, anything else at
* this point would be unexpected.
*/
- if (op_param->op != KBASE_MMU_OP_FLUSH_PT && op_param->op != KBASE_MMU_OP_FLUSH_MEM) {
+ if (op_param->op != KBASE_MMU_OP_FLUSH_PT &&
+ op_param->op != KBASE_MMU_OP_FLUSH_MEM) {
dev_err(kbdev->dev, "Unexpected flush operation received");
return -EINVAL;
}
+
lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
if (op_param->op == KBASE_MMU_OP_FLUSH_PT)
mmu_cmd = AS_COMMAND_FLUSH_PT;
+
/* Lock the region that needs to be updated */
ret = mmu_hw_do_lock_no_wait(kbdev, as, &lock_addr, op_param);
if (ret)
return ret;
+
+#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_NO_MALI)
+ /* WA for the BASE_HW_ISSUE_GPU2019_3901. No runtime check is used here
+ * as the WA is applicable to all CSF GPUs where FLUSH_MEM/PT command is
+ * supported, and this function doesn't get called for the GPUs where
+ * FLUSH_MEM/PT command is deprecated.
+ */
+ if (mmu_cmd == AS_COMMAND_FLUSH_MEM) {
+ ret = apply_hw_issue_GPU2019_3901_wa(kbdev, &mmu_cmd,
+ as->number, hwaccess_locked);
+ if (ret)
+ return ret;
+ }
+#endif
+
write_cmd(kbdev, as->number, mmu_cmd);
+
/* Wait for the command to complete */
ret = wait_ready(kbdev, as->number);
+
+ if (!ret)
+ mmu_command_instr(kbdev, op_param->kctx_id, mmu_cmd, lock_addr,
+ op_param->mmu_sync_info);
+
return ret;
}
+
int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
+
return mmu_hw_do_flush(kbdev, as, op_param, true);
}
+
int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param)
{
return mmu_hw_do_flush(kbdev, as, op_param, false);
}
+int kbase_mmu_hw_do_flush_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_as *as,
+ const struct kbase_mmu_hw_op_param *op_param)
+{
+ int ret, ret2;
+ u32 gpu_cmd = GPU_COMMAND_CACHE_CLN_INV_L2_LSC;
+
+ if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL))
+ return -EINVAL;
+
+ /* MMU operations can be either FLUSH_PT or FLUSH_MEM, anything else at
+ * this point would be unexpected.
+ */
+ if (op_param->op != KBASE_MMU_OP_FLUSH_PT &&
+ op_param->op != KBASE_MMU_OP_FLUSH_MEM) {
+ dev_err(kbdev->dev, "Unexpected flush operation received");
+ return -EINVAL;
+ }
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+ lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
+ if (op_param->op == KBASE_MMU_OP_FLUSH_PT)
+ gpu_cmd = GPU_COMMAND_CACHE_CLN_INV_L2;
+
+ /* 1. Issue MMU_AS_CONTROL.COMMAND.LOCK operation. */
+ ret = mmu_hw_do_lock(kbdev, as, op_param);
+ if (ret)
+ return ret;
+
+ /* 2. Issue GPU_CONTROL.COMMAND.FLUSH_CACHES operation */
+ ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, gpu_cmd);
+
+ /* 3. Issue MMU_AS_CONTROL.COMMAND.UNLOCK operation. */
+ ret2 = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, op_param);
+
+ return ret ?: ret2;
+}
+
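For context, the driver now has two flush entry points: the MMU_AS_CONTROL FLUSH_MEM/FLUSH_PT path above and this GPU_CONTROL-based path. A minimal sketch of how a caller might choose between them follows; the selection logic actually lives in mali_kbase_mmu.c and is not part of this hunk, so the predicate parameter below is purely hypothetical.

/* Illustrative sketch only: the predicate is hypothetical and the real
 * selection logic is outside this hunk. Both paths expect mmu_hw_mutex to
 * be held; the GPU_CONTROL path additionally expects hwaccess_lock.
 */
static int flush_region_sketch(struct kbase_device *kbdev, struct kbase_as *as,
			       const struct kbase_mmu_hw_op_param *op_param,
			       bool cache_flush_on_gpu_ctrl)
{
	if (cache_flush_on_gpu_ctrl)
		return kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, as, op_param);

	return kbase_mmu_hw_do_flush(kbdev, as, op_param);
}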
void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
enum kbase_mmu_fault_type type)
{
diff --git a/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c b/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c
index 0dd8a55..ff1d902 100644
--- a/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c
+++ b/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -29,6 +29,7 @@
#include "mali_kbase_config_platform.h"
+
static void enable_gpu_power_control(struct kbase_device *kbdev)
{
unsigned int i;
@@ -50,7 +51,6 @@ static void enable_gpu_power_control(struct kbase_device *kbdev)
}
}
-
static void disable_gpu_power_control(struct kbase_device *kbdev)
{
unsigned int i;
@@ -99,9 +99,8 @@ static int pm_callback_power_on(struct kbase_device *kbdev)
#else
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#ifdef KBASE_PM_RUNTIME
error = pm_runtime_get_sync(kbdev->dev);
- enable_gpu_power_control(kbdev);
-
if (error == 1) {
/*
* Let core know that the chip has not been
@@ -109,8 +108,11 @@ static int pm_callback_power_on(struct kbase_device *kbdev)
*/
ret = 0;
}
-
dev_dbg(kbdev->dev, "pm_runtime_get_sync returned %d\n", error);
+#else
+ enable_gpu_power_control(kbdev);
+#endif /* KBASE_PM_RUNTIME */
+
#endif /* MALI_USE_CSF */
return ret;
@@ -126,7 +128,9 @@ static void pm_callback_power_off(struct kbase_device *kbdev)
WARN_ON(kbdev->pm.backend.gpu_powered);
#if MALI_USE_CSF
if (likely(kbdev->csf.firmware_inited)) {
+#ifdef CONFIG_MALI_DEBUG
WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev));
+#endif
WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_OFF);
}
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -241,7 +245,9 @@ static int pm_callback_runtime_on(struct kbase_device *kbdev)
{
dev_dbg(kbdev->dev, "%s\n", __func__);
+#if !MALI_USE_CSF
enable_gpu_power_control(kbdev);
+#endif
return 0;
}
@@ -249,7 +255,9 @@ static void pm_callback_runtime_off(struct kbase_device *kbdev)
{
dev_dbg(kbdev->dev, "%s\n", __func__);
+#if !MALI_USE_CSF
disable_gpu_power_control(kbdev);
+#endif
}
static void pm_callback_resume(struct kbase_device *kbdev)
diff --git a/mali_kbase/platform/meson/Kbuild b/mali_kbase/platform/meson/Kbuild
new file mode 100644
index 0000000..3f55378
--- /dev/null
+++ b/mali_kbase/platform/meson/Kbuild
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# (C) COPYRIGHT 2012-2017, 2019-2021 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU license.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+#
+
+mali_kbase-y += \
+ platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_meson.o \
+ platform/$(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o
diff --git a/mali_kbase/platform/meson/mali_kbase_config_meson.c b/mali_kbase/platform/meson/mali_kbase_config_meson.c
new file mode 100644
index 0000000..c999a52
--- /dev/null
+++ b/mali_kbase/platform/meson/mali_kbase_config_meson.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2015, 2017, 2019, 2021, 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static struct kbase_platform_config dummy_platform_config;
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+ return &dummy_platform_config;
+}
+
+#ifndef CONFIG_OF
+int kbase_platform_register(void)
+{
+ return 0;
+}
+
+void kbase_platform_unregister(void)
+{
+}
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+#if MALI_USE_CSF
+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation)
+#else
+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2])
+#endif
+{
+ return 1;
+}
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
diff --git a/mali_kbase/platform/meson/mali_kbase_config_platform.h b/mali_kbase/platform/meson/mali_kbase_config_platform.h
new file mode 100644
index 0000000..06279e2
--- /dev/null
+++ b/mali_kbase/platform/meson/mali_kbase_config_platform.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2014-2017, 2019-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
+
+/**
+ * Autosuspend delay
+ *
+ * The delay time (in milliseconds) to be used for autosuspend
+ */
+#define AUTO_SUSPEND_DELAY (100)
diff --git a/mali_kbase/platform/meson/mali_kbase_runtime_pm.c b/mali_kbase/platform/meson/mali_kbase_runtime_pm.c
new file mode 100644
index 0000000..c00cbcb
--- /dev/null
+++ b/mali_kbase/platform/meson/mali_kbase_runtime_pm.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2015, 2017-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <device/mali_kbase_device.h>
+
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/delay.h>
+#include <linux/regulator/consumer.h>
+
+#include "mali_kbase_config_platform.h"
+
+
+static struct reset_control **resets;
+static int nr_resets;
+
+static int resets_init(struct kbase_device *kbdev)
+{
+ struct device_node *np;
+ int i;
+ int err = 0;
+
+ np = kbdev->dev->of_node;
+
+ nr_resets = of_count_phandle_with_args(np, "resets", "#reset-cells");
+ if (nr_resets <= 0) {
+ dev_err(kbdev->dev, "Failed to get GPU resets from dtb\n");
+ return nr_resets;
+ }
+
+ resets = devm_kcalloc(kbdev->dev, nr_resets, sizeof(*resets),
+ GFP_KERNEL);
+ if (!resets)
+ return -ENOMEM;
+
+ for (i = 0; i < nr_resets; ++i) {
+ resets[i] = devm_reset_control_get_exclusive_by_index(
+ kbdev->dev, i);
+ if (IS_ERR(resets[i])) {
+ err = PTR_ERR(resets[i]);
+ nr_resets = i;
+ break;
+ }
+ }
+
+ return err;
+}
+
+static int pm_callback_soft_reset(struct kbase_device *kbdev)
+{
+ int ret, i;
+
+ if (!resets) {
+ ret = resets_init(kbdev);
+ if (ret)
+ return ret;
+ }
+
+ for (i = 0; i < nr_resets; ++i)
+ reset_control_assert(resets[i]);
+
+ udelay(10);
+
+ for (i = 0; i < nr_resets; ++i)
+ reset_control_deassert(resets[i]);
+
+ udelay(10);
+
+ /* Override Power Management Settings, values from manufacturer's defaults */
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1),
+ 0xfff | (0x20 << 16));
+
+ /*
+ * RESET_COMPLETED interrupt will be raised, so continue with
+ * the normal soft reset procedure
+ */
+ return 0;
+}
+
+static void enable_gpu_power_control(struct kbase_device *kbdev)
+{
+ unsigned int i;
+
+#if defined(CONFIG_REGULATOR)
+ for (i = 0; i < kbdev->nr_regulators; i++) {
+ if (WARN_ON(kbdev->regulators[i] == NULL))
+ ;
+ else if (!regulator_is_enabled(kbdev->regulators[i]))
+ WARN_ON(regulator_enable(kbdev->regulators[i]));
+ }
+#endif
+
+ for (i = 0; i < kbdev->nr_clocks; i++) {
+ if (WARN_ON(kbdev->clocks[i] == NULL))
+ ;
+ else if (!__clk_is_enabled(kbdev->clocks[i]))
+ WARN_ON(clk_prepare_enable(kbdev->clocks[i]));
+ }
+}
+
+static void disable_gpu_power_control(struct kbase_device *kbdev)
+{
+ unsigned int i;
+
+ for (i = 0; i < kbdev->nr_clocks; i++) {
+ if (WARN_ON(kbdev->clocks[i] == NULL))
+ ;
+ else if (__clk_is_enabled(kbdev->clocks[i])) {
+ clk_disable_unprepare(kbdev->clocks[i]);
+ WARN_ON(__clk_is_enabled(kbdev->clocks[i]));
+ }
+ }
+
+#if defined(CONFIG_REGULATOR)
+ for (i = 0; i < kbdev->nr_regulators; i++) {
+ if (WARN_ON(kbdev->regulators[i] == NULL))
+ ;
+ else if (regulator_is_enabled(kbdev->regulators[i]))
+ WARN_ON(regulator_disable(kbdev->regulators[i]));
+ }
+#endif
+}
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+ int ret = 1; /* Assume GPU has been powered off */
+ int error;
+
+ dev_dbg(kbdev->dev, "%s %p\n", __func__, (void *)kbdev->dev->pm_domain);
+
+#ifdef KBASE_PM_RUNTIME
+ error = pm_runtime_get_sync(kbdev->dev);
+ if (error == 1) {
+ /*
+ * Let core know that the chip has not been
+ * powered off, so we can save on re-initialization.
+ */
+ ret = 0;
+ }
+ dev_dbg(kbdev->dev, "pm_runtime_get_sync returned %d\n", error);
+#else
+ enable_gpu_power_control(kbdev);
+#endif
+
+ return ret;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+ dev_dbg(kbdev->dev, "%s\n", __func__);
+
+#ifdef KBASE_PM_RUNTIME
+ pm_runtime_mark_last_busy(kbdev->dev);
+ pm_runtime_put_autosuspend(kbdev->dev);
+#else
+ /* Power down the GPU immediately as runtime PM is disabled */
+ disable_gpu_power_control(kbdev);
+#endif
+}
+
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_init(struct kbase_device *kbdev)
+{
+ int ret = 0;
+
+ dev_dbg(kbdev->dev, "%s\n", __func__);
+
+ pm_runtime_set_autosuspend_delay(kbdev->dev, AUTO_SUSPEND_DELAY);
+ pm_runtime_use_autosuspend(kbdev->dev);
+
+ pm_runtime_set_active(kbdev->dev);
+ pm_runtime_enable(kbdev->dev);
+
+ if (!pm_runtime_enabled(kbdev->dev)) {
+ dev_warn(kbdev->dev, "pm_runtime not enabled");
+ ret = -EINVAL;
+ } else if (atomic_read(&kbdev->dev->power.usage_count)) {
+ dev_warn(kbdev->dev, "%s: Device runtime usage count unexpectedly non zero %d",
+ __func__, atomic_read(&kbdev->dev->power.usage_count));
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static void kbase_device_runtime_disable(struct kbase_device *kbdev)
+{
+ dev_dbg(kbdev->dev, "%s\n", __func__);
+
+ if (atomic_read(&kbdev->dev->power.usage_count))
+ dev_warn(kbdev->dev, "%s: Device runtime usage count unexpectedly non zero %d",
+ __func__, atomic_read(&kbdev->dev->power.usage_count));
+
+ pm_runtime_disable(kbdev->dev);
+}
+#endif /* KBASE_PM_RUNTIME */
+
+static int pm_callback_runtime_on(struct kbase_device *kbdev)
+{
+ dev_dbg(kbdev->dev, "%s\n", __func__);
+
+ enable_gpu_power_control(kbdev);
+ return 0;
+}
+
+static void pm_callback_runtime_off(struct kbase_device *kbdev)
+{
+ dev_dbg(kbdev->dev, "%s\n", __func__);
+
+ disable_gpu_power_control(kbdev);
+}
+
+static void pm_callback_resume(struct kbase_device *kbdev)
+{
+ int ret = pm_callback_runtime_on(kbdev);
+
+ WARN_ON(ret);
+}
+
+static void pm_callback_suspend(struct kbase_device *kbdev)
+{
+ pm_callback_runtime_off(kbdev);
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+ .power_on_callback = pm_callback_power_on,
+ .power_off_callback = pm_callback_power_off,
+ .power_suspend_callback = pm_callback_suspend,
+ .power_resume_callback = pm_callback_resume,
+ .soft_reset_callback = pm_callback_soft_reset,
+#ifdef KBASE_PM_RUNTIME
+ .power_runtime_init_callback = kbase_device_runtime_init,
+ .power_runtime_term_callback = kbase_device_runtime_disable,
+ .power_runtime_on_callback = pm_callback_runtime_on,
+ .power_runtime_off_callback = pm_callback_runtime_off,
+#else /* KBASE_PM_RUNTIME */
+ .power_runtime_init_callback = NULL,
+ .power_runtime_term_callback = NULL,
+ .power_runtime_on_callback = NULL,
+ .power_runtime_off_callback = NULL,
+#endif /* KBASE_PM_RUNTIME */
+};
diff --git a/mali_kbase/platform/pixel/pixel_gpu_sscd.c b/mali_kbase/platform/pixel/pixel_gpu_sscd.c
index 44a55d9..7a0885c 100644
--- a/mali_kbase/platform/pixel/pixel_gpu_sscd.c
+++ b/mali_kbase/platform/pixel/pixel_gpu_sscd.c
@@ -116,7 +116,7 @@ static void get_fw_trace(struct kbase_device *kbdev, struct sscd_segment *seg)
.version = 1,
};
- tb = kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME);
+ tb = kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);
if (tb == NULL) {
dev_err(kbdev->dev, "pixel: failed to open firmware trace buffer");
diff --git a/mali_kbase/tests/Kbuild b/mali_kbase/tests/Kbuild
index ee3de7b..38e4dd4 100644
--- a/mali_kbase/tests/Kbuild
+++ b/mali_kbase/tests/Kbuild
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -27,4 +27,5 @@ subdir-ccflags-y += -I$(src)/include \
obj-$(CONFIG_MALI_KUTF) += kutf/
obj-$(CONFIG_MALI_KUTF_IRQ_TEST) += mali_kutf_irq_test/
obj-$(CONFIG_MALI_KUTF_CLK_RATE_TRACE) += mali_kutf_clk_rate_trace/kernel/
+obj-$(CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST) += mali_kutf_mgm_integration_test/
diff --git a/mali_kbase/tests/Kconfig b/mali_kbase/tests/Kconfig
index a86e1ce..e6f0376 100644
--- a/mali_kbase/tests/Kconfig
+++ b/mali_kbase/tests/Kconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -52,6 +52,18 @@ config MALI_KUTF_CLK_RATE_TRACE
Modules:
- mali_kutf_clk_rate_trace_test_portal.ko
+config MALI_KUTF_MGM_INTEGRATION_TEST
+ bool "Build Mali KUTF MGM integration test module"
+ depends on MALI_KUTF
+ default y
+ help
+ This option will build the MGM integration test module.
+ It can test the implementation of PTE translation for specific
+ group ids.
+
+ Modules:
+ - mali_kutf_mgm_integration_test.ko
+
comment "Enable MALI_DEBUG for KUTF modules support"
depends on MALI_MIDGARD && !MALI_DEBUG && MALI_KUTF
diff --git a/mali_kbase/tests/Mconfig b/mali_kbase/tests/Mconfig
index 167facd..4203971 100644
--- a/mali_kbase/tests/Mconfig
+++ b/mali_kbase/tests/Mconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -52,6 +52,18 @@ config MALI_KUTF_CLK_RATE_TRACE
Modules:
- mali_kutf_clk_rate_trace_test_portal.ko
+config MALI_KUTF_MGM_INTEGRATION_TEST
+ bool "Build Mali KUTF MGM integration test module"
+ depends on MALI_KUTF
+ default y
+ help
+ This option will build the MGM integration test module.
+ It can test the implementation of PTE translation for specific
+ group ids.
+
+ Modules:
+ - mali_kutf_mgm_integration_test.ko
+
# Enable MALI_DEBUG for KUTF modules support
diff --git a/mali_kbase/tests/kutf/kutf_helpers_user.c b/mali_kbase/tests/kutf/kutf_helpers_user.c
index f88e138..c4e2943 100644
--- a/mali_kbase/tests/kutf/kutf_helpers_user.c
+++ b/mali_kbase/tests/kutf/kutf_helpers_user.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,7 +28,7 @@
#include <linux/slab.h>
#include <linux/export.h>
-const char *valtype_names[] = {
+static const char *const valtype_names[] = {
"INVALID",
"U64",
"STR",
diff --git a/mali_kbase/tests/kutf/kutf_suite.c b/mali_kbase/tests/kutf/kutf_suite.c
index 91065b5..4468066 100644
--- a/mali_kbase/tests/kutf/kutf_suite.c
+++ b/mali_kbase/tests/kutf/kutf_suite.c
@@ -106,22 +106,16 @@ struct kutf_convert_table {
enum kutf_result_status result;
};
-struct kutf_convert_table kutf_convert[] = {
-#define ADD_UTF_RESULT(_name) \
-{ \
- #_name, \
- _name, \
-},
-ADD_UTF_RESULT(KUTF_RESULT_BENCHMARK)
-ADD_UTF_RESULT(KUTF_RESULT_SKIP)
-ADD_UTF_RESULT(KUTF_RESULT_UNKNOWN)
-ADD_UTF_RESULT(KUTF_RESULT_PASS)
-ADD_UTF_RESULT(KUTF_RESULT_DEBUG)
-ADD_UTF_RESULT(KUTF_RESULT_INFO)
-ADD_UTF_RESULT(KUTF_RESULT_WARN)
-ADD_UTF_RESULT(KUTF_RESULT_FAIL)
-ADD_UTF_RESULT(KUTF_RESULT_FATAL)
-ADD_UTF_RESULT(KUTF_RESULT_ABORT)
+static const struct kutf_convert_table kutf_convert[] = {
+#define ADD_UTF_RESULT(_name) \
+ { \
+#_name, _name, \
+ }
+ ADD_UTF_RESULT(KUTF_RESULT_BENCHMARK), ADD_UTF_RESULT(KUTF_RESULT_SKIP),
+ ADD_UTF_RESULT(KUTF_RESULT_UNKNOWN), ADD_UTF_RESULT(KUTF_RESULT_PASS),
+ ADD_UTF_RESULT(KUTF_RESULT_DEBUG), ADD_UTF_RESULT(KUTF_RESULT_INFO),
+ ADD_UTF_RESULT(KUTF_RESULT_WARN), ADD_UTF_RESULT(KUTF_RESULT_FAIL),
+ ADD_UTF_RESULT(KUTF_RESULT_FATAL), ADD_UTF_RESULT(KUTF_RESULT_ABORT),
};
#define UTF_CONVERT_SIZE (ARRAY_SIZE(kutf_convert))
@@ -191,8 +185,7 @@ static void kutf_set_expected_result(struct kutf_context *context,
*
* Return: 1 if test result was successfully converted to string, 0 otherwise
*/
-static int kutf_result_to_string(char **result_str,
- enum kutf_result_status result)
+static int kutf_result_to_string(const char **result_str, enum kutf_result_status result)
{
int i;
int ret = 0;
@@ -382,7 +375,7 @@ static ssize_t kutf_debugfs_run_read(struct file *file, char __user *buf,
struct kutf_result *res;
unsigned long bytes_not_copied;
ssize_t bytes_copied = 0;
- char *kutf_str_ptr = NULL;
+ const char *kutf_str_ptr = NULL;
size_t kutf_str_len = 0;
size_t message_len = 0;
char separator = ':';
@@ -599,11 +592,7 @@ static int create_fixture_variant(struct kutf_test_function *test_func,
goto fail_file;
}
-#if KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE
tmp = debugfs_create_file_unsafe(
-#else
- tmp = debugfs_create_file(
-#endif
"run", 0600, test_fix->dir,
test_fix,
&kutf_debugfs_run_ops);
diff --git a/mali_kbase/tests/kutf/kutf_utils.c b/mali_kbase/tests/kutf/kutf_utils.c
index 2ae1510..21f5fad 100644
--- a/mali_kbase/tests/kutf/kutf_utils.c
+++ b/mali_kbase/tests/kutf/kutf_utils.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2017, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -31,7 +31,7 @@
static char tmp_buffer[KUTF_MAX_DSPRINTF_LEN];
-DEFINE_MUTEX(buffer_lock);
+static DEFINE_MUTEX(buffer_lock);
const char *kutf_dsprintf(struct kutf_mempool *pool,
const char *fmt, ...)
diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
index 935f8ca..2d7289d 100644
--- a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
+++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
@@ -46,7 +46,7 @@
#define MINOR_FOR_FIRST_KBASE_DEV (-1)
/* KUTF test application pointer for this test */
-struct kutf_application *kutf_app;
+static struct kutf_application *kutf_app;
enum portal_server_state {
PORTAL_STATE_NO_CLK,
@@ -113,7 +113,7 @@ struct kbasep_cmd_name_pair {
const char *name;
};
-struct kbasep_cmd_name_pair kbasep_portal_cmd_name_map[] = {
+static const struct kbasep_cmd_name_pair kbasep_portal_cmd_name_map[] = {
{ PORTAL_CMD_GET_PLATFORM, GET_PLATFORM },
{ PORTAL_CMD_GET_CLK_RATE_MGR, GET_CLK_RATE_MGR },
{ PORTAL_CMD_GET_CLK_RATE_TRACE, GET_CLK_RATE_TRACE },
@@ -128,7 +128,7 @@ struct kbasep_cmd_name_pair kbasep_portal_cmd_name_map[] = {
* this pointer is engaged, new requests for create fixture will fail
* hence limiting the use of the portal at any time to a singleton.
*/
-struct kutf_clk_rate_trace_fixture_data *g_ptr_portal_data;
+static struct kutf_clk_rate_trace_fixture_data *g_ptr_portal_data;
#define PORTAL_MSG_LEN (KUTF_MAX_LINE_LENGTH - MAX_REPLY_NAME_LEN)
static char portal_msg_buf[PORTAL_MSG_LEN];
@@ -825,14 +825,14 @@ static void *mali_kutf_clk_rate_trace_create_fixture(
if (!data)
return NULL;
- *data = (const struct kutf_clk_rate_trace_fixture_data) { 0 };
+ *data = (const struct kutf_clk_rate_trace_fixture_data){ NULL };
pr_debug("Hooking up the test portal to kbdev clk rate trace\n");
spin_lock(&kbdev->pm.clk_rtm.lock);
if (g_ptr_portal_data != NULL) {
pr_warn("Test portal is already in use, run aborted\n");
- kutf_test_fail(context, "Portal allows single session only");
spin_unlock(&kbdev->pm.clk_rtm.lock);
+ kutf_test_fail(context, "Portal allows single session only");
return NULL;
}
@@ -909,7 +909,7 @@ static int __init mali_kutf_clk_rate_trace_test_module_init(void)
{
struct kutf_suite *suite;
unsigned int filters;
- union kutf_callback_data suite_data = { 0 };
+ union kutf_callback_data suite_data = { NULL };
pr_debug("Creating app\n");
diff --git a/mali_kbase/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c b/mali_kbase/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
index 5824a4c..2d6e689 100644
--- a/mali_kbase/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
+++ b/mali_kbase/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
@@ -40,7 +40,7 @@
*/
/* KUTF test application pointer for this test */
-struct kutf_application *irq_app;
+static struct kutf_application *irq_app;
/**
* struct kutf_irq_fixture_data - test fixture used by the test functions.
diff --git a/mali_kbase/tests/mali_kutf_mgm_integration_test/Kbuild b/mali_kbase/tests/mali_kutf_mgm_integration_test/Kbuild
new file mode 100644
index 0000000..e9bff98
--- /dev/null
+++ b/mali_kbase/tests/mali_kutf_mgm_integration_test/Kbuild
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU license.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+#
+
+ifeq ($(CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST),y)
+obj-m += mali_kutf_mgm_integration_test.o
+
+mali_kutf_mgm_integration_test-y := mali_kutf_mgm_integration_test_main.o
+endif
diff --git a/mali_kbase/tests/mali_kutf_mgm_integration_test/build.bp b/mali_kbase/tests/mali_kutf_mgm_integration_test/build.bp
new file mode 100644
index 0000000..2e4a083
--- /dev/null
+++ b/mali_kbase/tests/mali_kutf_mgm_integration_test/build.bp
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+bob_kernel_module {
+ name: "mali_kutf_mgm_integration_test",
+ defaults: [
+ "mali_kbase_shared_config_defaults",
+ "kernel_test_configs",
+ "kernel_test_includes",
+ ],
+ srcs: [
+ "Kbuild",
+ "mali_kutf_mgm_integration_test_main.c",
+ ],
+ extra_symbols: [
+ "mali_kbase",
+ "kutf",
+ ],
+ enabled: false,
+ mali_kutf_mgm_integration_test: {
+ kbuild_options: ["CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST=y"],
+ enabled: true,
+ },
+}
\ No newline at end of file
diff --git a/mali_kbase/tests/mali_kutf_mgm_integration_test/mali_kutf_mgm_integration_test_main.c b/mali_kbase/tests/mali_kutf_mgm_integration_test/mali_kutf_mgm_integration_test_main.c
new file mode 100644
index 0000000..5a42bd6
--- /dev/null
+++ b/mali_kbase/tests/mali_kutf_mgm_integration_test/mali_kutf_mgm_integration_test_main.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+#include <linux/module.h>
+#include "mali_kbase.h"
+#include <kutf/kutf_suite.h>
+#include <kutf/kutf_utils.h>
+#include <kutf/kutf_helpers.h>
+#include <kutf/kutf_helpers_user.h>
+
+#define MINOR_FOR_FIRST_KBASE_DEV (-1)
+
+#define BASE_MEM_GROUP_COUNT (16)
+#define PA_MAX ((1ULL << 48) - 1)
+#define PA_START_BIT 12
+#define ENTRY_ACCESS_BIT (1ULL << 10)
+
+#define ENTRY_IS_ATE_L3 3ULL
+#define ENTRY_IS_ATE_L02 1ULL
+
+#define MGM_INTEGRATION_SUITE_NAME "mgm_integration"
+#define MGM_INTEGRATION_PTE_TRANSLATION "pte_translation"
+
+static char msg_buf[KUTF_MAX_LINE_LENGTH];
+
+/* KUTF test application pointer for this test */
+struct kutf_application *mgm_app;
+
+/**
+ * struct kutf_mgm_fixture_data - test fixture used by test functions
+ * @kbdev: kbase device for the GPU.
+ * @group_id: Memory group ID to test based on fixture index.
+ */
+struct kutf_mgm_fixture_data {
+ struct kbase_device *kbdev;
+ int group_id;
+};
+
+/**
+ * mali_kutf_mgm_pte_translation_test() - Tests forward and reverse translation
+ * of PTE by the MGM module
+ * @context: KUTF context within which to perform the test.
+ *
+ * This test creates PTEs with physical addresses in the range
+ * 0x0000-0xFFFFFFFFF000 and tests that mgm_update_gpu_pte() returns a different
+ * PTE and mgm_pte_to_original_pte() returns the original PTE. This is tested
+ * at MMU level 2 and 3 as mgm_update_gpu_pte() is called for ATEs only.
+ *
+ * This test is run for a specific group_id depending on the fixture_id.
+ */
+static void mali_kutf_mgm_pte_translation_test(struct kutf_context *context)
+{
+ struct kutf_mgm_fixture_data *data = context->fixture;
+ struct kbase_device *kbdev = data->kbdev;
+ struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev;
+ u64 addr;
+
+ for (addr = 1 << (PA_START_BIT - 1); addr <= PA_MAX; addr <<= 1) {
+ /* Mask 1 << 11 by ~0xFFF to get 0x0000 at first iteration */
+ phys_addr_t pa = addr;
+ u8 mmu_level;
+
+ /* Test MMU level 3 and 2 (2MB pages) only */
+ for (mmu_level = MIDGARD_MMU_LEVEL(2); mmu_level <= MIDGARD_MMU_LEVEL(3);
+ mmu_level++) {
+ u64 translated_pte;
+ u64 returned_pte;
+ u64 original_pte;
+
+ if (mmu_level == MIDGARD_MMU_LEVEL(3))
+ original_pte =
+ (pa & PAGE_MASK) | ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L3;
+ else
+ original_pte =
+ (pa & PAGE_MASK) | ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L02;
+
+ dev_dbg(kbdev->dev, "Testing group_id=%u, mmu_level=%u, pte=0x%llx\n",
+ data->group_id, mmu_level, original_pte);
+
+ translated_pte = mgm_dev->ops.mgm_update_gpu_pte(mgm_dev, data->group_id,
+ mmu_level, original_pte);
+ if (translated_pte == original_pte) {
+ snprintf(
+ msg_buf, sizeof(msg_buf),
+ "PTE unchanged. translated_pte (0x%llx) == original_pte (0x%llx) for mmu_level=%u, group_id=%d",
+ translated_pte, original_pte, mmu_level, data->group_id);
+ kutf_test_fail(context, msg_buf);
+ return;
+ }
+
+ returned_pte = mgm_dev->ops.mgm_pte_to_original_pte(
+ mgm_dev, data->group_id, mmu_level, translated_pte);
+ dev_dbg(kbdev->dev, "\treturned_pte=%llx\n", returned_pte);
+
+ if (returned_pte != original_pte) {
+ snprintf(
+ msg_buf, sizeof(msg_buf),
+ "Original PTE not returned. returned_pte (0x%llx) != origin al_pte (0x%llx) for mmu_level=%u, group_id=%d",
+ returned_pte, original_pte, mmu_level, data->group_id);
+ kutf_test_fail(context, msg_buf);
+ return;
+ }
+ }
+ }
+ snprintf(msg_buf, sizeof(msg_buf), "Translation passed for group_id=%d", data->group_id);
+ kutf_test_pass(context, msg_buf);
+}
+
+/**
+ * mali_kutf_mgm_integration_create_fixture() - Creates the fixture data
+ * required for all tests in the mgm integration suite.
+ * @context: KUTF context.
+ *
+ * Return: Fixture data created on success or NULL on failure
+ */
+static void *mali_kutf_mgm_integration_create_fixture(struct kutf_context *context)
+{
+ struct kutf_mgm_fixture_data *data;
+ struct kbase_device *kbdev;
+
+ pr_debug("Finding kbase device\n");
+ kbdev = kbase_find_device(MINOR_FOR_FIRST_KBASE_DEV);
+ if (kbdev == NULL) {
+ kutf_test_fail(context, "Failed to find kbase device");
+ return NULL;
+ }
+ pr_debug("Creating fixture\n");
+
+ data = kutf_mempool_alloc(&context->fixture_pool, sizeof(struct kutf_mgm_fixture_data));
+ if (!data)
+ return NULL;
+ data->kbdev = kbdev;
+ data->group_id = context->fixture_index;
+
+ pr_debug("Fixture created\n");
+ return data;
+}
+
+/**
+ * mali_kutf_mgm_integration_remove_fixture() - Destroy fixture data previously
+ * created by mali_kutf_mgm_integration_create_fixture.
+ * @context: KUTF context.
+ */
+static void mali_kutf_mgm_integration_remove_fixture(struct kutf_context *context)
+{
+ struct kutf_mgm_fixture_data *data = context->fixture;
+ struct kbase_device *kbdev = data->kbdev;
+
+ kbase_release_device(kbdev);
+}
+
+/**
+ * mali_kutf_mgm_integration_test_main_init() - Module entry point for this test.
+ *
+ * Return: 0 on success, error code on failure.
+ */
+static int __init mali_kutf_mgm_integration_test_main_init(void)
+{
+ struct kutf_suite *suite;
+
+ mgm_app = kutf_create_application("mgm");
+
+ if (mgm_app == NULL) {
+ pr_warn("Creation of mgm KUTF app failed!\n");
+ return -ENOMEM;
+ }
+ suite = kutf_create_suite(mgm_app, MGM_INTEGRATION_SUITE_NAME, BASE_MEM_GROUP_COUNT,
+ mali_kutf_mgm_integration_create_fixture,
+ mali_kutf_mgm_integration_remove_fixture);
+ if (suite == NULL) {
+ pr_warn("Creation of %s suite failed!\n", MGM_INTEGRATION_SUITE_NAME);
+ kutf_destroy_application(mgm_app);
+ return -ENOMEM;
+ }
+ kutf_add_test(suite, 0x0, MGM_INTEGRATION_PTE_TRANSLATION,
+ mali_kutf_mgm_pte_translation_test);
+ return 0;
+}
+
+/**
+ * mali_kutf_mgm_integration_test_main_exit() - Module exit point for this test.
+ */
+static void __exit mali_kutf_mgm_integration_test_main_exit(void)
+{
+ kutf_destroy_application(mgm_app);
+}
+
+module_init(mali_kutf_mgm_integration_test_main_init);
+module_exit(mali_kutf_mgm_integration_test_main_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/mali_kbase/tl/mali_kbase_timeline.c b/mali_kbase/tl/mali_kbase_timeline.c
index d656c03..09de3f0 100644
--- a/mali_kbase/tl/mali_kbase_timeline.c
+++ b/mali_kbase/tl/mali_kbase_timeline.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -26,7 +26,6 @@
#include <mali_kbase.h>
#include <mali_kbase_jm.h>
-#include <linux/anon_inodes.h>
#include <linux/atomic.h>
#include <linux/file.h>
#include <linux/mutex.h>
@@ -35,7 +34,7 @@
#include <linux/stringify.h>
#include <linux/timer.h>
#include <linux/wait.h>
-
+#include <linux/delay.h>
/* The period of autoflush checker execution in milliseconds. */
#define AUTOFLUSH_INTERVAL 1000 /* ms */
@@ -184,90 +183,109 @@ static void kbase_tlstream_current_devfreq_target(struct kbase_device *kbdev)
}
#endif /* CONFIG_MALI_DEVFREQ */
-int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags)
+int kbase_timeline_acquire(struct kbase_device *kbdev, u32 flags)
{
- int ret = 0;
+ int err = 0;
u32 timeline_flags = TLSTREAM_ENABLED | flags;
- struct kbase_timeline *timeline = kbdev->timeline;
+ struct kbase_timeline *timeline;
+ int rcode;
+
+ if (WARN_ON(!kbdev) || WARN_ON(flags & ~BASE_TLSTREAM_FLAGS_MASK))
+ return -EINVAL;
+
+ timeline = kbdev->timeline;
+ if (WARN_ON(!timeline))
+ return -EFAULT;
- if (!atomic_cmpxchg(timeline->timeline_flags, 0, timeline_flags)) {
- int rcode;
+ if (atomic_cmpxchg(timeline->timeline_flags, 0, timeline_flags))
+ return -EBUSY;
#if MALI_USE_CSF
- if (flags & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) {
- ret = kbase_csf_tl_reader_start(
- &timeline->csf_tl_reader, kbdev);
- if (ret) {
- atomic_set(timeline->timeline_flags, 0);
- return ret;
- }
- }
-#endif
- ret = anon_inode_getfd(
- "[mali_tlstream]",
- &kbasep_tlstream_fops,
- timeline,
- O_RDONLY | O_CLOEXEC);
- if (ret < 0) {
+ if (flags & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) {
+ err = kbase_csf_tl_reader_start(&timeline->csf_tl_reader, kbdev);
+ if (err) {
atomic_set(timeline->timeline_flags, 0);
-#if MALI_USE_CSF
- kbase_csf_tl_reader_stop(&timeline->csf_tl_reader);
-#endif
- return ret;
+ return err;
}
+ }
+#endif
- /* Reset and initialize header streams. */
- kbase_tlstream_reset(
- &timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]);
+ /* Reset and initialize header streams. */
+ kbase_tlstream_reset(&timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]);
- timeline->obj_header_btc = obj_desc_header_size;
- timeline->aux_header_btc = aux_desc_header_size;
+ timeline->obj_header_btc = obj_desc_header_size;
+ timeline->aux_header_btc = aux_desc_header_size;
#if !MALI_USE_CSF
- /* If job dumping is enabled, readjust the software event's
- * timeout as the default value of 3 seconds is often
- * insufficient.
- */
- if (flags & BASE_TLSTREAM_JOB_DUMPING_ENABLED) {
- dev_info(kbdev->dev,
- "Job dumping is enabled, readjusting the software event's timeout\n");
- atomic_set(&kbdev->js_data.soft_job_timeout_ms,
- 1800000);
- }
+ /* If job dumping is enabled, readjust the software event's
+ * timeout as the default value of 3 seconds is often
+ * insufficient.
+ */
+ if (flags & BASE_TLSTREAM_JOB_DUMPING_ENABLED) {
+ dev_info(kbdev->dev,
+ "Job dumping is enabled, readjusting the software event's timeout\n");
+ atomic_set(&kbdev->js_data.soft_job_timeout_ms, 1800000);
+ }
#endif /* !MALI_USE_CSF */
- /* Summary stream was cleared during acquire.
- * Create static timeline objects that will be
- * read by client.
- */
- kbase_create_timeline_objects(kbdev);
+ /* Summary stream was cleared during acquire.
+ * Create static timeline objects that will be
+ * read by client.
+ */
+ kbase_create_timeline_objects(kbdev);
#ifdef CONFIG_MALI_DEVFREQ
- /* Devfreq target tracepoints are only fired when the target
- * changes, so we won't know the current target unless we
- * send it now.
- */
- kbase_tlstream_current_devfreq_target(kbdev);
+ /* Devfreq target tracepoints are only fired when the target
+ * changes, so we won't know the current target unless we
+ * send it now.
+ */
+ kbase_tlstream_current_devfreq_target(kbdev);
#endif /* CONFIG_MALI_DEVFREQ */
- /* Start the autoflush timer.
- * We must do this after creating timeline objects to ensure we
- * don't auto-flush the streams which will be reset during the
- * summarization process.
- */
- atomic_set(&timeline->autoflush_timer_active, 1);
- rcode = mod_timer(&timeline->autoflush_timer,
- jiffies +
- msecs_to_jiffies(AUTOFLUSH_INTERVAL));
- CSTD_UNUSED(rcode);
- } else {
- ret = -EBUSY;
- }
+ /* Start the autoflush timer.
+ * We must do this after creating timeline objects to ensure we
+ * don't auto-flush the streams which will be reset during the
+ * summarization process.
+ */
+ atomic_set(&timeline->autoflush_timer_active, 1);
+ rcode = mod_timer(&timeline->autoflush_timer,
+ jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+ CSTD_UNUSED(rcode);
+
+ timeline->last_acquire_time = ktime_get_raw();
+
+ return err;
+}
+
+void kbase_timeline_release(struct kbase_timeline *timeline)
+{
+ ktime_t elapsed_time;
+ s64 elapsed_time_ms, time_to_sleep;
+
+ if (WARN_ON(!timeline) || WARN_ON(!atomic_read(timeline->timeline_flags)))
+ return;
+
+ /* Get the amount of time passed since the timeline was acquired and ensure
+ * we sleep for long enough such that it has been at least
+ * TIMELINE_HYSTERESIS_TIMEOUT_MS amount of time between acquire and release.
+ * This prevents userspace from spamming acquire and release too quickly.
+ */
+ elapsed_time = ktime_sub(ktime_get_raw(), timeline->last_acquire_time);
+ elapsed_time_ms = ktime_to_ms(elapsed_time);
+ time_to_sleep = (elapsed_time_ms < 0 ? TIMELINE_HYSTERESIS_TIMEOUT_MS :
+ TIMELINE_HYSTERESIS_TIMEOUT_MS - elapsed_time_ms);
+ if (time_to_sleep > 0)
+ msleep_interruptible(time_to_sleep);
- if (ret >= 0)
- timeline->last_acquire_time = ktime_get();
+#if MALI_USE_CSF
+ kbase_csf_tl_reader_stop(&timeline->csf_tl_reader);
+#endif
- return ret;
+ /* Stop autoflush timer before releasing access to streams. */
+ atomic_set(&timeline->autoflush_timer_active, 0);
+ del_timer_sync(&timeline->autoflush_timer);
+
+ atomic_set(timeline->timeline_flags, 0);
}
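The arithmetic above is easier to follow with concrete numbers; the value of TIMELINE_HYSTERESIS_TIMEOUT_MS is defined elsewhere in the driver, so the 1000 ms figure used below is only an assumption and the helper itself is hypothetical.

/* Hypothetical helper spelling out the hysteresis arithmetic, assuming a
 * 1000 ms window: acquired 400 ms ago -> sleep 600 ms; acquired 1500 ms
 * ago -> no sleep; negative elapsed time (clock anomaly) -> sleep the
 * full window.
 */
static s64 release_sleep_ms(s64 elapsed_time_ms, s64 hysteresis_ms)
{
	if (elapsed_time_ms < 0)
		return hysteresis_ms;

	return hysteresis_ms - elapsed_time_ms; /* <= 0 means no sleep */
}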
int kbase_timeline_streams_flush(struct kbase_timeline *timeline)
@@ -275,11 +293,17 @@ int kbase_timeline_streams_flush(struct kbase_timeline *timeline)
enum tl_stream_type stype;
bool has_bytes = false;
size_t nbytes = 0;
+
+ if (WARN_ON(!timeline))
+ return -EINVAL;
+
#if MALI_USE_CSF
- int ret = kbase_csf_tl_reader_flush_buffer(&timeline->csf_tl_reader);
+ {
+ int ret = kbase_csf_tl_reader_flush_buffer(&timeline->csf_tl_reader);
- if (ret > 0)
- has_bytes = true;
+ if (ret > 0)
+ has_bytes = true;
+ }
#endif
for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) {
diff --git a/mali_kbase/tl/mali_kbase_timeline.h b/mali_kbase/tl/mali_kbase_timeline.h
index 96a4b18..62be6c6 100644
--- a/mali_kbase/tl/mali_kbase_timeline.h
+++ b/mali_kbase/tl/mali_kbase_timeline.h
@@ -117,4 +117,12 @@ void kbase_timeline_post_kbase_context_destroy(struct kbase_context *kctx);
void kbase_timeline_stats(struct kbase_timeline *timeline, u32 *bytes_collected, u32 *bytes_generated);
#endif /* MALI_UNIT_TEST */
+/**
+ * kbase_timeline_io_debugfs_init - Add a debugfs entry for reading timeline stream data
+ *
+ * @kbdev: An instance of the GPU platform device, allocated from the probe
+ * method of the driver.
+ */
+void kbase_timeline_io_debugfs_init(struct kbase_device *kbdev);
+
#endif /* _KBASE_TIMELINE_H */
diff --git a/mali_kbase/tl/mali_kbase_timeline_io.c b/mali_kbase/tl/mali_kbase_timeline_io.c
index 3391e75..03178cc 100644
--- a/mali_kbase/tl/mali_kbase_timeline_io.c
+++ b/mali_kbase/tl/mali_kbase_timeline_io.c
@@ -24,27 +24,20 @@
#include "mali_kbase_tracepoints.h"
#include "mali_kbase_timeline.h"
-#include <linux/delay.h>
+#include <device/mali_kbase_device.h>
+
#include <linux/poll.h>
+#include <linux/version_compat_defs.h>
+#include <linux/anon_inodes.h>
/* The timeline stream file operations functions. */
static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer,
size_t size, loff_t *f_pos);
-static unsigned int kbasep_timeline_io_poll(struct file *filp,
- poll_table *wait);
+static __poll_t kbasep_timeline_io_poll(struct file *filp, poll_table *wait);
static int kbasep_timeline_io_release(struct inode *inode, struct file *filp);
static int kbasep_timeline_io_fsync(struct file *filp, loff_t start, loff_t end,
int datasync);
-/* The timeline stream file operations structure. */
-const struct file_operations kbasep_tlstream_fops = {
- .owner = THIS_MODULE,
- .release = kbasep_timeline_io_release,
- .read = kbasep_timeline_io_read,
- .poll = kbasep_timeline_io_poll,
- .fsync = kbasep_timeline_io_fsync,
-};
-
/**
* kbasep_timeline_io_packet_pending - check timeline streams for pending
* packets
@@ -292,7 +285,7 @@ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer,
*
* Return: POLLIN if data can be read without blocking, otherwise zero
*/
-static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait)
+static __poll_t kbasep_timeline_io_poll(struct file *filp, poll_table *wait)
{
struct kbase_tlstream *stream;
unsigned int rb_idx;
@@ -302,20 +295,90 @@ static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait)
KBASE_DEBUG_ASSERT(wait);
if (WARN_ON(!filp->private_data))
- return -EFAULT;
+ return (__force __poll_t)-EFAULT;
timeline = (struct kbase_timeline *)filp->private_data;
/* If there are header bytes to copy, read will not block */
if (kbasep_timeline_has_header_data(timeline))
- return POLLIN;
+ return (__force __poll_t)POLLIN;
poll_wait(filp, &timeline->event_queue, wait);
if (kbasep_timeline_io_packet_pending(timeline, &stream, &rb_idx))
- return POLLIN;
+ return (__force __poll_t)POLLIN;
return 0;
}
+int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags)
+{
+ /* The timeline stream file operations structure. */
+ static const struct file_operations kbasep_tlstream_fops = {
+ .owner = THIS_MODULE,
+ .release = kbasep_timeline_io_release,
+ .read = kbasep_timeline_io_read,
+ .poll = kbasep_timeline_io_poll,
+ .fsync = kbasep_timeline_io_fsync,
+ };
+ int err;
+
+ if (WARN_ON(!kbdev) || (flags & ~BASE_TLSTREAM_FLAGS_MASK))
+ return -EINVAL;
+
+ err = kbase_timeline_acquire(kbdev, flags);
+ if (err)
+ return err;
+
+ err = anon_inode_getfd("[mali_tlstream]", &kbasep_tlstream_fops, kbdev->timeline,
+ O_RDONLY | O_CLOEXEC);
+ if (err < 0)
+ kbase_timeline_release(kbdev->timeline);
+
+ return err;
+}
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+static int kbasep_timeline_io_open(struct inode *in, struct file *file)
+{
+ struct kbase_device *const kbdev = in->i_private;
+
+ if (WARN_ON(!kbdev))
+ return -EFAULT;
+
+ file->private_data = kbdev->timeline;
+ return kbase_timeline_acquire(kbdev, BASE_TLSTREAM_FLAGS_MASK &
+ ~BASE_TLSTREAM_JOB_DUMPING_ENABLED);
+}
+
+void kbase_timeline_io_debugfs_init(struct kbase_device *const kbdev)
+{
+ static const struct file_operations kbasep_tlstream_debugfs_fops = {
+ .owner = THIS_MODULE,
+ .open = kbasep_timeline_io_open,
+ .release = kbasep_timeline_io_release,
+ .read = kbasep_timeline_io_read,
+ .poll = kbasep_timeline_io_poll,
+ .fsync = kbasep_timeline_io_fsync,
+ };
+ struct dentry *file;
+
+ if (WARN_ON(!kbdev) || WARN_ON(IS_ERR_OR_NULL(kbdev->mali_debugfs_directory)))
+ return;
+
+ file = debugfs_create_file("tlstream", 0444, kbdev->mali_debugfs_directory, kbdev,
+ &kbasep_tlstream_debugfs_fops);
+
+ if (IS_ERR_OR_NULL(file))
+ dev_warn(kbdev->dev, "Unable to create timeline debugfs entry");
+}
+#else
+/*
+ * Stub function for when debugfs is disabled
+ */
+void kbase_timeline_io_debugfs_init(struct kbase_device *const kbdev)
+{
+}
+#endif
+
/**
* kbasep_timeline_io_release - release timeline stream descriptor
* @inode: Pointer to inode structure
@@ -325,55 +388,18 @@ static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait)
*/
static int kbasep_timeline_io_release(struct inode *inode, struct file *filp)
{
- struct kbase_timeline *timeline;
- ktime_t elapsed_time;
- s64 elapsed_time_ms, time_to_sleep;
-
- KBASE_DEBUG_ASSERT(inode);
- KBASE_DEBUG_ASSERT(filp);
- KBASE_DEBUG_ASSERT(filp->private_data);
-
CSTD_UNUSED(inode);
- timeline = (struct kbase_timeline *)filp->private_data;
-
- /* Get the amount of time passed since the timeline was acquired and ensure
- * we sleep for long enough such that it has been at least
- * TIMELINE_HYSTERESIS_TIMEOUT_MS amount of time between acquire and release.
- * This prevents userspace from spamming acquire and release too quickly.
- */
- elapsed_time = ktime_sub(ktime_get(), timeline->last_acquire_time);
- elapsed_time_ms = ktime_to_ms(elapsed_time);
- time_to_sleep = MIN(TIMELINE_HYSTERESIS_TIMEOUT_MS,
- TIMELINE_HYSTERESIS_TIMEOUT_MS - elapsed_time_ms);
- if (time_to_sleep > 0)
- msleep(time_to_sleep);
-
-#if MALI_USE_CSF
- kbase_csf_tl_reader_stop(&timeline->csf_tl_reader);
-#endif
-
- /* Stop autoflush timer before releasing access to streams. */
- atomic_set(&timeline->autoflush_timer_active, 0);
- del_timer_sync(&timeline->autoflush_timer);
-
- atomic_set(timeline->timeline_flags, 0);
+ kbase_timeline_release(filp->private_data);
return 0;
}
static int kbasep_timeline_io_fsync(struct file *filp, loff_t start, loff_t end,
int datasync)
{
- struct kbase_timeline *timeline;
-
CSTD_UNUSED(start);
CSTD_UNUSED(end);
CSTD_UNUSED(datasync);
- if (WARN_ON(!filp->private_data))
- return -EFAULT;
-
- timeline = (struct kbase_timeline *)filp->private_data;
-
- return kbase_timeline_streams_flush(timeline);
+ return kbase_timeline_streams_flush(filp->private_data);
}
diff --git a/mali_kbase/tl/mali_kbase_timeline_priv.h b/mali_kbase/tl/mali_kbase_timeline_priv.h
index bf2c385..de30bcc 100644
--- a/mali_kbase/tl/mali_kbase_timeline_priv.h
+++ b/mali_kbase/tl/mali_kbase_timeline_priv.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -51,7 +51,7 @@
* @event_queue: Timeline stream event queue
* @bytes_collected: Number of bytes read by user
* @timeline_flags: Zero, if timeline is disabled. Timeline stream flags
- * otherwise. See kbase_timeline_io_acquire().
+ * otherwise. See kbase_timeline_acquire().
* @obj_header_btc: Remaining bytes to copy for the object stream header
* @aux_header_btc: Remaining bytes to copy for the aux stream header
* @last_acquire_time: The time at which timeline was last acquired.
@@ -77,8 +77,27 @@ struct kbase_timeline {
#endif
};
-extern const struct file_operations kbasep_tlstream_fops;
-
void kbase_create_timeline_objects(struct kbase_device *kbdev);
+/**
+ * kbase_timeline_acquire - acquire timeline for a userspace client.
+ * @kbdev: An instance of the GPU platform device, allocated from the probe
+ * method of the driver.
+ * @flags: Timeline stream flags
+ *
+ * Each timeline instance can be acquired by only one userspace client at a time.
+ *
+ * Return: Zero on success, error number on failure (e.g. if already acquired).
+ */
+int kbase_timeline_acquire(struct kbase_device *kbdev, u32 flags);
+
+/**
+ * kbase_timeline_release - release timeline for a userspace client.
+ * @timeline: Timeline instance to be stopped. It must be previously acquired
+ * with kbase_timeline_acquire().
+ *
+ * Releasing the timeline instance allows it to be acquired by another userspace client.
+ */
+void kbase_timeline_release(struct kbase_timeline *timeline);
+
#endif /* _KBASE_TIMELINE_PRIV_H */
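Since splitting the file-descriptor handling out of kbase_timeline_io_acquire() into these two functions is the core of the timeline change, a minimal in-kernel usage sketch may help; error handling is trimmed and the zero flags value is only an example.

/* Minimal lifecycle sketch (error handling trimmed, flags value is only an
 * example). Only one client can hold the timeline at a time.
 */
static int timeline_capture_sketch(struct kbase_device *kbdev)
{
	int err = kbase_timeline_acquire(kbdev, 0);

	if (err)
		return err; /* e.g. -EBUSY if another client already holds it */

	/* ... tracepoints are recorded; a reader (the anon-inode fd or the
	 * new debugfs file) drains the streams ...
	 */
	kbase_timeline_streams_flush(kbdev->timeline);

	kbase_timeline_release(kbdev->timeline);
	return 0;
}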
diff --git a/mali_kbase/tl/mali_kbase_tracepoints.c b/mali_kbase/tl/mali_kbase_tracepoints.c
index 6aae4e0..3ac7850 100644
--- a/mali_kbase/tl/mali_kbase_tracepoints.c
+++ b/mali_kbase/tl/mali_kbase_tracepoints.c
@@ -305,11 +305,11 @@ enum tl_msg_id_obj {
"@p", \
"atom") \
TRACEPOINT_DESC(KBASE_TL_JD_DONE_NO_LOCK_START, \
- "Within function jd_done_nolock", \
+ "Within function kbase_jd_done_nolock", \
"@p", \
"atom") \
TRACEPOINT_DESC(KBASE_TL_JD_DONE_NO_LOCK_END, \
- "Within function jd_done_nolock - end", \
+ "Within function kbase_jd_done_nolock - end", \
"@p", \
"atom") \
TRACEPOINT_DESC(KBASE_TL_JD_DONE_START, \
diff --git a/mali_kbase/tl/mali_kbase_tracepoints.h b/mali_kbase/tl/mali_kbase_tracepoints.h
index b15fe6a..f01fc54 100644
--- a/mali_kbase/tl/mali_kbase_tracepoints.h
+++ b/mali_kbase/tl/mali_kbase_tracepoints.h
@@ -1686,7 +1686,7 @@ struct kbase_tlstream;
} while (0)
/**
- * KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_START - Within function jd_done_nolock
+ * KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_START - Within function kbase_jd_done_nolock
*
* @kbdev: Kbase device
* @atom: Atom identifier
@@ -1705,7 +1705,7 @@ struct kbase_tlstream;
} while (0)
/**
- * KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_END - Within function jd_done_nolock - end
+ * KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_END - Within function kbase_jd_done_nolock - end
*
* @kbdev: Kbase device
* @atom: Atom identifier
diff --git a/mali_pixel/memory_group_manager.c b/mali_pixel/memory_group_manager.c
index faa414e..436404e 100644
--- a/mali_pixel/memory_group_manager.c
+++ b/mali_pixel/memory_group_manager.c
@@ -549,7 +549,7 @@ static u64 mgm_update_gpu_pte(
switch (group_id) {
case MGM_RESERVED_GROUP_ID:
- case MGM_IMPORTED_MEMORY_GROUP_ID:
+ case MGM_IMPORTED_MEMORY_GROUP_ID:
/* The reserved group doesn't set PBHA bits */
/* TODO: Determine what to do with imported memory */
break;
@@ -745,13 +745,14 @@ static int memory_group_manager_probe(struct platform_device *pdev)
return -ENOMEM;
mgm_dev->owner = THIS_MODULE;
- mgm_dev->ops.mgm_alloc_page = mgm_alloc_page;
- mgm_dev->ops.mgm_free_page = mgm_free_page;
- mgm_dev->ops.mgm_get_import_memory_id =
- mgm_get_import_memory_id;
- mgm_dev->ops.mgm_vmf_insert_pfn_prot = mgm_vmf_insert_pfn_prot;
- mgm_dev->ops.mgm_update_gpu_pte = mgm_update_gpu_pte;
- mgm_dev->ops.mgm_pte_to_original_pte = mgm_pte_to_original_pte;
+ mgm_dev->ops = (struct memory_group_manager_ops){
+ .mgm_alloc_page = mgm_alloc_page,
+ .mgm_free_page = mgm_free_page,
+ .mgm_get_import_memory_id = mgm_get_import_memory_id,
+ .mgm_update_gpu_pte = mgm_update_gpu_pte,
+ .mgm_pte_to_original_pte = mgm_pte_to_original_pte,
+ .mgm_vmf_insert_pfn_prot = mgm_vmf_insert_pfn_prot,
+ };
mgm_data = kzalloc(sizeof(*mgm_data), GFP_KERNEL);
if (!mgm_data) {