author    Debarshi Dutta <debarshid@google.com>  2023-06-02 13:36:22 +0000
committer Debarshi Dutta <debarshid@google.com>  2023-07-12 18:55:15 +0000
commit    20fff721667a227b3d6decf9dbc3798476390302
tree      fba7129be28198dc2af1fb34fe0ec3a9ec0ce572 /mali_kbase
parent    9e12ba5986f91fa0192b1ab55fafcea5e9b37094
Merge upstream DDK R43P0 KMD
Merge DDK version R43P0 from upstream branch

Provenance: 48a9c7e25986318c8475bc245de51e7bec2606e8 (ipdelivery/EAC/v_r43p0)

VX504X08X-BU-00000-r43p0-01eac0 - Valhall Android DDK
VX504X08X-BU-60000-r43p0-01eac0 - Valhall Android Document Bundle
VX504X08X-DC-11001-r43p0-01eac0 - Valhall Android DDK Software Errata
VX504X08X-SW-99006-r43p0-01eac0 - Valhall Android Renderscript AOSP parts

Bug 278174418
Commit-Topic: R43P0_KMD
Signed-off-by: Debarshi Dutta <debarshid@google.com>
Change-Id: I84fb19e7ce5f28e735d44a4993d51bd985aac80b
Diffstat (limited to 'mali_kbase')
-rw-r--r--  mali_kbase/Kbuild | 38
-rw-r--r--  mali_kbase/Kconfig | 99
-rw-r--r--  mali_kbase/Makefile | 92
-rw-r--r--  mali_kbase/Mconfig | 98
-rw-r--r--  mali_kbase/arbiter/mali_kbase_arbif.c | 13
-rw-r--r--  mali_kbase/arbiter/mali_kbase_arbiter_interface.h | 170
-rw-r--r--  mali_kbase/arbitration/Kconfig | 49
-rw-r--r--  mali_kbase/arbitration/ptm/Kconfig | 28
-rw-r--r--  mali_kbase/backend/gpu/Kbuild | 10
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c | 51
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h | 21
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_devfreq.c | 1
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_instr_defs.h | 4
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_irq_linux.c | 13
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_as.c | 8
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_hw.c | 79
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_internal.h | 47
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_rb.c | 130
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_rb.h | 14
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_js_backend.c | 5
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_dummy.c | 423
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_dummy.h | 29
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_error_generator.c | 28
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_linux.c | 33
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_linux.h | 125
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_backend.c | 55
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_ca.c | 4
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_driver.c | 181
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_internal.h | 23
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h | 16
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_metrics.c | 4
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_time.c | 79
-rw-r--r--  mali_kbase/build.bp | 45
-rw-r--r--  mali_kbase/context/backend/mali_kbase_context_csf.c | 5
-rw-r--r--  mali_kbase/context/backend/mali_kbase_context_jm.c | 5
-rw-r--r--  mali_kbase/context/mali_kbase_context.c | 20
-rw-r--r--  mali_kbase/context/mali_kbase_context.h | 15
-rw-r--r--  mali_kbase/csf/Kbuild | 15
-rw-r--r--  mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c | 4
-rw-r--r--  mali_kbase/csf/mali_kbase_csf.c | 1136
-rw-r--r--  mali_kbase/csf/mali_kbase_csf.h | 43
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_csg_debugfs.c | 204
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_csg_debugfs.h | 9
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_defs.h | 365
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_event.c | 11
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware.c | 667
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware.h | 111
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware_cfg.c | 62
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware_cfg.h | 16
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware_core_dump.c | 809
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware_core_dump.h | 65
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware_log.c | 156
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware_log.h | 21
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c | 150
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c | 50
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_kcpu.c | 331
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_kcpu.h | 28
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c | 817
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.h | 139
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_registers.h | 133
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_reset_gpu.c | 9
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_scheduler.c | 1357
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_scheduler.h | 39
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_sync_debugfs.c | 788
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_sync_debugfs.h (renamed from mali_kbase/mali_kbase_mem_profile_debugfs_buf_size.h) | 23
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tiler_heap.c | 172
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tiler_heap.h | 20
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tiler_heap_def.h | 48
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.c | 371
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.h | 80
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tl_reader.c | 91
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tl_reader.h | 34
-rw-r--r--  mali_kbase/csf/mali_kbase_debug_csf_fault.c | 271
-rw-r--r--  mali_kbase/csf/mali_kbase_debug_csf_fault.h | 137
-rw-r--r--  mali_kbase/debug/Kbuild | 3
-rw-r--r--  mali_kbase/debug/backend/mali_kbase_debug_coresight_csf.c | 851
-rw-r--r--  mali_kbase/debug/backend/mali_kbase_debug_coresight_internal_csf.h | 182
-rw-r--r--  mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_csf.h | 37
-rw-r--r--  mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_csf.h | 23
-rw-r--r--  mali_kbase/debug/mali_kbase_debug_ktrace_codes.h | 7
-rw-r--r--  mali_kbase/debug/mali_kbase_debug_linux_ktrace.h | 5
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_csf.c | 42
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_hw_csf.c | 27
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_hw_jm.c | 4
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_jm.c | 33
-rw-r--r--  mali_kbase/device/mali_kbase_device.c | 27
-rw-r--r--  mali_kbase/device/mali_kbase_device.h | 6
-rw-r--r--  mali_kbase/device/mali_kbase_device_hw.c | 36
-rw-r--r--  mali_kbase/device/mali_kbase_device_internal.h | 12
-rw-r--r--  mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c | 66
-rw-r--r--  mali_kbase/gpu/backend/mali_kbase_gpu_fault_jm.c | 4
-rw-r--r--  mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h | 10
-rw-r--r--  mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h | 24
-rw-r--r--  mali_kbase/gpu/mali_kbase_gpu_fault.h | 6
-rw-r--r--  mali_kbase/gpu/mali_kbase_gpu_regmap.h | 127
-rw-r--r--  mali_kbase/hwcnt/Kbuild | 37
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend.h (renamed from mali_kbase/mali_kbase_hwcnt_backend.h) | 48
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c (renamed from mali_kbase/mali_kbase_hwcnt_backend_csf.c) | 716
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h (renamed from mali_kbase/mali_kbase_hwcnt_backend_csf.h) | 51
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h (renamed from mali_kbase/mali_kbase_hwcnt_backend_csf_if.h) | 69
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c (renamed from mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c) | 304
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h (renamed from mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.h) | 11
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c (renamed from mali_kbase/mali_kbase_hwcnt_backend_jm.c) | 308
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.h (renamed from mali_kbase/mali_kbase_hwcnt_backend_jm.h) | 12
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c (renamed from mali_kbase/mali_kbase_hwcnt_backend_jm_watchdog.c) | 58
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h (renamed from mali_kbase/mali_kbase_hwcnt_backend_jm_watchdog.h) | 6
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt.c (renamed from mali_kbase/mali_kbase_hwcnt.c) | 129
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_accumulator.h (renamed from mali_kbase/mali_kbase_hwcnt_accumulator.h) | 24
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_context.h (renamed from mali_kbase/mali_kbase_hwcnt_context.h) | 13
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c (renamed from mali_kbase/mali_kbase_hwcnt_gpu.c) | 221
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h (renamed from mali_kbase/mali_kbase_hwcnt_gpu.h) | 51
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c (renamed from mali_kbase/mali_kbase_hwcnt_gpu_narrow.c) | 129
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.h (renamed from mali_kbase/mali_kbase_hwcnt_gpu_narrow.h) | 97
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_types.c (renamed from mali_kbase/mali_kbase_hwcnt_types.c) | 304
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_types.h (renamed from mali_kbase/mali_kbase_hwcnt_types.h) | 297
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.c (renamed from mali_kbase/mali_kbase_hwcnt_virtualizer.c) | 240
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.h (renamed from mali_kbase/mali_kbase_hwcnt_virtualizer.h) | 43
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if.h (renamed from mali_kbase/mali_kbase_hwcnt_watchdog_if.h) | 17
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.c (renamed from mali_kbase/mali_kbase_hwcnt_watchdog_if_timer.c) | 43
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h (renamed from mali_kbase/mali_kbase_hwcnt_watchdog_if_timer.h) | 8
-rw-r--r--  mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h | 4
-rw-r--r--  mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c | 50
-rw-r--r--  mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c | 35
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa.c | 30
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa.h | 4
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa_simple.c | 10
-rw-r--r--  mali_kbase/jm/mali_kbase_jm_defs.h | 78
-rw-r--r--  mali_kbase/jm/mali_kbase_jm_js.h | 23
-rw-r--r--  mali_kbase/jm/mali_kbase_js_defs.h | 5
-rw-r--r--  mali_kbase/mali_base_hwconfig_features.h | 18
-rw-r--r--  mali_kbase/mali_base_hwconfig_issues.h | 201
-rw-r--r--  mali_kbase/mali_kbase.h | 28
-rw-r--r--  mali_kbase/mali_kbase_as_fault_debugfs.c | 10
-rw-r--r--  mali_kbase/mali_kbase_config_defaults.h | 21
-rw-r--r--  mali_kbase/mali_kbase_core_linux.c | 388
-rw-r--r--  mali_kbase/mali_kbase_ctx_sched.c | 38
-rw-r--r--  mali_kbase/mali_kbase_debug_mem_allocs.c | 138
-rw-r--r--  mali_kbase/mali_kbase_debug_mem_allocs.h (renamed from mali_kbase/platform/devicetree/mali_kbase_config_platform.c) | 38
-rw-r--r--  mali_kbase/mali_kbase_debug_mem_view.h | 4
-rw-r--r--  mali_kbase/mali_kbase_debugfs_helper.c | 5
-rw-r--r--  mali_kbase/mali_kbase_defs.h | 236
-rw-r--r--  mali_kbase/mali_kbase_dma_fence.c | 491
-rw-r--r--  mali_kbase/mali_kbase_dma_fence.h | 150
-rw-r--r--  mali_kbase/mali_kbase_fence.c | 94
-rw-r--r--  mali_kbase/mali_kbase_fence.h | 210
-rw-r--r--  mali_kbase/mali_kbase_fence_ops.c | 50
-rw-r--r--  mali_kbase/mali_kbase_gpuprops.c | 65
-rw-r--r--  mali_kbase/mali_kbase_gwt.c | 15
-rw-r--r--  mali_kbase/mali_kbase_hw.c | 69
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_jm.h | 17
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_time.h | 62
-rw-r--r--  mali_kbase/mali_kbase_jd.c | 217
-rw-r--r--  mali_kbase/mali_kbase_jd_debugfs.c | 51
-rw-r--r--  mali_kbase/mali_kbase_jm.c | 18
-rw-r--r--  mali_kbase/mali_kbase_js.c | 225
-rw-r--r--  mali_kbase/mali_kbase_kinstr_jm.c | 31
-rw-r--r--  mali_kbase/mali_kbase_kinstr_prfcnt.c | 540
-rw-r--r--  mali_kbase/mali_kbase_kinstr_prfcnt.h | 3
-rw-r--r--  mali_kbase/mali_kbase_linux.h | 4
-rw-r--r--  mali_kbase/mali_kbase_mem.c | 691
-rw-r--r--  mali_kbase/mali_kbase_mem.h | 404
-rw-r--r--  mali_kbase/mali_kbase_mem_linux.c | 563
-rw-r--r--  mali_kbase/mali_kbase_mem_migrate.c | 635
-rw-r--r--  mali_kbase/mali_kbase_mem_migrate.h | 108
-rw-r--r--  mali_kbase/mali_kbase_mem_pool.c | 244
-rw-r--r--  mali_kbase/mali_kbase_mem_pool_group.c | 29
-rw-r--r--  mali_kbase/mali_kbase_mem_pool_group.h | 7
-rw-r--r--  mali_kbase/mali_kbase_pbha.c | 66
-rw-r--r--  mali_kbase/mali_kbase_pbha_debugfs.c | 95
-rw-r--r--  mali_kbase/mali_kbase_platform_fake.c | 11
-rw-r--r--  mali_kbase/mali_kbase_pm.c | 4
-rw-r--r--  mali_kbase/mali_kbase_refcount_defs.h | 57
-rw-r--r--  mali_kbase/mali_kbase_reset_gpu.h | 12
-rw-r--r--  mali_kbase/mali_kbase_softjobs.c | 78
-rw-r--r--  mali_kbase/mali_kbase_sync.h | 5
-rw-r--r--  mali_kbase/mali_kbase_sync_android.c | 515
-rw-r--r--  mali_kbase/mali_kbase_sync_file.c | 146
-rw-r--r--  mali_kbase/mali_kbase_vinstr.c | 26
-rw-r--r--  mali_kbase/mali_malisw.h | 12
-rw-r--r--  mali_kbase/mmu/backend/mali_kbase_mmu_csf.c | 21
-rw-r--r--  mali_kbase/mmu/backend/mali_kbase_mmu_jm.c | 10
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.c | 2140
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.h | 169
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_hw.h | 47
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_hw_direct.c | 189
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c | 2
-rw-r--r--  mali_kbase/platform/devicetree/Kbuild | 1
-rw-r--r--  mali_kbase/platform/devicetree/mali_kbase_config_platform.h | 3
-rw-r--r--  mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c | 6
-rw-r--r--  mali_kbase/platform/meson/mali_kbase_runtime_pm.c | 27
-rw-r--r--  mali_kbase/tests/Mconfig | 16
-rw-r--r--  mali_kbase/tests/build.bp | 10
-rw-r--r--  mali_kbase/tests/include/kutf/kutf_helpers.h | 27
-rw-r--r--  mali_kbase/tests/kutf/kutf_helpers.c | 14
-rw-r--r--  mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c | 2
-rw-r--r--  mali_kbase/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c | 7
-rw-r--r--  mali_kbase/tests/mali_kutf_mgm_integration_test/build.bp | 2
-rw-r--r--  mali_kbase/thirdparty/mali_kbase_mmap.c | 53
-rw-r--r--  mali_kbase/tl/Kbuild | 2
-rw-r--r--  mali_kbase/tl/mali_kbase_timeline.c | 4
-rw-r--r--  mali_kbase/tl/mali_kbase_timeline_io.c | 24
-rw-r--r--  mali_kbase/tl/mali_kbase_tlstream.h | 12
-rw-r--r--  mali_kbase/tl/mali_kbase_tracepoints.c | 392
-rw-r--r--  mali_kbase/tl/mali_kbase_tracepoints.h | 483
204 files changed, 17416 insertions, 9241 deletions
diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild
index b4431d6..9da4141 100644
--- a/mali_kbase/Kbuild
+++ b/mali_kbase/Kbuild
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -59,10 +59,8 @@ ifeq ($(CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS), y)
endif
ifeq ($(CONFIG_MALI_FENCE_DEBUG), y)
- ifneq ($(CONFIG_SYNC), y)
- ifneq ($(CONFIG_SYNC_FILE), y)
- $(error CONFIG_MALI_FENCE_DEBUG depends on CONFIG_SYNC || CONFIG_SYNC_FILE to be set in Kernel configuration)
- endif
+ ifneq ($(CONFIG_SYNC_FILE), y)
+ $(error CONFIG_MALI_FENCE_DEBUG depends on CONFIG_SYNC_FILE to be set in Kernel configuration)
endif
endif
@@ -71,7 +69,7 @@ endif
#
# Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= '"r38p1-01eac0"'
+MALI_RELEASE_NAME ?= '"r43p0-01eac0"'
# We are building for Pixel
CONFIG_MALI_PLATFORM_NAME="pixel"
@@ -162,13 +160,14 @@ subdir-ccflags-y += $(ccflags-y)
# Kernel Modules
#
obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o
-obj-$(CONFIG_MALI_ARBITRATION) += arbitration/
+obj-$(CONFIG_MALI_ARBITRATION) += ../arbitration/
obj-$(CONFIG_MALI_KUTF) += tests/
mali_kbase-y := \
mali_kbase_cache_policy.o \
mali_kbase_ccswe.o \
mali_kbase_mem.o \
+ mali_kbase_mem_migrate.o \
mali_kbase_mem_pool_group.o \
mali_kbase_native_mgm.o \
mali_kbase_ctx_sched.o \
@@ -177,12 +176,6 @@ mali_kbase-y := \
mali_kbase_config.o \
mali_kbase_kinstr_prfcnt.o \
mali_kbase_vinstr.o \
- mali_kbase_hwcnt.o \
- mali_kbase_hwcnt_gpu.o \
- mali_kbase_hwcnt_gpu_narrow.o \
- mali_kbase_hwcnt_types.o \
- mali_kbase_hwcnt_virtualizer.o \
- mali_kbase_hwcnt_watchdog_if_timer.o \
mali_kbase_softjobs.o \
mali_kbase_hw.o \
mali_kbase_debug.o \
@@ -193,6 +186,7 @@ mali_kbase-y := \
mali_kbase_disjoint_events.o \
mali_kbase_debug_mem_view.o \
mali_kbase_debug_mem_zones.o \
+ mali_kbase_debug_mem_allocs.o \
mali_kbase_smc.o \
mali_kbase_mem_pool.o \
mali_kbase_mem_pool_debugfs.o \
@@ -209,24 +203,14 @@ mali_kbase-$(CONFIG_DEBUG_FS) += mali_kbase_pbha_debugfs.o
mali_kbase-$(CONFIG_MALI_CINSTR_GWT) += mali_kbase_gwt.o
-mali_kbase-$(CONFIG_SYNC) += \
- mali_kbase_sync_android.o \
- mali_kbase_sync_common.o
-
mali_kbase-$(CONFIG_SYNC_FILE) += \
mali_kbase_fence_ops.o \
mali_kbase_sync_file.o \
mali_kbase_sync_common.o
-ifeq ($(CONFIG_MALI_CSF_SUPPORT),y)
- mali_kbase-y += \
- mali_kbase_hwcnt_backend_csf.o \
- mali_kbase_hwcnt_backend_csf_if_fw.o
-else
+ifneq ($(CONFIG_MALI_CSF_SUPPORT),y)
mali_kbase-y += \
mali_kbase_jm.o \
- mali_kbase_hwcnt_backend_jm.o \
- mali_kbase_hwcnt_backend_jm_watchdog.o \
mali_kbase_dummy_job_wa.o \
mali_kbase_debug_job_fault.o \
mali_kbase_event.o \
@@ -236,11 +220,6 @@ else
mali_kbase_js_ctx_attr.o \
mali_kbase_kinstr_jm.o
- mali_kbase-$(CONFIG_MALI_DMA_FENCE) += \
- mali_kbase_fence_ops.o \
- mali_kbase_dma_fence.o \
- mali_kbase_fence.o
-
mali_kbase-$(CONFIG_SYNC_FILE) += \
mali_kbase_fence_ops.o \
mali_kbase_fence.o
@@ -254,6 +233,7 @@ INCLUDE_SUBDIR = \
$(src)/backend/gpu/Kbuild \
$(src)/mmu/Kbuild \
$(src)/tl/Kbuild \
+ $(src)/hwcnt/Kbuild \
$(src)/gpu/Kbuild \
$(src)/thirdparty/Kbuild \
$(src)/platform/$(MALI_PLATFORM_DIR)/Kbuild
diff --git a/mali_kbase/Kconfig b/mali_kbase/Kconfig
index de27ae4..46e3546 100644
--- a/mali_kbase/Kconfig
+++ b/mali_kbase/Kconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -43,9 +43,30 @@ config MALI_PLATFORM_NAME
include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must
exist.
+choice
+ prompt "Mali HW backend"
+ depends on MALI_MIDGARD
+ default MALI_REAL_HW
+
config MALI_REAL_HW
+ bool "Enable build of Mali kernel driver for real HW"
depends on MALI_MIDGARD
- def_bool !MALI_NO_MALI
+ help
+ This is the default HW backend.
+
+config MALI_NO_MALI
+ bool "Enable build of Mali kernel driver for No Mali"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ help
+ This can be used to test the driver in a simulated environment
+ whereby the hardware is not physically present. If the hardware is physically
+ present it will not be used. This can be used to test the majority of the
+ driver without needing actual hardware or for software benchmarking.
+ All calls to the simulated hardware will complete immediately as if the hardware
+ completed the task.
+
+
+endchoice
menu "Platform specific options"
source "drivers/gpu/arm/midgard/platform/Kconfig"
@@ -94,16 +115,6 @@ config MALI_MIDGARD_ENABLE_TRACE
Enables tracing in kbase. Trace log available through
the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
-config MALI_DMA_FENCE
- bool "Enable DMA_BUF fence support for Mali"
- depends on MALI_MIDGARD
- default n
- help
- Support DMA_BUF fences for Mali.
-
- This option should only be enabled if the Linux Kernel has built in
- support for DMA_BUF fences.
-
config MALI_ARBITER_SUPPORT
bool "Enable arbiter support for Mali"
depends on MALI_MIDGARD && !MALI_CSF_SUPPORT
@@ -120,7 +131,7 @@ config MALI_DMA_BUF_MAP_ON_DEMAND
depends on MALI_MIDGARD
default n
help
- This option caused kbase to set up the GPU mapping of imported
+ This option will cause kbase to set up the GPU mapping of imported
dma-buf when needed to run atoms. This is the legacy behavior.
This is intended for testing and the option will get removed in the
@@ -140,6 +151,11 @@ config MALI_DMA_BUF_LEGACY_COMPAT
flushes in other drivers. This only has an effect for clients using
UK 11.18 or older. For later UK versions it is not possible.
+config MALI_CORESIGHT
+ depends on MALI_MIDGARD && MALI_CSF_SUPPORT && !MALI_NO_MALI
+ bool "Enable Kbase CoreSight tracing support"
+ default n
+
menuconfig MALI_EXPERT
depends on MALI_MIDGARD
bool "Enable Expert Settings"
@@ -150,7 +166,19 @@ menuconfig MALI_EXPERT
if MALI_EXPERT
-config MALI_2MB_ALLOC
+config LARGE_PAGE_ALLOC_OVERRIDE
+ bool "Override default setting of 2MB pages"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ default n
+ help
+ An override config for LARGE_PAGE_ALLOC config.
+ When LARGE_PAGE_ALLOC_OVERRIDE is Y, 2MB page allocation will be
+ enabled by LARGE_PAGE_ALLOC. When this is N, the feature will be
+ enabled when GPU HW satisfies requirements.
+
+ If in doubt, say N
+
+config LARGE_PAGE_ALLOC
bool "Attempt to allocate 2MB pages"
depends on MALI_MIDGARD && MALI_EXPERT
default n
@@ -159,6 +187,10 @@ config MALI_2MB_ALLOC
allocate 2MB pages from the kernel. This reduces TLB pressure and
helps to prevent memory fragmentation.
+ Note this config applies only when LARGE_PAGE_ALLOC_OVERRIDE config
+ is enabled and enabling this on a GPU HW that does not satisfy
+ requirements can cause serious problem.
+
If in doubt, say N
config MALI_MEMORY_FULLY_BACKED
@@ -187,18 +219,6 @@ config MALI_CORESTACK
comment "Platform options"
depends on MALI_MIDGARD && MALI_EXPERT
-config MALI_NO_MALI
- bool "Enable No Mali"
- depends on MALI_MIDGARD && MALI_EXPERT
- default n
- help
- This can be used to test the driver in a simulated environment
- whereby the hardware is not physically present. If the hardware is physically
- present it will not be used. This can be used to test the majority of the
- driver without needing actual hardware or for software benchmarking.
- All calls to the simulated hardware will complete immediately as if the hardware
- completed the task.
-
config MALI_ERROR_INJECT
bool "Enable No Mali error injection"
depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI
@@ -206,31 +226,9 @@ config MALI_ERROR_INJECT
help
Enables insertion of errors to test module failure and recovery mechanisms.
-config MALI_GEM5_BUILD
- bool "Enable build of Mali kernel driver for GEM5"
- depends on MALI_MIDGARD && MALI_EXPERT
- default n
- help
- This option is to do a Mali GEM5 build.
- If unsure, say N.
-
comment "Debug options"
depends on MALI_MIDGARD && MALI_EXPERT
-config MALI_FW_CORE_DUMP
- bool "Enable support for FW core dump"
- depends on MALI_MIDGARD && MALI_EXPERT && MALI_CSF_SUPPORT
- default n
- help
- Adds ability to request firmware core dump
-
- Example:
- * To explicitly request core dump:
- echo 1 >/sys/kernel/debug/mali0/fw_core_dump
- * To output current core dump (after explicitly requesting a core dump,
- or kernel driver reported an internal firmware error):
- cat /sys/kernel/debug/mali0/fw_core_dump
-
config MALI_DEBUG
bool "Enable debug build"
depends on MALI_MIDGARD && MALI_EXPERT
@@ -240,7 +238,7 @@ config MALI_DEBUG
config MALI_FENCE_DEBUG
bool "Enable debug sync fence usage"
- depends on MALI_MIDGARD && MALI_EXPERT && (SYNC || SYNC_FILE)
+ depends on MALI_MIDGARD && MALI_EXPERT && SYNC_FILE
default y if MALI_DEBUG
help
Select this option to enable additional checking and reporting on the
@@ -397,9 +395,6 @@ config MALI_ARBITRATION
virtualization setup for Mali
If unsure, say N.
-if MALI_ARBITRATION
-source "drivers/gpu/arm/midgard/arbitration/Kconfig"
-endif
source "drivers/gpu/arm/midgard/tests/Kconfig"
diff --git a/mali_kbase/Makefile b/mali_kbase/Makefile
index c64cc94..d851653 100644
--- a/mali_kbase/Makefile
+++ b/mali_kbase/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -62,17 +62,10 @@ ifeq ($(CONFIG_MALI_MIDGARD),m)
CONFIG_MALI_ARBITRATION ?= n
CONFIG_MALI_PARTITION_MANAGER ?= n
- ifeq ($(origin CONFIG_MALI_ABITER_MODULES), undefined)
- CONFIG_MALI_ARBITER_MODULES := $(CONFIG_MALI_ARBITRATION)
- endif
-
- ifeq ($(origin CONFIG_MALI_GPU_POWER_MODULES), undefined)
- CONFIG_MALI_GPU_POWER_MODULES := $(CONFIG_MALI_ARBITRATION)
- endif
-
ifneq ($(CONFIG_MALI_NO_MALI),y)
# Prevent misuse when CONFIG_MALI_NO_MALI=y
CONFIG_MALI_REAL_HW ?= y
+ CONFIG_MALI_CORESIGHT = n
endif
ifeq ($(CONFIG_MALI_MIDGARD_DVFS),y)
@@ -87,10 +80,8 @@ ifeq ($(CONFIG_MALI_MIDGARD),m)
CONFIG_MALI_DMA_BUF_LEGACY_COMPAT = n
endif
- ifeq ($(CONFIG_XEN),y)
- ifneq ($(CONFIG_MALI_ARBITRATION), n)
- CONFIG_MALI_XEN ?= m
- endif
+ ifeq ($(CONFIG_MALI_CSF_SUPPORT), y)
+ CONFIG_MALI_CORESIGHT ?= n
endif
#
@@ -99,12 +90,14 @@ ifeq ($(CONFIG_MALI_MIDGARD),m)
ifeq ($(CONFIG_MALI_EXPERT), y)
ifeq ($(CONFIG_MALI_NO_MALI), y)
CONFIG_MALI_REAL_HW = n
+
else
# Prevent misuse when CONFIG_MALI_NO_MALI=n
CONFIG_MALI_REAL_HW = y
CONFIG_MALI_ERROR_INJECT = n
endif
+
ifeq ($(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED), y)
# Prevent misuse when CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y
CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n
@@ -114,14 +107,10 @@ ifeq ($(CONFIG_MALI_MIDGARD),m)
CONFIG_MALI_MIDGARD_ENABLE_TRACE ?= y
CONFIG_MALI_SYSTEM_TRACE ?= y
- ifeq ($(CONFIG_SYNC), y)
+ ifeq ($(CONFIG_SYNC_FILE), y)
CONFIG_MALI_FENCE_DEBUG ?= y
else
- ifeq ($(CONFIG_SYNC_FILE), y)
- CONFIG_MALI_FENCE_DEBUG ?= y
- else
- CONFIG_MALI_FENCE_DEBUG = n
- endif
+ CONFIG_MALI_FENCE_DEBUG = n
endif
else
# Prevent misuse when CONFIG_MALI_DEBUG=n
@@ -131,7 +120,8 @@ ifeq ($(CONFIG_MALI_MIDGARD),m)
else
# Prevent misuse when CONFIG_MALI_EXPERT=n
CONFIG_MALI_CORESTACK = n
- CONFIG_MALI_2MB_ALLOC = n
+ CONFIG_LARGE_PAGE_ALLOC_OVERRIDE = n
+ CONFIG_LARGE_PAGE_ALLOC = n
CONFIG_MALI_PWRSOFT_765 = n
CONFIG_MALI_MEMORY_FULLY_BACKED = n
CONFIG_MALI_JOB_DUMP = n
@@ -140,6 +130,7 @@ ifeq ($(CONFIG_MALI_MIDGARD),m)
CONFIG_MALI_ERROR_INJECT = n
CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED = n
CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n
+ CONFIG_MALI_HOST_CONTROLS_SC_RAILS = n
CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS = n
CONFIG_MALI_DEBUG = n
CONFIG_MALI_MIDGARD_ENABLE_TRACE = n
@@ -168,8 +159,6 @@ ifeq ($(CONFIG_MALI_MIDGARD),m)
else
# Prevent misuse when CONFIG_MALI_MIDGARD=n
CONFIG_MALI_ARBITRATION = n
- CONFIG_MALI_ARBITER_MODULES = n
- CONFIG_MALI_GPU_POWER_MODULES = n
CONFIG_MALI_KUTF = n
CONFIG_MALI_KUTF_IRQ_TEST = n
CONFIG_MALI_KUTF_CLK_RATE_TRACE = n
@@ -180,21 +169,18 @@ endif
CONFIGS := \
CONFIG_MALI_MIDGARD \
CONFIG_MALI_GATOR_SUPPORT \
- CONFIG_MALI_DMA_FENCE \
CONFIG_MALI_ARBITER_SUPPORT \
CONFIG_MALI_ARBITRATION \
- CONFIG_MALI_ARBITER_MODULES \
- CONFIG_MALI_GPU_POWER_MODULES \
CONFIG_MALI_PARTITION_MANAGER \
CONFIG_MALI_REAL_HW \
- CONFIG_MALI_GEM5_BUILD \
CONFIG_MALI_DEVFREQ \
CONFIG_MALI_MIDGARD_DVFS \
CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND \
CONFIG_MALI_DMA_BUF_LEGACY_COMPAT \
CONFIG_MALI_EXPERT \
CONFIG_MALI_CORESTACK \
- CONFIG_MALI_2MB_ALLOC \
+ CONFIG_LARGE_PAGE_ALLOC_OVERRIDE \
+ CONFIG_LARGE_PAGE_ALLOC \
CONFIG_MALI_PWRSOFT_765 \
CONFIG_MALI_MEMORY_FULLY_BACKED \
CONFIG_MALI_JOB_DUMP \
@@ -202,6 +188,7 @@ CONFIGS := \
CONFIG_MALI_ERROR_INJECT \
CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED \
CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE \
+ CONFIG_MALI_HOST_CONTROLS_SC_RAILS \
CONFIG_MALI_PRFCNT_SET_PRIMARY \
CONFIG_MALI_PRFCNT_SET_SECONDARY \
CONFIG_MALI_PRFCNT_SET_TERTIARY \
@@ -214,7 +201,8 @@ CONFIGS := \
CONFIG_MALI_KUTF_IRQ_TEST \
CONFIG_MALI_KUTF_CLK_RATE_TRACE \
CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \
- CONFIG_MALI_XEN
+ CONFIG_MALI_XEN \
+ CONFIG_MALI_CORESIGHT
# Pixel integration CONFIG options
CONFIGS += \
@@ -225,7 +213,9 @@ CONFIGS += \
CONFIG_MALI_HOST_CONTROLS_SC_RAILS \
CONFIG_MALI_PIXEL_GPU_SLC
-#
+THIS_DIR := $(dir $(lastword $(MAKEFILE_LIST)))
+-include $(THIS_DIR)/../arbitration/Makefile
+
# MAKE_ARGS to pass the custom CONFIGs on out-of-tree build
#
# Generate the list of CONFIGs and values.
@@ -256,26 +246,60 @@ EXTRA_CFLAGS += -DCONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME)
#
EXTRA_SYMBOLS += $(OUT_DIR)/../google-modules/gpu/mali_pixel/Module.symvers
-# The following were added to align with W=1 in scripts/Makefile.extrawarn
-# from the Linux source tree
KBUILD_CFLAGS += -Wall -Werror
+
+# The following were added to align with W=1 in scripts/Makefile.extrawarn
+# from the Linux source tree (v5.18.14)
KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter
KBUILD_CFLAGS += -Wmissing-declarations
KBUILD_CFLAGS += -Wmissing-format-attribute
KBUILD_CFLAGS += -Wmissing-prototypes
KBUILD_CFLAGS += -Wold-style-definition
-KBUILD_CFLAGS += -Wmissing-include-dirs
+# The -Wmissing-include-dirs cannot be enabled as the path to some of the
+# included directories change depending on whether it is an in-tree or
+# out-of-tree build.
KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable)
KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable)
KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned)
KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation)
# The following turn off the warnings enabled by -Wextra
-KBUILD_CFLAGS += -Wno-missing-field-initializers
KBUILD_CFLAGS += -Wno-sign-compare
-KBUILD_CFLAGS += -Wno-type-limits
+KBUILD_CFLAGS += -Wno-shift-negative-value
+# This flag is needed to avoid build errors on older kernels
+KBUILD_CFLAGS += $(call cc-option, -Wno-cast-function-type)
KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1
+# The following were added to align with W=2 in scripts/Makefile.extrawarn
+# from the Linux source tree (v5.18.14)
+KBUILD_CFLAGS += -Wdisabled-optimization
+# The -Wshadow flag cannot be enabled unless upstream kernels are
+# patched to fix redefinitions of certain built-in functions and
+# global variables.
+KBUILD_CFLAGS += $(call cc-option, -Wlogical-op)
+KBUILD_CFLAGS += -Wmissing-field-initializers
+# -Wtype-limits must be disabled due to build failures on kernel 5.x
+KBUILD_CFLAGS += -Wno-type-limits
+KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized)
+KBUILD_CFLAGS += $(call cc-option, -Wunused-macros)
+
+KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2
+
+# This warning is disabled to avoid build failures in some kernel versions
+KBUILD_CFLAGS += -Wno-ignored-qualifiers
+
+ifeq ($(CONFIG_GCOV_KERNEL),y)
+ KBUILD_CFLAGS += $(call cc-option, -ftest-coverage)
+ KBUILD_CFLAGS += $(call cc-option, -fprofile-arcs)
+ EXTRA_CFLAGS += -DGCOV_PROFILE=1
+endif
+
+ifeq ($(CONFIG_MALI_KCOV),y)
+ KBUILD_CFLAGS += $(call cc-option, -fsanitize-coverage=trace-cmp)
+ EXTRA_CFLAGS += -DKCOV=1
+ EXTRA_CFLAGS += -DKCOV_ENABLE_COMPARISONS=1
+endif
+
all:
$(MAKE) -C $(KDIR) M=$(M) W=1 $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
diff --git a/mali_kbase/Mconfig b/mali_kbase/Mconfig
index d03322c..77a528f 100644
--- a/mali_kbase/Mconfig
+++ b/mali_kbase/Mconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -41,11 +41,31 @@ config MALI_PLATFORM_NAME
When PLATFORM_CUSTOM is set, this needs to be set manually to
pick up the desired platform files.
+choice
+ prompt "Mali HW backend"
+ depends on MALI_MIDGARD
+ default MALI_NO_MALI if NO_MALI
+ default MALI_REAL_HW
+
config MALI_REAL_HW
- bool
+ bool "Enable build of Mali kernel driver for real HW"
depends on MALI_MIDGARD
- default y
- default n if NO_MALI
+ help
+ This is the default HW backend.
+
+config MALI_NO_MALI
+ bool "Enable build of Mali kernel driver for No Mali"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ help
+ This can be used to test the driver in a simulated environment
+ whereby the hardware is not physically present. If the hardware is physically
+ present it will not be used. This can be used to test the majority of the
+ driver without needing actual hardware or for software benchmarking.
+ All calls to the simulated hardware will complete immediately as if the hardware
+ completed the task.
+
+
+endchoice
config MALI_PLATFORM_DT_PIN_RST
bool "Enable Juno GPU Pin reset"
@@ -97,16 +117,6 @@ config MALI_MIDGARD_ENABLE_TRACE
Enables tracing in kbase. Trace log available through
the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
-config MALI_DMA_FENCE
- bool "Enable DMA_BUF fence support for Mali"
- depends on MALI_MIDGARD
- default n
- help
- Support DMA_BUF fences for Mali.
-
- This option should only be enabled if the Linux Kernel has built in
- support for DMA_BUF fences.
-
config MALI_ARBITER_SUPPORT
bool "Enable arbiter support for Mali"
depends on MALI_MIDGARD && !MALI_CSF_SUPPORT
@@ -129,7 +139,7 @@ config MALI_DMA_BUF_MAP_ON_DEMAND
default n
default y if !DMA_BUF_SYNC_IOCTL_SUPPORTED
help
- This option caused kbase to set up the GPU mapping of imported
+ This option will cause kbase to set up the GPU mapping of imported
dma-buf when needed to run atoms. This is the legacy behavior.
This is intended for testing and the option will get removed in the
@@ -149,6 +159,12 @@ config MALI_DMA_BUF_LEGACY_COMPAT
flushes in other drivers. This only has an effect for clients using
UK 11.18 or older. For later UK versions it is not possible.
+config MALI_CORESIGHT
+ depends on MALI_MIDGARD && MALI_CSF_SUPPORT && !NO_MALI
+ select CSFFW_DEBUG_FW_AS_RW
+ bool "Enable Kbase CoreSight tracing support"
+ default n
+
menuconfig MALI_EXPERT
depends on MALI_MIDGARD
bool "Enable Expert Settings"
@@ -157,17 +173,6 @@ menuconfig MALI_EXPERT
Enabling this option and modifying the default settings may produce
a driver with performance or other limitations.
-config MALI_2MB_ALLOC
- bool "Attempt to allocate 2MB pages"
- depends on MALI_MIDGARD && MALI_EXPERT
- default n
- help
- Rather than allocating all GPU memory page-by-page, attempt to
- allocate 2MB pages from the kernel. This reduces TLB pressure and
- helps to prevent memory fragmentation.
-
- If in doubt, say N
-
config MALI_MEMORY_FULLY_BACKED
bool "Enable memory fully physically-backed"
depends on MALI_MIDGARD && MALI_EXPERT
@@ -191,20 +196,6 @@ config MALI_CORESTACK
If unsure, say N.
-config MALI_FW_CORE_DUMP
- bool "Enable support for FW core dump"
- depends on MALI_MIDGARD && MALI_EXPERT && MALI_CSF_SUPPORT
- default n
- help
- Adds ability to request firmware core dump
-
- Example:
- * To explicitly request core dump:
- echo 1 >/sys/kernel/debug/mali0/fw_core_dump
- * To output current core dump (after explicitly requesting a core dump,
- or kernel driver reported an internal firmware error):
- cat /sys/kernel/debug/mali0/fw_core_dump
-
choice
prompt "Error injection level"
depends on MALI_MIDGARD && MALI_EXPERT
@@ -244,14 +235,6 @@ config MALI_ERROR_INJECT
depends on MALI_MIDGARD && MALI_EXPERT
default y if !MALI_ERROR_INJECT_NONE
-config MALI_GEM5_BUILD
- bool "Enable build of Mali kernel driver for GEM5"
- depends on MALI_MIDGARD && MALI_EXPERT
- default n
- help
- This option is to do a Mali GEM5 build.
- If unsure, say N.
-
config MALI_DEBUG
bool "Enable debug build"
depends on MALI_MIDGARD && MALI_EXPERT
@@ -260,6 +243,23 @@ config MALI_DEBUG
help
Select this option for increased checking and reporting of errors.
+config MALI_GCOV_KERNEL
+ bool "Enable branch coverage via gcov"
+ depends on MALI_MIDGARD && MALI_DEBUG
+ default n
+ help
+ Choose this option to enable building kbase with branch
+ coverage information. When built against a supporting kernel,
+ the coverage information will be available via debugfs.
+
+config MALI_KCOV
+ bool "Enable kcov coverage to support fuzzers"
+ depends on MALI_MIDGARD && MALI_DEBUG
+ default n
+ help
+ Choose this option to enable building with fuzzing-oriented
+ coverage, to improve the random test cases that are generated.
+
config MALI_FENCE_DEBUG
bool "Enable debug sync fence usage"
depends on MALI_MIDGARD && MALI_EXPERT
@@ -352,5 +352,5 @@ config MALI_HOST_CONTROLS_SC_RAILS
Adapter) inside the GPU to handshake with SoC PMU to control the
power of cores.
-source "kernel/drivers/gpu/arm/midgard/arbitration/Mconfig"
+source "kernel/drivers/gpu/arm/arbitration/Mconfig"
source "kernel/drivers/gpu/arm/midgard/tests/Mconfig"
diff --git a/mali_kbase/arbiter/mali_kbase_arbif.c b/mali_kbase/arbiter/mali_kbase_arbif.c
index 64e11ce..b5d3cd6 100644
--- a/mali_kbase/arbiter/mali_kbase_arbif.c
+++ b/mali_kbase/arbiter/mali_kbase_arbif.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,12 +28,12 @@
#include <tl/mali_kbase_tracepoints.h>
#include <linux/of.h>
#include <linux/of_platform.h>
-#include "mali_kbase_arbiter_interface.h"
+#include "linux/mali_arbiter_interface.h"
/* Arbiter interface version against which was implemented this module */
#define MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION 5
#if MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION != \
- MALI_KBASE_ARBITER_INTERFACE_VERSION
+ MALI_ARBITER_INTERFACE_VERSION
#error "Unsupported Mali Arbiter interface version."
#endif
@@ -205,6 +205,7 @@ int kbase_arbif_init(struct kbase_device *kbdev)
if (!pdev->dev.driver || !try_module_get(pdev->dev.driver->owner)) {
dev_err(kbdev->dev, "arbiter_if driver not available\n");
+ put_device(&pdev->dev);
return -EPROBE_DEFER;
}
kbdev->arb.arb_dev = &pdev->dev;
@@ -212,6 +213,7 @@ int kbase_arbif_init(struct kbase_device *kbdev)
if (!arb_if) {
dev_err(kbdev->dev, "arbiter_if driver not ready\n");
module_put(pdev->dev.driver->owner);
+ put_device(&pdev->dev);
return -EPROBE_DEFER;
}
@@ -233,6 +235,7 @@ int kbase_arbif_init(struct kbase_device *kbdev)
if (err) {
dev_err(&pdev->dev, "Failed to register with arbiter\n");
module_put(pdev->dev.driver->owner);
+ put_device(&pdev->dev);
if (err != -EPROBE_DEFER)
err = -EFAULT;
return err;
@@ -262,8 +265,10 @@ void kbase_arbif_destroy(struct kbase_device *kbdev)
arb_if->vm_ops.vm_arb_unregister_dev(kbdev->arb.arb_if);
}
kbdev->arb.arb_if = NULL;
- if (kbdev->arb.arb_dev)
+ if (kbdev->arb.arb_dev) {
module_put(kbdev->arb.arb_dev->driver->owner);
+ put_device(kbdev->arb.arb_dev);
+ }
kbdev->arb.arb_dev = NULL;
}
diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_interface.h b/mali_kbase/arbiter/mali_kbase_arbiter_interface.h
deleted file mode 100644
index a0ca1cc..0000000
--- a/mali_kbase/arbiter/mali_kbase_arbiter_interface.h
+++ /dev/null
@@ -1,170 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- *
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-/**
- * DOC: Defines the Mali arbiter interface
- */
-
-#ifndef _MALI_KBASE_ARBITER_INTERFACE_H_
-#define _MALI_KBASE_ARBITER_INTERFACE_H_
-
-/**
- * DOC: Mali arbiter interface version
- *
- * This specifies the current version of the configuration interface. Whenever
- * the arbiter interface changes, so that integration effort is required, the
- * version number will be increased. Each configuration must make an effort
- * to check that it implements the correct version.
- *
- * Version history:
- * 1 - Added the Mali arbiter configuration interface.
- * 2 - Strip out reference code from header
- * 3 - Removed DVFS utilization interface (DVFS moved to arbiter side)
- * 4 - Added max_config support
- * 5 - Added GPU clock frequency reporting support from arbiter
- */
-#define MALI_KBASE_ARBITER_INTERFACE_VERSION 5
-
-/**
- * DOC: NO_FREQ is used in case platform doesn't support reporting frequency
- */
-#define NO_FREQ 0
-
-struct arbiter_if_dev;
-
-/**
- * struct arbiter_if_arb_vm_ops - Interface to communicate messages to VM
- *
- * @arb_vm_gpu_stop: Callback to ask VM to stop using GPU.
- * dev: The arbif kernel module device.
- *
- * Informs KBase to stop using the GPU as soon as possible.
- * Note: Once the driver is no longer using the GPU, a call
- * to vm_arb_gpu_stopped is expected by the arbiter.
- * @arb_vm_gpu_granted: Callback to indicate that GPU has been granted to VM.
- * dev: The arbif kernel module device.
- *
- * Informs KBase that the GPU can now be used by the VM.
- * @arb_vm_gpu_lost: Callback to indicate that VM has lost the GPU.
- * dev: The arbif kernel module device.
- *
- * This is called if KBase takes too long to respond to the
- * arbiter stop request.
- * Once this is called, KBase will assume that access to the
- * GPU has been lost and will fail all running jobs and
- * reset its internal state.
- * If successful, will respond with a vm_arb_gpu_stopped
- * message.
- * @arb_vm_max_config: Callback to send the max config info to the VM.
- * dev: The arbif kernel module device.
- * max_l2_slices: The maximum number of L2 slices.
- * max_core_mask: The largest core mask.
- *
- * Informs KBase the maximum resources that can be
- * allocated to the partition in use.
- * @arb_vm_update_freq: Callback to notify that GPU clock frequency has been
- * updated.
- * dev: The arbif kernel module device.
- * freq: GPU clock frequency value reported from arbiter
- *
- * Informs KBase that the GPU clock frequency has been updated.
- *
- * This struct contains callbacks used to deliver messages
- * from the arbiter to the corresponding VM.
- * Note that calls into these callbacks may have synchronous calls back into
- * the arbiter arbiter_if_vm_arb_ops callbacks below.
- * For example vm_arb_gpu_stopped() may be called as a side effect of
- * arb_vm_gpu_stop() being called here.
- */
-struct arbiter_if_arb_vm_ops {
- void (*arb_vm_gpu_stop)(struct device *dev);
- void (*arb_vm_gpu_granted)(struct device *dev);
- void (*arb_vm_gpu_lost)(struct device *dev);
- void (*arb_vm_max_config)(struct device *dev, uint32_t max_l2_slices,
- uint32_t max_core_mask);
- void (*arb_vm_update_freq)(struct device *dev, uint32_t freq);
-};
-
-/**
- * struct arbiter_if_vm_arb_ops - Interface to communicate messages to arbiter
- *
- * @vm_arb_register_dev: Callback to register VM device driver callbacks.
- * arbif_dev: The arbiter interface to register
- * with for device callbacks
- * dev: The device structure to supply in the callbacks.
- * ops: The callbacks that the device driver supports
- * (none are optional).
- *
- * Returns
- * 0 - successful.
- * -EINVAL - invalid argument.
- * -EPROBE_DEFER - module dependencies are not yet
- * available.
- * @vm_arb_unregister_dev: Callback to unregister VM device driver callbacks.
- * arbif_dev: The arbiter interface to unregistering
- * from.
- * @vm_arb_get_max_config: Callback to Request the max config from the Arbiter.
- * arbif_dev: The arbiter interface to issue the
- * request to.
- * @vm_arb_gpu_request: Callback to ask the arbiter interface for GPU access.
- * arbif_dev: The arbiter interface to issue the request
- * to.
- * @vm_arb_gpu_active: Callback to inform arbiter that driver has gone active.
- * arbif_dev: The arbiter interface device to notify.
- * @vm_arb_gpu_idle: Callback to inform the arbiter that driver has gone idle.
- * arbif_dev: The arbiter interface device to notify.
- * @vm_arb_gpu_stopped: Callback to inform arbiter that driver has stopped
- * using the GPU
- * arbif_dev: The arbiter interface device to notify.
- * gpu_required: The GPU is still needed to do more work.
- *
- * This struct contains callbacks used to request operations
- * from the VM to the arbiter.
- * Note that we must not make any synchronous calls back in to the VM
- * (via arbiter_if_arb_vm_ops above) in the context of these callbacks.
- */
-struct arbiter_if_vm_arb_ops {
- int (*vm_arb_register_dev)(struct arbiter_if_dev *arbif_dev,
- struct device *dev, struct arbiter_if_arb_vm_ops *ops);
- void (*vm_arb_unregister_dev)(struct arbiter_if_dev *arbif_dev);
- void (*vm_arb_get_max_config)(struct arbiter_if_dev *arbif_dev);
- void (*vm_arb_gpu_request)(struct arbiter_if_dev *arbif_dev);
- void (*vm_arb_gpu_active)(struct arbiter_if_dev *arbif_dev);
- void (*vm_arb_gpu_idle)(struct arbiter_if_dev *arbif_dev);
- void (*vm_arb_gpu_stopped)(struct arbiter_if_dev *arbif_dev,
- u8 gpu_required);
-};
-
-/**
- * struct arbiter_if_dev - Arbiter Interface
- * @vm_ops: Callback functions for connecting KBase with
- * arbiter interface device.
- * @priv_data: Internal arbif data not used by KBASE.
- *
- * Arbiter Interface Kernel Module State used for linking KBase
- * with an arbiter interface platform device
- */
-struct arbiter_if_dev {
- struct arbiter_if_vm_arb_ops vm_ops;
- void *priv_data;
-};
-
-#endif /* _MALI_KBASE_ARBITER_INTERFACE_H_ */
diff --git a/mali_kbase/arbitration/Kconfig b/mali_kbase/arbitration/Kconfig
deleted file mode 100644
index 1935c81..0000000
--- a/mali_kbase/arbitration/Kconfig
+++ /dev/null
@@ -1,49 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note OR MIT
-#
-# (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved.
-#
-# This program is free software and is provided to you under the terms of the
-# GNU General Public License version 2 as published by the Free Software
-# Foundation, and any use by you of this program is subject to the terms
-# of such GNU license.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
-#
-
-config MALI_XEN
- tristate "Enable Xen Interface reference code"
- depends on MALI_ARBITRATION && XEN
- default n
- help
- Enables the build of xen interface modules used in the reference
- virtualization setup for Mali
- If unsure, say N.
-
-config MALI_ARBITER_MODULES
- tristate "Enable mali arbiter modules"
- depends on MALI_ARBITRATION
- default y
- help
- Enables the build of the arbiter modules used in the reference
- virtualization setup for Mali
- If unsure, say N
-
-config MALI_GPU_POWER_MODULES
- tristate "Enable gpu power modules"
- depends on MALI_ARBITRATION
- default y
- help
- Enables the build of the gpu power modules used in the reference
- virtualization setup for Mali
- If unsure, say N
-
-
-source "drivers/gpu/arm/midgard/arbitration/ptm/Kconfig"
diff --git a/mali_kbase/arbitration/ptm/Kconfig b/mali_kbase/arbitration/ptm/Kconfig
deleted file mode 100644
index 074ebd5..0000000
--- a/mali_kbase/arbitration/ptm/Kconfig
+++ /dev/null
@@ -1,28 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note OR MIT
-#
-# (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
-#
-# This program is free software and is provided to you under the terms of the
-# GNU General Public License version 2 as published by the Free Software
-# Foundation, and any use by you of this program is subject to the terms
-# of such GNU license.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
-#
-
-config MALI_PARTITION_MANAGER
- tristate "Enable compilation of partition manager modules"
- depends on MALI_ARBITRATION
- default n
- help
- This option enables the compilation of the partition manager
- modules used to configure the Mali-G78AE GPU.
-
diff --git a/mali_kbase/backend/gpu/Kbuild b/mali_kbase/backend/gpu/Kbuild
index 611b16b..7df24c3 100644
--- a/mali_kbase/backend/gpu/Kbuild
+++ b/mali_kbase/backend/gpu/Kbuild
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -49,8 +49,12 @@ endif
mali_kbase-$(CONFIG_MALI_DEVFREQ) += \
backend/gpu/mali_kbase_devfreq.o
-# Dummy model
+ifneq ($(CONFIG_MALI_REAL_HW),y)
+ mali_kbase-y += backend/gpu/mali_kbase_model_linux.o
+endif
+
+# NO_MALI Dummy model interface
mali_kbase-$(CONFIG_MALI_NO_MALI) += backend/gpu/mali_kbase_model_dummy.o
-mali_kbase-$(CONFIG_MALI_NO_MALI) += backend/gpu/mali_kbase_model_linux.o
# HW error simulation
mali_kbase-$(CONFIG_MALI_NO_MALI) += backend/gpu/mali_kbase_model_error_generator.o
+
diff --git a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c
index 9587c70..7c0abba 100644
--- a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2016, 2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016, 2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,12 +22,32 @@
#include "backend/gpu/mali_kbase_cache_policy_backend.h"
#include <device/mali_kbase_device.h>
+/**
+ * kbasep_amba_register_present() - Check AMBA_<> register is present
+ * in the GPU.
+ * @kbdev: Device pointer
+ *
+ * Note: Only for arch version 12.x.1 onwards.
+ *
+ * Return: true if AMBA_FEATURES/ENABLE registers are present.
+ */
+static bool kbasep_amba_register_present(struct kbase_device *kbdev)
+{
+ return (ARCH_MAJOR_REV_REG(kbdev->gpu_props.props.raw_props.gpu_id) >=
+ GPU_ID2_ARCH_MAJOR_REV_MAKE(12, 1));
+}
void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
u32 mode)
{
kbdev->current_gpu_coherency_mode = mode;
+ if (kbasep_amba_register_present(kbdev)) {
+ u32 val = kbase_reg_read(kbdev, AMBA_ENABLE);
+
+ val = AMBA_ENABLE_COHERENCY_PROTOCOL_SET(val, mode);
+ kbase_reg_write(kbdev, AMBA_ENABLE, val);
+ } else
kbase_reg_write(kbdev, COHERENCY_ENABLE, mode);
}
@@ -35,9 +55,38 @@ u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev)
{
u32 coherency_features;
+ if (kbasep_amba_register_present(kbdev))
+ coherency_features =
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(AMBA_FEATURES));
+ else
coherency_features = kbase_reg_read(
kbdev, GPU_CONTROL_REG(COHERENCY_FEATURES));
return coherency_features;
}
+void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev,
+ bool enable)
+{
+ if (kbasep_amba_register_present(kbdev)) {
+ u32 val = kbase_reg_read(kbdev, AMBA_ENABLE);
+
+ val = AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SET(val, enable);
+ kbase_reg_write(kbdev, AMBA_ENABLE, val);
+
+ } else {
+ WARN(1, "memory_cache_support not supported");
+ }
+}
+
+void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable)
+{
+ if (kbasep_amba_register_present(kbdev)) {
+ u32 val = kbase_reg_read(kbdev, AMBA_ENABLE);
+
+ val = AMBA_ENABLE_INVALIDATE_HINT_SET(val, enable);
+ kbase_reg_write(kbdev, AMBA_ENABLE, val);
+ } else {
+ WARN(1, "invalidate_hint not supported");
+ }
+}
diff --git a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h
index 13c79d6..8cd8090 100644
--- a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h
+++ b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2016, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -43,4 +43,23 @@ void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
*/
u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev);
+/**
+ * kbase_amba_set_memory_cache_support() - Sets AMBA memory cache support
+ * in the GPU.
+ * @kbdev: Device pointer
+ * @enable: true for enable.
+ *
+ * Note: Only for arch version 12.x.1 onwards.
+ */
+void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev,
+ bool enable);
+/**
+ * kbase_amba_set_invalidate_hint() - Sets AMBA invalidate hint
+ * in the GPU.
+ * @kbdev: Device pointer
+ * @enable: true for enable.
+ *
+ * Note: Only for arch version 12.x.1 onwards.
+ */
+void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable);
#endif /* _KBASE_CACHE_POLICY_BACKEND_H_ */
diff --git a/mali_kbase/backend/gpu/mali_kbase_devfreq.c b/mali_kbase/backend/gpu/mali_kbase_devfreq.c
index 09c1863..a389cd9 100644
--- a/mali_kbase/backend/gpu/mali_kbase_devfreq.c
+++ b/mali_kbase/backend/gpu/mali_kbase_devfreq.c
@@ -631,7 +631,6 @@ static void kbase_devfreq_work_term(struct kbase_device *kbdev)
destroy_workqueue(workq);
}
-
int kbase_devfreq_init(struct kbase_device *kbdev)
{
struct devfreq_dev_profile *dp;
diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_defs.h b/mali_kbase/backend/gpu/mali_kbase_instr_defs.h
index 7190f42..bd2eb8a 100644
--- a/mali_kbase/backend/gpu/mali_kbase_instr_defs.h
+++ b/mali_kbase/backend/gpu/mali_kbase_instr_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014, 2016, 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2016, 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -26,7 +26,7 @@
#ifndef _KBASE_INSTR_DEFS_H_
#define _KBASE_INSTR_DEFS_H_
-#include <mali_kbase_hwcnt_gpu.h>
+#include <hwcnt/mali_kbase_hwcnt_gpu.h>
/*
* Instrumentation State Machine States
diff --git a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c
index a29f7ef..ef09c6b 100644
--- a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c
+++ b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2016, 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -25,12 +25,12 @@
#include <linux/interrupt.h>
-#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
+#if IS_ENABLED(CONFIG_MALI_REAL_HW)
/* GPU IRQ Tags */
-#define JOB_IRQ_TAG 0
-#define MMU_IRQ_TAG 1
-#define GPU_IRQ_TAG 2
+#define JOB_IRQ_TAG 0
+#define MMU_IRQ_TAG 1
+#define GPU_IRQ_TAG 2
static void *kbase_tag(void *ptr, u32 tag)
{
@@ -163,7 +163,6 @@ static irq_handler_t kbase_handler_table[] = {
#ifdef CONFIG_MALI_DEBUG
#define JOB_IRQ_HANDLER JOB_IRQ_TAG
-#define MMU_IRQ_HANDLER MMU_IRQ_TAG
#define GPU_IRQ_HANDLER GPU_IRQ_TAG
/**
@@ -501,4 +500,4 @@ void kbase_synchronize_irqs(struct kbase_device *kbdev)
KBASE_EXPORT_TEST_API(kbase_synchronize_irqs);
-#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
+#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_as.c b/mali_kbase/backend/gpu/mali_kbase_jm_as.c
index 309e5c7..7059c84 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_as.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_as.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -67,9 +67,8 @@ static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev,
kbase_js_runpool_inc_context_count(kbdev, kctx);
}
-bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
- struct kbase_context *kctx,
- int js)
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, struct kbase_context *kctx,
+ unsigned int js)
{
int i;
@@ -240,4 +239,3 @@ bool kbase_backend_use_ctx(struct kbase_device *kbdev,
return true;
}
-
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
index 3062597..72926bc 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -34,7 +34,7 @@
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_kinstr_jm.h>
#include <mali_kbase_hwaccess_instr.h>
-#include <mali_kbase_hwcnt_context.h>
+#include <hwcnt/mali_kbase_hwcnt_context.h>
#include <device/mali_kbase_device.h>
#include <backend/gpu/mali_kbase_irq_internal.h>
#include <backend/gpu/mali_kbase_jm_internal.h>
@@ -44,9 +44,8 @@ static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev);
static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev,
const u64 affinity, const u64 limited_core_mask);
-static u64 kbase_job_write_affinity(struct kbase_device *kbdev,
- base_jd_core_req core_req,
- int js, const u64 limited_core_mask)
+static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req core_req,
+ unsigned int js, const u64 limited_core_mask)
{
u64 affinity;
bool skip_affinity_check = false;
@@ -191,7 +190,28 @@ static u64 select_job_chain(struct kbase_jd_atom *katom)
return jc;
}
-int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js)
+static inline bool kbasep_jm_wait_js_free(struct kbase_device *kbdev, unsigned int js,
+ struct kbase_context *kctx)
+{
+ const ktime_t wait_loop_start = ktime_get_raw();
+ const s64 max_timeout = (s64)kbdev->js_data.js_free_wait_time_ms;
+ s64 diff = 0;
+
+ /* wait for the JS_COMMAND_NEXT register to reach the given status value */
+ do {
+ if (!kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT)))
+ return true;
+
+ diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start));
+ } while (diff < max_timeout);
+
+ dev_err(kbdev->dev, "Timed out waiting for job slot %u to become free for ctx %d_%u", js,
+ kctx->tgid, kctx->id);
+
+ return false;
+}
+
+int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, unsigned int js)
{
struct kbase_context *kctx;
u32 cfg;
@@ -204,8 +224,7 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
kctx = katom->kctx;
/* Command register must be available */
- if (WARN(!kbasep_jm_is_js_free(kbdev, js, kctx),
- "Attempting to assign to occupied slot %d in kctx %pK\n", js, (void *)kctx))
+ if (!kbasep_jm_wait_js_free(kbdev, js, kctx))
return -EPERM;
dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n",
@@ -355,10 +374,8 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
* work out the best estimate (which might still result in an over-estimate to
* the calculated time spent)
*/
-static void kbasep_job_slot_update_head_start_timestamp(
- struct kbase_device *kbdev,
- int js,
- ktime_t end_timestamp)
+static void kbasep_job_slot_update_head_start_timestamp(struct kbase_device *kbdev, unsigned int js,
+ ktime_t end_timestamp)
{
ktime_t timestamp_diff;
struct kbase_jd_atom *katom;
@@ -388,8 +405,7 @@ static void kbasep_job_slot_update_head_start_timestamp(
* Make a tracepoint call to the instrumentation module informing that
* softstop happened on given lpu (job slot).
*/
-static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev,
- int js)
+static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, unsigned int js)
{
KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(
kbdev,
@@ -398,7 +414,6 @@ static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev,
void kbase_job_done(struct kbase_device *kbdev, u32 done)
{
- int i;
u32 count = 0;
ktime_t end_timestamp;
@@ -409,6 +424,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
end_timestamp = ktime_get_raw();
while (done) {
+ unsigned int i;
u32 failed = done >> 16;
/* treat failed slots as finished slots */
@@ -418,8 +434,6 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
* numbered interrupts before the higher numbered ones.
*/
i = ffs(finished) - 1;
- if (WARN(i < 0, "%s: called without receiving any interrupts\n", __func__))
- break;
do {
int nr_done;
@@ -618,11 +632,9 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ_END, NULL, NULL, 0, count);
}
-void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
- int js,
- u32 action,
- base_jd_core_req core_reqs,
- struct kbase_jd_atom *target_katom)
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, unsigned int js,
+ u32 action, base_jd_core_req core_reqs,
+ struct kbase_jd_atom *target_katom)
{
#if KBASE_KTRACE_ENABLE
u32 status_reg_before;
@@ -680,6 +692,10 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
struct kbase_context *head_kctx;
head = kbase_gpu_inspect(kbdev, js, 0);
+ if (unlikely(!head)) {
+ dev_err(kbdev->dev, "Can't get a katom from js(%u)\n", js);
+ return;
+ }
head_kctx = head->kctx;
if (status_reg_before == BASE_JD_EVENT_ACTIVE)
@@ -748,7 +764,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx)
{
struct kbase_device *kbdev = kctx->kbdev;
- int i;
+ unsigned int i;
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -760,7 +776,7 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
struct kbase_jd_atom *target_katom)
{
struct kbase_device *kbdev;
- int target_js = target_katom->slot_nr;
+ unsigned int target_js = target_katom->slot_nr;
int i;
bool stop_sent = false;
@@ -938,8 +954,8 @@ KBASE_EXPORT_TEST_API(kbase_job_slot_term);
*
* Where possible any job in the next register is evicted before the soft-stop.
*/
-void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
- struct kbase_jd_atom *target_katom, u32 sw_flags)
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, unsigned int js,
+ struct kbase_jd_atom *target_katom, u32 sw_flags)
{
dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n",
target_katom, sw_flags, js);
@@ -959,8 +975,8 @@ void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u);
}
-void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
- struct kbase_jd_atom *target_katom)
+void kbase_job_slot_hardstop(struct kbase_context *kctx, unsigned int js,
+ struct kbase_jd_atom *target_katom)
{
struct kbase_device *kbdev = kctx->kbdev;
@@ -1264,7 +1280,7 @@ static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer)
static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
{
- int i;
+ unsigned int i;
int pending_jobs = 0;
/* Count the number of jobs */
@@ -1444,6 +1460,11 @@ bool kbase_reset_gpu_is_active(struct kbase_device *kbdev)
return true;
}
+bool kbase_reset_gpu_is_not_pending(struct kbase_device *kbdev)
+{
+ return atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_NOT_PENDING;
+}
+
int kbase_reset_gpu_wait(struct kbase_device *kbdev)
{
wait_event(kbdev->hwaccess.backend.reset_wait,
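
The new kbasep_jm_wait_js_free() above tolerates a briefly busy slot: it polls JS_COMMAND_NEXT until the register reads zero or js_free_wait_time_ms elapses, and only a timeout makes kbase_job_hw_submit() refuse the atom. A self-contained userspace sketch of the same bounded-poll shape, with every name invented for the example:

#define _POSIX_C_SOURCE 200809L
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static int64_t now_ms(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (int64_t)ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
}

/* Stand-in for reading JS_COMMAND_NEXT: non-zero means the slot is busy. */
static uint32_t read_next_command(void)
{
        static int reads;

        return (++reads < 3) ? 1u : 0u; /* slot becomes free on the third poll */
}

static bool wait_slot_free(int64_t timeout_ms)
{
        const int64_t start = now_ms();

        do {
                if (!read_next_command())
                        return true;
        } while (now_ms() - start < timeout_ms);

        return false; /* caller logs the timeout and refuses to submit */
}

int main(void)
{
        printf("slot free: %d\n", wait_slot_free(10));
        return 0;
}
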
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
index 1ebb843..bfd55a6 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2011-2016, 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016, 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -34,21 +34,6 @@
#include <device/mali_kbase_device.h>
/**
- * kbase_job_submit_nolock() - Submit a job to a certain job-slot
- * @kbdev: Device pointer
- * @katom: Atom to submit
- * @js: Job slot to submit on
- *
- * The caller must check kbasep_jm_is_submit_slots_free() != false before
- * calling this.
- *
- * The following locking conditions are made on the caller:
- * - it must hold the hwaccess_lock
- */
-void kbase_job_submit_nolock(struct kbase_device *kbdev,
- struct kbase_jd_atom *katom, int js);
-
-/**
* kbase_job_done_slot() - Complete the head job on a particular job-slot
* @kbdev: Device pointer
* @s: Job slot
@@ -60,22 +45,13 @@ void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code,
u64 job_tail, ktime_t *end_timestamp);
#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS)
-static inline char *kbasep_make_job_slot_string(int js, char *js_string,
- size_t js_size)
+static inline char *kbasep_make_job_slot_string(unsigned int js, char *js_string, size_t js_size)
{
- snprintf(js_string, js_size, "job_slot_%i", js);
+ snprintf(js_string, js_size, "job_slot_%u", js);
return js_string;
}
#endif
-#if !MALI_USE_CSF
-static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
- struct kbase_context *kctx)
-{
- return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT));
-}
-#endif
-
/**
* kbase_job_hw_submit() - Submit a job to the GPU
* @kbdev: Device pointer
@@ -90,7 +66,7 @@ static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
*
* Return: 0 if the job was successfully submitted to hardware, an error otherwise.
*/
-int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js);
+int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, unsigned int js);
#if !MALI_USE_CSF
/**
@@ -106,11 +82,9 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
* The following locking conditions are made on the caller:
* - it must hold the hwaccess_lock
*/
-void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
- int js,
- u32 action,
- base_jd_core_req core_reqs,
- struct kbase_jd_atom *target_katom);
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, unsigned int js,
+ u32 action, base_jd_core_req core_reqs,
+ struct kbase_jd_atom *target_katom);
#endif /* !MALI_USE_CSF */
/**
@@ -134,11 +108,8 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
*
* Return: true if an atom was stopped, false otherwise
*/
-bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
- struct kbase_context *kctx,
- int js,
- struct kbase_jd_atom *katom,
- u32 action);
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_context *kctx,
+ unsigned int js, struct kbase_jd_atom *katom, u32 action);
/**
* kbase_job_slot_init - Initialise job slot framework
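
With job slot numbers now carried as unsigned int, kbasep_make_job_slot_string() above also switches its format from %i to %u so the specifier matches the argument type. A tiny standalone version of that helper, with names local to this sketch:

#include <stdio.h>

static char *make_job_slot_string(unsigned int js, char *buf, size_t size)
{
        /* %u matches the unsigned slot index; %i would be a type mismatch */
        snprintf(buf, size, "job_slot_%u", js);
        return buf;
}

int main(void)
{
        char buf[32];

        printf("%s\n", make_job_slot_string(2, buf, sizeof(buf)));
        return 0;
}
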
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
index 4fe8046..f4094a3 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -29,7 +29,7 @@
#include <mali_kbase_jm.h>
#include <mali_kbase_js.h>
#include <tl/mali_kbase_tracepoints.h>
-#include <mali_kbase_hwcnt_context.h>
+#include <hwcnt/mali_kbase_hwcnt_context.h>
#include <mali_kbase_reset_gpu.h>
#include <mali_kbase_kinstr_jm.h>
#include <backend/gpu/mali_kbase_cache_policy_backend.h>
@@ -93,9 +93,8 @@ static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev,
*
* Return: Atom removed from ringbuffer
*/
-static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev,
- int js,
- ktime_t *end_timestamp)
+static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, unsigned int js,
+ ktime_t *end_timestamp)
{
struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
struct kbase_jd_atom *katom;
@@ -118,8 +117,7 @@ static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev,
return katom;
}
-struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
- int idx)
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, unsigned int js, int idx)
{
struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
@@ -131,8 +129,7 @@ struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom;
}
-struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
- int js)
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, unsigned int js)
{
struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
@@ -144,12 +141,13 @@ struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev)
{
- int js;
- int i;
+ unsigned int js;
lockdep_assert_held(&kbdev->hwaccess_lock);
for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+ int i;
+
for (i = 0; i < SLOT_RB_SIZE; i++) {
struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
@@ -160,7 +158,7 @@ bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev)
return false;
}
-int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js)
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, unsigned int js)
{
int nr = 0;
int i;
@@ -178,7 +176,7 @@ int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js)
return nr;
}
-int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js)
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, unsigned int js)
{
int nr = 0;
int i;
@@ -193,8 +191,8 @@ int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js)
return nr;
}
-static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js,
- enum kbase_atom_gpu_rb_state min_rb_state)
+static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, unsigned int js,
+ enum kbase_atom_gpu_rb_state min_rb_state)
{
int nr = 0;
int i;
@@ -244,9 +242,11 @@ static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure)
static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev,
bool secure)
{
- int js, i;
+ unsigned int js;
for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+ int i;
+
for (i = 0; i < SLOT_RB_SIZE; i++) {
struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
js, i);
@@ -261,7 +261,7 @@ static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev,
return false;
}
-int kbase_backend_slot_free(struct kbase_device *kbdev, int js)
+int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -429,9 +429,9 @@ static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev,
*
* Return: true if any slots other than @js are busy, false otherwise
*/
-static inline bool other_slots_busy(struct kbase_device *kbdev, int js)
+static inline bool other_slots_busy(struct kbase_device *kbdev, unsigned int js)
{
- int slot;
+ unsigned int slot;
for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) {
if (slot == js)
@@ -843,7 +843,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
void kbase_backend_slot_update(struct kbase_device *kbdev)
{
- int js;
+ unsigned int js;
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -1000,36 +1000,34 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
other_slots_busy(kbdev, js))
break;
-#ifdef CONFIG_MALI_GEM5_BUILD
- if (!kbasep_jm_is_js_free(kbdev, js,
- katom[idx]->kctx))
- break;
-#endif
/* Check if this job needs the cycle counter
* enabled before submission
*/
if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
- kbase_pm_request_gpu_cycle_counter_l2_is_on(
- kbdev);
+ kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev);
- if (!kbase_job_hw_submit(kbdev, katom[idx], js))
+ if (!kbase_job_hw_submit(kbdev, katom[idx], js)) {
katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_SUBMITTED;
- else
- break;
- kbasep_platform_event_work_begin(katom[idx]);
+ /* Inform power management at start/finish of
+ * atom so it can update its GPU utilisation
+ * metrics.
+ */
+ kbase_pm_metrics_update(kbdev,
+ &katom[idx]->start_timestamp);
+
+ /* Inform platform at start/finish of atom */
+ kbasep_platform_event_work_begin(katom[idx]);
+ } else {
+ if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
+ kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+
+ break;
+ }
/* ***TRANSITION TO HIGHER STATE*** */
fallthrough;
case KBASE_ATOM_GPU_RB_SUBMITTED:
-
- /* Inform power management at start/finish of
- * atom so it can update its GPU utilisation
- * metrics.
- */
- kbase_pm_metrics_update(kbdev,
- &katom[idx]->start_timestamp);
-
break;
case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
@@ -1109,8 +1107,7 @@ kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a,
*
* Return: true if an atom was evicted, false otherwise.
*/
-bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
- u32 completion_code)
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 completion_code)
{
struct kbase_jd_atom *katom;
struct kbase_jd_atom *next_katom;
@@ -1118,6 +1115,10 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
lockdep_assert_held(&kbdev->hwaccess_lock);
katom = kbase_gpu_inspect(kbdev, js, 0);
+ if (!katom) {
+ dev_err(kbdev->dev, "Can't get a katom from js(%u)\n", js);
+ return false;
+ }
next_katom = kbase_gpu_inspect(kbdev, js, 1);
if (next_katom &&
@@ -1181,13 +1182,19 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
* otherwise we would be in the incorrect state of having an atom both running
* on the HW and returned to the JS.
*/
-void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
- u32 completion_code,
- u64 job_tail,
- ktime_t *end_timestamp)
+
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 completion_code,
+ u64 job_tail, ktime_t *end_timestamp)
{
struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
- struct kbase_context *kctx = katom->kctx;
+ struct kbase_context *kctx = NULL;
+
+ if (unlikely(!katom)) {
+ dev_err(kbdev->dev, "Can't get a katom from js(%u)\n", js);
+ return;
+ }
+
+ kctx = katom->kctx;
dev_dbg(kbdev->dev,
"Atom %pK completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n",
@@ -1240,7 +1247,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
}
} else if (completion_code != BASE_JD_EVENT_DONE) {
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
- int i;
+ unsigned int i;
if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) {
dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)",
@@ -1385,7 +1392,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
{
- int js;
+ unsigned int js;
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -1413,7 +1420,7 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
kbase_gpu_in_protected_mode(kbdev));
WARN(!(kbase_jd_katom_is_protected(katom) && js == 0) &&
kbase_jd_katom_is_protected(katom),
- "Protected atom on JS%d not supported", js);
+ "Protected atom on JS%u not supported", js);
}
if ((katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) &&
!kbase_ctx_flag(katom->kctx, KCTX_DYING))
@@ -1509,10 +1516,8 @@ static bool should_stop_next_atom(struct kbase_device *kbdev,
return ret;
}
-static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev,
- int js,
- struct kbase_jd_atom *katom,
- u32 action)
+static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, unsigned int js,
+ struct kbase_jd_atom *katom, u32 action)
{
struct kbase_context *kctx = katom->kctx;
u32 hw_action = action & JS_COMMAND_MASK;
@@ -1556,11 +1561,8 @@ static int should_stop_x_dep_slot(struct kbase_jd_atom *katom)
return -1;
}
-bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
- struct kbase_context *kctx,
- int js,
- struct kbase_jd_atom *katom,
- u32 action)
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_context *kctx,
+ unsigned int js, struct kbase_jd_atom *katom, u32 action)
{
struct kbase_jd_atom *katom_idx0;
struct kbase_context *kctx_idx0 = NULL;
@@ -1813,7 +1815,7 @@ void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
void kbase_gpu_dump_slots(struct kbase_device *kbdev)
{
unsigned long flags;
- int js;
+ unsigned int js;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -1828,12 +1830,10 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev)
idx);
if (katom)
- dev_info(kbdev->dev,
- " js%d idx%d : katom=%pK gpu_rb_state=%d\n",
- js, idx, katom, katom->gpu_rb_state);
+ dev_info(kbdev->dev, " js%u idx%d : katom=%pK gpu_rb_state=%d\n",
+ js, idx, katom, katom->gpu_rb_state);
else
- dev_info(kbdev->dev, " js%d idx%d : empty\n",
- js, idx);
+ dev_info(kbdev->dev, " js%u idx%d : empty\n", js, idx);
}
}
@@ -1842,7 +1842,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev)
void kbase_backend_slot_kctx_purge_locked(struct kbase_device *kbdev, struct kbase_context *kctx)
{
- int js;
+ unsigned int js;
bool tracked = false;
lockdep_assert_held(&kbdev->hwaccess_lock);
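
The reworked submission step above pairs the cycle-counter request with a release on the failure path, and only updates PM metrics and fires the platform event once the atom really reaches KBASE_ATOM_GPU_RB_SUBMITTED. A compact userspace sketch of that acquire/attempt/release-on-failure shape, with every identifier invented for the example:

#include <stdbool.h>
#include <stdio.h>

static int cycle_counter_refs;

static void cycle_counter_get(void) { cycle_counter_refs++; }
static void cycle_counter_put(void) { cycle_counter_refs--; }

/* Stand-in for the hardware submit: 0 on success, negative on failure. */
static int hw_submit(bool slot_free)
{
        return slot_free ? 0 : -1;
}

static bool submit_atom(bool needs_perf_counters, bool slot_free)
{
        if (needs_perf_counters)
                cycle_counter_get();

        if (!hw_submit(slot_free))
                return true; /* submitted: metrics and platform hooks run here */

        if (needs_perf_counters)
                cycle_counter_put(); /* undo the request taken above */
        return false;
}

int main(void)
{
        submit_atom(true, false);
        printf("leaked refs after a failed submit: %d\n", cycle_counter_refs);
        return 0;
}
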
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.h b/mali_kbase/backend/gpu/mali_kbase_jm_rb.h
index d3ff203..32be0bf 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.h
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -40,8 +40,7 @@
*
* Return: true if job evicted from NEXT registers, false otherwise
*/
-bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
- u32 completion_code);
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 completion_code);
/**
* kbase_gpu_complete_hw - Complete an atom on job slot js
@@ -53,10 +52,8 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
* completed
* @end_timestamp: Time of completion
*/
-void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
- u32 completion_code,
- u64 job_tail,
- ktime_t *end_timestamp);
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 completion_code,
+ u64 job_tail, ktime_t *end_timestamp);
/**
* kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer
@@ -68,8 +65,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
* Return: The atom at that position in the ringbuffer
* or NULL if no atom present
*/
-struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
- int idx);
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, unsigned int js, int idx);
/**
* kbase_gpu_dump_slots - Print the contents of the slot ringbuffers
diff --git a/mali_kbase/backend/gpu/mali_kbase_js_backend.c b/mali_kbase/backend/gpu/mali_kbase_js_backend.c
index 02d7cdb..0ed04bb 100644
--- a/mali_kbase/backend/gpu/mali_kbase_js_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_js_backend.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -91,7 +91,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
struct kbase_device *kbdev;
struct kbasep_js_device_data *js_devdata;
struct kbase_backend_data *backend;
- int s;
+ unsigned int s;
bool reset_needed = false;
KBASE_DEBUG_ASSERT(timer != NULL);
@@ -365,4 +365,3 @@ void kbase_backend_timeouts_changed(struct kbase_device *kbdev)
backend->timeouts_updated = true;
}
-
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
index 961a951..dd16fb2 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
+++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
@@ -62,8 +62,9 @@
* document
*/
#include <mali_kbase.h>
+#include <device/mali_kbase_device.h>
#include <gpu/mali_kbase_gpu_regmap.h>
-#include <backend/gpu/mali_kbase_model_dummy.h>
+#include <backend/gpu/mali_kbase_model_linux.h>
#include <mali_kbase_mem_linux.h>
#if MALI_USE_CSF
@@ -80,67 +81,23 @@ static bool ipa_control_timer_enabled;
#endif
#define LO_MASK(M) ((M) & 0xFFFFFFFF)
-#define HI_MASK(M) ((M) & 0xFFFFFFFF00000000)
-
-static u32 get_implementation_register(u32 reg)
-{
- switch (reg) {
- case GPU_CONTROL_REG(SHADER_PRESENT_LO):
- return LO_MASK(DUMMY_IMPLEMENTATION_SHADER_PRESENT);
- case GPU_CONTROL_REG(TILER_PRESENT_LO):
- return LO_MASK(DUMMY_IMPLEMENTATION_TILER_PRESENT);
- case GPU_CONTROL_REG(L2_PRESENT_LO):
- return LO_MASK(DUMMY_IMPLEMENTATION_L2_PRESENT);
- case GPU_CONTROL_REG(STACK_PRESENT_LO):
- return LO_MASK(DUMMY_IMPLEMENTATION_STACK_PRESENT);
-
- case GPU_CONTROL_REG(SHADER_PRESENT_HI):
- case GPU_CONTROL_REG(TILER_PRESENT_HI):
- case GPU_CONTROL_REG(L2_PRESENT_HI):
- case GPU_CONTROL_REG(STACK_PRESENT_HI):
- /* *** FALLTHROUGH *** */
- default:
- return 0;
- }
-}
-
-struct {
- spinlock_t access_lock;
#if !MALI_USE_CSF
- unsigned long prfcnt_base;
-#endif /* !MALI_USE_CSF */
- u32 *prfcnt_base_cpu;
-
- u32 time;
-
- struct gpu_model_prfcnt_en prfcnt_en;
-
- u64 l2_present;
- u64 shader_present;
+#define HI_MASK(M) ((M) & 0xFFFFFFFF00000000)
+#endif
-#if !MALI_USE_CSF
- u64 jm_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
+/* Construct a value for the THREAD_FEATURES register, *except* the two most
+ * significant bits, which are set to IMPLEMENTATION_MODEL in
+ * midgard_model_read_reg().
+ */
+#if MALI_USE_CSF
+#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
+ ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 24))
#else
- u64 cshw_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
-#endif /* !MALI_USE_CSF */
- u64 tiler_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
- u64 l2_counters[KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS *
- KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
- u64 shader_counters[KBASE_DUMMY_MODEL_MAX_SHADER_CORES *
- KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
-
-} performance_counters = {
- .l2_present = DUMMY_IMPLEMENTATION_L2_PRESENT,
- .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
-};
+#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
+ ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 16) | ((MAX_TG_SPLIT) << 24))
+#endif
-struct job_slot {
- int job_active;
- int job_queued;
- int job_complete_irq_asserted;
- int job_irq_mask;
- int job_disabled;
-};
+struct error_status_t hw_error_status;
/**
* struct control_reg_values_t - control register values specific to the GPU being 'emulated'
@@ -158,6 +115,9 @@ struct job_slot {
* @mmu_features: MMU features
* @gpu_features_lo: GPU features (low)
* @gpu_features_hi: GPU features (high)
+ * @shader_present: Available shader bitmap
+ * @stack_present: Core stack present bitmap
+ *
*/
struct control_reg_values_t {
const char *name;
@@ -172,6 +132,16 @@ struct control_reg_values_t {
u32 mmu_features;
u32 gpu_features_lo;
u32 gpu_features_hi;
+ u32 shader_present;
+ u32 stack_present;
+};
+
+struct job_slot {
+ int job_active;
+ int job_queued;
+ int job_complete_irq_asserted;
+ int job_irq_mask;
+ int job_disabled;
};
struct dummy_model_t {
@@ -184,6 +154,10 @@ struct dummy_model_t {
int power_changed; /* 1bit */
bool clean_caches_completed;
bool clean_caches_completed_irq_enabled;
+#if MALI_USE_CSF
+ bool flush_pa_range_completed;
+ bool flush_pa_range_completed_irq_enabled;
+#endif
int power_on; /* 6bits: SHADER[4],TILER,L2 */
u32 stack_power_on_lo;
u32 coherency_enable;
@@ -194,45 +168,6 @@ struct dummy_model_t {
void *data;
};
-void gpu_device_set_data(void *model, void *data)
-{
- struct dummy_model_t *dummy = (struct dummy_model_t *)model;
-
- dummy->data = data;
-}
-
-void *gpu_device_get_data(void *model)
-{
- struct dummy_model_t *dummy = (struct dummy_model_t *)model;
-
- return dummy->data;
-}
-
-#define signal_int(m, s) m->slots[(s)].job_complete_irq_asserted = 1
-
-/* SCons should pass in a default GPU, but other ways of building (e.g.
- * in-tree) won't, so define one here in case.
- */
-#ifndef CONFIG_MALI_NO_MALI_DEFAULT_GPU
-#define CONFIG_MALI_NO_MALI_DEFAULT_GPU "tMIx"
-#endif
-
-static char *no_mali_gpu = CONFIG_MALI_NO_MALI_DEFAULT_GPU;
-module_param(no_mali_gpu, charp, 0000);
-MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as");
-
-/* Construct a value for the THREAD_FEATURES register, *except* the two most
- * significant bits, which are set to IMPLEMENTATION_MODEL in
- * midgard_model_read_reg().
- */
-#if MALI_USE_CSF
-#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
- ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 24))
-#else
-#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
- ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 16) | ((MAX_TG_SPLIT) << 24))
-#endif
-
/* Array associating GPU names with control register values. The first
* one is used in the case of no match.
*/
@@ -249,6 +184,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tHEx",
@@ -262,6 +199,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tSIx",
@@ -275,6 +214,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2821,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tDVx",
@@ -288,6 +229,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2821,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tNOx",
@@ -301,6 +244,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tGOx_r0p0",
@@ -314,6 +259,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tGOx_r1p0",
@@ -328,6 +275,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2823,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tTRx",
@@ -341,6 +290,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tNAx",
@@ -354,6 +305,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tBEx",
@@ -367,6 +320,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TBEX,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tBAx",
@@ -380,19 +335,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
- },
- {
- .name = "tDUx",
- .gpu_id = GPU_ID2_MAKE(10, 2, 0, 1, 0, 0, 0),
- .as_present = 0xFF,
- .thread_max_threads = 0x180,
- .thread_max_workgroup_size = 0x180,
- .thread_max_barrier_size = 0x180,
- .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
- .tiler_features = 0x809,
- .mmu_features = 0x2830,
- .gpu_features_lo = 0,
- .gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tODx",
@@ -406,6 +350,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TODX,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tGRx",
@@ -420,6 +366,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tVAx",
@@ -434,6 +382,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tTUx",
@@ -448,10 +398,102 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0xf,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTUX,
+ .stack_present = 0xF,
+ },
+ {
+ .name = "tTIx",
+ .gpu_id = GPU_ID2_MAKE(12, 8, 1, 0, 0, 0, 0),
+ .as_present = 0xFF,
+ .thread_max_threads = 0x800,
+ .thread_max_workgroup_size = 0x400,
+ .thread_max_barrier_size = 0x400,
+ .thread_features = THREAD_FEATURES_PARTIAL(0x10000, 16, 0),
+ .core_features = 0x1, /* core_1e64fma4tex */
+ .tiler_features = 0x809,
+ .mmu_features = 0x2830,
+ .gpu_features_lo = 0xf,
+ .gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTIX,
+ .stack_present = 0xF,
},
};
-struct error_status_t hw_error_status;
+static struct {
+ spinlock_t access_lock;
+#if !MALI_USE_CSF
+ unsigned long prfcnt_base;
+#endif /* !MALI_USE_CSF */
+ u32 *prfcnt_base_cpu;
+
+ u32 time;
+
+ struct gpu_model_prfcnt_en prfcnt_en;
+
+ u64 l2_present;
+ u64 shader_present;
+
+#if !MALI_USE_CSF
+ u64 jm_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
+#else
+ u64 cshw_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
+#endif /* !MALI_USE_CSF */
+ u64 tiler_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
+ u64 l2_counters[KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS *
+ KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
+ u64 shader_counters[KBASE_DUMMY_MODEL_MAX_SHADER_CORES *
+ KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
+} performance_counters;
+
+static u32 get_implementation_register(u32 reg,
+ const struct control_reg_values_t *const control_reg_values)
+{
+ switch (reg) {
+ case GPU_CONTROL_REG(SHADER_PRESENT_LO):
+ return LO_MASK(control_reg_values->shader_present);
+ case GPU_CONTROL_REG(TILER_PRESENT_LO):
+ return LO_MASK(DUMMY_IMPLEMENTATION_TILER_PRESENT);
+ case GPU_CONTROL_REG(L2_PRESENT_LO):
+ return LO_MASK(DUMMY_IMPLEMENTATION_L2_PRESENT);
+ case GPU_CONTROL_REG(STACK_PRESENT_LO):
+ return LO_MASK(control_reg_values->stack_present);
+
+ case GPU_CONTROL_REG(SHADER_PRESENT_HI):
+ case GPU_CONTROL_REG(TILER_PRESENT_HI):
+ case GPU_CONTROL_REG(L2_PRESENT_HI):
+ case GPU_CONTROL_REG(STACK_PRESENT_HI):
+ /* *** FALLTHROUGH *** */
+ default:
+ return 0;
+ }
+}
+
+void gpu_device_set_data(void *model, void *data)
+{
+ struct dummy_model_t *dummy = (struct dummy_model_t *)model;
+
+ dummy->data = data;
+}
+
+void *gpu_device_get_data(void *model)
+{
+ struct dummy_model_t *dummy = (struct dummy_model_t *)model;
+
+ return dummy->data;
+}
+
+#define signal_int(m, s) m->slots[(s)].job_complete_irq_asserted = 1
+
+/* SCons should pass in a default GPU, but other ways of building (e.g.
+ * in-tree) won't, so define one here in case.
+ */
+#ifndef CONFIG_MALI_NO_MALI_DEFAULT_GPU
+#define CONFIG_MALI_NO_MALI_DEFAULT_GPU "tMIx"
+#endif
+
+static char *no_mali_gpu = CONFIG_MALI_NO_MALI_DEFAULT_GPU;
+module_param(no_mali_gpu, charp, 0000);
+MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as");
#if MALI_USE_CSF
static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type,
@@ -474,17 +516,18 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type,
(ipa_ctl_select_config[core_type] >> (cnt_idx * 8)) & 0xFF;
/* Currently only primary counter blocks are supported */
- if (WARN_ON(event_index >= 64))
+ if (WARN_ON(event_index >=
+ (KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS + KBASE_DUMMY_MODEL_COUNTER_PER_CORE)))
return 0;
/* The actual events start index 4 onwards. Spec also says PRFCNT_EN,
* TIMESTAMP_LO or TIMESTAMP_HI pseudo-counters do not make sense for
* IPA counters. If selected, the value returned for them will be zero.
*/
- if (WARN_ON(event_index <= 3))
+ if (WARN_ON(event_index < KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS))
return 0;
- event_index -= 4;
+ event_index -= KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS;
spin_lock_irqsave(&performance_counters.access_lock, flags);
@@ -680,7 +723,7 @@ void gpu_model_glb_request_job_irq(void *model)
spin_lock_irqsave(&hw_error_status.access_lock, flags);
hw_error_status.job_irq_status |= JOB_IRQ_GLOBAL_IF;
spin_unlock_irqrestore(&hw_error_status.access_lock, flags);
- gpu_device_raise_irq(model, GPU_DUMMY_JOB_IRQ);
+ gpu_device_raise_irq(model, MODEL_LINUX_JOB_IRQ);
}
#endif /* !MALI_USE_CSF */
@@ -712,7 +755,7 @@ static void init_register_statuses(struct dummy_model_t *dummy)
performance_counters.time = 0;
}
-static void update_register_statuses(struct dummy_model_t *dummy, int job_slot)
+static void update_register_statuses(struct dummy_model_t *dummy, unsigned int job_slot)
{
lockdep_assert_held(&hw_error_status.access_lock);
@@ -1011,6 +1054,21 @@ static const struct control_reg_values_t *find_control_reg_values(const char *gp
size_t i;
const struct control_reg_values_t *ret = NULL;
+ /* Edge case for tGOx, as it has 2 entries in the table for its R0 and R1
+ * revisions respectively. As none of them are named "tGOx" the name comparison
+ * needs to be fixed in these cases. CONFIG_GPU_HWVER should be one of "r0p0"
+ * or "r1p0" and is derived from the DDK's build configuration. In cases
+ * where it is unavailable, it defaults to tGOx r1p0.
+ */
+ if (!strcmp(gpu, "tGOx")) {
+#ifdef CONFIG_GPU_HWVER
+ if (!strcmp(CONFIG_GPU_HWVER, "r0p0"))
+ gpu = "tGOx_r0p0";
+ else if (!strcmp(CONFIG_GPU_HWVER, "r1p0"))
+#endif /* CONFIG_GPU_HWVER defined */
+ gpu = "tGOx_r1p0";
+ }
+
for (i = 0; i < ARRAY_SIZE(all_control_reg_values); ++i) {
const struct control_reg_values_t * const fcrv = &all_control_reg_values[i];
@@ -1030,7 +1088,7 @@ static const struct control_reg_values_t *find_control_reg_values(const char *gp
return ret;
}
-void *midgard_model_create(const void *config)
+void *midgard_model_create(struct kbase_device *kbdev)
{
struct dummy_model_t *dummy = NULL;
@@ -1043,7 +1101,16 @@ void *midgard_model_create(const void *config)
dummy->job_irq_js_state = 0;
init_register_statuses(dummy);
dummy->control_reg_values = find_control_reg_values(no_mali_gpu);
+ performance_counters.l2_present = get_implementation_register(
+ GPU_CONTROL_REG(L2_PRESENT_LO), dummy->control_reg_values);
+ performance_counters.shader_present = get_implementation_register(
+ GPU_CONTROL_REG(SHADER_PRESENT_LO), dummy->control_reg_values);
+
+ gpu_device_set_data(dummy, kbdev);
+
+ dev_info(kbdev->dev, "Using Dummy Model");
}
+
return dummy;
}
@@ -1059,19 +1126,21 @@ static void midgard_model_get_outputs(void *h)
lockdep_assert_held(&hw_error_status.access_lock);
if (hw_error_status.job_irq_status)
- gpu_device_raise_irq(dummy, GPU_DUMMY_JOB_IRQ);
+ gpu_device_raise_irq(dummy, MODEL_LINUX_JOB_IRQ);
if ((dummy->power_changed && dummy->power_changed_mask) ||
(dummy->reset_completed & dummy->reset_completed_mask) ||
hw_error_status.gpu_error_irq ||
#if !MALI_USE_CSF
dummy->prfcnt_sample_completed ||
+#else
+ (dummy->flush_pa_range_completed && dummy->flush_pa_range_completed_irq_enabled) ||
#endif
(dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled))
- gpu_device_raise_irq(dummy, GPU_DUMMY_GPU_IRQ);
+ gpu_device_raise_irq(dummy, MODEL_LINUX_GPU_IRQ);
if (hw_error_status.mmu_irq_rawstat & hw_error_status.mmu_irq_mask)
- gpu_device_raise_irq(dummy, GPU_DUMMY_MMU_IRQ);
+ gpu_device_raise_irq(dummy, MODEL_LINUX_MMU_IRQ);
}
static void midgard_model_update(void *h)
@@ -1138,7 +1207,7 @@ static void invalidate_active_jobs(struct dummy_model_t *dummy)
}
}
-u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
+void midgard_model_write_reg(void *h, u32 addr, u32 value)
{
unsigned long flags;
struct dummy_model_t *dummy = (struct dummy_model_t *)h;
@@ -1148,7 +1217,7 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
#if !MALI_USE_CSF
if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) &&
(addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) {
- int slot_idx = (addr >> 7) & 0xf;
+ unsigned int slot_idx = (addr >> 7) & 0xf;
KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS);
if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_LO)) {
@@ -1235,6 +1304,9 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
dummy->reset_completed_mask = (value >> 8) & 0x01;
dummy->power_changed_mask = (value >> 9) & 0x03;
dummy->clean_caches_completed_irq_enabled = (value & (1u << 17)) != 0u;
+#if MALI_USE_CSF
+ dummy->flush_pa_range_completed_irq_enabled = (value & (1u << 20)) != 0u;
+#endif
} else if (addr == GPU_CONTROL_REG(COHERENCY_ENABLE)) {
dummy->coherency_enable = value;
} else if (addr == GPU_CONTROL_REG(GPU_IRQ_CLEAR)) {
@@ -1247,10 +1319,17 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
if (value & (1 << 17))
dummy->clean_caches_completed = false;
-#if !MALI_USE_CSF
- if (value & PRFCNT_SAMPLE_COMPLETED)
+
+#if MALI_USE_CSF
+ if (value & (1u << 20))
+ dummy->flush_pa_range_completed = false;
+#endif /* MALI_USE_CSF */
+
+#if !MALI_USE_CSF
+ if (value & PRFCNT_SAMPLE_COMPLETED) /* (1 << 16) */
dummy->prfcnt_sample_completed = 0;
#endif /* !MALI_USE_CSF */
+
/*update error status */
hw_error_status.gpu_error_irq &= ~(value);
} else if (addr == GPU_CONTROL_REG(GPU_COMMAND)) {
@@ -1274,7 +1353,15 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
pr_debug("clean caches requested");
dummy->clean_caches_completed = true;
break;
-#if !MALI_USE_CSF
+#if MALI_USE_CSF
+ case GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2:
+ case GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC:
+ case GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_FULL:
+ pr_debug("pa range flush requested");
+ dummy->flush_pa_range_completed = true;
+ break;
+#endif /* MALI_USE_CSF */
+#if !MALI_USE_CSF
case GPU_COMMAND_PRFCNT_SAMPLE:
midgard_model_dump_prfcnt();
dummy->prfcnt_sample_completed = 1;
@@ -1282,6 +1369,11 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
default:
break;
}
+#if MALI_USE_CSF
+ } else if (addr >= GPU_CONTROL_REG(GPU_COMMAND_ARG0_LO) &&
+ addr <= GPU_CONTROL_REG(GPU_COMMAND_ARG1_HI)) {
+ /* Writes ignored */
+#endif
} else if (addr == GPU_CONTROL_REG(L2_CONFIG)) {
dummy->l2_config = value;
}
@@ -1291,6 +1383,12 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
(CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE))) {
if (addr == GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET))
hw_error_status.job_irq_status = JOB_IRQ_GLOBAL_IF;
+ } else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) &&
+ (addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) {
+ /* Do nothing */
+ } else if ((addr >= GPU_CONTROL_REG(ASN_HASH_0)) &&
+ (addr < GPU_CONTROL_REG(ASN_HASH(ASN_HASH_COUNT)))) {
+ /* Do nothing */
} else if (addr == IPA_CONTROL_REG(COMMAND)) {
pr_debug("Received IPA_CONTROL command");
} else if (addr == IPA_CONTROL_REG(TIMER)) {
@@ -1315,8 +1413,7 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
hw_error_status.mmu_irq_mask = value;
} else if (addr == MMU_REG(MMU_IRQ_CLEAR)) {
hw_error_status.mmu_irq_rawstat &= (~value);
- } else if ((addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)) &&
- (addr <= MMU_AS_REG(15, AS_STATUS))) {
+ } else if ((addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)) && (addr <= MMU_AS_REG(15, AS_STATUS))) {
int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO))
>> 6;
@@ -1443,7 +1540,8 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
dummy->power_changed = 1;
break;
case SHADER_PWRON_LO:
- dummy->power_on |= (value & 0xF) << 2;
+ dummy->power_on |=
+ (value & dummy->control_reg_values->shader_present) << 2;
dummy->power_changed = 1;
break;
case L2_PWRON_LO:
@@ -1459,7 +1557,8 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
dummy->power_changed = 1;
break;
case SHADER_PWROFF_LO:
- dummy->power_on &= ~((value & 0xF) << 2);
+ dummy->power_on &=
+ ~((value & dummy->control_reg_values->shader_present) << 2);
dummy->power_changed = 1;
break;
case L2_PWROFF_LO:
@@ -1500,11 +1599,9 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
midgard_model_update(dummy);
midgard_model_get_outputs(dummy);
spin_unlock_irqrestore(&hw_error_status.access_lock, flags);
-
- return 1;
}
-u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
+void midgard_model_read_reg(void *h, u32 addr, u32 *const value)
{
unsigned long flags;
struct dummy_model_t *dummy = (struct dummy_model_t *)h;
@@ -1546,6 +1643,9 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) {
*value = (dummy->reset_completed_mask << 8) |
((dummy->clean_caches_completed_irq_enabled ? 1u : 0u) << 17) |
+#if MALI_USE_CSF
+ ((dummy->flush_pa_range_completed_irq_enabled ? 1u : 0u) << 20) |
+#endif
(dummy->power_changed_mask << 9) | (1 << 7) | 1;
pr_debug("GPU_IRQ_MASK read %x", *value);
} else if (addr == GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) {
@@ -1555,6 +1655,9 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
(dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) |
#endif /* !MALI_USE_CSF */
((dummy->clean_caches_completed ? 1u : 0u) << 17) |
+#if MALI_USE_CSF
+ ((dummy->flush_pa_range_completed ? 1u : 0u) << 20) |
+#endif
hw_error_status.gpu_error_irq;
pr_debug("GPU_IRQ_RAWSTAT read %x", *value);
} else if (addr == GPU_CONTROL_REG(GPU_IRQ_STATUS)) {
@@ -1569,6 +1672,13 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
1u :
0u)
<< 17) |
+#if MALI_USE_CSF
+ (((dummy->flush_pa_range_completed &&
+ dummy->flush_pa_range_completed_irq_enabled) ?
+ 1u :
+ 0u)
+ << 20) |
+#endif
hw_error_status.gpu_error_irq;
pr_debug("GPU_IRQ_STAT read %x", *value);
} else if (addr == GPU_CONTROL_REG(GPU_STATUS)) {
@@ -1581,8 +1691,18 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
*value = hw_error_status.gpu_fault_status;
} else if (addr == GPU_CONTROL_REG(L2_CONFIG)) {
*value = dummy->l2_config;
- } else if ((addr >= GPU_CONTROL_REG(SHADER_PRESENT_LO)) &&
- (addr <= GPU_CONTROL_REG(L2_MMU_CONFIG))) {
+ }
+#if MALI_USE_CSF
+ else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) &&
+ (addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) {
+ *value = 0;
+ } else if ((addr >= GPU_CONTROL_REG(ASN_HASH_0)) &&
+ (addr < GPU_CONTROL_REG(ASN_HASH(ASN_HASH_COUNT)))) {
+ *value = 0;
+ }
+#endif
+ else if ((addr >= GPU_CONTROL_REG(SHADER_PRESENT_LO)) &&
+ (addr <= GPU_CONTROL_REG(L2_MMU_CONFIG))) {
switch (addr) {
case GPU_CONTROL_REG(SHADER_PRESENT_LO):
case GPU_CONTROL_REG(SHADER_PRESENT_HI):
@@ -1592,27 +1712,27 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
case GPU_CONTROL_REG(L2_PRESENT_HI):
case GPU_CONTROL_REG(STACK_PRESENT_LO):
case GPU_CONTROL_REG(STACK_PRESENT_HI):
- *value = get_implementation_register(addr);
+ *value = get_implementation_register(addr, dummy->control_reg_values);
break;
case GPU_CONTROL_REG(SHADER_READY_LO):
*value = (dummy->power_on >> 0x02) &
- get_implementation_register(
- GPU_CONTROL_REG(SHADER_PRESENT_LO));
+ get_implementation_register(GPU_CONTROL_REG(SHADER_PRESENT_LO),
+ dummy->control_reg_values);
break;
case GPU_CONTROL_REG(TILER_READY_LO):
*value = (dummy->power_on >> 0x01) &
- get_implementation_register(
- GPU_CONTROL_REG(TILER_PRESENT_LO));
+ get_implementation_register(GPU_CONTROL_REG(TILER_PRESENT_LO),
+ dummy->control_reg_values);
break;
case GPU_CONTROL_REG(L2_READY_LO):
*value = dummy->power_on &
- get_implementation_register(
- GPU_CONTROL_REG(L2_PRESENT_LO));
+ get_implementation_register(GPU_CONTROL_REG(L2_PRESENT_LO),
+ dummy->control_reg_values);
break;
case GPU_CONTROL_REG(STACK_READY_LO):
*value = dummy->stack_power_on_lo &
- get_implementation_register(
- GPU_CONTROL_REG(STACK_PRESENT_LO));
+ get_implementation_register(GPU_CONTROL_REG(STACK_PRESENT_LO),
+ dummy->control_reg_values);
break;
case GPU_CONTROL_REG(SHADER_READY_HI):
@@ -1904,8 +2024,6 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER,
counter_index, is_low_word);
- } else if (addr == USER_REG(LATEST_FLUSH)) {
- *value = 0;
}
#endif
else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) {
@@ -1921,8 +2039,6 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
spin_unlock_irqrestore(&hw_error_status.access_lock, flags);
CSTD_UNUSED(dummy);
-
- return 1;
}
static u32 set_user_sample_core_type(u64 *counters, u32 *usr_data_start, u32 usr_data_offset,
@@ -2098,3 +2214,16 @@ int gpu_model_control(void *model,
return 0;
}
+
+/**
+ * kbase_is_gpu_removed - Has the GPU been removed.
+ * @kbdev: Kbase device pointer
+ *
+ * This function would return true if the GPU has been removed.
+ * It is stubbed here.
+ * Return: Always false
+ */
+bool kbase_is_gpu_removed(struct kbase_device *kbdev)
+{
+ return false;
+}
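
get_implementation_register() above now takes the per-GPU control_reg_values_t entry, so the SHADER_PRESENT and STACK_PRESENT values come from the table row selected by the no_mali_gpu name rather than from single hard-coded constants. A reduced sketch of that lookup-with-default pattern; the GPU names and mask values below are illustrative, not the real register model:

#include <stdio.h>
#include <string.h>

struct gpu_regs {
        const char *name;
        unsigned int shader_present;
        unsigned int stack_present;
};

static const struct gpu_regs table[] = {
        { .name = "tMIx", .shader_present = 0xF,   .stack_present = 0x7 },
        { .name = "tTUx", .shader_present = 0x7FF, .stack_present = 0xF },
};

static const struct gpu_regs *find_regs(const char *gpu)
{
        size_t i;

        for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
                if (!strcmp(table[i].name, gpu))
                        return &table[i];
        return &table[0]; /* the first entry doubles as the default */
}

int main(void)
{
        printf("tTUx shader_present = 0x%x\n", find_regs("tTUx")->shader_present);
        return 0;
}
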
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.h b/mali_kbase/backend/gpu/mali_kbase_model_dummy.h
index 8eaf1b0..2a3351b 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.h
+++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.h
@@ -21,11 +21,24 @@
/*
* Dummy Model interface
+ *
+ * Support for NO_MALI dummy Model interface.
+ *
+ * +-----------------------------------+
+ * | Kbase read/write/IRQ |
+ * +-----------------------------------+
+ * | Model Linux Framework |
+ * +-----------------------------------+
+ * | Model Dummy interface definitions |
+ * +-----------------+-----------------+
+ * | Fake R/W | Fake IRQ |
+ * +-----------------+-----------------+
*/
#ifndef _KBASE_MODEL_DUMMY_H_
#define _KBASE_MODEL_DUMMY_H_
+#include <uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_linux.h>
#include <uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h>
#define model_error_log(module, ...) pr_err(__VA_ARGS__)
@@ -154,11 +167,6 @@ struct gpu_model_prfcnt_en {
u32 shader;
};
-void *midgard_model_create(const void *config);
-void midgard_model_destroy(void *h);
-u8 midgard_model_write_reg(void *h, u32 addr, u32 value);
-u8 midgard_model_read_reg(void *h, u32 addr,
- u32 * const value);
void midgard_set_error(int job_slot);
int job_atom_inject_error(struct kbase_error_params *params);
int gpu_model_control(void *h,
@@ -211,17 +219,6 @@ void gpu_model_prfcnt_dump_request(uint32_t *sample_buf, struct gpu_model_prfcnt
void gpu_model_glb_request_job_irq(void *model);
#endif /* MALI_USE_CSF */
-enum gpu_dummy_irq {
- GPU_DUMMY_JOB_IRQ,
- GPU_DUMMY_GPU_IRQ,
- GPU_DUMMY_MMU_IRQ
-};
-
-void gpu_device_raise_irq(void *model,
- enum gpu_dummy_irq irq);
-void gpu_device_set_data(void *model, void *data);
-void *gpu_device_get_data(void *model);
-
extern struct error_status_t hw_error_status;
#endif
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c b/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c
index 3440460..f310cc7 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c
+++ b/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2015, 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -21,7 +21,21 @@
#include <mali_kbase.h>
#include <linux/random.h>
-#include "backend/gpu/mali_kbase_model_dummy.h"
+#include "backend/gpu/mali_kbase_model_linux.h"
+
+static struct kbase_error_atom *error_track_list;
+
+#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM
+
+/* Kernel 6.1.0 has dropped prandom_u32(), use get_random_u32() */
+#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
+#define prandom_u32 get_random_u32
+#endif
+
+/* The following error probabilities are set quite high in order to stress the driver */
+static unsigned int error_probability = 50; /* to be set between 0 and 100 */
+/* probability to have multiple errors given that there is an error */
+static unsigned int multiple_error_probability = 50;
/* all the error conditions supported by the model */
#define TOTAL_FAULTS 27
@@ -30,16 +44,6 @@
/* worst case scenario is <1 MMU fault + 1 job fault + 2 GPU faults> */
#define MAX_CONCURRENT_FAULTS 3
-static struct kbase_error_atom *error_track_list;
-
-unsigned int rand_seed;
-
-/*following error probability are set quite high in order to stress the driver*/
-unsigned int error_probability = 50; /* to be set between 0 and 100 */
-/* probability to have multiple error give that there is an error */
-unsigned int multiple_error_probability = 50;
-
-#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM
/**
* gpu_generate_error - Generate GPU error
*/
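
The CONFIG_MALI_ERROR_INJECT_RANDOM block above keeps its prandom_u32() call sites unchanged on newer kernels by redirecting the name to get_random_u32() once LINUX_VERSION_CODE reaches 6.1.0. A userspace sketch of that version-shim pattern; the demo names and the placeholder random source stand in for the kernel APIs:

#include <stdio.h>

/* Local stand-ins; in the kernel these come from <linux/version.h>. */
#define KERNEL_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + (c))
#define LINUX_VERSION_CODE KERNEL_VERSION(6, 1, 0)

/* Placeholder "new" API so the sketch stays self-contained. */
static unsigned int get_random_u32_demo(void)
{
        return 4u; /* fixed value, purely a placeholder */
}

/* On new enough kernels the legacy name simply aliases the replacement. */
#if KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE
#define prandom_u32_demo get_random_u32_demo
#endif

int main(void)
{
        printf("random: %u\n", prandom_u32_demo());
        return 0;
}
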
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.c b/mali_kbase/backend/gpu/mali_kbase_model_linux.c
index 7887cb2..e90e4df 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_linux.c
+++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.c
@@ -20,12 +20,12 @@
*/
/*
- * Model interface
+ * Model Linux Framework interfaces.
*/
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
-#include <backend/gpu/mali_kbase_model_dummy.h>
+
#include "backend/gpu/mali_kbase_model_linux.h"
#include "device/mali_kbase_device.h"
#include "mali_kbase_irq_internal.h"
@@ -105,8 +105,7 @@ static void serve_mmu_irq(struct work_struct *work)
kmem_cache_free(kbdev->irq_slab, data);
}
-void gpu_device_raise_irq(void *model,
- enum gpu_dummy_irq irq)
+void gpu_device_raise_irq(void *model, u32 irq)
{
struct model_irq_data *data;
struct kbase_device *kbdev = gpu_device_get_data(model);
@@ -120,15 +119,15 @@ void gpu_device_raise_irq(void *model,
data->kbdev = kbdev;
switch (irq) {
- case GPU_DUMMY_JOB_IRQ:
+ case MODEL_LINUX_JOB_IRQ:
INIT_WORK(&data->work, serve_job_irq);
atomic_set(&kbdev->serving_job_irq, 1);
break;
- case GPU_DUMMY_GPU_IRQ:
+ case MODEL_LINUX_GPU_IRQ:
INIT_WORK(&data->work, serve_gpu_irq);
atomic_set(&kbdev->serving_gpu_irq, 1);
break;
- case GPU_DUMMY_MMU_IRQ:
+ case MODEL_LINUX_MMU_IRQ:
INIT_WORK(&data->work, serve_mmu_irq);
atomic_set(&kbdev->serving_mmu_irq, 1);
break;
@@ -165,22 +164,8 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
return val;
}
-
KBASE_EXPORT_TEST_API(kbase_reg_read);
-/**
- * kbase_is_gpu_removed - Has the GPU been removed.
- * @kbdev: Kbase device pointer
- *
- * This function would return true if the GPU has been removed.
- * It is stubbed here
- * Return: Always false
- */
-bool kbase_is_gpu_removed(struct kbase_device *kbdev)
-{
- return false;
-}
-
int kbase_install_interrupts(struct kbase_device *kbdev)
{
KBASE_DEBUG_ASSERT(kbdev);
@@ -239,16 +224,12 @@ KBASE_EXPORT_TEST_API(kbase_gpu_irq_test_handler);
int kbase_gpu_device_create(struct kbase_device *kbdev)
{
- kbdev->model = midgard_model_create(NULL);
+ kbdev->model = midgard_model_create(kbdev);
if (kbdev->model == NULL)
return -ENOMEM;
- gpu_device_set_data(kbdev->model, kbdev);
-
spin_lock_init(&kbdev->reg_op_lock);
- dev_warn(kbdev->dev, "Using Dummy Model");
-
return 0;
}
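
gpu_device_raise_irq() above now takes a plain u32 tagged with the MODEL_LINUX_*_IRQ values and queues one work item per interrupt type. A minimal userspace sketch of that dispatch shape; the enum values and the function are invented here, and the real code schedules workqueue items rather than printing:

#include <stdio.h>

enum { DEMO_JOB_IRQ, DEMO_GPU_IRQ, DEMO_MMU_IRQ };

static void raise_irq(unsigned int irq)
{
        switch (irq) {
        case DEMO_JOB_IRQ:
                printf("queue job IRQ work\n");
                break;
        case DEMO_GPU_IRQ:
                printf("queue GPU IRQ work\n");
                break;
        case DEMO_MMU_IRQ:
                printf("queue MMU IRQ work\n");
                break;
        default:
                printf("unknown IRQ %u ignored\n", irq);
                break;
        }
}

int main(void)
{
        raise_irq(DEMO_GPU_IRQ);
        return 0;
}
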
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.h b/mali_kbase/backend/gpu/mali_kbase_model_linux.h
index dcb2e7c..4cf1235 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_linux.h
+++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,13 +20,132 @@
*/
/*
- * Model interface
+ * Model Linux Framework interfaces.
+ *
+ * This framework is used to provide generic Kbase model interfaces.
+ * Note: Backends cannot be used together; the selection is done at build time.
+ *
+ * - Without Model Linux Framework:
+ * +-----------------------------+
+ * | Kbase read/write/IRQ |
+ * +-----------------------------+
+ * | HW interface definitions |
+ * +-----------------------------+
+ *
+ * - With Model Linux Framework:
+ * +-----------------------------+
+ * | Kbase read/write/IRQ |
+ * +-----------------------------+
+ * | Model Linux Framework |
+ * +-----------------------------+
+ * | Model interface definitions |
+ * +-----------------------------+
*/
#ifndef _KBASE_MODEL_LINUX_H_
#define _KBASE_MODEL_LINUX_H_
+/*
+ * Include Model definitions
+ */
+
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#endif /* IS_ENABLED(CONFIG_MALI_NO_MALI) */
+
+#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
+/**
+ * kbase_gpu_device_create() - Generic create function.
+ *
+ * @kbdev: Kbase device.
+ *
+ * Specific model hook is implemented by midgard_model_create()
+ *
+ * Return: 0 on success, error code otherwise.
+ */
int kbase_gpu_device_create(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpu_device_destroy() - Generic destroy function.
+ *
+ * @kbdev: Kbase device.
+ *
+ * Specific model hook is implemented by midgard_model_destroy()
+ */
void kbase_gpu_device_destroy(struct kbase_device *kbdev);
-#endif /* _KBASE_MODEL_LINUX_H_ */
+/**
+ * midgard_model_create() - Private create function.
+ *
+ * @kbdev: Kbase device.
+ *
+ * This hook is specific to the model built in Kbase.
+ *
+ * Return: Model handle.
+ */
+void *midgard_model_create(struct kbase_device *kbdev);
+
+/**
+ * midgard_model_destroy() - Private destroy function.
+ *
+ * @h: Model handle.
+ *
+ * This hook is specific to the model built in Kbase.
+ */
+void midgard_model_destroy(void *h);
+
+/**
+ * midgard_model_write_reg() - Private model write function.
+ *
+ * @h: Model handle.
+ * @addr: Address at which to write.
+ * @value: value to write.
+ *
+ * This hook is specific to the model built in Kbase.
+ */
+void midgard_model_write_reg(void *h, u32 addr, u32 value);
+
+/**
+ * midgard_model_read_reg() - Private model read function.
+ *
+ * @h: Model handle.
+ * @addr: Address from which to read.
+ * @value: Pointer where to store the read value.
+ *
+ * This hook is specific to the model built in Kbase.
+ */
+void midgard_model_read_reg(void *h, u32 addr, u32 *const value);
+
+/**
+ * gpu_device_raise_irq() - Private IRQ raise function.
+ *
+ * @model: Model handle.
+ * @irq: IRQ type to raise.
+ *
+ * This hook is global to the model Linux framework.
+ */
+void gpu_device_raise_irq(void *model, u32 irq);
+
+/**
+ * gpu_device_set_data() - Private model set data function.
+ *
+ * @model: Model handle.
+ * @data: Data carried by model.
+ *
+ * This hook is global to the model Linux framework.
+ */
+void gpu_device_set_data(void *model, void *data);
+
+/**
+ * gpu_device_get_data() - Private model get data function.
+ *
+ * @model: Model handle.
+ *
+ * This hook is global to the model Linux framework.
+ *
+ * Return: Pointer to the data carried by model.
+ */
+void *gpu_device_get_data(void *model);
+#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
+
+#endif /* _KBASE_MODEL_LINUX_H_ */
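To make the hook relationships above concrete, here is a minimal sketch of a hypothetical model backend written against these declarations. It is not part of the patch: the toy_model structure is invented, the real dummy model (mali_kbase_model_dummy.c) is far richer, and the assumption that the model attaches the kbase device via gpu_device_set_data() is inferred from the kbase_gpu_device_create() change above.

#include <linux/slab.h>
#include "backend/gpu/mali_kbase_model_linux.h"

/* "toy_model" and its behaviour are invented; only the hook signatures and
 * MODEL_LINUX_GPU_IRQ come from the header above.
 */
struct toy_model {
	u32 last_write;
};

void *midgard_model_create(struct kbase_device *kbdev)
{
	struct toy_model *m = kzalloc(sizeof(*m), GFP_KERNEL);

	if (!m)
		return NULL;

	/* kbase_gpu_device_create() no longer calls gpu_device_set_data()
	 * itself (see the mali_kbase_model_linux.c hunk above), so the
	 * model presumably attaches the kbase device here.
	 */
	gpu_device_set_data(m, kbdev);
	return m;
}

void midgard_model_destroy(void *h)
{
	kfree(h);
}

void midgard_model_write_reg(void *h, u32 addr, u32 value)
{
	struct toy_model *m = h;

	m->last_write = value;
	/* A real model decodes @addr; the unconditional IRQ below only
	 * illustrates the gpu_device_raise_irq() path, which is serviced
	 * by serve_gpu_irq() in mali_kbase_model_linux.c.
	 */
	gpu_device_raise_irq(m, MODEL_LINUX_GPU_IRQ);
}

void midgard_model_read_reg(void *h, u32 addr, u32 *const value)
{
	*value = ((struct toy_model *)h)->last_write;
}

Selecting a backend like this is a build-time choice, as the header comment notes; the framework side (register I/O and IRQ delivery) stays unchanged.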
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
index f496ed5..abbb9c8 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
@@ -36,7 +36,7 @@
#include <linux/pm_runtime.h>
#include <mali_kbase_reset_gpu.h>
#endif /* !MALI_USE_CSF */
-#include <mali_kbase_hwcnt_context.h>
+#include <hwcnt/mali_kbase_hwcnt_context.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <backend/gpu/mali_kbase_devfreq.h>
#include <mali_kbase_dummy_job_wa.h>
@@ -712,7 +712,7 @@ void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev)
//callchains go through this function though holding that lock
//so just print without locking.
dev_err(kbdev->dev, "scheduler.state %d", kbdev->csf.scheduler.state);
- dev_err(kbdev->dev, "Firmware ping %d", kbase_csf_firmware_ping_wait(kbdev));
+ dev_err(kbdev->dev, "Firmware ping %d", kbase_csf_firmware_ping_wait(kbdev, 0));
#endif
//Attempt another state machine transition prompt.
dev_err(kbdev->dev, "Attempt to prompt state machine");
@@ -1030,7 +1030,7 @@ void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev)
if (!kbdev->arb.arb_if)
return;
- mutex_lock(&kbdev->pm.lock);
+ rt_mutex_lock(&kbdev->pm.lock);
mutex_lock(&arb_vm_state->vm_state_lock);
if (kbdev->pm.backend.gpu_powered &&
!kbase_pm_is_gpu_lost(kbdev)) {
@@ -1070,7 +1070,7 @@ void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev)
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
}
mutex_unlock(&arb_vm_state->vm_state_lock);
- mutex_unlock(&kbdev->pm.lock);
+ rt_mutex_unlock(&kbdev->pm.lock);
}
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
@@ -1286,50 +1286,3 @@ out:
return ret;
}
#endif
-
-#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
-void kbase_pm_turn_on_sc_power_rails_locked(struct kbase_device *kbdev)
-{
- unsigned long flags;
-
- lockdep_assert_held(&kbdev->pm.lock);
- WARN_ON(!kbdev->pm.backend.gpu_powered);
- if (kbdev->pm.backend.sc_power_rails_off) {
- if (kbdev->pm.backend.callback_power_on_sc_rails) {
- kbdev->pm.backend.callback_power_on_sc_rails(kbdev);
- KBASE_KTRACE_ADD(kbdev, PM_RAIL_ON, NULL, 0);
- }
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- kbdev->pm.backend.sc_power_rails_off = false;
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
- }
-}
-
-void kbase_pm_turn_on_sc_power_rails(struct kbase_device *kbdev)
-{
- kbase_pm_lock(kbdev);
- kbase_pm_turn_on_sc_power_rails_locked(kbdev);
- kbase_pm_unlock(kbdev);
-}
-
-void kbase_pm_turn_off_sc_power_rails(struct kbase_device *kbdev)
-{
- unsigned long flags;
-
- kbase_pm_lock(kbdev);
- WARN_ON(!kbdev->pm.backend.gpu_powered);
- if (!kbdev->pm.backend.sc_power_rails_off) {
- bool abort;
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- kbdev->pm.backend.sc_power_rails_off = true;
- /* Work around for b/234962632 */
- abort = WARN_ON(!kbdev->pm.backend.sc_pwroff_safe);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
- if (kbdev->pm.backend.callback_power_off_sc_rails && !abort) {
- kbdev->pm.backend.callback_power_off_sc_rails(kbdev);
- KBASE_KTRACE_ADD(kbdev, PM_RAIL_OFF, NULL, 0);
- }
- }
- kbase_pm_unlock(kbdev);
-}
-#endif
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
index a4d7168..b02f77f 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
@@ -26,9 +26,7 @@
#include <mali_kbase.h>
#include <mali_kbase_pm.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
-#include <backend/gpu/mali_kbase_model_dummy.h>
-#endif /* CONFIG_MALI_NO_MALI */
+#include <backend/gpu/mali_kbase_model_linux.h>
#include <mali_kbase_dummy_job_wa.h>
int kbase_pm_ca_init(struct kbase_device *kbdev)
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
index 83dd741..7f4f476 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
@@ -39,7 +39,7 @@
#include <mali_kbase_reset_gpu.h>
#include <mali_kbase_ctx_sched.h>
-#include <mali_kbase_hwcnt_context.h>
+#include <hwcnt/mali_kbase_hwcnt_context.h>
#include <mali_kbase_pbha.h>
#include <backend/gpu/mali_kbase_cache_policy_backend.h>
#include <device/mali_kbase_device.h>
@@ -539,6 +539,14 @@ static void kbase_pm_l2_config_override(struct kbase_device *kbdev)
if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG))
return;
+#if MALI_USE_CSF
+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU)) {
+ val = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG));
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_CONFIG),
+ L2_CONFIG_PBHA_HWU_SET(val, kbdev->pbha_propagate_bits));
+ }
+#endif /* MALI_USE_CSF */
+
/*
* Skip if size and hash are not given explicitly,
* which means default values are used.
@@ -600,6 +608,21 @@ static const char *kbase_mcu_state_to_string(enum kbase_mcu_state state)
return strings[state];
}
+static
+void kbase_ktrace_log_mcu_state(struct kbase_device *kbdev, enum kbase_mcu_state state)
+{
+#if KBASE_KTRACE_ENABLE
+ switch (state) {
+#define KBASEP_MCU_STATE(n) \
+ case KBASE_MCU_ ## n: \
+ KBASE_KTRACE_ADD(kbdev, PM_MCU_ ## n, NULL, state); \
+ break;
+#include "mali_kbase_pm_mcu_states.h"
+#undef KBASEP_MCU_STATE
+ }
+#endif
+}
+
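The helper above leans on the X-macro convention used by these PM state headers: mali_kbase_pm_mcu_states.h is nothing but a list of KBASEP_MCU_STATE(name) invocations, so redefining the macro before including the header stamps out one switch case per state. Roughly, after preprocessing the include expands to something like the following (list abbreviated for illustration):

switch (state) {
case KBASE_MCU_OFF:
	KBASE_KTRACE_ADD(kbdev, PM_MCU_OFF, NULL, state);
	break;
case KBASE_MCU_PEND_ON_RELOAD:
	KBASE_KTRACE_ADD(kbdev, PM_MCU_PEND_ON_RELOAD, NULL, state);
	break;
/* ...one case per KBASEP_MCU_STATE() entry in mali_kbase_pm_mcu_states.h... */
}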
static inline bool kbase_pm_handle_mcu_core_attr_update(struct kbase_device *kbdev)
{
struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
@@ -794,6 +817,17 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
KBASE_MCU_HCTL_SHADERS_PEND_ON;
} else
backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE;
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+ if (kbase_debug_coresight_csf_state_check(
+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED)) {
+ kbase_debug_coresight_csf_state_request(
+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED);
+ backend->mcu_state = KBASE_MCU_CORESIGHT_ENABLE;
+ } else if (kbase_debug_coresight_csf_state_check(
+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) {
+ backend->mcu_state = KBASE_MCU_CORESIGHT_ENABLE;
+ }
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
}
break;
@@ -822,8 +856,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
unsigned long flags;
kbase_csf_scheduler_spin_lock(kbdev, &flags);
- kbase_hwcnt_context_enable(
- kbdev->hwcnt_gpu_ctx);
+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
backend->hwcnt_disabled = false;
}
@@ -844,9 +877,19 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
backend->mcu_state =
KBASE_MCU_HCTL_MCU_ON_RECHECK;
}
- } else if (kbase_pm_handle_mcu_core_attr_update(kbdev)) {
+ } else if (kbase_pm_handle_mcu_core_attr_update(kbdev))
backend->mcu_state = KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND;
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+ else if (kbdev->csf.coresight.disable_on_pmode_enter) {
+ kbase_debug_coresight_csf_state_request(
+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED);
+ backend->mcu_state = KBASE_MCU_ON_PMODE_ENTER_CORESIGHT_DISABLE;
+ } else if (kbdev->csf.coresight.enable_on_pmode_exit) {
+ kbase_debug_coresight_csf_state_request(
+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED);
+ backend->mcu_state = KBASE_MCU_ON_PMODE_EXIT_CORESIGHT_ENABLE;
}
+#endif
break;
case KBASE_MCU_HCTL_MCU_ON_RECHECK:
@@ -937,12 +980,46 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
#ifdef KBASE_PM_RUNTIME
if (backend->gpu_sleep_mode_active)
backend->mcu_state = KBASE_MCU_ON_SLEEP_INITIATE;
- else
+ else {
#endif
backend->mcu_state = KBASE_MCU_ON_HALT;
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+ kbase_debug_coresight_csf_state_request(
+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED);
+ backend->mcu_state = KBASE_MCU_CORESIGHT_DISABLE;
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
+ }
}
break;
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+ case KBASE_MCU_ON_PMODE_ENTER_CORESIGHT_DISABLE:
+ if (kbase_debug_coresight_csf_state_check(
+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED)) {
+ backend->mcu_state = KBASE_MCU_ON;
+ kbdev->csf.coresight.disable_on_pmode_enter = false;
+ }
+ break;
+ case KBASE_MCU_ON_PMODE_EXIT_CORESIGHT_ENABLE:
+ if (kbase_debug_coresight_csf_state_check(
+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) {
+ backend->mcu_state = KBASE_MCU_ON;
+ kbdev->csf.coresight.enable_on_pmode_exit = false;
+ }
+ break;
+ case KBASE_MCU_CORESIGHT_DISABLE:
+ if (kbase_debug_coresight_csf_state_check(
+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED))
+ backend->mcu_state = KBASE_MCU_ON_HALT;
+ break;
+
+ case KBASE_MCU_CORESIGHT_ENABLE:
+ if (kbase_debug_coresight_csf_state_check(
+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED))
+ backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE;
+ break;
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
+
case KBASE_MCU_ON_HALT:
if (!kbase_pm_is_mcu_desired(kbdev)) {
kbase_csf_firmware_trigger_mcu_halt(kbdev);
@@ -1035,6 +1112,11 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
/* Reset complete */
if (!backend->in_reset)
backend->mcu_state = KBASE_MCU_OFF;
+
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+ kbdev->csf.coresight.disable_on_pmode_enter = false;
+ kbdev->csf.coresight.enable_on_pmode_exit = false;
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
break;
default:
@@ -1052,6 +1134,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
dev_dbg(kbdev->dev, "MCU state transition: %s to %s\n",
kbase_mcu_state_to_string(prev_state),
kbase_mcu_state_to_string(backend->mcu_state));
+ kbase_ktrace_log_mcu_state(kbdev, backend->mcu_state);
}
} while (backend->mcu_state != prev_state);
@@ -1125,6 +1208,21 @@ static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state)
return strings[state];
}
+static
+void kbase_ktrace_log_l2_core_state(struct kbase_device *kbdev, enum kbase_l2_core_state state)
+{
+#if KBASE_KTRACE_ENABLE
+ switch (state) {
+#define KBASEP_L2_STATE(n) \
+ case KBASE_L2_ ## n: \
+ KBASE_KTRACE_ADD(kbdev, PM_L2_ ## n, NULL, state); \
+ break;
+#include "mali_kbase_pm_l2_states.h"
+#undef KBASEP_L2_STATE
+ }
+#endif
+}
+
#if !MALI_USE_CSF
/* On powering on the L2, the tracked kctx becomes stale and can be cleared.
* This enables the backend to spare the START_FLUSH.INV_SHADER_OTHER
@@ -1195,13 +1293,22 @@ static bool can_power_down_l2(struct kbase_device *kbdev)
#if MALI_USE_CSF
/* Due to the HW issue GPU2019-3878, need to prevent L2 power off
* whilst MMU command is in progress.
+ * Also defer the power-down if the MMU is in the process of page migration.
*/
- return !kbdev->mmu_hw_operation_in_progress;
+ return !kbdev->mmu_hw_operation_in_progress && !kbdev->mmu_page_migrate_in_progress;
#else
- return true;
+ return !kbdev->mmu_page_migrate_in_progress;
#endif
}
+static bool can_power_up_l2(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ /* Avoid an L2 transition if the MMU is undergoing page migration */
+ return !kbdev->mmu_page_migrate_in_progress;
+}
+
static bool need_tiler_control(struct kbase_device *kbdev)
{
#if MALI_USE_CSF
@@ -1230,18 +1337,13 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
KBASE_PM_CORE_L2);
u64 l2_ready = kbase_pm_get_ready_cores(kbdev,
KBASE_PM_CORE_L2);
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
- u64 tiler_trans = kbase_pm_get_trans_cores(
- kbdev, KBASE_PM_CORE_TILER);
- u64 tiler_ready = kbase_pm_get_ready_cores(
- kbdev, KBASE_PM_CORE_TILER);
+#ifdef CONFIG_MALI_ARBITER_SUPPORT
/*
* kbase_pm_get_ready_cores and kbase_pm_get_trans_cores
* are vulnerable to corruption if gpu is lost
*/
- if (kbase_is_gpu_removed(kbdev)
- || kbase_pm_is_gpu_lost(kbdev)) {
+ if (kbase_is_gpu_removed(kbdev) || kbase_pm_is_gpu_lost(kbdev)) {
backend->shaders_state =
KBASE_SHADERS_OFF_CORESTACK_OFF;
backend->hwcnt_desired = false;
@@ -1255,16 +1357,19 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
*/
backend->l2_state =
KBASE_L2_ON_HWCNT_DISABLE;
+ KBASE_KTRACE_ADD(kbdev, PM_L2_ON_HWCNT_DISABLE, NULL,
+ backend->l2_state);
kbase_pm_trigger_hwcnt_disable(kbdev);
}
if (backend->hwcnt_disabled) {
backend->l2_state = KBASE_L2_OFF;
+ KBASE_KTRACE_ADD(kbdev, PM_L2_OFF, NULL, backend->l2_state);
dev_dbg(kbdev->dev, "GPU lost has occurred - L2 off\n");
}
break;
}
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
+#endif
/* mask off ready from trans in case transitions finished
* between the register reads
@@ -1275,7 +1380,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
switch (backend->l2_state) {
case KBASE_L2_OFF:
- if (kbase_pm_is_l2_desired(kbdev)) {
+ if (kbase_pm_is_l2_desired(kbdev) && can_power_up_l2(kbdev)) {
#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
// Workaround: give a short pause here before starting L2 transition.
udelay(200);
@@ -1323,14 +1428,12 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
l2_power_up_done = false;
if (!l2_trans && l2_ready == l2_present) {
if (need_tiler_control(kbdev)) {
-#ifndef CONFIG_MALI_ARBITER_SUPPORT
u64 tiler_trans = kbase_pm_get_trans_cores(
kbdev, KBASE_PM_CORE_TILER);
u64 tiler_ready = kbase_pm_get_ready_cores(
kbdev, KBASE_PM_CORE_TILER);
-#endif
-
tiler_trans &= ~tiler_ready;
+
if (!tiler_trans && tiler_ready == tiler_present) {
KBASE_KTRACE_ADD(kbdev,
PM_CORES_CHANGE_AVAILABLE_TILER,
@@ -1591,6 +1694,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
dev_warn(kbdev->dev, "transition to l2 off without waking waiter");
}
#endif
+ kbase_ktrace_log_l2_core_state(kbdev, backend->l2_state);
}
} while (backend->l2_state != prev_state);
@@ -2282,6 +2386,7 @@ void kbase_pm_reset_start_locked(struct kbase_device *kbdev)
backend->in_reset = true;
backend->l2_state = KBASE_L2_RESET_WAIT;
+ KBASE_KTRACE_ADD(kbdev, PM_L2_RESET_WAIT, NULL, backend->l2_state);
#if !MALI_USE_CSF
backend->shaders_state = KBASE_SHADERS_RESET_WAIT;
#else
@@ -2290,6 +2395,7 @@ void kbase_pm_reset_start_locked(struct kbase_device *kbdev)
*/
if (likely(kbdev->csf.firmware_inited)) {
backend->mcu_state = KBASE_MCU_RESET_WAIT;
+ KBASE_KTRACE_ADD(kbdev, PM_MCU_RESET_WAIT, NULL, backend->mcu_state);
#ifdef KBASE_PM_RUNTIME
backend->exit_gpu_sleep_mode = true;
#endif
@@ -2649,31 +2755,37 @@ void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts);
#if MALI_USE_CSF
+/**
+ * update_user_reg_page_mapping - Update the mapping for USER Register page
+ *
+ * @kbdev: The kbase device structure for the device.
+ *
+ * This function must be called to unmap the dummy or real page from USER Register page
+ * mapping whenever the GPU is powered up or down. The dummy or real page is then
+ * mapped back in, as appropriate, when Userspace reads the LATEST_FLUSH value.
+ */
static void update_user_reg_page_mapping(struct kbase_device *kbdev)
{
+ struct kbase_context *kctx, *n;
+
lockdep_assert_held(&kbdev->pm.lock);
mutex_lock(&kbdev->csf.reg_lock);
-
- /* Only if the mappings for USER page exist, update all PTEs associated to it */
- if (kbdev->csf.nr_user_page_mapped > 0) {
- if (likely(kbdev->csf.mali_file_inode)) {
- /* This would zap the pte corresponding to the mapping of User
- * register page for all the Kbase contexts.
- */
- unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping,
- BASEP_MEM_CSF_USER_REG_PAGE_HANDLE, PAGE_SIZE, 1);
- } else {
- dev_err(kbdev->dev,
- "Device file inode not exist even if USER page previously mapped");
- }
+ list_for_each_entry_safe(kctx, n, &kbdev->csf.user_reg.list, csf.user_reg.link) {
+ /* This would zap the PTE corresponding to the mapping of User
+ * Register page of the kbase context. The mapping will be reestablished
+ * when the context (user process) needs to access the page.
+ */
+ unmap_mapping_range(kbdev->csf.user_reg.filp->f_inode->i_mapping,
+ kctx->csf.user_reg.file_offset << PAGE_SHIFT, PAGE_SIZE, 1);
+ list_del_init(&kctx->csf.user_reg.link);
+ dev_dbg(kbdev->dev, "Updated USER Reg page mapping of ctx %d_%d", kctx->tgid,
+ kctx->id);
}
-
mutex_unlock(&kbdev->csf.reg_lock);
}
#endif
-
/*
* pmu layout:
* 0x0000: PMU TAG (RO) (0xCAFECAFE)
@@ -2811,7 +2923,6 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
backend->gpu_idled = false;
}
#endif
-
}
KBASE_EXPORT_TEST_API(kbase_pm_clock_on);
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
index d959f45..9e29236 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
@@ -995,4 +995,27 @@ static inline void kbase_pm_disable_db_mirror_interrupt(struct kbase_device *kbd
}
#endif
+/**
+ * kbase_pm_l2_allow_mmu_page_migration - Check whether the L2 state allows MMU page migration
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Check whether the L2 state is in a power transition phase. If it is, MMU page
+ * migration should be deferred. The caller must hold hwaccess_lock and, if MMU
+ * page migration is intended, must immediately start the migration action without
+ * dropping the lock. When page migration begins, a flag is set in kbdev that
+ * prevents the L2 state machine from traversing into power transition phases
+ * until the migration action ends.
+ *
+ * Return: true if MMU page migration is allowed
+ */
+static inline bool kbase_pm_l2_allow_mmu_page_migration(struct kbase_device *kbdev)
+{
+ struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ return (backend->l2_state != KBASE_L2_PEND_ON && backend->l2_state != KBASE_L2_PEND_OFF);
+}
+
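To illustrate the locking protocol described in the comment above, here is a hedged sketch of a caller. try_migrate_one_page() is hypothetical; hwaccess_lock, the helper, and kbdev->mmu_page_migrate_in_progress (tested by can_power_up_l2()/can_power_down_l2() in the pm_driver.c hunks above) come from the patch, and the real MMU-side code may sequence this differently.

static int try_migrate_one_page(struct kbase_device *kbdev)
{
	unsigned long flags;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	if (!kbase_pm_l2_allow_mmu_page_migration(kbdev)) {
		/* L2 is mid power-transition: defer and retry later */
		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
		return -EAGAIN;
	}
	/* Setting the flag while still holding hwaccess_lock keeps the L2
	 * state machine out of KBASE_L2_PEND_ON/KBASE_L2_PEND_OFF until
	 * the migration finishes.
	 */
	kbdev->mmu_page_migrate_in_progress = true;
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	/* ...perform the MMU page migration action (not shown)... */

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbdev->mmu_page_migrate_in_progress = false;
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
	return 0;
}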
#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h b/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h
index 5e57c9d..3b448e3 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -66,6 +66,13 @@
* is being put to sleep.
* @ON_PEND_SLEEP: MCU sleep is in progress.
* @IN_SLEEP: Sleep request is completed and MCU has halted.
+ * @ON_PMODE_ENTER_CORESIGHT_DISABLE: The MCU is on, protected mode enter is about to
+ * be requested, Coresight is being disabled.
+ * @ON_PMODE_EXIT_CORESIGHT_ENABLE: The MCU is on, protected mode exit has happened,
+ * Coresight is being enabled.
+ * @CORESIGHT_DISABLE: The MCU is on and Coresight is being disabled.
+ * @CORESIGHT_ENABLE: The MCU is on, host does not have control and
+ * Coresight is being enabled.
*/
KBASEP_MCU_STATE(OFF)
KBASEP_MCU_STATE(PEND_ON_RELOAD)
@@ -92,3 +99,10 @@ KBASEP_MCU_STATE(HCTL_SHADERS_CORE_OFF_PEND)
KBASEP_MCU_STATE(ON_SLEEP_INITIATE)
KBASEP_MCU_STATE(ON_PEND_SLEEP)
KBASEP_MCU_STATE(IN_SLEEP)
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+/* Additional MCU states for Coresight */
+KBASEP_MCU_STATE(ON_PMODE_ENTER_CORESIGHT_DISABLE)
+KBASEP_MCU_STATE(ON_PMODE_EXIT_CORESIGHT_ENABLE)
+KBASEP_MCU_STATE(CORESIGHT_DISABLE)
+KBASEP_MCU_STATE(CORESIGHT_ENABLE)
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c
index 2b3e4e4..5d98bd7 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c
@@ -38,11 +38,13 @@
#include <backend/gpu/mali_kbase_pm_defs.h>
#include <mali_linux_trace.h>
+#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) || !MALI_USE_CSF
/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns
* This gives a maximum period between samples of 2^(32+8)/100 ns = slightly
* under 11s. Exceeding this will cause overflow
*/
#define KBASE_PM_TIME_SHIFT 8
+#endif
#if MALI_USE_CSF
/* To get the GPU_ACTIVE value in nano seconds unit */
@@ -480,7 +482,7 @@ void kbase_pm_metrics_stop(struct kbase_device *kbdev)
*/
static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
{
- int js;
+ unsigned int js;
lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c
index 5110e3d..7a4d662 100644
--- a/mali_kbase/backend/gpu/mali_kbase_time.c
+++ b/mali_kbase/backend/gpu/mali_kbase_time.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,8 @@
#include <mali_kbase.h>
#include <mali_kbase_hwaccess_time.h>
#if MALI_USE_CSF
+#include <asm/arch_timer.h>
+#include <linux/gcd.h>
#include <csf/mali_kbase_csf_timeout.h>
#endif
#include <device/mali_kbase_device.h>
@@ -121,20 +123,29 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
/* Only for debug messages, safe default in case it's mis-maintained */
const char *selector_str = "(unknown)";
- if (WARN(!kbdev->lowest_gpu_freq_khz,
- "Lowest frequency uninitialized! Using reference frequency for scaling")) {
+ if (!kbdev->lowest_gpu_freq_khz) {
+ dev_dbg(kbdev->dev,
+ "Lowest frequency uninitialized! Using reference frequency for scaling");
freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ;
} else {
freq_khz = kbdev->lowest_gpu_freq_khz;
}
switch (selector) {
+ case MMU_AS_INACTIVE_WAIT_TIMEOUT:
+ selector_str = "MMU_AS_INACTIVE_WAIT_TIMEOUT";
+ nr_cycles = MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES;
+ break;
case KBASE_TIMEOUT_SELECTOR_COUNT:
default:
#if !MALI_USE_CSF
WARN(1, "Invalid timeout selector used! Using default value");
nr_cycles = JM_DEFAULT_TIMEOUT_CYCLES;
break;
+ case JM_DEFAULT_JS_FREE_TIMEOUT:
+ selector_str = "JM_DEFAULT_JS_FREE_TIMEOUT";
+ nr_cycles = JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES;
+ break;
#else
/* Use Firmware timeout if invalid selection */
WARN(1,
@@ -204,3 +215,65 @@ u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev)
return lo | (((u64) hi1) << 32);
}
+
+#if MALI_USE_CSF
+u64 __maybe_unused kbase_backend_time_convert_gpu_to_cpu(struct kbase_device *kbdev, u64 gpu_ts)
+{
+ if (WARN_ON(!kbdev))
+ return 0;
+
+ return div64_u64(gpu_ts * kbdev->backend_time.multiplier, kbdev->backend_time.divisor) +
+ kbdev->backend_time.offset;
+}
+
+/**
+ * get_cpu_gpu_time() - Get current CPU and GPU timestamps.
+ *
+ * @kbdev: Kbase device.
+ * @cpu_ts: Output CPU timestamp.
+ * @gpu_ts: Output GPU timestamp.
+ * @gpu_cycle: Output GPU cycle counts.
+ */
+static void get_cpu_gpu_time(struct kbase_device *kbdev, u64 *cpu_ts, u64 *gpu_ts, u64 *gpu_cycle)
+{
+ struct timespec64 ts;
+
+ kbase_backend_get_gpu_time(kbdev, gpu_cycle, gpu_ts, &ts);
+
+ if (cpu_ts)
+ *cpu_ts = ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
+}
+#endif
+
+int kbase_backend_time_init(struct kbase_device *kbdev)
+{
+#if MALI_USE_CSF
+ u64 cpu_ts = 0;
+ u64 gpu_ts = 0;
+ u64 freq;
+ u64 common_factor;
+
+ get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL);
+ freq = arch_timer_get_cntfrq();
+
+ if (!freq) {
+ dev_warn(kbdev->dev, "arch_timer_get_rate() is zero!");
+ return -EINVAL;
+ }
+
+ common_factor = gcd(NSEC_PER_SEC, freq);
+
+ kbdev->backend_time.multiplier = div64_u64(NSEC_PER_SEC, common_factor);
+ kbdev->backend_time.divisor = div64_u64(freq, common_factor);
+
+ if (!kbdev->backend_time.divisor) {
+ dev_warn(kbdev->dev, "CPU to GPU divisor is zero!");
+ return -EINVAL;
+ }
+
+ kbdev->backend_time.offset = cpu_ts - div64_u64(gpu_ts * kbdev->backend_time.multiplier,
+ kbdev->backend_time.divisor);
+#endif
+
+ return 0;
+}
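A worked example may help with the arithmetic set up in kbase_backend_time_init(); the 25 MHz counter frequency below is assumed purely for illustration and is not implied by the patch.

/* Illustrative numbers only:
 *   freq          = arch_timer_get_cntfrq()      = 25,000,000 Hz
 *   common_factor = gcd(NSEC_PER_SEC, freq)      = 25,000,000
 *   multiplier    = NSEC_PER_SEC / common_factor = 40
 *   divisor       = freq / common_factor         = 1
 * so kbase_backend_time_convert_gpu_to_cpu() reduces to
 *   cpu_ns = gpu_ts * 40 / 1 + offset
 * where offset pins the GPU timestamp timeline to the CPU time sampled by
 * get_cpu_gpu_time() during initialization.
 */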
diff --git a/mali_kbase/build.bp b/mali_kbase/build.bp
index 96aa329..e82dd12 100644
--- a/mali_kbase/build.bp
+++ b/mali_kbase/build.bp
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,10 +28,11 @@ bob_defaults {
defaults: [
"kernel_defaults",
],
- no_mali: {
+ mali_no_mali: {
kbuild_options: [
"CONFIG_MALI_NO_MALI=y",
"CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}",
+ "CONFIG_GPU_HWVER={{.hwver}}",
],
},
mali_platform_dt_pin_rst: {
@@ -52,9 +53,6 @@ bob_defaults {
mali_midgard_enable_trace: {
kbuild_options: ["CONFIG_MALI_MIDGARD_ENABLE_TRACE=y"],
},
- mali_dma_fence: {
- kbuild_options: ["CONFIG_MALI_DMA_FENCE=y"],
- },
mali_arbiter_support: {
kbuild_options: ["CONFIG_MALI_ARBITER_SUPPORT=y"],
},
@@ -64,8 +62,11 @@ bob_defaults {
mali_dma_buf_legacy_compat: {
kbuild_options: ["CONFIG_MALI_DMA_BUF_LEGACY_COMPAT=y"],
},
- mali_2mb_alloc: {
- kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"],
+ large_page_alloc_override: {
+ kbuild_options: ["CONFIG_LARGE_PAGE_ALLOC_OVERRIDE=y"],
+ },
+ large_page_alloc: {
+ kbuild_options: ["CONFIG_LARGE_PAGE_ALLOC=y"],
},
mali_memory_fully_backed: {
kbuild_options: ["CONFIG_MALI_MEMORY_FULLY_BACKED=y"],
@@ -88,9 +89,6 @@ bob_defaults {
mali_error_inject: {
kbuild_options: ["CONFIG_MALI_ERROR_INJECT=y"],
},
- mali_gem5_build: {
- kbuild_options: ["CONFIG_MALI_GEM5_BUILD=y"],
- },
mali_debug: {
kbuild_options: [
"CONFIG_MALI_DEBUG=y",
@@ -142,8 +140,8 @@ bob_defaults {
platform_is_fpga: {
kbuild_options: ["CONFIG_MALI_IS_FPGA=y"],
},
- mali_fw_core_dump: {
- kbuild_options: ["CONFIG_MALI_FW_CORE_DUMP=y"],
+ mali_coresight: {
+ kbuild_options: ["CONFIG_MALI_CORESIGHT=y"],
},
kbuild_options: [
"CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}",
@@ -166,9 +164,7 @@ bob_defaults {
// (catch-all for experimental CS code without separating it into
// different features).
"MALI_INCREMENTAL_RENDERING_JM={{.incremental_rendering_jm}}",
- "MALI_GPU_TIMESTAMP_CORRECTION={{.gpu_timestamp_correction}}",
"MALI_BASE_CSF_PERFORMANCE_TESTS={{.base_csf_performance_tests}}",
- "MALI_GPU_TIMESTAMP_INTERPOLATION={{.gpu_timestamp_interpolation}}",
],
}
@@ -187,6 +183,10 @@ bob_kernel_module {
"context/*.c",
"context/*.h",
"context/Kbuild",
+ "hwcnt/*.c",
+ "hwcnt/*.h",
+ "hwcnt/backend/*.h",
+ "hwcnt/Kbuild",
"ipa/*.c",
"ipa/*.h",
"ipa/Kbuild",
@@ -194,6 +194,15 @@ bob_kernel_module {
"platform/*/*.c",
"platform/*/*.h",
"platform/*/Kbuild",
+ "platform/*/*/*.c",
+ "platform/*/*/*.h",
+ "platform/*/*/Kbuild",
+ "platform/*/*/*.c",
+ "platform/*/*/*.h",
+ "platform/*/*/Kbuild",
+ "platform/*/*/*/*.c",
+ "platform/*/*/*/*.h",
+ "platform/*/*/*/Kbuild",
"thirdparty/*.c",
"thirdparty/Kbuild",
"debug/*.c",
@@ -220,6 +229,10 @@ bob_kernel_module {
"device/backend/*_jm.c",
"gpu/backend/*_jm.c",
"gpu/backend/*_jm.h",
+ "hwcnt/backend/*_jm.c",
+ "hwcnt/backend/*_jm.h",
+ "hwcnt/backend/*_jm_*.c",
+ "hwcnt/backend/*_jm_*.h",
"jm/*.h",
"tl/backend/*_jm.c",
"mmu/backend/*_jm.c",
@@ -241,6 +254,10 @@ bob_kernel_module {
"device/backend/*_csf.c",
"gpu/backend/*_csf.c",
"gpu/backend/*_csf.h",
+ "hwcnt/backend/*_csf.c",
+ "hwcnt/backend/*_csf.h",
+ "hwcnt/backend/*_csf_*.c",
+ "hwcnt/backend/*_csf_*.h",
"tl/backend/*_csf.c",
"mmu/backend/*_csf.c",
"ipa/backend/*_csf.c",
diff --git a/mali_kbase/context/backend/mali_kbase_context_csf.c b/mali_kbase/context/backend/mali_kbase_context_csf.c
index ef0b973..9aa661a 100644
--- a/mali_kbase/context/backend/mali_kbase_context_csf.c
+++ b/mali_kbase/context/backend/mali_kbase_context_csf.c
@@ -26,7 +26,6 @@
#include <context/mali_kbase_context_internal.h>
#include <gpu/mali_kbase_gpu_regmap.h>
#include <mali_kbase.h>
-#include <mali_kbase_dma_fence.h>
#include <mali_kbase_mem_linux.h>
#include <mali_kbase_mem_pool_group.h>
#include <mmu/mali_kbase_mmu.h>
@@ -36,20 +35,24 @@
#if IS_ENABLED(CONFIG_DEBUG_FS)
#include <csf/mali_kbase_csf_csg_debugfs.h>
#include <csf/mali_kbase_csf_kcpu_debugfs.h>
+#include <csf/mali_kbase_csf_sync_debugfs.h>
#include <csf/mali_kbase_csf_tiler_heap_debugfs.h>
#include <csf/mali_kbase_csf_cpu_queue_debugfs.h>
#include <mali_kbase_debug_mem_view.h>
#include <mali_kbase_debug_mem_zones.h>
+#include <mali_kbase_debug_mem_allocs.h>
#include <mali_kbase_mem_pool_debugfs.h>
void kbase_context_debugfs_init(struct kbase_context *const kctx)
{
kbase_debug_mem_view_init(kctx);
kbase_debug_mem_zones_init(kctx);
+ kbase_debug_mem_allocs_init(kctx);
kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx);
kbase_jit_debugfs_init(kctx);
kbase_csf_queue_group_debugfs_init(kctx);
kbase_csf_kcpu_debugfs_init(kctx);
+ kbase_csf_sync_debugfs_init(kctx);
kbase_csf_tiler_heap_debugfs_init(kctx);
kbase_csf_tiler_heap_total_debugfs_init(kctx);
kbase_csf_cpu_queue_debugfs_init(kctx);
diff --git a/mali_kbase/context/backend/mali_kbase_context_jm.c b/mali_kbase/context/backend/mali_kbase_context_jm.c
index 4e58ed6..7acb3f6 100644
--- a/mali_kbase/context/backend/mali_kbase_context_jm.c
+++ b/mali_kbase/context/backend/mali_kbase_context_jm.c
@@ -27,7 +27,6 @@
#include <gpu/mali_kbase_gpu_regmap.h>
#include <mali_kbase.h>
#include <mali_kbase_ctx_sched.h>
-#include <mali_kbase_dma_fence.h>
#include <mali_kbase_kinstr_jm.h>
#include <mali_kbase_mem_linux.h>
#include <mali_kbase_mem_pool_group.h>
@@ -37,12 +36,14 @@
#if IS_ENABLED(CONFIG_DEBUG_FS)
#include <mali_kbase_debug_mem_view.h>
#include <mali_kbase_debug_mem_zones.h>
+#include <mali_kbase_debug_mem_allocs.h>
#include <mali_kbase_mem_pool_debugfs.h>
void kbase_context_debugfs_init(struct kbase_context *const kctx)
{
kbase_debug_mem_view_init(kctx);
kbase_debug_mem_zones_init(kctx);
+ kbase_debug_mem_allocs_init(kctx);
kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx);
kbase_jit_debugfs_init(kctx);
kbasep_jd_debugfs_ctx_init(kctx);
@@ -128,8 +129,6 @@ static const struct kbase_context_init context_init[] = {
{ NULL, kbase_context_free, NULL },
{ kbase_context_common_init, kbase_context_common_term,
"Common context initialization failed" },
- { kbase_dma_fence_init, kbase_dma_fence_term,
- "DMA fence initialization failed" },
{ kbase_context_mem_pool_group_init, kbase_context_mem_pool_group_term,
"Memory pool group initialization failed" },
{ kbase_mem_evictable_init, kbase_mem_evictable_deinit,
diff --git a/mali_kbase/context/mali_kbase_context.c b/mali_kbase/context/mali_kbase_context.c
index 8787a56..84d56f7 100644
--- a/mali_kbase/context/mali_kbase_context.c
+++ b/mali_kbase/context/mali_kbase_context.c
@@ -182,7 +182,6 @@ int kbase_context_common_init(struct kbase_context *kctx)
/* creating a context is considered a disjoint event */
kbase_disjoint_event(kctx->kbdev);
- spin_lock_init(&kctx->mm_update_lock);
kctx->process_mm = NULL;
kctx->task = NULL;
atomic_set(&kctx->nonmapped_pages, 0);
@@ -223,6 +222,9 @@ int kbase_context_common_init(struct kbase_context *kctx)
if (unlikely(err))
return err;
+
+ kbase_mem_mmgrab();
+ kctx->process_mm = current->mm;
}
atomic_set(&kctx->used_pages, 0);
@@ -250,7 +252,9 @@ int kbase_context_common_init(struct kbase_context *kctx)
atomic64_set(&kctx->num_fixed_allocs, 0);
#endif
+ kbase_gpu_vm_lock(kctx);
bitmap_copy(kctx->cookies, &cookies_mask, BITS_PER_LONG);
+ kbase_gpu_vm_unlock(kctx);
kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1;
@@ -260,8 +264,10 @@ int kbase_context_common_init(struct kbase_context *kctx)
if (err) {
dev_err(kctx->kbdev->dev,
"(err:%d) failed to insert kctx to kbase_process", err);
- if (likely(kctx->filp))
+ if (likely(kctx->filp)) {
+ mmdrop(kctx->process_mm);
put_task_struct(kctx->task);
+ }
}
return err;
@@ -350,18 +356,18 @@ void kbase_context_common_term(struct kbase_context *kctx)
kbase_remove_kctx_from_process(kctx);
mutex_unlock(&kctx->kbdev->kctx_list_lock);
- if (likely(kctx->filp))
+ if (likely(kctx->filp)) {
+ mmdrop(kctx->process_mm);
put_task_struct(kctx->task);
+ }
KBASE_KTRACE_ADD(kctx->kbdev, CORE_CTX_DESTROY, kctx, 0u);
}
int kbase_context_mem_pool_group_init(struct kbase_context *kctx)
{
- return kbase_mem_pool_group_init(&kctx->mem_pools,
- kctx->kbdev,
- &kctx->kbdev->mem_pool_defaults,
- &kctx->kbdev->mem_pools);
+ return kbase_mem_pool_group_init(&kctx->mem_pools, kctx->kbdev,
+ &kctx->kbdev->mem_pool_defaults, &kctx->kbdev->mem_pools);
}
void kbase_context_mem_pool_group_term(struct kbase_context *kctx)
diff --git a/mali_kbase/context/mali_kbase_context.h b/mali_kbase/context/mali_kbase_context.h
index a0c51c9..7c90e27 100644
--- a/mali_kbase/context/mali_kbase_context.h
+++ b/mali_kbase/context/mali_kbase_context.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2011-2017, 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -93,6 +93,19 @@ static inline bool kbase_ctx_flag(struct kbase_context *kctx,
}
/**
+ * kbase_ctx_compat_mode - Indicate whether a kbase context needs to operate
+ * in compatibility mode for 32-bit userspace.
+ * @kctx: kbase context
+ *
+ * Return: True if needs to maintain compatibility, False otherwise.
+ */
+static inline bool kbase_ctx_compat_mode(struct kbase_context *kctx)
+{
+ return !IS_ENABLED(CONFIG_64BIT) ||
+ (IS_ENABLED(CONFIG_64BIT) && kbase_ctx_flag(kctx, KCTX_COMPAT));
+}
+
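As an aside, a hedged example of how such a helper is typically consumed; the function name and the address-space limits below are illustrative only and not taken from the patch.

/* Illustrative only: choose a GPU VA ceiling based on the context's mode. */
static u64 example_va_ceiling(struct kbase_context *kctx)
{
	if (kbase_ctx_compat_mode(kctx))
		return 1ULL << 32;	/* 32-bit client: 4 GiB (example) */
	return 1ULL << 48;		/* 64-bit client: 48-bit VA (example) */
}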
+/**
* kbase_ctx_flag_clear - Clear @flag on @kctx
* @kctx: Pointer to kbase context
* @flag: Flag to clear
diff --git a/mali_kbase/csf/Kbuild b/mali_kbase/csf/Kbuild
index 11672a1..c5438f0 100644
--- a/mali_kbase/csf/Kbuild
+++ b/mali_kbase/csf/Kbuild
@@ -31,16 +31,23 @@ mali_kbase-y += \
csf/mali_kbase_csf_reset_gpu.o \
csf/mali_kbase_csf_csg_debugfs.o \
csf/mali_kbase_csf_kcpu_debugfs.o \
+ csf/mali_kbase_csf_sync_debugfs.o \
csf/mali_kbase_csf_protected_memory.o \
csf/mali_kbase_csf_tiler_heap_debugfs.o \
csf/mali_kbase_csf_cpu_queue_debugfs.o \
csf/mali_kbase_csf_event.o \
- csf/mali_kbase_csf_firmware_log.o
+ csf/mali_kbase_csf_firmware_log.o \
+ csf/mali_kbase_csf_firmware_core_dump.o \
+ csf/mali_kbase_csf_tiler_heap_reclaim.o \
+ csf/mali_kbase_csf_mcu_shared_reg.o
-mali_kbase-$(CONFIG_MALI_REAL_HW) += csf/mali_kbase_csf_firmware.o
-
-mali_kbase-$(CONFIG_MALI_NO_MALI) += csf/mali_kbase_csf_firmware_no_mali.o
+ifeq ($(CONFIG_MALI_NO_MALI),y)
+mali_kbase-y += csf/mali_kbase_csf_firmware_no_mali.o
+else
+mali_kbase-y += csf/mali_kbase_csf_firmware.o
+endif
+mali_kbase-$(CONFIG_DEBUG_FS) += csf/mali_kbase_debug_csf_fault.o
ifeq ($(KBUILD_EXTMOD),)
# in-tree
diff --git a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c
index 2772cfa..4336705 100644
--- a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c
+++ b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c
@@ -28,8 +28,6 @@
* Status flags from the STATUS register of the IPA Control interface.
*/
#define STATUS_COMMAND_ACTIVE ((u32)1 << 0)
-#define STATUS_TIMER_ACTIVE ((u32)1 << 1)
-#define STATUS_AUTO_ACTIVE ((u32)1 << 2)
#define STATUS_PROTECTED_MODE ((u32)1 << 8)
#define STATUS_RESET ((u32)1 << 9)
#define STATUS_TIMER_ENABLED ((u32)1 << 31)
@@ -37,9 +35,7 @@
/*
* Commands for the COMMAND register of the IPA Control interface.
*/
-#define COMMAND_NOP ((u32)0)
#define COMMAND_APPLY ((u32)1)
-#define COMMAND_CLEAR ((u32)2)
#define COMMAND_SAMPLE ((u32)3)
#define COMMAND_PROTECTED_ACK ((u32)4)
#define COMMAND_RESET_ACK ((u32)5)
diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c
index fce6aaa..639c6da 100644
--- a/mali_kbase/csf/mali_kbase_csf.c
+++ b/mali_kbase/csf/mali_kbase_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -36,10 +36,16 @@
#include "mali_kbase_csf_event.h"
#include <mali_linux_trace.h>
#include <linux/protected_memory_allocator.h>
+#include <tl/mali_kbase_tracepoints.h>
+#include "mali_kbase_csf_mcu_shared_reg.h"
#define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK)
#define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK)
-#define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1)
+
+#define CS_RING_BUFFER_MAX_SIZE ((uint32_t)(1 << 31)) /* 2GiB */
+#define CS_RING_BUFFER_MIN_SIZE ((uint32_t)4096)
+
+#define PROTM_ALLOC_MAX_RETRIES ((u8)5)
const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT] = {
KBASE_QUEUE_GROUP_PRIORITY_HIGH,
@@ -71,6 +77,38 @@ struct irq_idle_and_protm_track {
s8 idle_slot;
};
+/**
+ * kbasep_ctx_user_reg_page_mapping_term() - Terminate resources for USER Register Page.
+ *
+ * @kctx: Pointer to the kbase context
+ */
+static void kbasep_ctx_user_reg_page_mapping_term(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+
+ if (unlikely(kctx->csf.user_reg.vma))
+ dev_err(kbdev->dev, "VMA for USER Register page exist on termination of ctx %d_%d",
+ kctx->tgid, kctx->id);
+ if (WARN_ON_ONCE(!list_empty(&kctx->csf.user_reg.link)))
+ list_del_init(&kctx->csf.user_reg.link);
+}
+
+/**
+ * kbasep_ctx_user_reg_page_mapping_init() - Initialize resources for USER Register Page.
+ *
+ * @kctx: Pointer to the kbase context
+ *
+ * Return: 0 on success.
+ */
+static int kbasep_ctx_user_reg_page_mapping_init(struct kbase_context *kctx)
+{
+ INIT_LIST_HEAD(&kctx->csf.user_reg.link);
+ kctx->csf.user_reg.vma = NULL;
+ kctx->csf.user_reg.file_offset = 0;
+
+ return 0;
+}
+
static void put_user_pages_mmap_handle(struct kbase_context *kctx,
struct kbase_queue *queue)
{
@@ -131,21 +169,6 @@ static int get_user_pages_mmap_handle(struct kbase_context *kctx,
return 0;
}
-static void gpu_munmap_user_io_pages(struct kbase_context *kctx, struct kbase_va_region *reg,
- struct tagged_addr *phys)
-{
- size_t num_pages = 2;
-
- kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, reg->start_pfn, phys,
- num_pages, MCU_AS_NR);
-
- WARN_ON(reg->flags & KBASE_REG_FREE);
-
- mutex_lock(&kctx->kbdev->csf.reg_lock);
- kbase_remove_va_region(kctx->kbdev, reg);
- mutex_unlock(&kctx->kbdev->csf.reg_lock);
-}
-
static void init_user_io_pages(struct kbase_queue *queue)
{
u32 *input_addr = (u32 *)(queue->user_io_addr);
@@ -163,76 +186,15 @@ static void init_user_io_pages(struct kbase_queue *queue)
output_addr[CS_ACTIVE/4] = 0;
}
-/* Map the input/output pages in the shared interface segment of MCU firmware
- * address space.
- */
-static int gpu_mmap_user_io_pages(struct kbase_device *kbdev,
- struct tagged_addr *phys, struct kbase_va_region *reg)
-{
- unsigned long mem_flags = KBASE_REG_GPU_RD;
- const size_t num_pages = 2;
- int ret;
-
- /* Calls to this function are inherently asynchronous, with respect to
- * MMU operations.
- */
- const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
-
- if (kbdev->system_coherency == COHERENCY_NONE) {
- mem_flags |=
- KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
- } else {
- mem_flags |= KBASE_REG_SHARE_BOTH |
- KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED);
- }
-
- mutex_lock(&kbdev->csf.reg_lock);
- ret = kbase_add_va_region_rbtree(kbdev, reg, 0, num_pages, 1);
- reg->flags &= ~KBASE_REG_FREE;
- mutex_unlock(&kbdev->csf.reg_lock);
-
- if (ret)
- return ret;
-
- /* Map input page */
- ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn,
- &phys[0], 1, mem_flags, MCU_AS_NR,
- KBASE_MEM_GROUP_CSF_IO, mmu_sync_info);
- if (ret)
- goto bad_insert;
-
- /* Map output page, it needs rw access */
- mem_flags |= KBASE_REG_GPU_WR;
- ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
- reg->start_pfn + 1, &phys[1], 1, mem_flags,
- MCU_AS_NR, KBASE_MEM_GROUP_CSF_IO,
- mmu_sync_info);
- if (ret)
- goto bad_insert_output_page;
-
- return 0;
-
-bad_insert_output_page:
- kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn, phys, 1, MCU_AS_NR);
-bad_insert:
- mutex_lock(&kbdev->csf.reg_lock);
- kbase_remove_va_region(kbdev, reg);
- mutex_unlock(&kbdev->csf.reg_lock);
-
- return ret;
-}
-
static void kernel_unmap_user_io_pages(struct kbase_context *kctx,
struct kbase_queue *queue)
{
- const size_t num_pages = 2;
-
kbase_gpu_vm_lock(kctx);
vunmap(queue->user_io_addr);
- WARN_ON(num_pages > atomic_read(&kctx->permanent_mapped_pages));
- atomic_sub(num_pages, &kctx->permanent_mapped_pages);
+ WARN_ON(atomic_read(&kctx->permanent_mapped_pages) < KBASEP_NUM_CS_USER_IO_PAGES);
+ atomic_sub(KBASEP_NUM_CS_USER_IO_PAGES, &kctx->permanent_mapped_pages);
kbase_gpu_vm_unlock(kctx);
}
@@ -308,69 +270,62 @@ static void release_queue(struct kbase_queue *queue);
* If an explicit or implicit unbind was missed by the userspace then the
* mapping will persist. On process exit kernel itself will remove the mapping.
*/
-static void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx,
- struct kbase_queue *queue)
+void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue)
{
- const size_t num_pages = 2;
-
- gpu_munmap_user_io_pages(kctx, queue->reg, &queue->phys[0]);
kernel_unmap_user_io_pages(kctx, queue);
kbase_mem_pool_free_pages(
&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
- num_pages, queue->phys, true, false);
+ KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, true, false);
+ kbase_process_page_usage_dec(kctx, KBASEP_NUM_CS_USER_IO_PAGES);
- kfree(queue->reg);
- queue->reg = NULL;
+ /* The user_io_gpu_va should have been unmapped inside the scheduler */
+ WARN_ONCE(queue->user_io_gpu_va, "User IO pages appear to still have a mapping");
/* If the queue has already been terminated by userspace
* then the ref count for queue object will drop to 0 here.
*/
release_queue(queue);
}
+KBASE_EXPORT_TEST_API(kbase_csf_free_command_stream_user_pages);
-int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx,
- struct kbase_queue *queue)
+int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue)
{
struct kbase_device *kbdev = kctx->kbdev;
- struct kbase_va_region *reg;
- const size_t num_pages = 2;
int ret;
lockdep_assert_held(&kctx->csf.lock);
- reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0,
- num_pages, KBASE_REG_ZONE_MCU_SHARED);
- if (!reg)
- return -ENOMEM;
-
ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
- num_pages, queue->phys, false, kctx->task);
-
- if (ret != num_pages)
- goto phys_alloc_failed;
+ KBASEP_NUM_CS_USER_IO_PAGES,
+ queue->phys, false, kctx->task);
+ if (ret != KBASEP_NUM_CS_USER_IO_PAGES) {
+ /* Marking both the phys to zero for indicating there is no phys allocated */
+ queue->phys[0].tagged_addr = 0;
+ queue->phys[1].tagged_addr = 0;
+ return -ENOMEM;
+ }
ret = kernel_map_user_io_pages(kctx, queue);
if (ret)
goto kernel_map_failed;
+ kbase_process_page_usage_inc(kctx, KBASEP_NUM_CS_USER_IO_PAGES);
init_user_io_pages(queue);
- ret = gpu_mmap_user_io_pages(kctx->kbdev, queue->phys, reg);
- if (ret)
- goto gpu_mmap_failed;
-
- queue->reg = reg;
+ /* user_io_gpu_va is only mapped when scheduler decides to put the queue
+ * on slot at runtime. Initialize it to 0, signalling no mapping.
+ */
+ queue->user_io_gpu_va = 0;
mutex_lock(&kbdev->csf.reg_lock);
- if (kbdev->csf.db_file_offsets >
- (U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1))
+ if (kbdev->csf.db_file_offsets > (U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1))
kbdev->csf.db_file_offsets = 0;
queue->db_file_offset = kbdev->csf.db_file_offsets;
kbdev->csf.db_file_offsets += BASEP_QUEUE_NR_MMAP_USER_PAGES;
-
- WARN(atomic_read(&queue->refcount) != 1, "Incorrect refcounting for queue object\n");
+ WARN(kbase_refcount_read(&queue->refcount) != 1,
+ "Incorrect refcounting for queue object\n");
/* This is the second reference taken on the queue object and
* would be dropped only when the IO mapping is removed either
* explicitly by userspace or implicitly by kernel on process exit.
@@ -381,23 +336,16 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx,
return 0;
-gpu_mmap_failed:
- kernel_unmap_user_io_pages(kctx, queue);
-
kernel_map_failed:
- kbase_mem_pool_free_pages(
- &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
- num_pages, queue->phys, false, false);
-
-phys_alloc_failed:
+ kbase_mem_pool_free_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
+ KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, false, false);
/* Marking both the phys to zero for indicating there is no phys allocated */
queue->phys[0].tagged_addr = 0;
queue->phys[1].tagged_addr = 0;
- kfree(reg);
-
- return -ENOMEM;
+ return ret;
}
+KBASE_EXPORT_TEST_API(kbase_csf_alloc_command_stream_user_pages);
static struct kbase_queue_group *find_queue_group(struct kbase_context *kctx,
u8 group_handle)
@@ -415,6 +363,12 @@ static struct kbase_queue_group *find_queue_group(struct kbase_context *kctx,
return NULL;
}
+struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, u8 group_handle)
+{
+ return find_queue_group(kctx, group_handle);
+}
+KBASE_EXPORT_TEST_API(kbase_csf_find_queue_group);
+
int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx,
u8 group_handle)
{
@@ -443,19 +397,20 @@ static struct kbase_queue *find_queue(struct kbase_context *kctx, u64 base_addr)
static void get_queue(struct kbase_queue *queue)
{
- WARN_ON(!atomic_inc_not_zero(&queue->refcount));
+ WARN_ON(!kbase_refcount_inc_not_zero(&queue->refcount));
}
static void release_queue(struct kbase_queue *queue)
{
lockdep_assert_held(&queue->kctx->csf.lock);
-
- WARN_ON(atomic_read(&queue->refcount) <= 0);
-
- if (atomic_dec_and_test(&queue->refcount)) {
+ if (kbase_refcount_dec_and_test(&queue->refcount)) {
/* The queue can't still be on the per context list. */
WARN_ON(!list_empty(&queue->link));
WARN_ON(queue->group);
+ dev_dbg(queue->kctx->kbdev->dev,
+ "Remove any pending command queue fatal from ctx %d_%d",
+ queue->kctx->tgid, queue->kctx->id);
+ kbase_csf_event_remove_error(queue->kctx, &queue->error);
/* After this the Userspace would be able to free the
* memory for GPU queue. In case the Userspace missed
@@ -464,7 +419,7 @@ static void release_queue(struct kbase_queue *queue)
* would free up the GPU queue memory.
*/
kbase_gpu_vm_lock(queue->kctx);
- kbase_va_region_no_user_free_put(queue->kctx, queue->queue_reg);
+ kbase_va_region_no_user_free_dec(queue->queue_reg);
kbase_gpu_vm_unlock(queue->kctx);
kfree(queue);
@@ -472,7 +427,7 @@ static void release_queue(struct kbase_queue *queue)
}
static void oom_event_worker(struct work_struct *data);
-static void fatal_event_worker(struct work_struct *data);
+static void cs_error_worker(struct work_struct *data);
/* Between reg and reg_ex, one and only one must be null */
static int csf_queue_register_internal(struct kbase_context *kctx,
@@ -570,13 +525,16 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
queue->kctx = kctx;
queue->base_addr = queue_addr;
- queue->queue_reg = kbase_va_region_no_user_free_get(kctx, region);
+
+ queue->queue_reg = region;
+ kbase_va_region_no_user_free_inc(region);
+
queue->size = (queue_size << PAGE_SHIFT);
queue->csi_index = KBASEP_IF_NR_INVALID;
queue->enabled = false;
queue->priority = reg->priority;
- atomic_set(&queue->refcount, 1);
+ kbase_refcount_set(&queue->refcount, 1);
queue->group = NULL;
queue->bind_state = KBASE_CSF_QUEUE_UNBOUND;
@@ -599,7 +557,7 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
INIT_LIST_HEAD(&queue->link);
INIT_LIST_HEAD(&queue->error.link);
INIT_WORK(&queue->oom_event_work, oom_event_worker);
- INIT_WORK(&queue->fatal_event_work, fatal_event_worker);
+ INIT_WORK(&queue->cs_error_work, cs_error_worker);
list_add(&queue->link, &kctx->csf.queue_list);
queue->extract_ofs = 0;
@@ -633,6 +591,13 @@ out:
int kbase_csf_queue_register(struct kbase_context *kctx,
struct kbase_ioctl_cs_queue_register *reg)
{
+ /* Validate the ring buffer configuration parameters */
+ if (reg->buffer_size < CS_RING_BUFFER_MIN_SIZE ||
+ reg->buffer_size > CS_RING_BUFFER_MAX_SIZE ||
+ reg->buffer_size & (reg->buffer_size - 1) || !reg->buffer_gpu_addr ||
+ reg->buffer_gpu_addr & ~PAGE_MASK)
+ return -EINVAL;
+
return csf_queue_register_internal(kctx, reg, NULL);
}
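The checks added above reject any ring buffer whose size is not a power of two within [CS_RING_BUFFER_MIN_SIZE, CS_RING_BUFFER_MAX_SIZE] or whose GPU address is not page aligned. A few worked values, with 4 KiB pages assumed for the alignment column:

/* buffer_size (power-of-two test: size & (size - 1) must be 0):
 *   0x1000 (4 KiB)  -> 0x1000 & 0x0fff == 0          -> accepted
 *   0x3000 (12 KiB) -> 0x3000 & 0x2fff == 0x2000     -> rejected
 *   0x800  (2 KiB)  -> below CS_RING_BUFFER_MIN_SIZE -> rejected
 * buffer_gpu_addr (alignment test: addr & ~PAGE_MASK must be 0):
 *   0x80001000 -> offset-in-page 0x000 -> accepted
 *   0x80001080 -> offset-in-page 0x080 -> rejected
 */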
@@ -651,6 +616,13 @@ int kbase_csf_queue_register_ex(struct kbase_context *kctx,
if (glb_version < kbase_csf_interface_version(1, 1, 0))
return -EINVAL;
+ /* Validate the ring buffer configuration parameters */
+ if (reg->buffer_size < CS_RING_BUFFER_MIN_SIZE ||
+ reg->buffer_size > CS_RING_BUFFER_MAX_SIZE ||
+ reg->buffer_size & (reg->buffer_size - 1) || !reg->buffer_gpu_addr ||
+ reg->buffer_gpu_addr & ~PAGE_MASK)
+ return -EINVAL;
+
/* Validate the cs_trace configuration parameters */
if (reg->ex_buffer_size &&
((reg->ex_event_size > max_size) ||
@@ -701,11 +673,6 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx,
queue->queue_reg->user_data = NULL;
kbase_gpu_vm_unlock(kctx);
- dev_dbg(kctx->kbdev->dev,
- "Remove any pending command queue fatal from context %pK\n",
- (void *)kctx);
- kbase_csf_event_remove_error(kctx, &queue->error);
-
release_queue(queue);
}
@@ -786,6 +753,11 @@ static struct kbase_queue_group *get_bound_queue_group(
return group;
}
+static void enqueue_gpu_submission_work(struct kbase_context *const kctx)
+{
+ kthread_queue_work(&kctx->csf.pending_submission_worker, &kctx->csf.pending_submission_work);
+}
+
/**
* pending_submission_worker() - Work item to process pending kicked GPU command queues.
*
@@ -815,11 +787,21 @@ static void pending_submission_worker(struct kthread_work *work)
list_for_each_entry(queue, &kctx->csf.queue_list, link) {
if (atomic_cmpxchg(&queue->pending, 1, 0) == 1) {
struct kbase_queue_group *group = get_bound_queue_group(queue);
+ int ret;
- if (!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND)
+ if (!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND) {
dev_dbg(kbdev->dev, "queue is not bound to a group");
- else if (kbase_csf_scheduler_queue_start(queue))
+ continue;
+ }
+
+ ret = kbase_csf_scheduler_queue_start(queue);
+ if (unlikely(ret)) {
dev_dbg(kbdev->dev, "Failed to start queue");
+ if (ret == -EBUSY) {
+ atomic_cmpxchg(&queue->pending, 0, 1);
+ enqueue_gpu_submission_work(kctx);
+ }
+ }
}
}
@@ -916,11 +898,6 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev,
kbase_csf_ring_csg_doorbell(kbdev, csg_nr);
}
-static void enqueue_gpu_submission_work(struct kbase_context *const kctx)
-{
- kthread_queue_work(&kctx->csf.pending_submission_worker, &kctx->csf.pending_submission_work);
-}
-
int kbase_csf_queue_kick(struct kbase_context *kctx,
struct kbase_ioctl_cs_queue_kick *kick)
{
@@ -929,6 +906,8 @@ int kbase_csf_queue_kick(struct kbase_context *kctx,
struct kbase_va_region *region;
int err = 0;
+ KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK(kbdev, kctx->id, kick->buffer_gpu_addr);
+
/* GPU work submission happening asynchronously to prevent the contention with
* scheduler lock and as the result blocking application thread. For this reason,
* the vm_lock is used here to get the reference to the queue based on its buffer_gpu_addr
@@ -963,6 +942,9 @@ static void unbind_stopped_queue(struct kbase_context *kctx,
{
lockdep_assert_held(&kctx->csf.lock);
+ if (WARN_ON(queue->csi_index < 0))
+ return;
+
if (queue->bind_state != KBASE_CSF_QUEUE_UNBOUND) {
unsigned long flags;
@@ -976,6 +958,7 @@ static void unbind_stopped_queue(struct kbase_context *kctx,
kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags);
put_user_pages_mmap_handle(kctx, queue);
+ WARN_ON_ONCE(queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID);
queue->bind_state = KBASE_CSF_QUEUE_UNBOUND;
}
}
@@ -1017,6 +1000,15 @@ static void unbind_queue(struct kbase_context *kctx, struct kbase_queue *queue)
}
}
+static bool kbase_csf_queue_phys_allocated(struct kbase_queue *queue)
+{
+ /* The queue's phys are zeroed when allocation fails. Both of them being
+ * zero is an impossible condition for a successfully allocated set of phys pages.
+ */
+
+ return (queue->phys[0].tagged_addr | queue->phys[1].tagged_addr);
+}
+
void kbase_csf_queue_unbind(struct kbase_queue *queue, bool process_exit)
{
struct kbase_context *kctx = queue->kctx;
@@ -1042,8 +1034,8 @@ void kbase_csf_queue_unbind(struct kbase_queue *queue, bool process_exit)
unbind_queue(kctx, queue);
}
- /* Free the resources, if allocated for this queue. */
- if (queue->reg)
+ /* Free the resources, if allocated phys for this queue */
+ if (kbase_csf_queue_phys_allocated(queue))
kbase_csf_free_command_stream_user_pages(kctx, queue);
}
@@ -1056,8 +1048,8 @@ void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue)
WARN_ON(queue->bind_state == KBASE_CSF_QUEUE_BOUND);
unbind_stopped_queue(kctx, queue);
- /* Free the resources, if allocated for this queue. */
- if (queue->reg)
+ /* Free the resources, if phys pages were allocated for this queue */
+ if (kbase_csf_queue_phys_allocated(queue))
kbase_csf_free_command_stream_user_pages(kctx, queue);
}
@@ -1120,168 +1112,39 @@ static bool iface_has_enough_streams(struct kbase_device *const kbdev,
* @kctx: Pointer to kbase context where the queue group is created at
* @s_buf: Pointer to suspend buffer that is attached to queue group
*
- * Return: 0 if suspend buffer is successfully allocated and reflected to GPU
- * MMU page table. Otherwise -ENOMEM.
+ * Return: 0 if the physical pages for the suspend buffer are successfully
+ * allocated. Otherwise -ENOMEM or another negative error code.
*/
static int create_normal_suspend_buffer(struct kbase_context *const kctx,
struct kbase_normal_suspend_buffer *s_buf)
{
- struct kbase_va_region *reg = NULL;
- const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR;
const size_t nr_pages =
PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size);
- int err = 0;
-
- /* Calls to this function are inherently asynchronous, with respect to
- * MMU operations.
- */
- const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+ int err;
lockdep_assert_held(&kctx->csf.lock);
- /* Allocate and initialize Region Object */
- reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0,
- nr_pages, KBASE_REG_ZONE_MCU_SHARED);
-
- if (!reg)
- return -ENOMEM;
+ /* The suspend buffer's mapping address is valid only while the CSG is
+ * resident on a slot, so initialize it to 0 to signal that the buffer
+ * is not mapped.
+ */
+ s_buf->gpu_va = 0;
s_buf->phy = kcalloc(nr_pages, sizeof(*s_buf->phy), GFP_KERNEL);
- if (!s_buf->phy) {
- err = -ENOMEM;
- goto phy_alloc_failed;
- }
+ if (!s_buf->phy)
+ return -ENOMEM;
/* Get physical page for a normal suspend buffer */
err = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages,
&s_buf->phy[0], false, kctx->task);
-
- if (err < 0)
- goto phy_pages_alloc_failed;
-
- /* Insert Region Object into rbtree and make virtual address available
- * to map it to physical page
- */
- mutex_lock(&kctx->kbdev->csf.reg_lock);
- err = kbase_add_va_region_rbtree(kctx->kbdev, reg, 0, nr_pages, 1);
- reg->flags &= ~KBASE_REG_FREE;
- mutex_unlock(&kctx->kbdev->csf.reg_lock);
-
- if (err)
- goto add_va_region_failed;
-
- /* Update MMU table */
- err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
- reg->start_pfn, &s_buf->phy[0], nr_pages,
- mem_flags, MCU_AS_NR,
- KBASE_MEM_GROUP_CSF_FW, mmu_sync_info);
- if (err)
- goto mmu_insert_failed;
-
- s_buf->reg = reg;
-
- return 0;
-
-mmu_insert_failed:
- mutex_lock(&kctx->kbdev->csf.reg_lock);
- kbase_remove_va_region(kctx->kbdev, reg);
- mutex_unlock(&kctx->kbdev->csf.reg_lock);
-
-add_va_region_failed:
- kbase_mem_pool_free_pages(
- &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages,
- &s_buf->phy[0], false, false);
-
-phy_pages_alloc_failed:
- kfree(s_buf->phy);
-phy_alloc_failed:
- kfree(reg);
-
- return err;
-}
-
-/**
- * create_protected_suspend_buffer() - Create protected-mode suspend buffer
- * per queue group
- *
- * @kbdev: Instance of a GPU platform device that implements a CSF interface.
- * @s_buf: Pointer to suspend buffer that is attached to queue group
- *
- * Return: 0 if suspend buffer is successfully allocated and reflected to GPU
- * MMU page table. Otherwise -ENOMEM.
- */
-static int create_protected_suspend_buffer(struct kbase_device *const kbdev,
- struct kbase_protected_suspend_buffer *s_buf)
-{
- struct kbase_va_region *reg = NULL;
- struct tagged_addr *phys = NULL;
- const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR;
- const size_t nr_pages =
- PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
- int err = 0;
-
- /* Calls to this function are inherently asynchronous, with respect to
- * MMU operations.
- */
- const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
-
- /* Allocate and initialize Region Object */
- reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
- nr_pages, KBASE_REG_ZONE_MCU_SHARED);
-
- if (!reg)
- return -ENOMEM;
-
- phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL);
- if (!phys) {
- err = -ENOMEM;
- goto phy_alloc_failed;
- }
-
- s_buf->pma = kbase_csf_protected_memory_alloc(kbdev, phys,
- nr_pages, true);
- if (s_buf->pma == NULL) {
- err = -ENOMEM;
- goto pma_alloc_failed;
+ if (err < 0) {
+ kfree(s_buf->phy);
+ return err;
}
- /* Insert Region Object into rbtree and make virtual address available
- * to map it to physical page
- */
- mutex_lock(&kbdev->csf.reg_lock);
- err = kbase_add_va_region_rbtree(kbdev, reg, 0, nr_pages, 1);
- reg->flags &= ~KBASE_REG_FREE;
- mutex_unlock(&kbdev->csf.reg_lock);
-
- if (err)
- goto add_va_region_failed;
-
- /* Update MMU table */
- err = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn,
- phys, nr_pages, mem_flags, MCU_AS_NR,
- KBASE_MEM_GROUP_CSF_FW, mmu_sync_info);
- if (err)
- goto mmu_insert_failed;
-
- s_buf->reg = reg;
- kfree(phys);
+ kbase_process_page_usage_inc(kctx, nr_pages);
return 0;
-
-mmu_insert_failed:
- mutex_lock(&kbdev->csf.reg_lock);
- kbase_remove_va_region(kbdev, reg);
- mutex_unlock(&kbdev->csf.reg_lock);
-
-add_va_region_failed:
- kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages, true);
-pma_alloc_failed:
- kfree(phys);
-phy_alloc_failed:
- kfree(reg);
-
- return err;
}
static void timer_event_worker(struct work_struct *data);
@@ -1302,26 +1165,17 @@ static void term_normal_suspend_buffer(struct kbase_context *const kctx,
static int create_suspend_buffers(struct kbase_context *const kctx,
struct kbase_queue_group * const group)
{
- int err = 0;
-
if (create_normal_suspend_buffer(kctx, &group->normal_suspend_buf)) {
dev_err(kctx->kbdev->dev, "Failed to create normal suspend buffer\n");
return -ENOMEM;
}
- if (kctx->kbdev->csf.pma_dev) {
- err = create_protected_suspend_buffer(kctx->kbdev,
- &group->protected_suspend_buf);
- if (err) {
- term_normal_suspend_buffer(kctx,
- &group->normal_suspend_buf);
- dev_err(kctx->kbdev->dev, "Failed to create protected suspend buffer\n");
- }
- } else {
- group->protected_suspend_buf.reg = NULL;
- }
+ /* The protected suspend buffer is bound at runtime, so just initialize it here */
+ group->protected_suspend_buf.gpu_va = 0;
+ group->protected_suspend_buf.pma = NULL;
+ group->protected_suspend_buf.alloc_retries = 0;
- return err;
+ return 0;
}
/**
@@ -1387,6 +1241,14 @@ static int create_queue_group(struct kbase_context *const kctx,
group->cs_unrecoverable = false;
group->reevaluate_idle_status = false;
+ group->csg_reg = NULL;
+ group->csg_reg_bind_retries = 0;
+
+ group->dvs_buf = create->in.dvs_buf;
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ group->deschedule_deferred_cnt = 0;
+#endif
group->group_uid = generate_group_uid();
create->out.group_uid = group->group_uid;
@@ -1402,6 +1264,9 @@ static int create_queue_group(struct kbase_context *const kctx,
MAX_SUPPORTED_STREAMS_PER_GROUP);
group->run_state = KBASE_CSF_GROUP_INACTIVE;
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_INACTIVE, group,
+ group->run_state);
+
err = create_suspend_buffers(kctx, group);
if (err < 0) {
@@ -1421,6 +1286,17 @@ static int create_queue_group(struct kbase_context *const kctx,
return group_handle;
}
+static bool dvs_supported(u32 csf_version)
+{
+ if (GLB_VERSION_MAJOR_GET(csf_version) < 3)
+ return false;
+
+ if (GLB_VERSION_MAJOR_GET(csf_version) == 3)
+ if (GLB_VERSION_MINOR_GET(csf_version) < 2)
+ return false;
+
+ return true;
+}
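
For reference, the checks above amount to requiring a GLB interface version of at least 3.2. A minimal standalone sketch of the same predicate follows, assuming GLB_VERSION packs the major number in bits 31:24 and the minor number in bits 23:16; that field layout is an assumption here, not something this patch shows:

/* Illustrative sketch only: the bit positions are assumed; real code should
 * keep using the GLB_VERSION_MAJOR_GET()/GLB_VERSION_MINOR_GET() macros.
 */
static inline bool glb_version_at_least(u32 csf_version, u32 req_major, u32 req_minor)
{
	const u32 major = (csf_version >> 24) & 0xFFu; /* assumed MAJOR field */
	const u32 minor = (csf_version >> 16) & 0xFFu; /* assumed MINOR field */

	return (major > req_major) || (major == req_major && minor >= req_minor);
}

/* dvs_supported(version) above then reads as glb_version_at_least(version, 3, 2). */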
int kbase_csf_queue_group_create(struct kbase_context *const kctx,
union kbase_ioctl_cs_queue_group_create *const create)
@@ -1459,8 +1335,17 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx,
dev_warn(kctx->kbdev->dev, "Unknown exception handler flags set: %u",
create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK);
err = -EINVAL;
- } else if (create->in.reserved) {
- dev_warn(kctx->kbdev->dev, "Reserved field was set to non-0");
+ } else if (!dvs_supported(kctx->kbdev->csf.global_iface.version) &&
+ create->in.dvs_buf) {
+ dev_warn(
+ kctx->kbdev->dev,
+ "GPU does not support DVS but userspace is trying to use it");
+ err = -EINVAL;
+ } else if (dvs_supported(kctx->kbdev->csf.global_iface.version) &&
+ !CSG_DVS_BUF_BUFFER_POINTER_GET(create->in.dvs_buf) &&
+ CSG_DVS_BUF_BUFFER_SIZE_GET(create->in.dvs_buf)) {
+ dev_warn(kctx->kbdev->dev,
+ "DVS buffer pointer is null but size is not 0");
err = -EINVAL;
} else {
/* For the CSG which satisfies the condition for having
@@ -1490,65 +1375,39 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx,
* @s_buf: Pointer to queue group suspend buffer to be freed
*/
static void term_normal_suspend_buffer(struct kbase_context *const kctx,
- struct kbase_normal_suspend_buffer *s_buf)
+ struct kbase_normal_suspend_buffer *s_buf)
{
- const size_t nr_pages =
- PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size);
+ const size_t nr_pages = PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size);
lockdep_assert_held(&kctx->csf.lock);
- WARN_ON(kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
- s_buf->reg->start_pfn, s_buf->phy, nr_pages, MCU_AS_NR));
+ /* The group should not have any suspend buffer region still bound */
+ WARN_ONCE(s_buf->gpu_va, "Suspend buffer address should be 0 at termination");
- WARN_ON(s_buf->reg->flags & KBASE_REG_FREE);
-
- mutex_lock(&kctx->kbdev->csf.reg_lock);
- kbase_remove_va_region(kctx->kbdev, s_buf->reg);
- mutex_unlock(&kctx->kbdev->csf.reg_lock);
-
- kbase_mem_pool_free_pages(
- &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
- nr_pages, &s_buf->phy[0], false, false);
+ kbase_mem_pool_free_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages,
+ &s_buf->phy[0], false, false);
+ kbase_process_page_usage_dec(kctx, nr_pages);
kfree(s_buf->phy);
s_buf->phy = NULL;
- kfree(s_buf->reg);
- s_buf->reg = NULL;
}
/**
- * term_protected_suspend_buffer() - Free normal-mode suspend buffer of
+ * term_protected_suspend_buffer() - Free protected-mode suspend buffer of
* queue group
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
- * @s_buf: Pointer to queue group suspend buffer to be freed
+ * @sbuf: Pointer to queue group suspend buffer to be freed
*/
static void term_protected_suspend_buffer(struct kbase_device *const kbdev,
- struct kbase_protected_suspend_buffer *s_buf)
+ struct kbase_protected_suspend_buffer *sbuf)
{
- const size_t nr_pages =
- PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
- struct tagged_addr *phys = kmalloc(sizeof(*phys) * nr_pages, GFP_KERNEL);
- size_t i = 0;
-
- for (i = 0; phys && i < nr_pages; i++)
- phys[i] = as_tagged(s_buf->pma[i]->pa);
-
- WARN_ON(kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, s_buf->reg->start_pfn, phys,
- nr_pages, MCU_AS_NR));
-
- kfree(phys);
-
- WARN_ON(s_buf->reg->flags & KBASE_REG_FREE);
-
- mutex_lock(&kbdev->csf.reg_lock);
- kbase_remove_va_region(kbdev, s_buf->reg);
- mutex_unlock(&kbdev->csf.reg_lock);
-
- kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages, true);
- s_buf->pma = NULL;
- kfree(s_buf->reg);
- s_buf->reg = NULL;
+ WARN_ONCE(sbuf->gpu_va, "Suspend buf should have been unmapped inside scheduler!");
+ if (sbuf->pma) {
+ const size_t nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+ kbase_csf_protected_memory_free(kbdev, sbuf->pma, nr_pages, true);
+ sbuf->pma = NULL;
+ }
}
void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group)
@@ -1580,6 +1439,7 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group)
&group->protected_suspend_buf);
group->run_state = KBASE_CSF_GROUP_TERMINATED;
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_TERMINATED, group, group->run_state);
}
/**
@@ -1610,6 +1470,34 @@ static void term_queue_group(struct kbase_queue_group *group)
kbase_csf_term_descheduled_queue_group(group);
}
+/**
+ * wait_group_deferred_deschedule_completion - Wait for the refcount on the group,
+ * taken when its deschedule had to be deferred, to drop back to 0.
+ *
+ * @group: Pointer to GPU command queue group that is being deleted.
+ *
+ * This function is called when Userspace deletes the group and after the group
+ * has been descheduled. The function synchronizes with the other threads that were
+ * also trying to deschedule the group whilst the dumping was going on for a fault.
+ * Please refer to the documentation of wait_for_dump_complete_on_group_deschedule()
+ * for more details.
+ */
+static void wait_group_deferred_deschedule_completion(struct kbase_queue_group *group)
+{
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ struct kbase_context *kctx = group->kctx;
+
+ lockdep_assert_held(&kctx->csf.lock);
+
+ if (likely(!group->deschedule_deferred_cnt))
+ return;
+
+ rt_mutex_unlock(&kctx->csf.lock);
+ wait_event(kctx->kbdev->csf.event_wait, !group->deschedule_deferred_cnt);
+ rt_mutex_lock(&kctx->csf.lock);
+#endif
+}
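
The wait above only terminates once some other path drops deschedule_deferred_cnt to 0 and wakes kbdev->csf.event_wait; that counterpart is not part of this hunk. A hedged sketch of what it is expected to look like follows; the function name, locking context and placement are assumptions, and like the field itself it would live under CONFIG_DEBUG_FS:

/* Sketch only: pairs with wait_group_deferred_deschedule_completion() above.
 * The real decrement sits in the scheduler/fault-dump code; everything here
 * is illustrative rather than taken from this patch.
 */
static void group_deferred_deschedule_done_sketch(struct kbase_queue_group *group)
{
	struct kbase_context *kctx = group->kctx;

	lockdep_assert_held(&kctx->csf.lock);

	if (WARN_ON(!group->deschedule_deferred_cnt))
		return;

	if (--group->deschedule_deferred_cnt == 0)
		/* Unblocks the wait_event() in the group-deleting thread */
		wake_up_all(&kctx->kbdev->csf.event_wait);
}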
+
static void cancel_queue_group_events(struct kbase_queue_group *group)
{
cancel_work_sync(&group->timer_event_work);
@@ -1651,11 +1539,20 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
group = find_queue_group(kctx, group_handle);
if (group) {
+ kctx->csf.queue_groups[group_handle] = NULL;
/* Stop the running of the given group */
term_queue_group(group);
- kctx->csf.queue_groups[group_handle] = NULL;
rt_mutex_unlock(&kctx->csf.lock);
+ if (reset_prevented) {
+ /* Allow GPU reset before cancelling the group specific
+ * work item to avoid potential deadlock.
+ * Reset prevention isn't needed after group termination.
+ */
+ kbase_reset_gpu_allow(kbdev);
+ reset_prevented = false;
+ }
+
/* Cancel any pending event callbacks. If one is in progress
* then this thread waits synchronously for it to complete (which
* is why we must unlock the context first). We already ensured
@@ -1667,6 +1564,8 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
/* Clean up after the termination */
remove_pending_group_fatal_error(group);
+
+ wait_group_deferred_deschedule_completion(group);
}
rt_mutex_unlock(&kctx->csf.lock);
@@ -1675,7 +1574,9 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
kfree(group);
}
+KBASE_EXPORT_TEST_API(kbase_csf_queue_group_terminate);
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
struct kbase_suspend_copy_buffer *sus_buf,
u8 group_handle)
@@ -1706,6 +1607,7 @@ int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
return err;
}
+#endif
void kbase_csf_add_group_fatal_error(
struct kbase_queue_group *const group,
@@ -1774,8 +1676,6 @@ int kbase_csf_ctx_init(struct kbase_context *kctx)
kbase_csf_event_init(kctx);
- kctx->csf.user_reg_vma = NULL;
-
/* Mark all the cookies as 'free' */
bitmap_fill(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE);
@@ -1807,6 +1707,10 @@ int kbase_csf_ctx_init(struct kbase_context *kctx)
kthread_init_work(&kctx->csf.pending_submission_work,
pending_submission_worker);
+ err = kbasep_ctx_user_reg_page_mapping_init(kctx);
+ if (unlikely(err))
+ goto out_err_tiler_heap_context;
+
return err;
out_err_tiler_heap_context:
@@ -1900,8 +1804,6 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
else
reset_prevented = true;
- kthread_cancel_work_sync(&kctx->csf.pending_submission_work);
-
rt_mutex_lock(&kctx->csf.lock);
/* Iterate through the queue groups that were not terminated by
@@ -1920,6 +1822,8 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
if (reset_prevented)
kbase_reset_gpu_allow(kbdev);
+ kthread_cancel_work_sync(&kctx->csf.pending_submission_work);
+
/* Now that all queue groups have been terminated, there can be no
* more OoM or timer event interrupts but there can be inflight work
* items. Destroying the wq will implicitly flush those work items.
@@ -1964,7 +1868,7 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
* only one reference left that was taken when queue was
* registered.
*/
- WARN_ON(atomic_read(&queue->refcount) != 1);
+ WARN_ON(kbase_refcount_read(&queue->refcount) != 1);
list_del_init(&queue->link);
release_queue(queue);
}
@@ -1973,6 +1877,7 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
kbase_destroy_kworker_stack(&kctx->csf.pending_submission_worker);
+ kbasep_ctx_user_reg_page_mapping_term(kctx);
kbase_csf_tiler_heap_context_term(kctx);
kbase_csf_kcpu_queue_context_term(kctx);
kbase_csf_scheduler_context_term(kctx);
@@ -2080,6 +1985,36 @@ static void report_tiler_oom_error(struct kbase_queue_group *group)
kbase_event_wakeup_sync(group->kctx);
}
+static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev)
+{
+ int err;
+ const unsigned int cache_flush_wait_timeout_ms = 2000;
+
+ kbase_pm_lock(kbdev);
+ /* With the advent of partial cache flush, dirty cache lines could
+ * be left in the GPU L2 caches by terminating the queue group here
+ * without waiting for proper cache maintenance. A full cache flush
+ * here will prevent these dirty cache lines from being arbitrarily
+ * evicted later and possibly causing memory corruption.
+ */
+ if (kbdev->pm.backend.gpu_powered) {
+ kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
+ err = kbase_gpu_wait_cache_clean_timeout(kbdev, cache_flush_wait_timeout_ms);
+
+ if (err) {
+ dev_warn(
+ kbdev->dev,
+ "[%llu] Timeout waiting for cache clean to complete after fatal error",
+ kbase_backend_get_cycle_cnt(kbdev));
+
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+ kbase_reset_gpu(kbdev);
+ }
+ }
+
+ kbase_pm_unlock(kbdev);
+}
+
/**
* kbase_queue_oom_event - Handle tiler out-of-memory for a GPU command queue.
*
@@ -2092,8 +2027,8 @@ static void report_tiler_oom_error(struct kbase_queue_group *group)
* notification to allow the firmware to report out-of-memory again in future.
* If the out-of-memory condition was successfully handled then this function
* rings the relevant doorbell to notify the firmware; otherwise, it terminates
- * the GPU command queue group to which the queue is bound. See
- * term_queue_group() for details.
+ * the GPU command queue group to which the queue is bound and notifies a
+ * waiting user space client of the failure.
*/
static void kbase_queue_oom_event(struct kbase_queue *const queue)
{
@@ -2164,12 +2099,14 @@ static void kbase_queue_oom_event(struct kbase_queue *const queue)
kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
- if (err) {
+ if (unlikely(err)) {
dev_warn(
kbdev->dev,
"Queue group to be terminated, couldn't handle the OoM event\n");
+ kbase_debug_csf_fault_notify(kbdev, kctx, DF_TILER_OOM);
kbase_csf_scheduler_unlock(kbdev);
term_queue_group(group);
+ flush_gpu_cache_on_fatal_error(kbdev);
report_tiler_oom_error(group);
return;
}
@@ -2194,6 +2131,7 @@ static void oom_event_worker(struct work_struct *data)
struct kbase_device *const kbdev = kctx->kbdev;
int err = kbase_reset_gpu_try_prevent(kbdev);
+
/* Regardless of whether reset failed or is currently happening, exit
* early
*/
@@ -2246,12 +2184,13 @@ static void timer_event_worker(struct work_struct *data)
struct kbase_queue_group *const group =
container_of(data, struct kbase_queue_group, timer_event_work);
struct kbase_context *const kctx = group->kctx;
+ struct kbase_device *const kbdev = kctx->kbdev;
bool reset_prevented = false;
- int err = kbase_reset_gpu_prevent_and_wait(kctx->kbdev);
+ int err = kbase_reset_gpu_prevent_and_wait(kbdev);
if (err)
dev_warn(
- kctx->kbdev->dev,
+ kbdev->dev,
"Unsuccessful GPU reset detected when terminating group %d on progress timeout, attempting to terminate regardless",
group->handle);
else
@@ -2260,11 +2199,12 @@ static void timer_event_worker(struct work_struct *data)
rt_mutex_lock(&kctx->csf.lock);
term_queue_group(group);
+ flush_gpu_cache_on_fatal_error(kbdev);
report_group_timeout_error(group);
rt_mutex_unlock(&kctx->csf.lock);
if (reset_prevented)
- kbase_reset_gpu_allow(kctx->kbdev);
+ kbase_reset_gpu_allow(kbdev);
}
/**
@@ -2272,15 +2212,94 @@ static void timer_event_worker(struct work_struct *data)
*
* @group: Pointer to GPU queue group for which the timeout event is received.
*
+ * Notify a waiting user space client of the timeout.
* Enqueue a work item to terminate the group and notify the event notification
* thread of progress timeout fault for the GPU command queue group.
*/
static void handle_progress_timer_event(struct kbase_queue_group *const group)
{
+ kbase_debug_csf_fault_notify(group->kctx->kbdev, group->kctx,
+ DF_PROGRESS_TIMER_TIMEOUT);
+
queue_work(group->kctx->csf.wq, &group->timer_event_work);
}
/**
+ * alloc_grp_protected_suspend_buffer_pages() - Allocate physical pages from the protected
+ * memory for the protected mode suspend buffer.
+ * @group: Pointer to the GPU queue group.
+ *
+ * Return: 0 if suspend buffer allocation is successful or if it is already allocated, otherwise
+ * negative error value.
+ */
+static int alloc_grp_protected_suspend_buffer_pages(struct kbase_queue_group *const group)
+{
+ struct kbase_device *const kbdev = group->kctx->kbdev;
+ struct kbase_context *kctx = group->kctx;
+ struct tagged_addr *phys = NULL;
+ struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf;
+ size_t nr_pages;
+ int err = 0;
+
+ if (likely(sbuf->pma))
+ return 0;
+
+ nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+ phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL);
+ if (unlikely(!phys)) {
+ err = -ENOMEM;
+ goto phys_free;
+ }
+
+ rt_mutex_lock(&kctx->csf.lock);
+ kbase_csf_scheduler_lock(kbdev);
+
+ if (unlikely(!group->csg_reg)) {
+ /* The only way the bound csg_reg can have been removed from the group is
+ * that the scheduler took the group off slot and the csg_reg resource was
+ * contended by other groups. In that case, mapping the pma must wait for a
+ * later occasion when a csg_reg is bound again. Since the group is already
+ * off-slot, returning no error is harmless: when the scheduler places the
+ * group back on-slot it will do the required MMU map operation on the
+ * allocated and retained pma.
+ */
+ WARN_ON(group->csg_nr >= 0);
+ dev_dbg(kbdev->dev, "No bound csg_reg for group_%d_%d_%d to enter protected mode",
+ group->kctx->tgid, group->kctx->id, group->handle);
+ goto unlock;
+ }
+
+ /* Allocate the protected mode pages */
+ sbuf->pma = kbase_csf_protected_memory_alloc(kbdev, phys, nr_pages, true);
+ if (unlikely(!sbuf->pma)) {
+ err = -ENOMEM;
+ goto unlock;
+ }
+
+ /* Map the bound susp_reg to the just allocated pma pages */
+ err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group);
+
+unlock:
+ kbase_csf_scheduler_unlock(kbdev);
+ rt_mutex_unlock(&kctx->csf.lock);
+phys_free:
+ kfree(phys);
+ return err;
+}
+
+static void report_group_fatal_error(struct kbase_queue_group *const group)
+{
+ struct base_gpu_queue_group_error const
+ err_payload = { .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
+ .payload = { .fatal_group = {
+ .status = GPU_EXCEPTION_TYPE_SW_FAULT_0,
+ } } };
+
+ kbase_csf_add_group_fatal_error(group, &err_payload);
+ kbase_event_wakeup_sync(group->kctx);
+}
+
+/**
* protm_event_worker - Protected mode switch request event handler
* called from a workqueue.
*
@@ -2292,10 +2311,26 @@ static void protm_event_worker(struct work_struct *data)
{
struct kbase_queue_group *const group =
container_of(data, struct kbase_queue_group, protm_event_work);
+ struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf;
+ int err = 0;
KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START,
group, 0u);
- kbase_csf_scheduler_group_protm_enter(group);
+
+ err = alloc_grp_protected_suspend_buffer_pages(group);
+ if (!err) {
+ kbase_csf_scheduler_group_protm_enter(group);
+ } else if (err == -ENOMEM && sbuf->alloc_retries <= PROTM_ALLOC_MAX_RETRIES) {
+ sbuf->alloc_retries++;
+ /* try again to allocate pages */
+ queue_work(group->kctx->csf.wq, &group->protm_event_work);
+ } else if (sbuf->alloc_retries >= PROTM_ALLOC_MAX_RETRIES || err != -ENOMEM) {
+ dev_err(group->kctx->kbdev->dev,
+ "Failed to allocate physical pages for Protected mode suspend buffer for the group %d of context %d_%d",
+ group->handle, group->kctx->tgid, group->kctx->id);
+ report_group_fatal_error(group);
+ }
+
KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END,
group, 0u);
}
@@ -2304,16 +2339,20 @@ static void protm_event_worker(struct work_struct *data)
* handle_fault_event - Handler for CS fault.
*
* @queue: Pointer to queue for which fault event was received.
- * @stream: Pointer to the structure containing info provided by the
- * firmware about the CSI.
- *
- * Prints meaningful CS fault information.
+ * @cs_ack: Value of the CS_ACK register in the CS kernel output page used for
+ * the queue.
*
+ * Print required information about the CS fault and notify the user space client
+ * about the fault.
*/
static void
-handle_fault_event(struct kbase_queue *const queue,
- struct kbase_csf_cmd_stream_info const *const stream)
+handle_fault_event(struct kbase_queue *const queue, const u32 cs_ack)
{
+ struct kbase_device *const kbdev = queue->kctx->kbdev;
+ struct kbase_csf_cmd_stream_group_info const *ginfo =
+ &kbdev->csf.global_iface.groups[queue->group->csg_nr];
+ struct kbase_csf_cmd_stream_info const *stream =
+ &ginfo->streams[queue->csi_index];
const u32 cs_fault = kbase_csf_firmware_cs_output(stream, CS_FAULT);
const u64 cs_fault_info =
kbase_csf_firmware_cs_output(stream, CS_FAULT_INFO_LO) |
@@ -2325,7 +2364,6 @@ handle_fault_event(struct kbase_queue *const queue,
CS_FAULT_EXCEPTION_DATA_GET(cs_fault);
const u64 cs_fault_info_exception_data =
CS_FAULT_INFO_EXCEPTION_DATA_GET(cs_fault_info);
- struct kbase_device *const kbdev = queue->kctx->kbdev;
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
@@ -2340,6 +2378,36 @@ handle_fault_event(struct kbase_queue *const queue,
kbase_gpu_exception_name(cs_fault_exception_type),
cs_fault_exception_data, cs_fault_info_exception_data);
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ /* CS_RESOURCE_TERMINATED type fault event can be ignored from the
+ * standpoint of dump on error. It is used to report fault for the CSIs
+ * that are associated with the same CSG as the CSI for which the actual
+ * fault was reported by the Iterator.
+ * Dumping would be triggered when the actual fault is reported.
+ *
+ * CS_INHERIT_FAULT can also be ignored. It could happen due to the error
+ * in other types of queues (cpu/kcpu). If a fault had occurred in some
+ * other GPU queue then the dump would have been performed anyway when
+ * that fault was reported.
+ */
+ if ((cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT) &&
+ (cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED)) {
+ if (unlikely(kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FAULT))) {
+ get_queue(queue);
+ queue->cs_error = cs_fault;
+ queue->cs_error_info = cs_fault_info;
+ queue->cs_error_fatal = false;
+ if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work))
+ release_queue(queue);
+ return;
+ }
+ }
+#endif
+
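+ /* Acknowledge the fault: copy the CS_ACK.FAULT bit into CS_REQ.FAULT and
+ * ring the doorbell so the firmware observes the acknowledgement.
+ */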
+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
+ CS_REQ_FAULT_MASK);
+ kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, queue->group->csg_nr, true);
}
static void report_queue_fatal_error(struct kbase_queue *const queue,
@@ -2371,22 +2439,25 @@ static void report_queue_fatal_error(struct kbase_queue *const queue,
}
/**
- * fatal_event_worker - Handle the fatal error for the GPU queue
+ * fatal_event_worker - Handle the CS_FATAL/CS_FAULT error for the GPU queue
*
* @data: Pointer to a work_struct embedded in GPU command queue.
*
* Terminate the CSG and report the error to userspace.
*/
-static void fatal_event_worker(struct work_struct *const data)
+static void cs_error_worker(struct work_struct *const data)
{
struct kbase_queue *const queue =
- container_of(data, struct kbase_queue, fatal_event_work);
+ container_of(data, struct kbase_queue, cs_error_work);
struct kbase_context *const kctx = queue->kctx;
struct kbase_device *const kbdev = kctx->kbdev;
struct kbase_queue_group *group;
u8 group_handle;
bool reset_prevented = false;
- int err = kbase_reset_gpu_prevent_and_wait(kbdev);
+ int err;
+
+ kbase_debug_csf_fault_wait_completion(kbdev);
+ err = kbase_reset_gpu_prevent_and_wait(kbdev);
if (err)
dev_warn(
@@ -2403,9 +2474,35 @@ static void fatal_event_worker(struct work_struct *const data)
goto unlock;
}
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ if (!queue->cs_error_fatal) {
+ unsigned long flags;
+ int slot_num;
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ slot_num = kbase_csf_scheduler_group_get_slot_locked(group);
+ if (slot_num >= 0) {
+ struct kbase_csf_cmd_stream_group_info const *ginfo =
+ &kbdev->csf.global_iface.groups[slot_num];
+ struct kbase_csf_cmd_stream_info const *stream =
+ &ginfo->streams[queue->csi_index];
+ u32 const cs_ack =
+ kbase_csf_firmware_cs_output(stream, CS_ACK);
+
+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
+ CS_REQ_FAULT_MASK);
+ kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index,
+ slot_num, true);
+ }
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ goto unlock;
+ }
+#endif
+
group_handle = group->handle;
term_queue_group(group);
- report_queue_fatal_error(queue, queue->cs_fatal, queue->cs_fatal_info,
+ flush_gpu_cache_on_fatal_error(kbdev);
+ report_queue_fatal_error(queue, queue->cs_error, queue->cs_error_info,
group_handle);
unlock:
@@ -2421,14 +2518,18 @@ unlock:
* @queue: Pointer to queue for which fatal event was received.
* @stream: Pointer to the structure containing info provided by the
* firmware about the CSI.
+ * @cs_ack: Value of the CS_ACK register in the CS kernel output page used for
+ * the queue.
*
- * Prints meaningful CS fatal information.
+ * Notify a waiting user space client of the CS fatal error and print
+ * meaningful information.
* Enqueue a work item to terminate the group and report the fatal error
* to user space.
*/
static void
handle_fatal_event(struct kbase_queue *const queue,
- struct kbase_csf_cmd_stream_info const *const stream)
+ struct kbase_csf_cmd_stream_info const *const stream,
+ u32 cs_ack)
{
const u32 cs_fatal = kbase_csf_firmware_cs_output(stream, CS_FATAL);
const u64 cs_fatal_info =
@@ -2458,57 +2559,26 @@ handle_fatal_event(struct kbase_queue *const queue,
if (cs_fatal_exception_type ==
CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR) {
+ kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_FW_INTERNAL_ERROR);
queue_work(system_wq, &kbdev->csf.fw_error_work);
} else {
+ kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FATAL);
if (cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE) {
queue->group->cs_unrecoverable = true;
if (kbase_prepare_to_reset_gpu(queue->kctx->kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(queue->kctx->kbdev);
}
get_queue(queue);
- queue->cs_fatal = cs_fatal;
- queue->cs_fatal_info = cs_fatal_info;
- if (!queue_work(queue->kctx->csf.wq, &queue->fatal_event_work))
+ queue->cs_error = cs_fatal;
+ queue->cs_error_info = cs_fatal_info;
+ queue->cs_error_fatal = true;
+ if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work))
release_queue(queue);
}
-}
-
-/**
- * handle_queue_exception_event - Handler for CS fatal/fault exception events.
- *
- * @queue: Pointer to queue for which fatal/fault event was received.
- * @cs_req: Value of the CS_REQ register from the CS's input page.
- * @cs_ack: Value of the CS_ACK register from the CS's output page.
- */
-static void handle_queue_exception_event(struct kbase_queue *const queue,
- const u32 cs_req, const u32 cs_ack)
-{
- struct kbase_csf_cmd_stream_group_info const *ginfo;
- struct kbase_csf_cmd_stream_info const *stream;
- struct kbase_context *const kctx = queue->kctx;
- struct kbase_device *const kbdev = kctx->kbdev;
- struct kbase_queue_group *group = queue->group;
- int csi_index = queue->csi_index;
- int slot_num = group->csg_nr;
-
- kbase_csf_scheduler_spin_lock_assert_held(kbdev);
-
- ginfo = &kbdev->csf.global_iface.groups[slot_num];
- stream = &ginfo->streams[csi_index];
+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
+ CS_REQ_FATAL_MASK);
- if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) {
- handle_fatal_event(queue, stream);
- kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
- CS_REQ_FATAL_MASK);
- }
-
- if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) {
- handle_fault_event(queue, stream);
- kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
- CS_REQ_FAULT_MASK);
- kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true);
- }
}
/**
@@ -2561,11 +2631,16 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
kbase_csf_firmware_cs_output(stream, CS_ACK);
struct workqueue_struct *wq = group->kctx->csf.wq;
- if ((cs_req & CS_REQ_EXCEPTION_MASK) ^
- (cs_ack & CS_ACK_EXCEPTION_MASK)) {
+ if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) {
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT,
+ group, queue, cs_req ^ cs_ack);
+ handle_fatal_event(queue, stream, cs_ack);
+ }
+
+ if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) {
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT,
group, queue, cs_req ^ cs_ack);
- handle_queue_exception_event(queue, cs_req, cs_ack);
+ handle_fault_event(queue, cs_ack);
}
/* PROTM_PEND and TILER_OOM can be safely ignored
@@ -2588,12 +2663,17 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
get_queue(queue);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_TILER_OOM,
group, queue, cs_req ^ cs_ack);
- if (WARN_ON(!queue_work(wq, &queue->oom_event_work))) {
+ if (!queue_work(wq, &queue->oom_event_work)) {
/* The work item shall not have been
* already queued, there can be only
* one pending OoM event for a
* queue.
*/
+ dev_warn(
+ kbdev->dev,
+ "Tiler OOM work pending: queue %d group %d (ctx %d_%d)",
+ queue->csi_index, group->handle, queue->kctx->tgid,
+ queue->kctx->id);
release_queue(queue);
}
}
@@ -2624,9 +2704,14 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
track->protm_grp = group;
}
+ if (!group->protected_suspend_buf.pma)
+ queue_work(group->kctx->csf.wq, &group->protm_event_work);
+
if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) {
clear_bit(group->csg_nr,
scheduler->csg_slots_idle_mask);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
+ scheduler->csg_slots_idle_mask[0]);
dev_dbg(kbdev->dev,
"Group-%d on slot %d de-idled by protm request",
group->handle, group->csg_nr);
@@ -2663,8 +2748,6 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c
if (WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num))
return;
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr);
-
ginfo = &kbdev->csf.global_iface.groups[csg_nr];
req = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ);
ack = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
@@ -2673,7 +2756,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c
/* There may not be any pending CSG/CS interrupts to process */
if ((req == ack) && (irqreq == irqack))
- goto out;
+ return;
/* Immediately set IRQ_ACK bits to be same as the IRQ_REQ bits before
* examining the CS_ACK & CS_REQ bits. This would ensure that Host
@@ -2694,10 +2777,12 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c
* slot scheduler spinlock is required.
*/
if (!group)
- goto out;
+ return;
if (WARN_ON(kbase_csf_scheduler_group_get_slot_locked(group) != csg_nr))
- goto out;
+ return;
+
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr);
if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) {
kbase_csf_firmware_csg_input_mask(ginfo,
@@ -2714,6 +2799,9 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c
if ((req ^ ack) & CSG_REQ_IDLE_MASK) {
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE(
+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, csg_nr);
+
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack,
CSG_REQ_IDLE_MASK);
@@ -2728,7 +2816,12 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c
/* If there are non-idle CSGs waiting for a slot, fire
* a tock for a replacement.
*/
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NON_IDLE_GROUPS,
+ group, req ^ ack);
kbase_csf_scheduler_invoke_tock(kbdev);
+ } else {
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NO_NON_IDLE_GROUPS,
+ group, req ^ ack);
}
if (group->scan_seq_num < track->idle_seq) {
@@ -2739,22 +2832,21 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c
if ((req ^ ack) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK) {
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack,
- CSG_REQ_PROGRESS_TIMER_EVENT_MASK);
+ CSG_REQ_PROGRESS_TIMER_EVENT_MASK);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROGRESS_TIMER_EVENT,
- group, req ^ ack);
- dev_info(kbdev->dev,
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROGRESS_TIMER_EVENT, group,
+ req ^ ack);
+ dev_info(
+ kbdev->dev,
"[%llu] Iterator PROGRESS_TIMER timeout notification received for group %u of ctx %d_%d on slot %d\n",
- kbase_backend_get_cycle_cnt(kbdev),
- group->handle, group->kctx->tgid, group->kctx->id, csg_nr);
+ kbase_backend_get_cycle_cnt(kbdev), group->handle, group->kctx->tgid,
+ group->kctx->id, csg_nr);
handle_progress_timer_event(group);
}
process_cs_interrupts(group, ginfo, irqreq, irqack, track);
-out:
- /* group may still be NULL here */
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_END, group,
((u64)req ^ ack) | (((u64)irqreq ^ irqack) << 32));
}
@@ -2916,6 +3008,10 @@ static inline void process_protm_exit(struct kbase_device *kbdev, u32 glb_ack)
kbase_ipa_control_protm_exited(kbdev);
kbase_hwcnt_backend_csf_protm_exited(&kbdev->hwcnt_gpu_iface);
}
+
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+ kbase_debug_coresight_csf_enable_pmode_exit(kbdev);
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
}
static inline void process_tracked_info_for_protm(struct kbase_device *kbdev,
@@ -2991,94 +3087,124 @@ static void order_job_irq_clear_with_iface_mem_read(void)
* <barrier> <barrier>
* Write to IRQ_RAWSTAT to raise new IRQ Read interface memory
*/
-#if KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE
- __iomb();
-#else
+
/* CPU and GPU would be in the same Outer shareable domain */
dmb(osh);
-#endif
}
void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
{
- unsigned long flags;
- u32 csg_interrupts = val & ~JOB_IRQ_GLOBAL_IF;
- struct irq_idle_and_protm_track track = { .protm_grp = NULL, .idle_seq = U32_MAX };
+ bool deferred_handling_glb_idle_irq = false;
lockdep_assert_held(&kbdev->hwaccess_lock);
KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_START, NULL, val);
- kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
- order_job_irq_clear_with_iface_mem_read();
- if (csg_interrupts != 0) {
- kbase_csf_scheduler_spin_lock(kbdev, &flags);
- /* Looping through and track the highest idle and protm groups */
- while (csg_interrupts != 0) {
- int const csg_nr = ffs(csg_interrupts) - 1;
+ do {
+ unsigned long flags;
+ u32 csg_interrupts = val & ~JOB_IRQ_GLOBAL_IF;
+ struct irq_idle_and_protm_track track = { .protm_grp = NULL, .idle_seq = U32_MAX };
+ bool glb_idle_irq_received = false;
- process_csg_interrupts(kbdev, csg_nr, &track);
- csg_interrupts &= ~(1 << csg_nr);
- }
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
+ order_job_irq_clear_with_iface_mem_read();
- /* Handle protm from the tracked information */
- process_tracked_info_for_protm(kbdev, &track);
- kbase_csf_scheduler_spin_unlock(kbdev, flags);
- }
+ if (csg_interrupts != 0) {
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ /* Looping through and track the highest idle and protm groups */
+ while (csg_interrupts != 0) {
+ int const csg_nr = ffs(csg_interrupts) - 1;
- if (val & JOB_IRQ_GLOBAL_IF) {
- const struct kbase_csf_global_iface *const global_iface =
- &kbdev->csf.global_iface;
+ process_csg_interrupts(kbdev, csg_nr, &track);
+ csg_interrupts &= ~(1 << csg_nr);
+ }
- kbdev->csf.interrupt_received = true;
+ /* Handle protm from the tracked information */
+ process_tracked_info_for_protm(kbdev, &track);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ }
- if (!kbdev->csf.firmware_reloaded)
- kbase_csf_firmware_reload_completed(kbdev);
- else if (global_iface->output) {
- u32 glb_req, glb_ack;
+ if (val & JOB_IRQ_GLOBAL_IF) {
+ const struct kbase_csf_global_iface *const global_iface =
+ &kbdev->csf.global_iface;
- kbase_csf_scheduler_spin_lock(kbdev, &flags);
- glb_req = kbase_csf_firmware_global_input_read(
- global_iface, GLB_REQ);
- glb_ack = kbase_csf_firmware_global_output(
- global_iface, GLB_ACK);
- KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_GLB_REQ_ACK, NULL, glb_req ^ glb_ack);
+ kbdev->csf.interrupt_received = true;
+
+ if (!kbdev->csf.firmware_reloaded)
+ kbase_csf_firmware_reload_completed(kbdev);
+ else if (global_iface->output) {
+ u32 glb_req, glb_ack;
- check_protm_enter_req_complete(kbdev, glb_req, glb_ack);
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ glb_req =
+ kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
+ glb_ack = kbase_csf_firmware_global_output(global_iface, GLB_ACK);
+ KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_GLB_REQ_ACK, NULL,
+ glb_req ^ glb_ack);
- if ((glb_req ^ glb_ack) & GLB_REQ_PROTM_EXIT_MASK)
- process_protm_exit(kbdev, glb_ack);
+ check_protm_enter_req_complete(kbdev, glb_req, glb_ack);
- /* Handle IDLE Hysteresis notification event */
- if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) {
- dev_dbg(kbdev->dev, "Idle-hysteresis event flagged");
+ if ((glb_req ^ glb_ack) & GLB_REQ_PROTM_EXIT_MASK)
+ process_protm_exit(kbdev, glb_ack);
+
+ /* Handle IDLE Hysteresis notification event */
+ if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) {
+ dev_dbg(kbdev->dev, "Idle-hysteresis event flagged");
#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
- if (kbase_csf_scheduler_process_gpu_idle_event(kbdev)) {
- kbase_csf_firmware_global_input_mask(
- global_iface, GLB_REQ, glb_ack,
- GLB_REQ_IDLE_EVENT_MASK);
- }
+ if (kbase_csf_scheduler_process_gpu_idle_event(kbdev)) {
+ kbase_csf_firmware_global_input_mask(
+ global_iface, GLB_REQ, glb_ack,
+ GLB_REQ_IDLE_EVENT_MASK);
+ }
#else
- kbase_csf_firmware_global_input_mask(
- global_iface, GLB_REQ, glb_ack,
- GLB_REQ_IDLE_EVENT_MASK);
+ kbase_csf_firmware_global_input_mask(
+ global_iface, GLB_REQ, glb_ack,
+ GLB_REQ_IDLE_EVENT_MASK);
- kbase_csf_scheduler_process_gpu_idle_event(kbdev);
+ kbase_csf_scheduler_process_gpu_idle_event(kbdev);
#endif
- }
- process_prfcnt_interrupts(kbdev, glb_req, glb_ack);
+ glb_idle_irq_received = true;
+ /* Defer handling this IRQ to account for a race condition
+ * where the idle worker could be executed before we have
+ * finished handling all pending IRQs (including CSG IDLE
+ * IRQs).
+ */
+ deferred_handling_glb_idle_irq = true;
+ }
- kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ process_prfcnt_interrupts(kbdev, glb_req, glb_ack);
- /* Invoke the MCU state machine as a state transition
- * might have completed.
- */
- kbase_pm_update_state(kbdev);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+ /* Invoke the MCU state machine as a state transition
+ * might have completed.
+ */
+ kbase_pm_update_state(kbdev);
+ }
}
+
+ if (!glb_idle_irq_received)
+ break;
+ /* Attempt to serve potential IRQs that might have occurred
+ * whilst handling the previous IRQ. In case we have observed
+ * the GLB IDLE IRQ without all CSGs having been marked as
+ * idle, the GPU would be treated as no longer idle and left
+ * powered on.
+ */
+ val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS));
+ } while (val);
+
+ if (deferred_handling_glb_idle_irq) {
+ unsigned long flags;
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ kbase_csf_scheduler_process_gpu_idle_event(kbdev);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
wake_up_all(&kbdev->csf.event_wait);
+
KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val);
}
@@ -3101,7 +3227,7 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev)
struct file *filp;
int ret;
- filp = shmem_file_setup("mali csf", MAX_LFS_FILESIZE, VM_NORESERVE);
+ filp = shmem_file_setup("mali csf db", MAX_LFS_FILESIZE, VM_NORESERVE);
if (IS_ERR(filp))
return PTR_ERR(filp);
@@ -3122,29 +3248,34 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev)
void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev)
{
- if (as_phys_addr_t(kbdev->csf.dummy_user_reg_page)) {
- struct page *page = as_page(kbdev->csf.dummy_user_reg_page);
+ if (kbdev->csf.user_reg.filp) {
+ struct page *page = as_page(kbdev->csf.user_reg.dummy_page);
- kbase_mem_pool_free(
- &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page,
- false);
+ kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false);
+ fput(kbdev->csf.user_reg.filp);
}
}
int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev)
{
struct tagged_addr phys;
+ struct file *filp;
struct page *page;
u32 *addr;
- int ret;
- kbdev->csf.dummy_user_reg_page = as_tagged(0);
+ kbdev->csf.user_reg.filp = NULL;
- ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
- false, NULL);
+ filp = shmem_file_setup("mali csf user_reg", MAX_LFS_FILESIZE, VM_NORESERVE);
+ if (IS_ERR(filp)) {
+ dev_err(kbdev->dev, "failed to get an unlinked file for user_reg");
+ return PTR_ERR(filp);
+ }
- if (ret <= 0)
- return ret;
+ if (kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
+ false, NULL) <= 0) {
+ fput(filp);
+ return -ENOMEM;
+ }
page = as_page(phys);
addr = kmap_atomic(page);
@@ -3154,12 +3285,13 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev)
*/
addr[LATEST_FLUSH / sizeof(u32)] = POWER_DOWN_LATEST_FLUSH_VALUE;
- kbase_sync_single_for_device(kbdev, kbase_dma_addr(page), sizeof(u32),
+ kbase_sync_single_for_device(kbdev, kbase_dma_addr(page) + LATEST_FLUSH, sizeof(u32),
DMA_BIDIRECTIONAL);
kunmap_atomic(addr);
- kbdev->csf.dummy_user_reg_page = phys;
-
+ kbdev->csf.user_reg.filp = filp;
+ kbdev->csf.user_reg.dummy_page = phys;
+ kbdev->csf.user_reg.file_offset = 0;
return 0;
}
diff --git a/mali_kbase/csf/mali_kbase_csf.h b/mali_kbase/csf/mali_kbase_csf.h
index 0b87f50..d2d4163 100644
--- a/mali_kbase/csf/mali_kbase_csf.h
+++ b/mali_kbase/csf/mali_kbase_csf.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -40,14 +40,17 @@
*/
#define KBASEP_USER_DB_NR_INVALID ((s8)-1)
+/* Number of pages used for GPU command queue's User input & output data */
+#define KBASEP_NUM_CS_USER_IO_PAGES (2)
+
/* Indicates an invalid value for the scan out sequence number, used to
* signify there is no group that has protected mode execution pending.
*/
#define KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID (U32_MAX)
/* 60ms optimizes power while minimizing latency impact for UI test cases. */
-#define FIRMWARE_IDLE_HYSTERESIS_TIME_MS (60)
#define MALI_HOST_CONTROLS_SC_RAILS_IDLE_TIMER_US (600)
+#define FIRMWARE_IDLE_HYSTERESIS_TIME_USEC (10000) /* Default 10 milliseconds */
/* Idle hysteresis time can be scaled down when GPU sleep feature is used */
#define FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER (5)
@@ -126,6 +129,25 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx,
struct kbase_ioctl_cs_queue_terminate *term);
/**
+ * kbase_csf_free_command_stream_user_pages() - Free the resources allocated
+ * for a queue at the time of bind.
+ *
+ * @kctx: Address of the kbase context within which the queue was created.
+ * @queue: Pointer to the queue to be unlinked.
+ *
+ * This function will free the pair of physical pages allocated for a GPU
+ * command queue, and also release the hardware doorbell page; both were mapped
+ * into the process address space to enable direct submission of commands to
+ * the hardware. Also releases the reference taken on the queue when the mapping
+ * was created.
+ *
+ * If an explicit or implicit unbind was missed by the userspace then the
+ * mapping will persist. On process exit kernel itself will remove the mapping.
+ */
+void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx,
+ struct kbase_queue *queue);
+
+/**
* kbase_csf_alloc_command_stream_user_pages - Allocate resources for a
* GPU command queue.
*
@@ -188,6 +210,20 @@ int kbase_csf_queue_kick(struct kbase_context *kctx,
struct kbase_ioctl_cs_queue_kick *kick);
/**
+ * kbase_csf_find_queue_group - Find the queue group corresponding
+ * to the indicated handle.
+ *
+ * @kctx: The kbase context under which the queue group exists.
+ * @group_handle: Handle for the group which uniquely identifies it within
+ * the context with which it was created.
+ *
+ * This function is used to find the queue group when passed a handle.
+ *
+ * Return: Pointer to a queue group on success, NULL on failure
+ */
+struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, u8 group_handle);
+
+/**
* kbase_csf_queue_group_handle_is_valid - Find if the given queue group handle
* is valid.
*
@@ -240,6 +276,7 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
*/
void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group);
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
/**
* kbase_csf_queue_group_suspend - Suspend a GPU command queue group
*
@@ -257,6 +294,7 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group);
*/
int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
struct kbase_suspend_copy_buffer *sus_buf, u8 group_handle);
+#endif
/**
* kbase_csf_add_group_fatal_error - Report a fatal group error to userspace
@@ -466,4 +504,5 @@ static inline u64 kbase_csf_ktrace_gpu_cycle_cnt(struct kbase_device *kbdev)
return 0;
#endif
}
+
#endif /* _KBASE_CSF_H_ */
diff --git a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c
index e598f8b..a45b588 100644
--- a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c
+++ b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c
@@ -29,6 +29,132 @@
#include "mali_kbase_csf_tl_reader.h"
#include <linux/version_compat_defs.h>
+/* Wait time, in milliseconds, used cumulatively for all the CSG slots.
+ * Since the scheduler lock is held when a STATUS_UPDATE request is sent, there
+ * won't be any other Host request pending on the FW side, and the FW is usually
+ * responsive to the Doorbell IRQs as it won't be polling for long and won't have
+ * to wait for any HW state transition to complete before publishing the status.
+ * So it is reasonable to expect that handling of the STATUS_UPDATE request would
+ * be relatively quick.
+ */
+#define STATUS_UPDATE_WAIT_TIMEOUT 500
+
+/* The bitmask of CSG slots for which the STATUS_UPDATE request completed.
+ * The access to it is serialized with scheduler lock, so at a time it would
+ * get used either for "active_groups" or per context "groups" debugfs file.
+ */
+static DECLARE_BITMAP(csg_slots_status_updated, MAX_SUPPORTED_CSGS);
+
+static bool csg_slot_status_update_finish(struct kbase_device *kbdev, u32 csg_nr)
+{
+ struct kbase_csf_cmd_stream_group_info const *const ginfo =
+ &kbdev->csf.global_iface.groups[csg_nr];
+
+ return !((kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ) ^
+ kbase_csf_firmware_csg_output(ginfo, CSG_ACK)) &
+ CSG_REQ_STATUS_UPDATE_MASK);
+}
+
+static bool csg_slots_status_update_finish(struct kbase_device *kbdev,
+ const unsigned long *slots_mask)
+{
+ const u32 max_csg_slots = kbdev->csf.global_iface.group_num;
+ bool changed = false;
+ u32 csg_nr;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ for_each_set_bit(csg_nr, slots_mask, max_csg_slots) {
+ if (csg_slot_status_update_finish(kbdev, csg_nr)) {
+ set_bit(csg_nr, csg_slots_status_updated);
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+static void wait_csg_slots_status_update_finish(struct kbase_device *kbdev,
+ unsigned long *slots_mask)
+{
+ const u32 max_csg_slots = kbdev->csf.global_iface.group_num;
+ long remaining = kbase_csf_timeout_in_jiffies(STATUS_UPDATE_WAIT_TIMEOUT);
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ bitmap_zero(csg_slots_status_updated, max_csg_slots);
+
+ while (!bitmap_empty(slots_mask, max_csg_slots) && remaining) {
+ remaining = wait_event_timeout(kbdev->csf.event_wait,
+ csg_slots_status_update_finish(kbdev, slots_mask),
+ remaining);
+ if (likely(remaining)) {
+ bitmap_andnot(slots_mask, slots_mask,
+ csg_slots_status_updated, max_csg_slots);
+ } else {
+ dev_warn(kbdev->dev,
+ "STATUS_UPDATE request timed out for slots 0x%lx",
+ slots_mask[0]);
+ }
+ }
+}
+
+void kbase_csf_debugfs_update_active_groups_status(struct kbase_device *kbdev)
+{
+ u32 max_csg_slots = kbdev->csf.global_iface.group_num;
+ DECLARE_BITMAP(used_csgs, MAX_SUPPORTED_CSGS) = { 0 };
+ u32 csg_nr;
+ unsigned long flags;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ /* The global doorbell for a CSG STATUS_UPDATE request, or the User doorbell
+ * for an Extract offset update, must not be rung when the MCU has been put
+ * to sleep, otherwise it would undesirably make the MCU exit the sleep state.
+ * It isn't really needed either, as the FW implicitly updates the status of
+ * all on-slot groups when the MCU sleep request is sent to it.
+ */
+ if (kbdev->csf.scheduler.state == SCHED_SLEEPING) {
+ /* Wait for the MCU sleep request to complete. */
+ kbase_pm_wait_for_desired_state(kbdev);
+ bitmap_copy(csg_slots_status_updated,
+ kbdev->csf.scheduler.csg_inuse_bitmap, max_csg_slots);
+ return;
+ }
+
+ for (csg_nr = 0; csg_nr < max_csg_slots; csg_nr++) {
+ struct kbase_queue_group *const group =
+ kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
+ if (!group)
+ continue;
+ /* Ring the User doorbell for FW to update the Extract offset */
+ kbase_csf_ring_doorbell(kbdev, group->doorbell_nr);
+ set_bit(csg_nr, used_csgs);
+ }
+
+ /* Return early if there are no on-slot groups */
+ if (bitmap_empty(used_csgs, max_csg_slots))
+ return;
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ for_each_set_bit(csg_nr, used_csgs, max_csg_slots) {
+ struct kbase_csf_cmd_stream_group_info const *const ginfo =
+ &kbdev->csf.global_iface.groups[csg_nr];
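+ /* Toggle the STATUS_UPDATE request bit: writing the inverse of CSG_ACK into
+ * CSG_REQ makes REQ differ from ACK, which the FW treats as a new request;
+ * the request completes when the FW flips ACK back to match REQ.
+ */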
+ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ,
+ ~kbase_csf_firmware_csg_output(ginfo, CSG_ACK),
+ CSG_REQ_STATUS_UPDATE_MASK);
+ }
+
+ BUILD_BUG_ON(MAX_SUPPORTED_CSGS > (sizeof(used_csgs[0]) * BITS_PER_BYTE));
+ kbase_csf_ring_csg_slots_doorbell(kbdev, used_csgs[0]);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ wait_csg_slots_status_update_finish(kbdev, used_csgs);
+ /* Wait for the User doorbell ring to take effect */
+ msleep(100);
+}
+
#define MAX_SCHED_STATE_STRING_LEN (16)
static const char *scheduler_state_to_string(struct kbase_device *kbdev,
enum kbase_csf_scheduler_state sched_state)
@@ -287,54 +413,6 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
seq_puts(file, "\n");
}
-static void update_active_group_status(struct seq_file *file,
- struct kbase_queue_group *const group)
-{
- struct kbase_device *const kbdev = group->kctx->kbdev;
- struct kbase_csf_cmd_stream_group_info const *const ginfo =
- &kbdev->csf.global_iface.groups[group->csg_nr];
- long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
- unsigned long flags;
-
- /* Global doorbell ring for CSG STATUS_UPDATE request or User doorbell
- * ring for Extract offset update, shall not be made when MCU has been
- * put to sleep otherwise it will undesirably make MCU exit the sleep
- * state. Also it isn't really needed as FW will implicitly update the
- * status of all on-slot groups when MCU sleep request is sent to it.
- */
- if (kbdev->csf.scheduler.state == SCHED_SLEEPING)
- return;
-
- /* Ring the User doobell shared between the queues bound to this
- * group, to have FW update the CS_EXTRACT for all the queues
- * bound to the group. Ring early so that FW gets adequate time
- * for the handling.
- */
- kbase_csf_ring_doorbell(kbdev, group->doorbell_nr);
-
- kbase_csf_scheduler_spin_lock(kbdev, &flags);
- kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ,
- ~kbase_csf_firmware_csg_output(ginfo, CSG_ACK),
- CSG_REQ_STATUS_UPDATE_MASK);
- kbase_csf_ring_csg_doorbell(kbdev, group->csg_nr);
- kbase_csf_scheduler_spin_unlock(kbdev, flags);
-
- remaining = wait_event_timeout(kbdev->csf.event_wait,
- !((kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ) ^
- kbase_csf_firmware_csg_output(ginfo, CSG_ACK)) &
- CSG_REQ_STATUS_UPDATE_MASK), remaining);
-
- if (!remaining) {
- dev_err(kbdev->dev,
- "Timed out for STATUS_UPDATE on group %d on slot %d",
- group->handle, group->csg_nr);
-
- seq_printf(file, "*** Warn: Timed out for STATUS_UPDATE on slot %d\n",
- group->csg_nr);
- seq_puts(file, "*** The following group-record is likely stale\n");
- }
-}
-
static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
struct kbase_queue_group *const group)
{
@@ -348,8 +426,6 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
u8 slot_priority =
kbdev->csf.scheduler.csg_slots[group->csg_nr].priority;
- update_active_group_status(file, group);
-
ep_c = kbase_csf_firmware_csg_output(ginfo,
CSG_STATUS_EP_CURRENT);
ep_r = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_EP_REQ);
@@ -365,6 +441,12 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
CSG_STATUS_STATE_IDLE_MASK)
idle = 'Y';
+ if (!test_bit(group->csg_nr, csg_slots_status_updated)) {
+ seq_printf(file, "*** Warn: Timed out for STATUS_UPDATE on slot %d\n",
+ group->csg_nr);
+ seq_puts(file, "*** The following group-record is likely stale\n");
+ }
+
seq_puts(file, "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive, Idle\n");
seq_printf(file, "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c, %4c\n",
group->handle,
@@ -380,10 +462,6 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
CSG_STATUS_EP_REQ_TILER_EP_GET(ep_r),
exclusive,
idle);
-
- /* Wait for the User doobell ring to take effect */
- if (kbdev->csf.scheduler.state != SCHED_SLEEPING)
- msleep(100);
} else {
seq_puts(file, "GroupID, CSG NR, Run State, Priority\n");
seq_printf(file, "%7d, %6d, %9d, %8d\n",
@@ -421,22 +499,19 @@ static int kbasep_csf_queue_group_debugfs_show(struct seq_file *file,
{
u32 gr;
struct kbase_context *const kctx = file->private;
- struct kbase_device *const kbdev = kctx->kbdev;
+ struct kbase_device *kbdev;
if (WARN_ON(!kctx))
return -EINVAL;
+ kbdev = kctx->kbdev;
+
seq_printf(file, "MALI_CSF_CSG_DEBUGFS_VERSION: v%u\n",
MALI_CSF_CSG_DEBUGFS_VERSION);
rt_mutex_lock(&kctx->csf.lock);
kbase_csf_scheduler_lock(kbdev);
- if (kbdev->csf.scheduler.state == SCHED_SLEEPING) {
- /* Wait for the MCU sleep request to complete. Please refer the
- * update_active_group_status() function for the explanation.
- */
- kbase_pm_wait_for_desired_state(kbdev);
- }
+ kbase_csf_debugfs_update_active_groups_status(kbdev);
for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) {
struct kbase_queue_group *const group =
kctx->csf.queue_groups[gr];
@@ -470,12 +545,7 @@ static int kbasep_csf_scheduler_dump_active_groups(struct seq_file *file,
MALI_CSF_CSG_DEBUGFS_VERSION);
kbase_csf_scheduler_lock(kbdev);
- if (kbdev->csf.scheduler.state == SCHED_SLEEPING) {
- /* Wait for the MCU sleep request to complete. Please refer the
- * update_active_group_status() function for the explanation.
- */
- kbase_pm_wait_for_desired_state(kbdev);
- }
+ kbase_csf_debugfs_update_active_groups_status(kbdev);
for (csg_nr = 0; csg_nr < num_groups; csg_nr++) {
struct kbase_queue_group *const group =
kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
diff --git a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.h b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.h
index 397e657..16a548b 100644
--- a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.h
+++ b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -44,4 +44,11 @@ void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx);
*/
void kbase_csf_debugfs_init(struct kbase_device *kbdev);
+/**
+ * kbase_csf_debugfs_update_active_groups_status() - Update on-slot group statuses
+ *
+ * @kbdev: Pointer to the device
+ */
+void kbase_csf_debugfs_update_active_groups_status(struct kbase_device *kbdev);
+
#endif /* _KBASE_CSF_CSG_DEBUGFS_H_ */
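
For orientation, a minimal sketch (not part of this patch) of how a debugfs show path is expected to use the helper declared above: the group-status refresh is done once, under the scheduler lock, before any group record is printed, mirroring the kbasep_csf_scheduler_dump_active_groups() hunk earlier in this diff. The dump_one_group() helper, the use of file->private as the device pointer, and the global_iface.group_num field are illustrative assumptions.

static int example_dump_active_groups(struct seq_file *file, void *data)
{
	struct kbase_device *kbdev = file->private; /* assumed to hold the device */
	u32 num_groups = kbdev->csf.global_iface.group_num; /* assumed field */
	u32 csg_nr;

	kbase_csf_scheduler_lock(kbdev);
	/* Refresh CSG_STATUS_* for every on-slot group in one go, or simply
	 * wait for the MCU sleep request when the scheduler is sleeping.
	 */
	kbase_csf_debugfs_update_active_groups_status(kbdev);

	for (csg_nr = 0; csg_nr < num_groups; csg_nr++) {
		struct kbase_queue_group *const group =
			kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;

		if (group)
			dump_one_group(file, group); /* hypothetical print helper */
	}

	kbase_csf_scheduler_unlock(kbdev);
	return 0;
}
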
diff --git a/mali_kbase/csf/mali_kbase_csf_defs.h b/mali_kbase/csf/mali_kbase_csf_defs.h
index 520a41b..b742f97 100644
--- a/mali_kbase/csf/mali_kbase_csf_defs.h
+++ b/mali_kbase/csf/mali_kbase_csf_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -30,7 +30,13 @@
#include <linux/wait.h>
#include "mali_kbase_csf_firmware.h"
+#include "mali_kbase_refcount_defs.h"
#include "mali_kbase_csf_event.h"
+#include <uapi/gpu/arm/midgard/csf/mali_kbase_csf_errors_dumpfault.h>
+
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+#include <debug/backend/mali_kbase_debug_coresight_internal_csf.h>
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
/* Maximum number of KCPU command queues to be created per GPU address space.
*/
@@ -264,6 +270,8 @@ enum kbase_queue_group_priority {
* @CSF_FIRMWARE_PING_TIMEOUT: Maximum time to wait for firmware to respond
* to a ping from KBase.
* @CSF_SCHED_PROTM_PROGRESS_TIMEOUT: Timeout used to prevent protected mode execution hang.
+ * @MMU_AS_INACTIVE_WAIT_TIMEOUT: Maximum waiting time in ms for the completion
+ *                                of an MMU operation
* @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in
* the enum.
*/
@@ -275,6 +283,7 @@ enum kbase_timeout_selector {
CSF_FIRMWARE_BOOT_TIMEOUT,
CSF_FIRMWARE_PING_TIMEOUT,
CSF_SCHED_PROTM_PROGRESS_TIMEOUT,
+ MMU_AS_INACTIVE_WAIT_TIMEOUT,
/* Must be the last in the enum */
KBASE_TIMEOUT_SELECTOR_COUNT
@@ -297,9 +306,9 @@ struct kbase_csf_notification {
*
* @kctx: Pointer to the base context with which this GPU command queue
* is associated.
- * @reg: Pointer to the region allocated from the shared
- * interface segment for mapping the User mode
- * input/output pages in MCU firmware address space.
+ * @user_io_gpu_va: The start GPU VA of this queue's userio pages. Only
+ *                  valid (i.e. not 0) when the queue is enabled and its owner
+ *                  group has a runtime-bound csg_reg (group region).
* @phys: Pointer to the physical pages allocated for the
* pair or User mode input/output page
* @user_io_addr: Pointer to the permanent kernel mapping of User mode
@@ -355,14 +364,19 @@ struct kbase_csf_notification {
* @trace_buffer_size: CS trace buffer size for the queue.
* @trace_cfg: CS trace configuration parameters.
* @error: GPU command queue fatal information to pass to user space.
- * @fatal_event_work: Work item to handle the CS fatal event reported for this
- * queue.
- * @cs_fatal_info: Records additional information about the CS fatal event.
- * @cs_fatal: Records information about the CS fatal event.
+ * @cs_error_work: Work item to handle the CS fatal event reported for this
+ *                 queue, or the CS fault event if dump on fault is enabled
+ *                 and the CS fault event needs to be acknowledged after
+ *                 dumping is complete.
+ * @cs_error_info: Records additional information about the CS fatal event, or
+ *                 about the CS fault event if dump on fault is enabled.
+ * @cs_error: Records information about the CS fatal event, or
+ *            about the CS fault event if dump on fault is enabled.
+ * @cs_error_fatal: Flag to track whether a CS fault or CS fatal event occurred.
* @pending: Indicating whether the queue has new submitted work.
- * @extract_ofs: The current EXTRACT offset, this is updated during certain
- * events such as GPU idle IRQ in order to help detect a
- * queue's true idle status.
+ * @extract_ofs: The current EXTRACT offset; this is only updated when handling
+ *               the GLB IDLE IRQ, if the idle timeout value is non-zero, in order
+ *               to help detect a queue's true idle status.
* @saved_cmd_ptr: The command pointer value for the GPU queue, saved when the
* group to which queue is bound is suspended.
* This can be useful in certain cases to know that till which
@@ -370,14 +384,14 @@ struct kbase_csf_notification {
*/
struct kbase_queue {
struct kbase_context *kctx;
- struct kbase_va_region *reg;
+ u64 user_io_gpu_va;
struct tagged_addr phys[2];
char *user_io_addr;
u64 handle;
int doorbell_nr;
unsigned long db_file_offset;
struct list_head link;
- atomic_t refcount;
+ kbase_refcount_t refcount;
struct kbase_queue_group *group;
struct kbase_va_region *queue_reg;
struct work_struct oom_event_work;
@@ -397,39 +411,47 @@ struct kbase_queue {
u32 trace_buffer_size;
u32 trace_cfg;
struct kbase_csf_notification error;
- struct work_struct fatal_event_work;
- u64 cs_fatal_info;
- u32 cs_fatal;
+ struct work_struct cs_error_work;
+ u64 cs_error_info;
+ u32 cs_error;
+ bool cs_error_fatal;
atomic_t pending;
u64 extract_ofs;
#if IS_ENABLED(CONFIG_DEBUG_FS)
u64 saved_cmd_ptr;
-#endif
+#endif /* CONFIG_DEBUG_FS */
};
/**
* struct kbase_normal_suspend_buffer - Object representing a normal
* suspend buffer for queue group.
- * @reg: Memory region allocated for the normal-mode suspend buffer.
+ * @gpu_va: The start GPU VA of the bound suspend buffer. Note that this
+ * field is only valid when the owner group has a region bound at
+ * runtime.
* @phy: Array of physical memory pages allocated for the normal-
* mode suspend buffer.
*/
struct kbase_normal_suspend_buffer {
- struct kbase_va_region *reg;
+ u64 gpu_va;
struct tagged_addr *phy;
};
/**
* struct kbase_protected_suspend_buffer - Object representing a protected
* suspend buffer for queue group.
- * @reg: Memory region allocated for the protected-mode suspend buffer.
+ * @gpu_va: The start GPU VA of the bound protected mode suspend buffer.
+ *          Note that this field is only valid when the owner group has a region
+ * bound at runtime.
* @pma: Array of pointer to protected mode allocations containing
* information about memory pages allocated for protected mode
* suspend buffer.
+ * @alloc_retries: Number of times we retried allocating physical pages
+ *                 for protected suspend buffers.
*/
struct kbase_protected_suspend_buffer {
- struct kbase_va_region *reg;
+ u64 gpu_va;
struct protected_memory_allocation **pma;
+ u8 alloc_retries;
};
/**
@@ -498,6 +520,16 @@ struct kbase_protected_suspend_buffer {
* to be returned to userspace if such an error has occurred.
* @timer_event_work: Work item to handle the progress timeout fatal event
* for the group.
+ * @deschedule_deferred_cnt: Counter keeping track of the number of threads
+ *                           that tried to deschedule the group and had to defer
+ *                           the descheduling due to the dump on fault.
+ * @csg_reg: An opaque pointer to the runtime-bound shared regions. It is
+ *           dynamically managed by the scheduler and can be NULL if the
+ *           group is off-slot.
+ * @csg_reg_bind_retries: Count of runtime MCU shared region map attempts.
+ *                        It is accumulated over consecutive mapping attempt failures. On
+ *                        reaching a preset limit, the group is regarded as having suffered
+ *                        a fatal error and a fatal error notification is triggered.
*/
struct kbase_queue_group {
struct kbase_context *kctx;
@@ -539,6 +571,17 @@ struct kbase_queue_group {
struct work_struct timer_event_work;
+ /**
+ * @dvs_buf: Address and size of scratch memory.
+ *
+ * Used to store intermediate DVS data by the GPU.
+ */
+ u64 dvs_buf;
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ u32 deschedule_deferred_cnt;
+#endif
+ void *csg_reg;
+ u8 csg_reg_bind_retries;
};
/**
@@ -548,10 +591,10 @@ struct kbase_queue_group {
* @lock: Lock preventing concurrent access to @array and the @in_use bitmap.
* @array: Array of pointers to kernel CPU command queues.
* @in_use: Bitmap which indicates which kernel CPU command queues are in use.
- * @num_cmds: The number of commands that have been enqueued across
- * all the KCPU command queues. This could be used as a
- * timestamp to determine the command's enqueueing time.
- * @jit_lock: Lock protecting jit_cmds_head and jit_blocked_queues.
+ * @cmd_seq_num: The sequence number assigned to an enqueued command,
+ * in incrementing order (older commands shall have a
+ * smaller number).
+ * @jit_lock: Lock to serialise JIT operations.
* @jit_cmds_head: A list of the just-in-time memory commands, both
* allocate & free, in submission order, protected
* by kbase_csf_kcpu_queue_context.lock.
@@ -564,8 +607,9 @@ struct kbase_csf_kcpu_queue_context {
struct mutex lock;
struct kbase_kcpu_command_queue *array[KBASEP_MAX_KCPU_QUEUES];
DECLARE_BITMAP(in_use, KBASEP_MAX_KCPU_QUEUES);
- atomic64_t num_cmds;
- spinlock_t jit_lock;
+ atomic64_t cmd_seq_num;
+
+ struct mutex jit_lock;
struct list_head jit_cmds_head;
struct list_head jit_blocked_queues;
};
@@ -626,8 +670,6 @@ struct kbase_csf_heap_context_allocator {
* @ctx_alloc: Allocator for heap context structures.
* @nr_of_heaps: Total number of tiler heaps that were added during the
* life time of the context.
- * @est_count_pages: Estimated potentially freeable pages from all the heaps
- * on the @list.
*
* This contains all of the CSF state relating to chunked tiler heaps for one
* @kbase_context. It is not the same as a heap context structure allocated by
@@ -638,38 +680,27 @@ struct kbase_csf_tiler_heap_context {
struct list_head list;
struct kbase_csf_heap_context_allocator ctx_alloc;
u64 nr_of_heaps;
- atomic_t est_count_pages;
};
-#define CSF_CTX_RECLAIM_CANDI_FLAG (1ul << 0)
-#define CSF_CTX_RECLAIM_SCAN_FLAG (1ul << 1)
/**
- * struct kbase_kctx_heap_info - Object representing the data section of a kctx
- * for tiler heap reclaim manger
- * @mgr_link: Link for hooking up to the heap reclaim manger's kctx lists
- * @attach_jiffies: jiffies when the kctx is attached to the reclaim manager.
- * @nr_scan_pages: Number of a better estimated freeable pages from the kctx
- * after all its CSGs are off-slots and have been properly
- * gone through the freeable pages count process. This field
- * is updated when the kctx is moved to the reclaim manager's
- * pending scan (freeing) action list, after the counting.
- * @nr_est_pages: Estimated number of pages of the kctx when all its CSGs are
- * off-slot. This is a nominal value used for estimating an
- * available page counts from the kctx. The kctx is on the
- * reclaim manager's candidate list, waiting for count.
- * @flags: reflecting the kctx's internal state in relation to the
- * scheduler's heap reclaim manager.
- * @on_slot_grps: Number of on-slot groups from this kctx. In principle, if a
- * kctx has groups on-slot, the scheduler will detach it from
- * the tiler heap reclaim manager, i.e. no tiler heap memory
- * reclaiming operations on the kctx.
+ * struct kbase_csf_ctx_heap_reclaim_info - Object representing the data section of
+ *                                           a kctx for the tiler heap reclaim manager
+ * @mgr_link: Link for hooking up to the heap reclaim manager's kctx lists
+ * @nr_freed_pages: Number of freed pages from the kctx, after its attachment
+ *                  to the reclaim manager. This is used for tracking the reclaim's
+ * free operation progress.
+ * @nr_est_unused_pages: Estimated number of pages that could be freed for the kctx
+ * when all its CSGs are off-slot, on attaching to the reclaim
+ * manager.
+ * @on_slot_grps: Number of on-slot groups from this kctx. In principle, if a
+ * kctx has groups on-slot, the scheduler will detach it from
+ * the tiler heap reclaim manager, i.e. no tiler heap memory
+ * reclaiming operations on the kctx.
*/
-struct kbase_kctx_heap_info {
+struct kbase_csf_ctx_heap_reclaim_info {
struct list_head mgr_link;
- unsigned long attach_jiffies;
- u32 nr_scan_pages;
- u32 nr_est_pages;
- u16 flags;
+ u32 nr_freed_pages;
+ u32 nr_est_unused_pages;
u8 on_slot_grps;
};
@@ -696,8 +727,8 @@ struct kbase_kctx_heap_info {
* 'groups_to_schedule' list of scheduler instance.
* @heap_info: Heap reclaim information data of the kctx. As the
* reclaim action needs to be coordinated with the scheduler
- * operations, the data is placed inside the scheduler's
- * context object for this linkage.
+ * operations, any manipulation of the data requires holding
+ * the scheduler's mutex lock.
*/
struct kbase_csf_scheduler_context {
struct list_head runnable_groups[KBASE_QUEUE_GROUP_PRIORITY_COUNT];
@@ -707,7 +738,7 @@ struct kbase_csf_scheduler_context {
struct kthread_worker sync_update_worker;
struct kthread_work sync_update_work;
u32 ngrp_to_schedule;
- struct kbase_kctx_heap_info heap_info;
+ struct kbase_csf_ctx_heap_reclaim_info heap_info;
};
/**
@@ -748,6 +779,23 @@ struct kbase_csf_event {
};
/**
+ * struct kbase_csf_user_reg_context - Object containing members to manage the mapping
+ * of USER Register page for a context.
+ *
+ * @vma: Pointer to the VMA corresponding to the virtual mapping
+ * of the USER register page.
+ * @file_offset: File offset value that is assigned to userspace mapping
+ * of the USER Register page. It is in page units.
+ * @link: Links the context to the device list when the mapping points to
+ *        either the dummy or the real Register page.
+ */
+struct kbase_csf_user_reg_context {
+ struct vm_area_struct *vma;
+ u32 file_offset;
+ struct list_head link;
+};
+
+/**
* struct kbase_csf_context - Object representing CSF for a GPU address space.
*
* @event_pages_head: A list of pages allocated for the event memory used by
@@ -785,14 +833,12 @@ struct kbase_csf_event {
* used by GPU command queues, and progress timeout events.
* @link: Link to this csf context in the 'runnable_kctxs' list of
* the scheduler instance
- * @user_reg_vma: Pointer to the vma corresponding to the virtual mapping
- * of the USER register page. Currently used only for sanity
- * checking.
* @sched: Object representing the scheduler's context
* @pending_submission_worker: Worker for the pending submission work item
* @pending_submission_work: Work item to process pending kicked GPU command queues.
* @cpu_queue: CPU queue information. Only be available when DEBUG_FS
* is enabled.
+ * @user_reg: Collective information to support mapping to USER Register page.
*/
struct kbase_csf_context {
struct list_head event_pages_head;
@@ -807,13 +853,13 @@ struct kbase_csf_context {
struct kbase_csf_tiler_heap_context tiler_heaps;
struct workqueue_struct *wq;
struct list_head link;
- struct vm_area_struct *user_reg_vma;
struct kbase_csf_scheduler_context sched;
struct kthread_worker pending_submission_worker;
struct kthread_work pending_submission_work;
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct kbase_csf_cpu_queue_context cpu_queue;
#endif
+ struct kbase_csf_user_reg_context user_reg;
};
/**
@@ -858,19 +904,43 @@ struct kbase_csf_csg_slot {
* struct kbase_csf_sched_heap_reclaim_mgr - Object for managing tiler heap reclaim
* kctx lists inside the CSF device's scheduler.
*
- * @candidate_ctxs: List of kctxs that have all their CSGs off-slots. Candidates
- * are ready for reclaim count examinations.
- * @scan_list_ctxs: List counted kctxs, ready for reclaim scan operations.
- * @est_cand_pages: Estimated pages based on chunks that could be free-able from the
- * candidate list. For each addition of an acandidate, the number is
- * increased with an estimate, and decreased vice versa.
- * @mgr_scan_pages: Number of pagess free-able in the scan list, device wide.
+ * @heap_reclaim: Tiler heap reclaim shrinker object.
+ * @ctx_lists: Array of kctx lists, sized to match the CSG-defined priorities. The
+ *             lists track the kctxs attached to the reclaim manager.
+ * @unused_pages: Estimated number of unused pages from the @ctx_lists array. The
+ *                number is indicative, for use with the reclaim shrinker's count method.
*/
struct kbase_csf_sched_heap_reclaim_mgr {
- struct list_head candidate_ctxs;
- struct list_head scan_list_ctxs;
- atomic_t est_cand_pages;
- atomic_t mgr_scan_pages;
+ struct shrinker heap_reclaim;
+ struct list_head ctx_lists[KBASE_QUEUE_GROUP_PRIORITY_COUNT];
+ atomic_t unused_pages;
+};
+
+/**
+ * struct kbase_csf_mcu_shared_regions - Control data for managing the MCU shared
+ * interface segment regions for scheduler
+ * operations
+ *
+ * @array_csg_regs: Base pointer of an internally created array_csg_regs[].
+ * @unused_csg_regs: List containing unused csg_regs items. When an item is bound to a
+ *                   group that the scheduler places on-slot, it is dropped
+ *                   from the list (i.e. busy/active). The scheduler puts an active
+ *                   item back when the group goes off-slot (not in use).
+ * @dummy_phys: An array of dummy phys[nr_susp_pages] pages for use with normal
+ * and pmode suspend buffers, as a default replacement of a CSG's pages
+ * for the MMU mapping when the csg_reg is not bound to a group.
+ * @pma_phys: Pre-allocated array phy[nr_susp_pages] for transitional use with
+ * protected suspend buffer MMU map operations.
+ * @userio_mem_rd_flags: Userio input page's read access mapping configuration flags.
+ * @dummy_phys_allocated: Indicates, when true, that the @dummy_phys pages are allocated.
+ */
+struct kbase_csf_mcu_shared_regions {
+ void *array_csg_regs;
+ struct list_head unused_csg_regs;
+ struct tagged_addr *dummy_phys;
+ struct tagged_addr *pma_phys;
+ unsigned long userio_mem_rd_flags;
+ bool dummy_phys_allocated;
};
/**
@@ -968,6 +1038,13 @@ struct kbase_csf_sched_heap_reclaim_mgr {
* handler.
* @gpu_idle_work: Work item for facilitating the scheduler to bring
* the GPU to a low-power mode on becoming idle.
+ * @fast_gpu_idle_handling: Indicates whether to relax many of the checks
+ * normally done in the GPU idle worker. This is
+ * set to true when handling the GLB IDLE IRQ if the
+ * idle hysteresis timeout is 0, since it makes it
+ * possible to receive this IRQ before the extract
+ * offset is published (which would cause more
+ * extensive GPU idle checks to fail).
* @gpu_no_longer_idle: Effective only when the GPU idle worker has been
* queued for execution, this indicates whether the
* GPU has become non-idle since the last time the
@@ -1015,6 +1092,9 @@ struct kbase_csf_sched_heap_reclaim_mgr {
* needs to be done by the Host.
* @protm_enter_time: GPU protected mode enter time.
* @reclaim_mgr: CSGs tiler heap manager object.
+ * @mcu_regs_data: Scheduler MCU shared regions data for managing the
+ * shared interface mappings for on-slot queues and
+ * CSG suspend buffers.
*/
struct kbase_csf_scheduler {
struct mutex lock;
@@ -1046,8 +1126,13 @@ struct kbase_csf_scheduler {
struct kbase_context *top_ctx;
struct kbase_queue_group *top_grp;
struct kbase_queue_group *active_protm_grp;
+#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
struct delayed_work gpu_idle_work;
+#else
+ struct work_struct gpu_idle_work;
+#endif
struct workqueue_struct *idle_wq;
+ bool fast_gpu_idle_handling;
atomic_t gpu_no_longer_idle;
atomic_t non_idle_offslot_grps;
u32 non_idle_scanout_grps;
@@ -1059,10 +1144,11 @@ struct kbase_csf_scheduler {
struct work_struct sc_rails_off_work;
bool sc_power_rails_off;
bool gpu_idle_work_pending;
-#endif
bool gpu_idle_fw_timer_enabled;
+#endif
ktime_t protm_enter_time;
struct kbase_csf_sched_heap_reclaim_mgr reclaim_mgr;
+ struct kbase_csf_mcu_shared_regions mcu_regs_data;
};
/*
@@ -1249,6 +1335,7 @@ struct kbase_ipa_control {
* @flags: bitmask of CSF_FIRMWARE_ENTRY_* conveying the interface attributes
* @data_start: Offset into firmware image at which the interface data starts
* @data_end: Offset into firmware image at which the interface data ends
+ * @virtual_exe_start: Starting GPU execution virtual address of this interface
* @kernel_map: A kernel mapping of the memory or NULL if not required to be
* mapped in the kernel
* @pma: Array of pointers to protected memory allocations.
@@ -1265,6 +1352,7 @@ struct kbase_csf_firmware_interface {
u32 flags;
u32 data_start;
u32 data_end;
+ u32 virtual_exe_start;
void *kernel_map;
struct protected_memory_allocation **pma;
};
@@ -1326,12 +1414,91 @@ enum kbase_csf_firmware_log_mode {
* at regular intervals to perform any periodic
* activities required by current log mode.
* @dump_buf: Buffer used for dumping the log.
+ * @func_call_list_va_start: Virtual address of the start of the call list of FW log functions.
+ * @func_call_list_va_end: Virtual address of the end of the call list of FW log functions.
*/
struct kbase_csf_firmware_log {
enum kbase_csf_firmware_log_mode mode;
atomic_t busy;
struct delayed_work poll_work;
u8 *dump_buf;
+ u32 func_call_list_va_start;
+ u32 func_call_list_va_end;
+};
+
+/**
+ * struct kbase_csf_firmware_core_dump - Object containing members for handling
+ * firmware core dump.
+ *
+ * @mcu_regs_addr: GPU virtual address of the start of the MCU registers buffer
+ * in Firmware.
+ * @version: Version of the FW image header core dump data format. Bits
+ * 7:0 specify version minor and 15:8 specify version major.
+ * @available: Flag to identify if the FW core dump buffer is available.
+ * True if entry is available in the FW image header and version
+ * is supported, False otherwise.
+ */
+struct kbase_csf_firmware_core_dump {
+ u32 mcu_regs_addr;
+ u16 version;
+ bool available;
+};
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+/**
+ * struct kbase_csf_dump_on_fault - Fault information to deliver to the daemon
+ *
+ * @error_code: Error code.
+ * @kctx_tgid: tgid value of the Kbase context for which the fault happened.
+ * @kctx_id: id of the Kbase context for which the fault happened.
+ * @enabled: Flag to indicate that 'csf_fault' debugfs has been opened
+ * so dump on fault is enabled.
+ * @fault_wait_wq: Waitqueue on which user space client is blocked till kbase
+ * reports a fault.
+ * @dump_wait_wq: Waitqueue on which kbase threads are blocked till user space client
+ * completes the dump on fault.
+ * @lock: Lock to protect this struct's members from concurrent access.
+ */
+struct kbase_csf_dump_on_fault {
+ enum dumpfault_error_type error_code;
+ u32 kctx_tgid;
+ u32 kctx_id;
+ atomic_t enabled;
+ wait_queue_head_t fault_wait_wq;
+ wait_queue_head_t dump_wait_wq;
+ spinlock_t lock;
+};
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * struct kbase_csf_user_reg - Object containing members to manage the mapping
+ * of USER Register page for all contexts
+ *
+ * @dummy_page: Address of a dummy page that is mapped in place
+ * of the real USER Register page just before the GPU
+ * is powered down. The USER Register page is mapped
+ *                  in the address space of every process that created
+ *                  a Base context, to enable access to the LATEST_FLUSH
+ *                  register from userspace.
+ * @filp: Pointer to a dummy file that, along with @file_offset,
+ *        facilitates the use of a unique file offset for the userspace mapping
+ *        created for the USER Register page.
+ * The userspace mapping is made to point to this file
+ * inside the mmap handler.
+ * @file_offset: Counter that is incremented every time Userspace creates a mapping of
+ *               the USER Register page, to provide a unique file offset range for
+ *               the @filp file, so that the CPU PTE of the Userspace mapping can be zapped
+ * through the kernel function unmap_mapping_range().
+ * It is incremented in page units.
+ * @list: Linked list to maintain user processes (contexts)
+ *        having the mapping to the USER Register page.
+ * It's protected by &kbase_csf_device.reg_lock.
+ */
+struct kbase_csf_user_reg {
+ struct tagged_addr dummy_page;
+ struct file *filp;
+ u32 file_offset;
+ struct list_head list;
};
/**
@@ -1371,20 +1538,6 @@ struct kbase_csf_firmware_log {
* of the real Hw doorbell page for the active GPU
* command queues after they are stopped or after the
* GPU is powered down.
- * @dummy_user_reg_page: Address of the dummy page that is mapped in place
- * of the real User register page just before the GPU
- * is powered down. The User register page is mapped
- * in the address space of every process, that created
- * a Base context, to enable the access to LATEST_FLUSH
- * register from userspace.
- * @nr_user_page_mapped: The number of clients using the mapping of USER page.
- * This is used to maintain backward compatibility.
- * It's protected by @reg_lock.
- * @mali_file_inode: Pointer to the inode corresponding to mali device
- * file. This is needed in order to switch to the
- * @dummy_user_reg_page on GPU power down.
- * All instances of the mali device file will point to
- * the same inode. It's protected by @reg_lock.
* @reg_lock: Lock to serialize the MCU firmware related actions
* that affect all contexts such as allocation of
* regions from shared interface area, assignment of
@@ -1439,9 +1592,9 @@ struct kbase_csf_firmware_log {
* the glb_pwoff register. This is separated from
* the @p mcu_core_pwroff_dur_count as an update
* to the latter is asynchronous.
- * @gpu_idle_hysteresis_ms: Sysfs attribute for the idle hysteresis time
- * window in unit of ms. The firmware does not use it
- * directly.
+ * @gpu_idle_hysteresis_us: Sysfs attribute for the idle hysteresis time
+ *                          window in units of microseconds. The firmware does not
+ * use it directly.
* @gpu_idle_dur_count: The counterpart of the hysteresis time window in
* interface required format, ready to be used
* directly in the firmware.
@@ -1451,7 +1604,11 @@ struct kbase_csf_firmware_log {
* HW counters.
* @fw: Copy of the loaded MCU firmware image.
* @fw_log: Contain members required for handling firmware log.
- * @tiler_heap_reclaim: Tiler heap reclaim shrinker object.
+ * @fw_core_dump: Contain members required for handling the firmware
+ * core dump.
+ * @dof: Structure for dump on fault.
+ * @user_reg: Collective information to support the mapping to
+ * USER Register page for user processes.
*/
struct kbase_csf_device {
struct kbase_mmu_table mcu_mmu;
@@ -1465,9 +1622,6 @@ struct kbase_csf_device {
struct file *db_filp;
u32 db_file_offsets;
struct tagged_addr dummy_db_page;
- struct tagged_addr dummy_user_reg_page;
- u32 nr_user_page_mapped;
- struct inode *mali_file_inode;
struct mutex reg_lock;
wait_queue_head_t event_wait;
bool interrupt_received;
@@ -1489,13 +1643,23 @@ struct kbase_csf_device {
u32 mcu_core_pwroff_dur_us;
u32 mcu_core_pwroff_dur_count;
u32 mcu_core_pwroff_reg_shadow;
- u32 gpu_idle_hysteresis_ms;
+ u32 gpu_idle_hysteresis_us;
u32 gpu_idle_dur_count;
unsigned int fw_timeout_ms;
struct kbase_csf_hwcnt hwcnt;
struct kbase_csf_mcu_fw fw;
struct kbase_csf_firmware_log fw_log;
- struct shrinker tiler_heap_reclaim;
+ struct kbase_csf_firmware_core_dump fw_core_dump;
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ struct kbase_csf_dump_on_fault dof;
+#endif /* CONFIG_DEBUG_FS */
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+ /**
+ * @coresight: Coresight device structure.
+ */
+ struct kbase_debug_coresight_device coresight;
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
+ struct kbase_csf_user_reg user_reg;
};
/**
@@ -1512,6 +1676,10 @@ struct kbase_csf_device {
* @bf_data: Data relating to Bus fault.
* @gf_data: Data relating to GPU fault.
* @current_setup: Stores the MMU configuration for this address space.
+ * @is_unresponsive: Flag to indicate that the MMU is not responding.
+ *                   Set if an MMU command isn't completed within
+ *                   &kbase_device:mmu_as_inactive_wait_time_ms.
+ *                   Cleared by kbase_ctx_sched_restore_all_as() after GPU reset completes.
*/
struct kbase_as {
int number;
@@ -1523,6 +1691,7 @@ struct kbase_as {
struct kbase_fault bf_data;
struct kbase_fault gf_data;
struct kbase_mmu_setup current_setup;
+ bool is_unresponsive;
};
#endif /* _KBASE_CSF_DEFS_H_ */
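
As a usage note for the new USER Register page bookkeeping documented above, here is a minimal sketch (not part of this patch) of how a context's mapping might be linked onto the device-wide list: the per-context kbase_csf_user_reg_context is given a file offset carved out of the device-wide counter, under reg_lock as the @list documentation requires. The helper name and the exact offset arithmetic are illustrative assumptions.

static void example_track_user_reg_mapping(struct kbase_device *kbdev,
					   struct kbase_context *kctx)
{
	mutex_lock(&kbdev->csf.reg_lock);

	/* Hand out a unique file offset (in page units) for this mapping;
	 * the increment step shown here is an assumption for illustration.
	 */
	kctx->csf.user_reg.file_offset = kbdev->csf.user_reg.file_offset++;

	/* Track the context on the device list protected by reg_lock */
	list_add_tail(&kctx->csf.user_reg.link, &kbdev->csf.user_reg.list);

	mutex_unlock(&kbdev->csf.reg_lock);
}
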
diff --git a/mali_kbase/csf/mali_kbase_csf_event.c b/mali_kbase/csf/mali_kbase_csf_event.c
index 52a6b10..63e6c15 100644
--- a/mali_kbase/csf/mali_kbase_csf_event.c
+++ b/mali_kbase/csf/mali_kbase_csf_event.c
@@ -169,7 +169,8 @@ void kbase_csf_event_term(struct kbase_context *kctx)
kfree(event_cb);
}
- WARN_ON(!list_empty(&kctx->csf.event.error_list));
+ WARN(!list_empty(&kctx->csf.event.error_list),
+ "Error list not empty for ctx %d_%d\n", kctx->tgid, kctx->id);
spin_unlock_irqrestore(&kctx->csf.event.lock, flags);
}
@@ -244,6 +245,14 @@ bool kbase_csf_event_error_pending(struct kbase_context *kctx)
bool error_pending = false;
unsigned long flags;
+ /* Withhold the error event if the dump on fault is ongoing.
+	 * This prevents Userspace from taking error recovery actions
+	 * (which can potentially affect the state that is being dumped).
+	 * The event handling thread would eventually notice the error event.
+ */
+ if (unlikely(!kbase_debug_csf_fault_dump_complete(kctx->kbdev)))
+ return false;
+
spin_lock_irqsave(&kctx->csf.event.lock, flags);
error_pending = !list_empty(&kctx->csf.event.error_list);
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.c b/mali_kbase/csf/mali_kbase_csf_firmware.c
index 3e63952..9d1b515 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware.c
@@ -22,6 +22,7 @@
#include "mali_kbase.h"
#include "mali_kbase_csf_firmware_cfg.h"
#include "mali_kbase_csf_firmware_log.h"
+#include "mali_kbase_csf_firmware_core_dump.h"
#include "mali_kbase_csf_trace_buffer.h"
#include "mali_kbase_csf_timeout.h"
#include "mali_kbase_mem.h"
@@ -38,7 +39,6 @@
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
#include <csf/mali_kbase_csf_registers.h>
-
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/firmware.h>
@@ -78,9 +78,11 @@ MODULE_PARM_DESC(fw_debug,
"Enables effective use of a debugger for debugging firmware code.");
#endif
-#define FIRMWARE_HEADER_MAGIC (0xC3F13A6Eul)
-#define FIRMWARE_HEADER_VERSION (0ul)
-#define FIRMWARE_HEADER_LENGTH (0x14ul)
+
+#define FIRMWARE_HEADER_MAGIC (0xC3F13A6Eul)
+#define FIRMWARE_HEADER_VERSION_MAJOR (0ul)
+#define FIRMWARE_HEADER_VERSION_MINOR (3ul)
+#define FIRMWARE_HEADER_LENGTH (0x14ul)
#define CSF_FIRMWARE_ENTRY_SUPPORTED_FLAGS \
(CSF_FIRMWARE_ENTRY_READ | \
@@ -91,12 +93,13 @@ MODULE_PARM_DESC(fw_debug,
CSF_FIRMWARE_ENTRY_ZERO | \
CSF_FIRMWARE_ENTRY_CACHE_MODE)
-#define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE (0)
-#define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1)
-#define CSF_FIRMWARE_ENTRY_TYPE_FUTF_TEST (2)
-#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3)
-#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4)
+#define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE (0)
+#define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1)
+#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3)
+#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4)
#define CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA (6)
+#define CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST (7)
+#define CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP (9)
#define CSF_FIRMWARE_CACHE_MODE_NONE (0ul << 3)
#define CSF_FIRMWARE_CACHE_MODE_CACHED (1ul << 3)
@@ -118,7 +121,6 @@ MODULE_PARM_DESC(fw_debug,
(GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \
GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK)
-
static inline u32 input_page_read(const u32 *const input, const u32 offset)
{
WARN_ON(offset % sizeof(u32));
@@ -199,8 +201,8 @@ static int setup_shared_iface_static_region(struct kbase_device *kbdev)
if (!interface)
return -EINVAL;
- reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
- interface->num_pages_aligned, KBASE_REG_ZONE_MCU_SHARED);
+ reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0,
+ interface->num_pages_aligned, KBASE_REG_ZONE_MCU_SHARED);
if (reg) {
mutex_lock(&kbdev->csf.reg_lock);
ret = kbase_add_va_region_rbtree(kbdev, reg,
@@ -284,24 +286,52 @@ static void boot_csf_firmware(struct kbase_device *kbdev)
{
kbase_csf_firmware_enable_mcu(kbdev);
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+ kbase_debug_coresight_csf_state_request(kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED);
+
+ if (!kbase_debug_coresight_csf_state_wait(kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED))
+ dev_err(kbdev->dev, "Timeout waiting for CoreSight to be enabled");
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
+
wait_for_firmware_boot(kbdev);
}
-static void wait_ready(struct kbase_device *kbdev)
+/**
+ * wait_ready() - Wait for a previously issued MMU command to complete.
+ *
+ * @kbdev: Kbase device to wait for an MMU command to complete.
+ *
+ * Reset the GPU if the wait for the previously issued command times out.
+ *
+ * Return: 0 on success, error code otherwise.
+ */
+static int wait_ready(struct kbase_device *kbdev)
{
- u32 max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
- u32 val;
+ const ktime_t wait_loop_start = ktime_get_raw();
+ const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms;
+ s64 diff;
- val = kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS));
+ do {
+ unsigned int i;
- /* Wait for a while for the update command to take effect */
- while (--max_loops && (val & AS_STATUS_AS_ACTIVE))
- val = kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS));
+ for (i = 0; i < 1000; i++) {
+ /* Wait for the MMU status to indicate there is no active command */
+ if (!(kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) &
+ AS_STATUS_AS_ACTIVE))
+ return 0;
+ }
- if (max_loops == 0) {
- dev_err(kbdev->dev, "AS_ACTIVE bit stuck when enabling AS0 for MCU, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n");
- queue_work(system_highpri_wq, &kbdev->csf.coredump_work);
- }
+ diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start));
+ } while (diff < mmu_as_inactive_wait_time_ms);
+
+ dev_err(kbdev->dev,
+ "AS_ACTIVE bit stuck for MCU AS. Might be caused by unstable GPU clk/pwr or faulty system");
+ queue_work(system_highpri_wq, &kbdev->csf.coredump_work);
+
+ if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+ kbase_reset_gpu_locked(kbdev);
+
+ return -ETIMEDOUT;
}
static void unload_mmu_tables(struct kbase_device *kbdev)
@@ -316,7 +346,7 @@ static void unload_mmu_tables(struct kbase_device *kbdev)
mutex_unlock(&kbdev->mmu_hw_mutex);
}
-static void load_mmu_tables(struct kbase_device *kbdev)
+static int load_mmu_tables(struct kbase_device *kbdev)
{
unsigned long irq_flags;
@@ -327,7 +357,7 @@ static void load_mmu_tables(struct kbase_device *kbdev)
mutex_unlock(&kbdev->mmu_hw_mutex);
/* Wait for a while for the update command to take effect */
- wait_ready(kbdev);
+ return wait_ready(kbdev);
}
/**
@@ -434,8 +464,8 @@ static void load_fw_image_section(struct kbase_device *kbdev, const u8 *data,
memset(p + copy_len, 0, zi_len);
}
- kbase_sync_single_for_device(kbdev, kbase_dma_addr(page),
- PAGE_SIZE, DMA_TO_DEVICE);
+ kbase_sync_single_for_device(kbdev, kbase_dma_addr_from_tagged(phys[page_num]),
+ PAGE_SIZE, DMA_TO_DEVICE);
kunmap_atomic(p);
}
}
@@ -488,6 +518,7 @@ out:
* @kbdev: Kbase device structure
* @virtual_start: Start of the virtual address range required for an entry allocation
* @virtual_end: End of the virtual address range required for an entry allocation
+ * @flags: Firmware entry flags for comparison with the reusable pages found
* @phys: Pointer to the array of physical (tagged) addresses making up the new
* FW interface entry. It is an output parameter which would be made to
* point to an already existing array allocated for the previously parsed
@@ -508,10 +539,12 @@ out:
*
* Return: true if a large page can be reused, false otherwise.
*/
-static inline bool entry_find_large_page_to_reuse(
- struct kbase_device *kbdev, const u32 virtual_start, const u32 virtual_end,
- struct tagged_addr **phys, struct protected_memory_allocation ***pma,
- u32 num_pages, u32 *num_pages_aligned, bool *is_small_page)
+static inline bool entry_find_large_page_to_reuse(struct kbase_device *kbdev,
+ const u32 virtual_start, const u32 virtual_end,
+ const u32 flags, struct tagged_addr **phys,
+ struct protected_memory_allocation ***pma,
+ u32 num_pages, u32 *num_pages_aligned,
+ bool *is_small_page)
{
struct kbase_csf_firmware_interface *interface = NULL;
struct kbase_csf_firmware_interface *target_interface = NULL;
@@ -528,6 +561,58 @@ static inline bool entry_find_large_page_to_reuse(
*pma = NULL;
+	/* If the section starts at a 2MB-aligned boundary,
+ * then use 2MB page(s) for it.
+ */
+ if (!(virtual_start & (SZ_2M - 1))) {
+ *num_pages_aligned =
+ round_up(*num_pages_aligned, NUM_4K_PAGES_IN_2MB_PAGE);
+ *is_small_page = false;
+ goto out;
+ }
+
+	/* If the section doesn't lie within the same 2MB-aligned region,
+	 * then use 4KB pages, as it would be complicated to use a 2MB page
+	 * for such a section.
+ */
+ if ((virtual_start & ~(SZ_2M - 1)) != (virtual_end & ~(SZ_2M - 1)))
+ goto out;
+
+ /* Find the nearest 2MB aligned section which comes before the current
+ * section.
+ */
+ list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) {
+ const u32 virtual_diff = virtual_start - interface->virtual;
+
+ if (interface->virtual > virtual_end)
+ continue;
+
+ if (interface->virtual & (SZ_2M - 1))
+ continue;
+
+ if ((virtual_diff < virtual_diff_min) && (interface->flags == flags)) {
+ target_interface = interface;
+ virtual_diff_min = virtual_diff;
+ }
+ }
+
+ if (target_interface) {
+ const u32 page_index = virtual_diff_min >> PAGE_SHIFT;
+
+ if (page_index >= target_interface->num_pages_aligned)
+ goto out;
+
+ if (target_interface->phys)
+ *phys = &target_interface->phys[page_index];
+
+ if (target_interface->pma)
+ *pma = &target_interface->pma[page_index / NUM_4K_PAGES_IN_2MB_PAGE];
+
+ *is_small_page = false;
+ reuse_large_page = true;
+ }
+
+out:
return reuse_large_page;
}
@@ -558,6 +643,8 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
u32 num_pages;
u32 num_pages_aligned;
char *name;
+ void *name_entry;
+ unsigned int name_len;
struct tagged_addr *phys = NULL;
struct kbase_csf_firmware_interface *interface = NULL;
bool allocated_pages = false, protected_mode = false;
@@ -566,6 +653,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
struct protected_memory_allocation **pma = NULL;
bool reuse_pages = false;
bool is_small_page = true;
+ bool ignore_page_migration = true;
if (data_end < data_start) {
dev_err(kbdev->dev, "Firmware corrupt, data_end < data_start (0x%x<0x%x)\n",
@@ -608,9 +696,9 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
num_pages = (virtual_end - virtual_start)
>> PAGE_SHIFT;
- reuse_pages = entry_find_large_page_to_reuse(
- kbdev, virtual_start, virtual_end, &phys, &pma,
- num_pages, &num_pages_aligned, &is_small_page);
+ reuse_pages =
+ entry_find_large_page_to_reuse(kbdev, virtual_start, virtual_end, flags, &phys,
+ &pma, num_pages, &num_pages_aligned, &is_small_page);
if (!reuse_pages)
phys = kmalloc_array(num_pages_aligned, sizeof(*phys), GFP_KERNEL);
@@ -628,9 +716,10 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
} else {
if (!reuse_pages) {
ret = kbase_mem_pool_alloc_pages(
- kbase_mem_pool_group_select(
- kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page),
+ kbase_mem_pool_group_select(kbdev, KBASE_MEM_GROUP_CSF_FW,
+ is_small_page),
num_pages_aligned, phys, false, NULL);
+ ignore_page_migration = false;
}
}
@@ -646,21 +735,24 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
data_start, data_end);
/* Allocate enough memory for the struct kbase_csf_firmware_interface and
- * the name of the interface. An extra byte is allocated to place a
- * NUL-terminator in. This should already be included according to the
- * specification but here we add it anyway to be robust against a
- * corrupt firmware image.
+ * the name of the interface.
*/
- interface = kmalloc(sizeof(*interface) +
- size - INTERFACE_ENTRY_NAME_OFFSET + 1, GFP_KERNEL);
+ name_entry = (void *)entry + INTERFACE_ENTRY_NAME_OFFSET;
+ name_len = strnlen(name_entry, size - INTERFACE_ENTRY_NAME_OFFSET);
+ if (size < (INTERFACE_ENTRY_NAME_OFFSET + name_len + 1 + sizeof(u32))) {
+ dev_err(kbdev->dev, "Memory setup entry too short to contain virtual_exe_start");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ interface = kmalloc(sizeof(*interface) + name_len + 1, GFP_KERNEL);
if (!interface) {
ret = -ENOMEM;
goto out;
}
name = (void *)(interface + 1);
- memcpy(name, entry + (INTERFACE_ENTRY_NAME_OFFSET / sizeof(*entry)),
- size - INTERFACE_ENTRY_NAME_OFFSET);
- name[size - INTERFACE_ENTRY_NAME_OFFSET] = 0;
+ memcpy(name, name_entry, name_len);
+ name[name_len] = 0;
interface->name = name;
interface->phys = phys;
@@ -675,6 +767,11 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
interface->data_end = data_end;
interface->pma = pma;
+ /* Discover the virtual execution address field after the end of the name
+	 * field, taking into account the NUL-termination character.
+ */
+ interface->virtual_exe_start = *((u32 *)(name_entry + name_len + 1));
+
mem_flags = convert_mem_flags(kbdev, flags, &cache_mode);
if (flags & CSF_FIRMWARE_ENTRY_SHARED) {
@@ -732,7 +829,8 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu,
virtual_start >> PAGE_SHIFT, phys,
num_pages_aligned, mem_flags,
- KBASE_MEM_GROUP_CSF_FW, NULL);
+ KBASE_MEM_GROUP_CSF_FW, NULL, NULL,
+ ignore_page_migration);
if (ret != 0) {
dev_err(kbdev->dev, "Failed to insert firmware pages\n");
@@ -959,13 +1057,28 @@ static int load_firmware_entry(struct kbase_device *kbdev, const struct kbase_cs
return -EINVAL;
}
return parse_build_info_metadata_entry(kbdev, fw, entry, size);
- }
-
- if (!optional) {
- dev_err(kbdev->dev,
- "Unsupported non-optional entry type %u in firmware\n",
- type);
- return -EINVAL;
+ case CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST:
+ /* Function call list section */
+ if (size < FUNC_CALL_LIST_ENTRY_NAME_OFFSET + sizeof(*entry)) {
+ dev_err(kbdev->dev, "Function call list entry too short (size=%u)\n",
+ size);
+ return -EINVAL;
+ }
+ kbase_csf_firmware_log_parse_logging_call_list_entry(kbdev, entry);
+ return 0;
+ case CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP:
+ /* Core Dump section */
+ if (size < CORE_DUMP_ENTRY_START_ADDR_OFFSET + sizeof(*entry)) {
+ dev_err(kbdev->dev, "FW Core dump entry too short (size=%u)\n", size);
+ return -EINVAL;
+ }
+ return kbase_csf_firmware_core_dump_entry_parse(kbdev, entry);
+ default:
+ if (!optional) {
+ dev_err(kbdev->dev, "Unsupported non-optional entry type %u in firmware\n",
+ type);
+ return -EINVAL;
+ }
}
return 0;
@@ -1182,40 +1295,80 @@ static int parse_capabilities(struct kbase_device *kbdev)
return 0;
}
+static inline void access_firmware_memory_common(struct kbase_device *kbdev,
+ struct kbase_csf_firmware_interface *interface, u32 offset_bytes,
+ u32 *value, const bool read)
+{
+ u32 page_num = offset_bytes >> PAGE_SHIFT;
+ u32 offset_in_page = offset_bytes & ~PAGE_MASK;
+ struct page *target_page = as_page(interface->phys[page_num]);
+ uintptr_t cpu_addr = (uintptr_t)kmap_atomic(target_page);
+ u32 *addr = (u32 *)(cpu_addr + offset_in_page);
+
+ if (read) {
+ kbase_sync_single_for_device(kbdev,
+ kbase_dma_addr_from_tagged(interface->phys[page_num]) + offset_in_page,
+ sizeof(u32), DMA_BIDIRECTIONAL);
+ *value = *addr;
+ } else {
+ *addr = *value;
+ kbase_sync_single_for_device(kbdev,
+ kbase_dma_addr_from_tagged(interface->phys[page_num]) + offset_in_page,
+ sizeof(u32), DMA_BIDIRECTIONAL);
+ }
+
+ kunmap_atomic((u32 *)cpu_addr);
+}
+
static inline void access_firmware_memory(struct kbase_device *kbdev,
u32 gpu_addr, u32 *value, const bool read)
{
- struct kbase_csf_firmware_interface *interface;
+ struct kbase_csf_firmware_interface *interface, *access_interface = NULL;
+ u32 offset_bytes = 0;
list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) {
if ((gpu_addr >= interface->virtual) &&
(gpu_addr < interface->virtual + (interface->num_pages << PAGE_SHIFT))) {
- u32 offset_bytes = gpu_addr - interface->virtual;
- u32 page_num = offset_bytes >> PAGE_SHIFT;
- u32 offset_in_page = offset_bytes & ~PAGE_MASK;
- struct page *target_page = as_page(
- interface->phys[page_num]);
- u32 *cpu_addr = kmap_atomic(target_page);
-
- if (read) {
- kbase_sync_single_for_device(kbdev,
- kbase_dma_addr(target_page) + offset_in_page,
- sizeof(u32), DMA_BIDIRECTIONAL);
-
- *value = cpu_addr[offset_in_page >> 2];
- } else {
- cpu_addr[offset_in_page >> 2] = *value;
+ offset_bytes = gpu_addr - interface->virtual;
+ access_interface = interface;
+ break;
+ }
+ }
- kbase_sync_single_for_device(kbdev,
- kbase_dma_addr(target_page) + offset_in_page,
- sizeof(u32), DMA_BIDIRECTIONAL);
- }
+ if (access_interface)
+ access_firmware_memory_common(kbdev, access_interface, offset_bytes, value, read);
+ else
+ dev_warn(kbdev->dev, "Invalid GPU VA %x passed", gpu_addr);
+}
- kunmap_atomic(cpu_addr);
- return;
+static inline void access_firmware_memory_exe(struct kbase_device *kbdev,
+ u32 gpu_addr, u32 *value, const bool read)
+{
+ struct kbase_csf_firmware_interface *interface, *access_interface = NULL;
+ u32 offset_bytes = 0;
+
+ list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) {
+ if ((gpu_addr >= interface->virtual_exe_start) &&
+ (gpu_addr < interface->virtual_exe_start +
+ (interface->num_pages << PAGE_SHIFT))) {
+ offset_bytes = gpu_addr - interface->virtual_exe_start;
+ access_interface = interface;
+
+ /* If there's an overlap in execution address range between a moved and a
+	 * non-moved area, always prefer the moved one. The idea is that FW may
+ * move sections around during init time, but after the layout is settled,
+ * any moved sections are going to override non-moved areas at the same
+ * location.
+ */
+ if (interface->virtual_exe_start != interface->virtual)
+ break;
}
}
- dev_warn(kbdev->dev, "Invalid GPU VA %x passed\n", gpu_addr);
+
+ if (access_interface)
+ access_firmware_memory_common(kbdev, access_interface, offset_bytes, value, read);
+ else
+ dev_warn(kbdev->dev, "Invalid GPU VA %x passed", gpu_addr);
}
void kbase_csf_read_firmware_memory(struct kbase_device *kbdev,
@@ -1230,6 +1383,18 @@ void kbase_csf_update_firmware_memory(struct kbase_device *kbdev,
access_firmware_memory(kbdev, gpu_addr, &value, false);
}
+void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev,
+ u32 gpu_addr, u32 *value)
+{
+ access_firmware_memory_exe(kbdev, gpu_addr, value, true);
+}
+
+void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev,
+ u32 gpu_addr, u32 value)
+{
+ access_firmware_memory_exe(kbdev, gpu_addr, &value, false);
+}
+
void kbase_csf_firmware_cs_input(
const struct kbase_csf_cmd_stream_info *const info, const u32 offset,
const u32 value)
@@ -1467,11 +1632,10 @@ static bool global_request_complete(struct kbase_device *const kbdev,
return complete;
}
-static int wait_for_global_request(struct kbase_device *const kbdev,
- u32 const req_mask)
+static int wait_for_global_request_with_timeout(struct kbase_device *const kbdev,
+ u32 const req_mask, unsigned int timeout_ms)
{
- const long wait_timeout =
- kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
+ const long wait_timeout = kbase_csf_timeout_in_jiffies(timeout_ms);
long remaining;
int err = 0;
@@ -1480,10 +1644,9 @@ static int wait_for_global_request(struct kbase_device *const kbdev,
wait_timeout);
if (!remaining) {
- dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for global request %x to complete",
- kbase_backend_get_cycle_cnt(kbdev),
- kbdev->csf.fw_timeout_ms,
- req_mask);
+ dev_warn(kbdev->dev,
+ "[%llu] Timeout (%d ms) waiting for global request %x to complete",
+ kbase_backend_get_cycle_cnt(kbdev), timeout_ms, req_mask);
err = -ETIMEDOUT;
}
@@ -1491,6 +1654,11 @@ static int wait_for_global_request(struct kbase_device *const kbdev,
return err;
}
+static int wait_for_global_request(struct kbase_device *const kbdev, u32 const req_mask)
+{
+ return wait_for_global_request_with_timeout(kbdev, req_mask, kbdev->csf.fw_timeout_ms);
+}
+
static void set_global_request(
const struct kbase_csf_global_iface *const global_iface,
u32 const req_mask)
@@ -1563,6 +1731,90 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
kbdev->csf.gpu_idle_dur_count);
}
+static bool global_debug_request_complete(struct kbase_device *const kbdev, u32 const req_mask)
+{
+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+ bool complete = false;
+ unsigned long flags;
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+
+ if ((kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK) & req_mask) ==
+ (kbase_csf_firmware_global_input_read(global_iface, GLB_DEBUG_REQ) & req_mask))
+ complete = true;
+
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+ return complete;
+}
+
+static void set_global_debug_request(const struct kbase_csf_global_iface *const global_iface,
+ u32 const req_mask)
+{
+ u32 glb_debug_req;
+
+ kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev);
+
+ glb_debug_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK);
+ glb_debug_req ^= req_mask;
+
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_debug_req, req_mask);
+}
+
+static void request_fw_core_dump(
+ const struct kbase_csf_global_iface *const global_iface)
+{
+ uint32_t run_mode = GLB_DEBUG_REQ_RUN_MODE_SET(0, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP);
+
+ set_global_debug_request(global_iface, GLB_DEBUG_REQ_DEBUG_RUN_MASK | run_mode);
+
+ set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK);
+}
+
+int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev)
+{
+ const struct kbase_csf_global_iface *const global_iface =
+ &kbdev->csf.global_iface;
+ unsigned long flags;
+ int ret;
+
+ /* Serialize CORE_DUMP requests. */
+ mutex_lock(&kbdev->csf.reg_lock);
+
+ /* Update GLB_REQ with CORE_DUMP request and make firmware act on it. */
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ request_fw_core_dump(global_iface);
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+ /* Wait for firmware to acknowledge completion of the CORE_DUMP request. */
+ ret = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK);
+ if (!ret)
+ WARN_ON(!global_debug_request_complete(kbdev, GLB_DEBUG_REQ_DEBUG_RUN_MASK));
+
+ mutex_unlock(&kbdev->csf.reg_lock);
+
+ return ret;
+}
+
+/**
+ * kbasep_enable_rtu - Enable Ray Tracing Unit on powering up shader core
+ *
+ * @kbdev: The kbase device structure of the device
+ *
+ * This function needs to be called to enable the Ray Tracing Unit
+ * by writing SHADER_PWRFEATURES, but only when the host controls shader core power.
+ */
+static void kbasep_enable_rtu(struct kbase_device *kbdev)
+{
+ const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+
+ if (gpu_id < GPU_ID2_PRODUCT_MAKE(12, 8, 3, 0))
+ return;
+
+ if (kbdev->csf.firmware_hctl_core_pwr)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_PWRFEATURES), 1);
+}
static void global_init(struct kbase_device *const kbdev, u64 core_mask)
{
@@ -1571,7 +1823,7 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)
GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK |
- GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK;
+ GLB_REQ_DEBUG_CSF_REQ_MASK | GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK;
const struct kbase_csf_global_iface *const global_iface =
&kbdev->csf.global_iface;
@@ -1586,16 +1838,34 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)
kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ kbasep_enable_rtu(kbdev);
+
/* Update shader core allocation enable mask */
enable_endpoints_global(global_iface, core_mask);
enable_shader_poweroff_timer(kbdev, global_iface);
+#ifndef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
+ /* The GPU idle timer is always enabled for simplicity. Checks will be
+ * done before scheduling the GPU idle worker to see if it is
+ * appropriate for the current power policy.
+ */
+ enable_gpu_idle_timer(kbdev);
+#endif
+
set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev));
/* Unmask the interrupts */
kbase_csf_firmware_global_input(global_iface,
GLB_ACK_IRQ_MASK, ack_irq_mask);
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+ /* Enable FW MCU read/write debug interfaces */
+ kbase_csf_firmware_global_input_mask(
+ global_iface, GLB_DEBUG_ACK_IRQ_MASK,
+ GLB_DEBUG_REQ_FW_AS_READ_MASK | GLB_DEBUG_REQ_FW_AS_WRITE_MASK,
+ GLB_DEBUG_REQ_FW_AS_READ_MASK | GLB_DEBUG_REQ_FW_AS_WRITE_MASK);
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
+
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
@@ -1710,6 +1980,12 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work)
if (WARN_ON(err))
return;
+#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
+ err = kbase_csf_firmware_cfg_enable_host_ctrl_sc_rails(kbdev);
+ if (WARN_ON(err))
+ return;
+#endif
+
/* Reboot the firmware */
kbase_csf_firmware_enable_mcu(kbdev);
}
@@ -1752,14 +2028,6 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev)
kbase_pm_update_state(kbdev);
}
-/**
- * Converts the dur_us provided to the idle count the firmware can use.
- *
- * Return: the firmware count corresponding to dur_us to use in MCU.
- *
- * @kbdev: Kernel base device pointer
- * @dur_us: The duration in microseconds.
- */
static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_us)
{
#define MICROSECONDS_PER_SECOND 1000000u
@@ -1784,9 +2052,9 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_u
"Can't get the timestamp frequency, use cycle counter format with firmware idle hysteresis!");
}
- /* Formula for dur_val = ((dur_us/MICROSECONDS_PER_SECOND) * freq_HZ) >> 10) */
+ /* Formula for dur_val = ((dur_us/1000000) * freq_HZ) >> 10) */
dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT;
- dur_val = div_u64(dur_val, MICROSECONDS_PER_SECOND);
+ dur_val = div_u64(dur_val, 1000000);
/* Interface limits the value field to S32_MAX */
cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val;
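[Worked example, editor's illustration rather than part of the patch: tracing convert_dur_to_idle_count() for a hypothetical 38.4 MHz system timestamp source and dur_us = 10000 gives dur_val = (10000 * 38400000) >> 10 = 375000000, then 375000000 / 1000000 = 375; this is well below S32_MAX, so a count of 375 is what ends up in the firmware idle timer field, before the timer-source selection bit is folded in later in the function.]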
@@ -1809,7 +2077,7 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev)
u32 dur;
kbase_csf_scheduler_spin_lock(kbdev, &flags);
- dur = kbdev->csf.gpu_idle_hysteresis_ms;
+ dur = kbdev->csf.gpu_idle_hysteresis_us;
kbase_csf_scheduler_spin_unlock(kbdev, flags);
return dur;
@@ -1839,7 +2107,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
mutex_lock(&kbdev->fw_load_lock);
if (unlikely(!kbdev->csf.firmware_inited)) {
kbase_csf_scheduler_spin_lock(kbdev, &flags);
- kbdev->csf.gpu_idle_hysteresis_ms = dur;
+ kbdev->csf.gpu_idle_hysteresis_us = dur;
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
kbase_csf_scheduler_spin_unlock(kbdev, flags);
mutex_unlock(&kbdev->fw_load_lock);
@@ -1855,8 +2123,21 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
return kbdev->csf.gpu_idle_dur_count;
}
+#ifndef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
+ /* The 'reg_lock' is also taken and is held until the update is
+ * complete, to ensure that updates of the idle timer value by multiple
+ * users are serialized.
+ */
+ mutex_lock(&kbdev->csf.reg_lock);
+ /* The firmware only reads the new idle timer value when the timer is
+ * disabled.
+ */
+#endif
+
+#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
kbase_csf_scheduler_lock(kbdev);
if (kbdev->csf.scheduler.gpu_idle_fw_timer_enabled) {
+#endif
/* The firmware only reads the new idle timer value when the timer is
* disabled.
*/
@@ -1867,24 +2148,28 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK);
kbase_csf_scheduler_spin_lock(kbdev, &flags);
- kbdev->csf.gpu_idle_hysteresis_ms = dur;
+ kbdev->csf.gpu_idle_hysteresis_us = dur;
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK);
+#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
} else {
/* Record the new values. Would be used later when timer is
* enabled
*/
kbase_csf_scheduler_spin_lock(kbdev, &flags);
- kbdev->csf.gpu_idle_hysteresis_ms = dur;
+ kbdev->csf.gpu_idle_hysteresis_us = dur;
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
kbase_csf_scheduler_unlock(kbdev);
+#else
+ mutex_unlock(&kbdev->csf.reg_lock);
+#endif
dev_dbg(kbdev->dev, "GPU suspend timeout updated: %i ms (0x%.8x)",
- kbdev->csf.gpu_idle_hysteresis_ms,
+ kbdev->csf.gpu_idle_hysteresis_us,
kbdev->csf.gpu_idle_dur_count);
kbase_csf_scheduler_pm_idle(kbdev);
@@ -1897,7 +2182,6 @@ end:
static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us)
{
-#define PWROFF_VAL_UNIT_SHIFT (10)
/* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */
u64 freq = arch_timer_get_cntfrq();
u64 dur_val = dur_us;
@@ -2041,10 +2325,22 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
kbdev->csf.fw_timeout_ms =
kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT);
+#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
+ /* Set to the lowest possible value for FW to immediately write
+ * to the power off register to disable the cores.
+ */
+ kbdev->csf.mcu_core_pwroff_dur_count = 1;
+#else
+ kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US;
+ kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count(
+ kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US);
+#endif
+
INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
INIT_LIST_HEAD(&kbdev->csf.firmware_config);
INIT_LIST_HEAD(&kbdev->csf.firmware_timeline_metadata);
INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
+ INIT_LIST_HEAD(&kbdev->csf.user_reg.list);
INIT_WORK(&kbdev->csf.firmware_reload_work,
kbase_csf_firmware_reload_worker);
INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker);
@@ -2057,14 +2353,19 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
return 0;
}
+void kbase_csf_firmware_early_term(struct kbase_device *kbdev)
+{
+ mutex_destroy(&kbdev->csf.reg_lock);
+}
+
int kbase_csf_firmware_late_init(struct kbase_device *kbdev)
{
- kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
+ kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC;
#ifdef KBASE_PM_RUNTIME
if (kbase_pm_gpu_sleep_allowed(kbdev))
- kbdev->csf.gpu_idle_hysteresis_ms /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
+ kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
#endif
- WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
+ WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us);
#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
@@ -2076,7 +2377,7 @@ int kbase_csf_firmware_late_init(struct kbase_device *kbdev)
kbdev->csf.mcu_core_pwroff_dur_count = 1;
#else
kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
- kbdev, msec_to_usec_saturate(kbdev->csf.gpu_idle_hysteresis_ms));
+ kbdev, kbdev->csf.gpu_idle_hysteresis_us);
kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US;
kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count(
kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US);
@@ -2158,7 +2459,8 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
version_minor = mcu_fw->data[4];
version_major = mcu_fw->data[5];
- if (version_major != FIRMWARE_HEADER_VERSION) {
+ if (version_major != FIRMWARE_HEADER_VERSION_MAJOR ||
+ version_minor != FIRMWARE_HEADER_VERSION_MINOR) {
dev_err(kbdev->dev,
"Firmware header version %d.%d not understood\n",
version_major, version_minor);
@@ -2219,11 +2521,21 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
goto err_out;
}
+#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
+ ret = kbase_csf_firmware_cfg_enable_host_ctrl_sc_rails(kbdev);
+ if (ret != 0) {
+ dev_err(kbdev->dev, "Failed to enable SC PM WA");
+ goto err_out;
+ }
+#endif
+
/* Make sure L2 cache is powered up */
kbase_pm_wait_for_l2_powered(kbdev);
/* Load the MMU tables into the selected address space */
- load_mmu_tables(kbdev);
+ ret = load_mmu_tables(kbdev);
+ if (ret != 0)
+ goto err_out;
boot_csf_firmware(kbdev);
@@ -2265,6 +2577,9 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
goto err_out;
}
+ if (kbdev->csf.fw_core_dump.available)
+ kbase_csf_firmware_core_dump_init(kbdev);
+
/* Firmware loaded successfully, ret = 0 */
KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_BOOT, NULL,
(((u64)version_hash) << 32) |
@@ -2376,14 +2691,125 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev)
*/
kbase_mcu_shared_interface_region_tracker_term(kbdev);
- mutex_destroy(&kbdev->csf.reg_lock);
-
kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu);
/* Release the address space */
kbdev->as_free |= MCU_AS_BITMASK;
}
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+int kbase_csf_firmware_mcu_register_write(struct kbase_device *const kbdev, u32 const reg_addr,
+ u32 const reg_val)
+{
+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+ unsigned long flags;
+ int err;
+ u32 glb_req;
+
+ mutex_lock(&kbdev->csf.reg_lock);
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+
+ /* Set the address and value to write */
+ kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN0, reg_addr);
+ kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN1, reg_val);
+
+ /* Set the Global Debug request for FW MCU write */
+ glb_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK);
+ glb_req ^= GLB_DEBUG_REQ_FW_AS_WRITE_MASK;
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_req,
+ GLB_DEBUG_REQ_FW_AS_WRITE_MASK);
+
+ set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK);
+
+ /* Notify FW about the Global Debug request */
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+ err = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK);
+
+ mutex_unlock(&kbdev->csf.reg_lock);
+
+ dev_dbg(kbdev->dev, "w: reg %08x val %08x", reg_addr, reg_val);
+
+ return err;
+}
+
+int kbase_csf_firmware_mcu_register_read(struct kbase_device *const kbdev, u32 const reg_addr,
+ u32 *reg_val)
+{
+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+ unsigned long flags;
+ int err;
+ u32 glb_req;
+
+ if (WARN_ON(reg_val == NULL))
+ return -EINVAL;
+
+ mutex_lock(&kbdev->csf.reg_lock);
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+
+ /* Set the address to read */
+ kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN0, reg_addr);
+
+ /* Set the Global Debug request for FW MCU read */
+ glb_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK);
+ glb_req ^= GLB_DEBUG_REQ_FW_AS_READ_MASK;
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_req,
+ GLB_DEBUG_REQ_FW_AS_READ_MASK);
+
+ set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK);
+
+ /* Notify FW about the Global Debug request */
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+ err = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK);
+
+ if (!err) {
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ *reg_val = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ARG_OUT0);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ }
+
+ mutex_unlock(&kbdev->csf.reg_lock);
+
+ dev_dbg(kbdev->dev, "r: reg %08x val %08x", reg_addr, *reg_val);
+
+ return err;
+}
+
+int kbase_csf_firmware_mcu_register_poll(struct kbase_device *const kbdev, u32 const reg_addr,
+ u32 const val_mask, u32 const reg_val)
+{
+ unsigned long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms) + jiffies;
+ u32 read_val;
+
+ dev_dbg(kbdev->dev, "p: reg %08x val %08x mask %08x", reg_addr, reg_val, val_mask);
+
+ while (time_before(jiffies, remaining)) {
+ int err = kbase_csf_firmware_mcu_register_read(kbdev, reg_addr, &read_val);
+
+ if (err) {
+ dev_err(kbdev->dev,
+ "Error reading MCU register value (read_val = %u, expect = %u)\n",
+ read_val, reg_val);
+ return err;
+ }
+
+ if ((read_val & val_mask) == reg_val)
+ return 0;
+ }
+
+ dev_err(kbdev->dev,
+ "Timeout waiting for MCU register value to be set (read_val = %u, expect = %u)\n",
+ read_val, reg_val);
+
+ return -ETIMEDOUT;
+}
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
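The three Coresight helpers above share the same GLB_DEBUG request/acknowledge handshake. A minimal caller sketch follows; the register addresses, the READY bit and the helper name are illustrative assumptions only, not part of the driver.

    #if IS_ENABLED(CONFIG_MALI_CORESIGHT)
    /* Sketch only: enable a hypothetical debug unit in the MCU address space
     * and wait for it to report ready. Addresses and masks are made up for
     * the example; the surrounding kbase headers are assumed.
     */
    static int example_enable_mcu_debug_unit(struct kbase_device *kbdev)
    {
    	const u32 ctrl_reg = 0x41000000;   /* hypothetical control register */
    	const u32 status_reg = 0x41000004; /* hypothetical status register */
    	int err;

    	/* Ask the firmware to write 1 into the control register */
    	err = kbase_csf_firmware_mcu_register_write(kbdev, ctrl_reg, 0x1);
    	if (err)
    		return err;

    	/* Poll via the firmware until bit 0 of the status register reads 1 */
    	return kbase_csf_firmware_mcu_register_poll(kbdev, status_reg, 0x1, 0x1);
    }
    #endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */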
+
void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev)
{
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
@@ -2429,10 +2855,11 @@ void kbase_csf_firmware_ping(struct kbase_device *const kbdev)
kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
-int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev)
+int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int wait_timeout_ms)
{
kbase_csf_firmware_ping(kbdev);
- return wait_for_global_request(kbdev, GLB_REQ_PING_MASK);
+
+ return wait_for_global_request_with_timeout(kbdev, GLB_REQ_PING_MASK, wait_timeout_ms);
}
int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev,
@@ -2471,7 +2898,7 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev)
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
}
-void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
+int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
{
int err;
@@ -2511,12 +2938,14 @@ void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
}
}
- if (err) {
+ if (unlikely(err)) {
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
kbase_reset_gpu(kbdev);
}
KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev);
+
+ return err;
}
void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
@@ -2730,9 +3159,8 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
if (!page_list)
goto page_list_alloc_error;
- ret = kbase_mem_pool_alloc_pages(
- &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
- num_pages, phys, false, NULL);
+ ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
+ phys, false, NULL);
if (ret <= 0)
goto phys_mem_pool_alloc_error;
@@ -2743,8 +3171,8 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
if (!cpu_addr)
goto vmap_error;
- va_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
- num_pages, KBASE_REG_ZONE_MCU_SHARED);
+ va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages,
+ KBASE_REG_ZONE_MCU_SHARED);
if (!va_reg)
goto va_region_alloc_error;
@@ -2760,7 +3188,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn,
&phys[0], num_pages, gpu_map_properties,
- KBASE_MEM_GROUP_CSF_FW, NULL);
+ KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false);
if (ret)
goto mmu_insert_pages_error;
@@ -2821,4 +3249,3 @@ void kbase_csf_firmware_mcu_shared_mapping_term(
vunmap(csf_mapping->cpu_addr);
kfree(csf_mapping->phys);
}
-
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.h b/mali_kbase/csf/mali_kbase_csf_firmware.h
index 733057e..9d7ed64 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware.h
+++ b/mali_kbase/csf/mali_kbase_csf_firmware.h
@@ -246,7 +246,6 @@ void kbase_csf_firmware_csg_input_mask(
u32 kbase_csf_firmware_csg_output(
const struct kbase_csf_cmd_stream_group_info *info, u32 offset);
-
/**
* struct kbase_csf_global_iface - Global CSF interface
* provided by the firmware.
@@ -364,6 +363,44 @@ void kbase_csf_update_firmware_memory(struct kbase_device *kbdev,
u32 gpu_addr, u32 value);
/**
+ * kbase_csf_read_firmware_memory_exe - Read a value in a GPU address in the
+ * region of its final execution location.
+ *
+ * @kbdev: Device pointer
+ * @gpu_addr: GPU address to read
+ * @value: Output pointer to which the read value will be written
+ *
+ * This function reads a value at a GPU address that belongs to a private loaded
+ * firmware memory region based on its final execution location. The function
+ * assumes that the location is not permanently mapped on the CPU address space,
+ * therefore it maps it and then unmaps it to access it independently. This function
+ * needs to be used when accessing firmware memory regions which will be moved to
+ * their final execution location during firmware boot using an address based on the
+ * final execution location.
+ */
+void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev,
+ u32 gpu_addr, u32 *value);
+
+/**
+ * kbase_csf_update_firmware_memory_exe - Write a value in a GPU address in the
+ * region of its final execution location.
+ *
+ * @kbdev: Device pointer
+ * @gpu_addr: GPU address to write
+ * @value: Value to write
+ *
+ * This function writes a value at a GPU address that belongs to a private loaded
+ * firmware memory region based on its final execution location. The function
+ * assumes that the location is not permanently mapped on the CPU address space,
+ * therefore it maps it and then unmaps it to access it independently. This function
+ * needs to be used when accessing firmware memory regions which will be moved to
+ * their final execution location during firmware boot using an address based on the
+ * final execution location.
+ */
+void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev,
+ u32 gpu_addr, u32 value);
+
+/**
* kbase_csf_firmware_early_init() - Early initialization for the firmware.
* @kbdev: Kbase device
*
@@ -374,6 +411,16 @@ void kbase_csf_update_firmware_memory(struct kbase_device *kbdev,
int kbase_csf_firmware_early_init(struct kbase_device *kbdev);
/**
+ * kbase_csf_firmware_early_term() - Terminate resources related to the firmware
+ * after the firmware unload has been done.
+ *
+ * @kbdev: Device pointer
+ *
+ * This should be called only when kbase probe fails or the module is removed (rmmod).
+ */
+void kbase_csf_firmware_early_term(struct kbase_device *kbdev);
+
+/**
* kbase_csf_firmware_late_init() - Late initialization for the firmware.
* @kbdev: Kbase device
*
@@ -402,6 +449,50 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev);
*/
void kbase_csf_firmware_unload_term(struct kbase_device *kbdev);
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+/**
+ * kbase_csf_firmware_mcu_register_write - Write to MCU register
+ *
+ * @kbdev: Instance of a gpu platform device that implements a csf interface.
+ * @reg_addr: Register address to write into
+ * @reg_val: Value to be written
+ *
+ * Write a desired value to a register in MCU address space.
+ *
+ * Return: 0 on success, or negative on failure.
+ */
+int kbase_csf_firmware_mcu_register_write(struct kbase_device *const kbdev, u32 const reg_addr,
+ u32 const reg_val);
+/**
+ * kbase_csf_firmware_mcu_register_read - Read from MCU register
+ *
+ * @kbdev: Instance of a gpu platform device that implements a csf interface.
+ * @reg_addr: Register address to read from
+ * @reg_val: Value as present in reg_addr register
+ *
+ * Read a value from MCU address space.
+ *
+ * Return: 0 on success, or negative on failure.
+ */
+int kbase_csf_firmware_mcu_register_read(struct kbase_device *const kbdev, u32 const reg_addr,
+ u32 *reg_val);
+
+/**
+ * kbase_csf_firmware_mcu_register_poll - Poll MCU register
+ *
+ * @kbdev: Instance of a gpu platform device that implements a csf interface.
+ * @reg_addr: Register address to read from
+ * @val_mask: Value to mask the read value for comparison
+ * @reg_val: Value to be compared against
+ *
+ * Continue to read a value from MCU address space until it matches given mask and value.
+ *
+ * Return: 0 on success, or negative on failure.
+ */
+int kbase_csf_firmware_mcu_register_poll(struct kbase_device *const kbdev, u32 const reg_addr,
+ u32 const val_mask, u32 const reg_val);
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
+
/**
* kbase_csf_firmware_ping - Send the ping request to firmware.
*
@@ -415,13 +506,14 @@ void kbase_csf_firmware_ping(struct kbase_device *kbdev);
* kbase_csf_firmware_ping_wait - Send the ping request to firmware and waits.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @wait_timeout_ms: Timeout to get the acknowledgment for PING request from FW.
*
* The function sends the ping request to firmware and waits to confirm it is
* alive.
*
* Return: 0 on success, or negative on failure.
*/
-int kbase_csf_firmware_ping_wait(struct kbase_device *kbdev);
+int kbase_csf_firmware_ping_wait(struct kbase_device *kbdev, unsigned int wait_timeout_ms);
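Since kbase_csf_firmware_ping_wait() now takes an explicit timeout, a caller that previously relied on the default wait would typically forward the timeout it already tracks; a hedged sketch (the warning message is illustrative):

    int err = kbase_csf_firmware_ping_wait(kbdev, kbdev->csf.fw_timeout_ms);

    if (err)
    	dev_warn(kbdev->dev, "PING request not acknowledged by firmware");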
/**
* kbase_csf_firmware_set_timeout - Set a hardware endpoint progress timeout.
@@ -458,8 +550,10 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev);
* This function needs to be called after kbase_csf_enter_protected_mode() to
* wait for the GPU to actually enter protected mode. GPU reset is triggered if
* the wait is unsuccessful.
+ *
+ * Return: 0 on success, or negative on failure.
*/
-void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev);
+int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev);
static inline bool kbase_csf_firmware_mcu_halted(struct kbase_device *kbdev)
{
@@ -816,5 +910,16 @@ int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev);
* GPU must be powered during this call.
*/
void kbase_csf_debug_dump_registers(struct kbase_device *kbdev);
+/**
+ * kbase_csf_firmware_req_core_dump - Request a firmware core dump
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * Request a firmware core dump and wait for firmware to acknowledge.
+ * Firmware will enter infinite loop after the firmware core dump is created.
+ *
+ * Return: 0 on success, or negative error code on failure.
+ */
+int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev);
#endif
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c
index ef8f328..13a816b 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c
@@ -20,13 +20,22 @@
*/
#include <mali_kbase.h>
-#include "mali_kbase_csf_firmware_cfg.h"
#include <mali_kbase_reset_gpu.h>
#include <linux/version.h>
+#include "mali_kbase_csf_firmware_cfg.h"
+#include "mali_kbase_csf_firmware_log.h"
+
#if CONFIG_SYSFS
#define CSF_FIRMWARE_CFG_SYSFS_DIR_NAME "firmware_config"
+#define CSF_FIRMWARE_CFG_LOG_VERBOSITY_ENTRY_NAME "Log verbosity"
+
+#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
+#define HOST_CONTROLS_SC_RAILS_CFG_ENTRY_NAME "Host controls SC rails"
+#endif
+
+
/**
* struct firmware_config - Configuration item within the MCU firmware
*
@@ -125,7 +134,7 @@ static ssize_t store_fw_cfg(struct kobject *kobj,
if (attr == &fw_cfg_attr_cur) {
unsigned long flags;
- u32 val;
+ u32 val, cur_val;
int ret = kstrtouint(buf, 0, &val);
if (ret) {
@@ -136,11 +145,19 @@ static ssize_t store_fw_cfg(struct kobject *kobj,
return -EINVAL;
}
+#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
+ if (!strcmp(config->name,
+ HOST_CONTROLS_SC_RAILS_CFG_ENTRY_NAME))
+ return -EPERM;
+#endif
+
if ((val < config->min) || (val > config->max))
return -EINVAL;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- if (config->cur_val == val) {
+
+ cur_val = config->cur_val;
+ if (cur_val == val) {
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return count;
}
@@ -177,6 +194,20 @@ static ssize_t store_fw_cfg(struct kobject *kobj,
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ /* Toggle FW logging calls only when the Log verbosity value transitions to or from zero */
+ if (!strcmp(config->name, CSF_FIRMWARE_CFG_LOG_VERBOSITY_ENTRY_NAME) &&
+ (!cur_val || !val)) {
+ ret = kbase_csf_firmware_log_toggle_logging_calls(kbdev, val);
+ if (ret) {
+ /* Undo FW configuration changes */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ config->cur_val = cur_val;
+ kbase_csf_update_firmware_memory(kbdev, config->address, cur_val);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ return ret;
+ }
+ }
+
/* If we can update the config without firmware reset then
* we need to just trigger FIRMWARE_CONFIG_UPDATE.
*/
@@ -330,6 +361,24 @@ int kbase_csf_firmware_cfg_find_config_address(struct kbase_device *kbdev, const
return -ENOENT;
}
+#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
+int kbase_csf_firmware_cfg_enable_host_ctrl_sc_rails(struct kbase_device *kbdev)
+{
+ struct firmware_config *config;
+
+ list_for_each_entry(config, &kbdev->csf.firmware_config, node) {
+ if (strcmp(config->name,
+ HOST_CONTROLS_SC_RAILS_CFG_ENTRY_NAME))
+ continue;
+
+ kbase_csf_update_firmware_memory(kbdev, config->address, 1);
+ return 0;
+ }
+
+ return -ENOENT;
+}
+#endif
+
#else
int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev)
@@ -348,4 +397,11 @@ int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev,
{
return 0;
}
+
+#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
+int kbase_csf_firmware_cfg_enable_host_ctrl_sc_rails(struct kbase_device *kbdev)
+{
+ return 0;
+}
+#endif
#endif /* CONFIG_SYSFS */
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h
index 770fedb..bf99c46 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h
@@ -70,6 +70,22 @@ int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev,
const struct kbase_csf_mcu_fw *const fw,
const u32 *entry, unsigned int size, bool updatable);
+#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
+/**
+ * kbase_csf_firmware_cfg_enable_host_ctrl_sc_rails() - Enable the config in FW to support
+ * Host based control of SC power rails
+ *
+ * Look for the config entry that enables support in FW for the Host based
+ * control of shader core power rails and set it before the initial boot
+ * or reload of firmware.
+ *
+ * @kbdev: Kbase device structure
+ *
+ * Return: 0 if successful, negative error code on failure
+ */
+int kbase_csf_firmware_cfg_enable_host_ctrl_sc_rails(struct kbase_device *kbdev);
+#endif
+
/**
* kbase_csf_firmware_cfg_find_config_address() - Get a FW config option address
*
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.c b/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.c
new file mode 100644
index 0000000..ce8e4af
--- /dev/null
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.c
@@ -0,0 +1,809 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/list.h>
+#include <linux/file.h>
+#include <linux/elf.h>
+#include <linux/elfcore.h>
+
+#include "mali_kbase.h"
+#include "mali_kbase_csf_firmware_core_dump.h"
+#include "backend/gpu/mali_kbase_pm_internal.h"
+
+/* Page size in bytes in use by MCU. */
+#define FW_PAGE_SIZE 4096
+
+/*
+ * FW image header core dump data format supported.
+ * Currently only version 0.1 is supported.
+ */
+#define FW_CORE_DUMP_DATA_VERSION_MAJOR 0
+#define FW_CORE_DUMP_DATA_VERSION_MINOR 1
+
+/* Full version of the image header core dump data format */
+#define FW_CORE_DUMP_DATA_VERSION \
+ ((FW_CORE_DUMP_DATA_VERSION_MAJOR << 8) | FW_CORE_DUMP_DATA_VERSION_MINOR)
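[Editor's note: with the supported major/minor of 0 and 1, FW_CORE_DUMP_DATA_VERSION evaluates to (0 << 8) | 1 = 0x0001, which is the value that kbase_csf_firmware_core_dump_entry_parse() at the end of this file compares the parsed entry against.]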
+
+/* Validity flag to indicate if the MCU registers in the buffer are valid */
+#define FW_MCU_STATUS_MASK 0x1
+#define FW_MCU_STATUS_VALID (1 << 0)
+
+/* Core dump entry fields */
+#define FW_CORE_DUMP_VERSION_INDEX 0
+#define FW_CORE_DUMP_START_ADDR_INDEX 1
+
+/* MCU registers stored by a firmware core dump */
+struct fw_core_dump_mcu {
+ u32 r0;
+ u32 r1;
+ u32 r2;
+ u32 r3;
+ u32 r4;
+ u32 r5;
+ u32 r6;
+ u32 r7;
+ u32 r8;
+ u32 r9;
+ u32 r10;
+ u32 r11;
+ u32 r12;
+ u32 sp;
+ u32 lr;
+ u32 pc;
+};
+
+/* Any ELF definitions used in this file are from elf.h/elfcore.h except
+ * when specific 32-bit versions are required (mainly for the
+ * ELF_PRSTATUS32 note that is used to contain the MCU registers).
+ */
+
+/* - 32-bit version of timeval structures used in ELF32 PRSTATUS note. */
+struct prstatus32_timeval {
+ int tv_sec;
+ int tv_usec;
+};
+
+/* - Structure defining ELF32 PRSTATUS note contents, as defined by the
+ * GNU binutils BFD library used by GDB, in bfd/hosts/x86-64linux.h.
+ * Note: GDB checks for the size of this structure to be 0x94.
+ * Modified pr_reg (array containing the Arm 32-bit MCU registers) to
+ * use u32[18] instead of elf_gregset32_t to prevent introducing new typedefs.
+ */
+struct elf_prstatus32 {
+ struct elf_siginfo pr_info; /* Info associated with signal. */
+ short int pr_cursig; /* Current signal. */
+ unsigned int pr_sigpend; /* Set of pending signals. */
+ unsigned int pr_sighold; /* Set of held signals. */
+ pid_t pr_pid;
+ pid_t pr_ppid;
+ pid_t pr_pgrp;
+ pid_t pr_sid;
+ struct prstatus32_timeval pr_utime; /* User time. */
+ struct prstatus32_timeval pr_stime; /* System time. */
+ struct prstatus32_timeval pr_cutime; /* Cumulative user time. */
+ struct prstatus32_timeval pr_cstime; /* Cumulative system time. */
+ u32 pr_reg[18]; /* GP registers. */
+ int pr_fpvalid; /* True if math copro being used. */
+};
+
+/**
+ * struct fw_core_dump_data - Context for seq_file operations used on 'fw_core_dump'
+ * debugfs file.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ */
+struct fw_core_dump_data {
+ struct kbase_device *kbdev;
+};
+
+/**
+ * struct fw_core_dump_seq_off - Iterator for seq_file operations used on 'fw_core_dump'
+ * debugfs file.
+ * @interface: current firmware memory interface
+ * @page_num: current page number (0..) within @interface
+ */
+struct fw_core_dump_seq_off {
+ struct kbase_csf_firmware_interface *interface;
+ u32 page_num;
+};
+
+/**
+ * fw_get_core_dump_mcu - Get the MCU registers saved by a firmware core dump
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @regs: Pointer to a core dump mcu struct where the MCU registers are copied
+ * to. Should be allocated by the caller.
+ *
+ * Return: 0 if successfully copied the MCU registers, negative error code otherwise.
+ */
+static int fw_get_core_dump_mcu(struct kbase_device *kbdev, struct fw_core_dump_mcu *regs)
+{
+ unsigned int i;
+ u32 status = 0;
+ u32 data_addr = kbdev->csf.fw_core_dump.mcu_regs_addr;
+ u32 *data = (u32 *)regs;
+
+ /* Check if the core dump entry exposed the buffer */
+ if (!regs || !kbdev->csf.fw_core_dump.available)
+ return -EPERM;
+
+ /* Check if the data in the buffer is valid, if not, return error */
+ kbase_csf_read_firmware_memory(kbdev, data_addr, &status);
+ if ((status & FW_MCU_STATUS_MASK) != FW_MCU_STATUS_VALID)
+ return -EPERM;
+
+ /* According to image header documentation, the MCU registers core dump
+ * buffer is 32-bit aligned.
+ */
+ for (i = 1; i <= sizeof(struct fw_core_dump_mcu) / sizeof(u32); ++i)
+ kbase_csf_read_firmware_memory(kbdev, data_addr + i * sizeof(u32), &data[i - 1]);
+
+ return 0;
+}
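As implied by the status check and the read loop above, the buffer at mcu_regs_addr is a status word followed by the sixteen registers of struct fw_core_dump_mcu; the offsets below are read off the code rather than taken from a published format:

    data_addr + 0x00 : status (bit 0 set => register snapshot valid)
    data_addr + 0x04 : r0
    data_addr + 0x08 : r1
    ...              : r2..r12 at consecutive 4-byte offsets
    data_addr + 0x38 : sp
    data_addr + 0x3c : lr
    data_addr + 0x40 : pc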
+
+/**
+ * fw_core_dump_fill_elf_header - Initializes an ELF32 header
+ * @hdr: ELF32 header to initialize
+ * @sections: Number of entries in the ELF program header table
+ *
+ * Initializes an ELF32 header for an ARM 32-bit little-endian
+ * 'Core file' object file.
+ */
+static void fw_core_dump_fill_elf_header(struct elf32_hdr *hdr, unsigned int sections)
+{
+ /* Reset all members in header. */
+ memset(hdr, 0, sizeof(*hdr));
+
+ /* Magic number identifying file as an ELF object. */
+ memcpy(hdr->e_ident, ELFMAG, SELFMAG);
+
+ /* Identify file as 32-bit, little-endian, using current
+ * ELF header version, with no OS or ABI specific ELF
+ * extensions used.
+ */
+ hdr->e_ident[EI_CLASS] = ELFCLASS32;
+ hdr->e_ident[EI_DATA] = ELFDATA2LSB;
+ hdr->e_ident[EI_VERSION] = EV_CURRENT;
+ hdr->e_ident[EI_OSABI] = ELFOSABI_NONE;
+
+ /* 'Core file' type of object file. */
+ hdr->e_type = ET_CORE;
+
+ /* ARM 32-bit architecture (AARCH32) */
+ hdr->e_machine = EM_ARM;
+
+ /* Object file version: the original format. */
+ hdr->e_version = EV_CURRENT;
+
+ /* Offset of program header table in file. */
+ hdr->e_phoff = sizeof(struct elf32_hdr);
+
+ /* No processor specific flags. */
+ hdr->e_flags = 0;
+
+ /* Size of the ELF header in bytes. */
+ hdr->e_ehsize = sizeof(struct elf32_hdr);
+
+ /* Size of the ELF program header entry in bytes. */
+ hdr->e_phentsize = sizeof(struct elf32_phdr);
+
+ /* Number of entries in the program header table. */
+ hdr->e_phnum = sections;
+}
+
+/**
+ * fw_core_dump_fill_elf_program_header_note - Initializes an ELF32 program header
+ * for holding auxiliary information
+ * @phdr: ELF32 program header
+ * @file_offset: Location of the note in the file in bytes
+ * @size: Size of the note in bytes.
+ *
+ * Initializes an ELF32 program header describing auxiliary information (containing
+ * one or more notes) of @size bytes altogether located in the file at offset
+ * @file_offset.
+ */
+static void fw_core_dump_fill_elf_program_header_note(struct elf32_phdr *phdr, u32 file_offset,
+ u32 size)
+{
+ /* Auxiliary information (note) in program header. */
+ phdr->p_type = PT_NOTE;
+
+ /* Location of first note in file in bytes. */
+ phdr->p_offset = file_offset;
+
+ /* Size of all notes combined in bytes. */
+ phdr->p_filesz = size;
+
+ /* Other members not relevant for a note. */
+ phdr->p_vaddr = 0;
+ phdr->p_paddr = 0;
+ phdr->p_memsz = 0;
+ phdr->p_align = 0;
+ phdr->p_flags = 0;
+}
+
+/**
+ * fw_core_dump_fill_elf_program_header - Initializes an ELF32 program header for a loadable segment
+ * @phdr: ELF32 program header to initialize.
+ * @file_offset: Location of loadable segment in file in bytes
+ * (aligned to FW_PAGE_SIZE bytes)
+ * @vaddr: 32-bit virtual address where to write the segment
+ * (aligned to FW_PAGE_SIZE bytes)
+ * @size: Size of the segment in bytes.
+ * @flags: CSF_FIRMWARE_ENTRY_* flags describing access permissions.
+ *
+ * Initializes an ELF32 program header describing a loadable segment of
+ * @size bytes located in the file at offset @file_offset to be loaded
+ * at virtual address @vaddr with access permissions as described by
+ * CSF_FIRMWARE_ENTRY_* flags in @flags.
+ */
+static void fw_core_dump_fill_elf_program_header(struct elf32_phdr *phdr, u32 file_offset,
+ u32 vaddr, u32 size, u32 flags)
+{
+ /* Loadable segment in program header. */
+ phdr->p_type = PT_LOAD;
+
+ /* Location of segment in file in bytes. Aligned to p_align bytes. */
+ phdr->p_offset = file_offset;
+
+ /* Virtual address of segment. Aligned to p_align bytes. */
+ phdr->p_vaddr = vaddr;
+
+ /* Physical address of segment. Not relevant. */
+ phdr->p_paddr = 0;
+
+ /* Size of segment in file and memory. */
+ phdr->p_filesz = size;
+ phdr->p_memsz = size;
+
+ /* Alignment of segment in the file and memory in bytes (integral power of 2). */
+ phdr->p_align = FW_PAGE_SIZE;
+
+ /* Set segment access permissions. */
+ phdr->p_flags = 0;
+ if (flags & CSF_FIRMWARE_ENTRY_READ)
+ phdr->p_flags |= PF_R;
+ if (flags & CSF_FIRMWARE_ENTRY_WRITE)
+ phdr->p_flags |= PF_W;
+ if (flags & CSF_FIRMWARE_ENTRY_EXECUTE)
+ phdr->p_flags |= PF_X;
+}
+
+/**
+ * fw_core_dump_get_prstatus_note_size - Calculates size of a ELF32 PRSTATUS note
+ * @name: Name given to the PRSTATUS note.
+ *
+ * Calculates the size of a 32-bit PRSTATUS note (which contains information
+ * about a process like the current MCU registers) taking into account
+ * @name must be padded to a 4-byte multiple.
+ *
+ * Return: size of 32-bit PRSTATUS note in bytes.
+ */
+static unsigned int fw_core_dump_get_prstatus_note_size(char *name)
+{
+ return sizeof(struct elf32_note) + roundup(strlen(name) + 1, 4) +
+ sizeof(struct elf_prstatus32);
+}
+
+/**
+ * fw_core_dump_fill_elf_prstatus - Initializes an ELF32 PRSTATUS structure
+ * @prs: ELF32 PRSTATUS note to initialize
+ * @regs: MCU registers to copy into the PRSTATUS note
+ *
+ * Initializes an ELF32 PRSTATUS structure with MCU registers @regs.
+ * Other process information is N/A for CSF Firmware.
+ */
+static void fw_core_dump_fill_elf_prstatus(struct elf_prstatus32 *prs,
+ struct fw_core_dump_mcu *regs)
+{
+ /* Only fill in registers (32-bit) of PRSTATUS note. */
+ memset(prs, 0, sizeof(*prs));
+ prs->pr_reg[0] = regs->r0;
+ prs->pr_reg[1] = regs->r1;
+ prs->pr_reg[2] = regs->r2;
+ prs->pr_reg[3] = regs->r3;
+ prs->pr_reg[4] = regs->r4;
+ prs->pr_reg[5] = regs->r5;
+ prs->pr_reg[6] = regs->r6;
+ prs->pr_reg[7] = regs->r7;
+ prs->pr_reg[8] = regs->r8;
+ prs->pr_reg[9] = regs->r9;
+ prs->pr_reg[10] = regs->r10;
+ prs->pr_reg[11] = regs->r11;
+ prs->pr_reg[12] = regs->r12;
+ prs->pr_reg[13] = regs->sp;
+ prs->pr_reg[14] = regs->lr;
+ prs->pr_reg[15] = regs->pc;
+}
+
+/**
+ * fw_core_dump_create_prstatus_note - Creates an ELF32 PRSTATUS note
+ * @name: Name for the PRSTATUS note
+ * @prs: ELF32 PRSTATUS structure to put in the PRSTATUS note
+ * @created_prstatus_note:
+ * Pointer to the allocated ELF32 PRSTATUS note
+ *
+ * Creates an ELF32 note with one PRSTATUS entry containing the
+ * ELF32 PRSTATUS structure @prs. Caller needs to free the created note in
+ * @created_prstatus_note.
+ *
+ * Return: 0 on failure, otherwise size of ELF32 PRSTATUS note in bytes.
+ */
+static unsigned int fw_core_dump_create_prstatus_note(char *name, struct elf_prstatus32 *prs,
+ struct elf32_note **created_prstatus_note)
+{
+ struct elf32_note *note;
+ unsigned int note_name_sz;
+ unsigned int note_sz;
+
+ /* Allocate memory for ELF32 note containing a PRSTATUS note. */
+ note_name_sz = strlen(name) + 1;
+ note_sz = sizeof(struct elf32_note) + roundup(note_name_sz, 4) +
+ sizeof(struct elf_prstatus32);
+ note = kmalloc(note_sz, GFP_KERNEL);
+ if (!note)
+ return 0;
+
+ /* Fill in ELF32 note with one entry for a PRSTATUS note. */
+ note->n_namesz = note_name_sz;
+ note->n_descsz = sizeof(struct elf_prstatus32);
+ note->n_type = NT_PRSTATUS;
+ memcpy(note + 1, name, note_name_sz);
+ memcpy((char *)(note + 1) + roundup(note_name_sz, 4), prs, sizeof(*prs));
+
+ /* Return pointer and size of the created ELF32 note. */
+ *created_prstatus_note = note;
+ return note_sz;
+}
+
+/**
+ * fw_core_dump_write_elf_header - Writes ELF header for the FW core dump
+ * @m: the seq_file handle
+ *
+ * Writes the ELF header of the core dump including program headers for
+ * memory sections and a note containing the current MCU register
+ * values.
+ *
+ * Excludes memory sections without read access permissions or
+ * are for protected memory.
+ *
+ * The data written is as follows:
+ * - ELF header
+ * - ELF PHDRs for memory sections
+ * - ELF PHDR for program header NOTE
+ * - ELF PRSTATUS note
+ * - 0-bytes padding to multiple of ELF_EXEC_PAGESIZE
+ *
+ * The actual memory section dumps should follow this (not written
+ * by this function).
+ *
+ * Retrieves the necessary information via the struct
+ * fw_core_dump_data stored in the private member of the seq_file
+ * handle.
+ *
+ * Return:
+ * * 0 - success
+ * * -ENOMEM - not enough memory for allocating ELF32 note
+ */
+static int fw_core_dump_write_elf_header(struct seq_file *m)
+{
+ struct elf32_hdr hdr;
+ struct elf32_phdr phdr;
+ struct fw_core_dump_data *dump_data = m->private;
+ struct kbase_device *const kbdev = dump_data->kbdev;
+ struct kbase_csf_firmware_interface *interface;
+ struct elf_prstatus32 elf_prs;
+ struct elf32_note *elf_prstatus_note;
+ unsigned int sections = 0;
+ unsigned int elf_prstatus_note_size;
+ u32 elf_prstatus_offset;
+ u32 elf_phdr_note_offset;
+ u32 elf_memory_sections_data_offset;
+ u32 total_pages = 0;
+ u32 padding_size, *padding;
+ struct fw_core_dump_mcu regs = { 0 };
+
+ CSTD_UNUSED(total_pages);
+
+ /* Count number of memory sections. */
+ list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) {
+ /* Skip memory sections that cannot be read or are protected. */
+ if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) ||
+ (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0)
+ continue;
+ sections++;
+ }
+
+ /* Prepare ELF header. */
+ fw_core_dump_fill_elf_header(&hdr, sections + 1);
+ seq_write(m, &hdr, sizeof(struct elf32_hdr));
+
+ elf_prstatus_note_size = fw_core_dump_get_prstatus_note_size("CORE");
+ /* PHDRs of PT_LOAD type. */
+ elf_phdr_note_offset = sizeof(struct elf32_hdr) + sections * sizeof(struct elf32_phdr);
+ /* PHDR of PT_NOTE type. */
+ elf_prstatus_offset = elf_phdr_note_offset + sizeof(struct elf32_phdr);
+ elf_memory_sections_data_offset = elf_prstatus_offset + elf_prstatus_note_size;
+
+ /* Calculate padding size to page offset. */
+ padding_size = roundup(elf_memory_sections_data_offset, ELF_EXEC_PAGESIZE) -
+ elf_memory_sections_data_offset;
+ elf_memory_sections_data_offset += padding_size;
+
+ /* Prepare ELF program header table. */
+ list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) {
+ /* Skip memory sections that cannot be read or are protected. */
+ if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) ||
+ (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0)
+ continue;
+
+ fw_core_dump_fill_elf_program_header(&phdr, elf_memory_sections_data_offset,
+ interface->virtual,
+ interface->num_pages * FW_PAGE_SIZE,
+ interface->flags);
+
+ seq_write(m, &phdr, sizeof(struct elf32_phdr));
+
+ elf_memory_sections_data_offset += interface->num_pages * FW_PAGE_SIZE;
+ total_pages += interface->num_pages;
+ }
+
+ /* Prepare PHDR of PT_NOTE type. */
+ fw_core_dump_fill_elf_program_header_note(&phdr, elf_prstatus_offset,
+ elf_prstatus_note_size);
+ seq_write(m, &phdr, sizeof(struct elf32_phdr));
+
+ /* Prepare ELF note of PRSTATUS type. */
+ if (fw_get_core_dump_mcu(kbdev, &regs))
+ dev_dbg(kbdev->dev, "MCU Registers not available, all registers set to zero");
+ /* Even if MCU Registers are not available the ELF prstatus is still
+ * filled with the registers equal to zero.
+ */
+ fw_core_dump_fill_elf_prstatus(&elf_prs, &regs);
+ elf_prstatus_note_size =
+ fw_core_dump_create_prstatus_note("CORE", &elf_prs, &elf_prstatus_note);
+ if (elf_prstatus_note_size == 0)
+ return -ENOMEM;
+
+ seq_write(m, elf_prstatus_note, elf_prstatus_note_size);
+ kfree(elf_prstatus_note);
+
+ /* Pad file to page size. */
+ padding = kzalloc(padding_size, GFP_KERNEL);
+ seq_write(m, padding, padding_size);
+ kfree(padding);
+
+ return 0;
+}
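To make the offset bookkeeping above concrete, here is a hand-traced layout for a hypothetical dump with two readable memory sections, assuming the usual sizeof(struct elf32_hdr) = 52, sizeof(struct elf32_phdr) = 32, the 168-byte "CORE" PRSTATUS note and ELF_EXEC_PAGESIZE = 4096:

    0x0000  ELF header                    (52 bytes)
    0x0034  PT_LOAD phdr for section 0    (32 bytes)
    0x0054  PT_LOAD phdr for section 1    (32 bytes)
    0x0074  PT_NOTE phdr                  (32 bytes)   elf_phdr_note_offset = 116
    0x0094  PRSTATUS note                 (168 bytes)  elf_prstatus_offset  = 148
    0x013c  zero padding up to the next ELF_EXEC_PAGESIZE boundary
    0x1000  section 0 page data, followed by section 1 page data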
+
+/**
+ * fw_core_dump_create - Requests firmware to save state for a firmware core dump
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * Return: 0 on success, error code otherwise.
+ */
+static int fw_core_dump_create(struct kbase_device *kbdev)
+{
+ int err;
+
+ /* Ensure MCU is active before requesting the core dump. */
+ kbase_csf_scheduler_pm_active(kbdev);
+ err = kbase_csf_scheduler_wait_mcu_active(kbdev);
+ if (!err)
+ err = kbase_csf_firmware_req_core_dump(kbdev);
+
+ kbase_csf_scheduler_pm_idle(kbdev);
+
+ return err;
+}
+
+/**
+ * fw_core_dump_seq_start - seq_file start operation for firmware core dump file
+ * @m: the seq_file handle
+ * @_pos: holds the current position in pages
+ * (0 or most recent position used in previous session)
+ *
+ * Starts a seq_file session, positioning the iterator for the session to page @_pos - 1
+ * within the firmware interface memory sections. @_pos value 0 is used to indicate the
+ * position of the ELF header at the start of the file.
+ *
+ * Retrieves the necessary information via the struct fw_core_dump_data stored in
+ * the private member of the seq_file handle.
+ *
+ * Return:
+ * * iterator pointer - pointer to iterator struct fw_core_dump_seq_off
+ * * SEQ_START_TOKEN - special iterator pointer indicating it is the start of the file
+ * * NULL - iterator could not be allocated
+ */
+static void *fw_core_dump_seq_start(struct seq_file *m, loff_t *_pos)
+{
+ struct fw_core_dump_data *dump_data = m->private;
+ struct fw_core_dump_seq_off *data;
+ struct kbase_csf_firmware_interface *interface;
+ loff_t pos = *_pos;
+
+ if (pos == 0)
+ return SEQ_START_TOKEN;
+
+ /* Move iterator in the right position based on page number within
+ * available pages of firmware interface memory sections.
+ */
+ pos--; /* ignore start token */
+ list_for_each_entry(interface, &dump_data->kbdev->csf.firmware_interfaces, node) {
+ /* Skip memory sections that cannot be read or are protected. */
+ if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) ||
+ (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0)
+ continue;
+
+ if (pos >= interface->num_pages) {
+ pos -= interface->num_pages;
+ } else {
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return NULL;
+ data->interface = interface;
+ data->page_num = pos;
+ return data;
+ }
+ }
+
+ return NULL;
+}
+
+/**
+ * fw_core_dump_seq_stop - seq_file stop operation for firmware core dump file
+ * @m: the seq_file handle
+ * @v: the current iterator (pointer to struct fw_core_dump_seq_off)
+ *
+ * Closes the current session and frees any related memory.
+ */
+static void fw_core_dump_seq_stop(struct seq_file *m, void *v)
+{
+ kfree(v);
+}
+
+/**
+ * fw_core_dump_seq_next - seq_file next operation for firmware core dump file
+ * @m: the seq_file handle
+ * @v: the current iterator (pointer to struct fw_core_dump_seq_off)
+ * @pos: holds the current position in pages
+ * (0 or most recent position used in previous session)
+ *
+ * Moves the iterator @v forward to the next page within the firmware interface
+ * memory sections and returns the updated position in @pos.
+ * @v value SEQ_START_TOKEN indicates the ELF header position.
+ *
+ * Return:
+ * * iterator pointer - pointer to iterator struct fw_core_dump_seq_off
+ * * NULL - iterator could not be allocated
+ */
+static void *fw_core_dump_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct fw_core_dump_data *dump_data = m->private;
+ struct fw_core_dump_seq_off *data = v;
+ struct kbase_csf_firmware_interface *interface;
+ struct list_head *interfaces = &dump_data->kbdev->csf.firmware_interfaces;
+
+ /* Is current position at the ELF header ? */
+ if (v == SEQ_START_TOKEN) {
+ if (list_empty(interfaces))
+ return NULL;
+
+ /* Prepare iterator for starting at first page in firmware interface
+ * memory sections.
+ */
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return NULL;
+ data->interface =
+ list_first_entry(interfaces, struct kbase_csf_firmware_interface, node);
+ data->page_num = 0;
+ ++*pos;
+ return data;
+ }
+
+ /* First attempt to satisfy from current firmware interface memory section. */
+ interface = data->interface;
+ if (data->page_num + 1 < interface->num_pages) {
+ data->page_num++;
+ ++*pos;
+ return data;
+ }
+
+ /* Need next firmware interface memory section. This could be the last one. */
+ if (list_is_last(&interface->node, interfaces)) {
+ kfree(data);
+ return NULL;
+ }
+
+ /* Move to first page in next firmware interface memory section. */
+ data->interface = list_next_entry(interface, node);
+ data->page_num = 0;
+ ++*pos;
+
+ return data;
+}
+
+/**
+ * fw_core_dump_seq_show - seq_file show operation for firmware core dump file
+ * @m: the seq_file handle
+ * @v: the current iterator (pointer to struct fw_core_dump_seq_off)
+ *
+ * Writes the current page in a firmware interface memory section indicated
+ * by the iterator @v to the file. If @v is SEQ_START_TOKEN the ELF
+ * header is written.
+ *
+ * Return: 0 on success, error code otherwise.
+ */
+static int fw_core_dump_seq_show(struct seq_file *m, void *v)
+{
+ struct fw_core_dump_seq_off *data = v;
+ struct page *page;
+ u32 *p;
+
+ /* Either write the ELF header or current page. */
+ if (v == SEQ_START_TOKEN)
+ return fw_core_dump_write_elf_header(m);
+
+ /* Write the current page. */
+ page = as_page(data->interface->phys[data->page_num]);
+ p = kmap_atomic(page);
+ seq_write(m, p, FW_PAGE_SIZE);
+ kunmap_atomic(p);
+
+ return 0;
+}
+
+/* Sequence file operations for firmware core dump file. */
+static const struct seq_operations fw_core_dump_seq_ops = {
+ .start = fw_core_dump_seq_start,
+ .next = fw_core_dump_seq_next,
+ .stop = fw_core_dump_seq_stop,
+ .show = fw_core_dump_seq_show,
+};
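[Editor's note: taken together these callbacks map the seq_file position straight onto the dump layout: pos 0 (SEQ_START_TOKEN) emits the ELF header block written by fw_core_dump_write_elf_header(), and pos N for N >= 1 emits page N-1 counted across the readable, non-protected firmware interfaces as positioned by fw_core_dump_seq_start(); for example, if the first readable interface has 3 pages, pos 1..3 select its pages 0..2 and pos 4 selects page 0 of the next interface.]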
+
+/**
+ * fw_core_dump_debugfs_open - callback for opening the 'fw_core_dump' debugfs file
+ * @inode: inode of the file
+ * @file: file pointer
+ *
+ * Prepares for servicing a write request to request a core dump from firmware and
+ * a read request to retrieve the core dump.
+ *
+ * Returns an error if the firmware is not initialized yet.
+ *
+ * Return: 0 on success, error code otherwise.
+ */
+static int fw_core_dump_debugfs_open(struct inode *inode, struct file *file)
+{
+ struct kbase_device *const kbdev = inode->i_private;
+ struct fw_core_dump_data *dump_data;
+ int ret;
+
+ /* Fail if firmware is not initialized yet. */
+ if (!kbdev->csf.firmware_inited) {
+ ret = -ENODEV;
+ goto open_fail;
+ }
+
+ /* Open a sequence file for iterating through the pages in the
+ * firmware interface memory pages. seq_open stores a
+ * struct seq_file * in the private_data field of @file.
+ */
+ ret = seq_open(file, &fw_core_dump_seq_ops);
+ if (ret)
+ goto open_fail;
+
+ /* Allocate a context for sequence file operations. */
+ dump_data = kmalloc(sizeof(*dump_data), GFP_KERNEL);
+ if (!dump_data) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* Kbase device will be shared with sequence file operations. */
+ dump_data->kbdev = kbdev;
+
+ /* Link our sequence file context. */
+ ((struct seq_file *)file->private_data)->private = dump_data;
+
+ return 0;
+out:
+ seq_release(inode, file);
+open_fail:
+ return ret;
+}
+
+/**
+ * fw_core_dump_debugfs_write - callback for a write to the 'fw_core_dump' debugfs file
+ * @file: file pointer
+ * @ubuf: user buffer containing data to store
+ * @count: number of bytes in user buffer
+ * @ppos: file position
+ *
+ * Any data written to the file triggers a firmware core dump request which
+ * subsequently can be retrieved by reading from the file.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t fw_core_dump_debugfs_write(struct file *file, const char __user *ubuf, size_t count,
+ loff_t *ppos)
+{
+ int err;
+ struct fw_core_dump_data *dump_data = ((struct seq_file *)file->private_data)->private;
+ struct kbase_device *const kbdev = dump_data->kbdev;
+
+ CSTD_UNUSED(ppos);
+
+ err = fw_core_dump_create(kbdev);
+
+ return err ? err : count;
+}
+
+/**
+ * fw_core_dump_debugfs_release - callback for releasing the 'fw_core_dump' debugfs file
+ * @inode: inode of the file
+ * @file: file pointer
+ *
+ * Return: 0 on success, error code otherwise.
+ */
+static int fw_core_dump_debugfs_release(struct inode *inode, struct file *file)
+{
+ struct fw_core_dump_data *dump_data = ((struct seq_file *)file->private_data)->private;
+
+ seq_release(inode, file);
+
+ kfree(dump_data);
+
+ return 0;
+}
+/* Debugfs file operations for firmware core dump file. */
+static const struct file_operations kbase_csf_fw_core_dump_fops = {
+ .owner = THIS_MODULE,
+ .open = fw_core_dump_debugfs_open,
+ .read = seq_read,
+ .write = fw_core_dump_debugfs_write,
+ .llseek = seq_lseek,
+ .release = fw_core_dump_debugfs_release,
+};
+
+void kbase_csf_firmware_core_dump_init(struct kbase_device *const kbdev)
+{
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ debugfs_create_file("fw_core_dump", 0600, kbdev->mali_debugfs_directory, kbdev,
+ &kbase_csf_fw_core_dump_fops);
+#endif /* CONFIG_DEBUG_FS */
+}
+
+int kbase_csf_firmware_core_dump_entry_parse(struct kbase_device *kbdev, const u32 *entry)
+{
+ /* Casting to u16 as version is defined by bits 15:0 */
+ kbdev->csf.fw_core_dump.version = (u16)entry[FW_CORE_DUMP_VERSION_INDEX];
+
+ if (kbdev->csf.fw_core_dump.version != FW_CORE_DUMP_DATA_VERSION)
+ return -EPERM;
+
+ kbdev->csf.fw_core_dump.mcu_regs_addr = entry[FW_CORE_DUMP_START_ADDR_INDEX];
+ kbdev->csf.fw_core_dump.available = true;
+
+ return 0;
+}
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.h b/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.h
new file mode 100644
index 0000000..0537dca
--- /dev/null
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_CSF_FIRMWARE_CORE_DUMP_H_
+#define _KBASE_CSF_FIRMWARE_CORE_DUMP_H_
+
+struct kbase_device;
+
+/** Offset of the last field of core dump entry from the image header */
+#define CORE_DUMP_ENTRY_START_ADDR_OFFSET (0x4)
+
+/**
+ * kbase_csf_firmware_core_dump_entry_parse() - Parse a "core dump" entry from
+ * the image header.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @entry: Pointer to section.
+ *
+ * Read a "core dump" entry from the image header, check the version for
+ * compatibility and store the address pointer.
+ *
+ * Return: 0 if successfully parse entry, negative error code otherwise.
+ */
+int kbase_csf_firmware_core_dump_entry_parse(struct kbase_device *kbdev, const u32 *entry);
+
+/**
+ * kbase_csf_firmware_core_dump_init() - Initialize firmware core dump support
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * Must be zero-initialized.
+ *
+ * Creates the fw_core_dump debugfs file through which to request a firmware
+ * core dump. The created debugfs file is cleaned up as part of kbdev debugfs
+ * cleanup.
+ *
+ * The fw_core_dump debugfs file can be used in the following way:
+ *
+ * To explicitly request core dump:
+ * echo 1 >/sys/kernel/debug/mali0/fw_core_dump
+ *
+ * To output current core dump (after explicitly requesting a core dump, or
+ * kernel driver reported an internal firmware error):
+ * cat /sys/kernel/debug/mali0/fw_core_dump
+ */
+void kbase_csf_firmware_core_dump_init(struct kbase_device *const kbdev);
+
+#endif /* _KBASE_CSF_FIRMWARE_CORE_DUMP_H_ */
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_log.c b/mali_kbase/csf/mali_kbase_csf_firmware_log.c
index bfcc6c8..77d3b1e 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_log.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_log.c
@@ -20,12 +20,35 @@
*/
#include <mali_kbase.h>
+#include "backend/gpu/mali_kbase_pm_internal.h"
#include <csf/mali_kbase_csf_firmware_log.h>
#include <csf/mali_kbase_csf_trace_buffer.h>
#include <linux/debugfs.h>
#include <linux/string.h>
#include <linux/workqueue.h>
+/*
+ * ARMv7 instruction: Branch with Link calls a subroutine at a PC-relative address.
+ */
+#define ARMV7_T1_BL_IMM_INSTR 0xd800f000
+
+/*
+ * ARMv7 instruction: Branch with Link calls a subroutine at a PC-relative address, maximum
+ * negative jump offset.
+ */
+#define ARMV7_T1_BL_IMM_RANGE_MIN -16777216
+
+/*
+ * ARMv7 instruction: Branch with Link calls a subroutine at a PC-relative address, maximum
+ * positive jump offset.
+ */
+#define ARMV7_T1_BL_IMM_RANGE_MAX 16777214
+
+/*
+ * ARMv7 instruction: Double NOP instructions.
+ */
+#define ARMV7_DOUBLE_NOP_INSTR 0xbf00bf00
+
#if defined(CONFIG_DEBUG_FS)
static int kbase_csf_firmware_log_enable_mask_read(void *data, u64 *val)
@@ -62,7 +85,7 @@ static int kbase_csf_firmware_log_enable_mask_write(void *data, u64 val)
dev_dbg(kbdev->dev, "Limit enabled bits count from %u to 64", enable_bits_count);
enable_bits_count = 64;
}
- new_mask = val & ((1 << enable_bits_count) - 1);
+ new_mask = val & (UINT64_MAX >> (64 - enable_bits_count));
if (new_mask != kbase_csf_firmware_trace_buffer_get_active_mask64(tb))
return kbase_csf_firmware_trace_buffer_set_active_mask64(tb, new_mask);
@@ -292,3 +315,134 @@ void kbase_csf_firmware_log_dump_buffer(struct kbase_device *kbdev)
atomic_set(&fw_log->busy, 0);
}
+
+void kbase_csf_firmware_log_parse_logging_call_list_entry(struct kbase_device *kbdev,
+ const uint32_t *entry)
+{
+ kbdev->csf.fw_log.func_call_list_va_start = entry[0];
+ kbdev->csf.fw_log.func_call_list_va_end = entry[1];
+}
+
+/**
+ * toggle_logging_calls_in_loaded_image - Toggles FW log func calls in loaded FW image.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @enable: Whether to enable or disable the function calls.
+ */
+static void toggle_logging_calls_in_loaded_image(struct kbase_device *kbdev, bool enable)
+{
+ uint32_t bl_instruction, diff;
+ uint32_t imm11, imm10, i1, i2, j1, j2, sign;
+ uint32_t calling_address = 0, callee_address = 0;
+ uint32_t list_entry = kbdev->csf.fw_log.func_call_list_va_start;
+ const uint32_t list_va_end = kbdev->csf.fw_log.func_call_list_va_end;
+
+ if (list_entry == 0 || list_va_end == 0)
+ return;
+
+ if (enable) {
+ for (; list_entry < list_va_end; list_entry += 2 * sizeof(uint32_t)) {
+ /* Read calling address */
+ kbase_csf_read_firmware_memory(kbdev, list_entry, &calling_address);
+ /* Read callee address */
+ kbase_csf_read_firmware_memory(kbdev, list_entry + sizeof(uint32_t),
+ &callee_address);
+
+ diff = callee_address - calling_address - 4;
+ sign = !!(diff & 0x80000000);
+ if (ARMV7_T1_BL_IMM_RANGE_MIN > (int32_t)diff ||
+ ARMV7_T1_BL_IMM_RANGE_MAX < (int32_t)diff) {
+ dev_warn(kbdev->dev, "FW log patch 0x%x out of range, skipping",
+ calling_address);
+ continue;
+ }
+
+ i1 = (diff & 0x00800000) >> 23;
+ j1 = !i1 ^ sign;
+ i2 = (diff & 0x00400000) >> 22;
+ j2 = !i2 ^ sign;
+ imm11 = (diff & 0xffe) >> 1;
+ imm10 = (diff & 0x3ff000) >> 12;
+
+ /* Compose BL instruction */
+ bl_instruction = ARMV7_T1_BL_IMM_INSTR;
+ bl_instruction |= j1 << 29;
+ bl_instruction |= j2 << 27;
+ bl_instruction |= imm11 << 16;
+ bl_instruction |= sign << 10;
+ bl_instruction |= imm10;
+
+ /* Patch logging func calls in their load location */
+ dev_dbg(kbdev->dev, "FW log patch 0x%x: 0x%x\n", calling_address,
+ bl_instruction);
+ kbase_csf_update_firmware_memory_exe(kbdev, calling_address,
+ bl_instruction);
+ }
+ } else {
+ for (; list_entry < list_va_end; list_entry += 2 * sizeof(uint32_t)) {
+ /* Read calling address */
+ kbase_csf_read_firmware_memory(kbdev, list_entry, &calling_address);
+
+ /* Overwrite logging func calls with 2 NOP instructions */
+ kbase_csf_update_firmware_memory_exe(kbdev, calling_address,
+ ARMV7_DOUBLE_NOP_INSTR);
+ }
+ }
+}
+
+int kbase_csf_firmware_log_toggle_logging_calls(struct kbase_device *kbdev, u32 val)
+{
+ unsigned long flags;
+ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
+ bool mcu_inactive;
+ bool resume_needed = false;
+ int ret = 0;
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+
+ if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0)
+ return -EBUSY;
+
+ /* Suspend all the active CS groups */
+ dev_dbg(kbdev->dev, "Suspend all the active CS groups");
+
+ kbase_csf_scheduler_lock(kbdev);
+ while (scheduler->state != SCHED_SUSPENDED) {
+ kbase_csf_scheduler_unlock(kbdev);
+ kbase_csf_scheduler_pm_suspend(kbdev);
+ kbase_csf_scheduler_lock(kbdev);
+ resume_needed = true;
+ }
+
+ /* Wait for the MCU to get disabled */
+ dev_info(kbdev->dev, "Wait for the MCU to get disabled");
+ ret = kbase_pm_wait_for_desired_state(kbdev);
+ if (ret) {
+ dev_err(kbdev->dev,
+ "wait for PM state failed when toggling FW logging calls");
+ ret = -EAGAIN;
+ goto out;
+ }
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ mcu_inactive =
+ kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ if (!mcu_inactive) {
+ dev_err(kbdev->dev,
+ "MCU not inactive after PM state wait when toggling FW logging calls");
+ ret = -EAGAIN;
+ goto out;
+ }
+
+ /* Toggle FW logging call in the loaded FW image */
+ toggle_logging_calls_in_loaded_image(kbdev, val);
+ dev_dbg(kbdev->dev, "FW logging: %s", val ? "enabled" : "disabled");
+
+out:
+ kbase_csf_scheduler_unlock(kbdev);
+ if (resume_needed)
+ /* Resume queue groups and start mcu */
+ kbase_csf_scheduler_pm_resume(kbdev);
+ atomic_set(&fw_log->busy, 0);
+ return ret;
+}
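
For reference, toggle_logging_calls_in_loaded_image() above composes a Thumb-2 BL (T1 encoding): the low half-word of the 32-bit value is 0xF000 | S | imm10 and the high half-word is 0xD800 | J1 | J2 | imm11. The following standalone sketch restates that bit packing; it assumes the caller has already range-checked the offset against ARMV7_T1_BL_IMM_RANGE_MIN/MAX as the patch does, and the function name is illustrative:

    #include <stdint.h>

    static uint32_t encode_thumb2_bl(uint32_t calling_address, uint32_t callee_address)
    {
            uint32_t diff = callee_address - calling_address - 4; /* offset relative to PC */
            uint32_t sign = !!(diff & 0x80000000);
            uint32_t i1 = (diff & 0x00800000) >> 23;
            uint32_t i2 = (diff & 0x00400000) >> 22;
            uint32_t j1 = !i1 ^ sign;
            uint32_t j2 = !i2 ^ sign;
            uint32_t imm11 = (diff & 0xffe) >> 1;
            uint32_t imm10 = (diff & 0x3ff000) >> 12;
            uint32_t instr = 0xd800f000; /* ARMV7_T1_BL_IMM_INSTR */

            instr |= j1 << 29;   /* J1 -> bit 13 of the high half-word */
            instr |= j2 << 27;   /* J2 -> bit 11 of the high half-word */
            instr |= imm11 << 16;
            instr |= sign << 10; /* S  -> bit 10 of the low half-word  */
            instr |= imm10;
            return instr;
    }
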
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_log.h b/mali_kbase/csf/mali_kbase_csf_firmware_log.h
index 6655f6f..1008320 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_log.h
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_log.h
@@ -24,6 +24,9 @@
#include <mali_kbase.h>
+/** Offset of the last field of a function call list entry from the image header */
+#define FUNC_CALL_LIST_ENTRY_NAME_OFFSET (0x8)
+
/*
* Firmware log dumping buffer size.
*/
@@ -53,4 +56,22 @@ void kbase_csf_firmware_log_term(struct kbase_device *kbdev);
*/
void kbase_csf_firmware_log_dump_buffer(struct kbase_device *kbdev);
+/**
+ * kbase_csf_firmware_log_parse_logging_call_list_entry - Parse FW logging function call list entry.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @entry: Pointer to section.
+ */
+void kbase_csf_firmware_log_parse_logging_call_list_entry(struct kbase_device *kbdev,
+ const uint32_t *entry);
+/**
+ * kbase_csf_firmware_log_toggle_logging_calls - Enables/Disables FW logging function calls.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @val: Configuration option value.
+ *
+ * Return: 0 if successful, negative error code on failure
+ */
+int kbase_csf_firmware_log_toggle_logging_calls(struct kbase_device *kbdev, u32 val);
+
#endif /* _KBASE_CSF_FIRMWARE_LOG_H_ */
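
kbase_csf_firmware_log_toggle_logging_calls() is the kind of entry point a debugfs knob would sit behind. A hypothetical hook is sketched below; the file name, permissions and fops identifiers are illustrative and not taken from this patch:

    static int fw_log_calls_set(void *data, u64 val)
    {
            struct kbase_device *kbdev = data;

            return kbase_csf_firmware_log_toggle_logging_calls(kbdev, (u32)val);
    }
    DEFINE_DEBUGFS_ATTRIBUTE(fw_log_calls_fops, NULL, fw_log_calls_set, "%llu\n");

    /* e.g. during firmware log debugfs initialisation: */
    debugfs_create_file("fw_logging_calls", 0200, kbdev->mali_debugfs_directory,
                        kbdev, &fw_log_calls_fops);
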
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
index c49a20b..514492c 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
@@ -32,7 +32,8 @@
#include "mali_kbase_csf_scheduler.h"
#include "mmu/mali_kbase_mmu.h"
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
-#include <backend/gpu/mali_kbase_model_dummy.h>
+#include <backend/gpu/mali_kbase_model_linux.h>
+#include <csf/mali_kbase_csf_registers.h>
#include <linux/list.h>
#include <linux/slab.h>
@@ -104,7 +105,6 @@ struct dummy_firmware_interface {
(GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \
GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK)
-
static inline u32 input_page_read(const u32 *const input, const u32 offset)
{
WARN_ON(offset % sizeof(u32));
@@ -273,6 +273,18 @@ void kbase_csf_update_firmware_memory(struct kbase_device *kbdev,
/* NO_MALI: Nothing to do here */
}
+void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev,
+ u32 gpu_addr, u32 *value)
+{
+ /* NO_MALI: Nothing to do here */
+}
+
+void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev,
+ u32 gpu_addr, u32 value)
+{
+ /* NO_MALI: Nothing to do here */
+}
+
void kbase_csf_firmware_cs_input(
const struct kbase_csf_cmd_stream_info *const info, const u32 offset,
const u32 value)
@@ -704,6 +716,71 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
kbdev->csf.gpu_idle_dur_count);
}
+static bool global_debug_request_complete(struct kbase_device *const kbdev, u32 const req_mask)
+{
+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+ bool complete = false;
+ unsigned long flags;
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+
+ if ((kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK) & req_mask) ==
+ (kbase_csf_firmware_global_input_read(global_iface, GLB_DEBUG_REQ) & req_mask))
+ complete = true;
+
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+ return complete;
+}
+
+static void set_global_debug_request(const struct kbase_csf_global_iface *const global_iface,
+ u32 const req_mask)
+{
+ u32 glb_debug_req;
+
+ kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev);
+
+ glb_debug_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK);
+ glb_debug_req ^= req_mask;
+
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_debug_req, req_mask);
+}
+
+static void request_fw_core_dump(
+ const struct kbase_csf_global_iface *const global_iface)
+{
+ uint32_t run_mode = GLB_DEBUG_REQ_RUN_MODE_SET(0, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP);
+
+ set_global_debug_request(global_iface, GLB_DEBUG_REQ_DEBUG_RUN_MASK | run_mode);
+
+ set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK);
+}
+
+int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev)
+{
+ const struct kbase_csf_global_iface *const global_iface =
+ &kbdev->csf.global_iface;
+ unsigned long flags;
+ int ret;
+
+ /* Serialize CORE_DUMP requests. */
+ mutex_lock(&kbdev->csf.reg_lock);
+
+ /* Update GLB_REQ with CORE_DUMP request and make firmware act on it. */
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ request_fw_core_dump(global_iface);
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+ /* Wait for firmware to acknowledge completion of the CORE_DUMP request. */
+ ret = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK);
+ if (!ret)
+ WARN_ON(!global_debug_request_complete(kbdev, GLB_DEBUG_REQ_DEBUG_RUN_MASK));
+
+ mutex_unlock(&kbdev->csf.reg_lock);
+
+ return ret;
+}
static void global_init(struct kbase_device *const kbdev, u64 core_mask)
{
@@ -712,8 +789,7 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)
GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK |
- GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK |
- 0;
+ GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK | GLB_REQ_DEBUG_CSF_REQ_MASK;
const struct kbase_csf_global_iface *const global_iface =
&kbdev->csf.global_iface;
@@ -727,6 +803,15 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)
set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev));
+#ifndef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
+ /* The GPU idle timer is always enabled for simplicity. Checks will be
+ * done before scheduling the GPU idle worker to see if it is
+ * appropriate for the current power policy.
+ */
+ enable_gpu_idle_timer(kbdev);
+#endif
+
+
/* Unmask the interrupts */
kbase_csf_firmware_global_input(global_iface,
GLB_ACK_IRQ_MASK, ack_irq_mask);
@@ -899,7 +984,7 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev)
u32 dur;
kbase_csf_scheduler_spin_lock(kbdev, &flags);
- dur = kbdev->csf.gpu_idle_hysteresis_ms;
+ dur = kbdev->csf.gpu_idle_hysteresis_us;
kbase_csf_scheduler_spin_unlock(kbdev, flags);
return dur;
@@ -916,7 +1001,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
mutex_lock(&kbdev->fw_load_lock);
if (unlikely(!kbdev->csf.firmware_inited)) {
kbase_csf_scheduler_spin_lock(kbdev, &flags);
- kbdev->csf.gpu_idle_hysteresis_ms = dur;
+ kbdev->csf.gpu_idle_hysteresis_us = dur;
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
kbase_csf_scheduler_spin_unlock(kbdev, flags);
mutex_unlock(&kbdev->fw_load_lock);
@@ -932,8 +1017,18 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
return kbdev->csf.gpu_idle_dur_count;
}
+#ifndef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
+	/* The 'reg_lock' is also taken and held until the update is complete,
+	 * to ensure that updates of the idle timer value by multiple users
+	 * get serialized.
+ */
+ mutex_lock(&kbdev->csf.reg_lock);
+#endif
+
+#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
kbase_csf_scheduler_lock(kbdev);
if (kbdev->csf.scheduler.gpu_idle_fw_timer_enabled) {
+#endif /* CONFIG_MALI_HOST_CONTROLS_SC_RAILS */
/* The firmware only reads the new idle timer value when the timer is
* disabled.
*/
@@ -944,21 +1039,25 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK);
kbase_csf_scheduler_spin_lock(kbdev, &flags);
- kbdev->csf.gpu_idle_hysteresis_ms = dur;
+ kbdev->csf.gpu_idle_hysteresis_us = dur;
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK);
+#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
} else {
/* Record the new values. Would be used later when timer is
* enabled
*/
kbase_csf_scheduler_spin_lock(kbdev, &flags);
- kbdev->csf.gpu_idle_hysteresis_ms = dur;
+ kbdev->csf.gpu_idle_hysteresis_us = dur;
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
kbase_csf_scheduler_unlock(kbdev);
+#else
+ mutex_unlock(&kbdev->csf.reg_lock);
+#endif
kbase_csf_scheduler_pm_idle(kbdev);
@@ -971,7 +1070,6 @@ end:
static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us)
{
-#define PWROFF_VAL_UNIT_SHIFT (10)
/* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */
u64 freq = arch_timer_get_cntfrq();
u64 dur_val = dur_us;
@@ -1049,6 +1147,7 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
INIT_LIST_HEAD(&kbdev->csf.firmware_config);
INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
+ INIT_LIST_HEAD(&kbdev->csf.user_reg.list);
INIT_WORK(&kbdev->csf.firmware_reload_work,
kbase_csf_firmware_reload_worker);
INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker);
@@ -1058,16 +1157,21 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
return 0;
}
+void kbase_csf_firmware_early_term(struct kbase_device *kbdev)
+{
+ mutex_destroy(&kbdev->csf.reg_lock);
+}
+
int kbase_csf_firmware_late_init(struct kbase_device *kbdev)
{
- kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
+ kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC;
#ifdef KBASE_PM_RUNTIME
if (kbase_pm_gpu_sleep_allowed(kbdev))
- kbdev->csf.gpu_idle_hysteresis_ms /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
+ kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
#endif
- WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
+ WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us);
kbdev->csf.gpu_idle_dur_count =
- convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
+ convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us);
return 0;
}
@@ -1150,8 +1254,6 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev)
/* NO_MALI: Don't stop firmware or unload MMU tables */
- kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu);
-
kbase_csf_scheduler_term(kbdev);
kbase_csf_free_dummy_user_reg_page(kbdev);
@@ -1177,12 +1279,12 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev)
/* NO_MALI: No trace buffers to terminate */
- mutex_destroy(&kbdev->csf.reg_lock);
-
/* This will also free up the region allocated for the shared interface
* entry parsed from the firmware image.
*/
kbase_mcu_shared_interface_region_tracker_term(kbdev);
+
+ kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu);
}
void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev)
@@ -1231,8 +1333,9 @@ void kbase_csf_firmware_ping(struct kbase_device *const kbdev)
kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
-int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev)
+int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int wait_timeout_ms)
{
+ CSTD_UNUSED(wait_timeout_ms);
kbase_csf_firmware_ping(kbdev);
return wait_for_global_request(kbdev, GLB_REQ_PING_MASK);
}
@@ -1271,7 +1374,7 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev)
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
}
-void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
+int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
{
int err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK);
@@ -1279,6 +1382,8 @@ void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
}
+
+ return err;
}
void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
@@ -1499,8 +1604,8 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
if (!cpu_addr)
goto vmap_error;
- va_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
- num_pages, KBASE_REG_ZONE_MCU_SHARED);
+ va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages,
+ KBASE_REG_ZONE_MCU_SHARED);
if (!va_reg)
goto va_region_alloc_error;
@@ -1516,7 +1621,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn,
&phys[0], num_pages, gpu_map_properties,
- KBASE_MEM_GROUP_CSF_FW, NULL);
+ KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false);
if (ret)
goto mmu_insert_pages_error;
@@ -1577,4 +1682,3 @@ void kbase_csf_firmware_mcu_shared_mapping_term(
vunmap(csf_mapping->cpu_addr);
kfree(csf_mapping->phys);
}
-
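
set_global_debug_request() and global_debug_request_complete() above follow the CSF request/acknowledge toggle convention: a request is raised by flipping the masked GLB_DEBUG_REQ bits relative to the last GLB_DEBUG_ACK value, and it is complete once the firmware has toggled the matching ACK bits back to equality. A minimal sketch of that pattern on plain values (kernel types assumed, helper names illustrative):

    /* Host side: raise a request by toggling the masked bits away from the ack value */
    static u32 toggle_request(u32 req, u32 ack, u32 mask)
    {
            return (req & ~mask) | ((ack ^ mask) & mask);
    }

    /* The request is complete when req and ack agree again on the masked bits */
    static bool request_complete(u32 req, u32 ack, u32 mask)
    {
            return ((req ^ ack) & mask) == 0;
    }
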
diff --git a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c
index f94806e..7c14b8e 100644
--- a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c
+++ b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -76,6 +76,41 @@ static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc)
}
/**
+ * evict_heap_context - Evict the data of heap context from GPU's L2 cache.
+ *
+ * @ctx_alloc: Pointer to the heap context allocator.
+ * @heap_gpu_va: The GPU virtual address of a heap context structure to free.
+ *
+ * This function is called when the memory for a heap context is freed. It uses the
+ * FLUSH_PA_RANGE command to evict the heap context data, so on older CSF GPUs
+ * nothing is done. On those GPUs the whole GPU cache is expected to be flushed
+ * anyway when the initial chunks of the heap are freed, just before the memory
+ * for the heap context is freed.
+ */
+static void evict_heap_context(struct kbase_csf_heap_context_allocator *const ctx_alloc,
+ u64 const heap_gpu_va)
+{
+ struct kbase_context *const kctx = ctx_alloc->kctx;
+ u32 offset_in_bytes = (u32)(heap_gpu_va - ctx_alloc->gpu_va);
+ u32 offset_within_page = offset_in_bytes & ~PAGE_MASK;
+ u32 page_index = offset_in_bytes >> PAGE_SHIFT;
+ struct tagged_addr page =
+ kbase_get_gpu_phy_pages(ctx_alloc->region)[page_index];
+ phys_addr_t heap_context_pa = as_phys_addr_t(page) + offset_within_page;
+
+ lockdep_assert_held(&ctx_alloc->lock);
+
+ /* There is no need to take vm_lock here as the ctx_alloc region is protected
+ * via a nonzero no_user_free_count. The region and the backing page can't
+ * disappear whilst this function is executing. Flush type is passed as FLUSH_PT
+ * to CLN+INV L2 only.
+ */
+ kbase_mmu_flush_pa_range(kctx->kbdev, kctx,
+ heap_context_pa, ctx_alloc->heap_context_size_aligned,
+ KBASE_MMU_OP_FLUSH_PT);
+}
+
+/**
* sub_free - Free a heap context sub-allocated from a GPU memory region
*
* @ctx_alloc: Pointer to the heap context allocator.
@@ -102,6 +137,8 @@ static void sub_free(struct kbase_csf_heap_context_allocator *const ctx_alloc,
WARN_ON(ctx_offset % ctx_alloc->heap_context_size_aligned))
return;
+ evict_heap_context(ctx_alloc, heap_gpu_va);
+
heap_nr = ctx_offset / ctx_alloc->heap_context_size_aligned;
dev_dbg(kctx->kbdev->dev,
"Freed tiler heap context %d (0x%llX)\n", heap_nr, heap_gpu_va);
@@ -144,14 +181,9 @@ void kbase_csf_heap_context_allocator_term(
if (ctx_alloc->region) {
kbase_gpu_vm_lock(kctx);
- /*
- * We can't enforce (nor check) the no_user_free refcount
- * to be 0 here as other code regions can take such a reference.
- * Anyway, this isn't an issue as the region will eventually
- * be freed by the region tracker if its refcount didn't drop
- * to 0.
- */
- kbase_va_region_no_user_free_put(kctx, ctx_alloc->region);
+ WARN_ON(!kbase_va_region_is_no_user_free(ctx_alloc->region));
+
+ kbase_va_region_no_user_free_dec(ctx_alloc->region);
kbase_mem_free_region(kctx, ctx_alloc->region);
kbase_gpu_vm_unlock(kctx);
}
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.c b/mali_kbase/csf/mali_kbase_csf_kcpu.c
index dc66f62..345cf98 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu.c
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu.c
@@ -80,7 +80,14 @@ static int kbase_kcpu_map_import_prepare(
* on the physical pages tracking object. When the last
* reference to the tracking object is dropped the pages
* would be unpinned if they weren't unpinned before.
+ *
+ * Region should be CPU cached: abort if it isn't.
*/
+ if (WARN_ON(!(reg->flags & KBASE_REG_CPU_CACHED))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
ret = kbase_jd_user_buf_pin_pages(kctx, reg);
if (ret)
goto out;
@@ -183,8 +190,7 @@ static void kbase_jit_add_to_pending_alloc_list(
struct kbase_kcpu_command_queue *blocked_queue;
lockdep_assert_held(&queue->lock);
-
- spin_lock(&kctx->csf.kcpu_queues.jit_lock);
+ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock);
list_for_each_entry(blocked_queue,
&kctx->csf.kcpu_queues.jit_blocked_queues,
@@ -200,8 +206,6 @@ static void kbase_jit_add_to_pending_alloc_list(
}
list_add_tail(&queue->jit_blocked, target_list_head);
-
- spin_unlock(&kctx->csf.kcpu_queues.jit_lock);
}
/**
@@ -233,25 +237,26 @@ static int kbase_kcpu_jit_allocate_process(
lockdep_assert_held(&queue->lock);
- if (alloc_info->blocked) {
- spin_lock(&kctx->csf.kcpu_queues.jit_lock);
- list_del(&queue->jit_blocked);
- spin_unlock(&kctx->csf.kcpu_queues.jit_lock);
- alloc_info->blocked = false;
- }
-
if (WARN_ON(!info))
return -EINVAL;
+ mutex_lock(&kctx->csf.kcpu_queues.jit_lock);
+
/* Check if all JIT IDs are not in use */
for (i = 0; i < count; i++, info++) {
/* The JIT ID is still in use so fail the allocation */
if (kctx->jit_alloc[info->id]) {
dev_dbg(kctx->kbdev->dev, "JIT ID still in use");
- return -EINVAL;
+ ret = -EINVAL;
+ goto fail;
}
}
+ if (alloc_info->blocked) {
+ list_del(&queue->jit_blocked);
+ alloc_info->blocked = false;
+ }
+
/* Now start the allocation loop */
for (i = 0, info = alloc_info->info; i < count; i++, info++) {
/* Create a JIT allocation */
@@ -260,7 +265,6 @@ static int kbase_kcpu_jit_allocate_process(
bool can_block = false;
struct kbase_kcpu_command const *jit_cmd;
- spin_lock(&kctx->csf.kcpu_queues.jit_lock);
list_for_each_entry(jit_cmd, &kctx->csf.kcpu_queues.jit_cmds_head, info.jit_alloc.node) {
if (jit_cmd == cmd)
break;
@@ -279,7 +283,6 @@ static int kbase_kcpu_jit_allocate_process(
}
}
}
- spin_unlock(&kctx->csf.kcpu_queues.jit_lock);
if (!can_block) {
/*
@@ -288,7 +291,7 @@ static int kbase_kcpu_jit_allocate_process(
*/
dev_warn_ratelimited(kctx->kbdev->dev, "JIT alloc command failed: %pK\n", cmd);
ret = -ENOMEM;
- goto fail;
+ goto fail_rollback;
}
/* There are pending frees for an active allocation
@@ -306,7 +309,8 @@ static int kbase_kcpu_jit_allocate_process(
kctx->jit_alloc[info->id] = NULL;
}
- return -EAGAIN;
+ ret = -EAGAIN;
+ goto fail;
}
/* Bind it to the user provided ID. */
@@ -322,7 +326,7 @@ static int kbase_kcpu_jit_allocate_process(
KBASE_REG_CPU_WR, &mapping);
if (!ptr) {
ret = -ENOMEM;
- goto fail;
+ goto fail_rollback;
}
reg = kctx->jit_alloc[info->id];
@@ -331,9 +335,11 @@ static int kbase_kcpu_jit_allocate_process(
kbase_vunmap(kctx, &mapping);
}
+ mutex_unlock(&kctx->csf.kcpu_queues.jit_lock);
+
return 0;
-fail:
+fail_rollback:
/* Roll back completely */
for (i = 0, info = alloc_info->info; i < count; i++, info++) {
/* Free the allocations that were successful.
@@ -346,6 +352,8 @@ fail:
kctx->jit_alloc[info->id] = KBASE_RESERVED_REG_JIT_ALLOC;
}
+fail:
+ mutex_unlock(&kctx->csf.kcpu_queues.jit_lock);
return ret;
}
@@ -357,23 +365,16 @@ static int kbase_kcpu_jit_allocate_prepare(
{
struct kbase_context *const kctx = kcpu_queue->kctx;
void __user *data = u64_to_user_ptr(alloc_info->info);
- struct base_jit_alloc_info *info;
+ struct base_jit_alloc_info *info = NULL;
u32 count = alloc_info->count;
int ret = 0;
u32 i;
lockdep_assert_held(&kcpu_queue->lock);
- if (!kbase_mem_allow_alloc(kctx)) {
- dev_dbg(kctx->kbdev->dev,
- "Invalid attempt to allocate JIT memory by %s/%d for ctx %d_%d",
- current->comm, current->pid, kctx->tgid, kctx->id);
- ret = -EINVAL;
- goto out;
- }
-
- if (!data || count > kcpu_queue->kctx->jit_max_allocations ||
- count > ARRAY_SIZE(kctx->jit_alloc)) {
+ if ((count == 0) || (count > ARRAY_SIZE(kctx->jit_alloc)) ||
+	/* By dropping the csg_reg from the unused list, it becomes active and is tracked
+	 * by its on-slot bound group. The design is that, when this on-slot group is moved
+	 * off-slot, the scheduler's slot clean-up will add the csg_reg back to the tail of
+	 * the unused list.
}
@@ -408,13 +409,13 @@ static int kbase_kcpu_jit_allocate_prepare(
}
current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_ALLOC;
- spin_lock(&kctx->csf.kcpu_queues.jit_lock);
- list_add_tail(&current_command->info.jit_alloc.node,
- &kctx->csf.kcpu_queues.jit_cmds_head);
- spin_unlock(&kctx->csf.kcpu_queues.jit_lock);
current_command->info.jit_alloc.info = info;
current_command->info.jit_alloc.count = count;
current_command->info.jit_alloc.blocked = false;
+ mutex_lock(&kctx->csf.kcpu_queues.jit_lock);
+ list_add_tail(&current_command->info.jit_alloc.node,
+ &kctx->csf.kcpu_queues.jit_cmds_head);
+ mutex_unlock(&kctx->csf.kcpu_queues.jit_lock);
return 0;
out_free:
@@ -435,7 +436,7 @@ static void kbase_kcpu_jit_allocate_finish(
{
lockdep_assert_held(&queue->lock);
- spin_lock(&queue->kctx->csf.kcpu_queues.jit_lock);
+ mutex_lock(&queue->kctx->csf.kcpu_queues.jit_lock);
/* Remove this command from the jit_cmds_head list */
list_del(&cmd->info.jit_alloc.node);
@@ -449,7 +450,7 @@ static void kbase_kcpu_jit_allocate_finish(
cmd->info.jit_alloc.blocked = false;
}
- spin_unlock(&queue->kctx->csf.kcpu_queues.jit_lock);
+ mutex_unlock(&queue->kctx->csf.kcpu_queues.jit_lock);
kfree(cmd->info.jit_alloc.info);
}
@@ -463,17 +464,17 @@ static void kbase_kcpu_jit_retry_pending_allocs(struct kbase_context *kctx)
{
struct kbase_kcpu_command_queue *blocked_queue;
+ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock);
+
/*
* Reschedule all queues blocked by JIT_ALLOC commands.
* NOTE: This code traverses the list of blocked queues directly. It
* only works as long as the queued works are not executed at the same
* time. This precondition is true since we're holding the
- * kbase_csf_kcpu_queue_context.lock .
+ * kbase_csf_kcpu_queue_context.jit_lock .
*/
- spin_lock(&kctx->csf.kcpu_queues.jit_lock);
list_for_each_entry(blocked_queue, &kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked)
kthread_queue_work(&blocked_queue->csf_kcpu_worker, &blocked_queue->work);
- spin_unlock(&kctx->csf.kcpu_queues.jit_lock);
}
static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue,
@@ -491,6 +492,7 @@ static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue,
return -EINVAL;
lockdep_assert_held(&queue->lock);
+ mutex_lock(&kctx->csf.kcpu_queues.jit_lock);
KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END(queue->kctx->kbdev,
queue);
@@ -526,11 +528,11 @@ static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue,
* Remove this command from the jit_cmds_head list and retry pending
* allocations.
*/
- spin_lock(&kctx->csf.kcpu_queues.jit_lock);
list_del(&cmd->info.jit_free.node);
- spin_unlock(&kctx->csf.kcpu_queues.jit_lock);
kbase_kcpu_jit_retry_pending_allocs(kctx);
+ mutex_unlock(&kctx->csf.kcpu_queues.jit_lock);
+
/* Free the list of ids */
kfree(ids);
@@ -595,12 +597,12 @@ static int kbase_kcpu_jit_free_prepare(
}
current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_FREE;
- spin_lock(&kctx->csf.kcpu_queues.jit_lock);
- list_add_tail(&current_command->info.jit_free.node,
- &kctx->csf.kcpu_queues.jit_cmds_head);
- spin_unlock(&kctx->csf.kcpu_queues.jit_lock);
current_command->info.jit_free.ids = ids;
current_command->info.jit_free.count = count;
+ mutex_lock(&kctx->csf.kcpu_queues.jit_lock);
+ list_add_tail(&current_command->info.jit_free.node,
+ &kctx->csf.kcpu_queues.jit_cmds_head);
+ mutex_unlock(&kctx->csf.kcpu_queues.jit_lock);
return 0;
out_free:
@@ -609,6 +611,7 @@ out:
return ret;
}
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
static int kbase_csf_queue_group_suspend_prepare(
struct kbase_kcpu_command_queue *kcpu_queue,
struct base_kcpu_command_group_suspend_info *suspend_buf,
@@ -680,8 +683,7 @@ static int kbase_csf_queue_group_suspend_prepare(
(kbase_reg_current_backed_size(reg) < nr_pages) ||
!(reg->flags & KBASE_REG_CPU_WR) ||
(reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) ||
- (kbase_is_region_shrinkable(reg)) ||
- (kbase_va_region_is_no_user_free(kctx, reg))) {
+ (kbase_is_region_shrinkable(reg)) || (kbase_va_region_is_no_user_free(reg))) {
ret = -EINVAL;
goto out_clean_pages;
}
@@ -725,6 +727,7 @@ static int kbase_csf_queue_group_suspend_process(struct kbase_context *kctx,
{
return kbase_csf_queue_group_suspend(kctx, sus_buf, group_handle);
}
+#endif
static enum kbase_csf_event_callback_action event_cqs_callback(void *param)
{
@@ -1036,9 +1039,12 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev,
queue->kctx, cqs_wait_operation->objs[i].addr, &mapping);
u64 val = 0;
- /* GPUCORE-28172 RDT to review */
- if (!queue->command_started)
+ if (!queue->command_started) {
queue->command_started = true;
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START(
+ kbdev, queue);
+ }
+
if (!evt) {
dev_warn(kbdev->dev,
@@ -1088,7 +1094,8 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev,
queue->has_error = true;
}
- /* GPUCORE-28172 RDT to review */
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END(
+ kbdev, queue, *(u32 *)evt);
queue->command_started = false;
}
@@ -1231,8 +1238,6 @@ static void kbase_kcpu_cqs_set_operation_process(
evt = (uintptr_t)kbase_phy_alloc_mapping_get(
queue->kctx, cqs_set_operation->objs[i].addr, &mapping);
- /* GPUCORE-28172 RDT to review */
-
if (!evt) {
dev_warn(kbdev->dev,
"Sync memory %llx already freed", cqs_set_operation->objs[i].addr);
@@ -1257,7 +1262,8 @@ static void kbase_kcpu_cqs_set_operation_process(
break;
}
- /* GPUCORE-28172 RDT to review */
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION(
+ kbdev, queue, *(u32 *)evt ? 1 : 0);
/* Always propagate errors */
*(u32 *)evt = queue->has_error;
@@ -1344,9 +1350,8 @@ static void kbase_csf_fence_wait_callback(struct dma_fence *fence,
kthread_queue_work(&kcpu_queue->csf_kcpu_worker, &kcpu_queue->work);
}
-static void kbase_kcpu_fence_wait_cancel(
- struct kbase_kcpu_command_queue *kcpu_queue,
- struct kbase_kcpu_command_fence_info *fence_info)
+static void kbasep_kcpu_fence_wait_cancel(struct kbase_kcpu_command_queue *kcpu_queue,
+ struct kbase_kcpu_command_fence_info *fence_info)
{
struct kbase_context *const kctx = kcpu_queue->kctx;
@@ -1528,15 +1533,14 @@ static int kbase_kcpu_fence_wait_process(
*/
if (fence_status)
- kbase_kcpu_fence_wait_cancel(kcpu_queue, fence_info);
+ kbasep_kcpu_fence_wait_cancel(kcpu_queue, fence_info);
return fence_status;
}
-static int kbase_kcpu_fence_wait_prepare(
- struct kbase_kcpu_command_queue *kcpu_queue,
- struct base_kcpu_command_fence_info *fence_info,
- struct kbase_kcpu_command *current_command)
+static int kbase_kcpu_fence_wait_prepare(struct kbase_kcpu_command_queue *kcpu_queue,
+ struct base_kcpu_command_fence_info *fence_info,
+ struct kbase_kcpu_command *current_command)
{
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
struct fence *fence_in;
@@ -1547,8 +1551,7 @@ static int kbase_kcpu_fence_wait_prepare(
lockdep_assert_held(&kcpu_queue->lock);
- if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence),
- sizeof(fence)))
+ if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence)))
return -ENOMEM;
fence_in = sync_file_get_fence(fence.basep.fd);
@@ -1562,9 +1565,8 @@ static int kbase_kcpu_fence_wait_prepare(
return 0;
}
-static int kbase_kcpu_fence_signal_process(
- struct kbase_kcpu_command_queue *kcpu_queue,
- struct kbase_kcpu_command_fence_info *fence_info)
+static int kbasep_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue,
+ struct kbase_kcpu_command_fence_info *fence_info)
{
struct kbase_context *const kctx = kcpu_queue->kctx;
int ret;
@@ -1585,37 +1587,37 @@ static int kbase_kcpu_fence_signal_process(
fence_info->fence->seqno);
/* dma_fence refcount needs to be decreased to release it. */
- dma_fence_put(fence_info->fence);
+ kbase_fence_put(fence_info->fence);
fence_info->fence = NULL;
return ret;
}
-static int kbase_kcpu_fence_signal_prepare(
- struct kbase_kcpu_command_queue *kcpu_queue,
- struct base_kcpu_command_fence_info *fence_info,
- struct kbase_kcpu_command *current_command)
+static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue,
+ struct kbase_kcpu_command *current_command,
+ struct base_fence *fence, struct sync_file **sync_file,
+ int *fd)
{
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
struct fence *fence_out;
#else
struct dma_fence *fence_out;
#endif
- struct base_fence fence;
- struct sync_file *sync_file;
+ struct kbase_kcpu_dma_fence *kcpu_fence;
int ret = 0;
- int fd;
lockdep_assert_held(&kcpu_queue->lock);
- if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence),
- sizeof(fence)))
- return -EFAULT;
-
- fence_out = kzalloc(sizeof(*fence_out), GFP_KERNEL);
- if (!fence_out)
+ kcpu_fence = kzalloc(sizeof(*kcpu_fence), GFP_KERNEL);
+ if (!kcpu_fence)
return -ENOMEM;
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ fence_out = (struct fence *)kcpu_fence;
+#else
+ fence_out = (struct dma_fence *)kcpu_fence;
+#endif
+
dma_fence_init(fence_out,
&kbase_fence_ops,
&kbase_csf_fence_lock,
@@ -1631,28 +1633,66 @@ static int kbase_kcpu_fence_signal_prepare(
dma_fence_get(fence_out);
#endif
+ /* Set reference to KCPU metadata and increment refcount */
+ kcpu_fence->metadata = kcpu_queue->metadata;
+ WARN_ON(!kbase_refcount_inc_not_zero(&kcpu_fence->metadata->refcount));
+
/* create a sync_file fd representing the fence */
- sync_file = sync_file_create(fence_out);
- if (!sync_file) {
+ *sync_file = sync_file_create(fence_out);
+ if (!(*sync_file)) {
ret = -ENOMEM;
goto file_create_fail;
}
- fd = get_unused_fd_flags(O_CLOEXEC);
- if (fd < 0) {
- ret = fd;
+ *fd = get_unused_fd_flags(O_CLOEXEC);
+ if (*fd < 0) {
+ ret = *fd;
goto fd_flags_fail;
}
- fence.basep.fd = fd;
+ fence->basep.fd = *fd;
current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL;
current_command->info.fence.fence = fence_out;
+ return 0;
+
+fd_flags_fail:
+ fput((*sync_file)->file);
+file_create_fail:
+ /*
+ * Upon failure, dma_fence refcount that was increased by
+ * dma_fence_get() or sync_file_create() needs to be decreased
+ * to release it.
+ */
+ kbase_fence_put(fence_out);
+ current_command->info.fence.fence = NULL;
+
+ return ret;
+}
+
+static int kbase_kcpu_fence_signal_prepare(struct kbase_kcpu_command_queue *kcpu_queue,
+ struct base_kcpu_command_fence_info *fence_info,
+ struct kbase_kcpu_command *current_command)
+{
+ struct base_fence fence;
+ struct sync_file *sync_file = NULL;
+ int fd;
+ int ret = 0;
+
+ lockdep_assert_held(&kcpu_queue->lock);
+
+ if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence)))
+ return -EFAULT;
+
+ ret = kbasep_kcpu_fence_signal_init(kcpu_queue, current_command, &fence, &sync_file, &fd);
+ if (ret)
+ return ret;
+
if (copy_to_user(u64_to_user_ptr(fence_info->fence), &fence,
sizeof(fence))) {
ret = -EFAULT;
- goto fd_flags_fail;
+ goto fail;
}
/* 'sync_file' pointer can't be safely dereferenced once 'fd' is
@@ -1662,21 +1702,34 @@ static int kbase_kcpu_fence_signal_prepare(
fd_install(fd, sync_file->file);
return 0;
-fd_flags_fail:
+fail:
fput(sync_file->file);
-file_create_fail:
- /*
- * Upon failure, dma_fence refcount that was increased by
- * dma_fence_get() or sync_file_create() needs to be decreased
- * to release it.
- */
- dma_fence_put(fence_out);
-
+ kbase_fence_put(current_command->info.fence.fence);
current_command->info.fence.fence = NULL;
- kfree(fence_out);
return ret;
}
+
+int kbase_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue,
+ struct kbase_kcpu_command_fence_info *fence_info)
+{
+ if (!kcpu_queue || !fence_info)
+ return -EINVAL;
+
+ return kbasep_kcpu_fence_signal_process(kcpu_queue, fence_info);
+}
+KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_process);
+
+int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue,
+ struct kbase_kcpu_command *current_command,
+ struct base_fence *fence, struct sync_file **sync_file, int *fd)
+{
+ if (!kcpu_queue || !current_command || !fence || !sync_file || !fd)
+ return -EINVAL;
+
+ return kbasep_kcpu_fence_signal_init(kcpu_queue, current_command, fence, sync_file, fd);
+}
+KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_init);
#endif /* CONFIG_SYNC_FILE */
static void kcpu_queue_process_worker(struct kthread_work *data)
@@ -1713,6 +1766,9 @@ static int delete_queue(struct kbase_context *kctx, u32 id)
mutex_lock(&queue->lock);
+ /* Metadata struct may outlive KCPU queue. */
+ kbase_kcpu_dma_fence_meta_put(queue->metadata);
+
/* Drain the remaining work for this queue first and go past
* all the waits.
*/
@@ -1818,8 +1874,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
status = 0;
#if IS_ENABLED(CONFIG_SYNC_FILE)
if (drain_queue) {
- kbase_kcpu_fence_wait_cancel(queue,
- &cmd->info.fence);
+ kbasep_kcpu_fence_wait_cancel(queue, &cmd->info.fence);
} else {
status = kbase_kcpu_fence_wait_process(queue,
&cmd->info.fence);
@@ -1849,8 +1904,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
status = 0;
#if IS_ENABLED(CONFIG_SYNC_FILE)
- status = kbase_kcpu_fence_signal_process(
- queue, &cmd->info.fence);
+ status = kbasep_kcpu_fence_signal_process(queue, &cmd->info.fence);
if (status < 0)
queue->has_error = true;
@@ -2021,6 +2075,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
kbdev, queue);
break;
}
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: {
struct kbase_suspend_copy_buffer *sus_buf =
cmd->info.suspend_buf_copy.sus_buf;
@@ -2055,6 +2110,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
kfree(sus_buf);
break;
}
+#endif
default:
dev_dbg(kbdev->dev,
"Unrecognized command type");
@@ -2129,12 +2185,29 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
}
case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION:
{
- /* GPUCORE-28172 RDT to review */
+ const struct base_cqs_wait_operation_info *waits =
+ cmd->info.cqs_wait_operation.objs;
+ u32 inherit_err_flags = cmd->info.cqs_wait_operation.inherit_err_flags;
+ unsigned int i;
+
+ for (i = 0; i < cmd->info.cqs_wait_operation.nr_objs; i++) {
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION(
+ kbdev, queue, waits[i].addr, waits[i].val,
+ waits[i].operation, waits[i].data_type,
+ (inherit_err_flags & ((uint32_t)1 << i)) ? 1 : 0);
+ }
break;
}
case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION:
{
- /* GPUCORE-28172 RDT to review */
+ const struct base_cqs_set_operation_info *sets = cmd->info.cqs_set_operation.objs;
+ unsigned int i;
+
+ for (i = 0; i < cmd->info.cqs_set_operation.nr_objs; i++) {
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION(
+ kbdev, queue, sets[i].addr, sets[i].val,
+ sets[i].operation, sets[i].data_type);
+ }
break;
}
case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER:
@@ -2181,11 +2254,13 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE(kbdev, queue);
break;
}
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND:
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND(
kbdev, queue, cmd->info.suspend_buf_copy.sus_buf,
cmd->info.suspend_buf_copy.group_handle);
break;
+#endif
default:
dev_dbg(kbdev->dev, "Unknown command type %u", cmd->type);
break;
@@ -2202,9 +2277,11 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
/* The offset to the first command that is being processed or yet to
* be processed is of u8 type, so the number of commands inside the
- * queue cannot be more than 256.
+ * queue cannot be more than 256. The current implementation expects
+ * exactly 256, any other size will require the addition of wrapping
+ * logic.
*/
- BUILD_BUG_ON(KBASEP_KCPU_QUEUE_SIZE > 256);
+ BUILD_BUG_ON(KBASEP_KCPU_QUEUE_SIZE != 256);
/* Whilst the backend interface allows enqueueing multiple commands in
* a single operation, the Base interface does not expose any mechanism
@@ -2280,7 +2357,7 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
}
}
- kcpu_cmd->enqueue_ts = atomic64_read(&kctx->csf.kcpu_queues.num_cmds);
+ kcpu_cmd->enqueue_ts = atomic64_inc_return(&kctx->csf.kcpu_queues.cmd_seq_num);
switch (command.type) {
case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:
#if IS_ENABLED(CONFIG_SYNC_FILE)
@@ -2340,19 +2417,19 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
ret = kbase_kcpu_jit_free_prepare(queue,
&command.info.jit_free, kcpu_cmd);
break;
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND:
ret = kbase_csf_queue_group_suspend_prepare(queue,
&command.info.suspend_buf_copy,
kcpu_cmd);
break;
+#endif
default:
dev_dbg(queue->kctx->kbdev->dev,
"Unknown command type %u", command.type);
ret = -EINVAL;
break;
}
-
- atomic64_inc(&kctx->csf.kcpu_queues.num_cmds);
}
if (!ret) {
@@ -2369,10 +2446,7 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
}
queue->num_pending_cmds += enq->nr_commands;
- kthread_queue_work(&queue->csf_kcpu_worker, &queue->work);
- } else {
- /* Roll back the number of enqueued commands */
- atomic64_sub(i, &kctx->csf.kcpu_queues.num_cmds);
+ kcpu_queue_process(queue, false);
}
out:
@@ -2392,7 +2466,7 @@ int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx)
mutex_init(&kctx->csf.kcpu_queues.lock);
- atomic64_set(&kctx->csf.kcpu_queues.num_cmds, 0);
+ atomic64_set(&kctx->csf.kcpu_queues.cmd_seq_num, 0);
return 0;
}
@@ -2412,6 +2486,7 @@ void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx)
mutex_destroy(&kctx->csf.kcpu_queues.lock);
}
+KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_context_term);
int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx,
struct kbase_ioctl_kcpu_queue_delete *del)
@@ -2424,8 +2499,11 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
{
struct kbase_kcpu_command_queue *queue;
int idx;
+ int n;
int ret = 0;
-
+#if IS_ENABLED(CONFIG_SYNC_FILE)
+ struct kbase_kcpu_dma_fence_meta *metadata;
+#endif
/* The queue id is of u8 type and we use the index of the kcpu_queues
* array as an id, so the number of elements in the array can't be
* more than 256.
@@ -2471,7 +2549,31 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
queue->fence_context = dma_fence_context_alloc(1);
queue->fence_seqno = 0;
queue->fence_wait_processed = false;
-#endif
+
+ metadata = kzalloc(sizeof(*metadata), GFP_KERNEL);
+ if (!metadata) {
+ kbase_destroy_kworker_stack(&queue->csf_kcpu_worker);
+ kfree(queue);
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ metadata->kbdev = kctx->kbdev;
+ metadata->kctx_id = kctx->id;
+ n = snprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%d-%d_%d-%lld-kcpu",
+ kctx->kbdev->id, kctx->tgid, kctx->id, queue->fence_context);
+ if (WARN_ON(n >= MAX_TIMELINE_NAME)) {
+ kbase_destroy_kworker_stack(&queue->csf_kcpu_worker);
+ kfree(queue);
+ kfree(metadata);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ kbase_refcount_set(&metadata->refcount, 1);
+ queue->metadata = metadata;
+ atomic_inc(&kctx->kbdev->live_fence_metadata);
+#endif /* CONFIG_SYNC_FILE */
queue->enqueue_failed = false;
queue->command_started = false;
INIT_LIST_HEAD(&queue->jit_blocked);
@@ -2497,3 +2599,4 @@ out:
return ret;
}
+KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_new);
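
kbasep_kcpu_fence_signal_init() and kbase_kcpu_fence_signal_prepare() above follow the standard dma-fence/sync_file pattern for exporting a fence to user space. The stripped-down sketch below shows only that generic flow on a plain struct dma_fence; it omits the driver's kbase_kcpu_dma_fence wrapper, the metadata refcounting and the copy_to_user/fd_install ordering handled in the patch, and the helper name is illustrative:

    #include <linux/dma-fence.h>
    #include <linux/fcntl.h>
    #include <linux/file.h>
    #include <linux/sync_file.h>

    static int export_fence_as_fd(struct dma_fence *fence)
    {
            struct sync_file *sync_file;
            int fd;

            /* Wrap the fence in a sync_file; this takes its own fence reference */
            sync_file = sync_file_create(fence);
            if (!sync_file)
                    return -ENOMEM;

            fd = get_unused_fd_flags(O_CLOEXEC);
            if (fd < 0) {
                    /* Releases the sync_file and the reference it took on the fence */
                    fput(sync_file->file);
                    return fd;
            }

            /* Publish the fd only once all user-visible setup has succeeded */
            fd_install(fd, sync_file->file);
            return fd;
    }
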
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.h b/mali_kbase/csf/mali_kbase_csf_kcpu.h
index f982f56..41c6e07 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu.h
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,9 @@
#ifndef _KBASE_CSF_KCPU_H_
#define _KBASE_CSF_KCPU_H_
+#include <mali_kbase_fence.h>
+#include <mali_kbase_sync.h>
+
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
#include <linux/fence.h>
#else
@@ -44,8 +47,8 @@ struct kbase_kcpu_command_import_info {
};
/**
- * struct kbase_kcpu_command_fence_info - Structure which holds information
- * about the fence object enqueued in the kcpu command queue
+ * struct kbase_kcpu_command_fence_info - Structure which holds information about the
+ * fence object enqueued in the kcpu command queue
*
* @fence_cb: Fence callback
* @fence: Fence
@@ -183,6 +186,7 @@ struct kbase_suspend_copy_buffer {
struct kbase_mem_phy_alloc *cpu_alloc;
};
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
/**
* struct kbase_kcpu_command_group_suspend_info - structure which contains
* suspend buffer data captured for a suspended queue group.
@@ -195,6 +199,7 @@ struct kbase_kcpu_command_group_suspend_info {
struct kbase_suspend_copy_buffer *sus_buf;
u8 group_handle;
};
+#endif
/**
@@ -229,7 +234,9 @@ struct kbase_kcpu_command {
struct kbase_kcpu_command_import_info import;
struct kbase_kcpu_command_jit_alloc_info jit_alloc;
struct kbase_kcpu_command_jit_free_info jit_free;
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
struct kbase_kcpu_command_group_suspend_info suspend_buf_copy;
+#endif
} info;
};
@@ -275,6 +282,8 @@ struct kbase_kcpu_command {
* @jit_blocked: Used to keep track of command queues blocked
* by a pending JIT allocation command.
* @fence_timeout: Timer used to detect the fence wait timeout.
+ * @metadata: Metadata structure containing basic information about
+ * this queue for any fence objects associated with this queue.
*/
struct kbase_kcpu_command_queue {
struct mutex lock;
@@ -296,6 +305,9 @@ struct kbase_kcpu_command_queue {
#ifdef CONFIG_MALI_FENCE_DEBUG
struct timer_list fence_timeout;
#endif /* CONFIG_MALI_FENCE_DEBUG */
+#if IS_ENABLED(CONFIG_SYNC_FILE)
+ struct kbase_kcpu_dma_fence_meta *metadata;
+#endif /* CONFIG_SYNC_FILE */
};
/**
@@ -360,4 +372,14 @@ int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx);
*/
void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx);
+#if IS_ENABLED(CONFIG_SYNC_FILE)
+/* Test wrappers for dma fence operations. */
+int kbase_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue,
+ struct kbase_kcpu_command_fence_info *fence_info);
+
+int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue,
+ struct kbase_kcpu_command *current_command,
+ struct base_fence *fence, struct sync_file **sync_file, int *fd);
+#endif /* CONFIG_SYNC_FILE */
+
#endif /* _KBASE_CSF_KCPU_H_ */
diff --git a/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c b/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c
new file mode 100644
index 0000000..bb5a092
--- /dev/null
+++ b/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c
@@ -0,0 +1,817 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include <linux/protected_memory_allocator.h>
+#include <mali_kbase.h>
+#include "mali_kbase_csf.h"
+#include "mali_kbase_csf_mcu_shared_reg.h"
+#include <mali_kbase_mem_migrate.h>
+
+/* Scaling factor in pre-allocating shared regions for suspend bufs and userios */
+#define MCU_SHARED_REGS_PREALLOCATE_SCALE (8)
+
+/* MCU shared region map attempt limit */
+#define MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT (4)
+
+/* Convert a VPFN to its start addr */
+#define GET_VPFN_VA(vpfn) ((vpfn) << PAGE_SHIFT)
+
+/* Macros for extract the corresponding VPFNs from a CSG_REG */
+#define CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages) (reg->start_pfn)
+#define CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages) (reg->start_pfn + nr_susp_pages)
+#define CSG_REG_USERIO_VPFN(reg, csi, nr_susp_pages) (reg->start_pfn + 2 * (nr_susp_pages + csi))
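+/*
+ * Resulting per-CSG region layout (page offsets from reg->start_pfn), as implied by
+ * the macros above, with two userio pages (input/output) per stream:
+ *   [0, nr_susp_pages)                  normal-mode suspend buffer
+ *   [nr_susp_pages, 2 * nr_susp_pages)  protected-mode suspend buffer
+ *   2 * (nr_susp_pages + csi) + {0,1}   userio input/output pages of stream csi
+ */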
+
+/* MCU shared segment dummy page mapping flags */
+#define DUMMY_PAGE_MAP_FLAGS (KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT) | KBASE_REG_GPU_NX)
+
+/* MCU shared segment suspend buffer mapping flags */
+#define SUSP_PAGE_MAP_FLAGS \
+ (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR | KBASE_REG_GPU_NX | \
+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT))
+
+/**
+ * struct kbase_csg_shared_region - Wrapper object for use with a CSG on runtime
+ * resources for suspend buffer pages, userio pages
+ * and their corresponding mapping GPU VA addresses
+ * from the MCU shared interface segment
+ *
+ * @link: Link to the managing list for the wrapper object.
+ * @reg: pointer to the region allocated from the shared interface segment, which
+ * covers the normal/P-mode suspend buffers, userio pages of the queues
+ * @grp: Pointer to the bound kbase_queue_group, or NULL if no binding (free).
+ * @pmode_mapped: Boolean for indicating the region has MMU mapped with the bound group's
+ * protected mode suspend buffer pages.
+ */
+struct kbase_csg_shared_region {
+ struct list_head link;
+ struct kbase_va_region *reg;
+ struct kbase_queue_group *grp;
+ bool pmode_mapped;
+};
+
+static unsigned long get_userio_mmu_flags(struct kbase_device *kbdev)
+{
+ unsigned long userio_map_flags;
+
+ if (kbdev->system_coherency == COHERENCY_NONE)
+ userio_map_flags =
+ KBASE_REG_GPU_RD | KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
+ else
+ userio_map_flags = KBASE_REG_GPU_RD | KBASE_REG_SHARE_BOTH |
+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED);
+
+ return (userio_map_flags | KBASE_REG_GPU_NX);
+}
+
+static void set_page_meta_status_not_movable(struct tagged_addr phy)
+{
+ if (kbase_page_migration_enabled) {
+ struct kbase_page_metadata *page_md = kbase_page_private(as_page(phy));
+
+ if (page_md) {
+ spin_lock(&page_md->migrate_lock);
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
+ spin_unlock(&page_md->migrate_lock);
+ }
+ }
+}
+
+static struct kbase_csg_shared_region *get_group_bound_csg_reg(struct kbase_queue_group *group)
+{
+ return (struct kbase_csg_shared_region *)group->csg_reg;
+}
+
+static inline int update_mapping_with_dummy_pages(struct kbase_device *kbdev, u64 vpfn,
+ u32 nr_pages)
+{
+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
+ const unsigned long mem_flags = DUMMY_PAGE_MAP_FLAGS;
+
+ return kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, shared_regs->dummy_phys, nr_pages,
+ mem_flags, KBASE_MEM_GROUP_CSF_FW);
+}
+
+static inline int insert_dummy_pages(struct kbase_device *kbdev, u64 vpfn, u32 nr_pages)
+{
+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
+ const unsigned long mem_flags = DUMMY_PAGE_MAP_FLAGS;
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
+ return kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
+ nr_pages, mem_flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW,
+ mmu_sync_info, NULL, false);
+}
+
+/* Reset consecutive retry count to zero */
+static void notify_group_csg_reg_map_done(struct kbase_queue_group *group)
+{
+ lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
+
+ /* Just clear the internal map retry count */
+ group->csg_reg_bind_retries = 0;
+}
+
+/* Return true if a fatal group error has already been triggered */
+static bool notify_group_csg_reg_map_error(struct kbase_queue_group *group)
+{
+ struct kbase_device *kbdev = group->kctx->kbdev;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ if (group->csg_reg_bind_retries < U8_MAX)
+ group->csg_reg_bind_retries++;
+
+ /* Allow only one fatal error notification */
+ if (group->csg_reg_bind_retries == MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT) {
+ struct base_gpu_queue_group_error const err_payload = {
+ .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
+ .payload = { .fatal_group = { .status = GPU_EXCEPTION_TYPE_SW_FAULT_0 } }
+ };
+
+ dev_err(kbdev->dev, "Fatal: group_%d_%d_%d exceeded shared region map retry limit",
+ group->kctx->tgid, group->kctx->id, group->handle);
+ kbase_csf_add_group_fatal_error(group, &err_payload);
+ kbase_event_wakeup_nosync(group->kctx);
+ }
+
+ return group->csg_reg_bind_retries >= MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT;
+}
+
+/* Replace the mapping at vpfn (reflecting a queue's userio_pages) with the given phys.
+ * If phys is NULL, the internal dummy_phys is used, which effectively restores the
+ * given queue's userio_pages to their initialized state (i.e. mapped to the default
+ * dummy page).
+ * In case of a CSF MMU update error on a queue, the dummy phy is used to restore the
+ * default 'unbound' (i.e. mapped to dummy) condition.
+ *
+ * It's the caller's responsibility to ensure that the given vpfn is extracted
+ * correctly from a CSG_REG object, for example, using CSG_REG_USERIO_VPFN().
+ */
+static int userio_pages_replace_phys(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys)
+{
+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
+ int err = 0, err1;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ if (phys) {
+ unsigned long mem_flags_input = shared_regs->userio_mem_rd_flags;
+ unsigned long mem_flags_output = mem_flags_input | KBASE_REG_GPU_WR;
+
+ /* Dealing with a queue's INPUT page */
+ err = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, &phys[0], 1, mem_flags_input,
+ KBASE_MEM_GROUP_CSF_IO);
+ /* Dealing with a queue's OUTPUT page */
+ err1 = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn + 1, &phys[1], 1,
+ mem_flags_output, KBASE_MEM_GROUP_CSF_IO);
+ if (unlikely(err1))
+ err = err1;
+ }
+
+ if (unlikely(err) || !phys) {
+ /* Restore back to dummy_userio_phy */
+ update_mapping_with_dummy_pages(kbdev, vpfn, KBASEP_NUM_CS_USER_IO_PAGES);
+ }
+
+ return err;
+}
+
+/* Update a group's queues' mappings for a group with its runtime bound group region */
+static int csg_reg_update_on_csis(struct kbase_device *kbdev, struct kbase_queue_group *group,
+ struct kbase_queue_group *prev_grp)
+{
+ struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group);
+ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+ const u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num;
+ struct tagged_addr *phy;
+ int err = 0, err1;
+ u32 i;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ if (WARN_ONCE(!csg_reg, "Update_userio pages: group has no bound csg_reg"))
+ return -EINVAL;
+
+ for (i = 0; i < nr_csis; i++) {
+ struct kbase_queue *queue = group->bound_queues[i];
+ struct kbase_queue *prev_queue = prev_grp ? prev_grp->bound_queues[i] : NULL;
+
+ /* Set the phy if the group's queue[i] needs mapping, otherwise NULL */
+ phy = (queue && queue->enabled && !queue->user_io_gpu_va) ? queue->phys : NULL;
+
+ /* Either phy is valid, or this update is for a transition change from
+ * prev_group, and the prev_queue was mapped, so an update is required.
+ */
+ if (phy || (prev_queue && prev_queue->user_io_gpu_va)) {
+ u64 vpfn = CSG_REG_USERIO_VPFN(csg_reg->reg, i, nr_susp_pages);
+
+ err1 = userio_pages_replace_phys(kbdev, vpfn, phy);
+
+ if (unlikely(err1)) {
+ dev_warn(kbdev->dev,
+ "%s: Error in update queue-%d mapping for csg_%d_%d_%d",
+ __func__, i, group->kctx->tgid, group->kctx->id,
+ group->handle);
+ err = err1;
+ } else if (phy)
+ queue->user_io_gpu_va = GET_VPFN_VA(vpfn);
+
+			/* Mark that prev_grp's queue has lost its mapping */
+ if (prev_queue)
+ prev_queue->user_io_gpu_va = 0;
+ }
+ }
+
+ return err;
+}
+
+/* Bind a group to a given csg_reg, any previous mappings with the csg_reg are replaced
+ * with the given group's phy pages, or, if no replacement, the default dummy pages.
+ * Note, the csg_reg's fields are in transition step-by-step from the prev_grp to its
+ * new binding owner in this function. At the end, the prev_grp would be completely
+ * detached away from the previously bound csg_reg.
+ */
+static int group_bind_csg_reg(struct kbase_device *kbdev, struct kbase_queue_group *group,
+ struct kbase_csg_shared_region *csg_reg)
+{
+ const unsigned long mem_flags = SUSP_PAGE_MAP_FLAGS;
+ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+ struct kbase_queue_group *prev_grp = csg_reg->grp;
+ struct kbase_va_region *reg = csg_reg->reg;
+ struct tagged_addr *phy;
+ int err = 0, err1;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ /* The csg_reg is expected still on the unused list so its link is not empty */
+ if (WARN_ON_ONCE(list_empty(&csg_reg->link))) {
+ dev_dbg(kbdev->dev, "csg_reg is marked in active use");
+ return -EINVAL;
+ }
+
+ if (WARN_ON_ONCE(prev_grp && prev_grp->csg_reg != csg_reg)) {
+ dev_dbg(kbdev->dev, "Unexpected bound lost on prev_group");
+ prev_grp->csg_reg = NULL;
+ return -EINVAL;
+ }
+
+ /* Replacing the csg_reg bound group to the newly given one */
+ csg_reg->grp = group;
+ group->csg_reg = csg_reg;
+
+ /* Resolving mappings, deal with protected mode first */
+ if (group->protected_suspend_buf.pma) {
+		/* We are binding a new group that has P-mode pages ready, so the prev_grp's
+		 * P-mode mapping status is stale across this transfer of ownership. The new
+		 * owner's mapping was torn down when it previously lost its binding, so the
+		 * pma mapping must be refreshed. Clearing the mapped flag here ensures it
+		 * reflects the new owner's state before the update.
+		 */
+ csg_reg->pmode_mapped = false;
+ err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group);
+ } else if (csg_reg->pmode_mapped) {
+ /* Need to unmap the previous one, use the dummy pages */
+ err = update_mapping_with_dummy_pages(
+ kbdev, CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages);
+
+ if (unlikely(err))
+ dev_warn(kbdev->dev, "%s: Failed to update P-mode dummy for csg_%d_%d_%d",
+ __func__, group->kctx->tgid, group->kctx->id, group->handle);
+
+ csg_reg->pmode_mapped = false;
+ }
+
+	/* Unlike the normal suspend buffer, the mapping of the protected mode suspend
+	 * buffer is tracked by a dedicated mapped flag (its phys[] is only allocated on
+	 * demand). The GPU_VA is therefore always updated to the bound region's
+	 * corresponding VA, reflecting the binding to the csg_reg.
+	 */
+ group->protected_suspend_buf.gpu_va =
+ GET_VPFN_VA(CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages));
+
+ /* Deal with normal mode suspend buffer */
+ phy = group->normal_suspend_buf.phy;
+ err1 = kbase_mmu_update_csf_mcu_pages(kbdev, CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages), phy,
+ nr_susp_pages, mem_flags, KBASE_MEM_GROUP_CSF_FW);
+
+ if (unlikely(err1)) {
+ dev_warn(kbdev->dev, "%s: Failed to update suspend buffer for csg_%d_%d_%d",
+ __func__, group->kctx->tgid, group->kctx->id, group->handle);
+
+		/* Attempt to restore the default dummy pages to remove the previous mapping */
+ if (prev_grp)
+ update_mapping_with_dummy_pages(
+ kbdev, CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages);
+ err = err1;
+		/* Mark the normal suspend buffer as not mapped (due to the error) */
+ group->normal_suspend_buf.gpu_va = 0;
+ } else {
+		/* Mark the normal suspend buffer as mapped */
+ group->normal_suspend_buf.gpu_va =
+ GET_VPFN_VA(CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages));
+ }
+
+	/* Deal with the queues' userio pages */
+	err1 = csg_reg_update_on_csis(kbdev, group, prev_grp);
+	if (likely(!err))
+		err = err1;
+
+	/* Reset the previous group's suspend buffers' GPU_VAs as it has lost its binding */
+ if (prev_grp) {
+ prev_grp->normal_suspend_buf.gpu_va = 0;
+ prev_grp->protected_suspend_buf.gpu_va = 0;
+ prev_grp->csg_reg = NULL;
+ }
+
+ return err;
+}
+
+/* Notify that the group is placed on-slot, hence the bound csg_reg is in active use */
+void kbase_csf_mcu_shared_set_group_csg_reg_active(struct kbase_device *kbdev,
+ struct kbase_queue_group *group)
+{
+ struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group);
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+	if (WARN_ONCE(!csg_reg || csg_reg->grp != group, "Group_%d_%d_%d has no csg_reg binding",
+ group->kctx->tgid, group->kctx->id, group->handle))
+ return;
+
+	/* By dropping the csg_reg from the unused list, it becomes active and is tracked
+	 * by its bound on-slot group. The design is that, when this on-slot group is
+	 * moved off-slot, the scheduler's slot clean-up will add it back to the tail of
+	 * the unused list.
+	 */
+ if (!WARN_ON_ONCE(list_empty(&csg_reg->link)))
+ list_del_init(&csg_reg->link);
+}
+
+/* Notify that the group is placed off-slot, hence the bound csg_reg is no longer in
+ * active use. Existing bindings/mappings are left untouched; they are only dealt with
+ * if the bound csg_reg is to be reused with another group.
+ */
+void kbase_csf_mcu_shared_set_group_csg_reg_unused(struct kbase_device *kbdev,
+ struct kbase_queue_group *group)
+{
+ struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group);
+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ if (WARN_ONCE(!csg_reg || csg_reg->grp != group, "Group_%d_%d_%d has no csg_reg bound",
+ group->kctx->tgid, group->kctx->id, group->handle))
+ return;
+
+ /* By adding back the csg_reg to the unused list, it becomes available for another
+ * group to break its existing binding and set up a new one.
+ */
+ if (!list_empty(&csg_reg->link)) {
+ WARN_ONCE(group->csg_nr >= 0, "Group is assumed vacated from slot");
+ list_move_tail(&csg_reg->link, &shared_regs->unused_csg_regs);
+ } else
+ list_add_tail(&csg_reg->link, &shared_regs->unused_csg_regs);
+}
+
+/* Adding a new queue to an existing on-slot group */
+int kbase_csf_mcu_shared_add_queue(struct kbase_device *kbdev, struct kbase_queue *queue)
+{
+ struct kbase_queue_group *group = queue->group;
+ struct kbase_csg_shared_region *csg_reg;
+ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+ u64 vpfn;
+ int err;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ if (WARN_ONCE(!group || group->csg_nr < 0, "No bound group, or group is not on-slot"))
+ return -EIO;
+
+ csg_reg = get_group_bound_csg_reg(group);
+ if (WARN_ONCE(!csg_reg || !list_empty(&csg_reg->link),
+ "No bound csg_reg, or in wrong state"))
+ return -EIO;
+
+ vpfn = CSG_REG_USERIO_VPFN(csg_reg->reg, queue->csi_index, nr_susp_pages);
+ err = userio_pages_replace_phys(kbdev, vpfn, queue->phys);
+ if (likely(!err)) {
+		/* Mark that the queue has been successfully mapped */
+ queue->user_io_gpu_va = GET_VPFN_VA(vpfn);
+ } else {
+		/* Mark that the queue has no mapping for its phys[] */
+ queue->user_io_gpu_va = 0;
+ dev_dbg(kbdev->dev,
+ "%s: Error in mapping userio pages for queue-%d of csg_%d_%d_%d", __func__,
+ queue->csi_index, group->kctx->tgid, group->kctx->id, group->handle);
+
+		/* Notify the bound group of the error */
+ if (notify_group_csg_reg_map_error(group))
+ err = -EIO;
+ }
+
+ return err;
+}
+
+/* Unmap a given queue's userio pages when the queue is stopped or deleted */
+void kbase_csf_mcu_shared_drop_stopped_queue(struct kbase_device *kbdev, struct kbase_queue *queue)
+{
+ struct kbase_queue_group *group;
+ struct kbase_csg_shared_region *csg_reg;
+ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+ u64 vpfn;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ /* The queue has no existing mapping, nothing to do */
+ if (!queue || !queue->user_io_gpu_va)
+ return;
+
+ group = queue->group;
+ if (WARN_ONCE(!group || !group->csg_reg, "Queue/Group has no bound region"))
+ return;
+
+ csg_reg = get_group_bound_csg_reg(group);
+
+ vpfn = CSG_REG_USERIO_VPFN(csg_reg->reg, queue->csi_index, nr_susp_pages);
+
+ WARN_ONCE(userio_pages_replace_phys(kbdev, vpfn, NULL),
+ "Unexpected restoring to dummy map update error");
+ queue->user_io_gpu_va = 0;
+}
+
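+/* Map the bound csg_reg's protected mode suspend buffer pages for the group, provided the
+ * group has P-mode pages allocated (pma) and the mapping is not already in place. On a
+ * mapping failure the buffer is restored to the default dummy pages.
+ */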
+int kbase_csf_mcu_shared_group_update_pmode_map(struct kbase_device *kbdev,
+ struct kbase_queue_group *group)
+{
+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
+ struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group);
+ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+ int err = 0, err1;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ if (WARN_ONCE(!csg_reg, "Update_pmode_map: the bound csg_reg can't be NULL"))
+ return -EINVAL;
+
+	/* If the P-mode buffer is already mapped, there is nothing to do */
+ if (csg_reg->pmode_mapped)
+ return 0;
+
+	/* P-mode mapping not in place and the group has allocated P-mode pages, so map them */
+ if (group->protected_suspend_buf.pma) {
+ unsigned long mem_flags = SUSP_PAGE_MAP_FLAGS;
+ struct tagged_addr *phy = shared_regs->pma_phys;
+ struct kbase_va_region *reg = csg_reg->reg;
+ u64 vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages);
+ u32 i;
+
+ /* Populate the protected phys from pma to phy[] */
+ for (i = 0; i < nr_susp_pages; i++)
+ phy[i] = as_tagged(group->protected_suspend_buf.pma[i]->pa);
+
+ /* Add the P-mode suspend buffer mapping */
+ err = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, phy, nr_susp_pages, mem_flags,
+ KBASE_MEM_GROUP_CSF_FW);
+
+		/* On error, restore the default dummy pages */
+ if (unlikely(err)) {
+ err1 = update_mapping_with_dummy_pages(kbdev, vpfn, nr_susp_pages);
+ if (unlikely(err1))
+ dev_warn(
+ kbdev->dev,
+ "%s: Failed in recovering to P-mode dummy for csg_%d_%d_%d",
+ __func__, group->kctx->tgid, group->kctx->id,
+ group->handle);
+
+ csg_reg->pmode_mapped = false;
+ } else
+ csg_reg->pmode_mapped = true;
+ }
+
+ return err;
+}
+
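+/* Clear an evicted group's bound csg_reg: restore the default dummy pages over any mapped
+ * suspend buffers, drop the userio mappings of its bound queues, break the group/csg_reg
+ * binding and return the csg_reg to the front of the unused list for early reuse.
+ */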
+void kbase_csf_mcu_shared_clear_evicted_group_csg_reg(struct kbase_device *kbdev,
+ struct kbase_queue_group *group)
+{
+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
+ struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group);
+ struct kbase_va_region *reg;
+ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+ u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num;
+ int err = 0;
+ u32 i;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+	/* Nothing to clean up if there is no bound csg_reg */
+ if (!csg_reg)
+ return;
+
+ reg = csg_reg->reg;
+	/* Restore the default dummy pages over any currently mapped pages */
+ if (csg_reg->pmode_mapped) {
+ err = update_mapping_with_dummy_pages(
+ kbdev, CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages);
+ WARN_ONCE(unlikely(err), "Restore dummy failed for clearing pmod buffer mapping");
+
+ csg_reg->pmode_mapped = false;
+ }
+
+ if (group->normal_suspend_buf.gpu_va) {
+ err = update_mapping_with_dummy_pages(
+ kbdev, CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages);
+ WARN_ONCE(err, "Restore dummy failed for clearing suspend buffer mapping");
+ }
+
+	/* Deal with the queues' userio pages */
+ for (i = 0; i < nr_csis; i++)
+ kbase_csf_mcu_shared_drop_stopped_queue(kbdev, group->bound_queues[i]);
+
+ group->normal_suspend_buf.gpu_va = 0;
+ group->protected_suspend_buf.gpu_va = 0;
+
+ /* Break the binding */
+ group->csg_reg = NULL;
+ csg_reg->grp = NULL;
+
+	/* Put the csg_reg at the front of the unused list */
+ if (WARN_ON_ONCE(list_empty(&csg_reg->link)))
+ list_add(&csg_reg->link, &shared_regs->unused_csg_regs);
+ else
+ list_move(&csg_reg->link, &shared_regs->unused_csg_regs);
+}
+
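+/* Bind the group to a csg_reg so it is ready to run on-slot: reuse the group's existing
+ * binding if it still holds one (only refreshing the P-mode and userio mappings), otherwise
+ * take the first entry from the unused list and rebind it to this group.
+ */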
+int kbase_csf_mcu_shared_group_bind_csg_reg(struct kbase_device *kbdev,
+ struct kbase_queue_group *group)
+{
+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
+ struct kbase_csg_shared_region *csg_reg;
+ int err;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ csg_reg = get_group_bound_csg_reg(group);
+ if (!csg_reg)
+ csg_reg = list_first_entry_or_null(&shared_regs->unused_csg_regs,
+ struct kbase_csg_shared_region, link);
+
+ if (!WARN_ON_ONCE(!csg_reg)) {
+ struct kbase_queue_group *prev_grp = csg_reg->grp;
+
+		/* Deal with the previous binding and its lazy unmap, i.e. if the previous
+		 * mapping is not the required one, unmap it.
+		 */
+ if (prev_grp == group) {
+ /* Update existing bindings, if there have been some changes */
+ err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group);
+ if (likely(!err))
+ err = csg_reg_update_on_csis(kbdev, group, NULL);
+ } else
+ err = group_bind_csg_reg(kbdev, group, csg_reg);
+ } else {
+		/* This should not be possible if the code operates correctly */
+ dev_err(kbdev->dev, "%s: Unexpected NULL csg_reg for group %d of context %d_%d",
+ __func__, group->handle, group->kctx->tgid, group->kctx->id);
+ return -EIO;
+ }
+
+ if (likely(!err))
+ notify_group_csg_reg_map_done(group);
+ else
+ notify_group_csg_reg_map_error(group);
+
+ return err;
+}
+
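+/* One-time setup of a csg_reg: allocate a VA region of 2 * (nr_susp_pages + nr_csis) pages
+ * in the MCU shared zone and pre-map all of it (normal suspend buffer, P-mode suspend
+ * buffer and the per-CSI userio slots) with the default dummy pages, so that runtime
+ * rebinding only has to update the backing physical pages.
+ */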
+static int shared_mcu_csg_reg_init(struct kbase_device *kbdev,
+ struct kbase_csg_shared_region *csg_reg)
+{
+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
+ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+ u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num;
+ const size_t nr_csg_reg_pages = 2 * (nr_susp_pages + nr_csis);
+ struct kbase_va_region *reg;
+ u64 vpfn;
+ int err, i;
+
+ INIT_LIST_HEAD(&csg_reg->link);
+ reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, nr_csg_reg_pages,
+ KBASE_REG_ZONE_MCU_SHARED);
+
+ if (!reg) {
+ dev_err(kbdev->dev, "%s: Failed to allocate a MCU shared region for %zu pages\n",
+ __func__, nr_csg_reg_pages);
+ return -ENOMEM;
+ }
+
+	/* Insert the region into the rbtree, so it becomes ready to use */
+ mutex_lock(&kbdev->csf.reg_lock);
+ err = kbase_add_va_region_rbtree(kbdev, reg, 0, nr_csg_reg_pages, 1);
+ reg->flags &= ~KBASE_REG_FREE;
+ mutex_unlock(&kbdev->csf.reg_lock);
+ if (err) {
+ kfree(reg);
+ dev_err(kbdev->dev, "%s: Failed to add a region of %zu pages into rbtree", __func__,
+ nr_csg_reg_pages);
+ return err;
+ }
+
+	/* Initialize the mappings so the MMU only needs to update the corresponding
+	 * mapped phy-pages at runtime.
+	 * Map the normal suspend buffer pages to the prepared dummy phys[].
+	 */
+ vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages);
+ err = insert_dummy_pages(kbdev, vpfn, nr_susp_pages);
+
+ if (unlikely(err))
+ goto fail_susp_map_fail;
+
+ /* Map the protected suspend buffer pages to the prepared dummy phys[] */
+ vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages);
+ err = insert_dummy_pages(kbdev, vpfn, nr_susp_pages);
+
+ if (unlikely(err))
+ goto fail_pmod_map_fail;
+
+ for (i = 0; i < nr_csis; i++) {
+ vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages);
+ err = insert_dummy_pages(kbdev, vpfn, KBASEP_NUM_CS_USER_IO_PAGES);
+
+ if (unlikely(err))
+ goto fail_userio_pages_map_fail;
+ }
+
+	/* Replace the previous NULL-valued field with the successfully initialized reg */
+ csg_reg->reg = reg;
+
+ return 0;
+
+fail_userio_pages_map_fail:
+ while (i-- > 0) {
+ vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages);
+ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
+ KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES,
+ MCU_AS_NR, true);
+ }
+
+ vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages);
+ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
+ nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
+fail_pmod_map_fail:
+ vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages);
+ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
+ nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
+fail_susp_map_fail:
+ mutex_lock(&kbdev->csf.reg_lock);
+ kbase_remove_va_region(kbdev, reg);
+ mutex_unlock(&kbdev->csf.reg_lock);
+ kfree(reg);
+
+ return err;
+}
+
+/* Note, this helper can only be called on scheduler shutdown */
+static void shared_mcu_csg_reg_term(struct kbase_device *kbdev,
+ struct kbase_csg_shared_region *csg_reg)
+{
+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
+ struct kbase_va_region *reg = csg_reg->reg;
+ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+ const u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num;
+ u64 vpfn;
+ int i;
+
+ for (i = 0; i < nr_csis; i++) {
+ vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages);
+ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
+ KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES,
+ MCU_AS_NR, true);
+ }
+
+ vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages);
+ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
+ nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
+ vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages);
+ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
+ nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
+
+ mutex_lock(&kbdev->csf.reg_lock);
+ kbase_remove_va_region(kbdev, reg);
+ mutex_unlock(&kbdev->csf.reg_lock);
+ kfree(reg);
+}
+
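+/* Set up the scheduler's MCU shared regions data: a single dummy page replicated across
+ * dummy_phys[], a pma_phys[] scratch array for P-mode mappings, and a pool of
+ * MCU_SHARED_REGS_PREALLOCATE_SCALE * group_num preallocated csg_regs, all of which start
+ * out on the unused list.
+ */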
+int kbase_csf_mcu_shared_regs_data_init(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ struct kbase_csf_mcu_shared_regions *shared_regs = &scheduler->mcu_regs_data;
+ struct kbase_csg_shared_region *array_csg_regs;
+ const size_t nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+ const u32 nr_groups = kbdev->csf.global_iface.group_num;
+ const u32 nr_csg_regs = MCU_SHARED_REGS_PREALLOCATE_SCALE * nr_groups;
+ const u32 nr_dummy_phys = MAX(nr_susp_pages, KBASEP_NUM_CS_USER_IO_PAGES);
+ u32 i;
+ int err;
+
+ shared_regs->userio_mem_rd_flags = get_userio_mmu_flags(kbdev);
+ INIT_LIST_HEAD(&shared_regs->unused_csg_regs);
+
+ shared_regs->dummy_phys =
+ kcalloc(nr_dummy_phys, sizeof(*shared_regs->dummy_phys), GFP_KERNEL);
+ if (!shared_regs->dummy_phys)
+ return -ENOMEM;
+
+ if (kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1,
+ &shared_regs->dummy_phys[0], false, NULL) <= 0)
+ return -ENOMEM;
+
+ shared_regs->dummy_phys_allocated = true;
+ set_page_meta_status_not_movable(shared_regs->dummy_phys[0]);
+
+ /* Replicate the allocated single shared_regs->dummy_phys[0] to the full array */
+ for (i = 1; i < nr_dummy_phys; i++)
+ shared_regs->dummy_phys[i] = shared_regs->dummy_phys[0];
+
+ shared_regs->pma_phys = kcalloc(nr_susp_pages, sizeof(*shared_regs->pma_phys), GFP_KERNEL);
+ if (!shared_regs->pma_phys)
+ return -ENOMEM;
+
+ array_csg_regs = kcalloc(nr_csg_regs, sizeof(*array_csg_regs), GFP_KERNEL);
+ if (!array_csg_regs)
+ return -ENOMEM;
+ shared_regs->array_csg_regs = array_csg_regs;
+
+ /* All fields in scheduler->mcu_regs_data except the shared_regs->array_csg_regs
+ * are properly populated and ready to use. Now initialize the items in
+ * shared_regs->array_csg_regs[]
+ */
+ for (i = 0; i < nr_csg_regs; i++) {
+ err = shared_mcu_csg_reg_init(kbdev, &array_csg_regs[i]);
+ if (err)
+ return err;
+
+ list_add_tail(&array_csg_regs[i].link, &shared_regs->unused_csg_regs);
+ }
+
+ return 0;
+}
+
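+/* Tear down the scheduler's MCU shared regions data: by this point every csg_reg is
+ * expected to be unbound from its group and back on the unused list.
+ */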
+void kbase_csf_mcu_shared_regs_data_term(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ struct kbase_csf_mcu_shared_regions *shared_regs = &scheduler->mcu_regs_data;
+ struct kbase_csg_shared_region *array_csg_regs =
+ (struct kbase_csg_shared_region *)shared_regs->array_csg_regs;
+ const u32 nr_groups = kbdev->csf.global_iface.group_num;
+ const u32 nr_csg_regs = MCU_SHARED_REGS_PREALLOCATE_SCALE * nr_groups;
+
+ if (array_csg_regs) {
+ struct kbase_csg_shared_region *csg_reg;
+ u32 i, cnt_csg_regs = 0;
+
+ for (i = 0; i < nr_csg_regs; i++) {
+ csg_reg = &array_csg_regs[i];
+			/* There should not be any group bindings left */
+ WARN_ONCE(csg_reg->grp, "csg_reg has a bound group");
+
+ if (csg_reg->reg) {
+ shared_mcu_csg_reg_term(kbdev, csg_reg);
+ cnt_csg_regs++;
+ }
+ }
+
+		/* The count of csg_regs on the unused list should match those initialized in array_csg_regs[] */
+ list_for_each_entry(csg_reg, &shared_regs->unused_csg_regs, link)
+ cnt_csg_regs--;
+
+ WARN_ONCE(cnt_csg_regs, "Unmatched counts of susp_regs");
+ kfree(shared_regs->array_csg_regs);
+ }
+
+ if (shared_regs->dummy_phys_allocated) {
+ struct page *page = as_page(shared_regs->dummy_phys[0]);
+
+ kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false);
+ }
+
+ kfree(shared_regs->dummy_phys);
+ kfree(shared_regs->pma_phys);
+}
diff --git a/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.h b/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.h
new file mode 100644
index 0000000..61943cb
--- /dev/null
+++ b/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.h
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_CSF_MCU_SHARED_REG_H_
+#define _KBASE_CSF_MCU_SHARED_REG_H_
+
+/**
+ * kbase_csf_mcu_shared_set_group_csg_reg_active - Notify that the group is active on-slot with
+ * scheduling action. Essential runtime resources
+ *                                                 are bound to the group for it to run
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @group: Pointer to the group that is placed into active on-slot running by the scheduler.
+ *
+ */
+void kbase_csf_mcu_shared_set_group_csg_reg_active(struct kbase_device *kbdev,
+ struct kbase_queue_group *group);
+
+/**
+ * kbase_csf_mcu_shared_set_group_csg_reg_unused - Notify that the group is placed off-slot with
+ *                                                 scheduling action. Some of its bound runtime
+ * resources can be reallocated for others to use
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @group: Pointer to the group that is placed off-slot by the scheduler.
+ *
+ */
+void kbase_csf_mcu_shared_set_group_csg_reg_unused(struct kbase_device *kbdev,
+ struct kbase_queue_group *group);
+
+/**
+ * kbase_csf_mcu_shared_group_update_pmode_map - Request to update the given group's protected
+ * suspend buffer pages to be mapped for supporting
+ * protected mode operations.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @group: Pointer to the group for attempting a protected mode suspend buffer binding/mapping.
+ *
+ * Return: 0 for success, the group has a protected suspend buffer region mapped. Otherwise an
+ * error code is returned.
+ */
+int kbase_csf_mcu_shared_group_update_pmode_map(struct kbase_device *kbdev,
+ struct kbase_queue_group *group);
+
+/**
+ * kbase_csf_mcu_shared_clear_evicted_group_csg_reg - Clear any bound regions/mappings as the
+ * given group is evicted out of the runtime
+ * operations.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @group: Pointer to the group that has been evicted out of set of operational groups.
+ *
+ * This function immediately takes away any bindings/mappings so that the resources
+ * are not tied up by the given group, which has been evicted from scheduling for
+ * termination.
+ */
+void kbase_csf_mcu_shared_clear_evicted_group_csg_reg(struct kbase_device *kbdev,
+ struct kbase_queue_group *group);
+
+/**
+ * kbase_csf_mcu_shared_add_queue - Request to add a newly activated queue to its group that
+ *                                  is running on-slot.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @queue: Pointer to the queue that requires runtime resources to be bound so it can join
+ *         the others already running on-slot with their bound group.
+ *
+ * Return: 0 on success, or negative on failure.
+ */
+int kbase_csf_mcu_shared_add_queue(struct kbase_device *kbdev, struct kbase_queue *queue);
+
+/**
+ * kbase_csf_mcu_shared_drop_stopped_queue - Request to drop a queue after it has been stopped
+ *                                           from its operational state within its group.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @queue: Pointer to the queue that has been stopped from operational state.
+ *
+ */
+void kbase_csf_mcu_shared_drop_stopped_queue(struct kbase_device *kbdev, struct kbase_queue *queue);
+
+/**
+ * kbase_csf_mcu_shared_group_bind_csg_reg - Bind the required runtime resources to the given
+ *                                           group so it is ready to run on-slot.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @group: Pointer to the queue group that requires the runtime resources.
+ *
+ * This function binds/maps the required suspend buffer pages and userio pages for the given
+ * group, readying it to run on-slot.
+ *
+ * Return: 0 on success, or negative on failure.
+ */
+int kbase_csf_mcu_shared_group_bind_csg_reg(struct kbase_device *kbdev,
+ struct kbase_queue_group *group);
+
+/**
+ * kbase_csf_mcu_shared_regs_data_init - Allocate and initialize the MCU shared regions data for
+ * the given device.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * This function allocates and initializes the MCU shared VA regions for runtime operations
+ * of the CSF scheduler.
+ *
+ * Return: 0 on success, or an error code.
+ */
+int kbase_csf_mcu_shared_regs_data_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_mcu_shared_regs_data_term - Terminate the allocated MCU shared regions data for
+ * the given device.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * This function terminates the MCU shared VA regions allocated for runtime operations
+ * of the CSF scheduler.
+ */
+void kbase_csf_mcu_shared_regs_data_term(struct kbase_device *kbdev);
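+
+/*
+ * Illustrative call sequence (a sketch of the expected usage by the scheduler, inferred
+ * from the descriptions above; the exact call sites live in the scheduler code):
+ *
+ *   kbase_csf_mcu_shared_regs_data_init(kbdev);
+ *   ...
+ *   kbase_csf_mcu_shared_group_bind_csg_reg(kbdev, group);        // before going on-slot
+ *   kbase_csf_mcu_shared_set_group_csg_reg_active(kbdev, group);  // once placed on-slot
+ *   ...
+ *   kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group);  // after going off-slot
+ *   ...
+ *   kbase_csf_mcu_shared_regs_data_term(kbdev);
+ */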
+
+#endif /* _KBASE_CSF_MCU_SHARED_REG_H_ */
diff --git a/mali_kbase/csf/mali_kbase_csf_registers.h b/mali_kbase/csf/mali_kbase_csf_registers.h
index aad4005..b5bf7bb 100644
--- a/mali_kbase/csf/mali_kbase_csf_registers.h
+++ b/mali_kbase/csf/mali_kbase_csf_registers.h
@@ -31,10 +31,6 @@
* Begin register sets
*/
-/* DOORBELLS base address */
-#define DOORBELLS_BASE 0x0080000
-#define DOORBELLS_REG(r) (DOORBELLS_BASE + (r))
-
/* CS_KERNEL_INPUT_BLOCK base address */
#define CS_KERNEL_INPUT_BLOCK_BASE 0x0000
#define CS_KERNEL_INPUT_BLOCK_REG(r) (CS_KERNEL_INPUT_BLOCK_BASE + (r))
@@ -71,10 +67,6 @@
#define GLB_OUTPUT_BLOCK_BASE 0x0000
#define GLB_OUTPUT_BLOCK_REG(r) (GLB_OUTPUT_BLOCK_BASE + (r))
-/* USER base address */
-#define USER_BASE 0x0010000
-#define USER_REG(r) (USER_BASE + (r))
-
/* End register sets */
/*
@@ -163,6 +155,8 @@
#define CSG_PROTM_SUSPEND_BUF_HI 0x004C /* () Protected mode suspend buffer, high word */
#define CSG_CONFIG 0x0050 /* () CSG configuration options */
#define CSG_ITER_TRACE_CONFIG 0x0054 /* () CSG trace configuration */
+#define CSG_DVS_BUF_LO 0x0060 /* () Normal mode deferred vertex shading work buffer, low word */
+#define CSG_DVS_BUF_HI 0x0064 /* () Normal mode deferred vertex shading work buffer, high word */
/* CSG_OUTPUT_BLOCK register offsets */
#define CSG_ACK 0x0000 /* () CSG acknowledge flags */
@@ -227,24 +221,43 @@
#define GLB_PRFCNT_TILER_EN 0x0058 /* () Performance counter enable for tiler */
#define GLB_PRFCNT_MMU_L2_EN 0x005C /* () Performance counter enable for MMU/L2 cache */
-#define GLB_DEBUG_FWUTF_DESTROY 0x0FE0 /* () Test fixture destroy function address */
-#define GLB_DEBUG_FWUTF_TEST 0x0FE4 /* () Test index */
-#define GLB_DEBUG_FWUTF_FIXTURE 0x0FE8 /* () Test fixture index */
-#define GLB_DEBUG_FWUTF_CREATE 0x0FEC /* () Test fixture create function address */
+#define GLB_DEBUG_ARG_IN0 0x0FE0 /* Firmware Debug argument array element 0 */
+#define GLB_DEBUG_ARG_IN1 0x0FE4 /* Firmware Debug argument array element 1 */
+#define GLB_DEBUG_ARG_IN2 0x0FE8 /* Firmware Debug argument array element 2 */
+#define GLB_DEBUG_ARG_IN3 0x0FEC /* Firmware Debug argument array element 3 */
+
+/* Mappings based on GLB_DEBUG_REQ.FWUTF_RUN bit being different from GLB_DEBUG_ACK.FWUTF_RUN */
+#define GLB_DEBUG_FWUTF_DESTROY GLB_DEBUG_ARG_IN0 /* () Test fixture destroy function address */
+#define GLB_DEBUG_FWUTF_TEST GLB_DEBUG_ARG_IN1 /* () Test index */
+#define GLB_DEBUG_FWUTF_FIXTURE GLB_DEBUG_ARG_IN2 /* () Test fixture index */
+#define GLB_DEBUG_FWUTF_CREATE GLB_DEBUG_ARG_IN3 /* () Test fixture create function address */
+
#define GLB_DEBUG_ACK_IRQ_MASK 0x0FF8 /* () Global debug acknowledge interrupt mask */
#define GLB_DEBUG_REQ 0x0FFC /* () Global debug request */
/* GLB_OUTPUT_BLOCK register offsets */
+#define GLB_DEBUG_ARG_OUT0 0x0FE0 /* Firmware debug result element 0 */
+#define GLB_DEBUG_ARG_OUT1 0x0FE4 /* Firmware debug result element 1 */
+#define GLB_DEBUG_ARG_OUT2 0x0FE8 /* Firmware debug result element 2 */
+#define GLB_DEBUG_ARG_OUT3 0x0FEC /* Firmware debug result element 3 */
+
#define GLB_ACK 0x0000 /* () Global acknowledge */
#define GLB_DB_ACK 0x0008 /* () Global doorbell acknowledge */
#define GLB_HALT_STATUS 0x0010 /* () Global halt status */
#define GLB_PRFCNT_STATUS 0x0014 /* () Performance counter status */
#define GLB_PRFCNT_INSERT 0x0018 /* () Performance counter buffer insert index */
-#define GLB_DEBUG_FWUTF_RESULT 0x0FE0 /* () Firmware debug test result */
+#define GLB_DEBUG_FWUTF_RESULT GLB_DEBUG_ARG_OUT0 /* () Firmware debug test result */
#define GLB_DEBUG_ACK 0x0FFC /* () Global debug acknowledge */
-/* USER register offsets */
-#define LATEST_FLUSH 0x0000 /* () Flush ID of latest clean-and-invalidate operation */
+#ifdef CONFIG_MALI_CORESIGHT
+#define GLB_DEBUG_REQ_FW_AS_WRITE_SHIFT 4
+#define GLB_DEBUG_REQ_FW_AS_WRITE_MASK (0x1 << GLB_DEBUG_REQ_FW_AS_WRITE_SHIFT)
+#define GLB_DEBUG_REQ_FW_AS_READ_SHIFT 5
+#define GLB_DEBUG_REQ_FW_AS_READ_MASK (0x1 << GLB_DEBUG_REQ_FW_AS_READ_SHIFT)
+#define GLB_DEBUG_ARG_IN0 0x0FE0
+#define GLB_DEBUG_ARG_IN1 0x0FE4
+#define GLB_DEBUG_ARG_OUT0 0x0FE0
+#endif /* CONFIG_MALI_CORESIGHT */
/* End register offsets */
@@ -302,10 +315,17 @@
#define CS_REQ_IDLE_RESOURCE_REQ_SHIFT 11
#define CS_REQ_IDLE_RESOURCE_REQ_MASK (0x1 << CS_REQ_IDLE_RESOURCE_REQ_SHIFT)
#define CS_REQ_IDLE_RESOURCE_REQ_GET(reg_val) \
- (((reg_val)&CS_REQ_IDLE_RESOURCE_REQ_MASK) >> CS_REQ_IDLE_RESOURCE_REQ_SHIFT)
+ (((reg_val) & CS_REQ_IDLE_RESOURCE_REQ_MASK) >> CS_REQ_IDLE_RESOURCE_REQ_SHIFT)
#define CS_REQ_IDLE_RESOURCE_REQ_SET(reg_val, value) \
(((reg_val) & ~CS_REQ_IDLE_RESOURCE_REQ_MASK) | \
(((value) << CS_REQ_IDLE_RESOURCE_REQ_SHIFT) & CS_REQ_IDLE_RESOURCE_REQ_MASK))
+#define CS_REQ_IDLE_SHARED_SB_DEC_SHIFT 12
+#define CS_REQ_IDLE_SHARED_SB_DEC_MASK (0x1 << CS_REQ_IDLE_SHARED_SB_DEC_SHIFT)
+#define CS_REQ_IDLE_SHARED_SB_DEC_GET(reg_val) \
+ (((reg_val) & CS_REQ_IDLE_SHARED_SB_DEC_MASK) >> CS_REQ_IDLE_SHARED_SB_DEC_SHIFT)
+#define CS_REQ_IDLE_SHARED_SB_DEC_REQ_SET(reg_val, value) \
+ (((reg_val) & ~CS_REQ_IDLE_SHARED_SB_DEC_MASK) | \
+ (((value) << CS_REQ_IDLE_SHARED_SB_DEC_SHIFT) & CS_REQ_IDLE_SHARED_SB_DEC_MASK))
#define CS_REQ_TILER_OOM_SHIFT 26
#define CS_REQ_TILER_OOM_MASK (0x1 << CS_REQ_TILER_OOM_SHIFT)
#define CS_REQ_TILER_OOM_GET(reg_val) (((reg_val)&CS_REQ_TILER_OOM_MASK) >> CS_REQ_TILER_OOM_SHIFT)
@@ -564,6 +584,7 @@
/* CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */
#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE 0x0
#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT 0x1
+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GE 0x5
/* End of CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */
#define CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT 28
#define CS_STATUS_WAIT_PROGRESS_WAIT_MASK (0x1 << CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT)
@@ -696,6 +717,27 @@
#define CS_FAULT_EXCEPTION_TYPE_ADDR_RANGE_FAULT 0x5A
#define CS_FAULT_EXCEPTION_TYPE_IMPRECISE_FAULT 0x5B
#define CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT 0x69
+#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L0 0xC0
+#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L1 0xC1
+#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L2 0xC2
+#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L3 0xC3
+#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L4 0xC4
+#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_0 0xC8
+#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_1 0xC9
+#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_2 0xCA
+#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_3 0xCB
+#define CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_1 0xD9
+#define CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_2 0xDA
+#define CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_3 0xDB
+#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN 0xE0
+#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_0 0xE4
+#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_1 0xE5
+#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_2 0xE6
+#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_3 0xE7
+#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_0 0xE8
+#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_1 0xE9
+#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_2 0xEA
+#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_3 0xEB
/* End of CS_FAULT_EXCEPTION_TYPE values */
#define CS_FAULT_EXCEPTION_DATA_SHIFT 8
#define CS_FAULT_EXCEPTION_DATA_MASK (0xFFFFFF << CS_FAULT_EXCEPTION_DATA_SHIFT)
@@ -849,11 +891,6 @@
#define CSG_REQ_IDLE_GET(reg_val) (((reg_val)&CSG_REQ_IDLE_MASK) >> CSG_REQ_IDLE_SHIFT)
#define CSG_REQ_IDLE_SET(reg_val, value) \
(((reg_val) & ~CSG_REQ_IDLE_MASK) | (((value) << CSG_REQ_IDLE_SHIFT) & CSG_REQ_IDLE_MASK))
-#define CSG_REQ_DOORBELL_SHIFT 30
-#define CSG_REQ_DOORBELL_MASK (0x1 << CSG_REQ_DOORBELL_SHIFT)
-#define CSG_REQ_DOORBELL_GET(reg_val) (((reg_val)&CSG_REQ_DOORBELL_MASK) >> CSG_REQ_DOORBELL_SHIFT)
-#define CSG_REQ_DOORBELL_SET(reg_val, value) \
- (((reg_val) & ~CSG_REQ_DOORBELL_MASK) | (((value) << CSG_REQ_DOORBELL_SHIFT) & CSG_REQ_DOORBELL_MASK))
#define CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT 31
#define CSG_REQ_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT)
#define CSG_REQ_PROGRESS_TIMER_EVENT_GET(reg_val) \
@@ -970,6 +1007,21 @@
(((reg_val) & ~CSG_PROTM_SUSPEND_BUF_POINTER_MASK) | \
(((value) << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) & CSG_PROTM_SUSPEND_BUF_POINTER_MASK))
+/* CSG_DVS_BUF_BUFFER register */
+#define CSG_DVS_BUF_BUFFER_SIZE_SHIFT GPU_U(0)
+#define CSG_DVS_BUF_BUFFER_SIZE_MASK (GPU_U(0xFFF) << CSG_DVS_BUF_BUFFER_SIZE_SHIFT)
+#define CSG_DVS_BUF_BUFFER_SIZE_GET(reg_val) (((reg_val)&CSG_DVS_BUF_BUFFER_SIZE_MASK) >> CSG_DVS_BUF_BUFFER_SIZE_SHIFT)
+#define CSG_DVS_BUF_BUFFER_SIZE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_DVS_BUF_BUFFER_SIZE_MASK) | \
+ (((value) << CSG_DVS_BUF_BUFFER_SIZE_SHIFT) & CSG_DVS_BUF_BUFFER_SIZE_MASK))
+#define CSG_DVS_BUF_BUFFER_POINTER_SHIFT GPU_U(12)
+#define CSG_DVS_BUF_BUFFER_POINTER_MASK \
+ (GPU_ULL(0xFFFFFFFFFFFFF) << CSG_DVS_BUF_BUFFER_POINTER_SHIFT)
+#define CSG_DVS_BUF_BUFFER_POINTER_GET(reg_val) \
+ (((reg_val)&CSG_DVS_BUF_BUFFER_POINTER_MASK) >> CSG_DVS_BUF_BUFFER_POINTER_SHIFT)
+#define CSG_DVS_BUF_BUFFER_POINTER_SET(reg_val, value) \
+ (((reg_val) & ~CSG_DVS_BUF_BUFFER_POINTER_MASK) | \
+ (((value) << CSG_DVS_BUF_BUFFER_POINTER_SHIFT) & CSG_DVS_BUF_BUFFER_POINTER_MASK))
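+
+/* Illustrative sketch of packing a CSG_DVS_BUF value with the accessors above, assuming
+ * (by analogy with the other buffer pointer fields) that BUFFER_POINTER holds the 4KB
+ * aligned GPU VA shifted right by 12; `dvs_va` and `dvs_size` are hypothetical names:
+ *
+ *   u64 dvs = CSG_DVS_BUF_BUFFER_POINTER_SET(0, dvs_va >> 12);
+ *   dvs = CSG_DVS_BUF_BUFFER_SIZE_SET(dvs, dvs_size);
+ */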
/* End of CSG_INPUT_BLOCK register set definitions */
@@ -1584,4 +1636,43 @@
((GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET_MOD(value) << GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT) & \
GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK))
+/* GLB_DEBUG_REQ register */
+#define GLB_DEBUG_REQ_DEBUG_RUN_SHIFT GPU_U(23)
+#define GLB_DEBUG_REQ_DEBUG_RUN_MASK (GPU_U(0x1) << GLB_DEBUG_REQ_DEBUG_RUN_SHIFT)
+#define GLB_DEBUG_REQ_DEBUG_RUN_GET(reg_val) \
+ (((reg_val)&GLB_DEBUG_REQ_DEBUG_RUN_MASK) >> GLB_DEBUG_REQ_DEBUG_RUN_SHIFT)
+#define GLB_DEBUG_REQ_DEBUG_RUN_SET(reg_val, value) \
+ (((reg_val) & ~GLB_DEBUG_REQ_DEBUG_RUN_MASK) | \
+ (((value) << GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) & GLB_DEBUG_REQ_DEBUG_RUN_MASK))
+
+#define GLB_DEBUG_REQ_RUN_MODE_SHIFT GPU_U(24)
+#define GLB_DEBUG_REQ_RUN_MODE_MASK (GPU_U(0xFF) << GLB_DEBUG_REQ_RUN_MODE_SHIFT)
+#define GLB_DEBUG_REQ_RUN_MODE_GET(reg_val) \
+ (((reg_val)&GLB_DEBUG_REQ_RUN_MODE_MASK) >> GLB_DEBUG_REQ_RUN_MODE_SHIFT)
+#define GLB_DEBUG_REQ_RUN_MODE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_DEBUG_REQ_RUN_MODE_MASK) | \
+ (((value) << GLB_DEBUG_REQ_RUN_MODE_SHIFT) & GLB_DEBUG_REQ_RUN_MODE_MASK))
+
+/* GLB_DEBUG_ACK register */
+#define GLB_DEBUG_ACK_DEBUG_RUN_SHIFT GPU_U(23)
+#define GLB_DEBUG_ACK_DEBUG_RUN_MASK (GPU_U(0x1) << GLB_DEBUG_ACK_DEBUG_RUN_SHIFT)
+#define GLB_DEBUG_ACK_DEBUG_RUN_GET(reg_val) \
+ (((reg_val)&GLB_DEBUG_ACK_DEBUG_RUN_MASK) >> GLB_DEBUG_ACK_DEBUG_RUN_SHIFT)
+#define GLB_DEBUG_ACK_DEBUG_RUN_SET(reg_val, value) \
+ (((reg_val) & ~GLB_DEBUG_ACK_DEBUG_RUN_MASK) | \
+ (((value) << GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) & GLB_DEBUG_ACK_DEBUG_RUN_MASK))
+
+#define GLB_DEBUG_ACK_RUN_MODE_SHIFT GPU_U(24)
+#define GLB_DEBUG_ACK_RUN_MODE_MASK (GPU_U(0xFF) << GLB_DEBUG_ACK_RUN_MODE_SHIFT)
+#define GLB_DEBUG_ACK_RUN_MODE_GET(reg_val) \
+ (((reg_val)&GLB_DEBUG_ACK_RUN_MODE_MASK) >> GLB_DEBUG_ACK_RUN_MODE_SHIFT)
+#define GLB_DEBUG_ACK_RUN_MODE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_DEBUG_ACK_RUN_MODE_MASK) | \
+ (((value) << GLB_DEBUG_ACK_RUN_MODE_SHIFT) & GLB_DEBUG_ACK_RUN_MODE_MASK))
+
+/* RUN_MODE values */
+#define GLB_DEBUG_RUN_MODE_TYPE_NOP 0x0
+#define GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP 0x1
+/* End of RUN_MODE values */
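+
+/* Illustrative sketch of using the accessors above to build a debug request word that
+ * flips DEBUG_RUN relative to the current GLB_DEBUG_ACK value (by analogy with the
+ * FWUTF_RUN request/ack note earlier) and selects the core dump run mode; `ack` is a
+ * hypothetical local holding the value read from GLB_DEBUG_ACK:
+ *
+ *   u32 req = GLB_DEBUG_REQ_DEBUG_RUN_SET(0, !GLB_DEBUG_ACK_DEBUG_RUN_GET(ack));
+ *   req = GLB_DEBUG_REQ_RUN_MODE_SET(req, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP);
+ */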
+
#endif /* _KBASE_CSF_REGISTERS_H_ */
diff --git a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
index b12a3b4..d076f3d 100644
--- a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
+++ b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
@@ -21,7 +21,7 @@
#include <mali_kbase.h>
#include <mali_kbase_ctx_sched.h>
-#include <mali_kbase_hwcnt_context.h>
+#include <hwcnt/mali_kbase_hwcnt_context.h>
#include <device/mali_kbase_device.h>
#include <backend/gpu/mali_kbase_irq_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
@@ -339,7 +339,6 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic
"The flush has completed so reset the active indicator\n");
kbdev->irq_reset_flush = false;
- rt_mutex_lock(&kbdev->pm.lock);
if (!silent)
dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
RESET_TIMEOUT);
@@ -364,6 +363,7 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic
*/
kbase_hwcnt_backend_csf_on_before_reset(&kbdev->hwcnt_gpu_iface);
+ rt_mutex_lock(&kbdev->pm.lock);
/* Reset the GPU */
err = kbase_pm_init_hw(kbdev, 0);
@@ -608,6 +608,11 @@ bool kbase_reset_gpu_is_active(struct kbase_device *kbdev)
return kbase_csf_reset_state_is_active(reset_state);
}
+bool kbase_reset_gpu_is_not_pending(struct kbase_device *kbdev)
+{
+ return atomic_read(&kbdev->csf.reset.state) == KBASE_CSF_RESET_GPU_NOT_PENDING;
+}
+
int kbase_reset_gpu_wait(struct kbase_device *kbdev)
{
const long wait_timeout =
diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c
index e472dba..f46ea5a 100644
--- a/mali_kbase/csf/mali_kbase_csf_scheduler.c
+++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c
@@ -34,6 +34,8 @@
#include <mali_kbase_hwaccess_time.h>
#include <trace/events/power.h>
#include "mali_kbase_csf_tiler_heap.h"
+#include "mali_kbase_csf_tiler_heap_reclaim.h"
+#include "mali_kbase_csf_mcu_shared_reg.h"
/* Value to indicate that a queue group is not groups_to_schedule list */
#define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX)
@@ -56,50 +58,14 @@
/* CSG_REQ:STATUS_UPDATE timeout */
#define CSG_STATUS_UPDATE_REQ_TIMEOUT_MS (250) /* 250 milliseconds */
-/*
- * CSF scheduler time threshold for converting "tock" requests into "tick" if
- * they come too close to the end of a tick interval. This avoids scheduling
- * twice in a row.
- */
-#define CSF_SCHEDULER_TIME_TICK_THRESHOLD_MS \
- CSF_SCHEDULER_TIME_TICK_MS
-
-#define CSF_SCHEDULER_TIME_TICK_THRESHOLD_JIFFIES \
- msecs_to_jiffies(CSF_SCHEDULER_TIME_TICK_THRESHOLD_MS)
-
-/* Nanoseconds per millisecond */
-#define NS_PER_MS ((u64)1000 * 1000)
-
-/*
- * CSF minimum time to reschedule for a new "tock" request. Bursts of "tock"
- * requests are not serviced immediately, but shall wait for a minimum time in
- * order to reduce load on the CSF scheduler thread.
- */
-#define CSF_SCHEDULER_TIME_TOCK_JIFFIES 1 /* 1 jiffies-time */
-
-/* CS suspended and is idle (empty ring buffer) */
-#define CS_IDLE_FLAG (1 << 0)
-
-/* CS suspended and is wait for a CQS condition */
-#define CS_WAIT_SYNC_FLAG (1 << 1)
-
/* A GPU address space slot is reserved for MCU. */
#define NUM_RESERVED_AS_SLOTS (1)
-/* Heap deferral time in ms from a CSG suspend to be included in reclaim scan list. The
- * value corresponds to realtime priority CSGs. Other priorites are of derived time value
- * from this, with the realtime case the highest delay.
- */
-#define HEAP_RECLAIM_PRIO_DEFERRAL_MS (1000)
-
-/* Additional heap deferral time in ms if a CSG suspended is in state of WAIT_SYNC */
-#define HEAP_RECLAIM_WAIT_SYNC_DEFERRAL_MS (200)
+/* Time to wait for completion of PING req before considering MCU as hung */
+#define FW_PING_AFTER_ERROR_TIMEOUT_MS (10)
-/* Tiler heap reclaim count size for limiting a count run length */
-#define HEAP_RECLAIM_COUNT_BATCH_SIZE (HEAP_SHRINKER_BATCH << 6)
-
-/* Tiler heap reclaim scan (free) method size for limiting a scan run length */
-#define HEAP_RECLAIM_SCAN_BATCH_SIZE (HEAP_SHRINKER_BATCH << 7)
+/* Explicitly defining this blocked_reason code as SB_WAIT for clarity */
+#define CS_STATUS_BLOCKED_ON_SB_WAIT CS_STATUS_BLOCKED_REASON_REASON_WAIT
static int scheduler_group_schedule(struct kbase_queue_group *group);
static void remove_group_from_idle_wait(struct kbase_queue_group *const group);
@@ -118,7 +84,10 @@ static int suspend_active_queue_groups(struct kbase_device *kbdev,
static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
bool system_suspend);
static void schedule_in_cycle(struct kbase_queue_group *group, bool force);
+#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
static bool evaluate_sync_update(struct kbase_queue *queue);
+#endif
+static bool queue_group_scheduled_locked(struct kbase_queue_group *group);
#define kctx_as_enabled(kctx) (!kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT))
@@ -129,8 +98,11 @@ void turn_on_sc_power_rails(struct kbase_device *kbdev)
WARN_ON(kbdev->csf.scheduler.state == SCHED_SUSPENDED);
- kbase_pm_turn_on_sc_power_rails(kbdev);
- kbdev->csf.scheduler.sc_power_rails_off = false;
+ if (kbdev->csf.scheduler.sc_power_rails_off) {
+ if (kbdev->pm.backend.callback_power_on_sc_rails)
+ kbdev->pm.backend.callback_power_on_sc_rails(kbdev);
+ kbdev->csf.scheduler.sc_power_rails_off = false;
+ }
}
/**
@@ -146,8 +118,11 @@ static void turn_off_sc_power_rails(struct kbase_device *kbdev)
WARN_ON(kbdev->csf.scheduler.state == SCHED_SUSPENDED);
- kbase_pm_turn_off_sc_power_rails(kbdev);
- kbdev->csf.scheduler.sc_power_rails_off = true;
+ if (!kbdev->csf.scheduler.sc_power_rails_off) {
+ if (kbdev->pm.backend.callback_power_off_sc_rails)
+ kbdev->pm.backend.callback_power_off_sc_rails(kbdev);
+ kbdev->csf.scheduler.sc_power_rails_off = true;
+ }
}
/**
@@ -227,6 +202,101 @@ static bool queue_empty_or_blocked(struct kbase_queue *queue)
}
#endif
+/**
+ * wait_for_dump_complete_on_group_deschedule() - Wait for dump on fault and
+ * scheduling tick/tock to complete before the group deschedule.
+ *
+ * @group: Pointer to the group that is being descheduled.
+ *
+ * This function blocks the descheduling of the group until the dump on fault has
+ * completed and the scheduling tick/tock has completed.
+ * To deschedule an on-slot group a CSG termination request would be sent, which
+ * might time out if a fault has occurred and could also disturb the state being
+ * dumped. Moreover the scheduler lock would be held, so access to the debugfs
+ * files would get blocked.
+ * Scheduler lock and 'kctx->csf.lock' are released before this function starts
+ * to wait. When a request sent by the Scheduler to the FW times out, Scheduler
+ * would also wait for the dumping to complete and release the Scheduler lock
+ * before the wait. Meanwhile Userspace can try to delete the group; this function
+ * ensures that the group doesn't exit the Scheduler until the scheduling
+ * tick/tock has completed. Though very unlikely, group deschedule can be triggered
+ * from multiple threads around the same time and after the wait Userspace thread
+ * can win the race and get the group descheduled and free the memory for group
+ * pointer before the other threads wake up and notice that group has already been
+ * descheduled. To avoid the freeing in such a case, a sort of refcount is used
+ * for the group which is incremented & decremented across the wait.
+ */
+static void wait_for_dump_complete_on_group_deschedule(struct kbase_queue_group *group)
+{
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ struct kbase_device *kbdev = group->kctx->kbdev;
+ struct kbase_context *kctx = group->kctx;
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+
+ lockdep_assert_held(&kctx->csf.lock);
+ lockdep_assert_held(&scheduler->lock);
+
+ if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev)))
+ return;
+
+ while ((!kbase_debug_csf_fault_dump_complete(kbdev) ||
+ (scheduler->state == SCHED_BUSY)) &&
+ queue_group_scheduled_locked(group)) {
+ group->deschedule_deferred_cnt++;
+ mutex_unlock(&scheduler->lock);
+ rt_mutex_unlock(&kctx->csf.lock);
+ kbase_debug_csf_fault_wait_completion(kbdev);
+ rt_mutex_lock(&kctx->csf.lock);
+ mutex_lock(&scheduler->lock);
+ group->deschedule_deferred_cnt--;
+ }
+#endif
+}
+
+/**
+ * schedule_actions_trigger_df() - Notify the client about the fault and
+ * wait for the dumping to complete.
+ *
+ * @kbdev: Pointer to the device
+ * @kctx: Pointer to the context associated with the CSG slot for which
+ * the timeout was seen.
+ * @error: Error code indicating the type of timeout that occurred.
+ *
+ * This function notifies the Userspace client waiting for the faults and waits
+ * for the client to complete the dumping.
+ * The function is called only from Scheduling tick/tock when a request sent by
+ * the Scheduler to FW times out or from the protm event work item of the group
+ * when the protected mode entry request times out.
+ * In the latter case no wait is done, as the scheduler lock would be released
+ * immediately. In the former case the function releases the scheduler lock
+ * before waiting. It has been ensured that the Scheduler's view of the groups
+ * won't change meanwhile, so no group can enter/exit the Scheduler, become
+ * runnable or go off slot.
+ */
+static void schedule_actions_trigger_df(struct kbase_device *kbdev,
+ struct kbase_context *kctx, enum dumpfault_error_type error)
+{
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+
+ lockdep_assert_held(&scheduler->lock);
+
+ if (!kbase_debug_csf_fault_notify(kbdev, kctx, error))
+ return;
+
+ if (unlikely(scheduler->state != SCHED_BUSY)) {
+ WARN_ON(error != DF_PROTECTED_MODE_ENTRY_FAILURE);
+ return;
+ }
+
+ mutex_unlock(&scheduler->lock);
+ kbase_debug_csf_fault_wait_completion(kbdev);
+ mutex_lock(&scheduler->lock);
+ WARN_ON(scheduler->state != SCHED_BUSY);
+#endif
+}
+
#ifdef KBASE_PM_RUNTIME
/**
* wait_for_scheduler_to_exit_sleep() - Wait for Scheduler to exit the
@@ -334,6 +404,7 @@ static int force_scheduler_to_exit_sleep(struct kbase_device *kbdev)
}
scheduler->state = SCHED_SUSPENDED;
+ KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state);
return 0;
@@ -581,17 +652,18 @@ static void update_on_slot_queues_offsets(struct kbase_device *kbdev)
#endif
static void enqueue_gpu_idle_work(struct kbase_csf_scheduler *const scheduler,
- unsigned long delay_ms)
+ unsigned long delay)
{
#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
lockdep_assert_held(&scheduler->lock);
scheduler->gpu_idle_work_pending = true;
+ mod_delayed_work(system_highpri_wq, &scheduler->gpu_idle_work, delay);
#else
+ CSTD_UNUSED(delay);
atomic_set(&scheduler->gpu_no_longer_idle, false);
+ queue_work(scheduler->idle_wq, &scheduler->gpu_idle_work);
#endif
-
- mod_delayed_work(system_highpri_wq, &scheduler->gpu_idle_work, msecs_to_jiffies(delay_ms));
}
bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev)
@@ -601,6 +673,7 @@ bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev)
bool can_suspend_on_idle;
bool ack_gpu_idle_event = true;
+ lockdep_assert_held(&kbdev->hwaccess_lock);
lockdep_assert_held(&scheduler->interrupt_lock);
non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps);
@@ -620,12 +693,23 @@ bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev)
}
#else
if (can_suspend_on_idle) {
+ /* fast_gpu_idle_handling is protected by the
+ * interrupt_lock, which would prevent this from being
+ * updated whilst gpu_idle_worker() is executing.
+ */
+ scheduler->fast_gpu_idle_handling =
+ (kbdev->csf.gpu_idle_hysteresis_us == 0) ||
+ !kbase_csf_scheduler_all_csgs_idle(kbdev);
+
/* The GPU idle worker relies on update_on_slot_queues_offsets() to have
* finished. It's queued before to reduce the time it takes till execution
* but it'll eventually be blocked by the scheduler->interrupt_lock.
*/
enqueue_gpu_idle_work(scheduler, 0);
- update_on_slot_queues_offsets(kbdev);
+
+ /* The extract offsets are unused in fast GPU idle handling */
+ if (!scheduler->fast_gpu_idle_handling)
+ update_on_slot_queues_offsets(kbdev);
}
#endif
} else {
@@ -746,10 +830,14 @@ static bool scheduler_protm_wait_quit(struct kbase_device *kbdev)
remaining = wait_event_timeout(kbdev->csf.event_wait,
!kbase_csf_scheduler_protected_mode_in_use(kbdev), wt);
- if (!remaining) {
+ if (unlikely(!remaining)) {
+ struct kbase_queue_group *group = kbdev->csf.scheduler.active_protm_grp;
+ struct kbase_context *kctx = group ? group->kctx : NULL;
+
dev_warn(kbdev->dev, "[%llu] Timeout (%d ms), protm_quit wait skipped",
kbase_backend_get_cycle_cnt(kbdev),
kbdev->csf.fw_timeout_ms);
+ schedule_actions_trigger_df(kbdev, kctx, DF_PROTECTED_MODE_EXIT_TIMEOUT);
success = false;
}
@@ -870,7 +958,8 @@ static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev,
* Scheduler
*
* @kbdev: Pointer to the device
- * @flags: flags containing previous interrupt state
+ * @flags: Pointer to the flags variable containing the interrupt state
+ * when hwaccess lock was acquired.
*
* This function is called when Scheduler needs to be activated from the
* sleeping state.
@@ -878,14 +967,14 @@ static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev,
* MCU is initiated. It resets the flag that indicates to the MCU state
* machine that MCU needs to be put in sleep state.
*
- * Note: This function shall be called with hwaccess lock held and it will
- * release that lock.
+ * Note: This function shall be called with hwaccess lock held and it may
+ * release that lock and reacquire it.
*
* Return: zero when the PM reference was taken and non-zero when the
* system is being suspending/suspended.
*/
static int scheduler_pm_active_after_sleep(struct kbase_device *kbdev,
- unsigned long flags)
+ unsigned long *flags)
{
u32 prev_count;
int ret = 0;
@@ -896,20 +985,20 @@ static int scheduler_pm_active_after_sleep(struct kbase_device *kbdev,
prev_count = kbdev->csf.scheduler.pm_active_count;
if (!WARN_ON(prev_count == U32_MAX))
kbdev->csf.scheduler.pm_active_count++;
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
/* On 0 => 1, make a pm_ctx_active request */
if (!prev_count) {
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, *flags);
+
ret = kbase_pm_context_active_handle_suspend(kbdev,
KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, *flags);
if (ret)
kbdev->csf.scheduler.pm_active_count--;
else
kbdev->pm.backend.gpu_sleep_mode_active = false;
kbase_pm_update_state(kbdev);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
return ret;
@@ -989,6 +1078,7 @@ static void scheduler_pm_idle_before_sleep(struct kbase_device *kbdev)
}
#endif
+#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
static void enable_gpu_idle_fw_timer(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
@@ -1051,6 +1141,7 @@ static void update_gpu_idle_timer_on_scheduler_wakeup(struct kbase_device *kbdev
return;
}
+#endif
static void scheduler_wakeup(struct kbase_device *kbdev, bool kick)
{
@@ -1076,8 +1167,8 @@ static void scheduler_wakeup(struct kbase_device *kbdev, bool kick)
"Re-activating the Scheduler out of sleep");
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- ret = scheduler_pm_active_after_sleep(kbdev, flags);
- /* hwaccess_lock is released in the previous function call. */
+ ret = scheduler_pm_active_after_sleep(kbdev, &flags);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
#endif
}
@@ -1090,9 +1181,12 @@ static void scheduler_wakeup(struct kbase_device *kbdev, bool kick)
return;
}
+#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
update_gpu_idle_timer_on_scheduler_wakeup(kbdev);
+#endif
scheduler->state = SCHED_INACTIVE;
+ KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state);
if (kick)
scheduler_enable_tick_timer_nolock(kbdev);
@@ -1108,6 +1202,7 @@ static void scheduler_suspend(struct kbase_device *kbdev)
dev_dbg(kbdev->dev, "Suspending the Scheduler");
scheduler_pm_idle(kbdev);
scheduler->state = SCHED_SUSPENDED;
+ KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state);
}
}
@@ -1138,6 +1233,8 @@ static void update_idle_suspended_group_state(struct kbase_queue_group *group)
KBASE_CSF_GROUP_SUSPENDED);
} else if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE) {
group->run_state = KBASE_CSF_GROUP_SUSPENDED;
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED, group,
+ group->run_state);
/* If scheduler is not suspended and the given group's
* static priority (reflected by the scan_seq_num) is inside
@@ -1304,10 +1401,11 @@ static int halt_stream_sync(struct kbase_queue *queue)
kbase_csf_firmware_cs_input_mask(stream, CS_REQ, CS_REQ_STATE_STOP,
CS_REQ_STATE_MASK);
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQ, group, queue, 0u);
kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, true);
spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQ, group, queue, 0u);
+
/* Timed wait */
remaining = wait_event_timeout(kbdev->csf.event_wait,
(CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK))
@@ -1584,9 +1682,11 @@ int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue)
err = sched_halt_stream(queue);
unassign_user_doorbell_from_queue(kbdev, queue);
+ kbase_csf_mcu_shared_drop_stopped_queue(kbdev, queue);
}
mutex_unlock(&kbdev->csf.scheduler.lock);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_STOP, group, queue, group->run_state);
return err;
}
@@ -1664,9 +1764,9 @@ static void program_cs(struct kbase_device *kbdev,
struct kbase_csf_cmd_stream_group_info *ginfo;
struct kbase_csf_cmd_stream_info *stream;
int csi_index = queue->csi_index;
+ unsigned long flags;
u64 user_input;
u64 user_output;
- unsigned long flags;
if (WARN_ON(!group))
return;
@@ -1682,11 +1782,13 @@ static void program_cs(struct kbase_device *kbdev,
WARN_ON(csi_index >= ginfo->stream_num))
return;
- assign_user_doorbell_to_queue(kbdev, queue);
- if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
- return;
+ if (queue->enabled) {
+ assign_user_doorbell_to_queue(kbdev, queue);
+ if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
+ return;
- WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr);
+ WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr);
+ }
if (queue->enabled && queue_group_suspended_locked(group))
program_cs_extract_init(queue);
@@ -1700,17 +1802,15 @@ static void program_cs(struct kbase_device *kbdev,
kbase_csf_firmware_cs_input(stream, CS_SIZE,
queue->size);
- user_input = (queue->reg->start_pfn << PAGE_SHIFT);
- kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO,
- user_input & 0xFFFFFFFF);
- kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI,
- user_input >> 32);
+ user_input = queue->user_io_gpu_va;
+ WARN_ONCE(!user_input && queue->enabled, "Enabled queue should have a valid gpu_va");
- user_output = ((queue->reg->start_pfn + 1) << PAGE_SHIFT);
- kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO,
- user_output & 0xFFFFFFFF);
- kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI,
- user_output >> 32);
+ kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO, user_input & 0xFFFFFFFF);
+ kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI, user_input >> 32);
+
+ user_output = user_input + PAGE_SIZE;
+ kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO, user_output & 0xFFFFFFFF);
+ kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI, user_output >> 32);
kbase_csf_firmware_cs_input(stream, CS_CONFIG,
(queue->doorbell_nr << 8) | (queue->priority & 0xF));
@@ -1721,16 +1821,31 @@ static void program_cs(struct kbase_device *kbdev,
/* Enable all interrupts for now */
kbase_csf_firmware_cs_input(stream, CS_ACK_IRQ_MASK, ~((u32)0));
+ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
+
+ /* The fault bit could be misaligned between CS_REQ and CS_ACK if the
+ * acknowledgment was deferred due to dump on fault and the group was
+ * removed from the CSG slot before the fault could be acknowledged.
+ */
+ if (queue->enabled) {
+ u32 const cs_ack =
+ kbase_csf_firmware_cs_output(stream, CS_ACK);
+
+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
+ CS_REQ_FAULT_MASK);
+ }
+
/*
* Enable the CSG idle notification once the CS's ringbuffer
* becomes empty or the CS becomes sync_idle, waiting sync update
* or protected mode switch.
*/
kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
- CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK,
- CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK);
+ CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK |
+ CS_REQ_IDLE_SHARED_SB_DEC_MASK,
+ CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK |
+ CS_REQ_IDLE_SHARED_SB_DEC_MASK);
- spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
/* Set state to START/STOP */
kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
queue->enabled ? CS_REQ_STATE_START : CS_REQ_STATE_STOP,
@@ -1744,6 +1859,7 @@ static void program_cs(struct kbase_device *kbdev,
update_hw_active(queue, true);
}
+#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
static void start_stream_sync(struct kbase_queue *queue)
{
struct kbase_queue_group *group = queue->group;
@@ -1787,6 +1903,21 @@ static void start_stream_sync(struct kbase_queue *queue)
kbase_reset_gpu(kbdev);
}
}
+#endif
+
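+/* Map the MCU shared userio pages for a queue being added to an on-slot CSG
+ * and, on success, program the corresponding CS.
+ */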
+static int onslot_csg_add_new_queue(struct kbase_queue *queue)
+{
+ struct kbase_device *kbdev = queue->kctx->kbdev;
+ int err;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ err = kbase_csf_mcu_shared_add_queue(kbdev, queue);
+ if (!err)
+ program_cs(kbdev, queue, true);
+
+ return err;
+}
int kbase_csf_scheduler_queue_start(struct kbase_queue *queue)
{
@@ -1795,7 +1926,6 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue)
bool const cs_enabled = queue->enabled;
int err = 0;
bool evicted = false;
- unsigned long flags;
kbase_reset_gpu_assert_prevented(kbdev);
lockdep_assert_held(&queue->kctx->csf.lock);
@@ -1805,6 +1935,13 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue)
mutex_lock(&kbdev->csf.scheduler.lock);
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ if (unlikely(kbdev->csf.scheduler.state == SCHED_BUSY)) {
+ mutex_unlock(&kbdev->csf.scheduler.lock);
+ return -EBUSY;
+ }
+#endif
+
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_START, group, queue,
group->run_state);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group, queue,
@@ -1839,6 +1976,17 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue)
turn_on_sc_power_rails(kbdev);
#endif
if (cs_enabled) {
+#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
+ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock,
+ flags);
+ kbase_csf_ring_cs_kernel_doorbell(kbdev,
+ queue->csi_index, group->csg_nr,
+ true);
+ spin_unlock_irqrestore(
+ &kbdev->csf.scheduler.interrupt_lock, flags);
+ } else {
+ start_stream_sync(queue);
+#else
/* In normal situation, when a queue is
* already running, the queue update
* would be a doorbell kick on user
@@ -1849,15 +1997,31 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue)
* kernel as the hw_active flag is yet
* to be set. The scheduler needs to
* give a kick to the corresponding
- * CSI door-bell on such a case.
+ * user door-bell in such a case.
*/
- spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
- kbase_csf_ring_cs_kernel_doorbell(kbdev,
- queue->csi_index, group->csg_nr,
- true);
- spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
+ kbase_csf_ring_cs_user_doorbell(kbdev, queue);
} else {
- start_stream_sync(queue);
+ err = onslot_csg_add_new_queue(queue);
+ /* For an on-slot CSG, the only error when adding a new
+ * queue to run is that the scheduler could not map the
+ * required userio pages, most likely due to a resource
+ * issue. In such a case, and if the group is yet to
+ * enter its fatal error state, we return -EBUSY to the
+ * submitter for another kick. The queue itself has yet
+ * to be programmed, hence it needs to remain in its
+ * previous (disabled) state. If the error persists, the
+ * group will eventually report a fatal error through the
+ * group's error reporting mechanism, once the MCU shared
+ * region map retry limit of the group is exceeded. For
+ * such a case, the expected error value is -EIO.
+ */
+ if (unlikely(err)) {
+ queue->enabled = cs_enabled;
+ mutex_unlock(&kbdev->csf.scheduler.lock);
+ return (err != -EIO) ? -EBUSY : err;
+ }
+#endif
}
}
queue_delayed_work(system_long_wq, &kbdev->csf.scheduler.ping_work,
@@ -2005,6 +2169,7 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend)
unsigned long flags;
struct kbase_csf_cmd_stream_group_info *ginfo =
&global_iface->groups[slot];
+
u32 halt_cmd = suspend ? CSG_REQ_STATE_SUSPEND :
CSG_REQ_STATE_TERMINATE;
@@ -2022,8 +2187,8 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend)
csg_slot[slot].trigger_jiffies = jiffies;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP_REQ, group, halt_cmd);
- KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG(
- kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot);
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG(
+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot, suspend);
}
}
@@ -2037,6 +2202,31 @@ static void suspend_csg_slot(struct kbase_queue_group *group)
halt_csg_slot(group, true);
}
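+/* Check whether the firmware global interface version supports the GE
+ * (greater-than-or-equal) SYNC_WAIT condition.
+ */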
+static bool csf_wait_ge_condition_supported(struct kbase_device *kbdev)
+{
+ const uint32_t glb_major = GLB_VERSION_MAJOR_GET(kbdev->csf.global_iface.version);
+ const uint32_t glb_minor = GLB_VERSION_MINOR_GET(kbdev->csf.global_iface.version);
+
+ switch (glb_major) {
+ case 0:
+ break;
+ case 1:
+ if (glb_minor >= 4)
+ return true;
+ break;
+ case 2:
+ if (glb_minor >= 6)
+ return true;
+ break;
+ case 3:
+ if (glb_minor >= 6)
+ return true;
+ break;
+ default:
+ return true;
+ }
+ return false;
+}
/**
* evaluate_sync_update() - Evaluate the sync wait condition the GPU command
* queue has been blocked on.
@@ -2056,11 +2246,13 @@ static bool evaluate_sync_update(struct kbase_queue *queue)
u32 sync_current_val;
struct kbase_device *kbdev;
bool sync_wait_align_valid = false;
+ bool sync_wait_cond_valid = false;
if (WARN_ON(!queue))
return false;
kbdev = queue->kctx->kbdev;
+
lockdep_assert_held(&kbdev->csf.scheduler.lock);
sync_wait_size = CS_STATUS_WAIT_SYNC_WAIT_SIZE_GET(queue->status_wait);
@@ -2089,9 +2281,12 @@ static bool evaluate_sync_update(struct kbase_queue *queue)
sync_wait_cond =
CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(queue->status_wait);
+ sync_wait_cond_valid = (sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) ||
+ (sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE) ||
+ ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GE) &&
+ csf_wait_ge_condition_supported(kbdev));
- WARN_ON((sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) &&
- (sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE));
+ WARN_ON(!sync_wait_cond_valid);
sync_current_val = READ_ONCE(*sync_ptr);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_CUR_VAL, queue->group, queue,
@@ -2102,6 +2297,8 @@ static bool evaluate_sync_update(struct kbase_queue *queue)
if (((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) &&
(sync_current_val > queue->sync_value)) ||
+ ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GE) &&
+ (sync_current_val >= queue->sync_value) && csf_wait_ge_condition_supported(kbdev)) ||
((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE) &&
(sync_current_val <= queue->sync_value))) {
/* The sync wait condition is satisfied so the group to which
@@ -2152,7 +2349,7 @@ bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo,
KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group,
queue, status);
- if (CS_STATUS_WAIT_SYNC_WAIT_GET(status)) {
+ if (CS_STATUS_WAIT_SYNC_WAIT_GET(status) || CS_STATUS_WAIT_SB_MASK_GET(status)) {
queue->status_wait = status;
queue->sync_ptr = kbase_csf_firmware_cs_output(stream,
CS_STATUS_WAIT_SYNC_POINTER_LO);
@@ -2168,7 +2365,8 @@ bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo,
kbase_csf_firmware_cs_output(stream,
CS_STATUS_BLOCKED_REASON));
- if (!evaluate_sync_update(queue)) {
+ if ((queue->blocked_reason == CS_STATUS_BLOCKED_ON_SB_WAIT) ||
+ !evaluate_sync_update(queue)) {
is_waiting = true;
} else {
/* Sync object already got updated & met the condition
@@ -2211,6 +2409,44 @@ static void schedule_in_cycle(struct kbase_queue_group *group, bool force)
}
}
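+/* Emit the KTRACE code that corresponds to the group's current run state. */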
+static void ktrace_log_group_state(struct kbase_queue_group *const group)
+{
+ switch (group->run_state) {
+ case KBASE_CSF_GROUP_INACTIVE:
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_INACTIVE, group,
+ group->run_state);
+ break;
+ case KBASE_CSF_GROUP_RUNNABLE:
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_RUNNABLE, group,
+ group->run_state);
+ break;
+ case KBASE_CSF_GROUP_IDLE:
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_IDLE, group,
+ group->run_state);
+ break;
+ case KBASE_CSF_GROUP_SUSPENDED:
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED, group,
+ group->run_state);
+ break;
+ case KBASE_CSF_GROUP_SUSPENDED_ON_IDLE:
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED_ON_IDLE, group,
+ group->run_state);
+ break;
+ case KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC:
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED_ON_WAIT_SYNC,
+ group, group->run_state);
+ break;
+ case KBASE_CSF_GROUP_FAULT_EVICTED:
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_FAULT_EVICTED, group,
+ group->run_state);
+ break;
+ case KBASE_CSF_GROUP_TERMINATED:
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_TERMINATED, group,
+ group->run_state);
+ break;
+ }
+}
+
static
void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler,
struct kbase_queue_group *const group,
@@ -2228,6 +2464,8 @@ void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler,
group->run_state = run_state;
+ ktrace_log_group_state(group);
+
if (run_state == KBASE_CSF_GROUP_RUNNABLE)
group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID;
@@ -2279,6 +2517,9 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler,
WARN_ON(!queue_group_scheduled_locked(group));
group->run_state = run_state;
+
+ ktrace_log_group_state(group);
+
list_del_init(&group->link);
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
@@ -2377,6 +2618,8 @@ static void insert_group_to_idle_wait(struct kbase_queue_group *const group)
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_INSERT, group,
kctx->csf.sched.num_idle_wait_grps);
group->run_state = KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC;
+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, CSF_GROUP_SUSPENDED_ON_WAIT_SYNC, group,
+ group->run_state);
dev_dbg(kctx->kbdev->dev,
"Group-%d suspended on sync_wait, total wait_groups: %u\n",
group->handle, kctx->csf.sched.num_idle_wait_grps);
@@ -2402,6 +2645,7 @@ static void remove_group_from_idle_wait(struct kbase_queue_group *const group)
NULL;
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_HEAD, new_head_grp, 0u);
group->run_state = KBASE_CSF_GROUP_INACTIVE;
+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, CSF_GROUP_INACTIVE, group, group->run_state);
}
static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler,
@@ -2416,7 +2660,7 @@ static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler,
insert_group_to_idle_wait(group);
}
-static void update_offslot_non_idle_cnt_for_faulty_grp(struct kbase_queue_group *group)
+static void update_offslot_non_idle_cnt(struct kbase_queue_group *group)
{
struct kbase_device *kbdev = group->kctx->kbdev;
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
@@ -2518,145 +2762,6 @@ static bool confirm_cmd_buf_empty(struct kbase_queue const *queue)
return cs_idle;
}
-static void detach_from_sched_reclaim_mgr(struct kbase_context *kctx)
-{
- struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler;
- struct kbase_kctx_heap_info *heap_info = &kctx->csf.sched.heap_info;
-
- lockdep_assert_held(&scheduler->lock);
-
- if (!list_empty(&heap_info->mgr_link)) {
- WARN_ON(!heap_info->flags);
- list_del_init(&heap_info->mgr_link);
-
- if (heap_info->flags & CSF_CTX_RECLAIM_CANDI_FLAG)
- WARN_ON(atomic_sub_return(heap_info->nr_est_pages,
- &scheduler->reclaim_mgr.est_cand_pages) < 0);
- if (heap_info->flags & CSF_CTX_RECLAIM_SCAN_FLAG)
- WARN_ON(atomic_sub_return(heap_info->nr_scan_pages,
- &scheduler->reclaim_mgr.mgr_scan_pages) < 0);
-
- dev_dbg(kctx->kbdev->dev, "Reclaim_mgr_detach: ctx_%d_%d, flags = 0x%x\n",
- kctx->tgid, kctx->id, heap_info->flags);
- /* Clear on detaching */
- heap_info->nr_est_pages = 0;
- heap_info->nr_scan_pages = 0;
- heap_info->flags = 0;
- }
-}
-
-static void attach_to_sched_reclaim_mgr(struct kbase_context *kctx)
-{
- struct kbase_kctx_heap_info *const heap_info = &kctx->csf.sched.heap_info;
- struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler;
-
- lockdep_assert_held(&scheduler->lock);
-
- if (WARN_ON(!list_empty(&heap_info->mgr_link)))
- list_del_init(&heap_info->mgr_link);
-
- list_add_tail(&heap_info->mgr_link, &scheduler->reclaim_mgr.candidate_ctxs);
-
- /* Read the kctx's tiler heap estimate of pages, this separates it away
- * from the kctx's tiler heap side updates/changes. The value remains static
- * for the duration of this kctx on the reclaim manager's candidate_ctxs list.
- */
- heap_info->nr_est_pages = (u32)atomic_read(&kctx->csf.tiler_heaps.est_count_pages);
- atomic_add(heap_info->nr_est_pages, &scheduler->reclaim_mgr.est_cand_pages);
-
- heap_info->attach_jiffies = jiffies;
- heap_info->flags = CSF_CTX_RECLAIM_CANDI_FLAG;
-
- dev_dbg(kctx->kbdev->dev, "Reclaim_mgr_attach: ctx_%d_%d, est_count_pages = %u\n",
- kctx->tgid, kctx->id, heap_info->nr_est_pages);
-}
-
-static void update_kctx_heap_info_on_grp_on_slot(struct kbase_queue_group *group)
-{
- struct kbase_context *kctx = group->kctx;
- struct kbase_kctx_heap_info *heap_info = &kctx->csf.sched.heap_info;
-
- lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock);
-
- heap_info->on_slot_grps++;
- /* If the kctx transitioned on-slot CSGs: 0 => 1, detach the kctx scheduler->reclaim_mgr */
- if (heap_info->on_slot_grps == 1) {
- dev_dbg(kctx->kbdev->dev,
- "CSG_%d_%d_%d on-slot, remove kctx from reclaim manager\n",
- group->kctx->tgid, group->kctx->id, group->handle);
-
- detach_from_sched_reclaim_mgr(kctx);
- }
-}
-
-static void update_kctx_heap_info_on_grp_evict(struct kbase_queue_group *group)
-{
- struct kbase_context *kctx = group->kctx;
- struct kbase_kctx_heap_info *const heap_info = &kctx->csf.sched.heap_info;
- struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler;
- const u32 num_groups = kctx->kbdev->csf.global_iface.group_num;
- u32 on_slot_grps = 0;
- u32 i;
-
- lockdep_assert_held(&scheduler->lock);
-
- /* Group eviction from the scheduler is a bit more complex, but fairly less
- * frequent in operations. Taking the opportunity to actually count the
- * on-slot CSGs from the given kctx, for robustness and clearer code logic.
- */
- for_each_set_bit(i, scheduler->csg_inuse_bitmap, num_groups) {
- struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i];
- struct kbase_queue_group *grp = csg_slot->resident_group;
-
- if (unlikely(!grp))
- continue;
-
- if (grp->kctx == kctx)
- on_slot_grps++;
- }
-
- heap_info->on_slot_grps = on_slot_grps;
-
- /* If the kctx has no other CSGs on-slot, handle the heap reclaim related actions */
- if (!heap_info->on_slot_grps) {
- if (kctx->csf.sched.num_runnable_grps || kctx->csf.sched.num_idle_wait_grps) {
- /* The kctx has other operational CSGs, attach it if not yet done */
- if (list_empty(&heap_info->mgr_link)) {
- dev_dbg(kctx->kbdev->dev,
- "CSG_%d_%d_%d evict, add kctx to reclaim manager\n",
- group->kctx->tgid, group->kctx->id, group->handle);
-
- attach_to_sched_reclaim_mgr(kctx);
- }
- } else {
- /* The kctx is a zombie after the group eviction, drop it out */
- dev_dbg(kctx->kbdev->dev,
- "CSG_%d_%d_%d evict leading to zombie kctx, dettach from reclaim manager\n",
- group->kctx->tgid, group->kctx->id, group->handle);
-
- detach_from_sched_reclaim_mgr(kctx);
- }
- }
-}
-
-static void update_kctx_heap_info_on_grp_suspend(struct kbase_queue_group *group)
-{
- struct kbase_context *kctx = group->kctx;
- struct kbase_kctx_heap_info *heap_info = &kctx->csf.sched.heap_info;
-
- lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock);
-
- if (!WARN_ON(heap_info->on_slot_grps == 0))
- heap_info->on_slot_grps--;
- /* If the kctx has no CSGs on-slot, attach it to scheduler's reclaim manager */
- if (heap_info->on_slot_grps == 0) {
- dev_dbg(kctx->kbdev->dev, "CSG_%d_%d_%d off-slot, add kctx to reclaim manager\n",
- group->kctx->tgid, group->kctx->id, group->handle);
-
- attach_to_sched_reclaim_mgr(kctx);
- }
-}
-
static void save_csg_slot(struct kbase_queue_group *group)
{
struct kbase_device *kbdev = group->kctx->kbdev;
@@ -2692,9 +2797,14 @@ static void save_csg_slot(struct kbase_queue_group *group)
if (!queue || !queue->enabled)
continue;
- if (save_slot_cs(ginfo, queue))
- sync_wait = true;
- else {
+ if (save_slot_cs(ginfo, queue)) {
+ /* sync_wait is only true if the queue is blocked on
+ * a CQS and not a scoreboard.
+ */
+ if (queue->blocked_reason !=
+ CS_STATUS_BLOCKED_ON_SB_WAIT)
+ sync_wait = true;
+ } else {
/* Need to confirm if ringbuffer of the GPU
* queue is empty or not. A race can arise
* between the flush of GPU queue and suspend
@@ -2719,15 +2829,19 @@ static void save_csg_slot(struct kbase_queue_group *group)
else {
group->run_state =
KBASE_CSF_GROUP_SUSPENDED_ON_IDLE;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_SUSPENDED_ON_IDLE, group,
+ group->run_state);
dev_dbg(kbdev->dev, "Group-%d suspended: idle",
group->handle);
}
} else {
group->run_state = KBASE_CSF_GROUP_SUSPENDED;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_SUSPENDED, group,
+ group->run_state);
}
update_offslot_non_idle_cnt_on_grp_suspend(group);
- update_kctx_heap_info_on_grp_suspend(group);
+ kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend(group);
}
}
@@ -2807,6 +2921,11 @@ static bool cleanup_csg_slot(struct kbase_queue_group *group)
KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG(kbdev,
kbdev->gpu_props.props.raw_props.gpu_id, slot);
+ /* Notify that the group is off-slot and the csg_reg might be available
+ * for reuse with other groups in a 'lazy unbinding' style.
+ */
+ kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group);
+
return as_fault;
}
@@ -2890,8 +3009,8 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
u32 state;
int i;
unsigned long flags;
- const u64 normal_suspend_buf =
- group->normal_suspend_buf.reg->start_pfn << PAGE_SHIFT;
+ u64 normal_suspend_buf;
+ u64 protm_suspend_buf;
struct kbase_csf_csg_slot *csg_slot =
&kbdev->csf.scheduler.csg_slots[slot];
@@ -2903,6 +3022,19 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_READY);
+ if (unlikely(kbase_csf_mcu_shared_group_bind_csg_reg(kbdev, group))) {
+ dev_warn(kbdev->dev,
+ "Couldn't bind MCU shared csg_reg for group %d of context %d_%d, slot=%u",
+ group->handle, group->kctx->tgid, kctx->id, slot);
+ kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group);
+ return;
+ }
+
+ /* The suspend buf has already been mapped through binding to csg_reg */
+ normal_suspend_buf = group->normal_suspend_buf.gpu_va;
+ protm_suspend_buf = group->protected_suspend_buf.gpu_va;
+ WARN_ONCE(!normal_suspend_buf, "Normal suspend buffer not mapped");
+
ginfo = &global_iface->groups[slot];
/* Pick an available address space for this context */
@@ -2915,6 +3047,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
if (kctx->as_nr == KBASEP_AS_NR_INVALID) {
dev_warn(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n",
group->handle, kctx->tgid, kctx->id, slot);
+ kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group);
return;
}
@@ -2966,16 +3099,22 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_HI,
normal_suspend_buf >> 32);
- if (group->protected_suspend_buf.reg) {
- const u64 protm_suspend_buf =
- group->protected_suspend_buf.reg->start_pfn <<
- PAGE_SHIFT;
- kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO,
- protm_suspend_buf & U32_MAX);
- kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI,
- protm_suspend_buf >> 32);
- }
+ /* Note, we program the P-mode buffer pointer here, but actual entry
+ * into P-mode execution at runtime requires that the P-mode physical
+ * pages are allocated and mapped with the bound csg_reg, which carries
+ * a specific flag indicating this P-mode runnable condition before a
+ * group is granted entry to its P-mode section. Without a P-mode entry,
+ * the buffer pointed to is not going to be accessed at all.
+ */
+ kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO, protm_suspend_buf & U32_MAX);
+ kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI, protm_suspend_buf >> 32);
+ if (group->dvs_buf) {
+ kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_LO,
+ group->dvs_buf & U32_MAX);
+ kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_HI,
+ group->dvs_buf >> 32);
+ }
/* Enable all interrupts for now */
kbase_csf_firmware_csg_input(ginfo, CSG_ACK_IRQ_MASK, ~((u32)0));
@@ -3018,10 +3157,13 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
kbasep_platform_event_work_begin(group);
/* Update the heap reclaim manager */
- update_kctx_heap_info_on_grp_on_slot(group);
+ kbase_csf_tiler_heap_reclaim_sched_notify_grp_active(group);
/* Programming a slot consumes a group from scanout */
update_offslot_non_idle_cnt_for_onslot_grp(group);
+
+ /* Notify the group's bound csg_reg is now in active use */
+ kbase_csf_mcu_shared_set_group_csg_reg_active(kbdev, group);
}
static void remove_scheduled_group(struct kbase_device *kbdev,
@@ -3042,7 +3184,7 @@ static void remove_scheduled_group(struct kbase_device *kbdev,
}
static void sched_evict_group(struct kbase_queue_group *group, bool fault,
- bool update_non_idle_offslot_grps_cnt)
+ bool update_non_idle_offslot_grps_cnt_from_run_state)
{
struct kbase_context *kctx = group->kctx;
struct kbase_device *kbdev = kctx->kbdev;
@@ -3053,7 +3195,7 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault,
if (queue_group_scheduled_locked(group)) {
u32 i;
- if (update_non_idle_offslot_grps_cnt &&
+ if (update_non_idle_offslot_grps_cnt_from_run_state &&
(group->run_state == KBASE_CSF_GROUP_SUSPENDED ||
group->run_state == KBASE_CSF_GROUP_RUNNABLE)) {
int new_val = atomic_dec_return(
@@ -3068,8 +3210,11 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault,
}
if (group->prepared_seq_num !=
- KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID)
+ KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) {
+ if (!update_non_idle_offslot_grps_cnt_from_run_state)
+ update_offslot_non_idle_cnt(group);
remove_scheduled_group(kbdev, group);
+ }
if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)
remove_group_from_idle_wait(group);
@@ -3080,8 +3225,11 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault,
WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE);
- if (fault)
+ if (fault) {
group->run_state = KBASE_CSF_GROUP_FAULT_EVICTED;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_FAULT_EVICTED, group,
+ scheduler->total_runnable_grps);
+ }
KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_EVICT, group,
(((u64)scheduler->total_runnable_grps) << 32) |
@@ -3092,7 +3240,10 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault,
wake_up_all(&kbdev->csf.event_wait);
}
- update_kctx_heap_info_on_grp_evict(group);
+ kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(group);
+
+ /* Clear all the bound shared regions and unmap any in-place MMU maps */
+ kbase_csf_mcu_shared_clear_evicted_group_csg_reg(kbdev, group);
}
static int term_group_sync(struct kbase_queue_group *group)
@@ -3107,7 +3258,8 @@ static int term_group_sync(struct kbase_queue_group *group)
group->cs_unrecoverable || csg_slot_stopped_locked(kbdev, group->csg_nr),
remaining);
- if (!remaining) {
+ if (unlikely(!remaining)) {
+ enum dumpfault_error_type error_type = DF_CSG_TERMINATE_TIMEOUT;
const struct gpu_uevent evt = {
.type = GPU_UEVENT_TYPE_KMD_ERROR,
.info = GPU_UEVENT_INFO_GROUP_TERM
@@ -3117,6 +3269,9 @@ static int term_group_sync(struct kbase_queue_group *group)
kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
group->handle, group->kctx->tgid,
group->kctx->id, group->csg_nr);
+ if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
+ error_type = DF_PING_REQUEST_TIMEOUT;
+ kbase_debug_csf_fault_notify(kbdev, group->kctx, error_type);
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
@@ -3139,6 +3294,7 @@ void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group)
mutex_lock(&scheduler->lock);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_DESCHEDULE, group, group->run_state);
+ wait_for_dump_complete_on_group_deschedule(group);
if (!queue_group_scheduled_locked(group))
goto unlock;
@@ -3228,6 +3384,8 @@ static int scheduler_group_schedule(struct kbase_queue_group *group)
group));
group->run_state = KBASE_CSF_GROUP_RUNNABLE;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
+ group->run_state);
/* A normal mode CSG could be idle onslot during
* protected mode. In this case clear the
@@ -3469,8 +3627,7 @@ static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev,
scheduler->remaining_tick_slots--;
}
} else {
- update_offslot_non_idle_cnt_for_faulty_grp(
- group);
+ update_offslot_non_idle_cnt(group);
remove_scheduled_group(kbdev, group);
}
}
@@ -3582,7 +3739,7 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
csg_slot_stopped_raw),
remaining);
- if (remaining) {
+ if (likely(remaining)) {
u32 i;
for_each_set_bit(i, changed, num_groups) {
@@ -3595,6 +3752,9 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
/* The on slot csg is now stopped */
clear_bit(i, slot_mask);
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG(
+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i);
+
if (likely(group)) {
bool as_fault;
/* Only do save/cleanup if the
@@ -3623,6 +3783,7 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
for_each_set_bit(i, slot_mask, num_groups) {
struct kbase_queue_group *const group =
scheduler->csg_slots[i].resident_group;
+ enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT;
struct base_gpu_queue_group_error const
err_payload = { .error_type =
@@ -3636,10 +3797,6 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
if (unlikely(group == NULL))
continue;
- kbase_csf_add_group_fatal_error(group,
- &err_payload);
- kbase_event_wakeup_nosync(group->kctx);
-
/* TODO GPUCORE-25328: The CSG can't be
* terminated, the GPU will be reset as a
* work-around.
@@ -3654,14 +3811,19 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
group->handle, group->kctx->tgid,
group->kctx->id, i,
kbdev->csf.fw_timeout_ms);
+ if (kbase_csf_firmware_ping_wait(kbdev,
+ FW_PING_AFTER_ERROR_TIMEOUT_MS))
+ error_type = DF_PING_REQUEST_TIMEOUT;
+ schedule_actions_trigger_df(kbdev, group->kctx, error_type);
+
+ kbase_csf_add_group_fatal_error(group, &err_payload);
+ kbase_event_wakeup_nosync(group->kctx);
/* The group has failed suspension, stop
* further examination.
*/
clear_bit(i, slot_mask);
set_bit(i, scheduler->csgs_events_enable_mask);
- update_offslot_non_idle_cnt_for_onslot_grp(
- group);
}
suspend_wait_failed = true;
@@ -3741,7 +3903,7 @@ static void wait_csg_slots_start(struct kbase_device *kbdev)
slots_state_changed(kbdev, changed, csg_slot_running),
remaining);
- if (remaining) {
+ if (likely(remaining)) {
for_each_set_bit(i, changed, num_groups) {
struct kbase_queue_group *group =
scheduler->csg_slots[i].resident_group;
@@ -3749,17 +3911,27 @@ static void wait_csg_slots_start(struct kbase_device *kbdev)
/* The on slot csg is now running */
clear_bit(i, slot_mask);
group->run_state = KBASE_CSF_GROUP_RUNNABLE;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
+ group->run_state);
}
} else {
const struct gpu_uevent evt = {
.type = GPU_UEVENT_TYPE_KMD_ERROR,
.info = GPU_UEVENT_INFO_CSG_SLOTS_START
};
+ const int csg_nr = ffs(slot_mask[0]) - 1;
+ struct kbase_queue_group *group =
+ scheduler->csg_slots[csg_nr].resident_group;
+ enum dumpfault_error_type error_type = DF_CSG_START_TIMEOUT;
+
pixel_gpu_uevent_send(kbdev, &evt);
dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for CSG slots to start, slots: 0x%*pb\n",
kbase_backend_get_cycle_cnt(kbdev),
kbdev->csf.fw_timeout_ms,
num_groups, slot_mask);
+ if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
+ error_type = DF_PING_REQUEST_TIMEOUT;
+ schedule_actions_trigger_df(kbdev, group->kctx, error_type);
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
@@ -3876,11 +4048,10 @@ static int wait_csg_slots_handshake_ack(struct kbase_device *kbdev,
slot_mask, dones),
remaining);
- if (remaining)
+ if (likely(remaining))
bitmap_andnot(slot_mask, slot_mask, dones, num_groups);
else {
-
/* Timed-out on the wait */
return -ETIMEDOUT;
}
@@ -3899,7 +4070,11 @@ static void wait_csg_slots_finish_prio_update(struct kbase_device *kbdev)
lockdep_assert_held(&kbdev->csf.scheduler.lock);
- if (ret != 0) {
+ if (unlikely(ret != 0)) {
+ const int csg_nr = ffs(slot_mask[0]) - 1;
+ struct kbase_queue_group *group =
+ kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
+ enum dumpfault_error_type error_type = DF_CSG_EP_CFG_TIMEOUT;
/* The update timeout is not regarded as a serious
* issue, no major consequences are expected as a
* result, so just warn the case.
@@ -3915,6 +4090,13 @@ static void wait_csg_slots_finish_prio_update(struct kbase_device *kbdev)
kbase_backend_get_cycle_cnt(kbdev),
kbdev->csf.fw_timeout_ms,
slot_mask[0]);
+ if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
+ error_type = DF_PING_REQUEST_TIMEOUT;
+ schedule_actions_trigger_df(kbdev, group->kctx, error_type);
+
+ /* Timeout could indicate firmware is unresponsive so trigger a GPU reset. */
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+ kbase_reset_gpu(kbdev);
}
}
@@ -3980,6 +4162,7 @@ void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev,
kbase_event_wakeup_nosync(kctx);
mutex_unlock(&scheduler->lock);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_EVICT_CTX_SLOTS_END, kctx, num_groups);
}
/**
@@ -4113,11 +4296,16 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
struct kbase_queue_group *const input_grp)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ struct kbase_protected_suspend_buffer *sbuf = &input_grp->protected_suspend_buf;
unsigned long flags;
bool protm_in_use;
lockdep_assert_held(&scheduler->lock);
+ /* Return early if the physical pages have not been allocated yet */
+ if (unlikely(!sbuf->pma))
+ return;
+
/* This lock is taken to prevent the issuing of MMU command during the
* transition to protected mode. This helps avoid the scenario where the
* entry to protected mode happens with a memory region being locked and
@@ -4162,6 +4350,8 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
CSG_SLOT_RUNNING) {
if (kctx_as_enabled(input_grp->kctx) &&
scheduler_slot_protm_ack(kbdev, input_grp, slot)) {
+ int err;
+
/* Option of acknowledging to multiple
* CSGs from the same kctx is dropped,
* after consulting with the
@@ -4174,15 +4364,28 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER, input_grp,
0u);
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+
+ /* Coresight must be disabled before entering protected mode. */
+ kbase_debug_coresight_csf_disable_pmode_enter(kbdev);
+
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
+
kbase_csf_enter_protected_mode(kbdev);
/* Set the pending protm seq number to the next one */
protm_enter_set_next_pending_seq(kbdev);
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
- kbase_csf_wait_protected_mode_enter(kbdev);
+ err = kbase_csf_wait_protected_mode_enter(kbdev);
mutex_unlock(&kbdev->mmu_hw_mutex);
+ if (err)
+ schedule_actions_trigger_df(kbdev, input_grp->kctx,
+ DF_PROTECTED_MODE_ENTRY_FAILURE);
+
scheduler->protm_enter_time = ktime_get_raw();
return;
@@ -4282,8 +4485,7 @@ static void scheduler_apply(struct kbase_device *kbdev)
if (!kctx_as_enabled(group->kctx) || group->faulted) {
/* Drop the head group and continue */
- update_offslot_non_idle_cnt_for_faulty_grp(
- group);
+ update_offslot_non_idle_cnt(group);
remove_scheduled_group(kbdev, group);
continue;
}
@@ -4562,6 +4764,8 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
set_bit(i, csg_bitmap);
} else {
group->run_state = KBASE_CSF_GROUP_RUNNABLE;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
+ group->run_state);
}
}
@@ -4580,6 +4784,9 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
.type = GPU_UEVENT_TYPE_KMD_ERROR,
.info = GPU_UEVENT_INFO_CSG_REQ_STATUS_UPDATE
};
+ const int csg_nr = ffs(csg_bitmap[0]) - 1;
+ struct kbase_queue_group *group =
+ scheduler->csg_slots[csg_nr].resident_group;
pixel_gpu_uevent_send(kbdev, &evt);
dev_warn(
@@ -4588,6 +4795,8 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
kbase_backend_get_cycle_cnt(kbdev),
CSG_STATUS_UPDATE_REQ_TIMEOUT_MS,
csg_bitmap[0]);
+ schedule_actions_trigger_df(kbdev, group->kctx,
+ DF_CSG_STATUS_UPDATE_TIMEOUT);
/* Store the bitmap of timed out slots */
bitmap_copy(failed_csg_bitmap, csg_bitmap, num_groups);
@@ -4663,17 +4872,21 @@ static void scheduler_handle_idle_slots(struct kbase_device *kbdev)
if (group_on_slot_is_idle(kbdev, i)) {
group->run_state = KBASE_CSF_GROUP_IDLE;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_IDLE, group, group->run_state);
set_bit(i, scheduler->csg_slots_idle_mask);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET,
group, scheduler->csg_slots_idle_mask[0]);
- } else
+ } else {
group->run_state = KBASE_CSF_GROUP_RUNNABLE;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
+ group->run_state);
+ }
}
bitmap_or(scheduler->csg_slots_idle_mask,
scheduler->csg_slots_idle_mask,
failed_csg_bitmap, num_groups);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, NULL,
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_HANDLE_IDLE_SLOTS, NULL,
scheduler->csg_slots_idle_mask[0]);
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
}
@@ -4759,7 +4972,12 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
int ret = suspend_active_queue_groups(kbdev, slot_mask);
- if (ret) {
+ if (unlikely(ret)) {
+ const int csg_nr = ffs(slot_mask[0]) - 1;
+ struct kbase_queue_group *group =
+ scheduler->csg_slots[csg_nr].resident_group;
+ enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT;
+
/* The suspend of CSGs failed,
* trigger the GPU reset to be in a deterministic state.
*/
@@ -4772,6 +4990,9 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
kbase_backend_get_cycle_cnt(kbdev),
kbdev->csf.fw_timeout_ms,
kbdev->csf.global_iface.group_num, slot_mask);
+ if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
+ error_type = DF_PING_REQUEST_TIMEOUT;
+ schedule_actions_trigger_df(kbdev, group->kctx, error_type);
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
@@ -4857,6 +5078,21 @@ static bool scheduler_idle_suspendable(struct kbase_device *kbdev)
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
spin_lock(&scheduler->interrupt_lock);
+
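+ /* Fast idle handling: skip the on-slot groups' idle status checks and
+ * decide suspendability from the off-slot non-idle group count and the
+ * PM state.
+ */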
+ if (scheduler->fast_gpu_idle_handling) {
+ scheduler->fast_gpu_idle_handling = false;
+
+ if (scheduler->total_runnable_grps) {
+ suspend = !atomic_read(&scheduler->non_idle_offslot_grps) &&
+ kbase_pm_idle_groups_sched_suspendable(kbdev);
+ } else
+ suspend = kbase_pm_no_runnables_sched_suspendable(kbdev);
+ spin_unlock(&scheduler->interrupt_lock);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ return suspend;
+ }
+
if (scheduler->total_runnable_grps) {
/* Check both on-slots and off-slots groups idle status */
@@ -4908,6 +5144,7 @@ static void scheduler_sleep_on_idle(struct kbase_device *kbdev)
cancel_tick_timer(kbdev);
scheduler_pm_idle_before_sleep(kbdev);
scheduler->state = SCHED_SLEEPING;
+ KBASE_KTRACE_ADD(kbdev, SCHED_SLEEPING, NULL, scheduler->state);
}
#endif
@@ -4949,8 +5186,13 @@ static bool scheduler_suspend_on_idle(struct kbase_device *kbdev)
static void gpu_idle_worker(struct work_struct *work)
{
+#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
struct kbase_device *kbdev = container_of(
work, struct kbase_device, csf.scheduler.gpu_idle_work.work);
+#else
+ struct kbase_device *kbdev = container_of(
+ work, struct kbase_device, csf.scheduler.gpu_idle_work);
+#endif
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
bool scheduler_is_idle_suspendable = false;
bool all_groups_suspended = false;
@@ -4966,6 +5208,7 @@ static void gpu_idle_worker(struct work_struct *work)
__ENCODE_KTRACE_INFO(true, false, false));
return;
}
+ kbase_debug_csf_fault_wait_completion(kbdev);
mutex_lock(&scheduler->lock);
#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
@@ -4975,6 +5218,14 @@ static void gpu_idle_worker(struct work_struct *work)
scheduler->gpu_idle_work_pending = false;
#endif
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ if (unlikely(scheduler->state == SCHED_BUSY)) {
+ mutex_unlock(&scheduler->lock);
+ kbase_reset_gpu_allow(kbdev);
+ return;
+ }
+#endif
+
scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev);
if (scheduler_is_idle_suspendable) {
KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_HANDLING_START, NULL,
@@ -4986,6 +5237,8 @@ static void gpu_idle_worker(struct work_struct *work)
else
#endif
all_groups_suspended = scheduler_suspend_on_idle(kbdev);
+
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_HANDLING_END, NULL, 0u);
}
#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
@@ -5405,14 +5658,173 @@ static int prepare_fast_local_tock(struct kbase_device *kbdev)
struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i];
struct kbase_queue_group *group = csg_slot->resident_group;
- if (!queue_group_idle_locked(group))
+ if (!queue_group_idle_locked(group)) {
group->run_state = KBASE_CSF_GROUP_IDLE;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_IDLE, group, group->run_state);
+ }
}
/* Return the number of idle slots for potential replacement */
return bitmap_weight(csg_bitmap, num_groups);
}
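+/* Wait for the CSG slots in slot_mask to stop, saving and cleaning up each
+ * slot as it stops. On timeout, the slots still pending are copied back to
+ * slot_mask and -ETIMEDOUT is returned.
+ */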
+static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slot_mask,
+ unsigned int timeout_ms)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ long remaining = kbase_csf_timeout_in_jiffies(timeout_ms);
+ u32 num_groups = kbdev->csf.global_iface.group_num;
+ int err = 0;
+ DECLARE_BITMAP(slot_mask_local, MAX_SUPPORTED_CSGS);
+
+ lockdep_assert_held(&scheduler->lock);
+
+ bitmap_copy(slot_mask_local, slot_mask, MAX_SUPPORTED_CSGS);
+
+ while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS) && remaining) {
+ DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
+
+ bitmap_copy(changed, slot_mask_local, MAX_SUPPORTED_CSGS);
+
+ remaining = wait_event_timeout(
+ kbdev->csf.event_wait,
+ slots_state_changed(kbdev, changed, csg_slot_stopped_locked), remaining);
+
+ if (likely(remaining)) {
+ u32 i;
+
+ for_each_set_bit(i, changed, num_groups) {
+ struct kbase_queue_group *group;
+
+ if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i)))
+ continue;
+
+ /* The on slot csg is now stopped */
+ clear_bit(i, slot_mask_local);
+
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG(
+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i);
+
+ group = scheduler->csg_slots[i].resident_group;
+ if (likely(group)) {
+ /* Only do save/cleanup if the
+ * group is not terminated during
+ * the sleep.
+ */
+ save_csg_slot(group);
+ if (cleanup_csg_slot(group))
+ sched_evict_group(group, true, true);
+ }
+ }
+ } else {
+ dev_warn(
+ kbdev->dev,
+ "[%llu] Suspend request sent on CSG slots 0x%lx timed out for slots 0x%lx",
+ kbase_backend_get_cycle_cnt(kbdev), slot_mask[0],
+ slot_mask_local[0]);
+ /* Return the bitmask of the timed out slots to the caller */
+ bitmap_copy(slot_mask, slot_mask_local, MAX_SUPPORTED_CSGS);
+
+ err = -ETIMEDOUT;
+ }
+ }
+
+ return err;
+}
+
+/**
+ * evict_lru_or_blocked_csg() - Evict the least-recently-used idle or blocked CSG
+ *
+ * @kbdev: Pointer to the device
+ *
+ * Used to allow for speedier starting/resumption of another CSG. The worst-case
+ * scenario of the evicted CSG being scheduled next is expected to be rare.
+ * Also, the eviction will not be applied if the GPU is running in protected mode.
+ * Otherwise the eviction attempt would force the MCU to quit protected
+ * mode execution, and likely request to re-enter it again.
+ */
+static void evict_lru_or_blocked_csg(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ size_t i;
+ struct kbase_queue_group *lru_idle_group = NULL;
+ const u32 total_csg_slots = kbdev->csf.global_iface.group_num;
+ const bool all_addr_spaces_used = (scheduler->num_active_address_spaces >=
+ (kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS));
+ u8 as_usage[BASE_MAX_NR_AS] = { 0 };
+
+ lockdep_assert_held(&scheduler->lock);
+ if (kbase_csf_scheduler_protected_mode_in_use(kbdev))
+ return;
+
+ BUILD_BUG_ON(MAX_SUPPORTED_CSGS > (sizeof(int) * BITS_PER_BYTE));
+ if (fls(scheduler->csg_inuse_bitmap[0]) != total_csg_slots)
+ return; /* Some CSG slots remain unused */
+
+ if (all_addr_spaces_used) {
+ for (i = 0; i != total_csg_slots; ++i) {
+ if (scheduler->csg_slots[i].resident_group != NULL) {
+ if (WARN_ON(scheduler->csg_slots[i].resident_group->kctx->as_nr <
+ 0))
+ continue;
+
+ as_usage[scheduler->csg_slots[i].resident_group->kctx->as_nr]++;
+ }
+ }
+ }
+
+ for (i = 0; i != total_csg_slots; ++i) {
+ struct kbase_queue_group *const group = scheduler->csg_slots[i].resident_group;
+
+ /* We expect that by this point all groups would normally be
+ * assigned a physical CSG slot, but if circumstances have
+ * changed then bail out of this optimisation.
+ */
+ if (group == NULL)
+ return;
+
+ /* Real-time priority CSGs must be kept on-slot even when
+ * idle.
+ */
+ if ((group->run_state == KBASE_CSF_GROUP_IDLE) &&
+ (group->priority != BASE_QUEUE_GROUP_PRIORITY_REALTIME) &&
+ ((lru_idle_group == NULL) ||
+ (lru_idle_group->prepared_seq_num < group->prepared_seq_num))) {
+ if (WARN_ON(group->kctx->as_nr < 0))
+ continue;
+
+ /* If all address spaces are used, we need to ensure the group does not
+ * share its AS with other active CSGs. Otherwise the CSG would be freed
+ * without an AS and this optimization would not work.
+ */
+ if ((!all_addr_spaces_used) || (as_usage[group->kctx->as_nr] == 1))
+ lru_idle_group = group;
+ }
+ }
+
+ if (lru_idle_group != NULL) {
+ unsigned long slot_mask = 1 << lru_idle_group->csg_nr;
+
+ dev_dbg(kbdev->dev, "Suspending LRU idle group %d of context %d_%d on slot %d",
+ lru_idle_group->handle, lru_idle_group->kctx->tgid,
+ lru_idle_group->kctx->id, lru_idle_group->csg_nr);
+ suspend_queue_group(lru_idle_group);
+ if (wait_csg_slots_suspend(kbdev, &slot_mask, kbdev->csf.fw_timeout_ms)) {
+ enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT;
+
+ dev_warn(
+ kbdev->dev,
+ "[%llu] LRU idle group %d of context %d_%d failed to suspend on slot %d (timeout %d ms)",
+ kbase_backend_get_cycle_cnt(kbdev), lru_idle_group->handle,
+ lru_idle_group->kctx->tgid, lru_idle_group->kctx->id,
+ lru_idle_group->csg_nr, kbdev->csf.fw_timeout_ms);
+ if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
+ error_type = DF_PING_REQUEST_TIMEOUT;
+ schedule_actions_trigger_df(kbdev, lru_idle_group->kctx, error_type);
+ }
+ }
+}
+
static void schedule_actions(struct kbase_device *kbdev, bool is_tick)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
@@ -5502,16 +5914,12 @@ redo_local_tock:
* queue jobs.
*/
if (protm_grp && scheduler->top_grp == protm_grp) {
- int new_val;
-
dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d",
protm_grp->handle);
- new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, protm_grp,
- new_val);
-
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+ update_offslot_non_idle_cnt_for_onslot_grp(protm_grp);
+ remove_scheduled_group(kbdev, protm_grp);
scheduler_check_pmode_progress(kbdev);
} else if (scheduler->top_grp) {
if (protm_grp)
@@ -5569,6 +5977,8 @@ redo_local_tock:
} else {
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
}
+
+ evict_lru_or_blocked_csg(kbdev);
}
/**
@@ -5590,6 +6000,9 @@ static bool can_skip_scheduling(struct kbase_device *kbdev)
lockdep_assert_held(&scheduler->lock);
+ if (unlikely(!kbase_reset_gpu_is_not_pending(kbdev)))
+ return true;
+
if (scheduler->state == SCHED_SUSPENDED)
return true;
@@ -5599,12 +6012,12 @@ static bool can_skip_scheduling(struct kbase_device *kbdev)
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
if (kbdev->pm.backend.exit_gpu_sleep_mode) {
- int ret = scheduler_pm_active_after_sleep(kbdev, flags);
- /* hwaccess_lock is released in the previous function
- * call.
- */
+ int ret = scheduler_pm_active_after_sleep(kbdev, &flags);
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
if (!ret) {
scheduler->state = SCHED_INACTIVE;
+ KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state);
return false;
}
@@ -5634,12 +6047,17 @@ static void schedule_on_tock(struct kthread_work *work)
if (err)
return;
+ kbase_debug_csf_fault_wait_completion(kbdev);
mutex_lock(&scheduler->lock);
if (can_skip_scheduling(kbdev))
+ {
+ atomic_set(&scheduler->pending_tock_work, false);
goto exit_no_schedule_unlock;
+ }
WARN_ON(!(scheduler->state == SCHED_INACTIVE));
scheduler->state = SCHED_BUSY;
+ KBASE_KTRACE_ADD(kbdev, SCHED_BUSY, NULL, scheduler->state);
/* Undertaking schedule action steps */
KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_START, NULL, 0u);
@@ -5650,6 +6068,7 @@ static void schedule_on_tock(struct kthread_work *work)
scheduler->last_schedule = jiffies;
scheduler->state = SCHED_INACTIVE;
+ KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state);
if (!scheduler->total_runnable_grps)
enqueue_gpu_idle_work(scheduler, 0);
mutex_unlock(&scheduler->lock);
@@ -5679,12 +6098,14 @@ static void schedule_on_tick(struct kthread_work *work)
if (err)
return;
+ kbase_debug_csf_fault_wait_completion(kbdev);
mutex_lock(&scheduler->lock);
if (can_skip_scheduling(kbdev))
goto exit_no_schedule_unlock;
scheduler->state = SCHED_BUSY;
+ KBASE_KTRACE_ADD(kbdev, SCHED_BUSY, NULL, scheduler->state);
/* Undertaking schedule action steps */
KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_START, NULL, scheduler->total_runnable_grps);
@@ -5706,6 +6127,7 @@ static void schedule_on_tick(struct kthread_work *work)
scheduler->state = SCHED_INACTIVE;
mutex_unlock(&scheduler->lock);
+ KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state);
kbase_reset_gpu_allow(kbdev);
dev_dbg(kbdev->dev, "Waking up for event after schedule-on-tick completes.");
@@ -5719,80 +6141,6 @@ exit_no_schedule_unlock:
kbase_reset_gpu_allow(kbdev);
}
-static int wait_csg_slots_suspend(struct kbase_device *kbdev,
- const unsigned long *slot_mask,
- unsigned int timeout_ms)
-{
- struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
- long remaining = kbase_csf_timeout_in_jiffies(timeout_ms);
- u32 num_groups = kbdev->csf.global_iface.group_num;
- int err = 0;
- DECLARE_BITMAP(slot_mask_local, MAX_SUPPORTED_CSGS);
-
- lockdep_assert_held(&scheduler->lock);
-
- bitmap_copy(slot_mask_local, slot_mask, MAX_SUPPORTED_CSGS);
-
- while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS)
- && remaining) {
- DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
-
- bitmap_copy(changed, slot_mask_local, MAX_SUPPORTED_CSGS);
-
- remaining = wait_event_timeout(kbdev->csf.event_wait,
- slots_state_changed(kbdev, changed,
- csg_slot_stopped_locked),
- remaining);
-
- if (remaining) {
- u32 i;
-
- for_each_set_bit(i, changed, num_groups) {
- struct kbase_queue_group *group;
-
- if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i)))
- continue;
-
- /* The on slot csg is now stopped */
- clear_bit(i, slot_mask_local);
-
- group = scheduler->csg_slots[i].resident_group;
- if (likely(group)) {
- /* Only do save/cleanup if the
- * group is not terminated during
- * the sleep.
- */
- save_csg_slot(group);
- if (cleanup_csg_slot(group))
- sched_evict_group(group, true, true);
- }
- }
- } else {
- const struct gpu_uevent evt = {
- .type = GPU_UEVENT_TYPE_KMD_ERROR,
- .info = GPU_UEVENT_INFO_CSG_SLOTS_SUSPEND
- };
- pixel_gpu_uevent_send(kbdev, &evt);
- dev_warn(kbdev->dev, "[%llu] Timeout waiting for CSG slots to suspend, slot_mask: 0x%*pb\n",
- kbase_backend_get_cycle_cnt(kbdev),
- num_groups, slot_mask_local);
-
-
- err = -ETIMEDOUT;
- }
- }
-
- if (err == -ETIMEDOUT) {
- //TODO: should introduce SSCD report if this happens.
- kbase_gpu_timeout_debug_message(kbdev);
- dev_warn(kbdev->dev, "[%llu] Firmware ping %d",
- kbase_backend_get_cycle_cnt(kbdev),
- kbase_csf_firmware_ping_wait(kbdev));
- }
-
- return err;
-}
-
static int suspend_active_queue_groups(struct kbase_device *kbdev,
unsigned long *slot_mask)
{
@@ -5835,7 +6183,7 @@ static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev)
kbase_gpu_timeout_debug_message(kbdev);
dev_warn(kbdev->dev, "[%llu] Firmware ping %d",
kbase_backend_get_cycle_cnt(kbdev),
- kbase_csf_firmware_ping_wait(kbdev));
+ kbase_csf_firmware_ping_wait(kbdev, 0));
}
/* Need to flush the GPU cache to ensure suspend buffer
@@ -5960,6 +6308,7 @@ static bool scheduler_handle_reset_in_protected_mode(struct kbase_device *kbdev)
cleanup_csg_slot(group);
group->run_state = KBASE_CSF_GROUP_SUSPENDED;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_SUSPENDED, group, group->run_state);
/* Simply treat the normal mode groups as non-idle. The tick
* scheduled after the reset will re-initialize the counter
@@ -5994,7 +6343,11 @@ static void scheduler_inner_reset(struct kbase_device *kbdev)
WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev));
/* Cancel any potential queued delayed work(s) */
+#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
cancel_delayed_work_sync(&scheduler->gpu_idle_work);
+#else
+ cancel_work_sync(&kbdev->csf.scheduler.gpu_idle_work);
+#endif
cancel_tick_timer(kbdev);
cancel_tick_work(scheduler);
cancel_tock_work(scheduler);
@@ -6031,6 +6384,8 @@ void kbase_csf_scheduler_reset(struct kbase_device *kbdev)
KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET_START, NULL, 0u);
+ kbase_debug_csf_fault_wait_completion(kbdev);
+
if (scheduler_handle_reset_in_protected_mode(kbdev) &&
!suspend_active_queue_groups_on_reset(kbdev)) {
/* As all groups have been successfully evicted from the CSG
@@ -6067,6 +6422,8 @@ void kbase_csf_scheduler_reset(struct kbase_device *kbdev)
mutex_unlock(&kbdev->kctx_list_lock);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET_END, NULL, 0u);
+
/* After queue groups reset, the scheduler data fields clear out */
scheduler_inner_reset(kbdev);
}
@@ -6121,7 +6478,7 @@ static void firmware_aliveness_monitor(struct work_struct *work)
kbase_csf_scheduler_wait_mcu_active(kbdev);
- err = kbase_csf_firmware_ping_wait(kbdev);
+ err = kbase_csf_firmware_ping_wait(kbdev, kbdev->csf.fw_timeout_ms);
if (err) {
/* It is acceptable to enqueue a reset whilst we've prevented
@@ -6396,8 +6753,11 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group)
mutex_lock(&scheduler->lock);
- if (group->run_state == KBASE_CSF_GROUP_IDLE)
+ if (group->run_state == KBASE_CSF_GROUP_IDLE) {
group->run_state = KBASE_CSF_GROUP_RUNNABLE;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
+ group->run_state);
+ }
/* Check if the group is now eligible for execution in protected mode. */
if (scheduler_get_protm_enter_async_group(kbdev, group))
scheduler_group_check_protm_enter(kbdev, group);
@@ -6472,7 +6832,9 @@ static bool check_sync_update_for_on_slot_group(
if (!evaluate_sync_update(queue))
continue;
+#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
queue->status_wait = 0;
+#endif
/* Update csg_slots_idle_mask and group's run_state */
if (group->run_state != KBASE_CSF_GROUP_RUNNABLE) {
@@ -6494,6 +6856,8 @@ static bool check_sync_update_for_on_slot_group(
*/
group->reevaluate_idle_status = true;
group->run_state = KBASE_CSF_GROUP_RUNNABLE;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
+ group->run_state);
}
KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u);
@@ -6642,6 +7006,15 @@ static void check_group_sync_update_worker(struct kthread_work *work)
mutex_lock(&scheduler->lock);
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ if (unlikely(scheduler->state == SCHED_BUSY)) {
+ kthread_queue_work(&kctx->csf.sched.sync_update_worker,
+ &kctx->csf.sched.sync_update_work);
+ mutex_unlock(&scheduler->lock);
+ return;
+ }
+#endif
+
KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START, kctx, 0u);
if (kctx->csf.sched.num_idle_wait_grps != 0) {
struct kbase_queue_group *group, *temp;
@@ -6729,7 +7102,7 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx)
"mali_kbase_csf_sync_update");
if (err) {
dev_err(kctx->kbdev->dev,
- "Failed to initialize scheduler context kworker");
+ "Failed to initialize scheduler context workqueue");
err = -ENOMEM;
goto alloc_wq_failed;
}
@@ -6737,6 +7110,8 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx)
kthread_init_work(&kctx->csf.sched.sync_update_work,
check_group_sync_update_worker);
+ kbase_csf_tiler_heap_reclaim_ctx_init(kctx);
+
err = kbase_csf_event_wait_add(kctx, check_group_sync_update_cb, kctx);
if (err) {
@@ -6745,10 +7120,6 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx)
goto event_wait_add_failed;
}
- /* Per-kctx heap_info object initialization */
- memset(&kctx->csf.sched.heap_info, 0, sizeof(struct kbase_kctx_heap_info));
- INIT_LIST_HEAD(&kctx->csf.sched.heap_info.mgr_link);
-
return err;
event_wait_add_failed:
@@ -6783,23 +7154,7 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev)
return -ENOMEM;
}
- return 0;
-}
-
-static void scheduler_init_heap_reclaim_mgr(struct kbase_csf_scheduler *const scheduler)
-{
- INIT_LIST_HEAD(&scheduler->reclaim_mgr.candidate_ctxs);
- INIT_LIST_HEAD(&scheduler->reclaim_mgr.scan_list_ctxs);
- atomic_set(&scheduler->reclaim_mgr.est_cand_pages, 0);
- atomic_set(&scheduler->reclaim_mgr.mgr_scan_pages, 0);
-}
-
-static void scheduler_term_heap_reclaim_mgr(struct kbase_csf_scheduler *const scheduler)
-{
- WARN_ON(!list_empty(&scheduler->reclaim_mgr.candidate_ctxs));
- WARN_ON(!list_empty(&scheduler->reclaim_mgr.scan_list_ctxs));
- WARN_ON(atomic_read(&scheduler->reclaim_mgr.est_cand_pages));
- WARN_ON(atomic_read(&scheduler->reclaim_mgr.mgr_scan_pages));
+ return kbase_csf_mcu_shared_regs_data_init(kbdev);
}
int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
@@ -6842,6 +7197,7 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
(sizeof(scheduler->csgs_events_enable_mask) * BITS_PER_BYTE));
bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS);
scheduler->state = SCHED_SUSPENDED;
+ KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state);
scheduler->pm_active_count = 0;
scheduler->ngrp_to_schedule = 0;
scheduler->total_runnable_grps = 0;
@@ -6852,13 +7208,16 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS;
scheduler_doorbell_init(kbdev);
- INIT_DEFERRABLE_WORK(&scheduler->gpu_idle_work, gpu_idle_worker);
#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
+ INIT_DEFERRABLE_WORK(&scheduler->gpu_idle_work, gpu_idle_worker);
INIT_WORK(&scheduler->sc_rails_off_work, sc_rails_off_worker);
scheduler->sc_power_rails_off = true;
scheduler->gpu_idle_work_pending = false;
-#endif
scheduler->gpu_idle_fw_timer_enabled = false;
+#else
+ INIT_WORK(&scheduler->gpu_idle_work, gpu_idle_worker);
+#endif
+ scheduler->fast_gpu_idle_handling = false;
atomic_set(&scheduler->gpu_no_longer_idle, false);
atomic_set(&scheduler->non_idle_offslot_grps, 0);
@@ -6866,8 +7225,7 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
scheduler->tick_timer.function = tick_timer_callback;
scheduler->tick_timer_active = false;
- scheduler_init_heap_reclaim_mgr(scheduler);
- kbase_csf_tiler_heap_register_shrinker(kbdev);
+ kbase_csf_tiler_heap_reclaim_mgr_init(kbdev);
return 0;
}
@@ -6884,15 +7242,20 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev)
WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev));
#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
flush_work(&kbdev->csf.scheduler.sc_rails_off_work);
-#endif
flush_delayed_work(&kbdev->csf.scheduler.gpu_idle_work);
+#else
+ flush_work(&kbdev->csf.scheduler.gpu_idle_work);
+#endif
mutex_lock(&kbdev->csf.scheduler.lock);
if (kbdev->csf.scheduler.state != SCHED_SUSPENDED) {
+ unsigned long flags;
/* The power policy could prevent the Scheduler from
* getting suspended when GPU becomes idle.
*/
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
WARN_ON(kbase_pm_idle_groups_sched_suspendable(kbdev));
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
scheduler_suspend(kbdev);
}
@@ -6901,10 +7264,13 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev)
cancel_tick_timer(kbdev);
cancel_tick_work(&kbdev->csf.scheduler);
cancel_tock_work(&kbdev->csf.scheduler);
- mutex_destroy(&kbdev->csf.scheduler.lock);
kfree(kbdev->csf.scheduler.csg_slots);
kbdev->csf.scheduler.csg_slots = NULL;
}
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_TERMINATED, NULL,
+ kbase_csf_scheduler_get_nr_active_csgs(kbdev));
+ /* Terminating the MCU shared regions, following the release of slots */
+ kbase_csf_mcu_shared_regs_data_term(kbdev);
}
void kbase_csf_scheduler_early_term(struct kbase_device *kbdev)
@@ -6914,8 +7280,8 @@ void kbase_csf_scheduler_early_term(struct kbase_device *kbdev)
if (kbdev->csf.scheduler.csf_worker.task)
kbase_destroy_kworker_stack(&kbdev->csf.scheduler.csf_worker);
- kbase_csf_tiler_heap_unregister_shrinker(kbdev);
- scheduler_term_heap_reclaim_mgr(&kbdev->csf.scheduler);
+ kbase_csf_tiler_heap_reclaim_mgr_term(kbdev);
+ mutex_destroy(&kbdev->csf.scheduler.lock);
}
/**
@@ -7022,6 +7388,12 @@ int kbase_csf_scheduler_pm_suspend_no_lock(struct kbase_device *kbdev)
int result = 0;
lockdep_assert_held(&scheduler->lock);
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ if (unlikely(scheduler->state == SCHED_BUSY))
+ return -EBUSY;
+#endif
+
#ifdef KBASE_PM_RUNTIME
/* If scheduler is in sleeping state, then MCU needs to be activated
* to suspend CSGs.
@@ -7180,6 +7552,7 @@ int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev)
}
scheduler->state = SCHED_SUSPENDED;
+ KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbdev->pm.backend.gpu_sleep_mode_active = false;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -7237,207 +7610,3 @@ void kbase_csf_scheduler_force_wakeup(struct kbase_device *kbdev)
scheduler_wakeup(kbdev, true);
mutex_unlock(&scheduler->lock);
}
-
-static bool defer_count_unused_heap_pages(struct kbase_context *kctx)
-{
- struct kbase_kctx_heap_info *info = &kctx->csf.sched.heap_info;
- u32 prio, shift;
- unsigned long ms;
-
- for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_LOW;
- prio++) {
- if (!list_empty(&kctx->csf.sched.runnable_groups[prio]))
- break;
- }
-
- shift = (prio == KBASE_QUEUE_GROUP_PRIORITY_REALTIME) ? 0 : prio + 1;
- /* Delay time from priority */
- ms = HEAP_RECLAIM_PRIO_DEFERRAL_MS >> shift;
-
- WARN_ON(!(info->flags & CSF_CTX_RECLAIM_CANDI_FLAG));
-
- if (kctx->csf.sched.num_idle_wait_grps)
- ms += HEAP_RECLAIM_WAIT_SYNC_DEFERRAL_MS;
-
- return time_before(jiffies, info->attach_jiffies + msecs_to_jiffies(ms));
-}
-
-static unsigned long
-reclaim_count_candidates_heap_pages(struct kbase_device *kbdev, unsigned long freed_pages,
- struct kbase_csf_tiler_heap_shrink_control *shrink_ctrl)
-{
- struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
- struct kbase_csf_sched_heap_reclaim_mgr *const mgr = &scheduler->reclaim_mgr;
- struct kbase_kctx_heap_info *info, *tmp;
- unsigned long count = 0;
- u32 cnt_ctxs = 0;
-
- lockdep_assert_held(&kbdev->csf.scheduler.lock);
-
- list_for_each_entry_safe(info, tmp, &mgr->candidate_ctxs, mgr_link) {
- struct kbase_context *kctx =
- container_of(info, struct kbase_context, csf.sched.heap_info);
-
- /* If the kctx not yet exhausted its deferral time, keep it as a candidate */
- if (defer_count_unused_heap_pages(kctx))
- continue;
-
- /* Count the freeable pages of the kctx */
- info->nr_scan_pages = shrink_ctrl->count_cb(kctx);
-
- dev_dbg(kctx->kbdev->dev, "kctx_%d_%d heap pages count : %u\n", kctx->tgid,
- kctx->id, info->nr_scan_pages);
- cnt_ctxs++;
-
- /* The kctx is either moved to the pages freeable kctx list, or removed
- * from the manager if no pages are available for reclaim.
- */
- if (info->nr_scan_pages) {
- /* Move the kctx to the scan_list inside the manager */
- list_move_tail(&info->mgr_link, &mgr->scan_list_ctxs);
- WARN_ON(atomic_sub_return(info->nr_est_pages, &mgr->est_cand_pages) < 0);
- atomic_add(info->nr_scan_pages, &mgr->mgr_scan_pages);
- info->flags = CSF_CTX_RECLAIM_SCAN_FLAG;
- count += info->nr_scan_pages;
- } else
- detach_from_sched_reclaim_mgr(kctx);
-
- /* Combine with the shrinker scan method freed pages to determine the count
- * has done enough to avoid holding the scheduler lock too long.
- */
- if ((freed_pages + count) > HEAP_RECLAIM_COUNT_BATCH_SIZE)
- break;
- }
-
- dev_dbg(kbdev->dev,
- "Reclaim CSF count unused heap pages: %lu (processed kctxs: %u, from_scan: %lu)\n",
- count, cnt_ctxs, freed_pages);
-
- return count;
-}
-
-static unsigned long
-reclaim_free_counted_heap_pages(struct kbase_device *kbdev,
- struct kbase_csf_tiler_heap_shrink_control *shrink_ctrl)
-{
- struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
- struct kbase_csf_sched_heap_reclaim_mgr *const mgr = &scheduler->reclaim_mgr;
- unsigned long freed = 0;
- u32 cnt_ctxs = 0;
- struct kbase_kctx_heap_info *info, *tmp;
-
- lockdep_assert_held(&kbdev->csf.scheduler.lock);
- if (WARN_ON(!shrink_ctrl->scan_cb))
- return 0;
-
- list_for_each_entry_safe(info, tmp, &mgr->scan_list_ctxs, mgr_link) {
- struct kbase_context *kctx =
- container_of(info, struct kbase_context, csf.sched.heap_info);
- /* Attempt freeing all the counted heap pages from the kctx */
- u32 n = shrink_ctrl->scan_cb(kctx, info->nr_scan_pages);
-
- if (n)
- schedule_work(&kctx->jit_work);
-
- /* The free is attempted on all the counted heap pages. If the kctx has
- * all its counted heap pages freed, or, it can't offer anymore, drop
- * it from the reclaim manger, otherwise leave it remaining in. If the
- * kctx changes its state (i.e. some CSGs becoming on-slot), the
- * scheduler will pull it out.
- */
- if (n >= info->nr_scan_pages || n == 0)
- detach_from_sched_reclaim_mgr(kctx);
- else
- info->nr_scan_pages -= n;
-
- freed += n;
- cnt_ctxs++;
-
- /* Enough has been freed, break for a gap to avoid holding the lock too long */
- if (freed >= HEAP_RECLAIM_SCAN_BATCH_SIZE)
- break;
- }
-
- dev_dbg(kbdev->dev, "Reclaim CSF heap free heap pages: %lu (processed kctxs: %u)\n", freed,
- cnt_ctxs);
-
- return freed;
-}
-
-unsigned long
-kbase_csf_scheduler_count_free_heap_pages(struct kbase_device *kbdev,
- struct kbase_csf_tiler_heap_shrink_control *shrink_ctrl)
-{
- struct kbase_csf_sched_heap_reclaim_mgr *mgr = &kbdev->csf.scheduler.reclaim_mgr;
-
- unsigned long scan_count = atomic_read(&mgr->mgr_scan_pages);
- unsigned long est_count = atomic_read(&mgr->est_cand_pages);
- unsigned long total;
- bool counted = false;
-
- if (mutex_trylock(&kbdev->csf.scheduler.lock)) {
- reclaim_count_candidates_heap_pages(kbdev, 0, shrink_ctrl);
- mutex_unlock(&kbdev->csf.scheduler.lock);
- counted = true;
- scan_count = atomic_read(&mgr->mgr_scan_pages);
- /* We've processed the candidates, so overwrites the estimated to 0 */
- est_count = 0;
- }
-
- total = scan_count + est_count;
- dev_dbg(kbdev->dev, "Reclaim count unused pages: %lu (scan: %lu, extra_est: %lu, %d/)\n",
- total, scan_count, est_count, counted);
-
- return total;
-}
-
-unsigned long
-kbase_csf_scheduler_scan_free_heap_pages(struct kbase_device *kbdev,
- struct kbase_csf_tiler_heap_shrink_control *shrink_ctrl)
-{
- struct kbase_csf_sched_heap_reclaim_mgr *mgr = &kbdev->csf.scheduler.reclaim_mgr;
- struct shrink_control *sc = shrink_ctrl->sc;
- unsigned long freed = 0;
- unsigned long count = 0;
- unsigned long avail = 0;
-
- /* If Scheduler is busy in action, return 0 */
- if (!mutex_trylock(&kbdev->csf.scheduler.lock)) {
- struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
-
- /* Wait for roughly 2-ms */
- wait_event_timeout(kbdev->csf.event_wait, (scheduler->state != SCHED_BUSY),
- msecs_to_jiffies(2));
- if (!mutex_trylock(&kbdev->csf.scheduler.lock)) {
- dev_dbg(kbdev->dev,
- "Reclaim scan see device busy (freed: 0, number to scan: %lu)\n",
- sc->nr_to_scan);
- return 0;
- }
- }
-
- avail = atomic_read(&mgr->mgr_scan_pages);
- if (avail) {
- freed = reclaim_free_counted_heap_pages(kbdev, shrink_ctrl);
- if (freed < sc->nr_to_scan && atomic_read(&mgr->est_cand_pages))
- count = reclaim_count_candidates_heap_pages(kbdev, freed, shrink_ctrl);
- } else {
- count = reclaim_count_candidates_heap_pages(kbdev, freed, shrink_ctrl);
- }
-
- /* If having done count in this call, try reclaim free again */
- if (count)
- freed += reclaim_free_counted_heap_pages(kbdev, shrink_ctrl);
-
- mutex_unlock(&kbdev->csf.scheduler.lock);
-
- dev_info(kbdev->dev,
- "Reclaim scan freed pages: %lu (avail: %lu, extra: %lu, number to scan: %lu)\n",
- freed, avail, count, sc->nr_to_scan);
-
- /* On no avilablity, and with no new extra count, return STOP */
- if (!avail && !count)
- return SHRINK_STOP;
- else
- return freed;
-}
diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.h b/mali_kbase/csf/mali_kbase_csf_scheduler.h
index 358d18a..17c025b 100644
--- a/mali_kbase/csf/mali_kbase_csf_scheduler.h
+++ b/mali_kbase/csf/mali_kbase_csf_scheduler.h
@@ -24,7 +24,6 @@
#include "mali_kbase_csf.h"
#include "mali_kbase_csf_event.h"
-#include "mali_kbase_csf_tiler_heap_def.h"
/**
* kbase_csf_scheduler_queue_start() - Enable the running of GPU command queue
@@ -37,7 +36,9 @@
* If the CSG is already scheduled and resident, the CSI will be started
* right away, otherwise once the group is made resident.
*
- * Return: 0 on success, or negative on failure.
+ * Return: 0 on success, or negative on failure. -EBUSY is returned to
+ * indicate that the queue could not be enabled due to the current Scheduler
+ * state; the caller may retry enabling the queue after some time.
*/
int kbase_csf_scheduler_queue_start(struct kbase_queue *queue);
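Since the -EBUSY failure is transient, a caller may simply retry after a short back-off. The helper below is an illustrative sketch only, not driver code; the retry count and delay are arbitrary, and <linux/delay.h> is assumed to be included for msleep().

/* Sketch: retry kbase_csf_scheduler_queue_start() while the Scheduler is busy. */
static int example_queue_start_with_retry(struct kbase_queue *queue)
{
	int err, attempts = 3;

	do {
		err = kbase_csf_scheduler_queue_start(queue);
		if (err != -EBUSY)
			break;
		msleep(10); /* give the Scheduler time to leave its transient state */
	} while (--attempts);

	return err;
}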
@@ -531,6 +532,7 @@ static inline void kbase_csf_scheduler_invoke_tick(struct kbase_device *kbdev)
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
unsigned long flags;
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_INVOKE, NULL, 0u);
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
if (!scheduler->tick_timer_active)
kthread_queue_work(&scheduler->csf_worker, &scheduler->tick_work);
@@ -549,6 +551,7 @@ static inline void kbase_csf_scheduler_invoke_tock(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_INVOKE, NULL, 0u);
if (atomic_cmpxchg(&scheduler->pending_tock_work, false, true) == false)
kthread_mod_delayed_work(&scheduler->csf_worker, &scheduler->tock_work, 0);
}
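The tock path above relies on a claim-the-pending-flag idiom: only the caller that flips pending_tock_work from false to true queues the delayed work, so concurrent invocations collapse into a single queued tock. A minimal generic sketch of the same idiom follows; the names are illustrative, not driver API.

/* Sketch of the claim-a-pending-flag idiom: the single winner of the cmpxchg
 * queues the work; the worker is expected to clear the flag before acting.
 */
static void example_invoke_once(atomic_t *pending, struct work_struct *work)
{
	if (atomic_cmpxchg(pending, false, true) == false)
		schedule_work(work);
}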
@@ -690,36 +693,4 @@ bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev);
*/
void turn_on_sc_power_rails(struct kbase_device *kbdev);
#endif
-
-/* Forward declaration */
-struct kbase_csf_tiler_heap_shrink_control;
-
-/**
- * kbase_csf_scheduler_count_free_heap_pages() - Undertake shrinker reclaim count action
- *
- * @kbdev: Pointer to the device
- * @shrink_ctrl: Pointer to the kbase CSF schrink control object.
- *
- * This function is called from CSF tiler heap memory shrinker reclaim 'count_objects' operation.
- *
- * Return: number of potentially freeable tiler heap pages.
- */
-unsigned long
-kbase_csf_scheduler_count_free_heap_pages(struct kbase_device *kbdev,
- struct kbase_csf_tiler_heap_shrink_control *shrink_ctrl);
-
-/**
- * kbase_csf_scheduler_scan_free_heap_pages() - Undertake shrinker reclaim scan action
- *
- * @kbdev: Pointer to the device
- * @shrink_ctrl: Pointer to the kbase CSF schrink control object.
- *
- * This function is called from CSF tiler heap memory shrinker reclaim 'scan_objects' operation.
- *
- * Return: number of actually freed tiler heap pagess.
- */
-unsigned long
-kbase_csf_scheduler_scan_free_heap_pages(struct kbase_device *kbdev,
- struct kbase_csf_tiler_heap_shrink_control *shrink_ctrl);
-
#endif /* _KBASE_CSF_SCHEDULER_H_ */
diff --git a/mali_kbase/csf/mali_kbase_csf_sync_debugfs.c b/mali_kbase/csf/mali_kbase_csf_sync_debugfs.c
new file mode 100644
index 0000000..a5e0ab5
--- /dev/null
+++ b/mali_kbase/csf/mali_kbase_csf_sync_debugfs.c
@@ -0,0 +1,788 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "mali_kbase_csf_sync_debugfs.h"
+#include "mali_kbase_csf_csg_debugfs.h"
+#include <mali_kbase.h>
+#include <linux/seq_file.h>
+
+#if IS_ENABLED(CONFIG_SYNC_FILE)
+#include "mali_kbase_sync.h"
+#endif
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+
+#define CQS_UNREADABLE_LIVE_VALUE "(unavailable)"
+
+/* GPU queue related values */
+#define GPU_CSF_MOVE_OPCODE ((u64)0x1)
+#define GPU_CSF_MOVE32_OPCODE ((u64)0x2)
+#define GPU_CSF_SYNC_ADD_OPCODE ((u64)0x25)
+#define GPU_CSF_SYNC_SET_OPCODE ((u64)0x26)
+#define GPU_CSF_SYNC_WAIT_OPCODE ((u64)0x27)
+#define GPU_CSF_SYNC_ADD64_OPCODE ((u64)0x33)
+#define GPU_CSF_SYNC_SET64_OPCODE ((u64)0x34)
+#define GPU_CSF_SYNC_WAIT64_OPCODE ((u64)0x35)
+#define GPU_CSF_CALL_OPCODE ((u64)0x20)
+
+#define MAX_NR_GPU_CALLS (5)
+#define INSTR_OPCODE_MASK ((u64)0xFF << 56)
+#define INSTR_OPCODE_GET(value) ((value & INSTR_OPCODE_MASK) >> 56)
+#define MOVE32_IMM_MASK ((u64)0xFFFFFFFFFUL)
+#define MOVE_DEST_MASK ((u64)0xFF << 48)
+#define MOVE_DEST_GET(value) ((value & MOVE_DEST_MASK) >> 48)
+#define MOVE_IMM_MASK ((u64)0xFFFFFFFFFFFFUL)
+#define SYNC_SRC0_MASK ((u64)0xFF << 40)
+#define SYNC_SRC1_MASK ((u64)0xFF << 32)
+#define SYNC_SRC0_GET(value) (u8)((value & SYNC_SRC0_MASK) >> 40)
+#define SYNC_SRC1_GET(value) (u8)((value & SYNC_SRC1_MASK) >> 32)
+#define SYNC_WAIT_CONDITION_MASK ((u64)0xF << 28)
+#define SYNC_WAIT_CONDITION_GET(value) (u8)((value & SYNC_WAIT_CONDITION_MASK) >> 28)
+
+/* Enumeration for types of GPU queue sync events for
+ * the purpose of dumping them through debugfs.
+ */
+enum debugfs_gpu_sync_type {
+ DEBUGFS_GPU_SYNC_WAIT,
+ DEBUGFS_GPU_SYNC_SET,
+ DEBUGFS_GPU_SYNC_ADD,
+ NUM_DEBUGFS_GPU_SYNC_TYPES
+};
+
+/**
+ * kbasep_csf_debugfs_get_cqs_live_u32() - Obtain live (u32) value for a CQS object.
+ *
+ * @kctx: The context of the queue.
+ * @obj_addr: Pointer to the CQS live 32-bit value.
+ * @live_val: Pointer to the u32 that will be set to the CQS object's current, live
+ * value.
+ *
+ * Return: 0 if successful or a negative error code on failure.
+ */
+static int kbasep_csf_debugfs_get_cqs_live_u32(struct kbase_context *kctx, u64 obj_addr,
+ u32 *live_val)
+{
+ struct kbase_vmap_struct *mapping;
+ u32 *const cpu_ptr = (u32 *)kbase_phy_alloc_mapping_get(kctx, obj_addr, &mapping);
+
+ if (!cpu_ptr)
+ return -1;
+
+ *live_val = *cpu_ptr;
+ kbase_phy_alloc_mapping_put(kctx, mapping);
+ return 0;
+}
+
+/**
+ * kbasep_csf_debugfs_get_cqs_live_u64() - Obtain live (u64) value for a CQS object.
+ *
+ * @kctx: The context of the queue.
+ * @obj_addr: Pointer to the CQS live value (32 or 64-bit).
+ * @live_val: Pointer to the u64 that will be set to the CQS object's current, live
+ * value.
+ *
+ * Return: 0 if successful or a negative error code on failure.
+ */
+static int kbasep_csf_debugfs_get_cqs_live_u64(struct kbase_context *kctx, u64 obj_addr,
+ u64 *live_val)
+{
+ struct kbase_vmap_struct *mapping;
+ u64 *cpu_ptr = (u64 *)kbase_phy_alloc_mapping_get(kctx, obj_addr, &mapping);
+
+ if (!cpu_ptr)
+ return -1;
+
+ *live_val = *cpu_ptr;
+ kbase_phy_alloc_mapping_put(kctx, mapping);
+ return 0;
+}
+
+/**
+ * kbasep_csf_sync_print_kcpu_fence_wait_or_signal() - Print details of a CSF SYNC Fence Wait
+ * or Fence Signal command, contained in a
+ * KCPU queue.
+ *
+ * @file: The seq_file for printing to.
+ * @cmd: The KCPU Command to be printed.
+ * @cmd_name: The name of the command: indicates either a fence SIGNAL or WAIT.
+ */
+static void kbasep_csf_sync_print_kcpu_fence_wait_or_signal(struct seq_file *file,
+ struct kbase_kcpu_command *cmd,
+ const char *cmd_name)
+{
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ struct fence *fence = NULL;
+#else
+ struct dma_fence *fence = NULL;
+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */
+
+ struct kbase_sync_fence_info info;
+ const char *timeline_name = NULL;
+ bool is_signaled = false;
+
+ fence = cmd->info.fence.fence;
+ if (WARN_ON(!fence))
+ return;
+
+ kbase_sync_fence_info_get(cmd->info.fence.fence, &info);
+ timeline_name = fence->ops->get_timeline_name(fence);
+ is_signaled = info.status > 0;
+
+ seq_printf(file, "cmd:%s obj:0x%pK live_value:0x%.8x | ", cmd_name, cmd->info.fence.fence,
+ is_signaled);
+
+ /* Note: fence->seqno was u32 until 5.1 kernel, then u64 */
+ seq_printf(file, "timeline_name:%s timeline_context:0x%.16llx fence_seqno:0x%.16llx",
+ timeline_name, fence->context, (u64)fence->seqno);
+}
+
+/**
+ * kbasep_csf_sync_print_kcpu_cqs_wait() - Print details of a CSF SYNC CQS Wait command,
+ * contained in a KCPU queue.
+ *
+ * @file: The seq_file for printing to.
+ * @cmd: The KCPU Command to be printed.
+ */
+static void kbasep_csf_sync_print_kcpu_cqs_wait(struct seq_file *file,
+ struct kbase_kcpu_command *cmd)
+{
+ struct kbase_context *kctx = file->private;
+ size_t i;
+
+ for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) {
+ struct base_cqs_wait_info *cqs_obj = &cmd->info.cqs_wait.objs[i];
+
+ u32 live_val;
+ int ret = kbasep_csf_debugfs_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val);
+ bool live_val_valid = (ret >= 0);
+
+ seq_printf(file, "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr);
+
+ if (live_val_valid)
+ seq_printf(file, "0x%.16llx", (u64)live_val);
+ else
+ seq_puts(file, CQS_UNREADABLE_LIVE_VALUE);
+
+ seq_printf(file, " | op:gt arg_value:0x%.8x", cqs_obj->val);
+ }
+}
+
+/**
+ * kbasep_csf_sync_print_kcpu_cqs_set() - Print details of a CSF SYNC CQS
+ * Set command, contained in a KCPU queue.
+ *
+ * @file: The seq_file for printing to.
+ * @cmd: The KCPU Command to be printed.
+ */
+static void kbasep_csf_sync_print_kcpu_cqs_set(struct seq_file *file,
+ struct kbase_kcpu_command *cmd)
+{
+ struct kbase_context *kctx = file->private;
+ size_t i;
+
+ for (i = 0; i < cmd->info.cqs_set.nr_objs; i++) {
+ struct base_cqs_set *cqs_obj = &cmd->info.cqs_set.objs[i];
+
+ u32 live_val;
+ int ret = kbasep_csf_debugfs_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val);
+ bool live_val_valid = (ret >= 0);
+
+ seq_printf(file, "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr);
+
+ if (live_val_valid)
+ seq_printf(file, "0x%.16llx", (u64)live_val);
+ else
+ seq_puts(file, CQS_UNREADABLE_LIVE_VALUE);
+
+ seq_printf(file, " | op:add arg_value:0x%.8x", 1);
+ }
+}
+
+/**
+ * kbasep_csf_sync_get_wait_op_name() - Print the name of a CQS Wait Operation.
+ *
+ * @op: The numerical value of operation.
+ *
+ * Return: const static pointer to the command name, or '??' if unknown.
+ */
+static const char *kbasep_csf_sync_get_wait_op_name(basep_cqs_wait_operation_op op)
+{
+ const char *string;
+
+ switch (op) {
+ case BASEP_CQS_WAIT_OPERATION_LE:
+ string = "le";
+ break;
+ case BASEP_CQS_WAIT_OPERATION_GT:
+ string = "gt";
+ break;
+ default:
+ string = "??";
+ break;
+ }
+ return string;
+}
+
+/**
+ * kbasep_csf_sync_get_set_op_name() - Print the name of a CQS Set Operation.
+ *
+ * @op: The numerical value of operation.
+ *
+ * Return: const static pointer to the command name, or '??' if unknown.
+ */
+static const char *kbasep_csf_sync_get_set_op_name(basep_cqs_set_operation_op op)
+{
+ const char *string;
+
+ switch (op) {
+ case BASEP_CQS_SET_OPERATION_ADD:
+ string = "add";
+ break;
+ case BASEP_CQS_SET_OPERATION_SET:
+ string = "set";
+ break;
+ default:
+ string = "???";
+ break;
+ }
+ return string;
+}
+
+/**
+ * kbasep_csf_sync_print_kcpu_cqs_wait_op() - Print details of a CSF SYNC CQS
+ * Wait Operation command, contained
+ * in a KCPU queue.
+ *
+ * @file: The seq_file for printing to.
+ * @cmd: The KCPU Command to be printed.
+ */
+static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct seq_file *file,
+ struct kbase_kcpu_command *cmd)
+{
+ size_t i;
+ struct kbase_context *kctx = file->private;
+
+ for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) {
+ struct base_cqs_wait_operation_info *wait_op =
+ &cmd->info.cqs_wait_operation.objs[i];
+ const char *op_name = kbasep_csf_sync_get_wait_op_name(wait_op->operation);
+
+ u64 live_val;
+ int ret = kbasep_csf_debugfs_get_cqs_live_u64(kctx, wait_op->addr, &live_val);
+
+ bool live_val_valid = (ret >= 0);
+
+ seq_printf(file, "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", wait_op->addr);
+
+ if (live_val_valid)
+ seq_printf(file, "0x%.16llx", live_val);
+ else
+ seq_puts(file, CQS_UNREADABLE_LIVE_VALUE);
+
+ seq_printf(file, " | op:%s arg_value:0x%.16llx", op_name, wait_op->val);
+ }
+}
+
+/**
+ * kbasep_csf_sync_print_kcpu_cqs_set_op() - Print details of a CSF SYNC CQS
+ * Set Operation command, contained
+ * in a KCPU queue.
+ *
+ * @file: The seq_file for printing to.
+ * @cmd: The KCPU Command to be printed.
+ */
+static void kbasep_csf_sync_print_kcpu_cqs_set_op(struct seq_file *file,
+ struct kbase_kcpu_command *cmd)
+{
+ size_t i;
+ struct kbase_context *kctx = file->private;
+
+ for (i = 0; i < cmd->info.cqs_set_operation.nr_objs; i++) {
+ struct base_cqs_set_operation_info *set_op = &cmd->info.cqs_set_operation.objs[i];
+ const char *op_name = kbasep_csf_sync_get_set_op_name(
+ (basep_cqs_set_operation_op)set_op->operation);
+
+ u64 live_val;
+ int ret = kbasep_csf_debugfs_get_cqs_live_u64(kctx, set_op->addr, &live_val);
+
+ bool live_val_valid = (ret >= 0);
+
+ seq_printf(file, "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", set_op->addr);
+
+ if (live_val_valid)
+ seq_printf(file, "0x%.16llx", live_val);
+ else
+ seq_puts(file, CQS_UNREADABLE_LIVE_VALUE);
+
+ seq_printf(file, " | op:%s arg_value:0x%.16llx", op_name, set_op->val);
+ }
+}
+
+/**
+ * kbasep_csf_kcpu_debugfs_print_queue() - Print debug data for a KCPU queue
+ *
+ * @file: The seq_file to print to.
+ * @queue: Pointer to the KCPU queue.
+ */
+static void kbasep_csf_sync_kcpu_debugfs_print_queue(struct seq_file *file,
+ struct kbase_kcpu_command_queue *queue)
+{
+ char started_or_pending;
+ struct kbase_kcpu_command *cmd;
+ struct kbase_context *kctx = file->private;
+ size_t i;
+
+ if (WARN_ON(!queue))
+ return;
+
+ lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ mutex_lock(&queue->lock);
+
+ for (i = 0; i != queue->num_pending_cmds; ++i) {
+ started_or_pending = ((i == 0) && queue->command_started) ? 'S' : 'P';
+ seq_printf(file, "queue:KCPU-%u-%u exec:%c ", kctx->id, queue->id,
+ started_or_pending);
+
+ cmd = &queue->commands[queue->start_offset + i];
+ switch (cmd->type) {
+#if IS_ENABLED(CONFIG_SYNC_FILE)
+ case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL:
+ kbasep_csf_sync_print_kcpu_fence_wait_or_signal(file, cmd, "FENCE_SIGNAL");
+ break;
+ case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:
+ kbasep_csf_sync_print_kcpu_fence_wait_or_signal(file, cmd, "FENCE_WAIT");
+ break;
+#endif
+ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT:
+ kbasep_csf_sync_print_kcpu_cqs_wait(file, cmd);
+ break;
+ case BASE_KCPU_COMMAND_TYPE_CQS_SET:
+ kbasep_csf_sync_print_kcpu_cqs_set(file, cmd);
+ break;
+ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION:
+ kbasep_csf_sync_print_kcpu_cqs_wait_op(file, cmd);
+ break;
+ case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION:
+ kbasep_csf_sync_print_kcpu_cqs_set_op(file, cmd);
+ break;
+ default:
+ seq_puts(file, ", U, Unknown blocking command");
+ break;
+ }
+
+ seq_puts(file, "\n");
+ }
+
+ mutex_unlock(&queue->lock);
+}
+
+/**
+ * kbasep_csf_sync_kcpu_debugfs_show() - Print CSF KCPU queue sync info
+ *
+ * @file: The seq_file for printing to.
+ *
+ * Return: Negative error code or 0 on success.
+ */
+static int kbasep_csf_sync_kcpu_debugfs_show(struct seq_file *file)
+{
+ struct kbase_context *kctx = file->private;
+ unsigned long queue_idx;
+
+ mutex_lock(&kctx->csf.kcpu_queues.lock);
+ seq_printf(file, "KCPU queues for ctx %u:\n", kctx->id);
+
+ queue_idx = find_first_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES);
+
+ while (queue_idx < KBASEP_MAX_KCPU_QUEUES) {
+ kbasep_csf_sync_kcpu_debugfs_print_queue(file,
+ kctx->csf.kcpu_queues.array[queue_idx]);
+
+ queue_idx = find_next_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES,
+ queue_idx + 1);
+ }
+
+ mutex_unlock(&kctx->csf.kcpu_queues.lock);
+ return 0;
+}
+
+/**
+ * kbasep_csf_get_move_immediate_value() - Get the immediate values for sync operations
+ * from a MOVE instruction.
+ *
+ * @move_cmd: Raw MOVE instruction.
+ * @sync_addr_reg: Register identifier from SYNC_* instruction.
+ * @compare_val_reg: Register identifier from SYNC_* instruction.
+ * @sync_val: Pointer to store CQS object address for sync operation.
+ * @compare_val: Pointer to store compare value for sync operation.
+ *
+ * Return: True if value is obtained by checking for correct register identifier,
+ * or false otherwise.
+ */
+static bool kbasep_csf_get_move_immediate_value(u64 move_cmd, u64 sync_addr_reg,
+ u64 compare_val_reg, u64 *sync_val,
+ u64 *compare_val)
+{
+ u64 imm_mask;
+
+ /* Verify MOVE instruction and get immediate mask */
+ if (INSTR_OPCODE_GET(move_cmd) == GPU_CSF_MOVE32_OPCODE)
+ imm_mask = MOVE32_IMM_MASK;
+ else if (INSTR_OPCODE_GET(move_cmd) == GPU_CSF_MOVE_OPCODE)
+ imm_mask = MOVE_IMM_MASK;
+ else
+ /* Error return */
+ return false;
+
+ /* Verify value from MOVE instruction and assign to variable */
+ if (sync_addr_reg == MOVE_DEST_GET(move_cmd))
+ *sync_val = move_cmd & imm_mask;
+ else if (compare_val_reg == MOVE_DEST_GET(move_cmd))
+ *compare_val = move_cmd & imm_mask;
+ else
+ /* Error return */
+ return false;
+
+ return true;
+}
+
+/**
+ * kbasep_csf_read_ringbuffer_value() - Reads a u64 from the ringbuffer at a
+ *                                      provided offset.
+ *
+ * @queue: Pointer to the queue.
+ * @ringbuff_offset: Ringbuffer offset.
+ *
+ * Return: the u64 in the ringbuffer at the desired offset.
+ */
+static u64 kbasep_csf_read_ringbuffer_value(struct kbase_queue *queue, u32 ringbuff_offset)
+{
+ u64 page_off = ringbuff_offset >> PAGE_SHIFT;
+ u64 offset_within_page = ringbuff_offset & ~PAGE_MASK;
+ struct page *page = as_page(queue->queue_reg->gpu_alloc->pages[page_off]);
+ u64 *ringbuffer = kmap_atomic(page);
+ u64 value = ringbuffer[offset_within_page / sizeof(u64)];
+
+ kunmap_atomic(ringbuffer);
+ return value;
+}
+
+/**
+ * kbasep_csf_print_gpu_sync_op() - Print sync operation info for given sync command.
+ *
+ * @file: Pointer to debugfs seq_file file struct for writing output.
+ * @kctx: Pointer to kbase context.
+ * @queue: Pointer to the GPU command queue.
+ * @ringbuff_offset: Offset to index the ring buffer with, for the given sync command.
+ * (Useful for finding preceding MOVE commands)
+ * @sync_cmd: Entire u64 of the sync command, which has both sync address and
+ * comparison-value encoded in it.
+ * @type: Type of GPU sync command (e.g. SYNC_SET, SYNC_ADD, SYNC_WAIT).
+ * @is_64bit: Bool to indicate if operation is 64 bit (true) or 32 bit (false).
+ * @follows_wait: Bool to indicate if the operation follows at least one wait
+ * operation. Used to determine whether it's pending or started.
+ */
+static void kbasep_csf_print_gpu_sync_op(struct seq_file *file, struct kbase_context *kctx,
+ struct kbase_queue *queue, u32 ringbuff_offset,
+ u64 sync_cmd, enum debugfs_gpu_sync_type type,
+ bool is_64bit, bool follows_wait)
+{
+ u64 sync_addr = 0, compare_val = 0, live_val = 0;
+ u64 move_cmd;
+ u8 sync_addr_reg, compare_val_reg, wait_condition = 0;
+ int err;
+
+ static const char *const gpu_sync_type_name[] = { "SYNC_WAIT", "SYNC_SET", "SYNC_ADD" };
+ static const char *const gpu_sync_type_op[] = {
+ "wait", /* This should never be printed, only included to simplify indexing */
+ "set", "add"
+ };
+
+ if (type >= NUM_DEBUGFS_GPU_SYNC_TYPES) {
+ dev_warn(kctx->kbdev->dev, "Expected GPU queue sync type is unknown!");
+ return;
+ }
+
+ /* We expect there to be at least 2 preceding MOVE instructions. Base
+ * will always arrange for the 2 MOVE + SYNC instructions to be located
+ * contiguously, so the sequence is never expected to wrap around the
+ * ringbuffer boundary.
+ */
+ if (unlikely(ringbuff_offset < (2 * sizeof(u64)))) {
+ dev_warn(kctx->kbdev->dev,
+ "Unexpected wraparound detected between %s & MOVE instruction",
+ gpu_sync_type_name[type]);
+ return;
+ }
+
+ /* 1. Get Register identifiers from SYNC_* instruction */
+ sync_addr_reg = SYNC_SRC0_GET(sync_cmd);
+ compare_val_reg = SYNC_SRC1_GET(sync_cmd);
+
+ /* 2. Get values from first MOVE command */
+ ringbuff_offset -= sizeof(u64);
+ move_cmd = kbasep_csf_read_ringbuffer_value(queue, ringbuff_offset);
+ if (!kbasep_csf_get_move_immediate_value(move_cmd, sync_addr_reg, compare_val_reg,
+ &sync_addr, &compare_val))
+ return;
+
+ /* 3. Get values from next MOVE command */
+ ringbuff_offset -= sizeof(u64);
+ move_cmd = kbasep_csf_read_ringbuffer_value(queue, ringbuff_offset);
+ if (!kbasep_csf_get_move_immediate_value(move_cmd, sync_addr_reg, compare_val_reg,
+ &sync_addr, &compare_val))
+ return;
+
+ /* 4. Get CQS object value */
+ if (is_64bit)
+ err = kbasep_csf_debugfs_get_cqs_live_u64(kctx, sync_addr, &live_val);
+ else
+ err = kbasep_csf_debugfs_get_cqs_live_u32(kctx, sync_addr, (u32 *)(&live_val));
+
+ if (err)
+ return;
+
+ /* 5. Print info */
+ seq_printf(file, "queue:GPU-%u-%u-%u exec:%c cmd:%s ", kctx->id, queue->group->handle,
+ queue->csi_index, queue->enabled && !follows_wait ? 'S' : 'P',
+ gpu_sync_type_name[type]);
+
+ if (queue->group->csg_nr == KBASEP_CSG_NR_INVALID)
+ seq_puts(file, "slot:-");
+ else
+ seq_printf(file, "slot:%d", (int)queue->group->csg_nr);
+
+ seq_printf(file, " obj:0x%.16llx live_value:0x%.16llx | ", sync_addr, live_val);
+
+ if (type == DEBUGFS_GPU_SYNC_WAIT) {
+ wait_condition = SYNC_WAIT_CONDITION_GET(sync_cmd);
+ seq_printf(file, "op:%s ", kbasep_csf_sync_get_wait_op_name(wait_condition));
+ } else
+ seq_printf(file, "op:%s ", gpu_sync_type_op[type]);
+
+ seq_printf(file, "arg_value:0x%.16llx\n", compare_val);
+}
+
+/**
+ * kbasep_csf_dump_active_queue_sync_info() - Print GPU command queue sync information.
+ *
+ * @file: seq_file for printing to.
+ * @queue: Address of a GPU command queue to examine.
+ *
+ * This function will iterate through each command in the ring buffer of the given GPU queue from
+ * CS_EXTRACT, and if is a SYNC_* instruction it will attempt to decode the sync operation and
+ * print relevant information to the debugfs file.
+ * This function will stop iterating once the CS_INSERT address is reached by the cursor (i.e.
+ * when there are no more commands to view) or a number of consumed GPU CALL commands have
+ * been observed.
+ */
+static void kbasep_csf_dump_active_queue_sync_info(struct seq_file *file, struct kbase_queue *queue)
+{
+ struct kbase_context *kctx;
+ u32 *addr;
+ u64 cs_extract, cs_insert, instr, cursor;
+ bool follows_wait = false;
+ int nr_calls = 0;
+
+ if (!queue)
+ return;
+
+ kctx = queue->kctx;
+
+ addr = (u32 *)queue->user_io_addr;
+ cs_insert = addr[CS_INSERT_LO / 4] | ((u64)addr[CS_INSERT_HI / 4] << 32);
+
+ addr = (u32 *)(queue->user_io_addr + PAGE_SIZE);
+ cs_extract = addr[CS_EXTRACT_LO / 4] | ((u64)addr[CS_EXTRACT_HI / 4] << 32);
+
+ cursor = cs_extract;
+
+ if (!is_power_of_2(queue->size)) {
+ dev_warn(kctx->kbdev->dev, "GPU queue %u size of %u not a power of 2",
+ queue->csi_index, queue->size);
+ return;
+ }
+
+ while ((cursor < cs_insert) && (nr_calls < MAX_NR_GPU_CALLS)) {
+ bool instr_is_64_bit = false;
+ /* Calculate offset into ringbuffer from the absolute cursor,
+ * by finding the remainder of the cursor divided by the
+ * ringbuffer size. The ringbuffer size is guaranteed to be
+ * a power of 2, so the remainder can be calculated without an
+ * explicit modulo. queue->size - 1 is the ringbuffer mask.
+ */
+ u32 cursor_ringbuff_offset = (u32)(cursor & (queue->size - 1));
+
+ /* Find instruction that cursor is currently on */
+ instr = kbasep_csf_read_ringbuffer_value(queue, cursor_ringbuff_offset);
+
+ switch (INSTR_OPCODE_GET(instr)) {
+ case GPU_CSF_SYNC_ADD64_OPCODE:
+ case GPU_CSF_SYNC_SET64_OPCODE:
+ case GPU_CSF_SYNC_WAIT64_OPCODE:
+ instr_is_64_bit = true;
+ default:
+ break;
+ }
+
+ switch (INSTR_OPCODE_GET(instr)) {
+ case GPU_CSF_SYNC_ADD_OPCODE:
+ case GPU_CSF_SYNC_ADD64_OPCODE:
+ kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset,
+ instr, DEBUGFS_GPU_SYNC_ADD, instr_is_64_bit,
+ follows_wait);
+ break;
+ case GPU_CSF_SYNC_SET_OPCODE:
+ case GPU_CSF_SYNC_SET64_OPCODE:
+ kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset,
+ instr, DEBUGFS_GPU_SYNC_SET, instr_is_64_bit,
+ follows_wait);
+ break;
+ case GPU_CSF_SYNC_WAIT_OPCODE:
+ case GPU_CSF_SYNC_WAIT64_OPCODE:
+ kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset,
+ instr, DEBUGFS_GPU_SYNC_WAIT, instr_is_64_bit,
+ follows_wait);
+ follows_wait = true; /* Future commands will follow at least one wait */
+ break;
+ case GPU_CSF_CALL_OPCODE:
+ nr_calls++;
+ /* Fallthrough */
+ default:
+ /* Unrecognized command, skip past it */
+ break;
+ }
+
+ cursor += sizeof(u64);
+ }
+}
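A worked example of the wraparound arithmetic above, with made-up values: for queue->size = 4096 the ringbuffer mask is 0xFFF, so an absolute cursor of 0x1010 maps to ringbuffer offset 0x010; each decoded instruction then advances the cursor by sizeof(u64), i.e. 8 bytes.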
+
+/**
+ * kbasep_csf_dump_active_group_sync_state() - Prints SYNC commands in all GPU queues of
+ * the provided queue group.
+ *
+ * @file: seq_file for printing to.
+ * @group: Address of a GPU command group to iterate through.
+ *
+ * This function will iterate through each queue in the provided GPU queue group and
+ * print its SYNC related commands.
+ */
+static void kbasep_csf_dump_active_group_sync_state(struct seq_file *file,
+ struct kbase_queue_group *const group)
+{
+ struct kbase_context *kctx = file->private;
+ unsigned int i;
+
+ seq_printf(file, "GPU queues for group %u (slot %d) of ctx %d_%d\n", group->handle,
+ group->csg_nr, kctx->tgid, kctx->id);
+
+ for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++)
+ kbasep_csf_dump_active_queue_sync_info(file, group->bound_queues[i]);
+}
+
+/**
+ * kbasep_csf_sync_gpu_debugfs_show() - Print CSF GPU queue sync info
+ *
+ * @file: The seq_file for printing to.
+ *
+ * Return: Negative error code or 0 on success.
+ */
+static int kbasep_csf_sync_gpu_debugfs_show(struct seq_file *file)
+{
+ u32 gr;
+ struct kbase_context *kctx = file->private;
+ struct kbase_device *kbdev;
+
+ if (WARN_ON(!kctx))
+ return -EINVAL;
+
+ kbdev = kctx->kbdev;
+ kbase_csf_scheduler_lock(kbdev);
+ kbase_csf_debugfs_update_active_groups_status(kbdev);
+
+ for (gr = 0; gr < kbdev->csf.global_iface.group_num; gr++) {
+ struct kbase_queue_group *const group =
+ kbdev->csf.scheduler.csg_slots[gr].resident_group;
+ if (!group || group->kctx != kctx)
+ continue;
+ kbasep_csf_dump_active_group_sync_state(file, group);
+ }
+
+ kbase_csf_scheduler_unlock(kbdev);
+ return 0;
+}
+
+/**
+ * kbasep_csf_sync_debugfs_show() - Print CSF queue sync information
+ *
+ * @file: The seq_file for printing to.
+ * @data: The debugfs dentry private data, a pointer to kbase_context.
+ *
+ * Return: Negative error code or 0 on success.
+ */
+static int kbasep_csf_sync_debugfs_show(struct seq_file *file, void *data)
+{
+ seq_printf(file, "MALI_CSF_SYNC_DEBUGFS_VERSION: v%u\n", MALI_CSF_SYNC_DEBUGFS_VERSION);
+
+ kbasep_csf_sync_kcpu_debugfs_show(file);
+ kbasep_csf_sync_gpu_debugfs_show(file);
+ return 0;
+}
+
+static int kbasep_csf_sync_debugfs_open(struct inode *in, struct file *file)
+{
+ return single_open(file, kbasep_csf_sync_debugfs_show, in->i_private);
+}
+
+static const struct file_operations kbasep_csf_sync_debugfs_fops = {
+ .open = kbasep_csf_sync_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+/**
+ * kbase_csf_sync_debugfs_init() - Initialise debugfs file.
+ *
+ * @kctx: Kernel context pointer.
+ */
+void kbase_csf_sync_debugfs_init(struct kbase_context *kctx)
+{
+ struct dentry *file;
+ const mode_t mode = 0444;
+
+ if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry)))
+ return;
+
+ file = debugfs_create_file("csf_sync", mode, kctx->kctx_dentry, kctx,
+ &kbasep_csf_sync_debugfs_fops);
+
+ if (IS_ERR_OR_NULL(file))
+ dev_warn(kctx->kbdev->dev, "Unable to create CSF Sync debugfs entry");
+}
+
+#else
+/*
+ * Stub functions for when debugfs is disabled
+ */
+void kbase_csf_sync_debugfs_init(struct kbase_context *kctx)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
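As a standalone illustration of the field-extraction macros defined near the top of this file, the sketch below decodes a raw 64-bit SYNC_WAIT/SYNC_WAIT64 instruction word into its register identifiers and wait condition. The helper is hypothetical and not part of the driver.

/* Hypothetical helper, for illustration only: pull the operand registers and
 * the wait condition out of a raw SYNC_WAIT instruction word using
 * INSTR_OPCODE_GET, SYNC_SRC0_GET, SYNC_SRC1_GET and SYNC_WAIT_CONDITION_GET.
 */
static void example_decode_sync_wait(u64 instr)
{
	if (INSTR_OPCODE_GET(instr) == GPU_CSF_SYNC_WAIT_OPCODE ||
	    INSTR_OPCODE_GET(instr) == GPU_CSF_SYNC_WAIT64_OPCODE) {
		u8 addr_reg = SYNC_SRC0_GET(instr);  /* register holding the CQS address */
		u8 value_reg = SYNC_SRC1_GET(instr); /* register holding the compare value */
		u8 cond = SYNC_WAIT_CONDITION_GET(instr); /* e.g. BASEP_CQS_WAIT_OPERATION_GT */

		pr_info("wait: addr_reg=%u value_reg=%u cond=%u\n", addr_reg, value_reg, cond);
	}
}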
diff --git a/mali_kbase/mali_kbase_mem_profile_debugfs_buf_size.h b/mali_kbase/csf/mali_kbase_csf_sync_debugfs.h
index 01b4129..177e15d 100644
--- a/mali_kbase/mali_kbase_mem_profile_debugfs_buf_size.h
+++ b/mali_kbase/csf/mali_kbase_csf_sync_debugfs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014, 2017-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -19,18 +19,19 @@
*
*/
-/**
- * DOC: Header file for the size of the buffer to accumulate the histogram report text in
- */
+#ifndef _KBASE_CSF_SYNC_DEBUGFS_H_
+#define _KBASE_CSF_SYNC_DEBUGFS_H_
+
+/* Forward declaration */
+struct kbase_context;
-#ifndef _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
-#define _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+#define MALI_CSF_SYNC_DEBUGFS_VERSION 0
/**
- * KBASE_MEM_PROFILE_MAX_BUF_SIZE - The size of the buffer to accumulate the histogram report text
- * in @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT
+ * kbase_csf_sync_debugfs_init() - Create a debugfs entry for CSF queue sync info
+ *
+ * @kctx: The kbase_context for which to create the debugfs entry
*/
-#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t)(64 + ((80 + (56 * 64)) * 55) + 56))
-
-#endif /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/
+void kbase_csf_sync_debugfs_init(struct kbase_context *kctx);
+#endif /* _KBASE_CSF_SYNC_DEBUGFS_H_ */
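For reference, the new per-context entry can be read like any other debugfs file. A minimal user-space sketch, assuming debugfs is mounted at /sys/kernel/debug and that the context directory follows the usual mali0/ctx/<tgid>_<id> layout; the exact path is an assumption, not confirmed by this patch.

#include <stdio.h>

int main(void)
{
	char line[256];
	/* Hypothetical path; substitute the tgid/id of the target context */
	FILE *f = fopen("/sys/kernel/debug/mali0/ctx/1234_5/csf_sync", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}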
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
index 70ecd80..8072a8b 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -25,6 +25,26 @@
#include "mali_kbase_csf_tiler_heap_def.h"
#include "mali_kbase_csf_heap_context_alloc.h"
+/* Tiler heap shrink stop limit for maintaining a minimum number of chunks */
+#define HEAP_SHRINK_STOP_LIMIT (1)
+
+/**
+ * struct kbase_csf_gpu_buffer_heap - A gpu buffer object specific to tiler heap
+ *
+ * @cdsbp_0: Descriptor_type and buffer_type
+ * @size: The size of the current heap chunk
+ * @pointer: Pointer to the current heap chunk
+ * @low_pointer: Pointer to low end of current heap chunk
+ * @high_pointer: Pointer to high end of current heap chunk
+ */
+struct kbase_csf_gpu_buffer_heap {
+ u32 cdsbp_0;
+ u32 size;
+ u64 pointer;
+ u64 low_pointer;
+ u64 high_pointer;
+} __packed;
+
/**
* encode_chunk_ptr - Encode the address and size of a chunk as an integer.
*
@@ -95,7 +115,9 @@ static void remove_external_chunk_mappings(struct kbase_context *const kctx,
kbase_mem_shrink_cpu_mapping(kctx, chunk->region, 0,
chunk->region->cpu_alloc->nents);
}
+#if !defined(CONFIG_MALI_VECTOR_DUMP)
chunk->region->flags |= KBASE_REG_DONT_NEED;
+#endif
dev_dbg(kctx->kbdev->dev, "Removed external mappings from chunk 0x%llX", chunk->gpu_va);
}
@@ -206,12 +228,14 @@ static void remove_unlinked_chunk(struct kbase_context *kctx,
kbase_vunmap(kctx, &chunk->map);
/* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT
* regions), and so we must clear that flag too before freeing.
- * For "no user free", we check that the refcount is 1 as it is a shrinkable region;
+ * For "no user free count", we check that the count is 1 as it is a shrinkable region;
* no other code part within kbase can take a reference to it.
*/
- WARN_ON(chunk->region->no_user_free_refcnt > 1);
- kbase_va_region_no_user_free_put(kctx, chunk->region);
+ WARN_ON(atomic_read(&chunk->region->no_user_free_count) > 1);
+ kbase_va_region_no_user_free_dec(chunk->region);
+#if !defined(CONFIG_MALI_VECTOR_DUMP)
chunk->region->flags &= ~KBASE_REG_DONT_NEED;
+#endif
kbase_mem_free_region(kctx, chunk->region);
kbase_gpu_vm_unlock(kctx);
@@ -291,8 +315,8 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context *
* It should be fine and not a security risk if we let the region leak till
* region tracker termination in such a case.
*/
- if (unlikely(chunk->region->no_user_free_refcnt > 1)) {
- dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_refcnt > 1!\n");
+ if (unlikely(atomic_read(&chunk->region->no_user_free_count) > 1)) {
+ dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_count > 1!\n");
goto unroll_region;
}
@@ -335,14 +359,22 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context *
remove_external_chunk_mappings(kctx, chunk);
kbase_gpu_vm_unlock(kctx);
+ /* If page migration is enabled, we don't want to migrate tiler heap pages.
+ * This does not change if the constituent pages are already marked as isolated.
+ */
+ if (kbase_page_migration_enabled)
+ kbase_set_phy_alloc_page_status(chunk->region->gpu_alloc, NOT_MOVABLE);
+
return chunk;
unroll_region:
/* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT
* regions), and so we must clear that flag too before freeing.
*/
- kbase_va_region_no_user_free_put(kctx, chunk->region);
+ kbase_va_region_no_user_free_dec(chunk->region);
+#if !defined(CONFIG_MALI_VECTOR_DUMP)
chunk->region->flags &= ~KBASE_REG_DONT_NEED;
+#endif
kbase_mem_free_region(kctx, chunk->region);
kbase_gpu_vm_unlock(kctx);
unroll_chunk:
@@ -499,7 +531,7 @@ static void delete_heap(struct kbase_csf_tiler_heap *heap)
if (heap->buf_desc_reg) {
kbase_vunmap(kctx, &heap->buf_desc_map);
kbase_gpu_vm_lock(kctx);
- kbase_va_region_no_user_free_put(kctx, heap->buf_desc_reg);
+ kbase_va_region_no_user_free_dec(heap->buf_desc_reg);
kbase_gpu_vm_unlock(kctx);
}
@@ -565,7 +597,6 @@ int kbase_csf_tiler_heap_context_init(struct kbase_context *const kctx)
INIT_LIST_HEAD(&kctx->csf.tiler_heaps.list);
mutex_init(&kctx->csf.tiler_heaps.lock);
- atomic_set(&kctx->csf.tiler_heaps.est_count_pages, 0);
dev_dbg(kctx->kbdev->dev, "Initialized a context for tiler heaps\n");
@@ -588,13 +619,9 @@ void kbase_csf_tiler_heap_context_term(struct kbase_context *const kctx)
entry, struct kbase_csf_tiler_heap, link);
list_del_init(&heap->link);
- atomic_sub(PFN_UP(heap->chunk_size), &kctx->csf.tiler_heaps.est_count_pages);
-
delete_heap(heap);
}
- WARN_ON(atomic_read(&kctx->csf.tiler_heaps.est_count_pages) != 0);
-
mutex_destroy(&kctx->csf.tiler_heaps.lock);
kbase_csf_heap_context_allocator_term(&kctx->csf.tiler_heaps.ctx_alloc);
@@ -689,8 +716,7 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_
heap->chunk_size = chunk_size;
heap->max_chunks = max_chunks;
heap->target_in_flight = target_in_flight;
- heap->desc_chk_flags = 0;
- heap->desc_chk_cnt = 0;
+ heap->buf_desc_checked = false;
INIT_LIST_HEAD(&heap->chunks_list);
INIT_LIST_HEAD(&heap->link);
@@ -715,11 +741,16 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_
*/
heap->buf_desc_va = buf_desc_va;
- heap->buf_desc_reg = kbase_va_region_no_user_free_get(kctx, buf_desc_reg);
+ heap->buf_desc_reg = buf_desc_reg;
+ kbase_va_region_no_user_free_inc(buf_desc_reg);
vmap_ptr = kbase_vmap_reg(kctx, buf_desc_reg, buf_desc_va, TILER_BUF_DESC_SIZE,
KBASE_REG_CPU_RD, &heap->buf_desc_map,
KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING);
+
+ if (kbase_page_migration_enabled)
+ kbase_set_phy_alloc_page_status(buf_desc_reg->gpu_alloc, NOT_MOVABLE);
+
kbase_gpu_vm_unlock(kctx);
if (unlikely(!vmap_ptr)) {
@@ -787,8 +818,6 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_
if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory)
kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory;
- /* Assuming at least one chunk reclaimable per heap on (estimated) count */
- atomic_add(PFN_UP(heap->chunk_size), &kctx->csf.tiler_heaps.est_count_pages);
dev_dbg(kctx->kbdev->dev,
"Created tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d\n", heap->gpu_va,
buf_desc_va, kctx->tgid, kctx->id);
@@ -806,7 +835,7 @@ heap_context_alloc_failed:
buf_desc_vmap_failed:
if (heap->buf_desc_reg) {
kbase_gpu_vm_lock(kctx);
- kbase_va_region_no_user_free_put(kctx, heap->buf_desc_reg);
+ kbase_va_region_no_user_free_dec(heap->buf_desc_reg);
kbase_gpu_vm_unlock(kctx);
}
buf_desc_not_suitable:
@@ -829,7 +858,6 @@ int kbase_csf_tiler_heap_term(struct kbase_context *const kctx,
heap_size = heap->chunk_size * chunk_count;
list_del_init(&heap->link);
- atomic_sub(PFN_UP(heap->chunk_size), &kctx->csf.tiler_heaps.est_count_pages);
} else {
err = -EINVAL;
}
@@ -862,6 +890,25 @@ int kbase_csf_tiler_heap_term(struct kbase_context *const kctx,
return err;
}
+/**
+ * validate_allocation_request - Check whether the chunk allocation request
+ * received on tiler OOM should be handled at
+ * current time.
+ *
+ * @heap: The tiler heap the OOM is associated with
+ * @nr_in_flight: Number of fragment jobs in flight
+ * @pending_frag_count: Number of pending fragment jobs
+ *
+ * Context: must hold the tiler heap lock to guarantee its lifetime
+ *
+ * Return:
+ * * 0 - allowed to allocate an additional chunk
+ * * -EINVAL - invalid
+ * * -EBUSY - there are fragment jobs still in flight, which may free chunks
+ * after completing
+ * * -ENOMEM - the targeted number of in-flight chunks has been reached and
+ * no new ones will be allocated
+ */
static int validate_allocation_request(struct kbase_csf_tiler_heap *heap, u32 nr_in_flight,
u32 pending_frag_count)
{
@@ -921,7 +968,12 @@ int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
err = validate_allocation_request(heap, nr_in_flight, pending_frag_count);
if (unlikely(err)) {
- dev_err(kctx->kbdev->dev,
+ /* The allocation request can be legitimate, but be invoked on a heap
+ * that has already reached the maximum pre-configured capacity. This
+ * is useful debug information, but should not be treated as an error,
+ * since the request will be re-sent at a later point.
+ */
+ dev_dbg(kctx->kbdev->dev,
"Not allocating new chunk for heap 0x%llX due to current heap state (err %d)",
gpu_heap_va, err);
mutex_unlock(&kctx->csf.tiler_heaps.lock);
@@ -1086,27 +1138,23 @@ static bool delete_chunk_physical_pages(struct kbase_csf_tiler_heap *heap, u64 c
return true;
}
-static bool heap_buffer_decsriptor_checked(struct kbase_csf_tiler_heap *const heap)
-{
- return heap->desc_chk_flags & HEAP_BUF_DESCRIPTOR_CHECKED;
-}
-
static void sanity_check_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap,
struct kbase_csf_gpu_buffer_heap *desc)
{
- u64 ptr_addr = desc->pointer & CHUNK_ADDR_MASK;
+ u64 first_hoarded_chunk_gpu_va = desc->pointer & CHUNK_ADDR_MASK;
lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
- if (ptr_addr) {
- struct kbase_csf_tiler_heap_chunk *chunk = find_chunk(heap, ptr_addr);
+ if (first_hoarded_chunk_gpu_va) {
+ struct kbase_csf_tiler_heap_chunk *chunk =
+ find_chunk(heap, first_hoarded_chunk_gpu_va);
if (likely(chunk)) {
dev_dbg(heap->kctx->kbdev->dev,
"Buffer descriptor 0x%llX sanity check ok, HW reclaim allowed\n",
heap->buf_desc_va);
- heap->desc_chk_flags = HEAP_BUF_DESCRIPTOR_CHECKED;
+ heap->buf_desc_checked = true;
return;
}
}
@@ -1115,21 +1163,17 @@ static void sanity_check_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap,
heap->buf_desc_va);
}
-static bool can_read_hw_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap, u64 *ptr_u64)
+static bool can_read_hw_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap, u64 *chunk_gpu_va_ptr)
{
struct kbase_context *kctx = heap->kctx;
- bool checked = false;
lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
/* Initialize the descriptor pointer value to 0 */
- *ptr_u64 = 0;
-
- if (heap_buffer_decsriptor_checked(heap))
- return true;
+ *chunk_gpu_va_ptr = 0;
/* The BufferDescriptor on heap is a hint on creation, do a sanity check at runtime */
- if (heap->buf_desc_reg) {
+ if (heap->buf_desc_reg && !heap->buf_desc_checked) {
struct kbase_csf_gpu_buffer_heap *desc = heap->buf_desc_map.addr;
/* BufferDescriptor is supplied by userspace, so could be CPU-cached */
@@ -1137,12 +1181,11 @@ static bool can_read_hw_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap, u64 *
kbase_sync_mem_regions(kctx, &heap->buf_desc_map, KBASE_SYNC_TO_CPU);
sanity_check_gpu_buffer_heap(heap, desc);
- checked = heap_buffer_decsriptor_checked(heap);
- if (checked)
- *ptr_u64 = desc->pointer & CHUNK_ADDR_MASK;
+ if (heap->buf_desc_checked)
+ *chunk_gpu_va_ptr = desc->pointer & CHUNK_ADDR_MASK;
}
- return checked;
+ return heap->buf_desc_checked;
}
static u32 delete_hoarded_chunks(struct kbase_csf_tiler_heap *heap)
@@ -1263,7 +1306,7 @@ static u64 delete_unused_chunk_pages(struct kbase_csf_tiler_heap *heap)
return freed_pages;
}
-static u32 scan_kctx_unused_heap_pages_cb(struct kbase_context *kctx, u32 to_free)
+u32 kbase_csf_tiler_heap_scan_kctx_unused_pages(struct kbase_context *kctx, u32 to_free)
{
u64 freed = 0;
struct kbase_csf_tiler_heap *heap;
@@ -1309,7 +1352,7 @@ static u64 count_unused_heap_pages(struct kbase_csf_tiler_heap *heap)
return page_cnt;
}
-static u32 count_kctx_unused_heap_pages_cb(struct kbase_context *kctx)
+u32 kbase_csf_tiler_heap_count_kctx_unused_pages(struct kbase_context *kctx)
{
u64 page_cnt = 0;
struct kbase_csf_tiler_heap *heap;
@@ -1327,46 +1370,3 @@ static u32 count_kctx_unused_heap_pages_cb(struct kbase_context *kctx)
else
return (u32)page_cnt;
}
-
-static unsigned long kbase_csf_tiler_heap_reclaim_count_objects(struct shrinker *s,
- struct shrink_control *sc)
-{
- struct kbase_device *kbdev = container_of(s, struct kbase_device, csf.tiler_heap_reclaim);
- struct kbase_csf_tiler_heap_shrink_control shrink_ctrl = {
- .sc = sc,
- .count_cb = count_kctx_unused_heap_pages_cb,
- .scan_cb = scan_kctx_unused_heap_pages_cb,
- };
-
- return kbase_csf_scheduler_count_free_heap_pages(kbdev, &shrink_ctrl);
-}
-
-static unsigned long kbase_csf_tiler_heap_reclaim_scan_objects(struct shrinker *s,
- struct shrink_control *sc)
-{
- struct kbase_device *kbdev = container_of(s, struct kbase_device, csf.tiler_heap_reclaim);
- struct kbase_csf_tiler_heap_shrink_control shrink_ctrl = {
- .sc = sc,
- .count_cb = count_kctx_unused_heap_pages_cb,
- .scan_cb = scan_kctx_unused_heap_pages_cb,
- };
-
- return kbase_csf_scheduler_scan_free_heap_pages(kbdev, &shrink_ctrl);
-}
-
-void kbase_csf_tiler_heap_register_shrinker(struct kbase_device *kbdev)
-{
- struct shrinker *reclaim = &kbdev->csf.tiler_heap_reclaim;
-
- reclaim->count_objects = kbase_csf_tiler_heap_reclaim_count_objects;
- reclaim->scan_objects = kbase_csf_tiler_heap_reclaim_scan_objects;
- reclaim->seeks = HEAP_SHRINKER_SEEKS;
- reclaim->batch = HEAP_SHRINKER_BATCH;
-
- register_shrinker(reclaim);
-}
-
-void kbase_csf_tiler_heap_unregister_shrinker(struct kbase_device *kbdev)
-{
- unregister_shrinker(&kbdev->csf.tiler_heap_reclaim);
-}
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.h b/mali_kbase/csf/mali_kbase_csf_tiler_heap.h
index da60c59..1b5cb56 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.h
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.h
@@ -118,19 +118,25 @@ int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr);
/**
- * kbase_csf_tiler_heap_register_shrinker - Register shrinker for tiler heap.
+ * kbase_csf_tiler_heap_scan_kctx_unused_pages - Performs the tiler heap shrinker reclaim's scan
+ * functionality.
*
- * @kbdev: Pointer to the device.
+ * @kctx: Pointer to the kbase context on which the tiler heap reclaim is to
+ *        operate.
+ * @to_free: Number of pages suggested for the reclaim scan (free) method to reach.
*
+ * Return: the actual number of pages the scan method freed in this call.
*/
-void kbase_csf_tiler_heap_register_shrinker(struct kbase_device *kbdev);
+u32 kbase_csf_tiler_heap_scan_kctx_unused_pages(struct kbase_context *kctx, u32 to_free);
/**
- * kbase_csf_tiler_heap_unregister_shrinker - Unregister shrinker for tiler heap on device
- * shut down.
+ * kbase_csf_tiler_heap_count_kctx_unused_pages - Performs the tiler heap shrinker reclaim's count
+ * functionality.
*
- * @kbdev: Pointer to the device.
+ * @kctx: Pointer to the kbase context on which the tiler heap reclaim is to
+ *        operate.
*
+ * Return: the number of pages that could likely be freed by a subsequent scan method call.
*/
-void kbase_csf_tiler_heap_unregister_shrinker(struct kbase_device *kbdev);
+u32 kbase_csf_tiler_heap_count_kctx_unused_pages(struct kbase_context *kctx);
#endif
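The two per-context helpers declared above replace the old device-level shrinker entry points; they are now driven from the reclaim manager in mali_kbase_csf_tiler_heap_reclaim.c. As a hedged sketch only, a per-context count/scan pairing could look like the wrappers below; the wrapper names are invented for illustration, and <linux/shrinker.h> plus this header are assumed to be included. The real shrinker callbacks also account for scheduler state.

/* Illustrative wrappers only, not the actual reclaim implementation. */
static unsigned long example_count_ctx(struct kbase_context *kctx)
{
	/* Pages that a subsequent scan call could likely free */
	return kbase_csf_tiler_heap_count_kctx_unused_pages(kctx);
}

static unsigned long example_scan_ctx(struct kbase_context *kctx,
				      struct shrink_control *sc)
{
	u32 freed = kbase_csf_tiler_heap_scan_kctx_unused_pages(kctx, (u32)sc->nr_to_scan);

	return freed ? freed : SHRINK_STOP;
}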
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap_def.h b/mali_kbase/csf/mali_kbase_csf_tiler_heap_def.h
index 9d447ce..96f2b03 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap_def.h
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap_def.h
@@ -56,15 +56,6 @@
((CHUNK_HDR_NEXT_ADDR_MASK >> CHUNK_HDR_NEXT_ADDR_POS) << \
CHUNK_HDR_NEXT_ADDR_ENCODE_SHIFT)
-/* Tiler heap shrink stop limit for maintaining a minimum number of chunks */
-#define HEAP_SHRINK_STOP_LIMIT (1)
-
-/* Tiler heap shrinker seek value, needs to be higher than jit and memory pools */
-#define HEAP_SHRINKER_SEEKS (DEFAULT_SEEKS + 2)
-
-/* Tiler heap shrinker batch value */
-#define HEAP_SHRINKER_BATCH (512)
-
/* The size of the area needed to be vmapped prior to handing the tiler heap
* over to the tiler, so that the shrinker could be invoked.
*/
@@ -127,9 +118,7 @@ struct kbase_csf_tiler_heap_chunk {
* @target_in_flight: Number of render-passes that the driver should attempt
* to keep in flight for which allocation of new chunks is
* allowed. Must not be zero.
- * @desc_chk_flags: Runtime sanity check flags on heap chunk reclaim.
- * @desc_chk_cnt: Counter for providing a deferral gap if runtime sanity check
- * needs to be retried later.
+ * @buf_desc_checked: Indicates if runtime check on buffer descriptor has been done.
*/
struct kbase_csf_tiler_heap {
struct kbase_context *kctx;
@@ -145,40 +134,7 @@ struct kbase_csf_tiler_heap {
u32 chunk_count;
u32 max_chunks;
u16 target_in_flight;
- u8 desc_chk_flags;
- u8 desc_chk_cnt;
-};
-
-/**
- * struct kbase_csf_gpu_buffer_heap - A gpu buffer object specific to tiler heap
- *
- * @cdsbp_0: Descriptor_type and buffer_type
- * @size: The size of the current heap chunk
- * @pointer: Pointer to the current heap chunk
- * @low_pointer: Pointer to low end of current heap chunk
- * @high_pointer: Pointer to high end of current heap chunk
- */
-struct kbase_csf_gpu_buffer_heap {
- u32 cdsbp_0;
- u32 size;
- u64 pointer;
- u64 low_pointer;
- u64 high_pointer;
-} __packed;
-
-/**
- * struct kbase_csf_tiler_heap_shrink_control - Kbase wraper object that wraps around
- * kernel shrink_control
- *
- * @sc: Pointer to shrinker control object in reclaim callback.
- * @count_cb: Functin pointer for counting tiler heap free list.
- * @scan_cb: Functin pointer for counting tiler heap free list.
- */
-
-struct kbase_csf_tiler_heap_shrink_control {
- struct shrink_control *sc;
- u32 (*count_cb)(struct kbase_context *kctx);
- u32 (*scan_cb)(struct kbase_context *kctx, u32 pages);
+ bool buf_desc_checked;
};
#endif /* !_KBASE_CSF_TILER_HEAP_DEF_H_ */
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.c
new file mode 100644
index 0000000..069e827
--- /dev/null
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_csf.h"
+#include "mali_kbase_csf_tiler_heap.h"
+#include "mali_kbase_csf_tiler_heap_reclaim.h"
+
+/* Tiler heap shrinker seek value, needs to be higher than jit and memory pools */
+#define HEAP_SHRINKER_SEEKS (DEFAULT_SEEKS + 2)
+
+/* Tiler heap shrinker batch value */
+#define HEAP_SHRINKER_BATCH (512)
+
+/* Tiler heap reclaim scan (free) method size for limiting a scan run length */
+#define HEAP_RECLAIM_SCAN_BATCH_SIZE (HEAP_SHRINKER_BATCH << 7)
+
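For scale, the scan batch above works out as follows; a quick standalone sketch, assuming the usual 4 KiB page size (the page size is not stated in this file):

#include <stdio.h>

int main(void)
{
	const unsigned long shrinker_batch = 512;              /* HEAP_SHRINKER_BATCH */
	const unsigned long scan_batch = shrinker_batch << 7;  /* HEAP_RECLAIM_SCAN_BATCH_SIZE = 65536 pages */

	printf("scan batch: %lu pages (~%lu MiB at 4 KiB per page)\n",
	       scan_batch, scan_batch * 4096UL / (1024 * 1024));
	return 0;
}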
+static u8 get_kctx_highest_csg_priority(struct kbase_context *kctx)
+{
+ u8 prio;
+
+ for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_LOW;
+ prio++)
+ if (!list_empty(&kctx->csf.sched.runnable_groups[prio]))
+ break;
+
+ if (prio != KBASE_QUEUE_GROUP_PRIORITY_REALTIME && kctx->csf.sched.num_idle_wait_grps) {
+ struct kbase_queue_group *group;
+
+ list_for_each_entry(group, &kctx->csf.sched.idle_wait_groups, link) {
+ if (group->priority < prio)
+ prio = group->priority;
+ }
+ }
+
+ return prio;
+}
+
+static void detach_ctx_from_heap_reclaim_mgr(struct kbase_context *kctx)
+{
+ struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler;
+ struct kbase_csf_ctx_heap_reclaim_info *info = &kctx->csf.sched.heap_info;
+
+ lockdep_assert_held(&scheduler->lock);
+
+ if (!list_empty(&info->mgr_link)) {
+ u32 remaining = (info->nr_est_unused_pages > info->nr_freed_pages) ?
+ info->nr_est_unused_pages - info->nr_freed_pages :
+ 0;
+
+ list_del_init(&info->mgr_link);
+ if (remaining)
+ WARN_ON(atomic_sub_return(remaining, &scheduler->reclaim_mgr.unused_pages) <
+ 0);
+
+ dev_dbg(kctx->kbdev->dev,
+			"Reclaim_mgr_detach: ctx_%d_%d, est_pages=%u, freed_pages=%u", kctx->tgid,
+ kctx->id, info->nr_est_unused_pages, info->nr_freed_pages);
+ }
+}
+
+static void attach_ctx_to_heap_reclaim_mgr(struct kbase_context *kctx)
+{
+ struct kbase_csf_ctx_heap_reclaim_info *const info = &kctx->csf.sched.heap_info;
+ struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler;
+ u8 const prio = get_kctx_highest_csg_priority(kctx);
+
+ lockdep_assert_held(&scheduler->lock);
+
+ if (WARN_ON(!list_empty(&info->mgr_link)))
+ list_del_init(&info->mgr_link);
+
+ /* Count the pages that could be freed */
+ info->nr_est_unused_pages = kbase_csf_tiler_heap_count_kctx_unused_pages(kctx);
+ /* Initialize the scan operation tracking pages */
+ info->nr_freed_pages = 0;
+
+ list_add_tail(&info->mgr_link, &scheduler->reclaim_mgr.ctx_lists[prio]);
+ /* Accumulate the estimated pages to the manager total field */
+ atomic_add(info->nr_est_unused_pages, &scheduler->reclaim_mgr.unused_pages);
+
+ dev_dbg(kctx->kbdev->dev, "Reclaim_mgr_attach: ctx_%d_%d, est_count_pages=%u", kctx->tgid,
+ kctx->id, info->nr_est_unused_pages);
+}
+
+void kbase_csf_tiler_heap_reclaim_sched_notify_grp_active(struct kbase_queue_group *group)
+{
+ struct kbase_context *kctx = group->kctx;
+ struct kbase_csf_ctx_heap_reclaim_info *info = &kctx->csf.sched.heap_info;
+
+ lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock);
+
+ info->on_slot_grps++;
+ /* If the kctx has an on-slot change from 0 => 1, detach it from reclaim_mgr */
+ if (info->on_slot_grps == 1) {
+ dev_dbg(kctx->kbdev->dev, "CSG_%d_%d_%d on-slot, remove kctx from reclaim manager",
+ group->kctx->tgid, group->kctx->id, group->handle);
+
+ detach_ctx_from_heap_reclaim_mgr(kctx);
+ }
+}
+
+void kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(struct kbase_queue_group *group)
+{
+ struct kbase_context *kctx = group->kctx;
+ struct kbase_csf_ctx_heap_reclaim_info *const info = &kctx->csf.sched.heap_info;
+ struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler;
+ const u32 num_groups = kctx->kbdev->csf.global_iface.group_num;
+ u32 on_slot_grps = 0;
+ u32 i;
+
+ lockdep_assert_held(&scheduler->lock);
+
+	/* Group eviction from the scheduler is a bit more involved, but also much
+	 * less frequent. Take the opportunity to actually count the on-slot CSGs
+	 * from the given kctx, for robustness and clearer code logic.
+ */
+ for_each_set_bit(i, scheduler->csg_inuse_bitmap, num_groups) {
+ struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i];
+ struct kbase_queue_group *grp = csg_slot->resident_group;
+
+ if (unlikely(!grp))
+ continue;
+
+ if (grp->kctx == kctx)
+ on_slot_grps++;
+ }
+
+ info->on_slot_grps = on_slot_grps;
+
+ /* If the kctx has no other CSGs on-slot, handle the heap reclaim related actions */
+ if (!info->on_slot_grps) {
+ if (kctx->csf.sched.num_runnable_grps || kctx->csf.sched.num_idle_wait_grps) {
+ /* The kctx has other operational CSGs, attach it if not yet done */
+ if (list_empty(&info->mgr_link)) {
+ dev_dbg(kctx->kbdev->dev,
+ "CSG_%d_%d_%d evict, add kctx to reclaim manager",
+ group->kctx->tgid, group->kctx->id, group->handle);
+
+ attach_ctx_to_heap_reclaim_mgr(kctx);
+ }
+ } else {
+ /* The kctx is a zombie after the group eviction, drop it out */
+ dev_dbg(kctx->kbdev->dev,
+				"CSG_%d_%d_%d evict leading to zombie kctx, detach from reclaim manager",
+ group->kctx->tgid, group->kctx->id, group->handle);
+
+ detach_ctx_from_heap_reclaim_mgr(kctx);
+ }
+ }
+}
+
+void kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend(struct kbase_queue_group *group)
+{
+ struct kbase_context *kctx = group->kctx;
+ struct kbase_csf_ctx_heap_reclaim_info *info = &kctx->csf.sched.heap_info;
+
+ lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock);
+
+ if (!WARN_ON(info->on_slot_grps == 0))
+ info->on_slot_grps--;
+ /* If the kctx has no CSGs on-slot, attach it to scheduler's reclaim manager */
+ if (info->on_slot_grps == 0) {
+ dev_dbg(kctx->kbdev->dev, "CSG_%d_%d_%d off-slot, add kctx to reclaim manager",
+ group->kctx->tgid, group->kctx->id, group->handle);
+
+ attach_ctx_to_heap_reclaim_mgr(kctx);
+ }
+}
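Taken together with the active and evict notifiers above, the rule is that a kctx sits on the reclaim manager only while it has no on-slot CSGs but still has runnable or idle-wait groups. A minimal restatement of that membership rule (illustrative only, not driver code):

/* Illustrative predicate; the driver tracks this via the attach/detach calls. */
static bool kctx_is_reclaim_candidate(unsigned int on_slot_grps,
				      unsigned int num_runnable_grps,
				      unsigned int num_idle_wait_grps)
{
	return (on_slot_grps == 0) && (num_runnable_grps || num_idle_wait_grps);
}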
+
+static unsigned long reclaim_unused_heap_pages(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ struct kbase_csf_sched_heap_reclaim_mgr *const mgr = &scheduler->reclaim_mgr;
+ unsigned long total_freed_pages = 0;
+ int prio;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ for (prio = KBASE_QUEUE_GROUP_PRIORITY_LOW;
+ total_freed_pages < HEAP_RECLAIM_SCAN_BATCH_SIZE &&
+ prio >= KBASE_QUEUE_GROUP_PRIORITY_REALTIME;
+ prio--) {
+ struct kbase_csf_ctx_heap_reclaim_info *info, *tmp;
+ u32 cnt_ctxs = 0;
+
+ list_for_each_entry_safe(info, tmp, &scheduler->reclaim_mgr.ctx_lists[prio],
+ mgr_link) {
+ struct kbase_context *kctx =
+ container_of(info, struct kbase_context, csf.sched.heap_info);
+ u32 freed_pages = kbase_csf_tiler_heap_scan_kctx_unused_pages(
+ kctx, info->nr_est_unused_pages);
+
+ if (freed_pages) {
+ /* Remove the freed pages from the manager retained estimate. The
+ * accumulated removals from the kctx should not exceed the kctx
+ * initially notified contribution amount:
+ * info->nr_est_unused_pages.
+ */
+ u32 rm_cnt = MIN(info->nr_est_unused_pages - info->nr_freed_pages,
+ freed_pages);
+
+ WARN_ON(atomic_sub_return(rm_cnt, &mgr->unused_pages) < 0);
+
+ /* tracking the freed pages, before a potential detach call */
+ info->nr_freed_pages += freed_pages;
+ total_freed_pages += freed_pages;
+
+ schedule_work(&kctx->jit_work);
+ }
+
+			/* If the kctx can't offer any more pages, drop it from the reclaim
+			 * manager, otherwise leave it in. If the kctx changes its state (i.e.
+			 * some CSGs become on-slot), the scheduler will pull it out.
+ */
+ if (info->nr_freed_pages >= info->nr_est_unused_pages || freed_pages == 0)
+ detach_ctx_from_heap_reclaim_mgr(kctx);
+
+ cnt_ctxs++;
+
+ /* Enough has been freed, break to avoid holding the lock too long */
+ if (total_freed_pages >= HEAP_RECLAIM_SCAN_BATCH_SIZE)
+ break;
+ }
+
+ dev_dbg(kbdev->dev, "Reclaim free heap pages: %lu (cnt_ctxs: %u, prio: %d)",
+ total_freed_pages, cnt_ctxs, prio);
+ }
+
+ dev_dbg(kbdev->dev, "Reclaim free total heap pages: %lu (across all CSG priority)",
+ total_freed_pages);
+
+ return total_freed_pages;
+}
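A worked example of the clamp above: with nr_est_unused_pages = 100, nr_freed_pages = 90 so far, and a scan that frees 30 pages, only MIN(100 - 90, 30) = 10 pages are subtracted from the manager's estimate, so a kctx can never reduce it by more than it originally contributed. A minimal helper mirroring that logic (illustrative only):

/* Illustrative only: mirrors the rm_cnt computation above. */
static u32 clamp_estimate_removal(u32 est_pages, u32 already_freed, u32 freed_now)
{
	u32 remaining = (est_pages > already_freed) ? (est_pages - already_freed) : 0;

	return (freed_now < remaining) ? freed_now : remaining;
}

/* clamp_estimate_removal(100, 90, 30) == 10 */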
+
+static unsigned long kbase_csf_tiler_heap_reclaim_count_free_pages(struct kbase_device *kbdev,
+ struct shrink_control *sc)
+{
+ struct kbase_csf_sched_heap_reclaim_mgr *mgr = &kbdev->csf.scheduler.reclaim_mgr;
+ unsigned long page_cnt = atomic_read(&mgr->unused_pages);
+
+ dev_dbg(kbdev->dev, "Reclaim count unused pages (estimate): %lu", page_cnt);
+
+ return page_cnt;
+}
+
+static unsigned long kbase_csf_tiler_heap_reclaim_scan_free_pages(struct kbase_device *kbdev,
+ struct shrink_control *sc)
+{
+ struct kbase_csf_sched_heap_reclaim_mgr *mgr = &kbdev->csf.scheduler.reclaim_mgr;
+ unsigned long freed = 0;
+ unsigned long avail = 0;
+
+	/* If the Scheduler is busy, return 0 */
+ if (!mutex_trylock(&kbdev->csf.scheduler.lock)) {
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+
+		/* Wait for roughly 2 ms */
+ wait_event_timeout(kbdev->csf.event_wait, (scheduler->state != SCHED_BUSY),
+ msecs_to_jiffies(2));
+ if (!mutex_trylock(&kbdev->csf.scheduler.lock)) {
+ dev_dbg(kbdev->dev, "Tiler heap reclaim scan see device busy (freed: 0)");
+ return 0;
+ }
+ }
+
+ avail = atomic_read(&mgr->unused_pages);
+ if (avail)
+ freed = reclaim_unused_heap_pages(kbdev);
+
+ mutex_unlock(&kbdev->csf.scheduler.lock);
+
+#if (KERNEL_VERSION(4, 14, 0) <= LINUX_VERSION_CODE)
+ if (freed > sc->nr_to_scan)
+ sc->nr_scanned = freed;
+#endif /* (KERNEL_VERSION(4, 14, 0) <= LINUX_VERSION_CODE) */
+
+ dev_info(kbdev->dev, "Tiler heap reclaim scan freed pages: %lu (unused: %lu)", freed,
+ avail);
+
+	/* If the estimate said pages were available but none were actually freed, return STOP */
+ if (avail && !freed)
+ return SHRINK_STOP;
+ else
+ return freed;
+}
+
+static unsigned long kbase_csf_tiler_heap_reclaim_count_objects(struct shrinker *s,
+ struct shrink_control *sc)
+{
+ struct kbase_device *kbdev =
+ container_of(s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim);
+
+ return kbase_csf_tiler_heap_reclaim_count_free_pages(kbdev, sc);
+}
+
+static unsigned long kbase_csf_tiler_heap_reclaim_scan_objects(struct shrinker *s,
+ struct shrink_control *sc)
+{
+ struct kbase_device *kbdev =
+ container_of(s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim);
+
+ return kbase_csf_tiler_heap_reclaim_scan_free_pages(kbdev, sc);
+}
+
+void kbase_csf_tiler_heap_reclaim_ctx_init(struct kbase_context *kctx)
+{
+ /* Per-kctx heap_info object initialization */
+ memset(&kctx->csf.sched.heap_info, 0, sizeof(struct kbase_csf_ctx_heap_reclaim_info));
+ INIT_LIST_HEAD(&kctx->csf.sched.heap_info.mgr_link);
+}
+
+void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ struct shrinker *reclaim = &scheduler->reclaim_mgr.heap_reclaim;
+ u8 prio;
+
+ for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_COUNT;
+ prio++)
+ INIT_LIST_HEAD(&scheduler->reclaim_mgr.ctx_lists[prio]);
+
+ atomic_set(&scheduler->reclaim_mgr.unused_pages, 0);
+
+ reclaim->count_objects = kbase_csf_tiler_heap_reclaim_count_objects;
+ reclaim->scan_objects = kbase_csf_tiler_heap_reclaim_scan_objects;
+ reclaim->seeks = HEAP_SHRINKER_SEEKS;
+ reclaim->batch = HEAP_SHRINKER_BATCH;
+
+#if !defined(CONFIG_MALI_VECTOR_DUMP)
+#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE
+ register_shrinker(reclaim);
+#else
+ register_shrinker(reclaim, "mali-csf-tiler-heap");
+#endif
+#endif
+}
+
+void kbase_csf_tiler_heap_reclaim_mgr_term(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ u8 prio;
+
+#if !defined(CONFIG_MALI_VECTOR_DUMP)
+ unregister_shrinker(&scheduler->reclaim_mgr.heap_reclaim);
+#endif
+
+ for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_COUNT;
+ prio++)
+ WARN_ON(!list_empty(&scheduler->reclaim_mgr.ctx_lists[prio]));
+
+ WARN_ON(atomic_read(&scheduler->reclaim_mgr.unused_pages));
+}
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.h b/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.h
new file mode 100644
index 0000000..b6e580e
--- /dev/null
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_CSF_TILER_HEAP_RECLAIM_H_
+#define _KBASE_CSF_TILER_HEAP_RECLAIM_H_
+
+#include <mali_kbase.h>
+
+/**
+ * kbase_csf_tiler_heap_reclaim_sched_notify_grp_active - Notifier function for the scheduler
+ * to use when a group is put on-slot.
+ *
+ * @group: Pointer to the group object that has been placed on-slot for running.
+ *
+ */
+void kbase_csf_tiler_heap_reclaim_sched_notify_grp_active(struct kbase_queue_group *group);
+
+/**
+ * kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict - Notifier function for the scheduler
+ *           to use when a group is evicted out of the scheduler's scope, i.e. no run of
+ * the group is possible afterwards.
+ *
+ * @group: Pointer to the group object that has been evicted.
+ *
+ */
+void kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(struct kbase_queue_group *group);
+
+/**
+ * kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend - Notifier function for the scheduler
+ * to use when a group is suspended from running, but could resume in future.
+ *
+ * @group: Pointer to the group object that is in suspended state.
+ *
+ */
+void kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend(struct kbase_queue_group *group);
+
+/**
+ * kbase_csf_tiler_heap_reclaim_ctx_init - Initializer for the per-context data fields used
+ *                                         by the tiler heap reclaim manager.
+ *
+ * @kctx: Pointer to the kbase_context.
+ *
+ */
+void kbase_csf_tiler_heap_reclaim_ctx_init(struct kbase_context *kctx);
+
+/**
+ * kbase_csf_tiler_heap_reclaim_mgr_init - Initializer for the tiler heap reclaim manager.
+ *
+ * @kbdev: Pointer to the device.
+ *
+ */
+void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_tiler_heap_reclaim_mgr_term - Termination call for the tiler heap reclaim manager.
+ *
+ * @kbdev: Pointer to the device.
+ *
+ */
+void kbase_csf_tiler_heap_reclaim_mgr_term(struct kbase_device *kbdev);
+
+#endif
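The notifiers declared above are intended to be driven from the CSF scheduler's slot-management paths; the actual call sites live in the scheduler changes elsewhere in this patch and are not shown here. A minimal sketch of the expected hook points (the caller names are hypothetical, only the notifier calls are real API):

/* Hypothetical caller names; only the notifier calls are real API. */
static void on_csg_slot_programmed(struct kbase_queue_group *group)
{
	kbase_csf_tiler_heap_reclaim_sched_notify_grp_active(group);
}

static void on_csg_slot_suspended(struct kbase_queue_group *group)
{
	kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend(group);
}

static void on_group_evicted(struct kbase_queue_group *group)
{
	kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(group);
}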
diff --git a/mali_kbase/csf/mali_kbase_csf_tl_reader.c b/mali_kbase/csf/mali_kbase_csf_tl_reader.c
index 27677ba..910ba22 100644
--- a/mali_kbase/csf/mali_kbase_csf_tl_reader.c
+++ b/mali_kbase/csf/mali_kbase_csf_tl_reader.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -31,9 +31,7 @@
#include "mali_kbase_pm.h"
#include "mali_kbase_hwaccess_time.h"
-#include <linux/gcd.h>
#include <linux/math64.h>
-#include <asm/arch_timer.h>
#if IS_ENABLED(CONFIG_DEBUG_FS)
#include "tl/mali_kbase_timeline_priv.h"
@@ -89,93 +87,15 @@ DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_tl_poll_interval_fops,
kbase_csf_tl_debugfs_poll_interval_read,
kbase_csf_tl_debugfs_poll_interval_write, "%llu\n");
-
void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev)
{
debugfs_create_file("csf_tl_poll_interval_in_ms", 0644,
kbdev->debugfs_instr_directory, kbdev,
&kbase_csf_tl_poll_interval_fops);
-
}
#endif
/**
- * get_cpu_gpu_time() - Get current CPU and GPU timestamps.
- *
- * @kbdev: Kbase device.
- * @cpu_ts: Output CPU timestamp.
- * @gpu_ts: Output GPU timestamp.
- * @gpu_cycle: Output GPU cycle counts.
- */
-static void get_cpu_gpu_time(
- struct kbase_device *kbdev,
- u64 *cpu_ts,
- u64 *gpu_ts,
- u64 *gpu_cycle)
-{
- struct timespec64 ts;
-
- kbase_pm_context_active(kbdev);
- kbase_backend_get_gpu_time(kbdev, gpu_cycle, gpu_ts, &ts);
- kbase_pm_context_idle(kbdev);
-
- if (cpu_ts)
- *cpu_ts = ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
-}
-
-
-/**
- * kbase_ts_converter_init() - Initialize system timestamp converter.
- *
- * @self: System Timestamp Converter instance.
- * @kbdev: Kbase device pointer
- *
- * Return: Zero on success, -1 otherwise.
- */
-static int kbase_ts_converter_init(
- struct kbase_ts_converter *self,
- struct kbase_device *kbdev)
-{
- u64 cpu_ts = 0;
- u64 gpu_ts = 0;
- u64 freq;
- u64 common_factor;
-
- get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL);
- freq = arch_timer_get_cntfrq();
-
- if (!freq) {
- dev_warn(kbdev->dev, "arch_timer_get_rate() is zero!");
- return -1;
- }
-
- common_factor = gcd(NSEC_PER_SEC, freq);
-
- self->multiplier = div64_u64(NSEC_PER_SEC, common_factor);
- self->divisor = div64_u64(freq, common_factor);
- self->offset =
- cpu_ts - div64_u64(gpu_ts * self->multiplier, self->divisor);
-
- return 0;
-}
-
-/**
- * kbase_ts_converter_convert() - Convert GPU timestamp to CPU timestamp.
- *
- * @self: System Timestamp Converter instance.
- * @gpu_ts: System timestamp value to converter.
- *
- * Return: The CPU timestamp.
- */
-static void __maybe_unused
-kbase_ts_converter_convert(const struct kbase_ts_converter *self, u64 *gpu_ts)
-{
- u64 old_gpu_ts = *gpu_ts;
- *gpu_ts = div64_u64(old_gpu_ts * self->multiplier, self->divisor) +
- self->offset;
-}
-
-/**
* tl_reader_overflow_notify() - Emit stream overflow tracepoint.
*
* @self: CSFFW TL Reader instance.
@@ -251,7 +171,6 @@ static void tl_reader_reset(struct kbase_csf_tl_reader *self)
self->tl_header.btc = 0;
}
-
int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
{
int ret = 0;
@@ -276,7 +195,6 @@ int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
return -EBUSY;
}
-
/* Copying the whole buffer in a single shot. We assume
* that the buffer will not contain partially written messages.
*/
@@ -327,8 +245,8 @@ int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
{
struct kbase_csffw_tl_message *msg =
(struct kbase_csffw_tl_message *) csffw_data_it;
- kbase_ts_converter_convert(&self->ts_converter,
- &msg->timestamp);
+ msg->timestamp =
+ kbase_backend_time_convert_gpu_to_cpu(kbdev, msg->timestamp);
}
/* Copy the message out to the tl_stream. */
@@ -402,9 +320,6 @@ static int tl_reader_init_late(
return -1;
}
- if (kbase_ts_converter_init(&self->ts_converter, kbdev))
- return -1;
-
self->kbdev = kbdev;
self->trace_buffer = tb;
self->tl_header.data = hdr;
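The converter removed above is replaced by kbase_backend_time_convert_gpu_to_cpu(); per the documentation also being removed from mali_kbase_csf_tl_reader.h below, the conversion is the linear map CPU_ts = alpha * GPU_ts + beta, with alpha held as a reduced multiplier/divisor pair. A minimal sketch of that mapping, assuming the backend keeps an equivalent representation (the real implementation now lives in the kbase time backend and is not shown in this hunk):

/* Sketch only; field meanings follow the removed kbase_ts_converter. */
struct ts_linear_map {
	u64 multiplier; /* NSEC_PER_SEC / gcd(NSEC_PER_SEC, freq) */
	u64 divisor;    /* freq / gcd(NSEC_PER_SEC, freq) */
	s64 offset;     /* cpu_sample - gpu_sample * multiplier / divisor */
};

static u64 gpu_ts_to_cpu_ns(const struct ts_linear_map *map, u64 gpu_ts)
{
	return div64_u64(gpu_ts * map->multiplier, map->divisor) + map->offset;
}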
diff --git a/mali_kbase/csf/mali_kbase_csf_tl_reader.h b/mali_kbase/csf/mali_kbase_csf_tl_reader.h
index d554d56..12b285f 100644
--- a/mali_kbase/csf/mali_kbase_csf_tl_reader.h
+++ b/mali_kbase/csf/mali_kbase_csf_tl_reader.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -40,37 +40,6 @@ struct kbase_tlstream;
struct kbase_device;
/**
- * struct kbase_ts_converter - System timestamp to CPU timestamp converter state.
- *
- * @multiplier: Numerator of the converter's fraction.
- * @divisor: Denominator of the converter's fraction.
- * @offset: Converter's offset term.
- *
- * According to Generic timer spec, system timer:
- * - Increments at a fixed frequency
- * - Starts operating from zero
- *
- * Hence CPU time is a linear function of System Time.
- *
- * CPU_ts = alpha * SYS_ts + beta
- *
- * Where
- * - alpha = 10^9/SYS_ts_freq
- * - beta is calculated by two timer samples taken at the same time:
- * beta = CPU_ts_s - SYS_ts_s * alpha
- *
- * Since alpha is a rational number, we minimizing possible
- * rounding error by simplifying the ratio. Thus alpha is stored
- * as a simple `multiplier / divisor` ratio.
- *
- */
-struct kbase_ts_converter {
- u64 multiplier;
- u64 divisor;
- s64 offset;
-};
-
-/**
* struct kbase_csf_tl_reader - CSFFW timeline reader state.
*
* @read_timer: Timer used for periodical tracebufer reading.
@@ -106,7 +75,6 @@ struct kbase_csf_tl_reader {
size_t size;
size_t btc;
} tl_header;
- struct kbase_ts_converter ts_converter;
bool got_first_event;
bool is_active;
diff --git a/mali_kbase/csf/mali_kbase_debug_csf_fault.c b/mali_kbase/csf/mali_kbase_debug_csf_fault.c
new file mode 100644
index 0000000..185779c
--- /dev/null
+++ b/mali_kbase/csf/mali_kbase_debug_csf_fault.c
@@ -0,0 +1,271 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include <mali_kbase.h>
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+
+/**
+ * kbasep_fault_occurred - Check if fault occurred.
+ *
+ * @kbdev: Device pointer
+ *
+ * Return: true if a fault occurred.
+ */
+static bool kbasep_fault_occurred(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ bool ret;
+
+ spin_lock_irqsave(&kbdev->csf.dof.lock, flags);
+ ret = (kbdev->csf.dof.error_code != DF_NO_ERROR);
+ spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags);
+
+ return ret;
+}
+
+void kbase_debug_csf_fault_wait_completion(struct kbase_device *kbdev)
+{
+ if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev))) {
+ dev_dbg(kbdev->dev, "No userspace client for dumping exists");
+ return;
+ }
+
+ wait_event(kbdev->csf.dof.dump_wait_wq, kbase_debug_csf_fault_dump_complete(kbdev));
+}
+KBASE_EXPORT_TEST_API(kbase_debug_csf_fault_wait_completion);
+
+/**
+ * kbase_debug_csf_fault_wakeup - Wake up a waiting user space client.
+ *
+ * @kbdev: Kbase device
+ */
+static void kbase_debug_csf_fault_wakeup(struct kbase_device *kbdev)
+{
+ wake_up_interruptible(&kbdev->csf.dof.fault_wait_wq);
+}
+
+bool kbase_debug_csf_fault_notify(struct kbase_device *kbdev,
+ struct kbase_context *kctx, enum dumpfault_error_type error)
+{
+ unsigned long flags;
+
+ if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev)))
+ return false;
+
+ if (WARN_ON(error == DF_NO_ERROR))
+ return false;
+
+ if (kctx && kbase_ctx_flag(kctx, KCTX_DYING)) {
+ dev_info(kbdev->dev, "kctx %d_%d is dying when error %d is reported",
+ kctx->tgid, kctx->id, error);
+ kctx = NULL;
+ }
+
+ spin_lock_irqsave(&kbdev->csf.dof.lock, flags);
+
+ /* Only one fault at a time can be processed */
+ if (kbdev->csf.dof.error_code) {
+ dev_info(kbdev->dev, "skip this fault as there's a pending fault");
+ goto unlock;
+ }
+
+ kbdev->csf.dof.kctx_tgid = kctx ? kctx->tgid : 0;
+ kbdev->csf.dof.kctx_id = kctx ? kctx->id : 0;
+ kbdev->csf.dof.error_code = error;
+ kbase_debug_csf_fault_wakeup(kbdev);
+
+unlock:
+ spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags);
+ return true;
+}
+
+static ssize_t debug_csf_fault_read(struct file *file, char __user *buffer, size_t size,
+ loff_t *f_pos)
+{
+#define BUF_SIZE 64
+ struct kbase_device *kbdev;
+ unsigned long flags;
+ int count;
+ char buf[BUF_SIZE];
+ u32 tgid, ctx_id;
+ enum dumpfault_error_type error_code;
+
+ if (unlikely(!file)) {
+ pr_warn("%s: file is NULL", __func__);
+ return -EINVAL;
+ }
+
+ kbdev = file->private_data;
+ if (unlikely(!buffer)) {
+ dev_warn(kbdev->dev, "%s: buffer is NULL", __func__);
+ return -EINVAL;
+ }
+
+ if (unlikely(*f_pos < 0)) {
+ dev_warn(kbdev->dev, "%s: f_pos is negative", __func__);
+ return -EINVAL;
+ }
+
+ if (size < sizeof(buf)) {
+ dev_warn(kbdev->dev, "%s: buffer is too small", __func__);
+ return -EINVAL;
+ }
+
+ if (wait_event_interruptible(kbdev->csf.dof.fault_wait_wq, kbasep_fault_occurred(kbdev)))
+ return -ERESTARTSYS;
+
+ spin_lock_irqsave(&kbdev->csf.dof.lock, flags);
+ tgid = kbdev->csf.dof.kctx_tgid;
+ ctx_id = kbdev->csf.dof.kctx_id;
+ error_code = kbdev->csf.dof.error_code;
+ BUILD_BUG_ON(sizeof(buf) < (sizeof(tgid) + sizeof(ctx_id) + sizeof(error_code)));
+ count = scnprintf(buf, sizeof(buf), "%u_%u_%u\n", tgid, ctx_id, error_code);
+ spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags);
+
+ dev_info(kbdev->dev, "debug csf fault info read");
+ return simple_read_from_buffer(buffer, size, f_pos, buf, count);
+}
+
+static int debug_csf_fault_open(struct inode *in, struct file *file)
+{
+ struct kbase_device *kbdev;
+
+ if (unlikely(!in)) {
+ pr_warn("%s: inode is NULL", __func__);
+ return -EINVAL;
+ }
+
+ kbdev = in->i_private;
+ if (unlikely(!file)) {
+ dev_warn(kbdev->dev, "%s: file is NULL", __func__);
+ return -EINVAL;
+ }
+
+ if (atomic_cmpxchg(&kbdev->csf.dof.enabled, 0, 1) == 1) {
+ dev_warn(kbdev->dev, "Only one client is allowed for dump on fault");
+ return -EBUSY;
+ }
+
+ dev_info(kbdev->dev, "debug csf fault file open");
+
+ return simple_open(in, file);
+}
+
+static ssize_t debug_csf_fault_write(struct file *file, const char __user *ubuf, size_t count,
+ loff_t *ppos)
+{
+ struct kbase_device *kbdev;
+ unsigned long flags;
+
+ if (unlikely(!file)) {
+ pr_warn("%s: file is NULL", __func__);
+ return -EINVAL;
+ }
+
+ kbdev = file->private_data;
+ spin_lock_irqsave(&kbdev->csf.dof.lock, flags);
+ kbdev->csf.dof.error_code = DF_NO_ERROR;
+ kbdev->csf.dof.kctx_tgid = 0;
+ kbdev->csf.dof.kctx_id = 0;
+ dev_info(kbdev->dev, "debug csf fault dump complete");
+ spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags);
+
+ /* User space finished the dump.
+ * Wake up blocked kernel threads to proceed.
+ */
+ wake_up(&kbdev->csf.dof.dump_wait_wq);
+
+ return count;
+}
+
+static int debug_csf_fault_release(struct inode *in, struct file *file)
+{
+ struct kbase_device *kbdev;
+ unsigned long flags;
+
+ if (unlikely(!in)) {
+ pr_warn("%s: inode is NULL", __func__);
+ return -EINVAL;
+ }
+
+ kbdev = in->i_private;
+ spin_lock_irqsave(&kbdev->csf.dof.lock, flags);
+ kbdev->csf.dof.kctx_tgid = 0;
+ kbdev->csf.dof.kctx_id = 0;
+ kbdev->csf.dof.error_code = DF_NO_ERROR;
+ spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags);
+
+ atomic_set(&kbdev->csf.dof.enabled, 0);
+ dev_info(kbdev->dev, "debug csf fault file close");
+
+ /* User space closed the debugfs file.
+ * Wake up blocked kernel threads to resume.
+ */
+ wake_up(&kbdev->csf.dof.dump_wait_wq);
+
+ return 0;
+}
+
+static const struct file_operations kbasep_debug_csf_fault_fops = {
+ .owner = THIS_MODULE,
+ .open = debug_csf_fault_open,
+ .read = debug_csf_fault_read,
+ .write = debug_csf_fault_write,
+ .llseek = default_llseek,
+ .release = debug_csf_fault_release,
+};
+
+void kbase_debug_csf_fault_debugfs_init(struct kbase_device *kbdev)
+{
+ const char *fname = "csf_fault";
+
+ if (unlikely(!kbdev)) {
+ pr_warn("%s: kbdev is NULL", __func__);
+ return;
+ }
+
+ debugfs_create_file(fname, 0600, kbdev->mali_debugfs_directory, kbdev,
+ &kbasep_debug_csf_fault_fops);
+}
+
+int kbase_debug_csf_fault_init(struct kbase_device *kbdev)
+{
+ if (unlikely(!kbdev)) {
+ pr_warn("%s: kbdev is NULL", __func__);
+ return -EINVAL;
+ }
+
+ init_waitqueue_head(&(kbdev->csf.dof.fault_wait_wq));
+ init_waitqueue_head(&(kbdev->csf.dof.dump_wait_wq));
+ spin_lock_init(&kbdev->csf.dof.lock);
+ kbdev->csf.dof.kctx_tgid = 0;
+ kbdev->csf.dof.kctx_id = 0;
+ kbdev->csf.dof.error_code = DF_NO_ERROR;
+ atomic_set(&kbdev->csf.dof.enabled, 0);
+
+ return 0;
+}
+
+void kbase_debug_csf_fault_term(struct kbase_device *kbdev)
+{
+}
+#endif /* CONFIG_DEBUG_FS */
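Expected userspace flow for the csf_fault file: open it (only one client at a time), block on read until the driver reports "<tgid>_<ctx_id>_<error_code>", collect whatever dump is needed, then write anything back to acknowledge so blocked kernel threads can resume. A hypothetical client, assuming debugfs is mounted and the device directory is mali0 (both platform-dependent):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char info[128] = { 0 };
	int fd = open("/sys/kernel/debug/mali0/csf_fault", O_RDWR);

	if (fd < 0)
		return 1;

	/* Blocks until a fault is reported; the read buffer must be at least
	 * 64 bytes or the driver returns -EINVAL.
	 */
	if (read(fd, info, sizeof(info) - 1) > 0)
		printf("fault: %s", info);

	/* ... collect the dump here, then acknowledge completion ... */
	if (write(fd, "done", strlen("done")) < 0)
		perror("ack");

	close(fd);
	return 0;
}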
diff --git a/mali_kbase/csf/mali_kbase_debug_csf_fault.h b/mali_kbase/csf/mali_kbase_debug_csf_fault.h
new file mode 100644
index 0000000..6e9b1a9
--- /dev/null
+++ b/mali_kbase/csf/mali_kbase_debug_csf_fault.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_DEBUG_CSF_FAULT_H
+#define _KBASE_DEBUG_CSF_FAULT_H
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+/**
+ * kbase_debug_csf_fault_debugfs_init - Initialize CSF fault debugfs
+ * @kbdev: Device pointer
+ */
+void kbase_debug_csf_fault_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_csf_fault_init - Create the fault event wait queue per device
+ * and initialize the required resources.
+ * @kbdev: Device pointer
+ *
+ * Return: Zero on success or a negative error code.
+ */
+int kbase_debug_csf_fault_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_csf_fault_term - Clean up resources created by
+ *			kbase_debug_csf_fault_init().
+ * @kbdev: Device pointer
+ */
+void kbase_debug_csf_fault_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_csf_fault_wait_completion - Wait for the client to complete.
+ *
+ * @kbdev: Device Pointer
+ *
+ * Wait for the user space client to finish reading the fault information.
+ * This function must be called in thread context.
+ */
+void kbase_debug_csf_fault_wait_completion(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_csf_fault_notify - Notify client of a fault.
+ *
+ * @kbdev: Device pointer
+ * @kctx: Faulty context (can be NULL)
+ * @error: Error code.
+ *
+ * Store fault information and wake up the user space client.
+ *
+ * Return: true if a dump on fault was initiated or is already in progress, so
+ *         the caller can opt to wait for the dumping to complete.
+ */
+bool kbase_debug_csf_fault_notify(struct kbase_device *kbdev,
+ struct kbase_context *kctx, enum dumpfault_error_type error);
+
+/**
+ * kbase_debug_csf_fault_dump_enabled - Check if dump on fault is enabled.
+ *
+ * @kbdev: Device pointer
+ *
+ * Return: true if debugfs file is opened so dump on fault is enabled.
+ */
+static inline bool kbase_debug_csf_fault_dump_enabled(struct kbase_device *kbdev)
+{
+ return atomic_read(&kbdev->csf.dof.enabled);
+}
+
+/**
+ * kbase_debug_csf_fault_dump_complete - Check if dump on fault is completed.
+ *
+ * @kbdev: Device pointer
+ *
+ * Return: true if dump on fault completes or file is closed.
+ */
+static inline bool kbase_debug_csf_fault_dump_complete(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ bool ret;
+
+ if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev)))
+ return true;
+
+ spin_lock_irqsave(&kbdev->csf.dof.lock, flags);
+ ret = (kbdev->csf.dof.error_code == DF_NO_ERROR);
+ spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags);
+
+ return ret;
+}
+#else /* CONFIG_DEBUG_FS */
+static inline int kbase_debug_csf_fault_init(struct kbase_device *kbdev)
+{
+ return 0;
+}
+
+static inline void kbase_debug_csf_fault_term(struct kbase_device *kbdev)
+{
+}
+
+static inline void kbase_debug_csf_fault_wait_completion(struct kbase_device *kbdev)
+{
+}
+
+static inline bool kbase_debug_csf_fault_notify(struct kbase_device *kbdev,
+ struct kbase_context *kctx, enum dumpfault_error_type error)
+{
+ return false;
+}
+
+static inline bool kbase_debug_csf_fault_dump_enabled(struct kbase_device *kbdev)
+{
+ return false;
+}
+
+static inline bool kbase_debug_csf_fault_dump_complete(struct kbase_device *kbdev)
+{
+ return true;
+}
+#endif /* CONFIG_DEBUG_FS */
+
+#endif /*_KBASE_DEBUG_CSF_FAULT_H*/
diff --git a/mali_kbase/debug/Kbuild b/mali_kbase/debug/Kbuild
index 1682c0f..8beee2d 100644
--- a/mali_kbase/debug/Kbuild
+++ b/mali_kbase/debug/Kbuild
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,7 @@ mali_kbase-y += debug/mali_kbase_debug_ktrace.o
ifeq ($(CONFIG_MALI_CSF_SUPPORT),y)
mali_kbase-y += debug/backend/mali_kbase_debug_ktrace_csf.o
+ mali_kbase-$(CONFIG_MALI_CORESIGHT) += debug/backend/mali_kbase_debug_coresight_csf.o
else
mali_kbase-y += debug/backend/mali_kbase_debug_ktrace_jm.o
endif
diff --git a/mali_kbase/debug/backend/mali_kbase_debug_coresight_csf.c b/mali_kbase/debug/backend/mali_kbase_debug_coresight_csf.c
new file mode 100644
index 0000000..ff5f947
--- /dev/null
+++ b/mali_kbase/debug/backend/mali_kbase_debug_coresight_csf.c
@@ -0,0 +1,851 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include <mali_kbase.h>
+#include <linux/slab.h>
+#include <csf/mali_kbase_csf_registers.h>
+#include <csf/mali_kbase_csf_firmware.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <linux/mali_kbase_debug_coresight_csf.h>
+#include <debug/backend/mali_kbase_debug_coresight_internal_csf.h>
+
+static const char *coresight_state_to_string(enum kbase_debug_coresight_csf_state state)
+{
+ switch (state) {
+ case KBASE_DEBUG_CORESIGHT_CSF_DISABLED:
+ return "DISABLED";
+ case KBASE_DEBUG_CORESIGHT_CSF_ENABLED:
+ return "ENABLED";
+ default:
+ break;
+ }
+
+ return "UNKNOWN";
+}
+
+static bool validate_reg_addr(struct kbase_debug_coresight_csf_client *client,
+ struct kbase_device *kbdev, u32 reg_addr, u8 op_type)
+{
+ int i;
+
+ if (reg_addr & 0x3) {
+ dev_err(kbdev->dev, "Invalid operation %d: reg_addr (0x%x) not 32bit aligned",
+ op_type, reg_addr);
+ return false;
+ }
+
+ for (i = 0; i < client->nr_ranges; i++) {
+ struct kbase_debug_coresight_csf_address_range *range = &client->addr_ranges[i];
+
+ if ((range->start <= reg_addr) && (reg_addr <= range->end))
+ return true;
+ }
+
+ dev_err(kbdev->dev, "Invalid operation %d: reg_addr (0x%x) not in client range", op_type,
+ reg_addr);
+
+ return false;
+}
+
+static bool validate_op(struct kbase_debug_coresight_csf_client *client,
+ struct kbase_debug_coresight_csf_op *op)
+{
+ struct kbase_device *kbdev;
+ u32 reg;
+
+ if (!op)
+ return false;
+
+ if (!client)
+ return false;
+
+ kbdev = (struct kbase_device *)client->drv_data;
+
+ switch (op->type) {
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_NOP:
+ return true;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM:
+ if (validate_reg_addr(client, kbdev, op->op.write_imm.reg_addr, op->type))
+ return true;
+
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM_RANGE:
+ for (reg = op->op.write_imm_range.reg_start; reg <= op->op.write_imm_range.reg_end;
+ reg += sizeof(u32)) {
+ if (!validate_reg_addr(client, kbdev, reg, op->type))
+ return false;
+ }
+
+ return true;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE:
+ if (!op->op.write.ptr) {
+ dev_err(kbdev->dev, "Invalid operation %d: ptr not set", op->type);
+ break;
+ }
+
+ if (validate_reg_addr(client, kbdev, op->op.write.reg_addr, op->type))
+ return true;
+
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_READ:
+ if (!op->op.read.ptr) {
+ dev_err(kbdev->dev, "Invalid operation %d: ptr not set", op->type);
+ break;
+ }
+
+ if (validate_reg_addr(client, kbdev, op->op.read.reg_addr, op->type))
+ return true;
+
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_POLL:
+ if (validate_reg_addr(client, kbdev, op->op.poll.reg_addr, op->type))
+ return true;
+
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_AND:
+ fallthrough;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_OR:
+ fallthrough;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_XOR:
+ fallthrough;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_NOT:
+ if (op->op.bitw.ptr != NULL)
+ return true;
+
+ dev_err(kbdev->dev, "Invalid bitwise operation pointer");
+
+ break;
+ default:
+ dev_err(kbdev->dev, "Invalid operation %d", op->type);
+ break;
+ }
+
+ return false;
+}
+
+static bool validate_seq(struct kbase_debug_coresight_csf_client *client,
+ struct kbase_debug_coresight_csf_sequence *seq)
+{
+ struct kbase_debug_coresight_csf_op *ops = seq->ops;
+ int nr_ops = seq->nr_ops;
+ int i;
+
+ for (i = 0; i < nr_ops; i++) {
+ if (!validate_op(client, &ops[i]))
+ return false;
+ }
+
+ return true;
+}
+
+static int execute_op(struct kbase_device *kbdev, struct kbase_debug_coresight_csf_op *op)
+{
+ int result = -EINVAL;
+ u32 reg;
+
+ dev_dbg(kbdev->dev, "Execute operation %d", op->type);
+
+ switch (op->type) {
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_NOP:
+ result = 0;
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM:
+		result = kbase_csf_firmware_mcu_register_write(kbdev, op->op.write_imm.reg_addr,
+ op->op.write_imm.val);
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM_RANGE:
+ for (reg = op->op.write_imm_range.reg_start; reg <= op->op.write_imm_range.reg_end;
+ reg += sizeof(u32)) {
+ result = kbase_csf_firmware_mcu_register_write(kbdev, reg,
+ op->op.write_imm_range.val);
+			if (result)
+ break;
+ }
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE:
+ result = kbase_csf_firmware_mcu_register_write(kbdev, op->op.write.reg_addr,
+ *op->op.write.ptr);
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_READ:
+ result = kbase_csf_firmware_mcu_register_read(kbdev, op->op.read.reg_addr,
+ op->op.read.ptr);
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_POLL:
+ result = kbase_csf_firmware_mcu_register_poll(kbdev, op->op.poll.reg_addr,
+ op->op.poll.mask, op->op.poll.val);
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_AND:
+ *op->op.bitw.ptr &= op->op.bitw.val;
+ result = 0;
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_OR:
+ *op->op.bitw.ptr |= op->op.bitw.val;
+ result = 0;
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_XOR:
+ *op->op.bitw.ptr ^= op->op.bitw.val;
+ result = 0;
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_NOT:
+ *op->op.bitw.ptr = ~(*op->op.bitw.ptr);
+ result = 0;
+ break;
+ default:
+ dev_err(kbdev->dev, "Invalid operation %d", op->type);
+ break;
+ }
+
+ return result;
+}
+
+static int coresight_config_enable(struct kbase_device *kbdev,
+ struct kbase_debug_coresight_csf_config *config)
+{
+ int ret = 0;
+ int i;
+
+ if (!config)
+ return -EINVAL;
+
+ if (config->state == KBASE_DEBUG_CORESIGHT_CSF_ENABLED)
+ return ret;
+
+ for (i = 0; config->enable_seq && !ret && i < config->enable_seq->nr_ops; i++)
+ ret = execute_op(kbdev, &config->enable_seq->ops[i]);
+
+ if (!ret) {
+ dev_dbg(kbdev->dev, "Coresight config (0x%pK) state transition: %s to %s", config,
+ coresight_state_to_string(config->state),
+ coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_ENABLED));
+ config->state = KBASE_DEBUG_CORESIGHT_CSF_ENABLED;
+ }
+
+ /* Always assign the return code during config enable.
+ * It gets propagated when calling config disable.
+ */
+ config->error = ret;
+
+ return ret;
+}
+
+static int coresight_config_disable(struct kbase_device *kbdev,
+ struct kbase_debug_coresight_csf_config *config)
+{
+ int ret = 0;
+ int i;
+
+ if (!config)
+ return -EINVAL;
+
+ if (config->state == KBASE_DEBUG_CORESIGHT_CSF_DISABLED)
+ return ret;
+
+ for (i = 0; config->disable_seq && !ret && i < config->disable_seq->nr_ops; i++)
+ ret = execute_op(kbdev, &config->disable_seq->ops[i]);
+
+ if (!ret) {
+ dev_dbg(kbdev->dev, "Coresight config (0x%pK) state transition: %s to %s", config,
+ coresight_state_to_string(config->state),
+ coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_DISABLED));
+ config->state = KBASE_DEBUG_CORESIGHT_CSF_DISABLED;
+ } else {
+		/* Only assign the error if ret is not 0, as we don't want to
+		 * overwrite an error from config enable.
+ */
+ if (!config->error)
+ config->error = ret;
+ }
+
+ return ret;
+}
+
+void *kbase_debug_coresight_csf_register(void *drv_data,
+ struct kbase_debug_coresight_csf_address_range *ranges,
+ int nr_ranges)
+{
+ struct kbase_debug_coresight_csf_client *client, *client_entry;
+ struct kbase_device *kbdev;
+ unsigned long flags;
+ int k;
+
+ if (unlikely(!drv_data)) {
+ pr_err("NULL drv_data");
+ return NULL;
+ }
+
+ kbdev = (struct kbase_device *)drv_data;
+
+ if (unlikely(!ranges)) {
+ dev_err(kbdev->dev, "NULL ranges");
+ return NULL;
+ }
+
+ if (unlikely(!nr_ranges)) {
+ dev_err(kbdev->dev, "nr_ranges is 0");
+ return NULL;
+ }
+
+ for (k = 0; k < nr_ranges; k++) {
+ if (ranges[k].end < ranges[k].start) {
+ dev_err(kbdev->dev, "Invalid address ranges 0x%08x - 0x%08x",
+ ranges[k].start, ranges[k].end);
+ return NULL;
+ }
+ }
+
+ client = kzalloc(sizeof(struct kbase_debug_coresight_csf_client), GFP_KERNEL);
+
+ if (!client)
+ return NULL;
+
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+ list_for_each_entry(client_entry, &kbdev->csf.coresight.clients, link) {
+ struct kbase_debug_coresight_csf_address_range *client_ranges =
+ client_entry->addr_ranges;
+ int i;
+
+ for (i = 0; i < client_entry->nr_ranges; i++) {
+ int j;
+
+ for (j = 0; j < nr_ranges; j++) {
+ if ((ranges[j].start < client_ranges[i].end) &&
+ (client_ranges[i].start < ranges[j].end)) {
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+ kfree(client);
+ dev_err(kbdev->dev,
+					"Existing client range 0x%08x - 0x%08x overlaps requested range 0x%08x - 0x%08x",
+ client_ranges[i].start, client_ranges[i].end,
+ ranges[j].start, ranges[j].end);
+
+ return NULL;
+ }
+ }
+ }
+ }
+
+ client->drv_data = drv_data;
+ client->addr_ranges = ranges;
+ client->nr_ranges = nr_ranges;
+ list_add(&client->link, &kbdev->csf.coresight.clients);
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+
+ return client;
+}
+EXPORT_SYMBOL(kbase_debug_coresight_csf_register);
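The rejection above uses the standard interval-overlap test, with the range end treated as exclusive by this particular check. In isolation (illustrative only):

/* Overlap test as used above; end treated as exclusive here. */
static bool ranges_overlap(u32 a_start, u32 a_end, u32 b_start, u32 b_end)
{
	return (a_start < b_end) && (b_start < a_end);
}

/* ranges_overlap(0x1000, 0x2000, 0x1800, 0x2800) -> true  (partial overlap)
 * ranges_overlap(0x1000, 0x2000, 0x2000, 0x3000) -> false (merely adjacent)
 */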
+
+void kbase_debug_coresight_csf_unregister(void *client_data)
+{
+ struct kbase_debug_coresight_csf_client *client;
+ struct kbase_debug_coresight_csf_config *config_entry;
+ struct kbase_device *kbdev;
+ unsigned long flags;
+ bool retry = true;
+
+ if (unlikely(!client_data)) {
+ pr_err("NULL client");
+ return;
+ }
+
+ client = (struct kbase_debug_coresight_csf_client *)client_data;
+
+ kbdev = (struct kbase_device *)client->drv_data;
+ if (unlikely(!kbdev)) {
+ pr_err("NULL drv_data in client");
+ return;
+ }
+
+ /* check for active config from client */
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+ list_del_init(&client->link);
+
+ while (retry && !list_empty(&kbdev->csf.coresight.configs)) {
+ retry = false;
+ list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) {
+ if (config_entry->client == client) {
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+ kbase_debug_coresight_csf_config_free(config_entry);
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+ retry = true;
+ break;
+ }
+ }
+ }
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+
+ kfree(client);
+}
+EXPORT_SYMBOL(kbase_debug_coresight_csf_unregister);
+
+void *
+kbase_debug_coresight_csf_config_create(void *client_data,
+ struct kbase_debug_coresight_csf_sequence *enable_seq,
+ struct kbase_debug_coresight_csf_sequence *disable_seq)
+{
+ struct kbase_debug_coresight_csf_client *client;
+ struct kbase_debug_coresight_csf_config *config;
+ struct kbase_device *kbdev;
+
+ if (unlikely(!client_data)) {
+ pr_err("NULL client");
+ return NULL;
+ }
+
+ client = (struct kbase_debug_coresight_csf_client *)client_data;
+
+ kbdev = (struct kbase_device *)client->drv_data;
+ if (unlikely(!kbdev)) {
+ pr_err("NULL drv_data in client");
+ return NULL;
+ }
+
+ if (enable_seq) {
+ if (!validate_seq(client, enable_seq)) {
+ dev_err(kbdev->dev, "Invalid enable_seq");
+ return NULL;
+ }
+ }
+
+ if (disable_seq) {
+ if (!validate_seq(client, disable_seq)) {
+ dev_err(kbdev->dev, "Invalid disable_seq");
+ return NULL;
+ }
+ }
+
+ config = kzalloc(sizeof(struct kbase_debug_coresight_csf_config), GFP_KERNEL);
+	if (WARN_ON(!config))
+ return NULL;
+
+ config->client = client;
+ config->enable_seq = enable_seq;
+ config->disable_seq = disable_seq;
+ config->error = 0;
+ config->state = KBASE_DEBUG_CORESIGHT_CSF_DISABLED;
+
+ INIT_LIST_HEAD(&config->link);
+
+ return config;
+}
+EXPORT_SYMBOL(kbase_debug_coresight_csf_config_create);
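Putting the client API together: a hypothetical CoreSight client registers its programmable address ranges, describes its enable sequence as an op array, then creates and enables a config. The register addresses below are placeholders and the structure fields follow their usage in this file; the authoritative definitions are in linux/mali_kbase_debug_coresight_csf.h.

/* Hypothetical client; addresses are placeholders. */
static struct kbase_debug_coresight_csf_address_range example_ranges[] = {
	{ .start = 0xE0000000, .end = 0xE0001000 },
};

static struct kbase_debug_coresight_csf_op example_enable_ops[] = {
	{
		.type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM,
		.op.write_imm = { .reg_addr = 0xE0000004, .val = 0x1 },
	},
};

static struct kbase_debug_coresight_csf_sequence example_enable_seq = {
	.ops = example_enable_ops,
	.nr_ops = ARRAY_SIZE(example_enable_ops),
};

static int example_client_start(void *mali_drv_data)
{
	void *client, *config;

	client = kbase_debug_coresight_csf_register(mali_drv_data, example_ranges,
						    ARRAY_SIZE(example_ranges));
	if (!client)
		return -ENODEV;

	config = kbase_debug_coresight_csf_config_create(client, &example_enable_seq, NULL);
	if (!config) {
		kbase_debug_coresight_csf_unregister(client);
		return -EINVAL;
	}

	/* Enabled immediately if the MCU is ON, otherwise on the next enable pass. */
	return kbase_debug_coresight_csf_config_enable(config);
}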
+
+void kbase_debug_coresight_csf_config_free(void *config_data)
+{
+ struct kbase_debug_coresight_csf_config *config;
+
+ if (unlikely(!config_data)) {
+ pr_err("NULL config");
+ return;
+ }
+
+ config = (struct kbase_debug_coresight_csf_config *)config_data;
+
+ kbase_debug_coresight_csf_config_disable(config);
+
+ kfree(config);
+}
+EXPORT_SYMBOL(kbase_debug_coresight_csf_config_free);
+
+int kbase_debug_coresight_csf_config_enable(void *config_data)
+{
+ struct kbase_debug_coresight_csf_config *config;
+ struct kbase_debug_coresight_csf_client *client;
+ struct kbase_device *kbdev;
+ struct kbase_debug_coresight_csf_config *config_entry;
+ unsigned long flags;
+ int ret = 0;
+
+ if (unlikely(!config_data)) {
+ pr_err("NULL config");
+ return -EINVAL;
+ }
+
+ config = (struct kbase_debug_coresight_csf_config *)config_data;
+ client = (struct kbase_debug_coresight_csf_client *)config->client;
+
+ if (unlikely(!client)) {
+ pr_err("NULL client in config");
+ return -EINVAL;
+ }
+
+ kbdev = (struct kbase_device *)client->drv_data;
+ if (unlikely(!kbdev)) {
+ pr_err("NULL drv_data in client");
+ return -EINVAL;
+ }
+
+ /* Check to prevent double entry of config */
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+ list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) {
+ if (config_entry == config) {
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+ dev_err(kbdev->dev, "Config already enabled");
+ return -EINVAL;
+ }
+ }
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+
+ kbase_csf_scheduler_lock(kbdev);
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+
+ /* Check the state of Scheduler to confirm the desired state of MCU */
+ if (((kbdev->csf.scheduler.state != SCHED_SUSPENDED) &&
+ (kbdev->csf.scheduler.state != SCHED_SLEEPING) &&
+ !kbase_csf_scheduler_protected_mode_in_use(kbdev)) ||
+ kbase_pm_get_policy(kbdev) == &kbase_pm_always_on_policy_ops) {
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ /* Wait for MCU to reach the stable ON state */
+ ret = kbase_pm_wait_for_desired_state(kbdev);
+
+ if (ret)
+ dev_err(kbdev->dev,
+ "Wait for PM state failed when enabling coresight config");
+ else
+ ret = coresight_config_enable(kbdev, config);
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ }
+
+ /* Add config to next enable sequence */
+ if (!ret) {
+ spin_lock(&kbdev->csf.coresight.lock);
+ list_add(&config->link, &kbdev->csf.coresight.configs);
+ spin_unlock(&kbdev->csf.coresight.lock);
+ }
+
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ kbase_csf_scheduler_unlock(kbdev);
+
+ return ret;
+}
+EXPORT_SYMBOL(kbase_debug_coresight_csf_config_enable);
+
+int kbase_debug_coresight_csf_config_disable(void *config_data)
+{
+ struct kbase_debug_coresight_csf_config *config;
+ struct kbase_debug_coresight_csf_client *client;
+ struct kbase_device *kbdev;
+ struct kbase_debug_coresight_csf_config *config_entry;
+ bool found_in_list = false;
+ unsigned long flags;
+ int ret = 0;
+
+ if (unlikely(!config_data)) {
+ pr_err("NULL config");
+ return -EINVAL;
+ }
+
+ config = (struct kbase_debug_coresight_csf_config *)config_data;
+
+ /* Exit early if not enabled prior */
+ if (list_empty(&config->link))
+ return ret;
+
+ client = (struct kbase_debug_coresight_csf_client *)config->client;
+
+ if (unlikely(!client)) {
+ pr_err("NULL client in config");
+ return -EINVAL;
+ }
+
+ kbdev = (struct kbase_device *)client->drv_data;
+ if (unlikely(!kbdev)) {
+ pr_err("NULL drv_data in client");
+ return -EINVAL;
+ }
+
+ /* Check if the config is in the correct list */
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+ list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) {
+ if (config_entry == config) {
+ found_in_list = true;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+
+ if (!found_in_list) {
+ dev_err(kbdev->dev, "Config looks corrupted");
+ return -EINVAL;
+ }
+
+ kbase_csf_scheduler_lock(kbdev);
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+
+ /* Check the state of Scheduler to confirm the desired state of MCU */
+ if (((kbdev->csf.scheduler.state != SCHED_SUSPENDED) &&
+ (kbdev->csf.scheduler.state != SCHED_SLEEPING) &&
+ !kbase_csf_scheduler_protected_mode_in_use(kbdev)) ||
+ kbase_pm_get_policy(kbdev) == &kbase_pm_always_on_policy_ops) {
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ /* Wait for MCU to reach the stable ON state */
+ ret = kbase_pm_wait_for_desired_state(kbdev);
+
+ if (ret)
+ dev_err(kbdev->dev,
+ "Wait for PM state failed when disabling coresight config");
+ else
+ ret = coresight_config_disable(kbdev, config);
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ } else if (kbdev->pm.backend.mcu_state == KBASE_MCU_OFF) {
+ /* MCU is OFF, so the disable sequence was already executed.
+ *
+ * Propagate any error that would have occurred during the enable
+ * or disable sequence.
+ *
+ * This is done as part of the disable sequence, since the call from
+ * client is synchronous.
+ */
+ ret = config->error;
+ }
+
+ /* Remove config from next disable sequence */
+ spin_lock(&kbdev->csf.coresight.lock);
+ list_del_init(&config->link);
+ spin_unlock(&kbdev->csf.coresight.lock);
+
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ kbase_csf_scheduler_unlock(kbdev);
+
+ return ret;
+}
+EXPORT_SYMBOL(kbase_debug_coresight_csf_config_disable);
+
+static void coresight_config_enable_all(struct work_struct *data)
+{
+ struct kbase_device *kbdev =
+ container_of(data, struct kbase_device, csf.coresight.enable_work);
+ struct kbase_debug_coresight_csf_config *config_entry;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+
+ list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) {
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+ if (coresight_config_enable(kbdev, config_entry))
+ dev_err(kbdev->dev, "enable config (0x%pK) failed", config_entry);
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+ }
+
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ wake_up_all(&kbdev->csf.coresight.event_wait);
+}
+
+static void coresight_config_disable_all(struct work_struct *data)
+{
+ struct kbase_device *kbdev =
+ container_of(data, struct kbase_device, csf.coresight.disable_work);
+ struct kbase_debug_coresight_csf_config *config_entry;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+
+ list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) {
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+ if (coresight_config_disable(kbdev, config_entry))
+ dev_err(kbdev->dev, "disable config (0x%pK) failed", config_entry);
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+ }
+
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ wake_up_all(&kbdev->csf.coresight.event_wait);
+}
+
+void kbase_debug_coresight_csf_disable_pmode_enter(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ dev_dbg(kbdev->dev, "Coresight state %s before protected mode enter",
+ coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_ENABLED));
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ kbase_pm_lock(kbdev);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ kbdev->csf.coresight.disable_on_pmode_enter = true;
+ kbdev->csf.coresight.enable_on_pmode_exit = false;
+ kbase_pm_update_state(kbdev);
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ kbase_pm_wait_for_desired_state(kbdev);
+
+ kbase_pm_unlock(kbdev);
+}
+
+void kbase_debug_coresight_csf_enable_pmode_exit(struct kbase_device *kbdev)
+{
+ dev_dbg(kbdev->dev, "Coresight state %s after protected mode exit",
+ coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_DISABLED));
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ WARN_ON(kbdev->csf.coresight.disable_on_pmode_enter);
+
+ kbdev->csf.coresight.enable_on_pmode_exit = true;
+ kbase_pm_update_state(kbdev);
+}
+
+void kbase_debug_coresight_csf_state_request(struct kbase_device *kbdev,
+ enum kbase_debug_coresight_csf_state state)
+{
+ if (unlikely(!kbdev))
+ return;
+
+ if (unlikely(!kbdev->csf.coresight.workq))
+ return;
+
+ dev_dbg(kbdev->dev, "Coresight state %s requested", coresight_state_to_string(state));
+
+ switch (state) {
+ case KBASE_DEBUG_CORESIGHT_CSF_DISABLED:
+ queue_work(kbdev->csf.coresight.workq, &kbdev->csf.coresight.disable_work);
+ break;
+ case KBASE_DEBUG_CORESIGHT_CSF_ENABLED:
+ queue_work(kbdev->csf.coresight.workq, &kbdev->csf.coresight.enable_work);
+ break;
+ default:
+ dev_err(kbdev->dev, "Invalid Coresight state %d", state);
+ break;
+ }
+}
+
+bool kbase_debug_coresight_csf_state_check(struct kbase_device *kbdev,
+ enum kbase_debug_coresight_csf_state state)
+{
+ struct kbase_debug_coresight_csf_config *config_entry;
+ unsigned long flags;
+ bool success = true;
+
+ dev_dbg(kbdev->dev, "Coresight check for state: %s", coresight_state_to_string(state));
+
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+
+ list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) {
+ if (state != config_entry->state) {
+ success = false;
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+
+ return success;
+}
+KBASE_EXPORT_TEST_API(kbase_debug_coresight_csf_state_check);
+
+bool kbase_debug_coresight_csf_state_wait(struct kbase_device *kbdev,
+ enum kbase_debug_coresight_csf_state state)
+{
+ const long wait_timeout = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
+ struct kbase_debug_coresight_csf_config *config_entry, *next_config_entry;
+ unsigned long flags;
+ bool success = true;
+
+ dev_dbg(kbdev->dev, "Coresight wait for state: %s", coresight_state_to_string(state));
+
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+
+ list_for_each_entry_safe(config_entry, next_config_entry, &kbdev->csf.coresight.configs,
+ link) {
+ const enum kbase_debug_coresight_csf_state prev_state = config_entry->state;
+ long remaining;
+
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+ remaining = wait_event_timeout(kbdev->csf.coresight.event_wait,
+ state == config_entry->state, wait_timeout);
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+
+ if (!remaining) {
+ success = false;
+ dev_err(kbdev->dev,
+ "Timeout waiting for Coresight state transition %s to %s",
+ coresight_state_to_string(prev_state),
+ coresight_state_to_string(state));
+ }
+ }
+
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+
+ return success;
+}
+KBASE_EXPORT_TEST_API(kbase_debug_coresight_csf_state_wait);
+
+int kbase_debug_coresight_csf_init(struct kbase_device *kbdev)
+{
+ kbdev->csf.coresight.workq = alloc_ordered_workqueue("Mali CoreSight workqueue", 0);
+ if (kbdev->csf.coresight.workq == NULL)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&kbdev->csf.coresight.clients);
+ INIT_LIST_HEAD(&kbdev->csf.coresight.configs);
+ INIT_WORK(&kbdev->csf.coresight.enable_work, coresight_config_enable_all);
+ INIT_WORK(&kbdev->csf.coresight.disable_work, coresight_config_disable_all);
+ init_waitqueue_head(&kbdev->csf.coresight.event_wait);
+ spin_lock_init(&kbdev->csf.coresight.lock);
+
+ kbdev->csf.coresight.disable_on_pmode_enter = false;
+ kbdev->csf.coresight.enable_on_pmode_exit = false;
+
+ return 0;
+}
+
+void kbase_debug_coresight_csf_term(struct kbase_device *kbdev)
+{
+ struct kbase_debug_coresight_csf_client *client_entry, *next_client_entry;
+ struct kbase_debug_coresight_csf_config *config_entry, *next_config_entry;
+ unsigned long flags;
+
+ kbdev->csf.coresight.disable_on_pmode_enter = false;
+ kbdev->csf.coresight.enable_on_pmode_exit = false;
+
+ cancel_work_sync(&kbdev->csf.coresight.enable_work);
+ cancel_work_sync(&kbdev->csf.coresight.disable_work);
+ destroy_workqueue(kbdev->csf.coresight.workq);
+ kbdev->csf.coresight.workq = NULL;
+
+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
+
+ list_for_each_entry_safe(config_entry, next_config_entry, &kbdev->csf.coresight.configs,
+ link) {
+ list_del_init(&config_entry->link);
+ kfree(config_entry);
+ }
+
+ list_for_each_entry_safe(client_entry, next_client_entry, &kbdev->csf.coresight.clients,
+ link) {
+ list_del_init(&client_entry->link);
+ kfree(client_entry);
+ }
+
+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
+}
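
The request/check/wait helpers above form a small asynchronous state machine: a request queues the enable or disable work item on the ordered workqueue, and the waiter sleeps on the event queue until every registered config reports the requested state. A minimal calling sketch, assuming a hypothetical external client (the function name and error code below are illustrative, not part of this patch):

static int example_coresight_trace_enable(struct kbase_device *kbdev)
{
	/* Queue the enable work on the ordered Coresight workqueue. */
	kbase_debug_coresight_csf_state_request(kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED);

	/* Sleep until every registered config reaches ENABLED, bounded by the
	 * firmware timeout used inside kbase_debug_coresight_csf_state_wait().
	 */
	if (!kbase_debug_coresight_csf_state_wait(kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED))
		return -ETIMEDOUT;

	return 0;
}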
diff --git a/mali_kbase/debug/backend/mali_kbase_debug_coresight_internal_csf.h b/mali_kbase/debug/backend/mali_kbase_debug_coresight_internal_csf.h
new file mode 100644
index 0000000..06d62dc
--- /dev/null
+++ b/mali_kbase/debug/backend/mali_kbase_debug_coresight_internal_csf.h
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_DEBUG_CORESIGHT_INTERNAL_CSF_H_
+#define _KBASE_DEBUG_CORESIGHT_INTERNAL_CSF_H_
+
+#include <mali_kbase.h>
+#include <linux/mali_kbase_debug_coresight_csf.h>
+
+/**
+ * struct kbase_debug_coresight_csf_client - Coresight client definition
+ *
+ * @drv_data: Pointer to driver device data.
+ * @addr_ranges: Array of address ranges used by the registered client.
+ * @nr_ranges: Size of @addr_ranges array.
+ * @link: Link item of a Coresight client.
+ * Linked to &struct_kbase_device.csf.coresight.clients.
+ */
+struct kbase_debug_coresight_csf_client {
+ void *drv_data;
+ struct kbase_debug_coresight_csf_address_range *addr_ranges;
+ u32 nr_ranges;
+ struct list_head link;
+};
+
+/**
+ * enum kbase_debug_coresight_csf_state - Coresight configuration states
+ *
+ * @KBASE_DEBUG_CORESIGHT_CSF_DISABLED: Coresight configuration is disabled.
+ * @KBASE_DEBUG_CORESIGHT_CSF_ENABLED: Coresight configuration is enabled.
+ */
+enum kbase_debug_coresight_csf_state {
+ KBASE_DEBUG_CORESIGHT_CSF_DISABLED = 0,
+ KBASE_DEBUG_CORESIGHT_CSF_ENABLED,
+};
+
+/**
+ * struct kbase_debug_coresight_csf_config - Coresight configuration definition
+ *
+ * @client: Pointer to the client for which the configuration is created.
+ * @enable_seq: Array of operations for Coresight client enable sequence. Can be NULL.
+ * @disable_seq: Array of operations for Coresight client disable sequence. Can be NULL.
+ * @state: Current Coresight configuration state.
+ * @error:       Error code recording any failure that occurred while executing
+ *               the enable or disable sequence.
+ * @link: Link item of a Coresight configuration.
+ * Linked to &struct_kbase_device.csf.coresight.configs.
+ */
+struct kbase_debug_coresight_csf_config {
+ void *client;
+ struct kbase_debug_coresight_csf_sequence *enable_seq;
+ struct kbase_debug_coresight_csf_sequence *disable_seq;
+ enum kbase_debug_coresight_csf_state state;
+ int error;
+ struct list_head link;
+};
+
+/**
+ * struct kbase_debug_coresight_device - Object representing the Coresight device
+ *
+ * @clients: List head to maintain Coresight clients.
+ * @configs: List head to maintain Coresight configs.
+ * @lock: A lock to protect client/config lists.
+ * Lists can be accessed concurrently by
+ * Coresight kernel modules and kernel threads.
+ * @workq: Work queue for Coresight enable/disable execution.
+ * @enable_work: Work item used to enable Coresight.
+ * @disable_work: Work item used to disable Coresight.
+ * @event_wait: Wait queue for Coresight events.
+ * @enable_on_pmode_exit: Flag used by the PM state machine to
+ * identify if Coresight enable is needed.
+ * @disable_on_pmode_enter: Flag used by the PM state machine to
+ * identify if Coresight disable is needed.
+ */
+struct kbase_debug_coresight_device {
+ struct list_head clients;
+ struct list_head configs;
+ spinlock_t lock;
+ struct workqueue_struct *workq;
+ struct work_struct enable_work;
+ struct work_struct disable_work;
+ wait_queue_head_t event_wait;
+ bool enable_on_pmode_exit;
+ bool disable_on_pmode_enter;
+};
+
+/**
+ * kbase_debug_coresight_csf_init - Initialize Coresight resources.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * This function should be called once at device initialization.
+ *
+ * Return: 0 on success.
+ */
+int kbase_debug_coresight_csf_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_coresight_csf_term - Terminate Coresight resources.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * This function should be called at device termination to prevent memory
+ * leaks in case the Coresight module was removed without calling
+ * kbasep_debug_coresight_csf_trace_disable().
+ */
+void kbase_debug_coresight_csf_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_coresight_csf_disable_pmode_enter - Disable Coresight on Protected
+ * mode enter.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * This function should be called just before requesting to enter protected mode.
+ * It will trigger a PM state machine transition from MCU_ON
+ * to ON_PMODE_ENTER_CORESIGHT_DISABLE.
+ */
+void kbase_debug_coresight_csf_disable_pmode_enter(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_coresight_csf_enable_pmode_exit - Enable Coresight on Protected
+ *                                               mode exit.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * This function should be called after protected mode exit is acknowledged.
+ * It will trigger a PM state machine transition from MCU_ON
+ * to ON_PMODE_EXIT_CORESIGHT_ENABLE.
+ */
+void kbase_debug_coresight_csf_enable_pmode_exit(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_coresight_csf_state_request - Request Coresight state transition.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @state: Coresight state to request.
+ */
+void kbase_debug_coresight_csf_state_request(struct kbase_device *kbdev,
+ enum kbase_debug_coresight_csf_state state);
+
+/**
+ * kbase_debug_coresight_csf_state_check - Check Coresight state.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @state: Coresight state to check for.
+ *
+ * Return: true if all configs are in @state.
+ */
+bool kbase_debug_coresight_csf_state_check(struct kbase_device *kbdev,
+ enum kbase_debug_coresight_csf_state state);
+
+/**
+ * kbase_debug_coresight_csf_state_wait - Wait for Coresight state transition to complete.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @state: Coresight state to wait for.
+ *
+ * Return: true if all configs reach @state within the pre-defined timeout.
+ */
+bool kbase_debug_coresight_csf_state_wait(struct kbase_device *kbdev,
+ enum kbase_debug_coresight_csf_state state);
+
+#endif /* _KBASE_DEBUG_CORESIGHT_INTERNAL_CSF_H_ */
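
To make the list and lock relationships documented above concrete, here is a minimal sketch of how a config entry could be linked against the device; the helper name and allocation policy are assumptions for illustration, and the real registration path lives in the public Coresight CSF API rather than in this header:

static int example_register_config(struct kbase_device *kbdev,
				   struct kbase_debug_coresight_csf_client *client,
				   struct kbase_debug_coresight_csf_sequence *enable_seq,
				   struct kbase_debug_coresight_csf_sequence *disable_seq)
{
	struct kbase_debug_coresight_csf_config *config;
	unsigned long flags;

	config = kzalloc(sizeof(*config), GFP_KERNEL);
	if (!config)
		return -ENOMEM;

	config->client = client;
	config->enable_seq = enable_seq;
	config->disable_seq = disable_seq;
	config->state = KBASE_DEBUG_CORESIGHT_CSF_DISABLED;

	/* The spinlock guards both lists against concurrent access from the
	 * Coresight modules and the enable/disable workers.
	 */
	spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
	list_add(&config->link, &kbdev->csf.coresight.configs);
	spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);

	return 0;
}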
diff --git a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_csf.h b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_csf.h
index bd40baa..87e13e5 100644
--- a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_csf.h
+++ b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_csf.h
@@ -42,19 +42,25 @@ int dummy_array[] = {
/*
* Generic CSF events
*/
+ /* info_val = 0 */
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_EVICT_CTX_SLOTS_START),
+ /* info_val == number of CSGs supported */
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_EVICT_CTX_SLOTS_END),
/* info_val[0:7] == fw version_minor
* info_val[15:8] == fw version_major
* info_val[63:32] == fw version_hash
*/
KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_BOOT),
KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_REBOOT),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_INVOKE),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_INVOKE),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_START),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_END),
/* info_val == total number of runnable groups across all kctxs */
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_START),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_END),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RESET_START),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RESET_END),
/* info_val = timeout in ms */
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_WAIT_QUIT_START),
/* info_val = remaining ms timeout, or 0 if timedout */
@@ -101,6 +107,8 @@ int dummy_array[] = {
* purpose.
*/
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_WORKER_HANDLING_START),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_WORKER_HANDLING_END),
+
KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_MCU_HALTED),
KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_MCU_SLEEP),
@@ -126,6 +134,8 @@ int dummy_array[] = {
* group->csg_nr indicates which bit was set
*/
KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_IDLE_SET),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_NO_NON_IDLE_GROUPS),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_NON_IDLE_GROUPS),
/* info_val = scheduler's new csg_slots_idle_mask[0]
* group->csg_nr indicates which bit was cleared
*
@@ -190,10 +200,37 @@ int dummy_array[] = {
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_GRP_INC),
/* info_val == new count of off-slot non-idle groups */
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC),
+ /* info_val = scheduler's new csg_slots_idle_mask[0]
+ * group->csg_nr indicates which bit was set
+ */
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_HANDLE_IDLE_SLOTS),
KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_START),
KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_END),
+ /* info_val = scheduler state */
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHED_BUSY),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHED_INACTIVE),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHED_SUSPENDED),
+ KBASE_KTRACE_CODE_MAKE_CODE(SCHED_SLEEPING),
+
+ /* info_val = mcu state */
+#define KBASEP_MCU_STATE(n) KBASE_KTRACE_CODE_MAKE_CODE(PM_MCU_ ## n),
+#include "backend/gpu/mali_kbase_pm_mcu_states.h"
+#undef KBASEP_MCU_STATE
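
The block above pulls the MCU state list in as an X-macro, so every power-management state gets a matching trace code without maintaining the list by hand. Assuming mali_kbase_pm_mcu_states.h carries entries such as KBASEP_MCU_STATE(OFF) and KBASEP_MCU_STATE(ON) (state names shown for illustration only), the include expands to:

	KBASE_KTRACE_CODE_MAKE_CODE(PM_MCU_OFF),
	KBASE_KTRACE_CODE_MAKE_CODE(PM_MCU_ON),
	/* ...one entry per KBASEP_MCU_STATE() line in the states header */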
+
+ /* info_val = number of runnable groups */
+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_INACTIVE),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_RUNNABLE),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_IDLE),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_SUSPENDED),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_SUSPENDED_ON_IDLE),
+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_SUSPENDED_ON_WAIT_SYNC),
+ /* info_val = new run state of the evicted group */
+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_FAULT_EVICTED),
+	/* info_val = number of active CSGs */
+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_TERMINATED),
+
/*
* Group + Queue events
*/
diff --git a/mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_csf.h b/mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_csf.h
index 914fcb5..e70a498 100644
--- a/mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_csf.h
+++ b/mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_csf.h
@@ -31,13 +31,17 @@
* Generic CSF events - using the common DEFINE_MALI_ADD_EVENT
*/
DEFINE_MALI_ADD_EVENT(SCHEDULER_EVICT_CTX_SLOTS_START);
+DEFINE_MALI_ADD_EVENT(SCHEDULER_EVICT_CTX_SLOTS_END);
DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_BOOT);
DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_REBOOT);
+DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_INVOKE);
+DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_INVOKE);
DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_START);
DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_END);
DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_START);
DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_END);
DEFINE_MALI_ADD_EVENT(SCHEDULER_RESET_START);
+DEFINE_MALI_ADD_EVENT(SCHEDULER_RESET_END);
DEFINE_MALI_ADD_EVENT(SCHEDULER_PROTM_WAIT_QUIT_START);
DEFINE_MALI_ADD_EVENT(SCHEDULER_PROTM_WAIT_QUIT_END);
DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_EVENT);
@@ -58,12 +62,20 @@ DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START);
DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_END);
DEFINE_MALI_ADD_EVENT(SCHEDULER_UPDATE_IDLE_SLOTS_ACK);
DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_WORKER_HANDLING_START);
+DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_WORKER_HANDLING_END);
DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_MCU_HALTED);
DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_MCU_SLEEP);
#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
DEFINE_MALI_ADD_EVENT(SCHEDULER_ENTER_SC_RAIL);
DEFINE_MALI_ADD_EVENT(SCHEDULER_EXIT_SC_RAIL);
#endif
+DEFINE_MALI_ADD_EVENT(SCHED_BUSY);
+DEFINE_MALI_ADD_EVENT(SCHED_INACTIVE);
+DEFINE_MALI_ADD_EVENT(SCHED_SUSPENDED);
+DEFINE_MALI_ADD_EVENT(SCHED_SLEEPING);
+#define KBASEP_MCU_STATE(n) DEFINE_MALI_ADD_EVENT(PM_MCU_ ## n);
+#include "backend/gpu/mali_kbase_pm_mcu_states.h"
+#undef KBASEP_MCU_STATE
DECLARE_EVENT_CLASS(mali_csf_grp_q_template,
TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group,
@@ -140,6 +152,8 @@ DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STOPPED);
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_CLEANED);
DEFINE_MALI_CSF_GRP_EVENT(CSG_UPDATE_IDLE_SLOT_REQ);
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_IDLE_SET);
+DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_NO_NON_IDLE_GROUPS);
+DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_NON_IDLE_GROUPS);
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_IDLE_CLEAR);
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_PRIO_UPDATE);
DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_SYNC_UPDATE);
@@ -164,6 +178,7 @@ DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_PROTM_EXIT);
DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_TOP_GRP);
DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_GRP_INC);
DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC);
+DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_HANDLE_IDLE_SLOTS);
DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_START);
DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_END);
#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
@@ -171,6 +186,14 @@ DEFINE_MALI_CSF_GRP_EVENT(SC_RAIL_RECHECK_IDLE);
DEFINE_MALI_CSF_GRP_EVENT(SC_RAIL_RECHECK_NOT_IDLE);
DEFINE_MALI_CSF_GRP_EVENT(SC_RAIL_CAN_TURN_OFF);
#endif
+DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_INACTIVE);
+DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_RUNNABLE);
+DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_IDLE);
+DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_SUSPENDED);
+DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_SUSPENDED_ON_IDLE);
+DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_SUSPENDED_ON_WAIT_SYNC);
+DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_FAULT_EVICTED);
+DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_TERMINATED);
#undef DEFINE_MALI_CSF_GRP_EVENT
diff --git a/mali_kbase/debug/mali_kbase_debug_ktrace_codes.h b/mali_kbase/debug/mali_kbase_debug_ktrace_codes.h
index eeb7b87..e2a1e8c 100644
--- a/mali_kbase/debug/mali_kbase_debug_ktrace_codes.h
+++ b/mali_kbase/debug/mali_kbase_debug_ktrace_codes.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2011-2015, 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2015, 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -142,6 +142,11 @@ int dummy_array[] = {
KBASE_KTRACE_CODE_MAKE_CODE(PM_RUNTIME_SUSPEND_CALLBACK),
KBASE_KTRACE_CODE_MAKE_CODE(PM_RUNTIME_RESUME_CALLBACK),
+ /* info_val = l2 state */
+#define KBASEP_L2_STATE(n) KBASE_KTRACE_CODE_MAKE_CODE(PM_L2_ ## n),
+#include "backend/gpu/mali_kbase_pm_l2_states.h"
+#undef KBASEP_L2_STATE
+
/*
* Context Scheduler events
*/
diff --git a/mali_kbase/debug/mali_kbase_debug_linux_ktrace.h b/mali_kbase/debug/mali_kbase_debug_linux_ktrace.h
index 52317ab..1b95306 100644
--- a/mali_kbase/debug/mali_kbase_debug_linux_ktrace.h
+++ b/mali_kbase/debug/mali_kbase_debug_linux_ktrace.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014, 2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -98,6 +98,9 @@ DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS);
DEFINE_MALI_ADD_EVENT(PM_POWEROFF_WAIT_WQ);
DEFINE_MALI_ADD_EVENT(PM_RUNTIME_SUSPEND_CALLBACK);
DEFINE_MALI_ADD_EVENT(PM_RUNTIME_RESUME_CALLBACK);
+#define KBASEP_L2_STATE(n) DEFINE_MALI_ADD_EVENT(PM_L2_ ## n);
+#include "backend/gpu/mali_kbase_pm_l2_states.h"
+#undef KBASEP_L2_STATE
DEFINE_MALI_ADD_EVENT(SCHED_RETAIN_CTX_NOLOCK);
DEFINE_MALI_ADD_EVENT(SCHED_RELEASE_CTX);
#ifdef CONFIG_MALI_ARBITER_SUPPORT
diff --git a/mali_kbase/device/backend/mali_kbase_device_csf.c b/mali_kbase/device/backend/mali_kbase_device_csf.c
index 51abad0..492684f 100644
--- a/mali_kbase/device/backend/mali_kbase_device_csf.c
+++ b/mali_kbase/device/backend/mali_kbase_device_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -23,16 +23,13 @@
#include <device/mali_kbase_device.h>
#include <mali_kbase_hwaccess_backend.h>
-#include <mali_kbase_hwcnt_backend_csf_if_fw.h>
-#include <mali_kbase_hwcnt_watchdog_if_timer.h>
+#include <hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h>
+#include <hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h>
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_reset_gpu.h>
#include <csf/mali_kbase_csf.h>
#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
-
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
#include <backend/gpu/mali_kbase_model_linux.h>
-#endif
#include <mali_kbase.h>
#include <backend/gpu/mali_kbase_irq_internal.h>
@@ -40,7 +37,7 @@
#include <backend/gpu/mali_kbase_js_internal.h>
#include <backend/gpu/mali_kbase_clk_rate_trace_mgr.h>
#include <csf/mali_kbase_csf_csg_debugfs.h>
-#include <mali_kbase_hwcnt_virtualizer.h>
+#include <hwcnt/mali_kbase_hwcnt_virtualizer.h>
#include <mali_kbase_kinstr_prfcnt.h>
#include <mali_kbase_vinstr.h>
#include <tl/mali_kbase_timeline.h>
@@ -92,13 +89,13 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
goto fail_timer;
#ifdef CONFIG_MALI_DEBUG
-#ifndef CONFIG_MALI_NO_MALI
+#if IS_ENABLED(CONFIG_MALI_REAL_HW)
if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
dev_err(kbdev->dev, "Interrupt assignment check failed.\n");
err = -EINVAL;
goto fail_interrupt_test;
}
-#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */
#endif /* CONFIG_MALI_DEBUG */
kbase_ipa_control_init(kbdev);
@@ -126,6 +123,10 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
if (err)
goto fail_update_l2_features;
+ err = kbase_backend_time_init(kbdev);
+ if (err)
+ goto fail_update_l2_features;
+
init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
kbase_pm_context_idle(kbdev);
@@ -142,9 +143,9 @@ fail_pm_metrics_init:
kbase_ipa_control_term(kbdev);
#ifdef CONFIG_MALI_DEBUG
-#ifndef CONFIG_MALI_NO_MALI
+#if IS_ENABLED(CONFIG_MALI_REAL_HW)
fail_interrupt_test:
-#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */
#endif /* CONFIG_MALI_DEBUG */
kbase_backend_timer_term(kbdev);
@@ -198,6 +199,7 @@ static int kbase_csf_early_init(struct kbase_device *kbdev)
static void kbase_csf_early_term(struct kbase_device *kbdev)
{
kbase_csf_scheduler_early_term(kbdev);
+ kbase_csf_firmware_early_term(kbdev);
}
/**
@@ -282,12 +284,15 @@ static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev)
}
static const struct kbase_device_init dev_init[] = {
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
- { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" },
-#else
+#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
+ { kbase_gpu_device_create, kbase_gpu_device_destroy,
+ "Dummy model initialization failed" },
+#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
{ assign_irqs, NULL, "IRQ search failed" },
+#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
+#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
{ registers_map, registers_unmap, "Register map failed" },
-#endif
+#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
{ power_control_init, power_control_term, "Power control initialization failed" },
{ kbase_device_io_history_init, kbase_device_io_history_term,
"Register access history initialization failed" },
@@ -322,6 +327,8 @@ static const struct kbase_device_init dev_init[] = {
{ kbase_backend_late_init, kbase_backend_late_term, "Late backend initialization failed" },
{ kbase_csf_late_init, NULL, "Late CSF initialization failed" },
{ NULL, kbase_device_firmware_hwcnt_term, NULL },
+ { kbase_debug_csf_fault_init, kbase_debug_csf_fault_term,
+ "CSF fault debug initialization failed" },
{ kbase_device_debugfs_init, kbase_device_debugfs_term, "DebugFS initialization failed" },
/* Sysfs init needs to happen before registering the device with
* misc_register(), otherwise it causes a race condition between
@@ -341,6 +348,10 @@ static const struct kbase_device_init dev_init[] = {
{ kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer,
"GPU property population failed" },
{ kbase_device_late_init, kbase_device_late_term, "Late device initialization failed" },
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+ { kbase_debug_coresight_csf_init, kbase_debug_coresight_csf_term,
+ "Coresight initialization failed" },
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
};
static void kbase_device_term_partial(struct kbase_device *kbdev,
@@ -354,7 +365,6 @@ static void kbase_device_term_partial(struct kbase_device *kbdev,
void kbase_device_term(struct kbase_device *kbdev)
{
- kbdev->csf.mali_file_inode = NULL;
kbase_device_term_partial(kbdev, ARRAY_SIZE(dev_init));
kbase_mem_halt(kbdev);
}
diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
index fcd0c50..5e27094 100644
--- a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
+++ b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
@@ -24,6 +24,7 @@
#include <backend/gpu/mali_kbase_instr_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <device/mali_kbase_device.h>
+#include <device/mali_kbase_device_internal.h>
#include <mali_kbase_reset_gpu.h>
#include <mmu/mali_kbase_mmu.h>
#include <mali_kbase_ctx_sched.h>
@@ -146,6 +147,9 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
GPU_EXCEPTION_TYPE_SW_FAULT_0,
} } };
+ kbase_debug_csf_fault_notify(kbdev, scheduler->active_protm_grp->kctx,
+ DF_GPU_PROTECTED_FAULT);
+
scheduler->active_protm_grp->faulted = true;
kbase_csf_add_group_fatal_error(
scheduler->active_protm_grp, &err_payload);
@@ -177,9 +181,6 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
dev_dbg(kbdev->dev, "Doorbell mirror interrupt received");
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-#ifdef CONFIG_MALI_DEBUG
- WARN_ON(!kbase_csf_scheduler_get_nr_active_csgs(kbdev));
-#endif
kbase_pm_disable_db_mirror_interrupt(kbdev);
kbdev->pm.backend.exit_gpu_sleep_mode = true;
kbase_csf_scheduler_invoke_tick(kbdev);
@@ -227,7 +228,7 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
}
#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
-static bool kbase_is_register_accessible(u32 offset)
+bool kbase_is_register_accessible(u32 offset)
{
#ifdef CONFIG_MALI_DEBUG
if (((offset >= MCU_SUBSYSTEM_BASE) && (offset < IPA_CONTROL_BASE)) ||
@@ -239,11 +240,16 @@ static bool kbase_is_register_accessible(u32 offset)
return true;
}
+#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
+#if IS_ENABLED(CONFIG_MALI_REAL_HW)
void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value)
{
- KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
- KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+ if (WARN_ON(!kbdev->pm.backend.gpu_powered))
+ return;
+
+ if (WARN_ON(kbdev->dev == NULL))
+ return;
if (!kbase_is_register_accessible(offset))
return;
@@ -263,8 +269,11 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
{
u32 val;
- KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
- KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+ if (WARN_ON(!kbdev->pm.backend.gpu_powered))
+ return 0;
+
+ if (WARN_ON(kbdev->dev == NULL))
+ return 0;
if (!kbase_is_register_accessible(offset))
return 0;
@@ -281,4 +290,4 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
return val;
}
KBASE_EXPORT_TEST_API(kbase_reg_read);
-#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
+#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */
diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_jm.c b/mali_kbase/device/backend/mali_kbase_device_hw_jm.c
index e6f0197..38223af 100644
--- a/mali_kbase/device/backend/mali_kbase_device_hw_jm.c
+++ b/mali_kbase/device/backend/mali_kbase_device_hw_jm.c
@@ -106,7 +106,7 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val);
}
-#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
+#if IS_ENABLED(CONFIG_MALI_REAL_HW)
void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value)
{
WARN_ON(!kbdev->pm.backend.gpu_powered);
@@ -140,4 +140,4 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
return val;
}
KBASE_EXPORT_TEST_API(kbase_reg_read);
-#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
+#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */
diff --git a/mali_kbase/device/backend/mali_kbase_device_jm.c b/mali_kbase/device/backend/mali_kbase_device_jm.c
index 9287d73..14b5602 100644
--- a/mali_kbase/device/backend/mali_kbase_device_jm.c
+++ b/mali_kbase/device/backend/mali_kbase_device_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -29,13 +29,10 @@
#include <mali_kbase_hwaccess_backend.h>
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_reset_gpu.h>
-#include <mali_kbase_hwcnt_watchdog_if_timer.h>
-#include <mali_kbase_hwcnt_backend_jm.h>
-#include <mali_kbase_hwcnt_backend_jm_watchdog.h>
-
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+#include <hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h>
+#include <hwcnt/backend/mali_kbase_hwcnt_backend_jm.h>
+#include <hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h>
#include <backend/gpu/mali_kbase_model_linux.h>
-#endif /* CONFIG_MALI_NO_MALI */
#ifdef CONFIG_MALI_ARBITER_SUPPORT
#include <arbiter/mali_kbase_arbiter_pm.h>
@@ -76,13 +73,13 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
goto fail_timer;
#ifdef CONFIG_MALI_DEBUG
-#ifndef CONFIG_MALI_NO_MALI
+#if IS_ENABLED(CONFIG_MALI_REAL_HW)
if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
dev_err(kbdev->dev, "Interrupt assignment check failed.\n");
err = -EINVAL;
goto fail_interrupt_test;
}
-#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */
#endif /* CONFIG_MALI_DEBUG */
err = kbase_job_slot_init(kbdev);
@@ -105,6 +102,10 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
if (err)
goto fail_update_l2_features;
+ err = kbase_backend_time_init(kbdev);
+ if (err)
+ goto fail_update_l2_features;
+
init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
/* Idle the GPU and/or cores, if the policy wants it to */
@@ -121,9 +122,9 @@ fail_devfreq_init:
fail_job_slot:
#ifdef CONFIG_MALI_DEBUG
-#ifndef CONFIG_MALI_NO_MALI
+#if IS_ENABLED(CONFIG_MALI_REAL_HW)
fail_interrupt_test:
-#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */
#endif /* CONFIG_MALI_DEBUG */
kbase_backend_timer_term(kbdev);
@@ -215,17 +216,20 @@ static void kbase_device_hwcnt_backend_jm_watchdog_term(struct kbase_device *kbd
}
static const struct kbase_device_init dev_init[] = {
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
{ kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" },
-#else
+#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
{ assign_irqs, NULL, "IRQ search failed" },
+#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
+#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
{ registers_map, registers_unmap, "Register map failed" },
-#endif
+#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
{ kbase_device_io_history_init, kbase_device_io_history_term,
"Register access history initialization failed" },
{ kbase_device_pm_init, kbase_device_pm_term, "Power management initialization failed" },
{ kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" },
{ kbase_device_populate_max_freq, NULL, "Populating max frequency failed" },
+ { kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" },
{ kbase_device_misc_init, kbase_device_misc_term,
"Miscellaneous device initialization failed" },
{ kbase_device_pcm_dev_init, kbase_device_pcm_dev_term,
@@ -241,7 +245,6 @@ static const struct kbase_device_init dev_init[] = {
"Timeline stream initialization failed" },
{ kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term,
"Clock rate trace manager initialization failed" },
- { kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" },
{ kbase_instr_backend_init, kbase_instr_backend_term,
"Instrumentation backend initialization failed" },
{ kbase_device_hwcnt_watchdog_if_init, kbase_device_hwcnt_watchdog_if_term,
diff --git a/mali_kbase/device/mali_kbase_device.c b/mali_kbase/device/mali_kbase_device.c
index 9571830..15839ae 100644
--- a/mali_kbase/device/mali_kbase_device.c
+++ b/mali_kbase/device/mali_kbase_device.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -35,6 +35,7 @@
#include <mali_kbase.h>
#include <mali_kbase_defs.h>
#include <mali_kbase_hwaccess_instr.h>
+#include <mali_kbase_hwaccess_time.h>
#include <mali_kbase_hw.h>
#include <mali_kbase_config_defaults.h>
#include <linux/priority_control_manager.h>
@@ -42,8 +43,8 @@
#include <tl/mali_kbase_timeline.h>
#include "mali_kbase_kinstr_prfcnt.h"
#include "mali_kbase_vinstr.h"
-#include "mali_kbase_hwcnt_context.h"
-#include "mali_kbase_hwcnt_virtualizer.h"
+#include "hwcnt/mali_kbase_hwcnt_context.h"
+#include "hwcnt/mali_kbase_hwcnt_virtualizer.h"
#include "mali_kbase_device.h"
#include "mali_kbase_device_internal.h"
@@ -56,17 +57,15 @@
#include "arbiter/mali_kbase_arbiter_pm.h"
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
-/* NOTE: Magic - 0x45435254 (TRCE in ASCII).
- * Supports tracing feature provided in the base module.
- * Please keep it in sync with the value of base module.
- */
-#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254
+#if defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_NO_MALI)
/* Number of register accesses for the buffer that we allocate during
* initialization time. The buffer size can be changed later via debugfs.
*/
#define KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ((u16)512)
+#endif /* defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_NO_MALI) */
+
static DEFINE_MUTEX(kbase_dev_list_lock);
static LIST_HEAD(kbase_dev_list);
static int kbase_dev_nr;
@@ -310,7 +309,8 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
#endif /* MALI_USE_CSF */
kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
-
+ kbdev->mmu_as_inactive_wait_time_ms =
+ kbase_get_timeout_ms(kbdev, MMU_AS_INACTIVE_WAIT_TIMEOUT);
mutex_init(&kbdev->kctx_list_lock);
INIT_LIST_HEAD(&kbdev->kctx_list);
@@ -323,6 +323,10 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
"Unable to register OOM notifier for Mali - but will continue\n");
kbdev->oom_notifier_block.notifier_call = NULL;
}
+
+#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE)
+ atomic_set(&kbdev->live_fence_metadata, 0);
+#endif
return 0;
term_as:
@@ -346,6 +350,11 @@ void kbase_device_misc_term(struct kbase_device *kbdev)
if (kbdev->oom_notifier_block.notifier_call)
unregister_oom_notifier(&kbdev->oom_notifier_block);
+
+#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE)
+ if (atomic_read(&kbdev->live_fence_metadata) > 0)
+ dev_warn(kbdev->dev, "Terminating Kbase device with live fence metadata!");
+#endif
}
void kbase_device_free(struct kbase_device *kbdev)
diff --git a/mali_kbase/device/mali_kbase_device.h b/mali_kbase/device/mali_kbase_device.h
index 6706a61..f025011 100644
--- a/mali_kbase/device/mali_kbase_device.h
+++ b/mali_kbase/device/mali_kbase_device.h
@@ -130,7 +130,11 @@ bool kbase_is_gpu_removed(struct kbase_device *kbdev);
*
* Return: 0 if successful or a negative error code on failure.
*/
-#define kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op) (0)
+#if MALI_USE_CSF
+int kbase_gpu_cache_flush_pa_range_and_busy_wait(struct kbase_device *kbdev, phys_addr_t phys,
+ size_t nr_bytes, u32 flush_op);
+#endif /* MALI_USE_CSF */
+
/**
* kbase_gpu_cache_flush_and_busy_wait - Start a cache flush and busy wait
* @kbdev: Kbase device
diff --git a/mali_kbase/device/mali_kbase_device_hw.c b/mali_kbase/device/mali_kbase_device_hw.c
index 4e03e44..8b4588e 100644
--- a/mali_kbase/device/mali_kbase_device_hw.c
+++ b/mali_kbase/device/mali_kbase_device_hw.c
@@ -27,9 +27,6 @@
#include <mali_kbase_reset_gpu.h>
#include <mmu/mali_kbase_mmu.h>
-#define U64_LO_MASK ((1ULL << 32) - 1)
-#define U64_HI_MASK (~U64_LO_MASK)
-
#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
bool kbase_is_gpu_removed(struct kbase_device *kbdev)
{
@@ -86,7 +83,38 @@ static int busy_wait_on_irq(struct kbase_device *kbdev, u32 irq_bit)
return 0;
}
-#define kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op) (0)
+#if MALI_USE_CSF
+#define U64_LO_MASK ((1ULL << 32) - 1)
+#define U64_HI_MASK (~U64_LO_MASK)
+
+int kbase_gpu_cache_flush_pa_range_and_busy_wait(struct kbase_device *kbdev, phys_addr_t phys,
+ size_t nr_bytes, u32 flush_op)
+{
+ u64 start_pa, end_pa;
+ int ret = 0;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ /* 1. Clear the interrupt FLUSH_PA_RANGE_COMPLETED bit. */
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), FLUSH_PA_RANGE_COMPLETED);
+
+ /* 2. Issue GPU_CONTROL.COMMAND.FLUSH_PA_RANGE operation. */
+ start_pa = phys;
+ end_pa = start_pa + nr_bytes - 1;
+
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG0_LO), start_pa & U64_LO_MASK);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG0_HI),
+ (start_pa & U64_HI_MASK) >> 32);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG1_LO), end_pa & U64_LO_MASK);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG1_HI), (end_pa & U64_HI_MASK) >> 32);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op);
+
+ /* 3. Busy-wait irq status to be enabled. */
+ ret = busy_wait_on_irq(kbdev, (u32)FLUSH_PA_RANGE_COMPLETED);
+
+ return ret;
+}
+#endif /* MALI_USE_CSF */
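
kbase_gpu_cache_flush_pa_range_and_busy_wait() asserts that hwaccess_lock is held, so callers are expected to wrap it roughly as in this sketch; the wrapper function is illustrative, and the concrete flush_op value comes from the GPU command encodings in the regmap headers rather than from this file:

static int example_flush_pa_range(struct kbase_device *kbdev, phys_addr_t pa,
				  size_t nr_bytes, u32 flush_op)
{
	unsigned long flags;
	int err;

	/* Take hwaccess_lock around the busy-wait flush, as the helper requires. */
	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	err = kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, pa, nr_bytes, flush_op);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	return err;
}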
int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
u32 flush_op)
diff --git a/mali_kbase/device/mali_kbase_device_internal.h b/mali_kbase/device/mali_kbase_device_internal.h
index d4f6875..de54c83 100644
--- a/mali_kbase/device/mali_kbase_device_internal.h
+++ b/mali_kbase/device/mali_kbase_device_internal.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -89,3 +89,13 @@ int kbase_device_late_init(struct kbase_device *kbdev);
* @kbdev: Device pointer
*/
void kbase_device_late_term(struct kbase_device *kbdev);
+
+#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_NO_MALI)
+/**
+ * kbase_is_register_accessible - Checks if register is accessible
+ * @offset: Register offset
+ *
+ * Return: true if the register is accessible, false otherwise.
+ */
+bool kbase_is_register_accessible(u32 offset);
+#endif /* MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_NO_MALI) */
diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c
index 15bfd03..60ba9be 100644
--- a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c
+++ b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -105,6 +105,70 @@ const char *kbase_gpu_exception_name(u32 const exception_code)
case GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_CACHEABILITY_FAULT:
e = "GPU_CACHEABILITY_FAULT";
break;
+ /* MMU Fault */
+ case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L0:
+ e = "TRANSLATION_FAULT at level 0";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L1:
+ e = "TRANSLATION_FAULT at level 1";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L2:
+ e = "TRANSLATION_FAULT at level 2";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L3:
+ e = "TRANSLATION_FAULT at level 3";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L4:
+ e = "TRANSLATION_FAULT";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_0:
+ e = "PERMISSION_FAULT at level 0";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_1:
+ e = "PERMISSION_FAULT at level 1";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_2:
+ e = "PERMISSION_FAULT at level 2";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_3:
+ e = "PERMISSION_FAULT at level 3";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_1:
+ e = "ACCESS_FLAG at level 1";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_2:
+ e = "ACCESS_FLAG at level 2";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_3:
+ e = "ACCESS_FLAG at level 3";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN:
+ e = "ADDRESS_SIZE_FAULT_IN";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_0:
+ e = "ADDRESS_SIZE_FAULT_OUT_0 at level 0";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_1:
+ e = "ADDRESS_SIZE_FAULT_OUT_1 at level 1";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_2:
+ e = "ADDRESS_SIZE_FAULT_OUT_2 at level 2";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_3:
+ e = "ADDRESS_SIZE_FAULT_OUT_3 at level 3";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_0:
+ e = "MEMORY_ATTRIBUTE_FAULT_0 at level 0";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_1:
+ e = "MEMORY_ATTRIBUTE_FAULT_1 at level 1";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_2:
+ e = "MEMORY_ATTRIBUTE_FAULT_2 at level 2";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_3:
+ e = "MEMORY_ATTRIBUTE_FAULT_3 at level 3";
+ break;
/* Any other exception code is unknown */
default:
e = "UNKNOWN";
diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_jm.c b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_jm.c
index 37015cc..7f3743c 100644
--- a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_jm.c
+++ b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -170,7 +170,7 @@ const char *kbase_gpu_exception_name(u32 const exception_code)
default:
e = "UNKNOWN";
break;
- };
+ }
return e;
}
diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h
index 6ef61ce..e7457dd 100644
--- a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h
+++ b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h
@@ -124,8 +124,16 @@
#define MCU_STATUS_HALTED (1 << 1)
+#define L2_CONFIG_PBHA_HWU_SHIFT GPU_U(12)
+#define L2_CONFIG_PBHA_HWU_MASK (GPU_U(0xF) << L2_CONFIG_PBHA_HWU_SHIFT)
+#define L2_CONFIG_PBHA_HWU_GET(reg_val) \
+ (((reg_val)&L2_CONFIG_PBHA_HWU_MASK) >> L2_CONFIG_PBHA_HWU_SHIFT)
+#define L2_CONFIG_PBHA_HWU_SET(reg_val, value) \
+ (((reg_val) & ~L2_CONFIG_PBHA_HWU_MASK) | \
+ (((value) << L2_CONFIG_PBHA_HWU_SHIFT) & L2_CONFIG_PBHA_HWU_MASK))
+
/* JOB IRQ flags */
-#define JOB_IRQ_GLOBAL_IF (1 << 31) /* Global interface interrupt received */
+#define JOB_IRQ_GLOBAL_IF (1u << 31) /* Global interface interrupt received */
/* GPU_COMMAND codes */
#define GPU_COMMAND_CODE_NOP 0x00 /* No operation, nothing happens */
diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h
index c349f4b..f86f493 100644
--- a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h
+++ b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h
@@ -108,7 +108,6 @@
#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. */
-#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */
#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */
#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */
#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */
@@ -125,31 +124,12 @@
#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */
#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */
-#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
-
-#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */
-#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */
-#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */
-#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */
-#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */
-#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */
-#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */
-/* (RO) Extended affinity mask for job slot n*/
-#define JS_XAFFINITY 0x1C
+#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job slot n*/
#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */
#define JS_STATUS 0x24 /* (RO) Status register for job slot n */
-#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */
-#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */
-
-#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */
-#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */
-#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */
-/* (RW) Next extended affinity mask for job slot n */
-#define JS_XAFFINITY_NEXT 0x5C
-
-#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */
+#define JS_XAFFINITY_NEXT 0x5C /* (RW) Next extended affinity mask for job slot n */
#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */
diff --git a/mali_kbase/gpu/mali_kbase_gpu_fault.h b/mali_kbase/gpu/mali_kbase_gpu_fault.h
index 8b50a5d..6a937a5 100644
--- a/mali_kbase/gpu/mali_kbase_gpu_fault.h
+++ b/mali_kbase/gpu/mali_kbase_gpu_fault.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,9 +27,9 @@
*
* @exception_code: exception code
*
- * This function is called from the interrupt handler when a GPU fault occurs.
+ * This function is called by error handlers when the GPU reports an error.
*
- * Return: name associated with the exception code
+ * Return: Error string associated with the exception code
*/
const char *kbase_gpu_exception_name(u32 exception_code);
diff --git a/mali_kbase/gpu/mali_kbase_gpu_regmap.h b/mali_kbase/gpu/mali_kbase_gpu_regmap.h
index 1f4e5f0..e51791f 100644
--- a/mali_kbase/gpu/mali_kbase_gpu_regmap.h
+++ b/mali_kbase/gpu/mali_kbase_gpu_regmap.h
@@ -45,18 +45,13 @@
/* Begin Register Offsets */
/* GPU control registers */
-#define GPU_CONTROL_BASE 0x0000
-#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r))
-#define GPU_ID 0x000 /* (RO) GPU and revision identifier */
#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */
#define TILER_FEATURES 0x00C /* (RO) Tiler Features */
#define MEM_FEATURES 0x010 /* (RO) Memory system features */
#define MMU_FEATURES 0x014 /* (RO) MMU features */
#define AS_PRESENT 0x018 /* (RO) Address space slots present */
#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */
-#define GPU_IRQ_CLEAR 0x024 /* (WO) */
#define GPU_IRQ_MASK 0x028 /* (RW) */
-#define GPU_IRQ_STATUS 0x02C /* (RO) */
#define GPU_COMMAND 0x030 /* (WO) */
#define GPU_STATUS 0x034 /* (RO) */
@@ -100,6 +95,10 @@
#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
+#define GPU_COMMAND_ARG0_LO 0x0D0 /* (RW) Additional parameter 0 for GPU commands, low word */
+#define GPU_COMMAND_ARG0_HI 0x0D4 /* (RW) Additional parameter 0 for GPU commands, high word */
+#define GPU_COMMAND_ARG1_LO 0x0D8 /* (RW) Additional parameter 1 for GPU commands, low word */
+#define GPU_COMMAND_ARG1_HI 0x0DC /* (RW) Additional parameter 1 for GPU commands, high word */
#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */
#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */
@@ -113,26 +112,10 @@
#define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */
#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */
-#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */
-#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */
-
-#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */
-#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */
-
-#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */
-#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */
-
#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */
#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */
-#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */
-#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */
-
-#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */
-#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */
-
-#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */
-#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */
+#define SHADER_PWRFEATURES 0x188 /* (RW) Shader core power features */
#define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */
#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */
@@ -181,6 +164,8 @@
#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */
#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */
+#define AMBA_FEATURES 0x300 /* (RO) AMBA bus supported features */
+#define AMBA_ENABLE 0x304 /* (RW) AMBA features enable */
#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */
#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */
@@ -188,22 +173,10 @@
/* Job control registers */
-#define JOB_CONTROL_BASE 0x1000
-
-#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r))
-
#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */
-#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */
-#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */
-#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */
/* MMU control registers */
-#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */
-#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */
-#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */
-
-#define MMU_AS0 0x400 /* Configuration registers for address space 0 */
#define MMU_AS1 0x440 /* Configuration registers for address space 1 */
#define MMU_AS2 0x480 /* Configuration registers for address space 2 */
#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */
@@ -221,25 +194,13 @@
#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */
/* MMU address space control registers */
-
-#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
-
-#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */
-#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */
-#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */
-#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. */
#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */
#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */
-#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */
#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */
#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */
#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */
#define AS_STATUS 0x28 /* (RO) Status flags for address space n */
-/* (RW) Translation table configuration for address space n, low word */
-#define AS_TRANSCFG_LO 0x30
-/* (RW) Translation table configuration for address space n, high word */
-#define AS_TRANSCFG_HI 0x34
/* (RO) Secondary fault address for address space n, low word */
#define AS_FAULTEXTRA_LO 0x38
/* (RO) Secondary fault address for address space n, high word */
@@ -464,6 +425,80 @@
#define L2_CONFIG_ASN_HASH_ENABLE_MASK (1ul << L2_CONFIG_ASN_HASH_ENABLE_SHIFT)
/* End L2_CONFIG register */
+/* AMBA_FEATURES register */
+#define AMBA_FEATURES_ACE_LITE_SHIFT GPU_U(0)
+#define AMBA_FEATURES_ACE_LITE_MASK (GPU_U(0x1) << AMBA_FEATURES_ACE_LITE_SHIFT)
+#define AMBA_FEATURES_ACE_LITE_GET(reg_val) \
+ (((reg_val)&AMBA_FEATURES_ACE_LITE_MASK) >> \
+ AMBA_FEATURES_ACE_LITE_SHIFT)
+#define AMBA_FEATURES_ACE_LITE_SET(reg_val, value) \
+ (((reg_val) & ~AMBA_FEATURES_ACE_LITE_MASK) | \
+ (((value) << AMBA_FEATURES_ACE_LITE_SHIFT) & \
+ AMBA_FEATURES_ACE_LITE_MASK))
+#define AMBA_FEATURES_ACE_SHIFT GPU_U(1)
+#define AMBA_FEATURES_ACE_MASK (GPU_U(0x1) << AMBA_FEATURES_ACE_SHIFT)
+#define AMBA_FEATURES_ACE_GET(reg_val) \
+ (((reg_val)&AMBA_FEATURES_ACE_MASK) >> AMBA_FEATURES_ACE_SHIFT)
+#define AMBA_FEATURES_ACE_SET(reg_val, value) \
+ (((reg_val) & ~AMBA_FEATURES_ACE_MASK) | \
+ (((value) << AMBA_FEATURES_ACE_SHIFT) & AMBA_FEATURES_ACE_MASK))
+#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT GPU_U(5)
+#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK \
+ (GPU_U(0x1) << AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT)
+#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_GET(reg_val) \
+ (((reg_val)&AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK) >> \
+ AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT)
+#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SET(reg_val, value) \
+ (((reg_val) & ~AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK) | \
+ (((value) << AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT) & \
+ AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK))
+#define AMBA_FEATURES_INVALIDATE_HINT_SHIFT GPU_U(6)
+#define AMBA_FEATURES_INVALIDATE_HINT_MASK \
+ (GPU_U(0x1) << AMBA_FEATURES_INVALIDATE_HINT_SHIFT)
+#define AMBA_FEATURES_INVALIDATE_HINT_GET(reg_val) \
+ (((reg_val)&AMBA_FEATURES_INVALIDATE_HINT_MASK) >> \
+ AMBA_FEATURES_INVALIDATE_HINT_SHIFT)
+#define AMBA_FEATURES_INVALIDATE_HINT_SET(reg_val, value) \
+ (((reg_val) & ~AMBA_FEATURES_INVALIDATE_HINT_MASK) | \
+ (((value) << AMBA_FEATURES_INVALIDATE_HINT_SHIFT) & \
+ AMBA_FEATURES_INVALIDATE_HINT_MASK))
+
+/* AMBA_ENABLE register */
+#define AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT GPU_U(0)
+#define AMBA_ENABLE_COHERENCY_PROTOCOL_MASK \
+ (GPU_U(0x1F) << AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT)
+#define AMBA_ENABLE_COHERENCY_PROTOCOL_GET(reg_val) \
+ (((reg_val)&AMBA_ENABLE_COHERENCY_PROTOCOL_MASK) >> \
+ AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT)
+#define AMBA_ENABLE_COHERENCY_PROTOCOL_SET(reg_val, value) \
+ (((reg_val) & ~AMBA_ENABLE_COHERENCY_PROTOCOL_MASK) | \
+ (((value) << AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT) & \
+ AMBA_ENABLE_COHERENCY_PROTOCOL_MASK))
+/* AMBA_ENABLE_coherency_protocol values */
+#define AMBA_ENABLE_COHERENCY_PROTOCOL_ACE_LITE 0x0
+#define AMBA_ENABLE_COHERENCY_PROTOCOL_ACE 0x1
+#define AMBA_ENABLE_COHERENCY_PROTOCOL_NO_COHERENCY 0x1F
+/* End of AMBA_ENABLE_coherency_protocol values */
+#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT GPU_U(5)
+#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK \
+ (GPU_U(0x1) << AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT)
+#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_GET(reg_val) \
+ (((reg_val)&AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK) >> \
+ AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT)
+#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SET(reg_val, value) \
+ (((reg_val) & ~AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK) | \
+ (((value) << AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT) & \
+ AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK))
+#define AMBA_ENABLE_INVALIDATE_HINT_SHIFT GPU_U(6)
+#define AMBA_ENABLE_INVALIDATE_HINT_MASK \
+ (GPU_U(0x1) << AMBA_ENABLE_INVALIDATE_HINT_SHIFT)
+#define AMBA_ENABLE_INVALIDATE_HINT_GET(reg_val) \
+ (((reg_val)&AMBA_ENABLE_INVALIDATE_HINT_MASK) >> \
+ AMBA_ENABLE_INVALIDATE_HINT_SHIFT)
+#define AMBA_ENABLE_INVALIDATE_HINT_SET(reg_val, value) \
+ (((reg_val) & ~AMBA_ENABLE_INVALIDATE_HINT_MASK) | \
+ (((value) << AMBA_ENABLE_INVALIDATE_HINT_SHIFT) & \
+ AMBA_ENABLE_INVALIDATE_HINT_MASK))
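
The AMBA_* accessors follow the usual read-modify-write pattern. A short sketch of selecting a coherency protocol with them; only the register offsets and macro names come from this header, the caller is an assumption:

static void example_enable_ace_lite(struct kbase_device *kbdev)
{
	u32 amba_enable = kbase_reg_read(kbdev, GPU_CONTROL_REG(AMBA_ENABLE));

	/* Select ACE-Lite while leaving the MEMORY_CACHE_SUPPORT and
	 * INVALIDATE_HINT fields untouched.
	 */
	amba_enable = AMBA_ENABLE_COHERENCY_PROTOCOL_SET(
		amba_enable, AMBA_ENABLE_COHERENCY_PROTOCOL_ACE_LITE);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(AMBA_ENABLE), amba_enable);
}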
/* IDVS_GROUP register */
#define IDVS_GROUP_SIZE_SHIFT (16)
diff --git a/mali_kbase/hwcnt/Kbuild b/mali_kbase/hwcnt/Kbuild
new file mode 100644
index 0000000..8c8775f
--- /dev/null
+++ b/mali_kbase/hwcnt/Kbuild
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU license.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+#
+
+mali_kbase-y += \
+ hwcnt/mali_kbase_hwcnt.o \
+ hwcnt/mali_kbase_hwcnt_gpu.o \
+ hwcnt/mali_kbase_hwcnt_gpu_narrow.o \
+ hwcnt/mali_kbase_hwcnt_types.o \
+ hwcnt/mali_kbase_hwcnt_virtualizer.o \
+ hwcnt/mali_kbase_hwcnt_watchdog_if_timer.o
+
+ifeq ($(CONFIG_MALI_CSF_SUPPORT),y)
+ mali_kbase-y += \
+ hwcnt/backend/mali_kbase_hwcnt_backend_csf.o \
+ hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.o
+else
+ mali_kbase-y += \
+ hwcnt/backend/mali_kbase_hwcnt_backend_jm.o \
+ hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.o
+endif
diff --git a/mali_kbase/mali_kbase_hwcnt_backend.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend.h
index b069fc1..6cfa6f5 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend.h
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -56,8 +56,8 @@ struct kbase_hwcnt_backend;
*
* Return: Non-NULL pointer to immutable hardware counter metadata.
*/
-typedef const struct kbase_hwcnt_metadata *kbase_hwcnt_backend_metadata_fn(
- const struct kbase_hwcnt_backend_info *info);
+typedef const struct kbase_hwcnt_metadata *
+kbase_hwcnt_backend_metadata_fn(const struct kbase_hwcnt_backend_info *info);
/**
* typedef kbase_hwcnt_backend_init_fn - Initialise a counter backend.
@@ -69,9 +69,8 @@ typedef const struct kbase_hwcnt_metadata *kbase_hwcnt_backend_metadata_fn(
*
* Return: 0 on success, else error code.
*/
-typedef int kbase_hwcnt_backend_init_fn(
- const struct kbase_hwcnt_backend_info *info,
- struct kbase_hwcnt_backend **out_backend);
+typedef int kbase_hwcnt_backend_init_fn(const struct kbase_hwcnt_backend_info *info,
+ struct kbase_hwcnt_backend **out_backend);
/**
* typedef kbase_hwcnt_backend_term_fn - Terminate a counter backend.
@@ -86,8 +85,7 @@ typedef void kbase_hwcnt_backend_term_fn(struct kbase_hwcnt_backend *backend);
*
* Return: Backend timestamp in nanoseconds.
*/
-typedef u64 kbase_hwcnt_backend_timestamp_ns_fn(
- struct kbase_hwcnt_backend *backend);
+typedef u64 kbase_hwcnt_backend_timestamp_ns_fn(struct kbase_hwcnt_backend *backend);
/**
* typedef kbase_hwcnt_backend_dump_enable_fn - Start counter dumping with the
@@ -102,9 +100,8 @@ typedef u64 kbase_hwcnt_backend_timestamp_ns_fn(
*
* Return: 0 on success, else error code.
*/
-typedef int kbase_hwcnt_backend_dump_enable_fn(
- struct kbase_hwcnt_backend *backend,
- const struct kbase_hwcnt_enable_map *enable_map);
+typedef int kbase_hwcnt_backend_dump_enable_fn(struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map);
/**
* typedef kbase_hwcnt_backend_dump_enable_nolock_fn - Start counter dumping
@@ -118,9 +115,9 @@ typedef int kbase_hwcnt_backend_dump_enable_fn(
*
* Return: 0 on success, else error code.
*/
-typedef int kbase_hwcnt_backend_dump_enable_nolock_fn(
- struct kbase_hwcnt_backend *backend,
- const struct kbase_hwcnt_enable_map *enable_map);
+typedef int
+kbase_hwcnt_backend_dump_enable_nolock_fn(struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map);
/**
* typedef kbase_hwcnt_backend_dump_disable_fn - Disable counter dumping with
@@ -130,8 +127,7 @@ typedef int kbase_hwcnt_backend_dump_enable_nolock_fn(
* If the backend is already disabled, does nothing.
* Any undumped counter values since the last dump get will be lost.
*/
-typedef void kbase_hwcnt_backend_dump_disable_fn(
- struct kbase_hwcnt_backend *backend);
+typedef void kbase_hwcnt_backend_dump_disable_fn(struct kbase_hwcnt_backend *backend);
/**
* typedef kbase_hwcnt_backend_dump_clear_fn - Reset all the current undumped
@@ -142,8 +138,7 @@ typedef void kbase_hwcnt_backend_dump_disable_fn(
*
* Return: 0 on success, else error code.
*/
-typedef int kbase_hwcnt_backend_dump_clear_fn(
- struct kbase_hwcnt_backend *backend);
+typedef int kbase_hwcnt_backend_dump_clear_fn(struct kbase_hwcnt_backend *backend);
/**
* typedef kbase_hwcnt_backend_dump_request_fn - Request an asynchronous counter
@@ -157,9 +152,8 @@ typedef int kbase_hwcnt_backend_dump_clear_fn(
*
* Return: 0 on success, else error code.
*/
-typedef int kbase_hwcnt_backend_dump_request_fn(
- struct kbase_hwcnt_backend *backend,
- u64 *dump_time_ns);
+typedef int kbase_hwcnt_backend_dump_request_fn(struct kbase_hwcnt_backend *backend,
+ u64 *dump_time_ns);
/**
* typedef kbase_hwcnt_backend_dump_wait_fn - Wait until the last requested
@@ -170,8 +164,7 @@ typedef int kbase_hwcnt_backend_dump_request_fn(
*
* Return: 0 on success, else error code.
*/
-typedef int kbase_hwcnt_backend_dump_wait_fn(
- struct kbase_hwcnt_backend *backend);
+typedef int kbase_hwcnt_backend_dump_wait_fn(struct kbase_hwcnt_backend *backend);
/**
* typedef kbase_hwcnt_backend_dump_get_fn - Copy or accumulate enable the
@@ -189,11 +182,10 @@ typedef int kbase_hwcnt_backend_dump_wait_fn(
*
* Return: 0 on success, else error code.
*/
-typedef int kbase_hwcnt_backend_dump_get_fn(
- struct kbase_hwcnt_backend *backend,
- struct kbase_hwcnt_dump_buffer *dump_buffer,
- const struct kbase_hwcnt_enable_map *enable_map,
- bool accumulate);
+typedef int kbase_hwcnt_backend_dump_get_fn(struct kbase_hwcnt_backend *backend,
+ struct kbase_hwcnt_dump_buffer *dump_buffer,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ bool accumulate);
/**
* struct kbase_hwcnt_backend_interface - Hardware counter backend virtual
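
Taken together, the typedefs above describe the whole life cycle of a counter backend: init, enable, request, wait, get, disable, term. The sketch below shows how a client might drive one dump through such an interface; it assumes the interface struct exposes the typedef'd operations as same-named function pointers plus an 'info' field (the actual struct is declared further down in this header), and it abbreviates error handling.

    /* Sketch only: one full dump cycle through a hwcnt backend interface. */
    static int example_one_dump(const struct kbase_hwcnt_backend_interface *iface,
                                const struct kbase_hwcnt_enable_map *map,
                                struct kbase_hwcnt_dump_buffer *buf)
    {
            struct kbase_hwcnt_backend *be;
            u64 dump_time_ns;
            int err = iface->init(iface->info, &be);

            if (err)
                    return err;

            err = iface->dump_enable(be, map);
            if (!err) {
                    err = iface->dump_request(be, &dump_time_ns);
                    if (!err)
                            err = iface->dump_wait(be);
                    if (!err)
                            /* false: copy rather than accumulate into 'buf' */
                            err = iface->dump_get(be, buf, map, false);
                    iface->dump_disable(be);
            }
            iface->term(be);
            return err;
    }
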
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
index 99e8be7..27acfc6 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_csf.c
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -19,9 +19,9 @@
*
*/
-#include "mali_kbase_hwcnt_backend_csf.h"
-#include "mali_kbase_hwcnt_gpu.h"
-#include "mali_kbase_hwcnt_types.h"
+#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf.h"
+#include "hwcnt/mali_kbase_hwcnt_gpu.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
#include <linux/log2.h>
#include <linux/kernel.h>
@@ -267,8 +267,7 @@ struct kbase_hwcnt_backend_csf {
struct work_struct hwc_threshold_work;
};
-static bool kbasep_hwcnt_backend_csf_backend_exists(
- struct kbase_hwcnt_backend_csf_info *csf_info)
+static bool kbasep_hwcnt_backend_csf_backend_exists(struct kbase_hwcnt_backend_csf_info *csf_info)
{
WARN_ON(!csf_info);
csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
@@ -282,19 +281,22 @@ static bool kbasep_hwcnt_backend_csf_backend_exists(
* @backend_csf: Non-NULL pointer to backend.
* @enable_map: Non-NULL pointer to enable map specifying enabled counters.
*/
-static void kbasep_hwcnt_backend_csf_cc_initial_sample(
- struct kbase_hwcnt_backend_csf *backend_csf,
- const struct kbase_hwcnt_enable_map *enable_map)
+static void
+kbasep_hwcnt_backend_csf_cc_initial_sample(struct kbase_hwcnt_backend_csf *backend_csf,
+ const struct kbase_hwcnt_enable_map *enable_map)
{
u64 clk_enable_map = enable_map->clk_enable_map;
u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS];
size_t clk;
+ memset(cycle_counts, 0, sizeof(cycle_counts));
+
/* Read cycle count from CSF interface for both clock domains. */
- backend_csf->info->csf_if->get_gpu_cycle_count(
- backend_csf->info->csf_if->ctx, cycle_counts, clk_enable_map);
+ backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts,
+ clk_enable_map);
- kbase_hwcnt_metadata_for_each_clock(enable_map->metadata, clk) {
+ kbase_hwcnt_metadata_for_each_clock(enable_map->metadata, clk)
+ {
if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, clk))
backend_csf->prev_cycle_count[clk] = cycle_counts[clk];
}
@@ -303,42 +305,37 @@ static void kbasep_hwcnt_backend_csf_cc_initial_sample(
backend_csf->clk_enable_map = clk_enable_map;
}
-static void
-kbasep_hwcnt_backend_csf_cc_update(struct kbase_hwcnt_backend_csf *backend_csf)
+static void kbasep_hwcnt_backend_csf_cc_update(struct kbase_hwcnt_backend_csf *backend_csf)
{
u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS];
size_t clk;
- backend_csf->info->csf_if->assert_lock_held(
- backend_csf->info->csf_if->ctx);
+ memset(cycle_counts, 0, sizeof(cycle_counts));
- backend_csf->info->csf_if->get_gpu_cycle_count(
- backend_csf->info->csf_if->ctx, cycle_counts,
- backend_csf->clk_enable_map);
+ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx);
- kbase_hwcnt_metadata_for_each_clock(backend_csf->info->metadata, clk) {
- if (kbase_hwcnt_clk_enable_map_enabled(
- backend_csf->clk_enable_map, clk)) {
+ backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts,
+ backend_csf->clk_enable_map);
+
+ kbase_hwcnt_metadata_for_each_clock(backend_csf->info->metadata, clk)
+ {
+ if (kbase_hwcnt_clk_enable_map_enabled(backend_csf->clk_enable_map, clk)) {
backend_csf->cycle_count_elapsed[clk] =
- cycle_counts[clk] -
- backend_csf->prev_cycle_count[clk];
+ cycle_counts[clk] - backend_csf->prev_cycle_count[clk];
backend_csf->prev_cycle_count[clk] = cycle_counts[clk];
}
}
}
/* CSF backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */
-static u64
-kbasep_hwcnt_backend_csf_timestamp_ns(struct kbase_hwcnt_backend *backend)
+static u64 kbasep_hwcnt_backend_csf_timestamp_ns(struct kbase_hwcnt_backend *backend)
{
- struct kbase_hwcnt_backend_csf *backend_csf =
- (struct kbase_hwcnt_backend_csf *)backend;
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
if (!backend_csf || !backend_csf->info || !backend_csf->info->csf_if)
return 0;
- return backend_csf->info->csf_if->timestamp_ns(
- backend_csf->info->csf_if->ctx);
+ return backend_csf->info->csf_if->timestamp_ns(backend_csf->info->csf_if->ctx);
}
/** kbasep_hwcnt_backend_csf_process_enable_map() - Process the enable_map to
@@ -347,8 +344,8 @@ kbasep_hwcnt_backend_csf_timestamp_ns(struct kbase_hwcnt_backend *backend)
* required.
*@phys_enable_map: HWC physical enable map to be processed.
*/
-static void kbasep_hwcnt_backend_csf_process_enable_map(
- struct kbase_hwcnt_physical_enable_map *phys_enable_map)
+static void
+kbasep_hwcnt_backend_csf_process_enable_map(struct kbase_hwcnt_physical_enable_map *phys_enable_map)
{
WARN_ON(!phys_enable_map);
@@ -408,19 +405,19 @@ static void kbasep_hwcnt_backend_csf_init_layout(
};
}
-static void kbasep_hwcnt_backend_csf_reset_internal_buffers(
- struct kbase_hwcnt_backend_csf *backend_csf)
+static void
+kbasep_hwcnt_backend_csf_reset_internal_buffers(struct kbase_hwcnt_backend_csf *backend_csf)
{
size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes;
memset(backend_csf->to_user_buf, 0, user_buf_bytes);
memset(backend_csf->accum_buf, 0, user_buf_bytes);
- memset(backend_csf->old_sample_buf, 0,
- backend_csf->info->prfcnt_info.dump_bytes);
+ memset(backend_csf->old_sample_buf, 0, backend_csf->info->prfcnt_info.dump_bytes);
}
-static void kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(
- struct kbase_hwcnt_backend_csf *backend_csf, u32 *sample)
+static void
+kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(struct kbase_hwcnt_backend_csf *backend_csf,
+ u32 *sample)
{
u32 block_idx;
const struct kbase_hwcnt_csf_physical_layout *phys_layout;
@@ -434,8 +431,8 @@ static void kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(
}
}
-static void kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(
- struct kbase_hwcnt_backend_csf *backend_csf)
+static void
+kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(struct kbase_hwcnt_backend_csf *backend_csf)
{
u32 idx;
u32 *sample;
@@ -446,19 +443,16 @@ static void kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(
for (idx = 0; idx < backend_csf->info->ring_buf_cnt; idx++) {
sample = (u32 *)&cpu_dump_base[idx * dump_bytes];
- kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(
- backend_csf, sample);
+ kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(backend_csf, sample);
}
}
-static void kbasep_hwcnt_backend_csf_update_user_sample(
- struct kbase_hwcnt_backend_csf *backend_csf)
+static void kbasep_hwcnt_backend_csf_update_user_sample(struct kbase_hwcnt_backend_csf *backend_csf)
{
size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes;
/* Copy the data into the sample and wait for the user to get it. */
- memcpy(backend_csf->to_user_buf, backend_csf->accum_buf,
- user_buf_bytes);
+ memcpy(backend_csf->to_user_buf, backend_csf->accum_buf, user_buf_bytes);
/* After copied data into user sample, clear the accumulator values to
* prepare for the next accumulator, such as the next request or
@@ -468,9 +462,8 @@ static void kbasep_hwcnt_backend_csf_update_user_sample(
}
static void kbasep_hwcnt_backend_csf_accumulate_sample(
- const struct kbase_hwcnt_csf_physical_layout *phys_layout,
- size_t dump_bytes, u64 *accum_buf, const u32 *old_sample_buf,
- const u32 *new_sample_buf, bool clearing_samples)
+ const struct kbase_hwcnt_csf_physical_layout *phys_layout, size_t dump_bytes,
+ u64 *accum_buf, const u32 *old_sample_buf, const u32 *new_sample_buf, bool clearing_samples)
{
size_t block_idx;
const u32 *old_block = old_sample_buf;
@@ -487,10 +480,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample(
for (block_idx = phys_layout->fw_block_cnt; block_idx < phys_layout->block_cnt;
block_idx++) {
- const u32 old_enable_mask =
- old_block[phys_layout->enable_mask_offset];
- const u32 new_enable_mask =
- new_block[phys_layout->enable_mask_offset];
+ const u32 old_enable_mask = old_block[phys_layout->enable_mask_offset];
+ const u32 new_enable_mask = new_block[phys_layout->enable_mask_offset];
if (new_enable_mask == 0) {
/* Hardware block was unavailable or we didn't turn on
@@ -503,9 +494,7 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample(
size_t ctr_idx;
/* Unconditionally copy the headers. */
- for (ctr_idx = 0;
- ctr_idx < phys_layout->headers_per_block;
- ctr_idx++) {
+ for (ctr_idx = 0; ctr_idx < phys_layout->headers_per_block; ctr_idx++) {
acc_block[ctr_idx] = new_block[ctr_idx];
}
@@ -534,34 +523,25 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample(
* counters only, as we know previous
* values are zeroes.
*/
- for (ctr_idx =
- phys_layout
- ->headers_per_block;
- ctr_idx < values_per_block;
- ctr_idx++) {
- acc_block[ctr_idx] +=
- new_block[ctr_idx];
+ for (ctr_idx = phys_layout->headers_per_block;
+ ctr_idx < values_per_block; ctr_idx++) {
+ acc_block[ctr_idx] += new_block[ctr_idx];
}
} else {
/* Hardware block was previously
* available. Accumulate the delta
* between old and new counter values.
*/
- for (ctr_idx =
- phys_layout
- ->headers_per_block;
- ctr_idx < values_per_block;
- ctr_idx++) {
+ for (ctr_idx = phys_layout->headers_per_block;
+ ctr_idx < values_per_block; ctr_idx++) {
acc_block[ctr_idx] +=
- new_block[ctr_idx] -
- old_block[ctr_idx];
+ new_block[ctr_idx] - old_block[ctr_idx];
}
}
} else {
for (ctr_idx = phys_layout->headers_per_block;
ctr_idx < values_per_block; ctr_idx++) {
- acc_block[ctr_idx] +=
- new_block[ctr_idx];
+ acc_block[ctr_idx] += new_block[ctr_idx];
}
}
}
@@ -570,21 +550,19 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample(
acc_block += values_per_block;
}
- WARN_ON(old_block !=
- old_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
- WARN_ON(new_block !=
- new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
+ WARN_ON(old_block != old_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
+ WARN_ON(new_block != new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
WARN_ON(acc_block != accum_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES) -
(values_per_block * phys_layout->fw_block_cnt));
(void)dump_bytes;
}
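
One detail of the accumulation above is easy to miss: the per-counter samples are u32 and may wrap between dumps, but the delta new - old is computed in u32 arithmetic before being added to the u64 accumulator, so a single wrap per interval is still accumulated correctly. A standalone illustration of that property (not driver code):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t old_cnt = 0xFFFFFFF0u; /* sample taken just before the counter wraps */
            uint32_t new_cnt = 0x00000010u; /* sample taken just after it has wrapped     */
            uint64_t acc = 0;

            /* Unsigned wrap-around makes the u32 delta correct: 0x20 events. */
            acc += (uint32_t)(new_cnt - old_cnt);
            assert(acc == 0x20);
            return 0;
    }
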
-static void kbasep_hwcnt_backend_csf_accumulate_samples(
- struct kbase_hwcnt_backend_csf *backend_csf, u32 extract_index_to_start,
- u32 insert_index_to_stop)
+static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backend_csf *backend_csf,
+ u32 extract_index_to_start,
+ u32 insert_index_to_stop)
{
u32 raw_idx;
- unsigned long flags;
+ unsigned long flags = 0UL;
u8 *cpu_dump_base = (u8 *)backend_csf->ring_buf_cpu_base;
const size_t ring_buf_cnt = backend_csf->info->ring_buf_cnt;
const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes;
@@ -598,25 +576,22 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(
/* Sync all the buffers to CPU side before read the data. */
backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx,
- backend_csf->ring_buf,
- extract_index_to_start,
+ backend_csf->ring_buf, extract_index_to_start,
insert_index_to_stop, true);
/* Consider u32 wrap case, '!=' is used here instead of '<' operator */
- for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop;
- raw_idx++) {
+ for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop; raw_idx++) {
/* The logical "&" acts as a modulo operation since buf_count
* must be a power of two.
*/
const u32 buf_idx = raw_idx & (ring_buf_cnt - 1);
- new_sample_buf =
- (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes];
+ new_sample_buf = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes];
- kbasep_hwcnt_backend_csf_accumulate_sample(
- &backend_csf->phys_layout, buf_dump_bytes,
- backend_csf->accum_buf, old_sample_buf, new_sample_buf,
- clearing_samples);
+ kbasep_hwcnt_backend_csf_accumulate_sample(&backend_csf->phys_layout,
+ buf_dump_bytes, backend_csf->accum_buf,
+ old_sample_buf, new_sample_buf,
+ clearing_samples);
old_sample_buf = new_sample_buf;
}
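
Two points in the ring-buffer walk above deserve a note: the raw extract/insert indexes are free-running u32 values, so the loop uses '!=' instead of '<' to stay correct across wrap-around, and masking with (ring_buf_cnt - 1) only substitutes for a modulo because the ring buffer count is a power of two. A standalone illustration (not driver code):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            const uint32_t ring_buf_cnt = 8; /* must be a power of two */
            uint32_t extract = 0xFFFFFFFEu;  /* free-running index...  */
            uint32_t insert = 0x00000002u;   /* ...that has wrapped    */
            uint32_t raw_idx;

            /* '!=' keeps the walk correct even though insert < extract numerically. */
            for (raw_idx = extract; raw_idx != insert; raw_idx++) {
                    uint32_t buf_idx = raw_idx & (ring_buf_cnt - 1);

                    assert(buf_idx == raw_idx % ring_buf_cnt);
            }
            return 0;
    }
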
@@ -625,19 +600,16 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(
memcpy(backend_csf->old_sample_buf, new_sample_buf, buf_dump_bytes);
/* Reset the prfcnt_en header on each sample before releasing them. */
- for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop;
- raw_idx++) {
+ for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop; raw_idx++) {
const u32 buf_idx = raw_idx & (ring_buf_cnt - 1);
u32 *sample = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes];
- kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(
- backend_csf, sample);
+ kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(backend_csf, sample);
}
/* Sync zeroed buffers to avoid coherency issues on future use. */
backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx,
- backend_csf->ring_buf,
- extract_index_to_start,
+ backend_csf->ring_buf, extract_index_to_start,
insert_index_to_stop, false);
/* After consuming all samples between extract_idx and insert_idx,
@@ -645,22 +617,20 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(
* can be released back to the ring buffer pool.
*/
backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
- backend_csf->info->csf_if->set_extract_index(
- backend_csf->info->csf_if->ctx, insert_index_to_stop);
+ backend_csf->info->csf_if->set_extract_index(backend_csf->info->csf_if->ctx,
+ insert_index_to_stop);
/* Update the watchdog last seen index to check any new FW auto samples
* in next watchdog callback.
*/
backend_csf->watchdog_last_seen_insert_idx = insert_index_to_stop;
- backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
- flags);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
}
static void kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
struct kbase_hwcnt_backend_csf *backend_csf,
enum kbase_hwcnt_backend_csf_enable_state new_state)
{
- backend_csf->info->csf_if->assert_lock_held(
- backend_csf->info->csf_if->ctx);
+ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx);
if (backend_csf->enable_state != new_state) {
backend_csf->enable_state = new_state;
@@ -673,7 +643,7 @@ static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info)
{
struct kbase_hwcnt_backend_csf_info *csf_info = info;
struct kbase_hwcnt_backend_csf *backend_csf;
- unsigned long flags;
+ unsigned long flags = 0UL;
csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags);
@@ -691,26 +661,22 @@ static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info)
(!csf_info->fw_in_protected_mode) &&
/* 3. dump state indicates no other dumping is in progress. */
((backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) ||
- (backend_csf->dump_state ==
- KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED))) {
- u32 extract_index;
- u32 insert_index;
+ (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED))) {
+ u32 extract_index = 0U;
+ u32 insert_index = 0U;
/* Read the raw extract and insert indexes from the CSF interface. */
- csf_info->csf_if->get_indexes(csf_info->csf_if->ctx,
- &extract_index, &insert_index);
+ csf_info->csf_if->get_indexes(csf_info->csf_if->ctx, &extract_index, &insert_index);
/* Do watchdog request if no new FW auto samples. */
- if (insert_index ==
- backend_csf->watchdog_last_seen_insert_idx) {
+ if (insert_index == backend_csf->watchdog_last_seen_insert_idx) {
/* Trigger the watchdog request. */
csf_info->csf_if->dump_request(csf_info->csf_if->ctx);
/* A watchdog dump is required, change the state to
* start the request process.
*/
- backend_csf->dump_state =
- KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED;
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED;
}
}
@@ -719,12 +685,10 @@ static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info)
* counter enabled interrupt.
*/
if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) ||
- (backend_csf->enable_state ==
- KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED)) {
+ (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED)) {
/* Reschedule the timer for next watchdog callback. */
- csf_info->watchdog_if->modify(
- csf_info->watchdog_if->timer,
- HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS);
+ csf_info->watchdog_if->modify(csf_info->watchdog_if->timer,
+ HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS);
}
csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
@@ -740,15 +704,14 @@ static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info)
*/
static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work)
{
- unsigned long flags;
+ unsigned long flags = 0ULL;
struct kbase_hwcnt_backend_csf *backend_csf;
u32 insert_index_to_acc;
- u32 extract_index;
- u32 insert_index;
+ u32 extract_index = 0U;
+ u32 insert_index = 0U;
WARN_ON(!work);
- backend_csf = container_of(work, struct kbase_hwcnt_backend_csf,
- hwc_dump_work);
+ backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, hwc_dump_work);
backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
/* Assert the backend is not destroyed. */
WARN_ON(backend_csf != backend_csf->info->backend);
@@ -757,26 +720,22 @@ static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work)
* launched.
*/
if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
- WARN_ON(backend_csf->dump_state !=
- KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE);
+ WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE);
WARN_ON(!completion_done(&backend_csf->dump_completed));
- backend_csf->info->csf_if->unlock(
- backend_csf->info->csf_if->ctx, flags);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
return;
}
- WARN_ON(backend_csf->dump_state !=
- KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED);
+ WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED);
backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING;
insert_index_to_acc = backend_csf->insert_index_to_accumulate;
/* Read the raw extract and insert indexes from the CSF interface. */
- backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx,
- &extract_index, &insert_index);
+ backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx, &extract_index,
+ &insert_index);
- backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
- flags);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
/* Accumulate up to the insert we grabbed at the prfcnt request
* interrupt.
@@ -797,22 +756,18 @@ static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work)
/* The backend was disabled or had an error while we were accumulating.
*/
if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
- WARN_ON(backend_csf->dump_state !=
- KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE);
+ WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE);
WARN_ON(!completion_done(&backend_csf->dump_completed));
- backend_csf->info->csf_if->unlock(
- backend_csf->info->csf_if->ctx, flags);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
return;
}
- WARN_ON(backend_csf->dump_state !=
- KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING);
+ WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING);
/* Our work here is done - set the wait object and unblock waiters. */
backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED;
complete_all(&backend_csf->dump_completed);
- backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
- flags);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
}
/**
@@ -825,30 +780,28 @@ static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work)
*/
static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work)
{
- unsigned long flags;
+ unsigned long flags = 0ULL;
struct kbase_hwcnt_backend_csf *backend_csf;
- u32 extract_index;
- u32 insert_index;
+ u32 extract_index = 0U;
+ u32 insert_index = 0U;
WARN_ON(!work);
- backend_csf = container_of(work, struct kbase_hwcnt_backend_csf,
- hwc_threshold_work);
+ backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, hwc_threshold_work);
backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
/* Assert the backend is not destroyed. */
WARN_ON(backend_csf != backend_csf->info->backend);
/* Read the raw extract and insert indexes from the CSF interface. */
- backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx,
- &extract_index, &insert_index);
+ backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx, &extract_index,
+ &insert_index);
/* The backend was disabled or had an error while the worker was being
* launched.
*/
if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
- backend_csf->info->csf_if->unlock(
- backend_csf->info->csf_if->ctx, flags);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
return;
}
@@ -857,14 +810,11 @@ static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work)
* interfere.
*/
if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) &&
- (backend_csf->dump_state !=
- KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)) {
- backend_csf->info->csf_if->unlock(
- backend_csf->info->csf_if->ctx, flags);
+ (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)) {
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
return;
}
- backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
- flags);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
/* Accumulate everything we possibly can. We grabbed the insert index
* immediately after we acquired the lock but before we checked whether
@@ -873,14 +823,13 @@ static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work)
* fact that our insert will not exceed the concurrent dump's
* insert_to_accumulate, so we don't risk accumulating too much data.
*/
- kbasep_hwcnt_backend_csf_accumulate_samples(backend_csf, extract_index,
- insert_index);
+ kbasep_hwcnt_backend_csf_accumulate_samples(backend_csf, extract_index, insert_index);
/* No need to wake up anything since it is not a user dump request. */
}
-static void kbase_hwcnt_backend_csf_submit_dump_worker(
- struct kbase_hwcnt_backend_csf_info *csf_info)
+static void
+kbase_hwcnt_backend_csf_submit_dump_worker(struct kbase_hwcnt_backend_csf_info *csf_info)
{
u32 extract_index;
@@ -888,31 +837,26 @@ static void kbase_hwcnt_backend_csf_submit_dump_worker(
csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
WARN_ON(!kbasep_hwcnt_backend_csf_backend_exists(csf_info));
- WARN_ON(csf_info->backend->enable_state !=
- KBASE_HWCNT_BACKEND_CSF_ENABLED);
- WARN_ON(csf_info->backend->dump_state !=
- KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT);
+ WARN_ON(csf_info->backend->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED);
+ WARN_ON(csf_info->backend->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT);
/* Save insert index now so that the dump worker only accumulates the
* HWC data associated with this request. Extract index is not stored
* as that needs to be checked when accumulating to prevent re-reading
* buffers that have already been read and returned to the GPU.
*/
- csf_info->csf_if->get_indexes(
- csf_info->csf_if->ctx, &extract_index,
- &csf_info->backend->insert_index_to_accumulate);
- csf_info->backend->dump_state =
- KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED;
+ csf_info->csf_if->get_indexes(csf_info->csf_if->ctx, &extract_index,
+ &csf_info->backend->insert_index_to_accumulate);
+ csf_info->backend->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED;
/* Submit the accumulator task into the work queue. */
- queue_work(csf_info->backend->hwc_dump_workq,
- &csf_info->backend->hwc_dump_work);
+ queue_work(csf_info->backend->hwc_dump_workq, &csf_info->backend->hwc_dump_work);
}
-static void kbasep_hwcnt_backend_csf_get_physical_enable(
- struct kbase_hwcnt_backend_csf *backend_csf,
- const struct kbase_hwcnt_enable_map *enable_map,
- struct kbase_hwcnt_backend_csf_if_enable *enable)
+static void
+kbasep_hwcnt_backend_csf_get_physical_enable(struct kbase_hwcnt_backend_csf *backend_csf,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ struct kbase_hwcnt_backend_csf_if_enable *enable)
{
enum kbase_hwcnt_physical_set phys_counter_set;
struct kbase_hwcnt_physical_enable_map phys_enable_map;
@@ -924,8 +868,7 @@ static void kbasep_hwcnt_backend_csf_get_physical_enable(
*/
kbasep_hwcnt_backend_csf_process_enable_map(&phys_enable_map);
- kbase_hwcnt_gpu_set_to_physical(&phys_counter_set,
- backend_csf->info->counter_set);
+ kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, backend_csf->info->counter_set);
/* Use processed enable_map to enable HWC in HW level. */
enable->fe_bm = phys_enable_map.fe_bm;
@@ -937,33 +880,29 @@ static void kbasep_hwcnt_backend_csf_get_physical_enable(
}
/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */
-static int kbasep_hwcnt_backend_csf_dump_enable_nolock(
- struct kbase_hwcnt_backend *backend,
- const struct kbase_hwcnt_enable_map *enable_map)
+static int
+kbasep_hwcnt_backend_csf_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map)
{
- struct kbase_hwcnt_backend_csf *backend_csf =
- (struct kbase_hwcnt_backend_csf *)backend;
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
struct kbase_hwcnt_backend_csf_if_enable enable;
int err;
- if (!backend_csf || !enable_map ||
- (enable_map->metadata != backend_csf->info->metadata))
+ if (!backend_csf || !enable_map || (enable_map->metadata != backend_csf->info->metadata))
return -EINVAL;
- backend_csf->info->csf_if->assert_lock_held(
- backend_csf->info->csf_if->ctx);
+ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx);
- kbasep_hwcnt_backend_csf_get_physical_enable(backend_csf, enable_map,
- &enable);
+ kbasep_hwcnt_backend_csf_get_physical_enable(backend_csf, enable_map, &enable);
/* enable_state should be DISABLED before we transfer it to enabled */
if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED)
return -EIO;
- err = backend_csf->info->watchdog_if->enable(
- backend_csf->info->watchdog_if->timer,
- HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS,
- kbasep_hwcnt_backend_watchdog_timer_cb, backend_csf->info);
+ err = backend_csf->info->watchdog_if->enable(backend_csf->info->watchdog_if->timer,
+ HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS,
+ kbasep_hwcnt_backend_watchdog_timer_cb,
+ backend_csf->info);
if (err)
return err;
@@ -981,58 +920,46 @@ static int kbasep_hwcnt_backend_csf_dump_enable_nolock(
}
/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_fn */
-static int kbasep_hwcnt_backend_csf_dump_enable(
- struct kbase_hwcnt_backend *backend,
- const struct kbase_hwcnt_enable_map *enable_map)
+static int kbasep_hwcnt_backend_csf_dump_enable(struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map)
{
int errcode;
- unsigned long flags;
- struct kbase_hwcnt_backend_csf *backend_csf =
- (struct kbase_hwcnt_backend_csf *)backend;
+ unsigned long flags = 0UL;
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
if (!backend_csf)
return -EINVAL;
backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
- errcode = kbasep_hwcnt_backend_csf_dump_enable_nolock(backend,
- enable_map);
- backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
- flags);
+ errcode = kbasep_hwcnt_backend_csf_dump_enable_nolock(backend, enable_map);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
return errcode;
}
static void kbasep_hwcnt_backend_csf_wait_enable_transition_complete(
struct kbase_hwcnt_backend_csf *backend_csf, unsigned long *lock_flags)
{
- backend_csf->info->csf_if->assert_lock_held(
- backend_csf->info->csf_if->ctx);
-
- while ((backend_csf->enable_state ==
- KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) ||
- (backend_csf->enable_state ==
- KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED)) {
- backend_csf->info->csf_if->unlock(
- backend_csf->info->csf_if->ctx, *lock_flags);
-
- wait_event(
- backend_csf->enable_state_waitq,
- (backend_csf->enable_state !=
- KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) &&
- (backend_csf->enable_state !=
- KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED));
-
- backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx,
- lock_flags);
+ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx);
+
+ while ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) ||
+ (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED)) {
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, *lock_flags);
+
+ wait_event(backend_csf->enable_state_waitq,
+ (backend_csf->enable_state !=
+ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) &&
+ (backend_csf->enable_state !=
+ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED));
+
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, lock_flags);
}
}
/* CSF backend implementation of kbase_hwcnt_backend_dump_disable_fn */
-static void
-kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
+static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
{
- unsigned long flags;
- struct kbase_hwcnt_backend_csf *backend_csf =
- (struct kbase_hwcnt_backend_csf *)backend;
+ unsigned long flags = 0UL;
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
bool do_disable = false;
WARN_ON(!backend_csf);
@@ -1042,24 +969,20 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
/* Make sure we wait until any previous enable or disable have completed
* before doing anything.
*/
- kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf,
- &flags);
+ kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf, &flags);
if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED ||
- backend_csf->enable_state ==
- KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) {
+ backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) {
/* If we are already disabled or in an unrecoverable error
* state, there is nothing for us to do.
*/
- backend_csf->info->csf_if->unlock(
- backend_csf->info->csf_if->ctx, flags);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
return;
}
if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) {
kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
- backend_csf,
- KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED);
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED);
backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
complete_all(&backend_csf->dump_completed);
/* Only disable if we were previously enabled - in all other
@@ -1071,15 +994,13 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE);
WARN_ON(!completion_done(&backend_csf->dump_completed));
- backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
- flags);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
/* Deregister the timer and block until any timer callback has completed.
* We've transitioned out of the ENABLED state so we can guarantee it
* won't reschedule itself.
*/
- backend_csf->info->watchdog_if->disable(
- backend_csf->info->watchdog_if->timer);
+ backend_csf->info->watchdog_if->disable(backend_csf->info->watchdog_if->timer);
/* Block until any async work has completed. We have transitioned out of
* the ENABLED state so we can guarantee no new work will concurrently
@@ -1090,11 +1011,9 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
if (do_disable)
- backend_csf->info->csf_if->dump_disable(
- backend_csf->info->csf_if->ctx);
+ backend_csf->info->csf_if->dump_disable(backend_csf->info->csf_if->ctx);
- kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf,
- &flags);
+ kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf, &flags);
switch (backend_csf->enable_state) {
case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER:
@@ -1103,8 +1022,7 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
break;
case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER:
kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
- backend_csf,
- KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR);
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR);
break;
default:
WARN_ON(true);
@@ -1114,8 +1032,7 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
backend_csf->user_requested = false;
backend_csf->watchdog_last_seen_insert_idx = 0;
- backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
- flags);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
/* After disable, zero the header of all buffers in the ring buffer back
* to 0 to prepare for the next enable.
@@ -1123,9 +1040,9 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(backend_csf);
/* Sync zeroed buffers to avoid coherency issues on future use. */
- backend_csf->info->csf_if->ring_buf_sync(
- backend_csf->info->csf_if->ctx, backend_csf->ring_buf, 0,
- backend_csf->info->ring_buf_cnt, false);
+ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx,
+ backend_csf->ring_buf, 0,
+ backend_csf->info->ring_buf_cnt, false);
/* Reset accumulator, old_sample_buf and user_sample to all-0 to prepare
* for next enable.
@@ -1134,13 +1051,11 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
}
/* CSF backend implementation of kbase_hwcnt_backend_dump_request_fn */
-static int
-kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
- u64 *dump_time_ns)
+static int kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
+ u64 *dump_time_ns)
{
- unsigned long flags;
- struct kbase_hwcnt_backend_csf *backend_csf =
- (struct kbase_hwcnt_backend_csf *)backend;
+ unsigned long flags = 0UL;
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
bool do_request = false;
bool watchdog_dumping = false;
@@ -1153,22 +1068,18 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
* the user dump buffer is already zeroed. We can just short circuit to
* the DUMP_COMPLETED state.
*/
- if (backend_csf->enable_state ==
- KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) {
- backend_csf->dump_state =
- KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED;
+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) {
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED;
*dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend);
kbasep_hwcnt_backend_csf_cc_update(backend_csf);
backend_csf->user_requested = true;
- backend_csf->info->csf_if->unlock(
- backend_csf->info->csf_if->ctx, flags);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
return 0;
}
/* Otherwise, make sure we're already enabled. */
if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
- backend_csf->info->csf_if->unlock(
- backend_csf->info->csf_if->ctx, flags);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
return -EIO;
}
@@ -1181,15 +1092,12 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
* request can be processed instead of ignored.
*/
if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) &&
- (backend_csf->dump_state !=
- KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) &&
- (backend_csf->dump_state !=
- KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED)) {
+ (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) &&
+ (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED)) {
/* HWC is disabled or another user dump is ongoing,
* or we're on fault.
*/
- backend_csf->info->csf_if->unlock(
- backend_csf->info->csf_if->ctx, flags);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
/* HWC is disabled or another dump is ongoing, or we are on
* fault.
*/
@@ -1199,8 +1107,7 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
/* Reset the completion so dump_wait() has something to wait on. */
reinit_completion(&backend_csf->dump_completed);
- if (backend_csf->dump_state ==
- KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED)
+ if (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED)
watchdog_dumping = true;
if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) &&
@@ -1208,15 +1115,13 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
/* Only do the request if we are fully enabled and not in
* protected mode.
*/
- backend_csf->dump_state =
- KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED;
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED;
do_request = true;
} else {
/* Skip the request and waiting for ack and go straight to
* checking the insert and kicking off the worker to do the dump
*/
- backend_csf->dump_state =
- KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT;
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT;
}
/* CSF firmware might enter protected mode now, but still call request.
@@ -1238,31 +1143,26 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
* ownership of the sample which watchdog requested.
*/
if (!watchdog_dumping)
- backend_csf->info->csf_if->dump_request(
- backend_csf->info->csf_if->ctx);
+ backend_csf->info->csf_if->dump_request(backend_csf->info->csf_if->ctx);
} else
kbase_hwcnt_backend_csf_submit_dump_worker(backend_csf->info);
- backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
- flags);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
/* Modify watchdog timer to delay the regular check time since
* just requested.
*/
- backend_csf->info->watchdog_if->modify(
- backend_csf->info->watchdog_if->timer,
- HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS);
+ backend_csf->info->watchdog_if->modify(backend_csf->info->watchdog_if->timer,
+ HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS);
return 0;
}
/* CSF backend implementation of kbase_hwcnt_backend_dump_wait_fn */
-static int
-kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend)
+static int kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend)
{
- unsigned long flags;
- struct kbase_hwcnt_backend_csf *backend_csf =
- (struct kbase_hwcnt_backend_csf *)backend;
+ unsigned long flags = 0UL;
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
int errcode;
if (!backend_csf)
@@ -1275,26 +1175,21 @@ kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend)
* set.
*/
if (backend_csf->user_requested &&
- ((backend_csf->dump_state ==
- KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) ||
- (backend_csf->dump_state ==
- KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED)))
+ ((backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) ||
+ (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED)))
errcode = 0;
else
errcode = -EIO;
- backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
- flags);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
return errcode;
}
/* CSF backend implementation of kbase_hwcnt_backend_dump_clear_fn */
-static int
-kbasep_hwcnt_backend_csf_dump_clear(struct kbase_hwcnt_backend *backend)
+static int kbasep_hwcnt_backend_csf_dump_clear(struct kbase_hwcnt_backend *backend)
{
- struct kbase_hwcnt_backend_csf *backend_csf =
- (struct kbase_hwcnt_backend_csf *)backend;
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
int errcode;
u64 ts;
@@ -1313,13 +1208,12 @@ kbasep_hwcnt_backend_csf_dump_clear(struct kbase_hwcnt_backend *backend)
}
/* CSF backend implementation of kbase_hwcnt_backend_dump_get_fn */
-static int kbasep_hwcnt_backend_csf_dump_get(
- struct kbase_hwcnt_backend *backend,
- struct kbase_hwcnt_dump_buffer *dst,
- const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate)
+static int kbasep_hwcnt_backend_csf_dump_get(struct kbase_hwcnt_backend *backend,
+ struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map,
+ bool accumulate)
{
- struct kbase_hwcnt_backend_csf *backend_csf =
- (struct kbase_hwcnt_backend_csf *)backend;
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
int ret;
size_t clk;
@@ -1329,9 +1223,9 @@ static int kbasep_hwcnt_backend_csf_dump_get(
return -EINVAL;
/* Extract elapsed cycle count for each clock domain if enabled. */
- kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) {
- if (!kbase_hwcnt_clk_enable_map_enabled(
- dst_enable_map->clk_enable_map, clk))
+ kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk)
+ {
+ if (!kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
continue;
/* Reset the counter to zero if accumulation is off. */
@@ -1344,8 +1238,7 @@ static int kbasep_hwcnt_backend_csf_dump_get(
* as it is undefined to call this function without a prior succeeding
* one to dump_wait().
*/
- ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf,
- dst_enable_map, accumulate);
+ ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf, dst_enable_map, accumulate);
return ret;
}
@@ -1357,8 +1250,7 @@ static int kbasep_hwcnt_backend_csf_dump_get(
* Can be safely called on a backend in any state of partial construction.
*
*/
-static void
-kbasep_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_csf *backend_csf)
+static void kbasep_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_csf *backend_csf)
{
if (!backend_csf)
return;
@@ -1388,9 +1280,8 @@ kbasep_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_csf *backend_csf)
*
* Return: 0 on success, else error code.
*/
-static int
-kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info,
- struct kbase_hwcnt_backend_csf **out_backend)
+static int kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info,
+ struct kbase_hwcnt_backend_csf **out_backend)
{
struct kbase_hwcnt_backend_csf *backend_csf = NULL;
int errcode = -ENOMEM;
@@ -1403,27 +1294,23 @@ kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info,
goto alloc_error;
backend_csf->info = csf_info;
- kbasep_hwcnt_backend_csf_init_layout(&csf_info->prfcnt_info,
- &backend_csf->phys_layout);
+ kbasep_hwcnt_backend_csf_init_layout(&csf_info->prfcnt_info, &backend_csf->phys_layout);
- backend_csf->accum_buf =
- kzalloc(csf_info->metadata->dump_buf_bytes, GFP_KERNEL);
+ backend_csf->accum_buf = kzalloc(csf_info->metadata->dump_buf_bytes, GFP_KERNEL);
if (!backend_csf->accum_buf)
goto err_alloc_acc_buf;
- backend_csf->old_sample_buf =
- kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL);
+ backend_csf->old_sample_buf = kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL);
if (!backend_csf->old_sample_buf)
goto err_alloc_pre_sample_buf;
- backend_csf->to_user_buf =
- kzalloc(csf_info->metadata->dump_buf_bytes, GFP_KERNEL);
+ backend_csf->to_user_buf = kzalloc(csf_info->metadata->dump_buf_bytes, GFP_KERNEL);
if (!backend_csf->to_user_buf)
goto err_alloc_user_sample_buf;
- errcode = csf_info->csf_if->ring_buf_alloc(
- csf_info->csf_if->ctx, csf_info->ring_buf_cnt,
- &backend_csf->ring_buf_cpu_base, &backend_csf->ring_buf);
+ errcode = csf_info->csf_if->ring_buf_alloc(csf_info->csf_if->ctx, csf_info->ring_buf_cnt,
+ &backend_csf->ring_buf_cpu_base,
+ &backend_csf->ring_buf);
if (errcode)
goto err_ring_buf_alloc;
errcode = -ENOMEM;
@@ -1432,9 +1319,9 @@ kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info,
kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(backend_csf);
/* Sync zeroed buffers to avoid coherency issues on use. */
- backend_csf->info->csf_if->ring_buf_sync(
- backend_csf->info->csf_if->ctx, backend_csf->ring_buf, 0,
- backend_csf->info->ring_buf_cnt, false);
+ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx,
+ backend_csf->ring_buf, 0,
+ backend_csf->info->ring_buf_cnt, false);
init_completion(&backend_csf->dump_completed);
@@ -1448,10 +1335,8 @@ kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info,
if (!backend_csf->hwc_dump_workq)
goto err_alloc_workqueue;
- INIT_WORK(&backend_csf->hwc_dump_work,
- kbasep_hwcnt_backend_csf_dump_worker);
- INIT_WORK(&backend_csf->hwc_threshold_work,
- kbasep_hwcnt_backend_csf_threshold_worker);
+ INIT_WORK(&backend_csf->hwc_dump_work, kbasep_hwcnt_backend_csf_dump_worker);
+ INIT_WORK(&backend_csf->hwc_threshold_work, kbasep_hwcnt_backend_csf_threshold_worker);
backend_csf->enable_state = KBASE_HWCNT_BACKEND_CSF_DISABLED;
backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
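
The INIT_WORK() calls above wire up the same deferral pattern used by both the dump and threshold paths: the interrupt/lock-holding side only queues work, and the worker recovers its backend with container_of() before doing the accumulation outside atomic context. A compact sketch of that pattern, with illustrative names and workqueue flags rather than the driver's own:

    #include <linux/errno.h>
    #include <linux/kernel.h>
    #include <linux/workqueue.h>

    struct example_hwcnt_ctx {
            struct workqueue_struct *wq;
            struct work_struct dump_work;
    };

    static void example_dump_worker(struct work_struct *work)
    {
            struct example_hwcnt_ctx *ctx =
                    container_of(work, struct example_hwcnt_ctx, dump_work);

            /* ...accumulate samples for ctx outside atomic context... */
            (void)ctx;
    }

    static int example_hwcnt_ctx_init(struct example_hwcnt_ctx *ctx)
    {
            ctx->wq = alloc_workqueue("example_hwcnt_wq", WQ_HIGHPRI | WQ_UNBOUND, 1);
            if (!ctx->wq)
                    return -ENOMEM;
            INIT_WORK(&ctx->dump_work, example_dump_worker);
            return 0;
    }

    /* From the IRQ/lock-holding path, only this is needed:
     *         queue_work(ctx->wq, &ctx->dump_work);
     */
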
@@ -1481,14 +1366,12 @@ alloc_error:
}
/* CSF backend implementation of kbase_hwcnt_backend_init_fn */
-static int
-kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *info,
- struct kbase_hwcnt_backend **out_backend)
+static int kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *info,
+ struct kbase_hwcnt_backend **out_backend)
{
- unsigned long flags;
+ unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = NULL;
- struct kbase_hwcnt_backend_csf_info *csf_info =
- (struct kbase_hwcnt_backend_csf_info *)info;
+ struct kbase_hwcnt_backend_csf_info *csf_info = (struct kbase_hwcnt_backend_csf_info *)info;
int errcode;
bool success = false;
@@ -1509,11 +1392,9 @@ kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *info,
*out_backend = (struct kbase_hwcnt_backend *)backend_csf;
success = true;
if (csf_info->unrecoverable_error_happened)
- backend_csf->enable_state =
- KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR;
+ backend_csf->enable_state = KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR;
}
- backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
- flags);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
/* Destroy the new created backend if the backend has already created
* before. In normal case, this won't happen if the client call init()
@@ -1530,9 +1411,8 @@ kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *info,
/* CSF backend implementation of kbase_hwcnt_backend_term_fn */
static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend)
{
- unsigned long flags;
- struct kbase_hwcnt_backend_csf *backend_csf =
- (struct kbase_hwcnt_backend_csf *)backend;
+ unsigned long flags = 0UL;
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
if (!backend)
return;
@@ -1544,8 +1424,7 @@ static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend)
*/
backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
backend_csf->info->backend = NULL;
- backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
- flags);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
kbasep_hwcnt_backend_csf_destroy(backend_csf);
}
@@ -1557,8 +1436,7 @@ static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend)
* Can be safely called on a backend info in any state of partial construction.
*
*/
-static void kbasep_hwcnt_backend_csf_info_destroy(
- const struct kbase_hwcnt_backend_csf_info *info)
+static void kbasep_hwcnt_backend_csf_info_destroy(const struct kbase_hwcnt_backend_csf_info *info)
{
if (!info)
return;
@@ -1585,10 +1463,10 @@ static void kbasep_hwcnt_backend_csf_info_destroy(
*
* Return: 0 on success, else error code.
*/
-static int kbasep_hwcnt_backend_csf_info_create(
- struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
- struct kbase_hwcnt_watchdog_interface *watchdog_if,
- const struct kbase_hwcnt_backend_csf_info **out_info)
+static int
+kbasep_hwcnt_backend_csf_info_create(struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
+ struct kbase_hwcnt_watchdog_interface *watchdog_if,
+ const struct kbase_hwcnt_backend_csf_info **out_info)
{
struct kbase_hwcnt_backend_csf_info *info = NULL;
@@ -1611,8 +1489,7 @@ static int kbasep_hwcnt_backend_csf_info_create(
.counter_set = KBASE_HWCNT_SET_PRIMARY,
#endif
.backend = NULL, .csf_if = csf_if, .ring_buf_cnt = ring_buf_cnt,
- .fw_in_protected_mode = false,
- .unrecoverable_error_happened = false,
+ .fw_in_protected_mode = false, .unrecoverable_error_happened = false,
.watchdog_if = watchdog_if,
};
*out_info = info;
@@ -1632,19 +1509,17 @@ kbasep_hwcnt_backend_csf_metadata(const struct kbase_hwcnt_backend_info *info)
return ((const struct kbase_hwcnt_backend_csf_info *)info)->metadata;
}
-static void kbasep_hwcnt_backend_csf_handle_unrecoverable_error(
- struct kbase_hwcnt_backend_csf *backend_csf)
+static void
+kbasep_hwcnt_backend_csf_handle_unrecoverable_error(struct kbase_hwcnt_backend_csf *backend_csf)
{
bool do_disable = false;
- backend_csf->info->csf_if->assert_lock_held(
- backend_csf->info->csf_if->ctx);
+ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx);
/* We are already in or transitioning to the unrecoverable error state.
* Early out.
*/
- if ((backend_csf->enable_state ==
- KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) ||
+ if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) ||
(backend_csf->enable_state ==
KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER))
return;
@@ -1654,8 +1529,7 @@ static void kbasep_hwcnt_backend_csf_handle_unrecoverable_error(
*/
if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED) {
kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
- backend_csf,
- KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR);
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR);
return;
}
@@ -1663,12 +1537,11 @@ static void kbasep_hwcnt_backend_csf_handle_unrecoverable_error(
* disabled, we don't want to disable twice if an unrecoverable error
* happens while we are disabling.
*/
- do_disable = (backend_csf->enable_state !=
- KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED);
+ do_disable =
+ (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED);
kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
- backend_csf,
- KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER);
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER);
/* Transition the dump to the IDLE state and unblock any waiters. The
* IDLE state signifies an error.
@@ -1681,15 +1554,13 @@ static void kbasep_hwcnt_backend_csf_handle_unrecoverable_error(
* happens while we are disabling.
*/
if (do_disable)
- backend_csf->info->csf_if->dump_disable(
- backend_csf->info->csf_if->ctx);
+ backend_csf->info->csf_if->dump_disable(backend_csf->info->csf_if->ctx);
}
-static void kbasep_hwcnt_backend_csf_handle_recoverable_error(
- struct kbase_hwcnt_backend_csf *backend_csf)
+static void
+kbasep_hwcnt_backend_csf_handle_recoverable_error(struct kbase_hwcnt_backend_csf *backend_csf)
{
- backend_csf->info->csf_if->assert_lock_held(
- backend_csf->info->csf_if->ctx);
+ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx);
switch (backend_csf->enable_state) {
case KBASE_HWCNT_BACKEND_CSF_DISABLED:
@@ -1705,8 +1576,7 @@ static void kbasep_hwcnt_backend_csf_handle_recoverable_error(
/* A seemingly recoverable error that occurs while we are
* transitioning to enabled is probably unrecoverable.
*/
- kbasep_hwcnt_backend_csf_handle_unrecoverable_error(
- backend_csf);
+ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(backend_csf);
return;
case KBASE_HWCNT_BACKEND_CSF_ENABLED:
/* Start transitioning to the disabled state. We can't wait for
@@ -1715,22 +1585,19 @@ static void kbasep_hwcnt_backend_csf_handle_recoverable_error(
* disable().
*/
kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
- backend_csf,
- KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED);
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED);
/* Transition the dump to the IDLE state and unblock any
* waiters. The IDLE state signifies an error.
*/
backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
complete_all(&backend_csf->dump_completed);
- backend_csf->info->csf_if->dump_disable(
- backend_csf->info->csf_if->ctx);
+ backend_csf->info->csf_if->dump_disable(backend_csf->info->csf_if->ctx);
return;
}
}
-void kbase_hwcnt_backend_csf_protm_entered(
- struct kbase_hwcnt_backend_interface *iface)
+void kbase_hwcnt_backend_csf_protm_entered(struct kbase_hwcnt_backend_interface *iface)
{
struct kbase_hwcnt_backend_csf_info *csf_info =
(struct kbase_hwcnt_backend_csf_info *)iface->info;
@@ -1744,8 +1611,7 @@ void kbase_hwcnt_backend_csf_protm_entered(
kbase_hwcnt_backend_csf_on_prfcnt_sample(iface);
}
-void kbase_hwcnt_backend_csf_protm_exited(
- struct kbase_hwcnt_backend_interface *iface)
+void kbase_hwcnt_backend_csf_protm_exited(struct kbase_hwcnt_backend_interface *iface)
{
struct kbase_hwcnt_backend_csf_info *csf_info;
@@ -1755,10 +1621,9 @@ void kbase_hwcnt_backend_csf_protm_exited(
csf_info->fw_in_protected_mode = false;
}
-void kbase_hwcnt_backend_csf_on_unrecoverable_error(
- struct kbase_hwcnt_backend_interface *iface)
+void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_interface *iface)
{
- unsigned long flags;
+ unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf_info *csf_info;
csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
@@ -1776,10 +1641,9 @@ void kbase_hwcnt_backend_csf_on_unrecoverable_error(
csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
}
-void kbase_hwcnt_backend_csf_on_before_reset(
- struct kbase_hwcnt_backend_interface *iface)
+void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interface *iface)
{
- unsigned long flags;
+ unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf_info *csf_info;
struct kbase_hwcnt_backend_csf *backend_csf;
@@ -1795,8 +1659,7 @@ void kbase_hwcnt_backend_csf_on_before_reset(
backend_csf = csf_info->backend;
if ((backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED) &&
- (backend_csf->enable_state !=
- KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR)) {
+ (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR)) {
/* Before a reset occurs, we must either have been disabled
* (else we lose data) or we should have encountered an
* unrecoverable error. Either way, we will have disabled the
@@ -1807,13 +1670,11 @@ void kbase_hwcnt_backend_csf_on_before_reset(
* We can't wait for this disable to complete, but it doesn't
* really matter, the power is being pulled.
*/
- kbasep_hwcnt_backend_csf_handle_unrecoverable_error(
- csf_info->backend);
+ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend);
}
/* A reset is the only way to exit the unrecoverable error state */
- if (backend_csf->enable_state ==
- KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) {
+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) {
kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED);
}
@@ -1821,8 +1682,7 @@ void kbase_hwcnt_backend_csf_on_before_reset(
csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
}
-void kbase_hwcnt_backend_csf_on_prfcnt_sample(
- struct kbase_hwcnt_backend_interface *iface)
+void kbase_hwcnt_backend_csf_on_prfcnt_sample(struct kbase_hwcnt_backend_interface *iface)
{
struct kbase_hwcnt_backend_csf_info *csf_info;
struct kbase_hwcnt_backend_csf *backend_csf;
@@ -1836,10 +1696,8 @@ void kbase_hwcnt_backend_csf_on_prfcnt_sample(
backend_csf = csf_info->backend;
/* Skip the dump_work if it's a watchdog request. */
- if (backend_csf->dump_state ==
- KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED) {
- backend_csf->dump_state =
- KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED;
+ if (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED) {
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED;
return;
}
@@ -1853,8 +1711,7 @@ void kbase_hwcnt_backend_csf_on_prfcnt_sample(
kbase_hwcnt_backend_csf_submit_dump_worker(csf_info);
}
-void kbase_hwcnt_backend_csf_on_prfcnt_threshold(
- struct kbase_hwcnt_backend_interface *iface)
+void kbase_hwcnt_backend_csf_on_prfcnt_threshold(struct kbase_hwcnt_backend_interface *iface)
{
struct kbase_hwcnt_backend_csf_info *csf_info;
struct kbase_hwcnt_backend_csf *backend_csf;
@@ -1871,12 +1728,10 @@ void kbase_hwcnt_backend_csf_on_prfcnt_threshold(
/* Submit the threshold work into the work queue to consume the
* available samples.
*/
- queue_work(backend_csf->hwc_dump_workq,
- &backend_csf->hwc_threshold_work);
+ queue_work(backend_csf->hwc_dump_workq, &backend_csf->hwc_threshold_work);
}
-void kbase_hwcnt_backend_csf_on_prfcnt_overflow(
- struct kbase_hwcnt_backend_interface *iface)
+void kbase_hwcnt_backend_csf_on_prfcnt_overflow(struct kbase_hwcnt_backend_interface *iface)
{
struct kbase_hwcnt_backend_csf_info *csf_info;
@@ -1897,8 +1752,7 @@ void kbase_hwcnt_backend_csf_on_prfcnt_overflow(
kbasep_hwcnt_backend_csf_handle_recoverable_error(csf_info->backend);
}
-void kbase_hwcnt_backend_csf_on_prfcnt_enable(
- struct kbase_hwcnt_backend_interface *iface)
+void kbase_hwcnt_backend_csf_on_prfcnt_enable(struct kbase_hwcnt_backend_interface *iface)
{
struct kbase_hwcnt_backend_csf_info *csf_info;
struct kbase_hwcnt_backend_csf *backend_csf;
@@ -1911,12 +1765,10 @@ void kbase_hwcnt_backend_csf_on_prfcnt_enable(
return;
backend_csf = csf_info->backend;
- if (backend_csf->enable_state ==
- KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) {
+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) {
kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
backend_csf, KBASE_HWCNT_BACKEND_CSF_ENABLED);
- } else if (backend_csf->enable_state ==
- KBASE_HWCNT_BACKEND_CSF_ENABLED) {
+ } else if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) {
/* Unexpected, but we are already in the right state so just
* ignore it.
*/
@@ -1924,13 +1776,11 @@ void kbase_hwcnt_backend_csf_on_prfcnt_enable(
/* Unexpected state change, assume everything is broken until
* we reset.
*/
- kbasep_hwcnt_backend_csf_handle_unrecoverable_error(
- csf_info->backend);
+ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend);
}
}
-void kbase_hwcnt_backend_csf_on_prfcnt_disable(
- struct kbase_hwcnt_backend_interface *iface)
+void kbase_hwcnt_backend_csf_on_prfcnt_disable(struct kbase_hwcnt_backend_interface *iface)
{
struct kbase_hwcnt_backend_csf_info *csf_info;
struct kbase_hwcnt_backend_csf *backend_csf;
@@ -1943,13 +1793,10 @@ void kbase_hwcnt_backend_csf_on_prfcnt_disable(
return;
backend_csf = csf_info->backend;
- if (backend_csf->enable_state ==
- KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED) {
+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED) {
kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
- backend_csf,
- KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER);
- } else if (backend_csf->enable_state ==
- KBASE_HWCNT_BACKEND_CSF_DISABLED) {
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER);
+ } else if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED) {
/* Unexpected, but we are already in the right state so just
* ignore it.
*/
@@ -1957,13 +1804,11 @@ void kbase_hwcnt_backend_csf_on_prfcnt_disable(
/* Unexpected state change, assume everything is broken until
* we reset.
*/
- kbasep_hwcnt_backend_csf_handle_unrecoverable_error(
- csf_info->backend);
+ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend);
}
}
-int kbase_hwcnt_backend_csf_metadata_init(
- struct kbase_hwcnt_backend_interface *iface)
+int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface *iface)
{
struct kbase_hwcnt_backend_csf_info *csf_info;
struct kbase_hwcnt_gpu_info gpu_info;
@@ -1975,8 +1820,7 @@ int kbase_hwcnt_backend_csf_metadata_init(
WARN_ON(!csf_info->csf_if->get_prfcnt_info);
- csf_info->csf_if->get_prfcnt_info(csf_info->csf_if->ctx,
- &csf_info->prfcnt_info);
+ csf_info->csf_if->get_prfcnt_info(csf_info->csf_if->ctx, &csf_info->prfcnt_info);
/* The clock domain counts should not exceed the number of maximum
* number of clock regulators.
@@ -1988,14 +1832,12 @@ int kbase_hwcnt_backend_csf_metadata_init(
gpu_info.core_mask = csf_info->prfcnt_info.core_mask;
gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt;
gpu_info.prfcnt_values_per_block =
- csf_info->prfcnt_info.prfcnt_block_size /
- KBASE_HWCNT_VALUE_HW_BYTES;
+ csf_info->prfcnt_info.prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES;
return kbase_hwcnt_csf_metadata_create(&gpu_info, csf_info->counter_set,
&csf_info->metadata);
}
-void kbase_hwcnt_backend_csf_metadata_term(
- struct kbase_hwcnt_backend_interface *iface)
+void kbase_hwcnt_backend_csf_metadata_term(struct kbase_hwcnt_backend_interface *iface)
{
struct kbase_hwcnt_backend_csf_info *csf_info;
@@ -2009,10 +1851,9 @@ void kbase_hwcnt_backend_csf_metadata_term(
}
}
-int kbase_hwcnt_backend_csf_create(
- struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
- struct kbase_hwcnt_watchdog_interface *watchdog_if,
- struct kbase_hwcnt_backend_interface *iface)
+int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
+ struct kbase_hwcnt_watchdog_interface *watchdog_if,
+ struct kbase_hwcnt_backend_interface *iface)
{
int errcode;
const struct kbase_hwcnt_backend_csf_info *info = NULL;
@@ -2024,8 +1865,7 @@ int kbase_hwcnt_backend_csf_create(
if (!is_power_of_2(ring_buf_cnt))
return -EINVAL;
- errcode = kbasep_hwcnt_backend_csf_info_create(csf_if, ring_buf_cnt,
- watchdog_if, &info);
+ errcode = kbasep_hwcnt_backend_csf_info_create(csf_if, ring_buf_cnt, watchdog_if, &info);
if (errcode)
return errcode;
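
Note: the hunks above for mali_kbase_hwcnt_backend_csf.c are almost entirely mechanical. Multi-line calls and declarations are re-wrapped to the wider formatting style used by the R43P0 sources, and the on-stack flags variables are now initialised to 0UL before being handed to the interface lock callback; the control flow itself is unchanged. Every state change still happens between the interface's lock and unlock callbacks, as in the _term() hunk near the top. A minimal sketch of that bracket pattern, assuming a csf_if pointer to the interface described in the next file (the local names here are illustrative, not part of the patch):

    unsigned long flags = 0UL;

    /* Sketch only: all backend state updates in this file sit between
     * these two calls, exactly as in kbasep_hwcnt_backend_csf_term().
     */
    csf_if->lock(csf_if->ctx, &flags);
    /* ... read or update backend state, e.g. clear info->backend ... */
    csf_if->unlock(csf_if->ctx, flags);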
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h
index e0cafbe..9c5a5c9 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_csf.h
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,9 +27,9 @@
#ifndef _KBASE_HWCNT_BACKEND_CSF_H_
#define _KBASE_HWCNT_BACKEND_CSF_H_
-#include "mali_kbase_hwcnt_backend.h"
-#include "mali_kbase_hwcnt_backend_csf_if.h"
-#include "mali_kbase_hwcnt_watchdog_if.h"
+#include "hwcnt/backend/mali_kbase_hwcnt_backend.h"
+#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h"
+#include "hwcnt/mali_kbase_hwcnt_watchdog_if.h"
/**
* kbase_hwcnt_backend_csf_create() - Create a CSF hardware counter backend
@@ -47,10 +47,9 @@
*
* Return: 0 on success, else error code.
*/
-int kbase_hwcnt_backend_csf_create(
- struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
- struct kbase_hwcnt_watchdog_interface *watchdog_if,
- struct kbase_hwcnt_backend_interface *iface);
+int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
+ struct kbase_hwcnt_watchdog_interface *watchdog_if,
+ struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_csf_metadata_init() - Initialize the metadata for a CSF
@@ -58,16 +57,14 @@ int kbase_hwcnt_backend_csf_create(
* @iface: Non-NULL pointer to backend interface structure
* Return: 0 on success, else error code.
*/
-int kbase_hwcnt_backend_csf_metadata_init(
- struct kbase_hwcnt_backend_interface *iface);
+int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_csf_metadata_term() - Terminate the metadata for a CSF
* hardware counter backend.
* @iface: Non-NULL pointer to backend interface structure.
*/
-void kbase_hwcnt_backend_csf_metadata_term(
- struct kbase_hwcnt_backend_interface *iface);
+void kbase_hwcnt_backend_csf_metadata_term(struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_csf_destroy() - Destroy a CSF hardware counter backend
@@ -77,8 +74,7 @@ void kbase_hwcnt_backend_csf_metadata_term(
* Can be safely called on an all-zeroed interface, or on an already destroyed
* interface.
*/
-void kbase_hwcnt_backend_csf_destroy(
- struct kbase_hwcnt_backend_interface *iface);
+void kbase_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_csf_protm_entered() - CSF HWC backend function to receive
@@ -86,8 +82,7 @@ void kbase_hwcnt_backend_csf_destroy(
* has been entered.
* @iface: Non-NULL pointer to HWC backend interface.
*/
-void kbase_hwcnt_backend_csf_protm_entered(
- struct kbase_hwcnt_backend_interface *iface);
+void kbase_hwcnt_backend_csf_protm_entered(struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_csf_protm_exited() - CSF HWC backend function to receive
@@ -95,8 +90,7 @@ void kbase_hwcnt_backend_csf_protm_entered(
* been exited.
* @iface: Non-NULL pointer to HWC backend interface.
*/
-void kbase_hwcnt_backend_csf_protm_exited(
- struct kbase_hwcnt_backend_interface *iface);
+void kbase_hwcnt_backend_csf_protm_exited(struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_csf_on_unrecoverable_error() - CSF HWC backend function
@@ -108,8 +102,7 @@ void kbase_hwcnt_backend_csf_protm_exited(
* with reset, or that may put HWC logic in state that could result in hang. For
* example, on bus error, or when FW becomes unresponsive.
*/
-void kbase_hwcnt_backend_csf_on_unrecoverable_error(
- struct kbase_hwcnt_backend_interface *iface);
+void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_csf_on_before_reset() - CSF HWC backend function to be
@@ -119,16 +112,14 @@ void kbase_hwcnt_backend_csf_on_unrecoverable_error(
* were in it.
* @iface: Non-NULL pointer to HWC backend interface.
*/
-void kbase_hwcnt_backend_csf_on_before_reset(
- struct kbase_hwcnt_backend_interface *iface);
+void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_csf_on_prfcnt_sample() - CSF performance counter sample
* complete interrupt handler.
* @iface: Non-NULL pointer to HWC backend interface.
*/
-void kbase_hwcnt_backend_csf_on_prfcnt_sample(
- struct kbase_hwcnt_backend_interface *iface);
+void kbase_hwcnt_backend_csf_on_prfcnt_sample(struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_csf_on_prfcnt_threshold() - CSF performance counter
@@ -136,31 +127,27 @@ void kbase_hwcnt_backend_csf_on_prfcnt_sample(
* interrupt handler.
* @iface: Non-NULL pointer to HWC backend interface.
*/
-void kbase_hwcnt_backend_csf_on_prfcnt_threshold(
- struct kbase_hwcnt_backend_interface *iface);
+void kbase_hwcnt_backend_csf_on_prfcnt_threshold(struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_csf_on_prfcnt_overflow() - CSF performance counter buffer
* overflow interrupt handler.
* @iface: Non-NULL pointer to HWC backend interface.
*/
-void kbase_hwcnt_backend_csf_on_prfcnt_overflow(
- struct kbase_hwcnt_backend_interface *iface);
+void kbase_hwcnt_backend_csf_on_prfcnt_overflow(struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_csf_on_prfcnt_enable() - CSF performance counter enabled
* interrupt handler.
* @iface: Non-NULL pointer to HWC backend interface.
*/
-void kbase_hwcnt_backend_csf_on_prfcnt_enable(
- struct kbase_hwcnt_backend_interface *iface);
+void kbase_hwcnt_backend_csf_on_prfcnt_enable(struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_csf_on_prfcnt_disable() - CSF performance counter
* disabled interrupt handler.
* @iface: Non-NULL pointer to HWC backend interface.
*/
-void kbase_hwcnt_backend_csf_on_prfcnt_disable(
- struct kbase_hwcnt_backend_interface *iface);
+void kbase_hwcnt_backend_csf_on_prfcnt_disable(struct kbase_hwcnt_backend_interface *iface);
#endif /* _KBASE_HWCNT_BACKEND_CSF_H_ */
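
Note: the header itself changes only in presentation. The copyright year is extended, the wrapped declarations are reflowed onto single lines where they now fit, and the includes are rewritten because the hardware-counter sources move into the new mali_kbase/hwcnt/ and mali_kbase/hwcnt/backend/ directories in this merge, which is why the file path in the diff header changes as well.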
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf_if.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h
index 24b26c2..382a3ad 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_csf_if.h
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h
@@ -85,8 +85,8 @@ struct kbase_hwcnt_backend_csf_if_prfcnt_info {
* held.
* @ctx: Non-NULL pointer to a CSF context.
*/
-typedef void kbase_hwcnt_backend_csf_if_assert_lock_held_fn(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx);
+typedef void
+kbase_hwcnt_backend_csf_if_assert_lock_held_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
/**
* typedef kbase_hwcnt_backend_csf_if_lock_fn - Acquire backend spinlock.
@@ -95,9 +95,8 @@ typedef void kbase_hwcnt_backend_csf_if_assert_lock_held_fn(
* @flags: Pointer to the memory location that would store the previous
* interrupt state.
*/
-typedef void kbase_hwcnt_backend_csf_if_lock_fn(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx,
- unsigned long *flags);
+typedef void kbase_hwcnt_backend_csf_if_lock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ unsigned long *flags);
/**
* typedef kbase_hwcnt_backend_csf_if_unlock_fn - Release backend spinlock.
@@ -106,9 +105,8 @@ typedef void kbase_hwcnt_backend_csf_if_lock_fn(
* @flags: Previously stored interrupt state when Scheduler interrupt
* spinlock was acquired.
*/
-typedef void kbase_hwcnt_backend_csf_if_unlock_fn(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx,
- unsigned long flags);
+typedef void kbase_hwcnt_backend_csf_if_unlock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ unsigned long flags);
/**
* typedef kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn - Get performance
@@ -137,10 +135,10 @@ typedef void kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn(
*
* Return: 0 on success, else error code.
*/
-typedef int kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count,
- void **cpu_dump_base,
- struct kbase_hwcnt_backend_csf_if_ring_buf **ring_buf);
+typedef int
+kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ u32 buf_count, void **cpu_dump_base,
+ struct kbase_hwcnt_backend_csf_if_ring_buf **ring_buf);
/**
* typedef kbase_hwcnt_backend_csf_if_ring_buf_sync_fn - Sync HWC dump buffers
@@ -159,10 +157,10 @@ typedef int kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn(
* Flush cached HWC dump buffer data to ensure that all writes from GPU and CPU
* are correctly observed.
*/
-typedef void kbase_hwcnt_backend_csf_if_ring_buf_sync_fn(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx,
- struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
- u32 buf_index_first, u32 buf_index_last, bool for_cpu);
+typedef void
+kbase_hwcnt_backend_csf_if_ring_buf_sync_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
+ u32 buf_index_first, u32 buf_index_last, bool for_cpu);
/**
* typedef kbase_hwcnt_backend_csf_if_ring_buf_free_fn - Free a ring buffer for
@@ -171,9 +169,9 @@ typedef void kbase_hwcnt_backend_csf_if_ring_buf_sync_fn(
* @ctx: Non-NULL pointer to a CSF interface context.
* @ring_buf: Non-NULL pointer to the ring buffer which to be freed.
*/
-typedef void kbase_hwcnt_backend_csf_if_ring_buf_free_fn(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx,
- struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf);
+typedef void
+kbase_hwcnt_backend_csf_if_ring_buf_free_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf);
/**
* typedef kbase_hwcnt_backend_csf_if_timestamp_ns_fn - Get the current
@@ -183,8 +181,7 @@ typedef void kbase_hwcnt_backend_csf_if_ring_buf_free_fn(
*
* Return: CSF interface timestamp in nanoseconds.
*/
-typedef u64 kbase_hwcnt_backend_csf_if_timestamp_ns_fn(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx);
+typedef u64 kbase_hwcnt_backend_csf_if_timestamp_ns_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
/**
* typedef kbase_hwcnt_backend_csf_if_dump_enable_fn - Setup and enable hardware
@@ -195,10 +192,10 @@ typedef u64 kbase_hwcnt_backend_csf_if_timestamp_ns_fn(
*
* Requires lock to be taken before calling.
*/
-typedef void kbase_hwcnt_backend_csf_if_dump_enable_fn(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx,
- struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
- struct kbase_hwcnt_backend_csf_if_enable *enable);
+typedef void
+kbase_hwcnt_backend_csf_if_dump_enable_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
+ struct kbase_hwcnt_backend_csf_if_enable *enable);
/**
* typedef kbase_hwcnt_backend_csf_if_dump_disable_fn - Disable hardware counter
@@ -207,8 +204,7 @@ typedef void kbase_hwcnt_backend_csf_if_dump_enable_fn(
*
* Requires lock to be taken before calling.
*/
-typedef void kbase_hwcnt_backend_csf_if_dump_disable_fn(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx);
+typedef void kbase_hwcnt_backend_csf_if_dump_disable_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
/**
* typedef kbase_hwcnt_backend_csf_if_dump_request_fn - Request a HWC dump.
@@ -217,8 +213,7 @@ typedef void kbase_hwcnt_backend_csf_if_dump_disable_fn(
*
* Requires lock to be taken before calling.
*/
-typedef void kbase_hwcnt_backend_csf_if_dump_request_fn(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx);
+typedef void kbase_hwcnt_backend_csf_if_dump_request_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
/**
* typedef kbase_hwcnt_backend_csf_if_get_indexes_fn - Get current extract and
@@ -231,9 +226,8 @@ typedef void kbase_hwcnt_backend_csf_if_dump_request_fn(
*
* Requires lock to be taken before calling.
*/
-typedef void kbase_hwcnt_backend_csf_if_get_indexes_fn(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index,
- u32 *insert_index);
+typedef void kbase_hwcnt_backend_csf_if_get_indexes_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ u32 *extract_index, u32 *insert_index);
/**
* typedef kbase_hwcnt_backend_csf_if_set_extract_index_fn - Update the extract
@@ -245,8 +239,9 @@ typedef void kbase_hwcnt_backend_csf_if_get_indexes_fn(
*
* Requires lock to be taken before calling.
*/
-typedef void kbase_hwcnt_backend_csf_if_set_extract_index_fn(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_index);
+typedef void
+kbase_hwcnt_backend_csf_if_set_extract_index_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ u32 extract_index);
/**
* typedef kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn - Get the current
@@ -260,9 +255,9 @@ typedef void kbase_hwcnt_backend_csf_if_set_extract_index_fn(
*
* Requires lock to be taken before calling.
*/
-typedef void kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx, u64 *cycle_counts,
- u64 clk_enable_map);
+typedef void
+kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ u64 *cycle_counts, u64 clk_enable_map);
/**
* struct kbase_hwcnt_backend_csf_if - Hardware counter backend CSF virtual
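
Note: mali_kbase_hwcnt_backend_csf_if.h only changes formatting here, but it is the seam the surrounding files pivot on. Each typedef above names one slot of the struct kbase_hwcnt_backend_csf_if ops table, and the firmware implementation later in this patch fills those slots in kbase_hwcnt_backend_csf_if_fw_create(). A rough sketch of how a provider and a caller meet through that table; my_ctx, my_lock_impl and my_unlock_impl are hypothetical stand-ins, only the struct members come from this patch:

    /* Provider side: populate the virtual interface (the real
     * assignments are in the _fw_create() hunk further down).
     */
    struct kbase_hwcnt_backend_csf_if iface = { 0 };

    iface.ctx = (struct kbase_hwcnt_backend_csf_if_ctx *)my_ctx;
    iface.lock = my_lock_impl;     /* matches kbase_hwcnt_backend_csf_if_lock_fn */
    iface.unlock = my_unlock_impl; /* matches kbase_hwcnt_backend_csf_if_unlock_fn */

    /* Caller side: the CSF counter backend only ever goes through the
     * pointers, never through a concrete implementation.
     */
    unsigned long flags = 0UL;

    iface.lock(iface.ctx, &flags);
    iface.unlock(iface.ctx, flags);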
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
index b9920f3..9a409f6 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
@@ -26,24 +26,19 @@
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
#include <device/mali_kbase_device.h>
-#include "mali_kbase_hwcnt_gpu.h"
-#include "mali_kbase_hwcnt_types.h"
+#include "hwcnt/mali_kbase_hwcnt_gpu.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
#include <csf/mali_kbase_csf_registers.h>
#include "csf/mali_kbase_csf_firmware.h"
-#include "mali_kbase_hwcnt_backend_csf_if_fw.h"
+#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h"
#include "mali_kbase_hwaccess_time.h"
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
+#include <backend/gpu/mali_kbase_model_linux.h>
#include <linux/log2.h>
#include "mali_kbase_ccswe.h"
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
-#include <backend/gpu/mali_kbase_model_dummy.h>
-#endif /* CONFIG_MALI_NO_MALI */
-
-/** The number of nanoseconds in a second. */
-#define NSECS_IN_SEC 1000000000ull /* ns */
/* Ring buffer virtual address start at 4GB */
#define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32)
@@ -90,8 +85,8 @@ struct kbase_hwcnt_backend_csf_if_fw_ctx {
struct kbase_ccswe ccswe_shader_cores;
};
-static void kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx)
+static void
+kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
struct kbase_device *kbdev;
@@ -104,9 +99,10 @@ static void kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
}
-static void
-kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
- unsigned long *flags)
+static void kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ unsigned long *flags)
+ __acquires(&(struct kbase_hwcnt_backend_csf_if_fw_ctx)
+ ctx->kbdev->csf.scheduler.interrupt_lock)
{
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
struct kbase_device *kbdev;
@@ -119,8 +115,10 @@ kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
kbase_csf_scheduler_spin_lock(kbdev, flags);
}
-static void kbasep_hwcnt_backend_csf_if_fw_unlock(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long flags)
+static void kbasep_hwcnt_backend_csf_if_fw_unlock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ unsigned long flags)
+ __releases(&(struct kbase_hwcnt_backend_csf_if_fw_ctx)
+ ctx->kbdev->csf.scheduler.interrupt_lock)
{
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
struct kbase_device *kbdev;
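
Note: the genuinely new part of the lock/unlock wrappers above is the __acquires()/__releases() markers. These are the standard sparse context annotations from the kernel headers (no-ops in a normal build) and tell static analysis that the scheduler interrupt spinlock is taken in _fw_lock() and released in _fw_unlock() rather than within a single function. A generic sketch of the pattern, with my_ctx as a stand-in for the real firmware context:

    #include <linux/spinlock.h>

    struct my_ctx {
    	spinlock_t lock;
    };

    /* Sketch only: annotate wrappers that acquire a lock on behalf of
     * their caller so sparse does not flag the apparent imbalance.
     */
    static void my_lock(struct my_ctx *ctx, unsigned long *flags)
    	__acquires(&ctx->lock)
    {
    	spin_lock_irqsave(&ctx->lock, *flags);
    }

    static void my_unlock(struct my_ctx *ctx, unsigned long flags)
    	__releases(&ctx->lock)
    {
    	spin_unlock_irqrestore(&ctx->lock, flags);
    }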
@@ -141,22 +139,19 @@ static void kbasep_hwcnt_backend_csf_if_fw_unlock(
* @clk_index: Clock index
* @clk_rate_hz: Clock frequency(hz)
*/
-static void kbasep_hwcnt_backend_csf_if_fw_on_freq_change(
- struct kbase_clk_rate_listener *rate_listener, u32 clk_index,
- u32 clk_rate_hz)
+static void
+kbasep_hwcnt_backend_csf_if_fw_on_freq_change(struct kbase_clk_rate_listener *rate_listener,
+ u32 clk_index, u32 clk_rate_hz)
{
- struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
- container_of(rate_listener,
- struct kbase_hwcnt_backend_csf_if_fw_ctx,
- rate_listener);
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = container_of(
+ rate_listener, struct kbase_hwcnt_backend_csf_if_fw_ctx, rate_listener);
u64 timestamp_ns;
if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES)
return;
timestamp_ns = ktime_get_raw_ns();
- kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns,
- clk_rate_hz);
+ kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, clk_rate_hz);
}
/**
@@ -165,17 +160,16 @@ static void kbasep_hwcnt_backend_csf_if_fw_on_freq_change(
* @fw_ctx: Non-NULL pointer to CSF firmware interface context.
* @clk_enable_map: Non-NULL pointer to enable map specifying enabled counters.
*/
-static void kbasep_hwcnt_backend_csf_if_fw_cc_enable(
- struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx, u64 clk_enable_map)
+static void
+kbasep_hwcnt_backend_csf_if_fw_cc_enable(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx,
+ u64 clk_enable_map)
{
struct kbase_device *kbdev = fw_ctx->kbdev;
- if (kbase_hwcnt_clk_enable_map_enabled(
- clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
/* software estimation for non-top clock domains */
struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
- const struct kbase_clk_data *clk_data =
- rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
+ const struct kbase_clk_data *clk_data = rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
u32 cur_freq;
unsigned long flags;
u64 timestamp_ns;
@@ -186,11 +180,9 @@ static void kbasep_hwcnt_backend_csf_if_fw_cc_enable(
cur_freq = (u32)clk_data->clock_val;
kbase_ccswe_reset(&fw_ctx->ccswe_shader_cores);
- kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores,
- timestamp_ns, cur_freq);
+ kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, cur_freq);
- kbase_clk_rate_trace_manager_subscribe_no_lock(
- rtm, &fw_ctx->rate_listener);
+ kbase_clk_rate_trace_manager_subscribe_no_lock(rtm, &fw_ctx->rate_listener);
spin_unlock_irqrestore(&rtm->lock, flags);
}
@@ -203,17 +195,15 @@ static void kbasep_hwcnt_backend_csf_if_fw_cc_enable(
*
* @fw_ctx: Non-NULL pointer to CSF firmware interface context.
*/
-static void kbasep_hwcnt_backend_csf_if_fw_cc_disable(
- struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
+static void
+kbasep_hwcnt_backend_csf_if_fw_cc_disable(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
{
struct kbase_device *kbdev = fw_ctx->kbdev;
struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
u64 clk_enable_map = fw_ctx->clk_enable_map;
- if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map,
- KBASE_CLOCK_DOMAIN_SHADER_CORES))
- kbase_clk_rate_trace_manager_unsubscribe(
- rtm, &fw_ctx->rate_listener);
+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES))
+ kbase_clk_rate_trace_manager_unsubscribe(rtm, &fw_ctx->rate_listener);
}
static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
@@ -244,8 +234,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
u32 prfcnt_size;
u32 prfcnt_hw_size;
u32 prfcnt_fw_size;
- u32 prfcnt_block_size = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK *
- KBASE_HWCNT_VALUE_HW_BYTES;
+ u32 prfcnt_block_size =
+ KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK * KBASE_HWCNT_VALUE_HW_BYTES;
WARN_ON(!ctx);
WARN_ON(!prfcnt_info);
@@ -262,10 +252,9 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
*/
if ((kbdev->gpu_props.props.raw_props.gpu_id & GPU_ID2_PRODUCT_MODEL) >=
GPU_ID2_PRODUCT_TTUX) {
- prfcnt_block_size =
- PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(kbase_reg_read(
- kbdev, GPU_CONTROL_REG(PRFCNT_FEATURES)))
- << 8;
+ prfcnt_block_size = PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_FEATURES)))
+ << 8;
}
*prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){
@@ -280,17 +269,14 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
};
/* Block size must be multiple of counter size. */
- WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) !=
- 0);
+ WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) != 0);
/* Total size must be multiple of block size. */
- WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) !=
- 0);
+ WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) != 0);
#endif
}
static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count,
- void **cpu_dump_base,
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count, void **cpu_dump_base,
struct kbase_hwcnt_backend_csf_if_ring_buf **out_ring_buf)
{
struct kbase_device *kbdev;
@@ -359,10 +345,9 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
/* Update MMU table */
- ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
- gpu_va_base >> PAGE_SHIFT, phys, num_pages,
- flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW,
- mmu_sync_info);
+ ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, phys,
+ num_pages, flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW,
+ mmu_sync_info, NULL, false);
if (ret)
goto mmu_insert_failed;
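
Note: this is one of the few functional edits in the file. kbase_mmu_insert_pages() gains two extra trailing arguments in this merge, passed here as NULL and false; their meaning is not visible in this hunk, so the ring-buffer code simply forwards neutral values. The matching kbase_mmu_teardown_pages() call in the ring-buffer free path below is extended in the same way.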
@@ -380,17 +365,15 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
fw_ring_buf->as_nr = MCU_AS_NR;
*cpu_dump_base = fw_ring_buf->cpu_dump_base;
- *out_ring_buf =
- (struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf;
+ *out_ring_buf = (struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf;
return 0;
mmu_insert_failed:
vunmap(cpu_addr);
vmap_error:
- kbase_mem_pool_free_pages(
- &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
- phys, false, false);
+ kbase_mem_pool_free_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, phys,
+ false, false);
phys_mem_pool_alloc_error:
kfree(page_list);
page_list_alloc_error:
@@ -400,10 +383,10 @@ phys_alloc_error:
return -ENOMEM;
}
-static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx,
- struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
- u32 buf_index_first, u32 buf_index_last, bool for_cpu)
+static void
+kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
+ u32 buf_index_first, u32 buf_index_last, bool for_cpu)
{
struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
(struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
@@ -434,8 +417,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(
* inclusive at both ends so full flushes are not 0 -> 0.
*/
ring_buf_index_first = buf_index_first & (fw_ring_buf->buf_count - 1);
- ring_buf_index_last =
- (buf_index_last - 1) & (fw_ring_buf->buf_count - 1);
+ ring_buf_index_last = (buf_index_last - 1) & (fw_ring_buf->buf_count - 1);
/* The start address is the offset of the first buffer. */
start_address = fw_ctx->buf_bytes * ring_buf_index_first;
@@ -452,15 +434,11 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(
struct page *pg = as_page(fw_ring_buf->phys[i]);
if (for_cpu) {
- kbase_sync_single_for_cpu(fw_ctx->kbdev,
- kbase_dma_addr(pg),
- PAGE_SIZE,
- DMA_BIDIRECTIONAL);
+ kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg),
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
} else {
- kbase_sync_single_for_device(fw_ctx->kbdev,
- kbase_dma_addr(pg),
- PAGE_SIZE,
- DMA_BIDIRECTIONAL);
+ kbase_sync_single_for_device(fw_ctx->kbdev, kbase_dma_addr(pg),
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
}
}
@@ -472,28 +450,24 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(
struct page *pg = as_page(fw_ring_buf->phys[i]);
if (for_cpu) {
- kbase_sync_single_for_cpu(fw_ctx->kbdev,
- kbase_dma_addr(pg), PAGE_SIZE,
+ kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg), PAGE_SIZE,
DMA_BIDIRECTIONAL);
} else {
- kbase_sync_single_for_device(fw_ctx->kbdev,
- kbase_dma_addr(pg),
- PAGE_SIZE,
+ kbase_sync_single_for_device(fw_ctx->kbdev, kbase_dma_addr(pg), PAGE_SIZE,
DMA_BIDIRECTIONAL);
}
}
}
-static u64 kbasep_hwcnt_backend_csf_if_fw_timestamp_ns(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx)
+static u64 kbasep_hwcnt_backend_csf_if_fw_timestamp_ns(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
CSTD_UNUSED(ctx);
return ktime_get_raw_ns();
}
-static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx,
- struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf)
+static void
+kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf)
{
struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
(struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
@@ -508,14 +482,13 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(
WARN_ON(kbase_mmu_teardown_pages(fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu,
gpu_va_base >> PAGE_SHIFT, fw_ring_buf->phys,
- fw_ring_buf->num_pages, MCU_AS_NR));
+ fw_ring_buf->num_pages, fw_ring_buf->num_pages,
+ MCU_AS_NR, true));
vunmap(fw_ring_buf->cpu_dump_base);
- kbase_mem_pool_free_pages(
- &fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
- fw_ring_buf->num_pages, fw_ring_buf->phys, false,
- false);
+ kbase_mem_pool_free_pages(&fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
+ fw_ring_buf->num_pages, fw_ring_buf->phys, false, false);
kfree(fw_ring_buf->phys);
@@ -523,10 +496,10 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(
}
}
-static void kbasep_hwcnt_backend_csf_if_fw_dump_enable(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx,
- struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
- struct kbase_hwcnt_backend_csf_if_enable *enable)
+static void
+kbasep_hwcnt_backend_csf_if_fw_dump_enable(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
+ struct kbase_hwcnt_backend_csf_if_enable *enable)
{
u32 prfcnt_config;
struct kbase_device *kbdev;
@@ -549,8 +522,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_enable(
prfcnt_config = GLB_PRFCNT_CONFIG_SET_SELECT_SET(prfcnt_config, enable->counter_set);
/* Configure the ring buffer base address */
- kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID,
- fw_ring_buf->as_nr);
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID, fw_ring_buf->as_nr);
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_LO,
fw_ring_buf->gpu_dump_base & U32_MAX);
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_HI,
@@ -560,38 +532,29 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_enable(
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_EXTRACT, 0);
/* Configure the enable bitmap */
- kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSF_EN,
- enable->fe_bm);
- kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN,
- enable->shader_bm);
- kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN,
- enable->mmu_l2_bm);
- kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN,
- enable->tiler_bm);
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSF_EN, enable->fe_bm);
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN, enable->shader_bm);
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN, enable->mmu_l2_bm);
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN, enable->tiler_bm);
/* Configure the HWC set and buffer size */
- kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG,
- prfcnt_config);
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG, prfcnt_config);
kbdev->csf.hwcnt.enable_pending = true;
/* Unmask the interrupts */
- kbase_csf_firmware_global_input_mask(
- global_iface, GLB_ACK_IRQ_MASK,
- GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK,
- GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
- kbase_csf_firmware_global_input_mask(
- global_iface, GLB_ACK_IRQ_MASK,
- GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK,
- GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
- kbase_csf_firmware_global_input_mask(
- global_iface, GLB_ACK_IRQ_MASK,
- GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK,
- GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
- kbase_csf_firmware_global_input_mask(
- global_iface, GLB_ACK_IRQ_MASK,
- GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK,
- GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK);
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
+ GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK,
+ GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
+ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK,
+ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
+ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK,
+ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
+ GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK,
+ GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK);
/* Enable the HWC */
kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
@@ -599,15 +562,12 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_enable(
GLB_REQ_PRFCNT_ENABLE_MASK);
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
- prfcnt_config = kbase_csf_firmware_global_input_read(global_iface,
- GLB_PRFCNT_CONFIG);
+ prfcnt_config = kbase_csf_firmware_global_input_read(global_iface, GLB_PRFCNT_CONFIG);
- kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx,
- enable->clk_enable_map);
+ kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx, enable->clk_enable_map);
}
-static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx)
+static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
struct kbase_device *kbdev;
struct kbase_csf_global_iface *global_iface;
@@ -622,20 +582,16 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(
/* Disable the HWC */
kbdev->csf.hwcnt.enable_pending = true;
- kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, 0,
- GLB_REQ_PRFCNT_ENABLE_MASK);
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, 0, GLB_REQ_PRFCNT_ENABLE_MASK);
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
/* mask the interrupts */
- kbase_csf_firmware_global_input_mask(
- global_iface, GLB_ACK_IRQ_MASK, 0,
- GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
- kbase_csf_firmware_global_input_mask(
- global_iface, GLB_ACK_IRQ_MASK, 0,
- GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
- kbase_csf_firmware_global_input_mask(
- global_iface, GLB_ACK_IRQ_MASK, 0,
- GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0,
+ GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0,
+ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0,
+ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
/* In case we have a previous request in flight when the disable
* happens.
@@ -645,8 +601,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(
kbasep_hwcnt_backend_csf_if_fw_cc_disable(fw_ctx);
}
-static void kbasep_hwcnt_backend_csf_if_fw_dump_request(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx)
+static void kbasep_hwcnt_backend_csf_if_fw_dump_request(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
u32 glb_req;
struct kbase_device *kbdev;
@@ -669,9 +624,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_request(
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
}
-static void kbasep_hwcnt_backend_csf_if_fw_get_indexes(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index,
- u32 *insert_index)
+static void kbasep_hwcnt_backend_csf_if_fw_get_indexes(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ u32 *extract_index, u32 *insert_index)
{
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
@@ -681,14 +635,15 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_indexes(
WARN_ON(!insert_index);
kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
- *extract_index = kbase_csf_firmware_global_input_read(
- &fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT);
- *insert_index = kbase_csf_firmware_global_output(
- &fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_INSERT);
+ *extract_index = kbase_csf_firmware_global_input_read(&fw_ctx->kbdev->csf.global_iface,
+ GLB_PRFCNT_EXTRACT);
+ *insert_index = kbase_csf_firmware_global_output(&fw_ctx->kbdev->csf.global_iface,
+ GLB_PRFCNT_INSERT);
}
-static void kbasep_hwcnt_backend_csf_if_fw_set_extract_index(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_idx)
+static void
+kbasep_hwcnt_backend_csf_if_fw_set_extract_index(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ u32 extract_idx)
{
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
@@ -699,13 +654,13 @@ static void kbasep_hwcnt_backend_csf_if_fw_set_extract_index(
/* Set the raw extract index to release the buffer back to the ring
* buffer.
*/
- kbase_csf_firmware_global_input(&fw_ctx->kbdev->csf.global_iface,
- GLB_PRFCNT_EXTRACT, extract_idx);
+ kbase_csf_firmware_global_input(&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT,
+ extract_idx);
}
-static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(
- struct kbase_hwcnt_backend_csf_if_ctx *ctx, u64 *cycle_counts,
- u64 clk_enable_map)
+static void
+kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ u64 *cycle_counts, u64 clk_enable_map)
{
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
@@ -722,12 +677,12 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(
if (clk == KBASE_CLOCK_DOMAIN_TOP) {
/* Read cycle count for top clock domain. */
- kbase_backend_get_gpu_time_norequest(
- fw_ctx->kbdev, &cycle_counts[clk], NULL, NULL);
+ kbase_backend_get_gpu_time_norequest(fw_ctx->kbdev, &cycle_counts[clk],
+ NULL, NULL);
} else {
/* Estimate cycle count for non-top clock domain. */
- cycle_counts[clk] = kbase_ccswe_cycle_at(
- &fw_ctx->ccswe_shader_cores, timestamp_ns);
+ cycle_counts[clk] =
+ kbase_ccswe_cycle_at(&fw_ctx->ccswe_shader_cores, timestamp_ns);
}
}
}
@@ -737,8 +692,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(
*
* @fw_ctx: Pointer to context to destroy.
*/
-static void kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(
- struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
+static void
+kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
{
if (!fw_ctx)
return;
@@ -753,9 +708,9 @@ static void kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(
* @out_ctx: Non-NULL pointer to where info is stored on success.
* Return: 0 on success, else error code.
*/
-static int kbasep_hwcnt_backend_csf_if_fw_ctx_create(
- struct kbase_device *kbdev,
- struct kbase_hwcnt_backend_csf_if_fw_ctx **out_ctx)
+static int
+kbasep_hwcnt_backend_csf_if_fw_ctx_create(struct kbase_device *kbdev,
+ struct kbase_hwcnt_backend_csf_if_fw_ctx **out_ctx)
{
u8 clk;
int errcode = -ENOMEM;
@@ -779,8 +734,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ctx_create(
ctx->clk_enable_map = 0;
kbase_ccswe_init(&ctx->ccswe_shader_cores);
- ctx->rate_listener.notify =
- kbasep_hwcnt_backend_csf_if_fw_on_freq_change;
+ ctx->rate_listener.notify = kbasep_hwcnt_backend_csf_if_fw_on_freq_change;
*out_ctx = ctx;
@@ -790,8 +744,7 @@ error:
return errcode;
}
-void kbase_hwcnt_backend_csf_if_fw_destroy(
- struct kbase_hwcnt_backend_csf_if *if_fw)
+void kbase_hwcnt_backend_csf_if_fw_destroy(struct kbase_hwcnt_backend_csf_if *if_fw)
{
if (!if_fw)
return;
@@ -801,8 +754,8 @@ void kbase_hwcnt_backend_csf_if_fw_destroy(
memset(if_fw, 0, sizeof(*if_fw));
}
-int kbase_hwcnt_backend_csf_if_fw_create(
- struct kbase_device *kbdev, struct kbase_hwcnt_backend_csf_if *if_fw)
+int kbase_hwcnt_backend_csf_if_fw_create(struct kbase_device *kbdev,
+ struct kbase_hwcnt_backend_csf_if *if_fw)
{
int errcode;
struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL;
@@ -815,8 +768,7 @@ int kbase_hwcnt_backend_csf_if_fw_create(
return errcode;
if_fw->ctx = (struct kbase_hwcnt_backend_csf_if_ctx *)ctx;
- if_fw->assert_lock_held =
- kbasep_hwcnt_backend_csf_if_fw_assert_lock_held;
+ if_fw->assert_lock_held = kbasep_hwcnt_backend_csf_if_fw_assert_lock_held;
if_fw->lock = kbasep_hwcnt_backend_csf_if_fw_lock;
if_fw->unlock = kbasep_hwcnt_backend_csf_if_fw_unlock;
if_fw->get_prfcnt_info = kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info;
@@ -827,11 +779,9 @@ int kbase_hwcnt_backend_csf_if_fw_create(
if_fw->dump_enable = kbasep_hwcnt_backend_csf_if_fw_dump_enable;
if_fw->dump_disable = kbasep_hwcnt_backend_csf_if_fw_dump_disable;
if_fw->dump_request = kbasep_hwcnt_backend_csf_if_fw_dump_request;
- if_fw->get_gpu_cycle_count =
- kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count;
+ if_fw->get_gpu_cycle_count = kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count;
if_fw->get_indexes = kbasep_hwcnt_backend_csf_if_fw_get_indexes;
- if_fw->set_extract_index =
- kbasep_hwcnt_backend_csf_if_fw_set_extract_index;
+ if_fw->set_extract_index = kbasep_hwcnt_backend_csf_if_fw_set_extract_index;
return 0;
}
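
Note: beyond the re-wrapping, mali_kbase_hwcnt_backend_csf_if_fw.c picks up a handful of substantive changes in this merge: the CONFIG_MALI_NO_MALI-guarded include of the dummy model header is replaced by an unconditional include of backend/gpu/mali_kbase_model_linux.h, the local NSECS_IN_SEC define is dropped, the lock/unlock wrappers gain the sparse annotations discussed above, and the MMU insert/teardown calls for the firmware ring buffer move to the extended kbase_mmu_* signatures. The wiring at the end of the file is unchanged in substance; it still fills every function pointer of the kbase_hwcnt_backend_csf_if ops table with the corresponding _fw_ implementation.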
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h
index b69668b..71d1506 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.h
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -26,7 +26,7 @@
#ifndef _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_
#define _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_
-#include "mali_kbase_hwcnt_backend_csf_if.h"
+#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h"
/**
* kbase_hwcnt_backend_csf_if_fw_create() - Create a firmware CSF interface
@@ -36,15 +36,14 @@
* creation success.
* Return: 0 on success, else error code.
*/
-int kbase_hwcnt_backend_csf_if_fw_create(
- struct kbase_device *kbdev, struct kbase_hwcnt_backend_csf_if *if_fw);
+int kbase_hwcnt_backend_csf_if_fw_create(struct kbase_device *kbdev,
+ struct kbase_hwcnt_backend_csf_if *if_fw);
/**
* kbase_hwcnt_backend_csf_if_fw_destroy() - Destroy a firmware CSF interface of
* hardware counter backend.
* @if_fw: Pointer to a CSF interface to destroy.
*/
-void kbase_hwcnt_backend_csf_if_fw_destroy(
- struct kbase_hwcnt_backend_csf_if *if_fw);
+void kbase_hwcnt_backend_csf_if_fw_destroy(struct kbase_hwcnt_backend_csf_if *if_fw);
#endif /* _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_ */
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_jm.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c
index 98019e7..8b3caac 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_jm.c
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c
@@ -19,18 +19,15 @@
*
*/
-#include "mali_kbase_hwcnt_backend_jm.h"
-#include "mali_kbase_hwcnt_gpu.h"
-#include "mali_kbase_hwcnt_types.h"
+#include "hwcnt/backend/mali_kbase_hwcnt_backend_jm.h"
+#include "hwcnt/mali_kbase_hwcnt_gpu.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
#include "mali_kbase.h"
#include "backend/gpu/mali_kbase_pm_ca.h"
#include "mali_kbase_hwaccess_instr.h"
#include "mali_kbase_hwaccess_time.h"
#include "mali_kbase_ccswe.h"
-
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
-#include "backend/gpu/mali_kbase_model_dummy.h"
-#endif /* CONFIG_MALI_NO_MALI */
+#include "backend/gpu/mali_kbase_model_linux.h"
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
#include "backend/gpu/mali_kbase_pm_internal.h"
@@ -136,9 +133,8 @@ struct kbase_hwcnt_backend_jm {
*
* Return: 0 on success, else error code.
*/
-static int
-kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
- struct kbase_hwcnt_gpu_info *info)
+static int kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
+ struct kbase_hwcnt_gpu_info *info)
{
size_t clk;
@@ -153,13 +149,11 @@ kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
{
const struct base_gpu_props *props = &kbdev->gpu_props.props;
const size_t l2_count = props->l2_props.num_l2_slices;
- const size_t core_mask =
- props->coherency_info.group[0].core_mask;
+ const size_t core_mask = props->coherency_info.group[0].core_mask;
info->l2_count = l2_count;
info->core_mask = core_mask;
- info->prfcnt_values_per_block =
- KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK;
+ info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK;
}
#endif /* CONFIG_MALI_NO_MALI */
@@ -173,9 +167,8 @@ kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
return 0;
}
-static void kbasep_hwcnt_backend_jm_init_layout(
- const struct kbase_hwcnt_gpu_info *gpu_info,
- struct kbase_hwcnt_jm_physical_layout *phys_layout)
+static void kbasep_hwcnt_backend_jm_init_layout(const struct kbase_hwcnt_gpu_info *gpu_info,
+ struct kbase_hwcnt_jm_physical_layout *phys_layout)
{
u8 shader_core_cnt;
@@ -189,32 +182,29 @@ static void kbasep_hwcnt_backend_jm_init_layout(
.tiler_cnt = KBASE_HWCNT_V5_TILER_BLOCK_COUNT,
.mmu_l2_cnt = gpu_info->l2_count,
.shader_cnt = shader_core_cnt,
- .block_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT +
- KBASE_HWCNT_V5_TILER_BLOCK_COUNT +
+ .block_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT + KBASE_HWCNT_V5_TILER_BLOCK_COUNT +
gpu_info->l2_count + shader_core_cnt,
.shader_avail_mask = gpu_info->core_mask,
.headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
.values_per_block = gpu_info->prfcnt_values_per_block,
- .counters_per_block = gpu_info->prfcnt_values_per_block -
- KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ .counters_per_block =
+ gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
.enable_mask_offset = KBASE_HWCNT_V5_PRFCNT_EN_HEADER,
};
}
-static void kbasep_hwcnt_backend_jm_dump_sample(
- const struct kbase_hwcnt_backend_jm *const backend_jm)
+static void
+kbasep_hwcnt_backend_jm_dump_sample(const struct kbase_hwcnt_backend_jm *const backend_jm)
{
size_t block_idx;
const u32 *new_sample_buf = backend_jm->cpu_dump_va;
const u32 *new_block = new_sample_buf;
u64 *dst_buf = backend_jm->to_user_buf;
u64 *dst_block = dst_buf;
- const size_t values_per_block =
- backend_jm->phys_layout.values_per_block;
+ const size_t values_per_block = backend_jm->phys_layout.values_per_block;
const size_t dump_bytes = backend_jm->info->dump_bytes;
- for (block_idx = 0; block_idx < backend_jm->phys_layout.block_cnt;
- block_idx++) {
+ for (block_idx = 0; block_idx < backend_jm->phys_layout.block_cnt; block_idx++) {
size_t ctr_idx;
for (ctr_idx = 0; ctr_idx < values_per_block; ctr_idx++)
@@ -224,10 +214,8 @@ static void kbasep_hwcnt_backend_jm_dump_sample(
dst_block += values_per_block;
}
- WARN_ON(new_block !=
- new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
- WARN_ON(dst_block !=
- dst_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
+ WARN_ON(new_block != new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
+ WARN_ON(dst_block != dst_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
}
/**
@@ -237,21 +225,18 @@ static void kbasep_hwcnt_backend_jm_dump_sample(
* @clk_index: Clock index
* @clk_rate_hz: Clock frequency(hz)
*/
-static void kbasep_hwcnt_backend_jm_on_freq_change(
- struct kbase_clk_rate_listener *rate_listener,
- u32 clk_index,
- u32 clk_rate_hz)
+static void kbasep_hwcnt_backend_jm_on_freq_change(struct kbase_clk_rate_listener *rate_listener,
+ u32 clk_index, u32 clk_rate_hz)
{
- struct kbase_hwcnt_backend_jm *backend_jm = container_of(
- rate_listener, struct kbase_hwcnt_backend_jm, rate_listener);
+ struct kbase_hwcnt_backend_jm *backend_jm =
+ container_of(rate_listener, struct kbase_hwcnt_backend_jm, rate_listener);
u64 timestamp_ns;
if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES)
return;
timestamp_ns = ktime_get_raw_ns();
- kbase_ccswe_freq_change(
- &backend_jm->ccswe_shader_cores, timestamp_ns, clk_rate_hz);
+ kbase_ccswe_freq_change(&backend_jm->ccswe_shader_cores, timestamp_ns, clk_rate_hz);
}
/**
@@ -261,53 +246,42 @@ static void kbasep_hwcnt_backend_jm_on_freq_change(
* @enable_map: Non-NULL pointer to enable map specifying enabled counters.
* @timestamp_ns: Timestamp(ns) when HWCNT were enabled.
*/
-static void kbasep_hwcnt_backend_jm_cc_enable(
- struct kbase_hwcnt_backend_jm *backend_jm,
- const struct kbase_hwcnt_enable_map *enable_map,
- u64 timestamp_ns)
+static void kbasep_hwcnt_backend_jm_cc_enable(struct kbase_hwcnt_backend_jm *backend_jm,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ u64 timestamp_ns)
{
struct kbase_device *kbdev = backend_jm->kctx->kbdev;
u64 clk_enable_map = enable_map->clk_enable_map;
u64 cycle_count;
- if (kbase_hwcnt_clk_enable_map_enabled(
- clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
/* turn on the cycle counter */
kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev);
/* Read cycle count for top clock domain. */
- kbase_backend_get_gpu_time_norequest(
- kbdev, &cycle_count, NULL, NULL);
+ kbase_backend_get_gpu_time_norequest(kbdev, &cycle_count, NULL, NULL);
- backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_TOP] =
- cycle_count;
+ backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_TOP] = cycle_count;
}
- if (kbase_hwcnt_clk_enable_map_enabled(
- clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
/* software estimation for non-top clock domains */
struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
- const struct kbase_clk_data *clk_data =
- rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
+ const struct kbase_clk_data *clk_data = rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
u32 cur_freq;
unsigned long flags;
spin_lock_irqsave(&rtm->lock, flags);
- cur_freq = (u32) clk_data->clock_val;
+ cur_freq = (u32)clk_data->clock_val;
kbase_ccswe_reset(&backend_jm->ccswe_shader_cores);
- kbase_ccswe_freq_change(
- &backend_jm->ccswe_shader_cores,
- timestamp_ns,
- cur_freq);
+ kbase_ccswe_freq_change(&backend_jm->ccswe_shader_cores, timestamp_ns, cur_freq);
- kbase_clk_rate_trace_manager_subscribe_no_lock(
- rtm, &backend_jm->rate_listener);
+ kbase_clk_rate_trace_manager_subscribe_no_lock(rtm, &backend_jm->rate_listener);
spin_unlock_irqrestore(&rtm->lock, flags);
/* ccswe was reset. The estimated cycle is zero. */
- backend_jm->prev_cycle_count[
- KBASE_CLOCK_DOMAIN_SHADER_CORES] = 0;
+ backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_SHADER_CORES] = 0;
}
/* Keep clk_enable_map for dump_request. */
@@ -319,28 +293,22 @@ static void kbasep_hwcnt_backend_jm_cc_enable(
*
* @backend_jm: Non-NULL pointer to backend.
*/
-static void kbasep_hwcnt_backend_jm_cc_disable(
- struct kbase_hwcnt_backend_jm *backend_jm)
+static void kbasep_hwcnt_backend_jm_cc_disable(struct kbase_hwcnt_backend_jm *backend_jm)
{
struct kbase_device *kbdev = backend_jm->kctx->kbdev;
struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
u64 clk_enable_map = backend_jm->clk_enable_map;
- if (kbase_hwcnt_clk_enable_map_enabled(
- clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
/* turn off the cycle counter */
kbase_pm_release_gpu_cycle_counter(kbdev);
}
- if (kbase_hwcnt_clk_enable_map_enabled(
- clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
-
- kbase_clk_rate_trace_manager_unsubscribe(
- rtm, &backend_jm->rate_listener);
+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
+ kbase_clk_rate_trace_manager_unsubscribe(rtm, &backend_jm->rate_listener);
}
}
-
/**
* kbasep_hwcnt_gpu_update_curr_config() - Update the destination buffer with
* current config information.
@@ -356,38 +324,33 @@ static void kbasep_hwcnt_backend_jm_cc_disable(
*
* Return: 0 on success, else error code.
*/
-static int kbasep_hwcnt_gpu_update_curr_config(
- struct kbase_device *kbdev,
- struct kbase_hwcnt_curr_config *curr_config)
+static int kbasep_hwcnt_gpu_update_curr_config(struct kbase_device *kbdev,
+ struct kbase_hwcnt_curr_config *curr_config)
{
if (WARN_ON(!kbdev) || WARN_ON(!curr_config))
return -EINVAL;
lockdep_assert_held(&kbdev->hwaccess_lock);
- curr_config->num_l2_slices =
- kbdev->gpu_props.curr_config.l2_slices;
- curr_config->shader_present =
- kbdev->gpu_props.curr_config.shader_present;
+ curr_config->num_l2_slices = kbdev->gpu_props.curr_config.l2_slices;
+ curr_config->shader_present = kbdev->gpu_props.curr_config.shader_present;
return 0;
}
/* JM backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */
-static u64 kbasep_hwcnt_backend_jm_timestamp_ns(
- struct kbase_hwcnt_backend *backend)
+static u64 kbasep_hwcnt_backend_jm_timestamp_ns(struct kbase_hwcnt_backend *backend)
{
(void)backend;
return ktime_get_raw_ns();
}
/* JM backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */
-static int kbasep_hwcnt_backend_jm_dump_enable_nolock(
- struct kbase_hwcnt_backend *backend,
- const struct kbase_hwcnt_enable_map *enable_map)
+static int
+kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map)
{
int errcode;
- struct kbase_hwcnt_backend_jm *backend_jm =
- (struct kbase_hwcnt_backend_jm *)backend;
+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
struct kbase_context *kctx;
struct kbase_device *kbdev;
struct kbase_hwcnt_physical_enable_map phys_enable_map;
@@ -406,8 +369,7 @@ static int kbasep_hwcnt_backend_jm_dump_enable_nolock(
kbase_hwcnt_gpu_enable_map_to_physical(&phys_enable_map, enable_map);
- kbase_hwcnt_gpu_set_to_physical(&phys_counter_set,
- backend_jm->info->counter_set);
+ kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, backend_jm->info->counter_set);
enable.fe_bm = phys_enable_map.fe_bm;
enable.shader_bm = phys_enable_map.shader_bm;
@@ -425,8 +387,7 @@ static int kbasep_hwcnt_backend_jm_dump_enable_nolock(
timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend);
/* Update the current configuration information. */
- errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev,
- &backend_jm->curr_config);
+ errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, &backend_jm->curr_config);
if (errcode)
goto error;
@@ -446,14 +407,12 @@ error:
}
/* JM backend implementation of kbase_hwcnt_backend_dump_enable_fn */
-static int kbasep_hwcnt_backend_jm_dump_enable(
- struct kbase_hwcnt_backend *backend,
- const struct kbase_hwcnt_enable_map *enable_map)
+static int kbasep_hwcnt_backend_jm_dump_enable(struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map)
{
unsigned long flags;
int errcode;
- struct kbase_hwcnt_backend_jm *backend_jm =
- (struct kbase_hwcnt_backend_jm *)backend;
+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
struct kbase_device *kbdev;
if (!backend_jm)
@@ -463,8 +422,7 @@ static int kbasep_hwcnt_backend_jm_dump_enable(
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- errcode = kbasep_hwcnt_backend_jm_dump_enable_nolock(
- backend, enable_map);
+ errcode = kbasep_hwcnt_backend_jm_dump_enable_nolock(backend, enable_map);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -472,12 +430,10 @@ static int kbasep_hwcnt_backend_jm_dump_enable(
}
/* JM backend implementation of kbase_hwcnt_backend_dump_disable_fn */
-static void kbasep_hwcnt_backend_jm_dump_disable(
- struct kbase_hwcnt_backend *backend)
+static void kbasep_hwcnt_backend_jm_dump_disable(struct kbase_hwcnt_backend *backend)
{
int errcode;
- struct kbase_hwcnt_backend_jm *backend_jm =
- (struct kbase_hwcnt_backend_jm *)backend;
+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
if (WARN_ON(!backend_jm) || !backend_jm->enabled)
return;
@@ -491,11 +447,9 @@ static void kbasep_hwcnt_backend_jm_dump_disable(
}
/* JM backend implementation of kbase_hwcnt_backend_dump_clear_fn */
-static int kbasep_hwcnt_backend_jm_dump_clear(
- struct kbase_hwcnt_backend *backend)
+static int kbasep_hwcnt_backend_jm_dump_clear(struct kbase_hwcnt_backend *backend)
{
- struct kbase_hwcnt_backend_jm *backend_jm =
- (struct kbase_hwcnt_backend_jm *)backend;
+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
if (!backend_jm || !backend_jm->enabled)
return -EINVAL;
@@ -504,12 +458,10 @@ static int kbasep_hwcnt_backend_jm_dump_clear(
}
/* JM backend implementation of kbase_hwcnt_backend_dump_request_fn */
-static int kbasep_hwcnt_backend_jm_dump_request(
- struct kbase_hwcnt_backend *backend,
- u64 *dump_time_ns)
+static int kbasep_hwcnt_backend_jm_dump_request(struct kbase_hwcnt_backend *backend,
+ u64 *dump_time_ns)
{
- struct kbase_hwcnt_backend_jm *backend_jm =
- (struct kbase_hwcnt_backend_jm *)backend;
+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
struct kbase_device *kbdev;
const struct kbase_hwcnt_metadata *metadata;
u64 current_cycle_count;
@@ -528,28 +480,25 @@ static int kbasep_hwcnt_backend_jm_dump_request(
*dump_time_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend);
ret = kbase_instr_hwcnt_request_dump(backend_jm->kctx);
- kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
- if (!kbase_hwcnt_clk_enable_map_enabled(
- backend_jm->clk_enable_map, clk))
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk)
+ {
+ if (!kbase_hwcnt_clk_enable_map_enabled(backend_jm->clk_enable_map, clk))
continue;
if (clk == KBASE_CLOCK_DOMAIN_TOP) {
/* Read cycle count for top clock domain. */
- kbase_backend_get_gpu_time_norequest(
- kbdev, &current_cycle_count,
- NULL, NULL);
+ kbase_backend_get_gpu_time_norequest(kbdev, &current_cycle_count,
+ NULL, NULL);
} else {
/*
* Estimate cycle count for non-top clock
* domain.
*/
current_cycle_count = kbase_ccswe_cycle_at(
- &backend_jm->ccswe_shader_cores,
- *dump_time_ns);
+ &backend_jm->ccswe_shader_cores, *dump_time_ns);
}
backend_jm->cycle_count_elapsed[clk] =
- current_cycle_count -
- backend_jm->prev_cycle_count[clk];
+ current_cycle_count - backend_jm->prev_cycle_count[clk];
/*
* Keep the current cycle count for later calculation.
@@ -563,11 +512,9 @@ static int kbasep_hwcnt_backend_jm_dump_request(
}
/* JM backend implementation of kbase_hwcnt_backend_dump_wait_fn */
-static int kbasep_hwcnt_backend_jm_dump_wait(
- struct kbase_hwcnt_backend *backend)
+static int kbasep_hwcnt_backend_jm_dump_wait(struct kbase_hwcnt_backend *backend)
{
- struct kbase_hwcnt_backend_jm *backend_jm =
- (struct kbase_hwcnt_backend_jm *)backend;
+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
if (!backend_jm || !backend_jm->enabled)
return -EINVAL;
@@ -576,14 +523,12 @@ static int kbasep_hwcnt_backend_jm_dump_wait(
}
/* JM backend implementation of kbase_hwcnt_backend_dump_get_fn */
-static int kbasep_hwcnt_backend_jm_dump_get(
- struct kbase_hwcnt_backend *backend,
- struct kbase_hwcnt_dump_buffer *dst,
- const struct kbase_hwcnt_enable_map *dst_enable_map,
- bool accumulate)
+static int kbasep_hwcnt_backend_jm_dump_get(struct kbase_hwcnt_backend *backend,
+ struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map,
+ bool accumulate)
{
- struct kbase_hwcnt_backend_jm *backend_jm =
- (struct kbase_hwcnt_backend_jm *)backend;
+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
size_t clk;
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
struct kbase_device *kbdev;
@@ -597,16 +542,15 @@ static int kbasep_hwcnt_backend_jm_dump_get(
return -EINVAL;
/* Invalidate the kernel buffer before reading from it. */
- kbase_sync_mem_regions(
- backend_jm->kctx, backend_jm->vmap, KBASE_SYNC_TO_CPU);
+ kbase_sync_mem_regions(backend_jm->kctx, backend_jm->vmap, KBASE_SYNC_TO_CPU);
/* Dump sample to the internal 64-bit user buffer. */
kbasep_hwcnt_backend_jm_dump_sample(backend_jm);
/* Extract elapsed cycle count for each clock domain if enabled. */
- kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) {
- if (!kbase_hwcnt_clk_enable_map_enabled(
- dst_enable_map->clk_enable_map, clk))
+ kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk)
+ {
+ if (!kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
continue;
/* Reset the counter to zero if accumulation is off. */
@@ -621,17 +565,16 @@ static int kbasep_hwcnt_backend_jm_dump_get(
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
/* Update the current configuration information. */
- errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev,
- &backend_jm->curr_config);
+ errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, &backend_jm->curr_config);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
if (errcode)
return errcode;
#endif /* CONFIG_MALI_NO_MALI */
- return kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf,
- dst_enable_map, backend_jm->pm_core_mask,
- &backend_jm->curr_config, accumulate);
+ return kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf, dst_enable_map,
+ backend_jm->pm_core_mask, &backend_jm->curr_config,
+ accumulate);
}
/**
@@ -643,10 +586,8 @@ static int kbasep_hwcnt_backend_jm_dump_get(
*
* Return: 0 on success, else error code.
*/
-static int kbasep_hwcnt_backend_jm_dump_alloc(
- const struct kbase_hwcnt_backend_jm_info *info,
- struct kbase_context *kctx,
- u64 *gpu_dump_va)
+static int kbasep_hwcnt_backend_jm_dump_alloc(const struct kbase_hwcnt_backend_jm_info *info,
+ struct kbase_context *kctx, u64 *gpu_dump_va)
{
struct kbase_va_region *reg;
u64 flags;
@@ -661,16 +602,12 @@ static int kbasep_hwcnt_backend_jm_dump_alloc(
WARN_ON(!kctx);
WARN_ON(!gpu_dump_va);
- flags = BASE_MEM_PROT_CPU_RD |
- BASE_MEM_PROT_GPU_WR |
- BASEP_MEM_PERMANENT_KERNEL_MAPPING |
- BASE_MEM_CACHED_CPU |
- BASE_MEM_UNCACHED_GPU;
+ flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR | BASEP_MEM_PERMANENT_KERNEL_MAPPING |
+ BASE_MEM_CACHED_CPU | BASE_MEM_UNCACHED_GPU;
nr_pages = PFN_UP(info->dump_bytes);
- reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va,
- mmu_sync_info);
+ reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va, mmu_sync_info);
if (!reg)
return -ENOMEM;
@@ -683,9 +620,7 @@ static int kbasep_hwcnt_backend_jm_dump_alloc(
* @kctx: Non-NULL pointer to kbase context.
* @gpu_dump_va: GPU dump buffer virtual address.
*/
-static void kbasep_hwcnt_backend_jm_dump_free(
- struct kbase_context *kctx,
- u64 gpu_dump_va)
+static void kbasep_hwcnt_backend_jm_dump_free(struct kbase_context *kctx, u64 gpu_dump_va)
{
WARN_ON(!kctx);
if (gpu_dump_va)
@@ -698,8 +633,7 @@ static void kbasep_hwcnt_backend_jm_dump_free(
*
* Can be safely called on a backend in any state of partial construction.
*/
-static void kbasep_hwcnt_backend_jm_destroy(
- struct kbase_hwcnt_backend_jm *backend)
+static void kbasep_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_jm *backend)
{
if (!backend)
return;
@@ -712,8 +646,7 @@ static void kbasep_hwcnt_backend_jm_destroy(
kbase_phy_alloc_mapping_put(kctx, backend->vmap);
if (backend->gpu_dump_va)
- kbasep_hwcnt_backend_jm_dump_free(
- kctx, backend->gpu_dump_va);
+ kbasep_hwcnt_backend_jm_dump_free(kctx, backend->gpu_dump_va);
kbasep_js_release_privileged_ctx(kbdev, kctx);
kbase_destroy_context(kctx);
@@ -731,9 +664,8 @@ static void kbasep_hwcnt_backend_jm_destroy(
*
* Return: 0 on success, else error code.
*/
-static int kbasep_hwcnt_backend_jm_create(
- const struct kbase_hwcnt_backend_jm_info *info,
- struct kbase_hwcnt_backend_jm **out_backend)
+static int kbasep_hwcnt_backend_jm_create(const struct kbase_hwcnt_backend_jm_info *info,
+ struct kbase_hwcnt_backend_jm **out_backend)
{
int errcode;
struct kbase_device *kbdev;
@@ -749,28 +681,25 @@ static int kbasep_hwcnt_backend_jm_create(
goto alloc_error;
backend->info = info;
- kbasep_hwcnt_backend_jm_init_layout(&info->hwcnt_gpu_info,
- &backend->phys_layout);
+ kbasep_hwcnt_backend_jm_init_layout(&info->hwcnt_gpu_info, &backend->phys_layout);
backend->kctx = kbase_create_context(kbdev, true,
- BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL);
+ BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL);
if (!backend->kctx)
goto alloc_error;
kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx);
- errcode = kbasep_hwcnt_backend_jm_dump_alloc(
- info, backend->kctx, &backend->gpu_dump_va);
+ errcode = kbasep_hwcnt_backend_jm_dump_alloc(info, backend->kctx, &backend->gpu_dump_va);
if (errcode)
goto error;
- backend->cpu_dump_va = kbase_phy_alloc_mapping_get(backend->kctx,
- backend->gpu_dump_va, &backend->vmap);
+ backend->cpu_dump_va =
+ kbase_phy_alloc_mapping_get(backend->kctx, backend->gpu_dump_va, &backend->vmap);
if (!backend->cpu_dump_va || !backend->vmap)
goto alloc_error;
- backend->to_user_buf =
- kzalloc(info->metadata->dump_buf_bytes, GFP_KERNEL);
+ backend->to_user_buf = kzalloc(info->metadata->dump_buf_bytes, GFP_KERNEL);
if (!backend->to_user_buf)
goto alloc_error;
@@ -798,9 +727,8 @@ kbasep_hwcnt_backend_jm_metadata(const struct kbase_hwcnt_backend_info *info)
}
/* JM backend implementation of kbase_hwcnt_backend_init_fn */
-static int kbasep_hwcnt_backend_jm_init(
- const struct kbase_hwcnt_backend_info *info,
- struct kbase_hwcnt_backend **out_backend)
+static int kbasep_hwcnt_backend_jm_init(const struct kbase_hwcnt_backend_info *info,
+ struct kbase_hwcnt_backend **out_backend)
{
int errcode;
struct kbase_hwcnt_backend_jm *backend = NULL;
@@ -808,8 +736,8 @@ static int kbasep_hwcnt_backend_jm_init(
if (!info || !out_backend)
return -EINVAL;
- errcode = kbasep_hwcnt_backend_jm_create(
- (const struct kbase_hwcnt_backend_jm_info *) info, &backend);
+ errcode = kbasep_hwcnt_backend_jm_create((const struct kbase_hwcnt_backend_jm_info *)info,
+ &backend);
if (errcode)
return errcode;
@@ -825,8 +753,7 @@ static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend)
return;
kbasep_hwcnt_backend_jm_dump_disable(backend);
- kbasep_hwcnt_backend_jm_destroy(
- (struct kbase_hwcnt_backend_jm *)backend);
+ kbasep_hwcnt_backend_jm_destroy((struct kbase_hwcnt_backend_jm *)backend);
}
/**
@@ -835,8 +762,7 @@ static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend)
*
* Can be safely called on a backend info in any state of partial construction.
*/
-static void kbasep_hwcnt_backend_jm_info_destroy(
- const struct kbase_hwcnt_backend_jm_info *info)
+static void kbasep_hwcnt_backend_jm_info_destroy(const struct kbase_hwcnt_backend_jm_info *info)
{
if (!info)
return;
@@ -852,9 +778,8 @@ static void kbasep_hwcnt_backend_jm_info_destroy(
*
* Return: 0 on success, else error code.
*/
-static int kbasep_hwcnt_backend_jm_info_create(
- struct kbase_device *kbdev,
- const struct kbase_hwcnt_backend_jm_info **out_info)
+static int kbasep_hwcnt_backend_jm_info_create(struct kbase_device *kbdev,
+ const struct kbase_hwcnt_backend_jm_info **out_info)
{
int errcode = -ENOMEM;
struct kbase_hwcnt_backend_jm_info *info = NULL;
@@ -877,15 +802,12 @@ static int kbasep_hwcnt_backend_jm_info_create(
info->counter_set = KBASE_HWCNT_SET_PRIMARY;
#endif
- errcode = kbasep_hwcnt_backend_jm_gpu_info_init(kbdev,
- &info->hwcnt_gpu_info);
+ errcode = kbasep_hwcnt_backend_jm_gpu_info_init(kbdev, &info->hwcnt_gpu_info);
if (errcode)
goto error;
- errcode = kbase_hwcnt_jm_metadata_create(&info->hwcnt_gpu_info,
- info->counter_set,
- &info->metadata,
- &info->dump_bytes);
+ errcode = kbase_hwcnt_jm_metadata_create(&info->hwcnt_gpu_info, info->counter_set,
+ &info->metadata, &info->dump_bytes);
if (errcode)
goto error;
@@ -897,9 +819,8 @@ error:
return errcode;
}
-int kbase_hwcnt_backend_jm_create(
- struct kbase_device *kbdev,
- struct kbase_hwcnt_backend_interface *iface)
+int kbase_hwcnt_backend_jm_create(struct kbase_device *kbdev,
+ struct kbase_hwcnt_backend_interface *iface)
{
int errcode;
const struct kbase_hwcnt_backend_jm_info *info = NULL;
@@ -928,8 +849,7 @@ int kbase_hwcnt_backend_jm_create(
return 0;
}
-void kbase_hwcnt_backend_jm_destroy(
- struct kbase_hwcnt_backend_interface *iface)
+void kbase_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_interface *iface)
{
if (!iface)
return;
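
Aside on the hunk around kbasep_hwcnt_backend_jm_on_freq_change above: the callback receives only the embedded rate listener and recovers its parent backend with container_of. Below is a minimal, standalone sketch of that embed-and-recover pattern; every name in it (rate_listener, example_backend, example_on_freq_change) is illustrative and not the driver's actual type or function.

#include <stddef.h>
#include <stdio.h>

/* Illustrative stand-in for the kernel's container_of() helper. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Hypothetical listener embedded in a larger backend object. */
struct rate_listener {
	void (*notify)(struct rate_listener *listener, unsigned int clk_rate_hz);
};

struct example_backend {
	unsigned long long cycle_estimate;
	struct rate_listener rate_listener; /* embedded member */
};

/* The callback only gets the embedded member, then recovers the parent. */
static void example_on_freq_change(struct rate_listener *listener,
				   unsigned int clk_rate_hz)
{
	struct example_backend *backend =
		container_of(listener, struct example_backend, rate_listener);

	/* With the parent recovered, per-backend state can be updated. */
	backend->cycle_estimate += clk_rate_hz;
}

int main(void)
{
	struct example_backend backend = {
		.cycle_estimate = 0,
		.rate_listener = { .notify = example_on_freq_change },
	};

	/* A caller such as a rate-trace manager holds only the listener. */
	backend.rate_listener.notify(&backend.rate_listener, 100000000u);
	printf("estimate: %llu\n", backend.cycle_estimate);
	return 0;
}
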
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_jm.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.h
index 1bc3906..4a6293c 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_jm.h
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,7 +27,7 @@
#ifndef _KBASE_HWCNT_BACKEND_JM_H_
#define _KBASE_HWCNT_BACKEND_JM_H_
-#include "mali_kbase_hwcnt_backend.h"
+#include "hwcnt/backend/mali_kbase_hwcnt_backend.h"
struct kbase_device;
@@ -42,9 +42,8 @@ struct kbase_device;
*
* Return: 0 on success, else error code.
*/
-int kbase_hwcnt_backend_jm_create(
- struct kbase_device *kbdev,
- struct kbase_hwcnt_backend_interface *iface);
+int kbase_hwcnt_backend_jm_create(struct kbase_device *kbdev,
+ struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_jm_destroy() - Destroy a JM hardware counter backend
@@ -54,7 +53,6 @@ int kbase_hwcnt_backend_jm_create(
* Can be safely called on an all-zeroed interface, or on an already destroyed
* interface.
*/
-void kbase_hwcnt_backend_jm_destroy(
- struct kbase_hwcnt_backend_interface *iface);
+void kbase_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_interface *iface);
#endif /* _KBASE_HWCNT_BACKEND_JM_H_ */
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_jm_watchdog.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c
index 3d786ca..a8654ea 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_jm_watchdog.c
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c
@@ -21,11 +21,12 @@
#include <mali_kbase.h>
-#include <mali_kbase_hwcnt_gpu.h>
-#include <mali_kbase_hwcnt_types.h>
+#include <hwcnt/mali_kbase_hwcnt_gpu.h>
+#include <hwcnt/mali_kbase_hwcnt_types.h>
-#include <mali_kbase_hwcnt_backend.h>
-#include <mali_kbase_hwcnt_watchdog_if.h>
+#include <hwcnt/backend/mali_kbase_hwcnt_backend.h>
+#include <hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h>
+#include <hwcnt/mali_kbase_hwcnt_watchdog_if.h>
#if IS_ENABLED(CONFIG_MALI_IS_FPGA) && !IS_ENABLED(CONFIG_MALI_NO_MALI)
/* Backend watch dog timer interval in milliseconds: 18 seconds. */
@@ -118,8 +119,7 @@ enum backend_watchdog_state {
*/
enum wd_init_state {
HWCNT_JM_WD_INIT_START,
- HWCNT_JM_WD_INIT_ALLOC = HWCNT_JM_WD_INIT_START,
- HWCNT_JM_WD_INIT_BACKEND,
+ HWCNT_JM_WD_INIT_BACKEND = HWCNT_JM_WD_INIT_START,
HWCNT_JM_WD_INIT_ENABLE_MAP,
HWCNT_JM_WD_INIT_DUMP_BUFFER,
HWCNT_JM_WD_INIT_END
@@ -296,16 +296,10 @@ kbasep_hwcnt_backend_jm_watchdog_term_partial(struct kbase_hwcnt_backend_jm_watc
if (!wd_backend)
return;
- /* disable timer thread to avoid concurrent access to shared resources */
- wd_backend->info->dump_watchdog_iface->disable(
- wd_backend->info->dump_watchdog_iface->timer);
+ WARN_ON(state > HWCNT_JM_WD_INIT_END);
- /*will exit the loop when state reaches HWCNT_JM_WD_INIT_START*/
while (state-- > HWCNT_JM_WD_INIT_START) {
switch (state) {
- case HWCNT_JM_WD_INIT_ALLOC:
- kfree(wd_backend);
- break;
case HWCNT_JM_WD_INIT_BACKEND:
wd_backend->info->jm_backend_iface->term(wd_backend->jm_backend);
break;
@@ -319,6 +313,8 @@ kbasep_hwcnt_backend_jm_watchdog_term_partial(struct kbase_hwcnt_backend_jm_watc
break;
}
}
+
+ kfree(wd_backend);
}
/* Job manager watchdog backend, implementation of kbase_hwcnt_backend_term_fn
@@ -326,11 +322,17 @@ kbasep_hwcnt_backend_jm_watchdog_term_partial(struct kbase_hwcnt_backend_jm_watc
*/
static void kbasep_hwcnt_backend_jm_watchdog_term(struct kbase_hwcnt_backend *backend)
{
+ struct kbase_hwcnt_backend_jm_watchdog *wd_backend =
+ (struct kbase_hwcnt_backend_jm_watchdog *)backend;
+
if (!backend)
return;
- kbasep_hwcnt_backend_jm_watchdog_term_partial(
- (struct kbase_hwcnt_backend_jm_watchdog *)backend, HWCNT_JM_WD_INIT_END);
+ /* disable timer thread to avoid concurrent access to shared resources */
+ wd_backend->info->dump_watchdog_iface->disable(
+ wd_backend->info->dump_watchdog_iface->timer);
+
+ kbasep_hwcnt_backend_jm_watchdog_term_partial(wd_backend, HWCNT_JM_WD_INIT_END);
}
/* Job manager watchdog backend, implementation of kbase_hwcnt_backend_init_fn */
@@ -350,20 +352,20 @@ static int kbasep_hwcnt_backend_jm_watchdog_init(const struct kbase_hwcnt_backen
jm_info = wd_info->jm_backend_iface->info;
metadata = wd_info->jm_backend_iface->metadata(wd_info->jm_backend_iface->info);
+ wd_backend = kmalloc(sizeof(*wd_backend), GFP_KERNEL);
+ if (!wd_backend) {
+ *out_backend = NULL;
+ return -ENOMEM;
+ }
+
+ *wd_backend = (struct kbase_hwcnt_backend_jm_watchdog){
+ .info = wd_info,
+ .timeout_ms = hwcnt_backend_watchdog_timer_interval_ms,
+ .locked = { .state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY, .is_enabled = false }
+ };
+
while (state < HWCNT_JM_WD_INIT_END && !errcode) {
switch (state) {
- case HWCNT_JM_WD_INIT_ALLOC:
- wd_backend = kmalloc(sizeof(*wd_backend), GFP_KERNEL);
- if (wd_backend) {
- *wd_backend = (struct kbase_hwcnt_backend_jm_watchdog){
- .info = wd_info,
- .timeout_ms = hwcnt_backend_watchdog_timer_interval_ms,
- .locked = { .state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY,
- .is_enabled = false }
- };
- } else
- errcode = -ENOMEM;
- break;
case HWCNT_JM_WD_INIT_BACKEND:
errcode = wd_info->jm_backend_iface->init(jm_info, &wd_backend->jm_backend);
break;
@@ -823,5 +825,5 @@ void kbase_hwcnt_backend_jm_watchdog_destroy(struct kbase_hwcnt_backend_interfac
kfree((struct kbase_hwcnt_backend_jm_watchdog_info *)iface->info);
/*blanking the watchdog backend interface*/
- *iface = (struct kbase_hwcnt_backend_interface){ NULL };
+ memset(iface, 0, sizeof(*iface));
}
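
Aside on the watchdog hunks above: the change hoists the backend allocation out of the init state machine, so the teardown loop now unwinds only the sub-resources and a single kfree() of the container runs after the loop. The following is a hedged, standalone sketch of that enum-driven partial init/teardown shape using invented names (wd_example, wd_init, wd_term_partial), not the driver's real code.

#include <stdlib.h>

/* Hypothetical init states; teardown walks completed states in reverse. */
enum wd_init_state {
	WD_INIT_START,
	WD_INIT_BACKEND = WD_INIT_START,
	WD_INIT_BUFFER,
	WD_INIT_END
};

struct wd_example {
	void *backend;
	void *buffer;
};

static void wd_term_partial(struct wd_example *wd, enum wd_init_state state)
{
	if (!wd)
		return;

	/* Undo only the states that completed, most recent first. */
	while (state-- > WD_INIT_START) {
		switch (state) {
		case WD_INIT_BUFFER:
			free(wd->buffer);
			break;
		case WD_INIT_BACKEND:
			free(wd->backend);
			break;
		default:
			break;
		}
	}

	/* The container itself is freed once, outside the state machine. */
	free(wd);
}

static int wd_init(struct wd_example **out)
{
	enum wd_init_state state = WD_INIT_START;
	int err = 0;
	struct wd_example *wd = calloc(1, sizeof(*wd)); /* allocated up front */

	*out = NULL;
	if (!wd)
		return -1;

	while (state < WD_INIT_END && !err) {
		switch (state) {
		case WD_INIT_BACKEND:
			wd->backend = malloc(64);
			err = wd->backend ? 0 : -1;
			break;
		case WD_INIT_BUFFER:
			wd->buffer = malloc(64);
			err = wd->buffer ? 0 : -1;
			break;
		default:
			break;
		}
		if (!err)
			state++;
	}

	if (err) {
		wd_term_partial(wd, state); /* frees completed states + wd */
		return err;
	}

	*out = wd;
	return 0;
}
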
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_jm_watchdog.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h
index 5021b4f..02a7952 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_jm_watchdog.h
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -32,8 +32,8 @@
#ifndef _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_
#define _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_
-#include <mali_kbase_hwcnt_backend.h>
-#include <mali_kbase_hwcnt_watchdog_if.h>
+#include <hwcnt/backend/mali_kbase_hwcnt_backend.h>
+#include <hwcnt/mali_kbase_hwcnt_watchdog_if.h>
/**
* kbase_hwcnt_backend_jm_watchdog_create() - Create a job manager hardware counter watchdog
diff --git a/mali_kbase/mali_kbase_hwcnt.c b/mali_kbase/hwcnt/mali_kbase_hwcnt.c
index a54f005..34deb5d 100644
--- a/mali_kbase/mali_kbase_hwcnt.c
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -23,10 +23,10 @@
* Implementation of hardware counter context and accumulator APIs.
*/
-#include "mali_kbase_hwcnt_context.h"
-#include "mali_kbase_hwcnt_accumulator.h"
-#include "mali_kbase_hwcnt_backend.h"
-#include "mali_kbase_hwcnt_types.h"
+#include "hwcnt/mali_kbase_hwcnt_context.h"
+#include "hwcnt/mali_kbase_hwcnt_accumulator.h"
+#include "hwcnt/backend/mali_kbase_hwcnt_backend.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
#include <linux/mutex.h>
#include <linux/spinlock.h>
@@ -39,11 +39,7 @@
* @ACCUM_STATE_ENABLED: Enabled state, where dumping is enabled if there are
* any enabled counters.
*/
-enum kbase_hwcnt_accum_state {
- ACCUM_STATE_ERROR,
- ACCUM_STATE_DISABLED,
- ACCUM_STATE_ENABLED
-};
+enum kbase_hwcnt_accum_state { ACCUM_STATE_ERROR, ACCUM_STATE_DISABLED, ACCUM_STATE_ENABLED };
/**
* struct kbase_hwcnt_accumulator - Hardware counter accumulator structure.
@@ -130,9 +126,8 @@ struct kbase_hwcnt_context {
struct workqueue_struct *wq;
};
-int kbase_hwcnt_context_init(
- const struct kbase_hwcnt_backend_interface *iface,
- struct kbase_hwcnt_context **out_hctx)
+int kbase_hwcnt_context_init(const struct kbase_hwcnt_backend_interface *iface,
+ struct kbase_hwcnt_context **out_hctx)
{
struct kbase_hwcnt_context *hctx = NULL;
@@ -149,8 +144,7 @@ int kbase_hwcnt_context_init(
mutex_init(&hctx->accum_lock);
hctx->accum_inited = false;
- hctx->wq =
- alloc_workqueue("mali_kbase_hwcnt", WQ_HIGHPRI | WQ_UNBOUND, 0);
+ hctx->wq = alloc_workqueue("mali_kbase_hwcnt", WQ_HIGHPRI | WQ_UNBOUND, 0);
if (!hctx->wq)
goto err_alloc_workqueue;
@@ -208,35 +202,30 @@ static int kbasep_hwcnt_accumulator_init(struct kbase_hwcnt_context *hctx)
WARN_ON(!hctx);
WARN_ON(!hctx->accum_inited);
- errcode = hctx->iface->init(
- hctx->iface->info, &hctx->accum.backend);
+ errcode = hctx->iface->init(hctx->iface->info, &hctx->accum.backend);
if (errcode)
goto error;
hctx->accum.metadata = hctx->iface->metadata(hctx->iface->info);
hctx->accum.state = ACCUM_STATE_ERROR;
- errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata,
- &hctx->accum.enable_map);
+ errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata, &hctx->accum.enable_map);
if (errcode)
goto error;
hctx->accum.enable_map_any_enabled = false;
- errcode = kbase_hwcnt_dump_buffer_alloc(hctx->accum.metadata,
- &hctx->accum.accum_buf);
+ errcode = kbase_hwcnt_dump_buffer_alloc(hctx->accum.metadata, &hctx->accum.accum_buf);
if (errcode)
goto error;
- errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata,
- &hctx->accum.scratch_map);
+ errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata, &hctx->accum.scratch_map);
if (errcode)
goto error;
hctx->accum.accumulated = false;
- hctx->accum.ts_last_dump_ns =
- hctx->iface->timestamp_ns(hctx->accum.backend);
+ hctx->accum.ts_last_dump_ns = hctx->iface->timestamp_ns(hctx->accum.backend);
return 0;
@@ -252,8 +241,7 @@ error:
* @hctx: Non-NULL pointer to hardware counter context.
* @accumulate: True if we should accumulate before disabling, else false.
*/
-static void kbasep_hwcnt_accumulator_disable(
- struct kbase_hwcnt_context *hctx, bool accumulate)
+static void kbasep_hwcnt_accumulator_disable(struct kbase_hwcnt_context *hctx, bool accumulate)
{
int errcode = 0;
bool backend_enabled = false;
@@ -272,8 +260,7 @@ static void kbasep_hwcnt_accumulator_disable(
WARN_ON(hctx->disable_count != 0);
WARN_ON(hctx->accum.state == ACCUM_STATE_DISABLED);
- if ((hctx->accum.state == ACCUM_STATE_ENABLED) &&
- (accum->enable_map_any_enabled))
+ if ((hctx->accum.state == ACCUM_STATE_ENABLED) && (accum->enable_map_any_enabled))
backend_enabled = true;
if (!backend_enabled)
@@ -297,8 +284,8 @@ static void kbasep_hwcnt_accumulator_disable(
if (errcode)
goto disable;
- errcode = hctx->iface->dump_get(accum->backend,
- &accum->accum_buf, &accum->enable_map, accum->accumulated);
+ errcode = hctx->iface->dump_get(accum->backend, &accum->accum_buf, &accum->enable_map,
+ accum->accumulated);
if (errcode)
goto disable;
@@ -336,8 +323,7 @@ static void kbasep_hwcnt_accumulator_enable(struct kbase_hwcnt_context *hctx)
/* The backend only needs enabling if any counters are enabled */
if (accum->enable_map_any_enabled)
- errcode = hctx->iface->dump_enable_nolock(
- accum->backend, &accum->enable_map);
+ errcode = hctx->iface->dump_enable_nolock(accum->backend, &accum->enable_map);
if (!errcode)
accum->state = ACCUM_STATE_ENABLED;
@@ -364,12 +350,9 @@ static void kbasep_hwcnt_accumulator_enable(struct kbase_hwcnt_context *hctx)
*
* Return: 0 on success, else error code.
*/
-static int kbasep_hwcnt_accumulator_dump(
- struct kbase_hwcnt_context *hctx,
- u64 *ts_start_ns,
- u64 *ts_end_ns,
- struct kbase_hwcnt_dump_buffer *dump_buf,
- const struct kbase_hwcnt_enable_map *new_map)
+static int kbasep_hwcnt_accumulator_dump(struct kbase_hwcnt_context *hctx, u64 *ts_start_ns,
+ u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf,
+ const struct kbase_hwcnt_enable_map *new_map)
{
int errcode = 0;
unsigned long flags;
@@ -379,7 +362,7 @@ static int kbasep_hwcnt_accumulator_dump(
bool cur_map_any_enabled;
struct kbase_hwcnt_enable_map *cur_map;
bool new_map_any_enabled = false;
- u64 dump_time_ns;
+ u64 dump_time_ns = 0;
struct kbase_hwcnt_accumulator *accum;
WARN_ON(!hctx);
@@ -398,8 +381,7 @@ static int kbasep_hwcnt_accumulator_dump(
kbase_hwcnt_enable_map_copy(cur_map, &accum->enable_map);
if (new_map)
- new_map_any_enabled =
- kbase_hwcnt_enable_map_any_enabled(new_map);
+ new_map_any_enabled = kbase_hwcnt_enable_map_any_enabled(new_map);
/*
* We're holding accum_lock, so the accumulator state might transition
@@ -426,8 +408,7 @@ static int kbasep_hwcnt_accumulator_dump(
* then we'll do it ourselves after the dump.
*/
if (new_map) {
- kbase_hwcnt_enable_map_copy(
- &accum->enable_map, new_map);
+ kbase_hwcnt_enable_map_copy(&accum->enable_map, new_map);
accum->enable_map_any_enabled = new_map_any_enabled;
}
@@ -440,12 +421,10 @@ static int kbasep_hwcnt_accumulator_dump(
/* Initiate the dump if the backend is enabled. */
if ((state == ACCUM_STATE_ENABLED) && cur_map_any_enabled) {
if (dump_buf) {
- errcode = hctx->iface->dump_request(
- accum->backend, &dump_time_ns);
+ errcode = hctx->iface->dump_request(accum->backend, &dump_time_ns);
dump_requested = true;
} else {
- dump_time_ns = hctx->iface->timestamp_ns(
- accum->backend);
+ dump_time_ns = hctx->iface->timestamp_ns(accum->backend);
errcode = hctx->iface->dump_clear(accum->backend);
}
@@ -457,8 +436,7 @@ static int kbasep_hwcnt_accumulator_dump(
/* Copy any accumulation into the dest buffer */
if (accum->accumulated && dump_buf) {
- kbase_hwcnt_dump_buffer_copy(
- dump_buf, &accum->accum_buf, cur_map);
+ kbase_hwcnt_dump_buffer_copy(dump_buf, &accum->accum_buf, cur_map);
dump_written = true;
}
@@ -483,8 +461,7 @@ static int kbasep_hwcnt_accumulator_dump(
* we're already enabled and holding accum_lock is impossible.
*/
if (new_map_any_enabled) {
- errcode = hctx->iface->dump_enable(
- accum->backend, new_map);
+ errcode = hctx->iface->dump_enable(accum->backend, new_map);
if (errcode)
goto error;
}
@@ -495,11 +472,8 @@ static int kbasep_hwcnt_accumulator_dump(
/* If we dumped, copy or accumulate it into the destination */
if (dump_requested) {
WARN_ON(state != ACCUM_STATE_ENABLED);
- errcode = hctx->iface->dump_get(
- accum->backend,
- dump_buf,
- cur_map,
- dump_written);
+ errcode = hctx->iface->dump_get(accum->backend, dump_buf, cur_map,
+ dump_written);
if (errcode)
goto error;
dump_written = true;
@@ -540,8 +514,7 @@ error:
* @hctx: Non-NULL pointer to hardware counter context.
* @accumulate: True if we should accumulate before disabling, else false.
*/
-static void kbasep_hwcnt_context_disable(
- struct kbase_hwcnt_context *hctx, bool accumulate)
+static void kbasep_hwcnt_context_disable(struct kbase_hwcnt_context *hctx, bool accumulate)
{
unsigned long flags;
@@ -563,9 +536,8 @@ static void kbasep_hwcnt_context_disable(
}
}
-int kbase_hwcnt_accumulator_acquire(
- struct kbase_hwcnt_context *hctx,
- struct kbase_hwcnt_accumulator **accum)
+int kbase_hwcnt_accumulator_acquire(struct kbase_hwcnt_context *hctx,
+ struct kbase_hwcnt_accumulator **accum)
{
int errcode = 0;
unsigned long flags;
@@ -618,9 +590,7 @@ int kbase_hwcnt_accumulator_acquire(
* Regardless of initial state, counters don't need to be enabled via
* the backend, as the initial enable map has no enabled counters.
*/
- hctx->accum.state = (hctx->disable_count == 0) ?
- ACCUM_STATE_ENABLED :
- ACCUM_STATE_DISABLED;
+ hctx->accum.state = (hctx->disable_count == 0) ? ACCUM_STATE_ENABLED : ACCUM_STATE_DISABLED;
spin_unlock_irqrestore(&hctx->state_lock, flags);
@@ -728,8 +698,7 @@ void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx)
spin_unlock_irqrestore(&hctx->state_lock, flags);
}
-const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(
- struct kbase_hwcnt_context *hctx)
+const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(struct kbase_hwcnt_context *hctx)
{
if (!hctx)
return NULL;
@@ -737,8 +706,7 @@ const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(
return hctx->iface->metadata(hctx->iface->info);
}
-bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx,
- struct work_struct *work)
+bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx, struct work_struct *work)
{
if (WARN_ON(!hctx) || WARN_ON(!work))
return false;
@@ -746,12 +714,10 @@ bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx,
return queue_work(hctx->wq, work);
}
-int kbase_hwcnt_accumulator_set_counters(
- struct kbase_hwcnt_accumulator *accum,
- const struct kbase_hwcnt_enable_map *new_map,
- u64 *ts_start_ns,
- u64 *ts_end_ns,
- struct kbase_hwcnt_dump_buffer *dump_buf)
+int kbase_hwcnt_accumulator_set_counters(struct kbase_hwcnt_accumulator *accum,
+ const struct kbase_hwcnt_enable_map *new_map,
+ u64 *ts_start_ns, u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf)
{
int errcode;
struct kbase_hwcnt_context *hctx;
@@ -767,19 +733,15 @@ int kbase_hwcnt_accumulator_set_counters(
mutex_lock(&hctx->accum_lock);
- errcode = kbasep_hwcnt_accumulator_dump(
- hctx, ts_start_ns, ts_end_ns, dump_buf, new_map);
+ errcode = kbasep_hwcnt_accumulator_dump(hctx, ts_start_ns, ts_end_ns, dump_buf, new_map);
mutex_unlock(&hctx->accum_lock);
return errcode;
}
-int kbase_hwcnt_accumulator_dump(
- struct kbase_hwcnt_accumulator *accum,
- u64 *ts_start_ns,
- u64 *ts_end_ns,
- struct kbase_hwcnt_dump_buffer *dump_buf)
+int kbase_hwcnt_accumulator_dump(struct kbase_hwcnt_accumulator *accum, u64 *ts_start_ns,
+ u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf)
{
int errcode;
struct kbase_hwcnt_context *hctx;
@@ -794,8 +756,7 @@ int kbase_hwcnt_accumulator_dump(
mutex_lock(&hctx->accum_lock);
- errcode = kbasep_hwcnt_accumulator_dump(
- hctx, ts_start_ns, ts_end_ns, dump_buf, NULL);
+ errcode = kbasep_hwcnt_accumulator_dump(hctx, ts_start_ns, ts_end_ns, dump_buf, NULL);
mutex_unlock(&hctx->accum_lock);
diff --git a/mali_kbase/mali_kbase_hwcnt_accumulator.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_accumulator.h
index af542ea..069e020 100644
--- a/mali_kbase/mali_kbase_hwcnt_accumulator.h
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_accumulator.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -67,9 +67,8 @@ struct kbase_hwcnt_dump_buffer;
*
* Return: 0 on success or error code.
*/
-int kbase_hwcnt_accumulator_acquire(
- struct kbase_hwcnt_context *hctx,
- struct kbase_hwcnt_accumulator **accum);
+int kbase_hwcnt_accumulator_acquire(struct kbase_hwcnt_context *hctx,
+ struct kbase_hwcnt_accumulator **accum);
/**
* kbase_hwcnt_accumulator_release() - Release a hardware counter accumulator.
@@ -102,12 +101,10 @@ void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum);
*
* Return: 0 on success or error code.
*/
-int kbase_hwcnt_accumulator_set_counters(
- struct kbase_hwcnt_accumulator *accum,
- const struct kbase_hwcnt_enable_map *new_map,
- u64 *ts_start_ns,
- u64 *ts_end_ns,
- struct kbase_hwcnt_dump_buffer *dump_buf);
+int kbase_hwcnt_accumulator_set_counters(struct kbase_hwcnt_accumulator *accum,
+ const struct kbase_hwcnt_enable_map *new_map,
+ u64 *ts_start_ns, u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf);
/**
* kbase_hwcnt_accumulator_dump() - Perform a dump of the currently enabled
@@ -127,11 +124,8 @@ int kbase_hwcnt_accumulator_set_counters(
*
* Return: 0 on success or error code.
*/
-int kbase_hwcnt_accumulator_dump(
- struct kbase_hwcnt_accumulator *accum,
- u64 *ts_start_ns,
- u64 *ts_end_ns,
- struct kbase_hwcnt_dump_buffer *dump_buf);
+int kbase_hwcnt_accumulator_dump(struct kbase_hwcnt_accumulator *accum, u64 *ts_start_ns,
+ u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf);
/**
* kbase_hwcnt_accumulator_timestamp_ns() - Get the current accumulator backend
diff --git a/mali_kbase/mali_kbase_hwcnt_context.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_context.h
index 34423d1..89732a9 100644
--- a/mali_kbase/mali_kbase_hwcnt_context.h
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_context.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -43,9 +43,8 @@ struct kbase_hwcnt_context;
*
* Return: 0 on success, else error code.
*/
-int kbase_hwcnt_context_init(
- const struct kbase_hwcnt_backend_interface *iface,
- struct kbase_hwcnt_context **out_hctx);
+int kbase_hwcnt_context_init(const struct kbase_hwcnt_backend_interface *iface,
+ struct kbase_hwcnt_context **out_hctx);
/**
* kbase_hwcnt_context_term() - Terminate a hardware counter context.
@@ -61,8 +60,7 @@ void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx);
*
* Return: Non-NULL pointer to metadata, or NULL on error.
*/
-const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(
- struct kbase_hwcnt_context *hctx);
+const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(struct kbase_hwcnt_context *hctx);
/**
* kbase_hwcnt_context_disable() - Increment the disable count of the context.
@@ -145,7 +143,6 @@ void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx);
* this meant progress through the power management states could be stalled
* for however long that higher priority thread took.
*/
-bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx,
- struct work_struct *work);
+bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx, struct work_struct *work);
#endif /* _KBASE_HWCNT_CONTEXT_H_ */
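
Aside on the context/accumulator prototypes touched above: together they form an acquire, dump, release flow. The sketch below is assembled only from signatures visible in this patch and is a hedged usage outline, not code from the change; the backend interface argument, the function name, and the omitted enable-map setup and buffer teardown are assumptions.

/* Hedged usage sketch: error paths are minimal and cleanup of the dump
 * buffer is intentionally omitted. my_backend_iface would come from a
 * backend creator such as kbase_hwcnt_backend_jm_create().
 */
#include "hwcnt/mali_kbase_hwcnt_context.h"
#include "hwcnt/mali_kbase_hwcnt_accumulator.h"
#include "hwcnt/mali_kbase_hwcnt_types.h"

static int example_hwcnt_single_dump(const struct kbase_hwcnt_backend_interface *my_backend_iface)
{
	struct kbase_hwcnt_context *hctx = NULL;
	struct kbase_hwcnt_accumulator *accum = NULL;
	struct kbase_hwcnt_dump_buffer dump_buf = { 0 };
	const struct kbase_hwcnt_metadata *metadata;
	u64 ts_start_ns = 0, ts_end_ns = 0;
	int err;

	/* Create a counter context on top of a backend interface. */
	err = kbase_hwcnt_context_init(my_backend_iface, &hctx);
	if (err)
		return err;

	/* Metadata drives the buffer layout; allocate a matching buffer. */
	metadata = kbase_hwcnt_context_metadata(hctx);
	err = kbase_hwcnt_dump_buffer_alloc(metadata, &dump_buf);
	if (err)
		goto out_term;

	/* Counters start disabled after acquire, so a real user would first
	 * call kbase_hwcnt_accumulator_set_counters() with a populated
	 * enable map before expecting non-zero values in the dump.
	 */
	err = kbase_hwcnt_accumulator_acquire(hctx, &accum);
	if (err)
		goto out_term;

	err = kbase_hwcnt_accumulator_dump(accum, &ts_start_ns, &ts_end_ns, &dump_buf);

	kbase_hwcnt_accumulator_release(accum);
out_term:
	/* Dump buffer teardown omitted for brevity. */
	kbase_hwcnt_context_term(hctx);
	return err;
}
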
diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c
index 5f5c36f..74916da 100644
--- a/mali_kbase/mali_kbase_hwcnt_gpu.c
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c
@@ -19,8 +19,8 @@
*
*/
-#include "mali_kbase_hwcnt_gpu.h"
-#include "mali_kbase_hwcnt_types.h"
+#include "hwcnt/mali_kbase_hwcnt_gpu.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
#include <linux/err.h>
@@ -32,8 +32,7 @@ enum enable_map_idx {
EM_COUNT,
};
-static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set,
- bool is_csf)
+static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf)
{
switch (counter_set) {
case KBASE_HWCNT_SET_PRIMARY:
@@ -56,8 +55,7 @@ static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set,
}
}
-static void kbasep_get_tiler_block_type(u64 *dst,
- enum kbase_hwcnt_set counter_set)
+static void kbasep_get_tiler_block_type(u64 *dst, enum kbase_hwcnt_set counter_set)
{
switch (counter_set) {
case KBASE_HWCNT_SET_PRIMARY:
@@ -72,8 +70,7 @@ static void kbasep_get_tiler_block_type(u64 *dst,
}
}
-static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set,
- bool is_csf)
+static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf)
{
switch (counter_set) {
case KBASE_HWCNT_SET_PRIMARY:
@@ -93,8 +90,7 @@ static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set,
}
}
-static void kbasep_get_memsys_block_type(u64 *dst,
- enum kbase_hwcnt_set counter_set)
+static void kbasep_get_memsys_block_type(u64 *dst, enum kbase_hwcnt_set counter_set)
{
switch (counter_set) {
case KBASE_HWCNT_SET_PRIMARY:
@@ -122,15 +118,14 @@ static void kbasep_get_memsys_block_type(u64 *dst,
*
* Return: 0 on success, else error code.
*/
-static int kbasep_hwcnt_backend_gpu_metadata_create(
- const struct kbase_hwcnt_gpu_info *gpu_info, const bool is_csf,
- enum kbase_hwcnt_set counter_set,
- const struct kbase_hwcnt_metadata **metadata)
+static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
+ const bool is_csf,
+ enum kbase_hwcnt_set counter_set,
+ const struct kbase_hwcnt_metadata **metadata)
{
struct kbase_hwcnt_description desc;
struct kbase_hwcnt_group_description group;
- struct kbase_hwcnt_block_description
- blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
+ struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
size_t non_sc_block_count;
size_t sc_block_count;
@@ -156,22 +151,19 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(
kbasep_get_fe_block_type(&blks[0].type, counter_set, is_csf);
blks[0].inst_cnt = 1;
blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
- blks[0].ctr_cnt = gpu_info->prfcnt_values_per_block -
- KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+ blks[0].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
/* One Tiler block */
kbasep_get_tiler_block_type(&blks[1].type, counter_set);
blks[1].inst_cnt = 1;
blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
- blks[1].ctr_cnt = gpu_info->prfcnt_values_per_block -
- KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+ blks[1].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
/* l2_count memsys blks */
kbasep_get_memsys_block_type(&blks[2].type, counter_set);
blks[2].inst_cnt = gpu_info->l2_count;
blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
- blks[2].ctr_cnt = gpu_info->prfcnt_values_per_block -
- KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+ blks[2].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
/*
* There are as many shader cores in the system as there are bits set in
@@ -192,8 +184,7 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(
kbasep_get_sc_block_type(&blks[3].type, counter_set, is_csf);
blks[3].inst_cnt = sc_block_count;
blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
- blks[3].ctr_cnt = gpu_info->prfcnt_values_per_block -
- KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+ blks[3].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
WARN_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 4);
@@ -220,8 +211,7 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(
*
* Return: Size of buffer the GPU needs to perform a counter dump.
*/
-static size_t
-kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_info *gpu_info)
+static size_t kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_info *gpu_info)
{
WARN_ON(!gpu_info);
@@ -229,11 +219,10 @@ kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_info *gpu_info)
gpu_info->prfcnt_values_per_block * KBASE_HWCNT_VALUE_HW_BYTES;
}
-int kbase_hwcnt_jm_metadata_create(
- const struct kbase_hwcnt_gpu_info *gpu_info,
- enum kbase_hwcnt_set counter_set,
- const struct kbase_hwcnt_metadata **out_metadata,
- size_t *out_dump_bytes)
+int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
+ enum kbase_hwcnt_set counter_set,
+ const struct kbase_hwcnt_metadata **out_metadata,
+ size_t *out_dump_bytes)
{
int errcode;
const struct kbase_hwcnt_metadata *metadata;
@@ -250,8 +239,7 @@ int kbase_hwcnt_jm_metadata_create(
* all the available L2 cache and Shader cores are allocated.
*/
dump_bytes = kbasep_hwcnt_backend_jm_dump_bytes(gpu_info);
- errcode = kbasep_hwcnt_backend_gpu_metadata_create(
- gpu_info, false, counter_set, &metadata);
+ errcode = kbasep_hwcnt_backend_gpu_metadata_create(gpu_info, false, counter_set, &metadata);
if (errcode)
return errcode;
@@ -276,10 +264,9 @@ void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata
kbase_hwcnt_metadata_destroy(metadata);
}
-int kbase_hwcnt_csf_metadata_create(
- const struct kbase_hwcnt_gpu_info *gpu_info,
- enum kbase_hwcnt_set counter_set,
- const struct kbase_hwcnt_metadata **out_metadata)
+int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
+ enum kbase_hwcnt_set counter_set,
+ const struct kbase_hwcnt_metadata **out_metadata)
{
int errcode;
const struct kbase_hwcnt_metadata *metadata;
@@ -287,8 +274,7 @@ int kbase_hwcnt_csf_metadata_create(
if (!gpu_info || !out_metadata)
return -EINVAL;
- errcode = kbasep_hwcnt_backend_gpu_metadata_create(
- gpu_info, true, counter_set, &metadata);
+ errcode = kbasep_hwcnt_backend_gpu_metadata_create(gpu_info, true, counter_set, &metadata);
if (errcode)
return errcode;
@@ -297,8 +283,7 @@ int kbase_hwcnt_csf_metadata_create(
return 0;
}
-void kbase_hwcnt_csf_metadata_destroy(
- const struct kbase_hwcnt_metadata *metadata)
+void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
{
if (!metadata)
return;
@@ -306,10 +291,7 @@ void kbase_hwcnt_csf_metadata_destroy(
kbase_hwcnt_metadata_destroy(metadata);
}
-static bool is_block_type_shader(
- const u64 grp_type,
- const u64 blk_type,
- const size_t blk)
+static bool is_block_type_shader(const u64 grp_type, const u64 blk_type, const size_t blk)
{
bool is_shader = false;
@@ -326,9 +308,7 @@ static bool is_block_type_shader(
return is_shader;
}
-static bool is_block_type_l2_cache(
- const u64 grp_type,
- const u64 blk_type)
+static bool is_block_type_l2_cache(const u64 grp_type, const u64 blk_type)
{
bool is_l2_cache = false;
@@ -348,10 +328,8 @@ static bool is_block_type_l2_cache(
}
int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
- const struct kbase_hwcnt_enable_map *dst_enable_map,
- u64 pm_core_mask,
- const struct kbase_hwcnt_curr_config *curr_config,
- bool accumulate)
+ const struct kbase_hwcnt_enable_map *dst_enable_map, u64 pm_core_mask,
+ const struct kbase_hwcnt_curr_config *curr_config, bool accumulate)
{
const struct kbase_hwcnt_metadata *metadata;
size_t grp, blk, blk_inst;
@@ -362,28 +340,21 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
/* Variables to deal with the current configuration */
int l2_count = 0;
- if (!dst || !src || !dst_enable_map ||
- (dst_enable_map->metadata != dst->metadata))
+ if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata))
return -EINVAL;
metadata = dst->metadata;
- kbase_hwcnt_metadata_for_each_block(
- metadata, grp, blk, blk_inst) {
- const size_t hdr_cnt =
- kbase_hwcnt_metadata_block_headers_count(
- metadata, grp, blk);
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
const size_t ctr_cnt =
- kbase_hwcnt_metadata_block_counters_count(
- metadata, grp, blk);
- const u64 blk_type = kbase_hwcnt_metadata_block_type(
- metadata, grp, blk);
+ kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
+ const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
const bool is_shader_core = is_block_type_shader(
- kbase_hwcnt_metadata_group_type(metadata, grp),
- blk_type, blk);
+ kbase_hwcnt_metadata_group_type(metadata, grp), blk_type, blk);
const bool is_l2_cache = is_block_type_l2_cache(
- kbase_hwcnt_metadata_group_type(metadata, grp),
- blk_type);
+ kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
const bool is_undefined = kbase_hwcnt_is_block_type_undefined(
kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
bool hw_res_available = true;
@@ -412,10 +383,9 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
/*
* Skip block if no values in the destination block are enabled.
*/
- if (kbase_hwcnt_enable_map_block_enabled(
- dst_enable_map, grp, blk, blk_inst)) {
- u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
- dst, grp, blk, blk_inst);
+ if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) {
+ u64 *dst_blk =
+ kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
const u64 *src_blk = dump_src + src_offset;
bool blk_powered;
@@ -435,13 +405,11 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
if (blk_powered && !is_undefined && hw_res_available) {
/* Only powered and defined blocks have valid data. */
if (accumulate) {
- kbase_hwcnt_dump_buffer_block_accumulate(
- dst_blk, src_blk, hdr_cnt,
- ctr_cnt);
+ kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk,
+ hdr_cnt, ctr_cnt);
} else {
- kbase_hwcnt_dump_buffer_block_copy(
- dst_blk, src_blk,
- (hdr_cnt + ctr_cnt));
+ kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk,
+ (hdr_cnt + ctr_cnt));
}
} else {
/* Even though the block might be undefined, the
@@ -469,26 +437,23 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
}
int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
- const struct kbase_hwcnt_enable_map *dst_enable_map,
- bool accumulate)
+ const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate)
{
const struct kbase_hwcnt_metadata *metadata;
const u64 *dump_src = src;
size_t src_offset = 0;
size_t grp, blk, blk_inst;
- if (!dst || !src || !dst_enable_map ||
- (dst_enable_map->metadata != dst->metadata))
+ if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata))
return -EINVAL;
metadata = dst->metadata;
- kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
- const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(
- metadata, grp, blk);
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
const size_t ctr_cnt =
- kbase_hwcnt_metadata_block_counters_count(metadata, grp,
- blk);
+ kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
const uint64_t blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
const bool is_undefined = kbase_hwcnt_is_block_type_undefined(
kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
@@ -496,10 +461,9 @@ int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
/*
* Skip block if no values in the destination block are enabled.
*/
- if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp,
- blk, blk_inst)) {
- u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
- dst, grp, blk, blk_inst);
+ if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) {
+ u64 *dst_blk =
+ kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
const u64 *src_blk = dump_src + src_offset;
if (!is_undefined) {
@@ -542,12 +506,9 @@ int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
* @hi: Non-NULL pointer to where high 64 bits of block enable map abstraction
* will be stored.
*/
-static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical(
- u32 phys,
- u64 *lo,
- u64 *hi)
+static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical(u32 phys, u64 *lo, u64 *hi)
{
- u64 dwords[2] = {0, 0};
+ u64 dwords[2] = { 0, 0 };
size_t dword_idx;
@@ -572,9 +533,8 @@ static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical(
*hi = dwords[1];
}
-void kbase_hwcnt_gpu_enable_map_to_physical(
- struct kbase_hwcnt_physical_enable_map *dst,
- const struct kbase_hwcnt_enable_map *src)
+void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_map *dst,
+ const struct kbase_hwcnt_enable_map *src)
{
const struct kbase_hwcnt_metadata *metadata;
u64 fe_bm[EM_COUNT] = { 0 };
@@ -588,17 +548,13 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
metadata = src->metadata;
- kbase_hwcnt_metadata_for_each_block(
- metadata, grp, blk, blk_inst) {
- const u64 grp_type = kbase_hwcnt_metadata_group_type(
- metadata, grp);
- const u64 blk_type = kbase_hwcnt_metadata_block_type(
- metadata, grp, blk);
- const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(
- src, grp, blk, blk_inst);
-
- if ((enum kbase_hwcnt_gpu_group_type)grp_type ==
- KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp);
+ const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
+ const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(src, grp, blk, blk_inst);
+
+ if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
const size_t map_stride =
kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
size_t map_idx;
@@ -649,8 +605,7 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
kbase_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm[EM_LO], mmu_l2_bm[EM_HI]);
}
-void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst,
- enum kbase_hwcnt_set src)
+void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kbase_hwcnt_set src)
{
switch (src) {
case KBASE_HWCNT_SET_PRIMARY:
@@ -667,9 +622,8 @@ void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst,
}
}
-void kbase_hwcnt_gpu_enable_map_from_physical(
- struct kbase_hwcnt_enable_map *dst,
- const struct kbase_hwcnt_physical_enable_map *src)
+void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst,
+ const struct kbase_hwcnt_physical_enable_map *src)
{
const struct kbase_hwcnt_metadata *metadata;
@@ -692,16 +646,13 @@ void kbase_hwcnt_gpu_enable_map_from_physical(
kbasep_hwcnt_backend_gpu_block_map_from_physical(src->mmu_l2_bm, &mmu_l2_bm[EM_LO],
&mmu_l2_bm[EM_HI]);
- kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
- const u64 grp_type = kbase_hwcnt_metadata_group_type(
- metadata, grp);
- const u64 blk_type = kbase_hwcnt_metadata_block_type(
- metadata, grp, blk);
- u64 *blk_map = kbase_hwcnt_enable_map_block_instance(
- dst, grp, blk, blk_inst);
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp);
+ const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
+ u64 *blk_map = kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst);
- if ((enum kbase_hwcnt_gpu_group_type)grp_type ==
- KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
+ if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
const size_t map_stride =
kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
size_t map_idx;
@@ -744,29 +695,25 @@ void kbase_hwcnt_gpu_enable_map_from_physical(
}
}
-void kbase_hwcnt_gpu_patch_dump_headers(
- struct kbase_hwcnt_dump_buffer *buf,
- const struct kbase_hwcnt_enable_map *enable_map)
+void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf,
+ const struct kbase_hwcnt_enable_map *enable_map)
{
const struct kbase_hwcnt_metadata *metadata;
size_t grp, blk, blk_inst;
- if (WARN_ON(!buf) || WARN_ON(!enable_map) ||
- WARN_ON(buf->metadata != enable_map->metadata))
+ if (WARN_ON(!buf) || WARN_ON(!enable_map) || WARN_ON(buf->metadata != enable_map->metadata))
return;
metadata = buf->metadata;
- kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
- const u64 grp_type =
- kbase_hwcnt_metadata_group_type(metadata, grp);
- u64 *buf_blk = kbase_hwcnt_dump_buffer_block_instance(
- buf, grp, blk, blk_inst);
- const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(
- enable_map, grp, blk, blk_inst);
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp);
+ u64 *buf_blk = kbase_hwcnt_dump_buffer_block_instance(buf, grp, blk, blk_inst);
+ const u64 *blk_map =
+ kbase_hwcnt_enable_map_block_instance(enable_map, grp, blk, blk_inst);
- if ((enum kbase_hwcnt_gpu_group_type)grp_type ==
- KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
+ if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
const size_t map_stride =
kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
u64 prfcnt_bm[EM_COUNT] = { 0 };
diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h
index f890d45..a49c31e 100644
--- a/mali_kbase/mali_kbase_hwcnt_gpu.h
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h
@@ -34,9 +34,8 @@ struct kbase_hwcnt_dump_buffer;
#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4
#define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4
#define KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK 60
-#define KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK \
- (KBASE_HWCNT_V5_HEADERS_PER_BLOCK + \
- KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK)
+#define KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK \
+ (KBASE_HWCNT_V5_HEADERS_PER_BLOCK + KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK)
/* FrontEnd block count in V5 GPU hardware counter. */
#define KBASE_HWCNT_V5_FE_BLOCK_COUNT 1
@@ -228,19 +227,17 @@ static inline bool kbase_hwcnt_is_block_type_undefined(const uint64_t grp_type,
*
* Return: 0 on success, else error code.
*/
-int kbase_hwcnt_jm_metadata_create(
- const struct kbase_hwcnt_gpu_info *info,
- enum kbase_hwcnt_set counter_set,
- const struct kbase_hwcnt_metadata **out_metadata,
- size_t *out_dump_bytes);
+int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *info,
+ enum kbase_hwcnt_set counter_set,
+ const struct kbase_hwcnt_metadata **out_metadata,
+ size_t *out_dump_bytes);
/**
* kbase_hwcnt_jm_metadata_destroy() - Destroy JM GPU hardware counter metadata.
*
* @metadata: Pointer to metadata to destroy.
*/
-void kbase_hwcnt_jm_metadata_destroy(
- const struct kbase_hwcnt_metadata *metadata);
+void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
/**
* kbase_hwcnt_csf_metadata_create() - Create hardware counter metadata for the
@@ -252,18 +249,16 @@ void kbase_hwcnt_jm_metadata_destroy(
*
* Return: 0 on success, else error code.
*/
-int kbase_hwcnt_csf_metadata_create(
- const struct kbase_hwcnt_gpu_info *info,
- enum kbase_hwcnt_set counter_set,
- const struct kbase_hwcnt_metadata **out_metadata);
+int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *info,
+ enum kbase_hwcnt_set counter_set,
+ const struct kbase_hwcnt_metadata **out_metadata);
/**
* kbase_hwcnt_csf_metadata_destroy() - Destroy CSF GPU hardware counter
* metadata.
* @metadata: Pointer to metadata to destroy.
*/
-void kbase_hwcnt_csf_metadata_destroy(
- const struct kbase_hwcnt_metadata *metadata);
+void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
/**
* kbase_hwcnt_jm_dump_get() - Copy or accumulate enabled counters from the raw
@@ -289,8 +284,7 @@ void kbase_hwcnt_csf_metadata_destroy(
int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
const struct kbase_hwcnt_enable_map *dst_enable_map,
const u64 pm_core_mask,
- const struct kbase_hwcnt_curr_config *curr_config,
- bool accumulate);
+ const struct kbase_hwcnt_curr_config *curr_config, bool accumulate);
/**
* kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw
@@ -310,8 +304,7 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
* Return: 0 on success, else error code.
*/
int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
- const struct kbase_hwcnt_enable_map *dst_enable_map,
- bool accumulate);
+ const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate);
/**
* kbase_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block
@@ -365,9 +358,8 @@ static inline u32 kbase_hwcnt_backend_gpu_block_map_to_physical(u64 lo, u64 hi)
* individual counter block value, but the physical enable map uses 1 bit for
* every 4 counters, shared over all instances of a block.
*/
-void kbase_hwcnt_gpu_enable_map_to_physical(
- struct kbase_hwcnt_physical_enable_map *dst,
- const struct kbase_hwcnt_enable_map *src);
+void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_map *dst,
+ const struct kbase_hwcnt_enable_map *src);
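A minimal sketch (not part of the patch) of the condensation described above: with 1 physical bit per 4 counters, the two 64-bit words of the abstraction collapse into a single 32-bit physical word. The function name is hypothetical and the lo/hi-to-bit-range split is an assumption; the u32/u64/size_t types are the kernel's, from <linux/types.h>.

static u32 sketch_block_map_to_physical(u64 lo, u64 hi)
{
	u32 phys = 0;
	size_t bit;

	for (bit = 0; bit < 64; bit++) {
		if (lo & (1ull << bit))
			phys |= 1u << (bit / 4); /* abstraction values 0-63 -> physical bits 0-15 */
		if (hi & (1ull << bit))
			phys |= 1u << (16 + bit / 4); /* values 64-127 -> physical bits 16-31 */
	}
	return phys;
}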
/**
* kbase_hwcnt_gpu_set_to_physical() - Map counter set selection to physical
@@ -376,8 +368,7 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
* @dst: Non-NULL pointer to destination physical SET_SELECT value.
* @src: Non-NULL pointer to source counter set selection.
*/
-void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst,
- enum kbase_hwcnt_set src);
+void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kbase_hwcnt_set src);
/**
* kbase_hwcnt_gpu_enable_map_from_physical() - Convert a physical enable map to
@@ -393,9 +384,8 @@ void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst,
* more than 64, so the enable map abstraction has nowhere to store the enable
* information for the 64 non-existent counters.
*/
-void kbase_hwcnt_gpu_enable_map_from_physical(
- struct kbase_hwcnt_enable_map *dst,
- const struct kbase_hwcnt_physical_enable_map *src);
+void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst,
+ const struct kbase_hwcnt_physical_enable_map *src);
/**
* kbase_hwcnt_gpu_patch_dump_headers() - Patch all the performance counter
@@ -411,8 +401,7 @@ void kbase_hwcnt_gpu_enable_map_from_physical(
* kernel-user boundary, to ensure the header is accurate for the enable map
* used by the user.
*/
-void kbase_hwcnt_gpu_patch_dump_headers(
- struct kbase_hwcnt_dump_buffer *buf,
- const struct kbase_hwcnt_enable_map *enable_map);
+void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf,
+ const struct kbase_hwcnt_enable_map *enable_map);
#endif /* _KBASE_HWCNT_GPU_H_ */
diff --git a/mali_kbase/mali_kbase_hwcnt_gpu_narrow.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c
index 2a1cde7..0cf2f94 100644
--- a/mali_kbase/mali_kbase_hwcnt_gpu_narrow.c
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c
@@ -19,21 +19,19 @@
*
*/
-#include "mali_kbase_hwcnt_gpu.h"
-#include "mali_kbase_hwcnt_gpu_narrow.h"
+#include "hwcnt/mali_kbase_hwcnt_gpu.h"
+#include "hwcnt/mali_kbase_hwcnt_gpu_narrow.h"
#include <linux/bug.h>
#include <linux/err.h>
#include <linux/slab.h>
-int kbase_hwcnt_gpu_metadata_narrow_create(
- const struct kbase_hwcnt_metadata_narrow **dst_md_narrow,
- const struct kbase_hwcnt_metadata *src_md)
+int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow,
+ const struct kbase_hwcnt_metadata *src_md)
{
struct kbase_hwcnt_description desc;
struct kbase_hwcnt_group_description group;
- struct kbase_hwcnt_block_description
- blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
+ struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
size_t prfcnt_values_per_block;
size_t blk;
int err;
@@ -47,18 +45,15 @@ int kbase_hwcnt_gpu_metadata_narrow_create(
* count in the metadata.
*/
if ((kbase_hwcnt_metadata_group_count(src_md) != 1) ||
- (kbase_hwcnt_metadata_block_count(src_md, 0) !=
- KBASE_HWCNT_V5_BLOCK_TYPE_COUNT))
+ (kbase_hwcnt_metadata_block_count(src_md, 0) != KBASE_HWCNT_V5_BLOCK_TYPE_COUNT))
return -EINVAL;
/* Get the values count in the first block. */
- prfcnt_values_per_block =
- kbase_hwcnt_metadata_block_values_count(src_md, 0, 0);
+ prfcnt_values_per_block = kbase_hwcnt_metadata_block_values_count(src_md, 0, 0);
/* check all blocks should have same values count. */
for (blk = 1; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) {
- size_t val_cnt =
- kbase_hwcnt_metadata_block_values_count(src_md, 0, blk);
+ size_t val_cnt = kbase_hwcnt_metadata_block_values_count(src_md, 0, blk);
if (val_cnt != prfcnt_values_per_block)
return -EINVAL;
}
@@ -75,12 +70,10 @@ int kbase_hwcnt_gpu_metadata_narrow_create(
prfcnt_values_per_block = 64;
for (blk = 0; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) {
- size_t blk_hdr_cnt = kbase_hwcnt_metadata_block_headers_count(
- src_md, 0, blk);
+ size_t blk_hdr_cnt = kbase_hwcnt_metadata_block_headers_count(src_md, 0, blk);
blks[blk] = (struct kbase_hwcnt_block_description){
.type = kbase_hwcnt_metadata_block_type(src_md, 0, blk),
- .inst_cnt = kbase_hwcnt_metadata_block_instance_count(
- src_md, 0, blk),
+ .inst_cnt = kbase_hwcnt_metadata_block_instance_count(src_md, 0, blk),
.hdr_cnt = blk_hdr_cnt,
.ctr_cnt = prfcnt_values_per_block - blk_hdr_cnt,
};
@@ -105,8 +98,7 @@ int kbase_hwcnt_gpu_metadata_narrow_create(
* only supports 32-bit but the created metadata uses 64-bit for
* block entry.
*/
- metadata_narrow->dump_buf_bytes =
- metadata_narrow->metadata->dump_buf_bytes >> 1;
+ metadata_narrow->dump_buf_bytes = metadata_narrow->metadata->dump_buf_bytes >> 1;
*dst_md_narrow = metadata_narrow;
} else {
kfree(metadata_narrow);
@@ -115,8 +107,7 @@ int kbase_hwcnt_gpu_metadata_narrow_create(
return err;
}
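To make the halving above concrete, an illustrative figure (not taken from the patch): a 64-bit dump buffer of 4096 bytes holds 512 u64 values, so the narrow metadata reports 4096 >> 1 = 2048 bytes for the same 512 values stored as u32.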
-void kbase_hwcnt_gpu_metadata_narrow_destroy(
- const struct kbase_hwcnt_metadata_narrow *md_narrow)
+void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow)
{
if (!md_narrow)
return;
@@ -125,9 +116,8 @@ void kbase_hwcnt_gpu_metadata_narrow_destroy(
kfree(md_narrow);
}
-int kbase_hwcnt_dump_buffer_narrow_alloc(
- const struct kbase_hwcnt_metadata_narrow *md_narrow,
- struct kbase_hwcnt_dump_buffer_narrow *dump_buf)
+int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow,
+ struct kbase_hwcnt_dump_buffer_narrow *dump_buf)
{
size_t dump_buf_bytes;
size_t clk_cnt_buf_bytes;
@@ -137,8 +127,7 @@ int kbase_hwcnt_dump_buffer_narrow_alloc(
return -EINVAL;
dump_buf_bytes = md_narrow->dump_buf_bytes;
- clk_cnt_buf_bytes =
- sizeof(*dump_buf->clk_cnt_buf) * md_narrow->metadata->clk_cnt;
+ clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * md_narrow->metadata->clk_cnt;
/* Make a single allocation for both dump_buf and clk_cnt_buf. */
buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL);
@@ -154,14 +143,15 @@ int kbase_hwcnt_dump_buffer_narrow_alloc(
return 0;
}
-void kbase_hwcnt_dump_buffer_narrow_free(
- struct kbase_hwcnt_dump_buffer_narrow *dump_buf_narrow)
+void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf_narrow)
{
if (!dump_buf_narrow)
return;
kfree(dump_buf_narrow->dump_buf);
- *dump_buf_narrow = (struct kbase_hwcnt_dump_buffer_narrow){ NULL };
+ *dump_buf_narrow = (struct kbase_hwcnt_dump_buffer_narrow){ .md_narrow = NULL,
+ .dump_buf = NULL,
+ .clk_cnt_buf = NULL };
}
int kbase_hwcnt_dump_buffer_narrow_array_alloc(
@@ -180,8 +170,7 @@ int kbase_hwcnt_dump_buffer_narrow_array_alloc(
return -EINVAL;
dump_buf_bytes = md_narrow->dump_buf_bytes;
- clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) *
- md_narrow->metadata->clk_cnt;
+ clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * md_narrow->metadata->clk_cnt;
/* Allocate memory for the dump buffer struct array */
buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL);
@@ -234,27 +223,22 @@ void kbase_hwcnt_dump_buffer_narrow_array_free(
memset(dump_bufs, 0, sizeof(*dump_bufs));
}
-void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk,
- const u64 *src_blk,
- const u64 *blk_em,
- size_t val_cnt)
+void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk,
+ const u64 *blk_em, size_t val_cnt)
{
size_t val;
for (val = 0; val < val_cnt; val++) {
- bool val_enabled =
- kbase_hwcnt_enable_map_block_value_enabled(blk_em, val);
- u32 src_val =
- (src_blk[val] > U32_MAX) ? U32_MAX : (u32)src_blk[val];
+ bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, val);
+ u32 src_val = (src_blk[val] > U32_MAX) ? U32_MAX : (u32)src_blk[val];
dst_blk[val] = val_enabled ? src_val : 0;
}
}
-void kbase_hwcnt_dump_buffer_copy_strict_narrow(
- struct kbase_hwcnt_dump_buffer_narrow *dst_narrow,
- const struct kbase_hwcnt_dump_buffer *src,
- const struct kbase_hwcnt_enable_map *dst_enable_map)
+void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
{
const struct kbase_hwcnt_metadata_narrow *metadata_narrow;
size_t grp;
@@ -262,68 +246,53 @@ void kbase_hwcnt_dump_buffer_copy_strict_narrow(
if (WARN_ON(!dst_narrow) || WARN_ON(!src) || WARN_ON(!dst_enable_map) ||
WARN_ON(dst_narrow->md_narrow->metadata == src->metadata) ||
- WARN_ON(dst_narrow->md_narrow->metadata->grp_cnt !=
- src->metadata->grp_cnt) ||
+ WARN_ON(dst_narrow->md_narrow->metadata->grp_cnt != src->metadata->grp_cnt) ||
WARN_ON(src->metadata->grp_cnt != 1) ||
WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt !=
src->metadata->grp_metadata[0].blk_cnt) ||
WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt !=
KBASE_HWCNT_V5_BLOCK_TYPE_COUNT) ||
- WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0]
- .blk_metadata[0]
- .ctr_cnt >
+ WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt >
src->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt))
return;
/* Don't use src metadata since src buffer is bigger than dst buffer. */
metadata_narrow = dst_narrow->md_narrow;
- for (grp = 0;
- grp < kbase_hwcnt_metadata_narrow_group_count(metadata_narrow);
- grp++) {
+ for (grp = 0; grp < kbase_hwcnt_metadata_narrow_group_count(metadata_narrow); grp++) {
size_t blk;
- size_t blk_cnt = kbase_hwcnt_metadata_narrow_block_count(
- metadata_narrow, grp);
+ size_t blk_cnt = kbase_hwcnt_metadata_narrow_block_count(metadata_narrow, grp);
for (blk = 0; blk < blk_cnt; blk++) {
size_t blk_inst;
- size_t blk_inst_cnt =
- kbase_hwcnt_metadata_narrow_block_instance_count(
- metadata_narrow, grp, blk);
+ size_t blk_inst_cnt = kbase_hwcnt_metadata_narrow_block_instance_count(
+ metadata_narrow, grp, blk);
- for (blk_inst = 0; blk_inst < blk_inst_cnt;
- blk_inst++) {
+ for (blk_inst = 0; blk_inst < blk_inst_cnt; blk_inst++) {
/* The narrowed down buffer is only 32-bit. */
- u32 *dst_blk =
- kbase_hwcnt_dump_buffer_narrow_block_instance(
- dst_narrow, grp, blk, blk_inst);
- const u64 *src_blk =
- kbase_hwcnt_dump_buffer_block_instance(
- src, grp, blk, blk_inst);
- const u64 *blk_em =
- kbase_hwcnt_enable_map_block_instance(
- dst_enable_map, grp, blk,
- blk_inst);
- size_t val_cnt =
- kbase_hwcnt_metadata_narrow_block_values_count(
- metadata_narrow, grp, blk);
+ u32 *dst_blk = kbase_hwcnt_dump_buffer_narrow_block_instance(
+ dst_narrow, grp, blk, blk_inst);
+ const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance(
+ src, grp, blk, blk_inst);
+ const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
+ dst_enable_map, grp, blk, blk_inst);
+ size_t val_cnt = kbase_hwcnt_metadata_narrow_block_values_count(
+ metadata_narrow, grp, blk);
/* Align upwards to include padding bytes */
val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
- val_cnt,
- (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
- KBASE_HWCNT_VALUE_BYTES));
+ val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
+ KBASE_HWCNT_VALUE_BYTES));
- kbase_hwcnt_dump_buffer_block_copy_strict_narrow(
- dst_blk, src_blk, blk_em, val_cnt);
+ kbase_hwcnt_dump_buffer_block_copy_strict_narrow(dst_blk, src_blk,
+ blk_em, val_cnt);
}
}
}
for (clk = 0; clk < metadata_narrow->metadata->clk_cnt; clk++) {
- bool clk_enabled = kbase_hwcnt_clk_enable_map_enabled(
- dst_enable_map->clk_enable_map, clk);
+ bool clk_enabled =
+ kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk);
- dst_narrow->clk_cnt_buf[clk] =
- clk_enabled ? src->clk_cnt_buf[clk] : 0;
+ dst_narrow->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0;
}
}
diff --git a/mali_kbase/mali_kbase_hwcnt_gpu_narrow.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.h
index af6fa19..afd236d 100644
--- a/mali_kbase/mali_kbase_hwcnt_gpu_narrow.h
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,7 +22,7 @@
#ifndef _KBASE_HWCNT_GPU_NARROW_H_
#define _KBASE_HWCNT_GPU_NARROW_H_
-#include "mali_kbase_hwcnt_types.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
#include <linux/types.h>
struct kbase_device;
@@ -86,8 +86,8 @@ struct kbase_hwcnt_dump_buffer_narrow_array {
*
* Return: Number of hardware counter groups described by narrow metadata.
*/
-static inline size_t kbase_hwcnt_metadata_narrow_group_count(
- const struct kbase_hwcnt_metadata_narrow *md_narrow)
+static inline size_t
+kbase_hwcnt_metadata_narrow_group_count(const struct kbase_hwcnt_metadata_narrow *md_narrow)
{
return kbase_hwcnt_metadata_group_count(md_narrow->metadata);
}
@@ -100,8 +100,9 @@ static inline size_t kbase_hwcnt_metadata_narrow_group_count(
*
* Return: Type of the group grp.
*/
-static inline u64 kbase_hwcnt_metadata_narrow_group_type(
- const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp)
+static inline u64
+kbase_hwcnt_metadata_narrow_group_type(const struct kbase_hwcnt_metadata_narrow *md_narrow,
+ size_t grp)
{
return kbase_hwcnt_metadata_group_type(md_narrow->metadata, grp);
}
@@ -114,8 +115,9 @@ static inline u64 kbase_hwcnt_metadata_narrow_group_type(
*
* Return: Number of blocks in group grp.
*/
-static inline size_t kbase_hwcnt_metadata_narrow_block_count(
- const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp)
+static inline size_t
+kbase_hwcnt_metadata_narrow_block_count(const struct kbase_hwcnt_metadata_narrow *md_narrow,
+ size_t grp)
{
return kbase_hwcnt_metadata_block_count(md_narrow->metadata, grp);
}
@@ -131,11 +133,9 @@ static inline size_t kbase_hwcnt_metadata_narrow_block_count(
* Return: Number of instances of block blk in group grp.
*/
static inline size_t kbase_hwcnt_metadata_narrow_block_instance_count(
- const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp,
- size_t blk)
+ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, size_t blk)
{
- return kbase_hwcnt_metadata_block_instance_count(md_narrow->metadata,
- grp, blk);
+ return kbase_hwcnt_metadata_block_instance_count(md_narrow->metadata, grp, blk);
}
/**
@@ -148,12 +148,11 @@ static inline size_t kbase_hwcnt_metadata_narrow_block_instance_count(
*
* Return: Number of counter headers in each instance of block blk in group grp.
*/
-static inline size_t kbase_hwcnt_metadata_narrow_block_headers_count(
- const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp,
- size_t blk)
+static inline size_t
+kbase_hwcnt_metadata_narrow_block_headers_count(const struct kbase_hwcnt_metadata_narrow *md_narrow,
+ size_t grp, size_t blk)
{
- return kbase_hwcnt_metadata_block_headers_count(md_narrow->metadata,
- grp, blk);
+ return kbase_hwcnt_metadata_block_headers_count(md_narrow->metadata, grp, blk);
}
/**
@@ -167,11 +166,9 @@ static inline size_t kbase_hwcnt_metadata_narrow_block_headers_count(
* Return: Number of counters in each instance of block blk in group grp.
*/
static inline size_t kbase_hwcnt_metadata_narrow_block_counters_count(
- const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp,
- size_t blk)
+ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, size_t blk)
{
- return kbase_hwcnt_metadata_block_counters_count(md_narrow->metadata,
- grp, blk);
+ return kbase_hwcnt_metadata_block_counters_count(md_narrow->metadata, grp, blk);
}
/**
@@ -184,14 +181,12 @@ static inline size_t kbase_hwcnt_metadata_narrow_block_counters_count(
* Return: Number of headers plus counters in each instance of block blk
* in group grp.
*/
-static inline size_t kbase_hwcnt_metadata_narrow_block_values_count(
- const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp,
- size_t blk)
+static inline size_t
+kbase_hwcnt_metadata_narrow_block_values_count(const struct kbase_hwcnt_metadata_narrow *md_narrow,
+ size_t grp, size_t blk)
{
- return kbase_hwcnt_metadata_narrow_block_counters_count(md_narrow, grp,
- blk) +
- kbase_hwcnt_metadata_narrow_block_headers_count(md_narrow, grp,
- blk);
+ return kbase_hwcnt_metadata_narrow_block_counters_count(md_narrow, grp, blk) +
+ kbase_hwcnt_metadata_narrow_block_headers_count(md_narrow, grp, blk);
}
/**
@@ -205,18 +200,13 @@ static inline size_t kbase_hwcnt_metadata_narrow_block_values_count(
*
* Return: u32* to the dump buffer for the block instance.
*/
-static inline u32 *kbase_hwcnt_dump_buffer_narrow_block_instance(
- const struct kbase_hwcnt_dump_buffer_narrow *buf, size_t grp,
- size_t blk, size_t blk_inst)
+static inline u32 *
+kbase_hwcnt_dump_buffer_narrow_block_instance(const struct kbase_hwcnt_dump_buffer_narrow *buf,
+ size_t grp, size_t blk, size_t blk_inst)
{
- return buf->dump_buf +
- buf->md_narrow->metadata->grp_metadata[grp].dump_buf_index +
- buf->md_narrow->metadata->grp_metadata[grp]
- .blk_metadata[blk]
- .dump_buf_index +
- (buf->md_narrow->metadata->grp_metadata[grp]
- .blk_metadata[blk]
- .dump_buf_stride *
+ return buf->dump_buf + buf->md_narrow->metadata->grp_metadata[grp].dump_buf_index +
+ buf->md_narrow->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_index +
+ (buf->md_narrow->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_stride *
blk_inst);
}
@@ -239,17 +229,15 @@ static inline u32 *kbase_hwcnt_dump_buffer_narrow_block_instance(
*
* Return: 0 on success, else error code.
*/
-int kbase_hwcnt_gpu_metadata_narrow_create(
- const struct kbase_hwcnt_metadata_narrow **dst_md_narrow,
- const struct kbase_hwcnt_metadata *src_md);
+int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow,
+ const struct kbase_hwcnt_metadata *src_md);
/**
* kbase_hwcnt_gpu_metadata_narrow_destroy() - Destroy a hardware counter narrow
* metadata object.
* @md_narrow: Pointer to hardware counter narrow metadata.
*/
-void kbase_hwcnt_gpu_metadata_narrow_destroy(
- const struct kbase_hwcnt_metadata_narrow *md_narrow);
+void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow);
/**
* kbase_hwcnt_dump_buffer_narrow_alloc() - Allocate a narrow dump buffer.
@@ -260,9 +248,8 @@ void kbase_hwcnt_gpu_metadata_narrow_destroy(
*
* Return: 0 on success, else error code.
*/
-int kbase_hwcnt_dump_buffer_narrow_alloc(
- const struct kbase_hwcnt_metadata_narrow *md_narrow,
- struct kbase_hwcnt_dump_buffer_narrow *dump_buf);
+int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow,
+ struct kbase_hwcnt_dump_buffer_narrow *dump_buf);
/**
* kbase_hwcnt_dump_buffer_narrow_free() - Free a narrow dump buffer.
@@ -271,8 +258,7 @@ int kbase_hwcnt_dump_buffer_narrow_alloc(
* Can be safely called on an all-zeroed narrow dump buffer structure, or on an
* already freed narrow dump buffer.
*/
-void kbase_hwcnt_dump_buffer_narrow_free(
- struct kbase_hwcnt_dump_buffer_narrow *dump_buf);
+void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf);
/**
* kbase_hwcnt_dump_buffer_narrow_array_alloc() - Allocate an array of narrow
@@ -320,10 +306,8 @@ void kbase_hwcnt_dump_buffer_narrow_array_free(
* source value is bigger than U32_MAX, or copy the value from source if the
* corresponding source value is less than or equal to U32_MAX.
*/
-void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk,
- const u64 *src_blk,
- const u64 *blk_em,
- size_t val_cnt);
+void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk,
+ const u64 *blk_em, size_t val_cnt);
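The narrowing rule is easiest to see with concrete values. The table below is illustrative and mirrors the kbase_hwcnt_dump_buffer_block_copy_strict_narrow() implementation shown earlier in this diff.

/*
 *   src (u64) value   enabled in blk_em?   dst (u32) value
 *   42                yes                  42
 *   0x100000005       yes                  0xffffffff (clamped to U32_MAX)
 *   any value         no                   0
 */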
/**
* kbase_hwcnt_dump_buffer_copy_strict_narrow() - Copy all enabled values to a
@@ -339,9 +323,8 @@ void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk,
* corresponding source value is bigger than U32_MAX, or copy the value from
* source if the corresponding source value is less than or equal to U32_MAX.
*/
-void kbase_hwcnt_dump_buffer_copy_strict_narrow(
- struct kbase_hwcnt_dump_buffer_narrow *dst_narrow,
- const struct kbase_hwcnt_dump_buffer *src,
- const struct kbase_hwcnt_enable_map *dst_enable_map);
+void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
#endif /* _KBASE_HWCNT_GPU_NARROW_H_ */
diff --git a/mali_kbase/mali_kbase_hwcnt_types.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_types.c
index d925ed7..763eb31 100644
--- a/mali_kbase/mali_kbase_hwcnt_types.c
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_types.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -19,13 +19,12 @@
*
*/
-#include "mali_kbase_hwcnt_types.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
#include <linux/slab.h>
-int kbase_hwcnt_metadata_create(
- const struct kbase_hwcnt_description *desc,
- const struct kbase_hwcnt_metadata **out_metadata)
+int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc,
+ const struct kbase_hwcnt_metadata **out_metadata)
{
char *buf;
struct kbase_hwcnt_metadata *metadata;
@@ -56,8 +55,7 @@ int kbase_hwcnt_metadata_create(
/* Block metadata */
for (grp = 0; grp < desc->grp_cnt; grp++) {
- size += sizeof(struct kbase_hwcnt_block_metadata) *
- desc->grps[grp].blk_cnt;
+ size += sizeof(struct kbase_hwcnt_block_metadata) * desc->grps[grp].blk_cnt;
}
/* Single allocation for the entire metadata */
@@ -83,8 +81,7 @@ int kbase_hwcnt_metadata_create(
for (grp = 0; grp < desc->grp_cnt; grp++) {
size_t blk;
- const struct kbase_hwcnt_group_description *grp_desc =
- desc->grps + grp;
+ const struct kbase_hwcnt_group_description *grp_desc = desc->grps + grp;
struct kbase_hwcnt_group_metadata *grp_md = grp_mds + grp;
size_t group_enable_map_count = 0;
@@ -94,37 +91,28 @@ int kbase_hwcnt_metadata_create(
/* Bump allocate this group's block metadata */
struct kbase_hwcnt_block_metadata *blk_mds =
(struct kbase_hwcnt_block_metadata *)(buf + offset);
- offset += sizeof(struct kbase_hwcnt_block_metadata) *
- grp_desc->blk_cnt;
+ offset += sizeof(struct kbase_hwcnt_block_metadata) * grp_desc->blk_cnt;
/* Fill in each block in the group's information */
for (blk = 0; blk < grp_desc->blk_cnt; blk++) {
- const struct kbase_hwcnt_block_description *blk_desc =
- grp_desc->blks + blk;
- struct kbase_hwcnt_block_metadata *blk_md =
- blk_mds + blk;
- const size_t n_values =
- blk_desc->hdr_cnt + blk_desc->ctr_cnt;
+ const struct kbase_hwcnt_block_description *blk_desc = grp_desc->blks + blk;
+ struct kbase_hwcnt_block_metadata *blk_md = blk_mds + blk;
+ const size_t n_values = blk_desc->hdr_cnt + blk_desc->ctr_cnt;
blk_md->type = blk_desc->type;
blk_md->inst_cnt = blk_desc->inst_cnt;
blk_md->hdr_cnt = blk_desc->hdr_cnt;
blk_md->ctr_cnt = blk_desc->ctr_cnt;
blk_md->enable_map_index = group_enable_map_count;
- blk_md->enable_map_stride =
- kbase_hwcnt_bitfield_count(n_values);
+ blk_md->enable_map_stride = kbase_hwcnt_bitfield_count(n_values);
blk_md->dump_buf_index = group_dump_buffer_count;
- blk_md->dump_buf_stride =
- KBASE_HWCNT_ALIGN_UPWARDS(
- n_values,
- (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
- KBASE_HWCNT_VALUE_BYTES));
+ blk_md->dump_buf_stride = KBASE_HWCNT_ALIGN_UPWARDS(
+ n_values,
+ (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES));
blk_md->avail_mask_index = group_avail_mask_bits;
- group_enable_map_count +=
- blk_md->enable_map_stride * blk_md->inst_cnt;
- group_dump_buffer_count +=
- blk_md->dump_buf_stride * blk_md->inst_cnt;
+ group_enable_map_count += blk_md->enable_map_stride * blk_md->inst_cnt;
+ group_dump_buffer_count += blk_md->dump_buf_stride * blk_md->inst_cnt;
group_avail_mask_bits += blk_md->inst_cnt;
}
@@ -144,8 +132,7 @@ int kbase_hwcnt_metadata_create(
/* Fill in the top level metadata's information */
metadata->grp_cnt = desc->grp_cnt;
metadata->grp_metadata = grp_mds;
- metadata->enable_map_bytes =
- enable_map_count * KBASE_HWCNT_BITFIELD_BYTES;
+ metadata->enable_map_bytes = enable_map_count * KBASE_HWCNT_BITFIELD_BYTES;
metadata->dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES;
metadata->avail_mask = desc->avail_mask;
metadata->clk_cnt = desc->clk_cnt;
@@ -155,8 +142,7 @@ int kbase_hwcnt_metadata_create(
* bit per 4 bytes in the dump buffer.
*/
WARN_ON(metadata->dump_buf_bytes !=
- (metadata->enable_map_bytes *
- BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES));
+ (metadata->enable_map_bytes * BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES));
*out_metadata = metadata;
return 0;
@@ -167,9 +153,8 @@ void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
kfree(metadata);
}
-int kbase_hwcnt_enable_map_alloc(
- const struct kbase_hwcnt_metadata *metadata,
- struct kbase_hwcnt_enable_map *enable_map)
+int kbase_hwcnt_enable_map_alloc(const struct kbase_hwcnt_metadata *metadata,
+ struct kbase_hwcnt_enable_map *enable_map)
{
u64 *enable_map_buf;
@@ -177,8 +162,7 @@ int kbase_hwcnt_enable_map_alloc(
return -EINVAL;
if (metadata->enable_map_bytes > 0) {
- enable_map_buf =
- kzalloc(metadata->enable_map_bytes, GFP_KERNEL);
+ enable_map_buf = kzalloc(metadata->enable_map_bytes, GFP_KERNEL);
if (!enable_map_buf)
return -ENOMEM;
} else {
@@ -200,9 +184,8 @@ void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map)
enable_map->metadata = NULL;
}
-int kbase_hwcnt_dump_buffer_alloc(
- const struct kbase_hwcnt_metadata *metadata,
- struct kbase_hwcnt_dump_buffer *dump_buf)
+int kbase_hwcnt_dump_buffer_alloc(const struct kbase_hwcnt_metadata *metadata,
+ struct kbase_hwcnt_dump_buffer *dump_buf)
{
size_t dump_buf_bytes;
size_t clk_cnt_buf_bytes;
@@ -235,10 +218,8 @@ void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf)
memset(dump_buf, 0, sizeof(*dump_buf));
}
-int kbase_hwcnt_dump_buffer_array_alloc(
- const struct kbase_hwcnt_metadata *metadata,
- size_t n,
- struct kbase_hwcnt_dump_buffer_array *dump_bufs)
+int kbase_hwcnt_dump_buffer_array_alloc(const struct kbase_hwcnt_metadata *metadata, size_t n,
+ struct kbase_hwcnt_dump_buffer_array *dump_bufs)
{
struct kbase_hwcnt_dump_buffer *buffers;
size_t buf_idx;
@@ -251,8 +232,7 @@ int kbase_hwcnt_dump_buffer_array_alloc(
return -EINVAL;
dump_buf_bytes = metadata->dump_buf_bytes;
- clk_cnt_buf_bytes =
- sizeof(*dump_bufs->bufs->clk_cnt_buf) * metadata->clk_cnt;
+ clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * metadata->clk_cnt;
/* Allocate memory for the dump buffer struct array */
buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL);
@@ -283,15 +263,13 @@ int kbase_hwcnt_dump_buffer_array_alloc(
buffers[buf_idx].metadata = metadata;
buffers[buf_idx].dump_buf = (u64 *)(addr + dump_buf_offset);
- buffers[buf_idx].clk_cnt_buf =
- (u64 *)(addr + clk_cnt_buf_offset);
+ buffers[buf_idx].clk_cnt_buf = (u64 *)(addr + clk_cnt_buf_offset);
}
return 0;
}
-void kbase_hwcnt_dump_buffer_array_free(
- struct kbase_hwcnt_dump_buffer_array *dump_bufs)
+void kbase_hwcnt_dump_buffer_array_free(struct kbase_hwcnt_dump_buffer_array *dump_bufs)
{
if (!dump_bufs)
return;
@@ -301,84 +279,71 @@ void kbase_hwcnt_dump_buffer_array_free(
memset(dump_bufs, 0, sizeof(*dump_bufs));
}
-void kbase_hwcnt_dump_buffer_zero(
- struct kbase_hwcnt_dump_buffer *dst,
- const struct kbase_hwcnt_enable_map *dst_enable_map)
+void kbase_hwcnt_dump_buffer_zero(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
{
const struct kbase_hwcnt_metadata *metadata;
size_t grp, blk, blk_inst;
- if (WARN_ON(!dst) ||
- WARN_ON(!dst_enable_map) ||
+ if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) ||
WARN_ON(dst->metadata != dst_enable_map->metadata))
return;
metadata = dst->metadata;
- kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
u64 *dst_blk;
size_t val_cnt;
- if (!kbase_hwcnt_enable_map_block_enabled(
- dst_enable_map, grp, blk, blk_inst))
+ if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst))
continue;
- dst_blk = kbase_hwcnt_dump_buffer_block_instance(
- dst, grp, blk, blk_inst);
- val_cnt = kbase_hwcnt_metadata_block_values_count(
- metadata, grp, blk);
+ dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk);
kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt);
}
- memset(dst->clk_cnt_buf, 0,
- sizeof(*dst->clk_cnt_buf) * metadata->clk_cnt);
+ memset(dst->clk_cnt_buf, 0, sizeof(*dst->clk_cnt_buf) * metadata->clk_cnt);
}
-void kbase_hwcnt_dump_buffer_zero_strict(
- struct kbase_hwcnt_dump_buffer *dst)
+void kbase_hwcnt_dump_buffer_zero_strict(struct kbase_hwcnt_dump_buffer *dst)
{
if (WARN_ON(!dst))
return;
memset(dst->dump_buf, 0, dst->metadata->dump_buf_bytes);
- memset(dst->clk_cnt_buf, 0,
- sizeof(*dst->clk_cnt_buf) * dst->metadata->clk_cnt);
+ memset(dst->clk_cnt_buf, 0, sizeof(*dst->clk_cnt_buf) * dst->metadata->clk_cnt);
}
-void kbase_hwcnt_dump_buffer_zero_non_enabled(
- struct kbase_hwcnt_dump_buffer *dst,
- const struct kbase_hwcnt_enable_map *dst_enable_map)
+void kbase_hwcnt_dump_buffer_zero_non_enabled(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
{
const struct kbase_hwcnt_metadata *metadata;
size_t grp, blk, blk_inst;
- if (WARN_ON(!dst) ||
- WARN_ON(!dst_enable_map) ||
+ if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) ||
WARN_ON(dst->metadata != dst_enable_map->metadata))
return;
metadata = dst->metadata;
- kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
- u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
- dst, grp, blk, blk_inst);
- const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
- dst_enable_map, grp, blk, blk_inst);
- size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
- metadata, grp, blk);
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ const u64 *blk_em =
+ kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst);
+ size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk);
/* Align upwards to include padding bytes */
- val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(val_cnt,
- (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
- KBASE_HWCNT_VALUE_BYTES));
+ val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
+ val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES));
- if (kbase_hwcnt_metadata_block_instance_avail(
- metadata, grp, blk, blk_inst)) {
+ if (kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst)) {
/* Block available, so only zero non-enabled values */
- kbase_hwcnt_dump_buffer_block_zero_non_enabled(
- dst_blk, blk_em, val_cnt);
+ kbase_hwcnt_dump_buffer_block_zero_non_enabled(dst_blk, blk_em, val_cnt);
} else {
/* Block not available, so zero the entire thing */
kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt);
@@ -386,188 +351,159 @@ void kbase_hwcnt_dump_buffer_zero_non_enabled(
}
}
-void kbase_hwcnt_dump_buffer_copy(
- struct kbase_hwcnt_dump_buffer *dst,
- const struct kbase_hwcnt_dump_buffer *src,
- const struct kbase_hwcnt_enable_map *dst_enable_map)
+void kbase_hwcnt_dump_buffer_copy(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
{
const struct kbase_hwcnt_metadata *metadata;
size_t grp, blk, blk_inst;
size_t clk;
- if (WARN_ON(!dst) ||
- WARN_ON(!src) ||
- WARN_ON(!dst_enable_map) ||
- WARN_ON(dst == src) ||
+ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
WARN_ON(dst->metadata != src->metadata) ||
WARN_ON(dst->metadata != dst_enable_map->metadata))
return;
metadata = dst->metadata;
- kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
u64 *dst_blk;
const u64 *src_blk;
size_t val_cnt;
- if (!kbase_hwcnt_enable_map_block_enabled(
- dst_enable_map, grp, blk, blk_inst))
+ if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst))
continue;
- dst_blk = kbase_hwcnt_dump_buffer_block_instance(
- dst, grp, blk, blk_inst);
- src_blk = kbase_hwcnt_dump_buffer_block_instance(
- src, grp, blk, blk_inst);
- val_cnt = kbase_hwcnt_metadata_block_values_count(
- metadata, grp, blk);
+ dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ src_blk = kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
+ val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk);
kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, val_cnt);
}
- kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
- if (kbase_hwcnt_clk_enable_map_enabled(
- dst_enable_map->clk_enable_map, clk))
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk)
+ {
+ if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
dst->clk_cnt_buf[clk] = src->clk_cnt_buf[clk];
}
}
-void kbase_hwcnt_dump_buffer_copy_strict(
- struct kbase_hwcnt_dump_buffer *dst,
- const struct kbase_hwcnt_dump_buffer *src,
- const struct kbase_hwcnt_enable_map *dst_enable_map)
+void kbase_hwcnt_dump_buffer_copy_strict(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
{
const struct kbase_hwcnt_metadata *metadata;
size_t grp, blk, blk_inst;
size_t clk;
- if (WARN_ON(!dst) ||
- WARN_ON(!src) ||
- WARN_ON(!dst_enable_map) ||
- WARN_ON(dst == src) ||
+ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
WARN_ON(dst->metadata != src->metadata) ||
WARN_ON(dst->metadata != dst_enable_map->metadata))
return;
metadata = dst->metadata;
- kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
- u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
- dst, grp, blk, blk_inst);
- const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance(
- src, grp, blk, blk_inst);
- const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
- dst_enable_map, grp, blk, blk_inst);
- size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
- metadata, grp, blk);
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ const u64 *src_blk =
+ kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
+ const u64 *blk_em =
+ kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst);
+ size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk);
/* Align upwards to include padding bytes */
- val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(val_cnt,
- (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
- KBASE_HWCNT_VALUE_BYTES));
+ val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
+ val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES));
- kbase_hwcnt_dump_buffer_block_copy_strict(
- dst_blk, src_blk, blk_em, val_cnt);
+ kbase_hwcnt_dump_buffer_block_copy_strict(dst_blk, src_blk, blk_em, val_cnt);
}
- kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk)
+ {
bool clk_enabled =
- kbase_hwcnt_clk_enable_map_enabled(
- dst_enable_map->clk_enable_map, clk);
+ kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk);
dst->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0;
}
}
-void kbase_hwcnt_dump_buffer_accumulate(
- struct kbase_hwcnt_dump_buffer *dst,
- const struct kbase_hwcnt_dump_buffer *src,
- const struct kbase_hwcnt_enable_map *dst_enable_map)
+void kbase_hwcnt_dump_buffer_accumulate(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
{
const struct kbase_hwcnt_metadata *metadata;
size_t grp, blk, blk_inst;
size_t clk;
- if (WARN_ON(!dst) ||
- WARN_ON(!src) ||
- WARN_ON(!dst_enable_map) ||
- WARN_ON(dst == src) ||
+ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
WARN_ON(dst->metadata != src->metadata) ||
WARN_ON(dst->metadata != dst_enable_map->metadata))
return;
metadata = dst->metadata;
- kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
u64 *dst_blk;
const u64 *src_blk;
size_t hdr_cnt;
size_t ctr_cnt;
- if (!kbase_hwcnt_enable_map_block_enabled(
- dst_enable_map, grp, blk, blk_inst))
+ if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst))
continue;
- dst_blk = kbase_hwcnt_dump_buffer_block_instance(
- dst, grp, blk, blk_inst);
- src_blk = kbase_hwcnt_dump_buffer_block_instance(
- src, grp, blk, blk_inst);
- hdr_cnt = kbase_hwcnt_metadata_block_headers_count(
- metadata, grp, blk);
- ctr_cnt = kbase_hwcnt_metadata_block_counters_count(
- metadata, grp, blk);
-
- kbase_hwcnt_dump_buffer_block_accumulate(
- dst_blk, src_blk, hdr_cnt, ctr_cnt);
+ dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ src_blk = kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
+ hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
+ ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
+
+ kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk, hdr_cnt, ctr_cnt);
}
- kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
- if (kbase_hwcnt_clk_enable_map_enabled(
- dst_enable_map->clk_enable_map, clk))
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk)
+ {
+ if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk];
}
}
-void kbase_hwcnt_dump_buffer_accumulate_strict(
- struct kbase_hwcnt_dump_buffer *dst,
- const struct kbase_hwcnt_dump_buffer *src,
- const struct kbase_hwcnt_enable_map *dst_enable_map)
+void kbase_hwcnt_dump_buffer_accumulate_strict(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
{
const struct kbase_hwcnt_metadata *metadata;
size_t grp, blk, blk_inst;
size_t clk;
- if (WARN_ON(!dst) ||
- WARN_ON(!src) ||
- WARN_ON(!dst_enable_map) ||
- WARN_ON(dst == src) ||
+ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
WARN_ON(dst->metadata != src->metadata) ||
WARN_ON(dst->metadata != dst_enable_map->metadata))
return;
metadata = dst->metadata;
- kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
- u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
- dst, grp, blk, blk_inst);
- const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance(
- src, grp, blk, blk_inst);
- const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
- dst_enable_map, grp, blk, blk_inst);
- size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(
- metadata, grp, blk);
- size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(
- metadata, grp, blk);
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ const u64 *src_blk =
+ kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
+ const u64 *blk_em =
+ kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst);
+ size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
+ size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
/* Align upwards to include padding bytes */
- ctr_cnt = KBASE_HWCNT_ALIGN_UPWARDS(hdr_cnt + ctr_cnt,
- (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
- KBASE_HWCNT_VALUE_BYTES) - hdr_cnt);
+ ctr_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
+ hdr_cnt + ctr_cnt,
+ (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES) - hdr_cnt);
- kbase_hwcnt_dump_buffer_block_accumulate_strict(
- dst_blk, src_blk, blk_em, hdr_cnt, ctr_cnt);
+ kbase_hwcnt_dump_buffer_block_accumulate_strict(dst_blk, src_blk, blk_em, hdr_cnt,
+ ctr_cnt);
}
- kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
- if (kbase_hwcnt_clk_enable_map_enabled(
- dst_enable_map->clk_enable_map, clk))
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk)
+ {
+ if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk];
else
dst->clk_cnt_buf[clk] = 0;
diff --git a/mali_kbase/mali_kbase_hwcnt_types.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_types.h
index 9397840..5c5ada4 100644
--- a/mali_kbase/mali_kbase_hwcnt_types.h
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_types.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -104,8 +104,7 @@
#define KBASE_HWCNT_AVAIL_MASK_BITS (sizeof(u64) * BITS_PER_BYTE)
/* Minimum alignment of each block of hardware counters */
-#define KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT \
- (KBASE_HWCNT_BITFIELD_BITS * KBASE_HWCNT_VALUE_BYTES)
+#define KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT (KBASE_HWCNT_BITFIELD_BITS * KBASE_HWCNT_VALUE_BYTES)
/**
* KBASE_HWCNT_ALIGN_UPWARDS() - Calculate next aligned value.
@@ -115,7 +114,7 @@
* Return: Input value if already aligned to the specified boundary, or next
* (incrementing upwards) aligned value.
*/
-#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \
+#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \
(value + ((alignment - (value % alignment)) % alignment))
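A few worked evaluations of the macro above (illustrative only):

/*
 * KBASE_HWCNT_ALIGN_UPWARDS(5, 4)   == 5  + ((4 - 5 % 4) % 4)     == 8
 * KBASE_HWCNT_ALIGN_UPWARDS(8, 4)   == 8  + ((4 - 8 % 4) % 4)     == 8
 * KBASE_HWCNT_ALIGN_UPWARDS(60, 64) == 60 + ((64 - 60 % 64) % 64) == 64
 */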
/**
@@ -307,9 +306,8 @@ struct kbase_hwcnt_dump_buffer_array {
*
* Return: 0 on success, else error code.
*/
-int kbase_hwcnt_metadata_create(
- const struct kbase_hwcnt_description *desc,
- const struct kbase_hwcnt_metadata **metadata);
+int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc,
+ const struct kbase_hwcnt_metadata **metadata);
/**
* kbase_hwcnt_metadata_destroy() - Destroy a hardware counter metadata object.
@@ -323,8 +321,7 @@ void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
*
* Return: Number of hardware counter groups described by metadata.
*/
-static inline size_t
-kbase_hwcnt_metadata_group_count(const struct kbase_hwcnt_metadata *metadata)
+static inline size_t kbase_hwcnt_metadata_group_count(const struct kbase_hwcnt_metadata *metadata)
{
if (WARN_ON(!metadata))
return 0;
@@ -339,9 +336,8 @@ kbase_hwcnt_metadata_group_count(const struct kbase_hwcnt_metadata *metadata)
*
* Return: Type of the group grp.
*/
-static inline u64
-kbase_hwcnt_metadata_group_type(const struct kbase_hwcnt_metadata *metadata,
- size_t grp)
+static inline u64 kbase_hwcnt_metadata_group_type(const struct kbase_hwcnt_metadata *metadata,
+ size_t grp)
{
if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt))
return 0;
@@ -356,9 +352,8 @@ kbase_hwcnt_metadata_group_type(const struct kbase_hwcnt_metadata *metadata,
*
* Return: Number of blocks in group grp.
*/
-static inline size_t
-kbase_hwcnt_metadata_block_count(const struct kbase_hwcnt_metadata *metadata,
- size_t grp)
+static inline size_t kbase_hwcnt_metadata_block_count(const struct kbase_hwcnt_metadata *metadata,
+ size_t grp)
{
if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt))
return 0;
@@ -374,9 +369,8 @@ kbase_hwcnt_metadata_block_count(const struct kbase_hwcnt_metadata *metadata,
*
* Return: Type of the block blk in group grp.
*/
-static inline u64
-kbase_hwcnt_metadata_block_type(const struct kbase_hwcnt_metadata *metadata,
- size_t grp, size_t blk)
+static inline u64 kbase_hwcnt_metadata_block_type(const struct kbase_hwcnt_metadata *metadata,
+ size_t grp, size_t blk)
{
if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
@@ -394,8 +388,9 @@ kbase_hwcnt_metadata_block_type(const struct kbase_hwcnt_metadata *metadata,
*
* Return: Number of instances of block blk in group grp.
*/
-static inline size_t kbase_hwcnt_metadata_block_instance_count(
- const struct kbase_hwcnt_metadata *metadata, size_t grp, size_t blk)
+static inline size_t
+kbase_hwcnt_metadata_block_instance_count(const struct kbase_hwcnt_metadata *metadata, size_t grp,
+ size_t blk)
{
if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
@@ -413,8 +408,9 @@ static inline size_t kbase_hwcnt_metadata_block_instance_count(
*
* Return: Number of counter headers in each instance of block blk in group grp.
*/
-static inline size_t kbase_hwcnt_metadata_block_headers_count(
- const struct kbase_hwcnt_metadata *metadata, size_t grp, size_t blk)
+static inline size_t
+kbase_hwcnt_metadata_block_headers_count(const struct kbase_hwcnt_metadata *metadata, size_t grp,
+ size_t blk)
{
if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
@@ -431,8 +427,9 @@ static inline size_t kbase_hwcnt_metadata_block_headers_count(
*
* Return: Number of counters in each instance of block blk in group grp.
*/
-static inline size_t kbase_hwcnt_metadata_block_counters_count(
- const struct kbase_hwcnt_metadata *metadata, size_t grp, size_t blk)
+static inline size_t
+kbase_hwcnt_metadata_block_counters_count(const struct kbase_hwcnt_metadata *metadata, size_t grp,
+ size_t blk)
{
if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
@@ -449,8 +446,9 @@ static inline size_t kbase_hwcnt_metadata_block_counters_count(
*
* Return: enable map stride in each instance of block blk in group grp.
*/
-static inline size_t kbase_hwcnt_metadata_block_enable_map_stride(
- const struct kbase_hwcnt_metadata *metadata, size_t grp, size_t blk)
+static inline size_t
+kbase_hwcnt_metadata_block_enable_map_stride(const struct kbase_hwcnt_metadata *metadata,
+ size_t grp, size_t blk)
{
if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
@@ -468,8 +466,9 @@ static inline size_t kbase_hwcnt_metadata_block_enable_map_stride(
* Return: Number of headers plus counters in each instance of block blk
* in group grp.
*/
-static inline size_t kbase_hwcnt_metadata_block_values_count(
- const struct kbase_hwcnt_metadata *metadata, size_t grp, size_t blk)
+static inline size_t
+kbase_hwcnt_metadata_block_values_count(const struct kbase_hwcnt_metadata *metadata, size_t grp,
+ size_t blk)
{
if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
@@ -490,10 +489,13 @@ static inline size_t kbase_hwcnt_metadata_block_values_count(
* Iteration order is group, then block, then block instance (i.e. linearly
* through memory).
*/
-#define kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst) \
- for ((grp) = 0; (grp) < kbase_hwcnt_metadata_group_count((md)); (grp)++) \
- for ((blk) = 0; (blk) < kbase_hwcnt_metadata_block_count((md), (grp)); (blk)++) \
- for ((blk_inst) = 0; (blk_inst) < kbase_hwcnt_metadata_block_instance_count((md), (grp), (blk)); (blk_inst)++)
+#define kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst) \
+ for ((grp) = 0; (grp) < kbase_hwcnt_metadata_group_count((md)); (grp)++) \
+ for ((blk) = 0; (blk) < kbase_hwcnt_metadata_block_count((md), (grp)); (blk)++) \
+ for ((blk_inst) = 0; \
+ (blk_inst) < \
+ kbase_hwcnt_metadata_block_instance_count((md), (grp), (blk)); \
+ (blk_inst)++)
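A usage sketch (not part of the patch) for the iterator above; the function name is hypothetical, but the pattern matches existing callers elsewhere in this diff, e.g. kbase_hwcnt_dump_buffer_zero().

static size_t sketch_count_block_instances(const struct kbase_hwcnt_metadata *md)
{
	size_t grp, blk, blk_inst;
	size_t n = 0;

	/* Expands to three nested for loops: group, block, block instance. */
	kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst)
		n++;

	return n;
}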
/**
* kbase_hwcnt_metadata_block_avail_bit() - Get the bit index into the avail
@@ -504,10 +506,9 @@ static inline size_t kbase_hwcnt_metadata_block_values_count(
*
* Return: The bit index into the avail mask for the block.
*/
-static inline size_t kbase_hwcnt_metadata_block_avail_bit(
- const struct kbase_hwcnt_metadata *metadata,
- size_t grp,
- size_t blk)
+static inline size_t
+kbase_hwcnt_metadata_block_avail_bit(const struct kbase_hwcnt_metadata *metadata, size_t grp,
+ size_t blk)
{
if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
@@ -527,11 +528,9 @@ static inline size_t kbase_hwcnt_metadata_block_avail_bit(
*
* Return: true if the block instance is available, else false.
*/
-static inline bool kbase_hwcnt_metadata_block_instance_avail(
- const struct kbase_hwcnt_metadata *metadata,
- size_t grp,
- size_t blk,
- size_t blk_inst)
+static inline bool
+kbase_hwcnt_metadata_block_instance_avail(const struct kbase_hwcnt_metadata *metadata, size_t grp,
+ size_t blk, size_t blk_inst)
{
size_t bit;
u64 mask;
@@ -553,9 +552,8 @@ static inline bool kbase_hwcnt_metadata_block_instance_avail(
*
* Return: 0 on success, else error code.
*/
-int kbase_hwcnt_enable_map_alloc(
- const struct kbase_hwcnt_metadata *metadata,
- struct kbase_hwcnt_enable_map *enable_map);
+int kbase_hwcnt_enable_map_alloc(const struct kbase_hwcnt_metadata *metadata,
+ struct kbase_hwcnt_enable_map *enable_map);
/**
* kbase_hwcnt_enable_map_free() - Free an enable map.
@@ -577,9 +575,8 @@ void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map);
* Return: u64* to the bitfield(s) used as the enable map for the
* block instance.
*/
-static inline u64 *
-kbase_hwcnt_enable_map_block_instance(const struct kbase_hwcnt_enable_map *map,
- size_t grp, size_t blk, size_t blk_inst)
+static inline u64 *kbase_hwcnt_enable_map_block_instance(const struct kbase_hwcnt_enable_map *map,
+ size_t grp, size_t blk, size_t blk_inst)
{
if (WARN_ON(!map) || WARN_ON(!map->hwcnt_enable_map))
return NULL;
@@ -589,15 +586,9 @@ kbase_hwcnt_enable_map_block_instance(const struct kbase_hwcnt_enable_map *map,
WARN_ON(blk_inst >= map->metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt))
return map->hwcnt_enable_map;
- return map->hwcnt_enable_map +
- map->metadata->grp_metadata[grp].enable_map_index +
- map->metadata->grp_metadata[grp]
- .blk_metadata[blk]
- .enable_map_index +
- (map->metadata->grp_metadata[grp]
- .blk_metadata[blk]
- .enable_map_stride *
- blk_inst);
+ return map->hwcnt_enable_map + map->metadata->grp_metadata[grp].enable_map_index +
+ map->metadata->grp_metadata[grp].blk_metadata[blk].enable_map_index +
+ (map->metadata->grp_metadata[grp].blk_metadata[blk].enable_map_stride * blk_inst);
}
/**
@@ -609,8 +600,7 @@ kbase_hwcnt_enable_map_block_instance(const struct kbase_hwcnt_enable_map *map,
*/
static inline size_t kbase_hwcnt_bitfield_count(size_t val_cnt)
{
- return (val_cnt + KBASE_HWCNT_BITFIELD_BITS - 1) /
- KBASE_HWCNT_BITFIELD_BITS;
+ return (val_cnt + KBASE_HWCNT_BITFIELD_BITS - 1) / KBASE_HWCNT_BITFIELD_BITS;
}
/**
@@ -620,11 +610,8 @@ static inline size_t kbase_hwcnt_bitfield_count(size_t val_cnt)
* @blk: Index of the block in the group.
* @blk_inst: Index of the block instance in the block.
*/
-static inline void kbase_hwcnt_enable_map_block_disable_all(
- struct kbase_hwcnt_enable_map *dst,
- size_t grp,
- size_t blk,
- size_t blk_inst)
+static inline void kbase_hwcnt_enable_map_block_disable_all(struct kbase_hwcnt_enable_map *dst,
+ size_t grp, size_t blk, size_t blk_inst)
{
size_t val_cnt;
size_t bitfld_cnt;
@@ -644,15 +631,13 @@ static inline void kbase_hwcnt_enable_map_block_disable_all(
* kbase_hwcnt_enable_map_disable_all() - Disable all values in the enable map.
* @dst: Non-NULL pointer to enable map to zero.
*/
-static inline void kbase_hwcnt_enable_map_disable_all(
- struct kbase_hwcnt_enable_map *dst)
+static inline void kbase_hwcnt_enable_map_disable_all(struct kbase_hwcnt_enable_map *dst)
{
if (WARN_ON(!dst) || WARN_ON(!dst->metadata))
return;
if (dst->hwcnt_enable_map != NULL)
- memset(dst->hwcnt_enable_map, 0,
- dst->metadata->enable_map_bytes);
+ memset(dst->hwcnt_enable_map, 0, dst->metadata->enable_map_bytes);
dst->clk_enable_map = 0;
}
@@ -664,11 +649,8 @@ static inline void kbase_hwcnt_enable_map_disable_all(
* @blk: Index of the block in the group.
* @blk_inst: Index of the block instance in the block.
*/
-static inline void kbase_hwcnt_enable_map_block_enable_all(
- struct kbase_hwcnt_enable_map *dst,
- size_t grp,
- size_t blk,
- size_t blk_inst)
+static inline void kbase_hwcnt_enable_map_block_enable_all(struct kbase_hwcnt_enable_map *dst,
+ size_t grp, size_t blk, size_t blk_inst)
{
size_t val_cnt;
size_t bitfld_cnt;
@@ -683,8 +665,7 @@ static inline void kbase_hwcnt_enable_map_block_enable_all(
bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) {
- const u64 remaining_values = val_cnt -
- (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS);
+ const u64 remaining_values = val_cnt - (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS);
u64 block_enable_map_mask = U64_MAX;
if (remaining_values < KBASE_HWCNT_BITFIELD_BITS)
@@ -699,8 +680,7 @@ static inline void kbase_hwcnt_enable_map_block_enable_all(
* map.
* @dst: Non-NULL pointer to enable map.
*/
-static inline void kbase_hwcnt_enable_map_enable_all(
- struct kbase_hwcnt_enable_map *dst)
+static inline void kbase_hwcnt_enable_map_enable_all(struct kbase_hwcnt_enable_map *dst)
{
size_t grp, blk, blk_inst;
@@ -708,8 +688,7 @@ static inline void kbase_hwcnt_enable_map_enable_all(
return;
kbase_hwcnt_metadata_for_each_block(dst->metadata, grp, blk, blk_inst)
- kbase_hwcnt_enable_map_block_enable_all(
- dst, grp, blk, blk_inst);
+ kbase_hwcnt_enable_map_block_enable_all(dst, grp, blk, blk_inst);
dst->clk_enable_map = (1ull << dst->metadata->clk_cnt) - 1;
}
@@ -721,9 +700,8 @@ static inline void kbase_hwcnt_enable_map_enable_all(
*
* The dst and src MUST have been created from the same metadata.
*/
-static inline void kbase_hwcnt_enable_map_copy(
- struct kbase_hwcnt_enable_map *dst,
- const struct kbase_hwcnt_enable_map *src)
+static inline void kbase_hwcnt_enable_map_copy(struct kbase_hwcnt_enable_map *dst,
+ const struct kbase_hwcnt_enable_map *src)
{
if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst->metadata) ||
WARN_ON(dst->metadata != src->metadata))
@@ -733,8 +711,7 @@ static inline void kbase_hwcnt_enable_map_copy(
if (WARN_ON(!src->hwcnt_enable_map))
return;
- memcpy(dst->hwcnt_enable_map,
- src->hwcnt_enable_map,
+ memcpy(dst->hwcnt_enable_map, src->hwcnt_enable_map,
dst->metadata->enable_map_bytes);
}
@@ -748,9 +725,8 @@ static inline void kbase_hwcnt_enable_map_copy(
*
* The dst and src MUST have been created from the same metadata.
*/
-static inline void kbase_hwcnt_enable_map_union(
- struct kbase_hwcnt_enable_map *dst,
- const struct kbase_hwcnt_enable_map *src)
+static inline void kbase_hwcnt_enable_map_union(struct kbase_hwcnt_enable_map *dst,
+ const struct kbase_hwcnt_enable_map *src)
{
if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst->metadata) ||
WARN_ON(dst->metadata != src->metadata))
@@ -781,11 +757,9 @@ static inline void kbase_hwcnt_enable_map_union(
*
* Return: true if any values in the block are enabled, else false.
*/
-static inline bool kbase_hwcnt_enable_map_block_enabled(
- const struct kbase_hwcnt_enable_map *enable_map,
- size_t grp,
- size_t blk,
- size_t blk_inst)
+static inline bool
+kbase_hwcnt_enable_map_block_enabled(const struct kbase_hwcnt_enable_map *enable_map, size_t grp,
+ size_t blk, size_t blk_inst)
{
bool any_enabled = false;
size_t val_cnt;
@@ -801,15 +775,13 @@ static inline bool kbase_hwcnt_enable_map_block_enabled(
bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) {
- const u64 remaining_values = val_cnt -
- (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS);
+ const u64 remaining_values = val_cnt - (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS);
u64 block_enable_map_mask = U64_MAX;
if (remaining_values < KBASE_HWCNT_BITFIELD_BITS)
block_enable_map_mask = (1ull << remaining_values) - 1;
- any_enabled = any_enabled ||
- (block_enable_map[bitfld_idx] & block_enable_map_mask);
+ any_enabled = any_enabled || (block_enable_map[bitfld_idx] & block_enable_map_mask);
}
return any_enabled;
@@ -821,8 +793,8 @@ static inline bool kbase_hwcnt_enable_map_block_enabled(
*
* Return: true if any values are enabled, else false.
*/
-static inline bool kbase_hwcnt_enable_map_any_enabled(
- const struct kbase_hwcnt_enable_map *enable_map)
+static inline bool
+kbase_hwcnt_enable_map_any_enabled(const struct kbase_hwcnt_enable_map *enable_map)
{
size_t grp, blk, blk_inst;
u64 clk_enable_map_mask;
@@ -832,14 +804,12 @@ static inline bool kbase_hwcnt_enable_map_any_enabled(
clk_enable_map_mask = (1ull << enable_map->metadata->clk_cnt) - 1;
- if (enable_map->metadata->clk_cnt > 0 &&
- (enable_map->clk_enable_map & clk_enable_map_mask))
+ if (enable_map->metadata->clk_cnt > 0 && (enable_map->clk_enable_map & clk_enable_map_mask))
return true;
- kbase_hwcnt_metadata_for_each_block(
- enable_map->metadata, grp, blk, blk_inst) {
- if (kbase_hwcnt_enable_map_block_enabled(
- enable_map, grp, blk, blk_inst))
+ kbase_hwcnt_metadata_for_each_block(enable_map->metadata, grp, blk, blk_inst)
+ {
+ if (kbase_hwcnt_enable_map_block_enabled(enable_map, grp, blk, blk_inst))
return true;
}
@@ -855,9 +825,7 @@ static inline bool kbase_hwcnt_enable_map_any_enabled(
*
* Return: true if the value was enabled, else false.
*/
-static inline bool kbase_hwcnt_enable_map_block_value_enabled(
- const u64 *bitfld,
- size_t val_idx)
+static inline bool kbase_hwcnt_enable_map_block_value_enabled(const u64 *bitfld, size_t val_idx)
{
const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS;
const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS;
@@ -873,9 +841,7 @@ static inline bool kbase_hwcnt_enable_map_block_value_enabled(
* kbase_hwcnt_enable_map_block_instance.
* @val_idx: Index of the value to enable in the block instance.
*/
-static inline void kbase_hwcnt_enable_map_block_enable_value(
- u64 *bitfld,
- size_t val_idx)
+static inline void kbase_hwcnt_enable_map_block_enable_value(u64 *bitfld, size_t val_idx)
{
const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS;
const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS;
@@ -891,9 +857,7 @@ static inline void kbase_hwcnt_enable_map_block_enable_value(
* kbase_hwcnt_enable_map_block_instance.
* @val_idx: Index of the value to disable in the block instance.
*/
-static inline void kbase_hwcnt_enable_map_block_disable_value(
- u64 *bitfld,
- size_t val_idx)
+static inline void kbase_hwcnt_enable_map_block_disable_value(u64 *bitfld, size_t val_idx)
{
const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS;
const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS;
@@ -911,9 +875,8 @@ static inline void kbase_hwcnt_enable_map_block_disable_value(
*
* Return: 0 on success, else error code.
*/
-int kbase_hwcnt_dump_buffer_alloc(
- const struct kbase_hwcnt_metadata *metadata,
- struct kbase_hwcnt_dump_buffer *dump_buf);
+int kbase_hwcnt_dump_buffer_alloc(const struct kbase_hwcnt_metadata *metadata,
+ struct kbase_hwcnt_dump_buffer *dump_buf);
/**
* kbase_hwcnt_dump_buffer_free() - Free a dump buffer.
@@ -936,10 +899,8 @@ void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf);
*
* Return: 0 on success, else error code.
*/
-int kbase_hwcnt_dump_buffer_array_alloc(
- const struct kbase_hwcnt_metadata *metadata,
- size_t n,
- struct kbase_hwcnt_dump_buffer_array *dump_bufs);
+int kbase_hwcnt_dump_buffer_array_alloc(const struct kbase_hwcnt_metadata *metadata, size_t n,
+ struct kbase_hwcnt_dump_buffer_array *dump_bufs);
/**
* kbase_hwcnt_dump_buffer_array_free() - Free a dump buffer array.
@@ -948,8 +909,7 @@ int kbase_hwcnt_dump_buffer_array_alloc(
* Can be safely called on an all-zeroed dump buffer array structure, or on an
* already freed dump buffer array.
*/
-void kbase_hwcnt_dump_buffer_array_free(
- struct kbase_hwcnt_dump_buffer_array *dump_bufs);
+void kbase_hwcnt_dump_buffer_array_free(struct kbase_hwcnt_dump_buffer_array *dump_bufs);
/**
* kbase_hwcnt_dump_buffer_block_instance() - Get the pointer to a block
@@ -961,9 +921,8 @@ void kbase_hwcnt_dump_buffer_array_free(
*
* Return: u64* to the dump buffer for the block instance.
*/
-static inline u64 *kbase_hwcnt_dump_buffer_block_instance(
- const struct kbase_hwcnt_dump_buffer *buf, size_t grp, size_t blk,
- size_t blk_inst)
+static inline u64 *kbase_hwcnt_dump_buffer_block_instance(const struct kbase_hwcnt_dump_buffer *buf,
+ size_t grp, size_t blk, size_t blk_inst)
{
if (WARN_ON(!buf) || WARN_ON(!buf->dump_buf))
return NULL;
@@ -975,10 +934,7 @@ static inline u64 *kbase_hwcnt_dump_buffer_block_instance(
return buf->dump_buf + buf->metadata->grp_metadata[grp].dump_buf_index +
buf->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_index +
- (buf->metadata->grp_metadata[grp]
- .blk_metadata[blk]
- .dump_buf_stride *
- blk_inst);
+ (buf->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_stride * blk_inst);
}
/**
@@ -990,9 +946,8 @@ static inline u64 *kbase_hwcnt_dump_buffer_block_instance(
*
* The dst and dst_enable_map MUST have been created from the same metadata.
*/
-void kbase_hwcnt_dump_buffer_zero(
- struct kbase_hwcnt_dump_buffer *dst,
- const struct kbase_hwcnt_enable_map *dst_enable_map);
+void kbase_hwcnt_dump_buffer_zero(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
/**
* kbase_hwcnt_dump_buffer_block_zero() - Zero all values in a block.
@@ -1000,8 +955,7 @@ void kbase_hwcnt_dump_buffer_zero(
* kbase_hwcnt_dump_buffer_block_instance.
* @val_cnt: Number of values in the block.
*/
-static inline void kbase_hwcnt_dump_buffer_block_zero(u64 *dst_blk,
- size_t val_cnt)
+static inline void kbase_hwcnt_dump_buffer_block_zero(u64 *dst_blk, size_t val_cnt)
{
if (WARN_ON(!dst_blk))
return;
@@ -1017,8 +971,7 @@ static inline void kbase_hwcnt_dump_buffer_block_zero(u64 *dst_blk,
* Slower than the non-strict variant.
* @dst: Non-NULL pointer to dump buffer.
*/
-void kbase_hwcnt_dump_buffer_zero_strict(
- struct kbase_hwcnt_dump_buffer *dst);
+void kbase_hwcnt_dump_buffer_zero_strict(struct kbase_hwcnt_dump_buffer *dst);
/**
* kbase_hwcnt_dump_buffer_zero_non_enabled() - Zero all non-enabled values in
@@ -1031,9 +984,8 @@ void kbase_hwcnt_dump_buffer_zero_strict(
*
* The dst and dst_enable_map MUST have been created from the same metadata.
*/
-void kbase_hwcnt_dump_buffer_zero_non_enabled(
- struct kbase_hwcnt_dump_buffer *dst,
- const struct kbase_hwcnt_enable_map *dst_enable_map);
+void kbase_hwcnt_dump_buffer_zero_non_enabled(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
/**
* kbase_hwcnt_dump_buffer_block_zero_non_enabled() - Zero all non-enabled
@@ -1047,9 +999,8 @@ void kbase_hwcnt_dump_buffer_zero_non_enabled(
* kbase_hwcnt_enable_map_block_instance.
* @val_cnt: Number of values in the block.
*/
-static inline void
-kbase_hwcnt_dump_buffer_block_zero_non_enabled(u64 *dst_blk, const u64 *blk_em,
- size_t val_cnt)
+static inline void kbase_hwcnt_dump_buffer_block_zero_non_enabled(u64 *dst_blk, const u64 *blk_em,
+ size_t val_cnt)
{
size_t val;
@@ -1073,10 +1024,9 @@ kbase_hwcnt_dump_buffer_block_zero_non_enabled(u64 *dst_blk, const u64 *blk_em,
* The dst, src, and dst_enable_map MUST have been created from the same
* metadata.
*/
-void kbase_hwcnt_dump_buffer_copy(
- struct kbase_hwcnt_dump_buffer *dst,
- const struct kbase_hwcnt_dump_buffer *src,
- const struct kbase_hwcnt_enable_map *dst_enable_map);
+void kbase_hwcnt_dump_buffer_copy(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
/**
* kbase_hwcnt_dump_buffer_block_copy() - Copy all block values from src to dst.
@@ -1086,8 +1036,7 @@ void kbase_hwcnt_dump_buffer_copy(
* kbase_hwcnt_dump_buffer_block_instance.
* @val_cnt: Number of values in the block.
*/
-static inline void kbase_hwcnt_dump_buffer_block_copy(u64 *dst_blk,
- const u64 *src_blk,
+static inline void kbase_hwcnt_dump_buffer_block_copy(u64 *dst_blk, const u64 *src_blk,
size_t val_cnt)
{
if (WARN_ON(!dst_blk) || WARN_ON(!src_blk))
@@ -1113,10 +1062,9 @@ static inline void kbase_hwcnt_dump_buffer_block_copy(u64 *dst_blk,
* The dst, src, and dst_enable_map MUST have been created from the same
* metadata.
*/
-void kbase_hwcnt_dump_buffer_copy_strict(
- struct kbase_hwcnt_dump_buffer *dst,
- const struct kbase_hwcnt_dump_buffer *src,
- const struct kbase_hwcnt_enable_map *dst_enable_map);
+void kbase_hwcnt_dump_buffer_copy_strict(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
/**
* kbase_hwcnt_dump_buffer_block_copy_strict() - Copy all enabled block values
@@ -1134,10 +1082,8 @@ void kbase_hwcnt_dump_buffer_copy_strict(
*
* After the copy, any disabled values in dst will be zero.
*/
-static inline void kbase_hwcnt_dump_buffer_block_copy_strict(u64 *dst_blk,
- const u64 *src_blk,
- const u64 *blk_em,
- size_t val_cnt)
+static inline void kbase_hwcnt_dump_buffer_block_copy_strict(u64 *dst_blk, const u64 *src_blk,
+ const u64 *blk_em, size_t val_cnt)
{
size_t val;
@@ -1145,8 +1091,7 @@ static inline void kbase_hwcnt_dump_buffer_block_copy_strict(u64 *dst_blk,
return;
for (val = 0; val < val_cnt; val++) {
- bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(
- blk_em, val);
+ bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, val);
dst_blk[val] = val_enabled ? src_blk[val] : 0;
}
@@ -1165,10 +1110,9 @@ static inline void kbase_hwcnt_dump_buffer_block_copy_strict(u64 *dst_blk,
* The dst, src, and dst_enable_map MUST have been created from the same
* metadata.
*/
-void kbase_hwcnt_dump_buffer_accumulate(
- struct kbase_hwcnt_dump_buffer *dst,
- const struct kbase_hwcnt_dump_buffer *src,
- const struct kbase_hwcnt_enable_map *dst_enable_map);
+void kbase_hwcnt_dump_buffer_accumulate(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
/**
* kbase_hwcnt_dump_buffer_block_accumulate() - Copy all block headers and
@@ -1181,10 +1125,8 @@ void kbase_hwcnt_dump_buffer_accumulate(
* @hdr_cnt: Number of headers in the block.
* @ctr_cnt: Number of counters in the block.
*/
-static inline void kbase_hwcnt_dump_buffer_block_accumulate(u64 *dst_blk,
- const u64 *src_blk,
- size_t hdr_cnt,
- size_t ctr_cnt)
+static inline void kbase_hwcnt_dump_buffer_block_accumulate(u64 *dst_blk, const u64 *src_blk,
+ size_t hdr_cnt, size_t ctr_cnt)
{
size_t ctr;
@@ -1219,10 +1161,9 @@ static inline void kbase_hwcnt_dump_buffer_block_accumulate(u64 *dst_blk,
* The dst, src, and dst_enable_map MUST have been created from the same
* metadata.
*/
-void kbase_hwcnt_dump_buffer_accumulate_strict(
- struct kbase_hwcnt_dump_buffer *dst,
- const struct kbase_hwcnt_dump_buffer *src,
- const struct kbase_hwcnt_enable_map *dst_enable_map);
+void kbase_hwcnt_dump_buffer_accumulate_strict(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
/**
* kbase_hwcnt_dump_buffer_block_accumulate_strict() - Copy all enabled block
@@ -1241,21 +1182,19 @@ void kbase_hwcnt_dump_buffer_accumulate_strict(
* @hdr_cnt: Number of headers in the block.
* @ctr_cnt: Number of counters in the block.
*/
-static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict(
- u64 *dst_blk, const u64 *src_blk, const u64 *blk_em, size_t hdr_cnt,
- size_t ctr_cnt)
+static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict(u64 *dst_blk, const u64 *src_blk,
+ const u64 *blk_em,
+ size_t hdr_cnt, size_t ctr_cnt)
{
size_t ctr;
if (WARN_ON(!dst_blk) || WARN_ON(!src_blk))
return;
- kbase_hwcnt_dump_buffer_block_copy_strict(
- dst_blk, src_blk, blk_em, hdr_cnt);
+ kbase_hwcnt_dump_buffer_block_copy_strict(dst_blk, src_blk, blk_em, hdr_cnt);
for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) {
- bool ctr_enabled = kbase_hwcnt_enable_map_block_value_enabled(
- blk_em, ctr);
+ bool ctr_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, ctr);
if (ctr_enabled)
dst_blk[ctr] += src_blk[ctr];
@@ -1270,8 +1209,7 @@ static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict(
* @md: Non-NULL pointer to metadata.
* @clk: size_t variable used as clock iterator.
*/
-#define kbase_hwcnt_metadata_for_each_clock(md, clk) \
- for ((clk) = 0; (clk) < (md)->clk_cnt; (clk)++)
+#define kbase_hwcnt_metadata_for_each_clock(md, clk) for ((clk) = 0; (clk) < (md)->clk_cnt; (clk)++)
/**
* kbase_hwcnt_clk_enable_map_enabled() - Check if the given index is enabled
@@ -1281,8 +1219,7 @@ static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict(
*
* Return: true if the index of the clock domain is enabled, else false.
*/
-static inline bool kbase_hwcnt_clk_enable_map_enabled(
- const u64 clk_enable_map, const size_t index)
+static inline bool kbase_hwcnt_clk_enable_map_enabled(const u64 clk_enable_map, const size_t index)
{
if (WARN_ON(index >= 64))
return false;
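
A minimal sketch of how the reflowed iteration macro and enable-map helpers above are typically combined, assuming `md` and `map` were obtained for the same metadata via kbase_hwcnt_enable_map_alloc(); this is illustrative only and not part of the diff hunks.

static void example_enable_first_counter(const struct kbase_hwcnt_metadata *md,
					 struct kbase_hwcnt_enable_map *map)
{
	size_t grp, blk, blk_inst;

	/* Walk group, then block, then block instance, in the documented linear order. */
	kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst) {
		u64 *blk_em = kbase_hwcnt_enable_map_block_instance(map, grp, blk, blk_inst);

		/* Enable the first value of every available block instance. */
		if (blk_em && kbase_hwcnt_metadata_block_instance_avail(md, grp, blk, blk_inst))
			kbase_hwcnt_enable_map_block_enable_value(blk_em, 0);
	}
}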
diff --git a/mali_kbase/mali_kbase_hwcnt_virtualizer.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.c
index 52ecb7b..d618764 100644
--- a/mali_kbase/mali_kbase_hwcnt_virtualizer.c
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -19,10 +19,10 @@
*
*/
-#include "mali_kbase_hwcnt_virtualizer.h"
-#include "mali_kbase_hwcnt_accumulator.h"
-#include "mali_kbase_hwcnt_context.h"
-#include "mali_kbase_hwcnt_types.h"
+#include "hwcnt/mali_kbase_hwcnt_virtualizer.h"
+#include "hwcnt/mali_kbase_hwcnt_accumulator.h"
+#include "hwcnt/mali_kbase_hwcnt_context.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
#include <linux/mutex.h>
#include <linux/slab.h>
@@ -75,8 +75,8 @@ struct kbase_hwcnt_virtualizer_client {
u64 ts_start_ns;
};
-const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata(
- struct kbase_hwcnt_virtualizer *hvirt)
+const struct kbase_hwcnt_metadata *
+kbase_hwcnt_virtualizer_metadata(struct kbase_hwcnt_virtualizer *hvirt)
{
if (!hvirt)
return NULL;
@@ -90,8 +90,7 @@ const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata(
*
* Will safely free a client in any partial state of construction.
*/
-static void kbasep_hwcnt_virtualizer_client_free(
- struct kbase_hwcnt_virtualizer_client *hvcli)
+static void kbasep_hwcnt_virtualizer_client_free(struct kbase_hwcnt_virtualizer_client *hvcli)
{
if (!hvcli)
return;
@@ -110,9 +109,8 @@ static void kbasep_hwcnt_virtualizer_client_free(
*
* Return: 0 on success, else error code.
*/
-static int kbasep_hwcnt_virtualizer_client_alloc(
- const struct kbase_hwcnt_metadata *metadata,
- struct kbase_hwcnt_virtualizer_client **out_hvcli)
+static int kbasep_hwcnt_virtualizer_client_alloc(const struct kbase_hwcnt_metadata *metadata,
+ struct kbase_hwcnt_virtualizer_client **out_hvcli)
{
int errcode;
struct kbase_hwcnt_virtualizer_client *hvcli = NULL;
@@ -145,9 +143,9 @@ error:
* @hvcli: Non-NULL pointer to virtualizer client.
* @dump_buf: Non-NULL pointer to dump buffer to accumulate from.
*/
-static void kbasep_hwcnt_virtualizer_client_accumulate(
- struct kbase_hwcnt_virtualizer_client *hvcli,
- const struct kbase_hwcnt_dump_buffer *dump_buf)
+static void
+kbasep_hwcnt_virtualizer_client_accumulate(struct kbase_hwcnt_virtualizer_client *hvcli,
+ const struct kbase_hwcnt_dump_buffer *dump_buf)
{
WARN_ON(!hvcli);
WARN_ON(!dump_buf);
@@ -155,12 +153,10 @@ static void kbasep_hwcnt_virtualizer_client_accumulate(
if (hvcli->has_accum) {
/* If already some accumulation, accumulate */
- kbase_hwcnt_dump_buffer_accumulate(
- &hvcli->accum_buf, dump_buf, &hvcli->enable_map);
+ kbase_hwcnt_dump_buffer_accumulate(&hvcli->accum_buf, dump_buf, &hvcli->enable_map);
} else {
/* If no accumulation, copy */
- kbase_hwcnt_dump_buffer_copy(
- &hvcli->accum_buf, dump_buf, &hvcli->enable_map);
+ kbase_hwcnt_dump_buffer_copy(&hvcli->accum_buf, dump_buf, &hvcli->enable_map);
}
hvcli->has_accum = true;
}
@@ -173,8 +169,7 @@ static void kbasep_hwcnt_virtualizer_client_accumulate(
*
* Will safely terminate the accumulator in any partial state of initialisation.
*/
-static void kbasep_hwcnt_virtualizer_accumulator_term(
- struct kbase_hwcnt_virtualizer *hvirt)
+static void kbasep_hwcnt_virtualizer_accumulator_term(struct kbase_hwcnt_virtualizer *hvirt)
{
WARN_ON(!hvirt);
lockdep_assert_held(&hvirt->lock);
@@ -194,8 +189,7 @@ static void kbasep_hwcnt_virtualizer_accumulator_term(
*
* Return: 0 on success, else error code.
*/
-static int kbasep_hwcnt_virtualizer_accumulator_init(
- struct kbase_hwcnt_virtualizer *hvirt)
+static int kbasep_hwcnt_virtualizer_accumulator_init(struct kbase_hwcnt_virtualizer *hvirt)
{
int errcode;
@@ -204,18 +198,15 @@ static int kbasep_hwcnt_virtualizer_accumulator_init(
WARN_ON(hvirt->client_count);
WARN_ON(hvirt->accum);
- errcode = kbase_hwcnt_accumulator_acquire(
- hvirt->hctx, &hvirt->accum);
+ errcode = kbase_hwcnt_accumulator_acquire(hvirt->hctx, &hvirt->accum);
if (errcode)
goto error;
- errcode = kbase_hwcnt_enable_map_alloc(
- hvirt->metadata, &hvirt->scratch_map);
+ errcode = kbase_hwcnt_enable_map_alloc(hvirt->metadata, &hvirt->scratch_map);
if (errcode)
goto error;
- errcode = kbase_hwcnt_dump_buffer_alloc(
- hvirt->metadata, &hvirt->scratch_buf);
+ errcode = kbase_hwcnt_dump_buffer_alloc(hvirt->metadata, &hvirt->scratch_buf);
if (errcode)
goto error;
@@ -234,10 +225,9 @@ error:
*
* Return: 0 on success, else error code.
*/
-static int kbasep_hwcnt_virtualizer_client_add(
- struct kbase_hwcnt_virtualizer *hvirt,
- struct kbase_hwcnt_virtualizer_client *hvcli,
- const struct kbase_hwcnt_enable_map *enable_map)
+static int kbasep_hwcnt_virtualizer_client_add(struct kbase_hwcnt_virtualizer *hvirt,
+ struct kbase_hwcnt_virtualizer_client *hvcli,
+ const struct kbase_hwcnt_enable_map *enable_map)
{
int errcode = 0;
u64 ts_start_ns;
@@ -258,28 +248,25 @@ static int kbasep_hwcnt_virtualizer_client_add(
if (hvirt->client_count == 1) {
/* First client, so just pass the enable map onwards as is */
- errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum,
- enable_map, &ts_start_ns, &ts_end_ns, NULL);
+ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, enable_map,
+ &ts_start_ns, &ts_end_ns, NULL);
} else {
struct kbase_hwcnt_virtualizer_client *pos;
/* Make the scratch enable map the union of all enable maps */
- kbase_hwcnt_enable_map_copy(
- &hvirt->scratch_map, enable_map);
- list_for_each_entry(pos, &hvirt->clients, node)
- kbase_hwcnt_enable_map_union(
- &hvirt->scratch_map, &pos->enable_map);
+ kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map);
+ list_for_each_entry (pos, &hvirt->clients, node)
+ kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map);
/* Set the counters with the new union enable map */
- errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum,
- &hvirt->scratch_map,
- &ts_start_ns, &ts_end_ns,
- &hvirt->scratch_buf);
+ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, &hvirt->scratch_map,
+ &ts_start_ns, &ts_end_ns,
+ &hvirt->scratch_buf);
/* Accumulate into only existing clients' accumulation bufs */
if (!errcode)
- list_for_each_entry(pos, &hvirt->clients, node)
- kbasep_hwcnt_virtualizer_client_accumulate(
- pos, &hvirt->scratch_buf);
+ list_for_each_entry (pos, &hvirt->clients, node)
+ kbasep_hwcnt_virtualizer_client_accumulate(pos,
+ &hvirt->scratch_buf);
}
if (errcode)
goto error;
@@ -307,9 +294,8 @@ error:
* @hvirt: Non-NULL pointer to the hardware counter virtualizer.
* @hvcli: Non-NULL pointer to the virtualizer client to remove.
*/
-static void kbasep_hwcnt_virtualizer_client_remove(
- struct kbase_hwcnt_virtualizer *hvirt,
- struct kbase_hwcnt_virtualizer_client *hvcli)
+static void kbasep_hwcnt_virtualizer_client_remove(struct kbase_hwcnt_virtualizer *hvirt,
+ struct kbase_hwcnt_virtualizer_client *hvcli)
{
int errcode = 0;
u64 ts_start_ns;
@@ -329,22 +315,21 @@ static void kbasep_hwcnt_virtualizer_client_remove(
struct kbase_hwcnt_virtualizer_client *pos;
/* Make the scratch enable map the union of all enable maps */
kbase_hwcnt_enable_map_disable_all(&hvirt->scratch_map);
- list_for_each_entry(pos, &hvirt->clients, node)
- kbase_hwcnt_enable_map_union(
- &hvirt->scratch_map, &pos->enable_map);
+ list_for_each_entry (pos, &hvirt->clients, node)
+ kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map);
/* Set the counters with the new union enable map */
- errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum,
- &hvirt->scratch_map,
- &ts_start_ns, &ts_end_ns,
- &hvirt->scratch_buf);
+ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, &hvirt->scratch_map,
+ &ts_start_ns, &ts_end_ns,
+ &hvirt->scratch_buf);
/* Accumulate into remaining clients' accumulation bufs */
- if (!errcode)
- list_for_each_entry(pos, &hvirt->clients, node)
- kbasep_hwcnt_virtualizer_client_accumulate(
- pos, &hvirt->scratch_buf);
+ if (!errcode) {
+ list_for_each_entry (pos, &hvirt->clients, node)
+ kbasep_hwcnt_virtualizer_client_accumulate(pos,
+ &hvirt->scratch_buf);
- /* Store the most recent dump time for rate limiting */
- hvirt->ts_last_dump_ns = ts_end_ns;
+ /* Store the most recent dump time for rate limiting */
+ hvirt->ts_last_dump_ns = ts_end_ns;
+ }
}
WARN_ON(errcode);
}
@@ -370,11 +355,8 @@ static void kbasep_hwcnt_virtualizer_client_remove(
* Return: 0 on success or error code.
*/
static int kbasep_hwcnt_virtualizer_client_set_counters(
- struct kbase_hwcnt_virtualizer *hvirt,
- struct kbase_hwcnt_virtualizer_client *hvcli,
- const struct kbase_hwcnt_enable_map *enable_map,
- u64 *ts_start_ns,
- u64 *ts_end_ns,
+ struct kbase_hwcnt_virtualizer *hvirt, struct kbase_hwcnt_virtualizer_client *hvcli,
+ const struct kbase_hwcnt_enable_map *enable_map, u64 *ts_start_ns, u64 *ts_end_ns,
struct kbase_hwcnt_dump_buffer *dump_buf)
{
int errcode;
@@ -391,32 +373,29 @@ static int kbasep_hwcnt_virtualizer_client_set_counters(
/* Make the scratch enable map the union of all enable maps */
kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map);
- list_for_each_entry(pos, &hvirt->clients, node)
+ list_for_each_entry (pos, &hvirt->clients, node)
/* Ignore the enable map of the selected client */
if (pos != hvcli)
- kbase_hwcnt_enable_map_union(
- &hvirt->scratch_map, &pos->enable_map);
+ kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map);
/* Set the counters with the new union enable map */
- errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum,
- &hvirt->scratch_map, ts_start_ns, ts_end_ns,
- &hvirt->scratch_buf);
+ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, &hvirt->scratch_map,
+ ts_start_ns, ts_end_ns, &hvirt->scratch_buf);
if (errcode)
return errcode;
/* Accumulate into all accumulation bufs except the selected client's */
- list_for_each_entry(pos, &hvirt->clients, node)
+ list_for_each_entry (pos, &hvirt->clients, node)
if (pos != hvcli)
- kbasep_hwcnt_virtualizer_client_accumulate(
- pos, &hvirt->scratch_buf);
+ kbasep_hwcnt_virtualizer_client_accumulate(pos, &hvirt->scratch_buf);
/* Finally, write into the dump buf */
if (dump_buf) {
const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf;
if (hvcli->has_accum) {
- kbase_hwcnt_dump_buffer_accumulate(
- &hvcli->accum_buf, src, &hvcli->enable_map);
+ kbase_hwcnt_dump_buffer_accumulate(&hvcli->accum_buf, src,
+ &hvcli->enable_map);
src = &hvcli->accum_buf;
}
kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map);
@@ -436,12 +415,10 @@ static int kbasep_hwcnt_virtualizer_client_set_counters(
return errcode;
}
-int kbase_hwcnt_virtualizer_client_set_counters(
- struct kbase_hwcnt_virtualizer_client *hvcli,
- const struct kbase_hwcnt_enable_map *enable_map,
- u64 *ts_start_ns,
- u64 *ts_end_ns,
- struct kbase_hwcnt_dump_buffer *dump_buf)
+int kbase_hwcnt_virtualizer_client_set_counters(struct kbase_hwcnt_virtualizer_client *hvcli,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ u64 *ts_start_ns, u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf)
{
int errcode;
struct kbase_hwcnt_virtualizer *hvirt;
@@ -464,14 +441,12 @@ int kbase_hwcnt_virtualizer_client_set_counters(
* to the accumulator, saving a fair few copies and
* accumulations.
*/
- errcode = kbase_hwcnt_accumulator_set_counters(
- hvirt->accum, enable_map,
- ts_start_ns, ts_end_ns, dump_buf);
+ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, enable_map,
+ ts_start_ns, ts_end_ns, dump_buf);
if (!errcode) {
/* Update the selected client's enable map */
- kbase_hwcnt_enable_map_copy(
- &hvcli->enable_map, enable_map);
+ kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map);
/* Fix up the timestamps */
*ts_start_ns = hvcli->ts_start_ns;
@@ -483,8 +458,7 @@ int kbase_hwcnt_virtualizer_client_set_counters(
} else {
/* Otherwise, do the full virtualize */
errcode = kbasep_hwcnt_virtualizer_client_set_counters(
- hvirt, hvcli, enable_map,
- ts_start_ns, ts_end_ns, dump_buf);
+ hvirt, hvcli, enable_map, ts_start_ns, ts_end_ns, dump_buf);
}
mutex_unlock(&hvirt->lock);
@@ -507,12 +481,10 @@ int kbase_hwcnt_virtualizer_client_set_counters(
*
* Return: 0 on success or error code.
*/
-static int kbasep_hwcnt_virtualizer_client_dump(
- struct kbase_hwcnt_virtualizer *hvirt,
- struct kbase_hwcnt_virtualizer_client *hvcli,
- u64 *ts_start_ns,
- u64 *ts_end_ns,
- struct kbase_hwcnt_dump_buffer *dump_buf)
+static int kbasep_hwcnt_virtualizer_client_dump(struct kbase_hwcnt_virtualizer *hvirt,
+ struct kbase_hwcnt_virtualizer_client *hvcli,
+ u64 *ts_start_ns, u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf)
{
int errcode;
struct kbase_hwcnt_virtualizer_client *pos;
@@ -525,24 +497,23 @@ static int kbasep_hwcnt_virtualizer_client_dump(
lockdep_assert_held(&hvirt->lock);
/* Perform the dump */
- errcode = kbase_hwcnt_accumulator_dump(hvirt->accum,
- ts_start_ns, ts_end_ns, &hvirt->scratch_buf);
+ errcode = kbase_hwcnt_accumulator_dump(hvirt->accum, ts_start_ns, ts_end_ns,
+ &hvirt->scratch_buf);
if (errcode)
return errcode;
/* Accumulate into all accumulation bufs except the selected client's */
- list_for_each_entry(pos, &hvirt->clients, node)
+ list_for_each_entry (pos, &hvirt->clients, node)
if (pos != hvcli)
- kbasep_hwcnt_virtualizer_client_accumulate(
- pos, &hvirt->scratch_buf);
+ kbasep_hwcnt_virtualizer_client_accumulate(pos, &hvirt->scratch_buf);
/* Finally, write into the dump buf */
if (dump_buf) {
const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf;
if (hvcli->has_accum) {
- kbase_hwcnt_dump_buffer_accumulate(
- &hvcli->accum_buf, src, &hvcli->enable_map);
+ kbase_hwcnt_dump_buffer_accumulate(&hvcli->accum_buf, src,
+ &hvcli->enable_map);
src = &hvcli->accum_buf;
}
kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map);
@@ -578,11 +549,8 @@ static int kbasep_hwcnt_virtualizer_client_dump(
* Return: 0 on success or error code.
*/
static int kbasep_hwcnt_virtualizer_client_dump_rate_limited(
- struct kbase_hwcnt_virtualizer *hvirt,
- struct kbase_hwcnt_virtualizer_client *hvcli,
- u64 *ts_start_ns,
- u64 *ts_end_ns,
- struct kbase_hwcnt_dump_buffer *dump_buf)
+ struct kbase_hwcnt_virtualizer *hvirt, struct kbase_hwcnt_virtualizer_client *hvcli,
+ u64 *ts_start_ns, u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf)
{
bool rate_limited = true;
@@ -602,10 +570,8 @@ static int kbasep_hwcnt_virtualizer_client_dump_rate_limited(
*/
rate_limited = false;
} else {
- const u64 ts_ns =
- kbase_hwcnt_accumulator_timestamp_ns(hvirt->accum);
- const u64 time_since_last_dump_ns =
- ts_ns - hvirt->ts_last_dump_ns;
+ const u64 ts_ns = kbase_hwcnt_accumulator_timestamp_ns(hvirt->accum);
+ const u64 time_since_last_dump_ns = ts_ns - hvirt->ts_last_dump_ns;
/* Dump period equals or exceeds the threshold */
if (time_since_last_dump_ns >= hvirt->dump_threshold_ns)
@@ -613,8 +579,8 @@ static int kbasep_hwcnt_virtualizer_client_dump_rate_limited(
}
if (!rate_limited)
- return kbasep_hwcnt_virtualizer_client_dump(
- hvirt, hvcli, ts_start_ns, ts_end_ns, dump_buf);
+ return kbasep_hwcnt_virtualizer_client_dump(hvirt, hvcli, ts_start_ns, ts_end_ns,
+ dump_buf);
/* If we've gotten this far, the client must have something accumulated
* otherwise it is a logic error
@@ -622,8 +588,7 @@ static int kbasep_hwcnt_virtualizer_client_dump_rate_limited(
WARN_ON(!hvcli->has_accum);
if (dump_buf)
- kbase_hwcnt_dump_buffer_copy(
- dump_buf, &hvcli->accum_buf, &hvcli->enable_map);
+ kbase_hwcnt_dump_buffer_copy(dump_buf, &hvcli->accum_buf, &hvcli->enable_map);
hvcli->has_accum = false;
*ts_start_ns = hvcli->ts_start_ns;
@@ -633,11 +598,9 @@ static int kbasep_hwcnt_virtualizer_client_dump_rate_limited(
return 0;
}
-int kbase_hwcnt_virtualizer_client_dump(
- struct kbase_hwcnt_virtualizer_client *hvcli,
- u64 *ts_start_ns,
- u64 *ts_end_ns,
- struct kbase_hwcnt_dump_buffer *dump_buf)
+int kbase_hwcnt_virtualizer_client_dump(struct kbase_hwcnt_virtualizer_client *hvcli,
+ u64 *ts_start_ns, u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf)
{
int errcode;
struct kbase_hwcnt_virtualizer *hvirt;
@@ -659,8 +622,8 @@ int kbase_hwcnt_virtualizer_client_dump(
* to the accumulator, saving a fair few copies and
* accumulations.
*/
- errcode = kbase_hwcnt_accumulator_dump(
- hvirt->accum, ts_start_ns, ts_end_ns, dump_buf);
+ errcode = kbase_hwcnt_accumulator_dump(hvirt->accum, ts_start_ns, ts_end_ns,
+ dump_buf);
if (!errcode) {
/* Fix up the timestamps */
@@ -681,20 +644,17 @@ int kbase_hwcnt_virtualizer_client_dump(
return errcode;
}
-int kbase_hwcnt_virtualizer_client_create(
- struct kbase_hwcnt_virtualizer *hvirt,
- const struct kbase_hwcnt_enable_map *enable_map,
- struct kbase_hwcnt_virtualizer_client **out_hvcli)
+int kbase_hwcnt_virtualizer_client_create(struct kbase_hwcnt_virtualizer *hvirt,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ struct kbase_hwcnt_virtualizer_client **out_hvcli)
{
int errcode;
struct kbase_hwcnt_virtualizer_client *hvcli;
- if (!hvirt || !enable_map || !out_hvcli ||
- (enable_map->metadata != hvirt->metadata))
+ if (!hvirt || !enable_map || !out_hvcli || (enable_map->metadata != hvirt->metadata))
return -EINVAL;
- errcode = kbasep_hwcnt_virtualizer_client_alloc(
- hvirt->metadata, &hvcli);
+ errcode = kbasep_hwcnt_virtualizer_client_alloc(hvirt->metadata, &hvcli);
if (errcode)
return errcode;
@@ -713,8 +673,7 @@ int kbase_hwcnt_virtualizer_client_create(
return 0;
}
-void kbase_hwcnt_virtualizer_client_destroy(
- struct kbase_hwcnt_virtualizer_client *hvcli)
+void kbase_hwcnt_virtualizer_client_destroy(struct kbase_hwcnt_virtualizer_client *hvcli)
{
if (!hvcli)
return;
@@ -728,10 +687,8 @@ void kbase_hwcnt_virtualizer_client_destroy(
kbasep_hwcnt_virtualizer_client_free(hvcli);
}
-int kbase_hwcnt_virtualizer_init(
- struct kbase_hwcnt_context *hctx,
- u64 dump_threshold_ns,
- struct kbase_hwcnt_virtualizer **out_hvirt)
+int kbase_hwcnt_virtualizer_init(struct kbase_hwcnt_context *hctx, u64 dump_threshold_ns,
+ struct kbase_hwcnt_virtualizer **out_hvirt)
{
struct kbase_hwcnt_virtualizer *virt;
const struct kbase_hwcnt_metadata *metadata;
@@ -758,8 +715,7 @@ int kbase_hwcnt_virtualizer_init(
return 0;
}
-void kbase_hwcnt_virtualizer_term(
- struct kbase_hwcnt_virtualizer *hvirt)
+void kbase_hwcnt_virtualizer_term(struct kbase_hwcnt_virtualizer *hvirt)
{
if (!hvirt)
return;
@@ -768,7 +724,7 @@ void kbase_hwcnt_virtualizer_term(
if (WARN_ON(hvirt->client_count != 0)) {
struct kbase_hwcnt_virtualizer_client *pos, *n;
- list_for_each_entry_safe(pos, n, &hvirt->clients, node)
+ list_for_each_entry_safe (pos, n, &hvirt->clients, node)
kbase_hwcnt_virtualizer_client_destroy(pos);
}
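
A brief usage sketch of the virtualizer client API whose definitions are reflowed above, assuming `hvirt`, `enable_map` and `dump_buf` were created elsewhere against the same metadata; error handling is trimmed to the essentials.

static int example_single_dump(struct kbase_hwcnt_virtualizer *hvirt,
			       const struct kbase_hwcnt_enable_map *enable_map,
			       struct kbase_hwcnt_dump_buffer *dump_buf)
{
	struct kbase_hwcnt_virtualizer_client *hvcli;
	u64 ts_start_ns, ts_end_ns;
	int errcode;

	errcode = kbase_hwcnt_virtualizer_client_create(hvirt, enable_map, &hvcli);
	if (errcode)
		return errcode;

	/* May be served from the client's accumulation buffer if the dump
	 * threshold has not yet elapsed (see the rate-limited path above).
	 */
	errcode = kbase_hwcnt_virtualizer_client_dump(hvcli, &ts_start_ns, &ts_end_ns, dump_buf);

	kbase_hwcnt_virtualizer_client_destroy(hvcli);
	return errcode;
}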
diff --git a/mali_kbase/mali_kbase_hwcnt_virtualizer.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.h
index 08e8e9f..485ba74 100644
--- a/mali_kbase/mali_kbase_hwcnt_virtualizer.h
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -51,17 +51,14 @@ struct kbase_hwcnt_dump_buffer;
*
* Return: 0 on success, else error code.
*/
-int kbase_hwcnt_virtualizer_init(
- struct kbase_hwcnt_context *hctx,
- u64 dump_threshold_ns,
- struct kbase_hwcnt_virtualizer **out_hvirt);
+int kbase_hwcnt_virtualizer_init(struct kbase_hwcnt_context *hctx, u64 dump_threshold_ns,
+ struct kbase_hwcnt_virtualizer **out_hvirt);
/**
* kbase_hwcnt_virtualizer_term - Terminate a hardware counter virtualizer.
* @hvirt: Pointer to virtualizer to be terminated.
*/
-void kbase_hwcnt_virtualizer_term(
- struct kbase_hwcnt_virtualizer *hvirt);
+void kbase_hwcnt_virtualizer_term(struct kbase_hwcnt_virtualizer *hvirt);
/**
* kbase_hwcnt_virtualizer_metadata - Get the hardware counter metadata used by
@@ -71,8 +68,8 @@ void kbase_hwcnt_virtualizer_term(
*
* Return: Non-NULL pointer to metadata, or NULL on error.
*/
-const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata(
- struct kbase_hwcnt_virtualizer *hvirt);
+const struct kbase_hwcnt_metadata *
+kbase_hwcnt_virtualizer_metadata(struct kbase_hwcnt_virtualizer *hvirt);
/**
* kbase_hwcnt_virtualizer_client_create - Create a new virtualizer client.
@@ -84,17 +81,15 @@ const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata(
*
* Return: 0 on success, else error code.
*/
-int kbase_hwcnt_virtualizer_client_create(
- struct kbase_hwcnt_virtualizer *hvirt,
- const struct kbase_hwcnt_enable_map *enable_map,
- struct kbase_hwcnt_virtualizer_client **out_hvcli);
+int kbase_hwcnt_virtualizer_client_create(struct kbase_hwcnt_virtualizer *hvirt,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ struct kbase_hwcnt_virtualizer_client **out_hvcli);
/**
* kbase_hwcnt_virtualizer_client_destroy() - Destroy a virtualizer client.
* @hvcli: Pointer to the hardware counter client.
*/
-void kbase_hwcnt_virtualizer_client_destroy(
- struct kbase_hwcnt_virtualizer_client *hvcli);
+void kbase_hwcnt_virtualizer_client_destroy(struct kbase_hwcnt_virtualizer_client *hvcli);
/**
* kbase_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's
@@ -115,12 +110,10 @@ void kbase_hwcnt_virtualizer_client_destroy(
*
* Return: 0 on success or error code.
*/
-int kbase_hwcnt_virtualizer_client_set_counters(
- struct kbase_hwcnt_virtualizer_client *hvcli,
- const struct kbase_hwcnt_enable_map *enable_map,
- u64 *ts_start_ns,
- u64 *ts_end_ns,
- struct kbase_hwcnt_dump_buffer *dump_buf);
+int kbase_hwcnt_virtualizer_client_set_counters(struct kbase_hwcnt_virtualizer_client *hvcli,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ u64 *ts_start_ns, u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf);
/**
* kbase_hwcnt_virtualizer_client_dump - Perform a dump of the client's
@@ -136,11 +129,9 @@ int kbase_hwcnt_virtualizer_client_set_counters(
*
* Return: 0 on success or error code.
*/
-int kbase_hwcnt_virtualizer_client_dump(
- struct kbase_hwcnt_virtualizer_client *hvcli,
- u64 *ts_start_ns,
- u64 *ts_end_ns,
- struct kbase_hwcnt_dump_buffer *dump_buf);
+int kbase_hwcnt_virtualizer_client_dump(struct kbase_hwcnt_virtualizer_client *hvcli,
+ u64 *ts_start_ns, u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf);
/**
* kbase_hwcnt_virtualizer_queue_work() - Queue hardware counter related async
diff --git a/mali_kbase/mali_kbase_hwcnt_watchdog_if.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if.h
index 1873318..501c008 100644
--- a/mali_kbase/mali_kbase_hwcnt_watchdog_if.h
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -50,17 +50,17 @@ typedef void kbase_hwcnt_watchdog_callback_fn(void *user_data);
*
* Return: 0 if the watchdog timer enabled successfully, error code otherwise.
*/
-typedef int kbase_hwcnt_watchdog_enable_fn(
- const struct kbase_hwcnt_watchdog_info *timer, u32 period_ms,
- kbase_hwcnt_watchdog_callback_fn *callback, void *user_data);
+typedef int kbase_hwcnt_watchdog_enable_fn(const struct kbase_hwcnt_watchdog_info *timer,
+ u32 period_ms,
+ kbase_hwcnt_watchdog_callback_fn *callback,
+ void *user_data);
/**
* typedef kbase_hwcnt_watchdog_disable_fn - Disable watchdog timer
*
* @timer: Non-NULL pointer to a watchdog timer interface context
*/
-typedef void
-kbase_hwcnt_watchdog_disable_fn(const struct kbase_hwcnt_watchdog_info *timer);
+typedef void kbase_hwcnt_watchdog_disable_fn(const struct kbase_hwcnt_watchdog_info *timer);
/**
* typedef kbase_hwcnt_watchdog_modify_fn - Modify watchdog timer's timeout
@@ -68,9 +68,8 @@ kbase_hwcnt_watchdog_disable_fn(const struct kbase_hwcnt_watchdog_info *timer);
* @timer: Non-NULL pointer to a watchdog timer interface context
* @delay_ms: Watchdog timer expiration in milliseconds
*/
-typedef void
-kbase_hwcnt_watchdog_modify_fn(const struct kbase_hwcnt_watchdog_info *timer,
- u32 delay_ms);
+typedef void kbase_hwcnt_watchdog_modify_fn(const struct kbase_hwcnt_watchdog_info *timer,
+ u32 delay_ms);
/**
* struct kbase_hwcnt_watchdog_interface - Hardware counter watchdog virtual interface.
diff --git a/mali_kbase/mali_kbase_hwcnt_watchdog_if_timer.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.c
index 69b957a..4caa832 100644
--- a/mali_kbase/mali_kbase_hwcnt_watchdog_if_timer.c
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,8 +20,8 @@
*/
#include "mali_kbase.h"
-#include "mali_kbase_hwcnt_watchdog_if.h"
-#include "mali_kbase_hwcnt_watchdog_if_timer.h"
+#include "hwcnt/mali_kbase_hwcnt_watchdog_if.h"
+#include "hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h"
#include <linux/workqueue.h>
#include <linux/slab.h>
@@ -62,12 +62,10 @@ static void kbasep_hwcnt_watchdog_callback(struct work_struct *const work)
}
static int kbasep_hwcnt_watchdog_if_timer_enable(
- const struct kbase_hwcnt_watchdog_info *const timer,
- u32 const period_ms, kbase_hwcnt_watchdog_callback_fn *const callback,
- void *const user_data)
+ const struct kbase_hwcnt_watchdog_info *const timer, u32 const period_ms,
+ kbase_hwcnt_watchdog_callback_fn *const callback, void *const user_data)
{
- struct kbase_hwcnt_watchdog_if_timer_info *const timer_info =
- (void *)timer;
+ struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = (void *)timer;
if (WARN_ON(!timer) || WARN_ON(!callback) || WARN_ON(timer_info->timer_enabled))
return -EINVAL;
@@ -81,11 +79,10 @@ static int kbasep_hwcnt_watchdog_if_timer_enable(
return 0;
}
-static void kbasep_hwcnt_watchdog_if_timer_disable(
- const struct kbase_hwcnt_watchdog_info *const timer)
+static void
+kbasep_hwcnt_watchdog_if_timer_disable(const struct kbase_hwcnt_watchdog_info *const timer)
{
- struct kbase_hwcnt_watchdog_if_timer_info *const timer_info =
- (void *)timer;
+ struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = (void *)timer;
if (WARN_ON(!timer))
return;
@@ -97,11 +94,11 @@ static void kbasep_hwcnt_watchdog_if_timer_disable(
timer_info->timer_enabled = false;
}
-static void kbasep_hwcnt_watchdog_if_timer_modify(
- const struct kbase_hwcnt_watchdog_info *const timer, u32 const delay_ms)
+static void
+kbasep_hwcnt_watchdog_if_timer_modify(const struct kbase_hwcnt_watchdog_info *const timer,
+ u32 const delay_ms)
{
- struct kbase_hwcnt_watchdog_if_timer_info *const timer_info =
- (void *)timer;
+ struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = (void *)timer;
if (WARN_ON(!timer) || WARN_ON(!timer_info->timer_enabled))
return;
@@ -109,8 +106,7 @@ static void kbasep_hwcnt_watchdog_if_timer_modify(
mod_delayed_work(timer_info->workq, &timer_info->dwork, msecs_to_jiffies(delay_ms));
}
-void kbase_hwcnt_watchdog_if_timer_destroy(
- struct kbase_hwcnt_watchdog_interface *const watchdog_if)
+void kbase_hwcnt_watchdog_if_timer_destroy(struct kbase_hwcnt_watchdog_interface *const watchdog_if)
{
struct kbase_hwcnt_watchdog_if_timer_info *timer_info;
@@ -125,11 +121,12 @@ void kbase_hwcnt_watchdog_if_timer_destroy(
destroy_workqueue(timer_info->workq);
kfree(timer_info);
- *watchdog_if = (struct kbase_hwcnt_watchdog_interface){ NULL };
+ *watchdog_if = (struct kbase_hwcnt_watchdog_interface){
+ .timer = NULL, .enable = NULL, .disable = NULL, .modify = NULL
+ };
}
-int kbase_hwcnt_watchdog_if_timer_create(
- struct kbase_hwcnt_watchdog_interface *const watchdog_if)
+int kbase_hwcnt_watchdog_if_timer_create(struct kbase_hwcnt_watchdog_interface *const watchdog_if)
{
struct kbase_hwcnt_watchdog_if_timer_info *timer_info;
@@ -140,9 +137,7 @@ int kbase_hwcnt_watchdog_if_timer_create(
if (!timer_info)
return -ENOMEM;
- *timer_info =
- (struct kbase_hwcnt_watchdog_if_timer_info){ .timer_enabled =
- false };
+ *timer_info = (struct kbase_hwcnt_watchdog_if_timer_info){ .timer_enabled = false };
INIT_DELAYED_WORK(&timer_info->dwork, kbasep_hwcnt_watchdog_callback);
diff --git a/mali_kbase/mali_kbase_hwcnt_watchdog_if_timer.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h
index 3bd69c3..a545ad3 100644
--- a/mali_kbase/mali_kbase_hwcnt_watchdog_if_timer.h
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -35,8 +35,7 @@ struct kbase_hwcnt_watchdog_interface;
*
* Return: 0 on success, error otherwise.
*/
-int kbase_hwcnt_watchdog_if_timer_create(
- struct kbase_hwcnt_watchdog_interface *watchdog_if);
+int kbase_hwcnt_watchdog_if_timer_create(struct kbase_hwcnt_watchdog_interface *watchdog_if);
/**
* kbase_hwcnt_watchdog_if_timer_destroy() - Destroy a watchdog interface of hardware counter
@@ -44,7 +43,6 @@ int kbase_hwcnt_watchdog_if_timer_create(
*
* @watchdog_if: Pointer to watchdog interface to destroy
*/
-void kbase_hwcnt_watchdog_if_timer_destroy(
- struct kbase_hwcnt_watchdog_interface *watchdog_if);
+void kbase_hwcnt_watchdog_if_timer_destroy(struct kbase_hwcnt_watchdog_interface *watchdog_if);
#endif /* _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_ */
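
A sketch of driving the timer-backed watchdog interface declared above. The field names of struct kbase_hwcnt_watchdog_interface (timer, enable, disable, modify) are taken from the destroy path earlier in this diff; the 100 ms period and `example_expiry` are illustrative only.

static void example_expiry(void *user_data)
{
	/* Runs when the watchdog period elapses. */
}

static int example_watchdog_usage(void)
{
	struct kbase_hwcnt_watchdog_interface wd_if;
	int errcode = kbase_hwcnt_watchdog_if_timer_create(&wd_if);

	if (errcode)
		return errcode;

	/* Arm the timer for 100 ms, then disarm it again. */
	errcode = wd_if.enable(wd_if.timer, 100, example_expiry, NULL);
	if (!errcode)
		wd_if.disable(wd_if.timer);

	kbase_hwcnt_watchdog_if_timer_destroy(&wd_if);
	return errcode;
}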
diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h
index 4479a4b..6089610 100644
--- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h
+++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h
@@ -23,8 +23,8 @@
#define _KBASE_IPA_COUNTER_COMMON_JM_H_
#include "mali_kbase.h"
-#include "mali_kbase_hwcnt_virtualizer.h"
-#include "mali_kbase_hwcnt_types.h"
+#include "hwcnt/mali_kbase_hwcnt_virtualizer.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
/* Maximum number of IPA groups for an IPA model. */
#define KBASE_IPA_MAX_GROUP_DEF_NUM 16
diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c
index 66e56e2..21b4e52 100644
--- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c
+++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -23,10 +23,13 @@
#include "mali_kbase.h"
/* MEMSYS counter block offsets */
+#define L2_RD_MSG_IN_CU (13)
#define L2_RD_MSG_IN (16)
#define L2_WR_MSG_IN (18)
+#define L2_SNP_MSG_IN (20)
#define L2_RD_MSG_OUT (22)
#define L2_READ_LOOKUP (26)
+#define L2_EXT_READ_NOSNP (30)
#define L2_EXT_WRITE_NOSNP_FULL (43)
/* SC counter block offsets */
@@ -36,17 +39,23 @@
#define FULL_QUAD_WARPS (21)
#define EXEC_INSTR_FMA (27)
#define EXEC_INSTR_CVT (28)
+#define EXEC_INSTR_SFU (29)
#define EXEC_INSTR_MSG (30)
#define TEX_FILT_NUM_OPS (39)
#define LS_MEM_READ_SHORT (45)
#define LS_MEM_WRITE_SHORT (47)
#define VARY_SLOT_16 (51)
+#define BEATS_RD_LSC_EXT (57)
+#define BEATS_RD_TEX (58)
+#define BEATS_RD_TEX_EXT (59)
+#define FRAG_QUADS_COARSE (68)
/* Tiler counter block offsets */
#define IDVS_POS_SHAD_STALL (23)
#define PREFETCH_STALL (25)
#define VFETCH_POS_READ_WAIT (29)
#define VFETCH_VERTEX_WAIT (30)
+#define PRIMASSY_STALL (32)
#define IDVS_VAR_SHAD_STALL (38)
#define ITER_STALL (40)
#define PMGR_PTR_RD_STALL (48)
@@ -59,9 +68,6 @@
.counter_block_type = block_type, \
}
-#define CSHW_COUNTER_DEF(cnt_name, coeff, cnt_idx) \
- COUNTER_DEF(cnt_name, coeff, cnt_idx, KBASE_IPA_CORE_TYPE_CSHW)
-
#define MEMSYS_COUNTER_DEF(cnt_name, coeff, cnt_idx) \
COUNTER_DEF(cnt_name, coeff, cnt_idx, KBASE_IPA_CORE_TYPE_MEMSYS)
@@ -114,6 +120,15 @@ static const struct kbase_ipa_counter ipa_top_level_cntrs_def_ttux[] = {
TILER_COUNTER_DEF("vfetch_vertex_wait", -391964, VFETCH_VERTEX_WAIT),
};
+static const struct kbase_ipa_counter ipa_top_level_cntrs_def_ttix[] = {
+ TILER_COUNTER_DEF("primassy_stall", 471953, PRIMASSY_STALL),
+ TILER_COUNTER_DEF("idvs_var_shad_stall", -460559, IDVS_VAR_SHAD_STALL),
+
+ MEMSYS_COUNTER_DEF("l2_rd_msg_in_cu", -6189604, L2_RD_MSG_IN_CU),
+ MEMSYS_COUNTER_DEF("l2_snp_msg_in", 6289609, L2_SNP_MSG_IN),
+ MEMSYS_COUNTER_DEF("l2_ext_read_nosnp", 512341, L2_EXT_READ_NOSNP),
+};
+
/* These tables provide a description of each performance counter
* used by the shader cores counter model for energy estimation.
*/
@@ -153,6 +168,17 @@ static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttux[] = {
SC_COUNTER_DEF("frag_quads_ezs_update", 372032, FRAG_QUADS_EZS_UPDATE),
};
+static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttix[] = {
+ SC_COUNTER_DEF("exec_instr_fma", 192642, EXEC_INSTR_FMA),
+ SC_COUNTER_DEF("exec_instr_msg", 1326465, EXEC_INSTR_MSG),
+ SC_COUNTER_DEF("beats_rd_tex", 163518, BEATS_RD_TEX),
+ SC_COUNTER_DEF("beats_rd_lsc_ext", 127475, BEATS_RD_LSC_EXT),
+ SC_COUNTER_DEF("frag_quads_coarse", -36247, FRAG_QUADS_COARSE),
+ SC_COUNTER_DEF("ls_mem_write_short", 51547, LS_MEM_WRITE_SHORT),
+ SC_COUNTER_DEF("beats_rd_tex_ext", -43370, BEATS_RD_TEX_EXT),
+ SC_COUNTER_DEF("exec_instr_sfu", 31583, EXEC_INSTR_SFU),
+};
+
#define IPA_POWER_MODEL_OPS(gpu, init_token) \
const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \
.name = "mali-" #gpu "-power-model", \
@@ -184,13 +210,13 @@ static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttux[] = {
#define ALIAS_POWER_MODEL(gpu, as_gpu) \
IPA_POWER_MODEL_OPS(gpu, as_gpu)
-/* Reference voltage value is 750 mV.
- */
+/* Reference voltage value is 750 mV. */
STANDARD_POWER_MODEL(todx, 750);
STANDARD_POWER_MODEL(tgrx, 750);
STANDARD_POWER_MODEL(tvax, 750);
-
STANDARD_POWER_MODEL(ttux, 750);
+/* Reference voltage value is 550 mV. */
+STANDARD_POWER_MODEL(ttix, 550);
/* Assuming LODX is an alias of TODX for IPA */
ALIAS_POWER_MODEL(lodx, todx);
@@ -198,10 +224,14 @@ ALIAS_POWER_MODEL(lodx, todx);
/* Assuming LTUX is an alias of TTUX for IPA */
ALIAS_POWER_MODEL(ltux, ttux);
+/* Assuming LTIX is an alias of TTIX for IPA */
+ALIAS_POWER_MODEL(ltix, ttix);
+
static const struct kbase_ipa_model_ops *ipa_counter_model_ops[] = {
&kbase_todx_ipa_model_ops, &kbase_lodx_ipa_model_ops,
&kbase_tgrx_ipa_model_ops, &kbase_tvax_ipa_model_ops,
- &kbase_ttux_ipa_model_ops, &kbase_ltux_ipa_model_ops
+ &kbase_ttux_ipa_model_ops, &kbase_ltux_ipa_model_ops,
+ &kbase_ttix_ipa_model_ops, &kbase_ltix_ipa_model_ops,
};
const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find(
@@ -240,6 +270,10 @@ const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id)
return "mali-ttux-power-model";
case GPU_ID2_PRODUCT_LTUX:
return "mali-ltux-power-model";
+ case GPU_ID2_PRODUCT_TTIX:
+ return "mali-ttix-power-model";
+ case GPU_ID2_PRODUCT_LTIX:
+ return "mali-ltix-power-model";
default:
return NULL;
}
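
For reference, the shape of one table entry behind the *_COUNTER_DEF helpers used in the new TTIX tables above; the coefficient below is a made-up placeholder rather than a characterised value.

/* Illustrative only: a MEMSYS entry pairing a counter name, a scaling
 * coefficient and a block offset defined near the top of this file.
 */
static const struct kbase_ipa_counter example_memsys_cntrs[] = {
	MEMSYS_COUNTER_DEF("l2_rd_msg_in", 1000, L2_RD_MSG_IN),
};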
diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
index eaa2258..5a204ae 100644
--- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
+++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -23,28 +23,19 @@
#include "mali_kbase_ipa_counter_common_jm.h"
#include "mali_kbase.h"
-
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
-#include <backend/gpu/mali_kbase_model_dummy.h>
-#endif /* CONFIG_MALI_NO_MALI */
+#include <backend/gpu/mali_kbase_model_linux.h>
/* Performance counter blocks base offsets */
#define JM_BASE (0 * KBASE_IPA_NR_BYTES_PER_BLOCK)
-#define TILER_BASE (1 * KBASE_IPA_NR_BYTES_PER_BLOCK)
#define MEMSYS_BASE (2 * KBASE_IPA_NR_BYTES_PER_BLOCK)
/* JM counter block offsets */
#define JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 6)
-/* Tiler counter block offsets */
-#define TILER_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 45)
-
/* MEMSYS counter block offsets */
#define MEMSYS_L2_ANY_LOOKUP (KBASE_IPA_NR_BYTES_PER_CNT * 25)
/* SC counter block offsets */
-#define SC_FRAG_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 4)
-#define SC_EXEC_CORE_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 26)
#define SC_EXEC_INSTR_FMA (KBASE_IPA_NR_BYTES_PER_CNT * 27)
#define SC_EXEC_INSTR_COUNT (KBASE_IPA_NR_BYTES_PER_CNT * 28)
#define SC_EXEC_INSTR_MSG (KBASE_IPA_NR_BYTES_PER_CNT * 30)
@@ -52,10 +43,6 @@
#define SC_TEX_COORD_ISSUE (KBASE_IPA_NR_BYTES_PER_CNT * 40)
#define SC_TEX_TFCH_NUM_OPERATIONS (KBASE_IPA_NR_BYTES_PER_CNT * 42)
#define SC_VARY_INSTR (KBASE_IPA_NR_BYTES_PER_CNT * 49)
-#define SC_VARY_SLOT_32 (KBASE_IPA_NR_BYTES_PER_CNT * 50)
-#define SC_VARY_SLOT_16 (KBASE_IPA_NR_BYTES_PER_CNT * 51)
-#define SC_BEATS_RD_LSC (KBASE_IPA_NR_BYTES_PER_CNT * 56)
-#define SC_BEATS_WR_LSC (KBASE_IPA_NR_BYTES_PER_CNT * 61)
#define SC_BEATS_WR_TIB (KBASE_IPA_NR_BYTES_PER_CNT * 62)
/**
@@ -468,16 +455,14 @@ static const struct kbase_ipa_group ipa_groups_def_tbax[] = {
},
};
-
-#define IPA_POWER_MODEL_OPS(gpu, init_token) \
- const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \
- .name = "mali-" #gpu "-power-model", \
- .init = kbase_ ## init_token ## _power_model_init, \
- .term = kbase_ipa_vinstr_common_model_term, \
- .get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \
- .reset_counter_data = kbase_ipa_vinstr_reset_data, \
- }; \
- KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops)
+#define IPA_POWER_MODEL_OPS(gpu, init_token) \
+ static const struct kbase_ipa_model_ops kbase_##gpu##_ipa_model_ops = { \
+ .name = "mali-" #gpu "-power-model", \
+ .init = kbase_##init_token##_power_model_init, \
+ .term = kbase_ipa_vinstr_common_model_term, \
+ .get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \
+ .reset_counter_data = kbase_ipa_vinstr_reset_data, \
+ }
#define STANDARD_POWER_MODEL(gpu, reference_voltage) \
static int kbase_ ## gpu ## _power_model_init(\
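
For illustration only, the reworked IPA_POWER_MODEL_OPS macro above expands roughly as
follows for a hypothetical GPU named "tgpu" (the name, and the assumption that the init
token matches the GPU name, are placeholders rather than anything from the patch):

/* Sketch of IPA_POWER_MODEL_OPS(tgpu, tgpu) after this change: the ops table is
 * now static and no longer exported via KBASE_EXPORT_TEST_API().
 */
static const struct kbase_ipa_model_ops kbase_tgpu_ipa_model_ops = {
	.name = "mali-tgpu-power-model",
	.init = kbase_tgpu_power_model_init,
	.term = kbase_ipa_vinstr_common_model_term,
	.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff,
	.reset_counter_data = kbase_ipa_vinstr_reset_data,
};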
diff --git a/mali_kbase/ipa/mali_kbase_ipa.c b/mali_kbase/ipa/mali_kbase_ipa.c
index 8b8bbd1..0e8abb1 100644
--- a/mali_kbase/ipa/mali_kbase_ipa.c
+++ b/mali_kbase/ipa/mali_kbase_ipa.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -84,11 +84,11 @@ KBASE_EXPORT_TEST_API(kbase_ipa_model_name_from_id);
static struct device_node *get_model_dt_node(struct kbase_ipa_model *model,
bool dt_required)
{
- struct device_node *model_dt_node;
+ struct device_node *model_dt_node = NULL;
char compat_string[64];
- snprintf(compat_string, sizeof(compat_string), "arm,%s",
- model->ops->name);
+ if (unlikely(!scnprintf(compat_string, sizeof(compat_string), "arm,%s", model->ops->name)))
+ return NULL;
/* of_find_compatible_node() will call of_node_put() on the root node,
* so take a reference on it first.
@@ -111,12 +111,12 @@ int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model,
const char *name, s32 *addr,
size_t num_elems, bool dt_required)
{
- int err, i;
+ int err = -EINVAL, i;
struct device_node *model_dt_node = get_model_dt_node(model,
dt_required);
char *origin;
- err = of_property_read_u32_array(model_dt_node, name, addr, num_elems);
+ err = of_property_read_u32_array(model_dt_node, name, (u32 *)addr, num_elems);
/* We're done with model_dt_node now, so drop the reference taken in
* get_model_dt_node()/of_find_compatible_node().
*/
@@ -138,11 +138,17 @@ int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model,
for (i = 0; i < num_elems; ++i) {
char elem_name[32];
- if (num_elems == 1)
- snprintf(elem_name, sizeof(elem_name), "%s", name);
- else
- snprintf(elem_name, sizeof(elem_name), "%s.%d",
- name, i);
+ if (num_elems == 1) {
+ if (unlikely(!scnprintf(elem_name, sizeof(elem_name), "%s", name))) {
+ err = -ENOMEM;
+ goto exit;
+ }
+ } else {
+ if (unlikely(!scnprintf(elem_name, sizeof(elem_name), "%s.%d", name, i))) {
+ err = -ENOMEM;
+ goto exit;
+ }
+ }
dev_dbg(model->kbdev->dev, "%s.%s = %d (%s)\n",
model->ops->name, elem_name, addr[i], origin);
@@ -164,7 +170,7 @@ int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model,
int err;
struct device_node *model_dt_node = get_model_dt_node(model,
dt_required);
- const char *string_prop_value;
+ const char *string_prop_value = "";
char *origin;
err = of_property_read_string(model_dt_node, name,
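
The snprintf()-to-scnprintf() conversions in this file all follow the same defensive
pattern; a minimal standalone sketch of it (illustrative only, the helper name is not
from the patch):

#include <linux/errno.h>
#include <linux/kernel.h>

/* scnprintf() returns the number of characters actually written to the buffer
 * (excluding the trailing NUL), so a zero return means nothing usable was
 * produced; the callers above treat that as an error instead of passing an
 * empty compatible/property name further down.
 */
static int example_build_compat_string(char *buf, size_t len, const char *model_name)
{
	if (unlikely(!scnprintf(buf, len, "arm,%s", model_name)))
		return -ENOMEM;

	return 0;
}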
diff --git a/mali_kbase/ipa/mali_kbase_ipa.h b/mali_kbase/ipa/mali_kbase_ipa.h
index c668af9..4f35b9e 100644
--- a/mali_kbase/ipa/mali_kbase_ipa.h
+++ b/mali_kbase/ipa/mali_kbase_ipa.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -266,7 +266,6 @@ int kbase_get_real_power(struct devfreq *df, u32 *power,
unsigned long freq,
unsigned long voltage);
-#if MALI_UNIT_TEST
/* Called by kbase_get_real_power() to invoke the power models.
* Must be called with kbdev->ipa.lock held.
* This function is only exposed for use by unit tests.
@@ -274,7 +273,6 @@ int kbase_get_real_power(struct devfreq *df, u32 *power,
int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power,
unsigned long freq,
unsigned long voltage);
-#endif /* MALI_UNIT_TEST */
extern struct devfreq_cooling_power kbase_ipa_power_model_ops;
diff --git a/mali_kbase/ipa/mali_kbase_ipa_simple.c b/mali_kbase/ipa/mali_kbase_ipa_simple.c
index f748144..0fd2136 100644
--- a/mali_kbase/ipa/mali_kbase_ipa_simple.c
+++ b/mali_kbase/ipa/mali_kbase_ipa_simple.c
@@ -33,6 +33,8 @@
#include "mali_kbase_ipa_simple.h"
#include "mali_kbase_ipa_debugfs.h"
+#if MALI_USE_CSF
+
/* This is used if the dynamic power for top-level is estimated separately
* through the counter model. To roughly match the contribution of top-level
* power in the total dynamic power, when calculated through counter model,
@@ -43,6 +45,8 @@
*/
#define TOP_LEVEL_DYN_COEFF_SCALER (3)
+#endif /* MALI_USE_CSF */
+
#if MALI_UNIT_TEST
static int dummy_temp;
@@ -227,14 +231,12 @@ static int add_params(struct kbase_ipa_model *model)
(struct kbase_ipa_model_simple_data *)model->model_data;
err = kbase_ipa_model_add_param_s32(model, "static-coefficient",
- &model_data->static_coefficient,
- 1, true);
+ (s32 *)&model_data->static_coefficient, 1, true);
if (err)
goto end;
err = kbase_ipa_model_add_param_s32(model, "dynamic-coefficient",
- &model_data->dynamic_coefficient,
- 1, true);
+ (s32 *)&model_data->dynamic_coefficient, 1, true);
if (err)
goto end;
diff --git a/mali_kbase/jm/mali_kbase_jm_defs.h b/mali_kbase/jm/mali_kbase_jm_defs.h
index 66cf323..639b35f 100644
--- a/mali_kbase/jm/mali_kbase_jm_defs.h
+++ b/mali_kbase/jm/mali_kbase_jm_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -135,10 +135,17 @@
/**
* enum kbase_timeout_selector - The choice of which timeout to get scaled
* using the lowest GPU frequency.
+ * @MMU_AS_INACTIVE_WAIT_TIMEOUT: Maximum waiting time in ms for the completion
+ * of an MMU operation.
+ * @JM_DEFAULT_JS_FREE_TIMEOUT: Maximum timeout to wait for JS_COMMAND_NEXT
+ * to be updated on the HW side so a Job Slot is
+ * considered free.
* @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in
* the enum.
*/
enum kbase_timeout_selector {
+ MMU_AS_INACTIVE_WAIT_TIMEOUT,
+ JM_DEFAULT_JS_FREE_TIMEOUT,
/* Must be the last in the enum */
KBASE_TIMEOUT_SELECTOR_COUNT
@@ -404,16 +411,21 @@ enum kbase_atom_exit_protected_state {
* sync through soft jobs and for the implicit
* synchronization required on access to external
* resources.
- * @dma_fence.fence_in: Input fence
+ * @dma_fence.fence_in: Points to the dma-buf input fence for this atom.
+ * The atom completes only after this fence is
+ * signaled.
* @dma_fence.fence: Points to the dma-buf output fence for this atom.
+ * @dma_fence.fence_cb: The callback object passed when registering the
+ * callback that gets invoked when @dma_fence.fence_in
+ * is signaled.
+ * @dma_fence.fence_cb_added: Flag to track whether the callback was successfully
+ * added for @dma_fence.fence_in; the callback is invoked
+ * when that fence is signaled.
* @dma_fence.context: The dma-buf fence context number for this atom. A
* unique context number is allocated to each katom in
* the context on context creation.
* @dma_fence.seqno: The dma-buf fence sequence number for this atom. This
* is increased every time this katom uses dma-buf fence
- * @dma_fence.callbacks: List of all callbacks set up to wait on other fences
- * @dma_fence.dep_count: Atomic counter of number of outstandind dma-buf fence
- * dependencies for this atom.
* @event_code: Event code for the job chain represented by the atom,
* both HW and low-level SW events are represented by
* event codes.
@@ -520,16 +532,12 @@ struct kbase_jd_atom {
u32 device_nr;
u64 jc;
void *softjob_data;
-#if defined(CONFIG_SYNC)
- struct sync_fence *fence;
- struct sync_fence_waiter sync_waiter;
-#endif /* CONFIG_SYNC */
-#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
+#if IS_ENABLED(CONFIG_SYNC_FILE)
struct {
/* Use the functions/API defined in mali_kbase_fence.h to
* when working with this sub struct
*/
-#if defined(CONFIG_SYNC_FILE)
+#if IS_ENABLED(CONFIG_SYNC_FILE)
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
struct fence *fence_in;
#else
@@ -552,38 +560,21 @@ struct kbase_jd_atom {
#else
struct dma_fence *fence;
#endif
+
+ /* This is the callback object that is registered for the fence_in.
+ * The callback is invoked when the fence_in is signaled.
+ */
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ struct fence_cb fence_cb;
+#else
+ struct dma_fence_cb fence_cb;
+#endif
+ bool fence_cb_added;
+
unsigned int context;
atomic_t seqno;
- /* This contains a list of all callbacks set up to wait on
- * other fences. This atom must be held back from JS until all
- * these callbacks have been called and dep_count have reached
- * 0. The initial value of dep_count must be equal to the
- * number of callbacks on this list.
- *
- * This list is protected by jctx.lock. Callbacks are added to
- * this list when the atom is built and the wait are set up.
- * All the callbacks then stay on the list until all callbacks
- * have been called and the atom is queued, or cancelled, and
- * then all callbacks are taken off the list and freed.
- */
- struct list_head callbacks;
- /* Atomic counter of number of outstandind dma-buf fence
- * dependencies for this atom. When dep_count reaches 0 the
- * atom may be queued.
- *
- * The special value "-1" may only be set after the count
- * reaches 0, while holding jctx.lock. This indicates that the
- * atom has been handled, either queued in JS or cancelled.
- *
- * If anyone but the dma-fence worker sets this to -1 they must
- * ensure that any potentially queued worker must have
- * completed before allowing the atom to be marked as unused.
- * This can be done by flushing the fence work queue:
- * kctx->dma_fence.wq.
- */
- atomic_t dep_count;
} dma_fence;
-#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE */
+#endif /* CONFIG_SYNC_FILE */
/* Note: refer to kbasep_js_atom_retained_state, which will take a copy
* of some of the following members
@@ -602,7 +593,7 @@ struct kbase_jd_atom {
#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS)
int work_id;
#endif
- int slot_nr;
+ unsigned int slot_nr;
u32 atom_flags;
@@ -871,6 +862,10 @@ struct jsctx_queue {
* @pf_data: Data relating to Page fault.
* @bf_data: Data relating to Bus fault.
* @current_setup: Stores the MMU configuration for this address space.
+ * @is_unresponsive: Flag to indicate the MMU is not responding.
+ * Set if an MMU command isn't completed within
+ * &kbase_device.mmu_as_inactive_wait_time_ms.
+ * Cleared by kbase_ctx_sched_restore_all_as() after GPU reset completes.
*/
struct kbase_as {
int number;
@@ -880,6 +875,7 @@ struct kbase_as {
struct kbase_fault pf_data;
struct kbase_fault bf_data;
struct kbase_mmu_setup current_setup;
+ bool is_unresponsive;
};
#endif /* _KBASE_JM_DEFS_H_ */
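
The new fence_cb/fence_cb_added pair replaces the old callbacks list and dep_count
counter: only a single callback is ever registered on @dma_fence.fence_in. A minimal
sketch of how such a callback is typically wired up with the stock dma-fence API
(everything except the dma_fence_* calls is a hypothetical name, not the driver's code):

#include <linux/dma-fence.h>

/* Runs in the fence's signalling context when the input fence signals; a real
 * driver would queue work here rather than complete the atom directly.
 */
static void example_fence_in_signaled(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	/* e.g. schedule a worker that resumes the waiting atom */
}

static int example_wait_on_fence_in(struct dma_fence *fence_in,
				    struct dma_fence_cb *cb, bool *cb_added)
{
	int err = dma_fence_add_callback(fence_in, cb, example_fence_in_signaled);

	if (!err)
		*cb_added = true;	/* must dma_fence_remove_callback() if cancelled */
	else if (err == -ENOENT)
		err = 0;		/* fence already signalled, nothing to wait for */

	return err;
}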
diff --git a/mali_kbase/jm/mali_kbase_jm_js.h b/mali_kbase/jm/mali_kbase_jm_js.h
index d03bcc0..53819ca 100644
--- a/mali_kbase/jm/mali_kbase_jm_js.h
+++ b/mali_kbase/jm/mali_kbase_jm_js.h
@@ -132,15 +132,15 @@ void kbasep_js_kctx_term(struct kbase_context *kctx);
* Atoms of higher priority might still be able to be pulled from the context
* on @js. This helps with starting a high priority atom as soon as possible.
*/
-static inline void kbase_jsctx_slot_prio_blocked_set(struct kbase_context *kctx,
- int js, int sched_prio)
+static inline void kbase_jsctx_slot_prio_blocked_set(struct kbase_context *kctx, unsigned int js,
+ int sched_prio)
{
struct kbase_jsctx_slot_tracking *slot_tracking =
&kctx->slot_tracking[js];
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
WARN(!slot_tracking->atoms_pulled_pri[sched_prio],
- "When marking slot %d as blocked for priority %d on a kctx, no atoms were pulled - the slot cannot become unblocked",
+ "When marking slot %u as blocked for priority %d on a kctx, no atoms were pulled - the slot cannot become unblocked",
js, sched_prio);
slot_tracking->blocked |= ((kbase_js_prio_bitmap_t)1) << sched_prio;
@@ -510,19 +510,6 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
struct kbase_jd_atom *katom);
/**
- * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer.
- * @kctx: Context Pointer
- * @prio: Priority (specifies the queue together with js).
- * @js: Job slot (specifies the queue together with prio).
- *
- * Pushes all possible atoms from the linked list to the ringbuffer.
- * Number of atoms are limited to free space in the ringbuffer and
- * number of available atoms in the linked list.
- *
- */
-void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js);
-
-/**
* kbase_js_pull - Pull an atom from a context in the job scheduler for
* execution.
*
@@ -536,7 +523,7 @@ void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js);
* Return: a pointer to an atom, or NULL if there are no atoms for this
* slot that can be currently run.
*/
-struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js);
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, unsigned int js);
/**
* kbase_js_unpull - Return an atom to the job scheduler ringbuffer.
@@ -617,7 +604,7 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *katom);
* been used.
*
*/
-void kbase_js_sched(struct kbase_device *kbdev, int js_mask);
+void kbase_js_sched(struct kbase_device *kbdev, unsigned int js_mask);
/**
* kbase_js_zap_context - Attempt to deschedule a context that is being
diff --git a/mali_kbase/jm/mali_kbase_js_defs.h b/mali_kbase/jm/mali_kbase_js_defs.h
index 15576fb..5023eaa 100644
--- a/mali_kbase/jm/mali_kbase_js_defs.h
+++ b/mali_kbase/jm/mali_kbase_js_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2011-2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -277,6 +277,7 @@ typedef u32 kbase_atom_ordering_flag_t;
* @nr_contexts_runnable:Number of contexts that can either be pulled from or
* arecurrently running
* @soft_job_timeout_ms:Value for JS_SOFT_JOB_TIMEOUT
+ * @js_free_wait_time_ms: Maximum waiting time in ms for a Job Slot to be seen free.
* @queue_mutex: Queue Lock, used to access the Policy's queue of contexts
* independently of the Run Pool.
* Of course, you don't need the Run Pool lock to access this.
@@ -329,6 +330,8 @@ struct kbasep_js_device_data {
u32 nr_contexts_pullable;
atomic_t nr_contexts_runnable;
atomic_t soft_job_timeout_ms;
+ u32 js_free_wait_time_ms;
+
struct rt_mutex queue_mutex;
/*
* Run Pool mutex, for managing contexts within the runpool.
diff --git a/mali_kbase/mali_base_hwconfig_features.h b/mali_kbase/mali_base_hwconfig_features.h
index 3669f7e..11aedef 100644
--- a/mali_kbase/mali_base_hwconfig_features.h
+++ b/mali_kbase/mali_base_hwconfig_features.h
@@ -39,6 +39,8 @@ enum base_hw_feature {
BASE_HW_FEATURE_GPU_SLEEP,
BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
BASE_HW_FEATURE_CORE_FEATURES,
+ BASE_HW_FEATURE_PBHA_HWU,
+ BASE_HW_FEATURE_LARGE_PAGE_ALLOC,
BASE_HW_FEATURE_END
};
@@ -130,25 +132,24 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tBAx[
BASE_HW_FEATURE_END
};
-__attribute__((unused)) static const enum base_hw_feature base_hw_features_tDUx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tODx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
- BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_L2_CONFIG,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
- BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
BASE_HW_FEATURE_END
};
-__attribute__((unused)) static const enum base_hw_feature base_hw_features_tODx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tGRx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_L2_CONFIG,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+ BASE_HW_FEATURE_CORE_FEATURES,
BASE_HW_FEATURE_END
};
-__attribute__((unused)) static const enum base_hw_feature base_hw_features_tGRx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tVAx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_L2_CONFIG,
@@ -157,16 +158,18 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tGRx[
BASE_HW_FEATURE_END
};
-__attribute__((unused)) static const enum base_hw_feature base_hw_features_tVAx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tTUx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_L2_CONFIG,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+ BASE_HW_FEATURE_ASN_HASH,
+ BASE_HW_FEATURE_GPU_SLEEP,
BASE_HW_FEATURE_CORE_FEATURES,
BASE_HW_FEATURE_END
};
-__attribute__((unused)) static const enum base_hw_feature base_hw_features_tTUx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tTIx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_L2_CONFIG,
@@ -174,6 +177,7 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tTUx[
BASE_HW_FEATURE_ASN_HASH,
BASE_HW_FEATURE_GPU_SLEEP,
BASE_HW_FEATURE_CORE_FEATURES,
+ BASE_HW_FEATURE_PBHA_HWU,
BASE_HW_FEATURE_END
};
diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h
index 3917301..0fbdec0 100644
--- a/mali_kbase/mali_base_hwconfig_issues.h
+++ b/mali_kbase/mali_base_hwconfig_issues.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -64,6 +64,9 @@ enum base_hw_issue {
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_TITANHW_2679,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -88,6 +91,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -108,6 +113,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -128,6 +135,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p1
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -143,6 +152,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tMI
BASE_HW_ISSUE_TMIX_8343,
BASE_HW_ISSUE_TMIX_8456,
BASE_HW_ISSUE_TSIX_2033,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -156,6 +167,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p0
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -169,6 +182,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p1
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -182,6 +197,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p2
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -194,6 +211,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p3
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -204,6 +223,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tHE
BASE_HW_ISSUE_TMIX_8042,
BASE_HW_ISSUE_TMIX_8133,
BASE_HW_ISSUE_TSIX_2033,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -217,6 +238,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p0
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -230,6 +253,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p1
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -242,6 +267,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p0
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -253,6 +280,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p1
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -263,6 +292,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tSI
BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -274,6 +305,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tDVx_r0p0
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -284,6 +317,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tDV
BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -296,6 +331,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNOx_r0p0
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -306,6 +343,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNO
BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -318,6 +357,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r0p0
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -330,6 +371,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r1p0
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -340,6 +383,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGO
BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -356,6 +401,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p0
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TTRX_3485,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -372,6 +419,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p1
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TTRX_3485,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -387,6 +436,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p2
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -399,6 +450,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTR
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -415,6 +468,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p0
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TTRX_3485,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -430,6 +485,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p1
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -442,6 +499,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNA
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -456,6 +515,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p0
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TTRX_3485,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -469,6 +530,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p1
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -482,6 +545,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p0
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -495,6 +560,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p1
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -507,6 +574,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBE
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -521,6 +590,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p0
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TTRX_3485,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -534,6 +605,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p1
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -547,6 +620,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p0
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -560,6 +635,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r1p0
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -572,90 +649,74 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBA
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
- BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = {
- BASE_HW_ISSUE_9435,
- BASE_HW_ISSUE_TSIX_2033,
- BASE_HW_ISSUE_TTRX_1337,
- BASE_HW_ISSUE_TTRX_921,
- BASE_HW_ISSUE_TTRX_3414,
- BASE_HW_ISSUE_TTRX_3083,
- BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tDUx[] = {
- BASE_HW_ISSUE_5736,
- BASE_HW_ISSUE_9435,
- BASE_HW_ISSUE_TSIX_2033,
- BASE_HW_ISSUE_TTRX_1337,
- BASE_HW_ISSUE_TTRX_3414,
- BASE_HW_ISSUE_TTRX_3083,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = {
- BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3212,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_GPU2019_3901,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tODx[] = {
- BASE_HW_ISSUE_5736,
- BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3212,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_GPU2019_3901,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = {
- BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_GPU2019_3901,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGRx[] = {
- BASE_HW_ISSUE_5736,
- BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_GPU2019_3901,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = {
- BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_GPU2019_3901,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVAx[] = {
- BASE_HW_ISSUE_5736,
- BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_GPU2019_3901,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = {
- BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_TURSEHW_1997,
@@ -663,40 +724,110 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_TITANHW_2679,
+ BASE_HW_ISSUE_GPU2022PRO_148,
+ BASE_HW_ISSUE_END
+};
+
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p1[] = {
+ BASE_HW_ISSUE_TSIX_2033,
+ BASE_HW_ISSUE_TTRX_1337,
+ BASE_HW_ISSUE_TURSEHW_1997,
+ BASE_HW_ISSUE_GPU2019_3878,
+ BASE_HW_ISSUE_TURSEHW_2716,
+ BASE_HW_ISSUE_GPU2019_3901,
+ BASE_HW_ISSUE_GPU2021PRO_290,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_TITANHW_2679,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTUx[] = {
- BASE_HW_ISSUE_5736,
- BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_TITANHW_2679,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p0[] = {
- BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_TITANHW_2679,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p1[] = {
- BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_TITANHW_2679,
+ BASE_HW_ISSUE_GPU2022PRO_148,
+ BASE_HW_ISSUE_END
+};
+
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p2[] = {
+ BASE_HW_ISSUE_TSIX_2033,
+ BASE_HW_ISSUE_TTRX_1337,
+ BASE_HW_ISSUE_GPU2019_3878,
+ BASE_HW_ISSUE_TURSEHW_2716,
+ BASE_HW_ISSUE_GPU2019_3901,
+ BASE_HW_ISSUE_GPU2021PRO_290,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_TITANHW_2679,
+ BASE_HW_ISSUE_GPU2022PRO_148,
+ BASE_HW_ISSUE_END
+};
+
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p3[] = {
+ BASE_HW_ISSUE_TSIX_2033,
+ BASE_HW_ISSUE_TTRX_1337,
+ BASE_HW_ISSUE_GPU2019_3878,
+ BASE_HW_ISSUE_TURSEHW_2716,
+ BASE_HW_ISSUE_GPU2019_3901,
+ BASE_HW_ISSUE_GPU2021PRO_290,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_TITANHW_2679,
+ BASE_HW_ISSUE_GPU2022PRO_148,
+ BASE_HW_ISSUE_END
+};
+
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTIx[] = {
+ BASE_HW_ISSUE_TSIX_2033,
+ BASE_HW_ISSUE_TTRX_1337,
+ BASE_HW_ISSUE_TURSEHW_2716,
+ BASE_HW_ISSUE_GPU2021PRO_290,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_TITANHW_2679,
+ BASE_HW_ISSUE_GPU2022PRO_148,
+ BASE_HW_ISSUE_END
+};
+
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTIx_r0p0[] = {
+ BASE_HW_ISSUE_TSIX_2033,
+ BASE_HW_ISSUE_TTRX_1337,
+ BASE_HW_ISSUE_TURSEHW_2716,
+ BASE_HW_ISSUE_GPU2021PRO_290,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_TITANHW_2679,
+ BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
diff --git a/mali_kbase/mali_kbase.h b/mali_kbase/mali_kbase.h
index 0a8267b..7de793c 100644
--- a/mali_kbase/mali_kbase.h
+++ b/mali_kbase/mali_kbase.h
@@ -75,7 +75,9 @@
#include "mali_kbase_jd_debugfs.h"
#include "mali_kbase_jm.h"
#include "mali_kbase_js.h"
-#endif /* !MALI_USE_CSF */
+#else /* !MALI_USE_CSF */
+#include "csf/mali_kbase_debug_csf_fault.h"
+#endif /* MALI_USE_CSF */
#include "ipa/mali_kbase_ipa.h"
@@ -338,21 +340,8 @@ int kbase_job_slot_softstop_start_rp(struct kbase_context *kctx,
void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
struct kbase_jd_atom *target_katom);
-void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
- struct kbase_jd_atom *target_katom, u32 sw_flags);
-
-/**
- * kbase_job_slot_hardstop - Hard-stop the specified job slot
- * @kctx: The kbase context that contains the job(s) that should
- * be hard-stopped
- * @js: The job slot to hard-stop
- * @target_katom: The job that should be hard-stopped (or NULL for all
- * jobs from the context)
- * Context:
- * The job slot lock must be held when calling this function.
- */
-void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
- struct kbase_jd_atom *target_katom);
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, unsigned int js,
+ struct kbase_jd_atom *target_katom, u32 sw_flags);
/**
 * kbase_job_check_enter_disjoint - potentially enter disjoint mode
@@ -454,7 +443,7 @@ void kbase_finish_soft_job(struct kbase_jd_atom *katom);
void kbase_cancel_soft_job(struct kbase_jd_atom *katom);
void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev);
void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom);
-#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#if IS_ENABLED(CONFIG_SYNC_FILE)
void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom);
#endif
int kbase_soft_event_update(struct kbase_context *kctx,
@@ -644,11 +633,6 @@ int kbase_pm_handle_runtime_suspend(struct kbase_device *kbdev);
*/
int kbase_pm_force_mcu_wakeup_after_sleep(struct kbase_device *kbdev);
-#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
-void kbase_pm_turn_on_sc_power_rails_locked(struct kbase_device *kbdev);
-void kbase_pm_turn_on_sc_power_rails(struct kbase_device *kbdev);
-void kbase_pm_turn_off_sc_power_rails(struct kbase_device *kbdev);
-#endif
#endif
#if !MALI_USE_CSF
diff --git a/mali_kbase/mali_kbase_as_fault_debugfs.c b/mali_kbase/mali_kbase_as_fault_debugfs.c
index 77f450d..ad33691 100644
--- a/mali_kbase/mali_kbase_as_fault_debugfs.c
+++ b/mali_kbase/mali_kbase_as_fault_debugfs.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -98,11 +98,9 @@ void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
"unable to create address_spaces debugfs directory");
} else {
for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
- snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i);
- debugfs_create_file(as_name, 0444,
- debugfs_directory,
- (void *)(uintptr_t)i,
- &as_fault_fops);
+ if (likely(scnprintf(as_name, ARRAY_SIZE(as_name), "as%u", i)))
+ debugfs_create_file(as_name, 0444, debugfs_directory,
+ (void *)(uintptr_t)i, &as_fault_fops);
}
}
diff --git a/mali_kbase/mali_kbase_config_defaults.h b/mali_kbase/mali_kbase_config_defaults.h
index 60fe2ce..c99ad52 100644
--- a/mali_kbase/mali_kbase_config_defaults.h
+++ b/mali_kbase/mali_kbase_config_defaults.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -232,6 +232,16 @@ enum {
*/
#define JM_DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
+/* Default timeout in clock cycles to be used when checking if JS_COMMAND_NEXT
+ * has been updated on the HW side so that a Job Slot is considered free.
+ * This timeout only takes effect on GPUs with a low minimum GPU clock
+ * frequency (<= 100MHz).
+ *
+ * Based on a 1ms timeout at 100MHz. Defaults to 0ms on GPUs with a higher
+ * minimum GPU clock frequency.
+ */
+#define JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES (100000)
+
#endif /* MALI_USE_CSF */
/* Default timeslice that a context is scheduled in for, in nanoseconds.
@@ -268,5 +278,12 @@ enum {
*/
#define DEFAULT_IR_THRESHOLD (192)
+/* Waiting time in clock cycles for the completion of an MMU operation.
+ *
+ * Ideally, 1.6M GPU cycles are required for the flush of the L2 cache
+ * (512KiB slice).
+ *
+ * As a pessimistic value, 50M GPU cycles (more than 30 times bigger) is
+ * chosen. It corresponds to 0.5s on a GPU running at 100MHz.
+ */
+#define MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES ((u64)50 * 1024 * 1024)
#endif /* _KBASE_CONFIG_DEFAULTS_H_ */
-
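
Both new defaults are expressed in GPU cycles and are intended to be scaled by the
lowest expected GPU clock frequency, per the timeout-selector documentation earlier in
this patch. As a worked example, 100000 cycles at 100MHz is 1ms, and 50 * 1024 * 1024
cycles at 100MHz is roughly 524ms. A purely illustrative helper for that conversion
(not part of kbase) could look like:

#include <linux/math64.h>
#include <linux/types.h>

/* Scale a cycle-based timeout into milliseconds using the lowest GPU clock
 * frequency, e.g. 100000 cycles @ 100MHz -> 1ms and 50 * 1024 * 1024 cycles
 * @ 100MHz -> ~524ms.
 */
static inline u64 example_cycles_to_ms(u64 timeout_cycles, u64 min_freq_hz)
{
	return div64_u64(timeout_cycles * 1000ULL, min_freq_hz);
}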
diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c
index 2d38767..c31994c 100644
--- a/mali_kbase/mali_kbase_core_linux.c
+++ b/mali_kbase/mali_kbase_core_linux.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -31,11 +31,8 @@
#include <ipa/mali_kbase_ipa_debugfs.h>
#endif /* CONFIG_DEVFREQ_THERMAL */
#endif /* CONFIG_MALI_DEVFREQ */
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
#include "backend/gpu/mali_kbase_model_linux.h"
-#include <backend/gpu/mali_kbase_model_dummy.h>
-#endif /* CONFIG_MALI_NO_MALI */
-#include "mali_kbase_mem_profile_debugfs_buf_size.h"
+#include "uapi/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h"
#include "mali_kbase_mem.h"
#include "mali_kbase_mem_pool_debugfs.h"
#include "mali_kbase_mem_pool_group.h"
@@ -54,8 +51,8 @@
#if !MALI_USE_CSF
#include "mali_kbase_kinstr_jm.h"
#endif
-#include "mali_kbase_hwcnt_context.h"
-#include "mali_kbase_hwcnt_virtualizer.h"
+#include "hwcnt/mali_kbase_hwcnt_context.h"
+#include "hwcnt/mali_kbase_hwcnt_virtualizer.h"
#include "mali_kbase_kinstr_prfcnt.h"
#include "mali_kbase_vinstr.h"
#if MALI_USE_CSF
@@ -98,15 +95,16 @@
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/interrupt.h>
+#include <linux/irq.h>
#include <linux/mm.h>
#include <linux/compat.h> /* is_compat_task/in_compat_syscall */
#include <linux/mman.h>
#include <linux/version.h>
#include <linux/version_compat_defs.h>
#include <mali_kbase_hw.h>
-#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#if IS_ENABLED(CONFIG_SYNC_FILE)
#include <mali_kbase_sync.h>
-#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
+#endif /* CONFIG_SYNC_FILE */
#include <linux/clk.h>
#include <linux/clk-provider.h>
#include <linux/delay.h>
@@ -125,11 +123,6 @@
#include <mali_kbase_caps.h>
-/* GPU IRQ Tags */
-#define JOB_IRQ_TAG 0
-#define MMU_IRQ_TAG 1
-#define GPU_IRQ_TAG 2
-
#define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)"
/**
@@ -141,9 +134,6 @@
(((minor) & 0xFFF) << 8) | \
((0 & 0xFF) << 0))
-#define KBASE_API_MIN(api_version) ((api_version >> 8) & 0xFFF)
-#define KBASE_API_MAJ(api_version) ((api_version >> 20) & 0xFFF)
-
/**
* struct mali_kbase_capability_def - kbase capabilities table
*
@@ -501,6 +491,12 @@ static struct kbase_device *to_kbase_device(struct device *dev)
int assign_irqs(struct kbase_device *kbdev)
{
+ static const char *const irq_names_caps[] = { "JOB", "MMU", "GPU" };
+
+#if IS_ENABLED(CONFIG_OF)
+ static const char *const irq_names[] = { "job", "mmu", "gpu" };
+#endif
+
struct platform_device *pdev;
int i;
@@ -508,34 +504,31 @@ int assign_irqs(struct kbase_device *kbdev)
return -ENODEV;
pdev = to_platform_device(kbdev->dev);
- /* 3 IRQ resources */
- for (i = 0; i < 3; i++) {
- struct resource *irq_res;
- int irqtag;
-
- irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
- if (!irq_res) {
- dev_err(kbdev->dev, "No IRQ resource at index %d\n", i);
- return -ENOENT;
- }
+
+ for (i = 0; i < ARRAY_SIZE(irq_names_caps); i++) {
+ int irq;
#if IS_ENABLED(CONFIG_OF)
- if (!strncasecmp(irq_res->name, "JOB", 4)) {
- irqtag = JOB_IRQ_TAG;
- } else if (!strncasecmp(irq_res->name, "MMU", 4)) {
- irqtag = MMU_IRQ_TAG;
- } else if (!strncasecmp(irq_res->name, "GPU", 4)) {
- irqtag = GPU_IRQ_TAG;
- } else {
- dev_err(&pdev->dev, "Invalid irq res name: '%s'\n",
- irq_res->name);
- return -EINVAL;
- }
+ /* Upper-case IRQ names are recommended in the DT, but devices using
+ * lower-case names exist and support for them should not be broken.
+ * So try the upper-case names first, then fall back to the lower-case
+ * names. If both attempts fail, assume there is no IRQ resource
+ * specified for the GPU.
+ */
+ irq = platform_get_irq_byname(pdev, irq_names_caps[i]);
+ if (irq < 0)
+ irq = platform_get_irq_byname(pdev, irq_names[i]);
#else
- irqtag = i;
+ irq = platform_get_irq(pdev, i);
#endif /* CONFIG_OF */
- kbdev->irqs[irqtag].irq = irq_res->start;
- kbdev->irqs[irqtag].flags = irq_res->flags & IRQF_TRIGGER_MASK;
+
+ if (irq < 0) {
+ dev_err(kbdev->dev, "No IRQ resource '%s'\n", irq_names_caps[i]);
+ return irq;
+ }
+
+ kbdev->irqs[i].irq = irq;
+ kbdev->irqs[i].flags = irqd_get_trigger_type(irq_get_irq_data(irq));
}
return 0;
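
The rewritten loop resolves the three GPU interrupts by name rather than by raw
resource index. A compact sketch of the same upper-case-then-lower-case fallback,
using only stock platform-device helpers (the wrapper itself is illustrative, not
taken from the patch):

#include <linux/platform_device.h>

/* Try the recommended upper-case name first, then the lower-case spelling used
 * by some older device trees; returns the Linux IRQ number or a negative errno
 * if neither name is present.
 */
static int example_get_gpu_irq(struct platform_device *pdev,
			       const char *name_caps, const char *name_lower)
{
	int irq = platform_get_irq_byname(pdev, name_caps);

	if (irq < 0)
		irq = platform_get_irq_byname(pdev, name_lower);

	return irq;
}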
@@ -693,7 +686,8 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile,
kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE);
#if IS_ENABLED(CONFIG_DEBUG_FS)
- snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id);
+ if (unlikely(!scnprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id)))
+ return -ENOMEM;
mutex_init(&kctx->mem_profile_lock);
@@ -732,6 +726,11 @@ static int kbase_open(struct inode *inode, struct file *filp)
if (!kbdev)
return -ENODEV;
+#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE)
+ /* Set address space operations for page migration */
+ kbase_mem_migrate_set_address_space_ops(kbdev, filp);
+#endif
+
/* Device-wide firmware load is moved here from probing to comply with
* Android GKI vendor guideline.
*/
@@ -1125,52 +1124,11 @@ static int kbase_api_get_ddk_version(struct kbase_context *kctx,
return len;
}
-/* Defaults for legacy just-in-time memory allocator initialization
- * kernel calls
- */
-#define DEFAULT_MAX_JIT_ALLOCATIONS 255
-#define JIT_LEGACY_TRIM_LEVEL (0) /* No trimming */
-
-static int kbase_api_mem_jit_init_10_2(struct kbase_context *kctx,
- struct kbase_ioctl_mem_jit_init_10_2 *jit_init)
-{
- kctx->jit_version = 1;
-
- /* since no phys_pages parameter, use the maximum: va_pages */
- return kbase_region_tracker_init_jit(kctx, jit_init->va_pages,
- DEFAULT_MAX_JIT_ALLOCATIONS,
- JIT_LEGACY_TRIM_LEVEL, BASE_MEM_GROUP_DEFAULT,
- jit_init->va_pages);
-}
-
-static int kbase_api_mem_jit_init_11_5(struct kbase_context *kctx,
- struct kbase_ioctl_mem_jit_init_11_5 *jit_init)
-{
- int i;
-
- kctx->jit_version = 2;
-
- for (i = 0; i < sizeof(jit_init->padding); i++) {
- /* Ensure all padding bytes are 0 for potential future
- * extension
- */
- if (jit_init->padding[i])
- return -EINVAL;
- }
-
- /* since no phys_pages parameter, use the maximum: va_pages */
- return kbase_region_tracker_init_jit(kctx, jit_init->va_pages,
- jit_init->max_allocations, jit_init->trim_level,
- jit_init->group_id, jit_init->va_pages);
-}
-
static int kbase_api_mem_jit_init(struct kbase_context *kctx,
struct kbase_ioctl_mem_jit_init *jit_init)
{
int i;
- kctx->jit_version = 3;
-
for (i = 0; i < sizeof(jit_init->padding); i++) {
/* Ensure all padding bytes are 0 for potential future
* extension
@@ -1328,7 +1286,7 @@ static int kbase_api_mem_flags_change(struct kbase_context *kctx,
static int kbase_api_stream_create(struct kbase_context *kctx,
struct kbase_ioctl_stream_create *stream)
{
-#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#if IS_ENABLED(CONFIG_SYNC_FILE)
int fd, ret;
/* Name must be NULL-terminated and padded with NULLs, so check last
@@ -1350,7 +1308,7 @@ static int kbase_api_stream_create(struct kbase_context *kctx,
static int kbase_api_fence_validate(struct kbase_context *kctx,
struct kbase_ioctl_fence_validate *validate)
{
-#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#if IS_ENABLED(CONFIG_SYNC_FILE)
return kbase_sync_fence_validate(validate->fd);
#else
return -ENOENT;
@@ -1364,12 +1322,18 @@ static int kbase_api_mem_profile_add(struct kbase_context *kctx,
int err;
if (data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) {
- dev_err(kctx->kbdev->dev, "mem_profile_add: buffer too big\n");
+ dev_err(kctx->kbdev->dev, "mem_profile_add: buffer too big");
return -EINVAL;
}
+ if (!data->len) {
+ dev_err(kctx->kbdev->dev, "mem_profile_add: buffer size is 0");
+ /* Should return -EINVAL, but returning -ENOMEM for backwards compat */
+ return -ENOMEM;
+ }
+
buf = kmalloc(data->len, GFP_KERNEL);
- if (ZERO_OR_NULL_PTR(buf))
+ if (!buf)
return -ENOMEM;
err = copy_from_user(buf, u64_to_user_ptr(data->buffer),
@@ -1591,6 +1555,9 @@ static int kbasep_cs_tiler_heap_init(struct kbase_context *kctx,
static int kbasep_cs_tiler_heap_init_1_13(struct kbase_context *kctx,
union kbase_ioctl_cs_tiler_heap_init_1_13 *heap_init)
{
+ if (heap_init->in.group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)
+ return -EINVAL;
+
kctx->jit_group_id = heap_init->in.group_id;
return kbase_csf_tiler_heap_init(kctx, heap_init->in.chunk_size,
@@ -1679,7 +1646,6 @@ static int kbasep_ioctl_cs_cpu_queue_dump(struct kbase_context *kctx,
cpu_queue_info->size);
}
-#define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1)
static int kbase_ioctl_read_user_page(struct kbase_context *kctx,
union kbase_ioctl_read_user_page *user_page)
{
@@ -1914,18 +1880,6 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
struct kbase_ioctl_get_ddk_version,
kctx);
break;
- case KBASE_IOCTL_MEM_JIT_INIT_10_2:
- KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT_10_2,
- kbase_api_mem_jit_init_10_2,
- struct kbase_ioctl_mem_jit_init_10_2,
- kctx);
- break;
- case KBASE_IOCTL_MEM_JIT_INIT_11_5:
- KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT_11_5,
- kbase_api_mem_jit_init_11_5,
- struct kbase_ioctl_mem_jit_init_11_5,
- kctx);
- break;
case KBASE_IOCTL_MEM_JIT_INIT:
KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT,
kbase_api_mem_jit_init,
@@ -2186,6 +2140,7 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
struct kbase_ioctl_cs_cpu_queue_info,
kctx);
break;
+ /* This IOCTL will be kept for backward compatibility */
case KBASE_IOCTL_READ_USER_PAGE:
KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_READ_USER_PAGE, kbase_ioctl_read_user_page,
union kbase_ioctl_read_user_page, kctx);
@@ -2365,7 +2320,10 @@ KBASE_EXPORT_TEST_API(_kbase_event_wakeup);
#if MALI_USE_CSF
int kbase_event_pending(struct kbase_context *ctx)
{
- WARN_ON_ONCE(!ctx);
+ KBASE_DEBUG_ASSERT(ctx);
+
+ if (unlikely(!ctx))
+ return -EPERM;
return (atomic_read(&ctx->event_count) != 0) ||
kbase_csf_event_error_pending(ctx) ||
@@ -2376,6 +2334,9 @@ int kbase_event_pending(struct kbase_context *ctx)
{
KBASE_DEBUG_ASSERT(ctx);
+ if (unlikely(!ctx))
+ return -EPERM;
+
return (atomic_read(&ctx->event_count) != 0) ||
(atomic_read(&ctx->event_closed) != 0);
}
@@ -3345,6 +3306,10 @@ static ssize_t gpuinfo_show(struct device *dev,
.name = "Mali-G510" },
{ .id = GPU_ID2_PRODUCT_TVAX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-G310" },
+ { .id = GPU_ID2_PRODUCT_TTIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+ .name = "Mali-TTIX" },
+ { .id = GPU_ID2_PRODUCT_LTIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+ .name = "Mali-LTIX" },
};
const char *product_name = "(Unknown Mali GPU)";
struct kbase_device *kbdev;
@@ -4428,7 +4393,7 @@ static int kbase_common_reg_map(struct kbase_device *kbdev)
static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
{
}
-#else /* CONFIG_MALI_NO_MALI */
+#else /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
static int kbase_common_reg_map(struct kbase_device *kbdev)
{
int err = 0;
@@ -4464,7 +4429,7 @@ static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
kbdev->reg_size = 0;
}
}
-#endif /* CONFIG_MALI_NO_MALI */
+#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
int registers_map(struct kbase_device * const kbdev)
{
@@ -4668,7 +4633,7 @@ int power_control_init(struct kbase_device *kbdev)
for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
kbdev->regulators[i] = regulator_get_optional(kbdev->dev,
regulator_names[i]);
- if (IS_ERR_OR_NULL(kbdev->regulators[i])) {
+ if (IS_ERR(kbdev->regulators[i])) {
err = PTR_ERR(kbdev->regulators[i]);
kbdev->regulators[i] = NULL;
break;
@@ -4696,7 +4661,7 @@ int power_control_init(struct kbase_device *kbdev)
*/
for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
kbdev->clocks[i] = of_clk_get(kbdev->dev->of_node, i);
- if (IS_ERR_OR_NULL(kbdev->clocks[i])) {
+ if (IS_ERR(kbdev->clocks[i])) {
err = PTR_ERR(kbdev->clocks[i]);
kbdev->clocks[i] = NULL;
break;
@@ -4728,18 +4693,29 @@ int power_control_init(struct kbase_device *kbdev)
* from completing its initialization.
*/
#if defined(CONFIG_PM_OPP)
-#if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \
- defined(CONFIG_REGULATOR))
+#if defined(CONFIG_REGULATOR)
+#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE)
+ if (kbdev->nr_regulators > 0) {
+ kbdev->token = dev_pm_opp_set_regulators(kbdev->dev, regulator_names);
+
+ if (kbdev->token < 0) {
+ err = kbdev->token;
+ goto regulators_probe_defer;
+ }
+
+ }
+#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE)
if (kbdev->nr_regulators > 0) {
kbdev->opp_table = dev_pm_opp_set_regulators(kbdev->dev,
regulator_names, BASE_MAX_NR_CLOCKS_REGULATORS);
- if (IS_ERR_OR_NULL(kbdev->opp_table)) {
+ if (IS_ERR(kbdev->opp_table)) {
err = PTR_ERR(kbdev->opp_table);
goto regulators_probe_defer;
}
}
-#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */
+#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */
+#endif /* CONFIG_REGULATOR */
err = dev_pm_opp_of_add_table(kbdev->dev);
CSTD_UNUSED(err);
#endif /* CONFIG_PM_OPP */
@@ -4774,11 +4750,15 @@ void power_control_term(struct kbase_device *kbdev)
#if defined(CONFIG_PM_OPP)
dev_pm_opp_of_remove_table(kbdev->dev);
-#if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \
- defined(CONFIG_REGULATOR))
+#if defined(CONFIG_REGULATOR)
+#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE)
+ if (kbdev->token > -EPERM)
+ dev_pm_opp_put_regulators(kbdev->token);
+#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE)
if (!IS_ERR_OR_NULL(kbdev->opp_table))
dev_pm_opp_put_regulators(kbdev->opp_table);
-#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */
+#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */
+#endif /* CONFIG_REGULATOR */
#endif /* CONFIG_PM_OPP */
for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
@@ -4982,52 +4962,84 @@ static const struct file_operations
.release = single_release,
};
-int kbase_device_debugfs_init(struct kbase_device *kbdev)
+/**
+ * debugfs_ctx_defaults_init - Create the default configuration of new contexts in debugfs
+ * @kbdev: An instance of the GPU platform device, allocated from the probe method of the driver.
+ * Return: A pointer to the last dentry that it tried to create, whether successful or not.
+ * Could be NULL or encode another error value.
+ */
+static struct dentry *debugfs_ctx_defaults_init(struct kbase_device *const kbdev)
{
- struct dentry *debugfs_ctx_defaults_directory;
- int err;
/* prevent unprivileged use of debug file system
* in old kernel version
*/
const mode_t mode = 0644;
+ struct dentry *dentry = debugfs_create_dir("defaults", kbdev->debugfs_ctx_directory);
+ struct dentry *debugfs_ctx_defaults_directory = dentry;
+
+ if (IS_ERR_OR_NULL(dentry)) {
+ dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n");
+ return dentry;
+ }
+
+ debugfs_create_bool("infinite_cache", mode,
+ debugfs_ctx_defaults_directory,
+ &kbdev->infinite_cache_active_default);
+
+ dentry = debugfs_create_file("mem_pool_max_size", mode, debugfs_ctx_defaults_directory,
+ &kbdev->mem_pool_defaults.small,
+ &kbase_device_debugfs_mem_pool_max_size_fops);
+ if (IS_ERR_OR_NULL(dentry)) {
+ dev_err(kbdev->dev, "Unable to create mem_pool_max_size debugfs entry\n");
+ return dentry;
+ }
+
+ dentry = debugfs_create_file("lp_mem_pool_max_size", mode, debugfs_ctx_defaults_directory,
+ &kbdev->mem_pool_defaults.large,
+ &kbase_device_debugfs_mem_pool_max_size_fops);
+ if (IS_ERR_OR_NULL(dentry))
+ dev_err(kbdev->dev, "Unable to create lp_mem_pool_max_size debugfs entry\n");
- kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname,
- NULL);
- if (IS_ERR_OR_NULL(kbdev->mali_debugfs_directory)) {
+ return dentry;
+}
+
+/**
+ * init_debugfs - Create device-wide debugfs directories and files for the Mali driver
+ * @kbdev: An instance of the GPU platform device, allocated from the probe method of the driver.
+ * Return: A pointer to the last dentry that it tried to create, whether successful or not.
+ * Could be NULL or encode another error value.
+ */
+static struct dentry *init_debugfs(struct kbase_device *kbdev)
+{
+ struct dentry *dentry = debugfs_create_dir(kbdev->devname, NULL);
+
+ kbdev->mali_debugfs_directory = dentry;
+ if (IS_ERR_OR_NULL(dentry)) {
dev_err(kbdev->dev,
"Couldn't create mali debugfs directory: %s\n",
kbdev->devname);
- err = -ENOMEM;
- goto out;
+ return dentry;
}
- kbdev->debugfs_ctx_directory = debugfs_create_dir("ctx",
- kbdev->mali_debugfs_directory);
- if (IS_ERR_OR_NULL(kbdev->debugfs_ctx_directory)) {
+ dentry = debugfs_create_dir("ctx", kbdev->mali_debugfs_directory);
+ kbdev->debugfs_ctx_directory = dentry;
+ if (IS_ERR_OR_NULL(dentry)) {
dev_err(kbdev->dev, "Couldn't create mali debugfs ctx directory\n");
- err = -ENOMEM;
- goto out;
+ return dentry;
}
- kbdev->debugfs_instr_directory = debugfs_create_dir("instrumentation",
- kbdev->mali_debugfs_directory);
- if (IS_ERR_OR_NULL(kbdev->debugfs_instr_directory)) {
+ dentry = debugfs_create_dir("instrumentation", kbdev->mali_debugfs_directory);
+ kbdev->debugfs_instr_directory = dentry;
+ if (IS_ERR_OR_NULL(dentry)) {
dev_err(kbdev->dev, "Couldn't create mali debugfs instrumentation directory\n");
- err = -ENOMEM;
- goto out;
- }
-
- debugfs_ctx_defaults_directory = debugfs_create_dir("defaults",
- kbdev->debugfs_ctx_directory);
- if (IS_ERR_OR_NULL(debugfs_ctx_defaults_directory)) {
- dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n");
- err = -ENOMEM;
- goto out;
+ return dentry;
}
kbasep_regs_history_debugfs_init(kbdev);
-#if !MALI_USE_CSF
+#if MALI_USE_CSF
+ kbase_debug_csf_fault_debugfs_init(kbdev);
+#else /* MALI_USE_CSF */
kbase_debug_job_fault_debugfs_init(kbdev);
#endif /* !MALI_USE_CSF */
@@ -5041,41 +5053,58 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev)
/* fops_* variables created by invocations of macro
* MAKE_QUIRK_ACCESSORS() above.
*/
- debugfs_create_file("quirks_sc", 0644,
+ dentry = debugfs_create_file("quirks_sc", 0644,
kbdev->mali_debugfs_directory, kbdev,
&fops_sc_quirks);
- debugfs_create_file("quirks_tiler", 0644,
+ if (IS_ERR_OR_NULL(dentry)) {
+ dev_err(kbdev->dev, "Unable to create quirks_sc debugfs entry\n");
+ return dentry;
+ }
+
+ dentry = debugfs_create_file("quirks_tiler", 0644,
kbdev->mali_debugfs_directory, kbdev,
&fops_tiler_quirks);
- debugfs_create_file("quirks_mmu", 0644,
+ if (IS_ERR_OR_NULL(dentry)) {
+ dev_err(kbdev->dev, "Unable to create quirks_tiler debugfs entry\n");
+ return dentry;
+ }
+
+ dentry = debugfs_create_file("quirks_mmu", 0644,
kbdev->mali_debugfs_directory, kbdev,
&fops_mmu_quirks);
- debugfs_create_file("quirks_gpu", 0644, kbdev->mali_debugfs_directory,
- kbdev, &fops_gpu_quirks);
-
- debugfs_create_bool("infinite_cache", mode,
- debugfs_ctx_defaults_directory,
- &kbdev->infinite_cache_active_default);
+ if (IS_ERR_OR_NULL(dentry)) {
+ dev_err(kbdev->dev, "Unable to create quirks_mmu debugfs entry\n");
+ return dentry;
+ }
- debugfs_create_file("mem_pool_max_size", mode,
- debugfs_ctx_defaults_directory,
- &kbdev->mem_pool_defaults.small,
- &kbase_device_debugfs_mem_pool_max_size_fops);
+ dentry = debugfs_create_file("quirks_gpu", 0644, kbdev->mali_debugfs_directory,
+ kbdev, &fops_gpu_quirks);
+ if (IS_ERR_OR_NULL(dentry)) {
+ dev_err(kbdev->dev, "Unable to create quirks_gpu debugfs entry\n");
+ return dentry;
+ }
- debugfs_create_file("lp_mem_pool_max_size", mode,
- debugfs_ctx_defaults_directory,
- &kbdev->mem_pool_defaults.large,
- &kbase_device_debugfs_mem_pool_max_size_fops);
+ dentry = debugfs_ctx_defaults_init(kbdev);
+ if (IS_ERR_OR_NULL(dentry))
+ return dentry;
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
- debugfs_create_file("protected_debug_mode", 0444,
+ dentry = debugfs_create_file("protected_debug_mode", 0444,
kbdev->mali_debugfs_directory, kbdev,
&fops_protected_debug_mode);
+ if (IS_ERR_OR_NULL(dentry)) {
+ dev_err(kbdev->dev, "Unable to create protected_debug_mode debugfs entry\n");
+ return dentry;
+ }
}
- debugfs_create_file("reset", 0644,
+ dentry = debugfs_create_file("reset", 0644,
kbdev->mali_debugfs_directory, kbdev,
&fops_trigger_reset);
+ if (IS_ERR_OR_NULL(dentry)) {
+ dev_err(kbdev->dev, "Unable to create reset debugfs entry\n");
+ return dentry;
+ }
debugfs_create_file("trigger_uevent", 0644,
kbdev->mali_debugfs_directory, kbdev,
@@ -5091,20 +5120,30 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev)
#endif /* CONFIG_MALI_DEVFREQ */
#if !MALI_USE_CSF
- debugfs_create_file("serialize_jobs", 0644,
+ dentry = debugfs_create_file("serialize_jobs", 0644,
kbdev->mali_debugfs_directory, kbdev,
&kbasep_serialize_jobs_debugfs_fops);
-
+ if (IS_ERR_OR_NULL(dentry)) {
+ dev_err(kbdev->dev, "Unable to create serialize_jobs debugfs entry\n");
+ return dentry;
+ }
kbase_timeline_io_debugfs_init(kbdev);
#endif
kbase_dvfs_status_debugfs_init(kbdev);
- return 0;
+ return dentry;
+}
-out:
- debugfs_remove_recursive(kbdev->mali_debugfs_directory);
- return err;
+int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+ struct dentry *dentry = init_debugfs(kbdev);
+
+ if (IS_ERR_OR_NULL(dentry)) {
+ debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+ return IS_ERR(dentry) ? PTR_ERR(dentry) : -ENOMEM;
+ }
+ return 0;
}
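
The refactored init path above threads a single dentry pointer through every debugfs creation step and only converts it to an errno at the top level. The following standalone userspace sketch (not driver code; ERR_PTR(), PTR_ERR(), IS_ERR() and IS_ERR_OR_NULL() are re-implemented here purely for illustration) shows how that conversion behaves under the usual kernel convention of encoding small negative errnos in the pointer value:

/*
 * Standalone sketch of the "return the last dentry, convert once" pattern.
 */
#include <stdio.h>
#include <errno.h>
#include <stdint.h>

#define MAX_ERRNO 4095

static void *ERR_PTR(long error) { return (void *)(intptr_t)error; }
static long PTR_ERR(const void *ptr) { return (long)(intptr_t)ptr; }
static int IS_ERR(const void *ptr) { return (uintptr_t)ptr >= (uintptr_t)-MAX_ERRNO; }
static int IS_ERR_OR_NULL(const void *ptr) { return !ptr || IS_ERR(ptr); }

/* Stand-in for a debugfs creation helper that may fail. */
static void *create_entry(int fail_with)
{
	static int dummy;

	if (fail_with)
		return ERR_PTR(fail_with);	/* encoded errno, e.g. -EPERM */
	return &dummy;				/* "valid" dentry */
}

int main(void)
{
	void *dentry = create_entry(-EPERM);
	int err = 0;

	if (IS_ERR_OR_NULL(dentry))
		err = IS_ERR(dentry) ? (int)PTR_ERR(dentry) : -ENOMEM;

	printf("init would return %d\n", err);	/* -EPERM, i.e. -1 */
	return 0;
}
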
void kbase_device_debugfs_term(struct kbase_device *kbdev)
@@ -5662,6 +5701,11 @@ static int kbase_platform_device_probe(struct platform_device *pdev)
}
kbdev->dev = &pdev->dev;
+
+#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE)
+ kbdev->token = -EPERM;
+#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */
+
dev_set_drvdata(kbdev->dev, kbdev);
#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE)
mutex_lock(&kbase_probe_mutex);
@@ -5688,9 +5732,9 @@ static int kbase_platform_device_probe(struct platform_device *pdev)
mutex_unlock(&kbase_probe_mutex);
#endif
#ifdef CONFIG_MALI_ARBITER_SUPPORT
- mutex_lock(&kbdev->pm.lock);
+ rt_mutex_lock(&kbdev->pm.lock);
kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_INITIALIZED_EVT);
- mutex_unlock(&kbdev->pm.lock);
+ rt_mutex_unlock(&kbdev->pm.lock);
#endif
}
@@ -5894,12 +5938,11 @@ static const struct dev_pm_ops kbase_pm_ops = {
};
#if IS_ENABLED(CONFIG_OF)
-static const struct of_device_id kbase_dt_ids[] = {
- { .compatible = "arm,malit6xx" },
- { .compatible = "arm,mali-midgard" },
- { .compatible = "arm,mali-bifrost" },
- { /* sentinel */ }
-};
+static const struct of_device_id kbase_dt_ids[] = { { .compatible = "arm,malit6xx" },
+ { .compatible = "arm,mali-midgard" },
+ { .compatible = "arm,mali-bifrost" },
+ { .compatible = "arm,mali-valhall" },
+ { /* sentinel */ } };
MODULE_DEVICE_TABLE(of, kbase_dt_ids);
#endif
@@ -5937,6 +5980,7 @@ static int __init kbase_driver_init(void)
return ret;
}
#endif
+
return ret;
}
diff --git a/mali_kbase/mali_kbase_ctx_sched.c b/mali_kbase/mali_kbase_ctx_sched.c
index fb05467..dc6feb9 100644
--- a/mali_kbase/mali_kbase_ctx_sched.c
+++ b/mali_kbase/mali_kbase_ctx_sched.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -119,7 +119,7 @@ int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx)
if (atomic_inc_return(&kctx->refcount) == 1) {
int const free_as = kbasep_ctx_sched_find_as_for_ctx(kctx);
- if (free_as != KBASEP_AS_NR_INVALID) {
+ if (free_as >= 0) {
kbdev->as_free &= ~(1u << free_as);
/* Only program the MMU if the context has not been
* assigned the same address space before.
@@ -173,8 +173,10 @@ void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx)
*/
WARN_ON(!atomic_read(&kctx->refcount));
#endif
- WARN_ON(kctx->as_nr == KBASEP_AS_NR_INVALID);
- WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx);
+ if (likely((kctx->as_nr >= 0) && (kctx->as_nr < BASE_MAX_NR_AS)))
+ WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx);
+ else
+ WARN(true, "Invalid as_nr(%d)", kctx->as_nr);
atomic_inc(&kctx->refcount);
}
@@ -188,16 +190,17 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx)
new_ref_count = atomic_dec_return(&kctx->refcount);
if (new_ref_count == 0) {
- kbdev->as_free |= (1u << kctx->as_nr);
- if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) {
- KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS(
- kbdev, kctx->id);
- kbdev->as_to_kctx[kctx->as_nr] = NULL;
- kctx->as_nr = KBASEP_AS_NR_INVALID;
- kbase_ctx_flag_clear(kctx, KCTX_AS_DISABLED_ON_FAULT);
+ if (likely((kctx->as_nr >= 0) && (kctx->as_nr < BASE_MAX_NR_AS))) {
+ kbdev->as_free |= (1u << kctx->as_nr);
+ if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) {
+ KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS(kbdev, kctx->id);
+ kbdev->as_to_kctx[kctx->as_nr] = NULL;
+ kctx->as_nr = KBASEP_AS_NR_INVALID;
+ kbase_ctx_flag_clear(kctx, KCTX_AS_DISABLED_ON_FAULT);
#if !MALI_USE_CSF
- kbase_backend_slot_kctx_purge_locked(kbdev, kctx);
+ kbase_backend_slot_kctx_purge_locked(kbdev, kctx);
#endif
+ }
}
}
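
These hunks replace the comparison against the KBASEP_AS_NR_INVALID sentinel with an explicit bounds check before indexing the per-address-space bookkeeping arrays. A minimal sketch of that check, with BASE_MAX_NR_AS and KBASEP_AS_NR_INVALID redefined locally because the real values come from the driver headers:

#include <stdbool.h>
#include <stdio.h>

#define BASE_MAX_NR_AS 16
#define KBASEP_AS_NR_INVALID (-1)

/* Accept only values that can safely index as_to_kctx[] and as_free. */
static bool as_nr_is_valid(int as_nr)
{
	return (as_nr >= 0) && (as_nr < BASE_MAX_NR_AS);
}

int main(void)
{
	printf("%d\n", as_nr_is_valid(KBASEP_AS_NR_INVALID));	/* 0 */
	printf("%d\n", as_nr_is_valid(3));			/* 1 */
	printf("%d\n", as_nr_is_valid(64));			/* 0 */
	return 0;
}
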
@@ -214,7 +217,7 @@ void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx)
WARN_ON(atomic_read(&kctx->refcount) != 0);
- if (kctx->as_nr != KBASEP_AS_NR_INVALID) {
+ if ((kctx->as_nr >= 0) && (kctx->as_nr < BASE_MAX_NR_AS)) {
if (kbdev->pm.backend.gpu_powered)
kbase_mmu_disable(kctx);
@@ -239,6 +242,7 @@ void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev)
for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) {
struct kbase_context *kctx;
+ kbdev->as[i].is_unresponsive = false;
#if MALI_USE_CSF
if ((i == MCU_AS_NR) && kbdev->csf.firmware_inited) {
kbase_mmu_update(kbdev, &kbdev->csf.mcu_mmu,
@@ -337,20 +341,14 @@ struct kbase_context *kbase_ctx_sched_as_to_ctx_nolock(
bool kbase_ctx_sched_inc_refcount_nolock(struct kbase_context *kctx)
{
bool result = false;
-#ifdef CONFIG_MALI_DEBUG
- int as_nr;
-#endif
if (WARN_ON(kctx == NULL))
return result;
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
-#ifdef CONFIG_MALI_DEBUG
- as_nr = kctx->as_nr;
-#endif
if (atomic_read(&kctx->refcount) > 0) {
- KBASE_DEBUG_ASSERT(as_nr >= 0);
+ KBASE_DEBUG_ASSERT(kctx->as_nr >= 0);
kbase_ctx_sched_retain_ctx_refcount(kctx);
KBASE_KTRACE_ADD(kctx->kbdev, SCHED_RETAIN_CTX_NOLOCK, kctx,
diff --git a/mali_kbase/mali_kbase_debug_mem_allocs.c b/mali_kbase/mali_kbase_debug_mem_allocs.c
new file mode 100644
index 0000000..418bb19
--- /dev/null
+++ b/mali_kbase/mali_kbase_debug_mem_allocs.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Debugfs interface to dump information about GPU allocations in kctx
+ */
+
+#include "mali_kbase_debug_mem_allocs.h"
+#include "mali_kbase.h"
+
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/file.h>
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+
+/**
+ * debug_zone_mem_allocs_show - Show information from a specific rbtree
+ * @zone: Name of GPU virtual memory zone
+ * @rbtree: Pointer to the root of the rbtree associated with @zone
+ * @sfile: The debugfs entry
+ *
+ * This function is called to show information about all the GPU allocations of
+ * a particular zone within the GPU virtual memory space of a context.
+ * Information such as the start virtual address and size (in bytes) is shown for
+ * every GPU allocation mapped in the zone.
+ */
+static void debug_zone_mem_allocs_show(char *zone, struct rb_root *rbtree, struct seq_file *sfile)
+{
+ struct rb_node *p;
+ struct kbase_va_region *reg;
+ const char *type_names[5] = {
+ "Native",
+ "Imported UMM",
+ "Imported user buf",
+ "Alias",
+ "Raw"
+ };
+
+#define MEM_ALLOCS_HEADER \
+ " VA, VA size, Commit size, Flags, Mem type\n"
+ seq_printf(sfile, "Zone name: %s\n:", zone);
+ seq_printf(sfile, MEM_ALLOCS_HEADER);
+ for (p = rb_first(rbtree); p; p = rb_next(p)) {
+ reg = rb_entry(p, struct kbase_va_region, rblink);
+ if (!(reg->flags & KBASE_REG_FREE)) {
+ seq_printf(sfile, "%16llx, %16zx, %16zx, %8lx, %s\n",
+ reg->start_pfn << PAGE_SHIFT, reg->nr_pages << PAGE_SHIFT,
+ kbase_reg_current_backed_size(reg) << PAGE_SHIFT,
+ reg->flags, type_names[reg->gpu_alloc->type]);
+ }
+ }
+}
+
+/**
+ * debug_ctx_mem_allocs_show - Show information about GPU allocations in a kctx
+ * @sfile: The debugfs entry
+ * @data: Data associated with the entry
+ *
+ * Return:
+ * 0 if the data is successfully printed to the debugfs entry file
+ * -1 if an error is encountered
+ */
+static int debug_ctx_mem_allocs_show(struct seq_file *sfile, void *data)
+{
+ struct kbase_context *const kctx = sfile->private;
+
+ kbase_gpu_vm_lock(kctx);
+
+ debug_zone_mem_allocs_show("SAME_VA:", &kctx->reg_rbtree_same, sfile);
+ debug_zone_mem_allocs_show("CUSTOM_VA:", &kctx->reg_rbtree_custom, sfile);
+ debug_zone_mem_allocs_show("EXEC_VA:", &kctx->reg_rbtree_exec, sfile);
+
+#if MALI_USE_CSF
+ debug_zone_mem_allocs_show("EXEC_VA_FIXED:", &kctx->reg_rbtree_exec_fixed, sfile);
+ debug_zone_mem_allocs_show("FIXED_VA:", &kctx->reg_rbtree_fixed, sfile);
+#endif /* MALI_USE_CSF */
+
+ kbase_gpu_vm_unlock(kctx);
+ return 0;
+}
+
+/*
+ * File operations related to debugfs entry for mem_zones
+ */
+static int debug_mem_allocs_open(struct inode *in, struct file *file)
+{
+ return single_open(file, debug_ctx_mem_allocs_show, in->i_private);
+}
+
+static const struct file_operations kbase_debug_mem_allocs_fops = {
+ .owner = THIS_MODULE,
+ .open = debug_mem_allocs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+/*
+ * Initialize debugfs entry for mem_allocs
+ */
+void kbase_debug_mem_allocs_init(struct kbase_context *const kctx)
+{
+ /* Caller already ensures this, but we keep the pattern for
+ * maintenance safety.
+ */
+ if (WARN_ON(!kctx) || WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry)))
+ return;
+
+ debugfs_create_file("mem_allocs", 0400, kctx->kctx_dentry, kctx,
+ &kbase_debug_mem_allocs_fops);
+}
+#else
+/*
+ * Stub functions for when debugfs is disabled
+ */
+void kbase_debug_mem_allocs_init(struct kbase_context *const kctx)
+{
+}
+#endif
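
Since the new mem_allocs entry is a plain seq_file, it can be dumped from userspace with any reader. A minimal sketch follows; the debugfs path shown in the usage string is an assumption (mali0 device instance, per-context directory naming) and must be adjusted for the target system, and debugfs access normally requires root.

#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
	char buf[4096];
	size_t n;
	FILE *f;

	if (argc != 2) {
		fprintf(stderr,
			"usage: %s /sys/kernel/debug/mali0/ctx/<pid>_<id>/mem_allocs\n",
			argv[0]);
		return EXIT_FAILURE;
	}

	f = fopen(argv[1], "r");
	if (!f) {
		perror("fopen");
		return EXIT_FAILURE;
	}

	/* The file is a seq_file: read it to EOF and echo it to stdout. */
	while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
		fwrite(buf, 1, n, stdout);

	fclose(f);
	return EXIT_SUCCESS;
}
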
diff --git a/mali_kbase/platform/devicetree/mali_kbase_config_platform.c b/mali_kbase/mali_kbase_debug_mem_allocs.h
index 2eebed0..8cf69c2 100644
--- a/mali_kbase/platform/devicetree/mali_kbase_config_platform.c
+++ b/mali_kbase/mali_kbase_debug_mem_allocs.h
@@ -1,7 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -19,25 +19,21 @@
*
*/
+#ifndef _KBASE_DEBUG_MEM_ALLOCS_H
+#define _KBASE_DEBUG_MEM_ALLOCS_H
+
#include <mali_kbase.h>
-#include <mali_kbase_defs.h>
-#include <mali_kbase_config.h>
-#include "mali_kbase_config_platform.h"
-#include <device/mali_kbase_device.h>
-#include <mali_kbase_hwaccess_time.h>
-#include <gpu/mali_kbase_gpu_regmap.h>
-#include <linux/kthread.h>
-#include <linux/timer.h>
-#include <linux/jiffies.h>
-#include <linux/wait.h>
-#include <linux/delay.h>
-#include <linux/gcd.h>
-#include <asm/arch_timer.h>
+/**
+ * kbase_debug_mem_allocs_init() - Initialize the mem_allocs debugfs file
+ * @kctx: Pointer to kernel base context
+ *
+ * This function creates a "mem_allocs" file for a context to show info about the
+ * GPU allocations created for that context.
+ *
+ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the
+ * parent directory.
+ */
+void kbase_debug_mem_allocs_init(struct kbase_context *kctx);
-struct kbase_platform_funcs_conf platform_funcs = {
- .platform_init_func = NULL,
- .platform_term_func = NULL,
- .platform_late_init_func = NULL,
- .platform_late_term_func = NULL,
-};
+#endif
diff --git a/mali_kbase/mali_kbase_debug_mem_view.h b/mali_kbase/mali_kbase_debug_mem_view.h
index d034832..cb8050d 100644
--- a/mali_kbase/mali_kbase_debug_mem_view.h
+++ b/mali_kbase/mali_kbase_debug_mem_view.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2013-2015, 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2015, 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -25,7 +25,7 @@
#include <mali_kbase.h>
/**
- * kbase_debug_mem_view_init - Initialize the mem_view sysfs file
+ * kbase_debug_mem_view_init - Initialize the mem_view debugfs file
* @kctx: Pointer to kernel base context
*
* This function creates a "mem_view" file which can be used to get a view of
diff --git a/mali_kbase/mali_kbase_debugfs_helper.c b/mali_kbase/mali_kbase_debugfs_helper.c
index 4c1aa28..c846491 100644
--- a/mali_kbase/mali_kbase_debugfs_helper.c
+++ b/mali_kbase/mali_kbase_debugfs_helper.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -90,11 +90,10 @@ set_attr_from_string(char *const buf, void *const array, size_t const nelems,
int kbase_debugfs_string_validator(char *const buf)
{
- size_t index;
int err = 0;
char *ptr = buf;
- for (index = 0; *ptr; ++index) {
+ while (*ptr) {
unsigned long test_number;
size_t len;
diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h
index 347f15c..12e90ac 100644
--- a/mali_kbase/mali_kbase_defs.h
+++ b/mali_kbase/mali_kbase_defs.h
@@ -35,13 +35,13 @@
#include <backend/gpu/mali_kbase_instr_defs.h>
#include <mali_kbase_pm.h>
#include <mali_kbase_gpuprops_types.h>
-#include <mali_kbase_hwcnt_watchdog_if.h>
+#include <hwcnt/mali_kbase_hwcnt_watchdog_if.h>
#if MALI_USE_CSF
-#include <mali_kbase_hwcnt_backend_csf.h>
+#include <hwcnt/backend/mali_kbase_hwcnt_backend_csf.h>
#else
-#include <mali_kbase_hwcnt_backend_jm.h>
-#include <mali_kbase_hwcnt_backend_jm_watchdog.h>
+#include <hwcnt/backend/mali_kbase_hwcnt_backend_jm.h>
+#include <hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h>
#endif
#include <protected_mode_switcher.h>
@@ -53,11 +53,7 @@
#include <linux/sizes.h>
#include <linux/rtmutex.h>
-#if defined(CONFIG_SYNC)
-#include <sync.h>
-#else
#include "mali_kbase_fence_defs.h"
-#endif
#if IS_ENABLED(CONFIG_DEBUG_FS)
#include <linux/debugfs.h>
@@ -268,12 +264,25 @@ struct kbase_fault {
bool protected_mode;
};
+/** Maximum number of memory pages that should be allocated for the array
+ * of pointers to free PGDs.
+ *
+ * This number has been pre-calculated to deal with the maximum allocation
+ * size expressed by the default value of KBASE_MEM_ALLOC_MAX_SIZE.
+ * This is supposed to be enough for almost the entirety of MMU operations.
+ * Any size greater than KBASE_MEM_ALLOC_MAX_SIZE requires being broken down
+ * into multiple iterations, each dealing with at most KBASE_MEM_ALLOC_MAX_SIZE
+ * bytes.
+ *
+ * Please update this value if KBASE_MEM_ALLOC_MAX_SIZE changes.
+ */
+#define MAX_PAGES_FOR_FREE_PGDS ((size_t)9)
+
+/* Maximum number of pointers to free PGDs */
+#define MAX_FREE_PGDS ((PAGE_SIZE / sizeof(struct page *)) * MAX_PAGES_FOR_FREE_PGDS)
+
/**
* struct kbase_mmu_table - object representing a set of GPU page tables
- * @mmu_teardown_pages: Array containing pointers to 3 separate pages, used
- * to cache the entries of top (L0) & intermediate level
- * page tables (L1 & L2) to avoid repeated calls to
- * kmap_atomic() during the MMU teardown.
* @mmu_lock: Lock to serialize the accesses made to multi level GPU
* page tables
* @pgd: Physical address of the page allocated for the top
@@ -285,14 +294,40 @@ struct kbase_fault {
* Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
* @kctx: If this set of MMU tables belongs to a context then
* this is a back-reference to the context, otherwise
- * it is NULL
+ * it is NULL.
+ * @scratch_mem: Scratch memory used for MMU operations, which are
+ * serialized by the @mmu_lock.
*/
struct kbase_mmu_table {
- u64 *mmu_teardown_pages[MIDGARD_MMU_BOTTOMLEVEL];
struct rt_mutex mmu_lock;
phys_addr_t pgd;
u8 group_id;
struct kbase_context *kctx;
+ union {
+ /**
+ * @teardown_pages: Scratch memory used for backup copies of whole
+ * PGD pages when tearing down levels upon
+ * termination of the MMU table.
+ */
+ struct {
+ /**
+ * @levels: Array of PGD pages, large enough to copy one PGD
+ * for each level of the MMU table.
+ */
+ u64 levels[MIDGARD_MMU_BOTTOMLEVEL][PAGE_SIZE / sizeof(u64)];
+ } teardown_pages;
+ /**
+ * @free_pgds: Scratch memory used for insertion, update and teardown
+ * operations to store a temporary list of PGDs to be freed
+ * at the end of the operation.
+ */
+ struct {
+ /** @pgds: Array of pointers to PGDs to free. */
+ struct page *pgds[MAX_FREE_PGDS];
+ /** @head_index: Index of first free element in the PGDs array. */
+ size_t head_index;
+ } free_pgds;
+ } scratch_mem;
};
/**
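
To get a feel for the size of the new scratch_mem union, the arithmetic can be checked with a few lines of userspace C. The page size, pointer size and number of copied PGD levels below are assumptions (4 KiB pages, 8-byte pointers, three levels for MIDGARD_MMU_BOTTOMLEVEL); the real values come from the kernel configuration and the driver headers.

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define MIDGARD_MMU_BOTTOMLEVEL 3UL
#define MAX_PAGES_FOR_FREE_PGDS 9UL
#define MAX_FREE_PGDS ((PAGE_SIZE / sizeof(void *)) * MAX_PAGES_FOR_FREE_PGDS)

int main(void)
{
	unsigned long teardown_bytes = MIDGARD_MMU_BOTTOMLEVEL * PAGE_SIZE;
	unsigned long free_pgds_bytes = MAX_FREE_PGDS * sizeof(void *);

	/* One whole PGD copy per level: 3 * 4096 = 12288 bytes. */
	printf("teardown_pages: %lu bytes\n", teardown_bytes);
	/* (4096 / 8) * 9 = 4608 pointers, i.e. 36864 bytes. */
	printf("free_pgds: %lu entries, %lu bytes\n", MAX_FREE_PGDS, free_pgds_bytes);
	return 0;
}
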
@@ -316,6 +351,8 @@ struct kbase_reg_zone {
#include "jm/mali_kbase_jm_defs.h"
#endif
+#include "mali_kbase_hwaccess_time.h"
+
static inline int kbase_as_has_bus_fault(struct kbase_as *as,
struct kbase_fault *fault)
{
@@ -464,36 +501,40 @@ struct kbase_pm_device_data {
/**
* struct kbase_mem_pool - Page based memory pool for kctx/kbdev
- * @kbdev: Kbase device where memory is used
- * @cur_size: Number of free pages currently in the pool (may exceed
- * @max_size in some corner cases)
- * @max_size: Maximum number of free pages in the pool
- * @order: order = 0 refers to a pool of 4 KB pages
- * order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB)
- * @group_id: A memory group ID to be passed to a platform-specific
- * memory group manager, if present. Immutable.
- * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
- * @pool_lock: Lock protecting the pool - must be held when modifying
- * @cur_size and @page_list
- * @page_list: List of free pages in the pool
- * @reclaim: Shrinker for kernel reclaim of free pages
- * @next_pool: Pointer to next pool where pages can be allocated when this
- * pool is empty. Pages will spill over to the next pool when
- * this pool is full. Can be NULL if there is no next pool.
- * @dying: true if the pool is being terminated, and any ongoing
- * operations should be abandoned
- * @dont_reclaim: true if the shrinker is forbidden from reclaiming memory from
- * this pool, eg during a grow operation
+ * @kbdev: Kbase device where memory is used
+ * @cur_size: Number of free pages currently in the pool (may exceed
+ * @max_size in some corner cases)
+ * @max_size: Maximum number of free pages in the pool
+ * @order: order = 0 refers to a pool of 4 KB pages
+ * order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB)
+ * @group_id: A memory group ID to be passed to a platform-specific
+ * memory group manager, if present. Immutable.
+ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ * @pool_lock: Lock protecting the pool - must be held when modifying
+ * @cur_size and @page_list
+ * @page_list: List of free pages in the pool
+ * @reclaim: Shrinker for kernel reclaim of free pages
+ * @isolation_in_progress_cnt: Number of pages in pool undergoing page isolation.
+ * This is used to avoid a race condition between pool termination
+ * and page isolation for page migration.
+ * @next_pool: Pointer to next pool where pages can be allocated when this
+ * pool is empty. Pages will spill over to the next pool when
+ * this pool is full. Can be NULL if there is no next pool.
+ * @dying: true if the pool is being terminated, and any ongoing
+ * operations should be abandoned
+ * @dont_reclaim: true if the shrinker is forbidden from reclaiming memory from
+ * this pool, eg during a grow operation
*/
struct kbase_mem_pool {
struct kbase_device *kbdev;
- size_t cur_size;
- size_t max_size;
- u8 order;
- u8 group_id;
- spinlock_t pool_lock;
- struct list_head page_list;
- struct shrinker reclaim;
+ size_t cur_size;
+ size_t max_size;
+ u8 order;
+ u8 group_id;
+ spinlock_t pool_lock;
+ struct list_head page_list;
+ struct shrinker reclaim;
+ atomic_t isolation_in_progress_cnt;
struct kbase_mem_pool *next_pool;
@@ -674,6 +715,33 @@ struct kbase_process {
};
/**
+ * struct kbase_mem_migrate - Object representing an instance for managing
+ * page migration.
+ *
+ * @free_pages_list: List of deferred pages to free. Mostly used when page migration
+ * is enabled. Pages in the memory pool that require migrating
+ * are freed via this list instead. However, a page cannot be
+ * freed right away because Linux still needs to release the
+ * page lock, so the page is added to this list and freed later.
+ * @free_pages_lock: This lock should be held when adding or removing pages
+ * from @free_pages_list.
+ * @free_pages_workq: Work queue to process the work items queued to free
+ * pages in @free_pages_list.
+ * @free_pages_work: Work item to free pages in @free_pages_list.
+ * @inode: Pointer to inode whose address space operations are used
+ * for page migration purposes.
+ */
+struct kbase_mem_migrate {
+ struct list_head free_pages_list;
+ spinlock_t free_pages_lock;
+ struct workqueue_struct *free_pages_workq;
+ struct work_struct free_pages_work;
+#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE)
+ struct inode *inode;
+#endif
+};
+
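
The struct above implements a deferred-free scheme: pages that cannot be released while Linux still holds the page lock are parked on a locked list and reclaimed later by a work item. A standalone sketch of that pattern (compile with -pthread), using a toy singly linked list and a pthread mutex in place of the kernel's list_head, spinlock and workqueue:

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

struct deferred_page {
	int id;				/* stand-in for struct page */
	struct deferred_page *next;
};

static struct deferred_page *free_list;			/* plays @free_pages_list */
static pthread_mutex_t free_lock = PTHREAD_MUTEX_INITIALIZER; /* plays @free_pages_lock */

static void defer_free(struct deferred_page *p)
{
	pthread_mutex_lock(&free_lock);
	p->next = free_list;
	free_list = p;
	pthread_mutex_unlock(&free_lock);
}

/* Models the work item: drain the list once pages are safe to release. */
static void free_pages_work(void)
{
	struct deferred_page *p;

	pthread_mutex_lock(&free_lock);
	p = free_list;
	free_list = NULL;
	pthread_mutex_unlock(&free_lock);

	while (p) {
		struct deferred_page *next = p->next;

		printf("releasing page %d\n", p->id);
		free(p);
		p = next;
	}
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct deferred_page *p = malloc(sizeof(*p));

		if (!p)
			return 1;
		p->id = i;
		defer_free(p);
	}
	free_pages_work();
	return 0;
}
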
+/**
* struct kbase_device - Object representing an instance of GPU platform device,
* allocated from the probe method of mali driver.
* @hw_quirks_sc: Configuration to be used for the shader cores as per
@@ -711,6 +779,10 @@ struct kbase_process {
* @opp_table: Pointer to the device OPP structure maintaining the
* link to OPPs attached to a device. This is obtained
* after setting regulator names for the device.
+ * @token: Integer replacement for opp_table in kernel versions
+ * 6 and greater. The value is a token id number when 0 or greater,
+ * and a Linux errno when negative. It must be initialised
+ * to a non-zero value, as 0 is a valid token id.
* @devname: string containing the name used for GPU device instance,
* miscellaneous device is registered using the same name.
* @id: Unique identifier for the device, indicates the number of
@@ -757,6 +829,8 @@ struct kbase_process {
 * GPU address spaces assigned to them.
* @mmu_mask_change: Lock to serialize the access to MMU interrupt mask
* register used in the handling of Bus & Page faults.
+ * @pagesize_2mb: Boolean to determine whether 2MiB page sizes are
+ * supported and used where possible.
* @gpu_props: Object containing complete information about the
* configuration/properties of GPU HW device in use.
* @hw_issues_mask: List of SW workarounds for HW issues
@@ -802,6 +876,7 @@ struct kbase_process {
* GPU reset.
* @lowest_gpu_freq_khz: Lowest frequency in KHz that the GPU can run at. Used
* to calculate suitable timeouts for wait operations.
+ * @backend_time: Kbase backend time related attributes.
* @cache_clean_in_progress: Set when a cache clean has been started, and
* cleared when it has finished. This prevents multiple
* cache cleans being done simultaneously.
@@ -908,6 +983,10 @@ struct kbase_process {
* GPU2019-3878. PM state machine is invoked after
* clearing this flag and @hwaccess_lock is used to
* serialize the access.
+ * @mmu_page_migrate_in_progress: Set before starting a MMU page migration transaction
+ * and cleared after the transaction completes. PM L2 state is
+ * prevented from entering powering up/down transitions when the
+ * flag is set, @hwaccess_lock is used to serialize the access.
* @poweroff_pending: Set when power off operation for GPU is started, reset when
* power on for GPU is started.
* @infinite_cache_active_default: Set to enable using infinite cache for all the
@@ -998,6 +1077,14 @@ struct kbase_process {
* @oom_notifier_block: notifier_block containing kernel-registered out-of-
* memory handler.
* @proc_sysfs_node: Sysfs directory node to store per-process stats.
+ * @mem_migrate: Per device object for managing page migration.
+ * @live_fence_metadata: Count of live fence metadata structures created by
+ * KCPU queue. These structures may outlive kbase module
+ * itself. Therefore, in such a case, a warning should
+ * be produced.
+ * @mmu_as_inactive_wait_time_ms: Maximum waiting time in ms for the completion of
+ * a MMU operation
+ * @va_region_slab: kmem_cache (slab) for allocated kbase_va_region structures.
*/
struct kbase_device {
u32 hw_quirks_sc;
@@ -1022,14 +1109,16 @@ struct kbase_device {
#if IS_ENABLED(CONFIG_REGULATOR)
struct regulator *regulators[BASE_MAX_NR_CLOCKS_REGULATORS];
unsigned int nr_regulators;
-#if (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE)
+#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE)
+ int token;
+#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE)
struct opp_table *opp_table;
-#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */
+#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */
#endif /* CONFIG_REGULATOR */
char devname[DEVNAME_SIZE];
u32 id;
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
void *model;
struct kmem_cache *irq_slab;
struct workqueue_struct *irq_workq;
@@ -1037,7 +1126,7 @@ struct kbase_device {
atomic_t serving_gpu_irq;
atomic_t serving_mmu_irq;
spinlock_t reg_op_lock;
-#endif /* CONFIG_MALI_NO_MALI */
+#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
struct kbase_pm_device_data pm;
struct kbase_mem_pool_group mem_pools;
@@ -1052,6 +1141,8 @@ struct kbase_device {
spinlock_t mmu_mask_change;
+ bool pagesize_2mb;
+
struct kbase_gpu_props gpu_props;
unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
@@ -1065,6 +1156,12 @@ struct kbase_device {
s8 nr_hw_address_spaces;
s8 nr_user_address_spaces;
+ /**
+ * @pbha_propagate_bits: Record of Page-Based Hardware Attribute Propagate bits to
+ * restore to L2_CONFIG upon GPU reset.
+ */
+ u8 pbha_propagate_bits;
+
#if MALI_USE_CSF
struct kbase_hwcnt_backend_csf_if hwcnt_backend_csf_if_fw;
#else
@@ -1099,6 +1196,10 @@ struct kbase_device {
u64 lowest_gpu_freq_khz;
+#if MALI_USE_CSF
+ struct kbase_backend_time backend_time;
+#endif
+
bool cache_clean_in_progress;
u32 cache_clean_queued;
wait_queue_head_t cache_clean_wait;
@@ -1146,7 +1247,9 @@ struct kbase_device {
#endif /* CONFIG_MALI_DEVFREQ */
unsigned long previous_frequency;
+#if !MALI_USE_CSF
atomic_t job_fault_debug;
+#endif /* !MALI_USE_CSF */
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct dentry *mali_debugfs_directory;
@@ -1157,11 +1260,13 @@ struct kbase_device {
u64 debugfs_as_read_bitmap;
#endif /* CONFIG_MALI_DEBUG */
+#if !MALI_USE_CSF
wait_queue_head_t job_fault_wq;
wait_queue_head_t job_fault_resume_wq;
struct workqueue_struct *job_fault_resume_workq;
struct list_head job_fault_event_list;
spinlock_t job_fault_event_lock;
+#endif /* !MALI_USE_CSF */
#if !MALI_CUSTOMER_RELEASE
struct {
@@ -1183,6 +1288,7 @@ struct kbase_device {
#if MALI_USE_CSF
bool mmu_hw_operation_in_progress;
#endif
+ bool mmu_page_migrate_in_progress;
bool poweroff_pending;
bool infinite_cache_active_default;
@@ -1284,6 +1390,14 @@ struct kbase_device {
struct notifier_block oom_notifier_block;
struct kobject *proc_sysfs_node;
+
+ struct kbase_mem_migrate mem_migrate;
+
+#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE)
+ atomic_t live_fence_metadata;
+#endif
+ u32 mmu_as_inactive_wait_time_ms;
+ struct kmem_cache *va_region_slab;
};
/**
@@ -1366,10 +1480,6 @@ struct kbase_file {
*
* @KCTX_DYING: Set when the context process is in the process of being evicted.
*
- * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this
- * context, to disable use of implicit dma-buf fences. This is used to avoid
- * potential synchronization deadlocks.
- *
* @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory
* allocations. For 64-bit clients it is enabled by default, and disabled by
* default on 32-bit clients. Being able to clear this flag is only used for
@@ -1412,7 +1522,6 @@ enum kbase_context_flags {
KCTX_PRIVILEGED = 1U << 7,
KCTX_SCHEDULED = 1U << 8,
KCTX_DYING = 1U << 9,
- KCTX_NO_IMPLICIT_SYNC = 1U << 10,
KCTX_FORCE_SAME_VA = 1U << 11,
KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12,
KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13,
@@ -1451,9 +1560,6 @@ enum kbase_context_flags {
*
* @KCTX_DYING: Set when the context process is in the process of being evicted.
*
- * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this
- * context, to disable use of implicit dma-buf fences. This is used to avoid
- * potential synchronization deadlocks.
*
* @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory
* allocations. For 64-bit clients it is enabled by default, and disabled by
@@ -1494,7 +1600,6 @@ enum kbase_context_flags {
KCTX_PRIVILEGED = 1U << 7,
KCTX_SCHEDULED = 1U << 8,
KCTX_DYING = 1U << 9,
- KCTX_NO_IMPLICIT_SYNC = 1U << 10,
KCTX_FORCE_SAME_VA = 1U << 11,
KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12,
KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13,
@@ -1644,11 +1749,13 @@ struct kbase_sub_alloc {
* is scheduled in and an atom is pulled from the context's per
* slot runnable tree in JM GPU or GPU command queue
* group is programmed on CSG slot in CSF GPU.
- * @mm_update_lock: lock used for handling of special tracking page.
* @process_mm: Pointer to the memory descriptor of the process which
* created the context. Used for accounting the physical
* pages used for GPU allocations, done for the context,
- * to the memory consumed by the process.
+ * to the memory consumed by the process. A reference is taken
+ * on this descriptor for Userspace-created contexts so that
+ * Kbase can safely access it to update the memory usage counters.
+ * The reference is dropped on context termination.
* @gpu_va_end: End address of the GPU va space (in 4KB page units)
* @running_total_tiler_heap_nr_chunks: Running total of number of chunks in all
* tiler heaps of the kbase context.
@@ -1699,12 +1806,6 @@ struct kbase_sub_alloc {
* memory allocations.
* @jit_current_allocations_per_bin: Current number of in-flight just-in-time
* memory allocations per bin.
- * @jit_version: Version number indicating whether userspace is using
- * old or new version of interface for just-in-time
- * memory allocations.
- * 1 -> client used KBASE_IOCTL_MEM_JIT_INIT_10_2
- * 2 -> client used KBASE_IOCTL_MEM_JIT_INIT_11_5
- * 3 -> client used KBASE_IOCTL_MEM_JIT_INIT
* @jit_group_id: A memory group ID to be passed to a platform-specific
* memory group manager.
* Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
@@ -1862,19 +1963,12 @@ struct kbase_context {
struct list_head waiting_soft_jobs;
spinlock_t waiting_soft_jobs_lock;
-#ifdef CONFIG_MALI_DMA_FENCE
- struct {
- struct list_head waiting_resource;
- struct workqueue_struct *wq;
- } dma_fence;
-#endif /* CONFIG_MALI_DMA_FENCE */
int as_nr;
atomic_t refcount;
- spinlock_t mm_update_lock;
- struct mm_struct __rcu *process_mm;
+ struct mm_struct *process_mm;
u64 gpu_va_end;
#if MALI_USE_CSF
u32 running_total_tiler_heap_nr_chunks;
@@ -1899,7 +1993,6 @@ struct kbase_context {
u8 jit_max_allocations;
u8 jit_current_allocations;
u8 jit_current_allocations_per_bin[256];
- u8 jit_version;
u8 jit_group_id;
#if MALI_JIT_PRESSURE_LIMIT_BASE
u64 jit_phys_pages_limit;
@@ -2040,5 +2133,4 @@ static inline u64 kbase_get_lock_region_min_size_log2(struct kbase_gpu_props con
#define KBASE_AS_INACTIVE_MAX_LOOPS 100000
/* Maximum number of loops polling the GPU PRFCNT_ACTIVE bit before we assume the GPU has hung */
#define KBASE_PRFCNT_ACTIVE_MAX_LOOPS 100000000
-
#endif /* _KBASE_DEFS_H_ */
diff --git a/mali_kbase/mali_kbase_dma_fence.c b/mali_kbase/mali_kbase_dma_fence.c
deleted file mode 100644
index ca3863f..0000000
--- a/mali_kbase/mali_kbase_dma_fence.c
+++ /dev/null
@@ -1,491 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
-/*
- *
- * (C) COPYRIGHT 2011-2016, 2020-2021 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-/* Include mali_kbase_dma_fence.h before checking for CONFIG_MALI_DMA_FENCE as
- * it will be set there.
- */
-#include "mali_kbase_dma_fence.h"
-#include <linux/atomic.h>
-#include <linux/list.h>
-#include <linux/lockdep.h>
-#include <linux/mutex.h>
-#include <linux/version.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/workqueue.h>
-#include <linux/ww_mutex.h>
-#include <mali_kbase.h>
-
-static void
-kbase_dma_fence_work(struct work_struct *pwork);
-
-static void
-kbase_dma_fence_waiters_add(struct kbase_jd_atom *katom)
-{
- struct kbase_context *kctx = katom->kctx;
-
- list_add_tail(&katom->queue, &kctx->dma_fence.waiting_resource);
-}
-
-static void
-kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom)
-{
- list_del(&katom->queue);
-}
-
-static int
-kbase_dma_fence_lock_reservations(struct kbase_dma_fence_resv_info *info,
- struct ww_acquire_ctx *ctx)
-{
-#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
- struct reservation_object *content_res = NULL;
-#else
- struct dma_resv *content_res = NULL;
-#endif
- unsigned int content_res_idx = 0;
- unsigned int r;
- int err = 0;
-
- ww_acquire_init(ctx, &reservation_ww_class);
-
-retry:
- for (r = 0; r < info->dma_fence_resv_count; r++) {
- if (info->resv_objs[r] == content_res) {
- content_res = NULL;
- continue;
- }
-
- err = ww_mutex_lock(&info->resv_objs[r]->lock, ctx);
- if (err)
- goto error;
- }
-
- ww_acquire_done(ctx);
- return err;
-
-error:
- content_res_idx = r;
-
- /* Unlock the locked one ones */
- while (r--)
- ww_mutex_unlock(&info->resv_objs[r]->lock);
-
- if (content_res)
- ww_mutex_unlock(&content_res->lock);
-
- /* If we deadlock try with lock_slow and retry */
- if (err == -EDEADLK) {
- content_res = info->resv_objs[content_res_idx];
- ww_mutex_lock_slow(&content_res->lock, ctx);
- goto retry;
- }
-
- /* If we are here the function failed */
- ww_acquire_fini(ctx);
- return err;
-}
-
-static void
-kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info,
- struct ww_acquire_ctx *ctx)
-{
- unsigned int r;
-
- for (r = 0; r < info->dma_fence_resv_count; r++)
- ww_mutex_unlock(&info->resv_objs[r]->lock);
- ww_acquire_fini(ctx);
-}
-
-
-
-/**
- * kbase_dma_fence_queue_work() - Queue work to handle @katom
- * @katom: Pointer to atom for which to queue work
- *
- * Queue kbase_dma_fence_work() for @katom to clean up the fence callbacks and
- * submit the atom.
- */
-static void
-kbase_dma_fence_queue_work(struct kbase_jd_atom *katom)
-{
- struct kbase_context *kctx = katom->kctx;
- bool ret;
-
- INIT_WORK(&katom->work, kbase_dma_fence_work);
- ret = queue_work(kctx->dma_fence.wq, &katom->work);
- /* Warn if work was already queued, that should not happen. */
- WARN_ON(!ret);
-}
-
-/**
- * kbase_dma_fence_cancel_atom() - Cancels waiting on an atom
- * @katom: Katom to cancel
- *
- * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
- */
-static void
-kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom)
-{
- lockdep_assert_held(&katom->kctx->jctx.lock);
-
- /* Cancel callbacks and clean up. */
- kbase_fence_free_callbacks(katom);
-
- /* Mark the atom as handled in case all fences signaled just before
- * canceling the callbacks and the worker was queued.
- */
- kbase_fence_dep_count_set(katom, -1);
-
- /* Prevent job_done_nolock from being called twice on an atom when
- * there is a race between job completion and cancellation.
- */
-
- if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
- /* Wait was cancelled - zap the atom */
- katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
- if (kbase_jd_done_nolock(katom, true))
- kbase_js_sched_all(katom->kctx->kbdev);
- }
-}
-
-/**
- * kbase_dma_fence_work() - Worker thread called when a fence is signaled
- * @pwork: work_struct containing a pointer to a katom
- *
- * This function will clean and mark all dependencies as satisfied
- */
-static void
-kbase_dma_fence_work(struct work_struct *pwork)
-{
- struct kbase_jd_atom *katom;
- struct kbase_jd_context *ctx;
-
- katom = container_of(pwork, struct kbase_jd_atom, work);
- ctx = &katom->kctx->jctx;
-
- mutex_lock(&ctx->lock);
- if (kbase_fence_dep_count_read(katom) != 0)
- goto out;
-
- kbase_fence_dep_count_set(katom, -1);
-
- /* Remove atom from list of dma-fence waiting atoms. */
- kbase_dma_fence_waiters_remove(katom);
- /* Cleanup callbacks. */
- kbase_fence_free_callbacks(katom);
- /*
- * Queue atom on GPU, unless it has already completed due to a failing
- * dependency. Run kbase_jd_done_nolock() on the katom if it is completed.
- */
- if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED))
- kbase_jd_done_nolock(katom, true);
- else
- kbase_jd_dep_clear_locked(katom);
-
-out:
- mutex_unlock(&ctx->lock);
-}
-
-static void
-#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
-kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb)
-#else
-kbase_dma_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
-#endif
-{
- struct kbase_fence_cb *kcb = container_of(cb,
- struct kbase_fence_cb,
- fence_cb);
- struct kbase_jd_atom *katom = kcb->katom;
-
- /* If the atom is zapped dep_count will be forced to a negative number
- * preventing this callback from ever scheduling work. Which in turn
- * would reschedule the atom.
- */
-
- if (kbase_fence_dep_count_dec_and_test(katom))
- kbase_dma_fence_queue_work(katom);
-}
-
-#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
-static int
-kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom,
- struct reservation_object *resv,
- bool exclusive)
-#else
-static int
-kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom,
- struct dma_resv *resv,
- bool exclusive)
-#endif
-{
-#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
- struct fence *excl_fence = NULL;
- struct fence **shared_fences = NULL;
-#else
- struct dma_fence *excl_fence = NULL;
- struct dma_fence **shared_fences = NULL;
-#endif
- unsigned int shared_count = 0;
- int err, i;
-
-#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
- err = reservation_object_get_fences_rcu(
-#elif (KERNEL_VERSION(5, 14, 0) > LINUX_VERSION_CODE)
- err = dma_resv_get_fences_rcu(
-#else
- err = dma_resv_get_fences(
-#endif
- resv,
- &excl_fence,
- &shared_count,
- &shared_fences);
- if (err)
- return err;
-
- if (excl_fence) {
- err = kbase_fence_add_callback(katom,
- excl_fence,
- kbase_dma_fence_cb);
-
- /* Release our reference, taken by reservation_object_get_fences_rcu(),
- * to the fence. We have set up our callback (if that was possible),
- * and it's the fence's owner is responsible for singling the fence
- * before allowing it to disappear.
- */
- dma_fence_put(excl_fence);
-
- if (err)
- goto out;
- }
-
- if (exclusive) {
- for (i = 0; i < shared_count; i++) {
- err = kbase_fence_add_callback(katom,
- shared_fences[i],
- kbase_dma_fence_cb);
- if (err)
- goto out;
- }
- }
-
- /* Release all our references to the shared fences, taken by
- * reservation_object_get_fences_rcu(). We have set up our callback (if
- * that was possible), and it's the fence's owner is responsible for
- * signaling the fence before allowing it to disappear.
- */
-out:
- for (i = 0; i < shared_count; i++)
- dma_fence_put(shared_fences[i]);
- kfree(shared_fences);
-
- if (err) {
- /*
- * On error, cancel and clean up all callbacks that was set up
- * before the error.
- */
- kbase_fence_free_callbacks(katom);
- }
-
- return err;
-}
-
-#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
-void kbase_dma_fence_add_reservation(struct reservation_object *resv,
- struct kbase_dma_fence_resv_info *info,
- bool exclusive)
-#else
-void kbase_dma_fence_add_reservation(struct dma_resv *resv,
- struct kbase_dma_fence_resv_info *info,
- bool exclusive)
-#endif
-{
- unsigned int i;
-
- for (i = 0; i < info->dma_fence_resv_count; i++) {
- /* Duplicate resource, ignore */
- if (info->resv_objs[i] == resv)
- return;
- }
-
- info->resv_objs[info->dma_fence_resv_count] = resv;
- if (exclusive)
- set_bit(info->dma_fence_resv_count,
- info->dma_fence_excl_bitmap);
- (info->dma_fence_resv_count)++;
-}
-
-int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
- struct kbase_dma_fence_resv_info *info)
-{
- int err, i;
-#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
- struct fence *fence;
-#else
- struct dma_fence *fence;
-#endif
- struct ww_acquire_ctx ww_ctx;
-
- lockdep_assert_held(&katom->kctx->jctx.lock);
-
- fence = kbase_fence_out_new(katom);
- if (!fence) {
- err = -ENOMEM;
- dev_err(katom->kctx->kbdev->dev,
- "Error %d creating fence.\n", err);
- return err;
- }
-
- kbase_fence_dep_count_set(katom, 1);
-
- err = kbase_dma_fence_lock_reservations(info, &ww_ctx);
- if (err) {
- dev_err(katom->kctx->kbdev->dev,
- "Error %d locking reservations.\n", err);
- kbase_fence_dep_count_set(katom, -1);
- kbase_fence_out_remove(katom);
- return err;
- }
-
- for (i = 0; i < info->dma_fence_resv_count; i++) {
-#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
- struct reservation_object *obj = info->resv_objs[i];
-#else
- struct dma_resv *obj = info->resv_objs[i];
-#endif
- if (!test_bit(i, info->dma_fence_excl_bitmap)) {
-#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
- err = reservation_object_reserve_shared(obj);
-#else
- err = dma_resv_reserve_shared(obj, 0);
-#endif
- if (err) {
- dev_err(katom->kctx->kbdev->dev,
- "Error %d reserving space for shared fence.\n", err);
- goto end;
- }
-
- err = kbase_dma_fence_add_reservation_callback(katom, obj, false);
- if (err) {
- dev_err(katom->kctx->kbdev->dev,
- "Error %d adding reservation to callback.\n", err);
- goto end;
- }
-
-#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
- reservation_object_add_shared_fence(obj, fence);
-#else
- dma_resv_add_shared_fence(obj, fence);
-#endif
- } else {
- err = kbase_dma_fence_add_reservation_callback(katom, obj, true);
- if (err) {
- dev_err(katom->kctx->kbdev->dev,
- "Error %d adding reservation to callback.\n", err);
- goto end;
- }
-
-#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
- reservation_object_add_excl_fence(obj, fence);
-#else
- dma_resv_add_excl_fence(obj, fence);
-#endif
- }
- }
-
-end:
- kbase_dma_fence_unlock_reservations(info, &ww_ctx);
-
- if (likely(!err)) {
- /* Test if the callbacks are already triggered */
- if (kbase_fence_dep_count_dec_and_test(katom)) {
- kbase_fence_dep_count_set(katom, -1);
- kbase_fence_free_callbacks(katom);
- } else {
- /* Add katom to the list of dma-buf fence waiting atoms
- * only if it is still waiting.
- */
- kbase_dma_fence_waiters_add(katom);
- }
- } else {
- /* There was an error, cancel callbacks, set dep_count to -1 to
- * indicate that the atom has been handled (the caller will
- * kill it for us), signal the fence, free callbacks and the
- * fence.
- */
- kbase_fence_free_callbacks(katom);
- kbase_fence_dep_count_set(katom, -1);
- kbase_dma_fence_signal(katom);
- }
-
- return err;
-}
-
-void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx)
-{
- struct list_head *list = &kctx->dma_fence.waiting_resource;
-
- while (!list_empty(list)) {
- struct kbase_jd_atom *katom;
-
- katom = list_first_entry(list, struct kbase_jd_atom, queue);
- kbase_dma_fence_waiters_remove(katom);
- kbase_dma_fence_cancel_atom(katom);
- }
-}
-
-void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom)
-{
- /* Cancel callbacks and clean up. */
- if (kbase_fence_free_callbacks(katom))
- kbase_dma_fence_queue_work(katom);
-}
-
-void kbase_dma_fence_signal(struct kbase_jd_atom *katom)
-{
- if (!katom->dma_fence.fence)
- return;
-
- /* Signal the atom's fence. */
- dma_fence_signal(katom->dma_fence.fence);
-
- kbase_fence_out_remove(katom);
-
- kbase_fence_free_callbacks(katom);
-}
-
-void kbase_dma_fence_term(struct kbase_context *kctx)
-{
- destroy_workqueue(kctx->dma_fence.wq);
- kctx->dma_fence.wq = NULL;
-}
-
-int kbase_dma_fence_init(struct kbase_context *kctx)
-{
- INIT_LIST_HEAD(&kctx->dma_fence.waiting_resource);
-
- kctx->dma_fence.wq = alloc_workqueue("mali-fence-%d",
- WQ_UNBOUND, 1, kctx->pid);
- if (!kctx->dma_fence.wq)
- return -ENOMEM;
-
- return 0;
-}
diff --git a/mali_kbase/mali_kbase_dma_fence.h b/mali_kbase/mali_kbase_dma_fence.h
deleted file mode 100644
index 53effbc..0000000
--- a/mali_kbase/mali_kbase_dma_fence.h
+++ /dev/null
@@ -1,150 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- *
- * (C) COPYRIGHT 2010-2016, 2020-2022 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-#ifndef _KBASE_DMA_FENCE_H_
-#define _KBASE_DMA_FENCE_H_
-
-#ifdef CONFIG_MALI_DMA_FENCE
-
-#include <linux/list.h>
-#include <linux/version.h>
-#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
-#include <linux/reservation.h>
-#else
-#include <linux/dma-resv.h>
-#endif
-#include <mali_kbase_fence.h>
-
-/* Forward declaration from mali_kbase_defs.h */
-struct kbase_jd_atom;
-struct kbase_context;
-
-/**
- * struct kbase_dma_fence_resv_info - Structure with list of reservation objects
- * @resv_objs: Array of reservation objects to attach the
- * new fence to.
- * @dma_fence_resv_count: Number of reservation objects in the array.
- * @dma_fence_excl_bitmap: Specifies which resv_obj are exclusive.
- *
- * This is used by some functions to pass around a collection of data about
- * reservation objects.
- */
-struct kbase_dma_fence_resv_info {
-#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
- struct reservation_object **resv_objs;
-#else
- struct dma_resv **resv_objs;
-#endif
- unsigned int dma_fence_resv_count;
- unsigned long *dma_fence_excl_bitmap;
-};
-
-/**
- * kbase_dma_fence_add_reservation() - Adds a resv to the array of resv_objs
- * @resv: Reservation object to add to the array.
- * @info: Pointer to struct with current reservation info
- * @exclusive: Boolean indicating if exclusive access is needed
- *
- * The function adds a new reservation_object to an existing array of
- * reservation_objects. At the same time keeps track of which objects require
- * exclusive access in dma_fence_excl_bitmap.
- */
-#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
-void kbase_dma_fence_add_reservation(struct reservation_object *resv,
- struct kbase_dma_fence_resv_info *info,
- bool exclusive);
-#else
-void kbase_dma_fence_add_reservation(struct dma_resv *resv,
- struct kbase_dma_fence_resv_info *info,
- bool exclusive);
-#endif
-
-/**
- * kbase_dma_fence_wait() - Creates a new fence and attaches it to the resv_objs
- * @katom: Katom with the external dependency.
- * @info: Pointer to struct with current reservation info
- *
- * Return: An error code or 0 if succeeds
- */
-int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
- struct kbase_dma_fence_resv_info *info);
-
-/**
- * kbase_dma_fence_cancel_all_atoms() - Cancel all dma-fences blocked atoms on kctx
- * @kctx: Pointer to kbase context
- *
- * This function will cancel and clean up all katoms on @kctx that is waiting
- * on dma-buf fences.
- *
- * Locking: jctx.lock needs to be held when calling this function.
- */
-void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx);
-
-/**
- * kbase_dma_fence_cancel_callbacks() - Cancel only callbacks on katom
- * @katom: Pointer to katom whose callbacks are to be canceled
- *
- * This function cancels all dma-buf fence callbacks on @katom, but does not
- * cancel the katom itself.
- *
- * The caller is responsible for ensuring that kbase_jd_done_nolock is called on
- * @katom.
- *
- * Locking: jctx.lock must be held when calling this function.
- */
-void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom);
-
-/**
- * kbase_dma_fence_signal() - Signal katom's fence and clean up after wait
- * @katom: Pointer to katom to signal and clean up
- *
- * This function will signal the @katom's fence, if it has one, and clean up
- * the callback data from the katom's wait on earlier fences.
- *
- * Locking: jctx.lock must be held while calling this function.
- */
-void kbase_dma_fence_signal(struct kbase_jd_atom *katom);
-
-/**
- * kbase_dma_fence_term() - Terminate Mali dma-fence context
- * @kctx: kbase context to terminate
- */
-void kbase_dma_fence_term(struct kbase_context *kctx);
-
-/**
- * kbase_dma_fence_init() - Initialize Mali dma-fence context
- * @kctx: kbase context to initialize
- *
- * Return: 0 on success, error code otherwise.
- */
-int kbase_dma_fence_init(struct kbase_context *kctx);
-
-#else /* !CONFIG_MALI_DMA_FENCE */
-/* Dummy functions for when dma-buf fence isn't enabled. */
-
-static inline int kbase_dma_fence_init(struct kbase_context *kctx)
-{
- return 0;
-}
-
-static inline void kbase_dma_fence_term(struct kbase_context *kctx) {}
-#endif /* CONFIG_MALI_DMA_FENCE */
-#endif
diff --git a/mali_kbase/mali_kbase_fence.c b/mali_kbase/mali_kbase_fence.c
index 01557cd..b16b276 100644
--- a/mali_kbase/mali_kbase_fence.c
+++ b/mali_kbase/mali_kbase_fence.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -59,95 +59,3 @@ kbase_fence_out_new(struct kbase_jd_atom *katom)
return fence;
}
-bool
-kbase_fence_free_callbacks(struct kbase_jd_atom *katom)
-{
- struct kbase_fence_cb *cb, *tmp;
- bool res = false;
-
- lockdep_assert_held(&katom->kctx->jctx.lock);
-
- /* Clean up and free callbacks. */
- list_for_each_entry_safe(cb, tmp, &katom->dma_fence.callbacks, node) {
- bool ret;
-
- /* Cancel callbacks that hasn't been called yet. */
- ret = dma_fence_remove_callback(cb->fence, &cb->fence_cb);
- if (ret) {
- int ret;
-
- /* Fence had not signaled, clean up after
- * canceling.
- */
- ret = atomic_dec_return(&katom->dma_fence.dep_count);
-
- if (unlikely(ret == 0))
- res = true;
- }
-
- /*
- * Release the reference taken in
- * kbase_fence_add_callback().
- */
- dma_fence_put(cb->fence);
- list_del(&cb->node);
- kfree(cb);
- }
-
- return res;
-}
-
-#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
-int
-kbase_fence_add_callback(struct kbase_jd_atom *katom,
- struct fence *fence,
- fence_func_t callback)
-#else
-int
-kbase_fence_add_callback(struct kbase_jd_atom *katom,
- struct dma_fence *fence,
- dma_fence_func_t callback)
-#endif
-{
- int err = 0;
- struct kbase_fence_cb *kbase_fence_cb;
-
- if (!fence)
- return -EINVAL;
-
- kbase_fence_cb = kmalloc(sizeof(*kbase_fence_cb), GFP_KERNEL);
- if (!kbase_fence_cb)
- return -ENOMEM;
-
- kbase_fence_cb->fence = fence;
- kbase_fence_cb->katom = katom;
- INIT_LIST_HEAD(&kbase_fence_cb->node);
- atomic_inc(&katom->dma_fence.dep_count);
-
- err = dma_fence_add_callback(fence, &kbase_fence_cb->fence_cb,
- callback);
- if (err == -ENOENT) {
- /* Fence signaled, get the completion result */
- err = dma_fence_get_status(fence);
-
- /* remap success completion to err code */
- if (err == 1)
- err = 0;
-
- kfree(kbase_fence_cb);
- atomic_dec(&katom->dma_fence.dep_count);
- } else if (err) {
- kfree(kbase_fence_cb);
- atomic_dec(&katom->dma_fence.dep_count);
- } else {
- /*
- * Get reference to fence that will be kept until callback gets
- * cleaned up in kbase_fence_free_callbacks().
- */
- dma_fence_get(fence);
- /* Add callback to katom's list of callbacks */
- list_add(&kbase_fence_cb->node, &katom->dma_fence.callbacks);
- }
-
- return err;
-}
diff --git a/mali_kbase/mali_kbase_fence.h b/mali_kbase/mali_kbase_fence.h
index 0f9b73a..f4507ac 100644
--- a/mali_kbase/mali_kbase_fence.h
+++ b/mali_kbase/mali_kbase_fence.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2018, 2020-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -23,41 +23,61 @@
#define _KBASE_FENCE_H_
/*
- * mali_kbase_fence.[hc] has common fence code used by both
- * - CONFIG_MALI_DMA_FENCE - implicit DMA fences
- * - CONFIG_SYNC_FILE - explicit fences beginning with 4.9 kernel
+ * mali_kbase_fence.[hc] has fence code used only by
+ * - CONFIG_SYNC_FILE - explicit fences
*/
-#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
+#if IS_ENABLED(CONFIG_SYNC_FILE)
#include <linux/list.h>
#include "mali_kbase_fence_defs.h"
#include "mali_kbase.h"
+#include "mali_kbase_refcount_defs.h"
+#if MALI_USE_CSF
+/* Maximum number of characters in DMA fence timeline name. */
+#define MAX_TIMELINE_NAME (32)
+
+/**
+ * struct kbase_kcpu_dma_fence_meta - Metadata structure for dma fence objects containing
+ * information about KCPU queue. One instance per KCPU
+ * queue.
+ *
+ * @refcount: Atomic value to keep track of number of references to an instance.
+ * An instance can outlive the KCPU queue itself.
+ * @kbdev: Pointer to Kbase device.
+ * @kctx_id: Kbase context ID.
+ * @timeline_name: String of timeline name for associated fence object.
+ */
+struct kbase_kcpu_dma_fence_meta {
+ kbase_refcount_t refcount;
+ struct kbase_device *kbdev;
+ int kctx_id;
+ char timeline_name[MAX_TIMELINE_NAME];
+};
+
+/**
+ * struct kbase_kcpu_dma_fence - Structure which extends a dma fence object to include a
+ *                                reference to metadata containing more information about it.
+ *
+ * @base: Fence object itself.
+ * @metadata: Pointer to metadata structure.
+ */
+struct kbase_kcpu_dma_fence {
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
-extern const struct fence_ops kbase_fence_ops;
+ struct fence base;
#else
-extern const struct dma_fence_ops kbase_fence_ops;
+ struct dma_fence base;
+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */
+ struct kbase_kcpu_dma_fence_meta *metadata;
+};
#endif
-/**
- * struct kbase_fence_cb - Mali dma-fence callback data struct
- * @fence_cb: Callback function
- * @katom: Pointer to katom that is waiting on this callback
- * @fence: Pointer to the fence object on which this callback is waiting
- * @node: List head for linking this callback to the katom
- */
-struct kbase_fence_cb {
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
- struct fence_cb fence_cb;
- struct fence *fence;
+extern const struct fence_ops kbase_fence_ops;
#else
- struct dma_fence_cb fence_cb;
- struct dma_fence *fence;
+extern const struct dma_fence_ops kbase_fence_ops;
#endif
- struct kbase_jd_atom *katom;
- struct list_head node;
-};
/**
* kbase_fence_out_new() - Creates a new output fence and puts it on the atom
@@ -71,7 +91,7 @@ struct fence *kbase_fence_out_new(struct kbase_jd_atom *katom);
struct dma_fence *kbase_fence_out_new(struct kbase_jd_atom *katom);
#endif
-#if defined(CONFIG_SYNC_FILE)
+#if IS_ENABLED(CONFIG_SYNC_FILE)
/**
* kbase_fence_fence_in_set() - Assign input fence to atom
* @katom: Atom to assign input fence to
@@ -102,7 +122,7 @@ static inline void kbase_fence_out_remove(struct kbase_jd_atom *katom)
}
}
-#if defined(CONFIG_SYNC_FILE)
+#if IS_ENABLED(CONFIG_SYNC_FILE)
/**
* kbase_fence_in_remove() - Removes the input fence from atom
* @katom: Atom to remove input fence for
@@ -153,101 +173,7 @@ static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom,
return dma_fence_signal(katom->dma_fence.fence);
}
-/**
- * kbase_fence_add_callback() - Add callback on @fence to block @katom
- * @katom: Pointer to katom that will be blocked by @fence
- * @fence: Pointer to fence on which to set up the callback
- * @callback: Pointer to function to be called when fence is signaled
- *
- * Caller needs to hold a reference to @fence when calling this function, and
- * the caller is responsible for releasing that reference. An additional
- * reference to @fence will be taken when the callback was successfully set up
- * and @fence needs to be kept valid until the callback has been called and
- * cleanup have been done.
- *
- * Return: 0 on success: fence was either already signaled, or callback was
- * set up. Negative error code is returned on error.
- */
-#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
-int kbase_fence_add_callback(struct kbase_jd_atom *katom,
- struct fence *fence,
- fence_func_t callback);
-#else
-int kbase_fence_add_callback(struct kbase_jd_atom *katom,
- struct dma_fence *fence,
- dma_fence_func_t callback);
-#endif
-
-/**
- * kbase_fence_dep_count_set() - Set dep_count value on atom to specified value
- * @katom: Atom to set dep_count for
- * @val: value to set dep_count to
- *
- * The dep_count is available to the users of this module so that they can
- * synchronize completion of the wait with cancellation and adding of more
- * callbacks. For instance, a user could do the following:
- *
- * dep_count set to 1
- * callback #1 added, dep_count is increased to 2
- * callback #1 happens, dep_count decremented to 1
- * since dep_count > 0, no completion is done
- * callback #2 is added, dep_count is increased to 2
- * dep_count decremented to 1
- * callback #2 happens, dep_count decremented to 0
- * since dep_count now is zero, completion executes
- *
- * The dep_count can also be used to make sure that the completion only
- * executes once. This is typically done by setting dep_count to -1 for the
- * thread that takes on this responsibility.
- */
-static inline void
-kbase_fence_dep_count_set(struct kbase_jd_atom *katom, int val)
-{
- atomic_set(&katom->dma_fence.dep_count, val);
-}
-
-/**
- * kbase_fence_dep_count_dec_and_test() - Decrements dep_count
- * @katom: Atom to decrement dep_count for
- *
- * See @kbase_fence_dep_count_set for general description about dep_count
- *
- * Return: true if value was decremented to zero, otherwise false
- */
-static inline bool
-kbase_fence_dep_count_dec_and_test(struct kbase_jd_atom *katom)
-{
- return atomic_dec_and_test(&katom->dma_fence.dep_count);
-}
-
-/**
- * kbase_fence_dep_count_read() - Returns the current dep_count value
- * @katom: Pointer to katom
- *
- * See @kbase_fence_dep_count_set for general description about dep_count
- *
- * Return: The current dep_count value
- */
-static inline int kbase_fence_dep_count_read(struct kbase_jd_atom *katom)
-{
- return atomic_read(&katom->dma_fence.dep_count);
-}
-
-/**
- * kbase_fence_free_callbacks() - Free dma-fence callbacks on a katom
- * @katom: Pointer to katom
- *
- * This function will free all fence callbacks on the katom's list of
- * callbacks. Callbacks that have not yet been called, because their fence
- * hasn't yet signaled, will first be removed from the fence.
- *
- * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
- *
- * Return: true if dep_count reached 0, otherwise false.
- */
-bool kbase_fence_free_callbacks(struct kbase_jd_atom *katom);
-
-#if defined(CONFIG_SYNC_FILE)
+#if IS_ENABLED(CONFIG_SYNC_FILE)
/**
* kbase_fence_in_get() - Retrieve input fence for atom.
* @katom: Atom to get input fence from
@@ -281,13 +207,53 @@ bool kbase_fence_free_callbacks(struct kbase_jd_atom *katom);
*/
#define kbase_fence_get(fence_info) dma_fence_get((fence_info)->fence)
+#if MALI_USE_CSF
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+static inline struct kbase_kcpu_dma_fence *kbase_kcpu_dma_fence_get(struct fence *fence)
+#else
+static inline struct kbase_kcpu_dma_fence *kbase_kcpu_dma_fence_get(struct dma_fence *fence)
+#endif
+{
+ if (fence->ops == &kbase_fence_ops)
+ return (struct kbase_kcpu_dma_fence *)fence;
+
+ return NULL;
+}
+
+static inline void kbase_kcpu_dma_fence_meta_put(struct kbase_kcpu_dma_fence_meta *metadata)
+{
+ if (kbase_refcount_dec_and_test(&metadata->refcount)) {
+ atomic_dec(&metadata->kbdev->live_fence_metadata);
+ kfree(metadata);
+ }
+}
+
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+static inline void kbase_kcpu_dma_fence_put(struct fence *fence)
+#else
+static inline void kbase_kcpu_dma_fence_put(struct dma_fence *fence)
+#endif
+{
+ struct kbase_kcpu_dma_fence *kcpu_fence = kbase_kcpu_dma_fence_get(fence);
+
+ if (kcpu_fence)
+ kbase_kcpu_dma_fence_meta_put(kcpu_fence->metadata);
+}
+#endif /* MALI_USE_CSF */
+
/**
* kbase_fence_put() - Releases a reference to a fence
* @fence: Fence to release reference for.
*/
-#define kbase_fence_put(fence) dma_fence_put(fence)
-
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+static inline void kbase_fence_put(struct fence *fence)
+#else
+static inline void kbase_fence_put(struct dma_fence *fence)
+#endif
+{
+ dma_fence_put(fence);
+}
-#endif /* CONFIG_MALI_DMA_FENCE || defined(CONFIG_SYNC_FILE */
+#endif /* IS_ENABLED(CONFIG_SYNC_FILE) */
#endif /* _KBASE_FENCE_H_ */
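
Editor's note: the kbase_kcpu_dma_fence wrapper introduced above lets the driver map a dma_fence back to its per-KCPU-queue metadata by comparing fence->ops with kbase_fence_ops. A minimal usage sketch follows; it is illustrative only and not part of the patch. example_log_fence_timeline() is hypothetical, and kbase_refcount_inc() is assumed to exist alongside the kbase_refcount_dec_and_test() used above.

static void example_log_fence_timeline(struct dma_fence *fence)
{
	struct kbase_kcpu_dma_fence *kcpu_fence = kbase_kcpu_dma_fence_get(fence);

	if (!kcpu_fence)
		return; /* Not a kbase KCPU fence */

	/* Pin the metadata so it remains valid even if the KCPU queue is torn down */
	kbase_refcount_inc(&kcpu_fence->metadata->refcount);

	pr_info("kctx %d timeline %s\n", kcpu_fence->metadata->kctx_id,
		kcpu_fence->metadata->timeline_name);

	/* Drop the reference; the last put frees the metadata */
	kbase_kcpu_dma_fence_meta_put(kcpu_fence->metadata);
}
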
diff --git a/mali_kbase/mali_kbase_fence_ops.c b/mali_kbase/mali_kbase_fence_ops.c
index be14155..25b4c9c 100644
--- a/mali_kbase/mali_kbase_fence_ops.c
+++ b/mali_kbase/mali_kbase_fence_ops.c
@@ -21,7 +21,7 @@
#include <linux/atomic.h>
#include <linux/list.h>
-#include <mali_kbase_fence_defs.h>
+#include <mali_kbase_fence.h>
#include <mali_kbase.h>
static const char *
@@ -41,7 +41,13 @@ kbase_fence_get_timeline_name(struct fence *fence)
kbase_fence_get_timeline_name(struct dma_fence *fence)
#endif
{
+#if MALI_USE_CSF
+ struct kbase_kcpu_dma_fence *kcpu_fence = (struct kbase_kcpu_dma_fence *)fence;
+
+ return kcpu_fence->metadata->timeline_name;
+#else
return kbase_timeline_name;
+#endif /* MALI_USE_CSF */
}
static bool
@@ -62,24 +68,44 @@ kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size)
#endif
{
#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE)
- snprintf(str, size, "%u", fence->seqno);
+ const char *format = "%u";
+#else
+ const char *format = "%llu";
+#endif
+ if (unlikely(!scnprintf(str, size, format, fence->seqno)))
+		pr_err("Failed to encode fence seqno to string");
+}
+
+#if MALI_USE_CSF
+static void
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+kbase_fence_release(struct fence *fence)
#else
- snprintf(str, size, "%llu", fence->seqno);
+kbase_fence_release(struct dma_fence *fence)
#endif
+{
+ struct kbase_kcpu_dma_fence *kcpu_fence = (struct kbase_kcpu_dma_fence *)fence;
+
+ kbase_kcpu_dma_fence_meta_put(kcpu_fence->metadata);
+ kfree(kcpu_fence);
}
+#endif
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
extern const struct fence_ops kbase_fence_ops; /* silence checker warning */
-const struct fence_ops kbase_fence_ops = {
- .wait = fence_default_wait,
+const struct fence_ops kbase_fence_ops = { .wait = fence_default_wait,
#else
extern const struct dma_fence_ops kbase_fence_ops; /* silence checker warning */
-const struct dma_fence_ops kbase_fence_ops = {
- .wait = dma_fence_default_wait,
+const struct dma_fence_ops kbase_fence_ops = { .wait = dma_fence_default_wait,
+#endif
+ .get_driver_name = kbase_fence_get_driver_name,
+ .get_timeline_name = kbase_fence_get_timeline_name,
+ .enable_signaling = kbase_fence_enable_signaling,
+#if MALI_USE_CSF
+ .fence_value_str = kbase_fence_fence_value_str,
+ .release = kbase_fence_release
+#else
+ .fence_value_str = kbase_fence_fence_value_str
#endif
- .get_driver_name = kbase_fence_get_driver_name,
- .get_timeline_name = kbase_fence_get_timeline_name,
- .enable_signaling = kbase_fence_enable_signaling,
- .fence_value_str = kbase_fence_fence_value_str
};
-
+KBASE_EXPORT_TEST_API(kbase_fence_ops);
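
Editor's note: a fence built on the kbase_fence_ops table above is initialised with the standard dma_fence_init() call; on CSF the .release hook then frees the wrapper and drops its metadata reference. The sketch below is a hedged illustration only: apart from dma_fence_init(), kbase_fence_ops and the structures defined in mali_kbase_fence.h, all names are assumptions and the caller is assumed to have already taken a metadata refcount.

static struct kbase_kcpu_dma_fence *
example_kcpu_fence_create(struct kbase_kcpu_dma_fence_meta *meta, spinlock_t *lock,
			  u64 fence_context, u64 seqno)
{
	struct kbase_kcpu_dma_fence *f = kzalloc(sizeof(*f), GFP_KERNEL);

	if (!f)
		return NULL;

	/* Metadata reference is handed over by the caller (assumption) */
	f->metadata = meta;
	dma_fence_init(&f->base, &kbase_fence_ops, lock, fence_context, seqno);
	return f;
}
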
diff --git a/mali_kbase/mali_kbase_gpuprops.c b/mali_kbase/mali_kbase_gpuprops.c
index c5ed338..afbba3d 100644
--- a/mali_kbase/mali_kbase_gpuprops.c
+++ b/mali_kbase/mali_kbase_gpuprops.c
@@ -311,9 +311,6 @@ static void kbase_gpuprops_calculate_props(
struct base_gpu_props * const gpu_props, struct kbase_device *kbdev)
{
int i;
-#if !MALI_USE_CSF
- u32 gpu_id;
-#endif
/* Populate the base_gpu_props structure */
kbase_gpuprops_update_core_props_gpu_id(gpu_props);
@@ -365,45 +362,21 @@ static void kbase_gpuprops_calculate_props(
#if MALI_USE_CSF
gpu_props->thread_props.max_registers =
- KBASE_UBFX32(gpu_props->raw_props.thread_features,
- 0U, 22);
+ KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 22);
gpu_props->thread_props.impl_tech =
- KBASE_UBFX32(gpu_props->raw_props.thread_features,
- 22U, 2);
+ KBASE_UBFX32(gpu_props->raw_props.thread_features, 22U, 2);
gpu_props->thread_props.max_task_queue =
- KBASE_UBFX32(gpu_props->raw_props.thread_features,
- 24U, 8);
+ KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 8);
gpu_props->thread_props.max_thread_group_split = 0;
#else
- /* MIDHARC-2364 was intended for tULx.
- * Workaround for the incorrectly applied THREAD_FEATURES to tDUx.
- */
- gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
- if ((gpu_id & GPU_ID2_PRODUCT_MODEL) == GPU_ID2_PRODUCT_TDUX) {
- gpu_props->thread_props.max_registers =
- KBASE_UBFX32(gpu_props->raw_props.thread_features,
- 0U, 22);
- gpu_props->thread_props.impl_tech =
- KBASE_UBFX32(gpu_props->raw_props.thread_features,
- 22U, 2);
- gpu_props->thread_props.max_task_queue =
- KBASE_UBFX32(gpu_props->raw_props.thread_features,
- 24U, 8);
- gpu_props->thread_props.max_thread_group_split = 0;
- } else {
- gpu_props->thread_props.max_registers =
- KBASE_UBFX32(gpu_props->raw_props.thread_features,
- 0U, 16);
- gpu_props->thread_props.max_task_queue =
- KBASE_UBFX32(gpu_props->raw_props.thread_features,
- 16U, 8);
- gpu_props->thread_props.max_thread_group_split =
- KBASE_UBFX32(gpu_props->raw_props.thread_features,
- 24U, 6);
- gpu_props->thread_props.impl_tech =
- KBASE_UBFX32(gpu_props->raw_props.thread_features,
- 30U, 2);
- }
+ gpu_props->thread_props.max_registers =
+ KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
+ gpu_props->thread_props.max_task_queue =
+ KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
+ gpu_props->thread_props.max_thread_group_split =
+ KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
+ gpu_props->thread_props.impl_tech =
+ KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2);
#endif
/* If values are not specified, then use defaults */
@@ -539,7 +512,7 @@ MODULE_PARM_DESC(override_l2_hash, "Override L2 hash config for testing");
static u32 l2_hash_values[ASN_HASH_COUNT] = {
0,
};
-static int num_override_l2_hash_values;
+static unsigned int num_override_l2_hash_values;
module_param_array(l2_hash_values, uint, &num_override_l2_hash_values, 0000);
MODULE_PARM_DESC(l2_hash_values, "Override L2 hash values config for testing");
@@ -593,7 +566,7 @@ kbase_read_l2_config_from_dt(struct kbase_device *const kbdev)
kbdev->l2_hash_values_override = false;
if (num_override_l2_hash_values) {
- int i;
+ unsigned int i;
kbdev->l2_hash_values_override = true;
for (i = 0; i < num_override_l2_hash_values; i++)
@@ -677,9 +650,11 @@ int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev)
int idx;
const bool asn_he = regdump.l2_config &
L2_CONFIG_ASN_HASH_ENABLE_MASK;
+#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
if (!asn_he && kbdev->l2_hash_values_override)
dev_err(kbdev->dev,
"Failed to use requested ASN_HASH, fallback to default");
+#endif
for (idx = 0; idx < ASN_HASH_COUNT; idx++)
dev_info(kbdev->dev,
"%s ASN_HASH[%d] is [0x%08x]\n",
@@ -705,10 +680,6 @@ static struct {
#define PROP(name, member) \
{KBASE_GPUPROP_ ## name, offsetof(struct base_gpu_props, member), \
sizeof(((struct base_gpu_props *)0)->member)}
-#define BACKWARDS_COMPAT_PROP(name, type) \
- { \
- KBASE_GPUPROP_##name, SIZE_MAX, sizeof(type) \
- }
PROP(PRODUCT_ID, core_props.product_id),
PROP(VERSION_STATUS, core_props.version_status),
PROP(MINOR_REVISION, core_props.minor_revision),
@@ -722,6 +693,10 @@ static struct {
PROP(GPU_AVAILABLE_MEMORY_SIZE, core_props.gpu_available_memory_size),
#if MALI_USE_CSF
+#define BACKWARDS_COMPAT_PROP(name, type) \
+ { \
+ KBASE_GPUPROP_##name, SIZE_MAX, sizeof(type) \
+ }
BACKWARDS_COMPAT_PROP(NUM_EXEC_ENGINES, u8),
#else
PROP(NUM_EXEC_ENGINES, core_props.num_exec_engines),
@@ -820,7 +795,7 @@ int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev)
}
kprops->prop_buffer_size = size;
- kprops->prop_buffer = kmalloc(size, GFP_KERNEL);
+ kprops->prop_buffer = kzalloc(size, GFP_KERNEL);
if (!kprops->prop_buffer) {
kprops->prop_buffer_size = 0;
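
Editor's note: the THREAD_FEATURES decode above relies on KBASE_UBFX32(value, offset, size), an unsigned bit-field extract. The stand-alone sketch below shows the assumed semantics and the JM (non-CSF) field layout used above; example_ubfx32() and the sample register value are illustrative, not part of the driver.

static inline u32 example_ubfx32(u32 value, unsigned int offset, unsigned int size)
{
	/* Extract a size-bit field starting at bit position offset */
	return (value >> offset) & (u32)((1ULL << size) - 1);
}

/* Decoding a hypothetical THREAD_FEATURES value with the JM layout */
const u32 thread_features = 0x44018210;
const u32 max_registers = example_ubfx32(thread_features, 0, 16);           /* bits 15:0  */
const u32 max_task_queue = example_ubfx32(thread_features, 16, 8);          /* bits 23:16 */
const u32 max_thread_group_split = example_ubfx32(thread_features, 24, 6);  /* bits 29:24 */
const u32 impl_tech = example_ubfx32(thread_features, 30, 2);               /* bits 31:30 */
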
diff --git a/mali_kbase/mali_kbase_gwt.c b/mali_kbase/mali_kbase_gwt.c
index 16cccee..0eba889 100644
--- a/mali_kbase/mali_kbase_gwt.c
+++ b/mali_kbase/mali_kbase_gwt.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -125,14 +125,17 @@ int kbase_gpu_gwt_stop(struct kbase_context *kctx)
return 0;
}
-
+#if (KERNEL_VERSION(5, 13, 0) <= LINUX_VERSION_CODE)
+static int list_cmp_function(void *priv, const struct list_head *a, const struct list_head *b)
+#else
static int list_cmp_function(void *priv, struct list_head *a,
struct list_head *b)
+#endif
{
- struct kbasep_gwt_list_element *elementA = container_of(a,
- struct kbasep_gwt_list_element, link);
- struct kbasep_gwt_list_element *elementB = container_of(b,
- struct kbasep_gwt_list_element, link);
+ const struct kbasep_gwt_list_element *elementA =
+ container_of(a, struct kbasep_gwt_list_element, link);
+ const struct kbasep_gwt_list_element *elementB =
+ container_of(b, struct kbasep_gwt_list_element, link);
CSTD_UNUSED(priv);
diff --git a/mali_kbase/mali_kbase_hw.c b/mali_kbase/mali_kbase_hw.c
index f205617..b07327a 100644
--- a/mali_kbase/mali_kbase_hw.c
+++ b/mali_kbase/mali_kbase_hw.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -68,9 +68,6 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev)
case GPU_ID2_PRODUCT_TBAX:
features = base_hw_features_tBAx;
break;
- case GPU_ID2_PRODUCT_TDUX:
- features = base_hw_features_tDUx;
- break;
case GPU_ID2_PRODUCT_TODX:
case GPU_ID2_PRODUCT_LODX:
features = base_hw_features_tODx;
@@ -85,6 +82,10 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev)
case GPU_ID2_PRODUCT_LTUX:
features = base_hw_features_tTUx;
break;
+ case GPU_ID2_PRODUCT_TTIX:
+ case GPU_ID2_PRODUCT_LTIX:
+ features = base_hw_features_tTIx;
+ break;
default:
features = base_hw_features_generic;
break;
@@ -207,10 +208,6 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
{ GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tBAx_r0p0 },
{ U32_MAX, NULL } } },
- { GPU_ID2_PRODUCT_TDUX,
- { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0 },
- { U32_MAX, NULL } } },
-
{ GPU_ID2_PRODUCT_TODX,
{ { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 },
{ GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tODx_r0p0 },
@@ -231,16 +228,27 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
{ GPU_ID2_PRODUCT_TTUX,
{ { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 },
+ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTUx_r0p1 },
{ GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 },
{ GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 },
- { GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p1 },
+ { GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 },
+ { GPU_ID2_VERSION_MAKE(1, 3, 0), base_hw_issues_tTUx_r1p3 },
{ U32_MAX, NULL } } },
{ GPU_ID2_PRODUCT_LTUX,
{ { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 },
{ GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 },
{ GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 },
- { GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p1 },
+ { GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 },
+ { GPU_ID2_VERSION_MAKE(1, 3, 0), base_hw_issues_tTUx_r1p3 },
+ { U32_MAX, NULL } } },
+
+ { GPU_ID2_PRODUCT_TTIX,
+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTIx_r0p0 },
+ { U32_MAX, NULL } } },
+
+ { GPU_ID2_PRODUCT_LTIX,
+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTIx_r0p0 },
{ U32_MAX, NULL } } },
};
@@ -297,21 +305,20 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
*/
issues = fallback_issues;
- dev_warn(kbdev->dev,
- "GPU hardware issue table may need updating:\n"
- "r%dp%d status %d is unknown; treating as r%dp%d status %d",
- (gpu_id & GPU_ID2_VERSION_MAJOR) >>
- GPU_ID2_VERSION_MAJOR_SHIFT,
- (gpu_id & GPU_ID2_VERSION_MINOR) >>
- GPU_ID2_VERSION_MINOR_SHIFT,
- (gpu_id & GPU_ID2_VERSION_STATUS) >>
- GPU_ID2_VERSION_STATUS_SHIFT,
- (fallback_version & GPU_ID2_VERSION_MAJOR) >>
- GPU_ID2_VERSION_MAJOR_SHIFT,
- (fallback_version & GPU_ID2_VERSION_MINOR) >>
- GPU_ID2_VERSION_MINOR_SHIFT,
- (fallback_version & GPU_ID2_VERSION_STATUS) >>
- GPU_ID2_VERSION_STATUS_SHIFT);
+ dev_notice(kbdev->dev, "r%dp%d status %d not found in HW issues table;\n",
+ (gpu_id & GPU_ID2_VERSION_MAJOR) >> GPU_ID2_VERSION_MAJOR_SHIFT,
+ (gpu_id & GPU_ID2_VERSION_MINOR) >> GPU_ID2_VERSION_MINOR_SHIFT,
+ (gpu_id & GPU_ID2_VERSION_STATUS) >>
+ GPU_ID2_VERSION_STATUS_SHIFT);
+ dev_notice(kbdev->dev, "falling back to closest match: r%dp%d status %d\n",
+ (fallback_version & GPU_ID2_VERSION_MAJOR) >>
+ GPU_ID2_VERSION_MAJOR_SHIFT,
+ (fallback_version & GPU_ID2_VERSION_MINOR) >>
+ GPU_ID2_VERSION_MINOR_SHIFT,
+ (fallback_version & GPU_ID2_VERSION_STATUS) >>
+ GPU_ID2_VERSION_STATUS_SHIFT);
+ dev_notice(kbdev->dev,
+ "Execution proceeding normally with fallback match\n");
gpu_id &= ~GPU_ID2_VERSION;
gpu_id |= fallback_version;
@@ -337,7 +344,7 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
issues = kbase_hw_get_issues_for_new_id(kbdev);
if (issues == NULL) {
dev_err(kbdev->dev,
- "Unknown GPU ID %x", gpu_id);
+ "HW product - Unknown GPU ID %x", gpu_id);
return -EINVAL;
}
@@ -381,9 +388,6 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
case GPU_ID2_PRODUCT_TBAX:
issues = base_hw_issues_model_tBAx;
break;
- case GPU_ID2_PRODUCT_TDUX:
- issues = base_hw_issues_model_tDUx;
- break;
case GPU_ID2_PRODUCT_TODX:
case GPU_ID2_PRODUCT_LODX:
issues = base_hw_issues_model_tODx;
@@ -398,10 +402,13 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
case GPU_ID2_PRODUCT_LTUX:
issues = base_hw_issues_model_tTUx;
break;
-
+ case GPU_ID2_PRODUCT_TTIX:
+ case GPU_ID2_PRODUCT_LTIX:
+ issues = base_hw_issues_model_tTIx;
+ break;
default:
dev_err(kbdev->dev,
- "Unknown GPU ID %x", gpu_id);
+ "HW issues - Unknown GPU ID %x", gpu_id);
return -EINVAL;
}
}
diff --git a/mali_kbase/mali_kbase_hwaccess_jm.h b/mali_kbase/mali_kbase_hwaccess_jm.h
index 124a6d6..ca77c19 100644
--- a/mali_kbase/mali_kbase_hwaccess_jm.h
+++ b/mali_kbase/mali_kbase_hwaccess_jm.h
@@ -97,8 +97,8 @@ bool kbase_backend_use_ctx(struct kbase_device *kbdev,
* Return: true if context is now active, false otherwise (ie if context does
* not have an address space assigned)
*/
-bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
- struct kbase_context *kctx, int js);
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, struct kbase_context *kctx,
+ unsigned int js);
/**
* kbase_backend_release_ctx_irq - Release a context from the GPU. This will
@@ -183,8 +183,7 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp);
*
* Return: Atom currently at the head of slot @js, or NULL
*/
-struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
- int js);
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, unsigned int js);
/**
* kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a
@@ -194,7 +193,7 @@ struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
*
* Return: Number of atoms currently on slot
*/
-int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js);
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, unsigned int js);
/**
* kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot
@@ -204,7 +203,7 @@ int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js);
*
* Return: Number of atoms currently on slot @js that are currently on the GPU.
*/
-int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js);
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, unsigned int js);
/**
* kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs
@@ -233,7 +232,7 @@ void kbase_backend_timeouts_changed(struct kbase_device *kbdev);
*
* Return: Number of jobs that can be submitted.
*/
-int kbase_backend_slot_free(struct kbase_device *kbdev, int js);
+int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js);
/**
* kbase_job_check_leave_disjoint - potentially leave disjoint state
@@ -287,8 +286,8 @@ u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev);
* Context:
* The job slot lock must be held when calling this function.
*/
-void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
- struct kbase_jd_atom *target_katom);
+void kbase_job_slot_hardstop(struct kbase_context *kctx, unsigned int js,
+ struct kbase_jd_atom *target_katom);
/**
* kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms
diff --git a/mali_kbase/mali_kbase_hwaccess_time.h b/mali_kbase/mali_kbase_hwaccess_time.h
index 27e2cb7..ac2a26d 100644
--- a/mali_kbase/mali_kbase_hwaccess_time.h
+++ b/mali_kbase/mali_kbase_hwaccess_time.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014, 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2018-2021, 2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,49 @@
#ifndef _KBASE_BACKEND_TIME_H_
#define _KBASE_BACKEND_TIME_H_
+#if MALI_USE_CSF
+/**
+ * struct kbase_backend_time - System timestamp attributes.
+ *
+ * @multiplier: Numerator of the converter's fraction.
+ * @divisor: Denominator of the converter's fraction.
+ * @offset: Converter's offset term.
+ *
+ * According to the Generic Timer spec, the system timer:
+ * - Increments at a fixed frequency
+ * - Starts operating from zero
+ *
+ * Hence CPU time is a linear function of System Time.
+ *
+ * CPU_ts = alpha * SYS_ts + beta
+ *
+ * Where
+ * - alpha = 10^9/SYS_ts_freq
+ * - beta is calculated by two timer samples taken at the same time:
+ * beta = CPU_ts_s - SYS_ts_s * alpha
+ *
+ * Since alpha is a rational number, we minimize the possible
+ * rounding error by simplifying the ratio. Thus alpha is stored
+ * as a simple `multiplier / divisor` ratio.
+ *
+ */
+struct kbase_backend_time {
+ u64 multiplier;
+ u64 divisor;
+ s64 offset;
+};
+
+/**
+ * kbase_backend_time_convert_gpu_to_cpu() - Convert GPU timestamp to CPU timestamp.
+ *
+ * @kbdev: Kbase device pointer
+ * @gpu_ts: System timestamp value to convert.
+ *
+ * Return: The CPU timestamp.
+ */
+u64 __maybe_unused kbase_backend_time_convert_gpu_to_cpu(struct kbase_device *kbdev, u64 gpu_ts);
+#endif
+
/**
* kbase_backend_get_gpu_time() - Get current GPU time
* @kbdev: Device pointer
@@ -46,9 +89,6 @@ void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev,
u64 *cycle_counter,
u64 *system_time,
struct timespec64 *ts);
-
-#endif /* _KBASE_BACKEND_TIME_H_ */
-
/**
* kbase_get_timeout_ms - Choose a timeout value to get a timeout scaled
* GPU frequency, using a choice from
@@ -70,3 +110,17 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
* Return: Snapshot of the GPU cycle count register.
*/
u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_time_init() - Initialize system timestamp converter.
+ *
+ * @kbdev: Kbase device pointer
+ *
+ * This function should only be called after the GPU is powered up and
+ * L2 cache power-up has been initiated.
+ *
+ * Return: Zero on success, error code otherwise.
+ */
+int kbase_backend_time_init(struct kbase_device *kbdev);
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
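
Editor's note: the conversion described in the kbase_backend_time comment above is a straight linear map, CPU_ts = SYS_ts * multiplier / divisor + offset, where multiplier/divisor is the reduced form of 10^9 / SYS_ts_freq (for example, a 25 MHz system counter gives 40/1). Below is a simplified sketch of that arithmetic which ignores the overflow handling a real implementation needs; example_gpu_to_cpu_ts() is illustrative, not the driver's kbase_backend_time_convert_gpu_to_cpu().

#include <linux/math64.h>

static u64 example_gpu_to_cpu_ts(const struct kbase_backend_time *t, u64 gpu_ts)
{
	/* Scale the system (GPU) timestamp into nanoseconds, then apply the
	 * offset sampled when the two clocks were read together.
	 */
	return div64_u64(gpu_ts * t->multiplier, t->divisor) + t->offset;
}
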
diff --git a/mali_kbase/mali_kbase_jd.c b/mali_kbase/mali_kbase_jd.c
index 66064ec..15e30db 100644
--- a/mali_kbase/mali_kbase_jd.c
+++ b/mali_kbase/mali_kbase_jd.c
@@ -28,6 +28,11 @@
#include <linux/version.h>
#include <linux/ratelimit.h>
#include <linux/priority_control_manager.h>
+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
+#include <linux/sched/signal.h>
+#else
+#include <linux/signal.h>
+#endif
#include <mali_kbase_jm.h>
#include <mali_kbase_kinstr_jm.h>
@@ -35,7 +40,6 @@
#include <tl/mali_kbase_tracepoints.h>
#include <mali_linux_trace.h>
-#include "mali_kbase_dma_fence.h"
#include <mali_kbase_cs_experimental.h>
#include <mali_kbase_caps.h>
@@ -158,15 +162,6 @@ void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom)
void kbase_jd_free_external_resources(struct kbase_jd_atom *katom)
{
-#ifdef CONFIG_MALI_DMA_FENCE
- /* Flush dma-fence workqueue to ensure that any callbacks that may have
- * been queued are done before continuing.
- * Any successfully completed atom would have had all it's callbacks
- * completed before the atom was run, so only flush for failed atoms.
- */
- if (katom->event_code != BASE_JD_EVENT_DONE)
- flush_workqueue(katom->kctx->dma_fence.wq);
-#endif /* CONFIG_MALI_DMA_FENCE */
}
static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
@@ -174,10 +169,6 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
KBASE_DEBUG_ASSERT(katom);
KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
-#ifdef CONFIG_MALI_DMA_FENCE
- kbase_dma_fence_signal(katom);
-#endif /* CONFIG_MALI_DMA_FENCE */
-
kbase_gpu_vm_lock(katom->kctx);
/* only roll back if extres is non-NULL */
if (katom->extres) {
@@ -203,24 +194,6 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
{
int err = -EINVAL;
u32 res_no;
-#ifdef CONFIG_MALI_DMA_FENCE
- struct kbase_dma_fence_resv_info info = {
- .resv_objs = NULL,
- .dma_fence_resv_count = 0,
- .dma_fence_excl_bitmap = NULL
- };
-#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
- /*
- * When both dma-buf fence and Android native sync is enabled, we
- * disable dma-buf fence for contexts that are using Android native
- * fences.
- */
- const bool implicit_sync = !kbase_ctx_flag(katom->kctx,
- KCTX_NO_IMPLICIT_SYNC);
-#else /* CONFIG_SYNC || CONFIG_SYNC_FILE*/
- const bool implicit_sync = true;
-#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
-#endif /* CONFIG_MALI_DMA_FENCE */
struct base_external_resource *input_extres;
KBASE_DEBUG_ASSERT(katom);
@@ -240,31 +213,6 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
goto failed_input_alloc;
}
-#ifdef CONFIG_MALI_DMA_FENCE
- if (implicit_sync) {
- info.resv_objs =
- kmalloc_array(katom->nr_extres,
-#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
- sizeof(struct reservation_object *),
-#else
- sizeof(struct dma_resv *),
-#endif
- GFP_KERNEL);
- if (!info.resv_objs) {
- err = -ENOMEM;
- goto failed_input_copy;
- }
-
- info.dma_fence_excl_bitmap =
- kcalloc(BITS_TO_LONGS(katom->nr_extres),
- sizeof(unsigned long), GFP_KERNEL);
- if (!info.dma_fence_excl_bitmap) {
- err = -ENOMEM;
- goto failed_input_copy;
- }
- }
-#endif /* CONFIG_MALI_DMA_FENCE */
-
if (copy_from_user(input_extres,
get_compat_pointer(katom->kctx, user_atom->extres_list),
sizeof(*input_extres) * katom->nr_extres) != 0) {
@@ -280,12 +228,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
for (res_no = 0; res_no < katom->nr_extres; res_no++) {
struct base_external_resource *user_res = &input_extres[res_no];
struct kbase_va_region *reg;
-#ifdef CONFIG_MALI_DMA_FENCE
- bool exclusive;
- exclusive = (user_res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE)
- ? true : false;
-#endif
reg = kbase_region_tracker_find_region_enclosing_address(
katom->kctx, user_res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
/* did we find a matching region object? */
@@ -303,20 +246,6 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
if (err)
goto failed_loop;
-#ifdef CONFIG_MALI_DMA_FENCE
- if (implicit_sync &&
- reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
-#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
- struct reservation_object *resv;
-#else
- struct dma_resv *resv;
-#endif
- resv = reg->gpu_alloc->imported.umm.dma_buf->resv;
- if (resv)
- kbase_dma_fence_add_reservation(resv, &info,
- exclusive);
- }
-#endif /* CONFIG_MALI_DMA_FENCE */
katom->extres[res_no] = reg;
}
/* successfully parsed the extres array */
@@ -326,20 +255,6 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
/* Release the processes mmap lock */
up_read(kbase_mem_get_process_mmap_lock());
-#ifdef CONFIG_MALI_DMA_FENCE
- if (implicit_sync) {
- if (info.dma_fence_resv_count) {
- int ret;
-
- ret = kbase_dma_fence_wait(katom, &info);
- if (ret < 0)
- goto failed_dma_fence_setup;
- }
-
- kfree(info.resv_objs);
- kfree(info.dma_fence_excl_bitmap);
- }
-#endif /* CONFIG_MALI_DMA_FENCE */
/* Free the buffer holding data from userspace */
kfree(input_extres);
@@ -347,15 +262,6 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
return 0;
/* error handling section */
-#ifdef CONFIG_MALI_DMA_FENCE
-failed_dma_fence_setup:
- /* Lock the processes mmap lock */
- down_read(kbase_mem_get_process_mmap_lock());
-
- /* lock before we unmap */
- kbase_gpu_vm_lock(katom->kctx);
-#endif
-
failed_loop:
/* undo the loop work. We are guaranteed to have access to the VA region
* as we hold a reference to it until it's unmapped
@@ -375,12 +281,6 @@ failed_input_copy:
failed_input_alloc:
kfree(katom->extres);
katom->extres = NULL;
-#ifdef CONFIG_MALI_DMA_FENCE
- if (implicit_sync) {
- kfree(info.resv_objs);
- kfree(info.dma_fence_excl_bitmap);
- }
-#endif
return err;
}
@@ -404,10 +304,6 @@ static inline void jd_resolve_dep(struct list_head *out_list,
if (katom->event_code != BASE_JD_EVENT_DONE &&
(dep_type != BASE_JD_DEP_TYPE_ORDER)) {
-#ifdef CONFIG_MALI_DMA_FENCE
- kbase_dma_fence_cancel_callbacks(dep_atom);
-#endif
-
dep_atom->event_code = katom->event_code;
KBASE_DEBUG_ASSERT(dep_atom->status !=
KBASE_JD_ATOM_STATE_UNUSED);
@@ -421,35 +317,8 @@ static inline void jd_resolve_dep(struct list_head *out_list,
(IS_GPU_ATOM(dep_atom) && !ctx_is_dying &&
!dep_atom->will_fail_event_code &&
!other_dep_atom->will_fail_event_code))) {
- bool dep_satisfied = true;
-#ifdef CONFIG_MALI_DMA_FENCE
- int dep_count;
-
- dep_count = kbase_fence_dep_count_read(dep_atom);
- if (likely(dep_count == -1)) {
- dep_satisfied = true;
- } else {
- /*
- * There are either still active callbacks, or
- * all fences for this @dep_atom has signaled,
- * but the worker that will queue the atom has
- * not yet run.
- *
- * Wait for the fences to signal and the fence
- * worker to run and handle @dep_atom. If
- * @dep_atom was completed due to error on
- * @katom, then the fence worker will pick up
- * the complete status and error code set on
- * @dep_atom above.
- */
- dep_satisfied = false;
- }
-#endif /* CONFIG_MALI_DMA_FENCE */
-
- if (dep_satisfied) {
- dep_atom->in_jd_list = true;
- list_add_tail(&dep_atom->jd_item, out_list);
- }
+ dep_atom->in_jd_list = true;
+ list_add_tail(&dep_atom->jd_item, out_list);
}
}
}
@@ -508,33 +377,8 @@ static void jd_try_submitting_deps(struct list_head *out_list,
dep_atom->dep[0].atom);
bool dep1_valid = is_dep_valid(
dep_atom->dep[1].atom);
- bool dep_satisfied = true;
-#ifdef CONFIG_MALI_DMA_FENCE
- int dep_count;
-
- dep_count = kbase_fence_dep_count_read(
- dep_atom);
- if (likely(dep_count == -1)) {
- dep_satisfied = true;
- } else {
- /*
- * There are either still active callbacks, or
- * all fences for this @dep_atom has signaled,
- * but the worker that will queue the atom has
- * not yet run.
- *
- * Wait for the fences to signal and the fence
- * worker to run and handle @dep_atom. If
- * @dep_atom was completed due to error on
- * @katom, then the fence worker will pick up
- * the complete status and error code set on
- * @dep_atom above.
- */
- dep_satisfied = false;
- }
-#endif /* CONFIG_MALI_DMA_FENCE */
- if (dep0_valid && dep1_valid && dep_satisfied) {
+ if (dep0_valid && dep1_valid) {
dep_atom->in_jd_list = true;
list_add(&dep_atom->jd_item, out_list);
}
@@ -1065,9 +909,6 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
INIT_LIST_HEAD(&katom->queue);
INIT_LIST_HEAD(&katom->jd_item);
-#ifdef CONFIG_MALI_DMA_FENCE
- kbase_fence_dep_count_set(katom, -1);
-#endif
/* Don't do anything if there is a mess up with dependencies.
* This is done in a separate cycle to check both the dependencies at ones, otherwise
@@ -1289,12 +1130,6 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
if (queued && !IS_GPU_ATOM(katom))
return false;
-#ifdef CONFIG_MALI_DMA_FENCE
- if (kbase_fence_dep_count_read(katom) != -1)
- return false;
-
-#endif /* CONFIG_MALI_DMA_FENCE */
-
if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
if (kbase_process_soft_job(katom) == 0) {
kbase_finish_soft_job(katom);
@@ -1366,18 +1201,26 @@ int kbase_jd_submit(struct kbase_context *kctx,
return -EINVAL;
}
+ if (nr_atoms > BASE_JD_ATOM_COUNT) {
+ dev_dbg(kbdev->dev, "Invalid attempt to submit %u atoms at once for kctx %d_%d",
+ nr_atoms, kctx->tgid, kctx->id);
+ return -EINVAL;
+ }
+
/* All atoms submitted in this call have the same flush ID */
latest_flush = kbase_backend_get_current_flush_id(kbdev);
for (i = 0; i < nr_atoms; i++) {
- struct base_jd_atom user_atom;
+ struct base_jd_atom user_atom = {
+ .seq_nr = 0,
+ };
struct base_jd_fragment user_jc_incr;
struct kbase_jd_atom *katom;
if (unlikely(jd_atom_is_v2)) {
if (copy_from_user(&user_atom.jc, user_addr, sizeof(struct base_jd_atom_v2)) != 0) {
dev_dbg(kbdev->dev,
- "Invalid atom address %p passed to job_submit\n",
+ "Invalid atom address %pK passed to job_submit\n",
user_addr);
err = -EFAULT;
break;
@@ -1388,7 +1231,7 @@ int kbase_jd_submit(struct kbase_context *kctx,
} else {
if (copy_from_user(&user_atom, user_addr, stride) != 0) {
dev_dbg(kbdev->dev,
- "Invalid atom address %p passed to job_submit\n",
+ "Invalid atom address %pK passed to job_submit\n",
user_addr);
err = -EFAULT;
break;
@@ -1494,6 +1337,12 @@ while (false)
kbase_disjoint_event_potential(kbdev);
rt_mutex_unlock(&jctx->lock);
+ if (fatal_signal_pending(current)) {
+ dev_dbg(kbdev->dev, "Fatal signal pending for kctx %d_%d",
+ kctx->tgid, kctx->id);
+ /* We're being killed so the result code doesn't really matter */
+ return 0;
+ }
}
if (need_to_try_schedule_context)
@@ -1701,6 +1550,7 @@ static void jd_cancel_worker(struct kthread_work *data)
bool need_to_try_schedule_context;
bool attr_state_changed;
struct kbase_device *kbdev;
+ CSTD_UNUSED(need_to_try_schedule_context);
/* Soft jobs should never reach this function */
KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
@@ -1850,20 +1700,8 @@ void kbase_jd_zap_context(struct kbase_context *kctx)
kbase_cancel_soft_job(katom);
}
-
-#ifdef CONFIG_MALI_DMA_FENCE
- kbase_dma_fence_cancel_all_atoms(kctx);
-#endif
-
rt_mutex_unlock(&kctx->jctx.lock);
-#ifdef CONFIG_MALI_DMA_FENCE
- /* Flush dma-fence workqueue to ensure that any callbacks that may have
- * been queued are done before continuing.
- */
- flush_workqueue(kctx->dma_fence.wq);
-#endif
-
#if IS_ENABLED(CONFIG_DEBUG_FS)
kbase_debug_job_fault_kctx_unblock(kctx);
#endif
@@ -1892,11 +1730,10 @@ int kbase_jd_init(struct kbase_context *kctx)
kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID;
kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED;
-#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
+#if IS_ENABLED(CONFIG_SYNC_FILE)
kctx->jctx.atoms[i].dma_fence.context =
dma_fence_context_alloc(1);
atomic_set(&kctx->jctx.atoms[i].dma_fence.seqno, 0);
- INIT_LIST_HEAD(&kctx->jctx.atoms[i].dma_fence.callbacks);
#endif
}
diff --git a/mali_kbase/mali_kbase_jd_debugfs.c b/mali_kbase/mali_kbase_jd_debugfs.c
index 0d6230d..3e0a760 100644
--- a/mali_kbase/mali_kbase_jd_debugfs.c
+++ b/mali_kbase/mali_kbase_jd_debugfs.c
@@ -24,8 +24,7 @@
#include <linux/seq_file.h>
#include <mali_kbase.h>
#include <mali_kbase_jd_debugfs.h>
-#include <mali_kbase_dma_fence.h>
-#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#if IS_ENABLED(CONFIG_SYNC_FILE)
#include <mali_kbase_sync.h>
#endif
#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h>
@@ -38,7 +37,7 @@ struct kbase_jd_debugfs_depinfo {
static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom,
struct seq_file *sfile)
{
-#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#if IS_ENABLED(CONFIG_SYNC_FILE)
struct kbase_sync_fence_info info;
int res;
@@ -58,51 +57,7 @@ static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom,
default:
break;
}
-#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
-
-#ifdef CONFIG_MALI_DMA_FENCE
- if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
- struct kbase_fence_cb *cb;
-
- if (atom->dma_fence.fence) {
-#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
- struct fence *fence = atom->dma_fence.fence;
-#else
- struct dma_fence *fence = atom->dma_fence.fence;
-#endif
-
- seq_printf(sfile,
-#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE)
- "Sd(%llu#%u: %s) ",
-#else
- "Sd(%llu#%llu: %s) ",
-#endif
- fence->context, fence->seqno,
- dma_fence_is_signaled(fence) ? "signaled" :
- "active");
- }
-
- list_for_each_entry(cb, &atom->dma_fence.callbacks,
- node) {
-#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
- struct fence *fence = cb->fence;
-#else
- struct dma_fence *fence = cb->fence;
-#endif
-
- seq_printf(sfile,
-#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE)
- "Wd(%llu#%u: %s) ",
-#else
- "Wd(%llu#%llu: %s) ",
-#endif
- fence->context, fence->seqno,
- dma_fence_is_signaled(fence) ? "signaled" :
- "active");
- }
- }
-#endif /* CONFIG_MALI_DMA_FENCE */
-
+#endif /* CONFIG_SYNC_FILE */
}
static void kbasep_jd_debugfs_atom_deps(
diff --git a/mali_kbase/mali_kbase_jm.c b/mali_kbase/mali_kbase_jm.c
index 6cbd6f1..1ac5cd3 100644
--- a/mali_kbase/mali_kbase_jm.c
+++ b/mali_kbase/mali_kbase_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -37,15 +37,13 @@
*
* Return: true if slot can still be submitted on, false if slot is now full.
*/
-static bool kbase_jm_next_job(struct kbase_device *kbdev, int js,
- int nr_jobs_to_submit)
+static bool kbase_jm_next_job(struct kbase_device *kbdev, unsigned int js, int nr_jobs_to_submit)
{
struct kbase_context *kctx;
int i;
kctx = kbdev->hwaccess.active_kctx[js];
- dev_dbg(kbdev->dev,
- "Trying to run the next %d jobs in kctx %pK (s:%d)\n",
+ dev_dbg(kbdev->dev, "Trying to run the next %d jobs in kctx %pK (s:%u)\n",
nr_jobs_to_submit, (void *)kctx, js);
if (!kctx)
@@ -60,7 +58,7 @@ static bool kbase_jm_next_job(struct kbase_device *kbdev, int js,
kbase_backend_run_atom(kbdev, katom);
}
- dev_dbg(kbdev->dev, "Slot ringbuffer should now be full (s:%d)\n", js);
+ dev_dbg(kbdev->dev, "Slot ringbuffer should now be full (s:%u)\n", js);
return false;
}
@@ -72,7 +70,7 @@ u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask)
dev_dbg(kbdev->dev, "JM kick slot mask 0x%x\n", js_mask);
while (js_mask) {
- int js = ffs(js_mask) - 1;
+ unsigned int js = ffs(js_mask) - 1;
int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js);
if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit))
@@ -111,14 +109,14 @@ void kbase_jm_try_kick_all(struct kbase_device *kbdev)
void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
{
- int js;
+ unsigned int js;
lockdep_assert_held(&kbdev->hwaccess_lock);
for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
if (kbdev->hwaccess.active_kctx[js] == kctx) {
- dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n",
- (void *)kctx, js);
+ dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%u)\n", (void *)kctx,
+ js);
kbdev->hwaccess.active_kctx[js] = NULL;
}
}
diff --git a/mali_kbase/mali_kbase_js.c b/mali_kbase/mali_kbase_js.c
index 598a11b..5dd7813 100644
--- a/mali_kbase/mali_kbase_js.c
+++ b/mali_kbase/mali_kbase_js.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -34,6 +34,7 @@
#include "mali_kbase_jm.h"
#include "mali_kbase_hwaccess_jm.h"
+#include <mali_kbase_hwaccess_time.h>
#include <linux/priority_control_manager.h>
/*
@@ -77,8 +78,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
struct kbase_device *kbdev, struct kbase_context *kctx,
struct kbasep_js_atom_retained_state *katom_retained_state);
-static int kbase_js_get_slot(struct kbase_device *kbdev,
- struct kbase_jd_atom *katom);
+static unsigned int kbase_js_get_slot(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
kbasep_js_ctx_job_cb *callback);
@@ -151,8 +151,7 @@ static void kbase_js_sync_timers(struct kbase_device *kbdev)
*
* Return: true if there are no atoms to pull, false otherwise.
*/
-static inline bool
-jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
+static inline bool jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, unsigned int js, int prio)
{
bool none_to_pull;
struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
@@ -161,9 +160,8 @@ jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
none_to_pull = RB_EMPTY_ROOT(&rb->runnable_tree);
- dev_dbg(kctx->kbdev->dev,
- "Slot %d (prio %d) is %spullable in kctx %pK\n",
- js, prio, none_to_pull ? "not " : "", kctx);
+ dev_dbg(kctx->kbdev->dev, "Slot %u (prio %d) is %spullable in kctx %pK\n", js, prio,
+ none_to_pull ? "not " : "", kctx);
return none_to_pull;
}
@@ -179,8 +177,7 @@ jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
* Return: true if the ring buffers for all priorities have no pullable atoms,
* false otherwise.
*/
-static inline bool
-jsctx_rb_none_to_pull(struct kbase_context *kctx, int js)
+static inline bool jsctx_rb_none_to_pull(struct kbase_context *kctx, unsigned int js)
{
int prio;
@@ -212,8 +209,8 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js)
*
* The HW access lock must always be held when calling this function.
*/
-static void jsctx_queue_foreach_prio(struct kbase_context *kctx, int js,
- int prio, kbasep_js_ctx_job_cb *callback)
+static void jsctx_queue_foreach_prio(struct kbase_context *kctx, unsigned int js, int prio,
+ kbasep_js_ctx_job_cb *callback)
{
struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
@@ -272,7 +269,7 @@ static void jsctx_queue_foreach_prio(struct kbase_context *kctx, int js,
* jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback
* for each entry, and remove the entry from the queue.
*/
-static inline void jsctx_queue_foreach(struct kbase_context *kctx, int js,
+static inline void jsctx_queue_foreach(struct kbase_context *kctx, unsigned int js,
kbasep_js_ctx_job_cb *callback)
{
int prio;
@@ -293,15 +290,14 @@ static inline void jsctx_queue_foreach(struct kbase_context *kctx, int js,
*
* Return: Pointer to next atom in buffer, or NULL if there is no atom.
*/
-static inline struct kbase_jd_atom *
-jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
+static inline struct kbase_jd_atom *jsctx_rb_peek_prio(struct kbase_context *kctx, unsigned int js,
+ int prio)
{
struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
struct rb_node *node;
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
- dev_dbg(kctx->kbdev->dev,
- "Peeking runnable tree of kctx %pK for prio %d (s:%d)\n",
+ dev_dbg(kctx->kbdev->dev, "Peeking runnable tree of kctx %pK for prio %d (s:%u)\n",
(void *)kctx, prio, js);
node = rb_first(&rb->runnable_tree);
@@ -326,8 +322,7 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
*
* Return: Pointer to next atom in buffer, or NULL if there is no atom.
*/
-static inline struct kbase_jd_atom *
-jsctx_rb_peek(struct kbase_context *kctx, int js)
+static inline struct kbase_jd_atom *jsctx_rb_peek(struct kbase_context *kctx, unsigned int js)
{
int prio;
@@ -358,7 +353,7 @@ static inline void
jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
{
int prio = katom->sched_priority;
- int js = katom->slot_nr;
+ unsigned int js = katom->slot_nr;
struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
@@ -377,14 +372,14 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom)
{
struct kbase_device *kbdev = kctx->kbdev;
int prio = katom->sched_priority;
- int js = katom->slot_nr;
+ unsigned int js = katom->slot_nr;
struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL;
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
- dev_dbg(kbdev->dev, "Adding atom %pK to runnable tree of kctx %pK (s:%d)\n",
- (void *)katom, (void *)kctx, js);
+ dev_dbg(kbdev->dev, "Adding atom %pK to runnable tree of kctx %pK (s:%u)\n", (void *)katom,
+ (void *)kctx, js);
while (*new) {
struct kbase_jd_atom *entry = container_of(*new,
@@ -425,15 +420,11 @@ jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
jsctx_tree_add(kctx, katom);
}
-static bool kbase_js_ctx_pullable(struct kbase_context *kctx,
- int js,
- bool is_scheduled);
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx, unsigned int js, bool is_scheduled);
static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
- struct kbase_context *kctx,
- int js);
+ struct kbase_context *kctx, unsigned int js);
static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
- struct kbase_context *kctx,
- int js);
+ struct kbase_context *kctx, unsigned int js);
typedef bool(katom_ordering_func)(const struct kbase_jd_atom *,
const struct kbase_jd_atom *);
@@ -541,6 +532,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev)
jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING;
jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS;
atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT);
+ jsdd->js_free_wait_time_ms = kbase_get_timeout_ms(kbdev, JM_DEFAULT_JS_FREE_TIMEOUT);
dev_dbg(kbdev->dev, "JS Config Attribs: ");
dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u",
@@ -565,6 +557,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev)
jsdd->ctx_timeslice_ns);
dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i",
atomic_read(&jsdd->soft_job_timeout_ms));
+ dev_dbg(kbdev->dev, "\tjs_free_wait_time_ms:%u", jsdd->js_free_wait_time_ms);
if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss &&
jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss &&
@@ -619,7 +612,9 @@ void kbasep_js_devdata_halt(struct kbase_device *kbdev)
void kbasep_js_devdata_term(struct kbase_device *kbdev)
{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, };
+ CSTD_UNUSED(js_devdata);
KBASE_DEBUG_ASSERT(kbdev != NULL);
@@ -637,11 +632,10 @@ int kbasep_js_kctx_init(struct kbase_context *const kctx)
{
struct kbasep_js_kctx_info *js_kctx_info;
int i, j;
+ CSTD_UNUSED(js_kctx_info);
KBASE_DEBUG_ASSERT(kctx != NULL);
- KBASE_DEBUG_ASSERT(kbdev != NULL);
-
kbase_ctx_sched_init_ctx(kctx);
for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
@@ -681,9 +675,11 @@ int kbasep_js_kctx_init(struct kbase_context *const kctx)
void kbasep_js_kctx_term(struct kbase_context *kctx)
{
struct kbase_device *kbdev;
- int js;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ unsigned int js;
bool update_ctx_count = false;
unsigned long flags;
+ CSTD_UNUSED(js_kctx_info);
KBASE_DEBUG_ASSERT(kctx != NULL);
@@ -726,8 +722,8 @@ void kbasep_js_kctx_term(struct kbase_context *kctx)
*/
/* Should not normally use directly - use kbase_jsctx_slot_atom_pulled_dec() instead */
-static void kbase_jsctx_slot_prio_blocked_clear(struct kbase_context *kctx,
- int js, int sched_prio)
+static void kbase_jsctx_slot_prio_blocked_clear(struct kbase_context *kctx, unsigned int js,
+ int sched_prio)
{
struct kbase_jsctx_slot_tracking *slot_tracking =
&kctx->slot_tracking[js];
@@ -739,7 +735,7 @@ static void kbase_jsctx_slot_prio_blocked_clear(struct kbase_context *kctx,
NULL, 0, js, (unsigned int)sched_prio);
}
-static int kbase_jsctx_slot_atoms_pulled(struct kbase_context *kctx, int js)
+static int kbase_jsctx_slot_atoms_pulled(struct kbase_context *kctx, unsigned int js)
{
return atomic_read(&kctx->slot_tracking[js].atoms_pulled);
}
@@ -749,7 +745,7 @@ static int kbase_jsctx_slot_atoms_pulled(struct kbase_context *kctx, int js)
* - that priority level is blocked
* - or, any higher priority level is blocked
*/
-static bool kbase_jsctx_slot_prio_is_blocked(struct kbase_context *kctx, int js,
+static bool kbase_jsctx_slot_prio_is_blocked(struct kbase_context *kctx, unsigned int js,
int sched_prio)
{
struct kbase_jsctx_slot_tracking *slot_tracking =
@@ -789,7 +785,7 @@ static bool kbase_jsctx_slot_prio_is_blocked(struct kbase_context *kctx, int js,
static int kbase_jsctx_slot_atom_pulled_inc(struct kbase_context *kctx,
const struct kbase_jd_atom *katom)
{
- int js = katom->slot_nr;
+ unsigned int js = katom->slot_nr;
int sched_prio = katom->sched_priority;
struct kbase_jsctx_slot_tracking *slot_tracking =
&kctx->slot_tracking[js];
@@ -798,7 +794,7 @@ static int kbase_jsctx_slot_atom_pulled_inc(struct kbase_context *kctx,
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
WARN(kbase_jsctx_slot_prio_is_blocked(kctx, js, sched_prio),
- "Should not have pulled atoms for slot %d from a context that is blocked at priority %d or higher",
+ "Should not have pulled atoms for slot %u from a context that is blocked at priority %d or higher",
js, sched_prio);
nr_atoms_pulled = atomic_inc_return(&kctx->atoms_pulled_all_slots);
@@ -827,7 +823,7 @@ static int kbase_jsctx_slot_atom_pulled_inc(struct kbase_context *kctx,
static bool kbase_jsctx_slot_atom_pulled_dec(struct kbase_context *kctx,
const struct kbase_jd_atom *katom)
{
- int js = katom->slot_nr;
+ unsigned int js = katom->slot_nr;
int sched_prio = katom->sched_priority;
int atoms_pulled_pri;
struct kbase_jsctx_slot_tracking *slot_tracking =
@@ -876,14 +872,12 @@ static bool kbase_jsctx_slot_atom_pulled_dec(struct kbase_context *kctx,
* Return: true if caller should call kbase_backend_ctx_count_changed()
*/
static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
- struct kbase_context *kctx,
- int js)
+ struct kbase_context *kctx, unsigned int js)
{
bool ret = false;
lockdep_assert_held(&kbdev->hwaccess_lock);
- dev_dbg(kbdev->dev, "Add pullable tail kctx %pK (s:%d)\n",
- (void *)kctx, js);
+ dev_dbg(kbdev->dev, "Add pullable tail kctx %pK (s:%u)\n", (void *)kctx, js);
if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
@@ -918,14 +912,13 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
*
* Return: true if caller should call kbase_backend_ctx_count_changed()
*/
-static bool kbase_js_ctx_list_add_pullable_head_nolock(
- struct kbase_device *kbdev, struct kbase_context *kctx, int js)
+static bool kbase_js_ctx_list_add_pullable_head_nolock(struct kbase_device *kbdev,
+ struct kbase_context *kctx, unsigned int js)
{
bool ret = false;
lockdep_assert_held(&kbdev->hwaccess_lock);
- dev_dbg(kbdev->dev, "Add pullable head kctx %pK (s:%d)\n",
- (void *)kctx, js);
+ dev_dbg(kbdev->dev, "Add pullable head kctx %pK (s:%u)\n", (void *)kctx, js);
if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
@@ -963,8 +956,7 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock(
* Return: true if caller should call kbase_backend_ctx_count_changed()
*/
static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
- struct kbase_context *kctx,
- int js)
+ struct kbase_context *kctx, unsigned int js)
{
bool ret;
unsigned long flags;
@@ -994,14 +986,12 @@ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
* Return: true if caller should call kbase_backend_ctx_count_changed()
*/
static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
- struct kbase_context *kctx,
- int js)
+ struct kbase_context *kctx, unsigned int js)
{
bool ret = false;
lockdep_assert_held(&kbdev->hwaccess_lock);
- dev_dbg(kbdev->dev, "Add unpullable tail kctx %pK (s:%d)\n",
- (void *)kctx, js);
+ dev_dbg(kbdev->dev, "Add unpullable tail kctx %pK (s:%u)\n", (void *)kctx, js);
list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
&kbdev->js_data.ctx_list_unpullable[js][kctx->priority]);
@@ -1036,9 +1026,8 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
*
* Return: true if caller should call kbase_backend_ctx_count_changed()
*/
-static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev,
- struct kbase_context *kctx,
- int js)
+static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, struct kbase_context *kctx,
+ unsigned int js)
{
bool ret = false;
@@ -1074,9 +1063,8 @@ static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev,
* Return: Context to use for specified slot.
* NULL if no contexts present for specified slot
*/
-static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(
- struct kbase_device *kbdev,
- int js)
+static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(struct kbase_device *kbdev,
+ unsigned int js)
{
struct kbase_context *kctx;
int i;
@@ -1092,9 +1080,8 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(
jctx.sched_info.ctx.ctx_list_entry[js]);
list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
- dev_dbg(kbdev->dev,
- "Popped %pK from the pullable queue (s:%d)\n",
- (void *)kctx, js);
+ dev_dbg(kbdev->dev, "Popped %pK from the pullable queue (s:%u)\n", (void *)kctx,
+ js);
return kctx;
}
return NULL;
@@ -1109,8 +1096,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(
* Return: Context to use for specified slot.
* NULL if no contexts present for specified slot
*/
-static struct kbase_context *kbase_js_ctx_list_pop_head(
- struct kbase_device *kbdev, int js)
+static struct kbase_context *kbase_js_ctx_list_pop_head(struct kbase_device *kbdev, unsigned int js)
{
struct kbase_context *kctx;
unsigned long flags;
@@ -1134,8 +1120,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head(
* Return: true if context can be pulled from on specified slot
* false otherwise
*/
-static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
- bool is_scheduled)
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx, unsigned int js, bool is_scheduled)
{
struct kbasep_js_device_data *js_devdata;
struct kbase_jd_atom *katom;
@@ -1154,8 +1139,7 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
}
katom = jsctx_rb_peek(kctx, js);
if (!katom) {
- dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%d)\n",
- (void *)kctx, js);
+ dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%u)\n", (void *)kctx, js);
return false; /* No pullable atoms */
}
if (kbase_jsctx_slot_prio_is_blocked(kctx, js, katom->sched_priority)) {
@@ -1163,7 +1147,7 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
kctx->kbdev, JS_SLOT_PRIO_IS_BLOCKED, kctx, katom,
katom->jc, js, (unsigned int)katom->sched_priority);
dev_dbg(kbdev->dev,
- "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%d)\n",
+ "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%u)\n",
(void *)kctx, katom->sched_priority, js);
return false;
}
@@ -1184,14 +1168,14 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) {
dev_dbg(kbdev->dev,
- "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%d)\n",
+ "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%u)\n",
(void *)katom, js);
return false;
}
}
- dev_dbg(kbdev->dev, "JS: Atom %pK is pullable in kctx %pK (s:%d)\n",
- (void *)katom, (void *)kctx, js);
+ dev_dbg(kbdev->dev, "JS: Atom %pK is pullable in kctx %pK (s:%u)\n", (void *)katom,
+ (void *)kctx, js);
return true;
}
@@ -1202,7 +1186,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
struct kbase_device *kbdev = kctx->kbdev;
bool ret = true;
bool has_dep = false, has_x_dep = false;
- int js = kbase_js_get_slot(kbdev, katom);
+ unsigned int js = kbase_js_get_slot(kbdev, katom);
int prio = katom->sched_priority;
int i;
@@ -1210,7 +1194,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
if (dep_atom) {
- int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+ unsigned int dep_js = kbase_js_get_slot(kbdev, dep_atom);
int dep_prio = dep_atom->sched_priority;
dev_dbg(kbdev->dev,
@@ -1365,7 +1349,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority)
{
struct kbase_device *kbdev = kctx->kbdev;
- int js;
+ unsigned int js;
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -1791,10 +1775,12 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
unsigned long flags;
struct kbasep_js_device_data *js_devdata;
struct kbasep_js_kctx_info *js_kctx_info;
+ int kctx_as_nr = kctx->as_nr;
kbasep_js_release_result release_result = 0u;
bool runpool_ctx_attr_change = false;
int new_ref_count;
+ CSTD_UNUSED(kctx_as_nr);
KBASE_DEBUG_ASSERT(kbdev != NULL);
KBASE_DEBUG_ASSERT(kctx != NULL);
@@ -2068,9 +2054,8 @@ void kbase_js_set_timeouts(struct kbase_device *kbdev)
kbase_backend_timeouts_changed(kbdev);
}
-static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
- struct kbase_context *kctx,
- int js)
+static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, struct kbase_context *kctx,
+ unsigned int js)
{
struct kbasep_js_device_data *js_devdata;
struct kbasep_js_kctx_info *js_kctx_info;
@@ -2078,7 +2063,7 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
bool kctx_suspended = false;
int as_nr;
- dev_dbg(kbdev->dev, "Scheduling kctx %pK (s:%d)\n", kctx, js);
+ dev_dbg(kbdev->dev, "Scheduling kctx %pK (s:%u)\n", kctx, js);
js_devdata = &kbdev->js_data;
js_kctx_info = &kctx->jctx.sched_info;
@@ -2105,8 +2090,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
WARN_ON(as_nr == KBASEP_AS_NR_INVALID);
}
}
- if (as_nr == KBASEP_AS_NR_INVALID)
- return false; /* No address spaces currently available */
+ if ((as_nr < 0) || (as_nr >= BASE_MAX_NR_AS))
+ return false; /* No address space currently available */
/*
* Atomic transaction on the Context and Run Pool begins
@@ -2175,9 +2160,11 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
#else
if (kbase_pm_is_suspending(kbdev)) {
#endif
+ /* Cause it to leave at some later point */
+ bool retained;
+ CSTD_UNUSED(retained);
kbase_ctx_sched_inc_refcount_nolock(kctx);
- KBASE_DEBUG_ASSERT(retained);
kbasep_js_clear_submit_allowed(js_devdata, kctx);
kctx_suspended = true;
@@ -2210,9 +2197,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
return true;
}
-static bool kbase_js_use_ctx(struct kbase_device *kbdev,
- struct kbase_context *kctx,
- int js)
+static bool kbase_js_use_ctx(struct kbase_device *kbdev, struct kbase_context *kctx,
+ unsigned int js)
{
unsigned long flags;
@@ -2220,9 +2206,7 @@ static bool kbase_js_use_ctx(struct kbase_device *kbdev,
if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
kbase_backend_use_ctx_sched(kbdev, kctx, js)) {
-
- dev_dbg(kbdev->dev,
- "kctx %pK already has ASID - mark as active (s:%d)\n",
+ dev_dbg(kbdev->dev, "kctx %pK already has ASID - mark as active (s:%u)\n",
(void *)kctx, js);
if (kbdev->hwaccess.active_kctx[js] != kctx) {
@@ -2489,8 +2473,7 @@ bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
return true;
}
-static int kbase_js_get_slot(struct kbase_device *kbdev,
- struct kbase_jd_atom *katom)
+static unsigned int kbase_js_get_slot(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
{
if (katom->core_req & BASE_JD_REQ_JOB_SLOT)
return katom->jobslot;
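The recurring change across this file is switching job-slot indices from int to unsigned int, which keeps them consistent with the %u format specifiers introduced above and with the unsigned mask arithmetic used elsewhere. A minimal sketch of why the unsigned form is preferable; slot_bit() and MAX_SLOTS are illustrative names only, not part of the driver:

#define MAX_SLOTS 16u	/* illustrative bound only */

static inline u32 slot_bit(unsigned int js)
{
	/* With a signed index, (1 << js) is undefined for js < 0 and a
	 * comparison against an unsigned slot count draws -Wsign-compare.
	 * An unsigned index avoids both, and 1u << js keeps the mask
	 * arithmetic unsigned throughout.
	 */
	WARN_ON(js >= MAX_SLOTS);
	return 1u << js;
}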
@@ -2529,11 +2512,10 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
(katom->pre_dep && (katom->pre_dep->atom_flags &
KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) {
int prio = katom->sched_priority;
- int js = katom->slot_nr;
+ unsigned int js = katom->slot_nr;
struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
- dev_dbg(kctx->kbdev->dev, "Add atom %pK to X_DEP list (s:%d)\n",
- (void *)katom, js);
+ dev_dbg(kctx->kbdev->dev, "Add atom %pK to X_DEP list (s:%u)\n", (void *)katom, js);
list_add_tail(&katom->queue, &queue->x_dep_head);
katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
@@ -2624,8 +2606,8 @@ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom)
*
* Context: Caller must hold the HW access lock
*/
-static void kbase_js_evict_deps(struct kbase_context *kctx,
- struct kbase_jd_atom *katom, int js, int prio)
+static void kbase_js_evict_deps(struct kbase_context *kctx, struct kbase_jd_atom *katom,
+ unsigned int js, int prio)
{
struct kbase_jd_atom *x_dep = katom->x_post_dep;
struct kbase_jd_atom *next_katom = katom->post_dep;
@@ -2657,7 +2639,7 @@ static void kbase_js_evict_deps(struct kbase_context *kctx,
}
}
-struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, unsigned int js)
{
struct kbase_jd_atom *katom;
struct kbasep_js_device_data *js_devdata;
@@ -2667,8 +2649,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
KBASE_DEBUG_ASSERT(kctx);
kbdev = kctx->kbdev;
- dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %pK (s:%d)\n",
- (void *)kctx, js);
+ dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %pK (s:%u)\n", (void *)kctx, js);
js_devdata = &kbdev->js_data;
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -2687,13 +2668,12 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
katom = jsctx_rb_peek(kctx, js);
if (!katom) {
- dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%d)\n",
- (void *)kctx, js);
+ dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%u)\n", (void *)kctx, js);
return NULL;
}
if (kbase_jsctx_slot_prio_is_blocked(kctx, js, katom->sched_priority)) {
dev_dbg(kbdev->dev,
- "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%d)\n",
+ "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%u)\n",
(void *)kctx, katom->sched_priority, js);
return NULL;
}
@@ -2727,7 +2707,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
kbase_backend_nr_atoms_on_slot(kbdev, js)) {
dev_dbg(kbdev->dev,
- "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%d)\n",
+ "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%u)\n",
(void *)katom, js);
return NULL;
}
@@ -2750,7 +2730,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
katom->ticks = 0;
- dev_dbg(kbdev->dev, "JS: successfully pulled atom %pK from kctx %pK (s:%d)\n",
+ dev_dbg(kbdev->dev, "JS: successfully pulled atom %pK from kctx %pK (s:%u)\n",
(void *)katom, (void *)kctx, js);
return katom;
@@ -3347,7 +3327,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
if (!kbasep_js_is_submit_allowed(js_devdata, kctx) &&
!kbase_jsctx_atoms_pulled(kctx) &&
!kbase_ctx_flag(kctx, KCTX_DYING)) {
- int js;
+ unsigned int js;
kbasep_js_set_submit_allowed(js_devdata, kctx);
@@ -3359,7 +3339,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
}
} else if (katom->x_post_dep &&
kbasep_js_is_submit_allowed(js_devdata, kctx)) {
- int js;
+ unsigned int js;
for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
if (kbase_js_ctx_pullable(kctx, js, true))
@@ -3637,13 +3617,13 @@ done:
return ret;
}
-void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
+void kbase_js_sched(struct kbase_device *kbdev, unsigned int js_mask)
{
struct kbasep_js_device_data *js_devdata;
struct kbase_context *last_active[BASE_JM_MAX_NR_SLOTS];
bool timer_sync = false;
bool ctx_waiting[BASE_JM_MAX_NR_SLOTS];
- int js;
+ unsigned int js;
KBASE_TLSTREAM_TL_JS_SCHED_START(kbdev, 0);
@@ -3689,18 +3669,15 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
if (!kctx) {
js_mask &= ~(1 << js);
- dev_dbg(kbdev->dev,
- "No kctx on pullable list (s:%d)\n",
- js);
+ dev_dbg(kbdev->dev, "No kctx on pullable list (s:%u)\n", js);
break;
}
if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) {
context_idle = true;
- dev_dbg(kbdev->dev,
- "kctx %pK is not active (s:%d)\n",
- (void *)kctx, js);
+ dev_dbg(kbdev->dev, "kctx %pK is not active (s:%u)\n", (void *)kctx,
+ js);
if (kbase_js_defer_activate_for_slot(kctx, js)) {
bool ctx_count_changed;
@@ -3723,8 +3700,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
if (kbase_pm_context_active_handle_suspend(
kbdev,
KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
- dev_dbg(kbdev->dev,
- "Suspend pending (s:%d)\n", js);
+ dev_dbg(kbdev->dev, "Suspend pending (s:%u)\n", js);
/* Suspend pending - return context to
* queue and stop scheduling
*/
@@ -3785,16 +3761,13 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
kbase_ctx_flag_clear(kctx, KCTX_PULLED);
if (!kbase_jm_kick(kbdev, 1 << js)) {
- dev_dbg(kbdev->dev,
- "No more jobs can be submitted (s:%d)\n",
- js);
+ dev_dbg(kbdev->dev, "No more jobs can be submitted (s:%u)\n", js);
js_mask &= ~(1 << js);
}
if (!kbase_ctx_flag(kctx, KCTX_PULLED)) {
bool pullable;
- dev_dbg(kbdev->dev,
- "No atoms pulled from kctx %pK (s:%d)\n",
+ dev_dbg(kbdev->dev, "No atoms pulled from kctx %pK (s:%u)\n",
(void *)kctx, js);
pullable = kbase_js_ctx_pullable(kctx, js,
@@ -3878,8 +3851,8 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
if (kbdev->hwaccess.active_kctx[js] == last_active[js] &&
ctx_waiting[js]) {
- dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n",
- (void *)last_active[js], js);
+ dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%u)\n",
+ (void *)last_active[js], js);
kbdev->hwaccess.active_kctx[js] = NULL;
}
}
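kbase_js_sched() walks a bitmask of slots and clears bits as individual slots are exhausted, which is why js_mask also becomes unsigned. A hedged sketch of that iteration pattern in isolation; process_slot() is a stand-in for the per-slot scheduling work, not a driver function:

static void process_slot(unsigned int js)
{
	/* stand-in for the per-slot scheduling work */
}

static void sched_slots(unsigned int js_mask)
{
	while (js_mask) {
		unsigned int js = ffs(js_mask) - 1;	/* lowest pending slot */

		process_slot(js);
		js_mask &= ~(1u << js);			/* done with this slot */
	}
}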
@@ -3950,7 +3923,7 @@ void kbase_js_zap_context(struct kbase_context *kctx)
*/
if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) {
unsigned long flags;
- int js;
+ unsigned int js;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
@@ -3989,6 +3962,8 @@ void kbase_js_zap_context(struct kbase_context *kctx)
rt_mutex_unlock(&kctx->jctx.lock);
} else {
unsigned long flags;
+ bool was_retained;
+ CSTD_UNUSED(was_retained);
/* Case c: didn't evict, but it is scheduled - it's in the Run
* Pool
@@ -4072,7 +4047,7 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
{
struct kbase_device *kbdev;
unsigned long flags;
- u32 js;
+ unsigned int js;
kbdev = kctx->kbdev;
diff --git a/mali_kbase/mali_kbase_kinstr_jm.c b/mali_kbase/mali_kbase_kinstr_jm.c
index 34ba196..ca74540 100644
--- a/mali_kbase/mali_kbase_kinstr_jm.c
+++ b/mali_kbase/mali_kbase_kinstr_jm.c
@@ -48,6 +48,11 @@
#include <linux/version_compat_defs.h>
#include <linux/wait.h>
+/* Explicitly include epoll header for old kernels. Not required from 4.16. */
+#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE
+#include <uapi/linux/eventpoll.h>
+#endif
+
/* Define static_assert().
*
* The macro was introduced in kernel 5.1. But older vendor kernels may define
@@ -61,10 +66,6 @@
#define __static_assert(e, msg, ...) _Static_assert(e, msg)
#endif
-#ifndef ENOTSUP
-#define ENOTSUP EOPNOTSUPP
-#endif
-
/* The module printing prefix */
#define PR_ "mali_kbase_kinstr_jm: "
@@ -224,11 +225,8 @@ static inline bool reader_changes_is_valid_size(const size_t size)
*
* Return:
* (0, U16_MAX] - the number of data elements allocated
- * -EINVAL - a pointer was invalid
- * -ENOTSUP - we do not support allocation of the context
* -ERANGE - the requested memory size was invalid
* -ENOMEM - could not allocate the memory
- * -EADDRINUSE - the buffer memory was already allocated
*/
static int reader_changes_init(struct reader_changes *const changes,
const size_t size)
@@ -623,31 +621,34 @@ exit:
*
* Return:
* * 0 - no data ready
- * * POLLIN - state changes have been buffered
- * * -EBADF - the file descriptor did not have an attached reader
- * * -EINVAL - the IO control arguments were invalid
+ * * EPOLLIN | EPOLLRDNORM - state changes have been buffered
+ * * EPOLLHUP | EPOLLERR - IO control arguments were invalid or the file
+ * descriptor did not have an attached reader.
*/
static __poll_t reader_poll(struct file *const file,
struct poll_table_struct *const wait)
{
struct reader *reader;
struct reader_changes *changes;
+ __poll_t mask = 0;
if (unlikely(!file || !wait))
- return (__poll_t)-EINVAL;
+ return EPOLLHUP | EPOLLERR;
reader = file->private_data;
if (unlikely(!reader))
- return (__poll_t)-EBADF;
+ return EPOLLHUP | EPOLLERR;
changes = &reader->changes;
-
if (reader_changes_count(changes) >= changes->threshold)
- return POLLIN;
+ return EPOLLIN | EPOLLRDNORM;
poll_wait(file, &reader->wait_queue, wait);
- return (reader_changes_count(changes) > 0) ? POLLIN : 0;
+ if (reader_changes_count(changes) > 0)
+ mask |= EPOLLIN | EPOLLRDNORM;
+
+ return mask;
}
/* The file operations virtual function table */
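Both poll handlers touched by this patch stop returning negative errno values (which are not valid in a __poll_t) and instead return EPOLL* event masks, with EPOLLHUP | EPOLLERR standing in for the old error paths. A minimal sketch of the corrected shape, assuming a hypothetical device type with a waitqueue and a ready flag:

#include <linux/poll.h>

struct example_dev {			/* hypothetical device state */
	wait_queue_head_t waitq;
	bool ready;
};

static __poll_t example_poll(struct file *filp, struct poll_table_struct *wait)
{
	struct example_dev *dev = filp->private_data;
	__poll_t mask = 0;

	if (unlikely(!dev))
		return EPOLLHUP | EPOLLERR;	/* errors as events, not errno */

	poll_wait(filp, &dev->waitq, wait);	/* register for wakeups */

	if (READ_ONCE(dev->ready))		/* readable without blocking? */
		mask |= EPOLLIN | EPOLLRDNORM;

	return mask;
}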
diff --git a/mali_kbase/mali_kbase_kinstr_prfcnt.c b/mali_kbase/mali_kbase_kinstr_prfcnt.c
index b7c8a16..8d52689 100644
--- a/mali_kbase/mali_kbase_kinstr_prfcnt.c
+++ b/mali_kbase/mali_kbase_kinstr_prfcnt.c
@@ -21,8 +21,8 @@
#include "mali_kbase.h"
#include "mali_kbase_kinstr_prfcnt.h"
-#include "mali_kbase_hwcnt_virtualizer.h"
-#include "mali_kbase_hwcnt_gpu.h"
+#include "hwcnt/mali_kbase_hwcnt_virtualizer.h"
+#include "hwcnt/mali_kbase_hwcnt_gpu.h"
#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h>
#include "mali_malisw.h"
#include "mali_kbase_debug.h"
@@ -36,9 +36,15 @@
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/slab.h>
+#include <linux/overflow.h>
#include <linux/version_compat_defs.h>
#include <linux/workqueue.h>
+/* Explicitly include epoll header for old kernels. Not required from 4.16. */
+#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE
+#include <uapi/linux/eventpoll.h>
+#endif
+
/* The minimum allowed interval between dumps, in nanoseconds
* (equivalent to 10KHz)
*/
@@ -47,9 +53,6 @@
/* The maximum allowed buffers per client */
#define MAX_BUFFER_COUNT 32
-/* The module printing prefix */
-#define KINSTR_PRFCNT_PREFIX "mali_kbase_kinstr_prfcnt: "
-
/**
* struct kbase_kinstr_prfcnt_context - IOCTL interface for userspace hardware
* counters.
@@ -118,16 +121,31 @@ struct kbase_kinstr_prfcnt_client_config {
};
/**
- * struct kbase_kinstr_prfcnt_async - Asynchronous sampling operation to
- * carry out for a kinstr_prfcnt_client.
- * @dump_work: Worker for performing asynchronous counter dumps.
- * @user_data: User data for asynchronous dump in progress.
- * @ts_end_ns: End timestamp of most recent async dump.
+ * enum kbase_kinstr_prfcnt_client_init_state - A list of
+ * initialisation states that the
+ * kinstr_prfcnt client can be at
+ * during initialisation. Useful
+ * for terminating a partially
+ * initialised client.
+ *
+ * @KINSTR_PRFCNT_UNINITIALISED : Client is uninitialised
+ * @KINSTR_PRFCNT_PARSE_SETUP : Parse the setup session
+ * @KINSTR_PRFCNT_ENABLE_MAP : Allocate memory for enable map
+ * @KINSTR_PRFCNT_DUMP_BUFFER : Allocate memory for dump buffer
+ * @KINSTR_PRFCNT_SAMPLE_ARRAY : Allocate memory for and initialise sample array
+ * @KINSTR_PRFCNT_VIRTUALIZER_CLIENT : Create virtualizer client
+ * @KINSTR_PRFCNT_WAITQ_MUTEX : Create and initialise mutex and waitqueue
+ * @KINSTR_PRFCNT_INITIALISED : Client is fully initialised
*/
-struct kbase_kinstr_prfcnt_async {
- struct work_struct dump_work;
- u64 user_data;
- u64 ts_end_ns;
+enum kbase_kinstr_prfcnt_client_init_state {
+ KINSTR_PRFCNT_UNINITIALISED,
+ KINSTR_PRFCNT_PARSE_SETUP = KINSTR_PRFCNT_UNINITIALISED,
+ KINSTR_PRFCNT_ENABLE_MAP,
+ KINSTR_PRFCNT_DUMP_BUFFER,
+ KINSTR_PRFCNT_SAMPLE_ARRAY,
+ KINSTR_PRFCNT_VIRTUALIZER_CLIENT,
+ KINSTR_PRFCNT_WAITQ_MUTEX,
+ KINSTR_PRFCNT_INITIALISED
};
/**
@@ -137,9 +155,7 @@ struct kbase_kinstr_prfcnt_async {
* @hvcli: Hardware counter virtualizer client.
* @node: Node used to attach this client to list in
* kinstr_prfcnt context.
- * @cmd_sync_lock: Lock coordinating the reader interface for commands
- * that need interacting with the async sample dump
- * worker thread.
+ * @cmd_sync_lock: Lock coordinating the reader interface for commands.
* @next_dump_time_ns: Time in ns when this client's next periodic dump must
* occur. If 0, not a periodic client.
* @dump_interval_ns: Interval between periodic dumps. If 0, not a periodic
@@ -160,15 +176,10 @@ struct kbase_kinstr_prfcnt_async {
* @waitq: Client's notification queue.
* @sample_size: Size of the data required for one sample, in bytes.
* @sample_count: Number of samples the client is able to capture.
- * @sync_sample_count: Number of available spaces for synchronous samples.
- * It can differ from sample_count if asynchronous
- * sample requests are reserving space in the buffer.
* @user_data: User data associated with the session.
* This is set when the session is started and stopped.
* This value is ignored for control commands that
* provide another value.
- * @async: Asynchronous sampling operations to carry out in this
- * client's session.
*/
struct kbase_kinstr_prfcnt_client {
struct kbase_kinstr_prfcnt_context *kinstr_ctx;
@@ -189,9 +200,7 @@ struct kbase_kinstr_prfcnt_client {
wait_queue_head_t waitq;
size_t sample_size;
size_t sample_count;
- atomic_t sync_sample_count;
u64 user_data;
- struct kbase_kinstr_prfcnt_async async;
};
static struct prfcnt_enum_item kinstr_prfcnt_supported_requests[] = {
@@ -224,8 +233,8 @@ static struct prfcnt_enum_item kinstr_prfcnt_supported_requests[] = {
* @filp: Non-NULL pointer to file structure.
* @wait: Non-NULL pointer to poll table.
*
- * Return: POLLIN if data can be read without blocking, 0 if data can not be
- * read without blocking, else error code.
+ * Return: EPOLLIN | EPOLLRDNORM if data can be read without blocking, 0 if
+ * data can not be read without blocking, else EPOLLHUP | EPOLLERR.
*/
static __poll_t
kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp,
@@ -234,19 +243,19 @@ kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp,
struct kbase_kinstr_prfcnt_client *cli;
if (!filp || !wait)
- return (__poll_t)-EINVAL;
+ return EPOLLHUP | EPOLLERR;
cli = filp->private_data;
if (!cli)
- return (__poll_t)-EINVAL;
+ return EPOLLHUP | EPOLLERR;
poll_wait(filp, &cli->waitq, wait);
if (atomic_read(&cli->write_idx) != atomic_read(&cli->fetch_idx))
- return POLLIN;
+ return EPOLLIN | EPOLLRDNORM;
- return 0;
+ return (__poll_t)0;
}
/**
@@ -429,6 +438,7 @@ int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *ena
size_t grp, blk, blk_inst;
struct prfcnt_metadata **ptr_md = block_meta_base;
const struct kbase_hwcnt_metadata *metadata;
+ uint8_t block_idx = 0;
if (!dst || !*block_meta_base)
return -EINVAL;
@@ -437,6 +447,10 @@ int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *ena
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
u8 *dst_blk;
+ /* Block indices must be reported with no gaps. */
+ if (blk_inst == 0)
+ block_idx = 0;
+
/* Skip unavailable or non-enabled blocks */
if (kbase_kinstr_is_block_type_reserved(metadata, grp, blk) ||
!kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst) ||
@@ -450,13 +464,14 @@ int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *ena
kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(
kbase_hwcnt_metadata_block_type(metadata, grp,
blk));
- (*ptr_md)->u.block_md.block_idx = (u8)blk_inst;
+ (*ptr_md)->u.block_md.block_idx = block_idx;
(*ptr_md)->u.block_md.set = counter_set;
(*ptr_md)->u.block_md.block_state = BLOCK_STATE_UNKNOWN;
(*ptr_md)->u.block_md.values_offset = (u32)(dst_blk - base_addr);
/* update the buf meta data block pointer to next item */
(*ptr_md)++;
+ block_idx++;
}
return 0;
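The block_idx fix above decouples the reported index from blk_inst: numbering restarts at the first instance of each block type and only advances for blocks that are actually emitted, so skipped instances no longer punch holes in the sequence. Distilled into a standalone sketch; nr_instances(), is_emitted() and emit_block() are illustrative placeholders:

static size_t nr_instances(size_t blk);
static bool is_emitted(size_t blk, size_t blk_inst);
static void emit_block(size_t blk, size_t blk_inst, u8 block_idx);

static void report_blocks(size_t nr_block_types)
{
	u8 block_idx = 0;
	size_t blk, blk_inst;

	for (blk = 0; blk < nr_block_types; blk++) {
		for (blk_inst = 0; blk_inst < nr_instances(blk); blk_inst++) {
			if (blk_inst == 0)
				block_idx = 0;	/* new block type: restart numbering */

			if (!is_emitted(blk, blk_inst))
				continue;	/* skipped, but leaves no gap */

			emit_block(blk, blk_inst, block_idx++);
		}
	}
}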
@@ -509,33 +524,6 @@ static void kbasep_kinstr_prfcnt_set_sample_metadata(
}
/**
- * kbasep_kinstr_prfcnt_client_output_empty_sample() - Assemble an empty sample
- * for output.
- * @cli: Non-NULL pointer to a kinstr_prfcnt client.
- * @buf_idx: The index to the sample array for saving the sample.
- */
-static void kbasep_kinstr_prfcnt_client_output_empty_sample(
- struct kbase_kinstr_prfcnt_client *cli, unsigned int buf_idx)
-{
- struct kbase_hwcnt_dump_buffer *dump_buf;
- struct prfcnt_metadata *ptr_md;
-
- if (WARN_ON(buf_idx >= cli->sample_arr.sample_count))
- return;
-
- dump_buf = &cli->sample_arr.samples[buf_idx].dump_buf;
- ptr_md = cli->sample_arr.samples[buf_idx].sample_meta;
-
- kbase_hwcnt_dump_buffer_zero(dump_buf, &cli->enable_map);
-
- /* Use end timestamp from most recent async dump */
- ptr_md->u.sample_md.timestamp_start = cli->async.ts_end_ns;
- ptr_md->u.sample_md.timestamp_end = cli->async.ts_end_ns;
-
- kbasep_kinstr_prfcnt_set_sample_metadata(cli, dump_buf, ptr_md);
-}
-
-/**
* kbasep_kinstr_prfcnt_client_output_sample() - Assemble a sample for output.
* @cli: Non-NULL pointer to a kinstr_prfcnt client.
* @buf_idx: The index to the sample array for saving the sample.
@@ -584,16 +572,11 @@ static void kbasep_kinstr_prfcnt_client_output_sample(
* @cli: Non-NULL pointer to a kinstr_prfcnt client.
* @event_id: Event type that triggered the dump.
* @user_data: User data to return to the user.
- * @async_dump: Whether this is an asynchronous dump or not.
- * @empty_sample: Sample block data will be 0 if this is true.
*
* Return: 0 on success, else error code.
*/
-static int
-kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli,
- enum base_hwcnt_reader_event event_id,
- u64 user_data, bool async_dump,
- bool empty_sample)
+static int kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli,
+ enum base_hwcnt_reader_event event_id, u64 user_data)
{
int ret;
u64 ts_start_ns = 0;
@@ -611,17 +594,11 @@ kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli,
/* Check if there is a place to copy HWC block into. Calculate the
* number of available samples count, by taking into account the type
* of dump.
- * Asynchronous dumps have the ability to reserve space in the samples
- * array for future dumps, unlike synchronous dumps. Because of that,
- * the samples count for synchronous dumps is managed by a variable
- * called sync_sample_count, that originally is defined as equal to the
- * size of the whole array but later decreases every time an
- * asynchronous dump request is pending and then re-increased every
- * time an asynchronous dump request is completed.
*/
- available_samples_count = async_dump ?
- cli->sample_arr.sample_count :
- atomic_read(&cli->sync_sample_count);
+ available_samples_count = cli->sample_arr.sample_count;
+ WARN_ON(available_samples_count < 1);
+ /* Reserve one slot to store the implicit sample taken on CMD_STOP */
+ available_samples_count -= 1;
if (write_idx - read_idx == available_samples_count) {
/* For periodic sampling, the current active dump
* will be accumulated in the next sample, when
@@ -637,38 +614,19 @@ kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli,
*/
write_idx %= cli->sample_arr.sample_count;
- if (!empty_sample) {
- ret = kbase_hwcnt_virtualizer_client_dump(
- cli->hvcli, &ts_start_ns, &ts_end_ns, &cli->tmp_buf);
- /* HWC dump error, set the sample with error flag */
- if (ret)
- cli->sample_flags |= SAMPLE_FLAG_ERROR;
-
- /* Make the sample ready and copy it to the userspace mapped buffer */
- kbasep_kinstr_prfcnt_client_output_sample(
- cli, write_idx, user_data, ts_start_ns, ts_end_ns);
- } else {
- if (!async_dump) {
- struct prfcnt_metadata *ptr_md;
- /* User data will not be updated for empty samples. */
- ptr_md = cli->sample_arr.samples[write_idx].sample_meta;
- ptr_md->u.sample_md.user_data = user_data;
- }
+ ret = kbase_hwcnt_virtualizer_client_dump(cli->hvcli, &ts_start_ns, &ts_end_ns,
+ &cli->tmp_buf);
+ /* HWC dump error, set the sample with error flag */
+ if (ret)
+ cli->sample_flags |= SAMPLE_FLAG_ERROR;
- /* Make the sample ready and copy it to the userspace mapped buffer */
- kbasep_kinstr_prfcnt_client_output_empty_sample(cli, write_idx);
- }
+ /* Make the sample ready and copy it to the userspace mapped buffer */
+ kbasep_kinstr_prfcnt_client_output_sample(cli, write_idx, user_data, ts_start_ns,
+ ts_end_ns);
/* Notify client. Make sure all changes to memory are visible. */
wmb();
atomic_inc(&cli->write_idx);
- if (async_dump) {
- /* Remember the end timestamp of async dump for empty samples */
- if (!empty_sample)
- cli->async.ts_end_ns = ts_end_ns;
-
- atomic_inc(&cli->sync_sample_count);
- }
wake_up_interruptible(&cli->waitq);
/* Reset the flags for the next sample dump */
cli->sample_flags = 0;
@@ -682,6 +640,9 @@ kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli,
{
int ret;
u64 tm_start, tm_end;
+ unsigned int write_idx;
+ unsigned int read_idx;
+ size_t available_samples_count;
WARN_ON(!cli);
lockdep_assert_held(&cli->cmd_sync_lock);
@@ -690,6 +651,16 @@ kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli,
if (cli->active)
return 0;
+ write_idx = atomic_read(&cli->write_idx);
+ read_idx = atomic_read(&cli->read_idx);
+
+ /* Check whether there is space to store at least an implicit sample
+ * corresponding to CMD_STOP.
+ */
+ available_samples_count = cli->sample_count - (write_idx - read_idx);
+ if (!available_samples_count)
+ return -EBUSY;
+
kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map,
&cli->config.phys_em);
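The new check relies on write_idx and read_idx being free-running unsigned counters: write_idx - read_idx is the number of unread samples even after the counters wrap, and one buffer slot is kept free so the implicit sample produced on CMD_STOP always has somewhere to land. A sketch of that arithmetic under the same free-running-index convention:

static int check_space(unsigned int write_idx, unsigned int read_idx,
		       size_t sample_count)
{
	/* Indices only ever increase; modular unsigned subtraction gives
	 * the occupancy directly, even after write_idx wraps.
	 */
	unsigned int in_flight = write_idx - read_idx;	/* unread samples */

	if (sample_count - in_flight == 0)
		return -EBUSY;	/* keep room for the implicit STOP sample */

	return 0;
}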
@@ -702,7 +673,6 @@ kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli,
cli->hvcli, &cli->enable_map, &tm_start, &tm_end, NULL);
if (!ret) {
- atomic_set(&cli->sync_sample_count, cli->sample_count);
cli->active = true;
cli->user_data = user_data;
cli->sample_flags = 0;
@@ -716,16 +686,6 @@ kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli,
return ret;
}
-static int kbasep_kinstr_prfcnt_client_wait_async_done(
- struct kbase_kinstr_prfcnt_client *cli)
-{
- lockdep_assert_held(&cli->cmd_sync_lock);
-
- return wait_event_interruptible(cli->waitq,
- atomic_read(&cli->sync_sample_count) ==
- cli->sample_count);
-}
-
static int
kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli,
u64 user_data)
@@ -734,7 +694,7 @@ kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli,
u64 tm_start = 0;
u64 tm_end = 0;
struct kbase_hwcnt_physical_enable_map phys_em;
- struct kbase_hwcnt_dump_buffer *tmp_buf = NULL;
+ size_t available_samples_count;
unsigned int write_idx;
unsigned int read_idx;
@@ -745,12 +705,11 @@ kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli,
if (!cli->active)
return -EINVAL;
- /* Wait until pending async sample operation done */
- ret = kbasep_kinstr_prfcnt_client_wait_async_done(cli);
-
- if (ret < 0)
- return -ERESTARTSYS;
+ mutex_lock(&cli->kinstr_ctx->lock);
+ /* Disable counters under the lock, so we do not race with the
+ * sampling thread.
+ */
phys_em.fe_bm = 0;
phys_em.tiler_bm = 0;
phys_em.mmu_l2_bm = 0;
@@ -758,15 +717,11 @@ kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli,
kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &phys_em);
- mutex_lock(&cli->kinstr_ctx->lock);
-
/* Check whether one has the buffer to hold the last sample */
write_idx = atomic_read(&cli->write_idx);
read_idx = atomic_read(&cli->read_idx);
- /* Check if there is a place to save the last stop produced sample */
- if (write_idx - read_idx < cli->sample_arr.sample_count)
- tmp_buf = &cli->tmp_buf;
+ available_samples_count = cli->sample_count - (write_idx - read_idx);
ret = kbase_hwcnt_virtualizer_client_set_counters(cli->hvcli,
&cli->enable_map,
@@ -776,7 +731,8 @@ kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli,
if (ret)
cli->sample_flags |= SAMPLE_FLAG_ERROR;
- if (tmp_buf) {
+ /* There must be a place to save the last sample produced on stop */
+ if (!WARN_ON(!available_samples_count)) {
write_idx %= cli->sample_arr.sample_count;
/* Handle the last stop sample */
kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map,
@@ -806,7 +762,6 @@ kbasep_kinstr_prfcnt_client_sync_dump(struct kbase_kinstr_prfcnt_client *cli,
u64 user_data)
{
int ret;
- bool empty_sample = false;
lockdep_assert_held(&cli->cmd_sync_lock);
@@ -814,90 +769,9 @@ kbasep_kinstr_prfcnt_client_sync_dump(struct kbase_kinstr_prfcnt_client *cli,
if (!cli->active || cli->dump_interval_ns)
return -EINVAL;
- /* Wait until pending async sample operation done, this is required to
- * satisfy the stated sample sequence following their issuing order,
- * reflected by the sample start timestamp.
- */
- if (atomic_read(&cli->sync_sample_count) != cli->sample_count) {
- /* Return empty sample instead of performing real dump.
- * As there is an async dump currently in-flight which will
- * have the desired information.
- */
- empty_sample = true;
- ret = kbasep_kinstr_prfcnt_client_wait_async_done(cli);
-
- if (ret < 0)
- return -ERESTARTSYS;
- }
-
mutex_lock(&cli->kinstr_ctx->lock);
- ret = kbasep_kinstr_prfcnt_client_dump(cli,
- BASE_HWCNT_READER_EVENT_MANUAL,
- user_data, false, empty_sample);
-
- mutex_unlock(&cli->kinstr_ctx->lock);
-
- return ret;
-}
-
-static int
-kbasep_kinstr_prfcnt_client_async_dump(struct kbase_kinstr_prfcnt_client *cli,
- u64 user_data)
-{
- unsigned int write_idx;
- unsigned int read_idx;
- unsigned int active_async_dumps;
- unsigned int new_async_buf_idx;
- int ret;
-
- lockdep_assert_held(&cli->cmd_sync_lock);
-
- /* If the client is not started, or not manual, the command invalid */
- if (!cli->active || cli->dump_interval_ns)
- return -EINVAL;
-
- mutex_lock(&cli->kinstr_ctx->lock);
-
- write_idx = atomic_read(&cli->write_idx);
- read_idx = atomic_read(&cli->read_idx);
- active_async_dumps =
- cli->sample_count - atomic_read(&cli->sync_sample_count);
- new_async_buf_idx = write_idx + active_async_dumps;
-
- /* Check if there is a place to copy HWC block into.
- * If successful, reserve space in the buffer for the asynchronous
- * operation to make sure that it can actually take place.
- * Because we reserve space for asynchronous dumps we need to take that
- * in consideration here.
- */
- ret = (new_async_buf_idx - read_idx == cli->sample_arr.sample_count) ?
- -EBUSY :
- 0;
-
- if (ret == -EBUSY) {
- mutex_unlock(&cli->kinstr_ctx->lock);
- return ret;
- }
-
- if (active_async_dumps > 0) {
- struct prfcnt_metadata *ptr_md;
- unsigned int buf_idx =
- new_async_buf_idx % cli->sample_arr.sample_count;
- /* Instead of storing user_data, write it directly to future
- * empty sample.
- */
- ptr_md = cli->sample_arr.samples[buf_idx].sample_meta;
- ptr_md->u.sample_md.user_data = user_data;
-
- atomic_dec(&cli->sync_sample_count);
- } else {
- cli->async.user_data = user_data;
- atomic_dec(&cli->sync_sample_count);
-
- kbase_hwcnt_virtualizer_queue_work(cli->kinstr_ctx->hvirt,
- &cli->async.dump_work);
- }
+ ret = kbasep_kinstr_prfcnt_client_dump(cli, BASE_HWCNT_READER_EVENT_MANUAL, user_data);
mutex_unlock(&cli->kinstr_ctx->lock);
@@ -957,10 +831,6 @@ int kbasep_kinstr_prfcnt_cmd(struct kbase_kinstr_prfcnt_client *cli,
ret = kbasep_kinstr_prfcnt_client_sync_dump(
cli, control_cmd->user_data);
break;
- case PRFCNT_CONTROL_CMD_SAMPLE_ASYNC:
- ret = kbasep_kinstr_prfcnt_client_async_dump(
- cli, control_cmd->user_data);
- break;
case PRFCNT_CONTROL_CMD_DISCARD:
ret = kbasep_kinstr_prfcnt_client_discard(cli);
break;
@@ -1015,17 +885,6 @@ kbasep_kinstr_prfcnt_get_sample(struct kbase_kinstr_prfcnt_client *cli,
sample_meta = cli->sample_arr.samples[read_idx].sample_meta;
sample_offset_bytes = (u8 *)sample_meta - cli->sample_arr.user_buf;
- /* Verify that a valid sample has been dumped in the read_idx.
- * There are situations where this may not be the case,
- * for instance if the client is trying to get an asynchronous
- * sample which has not been dumped yet.
- */
- if (sample_meta->hdr.item_type != PRFCNT_SAMPLE_META_TYPE_SAMPLE ||
- sample_meta->hdr.item_version != PRFCNT_READER_API_VERSION) {
- err = -EINVAL;
- goto error_out;
- }
-
sample_access->sequence = sample_meta->u.sample_md.seq;
sample_access->sample_offset_bytes = sample_offset_bytes;
@@ -1172,22 +1031,49 @@ static void kbasep_kinstr_prfcnt_sample_array_free(
memset(sample_arr, 0, sizeof(*sample_arr));
}
-#if !MALI_KERNEL_TEST_API
-static
-#endif
-void kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli)
+static void
+kbasep_kinstr_prfcnt_client_destroy_partial(struct kbase_kinstr_prfcnt_client *cli,
+ enum kbase_kinstr_prfcnt_client_init_state init_state)
{
if (!cli)
return;
- kbase_hwcnt_virtualizer_client_destroy(cli->hvcli);
- kbasep_kinstr_prfcnt_sample_array_free(&cli->sample_arr);
- kbase_hwcnt_dump_buffer_free(&cli->tmp_buf);
- kbase_hwcnt_enable_map_free(&cli->enable_map);
- mutex_destroy(&cli->cmd_sync_lock);
+ while (init_state-- > KINSTR_PRFCNT_UNINITIALISED) {
+ switch (init_state) {
+ case KINSTR_PRFCNT_INITIALISED:
+ /* This shouldn't be reached */
+ break;
+ case KINSTR_PRFCNT_WAITQ_MUTEX:
+ mutex_destroy(&cli->cmd_sync_lock);
+ break;
+ case KINSTR_PRFCNT_VIRTUALIZER_CLIENT:
+ kbase_hwcnt_virtualizer_client_destroy(cli->hvcli);
+ break;
+ case KINSTR_PRFCNT_SAMPLE_ARRAY:
+ kbasep_kinstr_prfcnt_sample_array_free(&cli->sample_arr);
+ break;
+ case KINSTR_PRFCNT_DUMP_BUFFER:
+ kbase_hwcnt_dump_buffer_free(&cli->tmp_buf);
+ break;
+ case KINSTR_PRFCNT_ENABLE_MAP:
+ kbase_hwcnt_enable_map_free(&cli->enable_map);
+ break;
+ case KINSTR_PRFCNT_PARSE_SETUP:
+ /* Nothing to do here */
+ break;
+ }
+ }
kfree(cli);
}
+#if !MALI_KERNEL_TEST_API
+static
+#endif
+void kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli)
+{
+ kbasep_kinstr_prfcnt_client_destroy_partial(cli, KINSTR_PRFCNT_INITIALISED);
+}
+
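kbasep_kinstr_prfcnt_client_destroy_partial() pairs the init-state enum with a reverse-order teardown loop, so a client that failed part-way through creation frees exactly the resources it managed to acquire. The same pattern in a generic, hedged form; struct obj, free_a()/free_b() and the state names are made up for illustration:

#include <linux/slab.h>

enum obj_init_state { OBJ_UNINIT, OBJ_HAS_A, OBJ_HAS_B, OBJ_READY };

struct obj;			/* illustrative object type */
static void free_a(struct obj *o);
static void free_b(struct obj *o);

static void obj_destroy_partial(struct obj *o, enum obj_init_state reached)
{
	if (!o)
		return;

	/* Walk back down the states that were completed, newest first. */
	while (reached-- > OBJ_UNINIT) {
		switch (reached) {
		case OBJ_HAS_B:
			free_b(o);	/* undo the second init step */
			break;
		case OBJ_HAS_A:
			free_a(o);	/* undo the first init step */
			break;
		default:
			break;
		}
	}
	kfree(o);
}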
/**
* kbasep_kinstr_prfcnt_hwcnt_reader_release() - hwcnt reader's release.
* @inode: Non-NULL pointer to inode structure.
@@ -1294,9 +1180,8 @@ static void kbasep_kinstr_prfcnt_dump_worker(struct work_struct *work)
list_for_each_entry(pos, &kinstr_ctx->clients, node) {
if (pos->active && (pos->next_dump_time_ns != 0) &&
(pos->next_dump_time_ns < cur_time_ns))
- kbasep_kinstr_prfcnt_client_dump(
- pos, BASE_HWCNT_READER_EVENT_PERIODIC,
- pos->user_data, false, false);
+ kbasep_kinstr_prfcnt_client_dump(pos, BASE_HWCNT_READER_EVENT_PERIODIC,
+ pos->user_data);
}
kbasep_kinstr_prfcnt_reschedule_worker(kinstr_ctx);
@@ -1305,48 +1190,6 @@ static void kbasep_kinstr_prfcnt_dump_worker(struct work_struct *work)
}
/**
- * kbasep_kinstr_prfcnt_async_dump_worker()- Dump worker for a manual client
- * to take a single asynchronous
- * sample.
- * @work: Work structure.
- */
-static void kbasep_kinstr_prfcnt_async_dump_worker(struct work_struct *work)
-{
- struct kbase_kinstr_prfcnt_async *cli_async =
- container_of(work, struct kbase_kinstr_prfcnt_async, dump_work);
- struct kbase_kinstr_prfcnt_client *cli = container_of(
- cli_async, struct kbase_kinstr_prfcnt_client, async);
-
- mutex_lock(&cli->kinstr_ctx->lock);
- /* While the async operation is in flight, a sync stop might have been
- * executed, for which the dump should be skipped. Further as we are
- * doing an async dump, we expect that there is reserved buffer for
- * this to happen. This is to avoid the rare corner case where the
- * user side has issued a stop/start pair before the async work item
- * get the chance to execute.
- */
- if (cli->active &&
- (atomic_read(&cli->sync_sample_count) < cli->sample_count))
- kbasep_kinstr_prfcnt_client_dump(cli,
- BASE_HWCNT_READER_EVENT_MANUAL,
- cli->async.user_data, true,
- false);
-
- /* While the async operation is in flight, more async dump requests
- * may have been submitted. In this case, no more async dumps work
- * will be queued. Instead space will be reserved for that dump and
- * an empty sample will be return after handling the current async
- * dump.
- */
- while (cli->active &&
- (atomic_read(&cli->sync_sample_count) < cli->sample_count)) {
- kbasep_kinstr_prfcnt_client_dump(
- cli, BASE_HWCNT_READER_EVENT_MANUAL, 0, true, true);
- }
- mutex_unlock(&cli->kinstr_ctx->lock);
-}
-
-/**
* kbasep_kinstr_prfcnt_dump_timer() - Dump timer that schedules the dump worker for
* execution as soon as possible.
* @timer: Timer structure.
@@ -1808,83 +1651,100 @@ int kbasep_kinstr_prfcnt_client_create(struct kbase_kinstr_prfcnt_context *kinst
{
int err;
struct kbase_kinstr_prfcnt_client *cli;
+ enum kbase_kinstr_prfcnt_client_init_state init_state;
- WARN_ON(!kinstr_ctx);
- WARN_ON(!setup);
- WARN_ON(!req_arr);
+ if (WARN_ON(!kinstr_ctx))
+ return -EINVAL;
+
+ if (WARN_ON(!setup))
+ return -EINVAL;
+
+ if (WARN_ON(!req_arr))
+ return -EINVAL;
cli = kzalloc(sizeof(*cli), GFP_KERNEL);
if (!cli)
return -ENOMEM;
- cli->kinstr_ctx = kinstr_ctx;
- err = kbasep_kinstr_prfcnt_parse_setup(kinstr_ctx, setup, &cli->config, req_arr);
+ for (init_state = KINSTR_PRFCNT_UNINITIALISED; init_state < KINSTR_PRFCNT_INITIALISED;
+ init_state++) {
+ err = 0;
+ switch (init_state) {
+ case KINSTR_PRFCNT_PARSE_SETUP:
+ cli->kinstr_ctx = kinstr_ctx;
+ err = kbasep_kinstr_prfcnt_parse_setup(kinstr_ctx, setup, &cli->config,
+ req_arr);
- if (err < 0)
- goto error;
-
- cli->config.buffer_count = MAX_BUFFER_COUNT;
- cli->dump_interval_ns = cli->config.period_ns;
- cli->next_dump_time_ns = 0;
- cli->active = false;
- atomic_set(&cli->write_idx, 0);
- atomic_set(&cli->read_idx, 0);
- atomic_set(&cli->fetch_idx, 0);
+ break;
- err = kbase_hwcnt_enable_map_alloc(kinstr_ctx->metadata,
- &cli->enable_map);
+ case KINSTR_PRFCNT_ENABLE_MAP:
+ cli->config.buffer_count = MAX_BUFFER_COUNT;
+ cli->dump_interval_ns = cli->config.period_ns;
+ cli->next_dump_time_ns = 0;
+ cli->active = false;
+ atomic_set(&cli->write_idx, 0);
+ atomic_set(&cli->read_idx, 0);
+ atomic_set(&cli->fetch_idx, 0);
- if (err < 0)
- goto error;
+ err = kbase_hwcnt_enable_map_alloc(kinstr_ctx->metadata, &cli->enable_map);
+ break;
- kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &cli->config.phys_em);
+ case KINSTR_PRFCNT_DUMP_BUFFER:
+ kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map,
+ &cli->config.phys_em);
- cli->sample_count = cli->config.buffer_count;
- atomic_set(&cli->sync_sample_count, cli->sample_count);
- cli->sample_size = kbasep_kinstr_prfcnt_get_sample_size(cli, kinstr_ctx->metadata);
+ cli->sample_count = cli->config.buffer_count;
+ cli->sample_size =
+ kbasep_kinstr_prfcnt_get_sample_size(cli, kinstr_ctx->metadata);
- /* Use virtualizer's metadata to alloc tmp buffer which interacts with
- * the HWC virtualizer.
- */
- err = kbase_hwcnt_dump_buffer_alloc(kinstr_ctx->metadata,
- &cli->tmp_buf);
+ /* Use virtualizer's metadata to alloc tmp buffer which interacts with
+ * the HWC virtualizer.
+ */
+ err = kbase_hwcnt_dump_buffer_alloc(kinstr_ctx->metadata, &cli->tmp_buf);
+ break;
- if (err < 0)
- goto error;
+ case KINSTR_PRFCNT_SAMPLE_ARRAY:
+ /* Disable clock map in setup, and enable clock map when start */
+ cli->enable_map.clk_enable_map = 0;
- /* Disable clock map in setup, and enable clock map when start */
- cli->enable_map.clk_enable_map = 0;
+ /* Use metadata from virtualizer to allocate dump buffers if
+ * kinstr_prfcnt doesn't have the truncated metadata.
+ */
+ err = kbasep_kinstr_prfcnt_sample_array_alloc(cli, kinstr_ctx->metadata);
- /* Use metadata from virtualizer to allocate dump buffers if
- * kinstr_prfcnt doesn't have the truncated metadata.
- */
- err = kbasep_kinstr_prfcnt_sample_array_alloc(cli, kinstr_ctx->metadata);
+ break;
- if (err < 0)
- goto error;
+ case KINSTR_PRFCNT_VIRTUALIZER_CLIENT:
+ /* Set the enable map to 0 to prevent the virtualizer from initialising
+ * and kicking the backend to count.
+ */
+ kbase_hwcnt_gpu_enable_map_from_physical(
+ &cli->enable_map, &(struct kbase_hwcnt_physical_enable_map){ 0 });
- /* Set enable map to be 0 to prevent virtualizer to init and kick the backend to count */
- kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map,
- &(struct kbase_hwcnt_physical_enable_map){ 0 });
+ err = kbase_hwcnt_virtualizer_client_create(kinstr_ctx->hvirt,
+ &cli->enable_map, &cli->hvcli);
+ break;
- err = kbase_hwcnt_virtualizer_client_create(
- kinstr_ctx->hvirt, &cli->enable_map, &cli->hvcli);
+ case KINSTR_PRFCNT_WAITQ_MUTEX:
+ init_waitqueue_head(&cli->waitq);
+ mutex_init(&cli->cmd_sync_lock);
+ break;
- if (err < 0)
- goto error;
+ case KINSTR_PRFCNT_INITIALISED:
+ /* This shouldn't be reached */
+ break;
+ }
- init_waitqueue_head(&cli->waitq);
- INIT_WORK(&cli->async.dump_work,
- kbasep_kinstr_prfcnt_async_dump_worker);
- mutex_init(&cli->cmd_sync_lock);
+ if (err < 0) {
+ kbasep_kinstr_prfcnt_client_destroy_partial(cli, init_state);
+ return err;
+ }
+ }
*out_vcli = cli;
return 0;
-error:
- kbasep_kinstr_prfcnt_client_destroy(cli);
- return err;
}
static size_t kbasep_kinstr_prfcnt_get_block_info_count(
@@ -2106,17 +1966,18 @@ int kbase_kinstr_prfcnt_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx,
union kbase_ioctl_kinstr_prfcnt_setup *setup)
{
int err;
- unsigned int item_count;
- unsigned long bytes;
- struct prfcnt_request_item *req_arr;
+ size_t item_count;
+ size_t bytes;
+ struct prfcnt_request_item *req_arr = NULL;
struct kbase_kinstr_prfcnt_client *cli = NULL;
+ const size_t max_bytes = 32 * sizeof(*req_arr);
if (!kinstr_ctx || !setup)
return -EINVAL;
item_count = setup->in.request_item_count;
- /* Limiting the request items to 2x of the expected: acommodating
+ /* Limiting the request items to 2x of the expected: accommodating
* moderate duplications but rejecting excessive abuses.
*/
if (!setup->in.requests_ptr || (item_count < 2) || (setup->in.request_item_size == 0) ||
@@ -2124,7 +1985,18 @@ int kbase_kinstr_prfcnt_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx,
return -EINVAL;
}
- bytes = item_count * sizeof(*req_arr);
+ if (check_mul_overflow(item_count, sizeof(*req_arr), &bytes))
+ return -EINVAL;
+
+ /* Further limiting the max bytes to copy from userspace by setting it in the following
+ * fashion: a maximum of 1 mode item, 4 types of 3 sets for a total of 12 enable items,
+ * each currently at the size of prfcnt_request_item.
+ *
+ * Note: if more request types get added, this max limit needs to be updated.
+ */
+ if (bytes > max_bytes)
+ return -EINVAL;
+
req_arr = memdup_user(u64_to_user_ptr(setup->in.requests_ptr), bytes);
if (IS_ERR(req_arr))
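The setup path now computes the copy size with check_mul_overflow() and caps it before calling memdup_user(), instead of multiplying unchecked values. A minimal sketch of that defensive sequence, mirroring the 32-item cap above; struct req_item and copy_items() are illustrative stand-ins:

#include <linux/err.h>
#include <linux/overflow.h>
#include <linux/string.h>

struct req_item { u64 opaque; };	/* illustrative request record */

#define MAX_ITEMS_BYTES (32 * sizeof(struct req_item))	/* illustrative cap */

static struct req_item *copy_items(u64 uptr, size_t item_count)
{
	size_t bytes;

	/* Returns true if item_count * sizeof() would wrap around. */
	if (check_mul_overflow(item_count, sizeof(struct req_item), &bytes))
		return ERR_PTR(-EINVAL);

	/* Bound how much we are willing to copy from userspace. */
	if (bytes > MAX_ITEMS_BYTES)
		return ERR_PTR(-EINVAL);

	return memdup_user(u64_to_user_ptr(uptr), bytes);
}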
diff --git a/mali_kbase/mali_kbase_kinstr_prfcnt.h b/mali_kbase/mali_kbase_kinstr_prfcnt.h
index e834926..e8e9664 100644
--- a/mali_kbase/mali_kbase_kinstr_prfcnt.h
+++ b/mali_kbase/mali_kbase_kinstr_prfcnt.h
@@ -26,7 +26,7 @@
#ifndef _KBASE_KINSTR_PRFCNT_H_
#define _KBASE_KINSTR_PRFCNT_H_
-#include "mali_kbase_hwcnt_types.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
#include <uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h>
struct kbase_kinstr_prfcnt_context;
@@ -81,6 +81,7 @@ void kbase_kinstr_prfcnt_suspend(struct kbase_kinstr_prfcnt_context *kinstr_ctx)
void kbase_kinstr_prfcnt_resume(struct kbase_kinstr_prfcnt_context *kinstr_ctx);
#if MALI_KERNEL_TEST_API
+
/**
* kbasep_kinstr_prfcnt_get_block_info_list() - Get list of all block types
* with their information.
diff --git a/mali_kbase/mali_kbase_linux.h b/mali_kbase/mali_kbase_linux.h
index 1d8d196..e5c6f7a 100644
--- a/mali_kbase/mali_kbase_linux.h
+++ b/mali_kbase/mali_kbase_linux.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2014, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2014, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -33,7 +33,7 @@
#include <linux/module.h>
#include <linux/atomic.h>
-#if (defined(MALI_KERNEL_TEST_API) && (1 == MALI_KERNEL_TEST_API))
+#if IS_ENABLED(MALI_KERNEL_TEST_API)
#define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func)
#else
#define KBASE_EXPORT_TEST_API(func)
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index 7c09772..8912783 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -44,6 +44,11 @@
#include <mali_kbase_config_defaults.h>
#include <mali_kbase_trace_gpu_mem.h>
+#define VA_REGION_SLAB_NAME_PREFIX "va-region-slab-"
+#define VA_REGION_SLAB_NAME_SIZE (DEVNAME_SIZE + sizeof(VA_REGION_SLAB_NAME_PREFIX) + 1)
+
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+
/*
* Alignment of objects allocated by the GPU inside a just-in-time memory
* region whose size is given by an end address
@@ -66,6 +71,7 @@
*/
#define KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES (512u)
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
/* Forward declarations */
static void free_partial_locked(struct kbase_context *kctx,
@@ -89,10 +95,8 @@ static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx)
#error "Unknown CPU VA width for this architecture"
#endif
-#if IS_ENABLED(CONFIG_64BIT)
- if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+ if (kbase_ctx_compat_mode(kctx))
cpu_va_bits = 32;
-#endif
return cpu_va_bits;
}
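Several #if IS_ENABLED(CONFIG_64BIT) / KCTX_COMPAT ladders in this file collapse into a single kbase_ctx_compat_mode() call. Its definition is not part of this hunk; the helper presumably amounts to something like the following, which should be read as an assumption rather than the driver's actual code:

/* Assumed shape of the helper, not taken from this patch. */
static inline bool kbase_ctx_compat_mode(struct kbase_context *kctx)
{
	/* 32-bit kernels are always "compat"; on 64-bit kernels it depends
	 * on whether the context was created by a 32-bit client.
	 */
	return !IS_ENABLED(CONFIG_64BIT) || kbase_ctx_flag(kctx, KCTX_COMPAT);
}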
@@ -127,18 +131,14 @@ static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx,
else {
u64 same_va_end;
-#if IS_ENABLED(CONFIG_64BIT)
- if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
-#endif /* CONFIG_64BIT */
+ if (kbase_ctx_compat_mode(kctx)) {
same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE;
-#if IS_ENABLED(CONFIG_64BIT)
} else {
struct kbase_reg_zone *same_va_zone =
kbase_ctx_reg_zone_get(kctx,
KBASE_REG_ZONE_SAME_VA);
same_va_end = kbase_reg_zone_end_pfn(same_va_zone);
}
-#endif /* CONFIG_64BIT */
if (gpu_pfn >= same_va_end)
rbtree = &kctx->reg_rbtree_custom;
@@ -430,24 +430,23 @@ void kbase_remove_va_region(struct kbase_device *kbdev,
next->nr_pages += reg->nr_pages;
rb_erase(&(reg->rblink), reg_rbtree);
merged_back = 1;
- if (merged_front) {
- /* We already merged with prev, free it */
- kfree(reg);
- }
}
}
- /* If we failed to merge then we need to add a new block */
- if (!(merged_front || merged_back)) {
+ if (merged_front && merged_back) {
+ /* We already merged with prev, free it */
+ kfree(reg);
+ } else if (!(merged_front || merged_back)) {
+ /* If we failed to merge then we need to add a new block */
+
/*
* We didn't merge anything. Try to add a new free
* placeholder, and in any case, remove the original one.
*/
struct kbase_va_region *free_reg;
- free_reg = kbase_alloc_free_region(reg_rbtree,
- reg->start_pfn, reg->nr_pages,
- reg->flags & KBASE_REG_ZONE_MASK);
+ free_reg = kbase_alloc_free_region(kbdev, reg_rbtree, reg->start_pfn, reg->nr_pages,
+ reg->flags & KBASE_REG_ZONE_MASK);
if (!free_reg) {
/* In case of failure, we cannot allocate a replacement
* free region, so we will be left with a 'gap' in the
@@ -494,6 +493,7 @@ KBASE_EXPORT_TEST_API(kbase_remove_va_region);
* kbase_insert_va_region_nolock - Insert a VA region to the list,
* replacing the existing one.
*
+ * @kbdev: The kbase device
* @new_reg: The new region to insert
* @at_reg: The region to replace
* @start_pfn: The Page Frame Number to insert at
@@ -501,8 +501,10 @@ KBASE_EXPORT_TEST_API(kbase_remove_va_region);
*
* Return: 0 on success, error code otherwise.
*/
-static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg,
- struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
+static int kbase_insert_va_region_nolock(struct kbase_device *kbdev,
+ struct kbase_va_region *new_reg,
+ struct kbase_va_region *at_reg, u64 start_pfn,
+ size_t nr_pages)
{
struct rb_root *reg_rbtree = NULL;
int err = 0;
@@ -546,10 +548,9 @@ static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg,
else {
struct kbase_va_region *new_front_reg;
- new_front_reg = kbase_alloc_free_region(reg_rbtree,
- at_reg->start_pfn,
- start_pfn - at_reg->start_pfn,
- at_reg->flags & KBASE_REG_ZONE_MASK);
+ new_front_reg = kbase_alloc_free_region(kbdev, reg_rbtree, at_reg->start_pfn,
+ start_pfn - at_reg->start_pfn,
+ at_reg->flags & KBASE_REG_ZONE_MASK);
if (new_front_reg) {
at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
@@ -686,8 +687,7 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
goto exit;
}
- err = kbase_insert_va_region_nolock(reg, tmp, gpu_pfn,
- nr_pages);
+ err = kbase_insert_va_region_nolock(kbdev, reg, tmp, gpu_pfn, nr_pages);
if (err) {
dev_warn(dev, "Failed to insert va region");
err = -ENOMEM;
@@ -712,8 +712,7 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
nr_pages, align_offset, align_mask,
&start_pfn);
if (tmp) {
- err = kbase_insert_va_region_nolock(reg, tmp,
- start_pfn, nr_pages);
+ err = kbase_insert_va_region_nolock(kbdev, reg, tmp, start_pfn, nr_pages);
if (unlikely(err)) {
dev_warn(dev, "Failed to insert region: 0x%08llx start_pfn, %zu nr_pages",
start_pfn, nr_pages);
@@ -807,6 +806,40 @@ static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
}
#endif /* MALI_USE_CSF */
+static struct kbase_context *kbase_reg_flags_to_kctx(struct kbase_va_region *reg)
+{
+ struct kbase_context *kctx = NULL;
+ struct rb_root *rbtree = reg->rbtree;
+
+ switch (reg->flags & KBASE_REG_ZONE_MASK) {
+ case KBASE_REG_ZONE_CUSTOM_VA:
+ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_custom);
+ break;
+ case KBASE_REG_ZONE_SAME_VA:
+ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_same);
+ break;
+ case KBASE_REG_ZONE_EXEC_VA:
+ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec);
+ break;
+#if MALI_USE_CSF
+ case KBASE_REG_ZONE_EXEC_FIXED_VA:
+ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed);
+ break;
+ case KBASE_REG_ZONE_FIXED_VA:
+ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed);
+ break;
+ case KBASE_REG_ZONE_MCU_SHARED:
+ /* This is only expected to be called on driver unload. */
+ break;
+#endif
+ default:
+ WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags);
+ break;
+ }
+
+ return kctx;
+}
+
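kbase_reg_flags_to_kctx() recovers the owning kbase_context from a region's rb_root pointer by choosing the right container_of() based on the zone bits. Stripped to its essence, the technique looks like this; the names below are generic, not the driver's:

#include <linux/kernel.h>
#include <linux/rbtree.h>

struct owner {
	struct rb_root tree_a;
	struct rb_root tree_b;
};

static struct owner *owner_from_tree(struct rb_root *tree, bool is_a)
{
	/* container_of() only subtracts the member offset, so the caller
	 * must know which member the pointer refers to - here encoded in
	 * a flag, in the driver in the region's zone bits.
	 */
	return is_a ? container_of(tree, struct owner, tree_a) :
		      container_of(tree, struct owner, tree_b);
}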
static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
{
struct rb_node *rbnode;
@@ -817,7 +850,9 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
if (rbnode) {
rb_erase(rbnode, rbtree);
reg = rb_entry(rbnode, struct kbase_va_region, rblink);
- WARN_ON(reg->va_refcnt != 1);
+ WARN_ON(kbase_refcount_read(&reg->va_refcnt) != 1);
+ if (kbase_page_migration_enabled)
+ kbase_gpu_munmap(kbase_reg_flags_to_kctx(reg), reg);
/* Reset the start_pfn - as the rbtree is being
* destroyed and we've already erased this region, there
* is no further need to attempt to remove it.
@@ -901,9 +936,8 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
#endif
/* all have SAME_VA */
- same_va_reg =
- kbase_alloc_free_region(&kctx->reg_rbtree_same, same_va_base,
- same_va_pages, KBASE_REG_ZONE_SAME_VA);
+ same_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, same_va_base,
+ same_va_pages, KBASE_REG_ZONE_SAME_VA);
if (!same_va_reg) {
err = -ENOMEM;
@@ -912,10 +946,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_SAME_VA, same_va_base,
same_va_pages);
-#if IS_ENABLED(CONFIG_64BIT)
- /* 32-bit clients have custom VA zones */
- if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
-#endif
+ if (kbase_ctx_compat_mode(kctx)) {
if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
err = -EINVAL;
goto fail_free_same_va;
@@ -927,10 +958,9 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
- custom_va_reg = kbase_alloc_free_region(
- &kctx->reg_rbtree_custom,
- KBASE_REG_ZONE_CUSTOM_VA_BASE,
- custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
+ custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom,
+ KBASE_REG_ZONE_CUSTOM_VA_BASE,
+ custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
if (!custom_va_reg) {
err = -ENOMEM;
@@ -939,11 +969,9 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA,
KBASE_REG_ZONE_CUSTOM_VA_BASE,
custom_va_size);
-#if IS_ENABLED(CONFIG_64BIT)
} else {
custom_va_size = 0;
}
-#endif
#if MALI_USE_CSF
/* The position of EXEC_VA depends on whether the client is 32-bit or 64-bit. */
@@ -954,17 +982,15 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
*/
fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_64;
-#if IS_ENABLED(CONFIG_64BIT)
- if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+ if (kbase_ctx_compat_mode(kctx)) {
exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_32;
fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_32;
}
-#endif
kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, exec_va_base,
KBASE_REG_ZONE_EXEC_VA_SIZE);
- exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec, exec_va_base,
+ exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_base,
KBASE_REG_ZONE_EXEC_VA_SIZE, KBASE_REG_ZONE_EXEC_VA);
if (!exec_va_reg) {
@@ -978,8 +1004,8 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE);
exec_fixed_va_reg =
- kbase_alloc_free_region(&kctx->reg_rbtree_exec_fixed, exec_fixed_va_base,
- KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE,
+ kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec_fixed,
+ exec_fixed_va_base, KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE,
KBASE_REG_ZONE_EXEC_FIXED_VA);
if (!exec_fixed_va_reg) {
@@ -992,7 +1018,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_FIXED_VA, fixed_va_base, fixed_va_pages);
- fixed_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_fixed, fixed_va_base,
+ fixed_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_fixed, fixed_va_base,
fixed_va_pages, KBASE_REG_ZONE_FIXED_VA);
kctx->gpu_va_end = fixed_va_end;
@@ -1131,7 +1157,6 @@ static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx)
return false;
}
-#if IS_ENABLED(CONFIG_64BIT)
static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
u64 jit_va_pages)
{
@@ -1180,9 +1205,8 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
* Create a custom VA zone at the end of the VA for allocations which
* JIT can use so it doesn't have to allocate VA from the kernel.
*/
- custom_va_reg =
- kbase_alloc_free_region(&kctx->reg_rbtree_custom, jit_va_start,
- jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+ custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, jit_va_start,
+ jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA);
/*
* The context will be destroyed if we fail here so no point
@@ -1199,7 +1223,6 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
kbase_region_tracker_insert(custom_va_reg);
return 0;
}
-#endif
int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
int max_allocations, int trim_level, int group_id,
@@ -1240,10 +1263,8 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
goto exit_unlock;
}
-#if IS_ENABLED(CONFIG_64BIT)
- if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
+ if (!kbase_ctx_compat_mode(kctx))
err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages);
-#endif
/*
* Nothing to do for 32-bit clients, JIT uses the existing
* custom VA zone.
@@ -1319,17 +1340,14 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages
goto exit_unlock;
}
-#if IS_ENABLED(CONFIG_64BIT)
- if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
-#endif
+ if (kbase_ctx_compat_mode(kctx)) {
/* 32-bit client: take from CUSTOM_VA zone */
target_zone_bits = KBASE_REG_ZONE_CUSTOM_VA;
-#if IS_ENABLED(CONFIG_64BIT)
} else {
/* 64-bit client: take from SAME_VA zone */
target_zone_bits = KBASE_REG_ZONE_SAME_VA;
}
-#endif
+
target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_bits);
target_zone_base_addr = target_zone->base_pfn << PAGE_SHIFT;
@@ -1357,10 +1375,8 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages
/* Taken from the end of the target zone */
exec_va_start = kbase_reg_zone_end_pfn(target_zone) - exec_va_pages;
- exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec,
- exec_va_start,
- exec_va_pages,
- KBASE_REG_ZONE_EXEC_VA);
+ exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_start,
+ exec_va_pages, KBASE_REG_ZONE_EXEC_VA);
if (!exec_va_reg) {
err = -ENOMEM;
goto exit_unlock;
@@ -1403,10 +1419,9 @@ int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev)
kbdev->csf.shared_reg_rbtree = RB_ROOT;
- shared_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree,
- shared_reg_start_pfn,
- shared_reg_size,
- KBASE_REG_ZONE_MCU_SHARED);
+ shared_reg =
+ kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, shared_reg_start_pfn,
+ shared_reg_size, KBASE_REG_ZONE_MCU_SHARED);
if (!shared_reg)
return -ENOMEM;
@@ -1415,10 +1430,30 @@ int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev)
}
#endif
+static void kbasep_mem_page_size_init(struct kbase_device *kbdev)
+{
+#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE)
+#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC)
+ kbdev->pagesize_2mb = true;
+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC) != 1) {
+ dev_warn(
+ kbdev->dev,
+ "2MB page is enabled by force while current GPU-HW doesn't meet the requirement to do so.\n");
+ }
+#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */
+ kbdev->pagesize_2mb = false;
+#endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */
+#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */
+ /* Set it to the default based on which GPU is present */
+ kbdev->pagesize_2mb = kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC);
+#endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */
+}
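/*
 * A minimal sketch of how the configuration above resolves, assuming
 * kbase_hw_has_feature() reports whether BASE_HW_FEATURE_LARGE_PAGE_ALLOC is
 * present on the GPU:
 *
 *   LARGE_PAGE_ALLOC_OVERRIDE | LARGE_PAGE_ALLOC | HW feature | pagesize_2mb
 *   --------------------------+------------------+------------+--------------
 *   enabled                   | enabled          | any        | true (warns if feature absent)
 *   enabled                   | disabled         | any        | false
 *   disabled                  | ignored          | present    | true
 *   disabled                  | ignored          | absent     | false
 */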
+
int kbase_mem_init(struct kbase_device *kbdev)
{
int err = 0;
struct kbasep_mem_device *memdev;
+ char va_region_slab_name[VA_REGION_SLAB_NAME_SIZE];
#if IS_ENABLED(CONFIG_OF)
struct device_node *mgm_node = NULL;
#endif
@@ -1427,6 +1462,20 @@ int kbase_mem_init(struct kbase_device *kbdev)
memdev = &kbdev->memdev;
+ kbasep_mem_page_size_init(kbdev);
+
+ scnprintf(va_region_slab_name, VA_REGION_SLAB_NAME_SIZE, VA_REGION_SLAB_NAME_PREFIX "%s",
+ kbdev->devname);
+
+ /* Initialize slab cache for kbase_va_regions */
+ kbdev->va_region_slab =
+ kmem_cache_create(va_region_slab_name, sizeof(struct kbase_va_region), 0, 0, NULL);
+ if (kbdev->va_region_slab == NULL) {
+ dev_err(kbdev->dev, "Failed to create va_region_slab\n");
+ return -ENOMEM;
+ }
+
+ kbase_mem_migrate_init(kbdev);
kbase_mem_pool_group_config_set_max_size(&kbdev->mem_pool_defaults,
KBASE_MEM_POOL_MAX_SIZE_KCTX);
@@ -1490,8 +1539,7 @@ int kbase_mem_init(struct kbase_device *kbdev)
kbase_mem_pool_group_config_set_max_size(&mem_pool_defaults,
KBASE_MEM_POOL_MAX_SIZE_KBDEV);
- err = kbase_mem_pool_group_init(&kbdev->mem_pools, kbdev,
- &mem_pool_defaults, NULL);
+ err = kbase_mem_pool_group_init(&kbdev->mem_pools, kbdev, &mem_pool_defaults, NULL);
}
return err;
@@ -1517,6 +1565,11 @@ void kbase_mem_term(struct kbase_device *kbdev)
kbase_mem_pool_group_term(&kbdev->mem_pools);
+ kbase_mem_migrate_term(kbdev);
+
+ kmem_cache_destroy(kbdev->va_region_slab);
+ kbdev->va_region_slab = NULL;
+
WARN_ON(kbdev->total_gpu_pages);
WARN_ON(!RB_EMPTY_ROOT(&kbdev->process_root));
WARN_ON(!RB_EMPTY_ROOT(&kbdev->dma_buf_root));
@@ -1530,6 +1583,7 @@ KBASE_EXPORT_TEST_API(kbase_mem_term);
/**
* kbase_alloc_free_region - Allocate a free region object.
*
+ * @kbdev: kbase device
* @rbtree: Backlink to the red-black tree of memory regions.
* @start_pfn: The Page Frame Number in GPU virtual address space.
* @nr_pages: The size of the region in pages.
@@ -1542,8 +1596,8 @@ KBASE_EXPORT_TEST_API(kbase_mem_term);
*
* Return: pointer to the allocated region object on success, NULL otherwise.
*/
-struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
- u64 start_pfn, size_t nr_pages, int zone)
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree,
+ u64 start_pfn, size_t nr_pages, int zone)
{
struct kbase_va_region *new_reg;
@@ -1555,13 +1609,13 @@ struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
/* 64-bit address range is the max */
KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE));
- new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL);
+ new_reg = kmem_cache_zalloc(kbdev->va_region_slab, GFP_KERNEL);
if (!new_reg)
return NULL;
- new_reg->va_refcnt = 1;
- new_reg->no_user_free_refcnt = 0;
+ kbase_refcount_set(&new_reg->va_refcnt, 1);
+ atomic_set(&new_reg->no_user_free_count, 0);
new_reg->cpu_alloc = NULL; /* no alloc bound yet */
new_reg->gpu_alloc = NULL; /* no alloc bound yet */
new_reg->rbtree = rbtree;
@@ -1580,41 +1634,6 @@ struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
-static struct kbase_context *kbase_reg_flags_to_kctx(
- struct kbase_va_region *reg)
-{
- struct kbase_context *kctx = NULL;
- struct rb_root *rbtree = reg->rbtree;
-
- switch (reg->flags & KBASE_REG_ZONE_MASK) {
- case KBASE_REG_ZONE_CUSTOM_VA:
- kctx = container_of(rbtree, struct kbase_context,
- reg_rbtree_custom);
- break;
- case KBASE_REG_ZONE_SAME_VA:
- kctx = container_of(rbtree, struct kbase_context,
- reg_rbtree_same);
- break;
- case KBASE_REG_ZONE_EXEC_VA:
- kctx = container_of(rbtree, struct kbase_context,
- reg_rbtree_exec);
- break;
-#if MALI_USE_CSF
- case KBASE_REG_ZONE_EXEC_FIXED_VA:
- kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed);
- break;
- case KBASE_REG_ZONE_FIXED_VA:
- kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed);
- break;
-#endif
- default:
- WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags);
- break;
- }
-
- return kctx;
-}
-
/**
* kbase_free_alloced_region - Free a region object.
*
@@ -1626,6 +1645,7 @@ static struct kbase_context *kbase_reg_flags_to_kctx(
* alloc object will be released.
* It is a bug if no alloc object exists for non-free regions.
*
+ * If the region is of zone KBASE_REG_ZONE_MCU_SHARED, it is freed.
*/
void kbase_free_alloced_region(struct kbase_va_region *reg)
{
@@ -1649,6 +1669,13 @@ void kbase_free_alloced_region(struct kbase_va_region *reg)
(void *)reg);
#if MALI_USE_CSF
if (reg->flags & KBASE_REG_CSF_EVENT)
+ /*
+ * This should not be reachable if called from 'mcu_shared' functions
+ * such as:
+ * kbase_csf_firmware_mcu_shared_mapping_init
+ * kbase_csf_firmware_mcu_shared_mapping_term
+ */
+
kbase_unlink_event_mem_page(kctx, reg);
#endif
@@ -1748,16 +1775,13 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
for (i = 0; i < alloc->imported.alias.nents; i++) {
if (alloc->imported.alias.aliased[i].alloc) {
- err = kbase_mmu_insert_pages(
- kctx->kbdev, &kctx->mmu,
- reg->start_pfn + (i * stride),
- alloc->imported.alias.aliased[i]
- .alloc->pages +
- alloc->imported.alias.aliased[i]
- .offset,
+ err = kbase_mmu_insert_aliased_pages(
+ kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride),
+ alloc->imported.alias.aliased[i].alloc->pages +
+ alloc->imported.alias.aliased[i].offset,
alloc->imported.alias.aliased[i].length,
- reg->flags & gwt_mask, kctx->as_nr,
- group_id, mmu_sync_info);
+ reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info,
+ NULL);
if (err)
goto bad_aliased_insert;
@@ -1765,24 +1789,32 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
* creation time
*/
} else {
- err = kbase_mmu_insert_single_page(
- kctx, reg->start_pfn + i * stride,
- kctx->aliasing_sink_page,
+ err = kbase_mmu_insert_single_aliased_page(
+ kctx, reg->start_pfn + i * stride, kctx->aliasing_sink_page,
alloc->imported.alias.aliased[i].length,
- (reg->flags & mask & gwt_mask) | attr,
- group_id, mmu_sync_info);
+ (reg->flags & mask & gwt_mask) | attr, group_id,
+ mmu_sync_info);
if (err)
goto bad_aliased_insert;
}
}
} else {
- err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu,
- reg->start_pfn,
- kbase_get_gpu_phy_pages(reg),
- kbase_reg_current_backed_size(reg),
- reg->flags & gwt_mask, kctx->as_nr,
- group_id, mmu_sync_info);
+ if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM ||
+ reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
+
+ err = kbase_mmu_insert_imported_pages(
+ kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg),
+ reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, reg);
+ } else {
+ err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ kbase_get_gpu_phy_pages(reg),
+ kbase_reg_current_backed_size(reg),
+ reg->flags & gwt_mask, kctx->as_nr, group_id,
+ mmu_sync_info, reg, true);
+ }
+
if (err)
goto bad_insert;
kbase_mem_phy_alloc_gpu_mapped(alloc);
@@ -1792,9 +1824,9 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
!WARN_ON(reg->nr_pages < reg->gpu_alloc->nents) &&
reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM &&
reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
- /* For padded imported dma-buf memory, map the dummy aliasing
- * page from the end of the dma-buf pages, to the end of the
- * region using a read only mapping.
+ /* For padded imported dma-buf or user-buf memory, map the dummy
+ * aliasing page from the end of the imported pages, to the end of
+ * the region using a read only mapping.
*
* Only map when it's imported dma-buf memory that is currently
* mapped.
@@ -1802,12 +1834,11 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
* Assume reg->gpu_alloc->nents is the number of actual pages
* in the dma-buf memory.
*/
- err = kbase_mmu_insert_single_page(
- kctx, reg->start_pfn + reg->gpu_alloc->nents,
- kctx->aliasing_sink_page,
+ err = kbase_mmu_insert_single_imported_page(
+ kctx, reg->start_pfn + reg->gpu_alloc->nents, kctx->aliasing_sink_page,
reg->nr_pages - reg->gpu_alloc->nents,
- (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR,
- KBASE_MEM_GROUP_SINK, mmu_sync_info);
+ (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, KBASE_MEM_GROUP_SINK,
+ mmu_sync_info);
if (err)
goto bad_insert;
}
@@ -1825,7 +1856,8 @@ bad_aliased_insert:
kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride),
phys_alloc, alloc->imported.alias.aliased[i].length,
- kctx->as_nr);
+ alloc->imported.alias.aliased[i].length, kctx->as_nr,
+ false);
}
bad_insert:
kbase_remove_va_region(kctx->kbdev, reg);
@@ -1855,7 +1887,6 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
switch (alloc->type) {
case KBASE_MEM_TYPE_ALIAS: {
size_t i = 0;
-
/* Due to the way the number of valid PTEs and ATEs are tracked
* currently, only the GPU virtual range that is backed & mapped
* should be passed to the kbase_mmu_teardown_pages() function,
@@ -1874,21 +1905,49 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
kctx->kbdev, &kctx->mmu,
reg->start_pfn + (i * alloc->imported.alias.stride),
phys_alloc, alloc->imported.alias.aliased[i].length,
- kctx->as_nr);
+ alloc->imported.alias.aliased[i].length, kctx->as_nr,
+ false);
if (WARN_ON_ONCE(err_loop))
err = err_loop;
}
}
break;
- case KBASE_MEM_TYPE_IMPORTED_UMM:
- err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
- alloc->pages, reg->nr_pages, kctx->as_nr);
+ case KBASE_MEM_TYPE_IMPORTED_UMM: {
+ size_t nr_phys_pages = reg->nr_pages;
+ size_t nr_virt_pages = reg->nr_pages;
+ /* If the region has import padding and falls under the threshold for
+ * issuing a partial GPU cache flush, we want to reduce the number of
+ * physical pages that get flushed.
+ *
+ * This is symmetric with the case of mapping the memory, which first maps
+ * each imported physical page to a separate virtual page, and then
+ * maps the single aliasing sink page to each of the virtual padding
+ * pages.
+ */
+ if (reg->flags & KBASE_REG_IMPORT_PAD)
+ nr_phys_pages = alloc->nents + 1;
+
+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ alloc->pages, nr_phys_pages, nr_virt_pages,
+ kctx->as_nr, true);
+ }
break;
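/*
 * Worked example with hypothetical numbers: for an imported dma-buf region
 * with reg->nr_pages == 16, alloc->nents == 10 and KBASE_REG_IMPORT_PAD set,
 * the teardown above uses nr_phys_pages == 11 (the 10 imported pages plus the
 * single aliasing sink page) while still unmapping nr_virt_pages == 16,
 * mirroring how the padded region was originally mapped.
 */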
- default:
- err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
- alloc->pages, kbase_reg_current_backed_size(reg),
- kctx->as_nr);
+ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+ size_t nr_reg_pages = kbase_reg_current_backed_size(reg);
+
+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ alloc->pages, nr_reg_pages, nr_reg_pages,
+ kctx->as_nr, true);
+ }
+ break;
+ default: {
+ size_t nr_reg_pages = kbase_reg_current_backed_size(reg);
+
+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ alloc->pages, nr_reg_pages, nr_reg_pages,
+ kctx->as_nr, false);
+ }
break;
}
@@ -2032,7 +2091,8 @@ void kbase_sync_single(struct kbase_context *kctx,
BUG_ON(!cpu_page);
BUG_ON(offset + size > PAGE_SIZE);
- dma_addr = kbase_dma_addr(cpu_page) + offset;
+ dma_addr = kbase_dma_addr_from_tagged(t_cpu_pa) + offset;
+
if (sync_fn == KBASE_SYNC_TO_CPU)
dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr,
size, DMA_BIDIRECTIONAL);
@@ -2043,29 +2103,30 @@ void kbase_sync_single(struct kbase_context *kctx,
void *src = NULL;
void *dst = NULL;
struct page *gpu_page;
+ dma_addr_t dma_addr;
if (WARN(!gpu_pa, "No GPU PA found for infinite cache op"))
return;
gpu_page = pfn_to_page(PFN_DOWN(gpu_pa));
+ dma_addr = kbase_dma_addr_from_tagged(t_gpu_pa) + offset;
if (sync_fn == KBASE_SYNC_TO_DEVICE) {
src = ((unsigned char *)kmap(cpu_page)) + offset;
dst = ((unsigned char *)kmap(gpu_page)) + offset;
} else if (sync_fn == KBASE_SYNC_TO_CPU) {
- dma_sync_single_for_cpu(kctx->kbdev->dev,
- kbase_dma_addr(gpu_page) + offset,
- size, DMA_BIDIRECTIONAL);
+ dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, size,
+ DMA_BIDIRECTIONAL);
src = ((unsigned char *)kmap(gpu_page)) + offset;
dst = ((unsigned char *)kmap(cpu_page)) + offset;
}
+
memcpy(dst, src, size);
kunmap(gpu_page);
kunmap(cpu_page);
if (sync_fn == KBASE_SYNC_TO_DEVICE)
- dma_sync_single_for_device(kctx->kbdev->dev,
- kbase_dma_addr(gpu_page) + offset,
- size, DMA_BIDIRECTIONAL);
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, size,
+ DMA_BIDIRECTIONAL);
}
}
@@ -2211,7 +2272,7 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re
__func__, (void *)reg, (void *)kctx);
lockdep_assert_held(&kctx->reg_lock);
- if (kbase_va_region_is_no_user_free(kctx, reg)) {
+ if (kbase_va_region_is_no_user_free(reg)) {
dev_warn(kctx->kbdev->dev, "Attempt to free GPU memory whose freeing by user space is forbidden!\n");
return -EINVAL;
}
@@ -2432,7 +2493,7 @@ int kbase_update_region_flags(struct kbase_context *kctx,
if (flags & BASEP_MEM_NO_USER_FREE) {
kbase_gpu_vm_lock(kctx);
- kbase_va_region_no_user_free_get(kctx, reg);
+ kbase_va_region_no_user_free_inc(reg);
kbase_gpu_vm_unlock(kctx);
}
@@ -2486,11 +2547,10 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
tp = alloc->pages + alloc->nents;
-#ifdef CONFIG_MALI_2MB_ALLOC
/* Check if we have enough pages requested so we can allocate a large
* page (512 * 4KB = 2MB )
*/
- if (nr_left >= (SZ_2M / SZ_4K)) {
+ if (kbdev->pagesize_2mb && nr_left >= (SZ_2M / SZ_4K)) {
int nr_lp = nr_left / (SZ_2M / SZ_4K);
res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.large[alloc->group_id],
@@ -2546,8 +2606,9 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
if (np)
break;
- err = kbase_mem_pool_grow(&kctx->mem_pools.large[alloc->group_id],
- 1, kctx->task);
+ err = kbase_mem_pool_grow(
+ &kctx->mem_pools.large[alloc->group_id],
+ 1, kctx->task);
if (err)
break;
} while (1);
@@ -2588,9 +2649,8 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
}
}
}
-no_new_partial:
-#endif
+no_new_partial:
if (nr_left) {
res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[alloc->group_id], nr_left,
tp, false, kctx->task);
@@ -2652,18 +2712,17 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
lockdep_assert_held(&pool->pool_lock);
-#if !defined(CONFIG_MALI_2MB_ALLOC)
- WARN_ON(pool->order);
-#endif
+ kctx = alloc->imported.native.kctx;
+ kbdev = kctx->kbdev;
+
+ if (!kbdev->pagesize_2mb)
+ WARN_ON(pool->order);
if (alloc->reg) {
if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents)
goto invalid_request;
}
- kctx = alloc->imported.native.kctx;
- kbdev = kctx->kbdev;
-
lockdep_assert_held(&kctx->mem_partials_lock);
if (nr_pages_requested == 0)
@@ -2682,8 +2741,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
tp = alloc->pages + alloc->nents;
new_pages = tp;
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (pool->order) {
+ if (kbdev->pagesize_2mb && pool->order) {
int nr_lp = nr_left / (SZ_2M / SZ_4K);
res = kbase_mem_pool_alloc_pages_locked(pool,
@@ -2767,15 +2825,12 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
if (nr_left)
goto alloc_failed;
} else {
-#endif
res = kbase_mem_pool_alloc_pages_locked(pool,
nr_left,
tp);
if (res <= 0)
goto alloc_failed;
-#ifdef CONFIG_MALI_2MB_ALLOC
}
-#endif
KBASE_TLSTREAM_AUX_PAGESALLOC(
kbdev,
@@ -2796,8 +2851,7 @@ alloc_failed:
struct tagged_addr *start_free = alloc->pages + alloc->nents;
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (pool->order) {
+ if (kbdev->pagesize_2mb && pool->order) {
while (nr_pages_to_free) {
if (is_huge_head(*start_free)) {
kbase_mem_pool_free_pages_locked(
@@ -2815,15 +2869,12 @@ alloc_failed:
}
}
} else {
-#endif
kbase_mem_pool_free_pages_locked(pool,
nr_pages_to_free,
start_free,
false, /* not dirty */
true); /* return to pool */
-#ifdef CONFIG_MALI_2MB_ALLOC
}
-#endif
}
kbase_process_page_usage_dec(kctx, nr_pages_requested);
@@ -3222,9 +3273,32 @@ out_rollback:
out_term:
return -1;
}
-
KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages);
+void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc,
+ enum kbase_page_status status)
+{
+ u32 i = 0;
+
+ for (; i < alloc->nents; i++) {
+ struct tagged_addr phys = alloc->pages[i];
+ struct kbase_page_metadata *page_md = kbase_page_private(as_page(phys));
+
+ /* Skip the 4KB page that is part of a large page, as the large page is
+ * excluded from the migration process.
+ */
+ if (is_huge(phys) || is_partial(phys))
+ continue;
+
+ if (!page_md)
+ continue;
+
+ spin_lock(&page_md->migrate_lock);
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)status);
+ spin_unlock(&page_md->migrate_lock);
+ }
+}
+
bool kbase_check_alloc_flags(unsigned long flags)
{
/* Only known input flags should be set. */
@@ -3465,10 +3539,6 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
#undef KBASE_MSG_PRE
}
-/**
- * kbase_gpu_vm_lock() - Acquire the per-context region list lock
- * @kctx: KBase context
- */
void kbase_gpu_vm_lock(struct kbase_context *kctx)
{
KBASE_DEBUG_ASSERT(kctx != NULL);
@@ -3477,10 +3547,6 @@ void kbase_gpu_vm_lock(struct kbase_context *kctx)
KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock);
-/**
- * kbase_gpu_vm_unlock() - Release the per-context region list lock
- * @kctx: KBase context
- */
void kbase_gpu_vm_unlock(struct kbase_context *kctx)
{
KBASE_DEBUG_ASSERT(kctx != NULL);
@@ -3797,8 +3863,8 @@ static void kbase_jit_destroy_worker(struct work_struct *work)
* by implementing "free on putting the last reference",
* but only for JIT regions.
*/
- WARN_ON(reg->no_user_free_refcnt > 1);
- kbase_va_region_no_user_free_put(kctx, reg);
+ WARN_ON(atomic_read(&reg->no_user_free_count) > 1);
+ kbase_va_region_no_user_free_dec(reg);
kbase_mem_free_region(kctx, reg);
kbase_gpu_vm_unlock(kctx);
} while (1);
@@ -3813,7 +3879,7 @@ int kbase_jit_init(struct kbase_context *kctx)
INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker);
#if MALI_USE_CSF
- spin_lock_init(&kctx->csf.kcpu_queues.jit_lock);
+ mutex_init(&kctx->csf.kcpu_queues.jit_lock);
INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_cmds_head);
INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_blocked_queues);
#else /* !MALI_USE_CSF */
@@ -4059,18 +4125,14 @@ static int kbase_jit_grow(struct kbase_context *kctx,
delta = info->commit_pages - reg->gpu_alloc->nents;
pages_required = delta;
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (pages_required >= (SZ_2M / SZ_4K)) {
+ if (kctx->kbdev->pagesize_2mb && pages_required >= (SZ_2M / SZ_4K)) {
pool = &kctx->mem_pools.large[kctx->jit_group_id];
/* Round up to number of 2 MB pages required */
pages_required += ((SZ_2M / SZ_4K) - 1);
pages_required /= (SZ_2M / SZ_4K);
} else {
-#endif
pool = &kctx->mem_pools.small[kctx->jit_group_id];
-#ifdef CONFIG_MALI_2MB_ALLOC
}
-#endif
if (reg->cpu_alloc != reg->gpu_alloc)
pages_required *= 2;
@@ -4253,7 +4315,9 @@ static bool jit_allow_allocate(struct kbase_context *kctx,
{
#if !MALI_USE_CSF
lockdep_assert_held(&kctx->jctx.lock);
-#endif
+#else /* MALI_USE_CSF */
+ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock);
+#endif /* !MALI_USE_CSF */
#if MALI_JIT_PRESSURE_LIMIT_BASE
if (!ignore_pressure_limit &&
@@ -4346,19 +4410,21 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
#if !MALI_USE_CSF
lockdep_assert_held(&kctx->jctx.lock);
-#endif
+#else /* MALI_USE_CSF */
+ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock);
+#endif /* !MALI_USE_CSF */
if (!jit_allow_allocate(kctx, info, ignore_pressure_limit))
return NULL;
-#ifdef CONFIG_MALI_2MB_ALLOC
- /* Preallocate memory for the sub-allocation structs */
- for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
- prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
- if (!prealloc_sas[i])
- goto end;
+ if (kctx->kbdev->pagesize_2mb) {
+ /* Preallocate memory for the sub-allocation structs */
+ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
+ prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
+ if (!prealloc_sas[i])
+ goto end;
+ }
}
-#endif
kbase_gpu_vm_lock(kctx);
mutex_lock(&kctx->jit_evict_lock);
@@ -4447,7 +4513,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
if (ret < 0) {
/*
* An update to an allocation from the pool failed,
- * chances are slim a new allocation would fair any
+ * chances are slim a new allocation would fare any
* better so return the allocation to the pool and
* return the function with failure.
*/
@@ -4469,6 +4535,17 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
mutex_unlock(&kctx->jit_evict_lock);
reg = NULL;
goto end;
+ } else {
+ /* A suitable JIT allocation existed on the evict list, so we need
+ * to make sure that the NOT_MOVABLE property is cleared.
+ */
+ if (kbase_page_migration_enabled) {
+ kbase_gpu_vm_lock(kctx);
+ mutex_lock(&kctx->jit_evict_lock);
+ kbase_set_phy_alloc_page_status(reg->gpu_alloc, ALLOCATED_MAPPED);
+ mutex_unlock(&kctx->jit_evict_lock);
+ kbase_gpu_vm_unlock(kctx);
+ }
}
} else {
/* No suitable JIT allocation was found so create a new one */
@@ -4527,7 +4604,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
/* Similarly to tiler heap init, there is a short window of time
* where the (either recycled or newly allocated, in our case) region has
- * "no user free" refcount incremented but is still missing the DONT_NEED flag, and
+ * "no user free" count incremented but is still missing the DONT_NEED flag, and
* doesn't yet have the ACTIVE_JIT_ALLOC flag either. Temporarily leaking the
* allocation is the least bad option that doesn't lead to a security issue down the
* line (it will eventually be cleaned up during context termination).
@@ -4536,9 +4613,9 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
* flags.
*/
kbase_gpu_vm_lock(kctx);
- if (unlikely(reg->no_user_free_refcnt > 1)) {
+ if (unlikely(atomic_read(&reg->no_user_free_count) > 1)) {
kbase_gpu_vm_unlock(kctx);
- dev_err(kctx->kbdev->dev, "JIT region has no_user_free_refcnt > 1!\n");
+ dev_err(kctx->kbdev->dev, "JIT region has no_user_free_count > 1!\n");
mutex_lock(&kctx->jit_evict_lock);
list_move(&reg->jit_node, &kctx->jit_pool_head);
@@ -4578,6 +4655,12 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
{
u64 old_pages;
+#if !MALI_USE_CSF
+ lockdep_assert_held(&kctx->jctx.lock);
+#else /* MALI_USE_CSF */
+ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock);
+#endif /* !MALI_USE_CSF */
+
/* JIT id not immediately available here, so use 0u */
trace_mali_jit_free(reg, 0u);
@@ -4630,6 +4713,12 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
list_move(&reg->jit_node, &kctx->jit_pool_head);
+ /* Inactive JIT regions should be freed by the shrinker and not impacted
+ * by page migration. Once freed, they will enter into the page migration
+ * state machine via the mempools.
+ */
+ if (kbase_page_migration_enabled)
+ kbase_set_phy_alloc_page_status(reg->gpu_alloc, NOT_MOVABLE);
mutex_unlock(&kctx->jit_evict_lock);
}
@@ -4682,8 +4771,8 @@ bool kbase_jit_evict(struct kbase_context *kctx)
* by implementing "free on putting the last reference",
* but only for JIT regions.
*/
- WARN_ON(reg->no_user_free_refcnt > 1);
- kbase_va_region_no_user_free_put(kctx, reg);
+ WARN_ON(atomic_read(&reg->no_user_free_count) > 1);
+ kbase_va_region_no_user_free_dec(reg);
kbase_mem_free_region(kctx, reg);
}
@@ -4711,8 +4800,8 @@ void kbase_jit_term(struct kbase_context *kctx)
* by implementing "free on putting the last reference",
* but only for JIT regions.
*/
- WARN_ON(walker->no_user_free_refcnt > 1);
- kbase_va_region_no_user_free_put(kctx, walker);
+ WARN_ON(atomic_read(&walker->no_user_free_count) > 1);
+ kbase_va_region_no_user_free_dec(walker);
kbase_mem_free_region(kctx, walker);
mutex_lock(&kctx->jit_evict_lock);
}
@@ -4730,8 +4819,8 @@ void kbase_jit_term(struct kbase_context *kctx)
* by implementing "free on putting the last reference",
* but only for JIT regions.
*/
- WARN_ON(walker->no_user_free_refcnt > 1);
- kbase_va_region_no_user_free_put(kctx, walker);
+ WARN_ON(atomic_read(&walker->no_user_free_count) > 1);
+ kbase_va_region_no_user_free_dec(walker);
kbase_mem_free_region(kctx, walker);
mutex_lock(&kctx->jit_evict_lock);
}
@@ -4940,10 +5029,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
struct page **pages;
struct tagged_addr *pa;
long i, dma_mapped_pages;
- unsigned long address;
struct device *dev;
- unsigned long offset_within_page;
- unsigned long remaining_size;
unsigned long gwt_mask = ~0;
/* Calls to this function are inherently asynchronous, with respect to
* MMU operations.
@@ -4959,20 +5045,34 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
alloc = reg->gpu_alloc;
pa = kbase_get_gpu_phy_pages(reg);
- address = alloc->imported.user_buf.address;
pinned_pages = alloc->nents;
pages = alloc->imported.user_buf.pages;
dev = kctx->kbdev->dev;
- offset_within_page = address & ~PAGE_MASK;
- remaining_size = alloc->imported.user_buf.size;
+ /* Manual CPU cache synchronization.
+ *
+ * The driver disables automatic CPU cache synchronization because the
+ * memory pages that enclose the imported region may also contain
+ * sub-regions which are not imported and that are allocated and used
+ * by the user process. This may be the case of memory at the beginning
+ * of the first page and at the end of the last page. Automatic CPU cache
+ * synchronization would force some operations on those memory allocations,
+ * unbeknown to the user process: in particular, a CPU cache invalidate
+ * upon unmapping would destroy the content of dirty CPU caches and cause
+ * the user process to lose CPU writes to the non-imported sub-regions.
+ *
+ * When the GPU claims ownership of the imported memory buffer, it shall
+ * commit CPU writes for all of the pages that enclose the imported
+ * region, otherwise the initial content of memory would be wrong.
+ */
for (i = 0; i < pinned_pages; i++) {
- unsigned long map_size =
- MIN(PAGE_SIZE - offset_within_page, remaining_size);
- dma_addr_t dma_addr = dma_map_page(dev, pages[i],
- offset_within_page, map_size,
- DMA_BIDIRECTIONAL);
-
+ dma_addr_t dma_addr;
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#else
+ dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
+#endif
err = dma_mapping_error(dev, dma_addr);
if (err)
goto unwind;
@@ -4980,8 +5080,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
alloc->imported.user_buf.dma_addrs[i] = dma_addr;
pa[i] = as_tagged(page_to_phys(pages[i]));
- remaining_size -= map_size;
- offset_within_page = 0;
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
}
#ifdef CONFIG_MALI_CINSTR_GWT
@@ -4989,29 +5088,36 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
gwt_mask = ~KBASE_REG_GPU_WR;
#endif
- err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
- pa, kbase_reg_current_backed_size(reg),
- reg->flags & gwt_mask, kctx->as_nr,
- alloc->group_id, mmu_sync_info);
+ err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa,
+ kbase_reg_current_backed_size(reg),
+ reg->flags & gwt_mask, kctx->as_nr, alloc->group_id,
+ mmu_sync_info, NULL);
if (err == 0)
return 0;
/* fall down */
unwind:
alloc->nents = 0;
- offset_within_page = address & ~PAGE_MASK;
- remaining_size = alloc->imported.user_buf.size;
dma_mapped_pages = i;
- /* Run the unmap loop in the same order as map loop */
+ /* Run the unmap loop in the same order as map loop, and perform again
+ * CPU cache synchronization to re-write the content of dirty CPU caches
+ * to memory. This is a precautionary measure in case a GPU job has taken
+ * advantage of a partially GPU-mapped range to write and corrupt the
+ * content of memory, either inside or outside the imported region.
+ *
+ * Notice that this error recovery path doesn't try to be optimal and just
+ * flushes the entire page range.
+ */
for (i = 0; i < dma_mapped_pages; i++) {
- unsigned long unmap_size =
- MIN(PAGE_SIZE - offset_within_page, remaining_size);
+ dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
- dma_unmap_page(kctx->kbdev->dev,
- alloc->imported.user_buf.dma_addrs[i],
- unmap_size, DMA_BIDIRECTIONAL);
- remaining_size -= unmap_size;
- offset_within_page = 0;
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#else
+ dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
+#endif
}
/* The user buffer could already have been previously pinned before
@@ -5052,12 +5158,89 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
#endif
for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
- unsigned long unmap_size =
- MIN(remaining_size, PAGE_SIZE - offset_within_page);
+ unsigned long imported_size = MIN(remaining_size, PAGE_SIZE - offset_within_page);
+ /* Notice: this is a temporary variable that is used for DMA sync
+ * operations, and that could be incremented by an offset if the
+ * current page contains both imported and non-imported memory
+ * sub-regions.
+ *
+ * It is valid to add an offset to this value, because the offset
+ * is always kept within the physically contiguous dma-mapped range
+ * and there's no need to translate to physical address to offset it.
+ *
+ * This variable is not going to be used for the actual DMA unmap
+ * operation, that shall always use the original DMA address of the
+ * whole memory page.
+ */
dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
- dma_unmap_page(kctx->kbdev->dev, dma_addr, unmap_size,
- DMA_BIDIRECTIONAL);
+ /* Manual CPU cache synchronization.
+ *
+ * When the GPU returns ownership of the buffer to the CPU, the driver
+ * needs to treat imported and non-imported memory differently.
+ *
+ * The first case to consider is non-imported sub-regions at the
+ * beginning of the first page and at the end of last page. For these
+ * sub-regions: CPU cache shall be committed with a clean+invalidate,
+ * in order to keep the last CPU write.
+ *
+ * Imported region prefers the opposite treatment: this memory has been
+ * legitimately mapped and used by the GPU, hence GPU writes shall be
+ * committed to memory, while CPU cache shall be invalidated to make
+ * sure that CPU reads the correct memory content.
+ *
+ * The following diagram shows the expected value of the variables
+ * used in this loop in the corner case of an imported region enclosed
+ * by a single memory page:
+ *
+ * page boundary ->|---------- | <- dma_addr (initial value)
+ * | |
+ * | - - - - - | <- offset_within_page
+ * |XXXXXXXXXXX|\
+ * |XXXXXXXXXXX| \
+ * |XXXXXXXXXXX| }- imported_size
+ * |XXXXXXXXXXX| /
+ * |XXXXXXXXXXX|/
+ * | - - - - - | <- offset_within_page + imported_size
+ * | |\
+ * | | }- PAGE_SIZE - imported_size - offset_within_page
+ * | |/
+ * page boundary ->|-----------|
+ *
+ * If the imported region is enclosed by more than one page, then
+ * offset_within_page = 0 for any page after the first.
+ */
+
+ /* Only for first page: handle non-imported range at the beginning. */
+ if (offset_within_page > 0) {
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page,
+ DMA_BIDIRECTIONAL);
+ dma_addr += offset_within_page;
+ }
+
+ /* For every page: handle imported range. */
+ if (imported_size > 0)
+ dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size,
+ DMA_BIDIRECTIONAL);
+
+ /* Only for last page (that may coincide with first page):
+ * handle non-imported range at the end.
+ */
+ if ((imported_size + offset_within_page) < PAGE_SIZE) {
+ dma_addr += imported_size;
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
+ PAGE_SIZE - imported_size - offset_within_page,
+ DMA_BIDIRECTIONAL);
+ }
+
+ /* Notice: use the original DMA address to unmap the whole memory page. */
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+#else
+ dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i],
+ PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+#endif
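/*
 * Worked example with hypothetical numbers: for a single-page import where
 * offset_within_page == 0x200 and imported_size == 0x800 (PAGE_SIZE == 0x1000),
 * the code above issues:
 *   dma_sync_single_for_device(dev, dma_addr,         0x200, ...)  leading non-imported bytes
 *   dma_sync_single_for_cpu(dev, dma_addr + 0x200,    0x800, ...)  imported range
 *   dma_sync_single_for_device(dev, dma_addr + 0xa00, 0x600, ...)  trailing non-imported bytes
 * and then unmaps the whole page using the original, page-aligned DMA address.
 */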
if (writeable)
set_page_dirty_lock(pages[i]);
#if !MALI_USE_CSF
@@ -5065,7 +5248,7 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
pages[i] = NULL;
#endif
- remaining_size -= unmap_size;
+ remaining_size -= imported_size;
offset_within_page = 0;
}
#if !MALI_USE_CSF
@@ -5146,8 +5329,9 @@ int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_regi
break;
}
default:
- WARN(1, "Invalid external resource GPU allocation type (%x) on mapping",
- alloc->type);
+ dev_dbg(kctx->kbdev->dev,
+ "Invalid external resource GPU allocation type (%x) on mapping",
+ alloc->type);
return -EINVAL;
}
@@ -5180,7 +5364,8 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_r
kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
alloc->pages,
kbase_reg_current_backed_size(reg),
- kctx->as_nr);
+ kbase_reg_current_backed_size(reg),
+ kctx->as_nr, true);
}
if ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0)
diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h
index 9f8be10..02e5509 100644
--- a/mali_kbase/mali_kbase_mem.h
+++ b/mali_kbase/mali_kbase_mem.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -37,6 +37,8 @@
#include "mali_kbase_defs.h"
/* Required for kbase_mem_evictable_unmake */
#include "mali_kbase_mem_linux.h"
+#include "mali_kbase_mem_migrate.h"
+#include "mali_kbase_refcount_defs.h"
static inline void kbase_process_page_usage_inc(struct kbase_context *kctx,
int pages);
@@ -182,6 +184,106 @@ struct kbase_mem_phy_alloc {
} imported;
};
+/**
+ * enum kbase_page_status - Status of a page used for page migration.
+ *
+ * @MEM_POOL: Stable state. Page is located in a memory pool and can safely
+ * be migrated.
+ * @ALLOCATE_IN_PROGRESS: Transitory state. A page is set to this status as
+ * soon as it leaves a memory pool.
+ * @SPILL_IN_PROGRESS: Transitory state. Corner case where pages in a memory
+ * pool of a dying context are being moved to the device
+ * memory pool.
+ * @NOT_MOVABLE: Stable state. Page has been allocated for an object that is
+ * not movable, but may return to be movable when the object
+ * is freed.
+ * @ALLOCATED_MAPPED: Stable state. Page has been allocated, mapped to GPU
+ * and has reference to kbase_mem_phy_alloc object.
+ * @PT_MAPPED: Stable state. Similar to ALLOCATED_MAPPED, but page doesn't
+ * reference kbase_mem_phy_alloc object. Used as a page in MMU
+ * page table.
+ * @FREE_IN_PROGRESS: Transitory state. A page is set to this status as soon as
+ * the driver manages to acquire a lock on the page while
+ * unmapping it. This status means that a memory release is
+ * happening and it's still not complete.
+ * @FREE_ISOLATED_IN_PROGRESS: Transitory state. This is a very particular corner case.
+ * A page is isolated while it is in ALLOCATED_MAPPED state,
+ * but then the driver tries to destroy the allocation.
+ * @FREE_PT_ISOLATED_IN_PROGRESS: Transitory state. This is a very particular corner case.
+ * A page is isolated while it is in PT_MAPPED state, but
+ * then the driver tries to destroy the allocation.
+ *
+ * Pages can only be migrated in stable states.
+ */
+enum kbase_page_status {
+ MEM_POOL = 0,
+ ALLOCATE_IN_PROGRESS,
+ SPILL_IN_PROGRESS,
+ NOT_MOVABLE,
+ ALLOCATED_MAPPED,
+ PT_MAPPED,
+ FREE_IN_PROGRESS,
+ FREE_ISOLATED_IN_PROGRESS,
+ FREE_PT_ISOLATED_IN_PROGRESS,
+};
+
+#define PGD_VPFN_LEVEL_MASK ((u64)0x3)
+#define PGD_VPFN_LEVEL_GET_LEVEL(pgd_vpfn_level) (pgd_vpfn_level & PGD_VPFN_LEVEL_MASK)
+#define PGD_VPFN_LEVEL_GET_VPFN(pgd_vpfn_level) (pgd_vpfn_level & ~PGD_VPFN_LEVEL_MASK)
+#define PGD_VPFN_LEVEL_SET(pgd_vpfn, level) \
+ ((pgd_vpfn & ~PGD_VPFN_LEVEL_MASK) | (level & PGD_VPFN_LEVEL_MASK))
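/*
 * Minimal sketch of the packing above, assuming the stored VPFN is aligned so
 * that its two lowest bits are free:
 *
 *   u64 packed = PGD_VPFN_LEVEL_SET(0x1000, 2);    // 0x1002
 *   u64 level  = PGD_VPFN_LEVEL_GET_LEVEL(packed); // 2
 *   u64 vpfn   = PGD_VPFN_LEVEL_GET_VPFN(packed);  // 0x1000
 */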
+
+/**
+ * struct kbase_page_metadata - Metadata for each page in kbase
+ *
+ * @kbdev: Pointer to kbase device.
+ * @dma_addr: DMA address mapped to page.
+ * @migrate_lock: A spinlock to protect the private metadata.
+ * @data: Member in union valid based on @status.
+ * @status: Status to keep track of whether the page can be migrated at any
+ * given moment. MSB will indicate if page is isolated.
+ * Protected by @migrate_lock.
+ * @vmap_count: Counter of kernel mappings.
+ * @group_id: Memory group ID obtained at the time of page allocation.
+ *
+ * Each 4KB page will have a reference to this struct in the private field.
+ * This will be used to keep track of information required for Linux page
+ * migration functionality, as well as the address for DMA mapping.
+ */
+struct kbase_page_metadata {
+ dma_addr_t dma_addr;
+ spinlock_t migrate_lock;
+
+ union {
+ struct {
+ struct kbase_mem_pool *pool;
+ /* Pool could be terminated after page is isolated and therefore
+ * won't be able to get reference to kbase device.
+ */
+ struct kbase_device *kbdev;
+ } mem_pool;
+ struct {
+ struct kbase_va_region *reg;
+ struct kbase_mmu_table *mmut;
+ u64 vpfn;
+ } mapped;
+ struct {
+ struct kbase_mmu_table *mmut;
+ u64 pgd_vpfn_level;
+ } pt_mapped;
+ struct {
+ struct kbase_device *kbdev;
+ } free_isolated;
+ struct {
+ struct kbase_device *kbdev;
+ } free_pt_isolated;
+ } data;
+
+ u8 status;
+ u8 vmap_count;
+ u8 group_id;
+};
+
/* The top bit of kbase_alloc_import_user_buf::current_mapping_usage_count is
* used to signify that a buffer was pinned when it was imported. Since the
* reference count is limited by the number of atoms that can be submitted at
@@ -204,6 +306,20 @@ enum kbase_jit_report_flags {
KBASE_JIT_REPORT_ON_ALLOC_OR_FREE = (1u << 0)
};
+/**
+ * kbase_set_phy_alloc_page_status - Set the page migration status of the underlying
+ * physical allocation.
+ * @alloc: the physical allocation containing the pages whose metadata is going
+ * to be modified
+ * @status: the status the pages should end up in
+ *
+ * Note that this function does not perform all of the checks that normally
+ * ensure proper state transitions. It is only used when changing the allocation
+ * to NOT_MOVABLE, or from NOT_MOVABLE back to ALLOCATED_MAPPED.
+ */
+void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc,
+ enum kbase_page_status status);
+
static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc)
{
KBASE_DEBUG_ASSERT(alloc);
@@ -224,8 +340,9 @@ static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc *
}
/**
- * kbase_mem_phy_alloc_kernel_mapped - Increment kernel_mappings
- * counter for a memory region to prevent commit and flag changes
+ * kbase_mem_phy_alloc_kernel_mapped - Increment kernel_mappings counter for a
+ * memory region to prevent commit and flag
+ * changes
*
* @alloc: Pointer to physical pages tracking object
*/
@@ -303,8 +420,8 @@ static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_m
* @jit_usage_id: The last just-in-time memory usage ID for this region.
* @jit_bin_id: The just-in-time memory bin this region came from.
* @va_refcnt: Number of users of this region. Protected by reg_lock.
- * @no_user_free_refcnt: Number of users that want to prevent the region from
- * being freed by userspace.
+ * @no_user_free_count: Number of contexts that want to prevent the region
+ * from being freed by userspace.
* @heap_info_gpu_addr: Pointer to an object in GPU memory defining an end of
* an allocated region
* The object can be one of:
@@ -565,8 +682,8 @@ struct kbase_va_region {
size_t used_pages;
#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
- int va_refcnt;
- int no_user_free_refcnt;
+ kbase_refcount_t va_refcnt;
+ atomic_t no_user_free_count;
};
/**
@@ -643,15 +760,12 @@ static inline void kbase_region_refcnt_free(struct kbase_device *kbdev,
static inline struct kbase_va_region *kbase_va_region_alloc_get(
struct kbase_context *kctx, struct kbase_va_region *region)
{
- lockdep_assert_held(&kctx->reg_lock);
-
- WARN_ON(!region->va_refcnt);
- WARN_ON(region->va_refcnt == INT_MAX);
+ WARN_ON(!kbase_refcount_read(&region->va_refcnt));
+ WARN_ON(kbase_refcount_read(&region->va_refcnt) == INT_MAX);
- /* non-atomic as kctx->reg_lock is held */
dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %pK\n",
- region->va_refcnt, (void *)region);
- region->va_refcnt++;
+ kbase_refcount_read(&region->va_refcnt), (void *)region);
+ kbase_refcount_inc(&region->va_refcnt);
return region;
}
@@ -659,17 +773,14 @@ static inline struct kbase_va_region *kbase_va_region_alloc_get(
static inline struct kbase_va_region *kbase_va_region_alloc_put(
struct kbase_context *kctx, struct kbase_va_region *region)
{
- lockdep_assert_held(&kctx->reg_lock);
-
- WARN_ON(region->va_refcnt <= 0);
+ WARN_ON(kbase_refcount_read(&region->va_refcnt) <= 0);
WARN_ON(region->flags & KBASE_REG_FREE);
- /* non-atomic as kctx->reg_lock is held */
- region->va_refcnt--;
- dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %pK\n",
- region->va_refcnt, (void *)region);
- if (!region->va_refcnt)
+ if (kbase_refcount_dec_and_test(&region->va_refcnt))
kbase_region_refcnt_free(kctx->kbdev, region);
+ else
+ dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %pK\n",
+ kbase_refcount_read(&region->va_refcnt), (void *)region);
return NULL;
}
@@ -683,58 +794,44 @@ static inline struct kbase_va_region *kbase_va_region_alloc_put(
* Hence, callers cannot rely on this check alone to determine if a region might be shrunk
* by any part of kbase. Instead they should use kbase_is_region_shrinkable().
*
- * @kctx: Pointer to kbase context.
* @region: Pointer to region.
*
* Return: true if userspace cannot free the region, false if userspace can free the region.
*/
-static inline bool kbase_va_region_is_no_user_free(struct kbase_context *kctx,
- struct kbase_va_region *region)
+static inline bool kbase_va_region_is_no_user_free(struct kbase_va_region *region)
{
- lockdep_assert_held(&kctx->reg_lock);
- return region->no_user_free_refcnt > 0;
+ return atomic_read(&region->no_user_free_count) > 0;
}
/**
- * kbase_va_region_no_user_free_get - Increment "no user free" refcount for a region.
+ * kbase_va_region_no_user_free_inc - Increment "no user free" count for a region.
* Calling this function will prevent the region to be shrunk by parts of kbase that
- * don't own the region (as long as the refcount stays above zero). Refer to
+ * don't own the region (as long as the count stays above zero). Refer to
* kbase_va_region_is_no_user_free() for more information.
*
- * @kctx: Pointer to kbase context.
* @region: Pointer to region (not shrinkable).
*
* Return: the pointer to the region passed as argument.
*/
-static inline struct kbase_va_region *
-kbase_va_region_no_user_free_get(struct kbase_context *kctx, struct kbase_va_region *region)
+static inline void kbase_va_region_no_user_free_inc(struct kbase_va_region *region)
{
- lockdep_assert_held(&kctx->reg_lock);
-
WARN_ON(kbase_is_region_shrinkable(region));
- WARN_ON(region->no_user_free_refcnt == INT_MAX);
+ WARN_ON(atomic_read(&region->no_user_free_count) == INT_MAX);
/* non-atomic as kctx->reg_lock is held */
- region->no_user_free_refcnt++;
-
- return region;
+ atomic_inc(&region->no_user_free_count);
}
/**
- * kbase_va_region_no_user_free_put - Decrement "no user free" refcount for a region.
+ * kbase_va_region_no_user_free_dec - Decrement "no user free" count for a region.
*
- * @kctx: Pointer to kbase context.
* @region: Pointer to region (not shrinkable).
*/
-static inline void kbase_va_region_no_user_free_put(struct kbase_context *kctx,
- struct kbase_va_region *region)
+static inline void kbase_va_region_no_user_free_dec(struct kbase_va_region *region)
{
- lockdep_assert_held(&kctx->reg_lock);
-
- WARN_ON(!kbase_va_region_is_no_user_free(kctx, region));
+ WARN_ON(!kbase_va_region_is_no_user_free(region));
- /* non-atomic as kctx->reg_lock is held */
- region->no_user_free_refcnt--;
+ atomic_dec(&region->no_user_free_count);
}
/* Common functions */
@@ -950,12 +1047,9 @@ static inline size_t kbase_mem_pool_config_get_max_size(
*
* Return: 0 on success, negative -errno on error
*/
-int kbase_mem_pool_init(struct kbase_mem_pool *pool,
- const struct kbase_mem_pool_config *config,
- unsigned int order,
- int group_id,
- struct kbase_device *kbdev,
- struct kbase_mem_pool *next_pool);
+int kbase_mem_pool_init(struct kbase_mem_pool *pool, const struct kbase_mem_pool_config *config,
+ unsigned int order, int group_id, struct kbase_device *kbdev,
+ struct kbase_mem_pool *next_pool);
/**
* kbase_mem_pool_term - Destroy a memory pool
@@ -1211,6 +1305,16 @@ void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool);
struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool);
/**
+ * kbase_mem_pool_free_page - Free a page from a memory pool.
+ * @pool: Memory pool to free a page from
+ * @p: Page to free
+ *
+ * This will free any associated data stored for the page and release
+ * the page back to the kernel.
+ */
+void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, struct page *p);
+
+/**
* kbase_region_tracker_init - Initialize the region tracker data structure
* @kctx: kbase context
*
@@ -1283,8 +1387,8 @@ struct kbase_va_region *kbase_region_tracker_find_region_base_address(
struct kbase_va_region *kbase_find_region_base_address(struct rb_root *rbtree,
u64 gpu_addr);
-struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
- u64 start_pfn, size_t nr_pages, int zone);
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree,
+ u64 start_pfn, size_t nr_pages, int zone);
void kbase_free_alloced_region(struct kbase_va_region *reg);
int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg,
u64 addr, size_t nr_pages, size_t align);
@@ -1295,6 +1399,32 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
bool kbase_check_alloc_flags(unsigned long flags);
bool kbase_check_import_flags(unsigned long flags);
+static inline bool kbase_import_size_is_valid(struct kbase_device *kbdev, u64 va_pages)
+{
+ if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) {
+ dev_dbg(
+ kbdev->dev,
+ "Import attempted with va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!",
+ (unsigned long long)va_pages);
+ return false;
+ }
+
+ return true;
+}
+
+static inline bool kbase_alias_size_is_valid(struct kbase_device *kbdev, u64 va_pages)
+{
+ if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) {
+ dev_dbg(
+ kbdev->dev,
+ "Alias attempted with va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!",
+ (unsigned long long)va_pages);
+ return false;
+ }
+
+ return true;
+}
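/*
 * Minimal usage sketch (hypothetical caller): the import and alias paths are
 * expected to reject oversized requests up front, e.g.:
 *
 *   if (!kbase_import_size_is_valid(kctx->kbdev, va_pages))
 *       return -EINVAL;
 */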
+
/**
* kbase_check_alloc_sizes - check user space sizes parameters for an
* allocation
@@ -1329,7 +1459,55 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
int kbase_update_region_flags(struct kbase_context *kctx,
struct kbase_va_region *reg, unsigned long flags);
+/**
+ * kbase_gpu_vm_lock() - Acquire the per-context region list lock
+ * @kctx: KBase context
+ *
+ * Care must be taken when making an allocation whilst holding this lock, because of interaction
+ * with the Kernel's OoM-killer and use of this lock in &vm_operations_struct close() handlers.
+ *
+ * If this lock is taken during a syscall, and/or the allocation is 'small' then it is safe to use.
+ *
+ * If the caller is not in a syscall, and the allocation is 'large', then it must not hold this
+ * lock.
+ *
+ * This is because the kernel OoM killer might target the process corresponding to that same kbase
+ * context, and attempt to call the context's close() handlers for its open VMAs. This is safe if
+ * the allocating caller is in a syscall, because the VMA close() handlers are delayed until all
+ * syscalls have finished (noting that no new syscalls can start as the remaining user threads will
+ * have been killed too), and so there is no possibility of contention between the thread
+ * allocating with this lock held, and the VMA close() handler.
+ *
+ * However, outside of a syscall (e.g. a kworker or other kthread), one of kbase's VMA close()
+ * handlers (kbase_cpu_vm_close()) also takes this lock, and so prevents the process from being
+ * killed until the caller of the function allocating memory has released this lock. On subsequent
+ * retries for allocating a page, the OoM killer would be re-invoked but skips over the process
+ * stuck in its close() handler.
+ *
+ * Also because the caller is not in a syscall, the page allocation code in the kernel is not aware
+ * that the allocation is being done on behalf of another process, and so does not realize that
+ * process has received a kill signal due to an OoM, and so will continually retry with the OoM
+ * killer until enough memory has been released, or until all other killable processes have been
+ * killed (at which point the kernel halts with a panic).
+ *
+ * However, if the allocation outside of a syscall is small enough to be satisfied by killing
+ * another process, then the allocation completes, the caller releases this lock, and
+ * kbase_cpu_vm_close() can unblock and allow the process to be killed.
+ *
+ * Hence, this is effectively a deadlock with kbase_cpu_vm_close(), except that if the memory
+ * allocation is small enough the deadlock can be resolved. For that reason, such a memory deadlock
+ * is NOT discovered with CONFIG_PROVE_LOCKING.
+ *
+ * If this may be called outside of a syscall, consider moving allocations outside of this lock, or
+ * use __GFP_NORETRY for such allocations (which will allow direct-reclaim attempts, but will
+ * prevent OoM kills to satisfy the allocation, and will just fail the allocation instead).
+ */
void kbase_gpu_vm_lock(struct kbase_context *kctx);
+
+/**
+ * kbase_gpu_vm_unlock() - Release the per-context region list lock
+ * @kctx: KBase context
+ */
void kbase_gpu_vm_unlock(struct kbase_context *kctx);
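/*
 * Minimal sketch of the allocation pattern suggested above for code that may
 * run outside of a syscall (the helper name is hypothetical, not a kbase API):
 * __GFP_NORETRY lets direct reclaim run but fails the allocation rather than
 * invoking the OoM killer, so the region list lock is never held across an
 * OoM-killer retry loop.
 */
static inline void *example_alloc_under_vm_lock(struct kbase_context *kctx, size_t bytes)
{
	void *p;

	kbase_gpu_vm_lock(kctx);
	/* Direct reclaim may still occur, but an OoM kill will not be triggered. */
	p = kzalloc(bytes, GFP_KERNEL | __GFP_NORETRY);
	kbase_gpu_vm_unlock(kctx);

	return p;
}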
int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size);
@@ -1547,15 +1725,21 @@ int kbasep_find_enclosing_gpu_mapping_start_and_offset(
* @alloc: allocation object to add pages to
* @nr_pages_requested: number of physical pages to allocate
*
- * Allocates \a nr_pages_requested and updates the alloc object.
+ * Allocates @nr_pages_requested and updates the alloc object.
*
- * Return: 0 if all pages have been successfully allocated. Error code otherwise
+ * Note: if kbase_gpu_vm_lock() is to be held around this function to ensure thread-safe updating
+ * of @alloc, then refer to the documentation of kbase_gpu_vm_lock() about the requirements of
+ * either calling during a syscall, or ensuring the allocation is small. These requirements prevent
+ * an effective deadlock between the kernel's OoM killer and kbase's VMA close() handlers, which
+ * could take kbase_gpu_vm_lock() too.
*
- * Note : The caller must not hold vm_lock, as this could cause a deadlock if
- * the kernel OoM killer runs. If the caller must allocate pages while holding
- * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead.
+ * If the requirements of kbase_gpu_vm_lock() cannot be satisfied when calling this function, but
+ * @alloc must still be updated in a thread-safe way, then instead use
+ * kbase_alloc_phy_pages_helper_locked() and restructure callers into the sequence outlined there.
*
* This function cannot be used from interrupt context
+ *
+ * Return: 0 if all pages have been successfully allocated. Error code otherwise
*/
int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
size_t nr_pages_requested);
@@ -1565,17 +1749,19 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
* @alloc: allocation object to add pages to
* @pool: Memory pool to allocate from
* @nr_pages_requested: number of physical pages to allocate
- * @prealloc_sa: Information about the partial allocation if the amount
- * of memory requested is not a multiple of 2MB. One
- * instance of struct kbase_sub_alloc must be allocated by
- * the caller iff CONFIG_MALI_2MB_ALLOC is enabled.
*
- * Allocates \a nr_pages_requested and updates the alloc object. This function
- * does not allocate new pages from the kernel, and therefore will never trigger
- * the OoM killer. Therefore, it can be run while the vm_lock is held.
+ * @prealloc_sa: Information about the partial allocation if the amount of memory requested
+ * is not a multiple of 2MB. One instance of struct kbase_sub_alloc must be
+ * allocated by the caller if kbdev->pagesize_2mb is enabled.
*
- * As new pages can not be allocated, the caller must ensure there are
- * sufficient pages in the pool. Usage of this function should look like :
+ * Allocates @nr_pages_requested and updates the alloc object. This function does not allocate new
+ * pages from the kernel, and therefore will never trigger the OoM killer. It can therefore be
+ * called whilst a thread operating outside of a syscall holds the region list lock
+ * (kbase_gpu_vm_lock()), as it will not cause an effective deadlock with the VMA close() handlers
+ * used by the OoM killer.
+ *
+ * As new pages cannot be allocated, the caller must ensure there are sufficient pages in the
+ * pool. Usage of this function should look like:
*
* kbase_gpu_vm_lock(kctx);
* kbase_mem_pool_lock(pool)
@@ -1588,24 +1774,24 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
* }
* kbase_alloc_phy_pages_helper_locked(pool)
* kbase_mem_pool_unlock(pool)
- * Perform other processing that requires vm_lock...
+ * // Perform other processing that requires vm_lock...
* kbase_gpu_vm_unlock(kctx);
*
- * This ensures that the pool can be grown to the required size and that the
- * allocation can complete without another thread using the newly grown pages.
+ * This ensures that the pool can be grown to the required size and that the allocation can
+ * complete without another thread using the newly grown pages.
*
- * If CONFIG_MALI_2MB_ALLOC is defined and the allocation is >= 2MB, then
- * @pool must be alloc->imported.native.kctx->lp_mem_pool. Otherwise it must be
- * alloc->imported.native.kctx->mem_pool.
- * @prealloc_sa is used to manage the non-2MB sub-allocation. It has to be
- * pre-allocated because we must not sleep (due to the usage of kmalloc())
- * whilst holding pool->pool_lock.
- * @prealloc_sa shall be set to NULL if it has been consumed by this function
- * to indicate that the caller must not free it.
+ * If kbdev->pagesize_2mb is enabled and the allocation is >= 2MB, then @pool must be one of the
+ * pools from alloc->imported.native.kctx->mem_pools.large[]. Otherwise it must be one of the
+ * mempools from alloc->imported.native.kctx->mem_pools.small[].
*
- * Return: Pointer to array of allocated pages. NULL on failure.
+ * @prealloc_sa is used to manage the non-2MB sub-allocation. It has to be pre-allocated because we
+ * must not sleep (due to the usage of kmalloc()) whilst holding pool->pool_lock. @prealloc_sa
+ * shall be set to NULL if it has been consumed by this function to indicate that the caller no
+ * longer owns it and should not access it further.
+ *
+ * Note: Caller must hold @pool->pool_lock
*
- * Note : Caller must hold pool->pool_lock
+ * Return: Pointer to array of allocated pages. NULL on failure.
*/
struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool,
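
As a concrete rendering of the usage sequence in the kernel-doc above, here is a hedged sketch.
The parameter order and the prealloc_sa type of kbase_alloc_phy_pages_helper_locked() are inferred
from the documentation, the pool-growing step is elided, and the wrapper itself is hypothetical.

/* Illustrative shape only, not verbatim kbase code. */
static struct tagged_addr *example_alloc_backing_locked(struct kbase_context *kctx,
							struct kbase_mem_phy_alloc *alloc,
							struct kbase_mem_pool *pool,
							size_t nr_pages,
							struct kbase_sub_alloc **prealloc_sa)
{
	struct tagged_addr *pages;

	kbase_gpu_vm_lock(kctx);
	kbase_mem_pool_lock(pool);

	/* If the pool holds fewer than nr_pages, drop the pool lock, grow the
	 * pool (step elided), then re-take the lock and re-check, exactly as
	 * in the sequence documented above.
	 */

	/* Never allocates from the kernel, so it is safe under both locks. */
	pages = kbase_alloc_phy_pages_helper_locked(alloc, pool, nr_pages, prealloc_sa);

	kbase_mem_pool_unlock(pool);
	/* ... other processing that requires the region list lock ... */
	kbase_gpu_vm_unlock(kctx);

	return pages;
}
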
@@ -1644,7 +1830,7 @@ void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc,
struct kbase_mem_pool *pool, struct tagged_addr *pages,
size_t nr_pages_to_free);
-static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr)
+static inline void kbase_set_dma_addr_as_priv(struct page *p, dma_addr_t dma_addr)
{
SetPagePrivate(p);
if (sizeof(dma_addr_t) > sizeof(p->private)) {
@@ -1660,7 +1846,7 @@ static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr)
}
}
-static inline dma_addr_t kbase_dma_addr(struct page *p)
+static inline dma_addr_t kbase_dma_addr_as_priv(struct page *p)
{
if (sizeof(dma_addr_t) > sizeof(p->private))
return ((dma_addr_t)page_private(p)) << PAGE_SHIFT;
@@ -1668,11 +1854,34 @@ static inline dma_addr_t kbase_dma_addr(struct page *p)
return (dma_addr_t)page_private(p);
}
-static inline void kbase_clear_dma_addr(struct page *p)
+static inline void kbase_clear_dma_addr_as_priv(struct page *p)
{
ClearPagePrivate(p);
}
+static inline struct kbase_page_metadata *kbase_page_private(struct page *p)
+{
+ return (struct kbase_page_metadata *)page_private(p);
+}
+
+static inline dma_addr_t kbase_dma_addr(struct page *p)
+{
+ if (kbase_page_migration_enabled)
+ return kbase_page_private(p)->dma_addr;
+
+ return kbase_dma_addr_as_priv(p);
+}
+
+static inline dma_addr_t kbase_dma_addr_from_tagged(struct tagged_addr tagged_pa)
+{
+ phys_addr_t pa = as_phys_addr_t(tagged_pa);
+ struct page *page = pfn_to_page(PFN_DOWN(pa));
+ dma_addr_t dma_addr =
+ is_huge(tagged_pa) ? kbase_dma_addr_as_priv(page) : kbase_dma_addr(page);
+
+ return dma_addr;
+}
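
A small usage sketch (the wrapper is hypothetical): resolving the DMA address from a tagged GPU
physical address before a cache maintenance call.

/* Hypothetical helper: sync one GPU page for the device given its tagged
 * physical address, using the lookup helpers defined above.
 */
static inline void example_sync_gpu_page_for_device(struct kbase_device *kbdev,
						    struct tagged_addr tagged_pa)
{
	dma_sync_single_for_device(kbdev->dev, kbase_dma_addr_from_tagged(tagged_pa),
				   PAGE_SIZE, DMA_BIDIRECTIONAL);
}
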
+
/**
* kbase_flush_mmu_wqs() - Flush MMU workqueues.
* @kbdev: Device pointer.
@@ -2300,8 +2509,7 @@ kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits)
* kbase_mem_allow_alloc - Check if allocation of GPU memory is allowed
* @kctx: Pointer to kbase context
*
- * Don't allow the allocation of GPU memory until user space has set up the
- * tracking page (which sets kctx->process_mm) or if the ioctl has been issued
+ * Don't allow the allocation of GPU memory if the ioctl has been issued
* from the forked child process using the mali device file fd inherited from
* the parent process.
*
@@ -2309,13 +2517,23 @@ kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits)
*/
static inline bool kbase_mem_allow_alloc(struct kbase_context *kctx)
{
- bool allow_alloc = true;
-
- rcu_read_lock();
- allow_alloc = (rcu_dereference(kctx->process_mm) == current->mm);
- rcu_read_unlock();
+ return (kctx->process_mm == current->mm);
+}
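
A brief sketch of how this check would typically be consumed on the allocation path (the ioctl
handler shown is hypothetical): an fd inherited across fork() leaves kctx->process_mm pointing at
the parent's mm, so the comparison against current->mm fails and the allocation is refused.

/* Hypothetical caller, illustrative only. */
static int example_mem_alloc_ioctl(struct kbase_context *kctx)
{
	if (!kbase_mem_allow_alloc(kctx))
		return -EINVAL;

	/* ... proceed with the actual GPU memory allocation ... */
	return 0;
}
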
- return allow_alloc;
+/**
+ * kbase_mem_mmgrab - Wrapper function to take reference on mm_struct of current process
+ */
+static inline void kbase_mem_mmgrab(void)
+{
+ /* This merely takes a reference on the memory descriptor structure
+ * i.e. mm_struct of current process and not on its address space and
+ * so won't block the freeing of address space on process exit.
+ */
+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
+ atomic_inc(&current->mm->mm_count);
+#else
+ mmgrab(current->mm);
+#endif
}
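
As a sketch of the intended pairing (the two wrappers below are illustrative, not kbase functions,
and the header shown assumes a v4.11+ kernel): every kbase_mem_mmgrab() needs to be balanced by an
mmdrop() on the same mm_struct once the import that recorded it is torn down.

#include <linux/sched/mm.h>

/* Illustrative pairing only. */
static void example_record_mm(struct kbase_alloc_import_user_buf *user_buf)
{
	user_buf->mm = current->mm;
	kbase_mem_mmgrab();
}

static void example_release_mm(struct kbase_alloc_import_user_buf *user_buf)
{
	mmdrop(user_buf->mm);
	user_buf->mm = NULL;
}
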
/**
diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c
index 957b5da..e8df130 100644
--- a/mali_kbase/mali_kbase_mem_linux.c
+++ b/mali_kbase/mali_kbase_mem_linux.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -36,7 +36,8 @@
#include <linux/cache.h>
#include <linux/memory_group_manager.h>
#include <linux/math64.h>
-
+#include <linux/migrate.h>
+#include <linux/version.h>
#include <mali_kbase.h>
#include <mali_kbase_mem_linux.h>
#include <tl/mali_kbase_tracepoints.h>
@@ -382,8 +383,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
zone = KBASE_REG_ZONE_CUSTOM_VA;
}
- reg = kbase_alloc_free_region(rbtree, PFN_DOWN(*gpu_va),
- va_pages, zone);
+ reg = kbase_alloc_free_region(kctx->kbdev, rbtree, PFN_DOWN(*gpu_va), va_pages, zone);
if (!reg) {
dev_err(dev, "Failed to allocate free region");
@@ -476,7 +476,25 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
*gpu_va = (u64) cookie;
} else /* we control the VA */ {
- if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, 1,
+ size_t align = 1;
+
+ if (kctx->kbdev->pagesize_2mb) {
+ /* If there's enough (> 33 bits) of GPU VA space, align to 2MB
+ * boundaries. The similar condition is used for mapping from
+ * the SAME_VA zone inside kbase_context_get_unmapped_area().
+ */
+ if (kctx->kbdev->gpu_props.mmu.va_bits > 33) {
+ if (va_pages >= (SZ_2M / SZ_4K))
+ align = (SZ_2M / SZ_4K);
+ }
+ if (*gpu_va)
+ align = 1;
+#if !MALI_USE_CSF
+ if (reg->flags & KBASE_REG_TILER_ALIGN_TOP)
+ align = 1;
+#endif /* !MALI_USE_CSF */
+ }
+ if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, align,
mmu_sync_info) != 0) {
dev_warn(dev, "Failed to map memory on GPU");
kbase_gpu_vm_unlock(kctx);
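
The alignment decision added above can be restated as a small pure function for illustration (no
new kbase API assumed; the JM-only KBASE_REG_TILER_ALIGN_TOP case is noted but not modelled). For
example, with 2MB pages enabled, more than 33 VA bits and no caller-chosen VA, a 1024-page (4MB)
request is aligned to 512 pages, while a 256-page request keeps align = 1.

#include <linux/sizes.h>
#include <linux/types.h>

/* Illustrative restatement of the SAME_VA alignment rule above. On JM (non-CSF)
 * GPUs, KBASE_REG_TILER_ALIGN_TOP regions also force align back to 1.
 */
static size_t example_sameva_alignment(bool pagesize_2mb, unsigned int va_bits,
				       size_t va_pages, u64 requested_gpu_va)
{
	size_t align = 1;

	if (pagesize_2mb && va_bits > 33 && va_pages >= (SZ_2M / SZ_4K))
		align = SZ_2M / SZ_4K;		/* 512 x 4KB pages = 2MB boundary */

	if (requested_gpu_va)
		align = 1;			/* caller picked the VA: honour it */

	return align;
}
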
@@ -652,24 +670,36 @@ out_unlock:
* @s: Shrinker
* @sc: Shrinker control
*
- * Return: Number of pages which can be freed.
+ * Return: Number of pages which can be freed or SHRINK_EMPTY if no pages remain.
*/
static
unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s,
struct shrink_control *sc)
{
- struct kbase_context *kctx;
-
- kctx = container_of(s, struct kbase_context, reclaim);
+ struct kbase_context *kctx = container_of(s, struct kbase_context, reclaim);
+ int evict_nents = atomic_read(&kctx->evict_nents);
+ unsigned long nr_freeable_items;
WARN((sc->gfp_mask & __GFP_ATOMIC),
"Shrinkers cannot be called for GFP_ATOMIC allocations. Check kernel mm for problems. gfp_mask==%x\n",
sc->gfp_mask);
WARN(in_atomic(),
- "Shrinker called whilst in atomic context. The caller must switch to using GFP_ATOMIC or similar. gfp_mask==%x\n",
+	     "Shrinker called in atomic context. The caller should use GFP_ATOMIC or similar, which would prevent shrinkers from being called. gfp_mask==%x\n",
sc->gfp_mask);
- return atomic_read(&kctx->evict_nents);
+ if (unlikely(evict_nents < 0)) {
+ dev_err(kctx->kbdev->dev, "invalid evict_nents(%d)", evict_nents);
+ nr_freeable_items = 0;
+ } else {
+ nr_freeable_items = evict_nents;
+ }
+
+#if KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE
+ if (nr_freeable_items == 0)
+ nr_freeable_items = SHRINK_EMPTY;
+#endif
+
+ return nr_freeable_items;
}
/**
@@ -678,8 +708,8 @@ unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s,
* @s: Shrinker
* @sc: Shrinker control
*
- * Return: Number of pages freed (can be less then requested) or -1 if the
- * shrinker failed to free pages in its pool.
+ * Return: Number of pages freed (can be less than requested) or
+ * SHRINK_STOP if reclaim isn't possible.
*
* Note:
* This function accesses region structures without taking the region lock,
@@ -712,15 +742,10 @@ unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s,
err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg,
0, alloc->nents);
- if (err != 0) {
- /*
- * Failed to remove GPU mapping, tell the shrinker
- * to stop trying to shrink our slab even though we
- * have pages in it.
- */
- freed = -1;
- goto out_unlock;
- }
+
+ /* Failed to remove GPU mapping, proceed to next one. */
+ if (err != 0)
+ continue;
/*
* Update alloc->evicted before freeing the backing so the
@@ -744,7 +769,7 @@ unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s,
if (freed > sc->nr_to_scan)
break;
}
-out_unlock:
+
mutex_unlock(&kctx->jit_evict_lock);
return freed;
@@ -764,7 +789,11 @@ int kbase_mem_evictable_init(struct kbase_context *kctx)
* struct shrinker does not define batch
*/
kctx->reclaim.batch = 0;
+#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE
register_shrinker(&kctx->reclaim);
+#else
+ register_shrinker(&kctx->reclaim, "mali-mem");
+#endif
return 0;
}
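
For reference, a minimal, generic shrinker skeleton showing the two version-dependent points
touched in this hunk and in the count callback above: count_objects() may return SHRINK_EMPTY on
v4.19+ kernels when there is nothing to reclaim, and register_shrinker() takes a name argument from
v6.0. This is a sketch, not kbase's shrinker.

#include <linux/atomic.h>
#include <linux/shrinker.h>
#include <linux/version.h>

static atomic_t example_evict_nents = ATOMIC_INIT(0);

static unsigned long example_count_objects(struct shrinker *s, struct shrink_control *sc)
{
	unsigned long n = atomic_read(&example_evict_nents);

#if KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE
	if (n == 0)
		return SHRINK_EMPTY;	/* nothing to reclaim from this shrinker */
#endif
	return n;
}

static unsigned long example_scan_objects(struct shrinker *s, struct shrink_control *sc)
{
	/* Free up to sc->nr_to_scan objects here; return the number freed,
	 * or SHRINK_STOP if reclaim is not possible right now.
	 */
	return 0;
}

static struct shrinker example_shrinker = {
	.count_objects = example_count_objects,
	.scan_objects = example_scan_objects,
	.seeks = DEFAULT_SEEKS,
	.batch = 0,
};

static int example_register_shrinker(void)
{
#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE
	return register_shrinker(&example_shrinker);
#else
	return register_shrinker(&example_shrinker, "example-shrinker");
#endif
}
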
@@ -828,6 +857,9 @@ int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc)
lockdep_assert_held(&kctx->reg_lock);
+	/* Memory is in the process of transitioning to the shrinker, so
+	 * migration attempts should be ignored
+ */
kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg,
0, gpu_alloc->nents);
@@ -835,12 +867,17 @@ int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc)
/* This allocation can't already be on a list. */
WARN_ON(!list_empty(&gpu_alloc->evict_node));
- /*
- * Add the allocation to the eviction list, after this point the shrink
+ /* Add the allocation to the eviction list, after this point the shrink
* can reclaim it.
*/
list_add(&gpu_alloc->evict_node, &kctx->evict_list);
atomic_add(gpu_alloc->nents, &kctx->evict_nents);
+
+ /* Indicate to page migration that the memory can be reclaimed by the shrinker.
+ */
+ if (kbase_page_migration_enabled)
+ kbase_set_phy_alloc_page_status(gpu_alloc, NOT_MOVABLE);
+
mutex_unlock(&kctx->jit_evict_lock);
kbase_mem_evictable_mark_reclaim(gpu_alloc);
@@ -892,6 +929,15 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc)
gpu_alloc->evicted, 0, mmu_sync_info);
gpu_alloc->evicted = 0;
+
+		/* Since the allocation is no longer evictable, and we ensure
+		 * that it grows back to its pre-eviction size, we consider its
+		 * state to be ALLOCATED_MAPPED, as that is the only state from
+		 * which a physical allocation could have transitioned to
+		 * NOT_MOVABLE.
+ */
+ if (kbase_page_migration_enabled)
+ kbase_set_phy_alloc_page_status(gpu_alloc, ALLOCATED_MAPPED);
}
}
@@ -950,7 +996,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in
* & GPU queue ringbuffer and none of them needs to be explicitly marked
* as evictable by Userspace.
*/
- if (kbase_va_region_is_no_user_free(kctx, reg))
+ if (kbase_va_region_is_no_user_free(reg))
goto out_unlock;
/* Is the region being transitioning between not needed and needed? */
@@ -1270,11 +1316,11 @@ int kbase_mem_umm_map(struct kbase_context *kctx,
gwt_mask = ~KBASE_REG_GPU_WR;
#endif
- err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
- kbase_get_gpu_phy_pages(reg),
- kbase_reg_current_backed_size(reg),
- reg->flags & gwt_mask, kctx->as_nr,
- alloc->group_id, mmu_sync_info);
+ err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ kbase_get_gpu_phy_pages(reg),
+ kbase_reg_current_backed_size(reg),
+ reg->flags & gwt_mask, kctx->as_nr, alloc->group_id,
+ mmu_sync_info, NULL);
if (err)
goto bad_insert;
@@ -1287,11 +1333,11 @@ int kbase_mem_umm_map(struct kbase_context *kctx,
* Assume alloc->nents is the number of actual pages in the
* dma-buf memory.
*/
- err = kbase_mmu_insert_single_page(
- kctx, reg->start_pfn + alloc->nents,
- kctx->aliasing_sink_page, reg->nr_pages - alloc->nents,
- (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR,
- KBASE_MEM_GROUP_SINK, mmu_sync_info);
+ err = kbase_mmu_insert_single_imported_page(
+ kctx, reg->start_pfn + alloc->nents, kctx->aliasing_sink_page,
+ reg->nr_pages - alloc->nents,
+ (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, KBASE_MEM_GROUP_SINK,
+ mmu_sync_info);
if (err)
goto bad_pad_insert;
}
@@ -1300,7 +1346,7 @@ int kbase_mem_umm_map(struct kbase_context *kctx,
bad_pad_insert:
kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages,
- alloc->nents, kctx->as_nr);
+ alloc->nents, alloc->nents, kctx->as_nr, true);
bad_insert:
kbase_mem_umm_unmap_attachment(kctx, alloc);
bad_map_attachment:
@@ -1329,7 +1375,8 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx,
int err;
err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
- alloc->pages, reg->nr_pages, kctx->as_nr);
+ alloc->pages, reg->nr_pages, reg->nr_pages,
+ kctx->as_nr, true);
WARN_ON(err);
}
@@ -1401,6 +1448,9 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx,
return NULL;
}
+ if (!kbase_import_size_is_valid(kctx->kbdev, *va_pages))
+ return NULL;
+
/* ignore SAME_VA */
*flags &= ~BASE_MEM_SAME_VA;
@@ -1421,23 +1471,21 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx,
if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP)
need_sync = true;
-#if IS_ENABLED(CONFIG_64BIT)
- if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+ if (!kbase_ctx_compat_mode(kctx)) {
/*
* 64-bit tasks require us to reserve VA on the CPU that we use
* on the GPU.
*/
shared_zone = true;
}
-#endif
if (shared_zone) {
*flags |= BASE_MEM_NEED_MMAP;
- reg = kbase_alloc_free_region(&kctx->reg_rbtree_same,
- 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+ reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *va_pages,
+ KBASE_REG_ZONE_SAME_VA);
} else {
- reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
- 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+ reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *va_pages,
+ KBASE_REG_ZONE_CUSTOM_VA);
}
if (!reg) {
@@ -1529,10 +1577,10 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
int zone = KBASE_REG_ZONE_CUSTOM_VA;
bool shared_zone = false;
u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev);
- unsigned long offset_within_page;
- unsigned long remaining_size;
struct kbase_alloc_import_user_buf *user_buf;
struct page **pages = NULL;
+ struct tagged_addr *pa;
+ struct device *dev;
int write;
/* Flag supported only for dma-buf imported memory */
@@ -1570,21 +1618,22 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
/* 64-bit address range is the max */
goto bad_size;
+ if (!kbase_import_size_is_valid(kctx->kbdev, *va_pages))
+ goto bad_size;
+
/* SAME_VA generally not supported with imported memory (no known use cases) */
*flags &= ~BASE_MEM_SAME_VA;
if (*flags & BASE_MEM_IMPORT_SHARED)
shared_zone = true;
-#if IS_ENABLED(CONFIG_64BIT)
- if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+ if (!kbase_ctx_compat_mode(kctx)) {
/*
* 64-bit tasks require us to reserve VA on the CPU that we use
* on the GPU.
*/
shared_zone = true;
}
-#endif
if (shared_zone) {
*flags |= BASE_MEM_NEED_MMAP;
@@ -1593,7 +1642,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
} else
rbtree = &kctx->reg_rbtree_custom;
- reg = kbase_alloc_free_region(rbtree, 0, *va_pages, zone);
+ reg = kbase_alloc_free_region(kctx->kbdev, rbtree, 0, *va_pages, zone);
if (!reg)
goto no_region;
@@ -1619,11 +1668,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
user_buf->address = address;
user_buf->nr_pages = *va_pages;
user_buf->mm = current->mm;
-#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
- atomic_inc(&current->mm->mm_count);
-#else
- mmgrab(current->mm);
-#endif
+ kbase_mem_mmgrab();
if (reg->gpu_alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
user_buf->pages = vmalloc(*va_pages * sizeof(struct page *));
else
@@ -1680,29 +1725,44 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
reg->gpu_alloc->nents = 0;
reg->extension = 0;
- if (pages) {
- struct device *dev = kctx->kbdev->dev;
- struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg);
+ pa = kbase_get_gpu_phy_pages(reg);
+ dev = kctx->kbdev->dev;
+ if (pages) {
/* Top bit signifies that this was pinned on import */
user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT;
- offset_within_page = user_buf->address & ~PAGE_MASK;
- remaining_size = user_buf->size;
+ /* Manual CPU cache synchronization.
+ *
+ * The driver disables automatic CPU cache synchronization because the
+ * memory pages that enclose the imported region may also contain
+ * sub-regions which are not imported and that are allocated and used
+		 * by the user process. This may be the case for memory at the beginning
+ * of the first page and at the end of the last page. Automatic CPU cache
+ * synchronization would force some operations on those memory allocations,
+ * unbeknown to the user process: in particular, a CPU cache invalidate
+ * upon unmapping would destroy the content of dirty CPU caches and cause
+ * the user process to lose CPU writes to the non-imported sub-regions.
+ *
+ * When the GPU claims ownership of the imported memory buffer, it shall
+ * commit CPU writes for the whole of all pages that enclose the imported
+ * region, otherwise the initial content of memory would be wrong.
+ */
for (i = 0; i < faulted_pages; i++) {
- unsigned long map_size =
- MIN(PAGE_SIZE - offset_within_page, remaining_size);
- dma_addr_t dma_addr = dma_map_page(dev, pages[i],
- offset_within_page, map_size, DMA_BIDIRECTIONAL);
-
+ dma_addr_t dma_addr;
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#else
+ dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+#endif
if (dma_mapping_error(dev, dma_addr))
goto unwind_dma_map;
user_buf->dma_addrs[i] = dma_addr;
pa[i] = as_tagged(page_to_phys(pages[i]));
- remaining_size -= map_size;
- offset_within_page = 0;
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
}
reg->gpu_alloc->nents = faulted_pages;
@@ -1711,19 +1771,23 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
return reg;
unwind_dma_map:
- offset_within_page = user_buf->address & ~PAGE_MASK;
- remaining_size = user_buf->size;
dma_mapped_pages = i;
- /* Run the unmap loop in the same order as map loop */
+	/* Run the unmap loop in the same order as the map loop, and perform
+	 * CPU cache synchronization again to write the content of dirty CPU
+	 * caches back to memory. This precautionary measure is kept here to
+	 * keep this code aligned with kbase_jd_user_buf_map() to allow for a
+	 * potential refactor in the future.
+ */
for (i = 0; i < dma_mapped_pages; i++) {
- unsigned long unmap_size =
- MIN(PAGE_SIZE - offset_within_page, remaining_size);
+ dma_addr_t dma_addr = user_buf->dma_addrs[i];
- dma_unmap_page(kctx->kbdev->dev,
- user_buf->dma_addrs[i],
- unmap_size, DMA_BIDIRECTIONAL);
- remaining_size -= unmap_size;
- offset_within_page = 0;
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#else
+ dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
+#endif
}
fault_mismatch:
if (pages) {
@@ -1743,7 +1807,6 @@ no_alloc_obj:
no_region:
bad_size:
return NULL;
-
}
@@ -1800,22 +1863,19 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
/* calculate the number of pages this alias will cover */
*num_pages = nents * stride;
-#if IS_ENABLED(CONFIG_64BIT)
- if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+ if (!kbase_alias_size_is_valid(kctx->kbdev, *num_pages))
+ goto bad_size;
+
+ if (!kbase_ctx_compat_mode(kctx)) {
/* 64-bit tasks must MMAP anyway, but not expose this address to
* clients
*/
*flags |= BASE_MEM_NEED_MMAP;
- reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0,
- *num_pages,
- KBASE_REG_ZONE_SAME_VA);
+ reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *num_pages,
+ KBASE_REG_ZONE_SAME_VA);
} else {
-#else
- if (1) {
-#endif
- reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
- 0, *num_pages,
- KBASE_REG_ZONE_CUSTOM_VA);
+ reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *num_pages,
+ KBASE_REG_ZONE_CUSTOM_VA);
}
if (!reg)
@@ -1866,7 +1926,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
goto bad_handle; /* Not found/already free */
if (kbase_is_region_shrinkable(aliasing_reg))
goto bad_handle; /* Ephemeral region */
- if (kbase_va_region_is_no_user_free(kctx, aliasing_reg))
+ if (kbase_va_region_is_no_user_free(aliasing_reg))
goto bad_handle; /* JIT regions can't be
* aliased. NO_USER_FREE flag
* covers the entire lifetime
@@ -1921,8 +1981,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
}
}
-#if IS_ENABLED(CONFIG_64BIT)
- if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+ if (!kbase_ctx_compat_mode(kctx)) {
/* Bind to a cookie */
if (bitmap_empty(kctx->cookies, BITS_PER_LONG)) {
dev_err(kctx->kbdev->dev, "No cookies available for allocation!");
@@ -1937,10 +1996,8 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
/* relocate to correct base */
gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
gpu_va <<= PAGE_SHIFT;
- } else /* we control the VA */ {
-#else
- if (1) {
-#endif
+ } else {
+ /* we control the VA */
if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1,
mmu_sync_info) != 0) {
dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU");
@@ -1957,9 +2014,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
return gpu_va;
-#if IS_ENABLED(CONFIG_64BIT)
no_cookie:
-#endif
no_mmap:
bad_handle:
/* Marking the source allocs as not being mapped on the GPU and putting
@@ -2026,7 +2081,10 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
*flags &= ~BASE_MEM_COHERENT_SYSTEM;
}
-
+ if (((*flags & BASE_MEM_CACHED_CPU) == 0) && (type == BASE_MEM_IMPORT_TYPE_USER_BUFFER)) {
+ dev_warn(kctx->kbdev->dev, "USER_BUFFER must be CPU cached");
+ goto bad_flags;
+ }
if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) {
dev_warn(kctx->kbdev->dev,
"padding is only supported for UMM");
@@ -2140,11 +2198,9 @@ int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
/* Map the new pages into the GPU */
phy_pages = kbase_get_gpu_phy_pages(reg);
- ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu,
- reg->start_pfn + old_pages,
- phy_pages + old_pages, delta, reg->flags,
- kctx->as_nr, reg->gpu_alloc->group_id,
- mmu_sync_info);
+ ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + old_pages,
+ phy_pages + old_pages, delta, reg->flags, kctx->as_nr,
+ reg->gpu_alloc->group_id, mmu_sync_info, reg, false);
return ret;
}
@@ -2173,7 +2229,7 @@ int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx,
int ret = 0;
ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + new_pages,
- alloc->pages + new_pages, delta, kctx->as_nr);
+ alloc->pages + new_pages, delta, delta, kctx->as_nr, false);
return ret;
}
@@ -2241,7 +2297,7 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages)
if (kbase_is_region_shrinkable(reg))
goto out_unlock;
- if (kbase_va_region_is_no_user_free(kctx, reg))
+ if (kbase_va_region_is_no_user_free(reg))
goto out_unlock;
#ifdef CONFIG_MALI_MEMORY_FULLY_BACKED
@@ -2344,18 +2400,19 @@ int kbase_mem_shrink(struct kbase_context *const kctx,
kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
if (reg->cpu_alloc != reg->gpu_alloc)
kbase_free_phy_pages_helper(reg->gpu_alloc, delta);
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (kbase_reg_current_backed_size(reg) > new_pages) {
- old_pages = new_pages;
- new_pages = kbase_reg_current_backed_size(reg);
-
- /* Update GPU mapping. */
- err = kbase_mem_grow_gpu_mapping(kctx, reg,
- new_pages, old_pages, CALLER_MMU_ASYNC);
+
+ if (kctx->kbdev->pagesize_2mb) {
+ if (kbase_reg_current_backed_size(reg) > new_pages) {
+ old_pages = new_pages;
+ new_pages = kbase_reg_current_backed_size(reg);
+
+ /* Update GPU mapping. */
+ err = kbase_mem_grow_gpu_mapping(kctx, reg, new_pages, old_pages,
+ CALLER_MMU_ASYNC);
+ }
+ } else {
+ WARN_ON(kbase_reg_current_backed_size(reg) != new_pages);
}
-#else
- WARN_ON(kbase_reg_current_backed_size(reg) != new_pages);
-#endif
}
return err;
@@ -2638,6 +2695,8 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx,
size_t size;
int err = 0;
+ lockdep_assert_held(&kctx->reg_lock);
+
dev_dbg(kctx->kbdev->dev, "%s\n", __func__);
size = (vma->vm_end - vma->vm_start);
nr_pages = size >> PAGE_SHIFT;
@@ -2651,8 +2710,8 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx,
goto out;
}
- new_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, nr_pages,
- KBASE_REG_ZONE_SAME_VA);
+ new_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, nr_pages,
+ KBASE_REG_ZONE_SAME_VA);
if (!new_reg) {
err = -ENOMEM;
WARN_ON(1);
@@ -2710,7 +2769,7 @@ static int kbasep_reg_mmap(struct kbase_context *kctx,
size_t *nr_pages, size_t *aligned_offset)
{
- int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+ unsigned int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
struct kbase_va_region *reg;
int err = 0;
@@ -2751,7 +2810,6 @@ static int kbasep_reg_mmap(struct kbase_context *kctx,
/* adjust down nr_pages to what we have physically */
*nr_pages = kbase_reg_current_backed_size(reg);
-
if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset,
reg->nr_pages, 1, mmu_sync_info) != 0) {
dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__);
@@ -2992,6 +3050,99 @@ void kbase_sync_mem_regions(struct kbase_context *kctx,
}
}
+/**
+ * kbase_vmap_phy_pages_migrate_count_increment - Increment VMAP count for
+ * array of physical pages
+ *
+ * @pages: Array of pages.
+ * @page_count: Number of pages.
+ * @flags: Region flags.
+ *
+ * This function is supposed to be called only if page migration support
+ * is enabled in the driver.
+ *
+ * The counter of kernel CPU mappings of the physical pages involved in a
+ * mapping operation is incremented by 1. Errors are handled by making pages
+ * not movable. Permanent kernel mappings will be marked as not movable, too.
+ */
+static void kbase_vmap_phy_pages_migrate_count_increment(struct tagged_addr *pages,
+ size_t page_count, unsigned long flags)
+{
+ size_t i;
+
+ for (i = 0; i < page_count; i++) {
+ struct page *p = as_page(pages[i]);
+ struct kbase_page_metadata *page_md = kbase_page_private(p);
+
+ /* Skip the 4KB page that is part of a large page, as the large page is
+ * excluded from the migration process.
+ */
+ if (is_huge(pages[i]) || is_partial(pages[i]))
+ continue;
+
+ spin_lock(&page_md->migrate_lock);
+ /* Mark permanent kernel mappings as NOT_MOVABLE because they're likely
+ * to stay mapped for a long time. However, keep on counting the number
+ * of mappings even for them: they don't represent an exception for the
+ * vmap_count.
+ *
+ * At the same time, errors need to be handled if a client tries to add
+ * too many mappings, hence a page may end up in the NOT_MOVABLE state
+ * anyway even if it's not a permanent kernel mapping.
+ */
+ if (flags & KBASE_REG_PERMANENT_KERNEL_MAPPING)
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
+ if (page_md->vmap_count < U8_MAX)
+ page_md->vmap_count++;
+ else
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
+ spin_unlock(&page_md->migrate_lock);
+ }
+}
+
+/**
+ * kbase_vunmap_phy_pages_migrate_count_decrement - Decrement VMAP count for
+ * array of physical pages
+ *
+ * @pages: Array of pages.
+ * @page_count: Number of pages.
+ *
+ * This function is supposed to be called only if page migration support
+ * is enabled in the driver.
+ *
+ * The counter of kernel CPU mappings of the physical pages involved in a
+ * mapping operation is decremented by 1. Errors are handled by making pages
+ * not movable.
+ */
+static void kbase_vunmap_phy_pages_migrate_count_decrement(struct tagged_addr *pages,
+ size_t page_count)
+{
+ size_t i;
+
+ for (i = 0; i < page_count; i++) {
+ struct page *p = as_page(pages[i]);
+ struct kbase_page_metadata *page_md = kbase_page_private(p);
+
+ /* Skip the 4KB page that is part of a large page, as the large page is
+ * excluded from the migration process.
+ */
+ if (is_huge(pages[i]) || is_partial(pages[i]))
+ continue;
+
+ spin_lock(&page_md->migrate_lock);
+ /* Decrement the number of mappings for all kinds of pages, including
+ * pages which are NOT_MOVABLE (e.g. permanent kernel mappings).
+ * However, errors still need to be handled if a client tries to remove
+ * more mappings than created.
+ */
+ if (page_md->vmap_count == 0)
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
+ else
+ page_md->vmap_count--;
+ spin_unlock(&page_md->migrate_lock);
+ }
+}
+
static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_region *reg,
u64 offset_bytes, size_t size, struct kbase_vmap_struct *map,
kbase_vmap_flag vmap_flags)
@@ -3064,6 +3215,13 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_regi
*/
cpu_addr = vmap(pages, page_count, VM_MAP, prot);
+	/* If page migration is enabled, increment the kernel CPU mapping (vmap)
+	 * count of all physical pages. In case of errors, e.g. too many
+	 * mappings, make the page not movable to prevent trouble.
+ */
+ if (kbase_page_migration_enabled && !kbase_mem_is_imported(reg->gpu_alloc->type))
+ kbase_vmap_phy_pages_migrate_count_increment(page_array, page_count, reg->flags);
+
kfree(pages);
if (!cpu_addr)
@@ -3087,6 +3245,7 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_regi
atomic_add(page_count, &kctx->permanent_mapped_pages);
kbase_mem_phy_alloc_kernel_mapped(reg->cpu_alloc);
+
return 0;
}
@@ -3168,6 +3327,17 @@ static void kbase_vunmap_phy_pages(struct kbase_context *kctx,
vunmap(addr);
+	/* If page migration is enabled, decrement the kernel CPU mapping (vmap)
+	 * count for all physical pages. Now is a good time to do it because
+	 * references haven't been released yet.
+ */
+ if (kbase_page_migration_enabled && !kbase_mem_is_imported(map->gpu_alloc->type)) {
+ const size_t page_count = PFN_UP(map->offset_in_page + map->size);
+ struct tagged_addr *pages_array = map->cpu_pages;
+
+ kbase_vunmap_phy_pages_migrate_count_decrement(pages_array, page_count);
+ }
+
if (map->flags & KBASE_VMAP_FLAG_SYNC_NEEDED)
kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE);
if (map->flags & KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) {
@@ -3211,79 +3381,29 @@ static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value)
void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
{
- struct mm_struct *mm;
+ struct mm_struct *mm = kctx->process_mm;
- rcu_read_lock();
- mm = rcu_dereference(kctx->process_mm);
- if (mm) {
- atomic_add(pages, &kctx->nonmapped_pages);
-#ifdef SPLIT_RSS_COUNTING
- kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
-#else
- spin_lock(&mm->page_table_lock);
- kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
- spin_unlock(&mm->page_table_lock);
-#endif
- }
- rcu_read_unlock();
-}
-
-static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
-{
- int pages;
- struct mm_struct *mm;
-
- spin_lock(&kctx->mm_update_lock);
- mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock));
- if (!mm) {
- spin_unlock(&kctx->mm_update_lock);
+ if (unlikely(!mm))
return;
- }
-
- rcu_assign_pointer(kctx->process_mm, NULL);
- spin_unlock(&kctx->mm_update_lock);
- synchronize_rcu();
- pages = atomic_xchg(&kctx->nonmapped_pages, 0);
+ atomic_add(pages, &kctx->nonmapped_pages);
#ifdef SPLIT_RSS_COUNTING
- kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages);
+ kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
#else
spin_lock(&mm->page_table_lock);
- kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages);
+ kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
spin_unlock(&mm->page_table_lock);
#endif
}
-static void kbase_special_vm_close(struct vm_area_struct *vma)
-{
- struct kbase_context *kctx;
-
- kctx = vma->vm_private_data;
- kbasep_os_process_page_usage_drain(kctx);
-}
-
-static const struct vm_operations_struct kbase_vm_special_ops = {
- .close = kbase_special_vm_close,
-};
-
static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
{
- /* check that this is the only tracking page */
- spin_lock(&kctx->mm_update_lock);
- if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) {
- spin_unlock(&kctx->mm_update_lock);
- return -EFAULT;
- }
-
- rcu_assign_pointer(kctx->process_mm, current->mm);
-
- spin_unlock(&kctx->mm_update_lock);
+ if (vma_pages(vma) != 1)
+ return -EINVAL;
/* no real access */
vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
- vma->vm_ops = &kbase_vm_special_ops;
- vma->vm_private_data = kctx;
return 0;
}
@@ -3556,23 +3676,27 @@ static void kbase_csf_user_reg_vm_open(struct vm_area_struct *vma)
static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma)
{
struct kbase_context *kctx = vma->vm_private_data;
+ struct kbase_device *kbdev;
- if (!kctx) {
+ if (unlikely(!kctx)) {
pr_debug("Close function called for the unexpected mapping");
return;
}
- if (unlikely(!kctx->csf.user_reg_vma))
- dev_warn(kctx->kbdev->dev, "user_reg_vma pointer unexpectedly NULL");
+ kbdev = kctx->kbdev;
- kctx->csf.user_reg_vma = NULL;
+ if (unlikely(!kctx->csf.user_reg.vma))
+ dev_warn(kbdev->dev, "user_reg VMA pointer unexpectedly NULL for ctx %d_%d",
+ kctx->tgid, kctx->id);
- mutex_lock(&kctx->kbdev->csf.reg_lock);
- if (unlikely(kctx->kbdev->csf.nr_user_page_mapped == 0))
- dev_warn(kctx->kbdev->dev, "Unexpected value for the USER page mapping counter");
- else
- kctx->kbdev->csf.nr_user_page_mapped--;
- mutex_unlock(&kctx->kbdev->csf.reg_lock);
+ mutex_lock(&kbdev->csf.reg_lock);
+ list_del_init(&kctx->csf.user_reg.link);
+ mutex_unlock(&kbdev->csf.reg_lock);
+
+ kctx->csf.user_reg.vma = NULL;
+
+ /* Now as the VMA is closed, drop the reference on mali device file */
+ fput(kctx->filp);
}
/**
@@ -3617,10 +3741,11 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf)
unsigned long flags;
/* Few sanity checks up front */
- if (!kctx || (nr_pages != 1) || (vma != kctx->csf.user_reg_vma) ||
- (vma->vm_pgoff != PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE))) {
- pr_warn("Unexpected CPU page fault on USER page mapping for process %s tgid %d pid %d\n",
- current->comm, current->tgid, current->pid);
+
+ if (!kctx || (nr_pages != 1) || (vma != kctx->csf.user_reg.vma) ||
+ (vma->vm_pgoff != kctx->csf.user_reg.file_offset)) {
+ pr_err("Unexpected CPU page fault on USER page mapping for process %s tgid %d pid %d\n",
+ current->comm, current->tgid, current->pid);
return VM_FAULT_SIGBUS;
}
@@ -3629,22 +3754,22 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf)
pfn = PFN_DOWN(kbdev->reg_start + USER_BASE);
mutex_lock(&kbdev->csf.reg_lock);
+
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- /* Don't map in the actual register page if GPU is powered down.
- * Always map in the dummy page in no mali builds.
+	/* The dummy page is mapped when the GPU is powered off.
+ *
+	 * In no mali builds, always map in the dummy page.
*/
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
- pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_user_reg_page));
-#else
- if (!kbdev->pm.backend.gpu_powered)
- pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_user_reg_page));
-#endif
+ if (IS_ENABLED(CONFIG_MALI_NO_MALI) || !kbdev->pm.backend.gpu_powered)
+ pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.user_reg.dummy_page));
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ list_move_tail(&kctx->csf.user_reg.link, &kbdev->csf.user_reg.list);
ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev,
KBASE_MEM_GROUP_CSF_FW, vma,
vma->vm_start, pfn,
vma->vm_page_prot);
+
mutex_unlock(&kbdev->csf.reg_lock);
return ret;
@@ -3657,20 +3782,6 @@ static const struct vm_operations_struct kbase_csf_user_reg_vm_ops = {
.fault = kbase_csf_user_reg_vm_fault
};
-/**
- * kbase_csf_cpu_mmap_user_reg_page - Memory map method for USER page.
- *
- * @kctx: Pointer of the kernel context.
- * @vma: Pointer to the struct containing the information about
- * the userspace mapping of USER page.
- *
- * Return: 0 on success, error code otherwise.
- *
- * Note:
- * New Base will request Kbase to read the LATEST_FLUSH of USER page on its behalf.
- * But this function needs to be kept for backward-compatibility as old Base (<=1.12)
- * will try to mmap USER page for direct access when it creates a base context.
- */
static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx,
struct vm_area_struct *vma)
{
@@ -3678,7 +3789,7 @@ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx,
struct kbase_device *kbdev = kctx->kbdev;
/* Few sanity checks */
- if (kctx->csf.user_reg_vma)
+ if (kctx->csf.user_reg.vma)
return -EBUSY;
if (nr_pages != 1)
@@ -3697,19 +3808,21 @@ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx,
*/
vma->vm_flags |= VM_PFNMAP;
- kctx->csf.user_reg_vma = vma;
+ kctx->csf.user_reg.vma = vma;
mutex_lock(&kbdev->csf.reg_lock);
- kbdev->csf.nr_user_page_mapped++;
-
- if (!kbdev->csf.mali_file_inode)
- kbdev->csf.mali_file_inode = kctx->filp->f_inode;
-
- if (unlikely(kbdev->csf.mali_file_inode != kctx->filp->f_inode))
- dev_warn(kbdev->dev, "Device file inode pointer not same for all contexts");
-
+ kctx->csf.user_reg.file_offset = kbdev->csf.user_reg.file_offset++;
mutex_unlock(&kbdev->csf.reg_lock);
+ /* Make VMA point to the special internal file, but don't drop the
+ * reference on mali device file (that would be done later when the
+ * VMA is closed).
+ */
+ vma->vm_file = kctx->kbdev->csf.user_reg.filp;
+ get_file(vma->vm_file);
+
+ /* Also adjust the vm_pgoff */
+ vma->vm_pgoff = kctx->csf.user_reg.file_offset;
vma->vm_ops = &kbase_csf_user_reg_vm_ops;
vma->vm_private_data = kctx;
diff --git a/mali_kbase/mali_kbase_mem_migrate.c b/mali_kbase/mali_kbase_mem_migrate.c
new file mode 100644
index 0000000..1dc76d0
--- /dev/null
+++ b/mali_kbase/mali_kbase_mem_migrate.c
@@ -0,0 +1,635 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/**
+ * DOC: Base kernel page migration implementation.
+ */
+#include <linux/migrate.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem_migrate.h>
+#include <mmu/mali_kbase_mmu.h>
+
+/* Global integer used to determine if module parameter value has been
+ * provided and if page migration feature is enabled.
+ * Feature is disabled on all platforms by default.
+ */
+int kbase_page_migration_enabled;
+module_param(kbase_page_migration_enabled, int, 0444);
+KBASE_EXPORT_TEST_API(kbase_page_migration_enabled);
+
+#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE)
+static const struct movable_operations movable_ops;
+#endif
+
+bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr,
+ u8 group_id)
+{
+ struct kbase_page_metadata *page_md =
+ kzalloc(sizeof(struct kbase_page_metadata), GFP_KERNEL);
+
+ if (!page_md)
+ return false;
+
+ SetPagePrivate(p);
+ set_page_private(p, (unsigned long)page_md);
+ page_md->dma_addr = dma_addr;
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)ALLOCATE_IN_PROGRESS);
+ page_md->vmap_count = 0;
+ page_md->group_id = group_id;
+ spin_lock_init(&page_md->migrate_lock);
+
+ lock_page(p);
+#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE)
+ __SetPageMovable(p, &movable_ops);
+ page_md->status = PAGE_MOVABLE_SET(page_md->status);
+#else
+ /* In some corner cases, the driver may attempt to allocate memory pages
+ * even before the device file is open and the mapping for address space
+ * operations is created. In that case, it is impossible to assign address
+ * space operations to memory pages: simply pretend that they are movable,
+ * even if they are not.
+ *
+ * The page will go through all state transitions but it will never be
+ * actually considered movable by the kernel. This is due to the fact that
+ * the page cannot be marked as NOT_MOVABLE upon creation, otherwise the
+ * memory pool will always refuse to add it to the pool and schedule
+ * a worker thread to free it later.
+ *
+ * Page metadata may seem redundant in this case, but they are not,
+ * because memory pools expect metadata to be present when page migration
+ * is enabled and because the pages may always return to memory pools and
+ * gain the movable property later on in their life cycle.
+ */
+ if (kbdev->mem_migrate.inode && kbdev->mem_migrate.inode->i_mapping) {
+ __SetPageMovable(p, kbdev->mem_migrate.inode->i_mapping);
+ page_md->status = PAGE_MOVABLE_SET(page_md->status);
+ }
+#endif
+ unlock_page(p);
+
+ return true;
+}
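
For orientation, a hedged sketch of how a freshly allocated small page could be given migration
metadata; the bare alloc_page()/dma_map_page() calls are a simplification, since real kbase
allocations go through the memory pools and the memory group manager.

/* Illustrative only; error handling is minimal and group_id is passed through
 * unchanged.
 */
static struct page *example_alloc_movable_page(struct kbase_device *kbdev, u8 group_id)
{
	struct page *p = alloc_page(GFP_KERNEL);
	dma_addr_t dma_addr;

	if (!p)
		return NULL;

	dma_addr = dma_map_page(kbdev->dev, p, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(kbdev->dev, dma_addr))
		goto unwind_page;

	if (!kbase_alloc_page_metadata(kbdev, p, dma_addr, group_id))
		goto unwind_dma;

	return p;

unwind_dma:
	dma_unmap_page(kbdev->dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
unwind_page:
	__free_page(p);
	return NULL;
}
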
+
+static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p, u8 *group_id)
+{
+ struct device *const dev = kbdev->dev;
+ struct kbase_page_metadata *page_md;
+ dma_addr_t dma_addr;
+
+ page_md = kbase_page_private(p);
+ if (!page_md)
+ return;
+
+ if (group_id)
+ *group_id = page_md->group_id;
+ dma_addr = kbase_dma_addr(p);
+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+ kfree(page_md);
+ set_page_private(p, 0);
+ ClearPagePrivate(p);
+}
+
+static void kbase_free_pages_worker(struct work_struct *work)
+{
+ struct kbase_mem_migrate *mem_migrate =
+ container_of(work, struct kbase_mem_migrate, free_pages_work);
+ struct kbase_device *kbdev = container_of(mem_migrate, struct kbase_device, mem_migrate);
+ struct page *p, *tmp;
+ struct kbase_page_metadata *page_md;
+ LIST_HEAD(free_list);
+
+ spin_lock(&mem_migrate->free_pages_lock);
+ list_splice_init(&mem_migrate->free_pages_list, &free_list);
+ spin_unlock(&mem_migrate->free_pages_lock);
+
+ list_for_each_entry_safe(p, tmp, &free_list, lru) {
+ u8 group_id = 0;
+ list_del_init(&p->lru);
+
+ lock_page(p);
+ page_md = kbase_page_private(p);
+ if (IS_PAGE_MOVABLE(page_md->status)) {
+ __ClearPageMovable(p);
+ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status);
+ }
+ unlock_page(p);
+
+ kbase_free_page_metadata(kbdev, p, &group_id);
+ kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, group_id, p, 0);
+ }
+}
+
+void kbase_free_page_later(struct kbase_device *kbdev, struct page *p)
+{
+ struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate;
+
+ spin_lock(&mem_migrate->free_pages_lock);
+ list_add(&p->lru, &mem_migrate->free_pages_list);
+ spin_unlock(&mem_migrate->free_pages_lock);
+}
+
+/**
+ * kbasep_migrate_page_pt_mapped - Migrate a memory page that is mapped
+ * in a PGD of kbase_mmu_table.
+ *
+ * @old_page: Existing PGD page to remove
+ * @new_page: Destination for migrating the existing PGD page to
+ *
+ * Replace an existing PGD page with a new page by migrating its content. More specifically:
+ * the new page shall replace the existing PGD page in the MMU page table. Before returning,
+ * the new page shall be set as movable and not isolated, while the old page shall lose
+ * the movable property. The metadata attached to the PGD page is transferred to the
+ * new (replacement) page.
+ *
+ * Return: 0 on migration success, or -EAGAIN for a later retry. Otherwise it's a failure
+ * and the migration is aborted.
+ */
+static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new_page)
+{
+ struct kbase_page_metadata *page_md = kbase_page_private(old_page);
+ struct kbase_context *kctx = page_md->data.pt_mapped.mmut->kctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+ dma_addr_t old_dma_addr = page_md->dma_addr;
+ dma_addr_t new_dma_addr;
+ int ret;
+
+ /* Create a new dma map for the new page */
+ new_dma_addr = dma_map_page(kbdev->dev, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(kbdev->dev, new_dma_addr))
+ return -ENOMEM;
+
+ /* Lock context to protect access to the page in physical allocation.
+ * This blocks the CPU page fault handler from remapping pages.
+	 * Only the MCU's mmut is device-wide, i.e. it has no corresponding kctx.
+ */
+ kbase_gpu_vm_lock(kctx);
+
+ ret = kbase_mmu_migrate_page(
+ as_tagged(page_to_phys(old_page)), as_tagged(page_to_phys(new_page)), old_dma_addr,
+ new_dma_addr, PGD_VPFN_LEVEL_GET_LEVEL(page_md->data.pt_mapped.pgd_vpfn_level));
+
+ if (ret == 0) {
+ dma_unmap_page(kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ __ClearPageMovable(old_page);
+ ClearPagePrivate(old_page);
+ put_page(old_page);
+
+#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE)
+ __SetPageMovable(new_page, &movable_ops);
+ page_md->status = PAGE_MOVABLE_SET(page_md->status);
+#else
+ if (kbdev->mem_migrate.inode->i_mapping) {
+ __SetPageMovable(new_page, kbdev->mem_migrate.inode->i_mapping);
+ page_md->status = PAGE_MOVABLE_SET(page_md->status);
+ }
+#endif
+ SetPagePrivate(new_page);
+ get_page(new_page);
+ } else
+ dma_unmap_page(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+ /* Page fault handler for CPU mapping unblocked. */
+ kbase_gpu_vm_unlock(kctx);
+
+ return ret;
+}
+
+/**
+ * kbasep_migrate_page_allocated_mapped - Migrate a memory page that is both
+ * allocated and mapped.
+ *
+ * @old_page: Page to remove.
+ * @new_page: Page to add.
+ *
+ * Replace an old page with a new page by migrating its content and all its
+ * CPU and GPU mappings. More specifically: the new page shall replace the
+ * old page in the MMU page table, as well as in the page array of the physical
+ * allocation, which is used to create CPU mappings. Before returning, the new
+ * page shall be set as movable and not isolated, while the old page shall lose
+ * the movable property.
+ */
+static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct page *new_page)
+{
+ struct kbase_page_metadata *page_md = kbase_page_private(old_page);
+ struct kbase_context *kctx = page_md->data.mapped.mmut->kctx;
+ dma_addr_t old_dma_addr, new_dma_addr;
+ int ret;
+
+ old_dma_addr = page_md->dma_addr;
+ new_dma_addr = dma_map_page(kctx->kbdev->dev, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(kctx->kbdev->dev, new_dma_addr))
+ return -ENOMEM;
+
+ /* Lock context to protect access to array of pages in physical allocation.
+ * This blocks the CPU page fault handler from remapping pages.
+ */
+ kbase_gpu_vm_lock(kctx);
+
+ /* Unmap the old physical range. */
+ unmap_mapping_range(kctx->filp->f_inode->i_mapping, page_md->data.mapped.vpfn << PAGE_SHIFT,
+ PAGE_SIZE, 1);
+
+ ret = kbase_mmu_migrate_page(as_tagged(page_to_phys(old_page)),
+ as_tagged(page_to_phys(new_page)), old_dma_addr, new_dma_addr,
+ MIDGARD_MMU_BOTTOMLEVEL);
+
+ if (ret == 0) {
+ dma_unmap_page(kctx->kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+ SetPagePrivate(new_page);
+ get_page(new_page);
+
+ /* Clear PG_movable from the old page and release reference. */
+ ClearPagePrivate(old_page);
+ __ClearPageMovable(old_page);
+ put_page(old_page);
+
+ /* Set PG_movable to the new page. */
+#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE)
+ __SetPageMovable(new_page, &movable_ops);
+ page_md->status = PAGE_MOVABLE_SET(page_md->status);
+#else
+ if (kctx->kbdev->mem_migrate.inode->i_mapping) {
+ __SetPageMovable(new_page, kctx->kbdev->mem_migrate.inode->i_mapping);
+ page_md->status = PAGE_MOVABLE_SET(page_md->status);
+ }
+#endif
+ } else
+ dma_unmap_page(kctx->kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+ /* Page fault handler for CPU mapping unblocked. */
+ kbase_gpu_vm_unlock(kctx);
+
+ return ret;
+}
+
+/**
+ * kbase_page_isolate - Isolate a page for migration.
+ *
+ * @p: Pointer of the page struct of page to isolate.
+ * @mode: LRU Isolation modes.
+ *
+ * Callback function for Linux to isolate a page and prepare it for migration.
+ *
+ * Return: true on success, false otherwise.
+ */
+static bool kbase_page_isolate(struct page *p, isolate_mode_t mode)
+{
+ bool status_mem_pool = false;
+ struct kbase_mem_pool *mem_pool = NULL;
+ struct kbase_page_metadata *page_md = kbase_page_private(p);
+
+ CSTD_UNUSED(mode);
+
+ if (!page_md || !IS_PAGE_MOVABLE(page_md->status))
+ return false;
+
+ if (!spin_trylock(&page_md->migrate_lock))
+ return false;
+
+ if (WARN_ON(IS_PAGE_ISOLATED(page_md->status))) {
+ spin_unlock(&page_md->migrate_lock);
+ return false;
+ }
+
+ switch (PAGE_STATUS_GET(page_md->status)) {
+ case MEM_POOL:
+ /* Prepare to remove page from memory pool later only if pool is not
+ * in the process of termination.
+ */
+ mem_pool = page_md->data.mem_pool.pool;
+ status_mem_pool = true;
+ preempt_disable();
+ atomic_inc(&mem_pool->isolation_in_progress_cnt);
+ break;
+ case ALLOCATED_MAPPED:
+ /* Mark the page into isolated state, but only if it has no
+ * kernel CPU mappings
+ */
+ if (page_md->vmap_count == 0)
+ page_md->status = PAGE_ISOLATE_SET(page_md->status, 1);
+ break;
+ case PT_MAPPED:
+ /* Mark the page into isolated state. */
+ page_md->status = PAGE_ISOLATE_SET(page_md->status, 1);
+ break;
+ case SPILL_IN_PROGRESS:
+ case ALLOCATE_IN_PROGRESS:
+ case FREE_IN_PROGRESS:
+ break;
+ case NOT_MOVABLE:
+ /* Opportunistically clear the movable property for these pages */
+ __ClearPageMovable(p);
+ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status);
+ break;
+ default:
+ /* State should always fall in one of the previous cases!
+ * Also notice that FREE_ISOLATED_IN_PROGRESS or
+ * FREE_PT_ISOLATED_IN_PROGRESS is impossible because
+ * that state only applies to pages that are already isolated.
+ */
+ page_md->status = PAGE_ISOLATE_SET(page_md->status, 0);
+ break;
+ }
+
+ spin_unlock(&page_md->migrate_lock);
+
+	/* If the page is still in the memory pool: try to remove it. This will
+	 * fail if the pool lock is taken, which could mean the page no longer
+	 * exists in the pool.
+ */
+ if (status_mem_pool) {
+ if (!spin_trylock(&mem_pool->pool_lock)) {
+ atomic_dec(&mem_pool->isolation_in_progress_cnt);
+ preempt_enable();
+ return false;
+ }
+
+ spin_lock(&page_md->migrate_lock);
+ /* Check status again to ensure page has not been removed from memory pool. */
+ if (PAGE_STATUS_GET(page_md->status) == MEM_POOL) {
+ page_md->status = PAGE_ISOLATE_SET(page_md->status, 1);
+ list_del_init(&p->lru);
+ mem_pool->cur_size--;
+ }
+ spin_unlock(&page_md->migrate_lock);
+ spin_unlock(&mem_pool->pool_lock);
+ atomic_dec(&mem_pool->isolation_in_progress_cnt);
+ preempt_enable();
+ }
+
+ return IS_PAGE_ISOLATED(page_md->status);
+}
+
+/**
+ * kbase_page_migrate - Migrate content of old page to new page provided.
+ *
+ * @mapping: Pointer to address_space struct associated with pages.
+ * @new_page: Pointer to the page struct of new page.
+ * @old_page: Pointer to the page struct of old page.
+ * @mode: Mode to determine if migration will be synchronised.
+ *
+ * Callback function for Linux to migrate the content of the old page to the
+ * new page provided.
+ *
+ * Return: 0 on success, error code otherwise.
+ */
+#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE)
+static int kbase_page_migrate(struct address_space *mapping, struct page *new_page,
+ struct page *old_page, enum migrate_mode mode)
+#else
+static int kbase_page_migrate(struct page *new_page, struct page *old_page, enum migrate_mode mode)
+#endif
+{
+ int err = 0;
+ bool status_mem_pool = false;
+ bool status_free_pt_isolated_in_progress = false;
+ bool status_free_isolated_in_progress = false;
+ bool status_pt_mapped = false;
+ bool status_mapped = false;
+ bool status_not_movable = false;
+ struct kbase_page_metadata *page_md = kbase_page_private(old_page);
+ struct kbase_device *kbdev = NULL;
+
+#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE)
+ CSTD_UNUSED(mapping);
+#endif
+ CSTD_UNUSED(mode);
+
+ if (!page_md || !IS_PAGE_MOVABLE(page_md->status))
+ return -EINVAL;
+
+ if (!spin_trylock(&page_md->migrate_lock))
+ return -EAGAIN;
+
+ if (WARN_ON(!IS_PAGE_ISOLATED(page_md->status))) {
+ spin_unlock(&page_md->migrate_lock);
+ return -EINVAL;
+ }
+
+ switch (PAGE_STATUS_GET(page_md->status)) {
+ case MEM_POOL:
+ status_mem_pool = true;
+ kbdev = page_md->data.mem_pool.kbdev;
+ break;
+ case ALLOCATED_MAPPED:
+ status_mapped = true;
+ break;
+ case PT_MAPPED:
+ status_pt_mapped = true;
+ break;
+ case FREE_ISOLATED_IN_PROGRESS:
+ status_free_isolated_in_progress = true;
+ kbdev = page_md->data.free_isolated.kbdev;
+ break;
+ case FREE_PT_ISOLATED_IN_PROGRESS:
+ status_free_pt_isolated_in_progress = true;
+ kbdev = page_md->data.free_pt_isolated.kbdev;
+ break;
+ case NOT_MOVABLE:
+ status_not_movable = true;
+ break;
+ default:
+ /* State should always fall in one of the previous cases! */
+ err = -EAGAIN;
+ break;
+ }
+
+ spin_unlock(&page_md->migrate_lock);
+
+ if (status_mem_pool || status_free_isolated_in_progress ||
+ status_free_pt_isolated_in_progress) {
+ struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate;
+
+ kbase_free_page_metadata(kbdev, old_page, NULL);
+ __ClearPageMovable(old_page);
+ put_page(old_page);
+
+ /* Just free new page to avoid lock contention. */
+ INIT_LIST_HEAD(&new_page->lru);
+ get_page(new_page);
+ set_page_private(new_page, 0);
+ kbase_free_page_later(kbdev, new_page);
+ queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work);
+ } else if (status_not_movable) {
+ err = -EINVAL;
+ } else if (status_mapped) {
+ err = kbasep_migrate_page_allocated_mapped(old_page, new_page);
+ } else if (status_pt_mapped) {
+ err = kbasep_migrate_page_pt_mapped(old_page, new_page);
+ }
+
+ /* While we want to preserve the movability of pages for which we return
+ * EAGAIN, according to the kernel docs, movable pages for which a critical
+	 * error is returned have putback called on them, which may not be what we
+ * expect.
+ */
+ if (err < 0 && err != -EAGAIN) {
+ __ClearPageMovable(old_page);
+ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status);
+ }
+
+ return err;
+}
+
+/**
+ * kbase_page_putback - Return isolated page back to kbase.
+ *
+ * @p: Pointer to the page struct of the page.
+ *
+ * Callback function for Linux to return an isolated page back to kbase. This
+ * will only be called for a page that has been isolated but failed to
+ * migrate. This function returns the given page to the state it was
+ * in before it was isolated.
+ */
+static void kbase_page_putback(struct page *p)
+{
+ bool status_mem_pool = false;
+ bool status_free_isolated_in_progress = false;
+ bool status_free_pt_isolated_in_progress = false;
+ struct kbase_page_metadata *page_md = kbase_page_private(p);
+ struct kbase_device *kbdev = NULL;
+
+ /* If we don't have page metadata, the page may not belong to the
+	 * driver or may already have been freed, and there's nothing we can do.
+ */
+ if (!page_md)
+ return;
+
+ spin_lock(&page_md->migrate_lock);
+
+ if (WARN_ON(!IS_PAGE_ISOLATED(page_md->status))) {
+ spin_unlock(&page_md->migrate_lock);
+ return;
+ }
+
+ switch (PAGE_STATUS_GET(page_md->status)) {
+ case MEM_POOL:
+ status_mem_pool = true;
+ kbdev = page_md->data.mem_pool.kbdev;
+ break;
+ case ALLOCATED_MAPPED:
+ page_md->status = PAGE_ISOLATE_SET(page_md->status, 0);
+ break;
+ case PT_MAPPED:
+ case NOT_MOVABLE:
+ /* Pages should no longer be isolated if they are in a stable state
+ * and used by the driver.
+ */
+ page_md->status = PAGE_ISOLATE_SET(page_md->status, 0);
+ break;
+ case FREE_ISOLATED_IN_PROGRESS:
+ status_free_isolated_in_progress = true;
+ kbdev = page_md->data.free_isolated.kbdev;
+ break;
+ case FREE_PT_ISOLATED_IN_PROGRESS:
+ status_free_pt_isolated_in_progress = true;
+ kbdev = page_md->data.free_pt_isolated.kbdev;
+ break;
+ default:
+ /* State should always fall in one of the previous cases! */
+ break;
+ }
+
+ spin_unlock(&page_md->migrate_lock);
+
+	/* If the page was in a memory pool then just free it to avoid lock
+	 * contention. The same applies to the FREE_ISOLATED_IN_PROGRESS and
+	 * FREE_PT_ISOLATED_IN_PROGRESS states.
+ */
+ if (status_mem_pool || status_free_isolated_in_progress ||
+ status_free_pt_isolated_in_progress) {
+ __ClearPageMovable(p);
+ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status);
+ if (!WARN_ON_ONCE(!kbdev)) {
+ struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate;
+
+ kbase_free_page_later(kbdev, p);
+ queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work);
+ }
+ }
+}
+
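+/* On kernels 6.0 and newer the migration hooks are supplied through
+ * struct movable_operations; older kernels expect them as part of the
+ * inode's address_space_operations, installed by
+ * kbase_mem_migrate_set_address_space_ops() below.
+ */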
+#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE)
+static const struct movable_operations movable_ops = {
+ .isolate_page = kbase_page_isolate,
+ .migrate_page = kbase_page_migrate,
+ .putback_page = kbase_page_putback,
+};
+#else
+static const struct address_space_operations kbase_address_space_ops = {
+ .isolate_page = kbase_page_isolate,
+ .migratepage = kbase_page_migrate,
+ .putback_page = kbase_page_putback,
+};
+#endif
+
+#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE)
+void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct file *const filp)
+{
+ mutex_lock(&kbdev->fw_load_lock);
+
+ if (filp) {
+ filp->f_inode->i_mapping->a_ops = &kbase_address_space_ops;
+
+ if (!kbdev->mem_migrate.inode) {
+ kbdev->mem_migrate.inode = filp->f_inode;
+ /* This reference count increment is balanced by iput()
+ * upon termination.
+ */
+ atomic_inc(&filp->f_inode->i_count);
+ } else {
+ WARN_ON(kbdev->mem_migrate.inode != filp->f_inode);
+ }
+ }
+
+ mutex_unlock(&kbdev->fw_load_lock);
+}
+#endif
+
+void kbase_mem_migrate_init(struct kbase_device *kbdev)
+{
+ struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate;
+
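+	/* A negative value means that no module parameter was supplied, in
+	 * which case page migration defaults to disabled.
+	 */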
+ if (kbase_page_migration_enabled < 0)
+ kbase_page_migration_enabled = 0;
+
+ spin_lock_init(&mem_migrate->free_pages_lock);
+ INIT_LIST_HEAD(&mem_migrate->free_pages_list);
+
+#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE)
+ mem_migrate->inode = NULL;
+#endif
+ mem_migrate->free_pages_workq =
+ alloc_workqueue("free_pages_workq", WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
+ INIT_WORK(&mem_migrate->free_pages_work, kbase_free_pages_worker);
+}
+
+void kbase_mem_migrate_term(struct kbase_device *kbdev)
+{
+ struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate;
+
+ if (mem_migrate->free_pages_workq)
+ destroy_workqueue(mem_migrate->free_pages_workq);
+#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE)
+ iput(mem_migrate->inode);
+#endif
+}
diff --git a/mali_kbase/mali_kbase_mem_migrate.h b/mali_kbase/mali_kbase_mem_migrate.h
new file mode 100644
index 0000000..76bbc99
--- /dev/null
+++ b/mali_kbase/mali_kbase_mem_migrate.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/**
+ * DOC: Base kernel page migration implementation.
+ */
+
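+/* Layout of the status byte encoded by the macros below: bits 5:0 hold the
+ * page status, bit 6 marks the page as movable and bit 7 marks it as
+ * isolated.
+ */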
+#define PAGE_STATUS_MASK ((u8)0x3F)
+#define PAGE_STATUS_GET(status) (status & PAGE_STATUS_MASK)
+#define PAGE_STATUS_SET(status, value) ((status & ~PAGE_STATUS_MASK) | (value & PAGE_STATUS_MASK))
+
+#define PAGE_ISOLATE_SHIFT (7)
+#define PAGE_ISOLATE_MASK ((u8)1 << PAGE_ISOLATE_SHIFT)
+#define PAGE_ISOLATE_SET(status, value) \
+ ((status & ~PAGE_ISOLATE_MASK) | (value << PAGE_ISOLATE_SHIFT))
+#define IS_PAGE_ISOLATED(status) ((bool)(status & PAGE_ISOLATE_MASK))
+
+#define PAGE_MOVABLE_SHIFT (6)
+#define PAGE_MOVABLE_MASK ((u8)1 << PAGE_MOVABLE_SHIFT)
+#define PAGE_MOVABLE_CLEAR(status) ((status) & ~PAGE_MOVABLE_MASK)
+#define PAGE_MOVABLE_SET(status) (status | PAGE_MOVABLE_MASK)
+
+#define IS_PAGE_MOVABLE(status) ((bool)(status & PAGE_MOVABLE_MASK))
+
+/* Global integer used to determine whether a module parameter value has been
+ * provided and whether the page migration feature is enabled.
+ */
+extern int kbase_page_migration_enabled;
+
+/**
+ * kbase_alloc_page_metadata - Allocate and initialize page metadata
+ * @kbdev: Pointer to kbase device.
+ * @p: Page to assign metadata to.
+ * @dma_addr:  DMA address mapped to the page.
+ * @group_id: Memory group ID associated with the entity that is
+ * allocating the page metadata.
+ *
+ * This will allocate memory for the page's metadata, initialize it and
+ * assign a reference to the page's private field. Importantly, once
+ * the metadata is set and ready this function will mark the page as
+ * movable.
+ *
+ * Return: true if successful or false otherwise.
+ */
+bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr,
+ u8 group_id);
+
+/**
+ * kbase_free_page_later - Defer freeing of given page.
+ * @kbdev: Pointer to kbase device
+ * @p: Page to free
+ *
+ * This will add the given page to a list of pages which will be freed at
+ * a later time.
+ */
+void kbase_free_page_later(struct kbase_device *kbdev, struct page *p);
+
+#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE)
+/**
+ * kbase_mem_migrate_set_address_space_ops - Set address space operations
+ *
+ * @kbdev: Pointer to object representing an instance of GPU platform device.
+ * @filp: Pointer to the struct file corresponding to device file
+ * /dev/malixx instance, passed to the file's open method.
+ *
+ * Assign address space operations to the given file struct @filp and
+ * store a reference to its inode in @kbdev, to be released at termination.
+ */
+void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct file *const filp);
+#endif
+
+/**
+ * kbase_mem_migrate_init - Initialise kbase page migration
+ *
+ * @kbdev: Pointer to kbase device
+ *
+ * Enable page migration by default based on the GPU, and set up a work queue
+ * used to defer freeing of pages during page migration callbacks.
+ */
+void kbase_mem_migrate_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_mem_migrate_term - Terminate kbase page migration
+ *
+ * @kbdev: Pointer to kbase device
+ *
+ * This will flush any outstanding work to free pages deferred by page
+ * migration and destroy the associated workqueue.
+ */
+void kbase_mem_migrate_term(struct kbase_device *kbdev);
diff --git a/mali_kbase/mali_kbase_mem_pool.c b/mali_kbase/mali_kbase_mem_pool.c
index 5e9a276..58716be 100644
--- a/mali_kbase/mali_kbase_mem_pool.c
+++ b/mali_kbase/mali_kbase_mem_pool.c
@@ -21,6 +21,7 @@
#include <mali_kbase.h>
#include <linux/mm.h>
+#include <linux/migrate.h>
#include <linux/dma-mapping.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
@@ -128,9 +129,43 @@ static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool)
return kbase_mem_pool_size(pool) == 0;
}
+static bool set_pool_new_page_metadata(struct kbase_mem_pool *pool, struct page *p,
+ struct list_head *page_list, size_t *list_size)
+{
+ struct kbase_page_metadata *page_md = kbase_page_private(p);
+ bool not_movable = false;
+
+ lockdep_assert_held(&pool->pool_lock);
+
+ /* Free the page instead of adding it to the pool if it's not movable.
+ * Only update page status and add the page to the memory pool if
+ * it is not isolated.
+ */
+ spin_lock(&page_md->migrate_lock);
+ if (PAGE_STATUS_GET(page_md->status) == (u8)NOT_MOVABLE) {
+ not_movable = true;
+ } else if (!WARN_ON_ONCE(IS_PAGE_ISOLATED(page_md->status))) {
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)MEM_POOL);
+ page_md->data.mem_pool.pool = pool;
+ page_md->data.mem_pool.kbdev = pool->kbdev;
+ list_add(&p->lru, page_list);
+ (*list_size)++;
+ }
+ spin_unlock(&page_md->migrate_lock);
+
+ if (not_movable) {
+ kbase_free_page_later(pool->kbdev, p);
+ pool_dbg(pool, "skipping a not movable page\n");
+ }
+
+ return not_movable;
+}
+
static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool,
struct page *p)
{
+ bool queue_work_to_free = false;
+
if (mali_kbase_mem_pool_order_pages_enabled) {
kbase_mem_pool_ordered_add_locked(pool, p);
return;
@@ -138,8 +173,19 @@ static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool,
lockdep_assert_held(&pool->pool_lock);
- list_add(&p->lru, &pool->page_list);
- pool->cur_size++;
+ if (!pool->order && kbase_page_migration_enabled) {
+ if (set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size))
+ queue_work_to_free = true;
+ } else {
+ list_add(&p->lru, &pool->page_list);
+ pool->cur_size++;
+ }
+
+ if (queue_work_to_free) {
+ struct kbase_mem_migrate *mem_migrate = &pool->kbdev->mem_migrate;
+
+ queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work);
+ }
pool_dbg(pool, "added page\n");
}
@@ -154,10 +200,28 @@ static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p)
static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool,
struct list_head *page_list, size_t nr_pages)
{
+ bool queue_work_to_free = false;
+
lockdep_assert_held(&pool->pool_lock);
- list_splice(page_list, &pool->page_list);
- pool->cur_size += nr_pages;
+ if (!pool->order && kbase_page_migration_enabled) {
+ struct page *p, *tmp;
+
+ list_for_each_entry_safe(p, tmp, page_list, lru) {
+ list_del_init(&p->lru);
+ if (set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size))
+ queue_work_to_free = true;
+ }
+ } else {
+ list_splice(page_list, &pool->page_list);
+ pool->cur_size += nr_pages;
+ }
+
+ if (queue_work_to_free) {
+ struct kbase_mem_migrate *mem_migrate = &pool->kbdev->mem_migrate;
+
+ queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work);
+ }
pool_dbg(pool, "added %zu pages\n", nr_pages);
}
@@ -170,7 +234,8 @@ static void kbase_mem_pool_add_list(struct kbase_mem_pool *pool,
kbase_mem_pool_unlock(pool);
}
-static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool)
+static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool,
+ enum kbase_page_status status)
{
struct page *p;
@@ -180,6 +245,16 @@ static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool)
return NULL;
p = list_first_entry(&pool->page_list, struct page, lru);
+
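+	/* Update the page's migration status under migrate_lock, so that a
+	 * concurrent isolation callback sees it leaving the MEM_POOL state.
+	 */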
+ if (!pool->order && kbase_page_migration_enabled) {
+ struct kbase_page_metadata *page_md = kbase_page_private(p);
+
+ spin_lock(&page_md->migrate_lock);
+ WARN_ON(PAGE_STATUS_GET(page_md->status) != (u8)MEM_POOL);
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)status);
+ spin_unlock(&page_md->migrate_lock);
+ }
+
list_del_init(&p->lru);
pool->cur_size--;
@@ -188,12 +263,13 @@ static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool)
return p;
}
-static struct page *kbase_mem_pool_remove(struct kbase_mem_pool *pool)
+static struct page *kbase_mem_pool_remove(struct kbase_mem_pool *pool,
+ enum kbase_page_status status)
{
struct page *p;
kbase_mem_pool_lock(pool);
- p = kbase_mem_pool_remove_locked(pool);
+ p = kbase_mem_pool_remove_locked(pool, status);
kbase_mem_pool_unlock(pool);
return p;
@@ -203,9 +279,9 @@ static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool,
struct page *p)
{
struct device *dev = pool->kbdev->dev;
+ dma_addr_t dma_addr = pool->order ? kbase_dma_addr_as_priv(p) : kbase_dma_addr(p);
- dma_sync_single_for_device(dev, kbase_dma_addr(p),
- (PAGE_SIZE << pool->order), DMA_BIDIRECTIONAL);
+ dma_sync_single_for_device(dev, dma_addr, (PAGE_SIZE << pool->order), DMA_BIDIRECTIONAL);
}
static void kbase_mem_pool_zero_page(struct kbase_mem_pool *pool,
@@ -236,7 +312,7 @@ static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool,
struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool)
{
struct page *p;
- gfp_t gfp = GFP_HIGHUSER | __GFP_ZERO;
+ gfp_t gfp = __GFP_ZERO;
struct kbase_device *const kbdev = pool->kbdev;
struct device *const dev = kbdev->dev;
dma_addr_t dma_addr;
@@ -244,7 +320,9 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool)
/* don't warn on higher order failures */
if (pool->order)
- gfp |= __GFP_NOWARN;
+ gfp |= GFP_HIGHUSER | __GFP_NOWARN;
+ else
+ gfp |= kbase_page_migration_enabled ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER;
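+	/* Order-0 pages are allocated from the movable zone when page migration
+	 * is enabled, so that the kernel may later ask the driver to relocate
+	 * them.
+	 */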
p = kbdev->mgm_dev->ops.mgm_alloc_page(kbdev->mgm_dev,
pool->group_id, gfp, pool->order);
@@ -260,30 +338,59 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool)
return NULL;
}
- WARN_ON(dma_addr != page_to_phys(p));
- for (i = 0; i < (1u << pool->order); i++)
- kbase_set_dma_addr(p+i, dma_addr + PAGE_SIZE * i);
+	/* Set up page metadata for 4KB pages when page migration is enabled */
+ if (!pool->order && kbase_page_migration_enabled) {
+ INIT_LIST_HEAD(&p->lru);
+ if (!kbase_alloc_page_metadata(kbdev, p, dma_addr, pool->group_id)) {
+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, pool->group_id, p,
+ pool->order);
+ return NULL;
+ }
+ } else {
+ WARN_ON(dma_addr != page_to_phys(p));
+ for (i = 0; i < (1u << pool->order); i++)
+ kbase_set_dma_addr_as_priv(p + i, dma_addr + PAGE_SIZE * i);
+ }
return p;
}
-static void kbase_mem_pool_free_page(struct kbase_mem_pool *pool,
- struct page *p)
+static void enqueue_free_pool_pages_work(struct kbase_mem_pool *pool)
{
- struct kbase_device *const kbdev = pool->kbdev;
- struct device *const dev = kbdev->dev;
- dma_addr_t dma_addr = kbase_dma_addr(p);
- int i;
+ struct kbase_mem_migrate *mem_migrate = &pool->kbdev->mem_migrate;
+
+ if (!pool->order && kbase_page_migration_enabled)
+ queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work);
+}
- dma_unmap_page(dev, dma_addr, (PAGE_SIZE << pool->order),
- DMA_BIDIRECTIONAL);
- for (i = 0; i < (1u << pool->order); i++)
- kbase_clear_dma_addr(p+i);
+void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, struct page *p)
+{
+ struct kbase_device *kbdev;
+
+ if (WARN_ON(!pool))
+ return;
+ if (WARN_ON(!p))
+ return;
- kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev,
- pool->group_id, p, pool->order);
+ kbdev = pool->kbdev;
- pool_dbg(pool, "freed page to kernel\n");
+ if (!pool->order && kbase_page_migration_enabled) {
+ kbase_free_page_later(kbdev, p);
+ pool_dbg(pool, "page to be freed to kernel later\n");
+ } else {
+ int i;
+ dma_addr_t dma_addr = kbase_dma_addr_as_priv(p);
+
+ for (i = 0; i < (1u << pool->order); i++)
+ kbase_clear_dma_addr_as_priv(p + i);
+
+ dma_unmap_page(kbdev->dev, dma_addr, (PAGE_SIZE << pool->order), DMA_BIDIRECTIONAL);
+
+ kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, pool->group_id, p, pool->order);
+
+ pool_dbg(pool, "freed page to kernel\n");
+ }
}
static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool,
@@ -295,10 +402,13 @@ static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool,
lockdep_assert_held(&pool->pool_lock);
for (i = 0; i < nr_to_shrink && !kbase_mem_pool_is_empty(pool); i++) {
- p = kbase_mem_pool_remove_locked(pool);
+ p = kbase_mem_pool_remove_locked(pool, FREE_IN_PROGRESS);
kbase_mem_pool_free_page(pool, p);
}
+ /* Freeing of pages will be deferred when page migration is enabled. */
+ enqueue_free_pool_pages_work(pool);
+
return i;
}
@@ -353,6 +463,7 @@ int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow,
return 0;
}
+KBASE_EXPORT_TEST_API(kbase_mem_pool_grow);
void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size)
{
@@ -408,6 +519,9 @@ static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s,
kbase_mem_pool_lock(pool);
if (pool->dont_reclaim && !pool->dying) {
kbase_mem_pool_unlock(pool);
+		/* Tell the shrinker to skip reclaim
+		 * even though freeable pages are available
+ */
return 0;
}
pool_size = kbase_mem_pool_size(pool);
@@ -427,7 +541,10 @@ static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s,
kbase_mem_pool_lock(pool);
if (pool->dont_reclaim && !pool->dying) {
kbase_mem_pool_unlock(pool);
- return 0;
+		/* Tell the shrinker that no reclaim can be made and
+		 * that it should not try again for this reclaim context.
+ */
+ return SHRINK_STOP;
}
pool_dbg(pool, "reclaim scan %ld:\n", sc->nr_to_scan);
@@ -441,12 +558,9 @@ static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s,
return freed;
}
-int kbase_mem_pool_init(struct kbase_mem_pool *pool,
- const struct kbase_mem_pool_config *config,
- unsigned int order,
- int group_id,
- struct kbase_device *kbdev,
- struct kbase_mem_pool *next_pool)
+int kbase_mem_pool_init(struct kbase_mem_pool *pool, const struct kbase_mem_pool_config *config,
+ unsigned int order, int group_id, struct kbase_device *kbdev,
+ struct kbase_mem_pool *next_pool)
{
if (WARN_ON(group_id < 0) ||
WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) {
@@ -460,6 +574,7 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool,
pool->kbdev = kbdev;
pool->next_pool = next_pool;
pool->dying = false;
+ atomic_set(&pool->isolation_in_progress_cnt, 0);
spin_lock_init(&pool->pool_lock);
INIT_LIST_HEAD(&pool->page_list);
@@ -471,12 +586,17 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool,
* struct shrinker does not define batch
*/
pool->reclaim.batch = 0;
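+	/* From kernel 6.0 onwards, register_shrinker() requires a name to be
+	 * supplied for the shrinker.
+	 */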
+#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE
register_shrinker(&pool->reclaim);
+#else
+ register_shrinker(&pool->reclaim, "mali-mem-pool");
+#endif
pool_dbg(pool, "initialized\n");
return 0;
}
+KBASE_EXPORT_TEST_API(kbase_mem_pool_init);
void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool)
{
@@ -508,15 +628,17 @@ void kbase_mem_pool_term(struct kbase_mem_pool *pool)
/* Zero pages first without holding the next_pool lock */
for (i = 0; i < nr_to_spill; i++) {
- p = kbase_mem_pool_remove_locked(pool);
- list_add(&p->lru, &spill_list);
+ p = kbase_mem_pool_remove_locked(pool, SPILL_IN_PROGRESS);
+ if (p)
+ list_add(&p->lru, &spill_list);
}
}
while (!kbase_mem_pool_is_empty(pool)) {
/* Free remaining pages to kernel */
- p = kbase_mem_pool_remove_locked(pool);
- list_add(&p->lru, &free_list);
+ p = kbase_mem_pool_remove_locked(pool, FREE_IN_PROGRESS);
+ if (p)
+ list_add(&p->lru, &free_list);
}
kbase_mem_pool_unlock(pool);
@@ -549,8 +671,18 @@ void kbase_mem_pool_term(struct kbase_mem_pool *pool)
kbase_mem_pool_free_page(pool, p);
}
+ /* Freeing of pages will be deferred when page migration is enabled. */
+ enqueue_free_pool_pages_work(pool);
+
+	/* Before returning, wait to make sure there are no pages undergoing page
+	 * isolation, which would require a reference to this pool.
+ */
+ while (atomic_read(&pool->isolation_in_progress_cnt))
+ cpu_relax();
+
pool_dbg(pool, "terminated\n");
}
+KBASE_EXPORT_TEST_API(kbase_mem_pool_term);
struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool)
{
@@ -558,7 +690,7 @@ struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool)
do {
pool_dbg(pool, "alloc()\n");
- p = kbase_mem_pool_remove(pool);
+ p = kbase_mem_pool_remove(pool, ALLOCATE_IN_PROGRESS);
if (p)
return p;
@@ -571,17 +703,10 @@ struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool)
struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool)
{
- struct page *p;
-
lockdep_assert_held(&pool->pool_lock);
pool_dbg(pool, "alloc_locked()\n");
- p = kbase_mem_pool_remove_locked(pool);
-
- if (p)
- return p;
-
- return NULL;
+ return kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS);
}
void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p,
@@ -608,6 +733,8 @@ void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p,
} else {
/* Free page */
kbase_mem_pool_free_page(pool, p);
+ /* Freeing of pages will be deferred when page migration is enabled. */
+ enqueue_free_pool_pages_work(pool);
}
}
@@ -632,6 +759,8 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
} else {
/* Free page */
kbase_mem_pool_free_page(pool, p);
+ /* Freeing of pages will be deferred when page migration is enabled. */
+ enqueue_free_pool_pages_work(pool);
}
}
@@ -656,10 +785,12 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
/* Get pages from this pool */
kbase_mem_pool_lock(pool);
nr_from_pool = min(nr_pages_internal, kbase_mem_pool_size(pool));
+
while (nr_from_pool--) {
int j;
- p = kbase_mem_pool_remove_locked(pool);
+ p = kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS);
+
if (pool->order) {
pages[i++] = as_tagged_tag(page_to_phys(p),
HUGE_HEAD | HUGE_PAGE);
@@ -753,7 +884,7 @@ int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool,
for (i = 0; i < nr_pages_internal; i++) {
int j;
- p = kbase_mem_pool_remove_locked(pool);
+ p = kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS);
if (pool->order) {
*pages++ = as_tagged_tag(page_to_phys(p),
HUGE_HEAD | HUGE_PAGE);
@@ -860,6 +991,7 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
size_t nr_to_pool;
LIST_HEAD(to_pool_list);
size_t i = 0;
+ bool pages_released = false;
if (mali_kbase_mem_pool_order_pages_enabled) {
kbase_mem_pool_ordered_free_pages(pool, nr_pages, pages, dirty,
@@ -898,13 +1030,17 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
pages[i] = as_tagged(0);
continue;
}
-
p = as_page(pages[i]);
kbase_mem_pool_free_page(pool, p);
pages[i] = as_tagged(0);
+ pages_released = true;
}
+ /* Freeing of pages will be deferred when page migration is enabled. */
+ if (pages_released)
+ enqueue_free_pool_pages_work(pool);
+
pool_dbg(pool, "free_pages(%zu) done\n", nr_pages);
}
@@ -917,6 +1053,7 @@ void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool,
size_t nr_to_pool;
LIST_HEAD(to_pool_list);
size_t i = 0;
+ bool pages_released = false;
if (mali_kbase_mem_pool_order_pages_enabled) {
kbase_mem_pool_ordered_free_pages_locked(pool, nr_pages, pages,
@@ -953,8 +1090,13 @@ void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool,
kbase_mem_pool_free_page(pool, p);
pages[i] = as_tagged(0);
+ pages_released = true;
}
+ /* Freeing of pages will be deferred when page migration is enabled. */
+ if (pages_released)
+ enqueue_free_pool_pages_work(pool);
+
pool_dbg(pool, "free_pages_locked(%zu) done\n", nr_pages);
}
diff --git a/mali_kbase/mali_kbase_mem_pool_group.c b/mali_kbase/mali_kbase_mem_pool_group.c
index 8d7bb4d..49c4b04 100644
--- a/mali_kbase/mali_kbase_mem_pool_group.c
+++ b/mali_kbase/mali_kbase_mem_pool_group.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -43,29 +43,22 @@ void kbase_mem_pool_group_config_set_max_size(
}
}
-int kbase_mem_pool_group_init(
- struct kbase_mem_pool_group *const mem_pools,
- struct kbase_device *const kbdev,
- const struct kbase_mem_pool_group_config *const configs,
- struct kbase_mem_pool_group *next_pools)
+int kbase_mem_pool_group_init(struct kbase_mem_pool_group *const mem_pools,
+ struct kbase_device *const kbdev,
+ const struct kbase_mem_pool_group_config *const configs,
+ struct kbase_mem_pool_group *next_pools)
{
int gid, err = 0;
for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) {
- err = kbase_mem_pool_init(&mem_pools->small[gid],
- &configs->small[gid],
- KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER,
- gid,
- kbdev,
- next_pools ? &next_pools->small[gid] : NULL);
+ err = kbase_mem_pool_init(&mem_pools->small[gid], &configs->small[gid],
+ KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER, gid, kbdev,
+ next_pools ? &next_pools->small[gid] : NULL);
if (!err) {
- err = kbase_mem_pool_init(&mem_pools->large[gid],
- &configs->large[gid],
- KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER,
- gid,
- kbdev,
- next_pools ? &next_pools->large[gid] : NULL);
+ err = kbase_mem_pool_init(&mem_pools->large[gid], &configs->large[gid],
+ KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER, gid, kbdev,
+ next_pools ? &next_pools->large[gid] : NULL);
if (err)
kbase_mem_pool_term(&mem_pools->small[gid]);
}
diff --git a/mali_kbase/mali_kbase_mem_pool_group.h b/mali_kbase/mali_kbase_mem_pool_group.h
index f97f47d..fe8ce77 100644
--- a/mali_kbase/mali_kbase_mem_pool_group.h
+++ b/mali_kbase/mali_kbase_mem_pool_group.h
@@ -86,10 +86,9 @@ void kbase_mem_pool_group_config_set_max_size(
*
* Return: 0 on success, otherwise a negative error code
*/
-int kbase_mem_pool_group_init(struct kbase_mem_pool_group *mem_pools,
- struct kbase_device *kbdev,
- const struct kbase_mem_pool_group_config *configs,
- struct kbase_mem_pool_group *next_pools);
+int kbase_mem_pool_group_init(struct kbase_mem_pool_group *mem_pools, struct kbase_device *kbdev,
+ const struct kbase_mem_pool_group_config *configs,
+ struct kbase_mem_pool_group *next_pools);
/**
* kbase_mem_pool_group_mark_dying - Mark a set of memory pools as dying
diff --git a/mali_kbase/mali_kbase_pbha.c b/mali_kbase/mali_kbase_pbha.c
index 90406b2..b65f9e7 100644
--- a/mali_kbase/mali_kbase_pbha.c
+++ b/mali_kbase/mali_kbase_pbha.c
@@ -209,20 +209,13 @@ void kbase_pbha_write_settings(struct kbase_device *kbdev)
}
}
-int kbase_pbha_read_dtb(struct kbase_device *kbdev)
+static int kbase_pbha_read_int_id_override_property(struct kbase_device *kbdev,
+ const struct device_node *pbha_node)
{
u32 dtb_data[SYSC_ALLOC_COUNT * sizeof(u32) * DTB_SET_SIZE];
- const struct device_node *pbha_node;
int sz, i;
bool valid = true;
- if (!kbasep_pbha_supported(kbdev))
- return 0;
-
- pbha_node = of_get_child_by_name(kbdev->dev->of_node, "pbha");
- if (!pbha_node)
- return 0;
-
sz = of_property_count_elems_of_size(pbha_node, "int_id_override",
sizeof(u32));
if (sz <= 0 || (sz % DTB_SET_SIZE != 0)) {
@@ -256,3 +249,58 @@ int kbase_pbha_read_dtb(struct kbase_device *kbdev)
}
return 0;
}
+
+#if MALI_USE_CSF
+static int kbase_pbha_read_propagate_bits_property(struct kbase_device *kbdev,
+ const struct device_node *pbha_node)
+{
+	u32 bits = 0; /* Default when the optional DT property is absent */
+ int err;
+
+ if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU))
+ return 0;
+
+ err = of_property_read_u32(pbha_node, "propagate_bits", &bits);
+
+ if (err < 0) {
+ if (err != -EINVAL) {
+ dev_err(kbdev->dev,
+ "DTB value for propagate_bits is improperly formed (err=%d)\n",
+ err);
+ return err;
+ }
+ }
+
+ if (bits > (L2_CONFIG_PBHA_HWU_MASK >> L2_CONFIG_PBHA_HWU_SHIFT)) {
+ dev_err(kbdev->dev, "Bad DTB value for propagate_bits: 0x%x\n", bits);
+ return -EINVAL;
+ }
+
+ kbdev->pbha_propagate_bits = bits;
+ return 0;
+}
+#endif
+
+int kbase_pbha_read_dtb(struct kbase_device *kbdev)
+{
+ const struct device_node *pbha_node;
+ int err;
+
+ if (!kbasep_pbha_supported(kbdev))
+ return 0;
+
+ pbha_node = of_get_child_by_name(kbdev->dev->of_node, "pbha");
+ if (!pbha_node)
+ return 0;
+
+ err = kbase_pbha_read_int_id_override_property(kbdev, pbha_node);
+
+#if MALI_USE_CSF
+ if (err < 0)
+ return err;
+
+ err = kbase_pbha_read_propagate_bits_property(kbdev, pbha_node);
+#endif
+
+ return err;
+}
diff --git a/mali_kbase/mali_kbase_pbha_debugfs.c b/mali_kbase/mali_kbase_pbha_debugfs.c
index 4130dd6..1cc29c7 100644
--- a/mali_kbase/mali_kbase_pbha_debugfs.c
+++ b/mali_kbase/mali_kbase_pbha_debugfs.c
@@ -20,13 +20,15 @@
*/
#include "mali_kbase_pbha_debugfs.h"
-
#include "mali_kbase_pbha.h"
-
#include <device/mali_kbase_device.h>
#include <mali_kbase_reset_gpu.h>
#include <mali_kbase.h>
+#if MALI_USE_CSF
+#include "backend/gpu/mali_kbase_pm_internal.h"
+#endif
+
static int int_id_overrides_show(struct seq_file *sfile, void *data)
{
struct kbase_device *kbdev = sfile->private;
@@ -108,6 +110,90 @@ static int int_id_overrides_open(struct inode *in, struct file *file)
return single_open(file, int_id_overrides_show, in->i_private);
}
+#if MALI_USE_CSF
+/**
+ * propagate_bits_show - Read PBHA bits from L2_CONFIG out to debugfs.
+ *
+ * @sfile: The debugfs entry.
+ * @data: Data associated with the entry.
+ *
+ * Return: 0 in all cases.
+ */
+static int propagate_bits_show(struct seq_file *sfile, void *data)
+{
+ struct kbase_device *kbdev = sfile->private;
+ u32 l2_config_val;
+
+ kbase_csf_scheduler_pm_active(kbdev);
+ kbase_pm_wait_for_l2_powered(kbdev);
+ l2_config_val = L2_CONFIG_PBHA_HWU_GET(kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG)));
+ kbase_csf_scheduler_pm_idle(kbdev);
+
+ seq_printf(sfile, "PBHA Propagate Bits: 0x%x\n", l2_config_val);
+ return 0;
+}
+
+static int propagate_bits_open(struct inode *in, struct file *file)
+{
+ return single_open(file, propagate_bits_show, in->i_private);
+}
+
+/**
+ * propagate_bits_write - Write input value from debugfs to PBHA bits of L2_CONFIG register.
+ *
+ * @file: Pointer to file struct of debugfs node.
+ * @ubuf: Pointer to user buffer with value to be written.
+ * @count: Size of user buffer.
+ * @ppos: Not used.
+ *
+ * Return: Size of the buffer passed in when successful, or a negative error code (-E2BIG/-EINVAL) otherwise.
+ */
+static ssize_t propagate_bits_write(struct file *file, const char __user *ubuf, size_t count,
+ loff_t *ppos)
+{
+ struct seq_file *sfile = file->private_data;
+ struct kbase_device *kbdev = sfile->private;
+ /* 32 characters should be enough for the input string in any base */
+ char raw_str[32];
+ unsigned long propagate_bits;
+
+ if (count >= sizeof(raw_str))
+ return -E2BIG;
+ if (copy_from_user(raw_str, ubuf, count))
+ return -EINVAL;
+ raw_str[count] = '\0';
+ if (kstrtoul(raw_str, 0, &propagate_bits))
+ return -EINVAL;
+
+	/* Check that the propagate_bits input argument does not
+	 * exceed the maximum value allowed by the propagate_bits mask.
+ */
+ if (propagate_bits > (L2_CONFIG_PBHA_HWU_MASK >> L2_CONFIG_PBHA_HWU_SHIFT))
+ return -EINVAL;
+	/* Cast to u8 is safe as the check above ensures the value is within
+	 * the correct limits.
+ */
+ kbdev->pbha_propagate_bits = (u8)propagate_bits;
+
+ /* GPU Reset will set new values in L2 config */
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) {
+ kbase_reset_gpu(kbdev);
+ kbase_reset_gpu_wait(kbdev);
+ }
+
+ return count;
+}
+
+static const struct file_operations pbha_propagate_bits_fops = {
+ .owner = THIS_MODULE,
+ .open = propagate_bits_open,
+ .read = seq_read,
+ .write = propagate_bits_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+#endif /* MALI_USE_CSF */
+
static const struct file_operations pbha_int_id_overrides_fops = {
.owner = THIS_MODULE,
.open = int_id_overrides_open,
@@ -132,5 +218,10 @@ void kbase_pbha_debugfs_init(struct kbase_device *kbdev)
debugfs_create_file("int_id_overrides", mode, debugfs_pbha_dir,
kbdev, &pbha_int_id_overrides_fops);
+#if MALI_USE_CSF
+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU))
+ debugfs_create_file("propagate_bits", mode, debugfs_pbha_dir, kbdev,
+ &pbha_propagate_bits_fops);
+#endif /* MALI_USE_CSF */
}
}
diff --git a/mali_kbase/mali_kbase_platform_fake.c b/mali_kbase/mali_kbase_platform_fake.c
index 761a636..265c676 100644
--- a/mali_kbase/mali_kbase_platform_fake.c
+++ b/mali_kbase/mali_kbase_platform_fake.c
@@ -32,12 +32,12 @@
*/
#include <mali_kbase_config.h>
+#ifndef CONFIG_OF
+
#define PLATFORM_CONFIG_RESOURCE_COUNT 4
-#define PLATFORM_CONFIG_IRQ_RES_COUNT 3
static struct platform_device *mali_device;
-#ifndef CONFIG_OF
/**
* kbasep_config_parse_io_resources - Convert data in struct kbase_io_resources
* struct to Linux-specific resources
@@ -73,14 +73,11 @@ static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io
linux_resources[3].end = io_resources->gpu_irq_number;
linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
}
-#endif /* CONFIG_OF */
int kbase_platform_register(void)
{
struct kbase_platform_config *config;
-#ifndef CONFIG_OF
struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT];
-#endif
int err;
config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */
@@ -93,7 +90,6 @@ int kbase_platform_register(void)
if (mali_device == NULL)
return -ENOMEM;
-#ifndef CONFIG_OF
kbasep_config_parse_io_resources(config->io_resources, resources);
err = platform_device_add_resources(mali_device, resources, PLATFORM_CONFIG_RESOURCE_COUNT);
if (err) {
@@ -101,7 +97,6 @@ int kbase_platform_register(void)
mali_device = NULL;
return err;
}
-#endif /* CONFIG_OF */
err = platform_device_add(mali_device);
if (err) {
@@ -120,3 +115,5 @@ void kbase_platform_unregister(void)
platform_device_unregister(mali_device);
}
EXPORT_SYMBOL(kbase_platform_unregister);
+
+#endif /* CONFIG_OF */
diff --git a/mali_kbase/mali_kbase_pm.c b/mali_kbase/mali_kbase_pm.c
index 6a9a941..bfd5b7e 100644
--- a/mali_kbase/mali_kbase_pm.c
+++ b/mali_kbase/mali_kbase_pm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,7 +27,7 @@
#include <gpu/mali_kbase_gpu_regmap.h>
#include <mali_kbase_vinstr.h>
#include <mali_kbase_kinstr_prfcnt.h>
-#include <mali_kbase_hwcnt_context.h>
+#include <hwcnt/mali_kbase_hwcnt_context.h>
#include <mali_kbase_pm.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
diff --git a/mali_kbase/mali_kbase_refcount_defs.h b/mali_kbase/mali_kbase_refcount_defs.h
new file mode 100644
index 0000000..c517a2d
--- /dev/null
+++ b/mali_kbase/mali_kbase_refcount_defs.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_REFCOUNT_DEFS_H_
+#define _KBASE_REFCOUNT_DEFS_H_
+
+/*
+ * The refcount API is available from kernel 4.11 onwards.
+ * This file hides the related compatibility issues from the rest of the driver.
+ */
+
+#include <linux/version.h>
+#include <linux/types.h>
+
+#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
+
+#define kbase_refcount_t atomic_t
+#define kbase_refcount_read(x) atomic_read(x)
+#define kbase_refcount_set(x, v) atomic_set(x, v)
+#define kbase_refcount_dec_and_test(x) atomic_dec_and_test(x)
+#define kbase_refcount_dec(x) atomic_dec(x)
+#define kbase_refcount_inc_not_zero(x) atomic_inc_not_zero(x)
+#define kbase_refcount_inc(x) atomic_inc(x)
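+/* Note: this atomic_t fallback does not provide the saturation and
+ * use-after-free protection of the real refcount_t API.
+ */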
+
+#else
+
+#include <linux/refcount.h>
+
+#define kbase_refcount_t refcount_t
+#define kbase_refcount_read(x) refcount_read(x)
+#define kbase_refcount_set(x, v) refcount_set(x, v)
+#define kbase_refcount_dec_and_test(x) refcount_dec_and_test(x)
+#define kbase_refcount_dec(x) refcount_dec(x)
+#define kbase_refcount_inc_not_zero(x) refcount_inc_not_zero(x)
+#define kbase_refcount_inc(x) refcount_inc(x)
+
+#endif /* (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) */
+
+#endif /* _KBASE_REFCOUNT_DEFS_H_ */
diff --git a/mali_kbase/mali_kbase_reset_gpu.h b/mali_kbase/mali_kbase_reset_gpu.h
index c0f20d5..5063b64 100644
--- a/mali_kbase/mali_kbase_reset_gpu.h
+++ b/mali_kbase/mali_kbase_reset_gpu.h
@@ -248,6 +248,18 @@ int kbase_reset_gpu_silent(struct kbase_device *kbdev);
bool kbase_reset_gpu_is_active(struct kbase_device *kbdev);
/**
+ * kbase_reset_gpu_is_not_pending - Reports if a GPU reset is not pending
+ *
+ * @kbdev: Device pointer
+ *
+ * Note that unless appropriate locks are held when using this function, the
+ * state could change immediately afterwards.
+ *
+ * Return: True if the GPU reset isn't pending.
+ */
+bool kbase_reset_gpu_is_not_pending(struct kbase_device *kbdev);
+
+/**
* kbase_reset_gpu_wait - Wait for a GPU reset to complete
* @kbdev: Device pointer
*
diff --git a/mali_kbase/mali_kbase_softjobs.c b/mali_kbase/mali_kbase_softjobs.c
index ad9147c..d65ff2d 100644
--- a/mali_kbase/mali_kbase_softjobs.c
+++ b/mali_kbase/mali_kbase_softjobs.c
@@ -23,7 +23,7 @@
#include <linux/dma-buf.h>
#include <asm/cacheflush.h>
-#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#if IS_ENABLED(CONFIG_SYNC_FILE)
#include <mali_kbase_sync.h>
#include <mali_kbase_fence.h>
#endif
@@ -206,7 +206,7 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
return 0;
}
-#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#if IS_ENABLED(CONFIG_SYNC_FILE)
/* Called by the explicit fence mechanism when a fence wait has completed */
void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom)
{
@@ -676,8 +676,8 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom)
case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
{
struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
- unsigned long nr_pages =
- alloc->imported.user_buf.nr_pages;
+ const unsigned long nr_pages = alloc->imported.user_buf.nr_pages;
+ const unsigned long start = alloc->imported.user_buf.address;
if (alloc->imported.user_buf.mm != current->mm) {
ret = -EINVAL;
@@ -689,11 +689,9 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom)
ret = -ENOMEM;
goto out_unlock;
}
-
- ret = get_user_pages_fast(
- alloc->imported.user_buf.address,
- nr_pages, 0,
- buffers[i].extres_pages);
+ kbase_gpu_vm_unlock(katom->kctx);
+ ret = get_user_pages_fast(start, nr_pages, 0, buffers[i].extres_pages);
+ kbase_gpu_vm_lock(katom->kctx);
if (ret != nr_pages) {
/* Adjust number of pages, so that we only
* attempt to release pages in the array that we
@@ -937,26 +935,6 @@ int kbasep_jit_alloc_validate(struct kbase_context *kctx,
#if !MALI_USE_CSF
-/*
- * Sizes of user data to copy for each just-in-time memory interface version
- *
- * In interface version 2 onwards this is the same as the struct size, allowing
- * copying of arrays of structures from userspace.
- *
- * In interface version 1 the structure size was variable, and hence arrays of
- * structures cannot be supported easily, and were not a feature present in
- * version 1 anyway.
- */
-static const size_t jit_info_copy_size_for_jit_version[] = {
- /* in jit_version 1, the structure did not have any end padding, hence
- * it could be a different size on 32 and 64-bit clients. We therefore
- * do not copy past the last member
- */
- [1] = offsetofend(struct base_jit_alloc_info_10_2, id),
- [2] = sizeof(struct base_jit_alloc_info_11_5),
- [3] = sizeof(struct base_jit_alloc_info)
-};
-
static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
{
__user u8 *data = (__user u8 *)(uintptr_t) katom->jc;
@@ -966,13 +944,6 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
u32 count;
int ret;
u32 i;
- size_t jit_info_user_copy_size;
-
- WARN_ON(kctx->jit_version >=
- ARRAY_SIZE(jit_info_copy_size_for_jit_version));
- jit_info_user_copy_size =
- jit_info_copy_size_for_jit_version[kctx->jit_version];
- WARN_ON(jit_info_user_copy_size > sizeof(*info));
if (!kbase_mem_allow_alloc(kctx)) {
dev_dbg(kbdev->dev, "Invalid attempt to allocate JIT memory by %s/%d for ctx %d_%d",
@@ -984,7 +955,7 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
/* For backwards compatibility, and to prevent reading more than 1 jit
* info struct on jit version 1
*/
- if (katom->nr_extres == 0 || kctx->jit_version == 1)
+ if (katom->nr_extres == 0)
katom->nr_extres = 1;
count = katom->nr_extres;
@@ -1004,17 +975,11 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
katom->softjob_data = info;
- for (i = 0; i < count; i++, info++, data += jit_info_user_copy_size) {
- if (copy_from_user(info, data, jit_info_user_copy_size) != 0) {
+ for (i = 0; i < count; i++, info++, data += sizeof(*info)) {
+ if (copy_from_user(info, data, sizeof(*info)) != 0) {
ret = -EINVAL;
goto free_info;
}
- /* Clear any remaining bytes when user struct is smaller than
- * kernel struct. For jit version 1, this also clears the
- * padding bytes
- */
- memset(((u8 *)info) + jit_info_user_copy_size, 0,
- sizeof(*info) - jit_info_user_copy_size);
ret = kbasep_jit_alloc_validate(kctx, info);
if (ret)
@@ -1559,7 +1524,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom)
ret = kbase_dump_cpu_gpu_time(katom);
break;
-#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#if IS_ENABLED(CONFIG_SYNC_FILE)
case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
katom->event_code = kbase_sync_fence_out_trigger(katom,
katom->event_code == BASE_JD_EVENT_DONE ?
@@ -1621,7 +1586,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom)
void kbase_cancel_soft_job(struct kbase_jd_atom *katom)
{
switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
-#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#if IS_ENABLED(CONFIG_SYNC_FILE)
case BASE_JD_REQ_SOFT_FENCE_WAIT:
kbase_sync_fence_in_cancel_wait(katom);
break;
@@ -1644,7 +1609,7 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom)
return -EINVAL;
}
break;
-#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#if IS_ENABLED(CONFIG_SYNC_FILE)
case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
{
struct base_fence fence;
@@ -1699,20 +1664,9 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom)
fence.basep.fd);
if (ret < 0)
return ret;
-
-#ifdef CONFIG_MALI_DMA_FENCE
- /*
- * Set KCTX_NO_IMPLICIT_FENCE in the context the first
- * time a soft fence wait job is observed. This will
- * prevent the implicit dma-buf fence to conflict with
- * the Android native sync fences.
- */
- if (!kbase_ctx_flag(katom->kctx, KCTX_NO_IMPLICIT_SYNC))
- kbase_ctx_flag_set(katom->kctx, KCTX_NO_IMPLICIT_SYNC);
-#endif /* CONFIG_MALI_DMA_FENCE */
}
break;
-#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
+#endif /* CONFIG_SYNC_FILE */
case BASE_JD_REQ_SOFT_JIT_ALLOC:
return kbase_jit_allocate_prepare(katom);
case BASE_JD_REQ_SOFT_JIT_FREE:
@@ -1747,7 +1701,7 @@ void kbase_finish_soft_job(struct kbase_jd_atom *katom)
case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
/* Nothing to do */
break;
-#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#if IS_ENABLED(CONFIG_SYNC_FILE)
case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
/* If fence has not yet been signaled, do it now */
kbase_sync_fence_out_trigger(katom, katom->event_code ==
@@ -1757,7 +1711,7 @@ void kbase_finish_soft_job(struct kbase_jd_atom *katom)
/* Release katom's reference to fence object */
kbase_sync_fence_in_remove(katom);
break;
-#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
+#endif /* CONFIG_SYNC_FILE */
#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
case BASE_JD_REQ_SOFT_DEBUG_COPY:
kbase_debug_copy_finish(katom);
diff --git a/mali_kbase/mali_kbase_sync.h b/mali_kbase/mali_kbase_sync.h
index e820dcc..2b466a6 100644
--- a/mali_kbase/mali_kbase_sync.h
+++ b/mali_kbase/mali_kbase_sync.h
@@ -30,9 +30,6 @@
#include <linux/fdtable.h>
#include <linux/syscalls.h>
-#if IS_ENABLED(CONFIG_SYNC)
-#include <sync.h>
-#endif
#if IS_ENABLED(CONFIG_SYNC_FILE)
#include "mali_kbase_fence_defs.h"
#include <linux/sync_file.h>
@@ -181,7 +178,7 @@ int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom,
struct kbase_sync_fence_info *info);
#endif /* !MALI_USE_CSF */
-#if defined(CONFIG_SYNC_FILE)
+#if IS_ENABLED(CONFIG_SYNC_FILE)
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
void kbase_sync_fence_info_get(struct fence *fence,
struct kbase_sync_fence_info *info);
diff --git a/mali_kbase/mali_kbase_sync_android.c b/mali_kbase/mali_kbase_sync_android.c
deleted file mode 100644
index ae6e669..0000000
--- a/mali_kbase/mali_kbase_sync_android.c
+++ /dev/null
@@ -1,515 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
-/*
- *
- * (C) COPYRIGHT 2012-2017, 2020-2022 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-/*
- * Code for supporting explicit Android fences (CONFIG_SYNC)
- * Known to be good for kernels 4.5 and earlier.
- * Replaced with CONFIG_SYNC_FILE for 4.9 and later kernels
- * (see mali_kbase_sync_file.c)
- */
-
-#include <linux/sched.h>
-#include <linux/fdtable.h>
-#include <linux/file.h>
-#include <linux/fs.h>
-#include <linux/module.h>
-#include <linux/anon_inodes.h>
-#include <linux/version.h>
-#include "sync.h"
-#include <mali_kbase.h>
-#include <mali_kbase_sync.h>
-
-struct mali_sync_timeline {
- struct sync_timeline timeline;
- atomic_t counter;
- atomic_t signaled;
-};
-
-struct mali_sync_pt {
- struct sync_pt pt;
- int order;
- int result;
-};
-
-static struct mali_sync_timeline *to_mali_sync_timeline(
- struct sync_timeline *timeline)
-{
- return container_of(timeline, struct mali_sync_timeline, timeline);
-}
-
-static struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt)
-{
- return container_of(pt, struct mali_sync_pt, pt);
-}
-
-static struct sync_pt *timeline_dup(struct sync_pt *pt)
-{
- struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
- struct mali_sync_pt *new_mpt;
- struct sync_pt *new_pt = sync_pt_create(sync_pt_parent(pt),
- sizeof(struct mali_sync_pt));
-
- if (!new_pt)
- return NULL;
-
- new_mpt = to_mali_sync_pt(new_pt);
- new_mpt->order = mpt->order;
- new_mpt->result = mpt->result;
-
- return new_pt;
-}
-
-static int timeline_has_signaled(struct sync_pt *pt)
-{
- struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
- struct mali_sync_timeline *mtl = to_mali_sync_timeline(
- sync_pt_parent(pt));
- int result = mpt->result;
-
- int diff = atomic_read(&mtl->signaled) - mpt->order;
-
- if (diff >= 0)
- return (result < 0) ? result : 1;
-
- return 0;
-}
-
-static int timeline_compare(struct sync_pt *a, struct sync_pt *b)
-{
- struct mali_sync_pt *ma = container_of(a, struct mali_sync_pt, pt);
- struct mali_sync_pt *mb = container_of(b, struct mali_sync_pt, pt);
-
- int diff = ma->order - mb->order;
-
- if (diff == 0)
- return 0;
-
- return (diff < 0) ? -1 : 1;
-}
-
-static void timeline_value_str(struct sync_timeline *timeline, char *str,
- int size)
-{
- struct mali_sync_timeline *mtl = to_mali_sync_timeline(timeline);
-
- snprintf(str, size, "%d", atomic_read(&mtl->signaled));
-}
-
-static void pt_value_str(struct sync_pt *pt, char *str, int size)
-{
- struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
-
- snprintf(str, size, "%d(%d)", mpt->order, mpt->result);
-}
-
-static struct sync_timeline_ops mali_timeline_ops = {
- .driver_name = "Mali",
- .dup = timeline_dup,
- .has_signaled = timeline_has_signaled,
- .compare = timeline_compare,
- .timeline_value_str = timeline_value_str,
- .pt_value_str = pt_value_str,
-};
-
-/* Allocates a timeline for Mali
- *
- * One timeline should be allocated per API context.
- */
-static struct sync_timeline *mali_sync_timeline_alloc(const char *name)
-{
- struct sync_timeline *tl;
- struct mali_sync_timeline *mtl;
-
- tl = sync_timeline_create(&mali_timeline_ops,
- sizeof(struct mali_sync_timeline), name);
- if (!tl)
- return NULL;
-
- /* Set the counter in our private struct */
- mtl = to_mali_sync_timeline(tl);
- atomic_set(&mtl->counter, 0);
- atomic_set(&mtl->signaled, 0);
-
- return tl;
-}
-
-static int kbase_stream_close(struct inode *inode, struct file *file)
-{
- struct sync_timeline *tl;
-
- tl = (struct sync_timeline *)file->private_data;
- sync_timeline_destroy(tl);
- return 0;
-}
-
-static const struct file_operations stream_fops = {
- .owner = THIS_MODULE,
- .release = kbase_stream_close,
-};
-
-int kbase_sync_fence_stream_create(const char *name, int *const out_fd)
-{
- struct sync_timeline *tl;
-
- if (!out_fd)
- return -EINVAL;
-
- tl = mali_sync_timeline_alloc(name);
- if (!tl)
- return -EINVAL;
-
- *out_fd = anon_inode_getfd(name, &stream_fops, tl, O_RDONLY|O_CLOEXEC);
-
- if (*out_fd < 0) {
- sync_timeline_destroy(tl);
- return -EINVAL;
- }
-
- return 0;
-}
-
-#if !MALI_USE_CSF
-/* Allocates a sync point within the timeline.
- *
- * The timeline must be the one allocated by kbase_sync_timeline_alloc
- *
- * Sync points must be triggered in *exactly* the same order as they are
- * allocated.
- */
-static struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent)
-{
- struct sync_pt *pt = sync_pt_create(parent,
- sizeof(struct mali_sync_pt));
- struct mali_sync_timeline *mtl = to_mali_sync_timeline(parent);
- struct mali_sync_pt *mpt;
-
- if (!pt)
- return NULL;
-
- mpt = to_mali_sync_pt(pt);
- mpt->order = atomic_inc_return(&mtl->counter);
- mpt->result = 0;
-
- return pt;
-}
-
-int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int tl_fd)
-{
- struct sync_timeline *tl;
- struct sync_pt *pt;
- struct sync_fence *fence;
- int fd;
- struct file *tl_file;
-
- tl_file = fget(tl_fd);
- if (tl_file == NULL)
- return -EBADF;
-
- if (tl_file->f_op != &stream_fops) {
- fd = -EBADF;
- goto out;
- }
-
- tl = tl_file->private_data;
-
- pt = kbase_sync_pt_alloc(tl);
- if (!pt) {
- fd = -EFAULT;
- goto out;
- }
-
- fence = sync_fence_create("mali_fence", pt);
- if (!fence) {
- sync_pt_free(pt);
- fd = -EFAULT;
- goto out;
- }
-
- /* from here the fence owns the sync_pt */
-
- /* create a fd representing the fence */
- fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
- if (fd < 0) {
- sync_pt_free(pt);
- sync_fence_put(fence);
- katom->fence = NULL;
- goto out;
- }
-
- /* Place the successfully created fence in katom */
- katom->fence = fence;
-
- /* bind fence to the new fd */
- sync_fence_install(fence, fd);
-out:
- fput(tl_file);
-
- return fd;
-}
-
-int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd)
-{
- katom->fence = sync_fence_fdget(fd);
- return katom->fence ? 0 : -ENOENT;
-}
-#endif /* !MALI_USE_CSF */
-
-int kbase_sync_fence_validate(int fd)
-{
- struct sync_fence *fence;
-
- fence = sync_fence_fdget(fd);
- if (!fence)
- return -EINVAL;
-
- sync_fence_put(fence);
- return 0;
-}
-
-#if !MALI_USE_CSF
-/* Returns true if the specified timeline is allocated by Mali */
-static int kbase_sync_timeline_is_ours(struct sync_timeline *timeline)
-{
- return timeline->ops == &mali_timeline_ops;
-}
-
-/* Signals a particular sync point
- *
- * Sync points must be triggered in *exactly* the same order as they are
- * allocated.
- *
- * If they are signaled in the wrong order then a message will be printed in
- * debug builds and otherwise attempts to signal order sync_pts will be ignored.
- *
- * result can be negative to indicate error, any other value is interpreted as
- * success.
- */
-static void kbase_sync_signal_pt(struct sync_pt *pt, int result)
-{
- struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
- struct mali_sync_timeline *mtl = to_mali_sync_timeline(
- sync_pt_parent(pt));
- int signaled;
- int diff;
-
- mpt->result = result;
-
- do {
- signaled = atomic_read(&mtl->signaled);
-
- diff = signaled - mpt->order;
-
- if (diff > 0) {
- /* The timeline is already at or ahead of this point.
- * This should not happen unless userspace has been
- * signaling fences out of order, so warn but don't
- * violate the sync_pt API.
- * The warning is only in debug builds to prevent
- * a malicious user being able to spam dmesg.
- */
-#ifdef CONFIG_MALI_DEBUG
- pr_err("Fences were triggered in a different order to allocation!");
-#endif /* CONFIG_MALI_DEBUG */
- return;
- }
- } while (atomic_cmpxchg(&mtl->signaled,
- signaled, mpt->order) != signaled);
-}
-
-enum base_jd_event_code
-kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result)
-{
- struct sync_pt *pt;
- struct sync_timeline *timeline;
-
- if (!katom->fence)
- return BASE_JD_EVENT_JOB_CANCELLED;
-
- if (katom->fence->num_fences != 1) {
- /* Not exactly one item in the list - so it didn't (directly)
- * come from us
- */
- return BASE_JD_EVENT_JOB_CANCELLED;
- }
-
- pt = container_of(katom->fence->cbs[0].sync_pt, struct sync_pt, base);
- timeline = sync_pt_parent(pt);
-
- if (!kbase_sync_timeline_is_ours(timeline)) {
- /* Fence has a sync_pt which isn't ours! */
- return BASE_JD_EVENT_JOB_CANCELLED;
- }
-
- kbase_sync_signal_pt(pt, result);
-
- sync_timeline_signal(timeline);
-
- kbase_sync_fence_out_remove(katom);
-
- return (result < 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE;
-}
-
-static inline int kbase_fence_get_status(struct sync_fence *fence)
-{
- if (!fence)
- return -ENOENT;
-
- return atomic_read(&fence->status);
-}
-
-static void kbase_fence_wait_callback(struct sync_fence *fence,
- struct sync_fence_waiter *waiter)
-{
- struct kbase_jd_atom *katom = container_of(waiter,
- struct kbase_jd_atom, sync_waiter);
- struct kbase_context *kctx = katom->kctx;
-
- /* Propagate the fence status to the atom.
- * If negative then cancel this atom and its dependencies.
- */
- if (kbase_fence_get_status(fence) < 0)
- katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
-
- /* To prevent a potential deadlock we schedule the work onto the
- * job_done_worker kthread
- *
- * The issue is that we may signal the timeline while holding
- * kctx->jctx.lock and the callbacks are run synchronously from
- * sync_timeline_signal. So we simply defer the work.
- */
-
- kthread_init_work(&katom->work, kbase_sync_fence_wait_worker);
- kthread_queue_work(&kctx->kbdev->job_done_worker, &katom->work);
-}
-
-int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom)
-{
- int ret;
-
- sync_fence_waiter_init(&katom->sync_waiter, kbase_fence_wait_callback);
-
- ret = sync_fence_wait_async(katom->fence, &katom->sync_waiter);
-
- if (ret == 1) {
- /* Already signaled */
- return 0;
- }
-
- if (ret < 0) {
- katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
- /* We should cause the dependent jobs in the bag to be failed,
- * to do this we schedule the work queue to complete this job
- */
- kthread_init_work(&katom->work, kbase_sync_fence_wait_worker);
- kthread_queue_work(&katom->kctx->kbdev->job_done_worker, &katom->work);
-
- }
-
- return 1;
-}
-
-void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom)
-{
- if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) {
- /* The wait wasn't cancelled - leave the cleanup for
- * kbase_fence_wait_callback
- */
- return;
- }
-
- /* Wait was cancelled - zap the atoms */
- katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
-
- kbasep_remove_waiting_soft_job(katom);
- kbase_finish_soft_job(katom);
-
- if (kbase_jd_done_nolock(katom, true))
- kbase_js_sched_all(katom->kctx->kbdev);
-}
-
-void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom)
-{
- if (katom->fence) {
- sync_fence_put(katom->fence);
- katom->fence = NULL;
- }
-}
-
-void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom)
-{
- if (katom->fence) {
- sync_fence_put(katom->fence);
- katom->fence = NULL;
- }
-}
-
-int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom,
- struct kbase_sync_fence_info *info)
-{
- u32 string_len;
-
- if (!katom->fence)
- return -ENOENT;
-
- info->fence = katom->fence;
- info->status = kbase_fence_get_status(katom->fence);
-
- string_len = strscpy(info->name, katom->fence->name, sizeof(info->name));
- string_len += sizeof(char);
- /* Make sure that the source string fit into the buffer. */
- KBASE_DEBUG_ASSERT(string_len <= sizeof(info->name));
- CSTD_UNUSED(string_len);
-
- return 0;
-}
-
-int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom,
- struct kbase_sync_fence_info *info)
-{
- u32 string_len;
-
- if (!katom->fence)
- return -ENOENT;
-
- info->fence = katom->fence;
- info->status = kbase_fence_get_status(katom->fence);
-
- string_len = strscpy(info->name, katom->fence->name, sizeof(info->name));
- string_len += sizeof(char);
- /* Make sure that the source string fit into the buffer. */
- KBASE_DEBUG_ASSERT(string_len <= sizeof(info->name));
- CSTD_UNUSED(string_len);
-
- return 0;
-}
-
-#ifdef CONFIG_MALI_FENCE_DEBUG
-void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom)
-{
- /* Dump out the full state of all the Android sync fences.
- * The function sync_dump() isn't exported to modules, so force
- * sync_fence_wait() to time out to trigger sync_dump().
- */
- if (katom->fence)
- sync_fence_wait(katom->fence, 1);
-}
-#endif
-#endif /* !MALI_USE_CSF */
diff --git a/mali_kbase/mali_kbase_sync_file.c b/mali_kbase/mali_kbase_sync_file.c
index 649a862..d98eba9 100644
--- a/mali_kbase/mali_kbase_sync_file.c
+++ b/mali_kbase/mali_kbase_sync_file.c
@@ -21,9 +21,6 @@
/*
* Code for supporting explicit Linux fences (CONFIG_SYNC_FILE)
- * Introduced in kernel 4.9.
- * Android explicit fences (CONFIG_SYNC) can be used for older kernels
- * (see mali_kbase_sync_android.c)
*/
#include <linux/sched.h>
@@ -101,10 +98,13 @@ int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd)
struct dma_fence *fence = sync_file_get_fence(fd);
#endif
+ lockdep_assert_held(&katom->kctx->jctx.lock);
+
if (!fence)
return -ENOENT;
kbase_fence_fence_in_set(katom, fence);
+ katom->dma_fence.fence_cb_added = false;
return 0;
}
@@ -156,36 +156,31 @@ static void kbase_fence_wait_callback(struct dma_fence *fence,
struct dma_fence_cb *cb)
#endif
{
- struct kbase_fence_cb *kcb = container_of(cb,
- struct kbase_fence_cb,
- fence_cb);
- struct kbase_jd_atom *katom = kcb->katom;
+ struct kbase_jd_atom *katom = container_of(cb, struct kbase_jd_atom,
+ dma_fence.fence_cb);
struct kbase_context *kctx = katom->kctx;
/* Cancel atom if fence is erroneous */
+ if (dma_fence_is_signaled(katom->dma_fence.fence_in) &&
#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \
(KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \
KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE))
- if (dma_fence_is_signaled(kcb->fence) && kcb->fence->error < 0)
+ katom->dma_fence.fence_in->error < 0)
#else
- if (dma_fence_is_signaled(kcb->fence) && kcb->fence->status < 0)
+ katom->dma_fence.fence_in->status < 0)
#endif
katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
- if (kbase_fence_dep_count_dec_and_test(katom)) {
- /* We take responsibility of handling this */
- kbase_fence_dep_count_set(katom, -1);
- /* To prevent a potential deadlock we schedule the work onto the
- * job_done_worker kthread
- *
- * The issue is that we may signal the timeline while holding
- * kctx->jctx.lock and the callbacks are run synchronously from
- * sync_timeline_signal. So we simply defer the work.
- */
- kthread_init_work(&katom->work, kbase_sync_fence_wait_worker);
- kthread_queue_work(&kctx->kbdev->job_done_worker, &katom->work);
- }
+ /* To prevent a potential deadlock we schedule the work onto the
+ * job_done_worker kthread
+ *
+ * The issue is that we may signal the timeline while holding
+ * kctx->jctx.lock and the callbacks are run synchronously from
+ * sync_timeline_signal. So we simply defer the work.
+ */
+ kthread_init_work(&katom->work, kbase_sync_fence_wait_worker);
+ kthread_queue_work(&kctx->kbdev->job_done_worker, &katom->work);
}
int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom)
@@ -197,53 +192,77 @@ int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom)
struct dma_fence *fence;
#endif
- fence = kbase_fence_in_get(katom);
+ lockdep_assert_held(&katom->kctx->jctx.lock);
+
+ fence = katom->dma_fence.fence_in;
if (!fence)
return 0; /* no input fence to wait for, good to go! */
- kbase_fence_dep_count_set(katom, 1);
+ err = dma_fence_add_callback(fence, &katom->dma_fence.fence_cb,
+ kbase_fence_wait_callback);
+ if (err == -ENOENT) {
+ int fence_status = dma_fence_get_status(fence);
- err = kbase_fence_add_callback(katom, fence, kbase_fence_wait_callback);
+ if (fence_status == 1) {
+ /* Fence is already signaled with no error. The completion
+ * for FENCE_WAIT softjob can be done right away.
+ */
+ return 0;
+ }
- kbase_fence_put(fence);
+ /* Fence shouldn't be in the not-signaled state */
+ if (!fence_status) {
+ struct kbase_sync_fence_info info;
+
+ kbase_sync_fence_in_info_get(katom, &info);
- if (likely(!err)) {
- /* Test if the callbacks are already triggered */
- if (kbase_fence_dep_count_dec_and_test(katom)) {
- kbase_fence_free_callbacks(katom);
- kbase_fence_dep_count_set(katom, -1);
- return 0; /* Already signaled, good to go right now */
+ dev_warn(katom->kctx->kbdev->dev,
+ "Unexpected status for fence %s of ctx:%d_%d atom:%d",
+ info.name, katom->kctx->tgid, katom->kctx->id,
+ kbase_jd_atom_id(katom->kctx, katom));
}
- /* Callback installed, so we just need to wait for it... */
- } else {
- /* Failure */
- kbase_fence_free_callbacks(katom);
- kbase_fence_dep_count_set(katom, -1);
+ /* If fence is signaled with an error, then the FENCE_WAIT softjob is
+ * considered to be failed.
+ */
+ }
+ if (unlikely(err)) {
+ /* We should cause the dependent jobs in the bag to be failed. */
katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
- /* We should cause the dependent jobs in the bag to be failed,
- * to do this we schedule the work queue to complete this job
- */
- kthread_init_work(&katom->work, kbase_sync_fence_wait_worker);
- kthread_queue_work(&katom->kctx->kbdev->job_done_worker, &katom->work);
+ /* The completion for FENCE_WAIT softjob can be done right away. */
+ return 0;
}
- return 1; /* completion to be done later by callback/worker */
+ /* Callback was successfully installed */
+ katom->dma_fence.fence_cb_added = true;
+
+ /* Completion to be done later by callback/worker */
+ return 1;
}
void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom)
{
- if (!kbase_fence_free_callbacks(katom)) {
- /* The wait wasn't cancelled -
- * leave the cleanup for kbase_fence_wait_callback
- */
- return;
- }
+ lockdep_assert_held(&katom->kctx->jctx.lock);
+
+ if (katom->dma_fence.fence_cb_added) {
+ if (!dma_fence_remove_callback(katom->dma_fence.fence_in,
+ &katom->dma_fence.fence_cb)) {
+ /* The callback is already removed so leave the cleanup
+ * for kbase_fence_wait_callback.
+ */
+ return;
+ }
+ } else {
+ struct kbase_sync_fence_info info;
- /* Take responsibility of completion */
- kbase_fence_dep_count_set(katom, -1);
+ kbase_sync_fence_in_info_get(katom, &info);
+ dev_warn(katom->kctx->kbdev->dev,
+ "Callback was not added earlier for fence %s of ctx:%d_%d atom:%d",
+ info.name, katom->kctx->tgid, katom->kctx->id,
+ kbase_jd_atom_id(katom->kctx, katom));
+ }
/* Wait was cancelled - zap the atoms */
katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
@@ -262,8 +281,29 @@ void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom)
void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom)
{
- kbase_fence_free_callbacks(katom);
+ lockdep_assert_held(&katom->kctx->jctx.lock);
+
+ if (katom->dma_fence.fence_cb_added) {
+ bool removed = dma_fence_remove_callback(katom->dma_fence.fence_in,
+ &katom->dma_fence.fence_cb);
+
+ /* Here it is expected that the callback should have already been removed
+ * previously either by kbase_sync_fence_in_cancel_wait() or when the fence
+ * was signaled and kbase_sync_fence_wait_worker() was called.
+ */
+ if (removed) {
+ struct kbase_sync_fence_info info;
+
+ kbase_sync_fence_in_info_get(katom, &info);
+ dev_warn(katom->kctx->kbdev->dev,
+ "Callback was not removed earlier for fence %s of ctx:%d_%d atom:%d",
+ info.name, katom->kctx->tgid, katom->kctx->id,
+ kbase_jd_atom_id(katom->kctx, katom));
+ }
+ }
+
kbase_fence_in_remove(katom);
+ katom->dma_fence.fence_cb_added = false;
}
#endif /* !MALI_USE_CSF */
@@ -277,7 +317,7 @@ void kbase_sync_fence_info_get(struct dma_fence *fence,
{
info->fence = fence;
- /* translate into CONFIG_SYNC status:
+ /* Translate into the following status, with support for error handling:
* < 0 : error
* 0 : active
* 1 : signaled
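
Aside, not part of the patch: a minimal sketch of the dma_fence contract the reworked kbase_sync_fence_in_wait() and the status convention above rely on. The wait_ctx type and the wait_cb()/start_wait() names are illustrative; only dma_fence_add_callback() and dma_fence_get_status() are real kernel APIs.

#include <linux/dma-fence.h>
#include <linux/errno.h>

struct wait_ctx {
	struct dma_fence_cb cb;		/* embedded, like katom->dma_fence.fence_cb */
};

static void wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	/* Runs from the signaller's context; defer any heavy work. */
}

static int start_wait(struct wait_ctx *w, struct dma_fence *fence)
{
	int err = dma_fence_add_callback(fence, &w->cb, wait_cb);

	if (err == -ENOENT) {
		/* Fence already signaled: the callback will never run, so the
		 * completion (and any error status: <0 error, 0 active, 1 signaled)
		 * is handled inline.
		 */
		return dma_fence_get_status(fence) < 0 ? -EIO : 0;
	}
	/* 0: callback installed, completion is deferred to wait_cb(). */
	return err;
}
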
diff --git a/mali_kbase/mali_kbase_vinstr.c b/mali_kbase/mali_kbase_vinstr.c
index e9f843b..5f3dabd 100644
--- a/mali_kbase/mali_kbase_vinstr.c
+++ b/mali_kbase/mali_kbase_vinstr.c
@@ -20,11 +20,11 @@
*/
#include "mali_kbase_vinstr.h"
-#include "mali_kbase_hwcnt_virtualizer.h"
-#include "mali_kbase_hwcnt_types.h"
+#include "hwcnt/mali_kbase_hwcnt_virtualizer.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
#include <uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h>
-#include "mali_kbase_hwcnt_gpu.h"
-#include "mali_kbase_hwcnt_gpu_narrow.h"
+#include "hwcnt/mali_kbase_hwcnt_gpu.h"
+#include "hwcnt/mali_kbase_hwcnt_gpu_narrow.h"
#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h>
#include "mali_malisw.h"
#include "mali_kbase_debug.h"
@@ -41,6 +41,11 @@
#include <linux/version_compat_defs.h>
#include <linux/workqueue.h>
+/* Explicitly include epoll header for old kernels. Not required from 4.16. */
+#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE
+#include <uapi/linux/eventpoll.h>
+#endif
+
/* Hwcnt reader API version */
#define HWCNT_READER_API 1
@@ -1034,24 +1039,25 @@ static long kbasep_vinstr_hwcnt_reader_ioctl(
* @filp: Non-NULL pointer to file structure.
* @wait: Non-NULL pointer to poll table.
*
- * Return: POLLIN if data can be read without blocking, 0 if data can not be
- * read without blocking, else error code.
+ * Return: EPOLLIN | EPOLLRDNORM if data can be read without blocking, 0 if
+ * data can not be read without blocking, else EPOLLHUP | EPOLLERR.
*/
static __poll_t kbasep_vinstr_hwcnt_reader_poll(struct file *filp, poll_table *wait)
{
struct kbase_vinstr_client *cli;
if (!filp || !wait)
- return (__poll_t)-EINVAL;
+ return EPOLLHUP | EPOLLERR;
cli = filp->private_data;
if (!cli)
- return (__poll_t)-EINVAL;
+ return EPOLLHUP | EPOLLERR;
poll_wait(filp, &cli->waitq, wait);
if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli))
- return POLLIN;
- return 0;
+ return EPOLLIN | EPOLLRDNORM;
+
+ return (__poll_t)0;
}
/**
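
Aside, not part of the patch: a small userspace sketch of how the hwcnt reader poll() semantics above are consumed. wait_for_hwcnt_buffer() and reader_fd are illustrative names; the fd is assumed to come from the existing hwcnt reader setup ioctl.

#include <poll.h>

/* Returns 1 if a buffer is ready, 0 on timeout, -1 on error/hang-up. */
static int wait_for_hwcnt_buffer(int reader_fd, int timeout_ms)
{
	struct pollfd pfd = { .fd = reader_fd, .events = POLLIN };
	int ret = poll(&pfd, 1, timeout_ms);

	if (ret < 0)
		return -1;		/* poll() itself failed */
	if (ret == 0)
		return 0;		/* timed out, nothing ready */
	if (pfd.revents & (POLLERR | POLLHUP))
		return -1;		/* reader is in an error state */
	return (pfd.revents & POLLIN) ? 1 : 0;
}
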
diff --git a/mali_kbase/mali_malisw.h b/mali_kbase/mali_malisw.h
index d25c29f..d9db189 100644
--- a/mali_kbase/mali_malisw.h
+++ b/mali_kbase/mali_malisw.h
@@ -97,16 +97,12 @@
*/
#define CSTD_STR2(x) CSTD_STR1(x)
-/* LINUX_VERSION_CODE < 5.4 */
-#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
-#if defined(GCC_VERSION) && GCC_VERSION >= 70000
+ #ifndef fallthrough
+ #define fallthrough __fallthrough
+ #endif /* fallthrough */
+
#ifndef __fallthrough
#define __fallthrough __attribute__((fallthrough))
#endif /* __fallthrough */
-#define fallthrough __fallthrough
-#else
-#define fallthrough CSTD_NOP(...) /* fallthrough */
-#endif /* GCC_VERSION >= 70000 */
-#endif /* KERNEL_VERSION(5, 4, 0) */
#endif /* _MALISW_H_ */
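
Aside, not part of the patch: a minimal sketch of the pattern the fallthrough macro above exists for, namely annotating a deliberate drop-through between switch cases so -Wimplicit-fallthrough does not warn. classify() is an illustrative name.

static int classify(int op)
{
	int weight = 0;

	switch (op) {
	case 2:
		weight++;
		fallthrough;	/* deliberate: case 2 also does case 1's work */
	case 1:
		weight++;
		break;
	default:
		break;
	}
	return weight;
}
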
diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
index 6c52f0c..4cac787 100644
--- a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
+++ b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -88,12 +88,11 @@ static void submit_work_pagefault(struct kbase_device *kbdev, u32 as_nr,
* context's address space, when the page fault occurs for
* MCU's address space.
*/
- if (!queue_work(as->pf_wq, &as->work_pagefault))
- kbase_ctx_sched_release_ctx(kctx);
- else {
+ if (!queue_work(as->pf_wq, &as->work_pagefault)) {
dev_dbg(kbdev->dev,
- "Page fault is already pending for as %u\n",
- as_nr);
+ "Page fault is already pending for as %u", as_nr);
+ kbase_ctx_sched_release_ctx(kctx);
+ } else {
atomic_inc(&kbdev->faults_pending);
}
}
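
Aside, not part of the patch: the reordered branch above hinges on the queue_work() return value, sketched below with illustrative names. queue_work() returns false when the work item is already pending, which is exactly the case where the context reference taken for this fault must be released instead of leaked.

#include <linux/workqueue.h>

static void submit_fault_work(struct workqueue_struct *pf_wq,
			      struct work_struct *pf_work,
			      void (*release_ctx_ref)(void *), void *ctx)
{
	if (!queue_work(pf_wq, pf_work)) {
		/* Work already pending: nothing new will run, drop our ref. */
		release_ctx_ref(ctx);
	}
}
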
@@ -122,6 +121,8 @@ void kbase_mmu_report_mcu_as_fault_and_reset(struct kbase_device *kbdev,
access_type, kbase_gpu_access_type_name(fault->status),
source_id);
+ kbase_debug_csf_fault_notify(kbdev, NULL, DF_GPU_PAGE_FAULT);
+
/* Report MMU fault for all address spaces (except MCU_AS_NR) */
for (as_no = 1; as_no < kbdev->nr_hw_address_spaces; as_no++)
submit_work_pagefault(kbdev, as_no, fault);
@@ -189,6 +190,7 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
GPU_COMMAND_CLEAR_FAULT);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
}
/*
@@ -250,6 +252,7 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
mutex_unlock(&kbdev->mmu_hw_mutex);
/* AS transaction end */
+ kbase_debug_csf_fault_notify(kbdev, kctx, DF_GPU_PAGE_FAULT);
/* Switching to UNMAPPED mode above would have enabled the firmware to
* recover from the fault (if the memory access was made by firmware)
* and it can then respond to CSG termination requests to be sent now.
@@ -263,6 +266,7 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
kbase_mmu_hw_enable_fault(kbdev, as,
KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+
}
/**
@@ -548,14 +552,15 @@ void kbase_mmu_gpu_fault_interrupt(struct kbase_device *kbdev, u32 status,
}
KBASE_EXPORT_TEST_API(kbase_mmu_gpu_fault_interrupt);
-int kbase_mmu_as_init(struct kbase_device *kbdev, int i)
+int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i)
{
kbdev->as[i].number = i;
kbdev->as[i].bf_data.addr = 0ULL;
kbdev->as[i].pf_data.addr = 0ULL;
kbdev->as[i].gf_data.addr = 0ULL;
+ kbdev->as[i].is_unresponsive = false;
- kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", 0, 1, i);
+ kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%u", WQ_UNBOUND, 1, i);
if (!kbdev->as[i].pf_wq)
return -ENOMEM;
diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
index 2442149..d716ce0 100644
--- a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
+++ b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -95,6 +95,7 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
kbase_mmu_hw_enable_fault(kbdev, as,
KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+
}
/*
@@ -328,7 +329,7 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
while (bf_bits | pf_bits) {
struct kbase_as *as;
- int as_no;
+ unsigned int as_no;
struct kbase_context *kctx;
struct kbase_fault *fault;
@@ -423,13 +424,14 @@ int kbase_mmu_switch_to_ir(struct kbase_context *const kctx,
return kbase_job_slot_softstop_start_rp(kctx, reg);
}
-int kbase_mmu_as_init(struct kbase_device *kbdev, int i)
+int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i)
{
kbdev->as[i].number = i;
kbdev->as[i].bf_data.addr = 0ULL;
kbdev->as[i].pf_data.addr = 0ULL;
+ kbdev->as[i].is_unresponsive = false;
- kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", 0, 1, i);
+ kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%u", 0, 1, i);
if (!kbdev->as[i].pf_wq)
return -ENOMEM;
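
Aside, not part of the patch: the alloc_workqueue() call changed above uses a printf-style name, sketched here with an illustrative wrapper. The trailing argument fills "mali_mmu%u" (e.g. index 3 yields "mali_mmu3"), and max_active = 1 keeps the per-address-space fault work serialized.

#include <linux/workqueue.h>

static struct workqueue_struct *make_as_fault_wq(unsigned int as_index)
{
	/* One serialized workqueue per GPU address space. */
	return alloc_workqueue("mali_mmu%u", 0, 1, as_index);
}
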
diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c
index 8f7b9b5..c689a63 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.c
+++ b/mali_kbase/mmu/mali_kbase_mmu.c
@@ -25,6 +25,7 @@
#include <linux/kernel.h>
#include <linux/dma-mapping.h>
+#include <linux/migrate.h>
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_fault.h>
#include <gpu/mali_kbase_gpu_regmap.h>
@@ -110,7 +111,8 @@ static void mmu_hw_operation_end(struct kbase_device *kbdev)
/**
* mmu_flush_cache_on_gpu_ctrl() - Check if cache flush needs to be done
- * through GPU_CONTROL interface
+ * through GPU_CONTROL interface.
+ *
* @kbdev: kbase device to check GPU model ID on.
*
* This function returns whether a cache flush for page table update should
@@ -128,6 +130,39 @@ static bool mmu_flush_cache_on_gpu_ctrl(struct kbase_device *kbdev)
}
/**
+ * mmu_flush_pa_range() - Flush physical address range
+ *
+ * @kbdev: kbase device to issue the MMU operation on.
+ * @phys: Starting address of the physical range to start the operation on.
+ * @nr_bytes: Number of bytes to work on.
+ * @op: Type of cache flush operation to perform.
+ *
+ * Issue a cache flush physical range command.
+ */
+#if MALI_USE_CSF
+static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, size_t nr_bytes,
+ enum kbase_mmu_op_type op)
+{
+ u32 flush_op;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ /* Translate operation to command */
+ if (op == KBASE_MMU_OP_FLUSH_PT)
+ flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2;
+ else if (op == KBASE_MMU_OP_FLUSH_MEM)
+ flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC;
+ else {
+ dev_warn(kbdev->dev, "Invalid flush request (op = %d)", op);
+ return;
+ }
+
+ if (kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op))
+ dev_err(kbdev->dev, "Flush for physical address range did not complete");
+}
+#endif
+
+/**
* mmu_invalidate() - Perform an invalidate operation on MMU caches.
* @kbdev: The Kbase device.
* @kctx: The Kbase context.
@@ -141,21 +176,15 @@ static bool mmu_flush_cache_on_gpu_ctrl(struct kbase_device *kbdev)
static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr,
const struct kbase_mmu_hw_op_param *op_param)
{
- int err = 0;
unsigned long flags;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
if (kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0)) {
as_nr = kctx ? kctx->as_nr : as_nr;
- err = kbase_mmu_hw_do_unlock(kbdev, &kbdev->as[as_nr], op_param);
- }
-
- if (err) {
- dev_err(kbdev->dev,
- "Invalidate after GPU page table update did not complete. Issuing GPU soft-reset to recover");
- if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
- kbase_reset_gpu(kbdev);
+ if (kbase_mmu_hw_do_unlock(kbdev, &kbdev->as[as_nr], op_param))
+ dev_err(kbdev->dev,
+ "Invalidate after GPU page table update did not complete");
}
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -166,26 +195,14 @@ static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kct
static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param)
{
- int err = 0;
unsigned long flags;
/* AS transaction begin */
mutex_lock(&kbdev->mmu_hw_mutex);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- if (kbdev->pm.backend.gpu_ready)
- err = kbase_mmu_hw_do_flush_locked(kbdev, as, op_param);
-
- if (err) {
- /* Flush failed to complete, assume the GPU has hung and
- * perform a reset to recover.
- */
- dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover");
-
- if (kbase_prepare_to_reset_gpu(
- kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
- kbase_reset_gpu(kbdev);
- }
+ if (kbdev->pm.backend.gpu_powered && (kbase_mmu_hw_do_flush_locked(kbdev, as, op_param)))
+ dev_err(kbdev->dev, "Flush for GPU page table update did not complete");
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
mutex_unlock(&kbdev->mmu_hw_mutex);
@@ -210,9 +227,6 @@ static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as
* If operation is set to KBASE_MMU_OP_FLUSH_MEM then this function will issue
* a cache flush + invalidate to the L2 and GPU Load/Store caches as well as
* invalidating the TLBs.
- *
- * If operation is set to KBASE_MMU_OP_UNLOCK then this function will only
- * invalidate the MMU caches and TLBs.
*/
static void mmu_flush_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr,
const struct kbase_mmu_hw_op_param *op_param)
@@ -263,7 +277,6 @@ static void mmu_flush_invalidate(struct kbase_device *kbdev, struct kbase_contex
static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_context *kctx,
int as_nr, const struct kbase_mmu_hw_op_param *op_param)
{
- int err = 0;
unsigned long flags;
/* AS transaction begin */
@@ -272,19 +285,8 @@ static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct
if (kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0)) {
as_nr = kctx ? kctx->as_nr : as_nr;
- err = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[as_nr],
- op_param);
- }
-
- if (err) {
- /* Flush failed to complete, assume the GPU has hung and
- * perform a reset to recover.
- */
- dev_err(kbdev->dev,
- "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
-
- if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
- kbase_reset_gpu(kbdev);
+ if (kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[as_nr], op_param))
+ dev_err(kbdev->dev, "Flush for GPU page table update did not complete");
}
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -295,6 +297,7 @@ static void kbase_mmu_sync_pgd_gpu(struct kbase_device *kbdev, struct kbase_cont
phys_addr_t phys, size_t size,
enum kbase_mmu_op_type flush_op)
{
+ kbase_mmu_flush_pa_range(kbdev, kctx, phys, size, flush_op);
}
static void kbase_mmu_sync_pgd_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size)
@@ -345,9 +348,9 @@ static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, struct kbase_context
* a 4kB physical page.
*/
-static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
- struct tagged_addr *phys, size_t nr, unsigned long flags,
- int group_id, u64 *dirty_pgds);
+static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int group_id, u64 *dirty_pgds);
/**
* kbase_mmu_update_and_free_parent_pgds() - Update number of valid entries and
@@ -360,13 +363,70 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
* @level: The level of MMU page table.
* @flush_op: The type of MMU flush operation to perform.
* @dirty_pgds: Flags to track every level where a PGD has been updated.
- * @free_pgds_list: Linked list of the page directory pages to free.
*/
static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut, phys_addr_t *pgds,
u64 vpfn, int level,
- enum kbase_mmu_op_type flush_op, u64 *dirty_pgds,
- struct list_head *free_pgds_list);
+ enum kbase_mmu_op_type flush_op, u64 *dirty_pgds);
+
+static void kbase_mmu_account_freed_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
+{
+ atomic_sub(1, &kbdev->memdev.used_pages);
+
+ /* If MMU tables belong to a context then pages will have been accounted
+ * against it, so we must decrement the usage counts here.
+ */
+ if (mmut->kctx) {
+ kbase_process_page_usage_dec(mmut->kctx, 1);
+ atomic_sub(1, &mmut->kctx->used_pages);
+ }
+
+ kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1);
+}
+
+static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut,
+ struct page *p)
+{
+ struct kbase_page_metadata *page_md = kbase_page_private(p);
+ bool page_is_isolated = false;
+
+ lockdep_assert_held(&mmut->mmu_lock);
+
+ if (!kbase_page_migration_enabled)
+ return false;
+
+ spin_lock(&page_md->migrate_lock);
+ if (PAGE_STATUS_GET(page_md->status) == PT_MAPPED) {
+ WARN_ON_ONCE(!mmut->kctx);
+ if (IS_PAGE_ISOLATED(page_md->status)) {
+ page_md->status = PAGE_STATUS_SET(page_md->status,
+ FREE_PT_ISOLATED_IN_PROGRESS);
+ page_md->data.free_pt_isolated.kbdev = kbdev;
+ page_is_isolated = true;
+ } else {
+ page_md->status =
+ PAGE_STATUS_SET(page_md->status, FREE_IN_PROGRESS);
+ }
+ } else {
+ WARN_ON_ONCE(mmut->kctx);
+ WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != NOT_MOVABLE);
+ }
+ spin_unlock(&page_md->migrate_lock);
+
+ if (unlikely(page_is_isolated)) {
+ /* Do the CPU cache flush and accounting here for the isolated
+ * PGD page, which is done inside kbase_mmu_free_pgd() for the
+ * PGD page that did not get isolated.
+ */
+ dma_sync_single_for_device(kbdev->dev, kbase_dma_addr(p), PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ kbase_mmu_account_freed_pgd(kbdev, mmut);
+ }
+
+ return page_is_isolated;
+}
+
/**
* kbase_mmu_free_pgd() - Free memory of the page directory
*
@@ -381,24 +441,17 @@ static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_tabl
phys_addr_t pgd)
{
struct page *p;
+ bool page_is_isolated = false;
lockdep_assert_held(&mmut->mmu_lock);
p = pfn_to_page(PFN_DOWN(pgd));
+ page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p);
- kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true);
-
- atomic_sub(1, &kbdev->memdev.used_pages);
-
- /* If MMU tables belong to a context then pages will have been accounted
- * against it, so we must decrement the usage counts here.
- */
- if (mmut->kctx) {
- kbase_process_page_usage_dec(mmut->kctx, 1);
- atomic_sub(1, &mmut->kctx->used_pages);
+ if (likely(!page_is_isolated)) {
+ kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true);
+ kbase_mmu_account_freed_pgd(kbdev, mmut);
}
-
- kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1);
}
/**
@@ -406,27 +459,42 @@ static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_tabl
*
* @kbdev: Device pointer.
* @mmut: GPU MMU page table.
- * @free_pgds_list: Linked list of the page directory pages to free.
*
* This function will call kbase_mmu_free_pgd() on each page directory page
- * present in the @free_pgds_list.
+ * present in the list of free PGDs inside @mmut.
*
* The function is supposed to be called after the GPU cache and MMU TLB has
* been invalidated post the teardown loop.
+ *
+ * The mmu_lock shall be held prior to calling the function.
*/
-static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
- struct list_head *free_pgds_list)
+static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
{
- struct page *page, *next_page;
+ size_t i;
- rt_mutex_lock(&mmut->mmu_lock);
+ lockdep_assert_held(&mmut->mmu_lock);
- list_for_each_entry_safe(page, next_page, free_pgds_list, lru) {
- list_del_init(&page->lru);
- kbase_mmu_free_pgd(kbdev, mmut, page_to_phys(page));
- }
+ for (i = 0; i < mmut->scratch_mem.free_pgds.head_index; i++)
+ kbase_mmu_free_pgd(kbdev, mmut, page_to_phys(mmut->scratch_mem.free_pgds.pgds[i]));
- rt_mutex_unlock(&mmut->mmu_lock);
+ mmut->scratch_mem.free_pgds.head_index = 0;
+}
+
+static void kbase_mmu_add_to_free_pgds_list(struct kbase_mmu_table *mmut, struct page *p)
+{
+ lockdep_assert_held(&mmut->mmu_lock);
+
+ if (WARN_ON_ONCE(mmut->scratch_mem.free_pgds.head_index > (MAX_FREE_PGDS - 1)))
+ return;
+
+ mmut->scratch_mem.free_pgds.pgds[mmut->scratch_mem.free_pgds.head_index++] = p;
+}
+
+static inline void kbase_mmu_reset_free_pgds_list(struct kbase_mmu_table *mmut)
+{
+ lockdep_assert_held(&mmut->mmu_lock);
+
+ mmut->scratch_mem.free_pgds.head_index = 0;
}
/**
@@ -456,7 +524,7 @@ static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev,
if (!multiple) {
dev_warn(
kbdev->dev,
- "VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n",
+ "VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW",
((unsigned long long)reg->start_pfn) << PAGE_SHIFT);
return minimum_extra;
}
@@ -514,13 +582,12 @@ static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev,
u64 start_pfn, size_t nr,
u32 kctx_id, u64 dirty_pgds)
{
- int err;
-
/* Calls to this function are inherently synchronous, with respect to
* MMU operations.
*/
const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;
struct kbase_mmu_hw_op_param op_param;
+ int ret = 0;
mutex_lock(&kbdev->mmu_hw_mutex);
@@ -539,17 +606,20 @@ static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev,
spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
op_param.flush_skip_levels =
pgd_level_to_skip_flush(dirty_pgds);
- err = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, faulting_as,
- &op_param);
+ ret = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, faulting_as, &op_param);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
} else {
mmu_hw_operation_begin(kbdev);
- err = kbase_mmu_hw_do_flush(kbdev, faulting_as, &op_param);
+ ret = kbase_mmu_hw_do_flush(kbdev, faulting_as, &op_param);
mmu_hw_operation_end(kbdev);
}
mutex_unlock(&kbdev->mmu_hw_mutex);
+ if (ret)
+ dev_err(kbdev->dev,
+ "Flush for GPU page fault due to write access did not complete");
+
kbase_mmu_hw_enable_fault(kbdev, faulting_as,
KBASE_MMU_FAULT_TYPE_PAGE);
}
@@ -582,7 +652,6 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx,
struct tagged_addr *fault_phys_addr;
struct kbase_fault *fault;
u64 fault_pfn, pfn_offset;
- int ret;
int as_no;
u64 dirty_pgds = 0;
@@ -643,8 +712,8 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx,
}
/* Now make this faulting page writable to GPU. */
- ret = kbase_mmu_update_pages_no_flush(kctx, fault_pfn, fault_phys_addr, 1, region->flags,
- region->gpu_alloc->group_id, &dirty_pgds);
+ kbase_mmu_update_pages_no_flush(kbdev, &kctx->mmu, fault_pfn, fault_phys_addr, 1,
+ region->flags, region->gpu_alloc->group_id, &dirty_pgds);
kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, fault_pfn, 1,
kctx->id, dirty_pgds);
@@ -678,31 +747,68 @@ static void kbase_gpu_mmu_handle_permission_fault(struct kbase_context *kctx,
}
#endif
-#define MAX_POOL_LEVEL 2
+/**
+ * estimate_pool_space_required - Determine how much a pool should be grown by to support a future
+ * allocation
+ * @pool: The memory pool to check, including its linked pools
+ * @pages_required: Number of 4KiB pages required for the pool to support a future allocation
+ *
+ * The returned value accounts for the size of @pool and the size of each memory pool linked to
+ * @pool. Hence, the caller should allocate from @pool and (if not already satisfied) from all of
+ * its linked pools.
+ *
+ * Note: this is only an estimate, because even during the calculation the memory pool(s) involved
+ * can be updated to be larger or smaller. Hence, the result is only a guide as to whether an
+ * allocation could succeed, or an estimate of the correct amount to grow the pool by. The caller
+ * should keep attempting an allocation and then re-growing with a new value queried from this
+ * function until the allocation succeeds.
+ *
+ * Return: an estimate of the amount of extra 4KiB pages in @pool that are required to satisfy an
+ * allocation, or 0 if @pool (including its linked pools) is likely to already satisfy the
+ * allocation.
+ */
+static size_t estimate_pool_space_required(struct kbase_mem_pool *pool, const size_t pages_required)
+{
+ size_t pages_still_required;
+
+ for (pages_still_required = pages_required; pool != NULL && pages_still_required;
+ pool = pool->next_pool) {
+ size_t pool_size_4k;
+
+ kbase_mem_pool_lock(pool);
+
+ pool_size_4k = kbase_mem_pool_size(pool) << pool->order;
+ if (pool_size_4k >= pages_still_required)
+ pages_still_required = 0;
+ else
+ pages_still_required -= pool_size_4k;
+
+ kbase_mem_pool_unlock(pool);
+ }
+ return pages_still_required;
+}
/**
* page_fault_try_alloc - Try to allocate memory from a context pool
* @kctx: Context pointer
* @region: Region to grow
- * @new_pages: Number of 4 kB pages to allocate
- * @pages_to_grow: Pointer to variable to store number of outstanding pages on
- * failure. This can be either 4 kB or 2 MB pages, depending on
- * the number of pages requested.
- * @grow_2mb_pool: Pointer to variable to store which pool needs to grow - true
- * for 2 MB, false for 4 kB.
+ * @new_pages: Number of 4 KiB pages to allocate
+ * @pages_to_grow: Pointer to variable to store number of outstanding pages on failure. This can be
+ * either 4 KiB or 2 MiB pages, depending on the number of pages requested.
+ * @grow_2mb_pool: Pointer to variable to store which pool needs to grow - true for 2 MiB, false for
+ * 4 KiB.
* @prealloc_sas: Pointer to kbase_sub_alloc structures
*
- * This function will try to allocate as many pages as possible from the context
- * pool, then if required will try to allocate the remaining pages from the
- * device pool.
+ * This function will try to allocate as many pages as possible from the context pool, then if
+ * required will try to allocate the remaining pages from the device pool.
*
- * This function will not allocate any new memory beyond that is already
- * present in the context or device pools. This is because it is intended to be
- * called with the vm_lock held, which could cause recursive locking if the
- * allocation caused the out-of-memory killer to run.
+ * This function will not allocate any new memory beyond what is already present in the context or
+ * device pools. This is because it is intended to be called whilst the thread has acquired the
+ * region list lock with kbase_gpu_vm_lock(), and a large enough memory allocation whilst that is
+ * held could invoke the OoM killer and cause an effective deadlock with kbase_cpu_vm_close().
*
- * If 2 MB pages are enabled and new_pages is >= 2 MB then pages_to_grow will be
- * a count of 2 MB pages, otherwise it will be a count of 4 kB pages.
+ * If 2 MiB pages are enabled and new_pages is >= 2 MiB then pages_to_grow will be a count of 2 MiB
+ * pages, otherwise it will be a count of 4 KiB pages.
*
* Return: true if successful, false on failure
*/
@@ -711,13 +817,15 @@ static bool page_fault_try_alloc(struct kbase_context *kctx,
int *pages_to_grow, bool *grow_2mb_pool,
struct kbase_sub_alloc **prealloc_sas)
{
- struct tagged_addr *gpu_pages[MAX_POOL_LEVEL] = {NULL};
- struct tagged_addr *cpu_pages[MAX_POOL_LEVEL] = {NULL};
- size_t pages_alloced[MAX_POOL_LEVEL] = {0};
+ size_t total_gpu_pages_alloced = 0;
+ size_t total_cpu_pages_alloced = 0;
struct kbase_mem_pool *pool, *root_pool;
- int pool_level = 0;
bool alloc_failed = false;
size_t pages_still_required;
+ size_t total_mempools_free_4k = 0;
+
+ lockdep_assert_held(&kctx->reg_lock);
+ lockdep_assert_held(&kctx->mem_partials_lock);
if (WARN_ON(region->gpu_alloc->group_id >=
MEMORY_GROUP_MANAGER_NR_GROUPS)) {
@@ -726,42 +834,21 @@ static bool page_fault_try_alloc(struct kbase_context *kctx,
return false;
}
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (new_pages >= (SZ_2M / SZ_4K)) {
+ if (kctx->kbdev->pagesize_2mb && new_pages >= (SZ_2M / SZ_4K)) {
root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id];
*grow_2mb_pool = true;
} else {
-#endif
root_pool = &kctx->mem_pools.small[region->gpu_alloc->group_id];
*grow_2mb_pool = false;
-#ifdef CONFIG_MALI_2MB_ALLOC
}
-#endif
if (region->gpu_alloc != region->cpu_alloc)
new_pages *= 2;
- pages_still_required = new_pages;
-
/* Determine how many pages are in the pools before trying to allocate.
* Don't attempt to allocate & free if the allocation can't succeed.
*/
- for (pool = root_pool; pool != NULL; pool = pool->next_pool) {
- size_t pool_size_4k;
-
- kbase_mem_pool_lock(pool);
-
- pool_size_4k = kbase_mem_pool_size(pool) << pool->order;
- if (pool_size_4k >= pages_still_required)
- pages_still_required = 0;
- else
- pages_still_required -= pool_size_4k;
-
- kbase_mem_pool_unlock(pool);
-
- if (!pages_still_required)
- break;
- }
+ pages_still_required = estimate_pool_space_required(root_pool, new_pages);
if (pages_still_required) {
/* Insufficient pages in pools. Don't try to allocate - just
@@ -772,11 +859,11 @@ static bool page_fault_try_alloc(struct kbase_context *kctx,
return false;
}
- /* Since we've dropped the pool locks, the amount of memory in the pools
- * may change between the above check and the actual allocation.
+ /* Since we're not holding any of the mempool locks, the amount of memory in the pools may
+ * change between the above estimate and the actual allocation.
*/
- pool = root_pool;
- for (pool_level = 0; pool_level < MAX_POOL_LEVEL; pool_level++) {
+ pages_still_required = new_pages;
+ for (pool = root_pool; pool != NULL && pages_still_required; pool = pool->next_pool) {
size_t pool_size_4k;
size_t pages_to_alloc_4k;
size_t pages_to_alloc_4k_per_alloc;
@@ -785,94 +872,92 @@ static bool page_fault_try_alloc(struct kbase_context *kctx,
/* Allocate as much as possible from this pool*/
pool_size_4k = kbase_mem_pool_size(pool) << pool->order;
- pages_to_alloc_4k = MIN(new_pages, pool_size_4k);
+ total_mempools_free_4k += pool_size_4k;
+ pages_to_alloc_4k = MIN(pages_still_required, pool_size_4k);
if (region->gpu_alloc == region->cpu_alloc)
pages_to_alloc_4k_per_alloc = pages_to_alloc_4k;
else
pages_to_alloc_4k_per_alloc = pages_to_alloc_4k >> 1;
- pages_alloced[pool_level] = pages_to_alloc_4k;
if (pages_to_alloc_4k) {
- gpu_pages[pool_level] =
- kbase_alloc_phy_pages_helper_locked(
- region->gpu_alloc, pool,
- pages_to_alloc_4k_per_alloc,
- &prealloc_sas[0]);
+ struct tagged_addr *gpu_pages =
+ kbase_alloc_phy_pages_helper_locked(region->gpu_alloc, pool,
+ pages_to_alloc_4k_per_alloc,
+ &prealloc_sas[0]);
- if (!gpu_pages[pool_level]) {
+ if (!gpu_pages)
alloc_failed = true;
- } else if (region->gpu_alloc != region->cpu_alloc) {
- cpu_pages[pool_level] =
- kbase_alloc_phy_pages_helper_locked(
- region->cpu_alloc, pool,
- pages_to_alloc_4k_per_alloc,
- &prealloc_sas[1]);
-
- if (!cpu_pages[pool_level])
+ else
+ total_gpu_pages_alloced += pages_to_alloc_4k_per_alloc;
+
+ if (!alloc_failed && region->gpu_alloc != region->cpu_alloc) {
+ struct tagged_addr *cpu_pages = kbase_alloc_phy_pages_helper_locked(
+ region->cpu_alloc, pool, pages_to_alloc_4k_per_alloc,
+ &prealloc_sas[1]);
+
+ if (!cpu_pages)
alloc_failed = true;
+ else
+ total_cpu_pages_alloced += pages_to_alloc_4k_per_alloc;
}
}
kbase_mem_pool_unlock(pool);
if (alloc_failed) {
- WARN_ON(!new_pages);
- WARN_ON(pages_to_alloc_4k >= new_pages);
- WARN_ON(pages_to_alloc_4k_per_alloc >= new_pages);
+ WARN_ON(!pages_still_required);
+ WARN_ON(pages_to_alloc_4k >= pages_still_required);
+ WARN_ON(pages_to_alloc_4k_per_alloc >= pages_still_required);
break;
}
- new_pages -= pages_to_alloc_4k;
-
- if (!new_pages)
- break;
-
- pool = pool->next_pool;
- if (!pool)
- break;
+ pages_still_required -= pages_to_alloc_4k;
}
- if (new_pages) {
- /* Allocation was unsuccessful */
- int max_pool_level = pool_level;
-
- pool = root_pool;
-
- /* Free memory allocated so far */
- for (pool_level = 0; pool_level <= max_pool_level;
- pool_level++) {
- kbase_mem_pool_lock(pool);
+ if (pages_still_required) {
+ /* Allocation was unsuccessful. We have dropped the mem_pool lock after allocation,
+ * so must in any case use kbase_free_phy_pages_helper() rather than
+ * kbase_free_phy_pages_helper_locked()
+ */
+ if (total_gpu_pages_alloced > 0)
+ kbase_free_phy_pages_helper(region->gpu_alloc, total_gpu_pages_alloced);
+ if (region->gpu_alloc != region->cpu_alloc && total_cpu_pages_alloced > 0)
+ kbase_free_phy_pages_helper(region->cpu_alloc, total_cpu_pages_alloced);
- if (region->gpu_alloc != region->cpu_alloc) {
- if (pages_alloced[pool_level] &&
- cpu_pages[pool_level])
- kbase_free_phy_pages_helper_locked(
- region->cpu_alloc,
- pool, cpu_pages[pool_level],
- pages_alloced[pool_level]);
+ if (alloc_failed) {
+ /* Note that in allocating from the above memory pools, we always ensure
+ * never to request more than is available in each pool with the pool's
+ * lock held. Hence failing to allocate in such situations would be unusual
+ * and we should cancel the growth instead (as re-growing the memory pool
+ * might not fix the situation)
+ */
+ dev_warn(
+ kctx->kbdev->dev,
+ "Page allocation failure of %zu pages: managed %zu pages, mempool (inc linked pools) had %zu pages available",
+ new_pages, total_gpu_pages_alloced + total_cpu_pages_alloced,
+ total_mempools_free_4k);
+ *pages_to_grow = 0;
+ } else {
+ /* Tell the caller to try to grow the memory pool
+ *
+ * Freeing pages above may have spilled or returned them to the OS, so we
+ * have to take into account how many are still in the pool before giving a
+ * new estimate for growth required of the pool. We can just re-estimate a
+ * new value.
+ */
+ pages_still_required = estimate_pool_space_required(root_pool, new_pages);
+ if (pages_still_required) {
+ *pages_to_grow = pages_still_required;
+ } else {
+ /* It's possible another thread could've grown the pool to be just
+ * big enough after we rolled back the allocation. Request at least
+ * one more page to ensure the caller doesn't fail the growth by
+ * conflating it with the alloc_failed case above
+ */
+ *pages_to_grow = 1u;
}
-
- if (pages_alloced[pool_level] && gpu_pages[pool_level])
- kbase_free_phy_pages_helper_locked(
- region->gpu_alloc,
- pool, gpu_pages[pool_level],
- pages_alloced[pool_level]);
-
- kbase_mem_pool_unlock(pool);
-
- pool = pool->next_pool;
}
- /*
- * If the allocation failed despite there being enough memory in
- * the pool, then just fail. Otherwise, try to grow the memory
- * pool.
- */
- if (alloc_failed)
- *pages_to_grow = 0;
- else
- *pages_to_grow = new_pages;
-
return false;
}
@@ -916,9 +1001,8 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
as_no = faulting_as->number;
kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
- dev_dbg(kbdev->dev,
- "Entering %s %pK, fault_pfn %lld, as_no %d\n",
- __func__, (void *)data, fault_pfn, as_no);
+ dev_dbg(kbdev->dev, "Entering %s %pK, fault_pfn %lld, as_no %d", __func__, (void *)data,
+ fault_pfn, as_no);
/* Grab the context that was already refcounted in kbase_mmu_interrupt()
* Therefore, it cannot be scheduled out of this AS until we explicitly
@@ -941,8 +1025,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
#ifdef CONFIG_MALI_ARBITER_SUPPORT
/* check if we still have GPU */
if (unlikely(kbase_is_gpu_removed(kbdev))) {
- dev_dbg(kbdev->dev,
- "%s: GPU has been removed\n", __func__);
+ dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__);
goto fault_done;
}
#endif
@@ -1005,20 +1088,24 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
goto fault_done;
}
-#ifdef CONFIG_MALI_2MB_ALLOC
- /* Preallocate memory for the sub-allocation structs if necessary */
- for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
- prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
- if (!prealloc_sas[i]) {
- kbase_mmu_report_fault_and_kill(kctx, faulting_as,
- "Failed pre-allocating memory for sub-allocations' metadata",
- fault);
- goto fault_done;
+page_fault_retry:
+ if (kbdev->pagesize_2mb) {
+ /* Preallocate (or re-allocate) memory for the sub-allocation structs if necessary */
+ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
+ if (!prealloc_sas[i]) {
+ prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
+
+ if (!prealloc_sas[i]) {
+ kbase_mmu_report_fault_and_kill(
+ kctx, faulting_as,
+ "Failed pre-allocating memory for sub-allocations' metadata",
+ fault);
+ goto fault_done;
+ }
+ }
}
}
-#endif /* CONFIG_MALI_2MB_ALLOC */
-page_fault_retry:
/* so we have a translation fault,
* let's see if it is for growable memory
*/
@@ -1133,8 +1220,7 @@ page_fault_retry:
/* cap to max vsize */
new_pages = min(new_pages, region->nr_pages - current_backed_size);
- dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault\n",
- new_pages);
+ dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault", new_pages);
if (new_pages == 0) {
struct kbase_mmu_hw_op_param op_param;
@@ -1211,11 +1297,10 @@ page_fault_retry:
* so the no_flush version of insert_pages is used which allows
* us to unlock the MMU as we see fit.
*/
- err = kbase_mmu_insert_pages_no_flush(kbdev, &kctx->mmu,
- region->start_pfn + pfn_offset,
- &kbase_get_gpu_phy_pages(region)[pfn_offset],
- new_pages, region->flags,
- region->gpu_alloc->group_id, &dirty_pgds);
+ err = kbase_mmu_insert_pages_no_flush(
+ kbdev, &kctx->mmu, region->start_pfn + pfn_offset,
+ &kbase_get_gpu_phy_pages(region)[pfn_offset], new_pages, region->flags,
+ region->gpu_alloc->group_id, &dirty_pgds, region, false);
if (err) {
kbase_free_phy_pages_helper(region->gpu_alloc,
new_pages);
@@ -1241,16 +1326,11 @@ page_fault_retry:
if (region->threshold_pages &&
kbase_reg_current_backed_size(region) >
region->threshold_pages) {
-
- dev_dbg(kctx->kbdev->dev,
- "%zu pages exceeded IR threshold %zu\n",
- new_pages + current_backed_size,
- region->threshold_pages);
+ dev_dbg(kctx->kbdev->dev, "%zu pages exceeded IR threshold %zu",
+ new_pages + current_backed_size, region->threshold_pages);
if (kbase_mmu_switch_to_ir(kctx, region) >= 0) {
- dev_dbg(kctx->kbdev->dev,
- "Get region %pK for IR\n",
- (void *)region);
+ dev_dbg(kctx->kbdev->dev, "Get region %pK for IR", (void *)region);
kbase_va_region_alloc_get(kctx, region);
}
}
@@ -1339,8 +1419,7 @@ page_fault_retry:
* Otherwise fail the allocation.
*/
if (pages_to_grow > 0) {
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (grow_2mb_pool) {
+ if (kbdev->pagesize_2mb && grow_2mb_pool) {
/* Round page requirement up to nearest 2 MB */
struct kbase_mem_pool *const lp_mem_pool =
&kctx->mem_pools.large[group_id];
@@ -1349,23 +1428,22 @@ page_fault_retry:
((1 << lp_mem_pool->order) - 1))
>> lp_mem_pool->order;
- ret = kbase_mem_pool_grow(lp_mem_pool, pages_to_grow, kctx->task);
+ ret = kbase_mem_pool_grow(lp_mem_pool,
+ pages_to_grow, kctx->task);
} else {
-#endif
struct kbase_mem_pool *const mem_pool =
&kctx->mem_pools.small[group_id];
- ret = kbase_mem_pool_grow(mem_pool, pages_to_grow, kctx->task);
-#ifdef CONFIG_MALI_2MB_ALLOC
+ ret = kbase_mem_pool_grow(mem_pool,
+ pages_to_grow, kctx->task);
}
-#endif
}
if (ret < 0) {
/* failed to extend, handle as a normal PF */
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
"Page allocation failure", fault);
} else {
- dev_dbg(kbdev->dev, "Try again after pool_grow\n");
+ dev_dbg(kbdev->dev, "Try again after pool_grow");
goto page_fault_retry;
}
}
@@ -1392,7 +1470,7 @@ fault_done:
release_ctx(kbdev, kctx);
atomic_dec(&kbdev->faults_pending);
- dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK\n", (void *)data);
+ dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK", (void *)data);
}
static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev,
@@ -1448,19 +1526,27 @@ alloc_free:
return KBASE_MMU_INVALID_PGD_ADDRESS;
}
-/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the
- * new table from the pool if needed and possible
+/**
+ * mmu_get_next_pgd() - Given PGD PFN for level N, return PGD PFN for level N+1
+ *
+ * @kbdev: Device pointer.
+ * @mmut: GPU MMU page table.
+ * @pgd: Physical address of level N page directory.
+ * @vpfn: The virtual page frame number.
+ * @level: The level of MMU page table (N).
+ *
+ * Return:
+ * * 0 - OK
+ * * -EFAULT - level N+1 PGD does not exist
+ * * -EINVAL - kmap() failed for level N PGD PFN
*/
static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
- phys_addr_t *pgd, u64 vpfn, int level, bool *newly_created_pgd,
- u64 *dirty_pgds)
+ phys_addr_t *pgd, u64 vpfn, int level)
{
u64 *page;
phys_addr_t target_pgd;
struct page *p;
- KBASE_DEBUG_ASSERT(*pgd);
-
lockdep_assert_held(&mmut->mmu_lock);
/*
@@ -1473,49 +1559,15 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *
p = pfn_to_page(PFN_DOWN(*pgd));
page = kmap(p);
if (page == NULL) {
- dev_warn(kbdev->dev, "%s: kmap failure\n", __func__);
+ dev_err(kbdev->dev, "%s: kmap failure", __func__);
return -EINVAL;
}
if (!kbdev->mmu_mode->pte_is_valid(page[vpfn], level)) {
- enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE;
- unsigned int current_valid_entries;
- u64 managed_pte;
-
- target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut);
- if (target_pgd == KBASE_MMU_INVALID_PGD_ADDRESS) {
- dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure\n",
- __func__);
- kunmap(p);
- return -ENOMEM;
- }
-
- current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(page);
- kbdev->mmu_mode->entry_set_pte(&managed_pte, target_pgd);
- page[vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte(
- kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, managed_pte);
- kbdev->mmu_mode->set_num_valid_entries(page, current_valid_entries + 1);
-
- /* Rely on the caller to update the address space flags. */
- if (newly_created_pgd && !*newly_created_pgd) {
- *newly_created_pgd = true;
- /* If code reaches here we know parent PGD of target PGD was
- * not newly created and should be flushed.
- */
- flush_op = KBASE_MMU_OP_FLUSH_PT;
-
- if (dirty_pgds)
- *dirty_pgds |= 1ULL << level;
- }
-
- /* A new valid entry is added to an existing PGD. Perform the
- * invalidate operation for GPU cache as it could be having a
- * cacheline that contains the entry (in an invalid form).
- */
- kbase_mmu_sync_pgd(kbdev, mmut->kctx,
- *pgd + (vpfn * sizeof(u64)),
- kbase_dma_addr(p) + (vpfn * sizeof(u64)),
- sizeof(u64), flush_op);
+ dev_dbg(kbdev->dev, "%s: invalid PTE at level %d vpfn 0x%llx", __func__, level,
+ vpfn);
+ kunmap(p);
+ return -EFAULT;
} else {
target_pgd = kbdev->mmu_mode->pte_to_phy_addr(
kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
@@ -1528,12 +1580,69 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *
return 0;
}
+/**
+ * mmu_get_lowest_valid_pgd() - Find a valid PGD at or closest to in_level
+ *
+ * @kbdev: Device pointer.
+ * @mmut: GPU MMU page table.
+ * @vpfn: The virtual page frame number.
+ * @in_level: The level of MMU page table (N).
+ * @out_level: Set to the level of the lowest valid PGD found on success.
+ * Invalid on error.
+ * @out_pgd: Set to the lowest valid PGD found on success.
+ * Invalid on error.
+ *
+ * Does a page table walk starting from top level (L0) to in_level to find a valid PGD at or
+ * closest to in_level
+ *
+ * Terminology:
+ * Level-0 = Top-level = highest
+ * Level-3 = Bottom-level = lowest
+ *
+ * Return:
+ * * 0 - OK
+ * * -EINVAL - kmap() failed during page table walk.
+ */
+static int mmu_get_lowest_valid_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, int in_level, int *out_level, phys_addr_t *out_pgd)
+{
+ phys_addr_t pgd;
+ int l;
+ int err = 0;
+
+ lockdep_assert_held(&mmut->mmu_lock);
+ pgd = mmut->pgd;
+
+ for (l = MIDGARD_MMU_TOPLEVEL; l < in_level; l++) {
+ err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l);
+
+ /* Handle failure condition */
+ if (err) {
+ dev_dbg(kbdev->dev,
+ "%s: mmu_get_next_pgd() failed to find a valid pgd at level %d",
+ __func__, l + 1);
+ break;
+ }
+ }
+
+ *out_pgd = pgd;
+ *out_level = l;
+
+ /* -EFAULT indicates that pgd param was valid but the next pgd entry at vpfn was invalid.
+ * This implies that we have found the lowest valid pgd. Reset the error code.
+ */
+ if (err == -EFAULT)
+ err = 0;
+
+ return err;
+}
+
/*
- * Returns the PGD for the specified level of translation
+ * On success, sets out_pgd to the PGD for the specified level of translation
+ * Returns -EFAULT if a valid PGD is not found
*/
static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
- int level, phys_addr_t *out_pgd, bool *newly_created_pgd,
- u64 *dirty_pgds)
+ int level, phys_addr_t *out_pgd)
{
phys_addr_t pgd;
int l;
@@ -1542,13 +1651,12 @@ static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_tab
pgd = mmut->pgd;
for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) {
- int err =
- mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l, newly_created_pgd, dirty_pgds);
+ int err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l);
/* Handle failure condition */
if (err) {
- dev_dbg(kbdev->dev,
- "%s: mmu_get_next_pgd failure at level %d\n",
- __func__, l);
+ dev_err(kbdev->dev,
+ "%s: mmu_get_next_pgd() failed to find a valid pgd at level %d",
+ __func__, l + 1);
return err;
}
}
@@ -1558,17 +1666,10 @@ static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_tab
return 0;
}
-static int mmu_get_bottom_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
- phys_addr_t *out_pgd, bool *newly_created_pgd, u64 *dirty_pgds)
-{
- return mmu_get_pgd_at_level(kbdev, mmut, vpfn, MIDGARD_MMU_BOTTOMLEVEL, out_pgd,
- newly_created_pgd, dirty_pgds);
-}
-
static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut, u64 from_vpfn,
u64 to_vpfn, u64 *dirty_pgds,
- struct list_head *free_pgds_list)
+ struct tagged_addr *phys, bool ignore_page_migration)
{
u64 vpfn = from_vpfn;
struct kbase_mmu_mode const *mmu_mode;
@@ -1580,6 +1681,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
lockdep_assert_held(&mmut->mmu_lock);
mmu_mode = kbdev->mmu_mode;
+ kbase_mmu_reset_free_pgds_list(mmut);
while (vpfn < to_vpfn) {
unsigned int idx = vpfn & 0x1FF;
@@ -1621,8 +1723,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
pcount = count;
break;
default:
- dev_warn(kbdev->dev, "%sNo support for ATEs at level %d\n",
- __func__, level);
+ dev_warn(kbdev->dev, "%sNo support for ATEs at level %d", __func__, level);
goto next;
}
@@ -1641,11 +1742,10 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
if (!num_of_valid_entries) {
kunmap(p);
- list_add(&p->lru, free_pgds_list);
+ kbase_mmu_add_to_free_pgds_list(mmut, p);
kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level,
- KBASE_MMU_OP_NONE, dirty_pgds,
- free_pgds_list);
+ KBASE_MMU_OP_NONE, dirty_pgds);
vpfn += count;
continue;
}
@@ -1662,12 +1762,34 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
next:
vpfn += count;
}
+
+ /* If page migration is enabled: the only way to recover from failure
+ * is to mark all pages as not movable. It is not predictable what's
+ * going to happen to these pages at this stage. They might return
+ * movable once they are returned to a memory pool.
+ */
+ if (kbase_page_migration_enabled && !ignore_page_migration && phys) {
+ const u64 num_pages = to_vpfn - from_vpfn + 1;
+ u64 i;
+
+ for (i = 0; i < num_pages; i++) {
+ struct page *phys_page = as_page(phys[i]);
+ struct kbase_page_metadata *page_md = kbase_page_private(phys_page);
+
+ if (page_md) {
+ spin_lock(&page_md->migrate_lock);
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
+ spin_unlock(&page_md->migrate_lock);
+ }
+ }
+ }
}
static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut, const u64 vpfn,
size_t nr, u64 dirty_pgds,
- enum kbase_caller_mmu_sync_info mmu_sync_info)
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
+ bool insert_pages_failed)
{
struct kbase_mmu_hw_op_param op_param;
int as_nr = 0;
@@ -1692,41 +1814,220 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev,
*
* Operations that affect the whole GPU cache shall only be done if it's
* impossible to update physical ranges.
+ *
+ * On GPUs where flushing by physical address range is supported,
+ * full cache flush is done when an error occurs during
+ * insert_pages() to keep the error handling simpler.
*/
- if (mmu_flush_cache_on_gpu_ctrl(kbdev))
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev) && !insert_pages_failed)
mmu_invalidate(kbdev, mmut->kctx, as_nr, &op_param);
else
mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param);
}
-/*
- * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn'
+/**
+ * update_parent_pgds() - Updates the page table from bottom level towards
+ * the top level to insert a new ATE
+ *
+ * @kbdev: Device pointer.
+ * @mmut: GPU MMU page table.
+ * @cur_level: The level of MMU page table where the ATE needs to be added.
+ * The bottom PGD level.
+ * @insert_level: The level of MMU page table where the chain of newly allocated
+ * PGDs needs to be linked-in/inserted.
+ *                The top-most PGD level to be updated.
+ * @insert_vpfn: The virtual page frame number for the ATE.
+ * @pgds_to_insert: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) that contains
+ * the physical addresses of newly allocated PGDs from index
+ * insert_level+1 to cur_level, and an existing PGD at index
+ * insert_level.
+ *
+ * The newly allocated PGDs are linked from the bottom level up and inserted into the PGD
+ * at insert_level which already exists in the MMU Page Tables. Migration status is also
+ * updated for all the newly allocated PGD pages.
+ *
+ * Return:
+ * * 0 - OK
+ * * -EFAULT - level N+1 PGD does not exist
+ * * -EINVAL - kmap() failed for level N PGD PFN
*/
-int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
- struct tagged_addr phys, size_t nr,
- unsigned long flags, int const group_id,
- enum kbase_caller_mmu_sync_info mmu_sync_info)
+static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ int cur_level, int insert_level, u64 insert_vpfn,
+ phys_addr_t *pgds_to_insert)
+{
+ int pgd_index;
+ int err = 0;
+
+ /* Add a PTE for the new PGD page at pgd_index into the parent PGD at (pgd_index-1)
+ * Loop runs from the bottom-most to the top-most level so that all entries in the chain
+ * are valid when they are inserted into the MMU Page table via the insert_level PGD.
+ */
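+	/* Illustrative example: with the bottom level at 3 and 9 index bits per
+	 * level, the entry index used in the parent PGD below is
+	 *   (insert_vpfn >> ((3 - parent_index) * 9)) & 0x1FF
+	 * so a level-2 parent uses bits [17:9] of insert_vpfn.
+	 */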
+ for (pgd_index = cur_level; pgd_index > insert_level; pgd_index--) {
+ int parent_index = pgd_index - 1;
+ phys_addr_t parent_pgd = pgds_to_insert[parent_index];
+ unsigned int current_valid_entries;
+ u64 pte;
+ phys_addr_t target_pgd = pgds_to_insert[pgd_index];
+ u64 parent_vpfn = (insert_vpfn >> ((3 - parent_index) * 9)) & 0x1FF;
+ struct page *parent_page = pfn_to_page(PFN_DOWN(parent_pgd));
+ u64 *parent_page_va;
+
+ if (WARN_ON_ONCE(target_pgd == KBASE_MMU_INVALID_PGD_ADDRESS)) {
+ err = -EFAULT;
+ goto failure_recovery;
+ }
+
+ parent_page_va = kmap(parent_page);
+ if (unlikely(parent_page_va == NULL)) {
+ dev_err(kbdev->dev, "%s: kmap failure", __func__);
+ err = -EINVAL;
+ goto failure_recovery;
+ }
+
+ current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(parent_page_va);
+
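+		/* Build a PTE that points at the child PGD and let the memory
+		 * group manager adjust it before it is written into the parent
+		 * PGD entry.
+		 */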
+ kbdev->mmu_mode->entry_set_pte(&pte, target_pgd);
+ parent_page_va[parent_vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte(
+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, parent_index, pte);
+ kbdev->mmu_mode->set_num_valid_entries(parent_page_va, current_valid_entries + 1);
+ kunmap(parent_page);
+
+ if (parent_index != insert_level) {
+ /* Newly allocated PGDs */
+ kbase_mmu_sync_pgd_cpu(
+ kbdev, kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)),
+ sizeof(u64));
+ } else {
+ /* A new valid entry is added to an existing PGD. Perform the
+ * invalidate operation for GPU cache as it could be having a
+ * cacheline that contains the entry (in an invalid form).
+ */
+ kbase_mmu_sync_pgd(
+ kbdev, mmut->kctx, parent_pgd + (parent_vpfn * sizeof(u64)),
+ kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)),
+ sizeof(u64), KBASE_MMU_OP_FLUSH_PT);
+ }
+
+ /* Update the new target_pgd page to its stable state */
+ if (kbase_page_migration_enabled) {
+ struct kbase_page_metadata *page_md =
+ kbase_page_private(phys_to_page(target_pgd));
+
+ spin_lock(&page_md->migrate_lock);
+
+ WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != ALLOCATE_IN_PROGRESS ||
+ IS_PAGE_ISOLATED(page_md->status));
+
+ if (mmut->kctx) {
+ page_md->status = PAGE_STATUS_SET(page_md->status, PT_MAPPED);
+ page_md->data.pt_mapped.mmut = mmut;
+ page_md->data.pt_mapped.pgd_vpfn_level =
+ PGD_VPFN_LEVEL_SET(insert_vpfn, parent_index);
+ } else {
+ page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE);
+ }
+
+ spin_unlock(&page_md->migrate_lock);
+ }
+ }
+
+ return 0;
+
+failure_recovery:
+ /* Cleanup PTEs from PGDs. The Parent PGD in the loop above is just "PGD" here */
+ for (; pgd_index < cur_level; pgd_index++) {
+ phys_addr_t pgd = pgds_to_insert[pgd_index];
+ struct page *pgd_page = pfn_to_page(PFN_DOWN(pgd));
+ u64 *pgd_page_va = kmap(pgd_page);
+ u64 vpfn = (insert_vpfn >> ((3 - pgd_index) * 9)) & 0x1FF;
+
+ kbdev->mmu_mode->entries_invalidate(&pgd_page_va[vpfn], 1);
+ kunmap(pgd_page);
+ }
+
+ return err;
+}
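+
+/* Example layout of pgds_to_insert for update_parent_pgds(), for illustration
+ * only: with insert_level == 1 and cur_level == 3 the caller provides
+ *
+ *   pgds_to_insert[1] = existing level-1 PGD (insert_level)
+ *   pgds_to_insert[2] = newly allocated level-2 PGD
+ *   pgds_to_insert[3] = newly allocated level-3 (bottom) PGD
+ *
+ * and the loop links [3] into [2] first, then [2] into [1], so every entry in
+ * the chain is valid by the time it becomes reachable from insert_level.
+ */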
+
+/**
+ * mmu_insert_alloc_pgds() - allocate memory for PGDs from level_low to
+ * level_high (inclusive)
+ *
+ * @kbdev: Device pointer.
+ * @mmut: GPU MMU page table.
+ * @level_low: The lower bound for the levels for which the PGD allocs are required
+ * @level_high: The higher bound for the levels for which the PGD allocs are required
+ * @new_pgds: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) to write the
+ * newly allocated PGD addresses to.
+ *
+ * Numerically, level_low < level_high, not to be confused with top level and
+ * bottom level concepts for MMU PGDs. They are only used as low and high bounds
+ * in an incrementing for-loop.
+ *
+ * Return:
+ * * 0 - OK
+ * * -ENOMEM - allocation failed for a PGD.
+ */
+static int mmu_insert_alloc_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ phys_addr_t *new_pgds, int level_low, int level_high)
+{
+ int err = 0;
+ int i;
+
+ lockdep_assert_held(&mmut->mmu_lock);
+
+ for (i = level_low; i <= level_high; i++) {
+ do {
+ new_pgds[i] = kbase_mmu_alloc_pgd(kbdev, mmut);
+ if (new_pgds[i] != KBASE_MMU_INVALID_PGD_ADDRESS)
+ break;
+
+ rt_mutex_unlock(&mmut->mmu_lock);
+ err = kbase_mem_pool_grow(&kbdev->mem_pools.small[mmut->group_id],
+ level_high, NULL);
+ rt_mutex_lock(&mmut->mmu_lock);
+ if (err) {
+ dev_err(kbdev->dev, "%s: kbase_mem_pool_grow() returned error %d",
+ __func__, err);
+
+ /* Free all PGDs allocated in previous successful iterations
+ * from (i-1) to level_low
+ */
+ for (i = (i - 1); i >= level_low; i--) {
+ if (new_pgds[i] != KBASE_MMU_INVALID_PGD_ADDRESS)
+ kbase_mmu_free_pgd(kbdev, mmut, new_pgds[i]);
+ }
+
+ return err;
+ }
+ } while (1);
+ }
+
+ return 0;
+}
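+
+/* The allocation loop above follows the usual kbase retry pattern: if
+ * kbase_mmu_alloc_pgd() fails, mmu_lock is dropped, the small-page memory
+ * pool is grown via kbase_mem_pool_grow(), the lock is re-taken and the
+ * allocation is retried, so the pool grow never happens while holding
+ * mmu_lock.
+ */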
+
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn,
+ struct tagged_addr phys, size_t nr, unsigned long flags,
+ int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info,
+ bool ignore_page_migration)
{
phys_addr_t pgd;
u64 *pgd_page;
- /* In case the insert_single_page only partially completes
- * we need to be able to recover
- */
- bool recover_required = false;
- u64 start_vpfn = vpfn;
- size_t recover_count = 0;
+ u64 insert_vpfn = start_vpfn;
size_t remain = nr;
int err;
struct kbase_device *kbdev;
- enum kbase_mmu_op_type flush_op;
u64 dirty_pgds = 0;
- LIST_HEAD(free_pgds_list);
+ unsigned int i;
+ phys_addr_t new_pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
+ enum kbase_mmu_op_type flush_op;
+ struct kbase_mmu_table *mmut = &kctx->mmu;
+ int l, cur_level, insert_level;
if (WARN_ON(kctx == NULL))
return -EINVAL;
/* 64-bit address range is the max */
- KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+ KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE));
kbdev = kctx->kbdev;
@@ -1734,12 +2035,25 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
if (nr == 0)
return 0;
- rt_mutex_lock(&kctx->mmu.mmu_lock);
+ /* If page migration is enabled, pages involved in multiple GPU mappings
+ * are always treated as not movable.
+ */
+ if (kbase_page_migration_enabled && !ignore_page_migration) {
+ struct page *phys_page = as_page(phys);
+ struct kbase_page_metadata *page_md = kbase_page_private(phys_page);
+
+ if (page_md) {
+ spin_lock(&page_md->migrate_lock);
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
+ spin_unlock(&page_md->migrate_lock);
+ }
+ }
+
+ rt_mutex_lock(&mmut->mmu_lock);
while (remain) {
- unsigned int i;
- unsigned int index = vpfn & 0x1FF;
- unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+ unsigned int vindex = insert_vpfn & 0x1FF;
+ unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex;
struct page *p;
register unsigned int num_of_valid_entries;
bool newly_created_pgd = false;
@@ -1747,61 +2061,61 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
if (count > remain)
count = remain;
+ cur_level = MIDGARD_MMU_BOTTOMLEVEL;
+ insert_level = cur_level;
+
/*
- * Repeatedly calling mmu_get_bottom_pgd() is clearly
+ * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly
* suboptimal. We don't have to re-parse the whole tree
* each time (just cache the l0-l2 sequence).
* On the other hand, it's only a gain when we map more than
* 256 pages at once (on average). Do we really care?
*/
- do {
- err = mmu_get_bottom_pgd(kbdev, &kctx->mmu, vpfn, &pgd, &newly_created_pgd,
- &dirty_pgds);
- if (err != -ENOMEM)
- break;
- /* Fill the memory pool with enough pages for
- * the page walk to succeed
- */
- rt_mutex_unlock(&kctx->mmu.mmu_lock);
- err = kbase_mem_pool_grow(&kbdev->mem_pools.small[kctx->mmu.group_id],
- MIDGARD_MMU_BOTTOMLEVEL, kctx->task);
- rt_mutex_lock(&kctx->mmu.mmu_lock);
- } while (!err);
+		/* insert_level < cur_level if there's no valid PGD for cur_level and insert_vpfn */
+ err = mmu_get_lowest_valid_pgd(kbdev, mmut, insert_vpfn, cur_level, &insert_level,
+ &pgd);
+
if (err) {
- dev_warn(kbdev->dev, "%s: mmu_get_bottom_pgd failure\n",
- __func__);
- if (recover_required) {
- /* Invalidate the pages we have partially
- * completed
- */
- mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn,
- start_vpfn + recover_count,
- &dirty_pgds, &free_pgds_list);
- }
+ dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d",
+ __func__, err);
goto fail_unlock;
}
+ /* No valid pgd at cur_level */
+ if (insert_level != cur_level) {
+ /* Allocate new pgds for all missing levels from the required level
+ * down to the lowest valid pgd at insert_level
+ */
+ err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1),
+ cur_level);
+ if (err)
+ goto fail_unlock;
+
+ newly_created_pgd = true;
+
+ new_pgds[insert_level] = pgd;
+
+ /* If we didn't find an existing valid pgd at cur_level,
+ * we've now allocated one. The ATE in the next step should
+ * be inserted in this newly allocated pgd.
+ */
+ pgd = new_pgds[cur_level];
+ }
+
p = pfn_to_page(PFN_DOWN(pgd));
pgd_page = kmap(p);
if (!pgd_page) {
- dev_warn(kbdev->dev, "%s: kmap failure\n", __func__);
- if (recover_required) {
- /* Invalidate the pages we have partially
- * completed
- */
- mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn,
- start_vpfn + recover_count,
- &dirty_pgds, &free_pgds_list);
- }
+ dev_err(kbdev->dev, "%s: kmap failure", __func__);
err = -ENOMEM;
- goto fail_unlock;
+
+ goto fail_unlock_free_pgds;
}
num_of_valid_entries =
kbdev->mmu_mode->get_num_valid_entries(pgd_page);
for (i = 0; i < count; i++) {
- unsigned int ofs = index + i;
+ unsigned int ofs = vindex + i;
/* Fail if the current page is a valid ATE entry */
KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
@@ -1813,49 +2127,166 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
kbdev->mmu_mode->set_num_valid_entries(
pgd_page, num_of_valid_entries + count);
- vpfn += count;
- remain -= count;
-
- if (count > 0 && !newly_created_pgd)
- dirty_pgds |= 1ULL << MIDGARD_MMU_BOTTOMLEVEL;
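+		/* Record the shallowest pre-existing level that was modified:
+		 * the PGD at insert_level when new PGDs were linked in,
+		 * otherwise the bottom level that received the new ATEs. The
+		 * later flush uses this to skip levels that were not touched.
+		 */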
+ dirty_pgds |= 1ULL << (newly_created_pgd ? insert_level : MIDGARD_MMU_BOTTOMLEVEL);
/* MMU cache flush operation here will depend on whether bottom level
* PGD is newly created or not.
*
- * If bottom level PGD is newly created then no cache maintenance is
+ * If bottom level PGD is newly created then no GPU cache maintenance is
* required as the PGD will not exist in GPU cache. Otherwise GPU cache
* maintenance is required for existing PGD.
*/
flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT;
- kbase_mmu_sync_pgd(kbdev, kctx, pgd + (index * sizeof(u64)),
- kbase_dma_addr(p) + (index * sizeof(u64)), count * sizeof(u64),
+ kbase_mmu_sync_pgd(kbdev, kctx, pgd + (vindex * sizeof(u64)),
+ kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64),
flush_op);
+ if (newly_created_pgd) {
+ err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn,
+ new_pgds);
+ if (err) {
+ dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)",
+ __func__, err);
+
+ kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count);
+
+ kunmap(p);
+ goto fail_unlock_free_pgds;
+ }
+ }
+
+ insert_vpfn += count;
+ remain -= count;
kunmap(p);
- /* We have started modifying the page table.
- * If further pages need inserting and fail we need to undo what
- * has already taken place
- */
- recover_required = true;
- recover_count += count;
}
- rt_mutex_unlock(&kctx->mmu.mmu_lock);
- mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds,
- mmu_sync_info);
+ rt_mutex_unlock(&mmut->mmu_lock);
+
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds, mmu_sync_info,
+ false);
return 0;
+fail_unlock_free_pgds:
+ /* Free the pgds allocated by us from insert_level+1 to bottom level */
+ for (l = cur_level; l > insert_level; l--)
+ kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
+
fail_unlock:
- rt_mutex_unlock(&kctx->mmu.mmu_lock);
+ if (insert_vpfn != start_vpfn) {
+ /* Invalidate the pages we have partially completed */
+ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, &dirty_pgds,
+ NULL, true);
+ }
+
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds, mmu_sync_info,
+ true);
+ kbase_mmu_free_pgds_list(kbdev, mmut);
+ rt_mutex_unlock(&mmut->mmu_lock);
- mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds,
- mmu_sync_info);
- kbase_mmu_free_pgds_list(kbdev, &kctx->mmu, &free_pgds_list);
return err;
}
+int kbase_mmu_insert_single_imported_page(struct kbase_context *kctx, u64 vpfn,
+ struct tagged_addr phys, size_t nr, unsigned long flags,
+ int const group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
+{
+ /* The aliasing sink page has metadata and shall be moved to NOT_MOVABLE. */
+ return kbase_mmu_insert_single_page(kctx, vpfn, phys, nr, flags, group_id, mmu_sync_info,
+ false);
+}
+
+int kbase_mmu_insert_single_aliased_page(struct kbase_context *kctx, u64 vpfn,
+ struct tagged_addr phys, size_t nr, unsigned long flags,
+ int const group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
+{
+ /* The aliasing sink page has metadata and shall be moved to NOT_MOVABLE. */
+ return kbase_mmu_insert_single_page(kctx, vpfn, phys, nr, flags, group_id, mmu_sync_info,
+ false);
+}
+
+static void kbase_mmu_progress_migration_on_insert(struct tagged_addr phys,
+ struct kbase_va_region *reg,
+ struct kbase_mmu_table *mmut, const u64 vpfn)
+{
+ struct page *phys_page = as_page(phys);
+ struct kbase_page_metadata *page_md = kbase_page_private(phys_page);
+
+ spin_lock(&page_md->migrate_lock);
+
+	/* If no GPU va region is given: the metadata provided is
+	 * invalid.
+ *
+ * If the page is already allocated and mapped: this is
+ * an additional GPU mapping, probably to create a memory
+ * alias, which means it is no longer possible to migrate
+ * the page easily because tracking all the GPU mappings
+ * would be too costly.
+ *
+ * In any case: the page becomes not movable. It is kept
+ * alive, but attempts to migrate it will fail. The page
+ * will be freed if it is still not movable when it returns
+ * to a memory pool. Notice that the movable flag is not
+ * cleared because that would require taking the page lock.
+ */
+ if (!reg || PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATED_MAPPED) {
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
+ } else if (PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATE_IN_PROGRESS) {
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)ALLOCATED_MAPPED);
+ page_md->data.mapped.reg = reg;
+ page_md->data.mapped.mmut = mmut;
+ page_md->data.mapped.vpfn = vpfn;
+ }
+
+ spin_unlock(&page_md->migrate_lock);
+}
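+
+/* State transitions performed above, summarised for illustration:
+ *
+ *   ALLOCATE_IN_PROGRESS + valid reg  -> ALLOCATED_MAPPED (reg/mmut/vpfn recorded)
+ *   ALLOCATED_MAPPED (extra mapping)  -> NOT_MOVABLE
+ *   no reg supplied                   -> NOT_MOVABLE
+ */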
+
+static void kbase_mmu_progress_migration_on_teardown(struct kbase_device *kbdev,
+ struct tagged_addr *phys, size_t requested_nr)
+{
+ size_t i;
+
+ for (i = 0; i < requested_nr; i++) {
+ struct page *phys_page = as_page(phys[i]);
+ struct kbase_page_metadata *page_md = kbase_page_private(phys_page);
+
+ /* Skip the 4KB page that is part of a large page, as the large page is
+ * excluded from the migration process.
+ */
+ if (is_huge(phys[i]) || is_partial(phys[i]))
+ continue;
+
+ if (page_md) {
+ u8 status;
+
+ spin_lock(&page_md->migrate_lock);
+ status = PAGE_STATUS_GET(page_md->status);
+
+ if (status == ALLOCATED_MAPPED) {
+ if (IS_PAGE_ISOLATED(page_md->status)) {
+ page_md->status = PAGE_STATUS_SET(
+ page_md->status, (u8)FREE_ISOLATED_IN_PROGRESS);
+ page_md->data.free_isolated.kbdev = kbdev;
+ /* At this point, we still have a reference
+ * to the page via its page migration metadata,
+ * and any page with the FREE_ISOLATED_IN_PROGRESS
+ * status will subsequently be freed in either
+ * kbase_page_migrate() or kbase_page_putback()
+ */
+ phys[i] = as_tagged(0);
+ } else
+ page_md->status = PAGE_STATUS_SET(page_md->status,
+ (u8)FREE_IN_PROGRESS);
+ }
+
+ spin_unlock(&page_md->migrate_lock);
+ }
+ }
+}
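+
+/* Teardown state transitions performed above, summarised for illustration:
+ * an ALLOCATED_MAPPED page becomes FREE_IN_PROGRESS, or
+ * FREE_ISOLATED_IN_PROGRESS if it is currently isolated, in which case its
+ * entry in phys[] is cleared and the page is freed later by
+ * kbase_page_migrate() or kbase_page_putback().
+ */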
+
u64 kbase_mmu_create_ate(struct kbase_device *const kbdev,
struct tagged_addr const phy, unsigned long const flags,
int const level, int const group_id)
@@ -1869,7 +2300,8 @@ u64 kbase_mmu_create_ate(struct kbase_device *const kbdev,
int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
const u64 start_vpfn, struct tagged_addr *phys, size_t nr,
- unsigned long flags, int const group_id, u64 *dirty_pgds)
+ unsigned long flags, int const group_id, u64 *dirty_pgds,
+ struct kbase_va_region *reg, bool ignore_page_migration)
{
phys_addr_t pgd;
u64 *pgd_page;
@@ -1877,7 +2309,9 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
size_t remain = nr;
int err;
struct kbase_mmu_mode const *mmu_mode;
- LIST_HEAD(free_pgds_list);
+ unsigned int i;
+ phys_addr_t new_pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
+ int l, cur_level, insert_level;
/* Note that 0 is a valid start_vpfn */
/* 64-bit address range is the max */
@@ -1892,14 +2326,12 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
rt_mutex_lock(&mmut->mmu_lock);
while (remain) {
- unsigned int i;
unsigned int vindex = insert_vpfn & 0x1FF;
unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex;
struct page *p;
- int cur_level;
register unsigned int num_of_valid_entries;
- enum kbase_mmu_op_type flush_op;
bool newly_created_pgd = false;
+ enum kbase_mmu_op_type flush_op;
if (count > remain)
count = remain;
@@ -1909,55 +2341,53 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
else
cur_level = MIDGARD_MMU_BOTTOMLEVEL;
+ insert_level = cur_level;
+
/*
- * Repeatedly calling mmu_get_pgd_at_level() is clearly
+ * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly
* suboptimal. We don't have to re-parse the whole tree
* each time (just cache the l0-l2 sequence).
* On the other hand, it's only a gain when we map more than
* 256 pages at once (on average). Do we really care?
*/
- do {
- err = mmu_get_pgd_at_level(kbdev, mmut, insert_vpfn, cur_level, &pgd,
- &newly_created_pgd, dirty_pgds);
- if (err != -ENOMEM)
- break;
- /* Fill the memory pool with enough pages for
- * the page walk to succeed
- */
- rt_mutex_unlock(&mmut->mmu_lock);
- err = kbase_mem_pool_grow(&kbdev->mem_pools.small[mmut->group_id],
- cur_level, mmut->kctx ? mmut->kctx->task : NULL);
- rt_mutex_lock(&mmut->mmu_lock);
- } while (!err);
+		/* insert_level < cur_level if there's no valid PGD for cur_level and insert_vpfn */
+ err = mmu_get_lowest_valid_pgd(kbdev, mmut, insert_vpfn, cur_level, &insert_level,
+ &pgd);
if (err) {
- dev_warn(kbdev->dev, "%s: mmu_get_pgd_at_level failure\n", __func__);
- if (insert_vpfn != start_vpfn) {
- /* Invalidate the pages we have partially
- * completed
- */
- mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn,
- insert_vpfn, dirty_pgds,
- &free_pgds_list);
- }
+ dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d",
+ __func__, err);
goto fail_unlock;
}
+ /* No valid pgd at cur_level */
+ if (insert_level != cur_level) {
+ /* Allocate new pgds for all missing levels from the required level
+ * down to the lowest valid pgd at insert_level
+ */
+ err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1),
+ cur_level);
+ if (err)
+ goto fail_unlock;
+
+ newly_created_pgd = true;
+
+ new_pgds[insert_level] = pgd;
+
+ /* If we didn't find an existing valid pgd at cur_level,
+ * we've now allocated one. The ATE in the next step should
+ * be inserted in this newly allocated pgd.
+ */
+ pgd = new_pgds[cur_level];
+ }
+
p = pfn_to_page(PFN_DOWN(pgd));
pgd_page = kmap(p);
if (!pgd_page) {
- dev_warn(kbdev->dev, "%s: kmap failure\n",
- __func__);
- if (insert_vpfn != start_vpfn) {
- /* Invalidate the pages we have partially
- * completed
- */
- mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn,
- insert_vpfn, dirty_pgds,
- &free_pgds_list);
- }
+ dev_err(kbdev->dev, "%s: kmap failure", __func__);
err = -ENOMEM;
- goto fail_unlock;
+
+ goto fail_unlock_free_pgds;
}
num_of_valid_entries =
@@ -1985,28 +2415,29 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
*target = kbase_mmu_create_ate(kbdev,
phys[i], flags, cur_level, group_id);
+
+ /* If page migration is enabled, this is the right time
+ * to update the status of the page.
+ */
+ if (kbase_page_migration_enabled && !ignore_page_migration &&
+ !is_huge(phys[i]) && !is_partial(phys[i]))
+ kbase_mmu_progress_migration_on_insert(phys[i], reg, mmut,
+ insert_vpfn + i);
}
num_of_valid_entries += count;
}
mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries);
- if (dirty_pgds && count > 0 && !newly_created_pgd)
- *dirty_pgds |= 1ULL << cur_level;
-
- phys += count;
- insert_vpfn += count;
- remain -= count;
+ if (dirty_pgds)
+ *dirty_pgds |= 1ULL << (newly_created_pgd ? insert_level : cur_level);
- /* For the most part, the creation of a new virtual memory mapping does
- * not require cache flush operations, because the operation results
- * into the creation of new memory pages which are not present in GPU
- * caches. Therefore the defaul operation is NONE.
+ /* MMU cache flush operation here will depend on whether bottom level
+ * PGD is newly created or not.
*
- * However, it is quite common for the mapping to start and/or finish
- * at an already existing PGD. Moreover, the PTEs modified are not
- * necessarily aligned with GPU cache lines. Therefore, GPU cache
- * maintenance is required for existing PGDs.
+ * If bottom level PGD is newly created then no GPU cache maintenance is
+ * required as the PGD will not exist in GPU cache. Otherwise GPU cache
+ * maintenance is required for existing PGD.
*/
flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT;
@@ -2014,6 +2445,23 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64),
flush_op);
+ if (newly_created_pgd) {
+ err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn,
+ new_pgds);
+ if (err) {
+ dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)",
+ __func__, err);
+
+ kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count);
+
+ kunmap(p);
+ goto fail_unlock_free_pgds;
+ }
+ }
+
+ phys += count;
+ insert_vpfn += count;
+ remain -= count;
kunmap(p);
}
@@ -2021,12 +2469,22 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
return 0;
+fail_unlock_free_pgds:
+ /* Free the pgds allocated by us from insert_level+1 to bottom level */
+ for (l = cur_level; l > insert_level; l--)
+ kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
+
fail_unlock:
- rt_mutex_unlock(&mmut->mmu_lock);
+ if (insert_vpfn != start_vpfn) {
+ /* Invalidate the pages we have partially completed */
+ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, dirty_pgds,
+ phys, ignore_page_migration);
+ }
- mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, *dirty_pgds,
- CALLER_MMU_ASYNC);
- kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list);
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr,
+ dirty_pgds ? *dirty_pgds : 0xF, CALLER_MMU_ASYNC, true);
+ kbase_mmu_free_pgds_list(kbdev, mmut);
+ rt_mutex_unlock(&mmut->mmu_lock);
return err;
}
@@ -2035,32 +2493,82 @@ fail_unlock:
* Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space
* number 'as_nr'.
*/
-int kbase_mmu_insert_pages(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut, u64 vpfn,
- struct tagged_addr *phys, size_t nr,
- unsigned long flags, int as_nr, int const group_id,
- enum kbase_caller_mmu_sync_info mmu_sync_info)
+int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
+ struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr,
+ int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info,
+ struct kbase_va_region *reg, bool ignore_page_migration)
{
int err;
u64 dirty_pgds = 0;
- LIST_HEAD(free_pgds_list);
/* Early out if there is nothing to do */
if (nr == 0)
return 0;
err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id,
- &dirty_pgds);
+ &dirty_pgds, reg, ignore_page_migration);
if (err)
return err;
- mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info);
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false);
return 0;
}
KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
+int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int as_nr, int const group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
+ struct kbase_va_region *reg)
+{
+ int err;
+ u64 dirty_pgds = 0;
+
+ /* Early out if there is nothing to do */
+ if (nr == 0)
+ return 0;
+
+ /* Imported allocations don't have metadata and therefore always ignore the
+ * page migration logic.
+ */
+ err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id,
+ &dirty_pgds, reg, true);
+ if (err)
+ return err;
+
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false);
+
+ return 0;
+}
+
+int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int as_nr, int const group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
+ struct kbase_va_region *reg)
+{
+ int err;
+ u64 dirty_pgds = 0;
+
+ /* Early out if there is nothing to do */
+ if (nr == 0)
+ return 0;
+
+ /* Memory aliases are always built on top of existing allocations,
+ * therefore the state of physical pages shall be updated.
+ */
+ err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id,
+ &dirty_pgds, reg, false);
+ if (err)
+ return err;
+
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false);
+
+ return 0;
+}
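+
+/* The three insertion wrappers above differ only in how they treat page
+ * migration metadata: kbase_mmu_insert_pages() lets the caller decide via
+ * ignore_page_migration, imported pages always bypass the migration logic
+ * (they carry no metadata), and aliased pages always have the state of their
+ * physical pages updated.
+ */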
+
#if !MALI_USE_CSF
/**
* kbase_mmu_flush_noretain() - Flush and invalidate the GPU caches
@@ -2257,8 +2765,7 @@ KBASE_EXPORT_TEST_API(kbase_mmu_disable);
static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut, phys_addr_t *pgds,
u64 vpfn, int level,
- enum kbase_mmu_op_type flush_op, u64 *dirty_pgds,
- struct list_head *free_pgds_list)
+ enum kbase_mmu_op_type flush_op, u64 *dirty_pgds)
{
int current_level;
@@ -2290,7 +2797,7 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
current_pgd + (index * sizeof(u64)),
sizeof(u64), flush_op);
- list_add(&p->lru, free_pgds_list);
+ kbase_mmu_add_to_free_pgds_list(mmut, p);
} else {
current_valid_entries--;
@@ -2310,13 +2817,14 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
/**
* mmu_flush_invalidate_teardown_pages() - Perform flush operation after unmapping pages.
*
- * @kbdev: Pointer to kbase device.
- * @kctx: Pointer to kbase context.
- * @as_nr: Address space number, for GPU cache maintenance operations
- * that happen outside a specific kbase context.
- * @phys: Array of physical pages to flush.
- * @op_param: Non-NULL pointer to struct containing information about the flush
- * operation to perform.
+ * @kbdev: Pointer to kbase device.
+ * @kctx: Pointer to kbase context.
+ * @as_nr: Address space number, for GPU cache maintenance operations
+ * that happen outside a specific kbase context.
+ * @phys: Array of physical pages to flush.
+ * @phys_page_nr: Number of physical pages to flush.
+ * @op_param: Non-NULL pointer to struct containing information about the flush
+ * operation to perform.
*
* This function will do one of three things:
* 1. Invalidate the MMU caches, followed by a partial GPU cache flush of the
@@ -2324,100 +2832,54 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
* 2. Perform a full GPU cache flush through the GPU_CONTROL interface if feature is
* supported on GPU or,
* 3. Perform a full GPU cache flush through the MMU_CONTROL interface.
+ *
+ * When performing a partial GPU cache flush, the number of physical
+ * pages does not have to be identical to the number of virtual pages on the MMU,
+ * to support a single physical address flush for an aliased page.
*/
static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev,
struct kbase_context *kctx, int as_nr,
- struct tagged_addr *phys,
+ struct tagged_addr *phys, size_t phys_page_nr,
struct kbase_mmu_hw_op_param *op_param)
{
-
if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) {
+ /* Full cache flush through the MMU_COMMAND */
mmu_flush_invalidate(kbdev, kctx, as_nr, op_param);
- return;
} else if (op_param->op == KBASE_MMU_OP_FLUSH_MEM) {
+ /* Full cache flush through the GPU_CONTROL */
mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, as_nr, op_param);
- return;
}
-
+#if MALI_USE_CSF
+ else {
+ /* Partial GPU cache flush with MMU cache invalidation */
+ unsigned long irq_flags;
+ unsigned int i;
+ bool flush_done = false;
+
+ mmu_invalidate(kbdev, kctx, as_nr, op_param);
+
+ for (i = 0; !flush_done && i < phys_page_nr; i++) {
+ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+ if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0))
+ mmu_flush_pa_range(kbdev, as_phys_addr_t(phys[i]), PAGE_SIZE,
+ KBASE_MMU_OP_FLUSH_MEM);
+ else
+ flush_done = true;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+ }
+ }
+#endif
}
-/**
- * kbase_mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table
- *
- * @kbdev: Pointer to kbase device.
- * @mmut: Pointer to GPU MMU page table.
- * @vpfn: Start page frame number of the GPU virtual pages to unmap.
- * @phys: Array of physical pages currently mapped to the virtual
- * pages to unmap, or NULL. This is only used for GPU cache
- * maintenance.
- * @nr: Number of pages to unmap.
- * @as_nr: Address space number, for GPU cache maintenance operations
- * that happen outside a specific kbase context.
- *
- * We actually discard the ATE and free the page table pages if no valid entries
- * exist in PGD.
- *
- * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
- * currently scheduled into the runpool, and so potentially uses a lot of locks.
- * These locks must be taken in the correct order with respect to others
- * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
- * information.
- *
- * The @p phys pointer to physical pages is not necessary for unmapping virtual memory,
- * but it is used for fine-grained GPU cache maintenance. If @p phys is NULL,
- * GPU cache maintenance will be done as usual, that is invalidating the whole GPU caches
- * instead of specific physical address ranges.
- *
- * Return: 0 on success, otherwise an error code.
- */
-int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
- struct tagged_addr *phys, size_t nr, int as_nr)
+static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, size_t nr, u64 *dirty_pgds,
+ struct list_head *free_pgds_list,
+ enum kbase_mmu_op_type flush_op)
{
- u64 start_vpfn = vpfn;
- size_t requested_nr = nr;
- enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE;
- struct kbase_mmu_mode const *mmu_mode;
- struct kbase_mmu_hw_op_param op_param;
- int err = -EFAULT;
- u64 dirty_pgds = 0;
- LIST_HEAD(free_pgds_list);
-
- /* Calls to this function are inherently asynchronous, with respect to
- * MMU operations.
- */
- const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
-
- if (nr == 0) {
- /* early out if nothing to do */
- return 0;
- }
- /* MMU cache flush strategy depends on the number of pages to unmap. In both cases
- * the operation is invalidate but the granularity of cache maintenance may change
- * according to the situation.
- *
- * If GPU control command operations are present and the number of pages is "small",
- * then the optimal strategy is flushing on the physical address range of the pages
- * which are affected by the operation. That implies both the PGDs which are modified
- * or removed from the page table and the physical pages which are freed from memory.
- *
- * Otherwise, there's no alternative to invalidating the whole GPU cache.
- */
- if (mmu_flush_cache_on_gpu_ctrl(kbdev) && phys && nr <= KBASE_PA_RANGE_THRESHOLD_NR_PAGES)
- flush_op = KBASE_MMU_OP_FLUSH_PT;
-
- if (!rt_mutex_trylock(&mmut->mmu_lock)) {
- /*
- * Sometimes, mmu_lock takes long time to be released.
- * In that case, kswapd is stuck until it can hold
- * the lock. Instead, just bail out here so kswapd
- * could reclaim other pages.
- */
- if (current_is_kswapd())
- return -EBUSY;
- rt_mutex_lock(&mmut->mmu_lock);
- }
+ struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode;
- mmu_mode = kbdev->mmu_mode;
+ lockdep_assert_held(&mmut->mmu_lock);
+ kbase_mmu_reset_free_pgds_list(mmut);
while (nr) {
unsigned int index = vpfn & 0x1FF;
@@ -2474,9 +2936,8 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table
switch (level) {
case MIDGARD_MMU_LEVEL(0):
case MIDGARD_MMU_LEVEL(1):
- dev_warn(kbdev->dev,
- "%s: No support for ATEs at level %d\n",
- __func__, level);
+ dev_warn(kbdev->dev, "%s: No support for ATEs at level %d", __func__,
+ level);
kunmap(p);
goto out;
case MIDGARD_MMU_LEVEL(2):
@@ -2484,9 +2945,10 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table
if (count >= 512) {
pcount = 1;
} else {
- dev_warn(kbdev->dev,
- "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down\n",
- __func__, count);
+ dev_warn(
+ kbdev->dev,
+ "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down",
+ __func__, count);
pcount = 0;
}
break;
@@ -2495,16 +2957,14 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table
pcount = count;
break;
default:
- dev_err(kbdev->dev,
- "%s: found non-mapped memory, early out\n",
- __func__);
+ dev_err(kbdev->dev, "%s: found non-mapped memory, early out", __func__);
vpfn += count;
nr -= count;
continue;
}
if (pcount > 0)
- dirty_pgds |= 1ULL << level;
+ *dirty_pgds |= 1ULL << level;
num_of_valid_entries = mmu_mode->get_num_valid_entries(page);
if (WARN_ON_ONCE(num_of_valid_entries < pcount))
@@ -2526,11 +2986,10 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table
pgd + (index * sizeof(u64)),
pcount * sizeof(u64), flush_op);
- list_add(&p->lru, &free_pgds_list);
+ kbase_mmu_add_to_free_pgds_list(mmut, p);
kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level,
- flush_op, &dirty_pgds,
- &free_pgds_list);
+ flush_op, dirty_pgds);
vpfn += count;
nr -= count;
@@ -2547,32 +3006,110 @@ next:
vpfn += count;
nr -= count;
}
- err = 0;
out:
- rt_mutex_unlock(&mmut->mmu_lock);
+ return 0;
+}
+
+int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
+ struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages,
+ int as_nr, bool ignore_page_migration)
+{
+ u64 start_vpfn = vpfn;
+ enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE;
+ struct kbase_mmu_hw_op_param op_param;
+ int err = -EFAULT;
+ u64 dirty_pgds = 0;
+ LIST_HEAD(free_pgds_list);
+
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
+ /* This function performs two operations: MMU maintenance and flushing
+ * the caches. To ensure internal consistency between the caches and the
+ * MMU, it does not make sense to be able to flush only the physical pages
+ * from the cache and keep the PTE, nor does it make sense to use this
+ * function to remove a PTE and keep the physical pages in the cache.
+ *
+ * However, we have legitimate cases where we can try to tear down a mapping
+ * with zero virtual and zero physical pages, so we must have the following
+ * behaviour:
+ * - if both physical and virtual page counts are zero, return early
+	 * - if either the physical or the virtual page count is zero, return early
+	 * - if there are fewer virtual pages than physical pages, return -EINVAL
+ */
+ if (unlikely(nr_virt_pages == 0 || nr_phys_pages == 0))
+ return 0;
+
+ if (unlikely(nr_virt_pages < nr_phys_pages))
+ return -EINVAL;
+
+ /* MMU cache flush strategy depends on the number of pages to unmap. In both cases
+ * the operation is invalidate but the granularity of cache maintenance may change
+ * according to the situation.
+ *
+ * If GPU control command operations are present and the number of pages is "small",
+ * then the optimal strategy is flushing on the physical address range of the pages
+ * which are affected by the operation. That implies both the PGDs which are modified
+ * or removed from the page table and the physical pages which are freed from memory.
+ *
+ * Otherwise, there's no alternative to invalidating the whole GPU cache.
+ */
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev) && phys &&
+ nr_phys_pages <= KBASE_PA_RANGE_THRESHOLD_NR_PAGES)
+ flush_op = KBASE_MMU_OP_FLUSH_PT;
+
+ if (!rt_mutex_trylock(&mmut->mmu_lock)) {
+ /*
+ * Sometimes, mmu_lock takes long time to be released.
+ * In that case, kswapd is stuck until it can hold
+ * the lock. Instead, just bail out here so kswapd
+ * could reclaim other pages.
+ */
+ if (current_is_kswapd())
+ return -EBUSY;
+ rt_mutex_lock(&mmut->mmu_lock);
+ }
+
+ err = kbase_mmu_teardown_pgd_pages(kbdev, mmut, vpfn, nr_virt_pages, &dirty_pgds,
+ &free_pgds_list, flush_op);
+
/* Set up MMU operation parameters. See above about MMU cache flush strategy. */
op_param = (struct kbase_mmu_hw_op_param){
.vpfn = start_vpfn,
- .nr = requested_nr,
+ .nr = nr_virt_pages,
.mmu_sync_info = mmu_sync_info,
.kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF,
.op = (flush_op == KBASE_MMU_OP_FLUSH_PT) ? KBASE_MMU_OP_FLUSH_PT :
KBASE_MMU_OP_FLUSH_MEM,
.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds),
};
- mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, &op_param);
+ mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, nr_phys_pages,
+ &op_param);
- kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list);
+ /* If page migration is enabled: the status of all physical pages involved
+ * shall be updated, unless they are not movable. Their status shall be
+ * updated before releasing the lock to protect against concurrent
+ * requests to migrate the pages, if they have been isolated.
+ */
+ if (kbase_page_migration_enabled && phys && !ignore_page_migration)
+ kbase_mmu_progress_migration_on_teardown(kbdev, phys, nr_phys_pages);
+
+ kbase_mmu_free_pgds_list(kbdev, mmut);
+
+ rt_mutex_unlock(&mmut->mmu_lock);
return err;
}
-
KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
/**
- * kbase_mmu_update_pages_no_flush() - Update attributes data in GPU page table entries
+ * kbase_mmu_update_pages_no_flush() - Update phy pages and attributes data in GPU
+ * page table entries
*
- * @kctx: Kbase context
+ * @kbdev: Pointer to kbase device.
+ * @mmut: The involved MMU table
* @vpfn: Virtual PFN (Page Frame Number) of the first page to update
* @phys: Pointer to the array of tagged physical addresses of the physical
* pages that are pointed to by the page table entries (that need to
@@ -2585,26 +3122,22 @@ KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
* @dirty_pgds: Flags to track every level where a PGD has been updated.
*
* This will update page table entries that already exist on the GPU based on
- * the new flags that are passed (the physical pages pointed to by the page
- * table entries remain unchanged). It is used as a response to the changes of
- * the memory attributes.
+ * new flags and replace any existing phy pages that are passed (the PGD pages
+ * remain unchanged). It is used as a response to the changes of phys as well
+ * as the memory attributes.
*
* The caller is responsible for validating the memory attributes.
*
* Return: 0 if the attributes data in page table entries were updated
* successfully, otherwise an error code.
*/
-static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
- struct tagged_addr *phys, size_t nr, unsigned long flags,
- int const group_id, u64 *dirty_pgds)
+static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int const group_id, u64 *dirty_pgds)
{
phys_addr_t pgd;
u64 *pgd_page;
int err;
- struct kbase_device *kbdev;
-
- if (WARN_ON(kctx == NULL))
- return -EINVAL;
KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
@@ -2612,9 +3145,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
if (nr == 0)
return 0;
- rt_mutex_lock(&kctx->mmu.mmu_lock);
-
- kbdev = kctx->kbdev;
+ rt_mutex_lock(&mmut->mmu_lock);
while (nr) {
unsigned int i;
@@ -2630,8 +3161,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
if (is_huge(*phys) && (index == index_in_large_page(*phys)))
cur_level = MIDGARD_MMU_LEVEL(2);
- err = mmu_get_pgd_at_level(kbdev, &kctx->mmu, vpfn, cur_level, &pgd, NULL,
- dirty_pgds);
+ err = mmu_get_pgd_at_level(kbdev, mmut, vpfn, cur_level, &pgd);
if (WARN_ON(err))
goto fail_unlock;
@@ -2658,7 +3188,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
pgd_page[level_index] = kbase_mmu_create_ate(kbdev,
*target_phys, flags, MIDGARD_MMU_LEVEL(2),
group_id);
- kbase_mmu_sync_pgd(kbdev, kctx, pgd + (level_index * sizeof(u64)),
+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (level_index * sizeof(u64)),
kbase_dma_addr(p) + (level_index * sizeof(u64)),
sizeof(u64), KBASE_MMU_OP_NONE);
} else {
@@ -2676,7 +3206,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
/* MMU cache flush strategy is NONE because GPU cache maintenance
* will be done by the caller.
*/
- kbase_mmu_sync_pgd(kbdev, kctx, pgd + (index * sizeof(u64)),
+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)),
kbase_dma_addr(p) + (index * sizeof(u64)),
count * sizeof(u64), KBASE_MMU_OP_NONE);
}
@@ -2694,75 +3224,458 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
kunmap(p);
}
- rt_mutex_unlock(&kctx->mmu.mmu_lock);
+ rt_mutex_unlock(&mmut->mmu_lock);
return 0;
fail_unlock:
- rt_mutex_unlock(&kctx->mmu.mmu_lock);
+ rt_mutex_unlock(&mmut->mmu_lock);
return err;
}
-int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
- struct tagged_addr *phys, size_t nr,
- unsigned long flags, int const group_id)
+static int kbase_mmu_update_pages_common(struct kbase_device *kbdev, struct kbase_context *kctx,
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int const group_id)
{
int err;
struct kbase_mmu_hw_op_param op_param;
u64 dirty_pgds = 0;
-
+ struct kbase_mmu_table *mmut;
/* Calls to this function are inherently asynchronous, with respect to
* MMU operations.
*/
const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+ int as_nr;
+
+#if !MALI_USE_CSF
+ if (unlikely(kctx == NULL))
+ return -EINVAL;
+
+ as_nr = kctx->as_nr;
+ mmut = &kctx->mmu;
+#else
+ if (kctx) {
+ mmut = &kctx->mmu;
+ as_nr = kctx->as_nr;
+ } else {
+ mmut = &kbdev->csf.mcu_mmu;
+ as_nr = MCU_AS_NR;
+ }
+#endif
- err = kbase_mmu_update_pages_no_flush(kctx, vpfn, phys, nr, flags, group_id, &dirty_pgds);
+ err = kbase_mmu_update_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id,
+ &dirty_pgds);
op_param = (const struct kbase_mmu_hw_op_param){
.vpfn = vpfn,
.nr = nr,
.op = KBASE_MMU_OP_FLUSH_MEM,
- .kctx_id = kctx->id,
+ .kctx_id = kctx ? kctx->id : 0xFFFFFFFF,
.mmu_sync_info = mmu_sync_info,
.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds),
};
- if (mmu_flush_cache_on_gpu_ctrl(kctx->kbdev))
- mmu_flush_invalidate_on_gpu_ctrl(kctx->kbdev, kctx, kctx->as_nr, &op_param);
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev))
+ mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, as_nr, &op_param);
else
- mmu_flush_invalidate(kctx->kbdev, kctx, kctx->as_nr, &op_param);
+ mmu_flush_invalidate(kbdev, kctx, as_nr, &op_param);
+
return err;
}
-static void mmu_teardown_level(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut, phys_addr_t pgd,
- int level)
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, struct tagged_addr *phys,
+ size_t nr, unsigned long flags, int const group_id)
+{
+ if (unlikely(kctx == NULL))
+ return -EINVAL;
+
+ return kbase_mmu_update_pages_common(kctx->kbdev, kctx, vpfn, phys, nr, flags, group_id);
+}
+
+#if MALI_USE_CSF
+int kbase_mmu_update_csf_mcu_pages(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys,
+ size_t nr, unsigned long flags, int const group_id)
+{
+ return kbase_mmu_update_pages_common(kbdev, NULL, vpfn, phys, nr, flags, group_id);
+}
+#endif /* MALI_USE_CSF */
+
+static void mmu_page_migration_transaction_begin(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ WARN_ON_ONCE(kbdev->mmu_page_migrate_in_progress);
+ kbdev->mmu_page_migrate_in_progress = true;
+}
+
+static void mmu_page_migration_transaction_end(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+ WARN_ON_ONCE(!kbdev->mmu_page_migrate_in_progress);
+ kbdev->mmu_page_migrate_in_progress = false;
+ /* Invoke the PM state machine, as the MMU page migration session
+ * may have deferred a transition in L2 state machine.
+ */
+ kbase_pm_update_state(kbdev);
+}
+
+int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_phys,
+ dma_addr_t old_dma_addr, dma_addr_t new_dma_addr, int level)
+{
+ struct kbase_page_metadata *page_md = kbase_page_private(as_page(old_phys));
+ struct kbase_mmu_hw_op_param op_param;
+ struct kbase_mmu_table *mmut = (level == MIDGARD_MMU_BOTTOMLEVEL) ?
+ page_md->data.mapped.mmut :
+ page_md->data.pt_mapped.mmut;
+ struct kbase_device *kbdev;
+ phys_addr_t pgd;
+ u64 *old_page, *new_page, *pgd_page, *target, vpfn;
+ int index, check_state, ret = 0;
+ unsigned long hwaccess_flags = 0;
+ unsigned int num_of_valid_entries;
+ u8 vmap_count = 0;
+
+ /* Due to the hard binding of mmu_command_instr with kctx_id via kbase_mmu_hw_op_param,
+ * here we skip the no kctx case, which is only used with MCU's mmut.
+ */
+ if (!mmut->kctx)
+ return -EINVAL;
+
+ if (level > MIDGARD_MMU_BOTTOMLEVEL)
+ return -EINVAL;
+ else if (level == MIDGARD_MMU_BOTTOMLEVEL)
+ vpfn = page_md->data.mapped.vpfn;
+ else
+ vpfn = PGD_VPFN_LEVEL_GET_VPFN(page_md->data.pt_mapped.pgd_vpfn_level);
+
+ kbdev = mmut->kctx->kbdev;
+ index = (vpfn >> ((3 - level) * 9)) & 0x1FF;
+
+ /* Create all mappings before copying content.
+ * This is done as early as possible because is the only operation that may
+	 * This is done as early as possible because it is the only operation that may
+ * pages to migrate are not going to change and even the parent PGD is not
+ * going to be affected by any other concurrent operation, since the page
+ * has been isolated before migration and therefore it cannot disappear in
+ * the middle of this function.
+ */
+ old_page = kmap(as_page(old_phys));
+ if (!old_page) {
+ dev_warn(kbdev->dev, "%s: kmap failure for old page.", __func__);
+ ret = -EINVAL;
+ goto old_page_map_error;
+ }
+
+ new_page = kmap(as_page(new_phys));
+ if (!new_page) {
+ dev_warn(kbdev->dev, "%s: kmap failure for new page.", __func__);
+ ret = -EINVAL;
+ goto new_page_map_error;
+ }
+
+ /* GPU cache maintenance affects both memory content and page table,
+ * but at two different stages. A single virtual memory page is affected
+ * by the migration.
+ *
+ * Notice that the MMU maintenance is done in the following steps:
+ *
+ * 1) The MMU region is locked without performing any other operation.
+ * This lock must cover the entire migration process, in order to
+ * prevent any GPU access to the virtual page whose physical page
+ * is being migrated.
+ * 2) Immediately after locking: the MMU region content is flushed via
+ * GPU control while the lock is taken and without unlocking.
+ * The region must stay locked for the duration of the whole page
+ * migration procedure.
+ * This is necessary to make sure that pending writes to the old page
+ * are finalized before copying content to the new page.
+ * 3) Before unlocking: changes to the page table are flushed.
+ * Finer-grained GPU control operations are used if possible, otherwise
+ * the whole GPU cache shall be flushed again.
+ * This is necessary to make sure that the GPU accesses the new page
+ * after migration.
+ * 4) The MMU region is unlocked.
+ */
+#define PGD_VPFN_MASK(level) (~((((u64)1) << ((3 - level) * 9)) - 1))
+ op_param.mmu_sync_info = CALLER_MMU_ASYNC;
+ op_param.kctx_id = mmut->kctx->id;
+ op_param.vpfn = vpfn & PGD_VPFN_MASK(level);
+ op_param.nr = 1 << ((3 - level) * 9);
+ op_param.op = KBASE_MMU_OP_FLUSH_PT;
+	/* When level is not MIDGARD_MMU_BOTTOMLEVEL, a PGD page migration is assumed */
+ op_param.flush_skip_levels = (level == MIDGARD_MMU_BOTTOMLEVEL) ?
+ pgd_level_to_skip_flush(1ULL << level) :
+ pgd_level_to_skip_flush(3ULL << level);
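+	/* Illustrative values for the op_param set up above: when migrating a
+	 * data page (level 3), PGD_VPFN_MASK(3) keeps vpfn unchanged and
+	 * op_param.nr is 1, so the lock and flush cover exactly one page; for
+	 * smaller level values the locked range grows by a factor of 512 per
+	 * level.
+	 */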
+
+ rt_mutex_lock(&mmut->mmu_lock);
+
+ /* The state was evaluated before entering this function, but it could
+ * have changed before the mmu_lock was taken. However, the state
+ * transitions which are possible at this point are only two, and in both
+ * cases it is a stable state progressing to a "free in progress" state.
+ *
+ * After taking the mmu_lock the state can no longer change: read it again
+ * and make sure that it hasn't changed before continuing.
+ */
+ spin_lock(&page_md->migrate_lock);
+ check_state = PAGE_STATUS_GET(page_md->status);
+ if (level == MIDGARD_MMU_BOTTOMLEVEL)
+ vmap_count = page_md->vmap_count;
+ spin_unlock(&page_md->migrate_lock);
+
+ if (level == MIDGARD_MMU_BOTTOMLEVEL) {
+ if (check_state != ALLOCATED_MAPPED) {
+ dev_dbg(kbdev->dev,
+ "%s: state changed to %d (was %d), abort page migration", __func__,
+ check_state, ALLOCATED_MAPPED);
+ ret = -EAGAIN;
+ goto page_state_change_out;
+ } else if (vmap_count > 0) {
+ dev_dbg(kbdev->dev, "%s: page was multi-mapped, abort page migration",
+ __func__);
+ ret = -EAGAIN;
+ goto page_state_change_out;
+ }
+ } else {
+ if (check_state != PT_MAPPED) {
+ dev_dbg(kbdev->dev,
+ "%s: state changed to %d (was %d), abort PGD page migration",
+ __func__, check_state, PT_MAPPED);
+ WARN_ON_ONCE(check_state != FREE_PT_ISOLATED_IN_PROGRESS);
+ ret = -EAGAIN;
+ goto page_state_change_out;
+ }
+ }
+
+ ret = mmu_get_pgd_at_level(kbdev, mmut, vpfn, level, &pgd);
+ if (ret) {
+ dev_err(kbdev->dev, "%s: failed to find PGD for old page.", __func__);
+ goto get_pgd_at_level_error;
+ }
+
+ pgd_page = kmap(phys_to_page(pgd));
+ if (!pgd_page) {
+ dev_warn(kbdev->dev, "%s: kmap failure for PGD page.", __func__);
+ ret = -EINVAL;
+ goto pgd_page_map_error;
+ }
+
+ rt_mutex_lock(&kbdev->pm.lock);
+ mutex_lock(&kbdev->mmu_hw_mutex);
+
+ /* Lock MMU region and flush GPU cache by using GPU control,
+ * in order to keep MMU region locked.
+ */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
+ if (unlikely(!kbase_pm_l2_allow_mmu_page_migration(kbdev))) {
+ /* Defer the migration as L2 is in a transitional phase */
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
+ mutex_unlock(&kbdev->mmu_hw_mutex);
+ rt_mutex_unlock(&kbdev->pm.lock);
+		dev_dbg(kbdev->dev, "%s: L2 in transition, abort PGD page migration", __func__);
+ ret = -EAGAIN;
+ goto l2_state_defer_out;
+ }
+ /* Prevent transitional phases in L2 by starting the transaction */
+ mmu_page_migration_transaction_begin(kbdev);
+ if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) {
+ int as_nr = mmut->kctx->as_nr;
+ struct kbase_as *as = &kbdev->as[as_nr];
+
+ ret = kbase_mmu_hw_do_lock(kbdev, as, &op_param);
+ if (!ret) {
+ ret = kbase_gpu_cache_flush_and_busy_wait(
+ kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
+ }
+ if (ret)
+ mmu_page_migration_transaction_end(kbdev);
+ }
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
+
+ if (ret < 0) {
+ mutex_unlock(&kbdev->mmu_hw_mutex);
+ rt_mutex_unlock(&kbdev->pm.lock);
+ dev_err(kbdev->dev, "%s: failed to lock MMU region or flush GPU cache", __func__);
+ goto undo_mappings;
+ }
+
+ /* Copy memory content.
+ *
+ * It is necessary to claim the ownership of the DMA buffer for the old
+ * page before performing the copy, to make sure of reading a consistent
+	 * It is necessary to claim ownership of the DMA buffer for the old
+	 * page before performing the copy, to make sure that a consistent
+	 * version of its content is read. After the copy, ownership of
+ * this function releases the lock on the MMU region.
+ */
+ dma_sync_single_for_cpu(kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ memcpy(new_page, old_page, PAGE_SIZE);
+ dma_sync_single_for_device(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+ /* Remap GPU virtual page.
+ *
+ * This code rests on the assumption that page migration is only enabled
+ * for 4 kB pages, that necessarily live in the bottom level of the MMU
+	 * page table. For this reason, the PGD level tells us unequivocally
+ * whether the page being migrated is a "content page" or another PGD
+ * of the page table:
+ *
+ * - Bottom level implies ATE (Address Translation Entry)
+ * - Any other level implies PTE (Page Table Entry)
+ *
+ * The current implementation doesn't handle the case of a level 0 PGD,
+ * that is: the root PGD of the page table.
+ */
+ target = &pgd_page[index];
+
+ /* Certain entries of a page table page encode the count of valid entries
+	 * present in that page. So we need to save & restore the count information
+ * when updating the PTE/ATE to point to the new page.
+ */
+ num_of_valid_entries = kbdev->mmu_mode->get_num_valid_entries(pgd_page);
+
+ if (level == MIDGARD_MMU_BOTTOMLEVEL) {
+ WARN_ON_ONCE((*target & 1UL) == 0);
+ *target =
+ kbase_mmu_create_ate(kbdev, new_phys, page_md->data.mapped.reg->flags,
+ level, page_md->data.mapped.reg->gpu_alloc->group_id);
+ } else {
+ u64 managed_pte;
+
+#ifdef CONFIG_MALI_DEBUG
+ /* The PTE should be pointing to the page being migrated */
+ WARN_ON_ONCE(as_phys_addr_t(old_phys) != kbdev->mmu_mode->pte_to_phy_addr(
+ kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, pgd_page[index])));
+#endif
+ kbdev->mmu_mode->entry_set_pte(&managed_pte, as_phys_addr_t(new_phys));
+ *target = kbdev->mgm_dev->ops.mgm_update_gpu_pte(
+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, managed_pte);
+ }
+
+ kbdev->mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries);
+
+ /* This function always updates a single entry inside an existing PGD,
+ * therefore cache maintenance is necessary and affects a single entry.
+ */
+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)),
+ kbase_dma_addr(phys_to_page(pgd)) + (index * sizeof(u64)), sizeof(u64),
+ KBASE_MMU_OP_FLUSH_PT);
+
+ /* Unlock MMU region.
+ *
+ * Notice that GPUs which don't issue flush commands via GPU control
+ * still need an additional GPU cache flush here, this time only
+ * for the page table, because the function call above to sync PGDs
+ * won't have any effect on them.
+ */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
+ if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) {
+ int as_nr = mmut->kctx->as_nr;
+ struct kbase_as *as = &kbdev->as[as_nr];
+
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
+ ret = kbase_mmu_hw_do_unlock(kbdev, as, &op_param);
+ } else {
+ ret = kbase_gpu_cache_flush_and_busy_wait(kbdev,
+ GPU_COMMAND_CACHE_CLN_INV_L2);
+ if (!ret)
+ ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, &op_param);
+ }
+ }
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
+ /* Releasing locks before checking the migration transaction error state */
+ mutex_unlock(&kbdev->mmu_hw_mutex);
+ rt_mutex_unlock(&kbdev->pm.lock);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
+ /* Release the transition prevention in L2 by ending the transaction */
+ mmu_page_migration_transaction_end(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
+
+ /* Checking the final migration transaction error state */
+ if (ret < 0) {
+ dev_err(kbdev->dev, "%s: failed to unlock MMU region.", __func__);
+ goto undo_mappings;
+ }
+
+	/* Undertake the metadata transfer while we are still holding the mmu_lock */
+ spin_lock(&page_md->migrate_lock);
+ if (level == MIDGARD_MMU_BOTTOMLEVEL) {
+ size_t page_array_index =
+ page_md->data.mapped.vpfn - page_md->data.mapped.reg->start_pfn;
+
+ WARN_ON(PAGE_STATUS_GET(page_md->status) != ALLOCATED_MAPPED);
+
+ /* Replace page in array of pages of the physical allocation. */
+ page_md->data.mapped.reg->gpu_alloc->pages[page_array_index] = new_phys;
+ }
+	/* Update the dma_addr in the metadata (transferred from the old page) to the new page's DMA address */
+ page_md->dma_addr = new_dma_addr;
+ page_md->status = PAGE_ISOLATE_SET(page_md->status, 0);
+ spin_unlock(&page_md->migrate_lock);
+ set_page_private(as_page(new_phys), (unsigned long)page_md);
+	/* Old page metadata pointer cleared as it is now owned by the new page */
+ set_page_private(as_page(old_phys), 0);
+
+l2_state_defer_out:
+ kunmap(phys_to_page(pgd));
+pgd_page_map_error:
+get_pgd_at_level_error:
+page_state_change_out:
+ rt_mutex_unlock(&mmut->mmu_lock);
+
+ kunmap(as_page(new_phys));
+new_page_map_error:
+ kunmap(as_page(old_phys));
+old_page_map_error:
+ return ret;
+
+undo_mappings:
+ /* Unlock the MMU table and undo mappings. */
+ rt_mutex_unlock(&mmut->mmu_lock);
+ kunmap(phys_to_page(pgd));
+ kunmap(as_page(new_phys));
+ kunmap(as_page(old_phys));
+
+ return ret;
+}
+
+static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ phys_addr_t pgd, unsigned int level)
{
u64 *pgd_page;
int i;
struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev;
struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode;
u64 *pgd_page_buffer = NULL;
+ struct page *p = phys_to_page(pgd);
lockdep_assert_held(&mmut->mmu_lock);
- pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+ pgd_page = kmap_atomic(p);
/* kmap_atomic should NEVER fail. */
if (WARN_ON_ONCE(pgd_page == NULL))
return;
- if (level != MIDGARD_MMU_BOTTOMLEVEL) {
+ if (level < MIDGARD_MMU_BOTTOMLEVEL) {
/* Copy the page to our preallocated buffer so that we can minimize
* kmap_atomic usage
*/
- pgd_page_buffer = mmut->mmu_teardown_pages[level];
+ pgd_page_buffer = mmut->scratch_mem.teardown_pages.levels[level];
memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
}
+ /* When page migration is enabled, kbase_region_tracker_term() would ensure
+ * there are no pages left mapped on the GPU for a context. Hence the count
+ * of valid entries is expected to be zero here.
+ */
+ if (kbase_page_migration_enabled && mmut->kctx)
+ WARN_ON_ONCE(kbdev->mmu_mode->get_num_valid_entries(pgd_page));
/* Invalidate page after copying */
mmu_mode->entries_invalidate(pgd_page, KBASE_MMU_PAGE_ENTRIES);
kunmap_atomic(pgd_page);
pgd_page = pgd_page_buffer;
- if (level != MIDGARD_MMU_BOTTOMLEVEL) {
+ if (level < MIDGARD_MMU_BOTTOMLEVEL) {
for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
if (mmu_mode->pte_is_valid(pgd_page[i], level)) {
phys_addr_t target_pgd = mmu_mode->pte_to_phy_addr(
@@ -2782,29 +3695,20 @@ int kbase_mmu_init(struct kbase_device *const kbdev,
struct kbase_mmu_table *const mmut, struct kbase_context *const kctx,
int const group_id)
{
- int level;
-
if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) ||
WARN_ON(group_id < 0))
return -EINVAL;
+ compiletime_assert(KBASE_MEM_ALLOC_MAX_SIZE <= (((8ull << 30) >> PAGE_SHIFT)),
+ "List of free PGDs may not be large enough.");
+ compiletime_assert(MAX_PAGES_FOR_FREE_PGDS >= MIDGARD_MMU_BOTTOMLEVEL,
+ "Array of MMU levels is not large enough.");
+
mmut->group_id = group_id;
rt_mutex_init(&mmut->mmu_lock);
mmut->kctx = kctx;
mmut->pgd = KBASE_MMU_INVALID_PGD_ADDRESS;
- /* Preallocate MMU depth of 3 pages for mmu_teardown_level to use */
- for (level = MIDGARD_MMU_TOPLEVEL;
- level < MIDGARD_MMU_BOTTOMLEVEL; level++) {
- mmut->mmu_teardown_pages[level] =
- kmalloc(PAGE_SIZE, GFP_KERNEL);
-
- if (!mmut->mmu_teardown_pages[level]) {
- kbase_mmu_term(kbdev, mmut);
- return -ENOMEM;
- }
- }
-
/* We allocate pages into the kbdev memory pool, then
* kbase_mmu_alloc_pgd will allocate out of that pool. This is done to
* avoid allocations from the kernel happening with the lock held.
@@ -2812,8 +3716,9 @@ int kbase_mmu_init(struct kbase_device *const kbdev,
while (mmut->pgd == KBASE_MMU_INVALID_PGD_ADDRESS) {
int err;
- err = kbase_mem_pool_grow(&kbdev->mem_pools.small[mmut->group_id],
- MIDGARD_MMU_BOTTOMLEVEL, kctx ? kctx->task : NULL);
+ err = kbase_mem_pool_grow(
+ &kbdev->mem_pools.small[mmut->group_id],
+ MIDGARD_MMU_BOTTOMLEVEL, kctx ? kctx->task : NULL);
if (err) {
kbase_mmu_term(kbdev, mmut);
return -ENOMEM;
@@ -2829,8 +3734,6 @@ int kbase_mmu_init(struct kbase_device *const kbdev,
void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
{
- int level;
-
WARN((mmut->kctx) && (mmut->kctx->as_nr != KBASEP_AS_NR_INVALID),
"kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before tearing down MMU tables",
mmut->kctx->tgid, mmut->kctx->id);
@@ -2844,22 +3747,30 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, 0);
}
- for (level = MIDGARD_MMU_TOPLEVEL;
- level < MIDGARD_MMU_BOTTOMLEVEL; level++) {
- if (!mmut->mmu_teardown_pages[level])
- break;
- kfree(mmut->mmu_teardown_pages[level]);
- }
-
rt_mutex_destroy(&mmut->mmu_lock);
}
-void kbase_mmu_as_term(struct kbase_device *kbdev, int i)
+void kbase_mmu_as_term(struct kbase_device *kbdev, unsigned int i)
{
destroy_workqueue(kbdev->as[i].pf_wq);
}
-#if defined(CONFIG_MALI_VECTOR_DUMP)
+void kbase_mmu_flush_pa_range(struct kbase_device *kbdev, struct kbase_context *kctx,
+ phys_addr_t phys, size_t size,
+ enum kbase_mmu_op_type flush_op)
+{
+#if MALI_USE_CSF
+ unsigned long irq_flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev) && (flush_op != KBASE_MMU_OP_NONE) &&
+ kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0))
+ mmu_flush_pa_range(kbdev, phys, size, KBASE_MMU_OP_FLUSH_PT);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+#endif
+}
+
+#ifdef CONFIG_MALI_VECTOR_DUMP
static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd,
int level, char ** const buffer, size_t *size_left)
{
@@ -2880,7 +3791,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd,
pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
if (!pgd_page) {
- dev_warn(kbdev->dev, "%s: kmap failure\n", __func__);
+ dev_warn(kbdev->dev, "%s: kmap failure", __func__);
return 0;
}
@@ -3001,7 +3912,7 @@ fail_free:
return NULL;
}
KBASE_EXPORT_TEST_API(kbase_mmu_dump);
-#endif /* defined(CONFIG_MALI_VECTOR_DUMP) */
+#endif /* CONFIG_MALI_VECTOR_DUMP */
void kbase_mmu_bus_fault_worker(struct work_struct *data)
{
@@ -3034,8 +3945,7 @@ void kbase_mmu_bus_fault_worker(struct work_struct *data)
#ifdef CONFIG_MALI_ARBITER_SUPPORT
/* check if we still have GPU */
if (unlikely(kbase_is_gpu_removed(kbdev))) {
- dev_dbg(kbdev->dev,
- "%s: GPU has been removed\n", __func__);
+ dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__);
release_ctx(kbdev, kctx);
atomic_dec(&kbdev->faults_pending);
return;
diff --git a/mali_kbase/mmu/mali_kbase_mmu.h b/mali_kbase/mmu/mali_kbase_mmu.h
index 5330306..49b42e0 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.h
+++ b/mali_kbase/mmu/mali_kbase_mmu.h
@@ -29,6 +29,7 @@
struct kbase_context;
struct kbase_mmu_table;
+struct kbase_va_region;
/**
* enum kbase_caller_mmu_sync_info - MMU-synchronous caller info.
@@ -50,6 +51,26 @@ enum kbase_caller_mmu_sync_info {
};
/**
+ * enum kbase_mmu_op_type - enum for MMU operations
+ * @KBASE_MMU_OP_NONE: To help catch uninitialized struct
+ * @KBASE_MMU_OP_FIRST: The lower boundary of enum
+ * @KBASE_MMU_OP_LOCK: Lock memory region
+ * @KBASE_MMU_OP_UNLOCK: Unlock memory region
+ * @KBASE_MMU_OP_FLUSH_PT: Flush page table (CLN+INV L2 only)
+ * @KBASE_MMU_OP_FLUSH_MEM: Flush memory (CLN+INV L2+LSC)
+ * @KBASE_MMU_OP_COUNT: The upper boundary of enum
+ */
+enum kbase_mmu_op_type {
+ KBASE_MMU_OP_NONE = 0, /* Must be zero */
+ KBASE_MMU_OP_FIRST, /* Must be the first non-zero op */
+ KBASE_MMU_OP_LOCK = KBASE_MMU_OP_FIRST,
+ KBASE_MMU_OP_UNLOCK,
+ KBASE_MMU_OP_FLUSH_PT,
+ KBASE_MMU_OP_FLUSH_MEM,
+ KBASE_MMU_OP_COUNT /* Must be the last in enum */
+};
+
+/**
* kbase_mmu_as_init() - Initialising GPU address space object.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer).
@@ -60,7 +81,7 @@ enum kbase_caller_mmu_sync_info {
*
* Return: 0 on success and non-zero value on failure.
*/
-int kbase_mmu_as_init(struct kbase_device *kbdev, int i);
+int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i);
/**
* kbase_mmu_as_term() - Terminate address space object.
@@ -71,7 +92,7 @@ int kbase_mmu_as_init(struct kbase_device *kbdev, int i);
* This is called upon device termination to destroy
* the address space object of the device.
*/
-void kbase_mmu_as_term(struct kbase_device *kbdev, int i);
+void kbase_mmu_as_term(struct kbase_device *kbdev, unsigned int i);
/**
* kbase_mmu_init - Initialise an object representing GPU page tables
@@ -131,23 +152,143 @@ u64 kbase_mmu_create_ate(struct kbase_device *kbdev,
struct tagged_addr phy, unsigned long flags, int level, int group_id);
int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
- const u64 start_vpfn, struct tagged_addr *phys, size_t nr,
- unsigned long flags, int group_id, u64 *dirty_pgds);
-int kbase_mmu_insert_pages(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut, u64 vpfn,
- struct tagged_addr *phys, size_t nr,
- unsigned long flags, int as_nr, int group_id,
- enum kbase_caller_mmu_sync_info mmu_sync_info);
-int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
- struct tagged_addr phys, size_t nr,
- unsigned long flags, int group_id,
- enum kbase_caller_mmu_sync_info mmu_sync_info);
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int group_id, u64 *dirty_pgds,
+ struct kbase_va_region *reg, bool ignore_page_migration);
+int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
+ struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr,
+ int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info,
+ struct kbase_va_region *reg, bool ignore_page_migration);
+int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int as_nr, int group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
+ struct kbase_va_region *reg);
+int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int as_nr, int group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
+ struct kbase_va_region *reg);
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, struct tagged_addr phys,
+ size_t nr, unsigned long flags, int group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
+ bool ignore_page_migration);
+int kbase_mmu_insert_single_imported_page(struct kbase_context *kctx, u64 vpfn,
+ struct tagged_addr phys, size_t nr, unsigned long flags,
+ int group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info);
+int kbase_mmu_insert_single_aliased_page(struct kbase_context *kctx, u64 vpfn,
+ struct tagged_addr phys, size_t nr, unsigned long flags,
+ int group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info);
+/**
+ * kbase_mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table
+ *
+ * @kbdev: Pointer to kbase device.
+ * @mmut: Pointer to GPU MMU page table.
+ * @vpfn: Start page frame number of the GPU virtual pages to unmap.
+ * @phys: Array of physical pages currently mapped to the virtual
+ * pages to unmap, or NULL. This is used for GPU cache maintenance
+ * and page migration support.
+ * @nr_phys_pages: Number of physical pages to flush.
+ * @nr_virt_pages: Number of virtual pages whose PTEs should be destroyed.
+ * @as_nr: Address space number, for GPU cache maintenance operations
+ * that happen outside a specific kbase context.
+ * @ignore_page_migration: Whether page migration metadata should be ignored.
+ *
+ * We actually discard the ATE and free the page table pages if no valid entries
+ * exist in PGD.
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ *
+ * The @p phys pointer to physical pages is not necessary for unmapping virtual memory,
+ * but it is used for fine-grained GPU cache maintenance. If @p phys is NULL,
+ * GPU cache maintenance will be done as usual, that is, invalidating all GPU caches
+ * instead of specific physical address ranges.
+ *
+ * Return: 0 on success, otherwise an error code.
+ */
int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
- struct tagged_addr *phys, size_t nr, int as_nr);
+ struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages,
+ int as_nr, bool ignore_page_migration);
+
int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
struct tagged_addr *phys, size_t nr,
unsigned long flags, int const group_id);
+#if MALI_USE_CSF
+/**
+ * kbase_mmu_update_csf_mcu_pages - Update MCU mappings with changes of phys and flags
+ *
+ * @kbdev: Pointer to kbase device.
+ * @vpfn: Virtual PFN (Page Frame Number) of the first page to update
+ * @phys: Pointer to the array of tagged physical addresses of the physical
+ * pages that are pointed to by the page table entries (that need to
+ * be updated).
+ * @nr: Number of pages to update
+ * @flags: Flags
+ * @group_id: The physical memory group in which the page was allocated.
+ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ *
+ * Return: 0 on success, otherwise an error code.
+ */
+int kbase_mmu_update_csf_mcu_pages(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys,
+ size_t nr, unsigned long flags, int const group_id);
+#endif
+
+/**
+ * kbase_mmu_migrate_page - Migrate GPU mappings and content between memory pages
+ *
+ * @old_phys: Old physical page to be replaced.
+ * @new_phys: New physical page used to replace old physical page.
+ * @old_dma_addr: DMA address of the old page.
+ * @new_dma_addr: DMA address of the new page.
+ * @level: MMU page table level of the provided PGD.
+ *
+ * The page migration process is made of 2 big steps:
+ *
+ * 1) Copy the content of the old page to the new page.
+ * 2) Remap the virtual page, that is: replace either the ATE (if the old page
+ * was a regular page) or the PTE (if the old page was used as a PGD) in the
+ * MMU page table with the new page.
+ *
+ * During the process, the MMU region is locked to prevent GPU access to the
+ * virtual memory page that is being remapped.
+ *
+ * Before copying the content of the old page to the new page and while the
+ * MMU region is locked, a GPU cache flush is performed to make sure that
+ * pending GPU writes are finalized to the old page before copying.
+ * That is necessary because otherwise there's a risk that GPU writes might
+ * be finalized to the old page, and not the new page, after migration.
+ * The MMU region is unlocked only at the end of the migration operation.
+ *
+ * Return: 0 on success, otherwise an error code.
+ */
+int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_phys,
+ dma_addr_t old_dma_addr, dma_addr_t new_dma_addr, int level);
+
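
For illustration, a minimal sketch of how a caller might wrap kbase_mmu_migrate_page(); the helper name migrate_one_page() and its DMA-mapping error handling are assumptions for this sketch, not part of the patch, and the usual kbase headers are assumed to be included. Only the migrate call itself comes from the declaration above.

static int migrate_one_page(struct kbase_device *kbdev, struct tagged_addr old_phys,
			    struct tagged_addr new_phys, dma_addr_t old_dma_addr, int level)
{
	/* Map the replacement page for DMA before it is handed to the GPU page
	 * table (hypothetical call site: the real driver prepares these
	 * mappings elsewhere).
	 */
	dma_addr_t new_dma_addr = dma_map_page(kbdev->dev, as_page(new_phys), 0,
					       PAGE_SIZE, DMA_BIDIRECTIONAL);
	int err;

	if (dma_mapping_error(kbdev->dev, new_dma_addr))
		return -ENOMEM;

	err = kbase_mmu_migrate_page(old_phys, new_phys, old_dma_addr, new_dma_addr, level);
	if (err)
		/* Migration failed: the new page was not adopted, so release its mapping */
		dma_unmap_page(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
	else
		/* Migration succeeded: the old page is no longer referenced by the GPU */
		dma_unmap_page(kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);

	return err;
}
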
+/**
+ * kbase_mmu_flush_pa_range() - Flush physical address range from the GPU caches
+ *
+ * @kbdev: Instance of GPU platform device, allocated from the probe method.
+ * @kctx: Pointer to kbase context, it can be NULL if the physical address
+ * range is not associated with User created context.
+ * @phys: Starting address of the physical range to start the operation on.
+ * @size: Number of bytes to work on.
+ * @flush_op: Type of cache flush operation to perform.
+ *
+ * Issue a cache flush physical range command. This function won't perform any
+ * flush if the GPU doesn't support the FLUSH_PA_RANGE command. The flush is
+ * performed only if the context has a JASID assigned to it.
+ * This function is basically a wrapper for kbase_gpu_cache_flush_pa_range_and_busy_wait().
+ */
+void kbase_mmu_flush_pa_range(struct kbase_device *kbdev, struct kbase_context *kctx,
+ phys_addr_t phys, size_t size,
+ enum kbase_mmu_op_type flush_op);
/**
* kbase_mmu_bus_fault_interrupt - Process a bus fault interrupt.
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw.h b/mali_kbase/mmu/mali_kbase_mmu_hw.h
index 3291143..d53f928 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw.h
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw.h
@@ -55,26 +55,6 @@ enum kbase_mmu_fault_type {
};
/**
- * enum kbase_mmu_op_type - enum for MMU operations
- * @KBASE_MMU_OP_NONE: To help catch uninitialized struct
- * @KBASE_MMU_OP_FIRST: The lower boundary of enum
- * @KBASE_MMU_OP_LOCK: Lock memory region
- * @KBASE_MMU_OP_UNLOCK: Unlock memory region
- * @KBASE_MMU_OP_FLUSH_PT: Flush page table (CLN+INV L2 only)
- * @KBASE_MMU_OP_FLUSH_MEM: Flush memory (CLN+INV L2+LSC)
- * @KBASE_MMU_OP_COUNT: The upper boundary of enum
- */
-enum kbase_mmu_op_type {
- KBASE_MMU_OP_NONE = 0, /* Must be zero */
- KBASE_MMU_OP_FIRST, /* Must be the first non-zero op */
- KBASE_MMU_OP_LOCK = KBASE_MMU_OP_FIRST,
- KBASE_MMU_OP_UNLOCK,
- KBASE_MMU_OP_FLUSH_PT,
- KBASE_MMU_OP_FLUSH_MEM,
- KBASE_MMU_OP_COUNT /* Must be the last in enum */
-};
-
-/**
* struct kbase_mmu_hw_op_param - parameters for kbase_mmu_hw_do_* functions
* @vpfn: MMU Virtual Page Frame Number to start the operation on.
* @nr: Number of pages to work on.
@@ -105,6 +85,22 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev,
struct kbase_as *as);
/**
+ * kbase_mmu_hw_do_lock - Issue LOCK command to the MMU and program
+ * the LOCKADDR register.
+ *
+ * @kbdev: Kbase device to issue the MMU operation on.
+ * @as: Address space to issue the MMU operation on.
+ * @op_param: Pointer to struct containing information about the MMU
+ * operation to perform.
+ *
+ * hwaccess_lock needs to be held when calling this function.
+ *
+ * Return: 0 if issuing the command was successful, otherwise an error code.
+ */
+int kbase_mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as,
+ const struct kbase_mmu_hw_op_param *op_param);
+
+/**
* kbase_mmu_hw_do_unlock_no_addr - Issue UNLOCK command to the MMU without
* programming the LOCKADDR register and wait
* for it to complete before returning.
@@ -114,6 +110,9 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev,
* @op_param: Pointer to struct containing information about the MMU
* operation to perform.
*
+ * This function should be called for GPUs where a GPU command is used to flush
+ * the cache(s) instead of an MMU command.
+ *
* Return: 0 if issuing the command was successful, otherwise an error code.
*/
int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as,
@@ -160,9 +159,7 @@ int kbase_mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as,
* GPUs where MMU command to flush the cache(s) is deprecated.
* mmu_hw_mutex needs to be held when calling this function.
*
- * Context: Acquires the hwaccess_lock, expects the caller to hold the mmu_hw_mutex
- *
- * Return: Zero if the operation was successful, non-zero otherwise.
+ * Return: 0 if the operation was successful, non-zero otherwise.
*/
int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param);
@@ -181,7 +178,7 @@ int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
* Both mmu_hw_mutex and hwaccess_lock need to be held when calling this
* function.
*
- * Return: Zero if the operation was successful, non-zero otherwise.
+ * Return: 0 if the operation was successful, non-zero otherwise.
*/
int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param);
@@ -198,7 +195,7 @@ int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as
* specified inside @op_param. GPU command is used to flush the cache(s)
* instead of the MMU command.
*
- * Return: Zero if the operation was successful, non-zero otherwise.
+ * Return: 0 if the operation was successful, non-zero otherwise.
*/
int kbase_mmu_hw_do_flush_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param);
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
index 122e9ef..794d6d5 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -24,10 +24,31 @@
#include <mali_kbase.h>
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_mem.h>
+#include <mali_kbase_reset_gpu.h>
#include <mmu/mali_kbase_mmu_hw.h>
#include <tl/mali_kbase_tracepoints.h>
#include <linux/delay.h>
+#if MALI_USE_CSF
+/**
+ * mmu_has_flush_skip_pgd_levels() - Check if the GPU has the feature
+ * AS_LOCKADDR_FLUSH_SKIP_LEVELS
+ *
+ * @gpu_props: GPU properties for the GPU instance.
+ *
+ * This function returns whether a cache flush can apply the skip flags of
+ * AS_LOCKADDR_FLUSH_SKIP_LEVELS.
+ *
+ * Return: True if cache flush has the said feature.
+ */
+static bool mmu_has_flush_skip_pgd_levels(struct kbase_gpu_props const *gpu_props)
+{
+ u32 const signature =
+ gpu_props->props.raw_props.gpu_id & (GPU_ID2_ARCH_MAJOR | GPU_ID2_ARCH_REV);
+
+ return signature >= (u32)GPU_ID2_PRODUCT_MAKE(12, 0, 4, 0);
+}
+#endif
/**
* lock_region() - Generate lockaddr to lock memory region in MMU
@@ -126,43 +147,70 @@ static int lock_region(struct kbase_gpu_props const *gpu_props, u64 *lockaddr,
*/
*lockaddr = lockaddr_base & ~((1ull << lockaddr_size_log2) - 1);
*lockaddr |= lockaddr_size_log2 - 1;
+
+#if MALI_USE_CSF
+ if (mmu_has_flush_skip_pgd_levels(gpu_props))
+ *lockaddr =
+ AS_LOCKADDR_FLUSH_SKIP_LEVELS_SET(*lockaddr, op_param->flush_skip_levels);
+#endif
+
return 0;
}
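
A worked example of the LOCKADDR encoding computed just above, as standalone C with illustrative values (not driver code): locking a naturally aligned 2 MiB region at GPU VA 0x08000000 keeps the base address in the upper bits and encodes log2(region size) - 1 in the low bits.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t lockaddr_base = 0x08000000ULL;	/* start of the region to lock */
	uint64_t lockaddr_size_log2 = 21;	/* 2 MiB region */
	uint64_t lockaddr;

	lockaddr = lockaddr_base & ~((1ULL << lockaddr_size_log2) - 1);
	lockaddr |= lockaddr_size_log2 - 1;	/* size field is log2 - 1 */

	printf("LOCKADDR = 0x%llx\n", (unsigned long long)lockaddr);	/* prints 0x8000014 */
	return 0;
}
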
-static int wait_ready(struct kbase_device *kbdev,
- unsigned int as_nr)
+/**
+ * wait_ready() - Wait for previously issued MMU command to complete.
+ *
+ * @kbdev: Kbase device to wait for an MMU command to complete.
+ * @as_nr: Address space to wait for an MMU command to complete.
+ *
+ * Reset GPU if the wait for previously issued command fails.
+ *
+ * Return: 0 on successful completion, negative error code on failure.
+ */
+static int wait_ready(struct kbase_device *kbdev, unsigned int as_nr)
{
- u32 max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+ const ktime_t wait_loop_start = ktime_get_raw();
+ const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms;
+ s64 diff;
- /* Wait for the MMU status to indicate there is no active command. */
- while (--max_loops &&
- kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)) &
- AS_STATUS_AS_ACTIVE) {
- ;
- }
+ if (unlikely(kbdev->as[as_nr].is_unresponsive))
+ return -EBUSY;
- if (WARN_ON_ONCE(max_loops == 0)) {
- dev_err(kbdev->dev,
- "AS_ACTIVE bit stuck for as %u, might be caused by slow/unstable GPU clock or possible faulty FPGA connector",
- as_nr);
-#if MALI_USE_CSF
- queue_work(system_highpri_wq, &kbdev->csf.coredump_work);
-#endif
- return -1;
- }
+ do {
+ unsigned int i;
- return 0;
+ for (i = 0; i < 1000; i++) {
+ /* Wait for the MMU status to indicate there is no active command */
+ if (!(kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)) &
+ AS_STATUS_AS_ACTIVE))
+ return 0;
+ }
+
+ diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start));
+ } while (diff < mmu_as_inactive_wait_time_ms);
+
+ dev_err(kbdev->dev,
+ "AS_ACTIVE bit stuck for as %u. Might be caused by unstable GPU clk/pwr or faulty system",
+ as_nr);
+ kbdev->as[as_nr].is_unresponsive = true;
+ if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+ kbase_reset_gpu_locked(kbdev);
+
+ return -ETIMEDOUT;
}
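
The new wait_ready() above replaces a fixed iteration budget with a wall-clock budget, polling the status register in batches and only reading the clock between batches. A minimal userspace sketch of that pattern, with read_status_busy() and TIMEOUT_MS as stand-ins rather than driver symbols:

#include <stdbool.h>
#include <time.h>

#define TIMEOUT_MS 1000

static long elapsed_ms(const struct timespec *start)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	return (now.tv_sec - start->tv_sec) * 1000 + (now.tv_nsec - start->tv_nsec) / 1000000;
}

static int wait_until_idle(bool (*read_status_busy)(void))
{
	struct timespec start;

	clock_gettime(CLOCK_MONOTONIC, &start);
	do {
		int i;

		/* Poll in batches; only check the elapsed time between batches */
		for (i = 0; i < 1000; i++) {
			if (!read_status_busy())
				return 0;	/* no active command */
		}
	} while (elapsed_ms(&start) < TIMEOUT_MS);

	return -1;	/* timed out: the driver marks the AS unresponsive and resets the GPU */
}
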
static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd)
{
- int status;
-
/* write AS_COMMAND when MMU is ready to accept another command */
- status = wait_ready(kbdev, as_nr);
- if (status == 0)
+ const int status = wait_ready(kbdev, as_nr);
+
+ if (likely(status == 0))
kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd);
- else {
+ else if (status == -EBUSY) {
+ dev_dbg(kbdev->dev,
+ "Skipped the wait for AS_ACTIVE bit for as %u, before sending MMU command %u",
+ as_nr, cmd);
+ } else {
dev_err(kbdev->dev,
"Wait for AS_ACTIVE bit failed for as %u, before sending MMU command %u",
as_nr, cmd);
@@ -210,21 +258,18 @@ static int wait_cores_power_trans_complete(struct kbase_device *kbdev)
* implicit unlock.
* @as_nr: Address space number for which MMU command needs to be
* sent.
- * @hwaccess_locked: Flag to indicate if hwaccess_lock is held by the caller.
*
- * This functions ensures that the flush of LSC is not missed for the pages that
+ * This function ensures that the flush of LSC is not missed for the pages that
* were unmapped from the GPU, due to the power down transition of shader cores.
*
* Return: 0 if the WA was successfully applied, non-zero otherwise.
*/
-static int apply_hw_issue_GPU2019_3901_wa(struct kbase_device *kbdev,
- u32 *mmu_cmd, unsigned int as_nr, bool hwaccess_locked)
+static int apply_hw_issue_GPU2019_3901_wa(struct kbase_device *kbdev, u32 *mmu_cmd,
+ unsigned int as_nr)
{
- unsigned long flags = 0;
int ret = 0;
- if (!hwaccess_locked)
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ lockdep_assert_held(&kbdev->hwaccess_lock);
/* Check if L2 is OFF. The cores also must be OFF if L2 is not up, so
* the workaround can be safely skipped.
@@ -233,23 +278,26 @@ static int apply_hw_issue_GPU2019_3901_wa(struct kbase_device *kbdev,
if (*mmu_cmd != AS_COMMAND_FLUSH_MEM) {
dev_warn(kbdev->dev,
"Unexpected mmu command received");
- ret = -EINVAL;
- goto unlock;
+ return -EINVAL;
}
/* Wait for the LOCK MMU command to complete, issued by the caller */
ret = wait_ready(kbdev, as_nr);
- if (ret)
- goto unlock;
+ if (unlikely(ret))
+ return ret;
ret = kbase_gpu_cache_flush_and_busy_wait(kbdev,
GPU_COMMAND_CACHE_CLN_INV_LSC);
- if (ret)
- goto unlock;
+ if (unlikely(ret))
+ return ret;
ret = wait_cores_power_trans_complete(kbdev);
- if (ret)
- goto unlock;
+ if (unlikely(ret)) {
+ if (kbase_prepare_to_reset_gpu_locked(kbdev,
+ RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+ kbase_reset_gpu_locked(kbdev);
+ return ret;
+ }
/* As LSC is guaranteed to have been flushed we can use FLUSH_PT
* MMU command to only flush the L2.
@@ -257,10 +305,6 @@ static int apply_hw_issue_GPU2019_3901_wa(struct kbase_device *kbdev,
*mmu_cmd = AS_COMMAND_FLUSH_PT;
}
-unlock:
- if (!hwaccess_locked)
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
return ret;
}
#endif
@@ -381,12 +425,21 @@ static int mmu_hw_do_lock_no_wait(struct kbase_device *kbdev, struct kbase_as *a
ret = mmu_hw_set_lock_addr(kbdev, as->number, lock_addr, op_param);
- if (!ret)
- write_cmd(kbdev, as->number, AS_COMMAND_LOCK);
+ if (likely(!ret))
+ ret = write_cmd(kbdev, as->number, AS_COMMAND_LOCK);
return ret;
}
+/**
+ * mmu_hw_do_lock - Issue LOCK command to the MMU and wait for its completion.
+ *
+ * @kbdev: Kbase device to issue the MMU operation on.
+ * @as: Address space to issue the MMU operation on.
+ * @op_param: Pointer to a struct containing information about the MMU operation.
+ *
+ * Return: 0 if issuing the LOCK command was successful, otherwise an error code.
+ */
static int mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param)
{
@@ -411,7 +464,7 @@ static int mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as,
}
int kbase_mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as,
- const struct kbase_mmu_hw_op_param *op_param)
+ const struct kbase_mmu_hw_op_param *op_param)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -429,10 +482,10 @@ int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *
ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK);
/* Wait for UNLOCK command to complete */
- if (!ret)
+ if (likely(!ret))
ret = wait_ready(kbdev, as->number);
- if (!ret) {
+ if (likely(!ret)) {
u64 lock_addr = 0x0;
/* read MMU_AS_CONTROL.LOCKADDR register */
lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI))
@@ -464,6 +517,16 @@ int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as,
return ret;
}
+/**
+ * mmu_hw_do_flush - Flush MMU and wait for its completion.
+ *
+ * @kbdev: Kbase device to issue the MMU operation on.
+ * @as: Address space to issue the MMU operation on.
+ * @op_param: Pointer to a struct containing information about the MMU operation.
+ * @hwaccess_locked: Flag to indicate if the hwaccess_lock is held by the caller.
+ *
+ * Return: 0 if flushing MMU was successful, otherwise an error code.
+ */
static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param, bool hwaccess_locked)
{
@@ -494,25 +557,31 @@ static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
return ret;
#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_NO_MALI)
- /* WA for the BASE_HW_ISSUE_GPU2019_3901. No runtime check is used here
- * as the WA is applicable to all CSF GPUs where FLUSH_MEM/PT command is
- * supported, and this function doesn't gets called for the GPUs where
- * FLUSH_MEM/PT command is deprecated.
- */
- if (mmu_cmd == AS_COMMAND_FLUSH_MEM) {
- ret = apply_hw_issue_GPU2019_3901_wa(kbdev, &mmu_cmd,
- as->number, hwaccess_locked);
+ /* WA for the BASE_HW_ISSUE_GPU2019_3901. */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3901) &&
+ mmu_cmd == AS_COMMAND_FLUSH_MEM) {
+ if (!hwaccess_locked) {
+ unsigned long flags = 0;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ ret = apply_hw_issue_GPU2019_3901_wa(kbdev, &mmu_cmd, as->number);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ } else {
+ ret = apply_hw_issue_GPU2019_3901_wa(kbdev, &mmu_cmd, as->number);
+ }
+
if (ret)
return ret;
}
#endif
- write_cmd(kbdev, as->number, mmu_cmd);
+ ret = write_cmd(kbdev, as->number, mmu_cmd);
/* Wait for the command to complete */
- ret = wait_ready(kbdev, as->number);
+ if (likely(!ret))
+ ret = wait_ready(kbdev, as->number);
- if (!ret)
+ if (likely(!ret))
mmu_command_instr(kbdev, op_param->kctx_id, mmu_cmd, lock_addr,
op_param->mmu_sync_info);
diff --git a/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c b/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c
index fcbccae..f2c6274 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c
+++ b/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c
@@ -35,10 +35,8 @@
#define ENTRY_IS_INVAL 2ULL
#define ENTRY_IS_PTE 3ULL
-#define ENTRY_ATTR_BITS (7ULL << 2) /* bits 4:2 */
#define ENTRY_ACCESS_RW (1ULL << 6) /* bits 6:7 */
#define ENTRY_ACCESS_RO (3ULL << 6)
-#define ENTRY_SHARE_BITS (3ULL << 8) /* bits 9:8 */
#define ENTRY_ACCESS_BIT (1ULL << 10)
#define ENTRY_NX_BIT (1ULL << 54)
diff --git a/mali_kbase/platform/devicetree/Kbuild b/mali_kbase/platform/devicetree/Kbuild
index 5eeccfa..995c4cd 100644
--- a/mali_kbase/platform/devicetree/Kbuild
+++ b/mali_kbase/platform/devicetree/Kbuild
@@ -20,6 +20,5 @@
mali_kbase-y += \
platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o \
- platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_platform.o \
platform/$(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o \
platform/$(MALI_PLATFORM_DIR)/mali_kbase_clk_rate_trace.o
diff --git a/mali_kbase/platform/devicetree/mali_kbase_config_platform.h b/mali_kbase/platform/devicetree/mali_kbase_config_platform.h
index 743885f..584a721 100644
--- a/mali_kbase/platform/devicetree/mali_kbase_config_platform.h
+++ b/mali_kbase/platform/devicetree/mali_kbase_config_platform.h
@@ -33,13 +33,12 @@
* Attached value: pointer to @ref kbase_platform_funcs_conf
* Default value: See @ref kbase_platform_funcs_conf
*/
-#define PLATFORM_FUNCS (&platform_funcs)
+#define PLATFORM_FUNCS (NULL)
#define CLK_RATE_TRACE_OPS (&clk_rate_trace_ops)
extern struct kbase_pm_callback_conf pm_callbacks;
extern struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops;
-extern struct kbase_platform_funcs_conf platform_funcs;
/**
* AUTO_SUSPEND_DELAY - Autosuspend delay
*
diff --git a/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c b/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c
index ff1d902..a019229 100644
--- a/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c
+++ b/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c
@@ -29,7 +29,6 @@
#include "mali_kbase_config_platform.h"
-
static void enable_gpu_power_control(struct kbase_device *kbdev)
{
unsigned int i;
@@ -82,8 +81,7 @@ static int pm_callback_power_on(struct kbase_device *kbdev)
int error;
unsigned long flags;
- dev_dbg(kbdev->dev, "%s %p\n", __func__,
- (void *)kbdev->dev->pm_domain);
+ dev_dbg(kbdev->dev, "%s %pK\n", __func__, (void *)kbdev->dev->pm_domain);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
WARN_ON(kbdev->pm.backend.gpu_powered);
@@ -314,5 +312,3 @@ struct kbase_pm_callback_conf pm_callbacks = {
.power_off_sc_rails_callback = pm_callback_sc_rails_off,
#endif
};
-
-
diff --git a/mali_kbase/platform/meson/mali_kbase_runtime_pm.c b/mali_kbase/platform/meson/mali_kbase_runtime_pm.c
index c00cbcb..a9b380c 100644
--- a/mali_kbase/platform/meson/mali_kbase_runtime_pm.c
+++ b/mali_kbase/platform/meson/mali_kbase_runtime_pm.c
@@ -149,7 +149,7 @@ static int pm_callback_power_on(struct kbase_device *kbdev)
int ret = 1; /* Assume GPU has been powered off */
int error;
- dev_dbg(kbdev->dev, "%s %p\n", __func__, (void *)kbdev->dev->pm_domain);
+ dev_dbg(kbdev->dev, "%s %pK\n", __func__, (void *)kbdev->dev->pm_domain);
#ifdef KBASE_PM_RUNTIME
error = pm_runtime_get_sync(kbdev->dev);
@@ -245,6 +245,18 @@ static void pm_callback_suspend(struct kbase_device *kbdev)
pm_callback_runtime_off(kbdev);
}
+#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
+static void pm_callback_sc_rails_on(struct kbase_device *kbdev)
+{
+ dev_dbg(kbdev->dev, "SC rails are on");
+}
+
+static void pm_callback_sc_rails_off(struct kbase_device *kbdev)
+{
+ dev_dbg(kbdev->dev, "SC rails are off");
+}
+#endif
+
struct kbase_pm_callback_conf pm_callbacks = {
.power_on_callback = pm_callback_power_on,
.power_off_callback = pm_callback_power_off,
@@ -262,4 +274,17 @@ struct kbase_pm_callback_conf pm_callbacks = {
.power_runtime_on_callback = NULL,
.power_runtime_off_callback = NULL,
#endif /* KBASE_PM_RUNTIME */
+
+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
+ .power_runtime_gpu_idle_callback = pm_callback_runtime_gpu_idle,
+ .power_runtime_gpu_active_callback = pm_callback_runtime_gpu_active,
+#else
+ .power_runtime_gpu_idle_callback = NULL,
+ .power_runtime_gpu_active_callback = NULL,
+#endif
+
+#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
+ .power_on_sc_rails_callback = pm_callback_sc_rails_on,
+ .power_off_sc_rails_callback = pm_callback_sc_rails_off,
+#endif
};
diff --git a/mali_kbase/tests/Mconfig b/mali_kbase/tests/Mconfig
index 4203971..d81c639 100644
--- a/mali_kbase/tests/Mconfig
+++ b/mali_kbase/tests/Mconfig
@@ -26,8 +26,8 @@ menuconfig MALI_KUTF
This option will build the Mali testing framework modules.
Modules:
- - kutf.ko
- - kutf_test.ko
+ - kutf.ko
+ - kutf_test.ko
config MALI_KUTF_IRQ_TEST
bool "Build Mali KUTF IRQ test module"
@@ -38,7 +38,7 @@ config MALI_KUTF_IRQ_TEST
It can determine the latency of the Mali GPU IRQ on your system.
Modules:
- - mali_kutf_irq_test.ko
+ - mali_kutf_irq_test.ko
config MALI_KUTF_CLK_RATE_TRACE
bool "Build Mali KUTF Clock rate trace test module"
@@ -50,7 +50,7 @@ config MALI_KUTF_CLK_RATE_TRACE
basic trace test in the system.
Modules:
- - mali_kutf_clk_rate_trace_test_portal.ko
+ - mali_kutf_clk_rate_trace_test_portal.ko
config MALI_KUTF_MGM_INTEGRATION_TEST
bool "Build Mali KUTF MGM integration test module"
@@ -62,12 +62,12 @@ config MALI_KUTF_MGM_INTEGRATION_TEST
group ids.
Modules:
- - mali_kutf_mgm_integration_test.ko
+ - mali_kutf_mgm_integration_test.ko
# Enable MALI_DEBUG for KUTF modules support
config UNIT_TEST_KERNEL_MODULES
- bool
- default y if UNIT_TEST_CODE && BACKEND_KERNEL
- default n
+ bool
+ default y if UNIT_TEST_CODE && BACKEND_KERNEL
+ default n
diff --git a/mali_kbase/tests/build.bp b/mali_kbase/tests/build.bp
index 9d6137d..5581ba9 100644
--- a/mali_kbase/tests/build.bp
+++ b/mali_kbase/tests/build.bp
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -25,7 +25,7 @@ bob_defaults {
"include",
"./../../",
"./../",
- "./"
+ "./",
],
}
@@ -38,3 +38,9 @@ bob_defaults {
kbuild_options: ["CONFIG_UNIT_TEST_KERNEL_MODULES=y"],
},
}
+
+bob_defaults {
+ name: "kernel_unit_tests",
+ add_to_alias: ["unit_tests"],
+ srcs: [".*_unit_test/"],
+}
diff --git a/mali_kbase/tests/include/kutf/kutf_helpers.h b/mali_kbase/tests/include/kutf/kutf_helpers.h
index c4c713c..3f68efa 100644
--- a/mali_kbase/tests/include/kutf/kutf_helpers.h
+++ b/mali_kbase/tests/include/kutf/kutf_helpers.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -31,6 +31,7 @@
*/
#include <kutf/kutf_suite.h>
+#include <linux/device.h>
/**
* kutf_helper_pending_input() - Check any pending lines sent by user space
@@ -81,4 +82,28 @@ int kutf_helper_input_enqueue(struct kutf_context *context,
*/
void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context);
+/**
+ * kutf_helper_ignore_dmesg() - Write a message to dmesg to instruct the parser
+ *                              to ignore errors, until the counterpart message
+ *                              is written to stop ignoring errors.
+ * @dev: Device pointer used when writing to dmesg.
+ *
+ * This function writes "Start ignoring dmesg warnings" to dmesg, which tells
+ * the parser not to log any errors. Only to be used in cases where we expect
+ * an error to be produced in dmesg but do not want it to be flagged as a
+ * failure.
+ */
+void kutf_helper_ignore_dmesg(struct device *dev);
+
+/**
+ * kutf_helper_stop_ignoring_dmesg() - Write a message to dmesg to instruct the
+ *                                      parser to stop ignoring errors.
+ * @dev: Device pointer used when writing to dmesg.
+ *
+ * This function writes "Stop ignoring dmesg warnings" to dmesg, which tells
+ * the parser to resume logging errors. Counterpart to
+ * kutf_helper_ignore_dmesg().
+ */
+void kutf_helper_stop_ignoring_dmesg(struct device *dev);
+
#endif /* _KERNEL_UTF_HELPERS_H_ */
diff --git a/mali_kbase/tests/kutf/kutf_helpers.c b/mali_kbase/tests/kutf/kutf_helpers.c
index d207d1c..4273619 100644
--- a/mali_kbase/tests/kutf/kutf_helpers.c
+++ b/mali_kbase/tests/kutf/kutf_helpers.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -127,3 +127,15 @@ void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context)
{
kutf_helper_input_enqueue(context, NULL, 0);
}
+
+void kutf_helper_ignore_dmesg(struct device *dev)
+{
+ dev_info(dev, "KUTF: Start ignoring dmesg warnings\n");
+}
+EXPORT_SYMBOL(kutf_helper_ignore_dmesg);
+
+void kutf_helper_stop_ignoring_dmesg(struct device *dev)
+{
+ dev_info(dev, "KUTF: Stop ignoring dmesg warnings\n");
+}
+EXPORT_SYMBOL(kutf_helper_stop_ignoring_dmesg);
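
A minimal sketch of how a KUTF test might bracket an intentionally provoked warning with these helpers; the function name run_negative_test() and the dev_warn() trigger are illustrative assumptions, not part of the patch.

#include <kutf/kutf_helpers.h>
#include <linux/device.h>

static void run_negative_test(struct device *dev)
{
	/* Tell the dmesg parser to ignore the warning we are about to provoke */
	kutf_helper_ignore_dmesg(dev);

	dev_warn(dev, "warning produced on purpose by the test\n");

	/* Resume normal error flagging for the rest of the test run */
	kutf_helper_stop_ignoring_dmesg(dev);
}
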
diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
index 2d7289d..a6f54b6 100644
--- a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
+++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
@@ -825,7 +825,7 @@ static void *mali_kutf_clk_rate_trace_create_fixture(
if (!data)
return NULL;
- *data = (const struct kutf_clk_rate_trace_fixture_data){ NULL };
+ memset(data, 0, sizeof(*data));
pr_debug("Hooking up the test portal to kbdev clk rate trace\n");
spin_lock(&kbdev->pm.clk_rtm.lock);
diff --git a/mali_kbase/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c b/mali_kbase/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
index 2d6e689..f2a014d 100644
--- a/mali_kbase/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
+++ b/mali_kbase/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
@@ -51,8 +51,6 @@ struct kutf_irq_fixture_data {
struct kbase_device *kbdev;
};
-#define SEC_TO_NANO(s) ((s)*1000000000LL)
-
/* ID for the GPU IRQ */
#define GPU_IRQ_HANDLER 2
@@ -212,6 +210,11 @@ static void mali_kutf_irq_latency(struct kutf_context *context)
average_time += irq_time - start_time;
udelay(10);
+	/* Sleep for a millisecond, every 10000 iterations, to avoid a misleading
+	 * CPU softlockup warning when all GPU IRQs keep going to the same CPU.
+ */
+ if (!(i % 10000))
+ msleep(1);
}
/* Go back to default handler */
diff --git a/mali_kbase/tests/mali_kutf_mgm_integration_test/build.bp b/mali_kbase/tests/mali_kutf_mgm_integration_test/build.bp
index 2e4a083..8b995f8 100644
--- a/mali_kbase/tests/mali_kutf_mgm_integration_test/build.bp
+++ b/mali_kbase/tests/mali_kutf_mgm_integration_test/build.bp
@@ -38,4 +38,4 @@ bob_kernel_module {
kbuild_options: ["CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST=y"],
enabled: true,
},
-}
\ No newline at end of file
+}
diff --git a/mali_kbase/thirdparty/mali_kbase_mmap.c b/mali_kbase/thirdparty/mali_kbase_mmap.c
index 34d2223..1e636b9 100644
--- a/mali_kbase/thirdparty/mali_kbase_mmap.c
+++ b/mali_kbase/thirdparty/mali_kbase_mmap.c
@@ -10,6 +10,7 @@
*/
#include "linux/mman.h"
+#include <linux/version_compat_defs.h>
#include <mali_kbase.h>
/* mali_kbase_mmap.c
@@ -90,7 +91,6 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
if ((*gap_end < info->low_limit) || (*gap_end < gap_start))
return false;
-
return true;
}
@@ -132,6 +132,7 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info
*info, bool is_shader_code, bool is_same_4gb_page)
{
+#if (KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE)
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long length, low_limit, high_limit, gap_start, gap_end;
@@ -225,7 +226,37 @@ check_current:
}
}
}
+#else
+ unsigned long length, high_limit, gap_start, gap_end;
+
+ MA_STATE(mas, &current->mm->mm_mt, 0, 0);
+ /* Adjust search length to account for worst case alignment overhead */
+ length = info->length + info->align_mask;
+ if (length < info->length)
+ return -ENOMEM;
+
+ /*
+ * Adjust search limits by the desired length.
+ * See implementation comment at top of unmapped_area().
+ */
+ gap_end = info->high_limit;
+ if (gap_end < length)
+ return -ENOMEM;
+ high_limit = gap_end - length;
+ if (info->low_limit > high_limit)
+ return -ENOMEM;
+
+ while (true) {
+ if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, length))
+ return -ENOMEM;
+ gap_end = mas.last + 1;
+ gap_start = mas.min;
+
+ if (align_and_check(&gap_end, gap_start, info, is_shader_code, is_same_4gb_page))
+ return gap_end;
+ }
+#endif
return -ENOMEM;
}
@@ -242,8 +273,13 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
struct vm_unmapped_area_info info;
unsigned long align_offset = 0;
unsigned long align_mask = 0;
+#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
+ unsigned long high_limit = arch_get_mmap_base(addr, mm->mmap_base);
+ unsigned long low_limit = max_t(unsigned long, PAGE_SIZE, kbase_mmap_min_addr);
+#else
unsigned long high_limit = mm->mmap_base;
unsigned long low_limit = PAGE_SIZE;
+#endif
int cpu_va_bits = BITS_PER_LONG;
int gpu_pc_bits =
kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
@@ -270,6 +306,13 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
struct kbase_reg_zone *zone =
kbase_ctx_reg_zone_get_nolock(kctx, KBASE_REG_ZONE_SAME_VA);
u64 same_va_end_addr = kbase_reg_zone_end_pfn(zone) << PAGE_SHIFT;
+#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
+ const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
+
+ /* requested length too big for entire address space */
+ if (len > mmap_end - kbase_mmap_min_addr)
+ return -ENOMEM;
+#endif
/* err on fixed address */
if ((flags & MAP_FIXED) || addr)
@@ -282,7 +325,7 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
high_limit =
- min_t(unsigned long, mm->mmap_base, same_va_end_addr);
+ min_t(unsigned long, high_limit, same_va_end_addr);
/* If there's enough (> 33 bits) of GPU VA space, align
* to 2MB boundaries.
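
As a standalone illustration of the 2 MB alignment mentioned in the comment above, a top-down placement rounds the candidate start down to the boundary and then checks that it still clears the low limit; the names here are local to the sketch, not the driver's align_and_check() internals.

#include <stddef.h>
#include <stdint.h>

#define SZ_2M (2UL * 1024 * 1024)

/* Highest 2MB-aligned start for a mapping of 'len' bytes that still fits
 * between low_limit and high_limit; returns 0 if no such placement exists.
 */
static uintptr_t place_topdown_2mb(uintptr_t low_limit, uintptr_t high_limit, size_t len)
{
	uintptr_t start;

	if (high_limit < len)
		return 0;
	start = (high_limit - len) & ~(uintptr_t)(SZ_2M - 1);	/* round down to 2MB */
	return (start >= low_limit) ? start : 0;
}
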
@@ -359,9 +402,15 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base &&
high_limit < same_va_end_addr) {
+#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
+ /* Retry above TASK_UNMAPPED_BASE */
+ info.low_limit = TASK_UNMAPPED_BASE;
+ info.high_limit = min_t(u64, mmap_end, same_va_end_addr);
+#else
/* Retry above mmap_base */
info.low_limit = mm->mmap_base;
info.high_limit = min_t(u64, TASK_SIZE, same_va_end_addr);
+#endif
ret = kbase_unmapped_area_topdown(&info, is_shader_code,
is_same_4gb_page);
diff --git a/mali_kbase/tl/Kbuild b/mali_kbase/tl/Kbuild
index 4344850..1ecf3e4 100644
--- a/mali_kbase/tl/Kbuild
+++ b/mali_kbase/tl/Kbuild
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
diff --git a/mali_kbase/tl/mali_kbase_timeline.c b/mali_kbase/tl/mali_kbase_timeline.c
index 09de3f0..20356d6 100644
--- a/mali_kbase/tl/mali_kbase_timeline.c
+++ b/mali_kbase/tl/mali_kbase_timeline.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -24,8 +24,6 @@
#include "mali_kbase_tracepoints.h"
#include <mali_kbase.h>
-#include <mali_kbase_jm.h>
-
#include <linux/atomic.h>
#include <linux/file.h>
#include <linux/mutex.h>
diff --git a/mali_kbase/tl/mali_kbase_timeline_io.c b/mali_kbase/tl/mali_kbase_timeline_io.c
index 359ee4e..ae57006 100644
--- a/mali_kbase/tl/mali_kbase_timeline_io.c
+++ b/mali_kbase/tl/mali_kbase_timeline_io.c
@@ -35,19 +35,7 @@
#include <uapi/linux/eventpoll.h>
#endif
-#ifndef MALI_STRIP_KBASE_DEVELOPMENT
-/* Development builds need to test instrumentation and enable unprivileged
- * processes to acquire timeline streams, in order to avoid complications
- * with configurations across multiple platforms and systems.
- *
- * Release builds, instead, shall deny access to unprivileged processes
- * because there are no use cases where they are allowed to acquire timeline
- * streams, unless they're given special permissions by a privileged process.
- */
-static int kbase_unprivileged_global_profiling = 1;
-#else
static int kbase_unprivileged_global_profiling;
-#endif
/**
* kbase_unprivileged_global_profiling_set - set permissions for unprivileged processes
@@ -350,7 +338,8 @@ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer,
* @filp: Pointer to file structure
* @wait: Pointer to poll table
*
- * Return: POLLIN if data can be read without blocking, otherwise zero
+ * Return: EPOLLIN | EPOLLRDNORM if data can be read without blocking,
+ * otherwise zero, or EPOLLHUP | EPOLLERR on error.
*/
static __poll_t kbasep_timeline_io_poll(struct file *filp, poll_table *wait)
{
@@ -362,18 +351,19 @@ static __poll_t kbasep_timeline_io_poll(struct file *filp, poll_table *wait)
KBASE_DEBUG_ASSERT(wait);
if (WARN_ON(!filp->private_data))
- return (__force __poll_t)-EFAULT;
+ return EPOLLHUP | EPOLLERR;
timeline = (struct kbase_timeline *)filp->private_data;
/* If there are header bytes to copy, read will not block */
if (kbasep_timeline_has_header_data(timeline))
- return (__force __poll_t)POLLIN;
+ return EPOLLIN | EPOLLRDNORM;
poll_wait(filp, &timeline->event_queue, wait);
if (kbasep_timeline_io_packet_pending(timeline, &stream, &rb_idx))
- return (__force __poll_t)POLLIN;
- return 0;
+ return EPOLLIN | EPOLLRDNORM;
+
+ return (__poll_t)0;
}
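
With the poll handler now reporting EPOLLIN | EPOLLRDNORM, a userspace consumer can wait for timeline data with plain poll(2). A minimal sketch, assuming timeline_fd is an already-acquired timeline stream file descriptor (how it is obtained is outside this hunk):

#include <poll.h>
#include <unistd.h>

static ssize_t read_timeline_packet(int timeline_fd, void *buf, size_t len)
{
	struct pollfd pfd = { .fd = timeline_fd, .events = POLLIN };
	int ready = poll(&pfd, 1, 1000);	/* wait up to one second for data */

	if (ready <= 0 || !(pfd.revents & POLLIN))
		return -1;	/* timeout, poll error, or stream hang-up */

	return read(timeline_fd, buf, len);
}
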
int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags)
diff --git a/mali_kbase/tl/mali_kbase_tlstream.h b/mali_kbase/tl/mali_kbase_tlstream.h
index 6660cf5..c142849 100644
--- a/mali_kbase/tl/mali_kbase_tlstream.h
+++ b/mali_kbase/tl/mali_kbase_tlstream.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,17 +27,13 @@
#include <linux/wait.h>
/* The maximum size of a single packet used by timeline. */
-#define PACKET_SIZE 4096 /* bytes */
+#define PACKET_SIZE 4096 /* bytes */
/* The number of packets used by one timeline stream. */
-#if defined(CONFIG_MALI_JOB_DUMP) || defined(CONFIG_MALI_VECTOR_DUMP)
- #define PACKET_COUNT 64
-#else
- #define PACKET_COUNT 32
-#endif
+#define PACKET_COUNT 128
/* The maximum expected length of string in tracepoint descriptor. */
-#define STRLEN_MAX 64 /* bytes */
+#define STRLEN_MAX 64 /* bytes */
/**
* struct kbase_tlstream - timeline stream structure
diff --git a/mali_kbase/tl/mali_kbase_tracepoints.c b/mali_kbase/tl/mali_kbase_tracepoints.c
index 3ac7850..f62c755 100644
--- a/mali_kbase/tl/mali_kbase_tracepoints.c
+++ b/mali_kbase/tl/mali_kbase_tracepoints.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -84,9 +84,12 @@ enum tl_msg_id_obj {
KBASE_TL_ATTRIB_ATOM_PRIORITIZED,
KBASE_TL_ATTRIB_ATOM_JIT,
KBASE_TL_KBASE_NEW_DEVICE,
+ KBASE_TL_KBASE_GPUCMDQUEUE_KICK,
KBASE_TL_KBASE_DEVICE_PROGRAM_CSG,
KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG,
- KBASE_TL_KBASE_DEVICE_HALT_CSG,
+ KBASE_TL_KBASE_DEVICE_HALTING_CSG,
+ KBASE_TL_KBASE_DEVICE_SUSPEND_CSG,
+ KBASE_TL_KBASE_DEVICE_CSG_IDLE,
KBASE_TL_KBASE_NEW_CTX,
KBASE_TL_KBASE_DEL_CTX,
KBASE_TL_KBASE_CTX_ASSIGN_AS,
@@ -97,17 +100,19 @@ enum tl_msg_id_obj {
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT,
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT,
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET,
+ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION,
+ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION,
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT,
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT,
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE,
- KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER,
- KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND,
KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC,
KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC,
KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC,
KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE,
KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE,
KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE,
+ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER,
+ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START,
@@ -115,6 +120,9 @@ enum tl_msg_id_obj {
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET,
+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START,
+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END,
+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START,
@@ -352,16 +360,28 @@ enum tl_msg_id_obj {
"New KBase Device", \
"@IIIIIII", \
"kbase_device_id,kbase_device_gpu_core_count,kbase_device_max_num_csgs,kbase_device_as_count,kbase_device_sb_entry_count,kbase_device_has_cross_stream_sync,kbase_device_supports_gpu_sleep") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_GPUCMDQUEUE_KICK, \
+ "Kernel receives a request to process new GPU queue instructions", \
+ "@IL", \
+ "kernel_ctx_id,buffer_gpu_addr") \
TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, \
"CSG is programmed to a slot", \
"@IIIII", \
- "kbase_device_id,kernel_ctx_id,gpu_cmdq_grp_handle,kbase_device_csg_slot_index,kbase_device_csg_slot_resumed") \
+ "kbase_device_id,kernel_ctx_id,gpu_cmdq_grp_handle,kbase_device_csg_slot_index,kbase_device_csg_slot_resuming") \
TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, \
"CSG is deprogrammed from a slot", \
"@II", \
"kbase_device_id,kbase_device_csg_slot_index") \
- TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_HALT_CSG, \
- "CSG is halted", \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_HALTING_CSG, \
+ "CSG is halting", \
+ "@III", \
+ "kbase_device_id,kbase_device_csg_slot_index,kbase_device_csg_slot_suspending") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_SUSPEND_CSG, \
+ "CSG is suspended", \
+ "@II", \
+ "kbase_device_id,kbase_device_csg_slot_index") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_CSG_IDLE, \
+ "KBase device is notified that CSG is idle.", \
"@II", \
"kbase_device_id,kbase_device_csg_slot_index") \
TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_CTX, \
@@ -399,11 +419,19 @@ enum tl_msg_id_obj {
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT, \
"KCPU Queue enqueues Wait on Cross Queue Sync Object", \
"@pLII", \
- "kcpu_queue,cqs_obj_gpu_addr,cqs_obj_compare_value,cqs_obj_inherit_error") \
+ "kcpu_queue,cqs_obj_gpu_addr,compare_value,inherit_error") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET, \
"KCPU Queue enqueues Set on Cross Queue Sync Object", \
"@pL", \
"kcpu_queue,cqs_obj_gpu_addr") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION, \
+ "KCPU Queue enqueues Wait Operation on Cross Queue Sync Object", \
+ "@pLLIII", \
+ "kcpu_queue,cqs_obj_gpu_addr,compare_value,condition,data_type,inherit_error") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION, \
+ "KCPU Queue enqueues Set Operation on Cross Queue Sync Object", \
+ "@pLLII", \
+ "kcpu_queue,cqs_obj_gpu_addr,value,operation,data_type") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT, \
"KCPU Queue enqueues Map Import", \
"@pL", \
@@ -416,14 +444,6 @@ enum tl_msg_id_obj {
"KCPU Queue enqueues Unmap Import ignoring reference count", \
"@pL", \
"kcpu_queue,map_import_buf_gpu_addr") \
- TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER, \
- "KCPU Queue enqueues Error Barrier", \
- "@p", \
- "kcpu_queue") \
- TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND, \
- "KCPU Queue enqueues Group Suspend", \
- "@ppI", \
- "kcpu_queue,group_suspend_buf,gpu_cmdq_grp_handle") \
TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \
"Begin array of KCPU Queue enqueues JIT Alloc", \
"@p", \
@@ -448,6 +468,14 @@ enum tl_msg_id_obj {
"End array of KCPU Queue enqueues JIT Free", \
"@p", \
"kcpu_queue") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER, \
+ "KCPU Queue enqueues Error Barrier", \
+ "@p", \
+ "kcpu_queue") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND, \
+ "KCPU Queue enqueues Group Suspend", \
+ "@ppI", \
+ "kcpu_queue,group_suspend_buf,gpu_cmdq_grp_handle") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START, \
"KCPU Queue starts a Signal on Fence", \
"@p", \
@@ -465,15 +493,27 @@ enum tl_msg_id_obj {
"@pI", \
"kcpu_queue,execute_error") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START, \
- "KCPU Queue starts a Wait on an array of Cross Queue Sync Objects", \
+ "KCPU Queue starts a Wait on Cross Queue Sync Object", \
"@p", \
"kcpu_queue") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END, \
- "KCPU Queue ends a Wait on an array of Cross Queue Sync Objects", \
+ "KCPU Queue ends a Wait on Cross Queue Sync Object", \
"@pI", \
"kcpu_queue,execute_error") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET, \
- "KCPU Queue executes a Set on an array of Cross Queue Sync Objects", \
+ "KCPU Queue executes a Set on Cross Queue Sync Object", \
+ "@pI", \
+ "kcpu_queue,execute_error") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START, \
+ "KCPU Queue starts a Wait Operation on Cross Queue Sync Object", \
+ "@p", \
+ "kcpu_queue") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END, \
+ "KCPU Queue ends a Wait Operation on Cross Queue Sync Object", \
+ "@pI", \
+ "kcpu_queue,execute_error") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION, \
+ "KCPU Queue executes a Set Operation on Cross Queue Sync Object", \
"@pI", \
"kcpu_queue,execute_error") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, \
@@ -2092,13 +2132,40 @@ void __kbase_tlstream_tl_kbase_new_device(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
+void __kbase_tlstream_tl_kbase_gpucmdqueue_kick(
+ struct kbase_tlstream *stream,
+ u32 kernel_ctx_id,
+ u64 buffer_gpu_addr
+)
+{
+ const u32 msg_id = KBASE_TL_KBASE_GPUCMDQUEUE_KICK;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kernel_ctx_id)
+ + sizeof(buffer_gpu_addr)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kernel_ctx_id, sizeof(kernel_ctx_id));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &buffer_gpu_addr, sizeof(buffer_gpu_addr));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
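The serializer above fixes the on-stream layout: a u32 message ID, a u64 timestamp appended by kbasep_serialize_timestamp(), then the arguments in declaration order. This is also what the "@IL" type string in the TRACEPOINT_DESC above encodes (from the signatures in this file, 'I' appears to denote a u32, 'L' a u64 and 'p' a pointer). A minimal standalone sketch of that packing follows; the memcpy-based helper stands in for kbasep_serialize_bytes() and is illustrative only:

/* Illustrative only: mirrors the msg_id + timestamp + arguments layout packed
 * by __kbase_tlstream_tl_kbase_gpucmdqueue_kick(); not driver code. */
#include <stdint.h>
#include <string.h>

static size_t sketch_serialize_bytes(char *buf, size_t pos,
                                     const void *data, size_t len)
{
        memcpy(&buf[pos], data, len);   /* append raw bytes at the cursor */
        return pos + len;               /* advance the cursor */
}

static size_t sketch_pack_gpucmdqueue_kick(char *buf, uint32_t msg_id,
                                           uint64_t timestamp,
                                           uint32_t kernel_ctx_id,
                                           uint64_t buffer_gpu_addr)
{
        size_t pos = 0;

        pos = sketch_serialize_bytes(buf, pos, &msg_id, sizeof(msg_id));
        pos = sketch_serialize_bytes(buf, pos, &timestamp, sizeof(timestamp));
        pos = sketch_serialize_bytes(buf, pos, &kernel_ctx_id, sizeof(kernel_ctx_id));
        pos = sketch_serialize_bytes(buf, pos, &buffer_gpu_addr, sizeof(buffer_gpu_addr));

        return pos;     /* 4 + 8 + 4 + 8 bytes, matching msg_size above */
}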
void __kbase_tlstream_tl_kbase_device_program_csg(
struct kbase_tlstream *stream,
u32 kbase_device_id,
u32 kernel_ctx_id,
u32 gpu_cmdq_grp_handle,
u32 kbase_device_csg_slot_index,
- u32 kbase_device_csg_slot_resumed
+ u32 kbase_device_csg_slot_resuming
)
{
const u32 msg_id = KBASE_TL_KBASE_DEVICE_PROGRAM_CSG;
@@ -2107,7 +2174,7 @@ void __kbase_tlstream_tl_kbase_device_program_csg(
+ sizeof(kernel_ctx_id)
+ sizeof(gpu_cmdq_grp_handle)
+ sizeof(kbase_device_csg_slot_index)
- + sizeof(kbase_device_csg_slot_resumed)
+ + sizeof(kbase_device_csg_slot_resuming)
;
char *buffer;
unsigned long acq_flags;
@@ -2126,7 +2193,7 @@ void __kbase_tlstream_tl_kbase_device_program_csg(
pos = kbasep_serialize_bytes(buffer,
pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index));
pos = kbasep_serialize_bytes(buffer,
- pos, &kbase_device_csg_slot_resumed, sizeof(kbase_device_csg_slot_resumed));
+ pos, &kbase_device_csg_slot_resuming, sizeof(kbase_device_csg_slot_resuming));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
@@ -2158,13 +2225,71 @@ void __kbase_tlstream_tl_kbase_device_deprogram_csg(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_tl_kbase_device_halt_csg(
+void __kbase_tlstream_tl_kbase_device_halting_csg(
+ struct kbase_tlstream *stream,
+ u32 kbase_device_id,
+ u32 kbase_device_csg_slot_index,
+ u32 kbase_device_csg_slot_suspending
+)
+{
+ const u32 msg_id = KBASE_TL_KBASE_DEVICE_HALTING_CSG;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kbase_device_id)
+ + sizeof(kbase_device_csg_slot_index)
+ + sizeof(kbase_device_csg_slot_suspending)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kbase_device_id, sizeof(kbase_device_id));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kbase_device_csg_slot_suspending, sizeof(kbase_device_csg_slot_suspending));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_kbase_device_suspend_csg(
struct kbase_tlstream *stream,
u32 kbase_device_id,
u32 kbase_device_csg_slot_index
)
{
- const u32 msg_id = KBASE_TL_KBASE_DEVICE_HALT_CSG;
+ const u32 msg_id = KBASE_TL_KBASE_DEVICE_SUSPEND_CSG;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kbase_device_id)
+ + sizeof(kbase_device_csg_slot_index)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kbase_device_id, sizeof(kbase_device_id));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_kbase_device_csg_idle(
+ struct kbase_tlstream *stream,
+ u32 kbase_device_id,
+ u32 kbase_device_csg_slot_index
+)
+{
+ const u32 msg_id = KBASE_TL_KBASE_DEVICE_CSG_IDLE;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kbase_device_id)
+ sizeof(kbase_device_csg_slot_index)
@@ -2401,16 +2526,16 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait(
struct kbase_tlstream *stream,
const void *kcpu_queue,
u64 cqs_obj_gpu_addr,
- u32 cqs_obj_compare_value,
- u32 cqs_obj_inherit_error
+ u32 compare_value,
+ u32 inherit_error
)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kcpu_queue)
+ sizeof(cqs_obj_gpu_addr)
- + sizeof(cqs_obj_compare_value)
- + sizeof(cqs_obj_inherit_error)
+ + sizeof(compare_value)
+ + sizeof(inherit_error)
;
char *buffer;
unsigned long acq_flags;
@@ -2425,9 +2550,9 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait(
pos = kbasep_serialize_bytes(buffer,
pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr));
pos = kbasep_serialize_bytes(buffer,
- pos, &cqs_obj_compare_value, sizeof(cqs_obj_compare_value));
+ pos, &compare_value, sizeof(compare_value));
pos = kbasep_serialize_bytes(buffer,
- pos, &cqs_obj_inherit_error, sizeof(cqs_obj_inherit_error));
+ pos, &inherit_error, sizeof(inherit_error));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
@@ -2459,16 +2584,24 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import(
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait_operation(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u64 map_import_buf_gpu_addr
+ u64 cqs_obj_gpu_addr,
+ u64 compare_value,
+ u32 condition,
+ u32 data_type,
+ u32 inherit_error
)
{
- const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT;
+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kcpu_queue)
- + sizeof(map_import_buf_gpu_addr)
+ + sizeof(cqs_obj_gpu_addr)
+ + sizeof(compare_value)
+ + sizeof(condition)
+ + sizeof(data_type)
+ + sizeof(inherit_error)
;
char *buffer;
unsigned long acq_flags;
@@ -2481,21 +2614,35 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import(
pos = kbasep_serialize_bytes(buffer,
pos, &kcpu_queue, sizeof(kcpu_queue));
pos = kbasep_serialize_bytes(buffer,
- pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr));
+ pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &compare_value, sizeof(compare_value));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &condition, sizeof(condition));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &data_type, sizeof(data_type));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &inherit_error, sizeof(inherit_error));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import(
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set_operation(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u64 map_import_buf_gpu_addr
+ u64 cqs_obj_gpu_addr,
+ u64 value,
+ u32 operation,
+ u32 data_type
)
{
- const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT;
+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kcpu_queue)
- + sizeof(map_import_buf_gpu_addr)
+ + sizeof(cqs_obj_gpu_addr)
+ + sizeof(value)
+ + sizeof(operation)
+ + sizeof(data_type)
;
char *buffer;
unsigned long acq_flags;
@@ -2508,18 +2655,24 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import(
pos = kbasep_serialize_bytes(buffer,
pos, &kcpu_queue, sizeof(kcpu_queue));
pos = kbasep_serialize_bytes(buffer,
- pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr));
+ pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &value, sizeof(value));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &operation, sizeof(operation));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &data_type, sizeof(data_type));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force(
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import(
struct kbase_tlstream *stream,
const void *kcpu_queue,
u64 map_import_buf_gpu_addr
)
{
- const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE;
+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kcpu_queue)
+ sizeof(map_import_buf_gpu_addr)
@@ -2540,14 +2693,16 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier(
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import(
struct kbase_tlstream *stream,
- const void *kcpu_queue
+ const void *kcpu_queue,
+ u64 map_import_buf_gpu_addr
)
{
- const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER;
+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kcpu_queue)
+ + sizeof(map_import_buf_gpu_addr)
;
char *buffer;
unsigned long acq_flags;
@@ -2559,22 +2714,22 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier(
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
pos, &kcpu_queue, sizeof(kcpu_queue));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend(
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- const void *group_suspend_buf,
- u32 gpu_cmdq_grp_handle
+ u64 map_import_buf_gpu_addr
)
{
- const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND;
+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kcpu_queue)
- + sizeof(group_suspend_buf)
- + sizeof(gpu_cmdq_grp_handle)
+ + sizeof(map_import_buf_gpu_addr)
;
char *buffer;
unsigned long acq_flags;
@@ -2587,9 +2742,7 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend(
pos = kbasep_serialize_bytes(buffer,
pos, &kcpu_queue, sizeof(kcpu_queue));
pos = kbasep_serialize_bytes(buffer,
- pos, &group_suspend_buf, sizeof(group_suspend_buf));
- pos = kbasep_serialize_bytes(buffer,
- pos, &gpu_cmdq_grp_handle, sizeof(gpu_cmdq_grp_handle));
+ pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
@@ -2772,6 +2925,60 @@ void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_free(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue
+)
+{
+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kcpu_queue)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kcpu_queue, sizeof(kcpu_queue));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ const void *group_suspend_buf,
+ u32 gpu_cmdq_grp_handle
+)
+{
+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kcpu_queue)
+ + sizeof(group_suspend_buf)
+ + sizeof(gpu_cmdq_grp_handle)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kcpu_queue, sizeof(kcpu_queue));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &group_suspend_buf, sizeof(group_suspend_buf));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &gpu_cmdq_grp_handle, sizeof(gpu_cmdq_grp_handle));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start(
struct kbase_tlstream *stream,
const void *kcpu_queue
@@ -2949,6 +3156,83 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_start(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue
+)
+{
+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kcpu_queue)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kcpu_queue, sizeof(kcpu_queue));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_end(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ u32 execute_error
+)
+{
+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kcpu_queue)
+ + sizeof(execute_error)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kcpu_queue, sizeof(kcpu_queue));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &execute_error, sizeof(execute_error));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set_operation(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ u32 execute_error
+)
+{
+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kcpu_queue)
+ + sizeof(execute_error)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kcpu_queue, sizeof(kcpu_queue));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &execute_error, sizeof(execute_error));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start(
struct kbase_tlstream *stream,
const void *kcpu_queue
diff --git a/mali_kbase/tl/mali_kbase_tracepoints.h b/mali_kbase/tl/mali_kbase_tracepoints.h
index f01fc54..f1f4761 100644
--- a/mali_kbase/tl/mali_kbase_tracepoints.h
+++ b/mali_kbase/tl/mali_kbase_tracepoints.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -77,7 +77,7 @@ extern const size_t aux_desc_header_size;
#define TL_JS_EVENT_STOP GATOR_JOB_SLOT_STOP
#define TL_JS_EVENT_SOFT_STOP GATOR_JOB_SLOT_SOFT_STOPPED
-#define TLSTREAM_ENABLED (1 << 31)
+#define TLSTREAM_ENABLED (1u << 31)
void __kbase_tlstream_tl_new_ctx(
struct kbase_tlstream *stream,
@@ -396,13 +396,19 @@ void __kbase_tlstream_tl_kbase_new_device(
u32 kbase_device_supports_gpu_sleep
);
+void __kbase_tlstream_tl_kbase_gpucmdqueue_kick(
+ struct kbase_tlstream *stream,
+ u32 kernel_ctx_id,
+ u64 buffer_gpu_addr
+);
+
void __kbase_tlstream_tl_kbase_device_program_csg(
struct kbase_tlstream *stream,
u32 kbase_device_id,
u32 kernel_ctx_id,
u32 gpu_cmdq_grp_handle,
u32 kbase_device_csg_slot_index,
- u32 kbase_device_csg_slot_resumed
+ u32 kbase_device_csg_slot_resuming
);
void __kbase_tlstream_tl_kbase_device_deprogram_csg(
@@ -411,7 +417,20 @@ void __kbase_tlstream_tl_kbase_device_deprogram_csg(
u32 kbase_device_csg_slot_index
);
-void __kbase_tlstream_tl_kbase_device_halt_csg(
+void __kbase_tlstream_tl_kbase_device_halting_csg(
+ struct kbase_tlstream *stream,
+ u32 kbase_device_id,
+ u32 kbase_device_csg_slot_index,
+ u32 kbase_device_csg_slot_suspending
+);
+
+void __kbase_tlstream_tl_kbase_device_suspend_csg(
+ struct kbase_tlstream *stream,
+ u32 kbase_device_id,
+ u32 kbase_device_csg_slot_index
+);
+
+void __kbase_tlstream_tl_kbase_device_csg_idle(
struct kbase_tlstream *stream,
u32 kbase_device_id,
u32 kbase_device_csg_slot_index
@@ -468,8 +487,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait(
struct kbase_tlstream *stream,
const void *kcpu_queue,
u64 cqs_obj_gpu_addr,
- u32 cqs_obj_compare_value,
- u32 cqs_obj_inherit_error
+ u32 compare_value,
+ u32 inherit_error
);
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set(
@@ -478,34 +497,41 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set(
u64 cqs_obj_gpu_addr
);
-void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import(
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait_operation(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u64 map_import_buf_gpu_addr
+ u64 cqs_obj_gpu_addr,
+ u64 compare_value,
+ u32 condition,
+ u32 data_type,
+ u32 inherit_error
);
-void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import(
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set_operation(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u64 map_import_buf_gpu_addr
+ u64 cqs_obj_gpu_addr,
+ u64 value,
+ u32 operation,
+ u32 data_type
);
-void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force(
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import(
struct kbase_tlstream *stream,
const void *kcpu_queue,
u64 map_import_buf_gpu_addr
);
-void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier(
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import(
struct kbase_tlstream *stream,
- const void *kcpu_queue
+ const void *kcpu_queue,
+ u64 map_import_buf_gpu_addr
);
-void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend(
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- const void *group_suspend_buf,
- u32 gpu_cmdq_grp_handle
+ u64 map_import_buf_gpu_addr
);
void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc(
@@ -548,6 +574,18 @@ void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_free(
const void *kcpu_queue
);
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue
+);
+
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ const void *group_suspend_buf,
+ u32 gpu_cmdq_grp_handle
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start(
struct kbase_tlstream *stream,
const void *kcpu_queue
@@ -587,6 +625,23 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set(
u32 execute_error
);
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_start(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue
+);
+
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_end(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ u32 execute_error
+);
+
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set_operation(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ u32 execute_error
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start(
struct kbase_tlstream *stream,
const void *kcpu_queue
@@ -1982,6 +2037,37 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
+ * KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK - Kernel receives a request to process new GPU queue instructions
+ *
+ * @kbdev: Kbase device
+ * @kernel_ctx_id: Unique ID for the KBase Context
+ * @buffer_gpu_addr: Address of the GPU queue's command buffer
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK( \
+ kbdev, \
+ kernel_ctx_id, \
+ buffer_gpu_addr \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_gpucmdqueue_kick( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kernel_ctx_id, \
+ buffer_gpu_addr \
+ ); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK( \
+ kbdev, \
+ kernel_ctx_id, \
+ buffer_gpu_addr \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
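As a usage illustration only (the wrapper function below is hypothetical and not part of this patch), a queue-kick handler might emit the new tracepoint as sketched here; the macro itself checks BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS in kbdev->timeline_flags, so the call is a no-op when CSF tracepoints are disabled:

/* Hypothetical call site, for illustration only. */
static void example_trace_queue_kick(struct kbase_device *kbdev,
                                     u32 kernel_ctx_id, u64 buffer_gpu_addr)
{
        /* Expands to an empty statement when MALI_USE_CSF is 0. */
        KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK(kbdev, kernel_ctx_id,
                                                 buffer_gpu_addr);
}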
+/**
* KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG - CSG is programmed to a slot
*
* @kbdev: Kbase device
@@ -1989,7 +2075,7 @@ struct kbase_tlstream;
* @kernel_ctx_id: Unique ID for the KBase Context
* @gpu_cmdq_grp_handle: GPU Command Queue Group handle which will match userspace
* @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed
- * @kbase_device_csg_slot_resumed: Whether the csg is being resumed
+ * @kbase_device_csg_slot_resuming: Whether the CSG is being resumed
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( \
@@ -1998,7 +2084,7 @@ struct kbase_tlstream;
kernel_ctx_id, \
gpu_cmdq_grp_handle, \
kbase_device_csg_slot_index, \
- kbase_device_csg_slot_resumed \
+ kbase_device_csg_slot_resuming \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
@@ -2009,7 +2095,7 @@ struct kbase_tlstream;
kernel_ctx_id, \
gpu_cmdq_grp_handle, \
kbase_device_csg_slot_index, \
- kbase_device_csg_slot_resumed \
+ kbase_device_csg_slot_resuming \
); \
} while (0)
#else
@@ -2019,7 +2105,7 @@ struct kbase_tlstream;
kernel_ctx_id, \
gpu_cmdq_grp_handle, \
kbase_device_csg_slot_index, \
- kbase_device_csg_slot_resumed \
+ kbase_device_csg_slot_resuming \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
@@ -2029,7 +2115,7 @@ struct kbase_tlstream;
*
* @kbdev: Kbase device
* @kbase_device_id: The ID of the physical hardware
- * @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed
+ * @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG is being deprogrammed
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG( \
@@ -2056,14 +2142,80 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG - CSG is halted
+ * KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG - CSG is halting
*
* @kbdev: Kbase device
* @kbase_device_id: The ID of the physical hardware
- * @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed
+ * @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG is being halted
+ * @kbase_device_csg_slot_suspending: Whether the CSG is being suspended
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG( \
+ kbdev, \
+ kbase_device_id, \
+ kbase_device_csg_slot_index, \
+ kbase_device_csg_slot_suspending \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_device_halting_csg( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kbase_device_id, \
+ kbase_device_csg_slot_index, \
+ kbase_device_csg_slot_suspending \
+ ); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG( \
+ kbdev, \
+ kbase_device_id, \
+ kbase_device_csg_slot_index, \
+ kbase_device_csg_slot_suspending \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG - CSG is suspended
+ *
+ * @kbdev: Kbase device
+ * @kbase_device_id: The ID of the physical hardware
+ * @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG is being suspended
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( \
+ kbdev, \
+ kbase_device_id, \
+ kbase_device_csg_slot_index \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_device_suspend_csg( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kbase_device_id, \
+ kbase_device_csg_slot_index \
+ ); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( \
+ kbdev, \
+ kbase_device_id, \
+ kbase_device_csg_slot_index \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
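Paired with KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG above, a scheduler path could report a suspend in two steps: first that the CSG on the slot is halting (with the suspending flag set), then that the suspend has completed. The function below is a hypothetical sketch, not code from this patch:

/* Hypothetical scheduler path; only the tracepoint macros come from this
 * patch, everything else is illustrative. */
static void example_suspend_csg_slot(struct kbase_device *kbdev,
                                     u32 kbase_device_id, u32 slot_index)
{
        /* Request the halt; 1u marks that a suspend (not just a halt)
         * was requested for this slot. */
        KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG(kbdev, kbase_device_id,
                                                   slot_index, 1u);

        /* ... firmware acknowledges the suspend request ... */

        /* Report that the CSG on this slot is now suspended. */
        KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG(kbdev, kbase_device_id,
                                                   slot_index);
}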
+/**
+ * KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE - KBase device is notified that CSG is idle.
+ *
+ * @kbdev: Kbase device
+ * @kbase_device_id: The ID of the physical hardware
+ * @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG the idle notification refers to
*/
#if MALI_USE_CSF
-#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG( \
+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE( \
kbdev, \
kbase_device_id, \
kbase_device_csg_slot_index \
@@ -2071,14 +2223,14 @@ struct kbase_tlstream;
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
- __kbase_tlstream_tl_kbase_device_halt_csg( \
+ __kbase_tlstream_tl_kbase_device_csg_idle( \
__TL_DISPATCH_STREAM(kbdev, obj), \
kbase_device_id, \
kbase_device_csg_slot_index \
); \
} while (0)
#else
-#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG( \
+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE( \
kbdev, \
kbase_device_id, \
kbase_device_csg_slot_index \
@@ -2336,16 +2488,16 @@ struct kbase_tlstream;
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
* @cqs_obj_gpu_addr: CQS Object GPU pointer
- * @cqs_obj_compare_value: Semaphore value that should be exceeded for the WAIT to pass
- * @cqs_obj_inherit_error: Flag which indicates if the CQS object error state should be inherited by the queue
+ * @compare_value: Semaphore value that should be exceeded for the WAIT to pass
+ * @inherit_error: Flag which indicates if the CQS object error state should be inherited by the queue
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT( \
kbdev, \
kcpu_queue, \
cqs_obj_gpu_addr, \
- cqs_obj_compare_value, \
- cqs_obj_inherit_error \
+ compare_value, \
+ inherit_error \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
@@ -2354,8 +2506,8 @@ struct kbase_tlstream;
__TL_DISPATCH_STREAM(kbdev, obj), \
kcpu_queue, \
cqs_obj_gpu_addr, \
- cqs_obj_compare_value, \
- cqs_obj_inherit_error \
+ compare_value, \
+ inherit_error \
); \
} while (0)
#else
@@ -2363,8 +2515,8 @@ struct kbase_tlstream;
kbdev, \
kcpu_queue, \
cqs_obj_gpu_addr, \
- cqs_obj_compare_value, \
- cqs_obj_inherit_error \
+ compare_value, \
+ inherit_error \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
@@ -2401,76 +2553,104 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT - KCPU Queue enqueues Map Import
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION - KCPU Queue enqueues Wait Operation on Cross Queue Sync Object
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
- * @map_import_buf_gpu_addr: Map import buffer GPU pointer
+ * @cqs_obj_gpu_addr: CQS Object GPU pointer
+ * @compare_value: Value that the semaphore value is compared against for the WAIT to pass
+ * @condition: Condition for unblocking WAITs on Timeline Cross Queue Sync Object (e.g. greater than, less than or equal)
+ * @data_type: Data type of a CQS Object's value
+ * @inherit_error: Flag which indicates if the CQS object error state should be inherited by the queue
*/
#if MALI_USE_CSF
-#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT( \
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION( \
kbdev, \
kcpu_queue, \
- map_import_buf_gpu_addr \
+ cqs_obj_gpu_addr, \
+ compare_value, \
+ condition, \
+ data_type, \
+ inherit_error \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
- __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( \
+ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait_operation( \
__TL_DISPATCH_STREAM(kbdev, obj), \
kcpu_queue, \
- map_import_buf_gpu_addr \
+ cqs_obj_gpu_addr, \
+ compare_value, \
+ condition, \
+ data_type, \
+ inherit_error \
); \
} while (0)
#else
-#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT( \
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION( \
kbdev, \
kcpu_queue, \
- map_import_buf_gpu_addr \
+ cqs_obj_gpu_addr, \
+ compare_value, \
+ condition, \
+ data_type, \
+ inherit_error \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT - KCPU Queue enqueues Unmap Import
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION - KCPU Queue enqueues Set Operation on Cross Queue Sync Object
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
- * @map_import_buf_gpu_addr: Map import buffer GPU pointer
+ * @cqs_obj_gpu_addr: CQS Object GPU pointer
+ * @value: Value that will be set or added to the semaphore
+ * @operation: Operation type performed on the semaphore value (SET or ADD)
+ * @data_type: Data type of a CQS Object's value
*/
#if MALI_USE_CSF
-#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT( \
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION( \
kbdev, \
kcpu_queue, \
- map_import_buf_gpu_addr \
+ cqs_obj_gpu_addr, \
+ value, \
+ operation, \
+ data_type \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
- __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import( \
+ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set_operation( \
__TL_DISPATCH_STREAM(kbdev, obj), \
kcpu_queue, \
- map_import_buf_gpu_addr \
+ cqs_obj_gpu_addr, \
+ value, \
+ operation, \
+ data_type \
); \
} while (0)
#else
-#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT( \
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION( \
kbdev, \
kcpu_queue, \
- map_import_buf_gpu_addr \
+ cqs_obj_gpu_addr, \
+ value, \
+ operation, \
+ data_type \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
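The two enqueue tracepoints above might be driven from a KCPU-queue enqueue path as sketched below; the numeric values passed for condition, operation and data_type are placeholders for illustration, not encodings defined by this patch:

/* Hypothetical enqueue path; the condition/operation/data_type encodings
 * below are placeholders for illustration only. */
static void example_trace_cqs_operations(struct kbase_device *kbdev,
                                         const void *kcpu_queue,
                                         u64 cqs_obj_gpu_addr)
{
        /* Wait until the CQS value satisfies the (assumed) condition against
         * 42, inheriting any error state from the object. */
        KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION(
                kbdev, kcpu_queue, cqs_obj_gpu_addr,
                42ull /* compare_value */, 1u /* condition (assumed) */,
                1u /* data_type (assumed) */, 1u /* inherit_error */);

        /* Then enqueue an operation that updates the same CQS object. */
        KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION(
                kbdev, kcpu_queue, cqs_obj_gpu_addr,
                1ull /* value */, 1u /* operation (assumed) */,
                1u /* data_type (assumed) */);
}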
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE - KCPU Queue enqueues Unmap Import ignoring reference count
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT - KCPU Queue enqueues Map Import
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
* @map_import_buf_gpu_addr: Map import buffer GPU pointer
*/
#if MALI_USE_CSF
-#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE( \
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT( \
kbdev, \
kcpu_queue, \
map_import_buf_gpu_addr \
@@ -2478,14 +2658,14 @@ struct kbase_tlstream;
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
- __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( \
+ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( \
__TL_DISPATCH_STREAM(kbdev, obj), \
kcpu_queue, \
map_import_buf_gpu_addr \
); \
} while (0)
#else
-#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE( \
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT( \
kbdev, \
kcpu_queue, \
map_import_buf_gpu_addr \
@@ -2494,63 +2674,63 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER - KCPU Queue enqueues Error Barrier
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT - KCPU Queue enqueues Unmap Import
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
+ * @map_import_buf_gpu_addr: Map import buffer GPU pointer
*/
#if MALI_USE_CSF
-#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER( \
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT( \
kbdev, \
- kcpu_queue \
+ kcpu_queue, \
+ map_import_buf_gpu_addr \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
- __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier( \
+ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue \
+ kcpu_queue, \
+ map_import_buf_gpu_addr \
); \
} while (0)
#else
-#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER( \
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT( \
kbdev, \
- kcpu_queue \
+ kcpu_queue, \
+ map_import_buf_gpu_addr \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND - KCPU Queue enqueues Group Suspend
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE - KCPU Queue enqueues Unmap Import ignoring reference count
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
- * @group_suspend_buf: Pointer to the suspend buffer structure
- * @gpu_cmdq_grp_handle: GPU Command Queue Group handle which will match userspace
+ * @map_import_buf_gpu_addr: Map import buffer GPU pointer
*/
#if MALI_USE_CSF
-#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND( \
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE( \
kbdev, \
kcpu_queue, \
- group_suspend_buf, \
- gpu_cmdq_grp_handle \
+ map_import_buf_gpu_addr \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
- __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend( \
+ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( \
__TL_DISPATCH_STREAM(kbdev, obj), \
kcpu_queue, \
- group_suspend_buf, \
- gpu_cmdq_grp_handle \
+ map_import_buf_gpu_addr \
); \
} while (0)
#else
-#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND( \
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE( \
kbdev, \
kcpu_queue, \
- group_suspend_buf, \
- gpu_cmdq_grp_handle \
+ map_import_buf_gpu_addr \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
@@ -2758,6 +2938,68 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER - KCPU Queue enqueues Error Barrier
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER( \
+ kbdev, \
+ kcpu_queue \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kcpu_queue \
+ ); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER( \
+ kbdev, \
+ kcpu_queue \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND - KCPU Queue enqueues Group Suspend
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ * @group_suspend_buf: Pointer to the suspend buffer structure
+ * @gpu_cmdq_grp_handle: GPU Command Queue Group handle which will match userspace
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND( \
+ kbdev, \
+ kcpu_queue, \
+ group_suspend_buf, \
+ gpu_cmdq_grp_handle \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kcpu_queue, \
+ group_suspend_buf, \
+ gpu_cmdq_grp_handle \
+ ); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND( \
+ kbdev, \
+ kcpu_queue, \
+ group_suspend_buf, \
+ gpu_cmdq_grp_handle \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
* KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START - KCPU Queue starts a Signal on Fence
*
* @kbdev: Kbase device
@@ -2874,7 +3116,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START - KCPU Queue starts a Wait on an array of Cross Queue Sync Objects
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START - KCPU Queue starts a Wait on Cross Queue Sync Object
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2901,7 +3143,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END - KCPU Queue ends a Wait on an array of Cross Queue Sync Objects
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END - KCPU Queue ends a Wait on Cross Queue Sync Object
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2932,7 +3174,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET - KCPU Queue executes a Set on an array of Cross Queue Sync Objects
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET - KCPU Queue executes a Set on Cross Queue Sync Object
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2963,6 +3205,95 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START - KCPU Queue starts a Wait Operation on Cross Queue Sync Object
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START( \
+ kbdev, \
+ kcpu_queue \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_start( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kcpu_queue \
+ ); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START( \
+ kbdev, \
+ kcpu_queue \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END - KCPU Queue ends a Wait Operation on Cross Queue Sync Object
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END( \
+ kbdev, \
+ kcpu_queue, \
+ execute_error \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_end( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kcpu_queue, \
+ execute_error \
+ ); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END( \
+ kbdev, \
+ kcpu_queue, \
+ execute_error \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION - KCPU Queue executes a Set Operation on Cross Queue Sync Object
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION( \
+ kbdev, \
+ kcpu_queue, \
+ execute_error \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set_operation( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kcpu_queue, \
+ execute_error \
+ ); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION( \
+ kbdev, \
+ kcpu_queue, \
+ execute_error \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
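The execution-side START/END pair is intended to bracket the wait operation, with execute_error being zero on success. A hypothetical sketch (the wait helper and its return convention are assumed, not part of this patch):

/* Hypothetical execution path; only the tracepoint macros come from this
 * patch. */
static void example_execute_cqs_wait_operation(struct kbase_device *kbdev,
                                               const void *kcpu_queue)
{
        int err;

        KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START(
                kbdev, kcpu_queue);

        err = 0;        /* ... perform the wait; 0 on success (assumed) ... */

        /* execute_error is zero on success, non-zero otherwise. */
        KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END(
                kbdev, kcpu_queue, (u32)err);
}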
+/**
* KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START - KCPU Queue starts a Map Import
*
* @kbdev: Kbase device