author     Jesse Hall <jessehall@google.com>   2021-11-23 14:38:46 -0800
committer  Jesse Hall <jessehall@google.com>   2021-11-23 14:38:46 -0800
commit     0c596dc70431fa2c70021fa1685e3efc969a852d
tree       8c6cfe8da5d3bea214e991cc4438988f65d9081e
parent     bbbb1cf6bb211bb2094dd66656966277c326867f
download   gpu-0c596dc70431fa2c70021fa1685e3efc969a852d.tar.gz
Mali Valhall Android DDK r34p0-00dev1
Provenance: 046d23c969 (collaborate/google/android/v_r34p0-00dev1)

VX504X08X-BU-00000-r34p0-00dev1 - Valhall Android DDK
VX504X08X-SW-99006-r34p0-00dev1 - Valhall Android Renderscript AOSP parts

Documentation from VX504X08X-BU-00000 omitted.

Signed-off-by: Jesse Hall <jessehall@google.com>
Change-Id: I4ebbb3a3af709bd39f883eed3b35bf4657a95797
-rw-r--r--  common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h | 65
-rw-r--r--  common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h | 50
-rw-r--r--  common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h | 2
-rw-r--r--  common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h | 52
-rw-r--r--  common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h | 4
-rw-r--r--  common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h | 16
-rw-r--r--  common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h | 163
-rw-r--r--  common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h | 16
-rw-r--r--  common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h | 2
-rw-r--r--  common/include/uapi/gpu/arm/midgard/mali_base_kernel.h | 104
-rw-r--r--  common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h | 259
-rw-r--r--  common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h | 52
-rw-r--r--  mali_kbase/Kbuild | 14
-rw-r--r--  mali_kbase/Kconfig | 5
-rw-r--r--  mali_kbase/Makefile | 2
-rw-r--r--  mali_kbase/Mconfig | 2
-rw-r--r--  mali_kbase/arbiter/mali_kbase_arbiter_defs.h | 1
-rw-r--r--  mali_kbase/arbiter/mali_kbase_arbiter_interface.h | 155
-rw-r--r--  mali_kbase/arbiter/mali_kbase_arbiter_pm.c | 61
-rw-r--r--  mali_kbase/arbiter/mali_kbase_arbiter_pm.h | 2
-rw-r--r--  mali_kbase/arbitration/Kconfig | 12
-rw-r--r--  mali_kbase/arbitration/ptm/Kconfig | 2
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c | 12
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h | 21
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c | 46
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h | 15
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c | 42
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_instr_backend.c | 24
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_hw.c | 108
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_rb.c | 247
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_backend.c | 456
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_defs.h | 57
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_driver.c | 185
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_internal.h | 121
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h | 18
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_policy.c | 12
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_time.c | 78
-rw-r--r--  mali_kbase/build.bp | 4
-rw-r--r--  mali_kbase/context/mali_kbase_context.c | 2
-rw-r--r--  mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c | 71
-rw-r--r--  mali_kbase/csf/mali_kbase_csf.c | 273
-rw-r--r--  mali_kbase/csf/mali_kbase_csf.h | 5
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_csg_debugfs.c | 216
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_defs.h | 17
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware.c | 149
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware.h | 41
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c | 59
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c | 16
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_kcpu.c | 207
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_kcpu.h | 10
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_reset_gpu.c | 29
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_scheduler.c | 1298
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_scheduler.h | 135
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tiler_heap.c | 41
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_timeout.c | 2
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tl_reader.c | 16
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tl_reader.h | 6
-rw-r--r--  mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_jm.h | 34
-rw-r--r--  mali_kbase/debug/backend/mali_kbase_debug_ktrace_defs_jm.h | 5
-rw-r--r--  mali_kbase/debug/backend/mali_kbase_debug_ktrace_jm.c | 9
-rw-r--r--  mali_kbase/debug/backend/mali_kbase_debug_ktrace_jm.h | 9
-rw-r--r--  mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_jm.h | 10
-rw-r--r--  mali_kbase/debug/mali_kbase_debug_ktrace_codes.h | 4
-rw-r--r--  mali_kbase/debug/mali_kbase_debug_linux_ktrace.h | 3
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_csf.c | 24
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_hw_csf.c | 75
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_hw_jm.c | 39
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_jm.c | 4
-rw-r--r--  mali_kbase/device/mali_kbase_device.c | 35
-rw-r--r--  mali_kbase/device/mali_kbase_device_hw.c | 42
-rw-r--r--  mali_kbase/device/mali_kbase_device_internal.h | 3
-rw-r--r--  mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c | 14
-rw-r--r--  mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.c | 3
-rw-r--r--  mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h | 2
-rw-r--r--  mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c | 75
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa_debugfs.c | 2
-rw-r--r--  mali_kbase/jm/mali_kbase_jm_defs.h | 52
-rw-r--r--  mali_kbase/jm/mali_kbase_jm_js.h | 79
-rw-r--r--  mali_kbase/jm/mali_kbase_js_defs.h | 46
-rw-r--r--  mali_kbase/mali_base_hwconfig_features.h | 295
-rw-r--r--  mali_kbase/mali_base_hwconfig_issues.h | 17
-rw-r--r--  mali_kbase/mali_kbase.h | 43
-rw-r--r--  mali_kbase/mali_kbase_as_fault_debugfs.c | 8
-rw-r--r--  mali_kbase/mali_kbase_config.h | 49
-rw-r--r--  mali_kbase/mali_kbase_config_defaults.h | 13
-rw-r--r--  mali_kbase/mali_kbase_core_linux.c | 107
-rw-r--r--  mali_kbase/mali_kbase_defs.h | 101
-rw-r--r--  mali_kbase/mali_kbase_dma_fence.c | 4
-rw-r--r--  mali_kbase/mali_kbase_dummy_job_wa.c | 9
-rw-r--r--  mali_kbase/mali_kbase_gpuprops.c | 13
-rw-r--r--  mali_kbase/mali_kbase_gpuprops_types.h | 1
-rw-r--r--  mali_kbase/mali_kbase_hw.c | 18
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_time.h | 22
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_csf.c | 137
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c | 22
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_jm.c | 144
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_gpu.c | 220
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_gpu.h | 136
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_gpu_narrow.c | 329
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_gpu_narrow.h | 347
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_legacy.c | 40
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_types.c | 26
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_types.h | 264
-rw-r--r--  mali_kbase/mali_kbase_jd.c | 44
-rw-r--r--  mali_kbase/mali_kbase_jm.c | 3
-rw-r--r--  mali_kbase/mali_kbase_jm.h | 2
-rw-r--r--  mali_kbase/mali_kbase_js.c | 315
-rw-r--r--  mali_kbase/mali_kbase_kinstr_prfcnt.c | 1184
-rw-r--r--  mali_kbase/mali_kbase_kinstr_prfcnt.h | 107
-rw-r--r--  mali_kbase/mali_kbase_mem.c | 197
-rw-r--r--  mali_kbase/mali_kbase_mem.h | 94
-rw-r--r--  mali_kbase/mali_kbase_mem_linux.c | 110
-rw-r--r--  mali_kbase/mali_kbase_mem_linux.h | 14
-rw-r--r--  mali_kbase/mali_kbase_mem_profile_debugfs.c | 6
-rw-r--r--  mali_kbase/mali_kbase_mem_profile_debugfs_buf_size.h | 3
-rw-r--r--  mali_kbase/mali_kbase_pbha.c | 237
-rw-r--r--  mali_kbase/mali_kbase_pbha.h | 77
-rw-r--r--  mali_kbase/mali_kbase_pbha_debugfs.c | 140
-rw-r--r--  mali_kbase/mali_kbase_pbha_debugfs.h (renamed from common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_control_registers.h) | 20
-rw-r--r--  mali_kbase/mali_kbase_pm.c | 13
-rw-r--r--  mali_kbase/mali_kbase_regs_history_debugfs.h | 9
-rw-r--r--  mali_kbase/mali_kbase_reset_gpu.h | 3
-rw-r--r--  mali_kbase/mali_kbase_vinstr.c | 68
-rw-r--r--  mali_kbase/mali_malisw.h | 5
-rw-r--r--  mali_kbase/mmu/backend/mali_kbase_mmu_csf.c | 5
-rw-r--r--  mali_kbase/mmu/backend/mali_kbase_mmu_jm.c | 1
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.c | 442
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.h | 49
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_hw.h | 47
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_hw_direct.c | 162
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c | 50
-rw-r--r--  mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c | 117
-rw-r--r--  mali_kbase/tests/include/kutf/kutf_helpers.h | 13
-rw-r--r--  mali_kbase/tests/kutf/kutf_helpers.c | 46
-rw-r--r--  mali_kbase/tests/kutf/kutf_suite.c | 24
-rw-r--r--  mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c | 4
-rw-r--r--  mali_kbase/tl/backend/mali_kbase_timeline_csf.c | 23
-rw-r--r--  mali_kbase/tl/mali_kbase_timeline.c | 17
-rw-r--r--  mali_kbase/tl/mali_kbase_tracepoints.c | 401
-rw-r--r--  mali_kbase/tl/mali_kbase_tracepoints.h | 344
140 files changed, 9841 insertions, 2569 deletions
diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h
index 78c328c..f5f859e 100644
--- a/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h
+++ b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h
@@ -186,17 +186,17 @@
#define BASE_MEM_FLAGS_RESERVED \
BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_20
-#define BASEP_MEM_INVALID_HANDLE (0ull << 12)
-#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12)
-#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12)
-#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12)
-#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12)
+#define BASEP_MEM_INVALID_HANDLE (0ul)
+#define BASE_MEM_MMU_DUMP_HANDLE (1ul << LOCAL_PAGE_SHIFT)
+#define BASE_MEM_TRACE_BUFFER_HANDLE (2ul << LOCAL_PAGE_SHIFT)
+#define BASE_MEM_MAP_TRACKING_HANDLE (3ul << LOCAL_PAGE_SHIFT)
+#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ul << LOCAL_PAGE_SHIFT)
/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */
-#define BASEP_MEM_CSF_USER_REG_PAGE_HANDLE (47ul << 12)
-#define BASEP_MEM_CSF_USER_IO_PAGES_HANDLE (48ul << 12)
-#define BASE_MEM_COOKIE_BASE (64ul << 12)
-#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \
- BASE_MEM_COOKIE_BASE)
+#define BASEP_MEM_CSF_USER_REG_PAGE_HANDLE (47ul << LOCAL_PAGE_SHIFT)
+#define BASEP_MEM_CSF_USER_IO_PAGES_HANDLE (48ul << LOCAL_PAGE_SHIFT)
+#define BASE_MEM_COOKIE_BASE (64ul << LOCAL_PAGE_SHIFT)
+#define BASE_MEM_FIRST_FREE_ADDRESS \
+ ((BITS_PER_LONG << LOCAL_PAGE_SHIFT) + BASE_MEM_COOKIE_BASE)
#define KBASE_CSF_NUM_USER_IO_PAGES_HANDLE \
((BASE_MEM_COOKIE_BASE - BASEP_MEM_CSF_USER_IO_PAGES_HANDLE) >> \
@@ -301,7 +301,6 @@ typedef __u32 base_context_create_flags;
*/
#define BASEP_KCPU_CQS_MAX_NUM_OBJS ((size_t)32)
-#if MALI_UNIT_TEST
/**
* enum base_kcpu_command_type - Kernel CPU queue command type.
* @BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: fence_signal,
@@ -331,42 +330,8 @@ enum base_kcpu_command_type {
BASE_KCPU_COMMAND_TYPE_JIT_ALLOC,
BASE_KCPU_COMMAND_TYPE_JIT_FREE,
BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND,
- BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER,
- BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME,
+ BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER
};
-#else
-/**
- * enum base_kcpu_command_type - Kernel CPU queue command type.
- * @BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: fence_signal,
- * @BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: fence_wait,
- * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT: cqs_wait,
- * @BASE_KCPU_COMMAND_TYPE_CQS_SET: cqs_set,
- * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: cqs_wait_operation,
- * @BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: cqs_set_operation,
- * @BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: map_import,
- * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: unmap_import,
- * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: unmap_import_force,
- * @BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: jit_alloc,
- * @BASE_KCPU_COMMAND_TYPE_JIT_FREE: jit_free,
- * @BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: group_suspend,
- * @BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: error_barrier,
- */
-enum base_kcpu_command_type {
- BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL,
- BASE_KCPU_COMMAND_TYPE_FENCE_WAIT,
- BASE_KCPU_COMMAND_TYPE_CQS_WAIT,
- BASE_KCPU_COMMAND_TYPE_CQS_SET,
- BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION,
- BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION,
- BASE_KCPU_COMMAND_TYPE_MAP_IMPORT,
- BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT,
- BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE,
- BASE_KCPU_COMMAND_TYPE_JIT_ALLOC,
- BASE_KCPU_COMMAND_TYPE_JIT_FREE,
- BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND,
- BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER,
-};
-#endif /* MALI_UNIT_TEST */
/**
* enum base_queue_group_priority - Priority of a GPU Command Queue Group.
@@ -568,11 +533,6 @@ struct base_kcpu_command_group_suspend_info {
__u8 padding[3];
};
-#if MALI_UNIT_TEST
-struct base_kcpu_command_sample_time_info {
- __u64 time;
-};
-#endif /* MALI_UNIT_TEST */
/**
* struct base_kcpu_command - kcpu command.
@@ -603,9 +563,6 @@ struct base_kcpu_command {
struct base_kcpu_command_jit_alloc_info jit_alloc;
struct base_kcpu_command_jit_free_info jit_free;
struct base_kcpu_command_group_suspend_info suspend_buf_copy;
-#if MALI_UNIT_TEST
- struct base_kcpu_command_sample_time_info sample_time;
-#endif /* MALI_UNIT_TEST */
__u64 padding[2]; /* No sub-struct should be larger */
} info;
};
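Note on the first hunk of this file: the special memory handles are no longer hard-coded to a 4 KiB page, they now scale with LOCAL_PAGE_SHIFT, which mali_base_kernel.h (later in this diff) defines as PAGE_SHIFT when the kernel provides it. A worked illustration, not part of the patch:

/* On a 64-bit build with 4 KiB pages (LOCAL_PAGE_SHIFT == 12,
 * BITS_PER_LONG == 64) the new definitions reproduce the old hard-coded
 * "<< 12" values exactly:
 *
 *   BASE_MEM_MMU_DUMP_HANDLE    = 1ul  << 12              = 0x01000
 *   BASE_MEM_COOKIE_BASE        = 64ul << 12              = 0x40000
 *   BASE_MEM_FIRST_FREE_ADDRESS = (64 << 12) + 0x40000    = 0x80000
 *
 * A kernel built with 16 KiB pages (LOCAL_PAGE_SHIFT == 14) scales all
 * of these by a factor of 4.
 */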
diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h b/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h
index 06cc4c2..a5dc745 100644
--- a/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h
+++ b/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h
@@ -20,7 +20,8 @@
*/
/*
- * This header was autogenerated, it should not be edited.
+ * This header was originally autogenerated, but it is now ok (and
+ * expected) to have to add to it.
*/
#ifndef _UAPI_GPU_CSF_REGISTERS_H_
@@ -212,7 +213,6 @@
#define GLB_PWROFF_TIMER 0x0014 /* () Global shader core power off timer */
#define GLB_ALLOC_EN_LO 0x0018 /* () Global shader core allocation enable mask, low word */
#define GLB_ALLOC_EN_HI 0x001C /* () Global shader core allocation enable mask, high word */
-#define GLB_PROTM_COHERENCY 0x0020 /* () Configure COHERENCY_ENABLE register value to use in protected mode execution */
#define GLB_PRFCNT_JASID 0x0024 /* () Performance counter address space */
#define GLB_PRFCNT_BASE_LO 0x0028 /* () Performance counter buffer address, low word */
@@ -653,7 +653,9 @@
(((reg_val) & ~CS_FAULT_EXCEPTION_TYPE_MASK) | \
(((value) << CS_FAULT_EXCEPTION_TYPE_SHIFT) & CS_FAULT_EXCEPTION_TYPE_MASK))
/* CS_FAULT_EXCEPTION_TYPE values */
+#define CS_FAULT_EXCEPTION_TYPE_KABOOM 0x05
#define CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED 0x0F
+#define CS_FAULT_EXCEPTION_TYPE_CS_BUS_FAULT 0x48
#define CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT 0x4B
#define CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_PC 0x50
#define CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_ENC 0x51
@@ -1164,6 +1166,13 @@
(((reg_val) & ~GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) | \
(((value) << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) & \
GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK))
+#define GLB_REQ_SLEEP_SHIFT 12
+#define GLB_REQ_SLEEP_MASK (0x1 << GLB_REQ_SLEEP_SHIFT)
+#define GLB_REQ_SLEEP_GET(reg_val) \
+ (((reg_val) & GLB_REQ_SLEEP_MASK) >> GLB_REQ_SLEEP_SHIFT)
+#define GLB_REQ_SLEEP_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_SLEEP_MASK) | \
+ (((value) << GLB_REQ_SLEEP_SHIFT) & GLB_REQ_SLEEP_MASK))
#define GLB_REQ_INACTIVE_COMPUTE_SHIFT 20
#define GLB_REQ_INACTIVE_COMPUTE_MASK (0x1 << GLB_REQ_INACTIVE_COMPUTE_SHIFT)
#define GLB_REQ_INACTIVE_COMPUTE_GET(reg_val) \
@@ -1391,19 +1400,6 @@
#define GLB_ALLOC_EN_MASK_SET(reg_val, value) \
(((reg_val) & ~GLB_ALLOC_EN_MASK_MASK) | (((value) << GLB_ALLOC_EN_MASK_SHIFT) & GLB_ALLOC_EN_MASK_MASK))
-/* GLB_PROTM_COHERENCY register */
-#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT 0
-#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK \
- (0xFFFFFFFF << GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT)
-#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_GET(reg_val) \
- (((reg_val)&GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK) >> \
- GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT)
-#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SET(reg_val, value) \
- (((reg_val) & ~GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK) | \
- (((value) << GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT) & \
- GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK))
-/* End of GLB_INPUT_BLOCK register set definitions */
-
/* GLB_OUTPUT_BLOCK register set definitions */
/* GLB_ACK register */
@@ -1485,4 +1481,28 @@
(((reg_val) & ~CSG_STATUS_STATE_IDLE_MASK) | \
(((value) << CSG_STATUS_STATE_IDLE_SHIFT) & CSG_STATUS_STATE_IDLE_MASK))
+/* GLB_FEATURES_ITER_TRACE_SUPPORTED register */
+#define GLB_FEATURES_ITER_TRACE_SUPPORTED_SHIFT GPU_U(4)
+#define GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK \
+ (GPU_U(0x1) << GLB_FEATURES_ITER_TRACE_SUPPORTED_SHIFT)
+#define GLB_FEATURES_ITER_TRACE_SUPPORTED_GET(reg_val) \
+ (((reg_val)&GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK) >> \
+ GLB_FEATURES_ITER_TRACE_SUPPORTED_SHIFT)
+#define GLB_FEATURES_ITER_TRACE_SUPPORTED_SET(reg_val, value) \
+ (((reg_val) & ~GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK) | \
+ (((value) << GLB_FEATURES_ITER_TRACE_SUPPORTED_SHIFT) & \
+ GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK))
+
+/* GLB_REQ_ITER_TRACE_ENABLE register */
+#define GLB_REQ_ITER_TRACE_ENABLE_SHIFT GPU_U(11)
+#define GLB_REQ_ITER_TRACE_ENABLE_MASK \
+ (GPU_U(0x1) << GLB_REQ_ITER_TRACE_ENABLE_SHIFT)
+#define GLB_REQ_ITER_TRACE_ENABLE_GET(reg_val) \
+ (((reg_val)&GLB_REQ_ITER_TRACE_ENABLE_MASK) >> \
+ GLB_REQ_ITER_TRACE_ENABLE_SHIFT)
+#define GLB_REQ_ITER_TRACE_ENABLE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_ITER_TRACE_ENABLE_MASK) | \
+ (((value) << GLB_REQ_ITER_TRACE_ENABLE_SHIFT) & \
+ GLB_REQ_ITER_TRACE_ENABLE_MASK))
+
#endif /* _UAPI_GPU_CSF_REGISTERS_H_ */
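The new GLB_REQ_SLEEP and GLB_FEATURES_ITER_TRACE_SUPPORTED fields follow the same shift/mask accessor pattern as the rest of this header. A minimal usage sketch (illustrative only, not part of the patch; glb_req and glb_features stand in for values read from the global interface input/output pages):

/* Set the SLEEP request bit, preserving all other request bits. */
static __u32 glb_req_with_sleep(__u32 glb_req)
{
	return GLB_REQ_SLEEP_SET(glb_req, 1);
}

/* Query whether the firmware reports iterator trace support. */
static int glb_iter_trace_supported(__u32 glb_features)
{
	return GLB_FEATURES_ITER_TRACE_SUPPORTED_GET(glb_features) != 0;
}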
diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h
index d2d7ce2..ec4870c 100644
--- a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h
+++ b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h
@@ -44,6 +44,8 @@
* - Add ioctl 40: kbase_ioctl_cs_queue_register_ex, this is a new
* queue registration call with extended format for supporting CS
* trace configurations with CSF trace_command.
+ * 1.6:
+ * - Added new HW performance counters interface to all GPUs.
*/
#define BASE_UK_VERSION_MAJOR 1
diff --git a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h
index 2041739..4001a4c 100644
--- a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h
+++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h
@@ -28,8 +28,13 @@
#error "Cannot be compiled with JM"
#endif
-/* IPA control registers */
+/* GPU_CONTROL_MCU base address */
+#define GPU_CONTROL_MCU_BASE 0x3000
+
+/* MCU_SUBSYSTEM base address */
+#define MCU_SUBSYSTEM_BASE 0x20000
+/* IPA control registers */
#define IPA_CONTROL_BASE 0x40000
#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE+(r))
#define COMMAND 0x000 /* (WO) Command register */
@@ -63,8 +68,6 @@
#define VALUE_SHADER_REG_LO(n) (VALUE_SHADER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
#define VALUE_SHADER_REG_HI(n) (VALUE_SHADER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
-#include "../../csf/mali_gpu_csf_control_registers.h"
-
/* Set to implementation defined, outer caching */
#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
/* Set to write back memory, outer caching */
@@ -117,6 +120,9 @@
#define MCU_CNTRL_AUTO (1 << 1)
#define MCU_CNTRL_DISABLE (0)
+#define MCU_CNTRL_DOORBELL_DISABLE_SHIFT (31)
+#define MCU_CNTRL_DOORBELL_DISABLE_MASK (1 << MCU_CNTRL_DOORBELL_DISABLE_SHIFT)
+
#define MCU_STATUS_HALTED (1 << 1)
#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory
@@ -181,11 +187,19 @@
#define GPU_COMMAND_TIME_DISABLE 0x00 /* Disable cycle counter */
#define GPU_COMMAND_TIME_ENABLE 0x01 /* Enable cycle counter */
-/* GPU_COMMAND_FLUSH_CACHES payloads */
-#define GPU_COMMAND_FLUSH_PAYLOAD_NONE 0x00 /* No flush */
-#define GPU_COMMAND_FLUSH_PAYLOAD_CLEAN 0x01 /* Clean the caches */
-#define GPU_COMMAND_FLUSH_PAYLOAD_INVALIDATE 0x02 /* Invalidate the caches */
-#define GPU_COMMAND_FLUSH_PAYLOAD_CLEAN_INVALIDATE 0x03 /* Clean and invalidate the caches */
+/* GPU_COMMAND_FLUSH_CACHES payloads bits for L2 caches */
+#define GPU_COMMAND_FLUSH_PAYLOAD_L2_NONE 0x000 /* No flush */
+#define GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN 0x001 /* CLN only */
+#define GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE 0x003 /* CLN + INV */
+
+/* GPU_COMMAND_FLUSH_CACHES payloads bits for Load-store caches */
+#define GPU_COMMAND_FLUSH_PAYLOAD_LSC_NONE 0x000 /* No flush */
+#define GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN 0x010 /* CLN only */
+#define GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN_INVALIDATE 0x030 /* CLN + INV */
+
+/* GPU_COMMAND_FLUSH_CACHES payloads bits for Other caches */
+#define GPU_COMMAND_FLUSH_PAYLOAD_OTHER_NONE 0x000 /* No flush */
+#define GPU_COMMAND_FLUSH_PAYLOAD_OTHER_INVALIDATE 0x200 /* INV only */
/* GPU_COMMAND command + payload */
#define GPU_COMMAND_CODE_PAYLOAD(opcode, payload) \
@@ -220,13 +234,21 @@
#define GPU_COMMAND_CYCLE_COUNT_STOP \
GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_DISABLE)
-/* Clean all caches */
-#define GPU_COMMAND_CLEAN_CACHES \
- GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, GPU_COMMAND_FLUSH_PAYLOAD_CLEAN)
-
-/* Clean and invalidate all caches */
-#define GPU_COMMAND_CLEAN_INV_CACHES \
- GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, GPU_COMMAND_FLUSH_PAYLOAD_CLEAN_INVALIDATE)
+/* Clean and invalidate L2 cache (Equivalent to FLUSH_PT) */
+#define GPU_COMMAND_CACHE_CLN_INV_L2 \
+ GPU_COMMAND_CODE_PAYLOAD( \
+ GPU_COMMAND_CODE_FLUSH_CACHES, \
+ (GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE | \
+ GPU_COMMAND_FLUSH_PAYLOAD_LSC_NONE | \
+ GPU_COMMAND_FLUSH_PAYLOAD_OTHER_NONE))
+
+/* Clean and invalidate L2 and LSC caches (Equivalent to FLUSH_MEM) */
+#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC \
+ GPU_COMMAND_CODE_PAYLOAD( \
+ GPU_COMMAND_CODE_FLUSH_CACHES, \
+ (GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE | \
+ GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN_INVALIDATE | \
+ GPU_COMMAND_FLUSH_PAYLOAD_OTHER_NONE))
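With this change the FLUSH_CACHES payload is composed from independent L2, load-store-cache (LSC) and "other" bit fields rather than a single clean/invalidate code; GPU_COMMAND_CACHE_CLN_INV_L2 and GPU_COMMAND_CACHE_CLN_INV_L2_LSC above are simply pre-composed combinations. A hypothetical payload built the same way (illustrative only, not something this patch defines):

/* Clean -- but do not invalidate -- the load-store caches only. */
#define GPU_COMMAND_CACHE_CLN_LSC_ONLY \
	GPU_COMMAND_CODE_PAYLOAD( \
		GPU_COMMAND_CODE_FLUSH_CACHES, \
		(GPU_COMMAND_FLUSH_PAYLOAD_L2_NONE | \
		 GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN | \
		 GPU_COMMAND_FLUSH_PAYLOAD_OTHER_NONE))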
/* Places the GPU in protected mode */
#define GPU_COMMAND_SET_PROTECTED_MODE \
diff --git a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h
index 1be3541..dcadcc7 100644
--- a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h
+++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h
@@ -261,6 +261,10 @@
#define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */
#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */
+/* GPU_COMMAND cache flush alias to CSF command payload */
+#define GPU_COMMAND_CACHE_CLN_INV_L2 GPU_COMMAND_CLEAN_INV_CACHES
+#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC GPU_COMMAND_CLEAN_INV_CACHES
+
/* IRQ flags */
#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */
#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */
diff --git a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h
index d093ce4..666b0af 100644
--- a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h
+++ b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h
@@ -53,6 +53,20 @@
GPU_ID2_VERSION_MINOR | \
GPU_ID2_VERSION_STATUS)
+/* Helper macro to construct a value consisting of arch major and revision
+ * using the value of gpu_id.
+ */
+#define ARCH_MAJOR_REV_REG(gpu_id) \
+ ((((__u32)gpu_id) & GPU_ID2_ARCH_MAJOR) | \
+ (((__u32)gpu_id) & GPU_ID2_ARCH_REV))
+
+/* Helper macro to create a partial GPU_ID (new format) that defines
+ * a arch major and revision.
+ */
+#define GPU_ID2_ARCH_MAJOR_REV_MAKE(arch_major, arch_rev) \
+ ((((__u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \
+ (((__u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT))
+
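ARCH_MAJOR_REV_REG() and GPU_ID2_ARCH_MAJOR_REV_MAKE() make it possible to match a GPU on its architecture major version and revision alone, mirroring the existing product-only helper below. A hypothetical usage sketch (not part of the patch; gpu_id stands in for the raw GPU_ID register value):

static int gpu_is_arch_major_rev(__u32 gpu_id, __u32 arch_major, __u32 arch_rev)
{
	return ARCH_MAJOR_REV_REG(gpu_id) ==
	       GPU_ID2_ARCH_MAJOR_REV_MAKE(arch_major, arch_rev);
}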
/* Helper macro to create a partial GPU_ID (new format) that defines
* a product ignoring its version.
*/
@@ -109,6 +123,8 @@
#define GPU_ID2_PRODUCT_TGRX GPU_ID2_MODEL_MAKE(10, 3)
#define GPU_ID2_PRODUCT_TVAX GPU_ID2_MODEL_MAKE(10, 4)
#define GPU_ID2_PRODUCT_LODX GPU_ID2_MODEL_MAKE(10, 7)
+#define GPU_ID2_PRODUCT_TTUX GPU_ID2_MODEL_MAKE(11, 2)
+#define GPU_ID2_PRODUCT_LTUX GPU_ID2_MODEL_MAKE(11, 3)
/* Helper macro to create a GPU_ID assuming valid values for id, major,
* minor, status
diff --git a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h
index 84fad8d..e223220 100644
--- a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h
+++ b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h
@@ -30,6 +30,13 @@
#include "backend/mali_kbase_gpu_regmap_jm.h"
#endif
+/* GPU_U definition */
+#ifdef __ASSEMBLER__
+#define GPU_U(x) x
+#else
+#define GPU_U(x) x##u
+#endif /* __ASSEMBLER__ */
+
/* Begin Register Offsets */
/* GPU control registers */
@@ -149,6 +156,10 @@
#define ASN_HASH(n) (ASN_HASH_0 + (n)*4)
#define ASN_HASH_COUNT 3
+#define SYSC_ALLOC0 0x0340 /* (RW) System cache allocation hint from source ID */
+#define SYSC_ALLOC(n) (SYSC_ALLOC0 + (n)*4)
+#define SYSC_ALLOC_COUNT 8
+
#define STACK_PWRTRANS_LO 0xE40 /* (RO) Core stack power transition bitmap, low word */
#define STACK_PWRTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */
@@ -164,6 +175,7 @@
#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */
#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */
+
#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */
#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */
#define L2_MMU_CONFIG 0xF0C /* (RW) L2 cache and MMU configuration (implementation-specific) */
@@ -327,10 +339,6 @@
#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */
#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */
#define AS_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */
-/* Flush all L2 caches then issue a flush region command to all MMUs
- * (deprecated - only for use with T60x)
- */
-#define AS_COMMAND_FLUSH 0x04
/* Flush all L2 caches then issue a flush region command to all MMUs */
#define AS_COMMAND_FLUSH_PT 0x04
/* Wait for memory accesses to complete, flush all the L1s cache then flush all
@@ -338,6 +346,28 @@
*/
#define AS_COMMAND_FLUSH_MEM 0x05
+/* AS_LOCKADDR register */
+#define AS_LOCKADDR_LOCKADDR_SIZE_SHIFT GPU_U(0)
+#define AS_LOCKADDR_LOCKADDR_SIZE_MASK \
+ (GPU_U(0x3F) << AS_LOCKADDR_LOCKADDR_SIZE_SHIFT)
+#define AS_LOCKADDR_LOCKADDR_SIZE_GET(reg_val) \
+ (((reg_val)&AS_LOCKADDR_LOCKADDR_SIZE_MASK) >> \
+ AS_LOCKADDR_LOCKADDR_SIZE_SHIFT)
+#define AS_LOCKADDR_LOCKADDR_SIZE_SET(reg_val, value) \
+ (((reg_val) & ~AS_LOCKADDR_LOCKADDR_SIZE_MASK) | \
+ (((value) << AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) & \
+ AS_LOCKADDR_LOCKADDR_SIZE_MASK))
+#define AS_LOCKADDR_LOCKADDR_BASE_SHIFT GPU_U(12)
+#define AS_LOCKADDR_LOCKADDR_BASE_MASK \
+ (GPU_U(0xFFFFFFFFFFFFF) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT)
+#define AS_LOCKADDR_LOCKADDR_BASE_GET(reg_val) \
+ (((reg_val)&AS_LOCKADDR_LOCKADDR_BASE_MASK) >> \
+ AS_LOCKADDR_LOCKADDR_BASE_SHIFT)
+#define AS_LOCKADDR_LOCKADDR_BASE_SET(reg_val, value) \
+ (((reg_val) & ~AS_LOCKADDR_LOCKADDR_BASE_MASK) | \
+ (((value) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT) & \
+ AS_LOCKADDR_LOCKADDR_BASE_MASK))
+
/* GPU_STATUS values */
#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */
#define GPU_STATUS_CYCLE_COUNT_ACTIVE (1 << 6) /* Set if the cycle counter is active. */
@@ -427,8 +457,133 @@
#define L2_CONFIG_ASN_HASH_ENABLE_MASK (1ul << L2_CONFIG_ASN_HASH_ENABLE_SHIFT)
/* End L2_CONFIG register */
+
/* IDVS_GROUP register */
#define IDVS_GROUP_SIZE_SHIFT (16)
#define IDVS_GROUP_MAX_SIZE (0x3F)
+/* SYSC_ALLOC read IDs */
+#define SYSC_ALLOC_ID_R_OTHER 0x00
+#define SYSC_ALLOC_ID_R_CSF 0x02
+#define SYSC_ALLOC_ID_R_MMU 0x04
+#define SYSC_ALLOC_ID_R_TILER_VERT 0x08
+#define SYSC_ALLOC_ID_R_TILER_PTR 0x09
+#define SYSC_ALLOC_ID_R_TILER_INDEX 0x0A
+#define SYSC_ALLOC_ID_R_TILER_OTHER 0x0B
+#define SYSC_ALLOC_ID_R_IC 0x10
+#define SYSC_ALLOC_ID_R_ATTR 0x11
+#define SYSC_ALLOC_ID_R_SCM 0x12
+#define SYSC_ALLOC_ID_R_FSDC 0x13
+#define SYSC_ALLOC_ID_R_VL 0x14
+#define SYSC_ALLOC_ID_R_PLR 0x15
+#define SYSC_ALLOC_ID_R_TEX 0x18
+#define SYSC_ALLOC_ID_R_LSC 0x1c
+
+/* SYSC_ALLOC write IDs */
+#define SYSC_ALLOC_ID_W_OTHER 0x00
+#define SYSC_ALLOC_ID_W_CSF 0x02
+#define SYSC_ALLOC_ID_W_PCB 0x07
+#define SYSC_ALLOC_ID_W_TILER_PTR 0x09
+#define SYSC_ALLOC_ID_W_TILER_VERT_PLIST 0x0A
+#define SYSC_ALLOC_ID_W_TILER_OTHER 0x0B
+#define SYSC_ALLOC_ID_W_L2_EVICT 0x0C
+#define SYSC_ALLOC_ID_W_L2_FLUSH 0x0D
+#define SYSC_ALLOC_ID_W_TIB_COLOR 0x10
+#define SYSC_ALLOC_ID_W_TIB_COLOR_AFBCH 0x11
+#define SYSC_ALLOC_ID_W_TIB_COLOR_AFBCB 0x12
+#define SYSC_ALLOC_ID_W_TIB_CRC 0x13
+#define SYSC_ALLOC_ID_W_TIB_DS 0x14
+#define SYSC_ALLOC_ID_W_TIB_DS_AFBCH 0x15
+#define SYSC_ALLOC_ID_W_TIB_DS_AFBCB 0x16
+#define SYSC_ALLOC_ID_W_LSC 0x1C
+
+/* SYSC_ALLOC values */
+#define SYSC_ALLOC_L2_ALLOC 0x0
+#define SYSC_ALLOC_NEVER_ALLOC 0x2
+#define SYSC_ALLOC_ALWAYS_ALLOC 0x3
+#define SYSC_ALLOC_PTL_ALLOC 0x4
+#define SYSC_ALLOC_L2_PTL_ALLOC 0x5
+
+/* SYSC_ALLOC register */
+#define SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT (0)
+#define SYSC_ALLOC_R_SYSC_ALLOC0_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT)
+#define SYSC_ALLOC_R_SYSC_ALLOC0_GET(reg_val) \
+ (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC0_MASK) >> \
+ SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT)
+#define SYSC_ALLOC_R_SYSC_ALLOC0_SET(reg_val, value) \
+ (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC0_MASK) | \
+ (((value) << SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT) & \
+ SYSC_ALLOC_R_SYSC_ALLOC0_MASK))
+/* End of SYSC_ALLOC_R_SYSC_ALLOC0 values */
+#define SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT (4)
+#define SYSC_ALLOC_W_SYSC_ALLOC0_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT)
+#define SYSC_ALLOC_W_SYSC_ALLOC0_GET(reg_val) \
+ (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC0_MASK) >> \
+ SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT)
+#define SYSC_ALLOC_W_SYSC_ALLOC0_SET(reg_val, value) \
+ (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC0_MASK) | \
+ (((value) << SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT) & \
+ SYSC_ALLOC_W_SYSC_ALLOC0_MASK))
+/* End of SYSC_ALLOC_W_SYSC_ALLOC0 values */
+#define SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT (8)
+#define SYSC_ALLOC_R_SYSC_ALLOC1_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT)
+#define SYSC_ALLOC_R_SYSC_ALLOC1_GET(reg_val) \
+ (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC1_MASK) >> \
+ SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT)
+#define SYSC_ALLOC_R_SYSC_ALLOC1_SET(reg_val, value) \
+ (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC1_MASK) | \
+ (((value) << SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT) & \
+ SYSC_ALLOC_R_SYSC_ALLOC1_MASK))
+/* End of SYSC_ALLOC_R_SYSC_ALLOC1 values */
+#define SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT (12)
+#define SYSC_ALLOC_W_SYSC_ALLOC1_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT)
+#define SYSC_ALLOC_W_SYSC_ALLOC1_GET(reg_val) \
+ (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC1_MASK) >> \
+ SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT)
+#define SYSC_ALLOC_W_SYSC_ALLOC1_SET(reg_val, value) \
+ (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC1_MASK) | \
+ (((value) << SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT) & \
+ SYSC_ALLOC_W_SYSC_ALLOC1_MASK))
+/* End of SYSC_ALLOC_W_SYSC_ALLOC1 values */
+#define SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT (16)
+#define SYSC_ALLOC_R_SYSC_ALLOC2_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT)
+#define SYSC_ALLOC_R_SYSC_ALLOC2_GET(reg_val) \
+ (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC2_MASK) >> \
+ SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT)
+#define SYSC_ALLOC_R_SYSC_ALLOC2_SET(reg_val, value) \
+ (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC2_MASK) | \
+ (((value) << SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT) & \
+ SYSC_ALLOC_R_SYSC_ALLOC2_MASK))
+/* End of SYSC_ALLOC_R_SYSC_ALLOC2 values */
+#define SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT (20)
+#define SYSC_ALLOC_W_SYSC_ALLOC2_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT)
+#define SYSC_ALLOC_W_SYSC_ALLOC2_GET(reg_val) \
+ (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC2_MASK) >> \
+ SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT)
+#define SYSC_ALLOC_W_SYSC_ALLOC2_SET(reg_val, value) \
+ (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC2_MASK) | \
+ (((value) << SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT) & \
+ SYSC_ALLOC_W_SYSC_ALLOC2_MASK))
+/* End of SYSC_ALLOC_W_SYSC_ALLOC2 values */
+#define SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT (24)
+#define SYSC_ALLOC_R_SYSC_ALLOC3_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT)
+#define SYSC_ALLOC_R_SYSC_ALLOC3_GET(reg_val) \
+ (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC3_MASK) >> \
+ SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT)
+#define SYSC_ALLOC_R_SYSC_ALLOC3_SET(reg_val, value) \
+ (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC3_MASK) | \
+ (((value) << SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT) & \
+ SYSC_ALLOC_R_SYSC_ALLOC3_MASK))
+/* End of SYSC_ALLOC_R_SYSC_ALLOC3 values */
+#define SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT (28)
+#define SYSC_ALLOC_W_SYSC_ALLOC3_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT)
+#define SYSC_ALLOC_W_SYSC_ALLOC3_GET(reg_val) \
+ (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC3_MASK) >> \
+ SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT)
+#define SYSC_ALLOC_W_SYSC_ALLOC3_SET(reg_val, value) \
+ (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC3_MASK) | \
+ (((value) << SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT) & \
+ SYSC_ALLOC_W_SYSC_ALLOC3_MASK))
+/* End of SYSC_ALLOC_W_SYSC_ALLOC3 values */
+
#endif /* _UAPI_KBASE_GPU_REGMAP_H_ */
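Each SYSC_ALLOC register packs four read hints and four write hints, one 4-bit field per hint, accessed through the *_SET/_GET macros above. A sketch of composing one register value (illustrative only, not part of the patch; which source IDs map to which SYSC_ALLOC register and field is not defined in this header, so the field choice here is arbitrary):

static __u32 sysc_alloc_example_value(void)
{
	__u32 reg = 0;

	/* First read/write hint pair: always allocate in the system cache. */
	reg = SYSC_ALLOC_R_SYSC_ALLOC0_SET(reg, SYSC_ALLOC_ALWAYS_ALLOC);
	reg = SYSC_ALLOC_W_SYSC_ALLOC0_SET(reg, SYSC_ALLOC_ALWAYS_ALLOC);

	/* Second pair: never allocate. */
	reg = SYSC_ALLOC_R_SYSC_ALLOC1_SET(reg, SYSC_ALLOC_NEVER_ALLOC);
	reg = SYSC_ALLOC_W_SYSC_ALLOC1_SET(reg, SYSC_ALLOC_NEVER_ALLOC);

	return reg; /* value to program into one SYSC_ALLOC(n) register */
}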
diff --git a/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h b/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h
index 749e1fa..7a52fbf 100644
--- a/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h
+++ b/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h
@@ -192,15 +192,15 @@
#define BASE_MEM_FLAGS_RESERVED \
(BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_19)
-#define BASEP_MEM_INVALID_HANDLE (0ull << 12)
-#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12)
-#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12)
-#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12)
-#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12)
+#define BASEP_MEM_INVALID_HANDLE (0ul)
+#define BASE_MEM_MMU_DUMP_HANDLE (1ul << LOCAL_PAGE_SHIFT)
+#define BASE_MEM_TRACE_BUFFER_HANDLE (2ul << LOCAL_PAGE_SHIFT)
+#define BASE_MEM_MAP_TRACKING_HANDLE (3ul << LOCAL_PAGE_SHIFT)
+#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ul << LOCAL_PAGE_SHIFT)
/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */
-#define BASE_MEM_COOKIE_BASE (64ul << 12)
-#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \
- BASE_MEM_COOKIE_BASE)
+#define BASE_MEM_COOKIE_BASE (64ul << LOCAL_PAGE_SHIFT)
+#define BASE_MEM_FIRST_FREE_ADDRESS \
+ ((BITS_PER_LONG << LOCAL_PAGE_SHIFT) + BASE_MEM_COOKIE_BASE)
/* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the
* initial commit is aligned to 'extension' pages, where 'extension' must be a power
diff --git a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h
index 72d75cb..2598e20 100644
--- a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h
+++ b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h
@@ -119,6 +119,8 @@
* 11.31:
* - Added BASE_JD_REQ_LIMITED_CORE_MASK.
* - Added ioctl 55: set_limited_core_count.
+ * 11.32:
+ * - Added new HW performance counters interface to all GPUs.
*/
#define BASE_UK_VERSION_MAJOR 11
#define BASE_UK_VERSION_MINOR 31
diff --git a/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h b/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h
index a46c41f..410d54e 100644
--- a/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h
+++ b/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h
@@ -42,18 +42,6 @@ struct base_mem_handle {
#define BASE_MAX_COHERENT_GROUPS 16
-#if defined(CDBG_ASSERT)
-#define LOCAL_ASSERT CDBG_ASSERT
-#elif defined(KBASE_DEBUG_ASSERT)
-#define LOCAL_ASSERT KBASE_DEBUG_ASSERT
-#else
-#if defined(__KERNEL__)
-#error assert macro not defined!
-#else
-#define LOCAL_ASSERT(...) ((void)#__VA_ARGS__)
-#endif
-#endif
-
#if defined(PAGE_MASK) && defined(PAGE_SHIFT)
#define LOCAL_PAGE_SHIFT PAGE_SHIFT
#define LOCAL_PAGE_LSB ~PAGE_MASK
@@ -635,7 +623,7 @@ struct mali_base_gpu_coherent_group_info {
* @thread_max_barrier_size: Maximum number of threads per barrier
* @thread_features: Thread features
* @coherency_mode: Note: This is the _selected_ coherency mode rather than the
- * available modes as exposed in the coherency_features register
+ * available modes as exposed in the coherency_features register
* @thread_tls_alloc: Number of threads per core that TLS must be allocated for
* @gpu_features: GPU features
*
@@ -699,7 +687,7 @@ struct gpu_raw_gpu_props {
* values from which the value of the other members are derived. The derived
* members exist to allow for efficient access and/or shielding the details
* of the layout of the registers.
- * */
+ */
struct base_gpu_props {
struct mali_base_gpu_core_props core_props;
struct mali_base_gpu_l2_cache_props l2_props;
@@ -716,82 +704,24 @@ struct base_gpu_props {
#include "jm/mali_base_jm_kernel.h"
#endif
-/**
- * base_mem_group_id_get() - Get group ID from flags
- * @flags: Flags to pass to base_mem_alloc
- *
- * This inline function extracts the encoded group ID from flags
- * and converts it into numeric value (0~15).
- *
- * Return: group ID(0~15) extracted from the parameter
- */
-static __inline__ int base_mem_group_id_get(base_mem_alloc_flags flags)
-{
- LOCAL_ASSERT((flags & ~BASE_MEM_FLAGS_INPUT_MASK) == 0);
- return (int)((flags & BASE_MEM_GROUP_ID_MASK) >>
- BASEP_MEM_GROUP_ID_SHIFT);
-}
-
-/**
- * base_mem_group_id_set() - Set group ID into base_mem_alloc_flags
- * @id: group ID(0~15) you want to encode
- *
- * This inline function encodes specific group ID into base_mem_alloc_flags.
- * Parameter 'id' should lie in-between 0 to 15.
- *
- * Return: base_mem_alloc_flags with the group ID (id) encoded
- *
- * The return value can be combined with other flags against base_mem_alloc
- * to identify a specific memory group.
- */
-static __inline__ base_mem_alloc_flags base_mem_group_id_set(int id)
-{
- if ((id < 0) || (id >= BASE_MEM_GROUP_COUNT)) {
- /* Set to default value when id is out of range. */
- id = BASE_MEM_GROUP_DEFAULT;
- }
+#define BASE_MEM_GROUP_ID_GET(flags) \
+ ((flags & BASE_MEM_GROUP_ID_MASK) >> BASEP_MEM_GROUP_ID_SHIFT)
- return ((base_mem_alloc_flags)id << BASEP_MEM_GROUP_ID_SHIFT) &
- BASE_MEM_GROUP_ID_MASK;
-}
+#define BASE_MEM_GROUP_ID_SET(id) \
+ (((base_mem_alloc_flags)((id < 0 || id >= BASE_MEM_GROUP_COUNT) ? \
+ BASE_MEM_GROUP_DEFAULT : \
+ id) \
+ << BASEP_MEM_GROUP_ID_SHIFT) & \
+ BASE_MEM_GROUP_ID_MASK)
-/**
- * base_context_mmu_group_id_set - Encode a memory group ID in
- * base_context_create_flags
- *
- * Memory allocated for GPU page tables will come from the specified group.
- *
- * @group_id: Physical memory group ID. Range is 0..(BASE_MEM_GROUP_COUNT-1).
- *
- * Return: Bitmask of flags to pass to base_context_init.
- */
-static __inline__ base_context_create_flags base_context_mmu_group_id_set(
- int const group_id)
-{
- LOCAL_ASSERT(group_id >= 0);
- LOCAL_ASSERT(group_id < BASE_MEM_GROUP_COUNT);
- return BASEP_CONTEXT_MMU_GROUP_ID_MASK &
- ((base_context_create_flags)group_id <<
- BASEP_CONTEXT_MMU_GROUP_ID_SHIFT);
-}
+#define BASE_CONTEXT_MMU_GROUP_ID_SET(group_id) \
+ (BASEP_CONTEXT_MMU_GROUP_ID_MASK & \
+ ((base_context_create_flags)(group_id) \
+ << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT))
-/**
- * base_context_mmu_group_id_get - Decode a memory group ID from
- * base_context_create_flags
- *
- * Memory allocated for GPU page tables will come from the returned group.
- *
- * @flags: Bitmask of flags to pass to base_context_init.
- *
- * Return: Physical memory group ID. Valid range is 0..(BASE_MEM_GROUP_COUNT-1).
- */
-static __inline__ int base_context_mmu_group_id_get(
- base_context_create_flags const flags)
-{
- LOCAL_ASSERT(flags == (flags & BASEP_CONTEXT_CREATE_ALLOWED_FLAGS));
- return (int)((flags & BASEP_CONTEXT_MMU_GROUP_ID_MASK) >>
- BASEP_CONTEXT_MMU_GROUP_ID_SHIFT);
-}
+#define BASE_CONTEXT_MMU_GROUP_ID_GET(flags) \
+ ((flags & BASEP_CONTEXT_MMU_GROUP_ID_MASK) >> \
+ BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)
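The former inline helpers base_mem_group_id_get/set() and base_context_mmu_group_id_get/set() are replaced by equivalent macros; BASE_MEM_GROUP_ID_SET() still clamps out-of-range IDs to BASE_MEM_GROUP_DEFAULT, but the LOCAL_ASSERT checks are gone and the macros may evaluate their arguments more than once. A minimal usage sketch (illustrative only, not part of the patch):

/* Encode physical memory group 2 into allocation flags, then read it back. */
base_mem_alloc_flags flags = BASE_MEM_GROUP_ID_SET(2);
int group = (int)BASE_MEM_GROUP_ID_GET(flags);	/* == 2 */

/* Request that GPU page tables for a new context come from group 1. */
base_context_create_flags ctx_flags = BASE_CONTEXT_MMU_GROUP_ID_SET(1);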
/*
* A number of bit flags are defined for requesting cpu_gpu_timeinfo. These
diff --git a/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h b/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
index 9baaec1..15843ee 100644
--- a/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
+++ b/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
@@ -91,6 +91,7 @@ enum base_hwcnt_reader_event {
#define KBASE_HWCNT_READER_API_VERSION_NO_FEATURE (0)
#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP (1 << 0)
#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES (1 << 1)
+
/**
* struct kbase_hwcnt_reader_api_version - hwcnt reader API version
* @version: API version
@@ -101,5 +102,263 @@ struct kbase_hwcnt_reader_api_version {
__u32 features;
};
+/** Hardware counters reader API version */
+#define PRFCNT_READER_API_VERSION (0)
+
+/**
+ * enum prfcnt_list_type - Type of list item
+ * @PRFCNT_LIST_TYPE_ENUM: Enumeration of performance counters.
+ * @PRFCNT_LIST_TYPE_REQUEST: Request for configuration setup.
+ * @PRFCNT_LIST_TYPE_SAMPLE_META: Sample metadata.
+ */
+enum prfcnt_list_type {
+ PRFCNT_LIST_TYPE_ENUM,
+ PRFCNT_LIST_TYPE_REQUEST,
+ PRFCNT_LIST_TYPE_SAMPLE_META,
+};
+
+#define FLEX_LIST_TYPE(type, subtype) \
+ (__u16)(((type & 0xf) << 12) | (subtype & 0xfff))
+#define FLEX_LIST_TYPE_NONE FLEX_LIST_TYPE(0, 0)
+
+#define PRFCNT_ENUM_TYPE_BLOCK FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_ENUM, 0)
+#define PRFCNT_ENUM_TYPE_REQUEST FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_ENUM, 1)
+
+#define PRFCNT_REQUEST_TYPE_MODE FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_REQUEST, 0)
+#define PRFCNT_REQUEST_TYPE_ENABLE FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_REQUEST, 1)
+
+#define PRFCNT_SAMPLE_META_TYPE_SAMPLE \
+ FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_SAMPLE_META, 0)
+#define PRFCNT_SAMPLE_META_TYPE_CLOCK \
+ FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_SAMPLE_META, 1)
+#define PRFCNT_SAMPLE_META_TYPE_BLOCK \
+ FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_SAMPLE_META, 2)
+
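FLEX_LIST_TYPE() packs a 4-bit list type and a 12-bit subtype into the 16-bit item_type field of struct prfcnt_item_header (defined just below). Worked examples using the enum values above: PRFCNT_REQUEST_TYPE_MODE expands to ((1 & 0xf) << 12) | (0 & 0xfff) = 0x1000, and PRFCNT_SAMPLE_META_TYPE_BLOCK to ((2 & 0xf) << 12) | (2 & 0xfff) = 0x2002, since PRFCNT_LIST_TYPE_REQUEST is 1 and PRFCNT_LIST_TYPE_SAMPLE_META is 2.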
+/**
+ * struct prfcnt_item_header - Header for an item of the list.
+ * @item_type: Type of item.
+ * @item_version: Protocol version.
+ */
+struct prfcnt_item_header {
+ __u16 item_type;
+ __u16 item_version;
+};
+
+/**
+ * enum prfcnt_block_type - Type of performance counter block.
+ * @PRFCNT_BLOCK_TYPE_FE: Front End.
+ * @PRFCNT_BLOCK_TYPE_TILER: Tiler.
+ * @PRFCNT_BLOCK_TYPE_MEMORY: Memory System.
+ * @PRFCNT_BLOCK_TYPE_SHADER_CORE: Shader Core.
+ */
+enum prfcnt_block_type {
+ PRFCNT_BLOCK_TYPE_FE,
+ PRFCNT_BLOCK_TYPE_TILER,
+ PRFCNT_BLOCK_TYPE_MEMORY,
+ PRFCNT_BLOCK_TYPE_SHADER_CORE,
+ PRFCNT_BLOCK_TYPE_RESERVED = 255,
+};
+
+/**
+ * enum prfcnt_block_set - Type of performance counter block set.
+ * @PRFCNT_SET_PRIMARY: Primary.
+ * @PRFCNT_SET_SECONDARY: Secondary.
+ * @PRFCNT_SET_TERTIARY: Tertiary.
+ */
+enum prfcnt_set {
+ PRFCNT_SET_PRIMARY,
+ PRFCNT_SET_SECONDARY,
+ PRFCNT_SET_TERTIARY,
+ PRFCNT_SET_RESERVED = 255,
+};
+
+/**
+ * struct prfcnt_enum_block_counter - Performance counter block descriptor.
+ * @block_type: Type of performance counter block.
+ * @set: Which SET this represents: primary, secondary or tertiary.
+ * @num_instances: How many instances of this block type exist in the hardware.
+ * @num_values: How many entries in the values array there are for samples
+ * from this block.
+ * @pad: Padding bytes.
+ * @counter_mask: Bitmask that indicates the availability of counters in this
+ * block.
+ */
+struct prfcnt_enum_block_counter {
+ __u8 block_type;
+ __u8 set;
+ __u8 num_instances;
+ __u8 num_values;
+ __u8 pad[4];
+ __u64 counter_mask[2];
+};
+
+/**
+ * struct prfcnt_enum_request - Request descriptor.
+ * @request_item_type: Type of request.
+ * @pad: Padding bytes.
+ * @versions_mask: Bitmask of versions that support this request.
+ */
+struct prfcnt_enum_request {
+ __u16 request_item_type;
+ __u16 pad;
+ __u32 versions_mask;
+};
+
+/**
+ * struct prfcnt_enum_item - Performance counter enumeration item.
+ * @hdr: Header describing the type of item in the list.
+ * @block_counter: Performance counter block descriptor.
+ * @request: Request descriptor.
+ */
+struct prfcnt_enum_item {
+ struct prfcnt_item_header hdr;
+ union {
+ struct prfcnt_enum_block_counter block_counter;
+ struct prfcnt_enum_request request;
+ } u;
+};
+
+/**
+ * enum prfcnt_mode - Capture mode for counter sampling.
+ * @PRFCNT_MODE_MANUAL: Manual sampling mode.
+ * @PRFCNT_MODE_PERIODIC: Periodic sampling mode.
+ */
+enum prfcnt_mode {
+ PRFCNT_MODE_MANUAL,
+ PRFCNT_MODE_PERIODIC,
+ PRFCNT_MODE_RESERVED = 255,
+};
+
+/**
+ * struct prfcnt_request_mode - Mode request descriptor.
+ * @mode: Capture mode for the session, either manual or periodic.
+ * @pad: Padding bytes.
+ * @period_us: Period in microseconds, for periodic mode.
+ */
+struct prfcnt_request_mode {
+ __u8 mode;
+ __u8 pad[7];
+ union {
+ struct {
+ __u64 period_us;
+ } periodic;
+ } mode_config;
+};
+
+/**
+ * struct prfcnt_request_enable - Enable request descriptor.
+ * @block_type: Type of performance counter block.
+ * @set: Which SET to use: primary, secondary or tertiary.
+ * @pad: Padding bytes.
+ * @enable_mask: Bitmask that indicates which performance counters to enable.
+ * Unavailable counters will be ignored.
+ */
+struct prfcnt_request_enable {
+ __u8 block_type;
+ __u8 set;
+ __u8 pad[6];
+ __u64 enable_mask[2];
+};
+
+/**
+ * struct prfcnt_request_item - Performance counter request item.
+ * @hdr: Header describing the type of item in the list.
+ * @req_mode: Mode request descriptor.
+ * @req_enable: Enable request descriptor.
+ */
+struct prfcnt_request_item {
+ struct prfcnt_item_header hdr;
+ union {
+ struct prfcnt_request_mode req_mode;
+ struct prfcnt_request_enable req_enable;
+ } u;
+};
+
+/**
+ * enum prfcnt_request_type - Type of request descriptor.
+ * @PRFCNT_REQUEST_MODE: Specify the capture mode to be used for the session.
+ * @PRFCNT_REQUEST_ENABLE: Specify which performance counters to capture.
+ */
+enum prfcnt_request_type {
+ PRFCNT_REQUEST_MODE,
+ PRFCNT_REQUEST_ENABLE,
+};
+
+/**
+ * struct prfcnt_sample_metadata - Metadata for counter sample data.
+ * @timestamp_start: Earliest timestamp that values in this sample represent.
+ * @timestamp_end: Latest timestamp that values in this sample represent.
+ * @seq: Sequence number of this sample. Must match the value from
+ * GET_SAMPLE.
+ * @user_data: User data provided to HWC_CMD_START or HWC_CMD_SAMPLE_*
+ * @flags: Property flags.
+ */
+struct prfcnt_sample_metadata {
+ __u64 timestamp_start;
+ __u64 timestamp_end;
+ __u64 seq;
+ __u64 user_data;
+ __u32 flags;
+ __u32 pad;
+};
+
+/**
+ * struct prfcnt_clock_metadata - Metadata for clock cycles.
+ * @num_domains: Number of domains this metadata refers to.
+ * @cycles: Number of cycles elapsed in each counter domain between
+ * timestamp_start and timestamp_end.
+ */
+struct prfcnt_clock_metadata {
+ __u32 num_domains;
+ __u32 pad;
+ __u64 *cycles;
+};
+
+/* This block was powered on for at least some portion of the sample */
+#define BLOCK_STATE_ON (1 << 0)
+/* This block was powered off for at least some portion of the sample */
+#define BLOCK_STATE_OFF (1 << 1)
+/* This block was available to this VM for at least some portion of the sample */
+#define BLOCK_STATE_AVAILABLE (1 << 2)
+/* This block was not available to this VM for at least some portion of the sample
+ * Note that no data is collected when the block is not available to the VM.
+ */
+#define BLOCK_STATE_UNAVAILABLE (1 << 3)
+/* This block was operating in "normal" (non-protected) mode for at least some portion of the sample */
+#define BLOCK_STATE_NORMAL (1 << 4)
+/* This block was operating in "protected" mode for at least some portion of the sample.
+ * Note that no data is collected when the block is in protected mode.
+ */
+#define BLOCK_STATE_PROTECTED (1 << 5)
+
+/**
+ * struct prfcnt_block_metadata - Metadata for counter block.
+ * @block_type: Type of performance counter block.
+ * @block_idx: Index of performance counter block.
+ * @set: Set of performance counter block.
+ * @block_state: Bits set indicate the states which the block is known
+ * to have operated in during this sample.
+ * @values_offset: Offset from the start of the mmapped region, to the values
+ * for this block. The values themselves are an array of __u64.
+ */
+struct prfcnt_block_metadata {
+ __u8 block_type;
+ __u8 block_idx;
+ __u8 set;
+ __u8 pad_u8;
+ __u32 block_state;
+ __u32 values_offset;
+ __u32 pad_u32;
+};
+
+struct prfcnt_metadata {
+ struct prfcnt_item_header hdr;
+ union {
+ struct prfcnt_sample_metadata sample_md;
+ struct prfcnt_clock_metadata clock_md;
+ struct prfcnt_block_metadata block_md;
+ } u;
+};
+
#endif /* _UAPI_KBASE_HWCNT_READER_H_ */
diff --git a/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h b/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h
index 29ff32a..8e1ed55 100644
--- a/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h
+++ b/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h
@@ -186,12 +186,15 @@ struct kbase_ioctl_hwcnt_enable {
__u32 mmu_l2_bm;
};
+/* This IOCTL is deprecated as of R33, and will be removed in R35. */
#define KBASE_IOCTL_HWCNT_ENABLE \
_IOW(KBASE_IOCTL_TYPE, 9, struct kbase_ioctl_hwcnt_enable)
+/* This IOCTL is deprecated as of R33, and will be removed in R35. */
#define KBASE_IOCTL_HWCNT_DUMP \
_IO(KBASE_IOCTL_TYPE, 10)
+/* This IOCTL is deprecated as of R33, and will be removed in R35. */
#define KBASE_IOCTL_HWCNT_CLEAR \
_IO(KBASE_IOCTL_TYPE, 11)
@@ -686,6 +689,55 @@ struct kbase_ioctl_set_limited_core_count {
#define KBASE_IOCTL_SET_LIMITED_CORE_COUNT \
_IOW(KBASE_IOCTL_TYPE, 55, struct kbase_ioctl_set_limited_core_count)
+/**
+ * struct kbase_ioctl_kinstr_prfcnt_enum_info - Enum Performance counter
+ * information
+ * @info_item_size: Performance counter item size in bytes.
+ * @info_item_count: Performance counter item count in the info_list_ptr.
+ * @info_list_ptr: Performance counter item list pointer which points to a
+ * list with info_item_count of items.
+ *
+ * On success: returns info_item_size and info_item_count if info_list_ptr is
+ * NULL, returns performance counter information if info_list_ptr is not NULL.
+ * On error: returns a negative error code.
+ */
+struct kbase_ioctl_kinstr_prfcnt_enum_info {
+ __u32 info_item_size;
+ __u32 info_item_count;
+ __u64 info_list_ptr;
+};
+
+#define KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO \
+ _IOWR(KBASE_IOCTL_TYPE, 56, struct kbase_ioctl_kinstr_prfcnt_enum_info)
+
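The enum-info ioctl is designed to be called twice: once with info_list_ptr set to NULL so the kernel can report the item size and count, then again with a user buffer of that size. A hedged userspace sketch (illustrative only, not part of the patch; assumes this mali_kbase_ioctl.h plus the usual libc headers, fd is an open kbase device file, and most error handling is omitted):

#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>

static void *enum_prfcnt_items(int fd, uint32_t *item_count, uint32_t *item_size)
{
	struct kbase_ioctl_kinstr_prfcnt_enum_info info = { 0 };
	void *list;

	/* First call: info_list_ptr == NULL, kernel fills in size and count. */
	if (ioctl(fd, KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO, &info) < 0)
		return NULL;

	list = calloc(info.info_item_count, info.info_item_size);
	if (!list)
		return NULL;

	/* Second call: kernel copies the enumeration items into the buffer. */
	info.info_list_ptr = (uint64_t)(uintptr_t)list;
	if (ioctl(fd, KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO, &info) < 0) {
		free(list);
		return NULL;
	}

	*item_count = info.info_item_count;
	*item_size = info.info_item_size;
	return list;
}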
+/**
+ * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader
+ * @in: input parameters.
+ * @in.request_item_count: Number of requests in the requests array.
+ * @in.request_item_size: Size in bytes of each request in the requests array.
+ * @in.requests_ptr: Pointer to the requests array.
+ * @out: output parameters.
+ * @out.prfcnt_metadata_item_size: Size of each item in the metadata array for
+ * each sample.
+ * @out.prfcnt_mmap_size_bytes: Size in bytes that user-space should mmap
+ * for reading performance counter samples.
+ *
+ * A fd is returned from the ioctl if successful, or a negative value on error.
+ */
+union kbase_ioctl_kinstr_prfcnt_setup {
+ struct {
+ __u32 request_item_count;
+ __u32 request_item_size;
+ __u64 requests_ptr;
+ } in;
+ struct {
+ __u32 prfcnt_metadata_item_size;
+ __u32 prfcnt_mmap_size_bytes;
+ } out;
+};
+
+#define KBASE_IOCTL_KINSTR_PRFCNT_SETUP \
+ _IOWR(KBASE_IOCTL_TYPE, 57, union kbase_ioctl_kinstr_prfcnt_setup)
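KBASE_IOCTL_KINSTR_PRFCNT_SETUP consumes an array of struct prfcnt_request_item (from the mali_kbase_hwcnt_reader.h hunk earlier in this diff) and, per the comment above, returns a new file descriptor for the counter client on success. A hedged sketch of a minimal manual-mode session that enables the primary-set front-end counters (illustrative only, not part of the patch; whether additional or terminating request items are required is not spelled out in these headers):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>

static int setup_prfcnt_client(int fd)
{
	struct prfcnt_request_item reqs[2];
	union kbase_ioctl_kinstr_prfcnt_setup setup;

	memset(reqs, 0, sizeof(reqs));
	memset(&setup, 0, sizeof(setup));

	/* Request manual sampling mode. */
	reqs[0].hdr.item_type = PRFCNT_REQUEST_TYPE_MODE;
	reqs[0].u.req_mode.mode = PRFCNT_MODE_MANUAL;

	/* Enable every available counter in the primary front-end block;
	 * unavailable counters are ignored per the header comment.
	 */
	reqs[1].hdr.item_type = PRFCNT_REQUEST_TYPE_ENABLE;
	reqs[1].u.req_enable.block_type = PRFCNT_BLOCK_TYPE_FE;
	reqs[1].u.req_enable.set = PRFCNT_SET_PRIMARY;
	reqs[1].u.req_enable.enable_mask[0] = UINT64_MAX;
	reqs[1].u.req_enable.enable_mask[1] = UINT64_MAX;

	setup.in.request_item_count = 2;
	setup.in.request_item_size = sizeof(reqs[0]);
	setup.in.requests_ptr = (uint64_t)(uintptr_t)reqs;

	/* On success the return value is the counter client's fd. */
	return ioctl(fd, KBASE_IOCTL_KINSTR_PRFCNT_SETUP, &setup);
}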
/***************
* test ioctls *
diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild
index c520597..e253f1c 100644
--- a/mali_kbase/Kbuild
+++ b/mali_kbase/Kbuild
@@ -48,6 +48,10 @@ ifeq ($(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND),n)
$(error CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND must be set in Kernel configuration)
endif
+ifeq ($(CONFIG_FW_LOADER), n)
+ $(error CONFIG_FW_LOADER must be set in Kernel configuration)
+endif
+
ifeq ($(CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS), y)
ifneq ($(CONFIG_DEBUG_FS), y)
$(error CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS depends on CONFIG_DEBUG_FS to be set in Kernel configuration)
@@ -67,7 +71,7 @@ endif
#
# Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= '"r32p1-01eac0"'
+MALI_RELEASE_NAME ?= '"r34p0-00dev1"'
# Set up defaults if not defined by build system
ifeq ($(CONFIG_MALI_DEBUG), y)
MALI_UNIT_TEST = 1
@@ -91,6 +95,7 @@ else
MALI_USE_CSF ?= 0
endif
+
ifneq ($(CONFIG_MALI_KUTF), n)
MALI_KERNEL_TEST_API ?= 1
else
@@ -156,9 +161,11 @@ mali_kbase-y := \
mali_kbase_gpuprops.o \
mali_kbase_pm.o \
mali_kbase_config.o \
+ mali_kbase_kinstr_prfcnt.o \
mali_kbase_vinstr.o \
mali_kbase_hwcnt.o \
mali_kbase_hwcnt_gpu.o \
+ mali_kbase_hwcnt_gpu_narrow.o \
mali_kbase_hwcnt_legacy.o \
mali_kbase_hwcnt_types.o \
mali_kbase_hwcnt_virtualizer.o \
@@ -180,7 +187,10 @@ mali_kbase-y := \
mali_kbase_regs_history_debugfs.o \
mali_kbase_dvfs_debugfs.o \
mali_power_gpu_frequency_trace.o \
- mali_kbase_trace_gpu_mem.o
+ mali_kbase_trace_gpu_mem.o \
+ mali_kbase_pbha.o
+
+mali_kbase-$(CONFIG_DEBUG_FS) += mali_kbase_pbha_debugfs.o
mali_kbase-$(CONFIG_MALI_CINSTR_GWT) += mali_kbase_gwt.o
diff --git a/mali_kbase/Kconfig b/mali_kbase/Kconfig
index 9f1a6e3..a563d35 100644
--- a/mali_kbase/Kconfig
+++ b/mali_kbase/Kconfig
@@ -24,6 +24,7 @@ menuconfig MALI_MIDGARD
select DMA_SHARED_BUFFER
select PM_DEVFREQ
select DEVFREQ_THERMAL
+ select FW_LOADER
default n
help
Enable this option to build support for a ARM Mali Midgard GPU.
@@ -39,7 +40,7 @@ config MALI_PLATFORM_NAME
default "devicetree"
help
Enter the name of the desired platform configuration directory to
- include in the build. 'platform/$(MALI_PLATFORM_NAME)/Makefile' must
+ include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must
exist.
config MALI_REAL_HW
@@ -365,7 +366,7 @@ config MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE
endif
config MALI_ARBITRATION
- bool "Enable Virtualization reference code"
+ tristate "Enable Virtualization reference code"
depends on MALI_MIDGARD
default n
help
diff --git a/mali_kbase/Makefile b/mali_kbase/Makefile
index 4384e80..099da33 100644
--- a/mali_kbase/Makefile
+++ b/mali_kbase/Makefile
@@ -55,7 +55,7 @@ ifeq ($(CONFIG_MALI_MIDGARD),m)
CONFIG_MALI_DMA_BUF_LEGACY_COMPAT = n
endif
- ifeq ($(CONFIG_BSP_HAS_HYPERVISOR),y)
+ ifeq ($(CONFIG_XEN),y)
ifneq ($(CONFIG_MALI_ARBITRATION), n)
CONFIG_MALI_XEN ?= m
endif
diff --git a/mali_kbase/Mconfig b/mali_kbase/Mconfig
index d71a113..1b66978 100644
--- a/mali_kbase/Mconfig
+++ b/mali_kbase/Mconfig
@@ -35,7 +35,7 @@ config MALI_PLATFORM_NAME
default "devicetree"
help
Enter the name of the desired platform configuration directory to
- include in the build. 'platform/$(MALI_PLATFORM_NAME)/Makefile' must
+ include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must
exist.
When PLATFORM_CUSTOM is set, this needs to be set manually to
diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_defs.h b/mali_kbase/arbiter/mali_kbase_arbiter_defs.h
index 570a82a..65cfc7b 100644
--- a/mali_kbase/arbiter/mali_kbase_arbiter_defs.h
+++ b/mali_kbase/arbiter/mali_kbase_arbiter_defs.h
@@ -20,7 +20,6 @@
*/
/**
- * @file
* Mali structures define to support arbitration feature
*/
diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_interface.h b/mali_kbase/arbiter/mali_kbase_arbiter_interface.h
index c0137f7..3c60878 100644
--- a/mali_kbase/arbiter/mali_kbase_arbiter_interface.h
+++ b/mali_kbase/arbiter/mali_kbase_arbiter_interface.h
@@ -20,7 +20,6 @@
*/
/**
- * @file
* Defines the Mali arbiter interface
*/
@@ -61,58 +60,47 @@ struct arbiter_if_dev;
* the arbiter arbiter_if_vm_arb_ops callbacks below.
* For example vm_arb_gpu_stopped() may be called as a side effect of
* arb_vm_gpu_stop() being called here.
+ *
+ * @arb_vm_gpu_stop: Callback to ask VM to stop using GPU.
+ * dev: The arbif kernel module device.
+ *
+ * Informs KBase to stop using the GPU as soon as possible.
+ * Note: Once the driver is no longer using the GPU, a call
+ * to vm_arb_gpu_stopped is expected by the arbiter.
+ * @arb_vm_gpu_granted: Callback to indicate that GPU has been granted to VM.
+ * dev: The arbif kernel module device.
+ *
+ * Informs KBase that the GPU can now be used by the VM.
+ * @arb_vm_gpu_lost: Callback to indicate that VM has lost the GPU.
+ * dev: The arbif kernel module device.
+ *
+ * This is called if KBase takes too long to respond to the
+ * arbiter stop request.
+ * Once this is called, KBase will assume that access to the
+ * GPU has been lost and will fail all running jobs and
+ * reset its internal state.
+ * If successful, will respond with a vm_arb_gpu_stopped
+ * message.
+ * @arb_vm_max_config: Callback to send the max config info to the VM.
+ * dev: The arbif kernel module device.
+ * max_l2_slices: The maximum number of L2 slices.
+ * max_core_mask: The largest core mask.
+ *
+ * Informs KBase the maximum resources that can be
+ * allocated to the partition in use.
+ * @arb_vm_update_freq: Callback to notify that GPU clock frequency has been
+ * updated.
+ * dev: The arbif kernel module device.
+ * freq: GPU clock frequency value reported from arbiter
+ *
+ * Informs KBase that the GPU clock frequency has been updated.
*/
struct arbiter_if_arb_vm_ops {
- /**
- * arb_vm_gpu_stop() - Ask VM to stop using GPU
- * @dev: The arbif kernel module device.
- *
- * Informs KBase to stop using the GPU as soon as possible.
- * @Note: Once the driver is no longer using the GPU, a call to
- * vm_arb_gpu_stopped is expected by the arbiter.
- */
void (*arb_vm_gpu_stop)(struct device *dev);
-
- /**
- * arb_vm_gpu_granted() - GPU has been granted to VM
- * @dev: The arbif kernel module device.
- *
- * Informs KBase that the GPU can now be used by the VM.
- */
void (*arb_vm_gpu_granted)(struct device *dev);
-
- /**
- * arb_vm_gpu_lost() - VM has lost the GPU
- * @dev: The arbif kernel module device.
- *
- * This is called if KBase takes too long to respond to the arbiter
- * stop request.
- * Once this is called, KBase will assume that access to the GPU
- * has been lost and will fail all running jobs and reset its
- * internal state.
- * If successful, will respond with a vm_arb_gpu_stopped message.
- */
void (*arb_vm_gpu_lost)(struct device *dev);
-
- /**
- * arb_vm_max_config() - Send max config info to the VM
- * @dev: The arbif kernel module device.
- * @max_l2_slices: The maximum number of L2 slices.
- * @max_core_mask: The largest core mask.
- *
- * Informs KBase the maximum resources that can be allocated to the
- * partition in use.
- */
void (*arb_vm_max_config)(struct device *dev, uint32_t max_l2_slices,
uint32_t max_core_mask);
-
- /**
- * arb_vm_update_freq() - GPU clock frequency has been updated
- * @dev: The arbif kernel module device.
- * @freq: GPU clock frequency value reported from arbiter
- *
- * Informs KBase that the GPU clock frequency has been updated.
- */
void (*arb_vm_update_freq)(struct device *dev, uint32_t freq);
};
@@ -124,60 +112,45 @@ struct arbiter_if_arb_vm_ops {
*
* Note that we must not make any synchronous calls back in to the VM
* (via arbiter_if_arb_vm_ops above) in the context of these callbacks.
+ *
+ * @vm_arb_register_dev: Callback to register VM device driver callbacks.
+ * arbif_dev: The arbiter interface to register
+ * with for device callbacks
+ * dev: The device structure to supply in the callbacks.
+ * ops: The callbacks that the device driver supports
+ * (none are optional).
+ *
+ * Returns
+ * 0 - successful.
+ * -EINVAL - invalid argument.
+ * -EPROBE_DEFER - module dependencies are not yet
+ * available.
+ * @vm_arb_unregister_dev: Callback to unregister VM device driver callbacks.
+ * arbif_dev: The arbiter interface to unregistering
+ * from.
+ * @vm_arb_get_max_config: Callback to request the max config from the Arbiter.
+ * arbif_dev: The arbiter interface to issue the
+ * request to.
+ * @vm_arb_gpu_request: Callback to ask the arbiter interface for GPU access.
+ * arbif_dev: The arbiter interface to issue the request
+ * to.
+ * @vm_arb_gpu_active: Callback to inform arbiter that driver has gone active.
+ * arbif_dev: The arbiter interface device to notify.
+ * @vm_arb_gpu_idle: Callback to inform the arbiter that driver has gone idle.
+ * arbif_dev: The arbiter interface device to notify.
+ * @vm_arb_gpu_stopped: Callback to inform arbiter that driver has stopped
+ * using the GPU
+ * arbif_dev: The arbiter interface device to notify.
+ * gpu_required: The GPU is still needed to do more work.
*/
struct arbiter_if_vm_arb_ops {
- /**
- * vm_arb_register_dev() - Register VM device driver callbacks.
- * @arbif_dev: The arbiter interface we are registering device callbacks
- * @dev: The device structure to supply in the callbacks.
- * @ops: The callbacks that the device driver supports
- * (none are optional).
- *
- * Return:
- * * 0 - successful.
- * * -EINVAL - invalid argument.
- * * -EPROBE_DEFER - module dependencies are not yet available.
- */
int (*vm_arb_register_dev)(struct arbiter_if_dev *arbif_dev,
struct device *dev, struct arbiter_if_arb_vm_ops *ops);
-
- /**
- * vm_arb_unregister_dev() - Unregister VM device driver callbacks.
- * @arbif_dev: The arbiter interface we are unregistering from.
- */
void (*vm_arb_unregister_dev)(struct arbiter_if_dev *arbif_dev);
-
- /**
- * vm_arb_gpu_get_max_config() - Request the max config from the
- * Arbiter.
- * @arbif_dev: The arbiter interface we want to issue the request.
- */
void (*vm_arb_get_max_config)(struct arbiter_if_dev *arbif_dev);
-
- /**
- * vm_arb_gpu_request() - Ask the arbiter interface for GPU access.
- * @arbif_dev: The arbiter interface we want to issue the request.
- */
void (*vm_arb_gpu_request)(struct arbiter_if_dev *arbif_dev);
-
- /**
- * vm_arb_gpu_active() - Inform arbiter that the driver has gone active
- * @arbif_dev: The arbiter interface device.
- */
void (*vm_arb_gpu_active)(struct arbiter_if_dev *arbif_dev);
-
- /**
- * vm_arb_gpu_idle() - Inform the arbiter that the driver has gone idle
- * @arbif_dev: The arbiter interface device.
- */
void (*vm_arb_gpu_idle)(struct arbiter_if_dev *arbif_dev);
-
- /**
- * vm_arb_gpu_stopped() - Inform the arbiter that the driver has stopped
- * using the GPU
- * @arbif_dev: The arbiter interface device.
- * @gpu_required: The GPU is still needed to do more work.
- */
void (*vm_arb_gpu_stopped)(struct arbiter_if_dev *arbif_dev,
u8 gpu_required);
};
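
The two ops tables above describe a simple handshake: the arbiter drives the VM through the arb_vm_* callbacks, and the VM answers through the vm_arb_* callbacks (the kerneldoc explicitly allows vm_arb_gpu_stopped() to be called as a side effect of arb_vm_gpu_stop()). A minimal sketch of a VM-side integration is shown below; how the arbiter interface device and its vm_arb ops table are obtained is platform glue and purely an assumption here, as are the my_* names.

/* Minimal sketch only - not part of the patch. 'arbif' and 'arb_ops' are
 * assumed to be published by the arbif kernel module via platform glue.
 */
#include <linux/device.h>
#include "mali_kbase_arbiter_interface.h"

static struct arbiter_if_dev *arbif;
static struct arbiter_if_vm_arb_ops *arb_ops;

static void my_arb_vm_gpu_stop(struct device *dev)
{
	/* Stop using the GPU as soon as possible, then acknowledge.
	 * gpu_required = 1 indicates there is still work queued for the GPU.
	 */
	arb_ops->vm_arb_gpu_stopped(arbif, 1);
}

static void my_arb_vm_gpu_granted(struct device *dev)
{
	/* The GPU may now be used by this VM. */
}

static void my_arb_vm_gpu_lost(struct device *dev)
{
	/* Fail running jobs and reset internal state, as described above. */
}

/* All callbacks are mandatory; the max_config/update_freq stubs are left
 * empty here only to keep the sketch short.
 */
static void my_arb_vm_max_config(struct device *dev, uint32_t max_l2_slices,
				 uint32_t max_core_mask) { }
static void my_arb_vm_update_freq(struct device *dev, uint32_t freq) { }

static struct arbiter_if_arb_vm_ops my_vm_ops = {
	.arb_vm_gpu_stop = my_arb_vm_gpu_stop,
	.arb_vm_gpu_granted = my_arb_vm_gpu_granted,
	.arb_vm_gpu_lost = my_arb_vm_gpu_lost,
	.arb_vm_max_config = my_arb_vm_max_config,
	.arb_vm_update_freq = my_arb_vm_update_freq,
};

static int my_register(struct device *dev)
{
	/* -EPROBE_DEFER means the arbiter module is not yet available. */
	return arb_ops->vm_arb_register_dev(arbif, dev, &my_vm_ops);
}
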
diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_pm.c b/mali_kbase/arbiter/mali_kbase_arbiter_pm.c
index 5c75686..62ff4fd 100644
--- a/mali_kbase/arbiter/mali_kbase_arbiter_pm.c
+++ b/mali_kbase/arbiter/mali_kbase_arbiter_pm.c
@@ -20,15 +20,12 @@
*/
/**
- * @file
* Mali arbiter power manager state machine and APIs
*/
#include <mali_kbase.h>
#include <mali_kbase_pm.h>
-#include <mali_kbase_hwaccess_jm.h>
#include <backend/gpu/mali_kbase_irq_internal.h>
-#include <mali_kbase_hwcnt_context.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <tl/mali_kbase_tracepoints.h>
#include <mali_kbase_gpuprops.h>
@@ -319,6 +316,7 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev)
if (kbdev->arb.arb_if) {
kbase_arbif_gpu_request(kbdev);
dev_dbg(kbdev->dev, "Waiting for initial GPU assignment...\n");
+
err = wait_event_timeout(arb_vm_state->vm_state_wait,
arb_vm_state->vm_state ==
KBASE_VM_STATE_INITIALIZING_WITH_GPU,
@@ -328,8 +326,9 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev)
dev_dbg(kbdev->dev,
"Kbase probe Deferred after waiting %d ms to receive GPU_GRANT\n",
gpu_req_timeout);
- err = -EPROBE_DEFER;
- goto arbif_eprobe_defer;
+
+ err = -ENODEV;
+ goto arbif_timeout;
}
dev_dbg(kbdev->dev,
@@ -337,9 +336,10 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev)
}
return 0;
-arbif_eprobe_defer:
+arbif_timeout:
kbase_arbiter_pm_early_term(kbdev);
return err;
+
arbif_init_fail:
destroy_workqueue(arb_vm_state->vm_arb_wq);
kfree(arb_vm_state);
@@ -619,6 +619,18 @@ static void kbase_arbiter_pm_vm_gpu_stop(struct kbase_device *kbdev)
case KBASE_VM_STATE_SUSPEND_PENDING:
/* Suspend finishes with a stop so nothing else to do */
break;
+ case KBASE_VM_STATE_INITIALIZING:
+ case KBASE_VM_STATE_STOPPED_GPU_REQUESTED:
+ /*
+ * Case stop() is received when in a GPU REQUESTED state, it
+ * means that the granted() was missed so the GPU needs to be
+ * requested again.
+ */
+ dev_dbg(kbdev->dev,
+ "GPU stop while already stopped with GPU requested");
+ kbase_arbif_gpu_stopped(kbdev, true);
+ start_request_timer(kbdev);
+ break;
default:
dev_warn(kbdev->dev, "GPU_STOP when not expected - state %s\n",
kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state));
@@ -656,9 +668,20 @@ static void kbase_gpu_lost(struct kbase_device *kbdev)
break;
case KBASE_VM_STATE_SUSPENDED:
case KBASE_VM_STATE_STOPPED:
- case KBASE_VM_STATE_STOPPED_GPU_REQUESTED:
dev_dbg(kbdev->dev, "GPU lost while already stopped");
break;
+ case KBASE_VM_STATE_INITIALIZING:
+ case KBASE_VM_STATE_STOPPED_GPU_REQUESTED:
+ /*
+ * Case lost() is received when in a GPU REQUESTED state, it
+ * means that the granted() and stop() were missed so the GPU
+ * needs to be requested again. Very unlikely to happen.
+ */
+ dev_dbg(kbdev->dev,
+ "GPU lost while already stopped with GPU requested");
+ kbase_arbif_gpu_request(kbdev);
+ start_request_timer(kbdev);
+ break;
case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT:
dev_dbg(kbdev->dev, "GPU lost while waiting to suspend");
kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPENDED);
@@ -1020,8 +1043,8 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev,
/**
* kbase_arbiter_pm_update_gpu_freq() - Updates GPU clock frequency received
* from arbiter.
- * @arb_freq - Pointer to struchture holding GPU clock frequenecy data
- * @freq - New frequency value in KHz
+ * @arb_freq: Pointer to structure holding GPU clock frequency data
+ * @freq: New frequency value in KHz
*/
void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq,
uint32_t freq)
@@ -1045,8 +1068,8 @@ void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq,
/**
* enumerate_arb_gpu_clk() - Enumerate a GPU clock on the given index
- * @kbdev - kbase_device pointer
- * @index - GPU clock index
+ * @kbdev: kbase_device pointer
+ * @index: GPU clock index
*
* Returns pointer to structure holding GPU clock frequency data reported from
* arbiter, only index 0 is valid.
@@ -1061,8 +1084,8 @@ static void *enumerate_arb_gpu_clk(struct kbase_device *kbdev,
/**
* get_arb_gpu_clk_rate() - Get the current rate of GPU clock frequency value
- * @kbdev - kbase_device pointer
- * @index - GPU clock index
+ * @kbdev: kbase_device pointer
+ * @index: GPU clock index
*
* Returns the GPU clock frequency value saved when gpu is granted from arbiter
*/
@@ -1082,9 +1105,9 @@ static unsigned long get_arb_gpu_clk_rate(struct kbase_device *kbdev,
/**
* arb_gpu_clk_notifier_register() - Register a clock rate change notifier.
- * @kbdev - kbase_device pointer
- * @gpu_clk_handle - Handle unique to the enumerated GPU clock
- * @nb - notifier block containing the callback function pointer
+ * @kbdev: kbase_device pointer
+ * @gpu_clk_handle: Handle unique to the enumerated GPU clock
+ * @nb: notifier block containing the callback function pointer
*
* Returns 0 on success, negative error code otherwise.
*
@@ -1108,9 +1131,9 @@ static int arb_gpu_clk_notifier_register(struct kbase_device *kbdev,
/**
* gpu_clk_notifier_unregister() - Unregister clock rate change notifier
- * @kbdev - kbase_device pointer
- * @gpu_clk_handle - Handle unique to the enumerated GPU clock
- * @nb - notifier block containing the callback function pointer
+ * @kbdev: kbase_device pointer
+ * @gpu_clk_handle: Handle unique to the enumerated GPU clock
+ * @nb: notifier block containing the callback function pointer
*
* This function pointer is used to unregister a callback function that
* was previously registered to get notified of a frequency change of the
diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_pm.h b/mali_kbase/arbiter/mali_kbase_arbiter_pm.h
index 1f570bb..091b431 100644
--- a/mali_kbase/arbiter/mali_kbase_arbiter_pm.h
+++ b/mali_kbase/arbiter/mali_kbase_arbiter_pm.h
@@ -20,7 +20,6 @@
*/
/**
- * @file
* Mali arbiter power manager state machine and APIs
*/
@@ -108,6 +107,7 @@ int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev);
/**
* kbase_arbiter_pm_vm_event() - Dispatch VM event to the state machine
* @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @event: The event to dispatch
*
* The state machine function. Receives events and transitions states
* according the event received and the current state
diff --git a/mali_kbase/arbitration/Kconfig b/mali_kbase/arbitration/Kconfig
index 95125f9..b4d6202 100644
--- a/mali_kbase/arbitration/Kconfig
+++ b/mali_kbase/arbitration/Kconfig
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note OR MIT
#
# (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved.
#
@@ -19,7 +19,7 @@
#
config MALI_XEN
- bool "Enable Xen Interface reference code"
+ tristate "Enable Xen Interface reference code"
depends on MALI_ARBITRATION && XEN
default n
help
@@ -27,13 +27,5 @@ config MALI_XEN
virtualization setup for Mali
If unsure, say N.
-config MALI_KUTF_ARBITRATION_TEST
- bool "Enable Arbitration Test reference code"
- depends on MALI_KUTF && MALI_ARBITRATION
- default n
- help
- Enables the build of test modules used in the reference
- virtualization setup for Mali
- If unsure, say N.
source "drivers/gpu/arm/midgard/arbitration/ptm/Kconfig"
diff --git a/mali_kbase/arbitration/ptm/Kconfig b/mali_kbase/arbitration/ptm/Kconfig
index e11e674..074ebd5 100644
--- a/mali_kbase/arbitration/ptm/Kconfig
+++ b/mali_kbase/arbitration/ptm/Kconfig
@@ -19,7 +19,7 @@
#
config MALI_PARTITION_MANAGER
- bool "Enable compilation of partition manager modules"
+ tristate "Enable compilation of partition manager modules"
depends on MALI_ARBITRATION
default n
help
diff --git a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c
index e542ccf..9587c70 100644
--- a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c
@@ -22,12 +22,22 @@
#include "backend/gpu/mali_kbase_cache_policy_backend.h"
#include <device/mali_kbase_device.h>
+
void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
u32 mode)
{
kbdev->current_gpu_coherency_mode = mode;
- if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG))
kbase_reg_write(kbdev, COHERENCY_ENABLE, mode);
}
+u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev)
+{
+ u32 coherency_features;
+
+ coherency_features = kbase_reg_read(
+ kbdev, GPU_CONTROL_REG(COHERENCY_FEATURES));
+
+ return coherency_features;
+}
+
diff --git a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h
index 278125a..13c79d6 100644
--- a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h
+++ b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h
@@ -26,12 +26,21 @@
#include <uapi/gpu/arm/midgard/mali_base_kernel.h>
/**
- * kbase_cache_set_coherency_mode() - Sets the system coherency mode
- * in the GPU.
- * @kbdev: Device pointer
- * @mode: Coherency mode. COHERENCY_ACE/ACE_LITE
- */
+ * kbase_cache_set_coherency_mode() - Sets the system coherency mode
+ * in the GPU.
+ * @kbdev: Device pointer
+ * @mode: Coherency mode. COHERENCY_ACE/ACE_LITE
+ */
void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
u32 mode);
-#endif /* _KBASE_CACHE_POLICY_H_ */
+/**
+ * kbase_cache_get_coherency_features() - Get the coherency features
+ * in the GPU.
+ * @kbdev: Device pointer
+ *
+ * Return: Register value to be returned
+ */
+u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev);
+
+#endif /* _KBASE_CACHE_POLICY_BACKEND_H_ */
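
A condensed sketch of how the new getter is expected to be consumed, following the pattern adopted by the gpuprops backend later in this patch (register access has to be enabled around the read; kbase_pm_register_access_enable/disable are existing kbase PM helpers). The wrapper name below is illustrative only.

/* Sketch only: reading COHERENCY_FEATURES through the new backend helper. */
static u32 read_coherency_features(struct kbase_device *kbdev)
{
	u32 features;

	kbase_pm_register_access_enable(kbdev);  /* ensure registers are accessible */
	features = kbase_cache_get_coherency_features(kbdev);
	kbase_pm_register_access_disable(kbdev);

	return features;
}
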
diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
index 6ad0f58..d6b9750 100644
--- a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
+++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
@@ -26,6 +26,7 @@
#include <mali_kbase.h>
#include <mali_kbase_config_defaults.h>
#include <linux/clk.h>
+#include <linux/pm_opp.h>
#include <asm/div64.h>
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
@@ -46,7 +47,7 @@
* Return: Pointer to clk trace ops if supported or NULL.
*/
static struct kbase_clk_rate_trace_op_conf *
-get_clk_rate_trace_callbacks(struct kbase_device *kbdev __maybe_unused)
+get_clk_rate_trace_callbacks(__maybe_unused struct kbase_device *kbdev)
{
/* base case */
struct kbase_clk_rate_trace_op_conf *callbacks =
@@ -71,6 +72,49 @@ get_clk_rate_trace_callbacks(struct kbase_device *kbdev __maybe_unused)
return callbacks;
}
+int kbase_lowest_gpu_freq_init(struct kbase_device *kbdev)
+{
+ /* Uses default reference frequency defined in below macro */
+ u64 lowest_freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ;
+
+ /* Only check lowest frequency in cases when OPPs are used and
+ * present in the device tree.
+ */
+#ifdef CONFIG_PM_OPP
+ struct dev_pm_opp *opp_ptr;
+ unsigned long found_freq = 0;
+
+ /* find lowest frequency OPP */
+ opp_ptr = dev_pm_opp_find_freq_ceil(kbdev->dev, &found_freq);
+ if (IS_ERR(opp_ptr)) {
+ dev_err(kbdev->dev,
+ "No OPPs found in device tree! Scaling timeouts using %llu kHz",
+ (unsigned long long)lowest_freq_khz);
+ } else {
+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
+ dev_pm_opp_put(opp_ptr); /* decrease OPP refcount */
+#endif
+ /* convert found frequency to KHz */
+ found_freq /= 1000;
+
+ /* If lowest frequency in OPP table is still higher
+ * than the reference, then keep the reference frequency
+		 * as the one to use for scaling.
+ */
+ if (found_freq < lowest_freq_khz)
+ lowest_freq_khz = found_freq;
+ }
+#else
+ dev_err(kbdev->dev,
+ "No operating-points-v2 node or operating-points property in DT");
+#endif
+
+ kbdev->lowest_gpu_freq_khz = lowest_freq_khz;
+ dev_dbg(kbdev->dev, "Lowest frequency identified is %llu kHz",
+ kbdev->lowest_gpu_freq_khz);
+ return 0;
+}
+
static int gpu_clk_rate_change_notifier(struct notifier_block *nb,
unsigned long event, void *data)
{
diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h
index f7ec9d1..df30b63 100644
--- a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h
+++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h
@@ -61,6 +61,21 @@ struct kbase_clk_data {
int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev);
/**
+ * kbase_lowest_gpu_freq_init() - Find the lowest frequency that the GPU can
+ *                                run at using the device tree, and save this
+ *                                within kbdev.
+ *
+ * This function could be called from kbase_clk_rate_trace_manager_init,
+ * but is left separate as it can be called as soon as
+ * dev_pm_opp_of_add_table() has been called to initialize the OPP table.
+ *
+ * @kbdev: Pointer to kbase device.
+ *
+ * Return: 0 in any case.
+ */
+int kbase_lowest_gpu_freq_init(struct kbase_device *kbdev);
+
+/**
* kbase_clk_rate_trace_manager_term - Terminate GPU clock rate trace manager.
*
* @kbdev: Device pointer
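
As the kerneldoc notes, the only ordering requirement is that the OPP table has already been populated. A hedged sketch of the intended call order during probe is given below; the surrounding probe code and the helper name are assumptions, not part of this patch.

/* Illustrative init ordering only (assumes the usual kbase/OPP headers). */
static int init_lowest_gpu_freq(struct kbase_device *kbdev)
{
	/* Populate the OPP table from the devicetree first... */
	if (dev_pm_opp_of_add_table(kbdev->dev))
		dev_warn(kbdev->dev,
			 "No OPP table; the default reference frequency will be used");

	/* ...then the lowest OPP (or the default reference) can be latched. */
	return kbase_lowest_gpu_freq_init(kbdev);
}
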
diff --git a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c
index 11088db..7b04286 100644
--- a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -26,6 +26,7 @@
#include <mali_kbase.h>
#include <device/mali_kbase_device.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_cache_policy_backend.h>
#include <mali_kbase_hwaccess_gpuprops.h>
int kbase_backend_gpuprops_get(struct kbase_device *kbdev,
@@ -146,7 +147,7 @@ int kbase_backend_gpuprops_get_curr_config(struct kbase_device *kbdev,
curr_config_regdump->l2_present_hi = kbase_reg_read(kbdev,
GPU_CONTROL_REG(L2_PRESENT_HI));
- if (WARN_ON(kbase_is_gpu_removed(kbdev)))
+ if (kbase_is_gpu_removed(kbdev))
return -EIO;
return 0;
@@ -156,30 +157,22 @@ int kbase_backend_gpuprops_get_curr_config(struct kbase_device *kbdev,
int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
struct kbase_gpuprops_regdump *regdump)
{
- if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) {
- u32 coherency_features;
+ u32 coherency_features;
+ int error = 0;
- /* Ensure we can access the GPU registers */
- kbase_pm_register_access_enable(kbdev);
+ /* Ensure we can access the GPU registers */
+ kbase_pm_register_access_enable(kbdev);
- coherency_features = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(COHERENCY_FEATURES));
+ coherency_features = kbase_cache_get_coherency_features(kbdev);
- if (kbase_is_gpu_removed(kbdev))
- return -EIO;
+ if (kbase_is_gpu_removed(kbdev))
+ error = -EIO;
- regdump->coherency_features = coherency_features;
+ regdump->coherency_features = coherency_features;
- /* We're done accessing the GPU registers for now. */
- kbase_pm_register_access_disable(kbdev);
- } else {
- /* Pre COHERENCY_FEATURES we only supported ACE_LITE */
- regdump->coherency_features =
- COHERENCY_FEATURE_BIT(COHERENCY_NONE) |
- COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE);
- }
+ kbase_pm_register_access_disable(kbdev);
- return 0;
+ return error;
}
int kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev,
@@ -190,13 +183,24 @@ int kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev,
GPU_CONTROL_REG(L2_FEATURES));
u32 l2_config =
kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG));
+ u32 asn_hash[ASN_HASH_COUNT] = {
+ 0,
+ };
+ int i;
+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH)) {
+ for (i = 0; i < ASN_HASH_COUNT; i++)
+ asn_hash[i] = kbase_reg_read(
+ kbdev, GPU_CONTROL_REG(ASN_HASH(i)));
+ }
if (kbase_is_gpu_removed(kbdev))
return -EIO;
regdump->l2_features = l2_features;
regdump->l2_config = l2_config;
+ for (i = 0; i < ASN_HASH_COUNT; i++)
+ regdump->l2_asn_hash[i] = asn_hash[i];
}
return 0;
diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
index d7edf30..90cc537 100644
--- a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
@@ -53,6 +53,12 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
goto out_err;
}
+ if (kbase_is_gpu_removed(kbdev)) {
+ /* GPU has been removed by Arbiter */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ goto out_err;
+ }
+
/* Enable interrupt */
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
@@ -152,6 +158,14 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
kbdev->hwcnt.backend.triggered = 0;
+ if (kbase_is_gpu_removed(kbdev)) {
+ /* GPU has been removed by Arbiter */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+ err = 0;
+ goto out;
+ }
+
/* Disable interrupt */
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
@@ -195,6 +209,11 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
goto unlock;
}
+ if (kbase_is_gpu_removed(kbdev)) {
+ /* GPU has been removed by Arbiter */
+ goto unlock;
+ }
+
kbdev->hwcnt.backend.triggered = 0;
/* Mark that we're dumping - the PF handler can signal that we faulted
@@ -310,6 +329,11 @@ int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
KBASE_INSTR_STATE_IDLE)
goto out;
+ if (kbase_is_gpu_removed(kbdev)) {
+ /* GPU has been removed by Arbiter */
+ goto out;
+ }
+
/* Clear the counters */
KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, 0);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
index ae0377f..001efd9 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
@@ -48,18 +48,13 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev,
int js, const u64 limited_core_mask)
{
u64 affinity;
+ bool skip_affinity_check = false;
if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
BASE_JD_REQ_T) {
- /* Tiler-only atom */
- /* If the hardware supports XAFFINITY then we'll only enable
- * the tiler (which is the default so this is a no-op),
- * otherwise enable shader core 0.
- */
- if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
- affinity = 1;
- else
- affinity = 0;
+		/* Tiler-only atom, affinity value can be programmed as 0 */
+ affinity = 0;
+ skip_affinity_check = true;
} else if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
@@ -89,7 +84,7 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev,
affinity = kbasep_apply_limited_core_mask(kbdev, affinity, limited_core_mask);
}
- if (unlikely(!affinity)) {
+ if (unlikely(!affinity && !skip_affinity_check)) {
#ifdef CONFIG_MALI_DEBUG
u64 shaders_ready =
kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
@@ -251,18 +246,13 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
(katom->core_req & BASE_JD_REQ_END_RENDERPASS))
cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK;
- if (kbase_hw_has_feature(kbdev,
- BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
- if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) {
- cfg |= JS_CONFIG_JOB_CHAIN_FLAG;
- katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN;
- kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
- true;
- } else {
- katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN;
- kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
- false;
- }
+ if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) {
+ cfg |= JS_CONFIG_JOB_CHAIN_FLAG;
+ katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN;
+ kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = true;
+ } else {
+ katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN;
+ kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = false;
}
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg);
@@ -621,25 +611,17 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
/* Mark the point where we issue the soft-stop command */
KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(kbdev, target_katom);
- if (kbase_hw_has_feature(
- kbdev,
- BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
- action = (target_katom->atom_flags &
- KBASE_KATOM_FLAGS_JOBCHAIN) ?
- JS_COMMAND_SOFT_STOP_1 :
- JS_COMMAND_SOFT_STOP_0;
- }
+ action = (target_katom->atom_flags &
+ KBASE_KATOM_FLAGS_JOBCHAIN) ?
+ JS_COMMAND_SOFT_STOP_1 :
+ JS_COMMAND_SOFT_STOP_0;
} else if (action == JS_COMMAND_HARD_STOP) {
target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED;
- if (kbase_hw_has_feature(
- kbdev,
- BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
- action = (target_katom->atom_flags &
- KBASE_KATOM_FLAGS_JOBCHAIN) ?
- JS_COMMAND_HARD_STOP_1 :
- JS_COMMAND_HARD_STOP_0;
- }
+ action = (target_katom->atom_flags &
+ KBASE_KATOM_FLAGS_JOBCHAIN) ?
+ JS_COMMAND_HARD_STOP_1 :
+ JS_COMMAND_HARD_STOP_0;
}
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action);
@@ -725,40 +707,11 @@ void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx)
kbase_job_slot_hardstop(kctx, i, NULL);
}
-/**
- * kbase_is_existing_atom_submitted_later_than_ready
- * @ready: sequence number of the ready atom
- * @existing: sequence number of the existing atom
- *
- * Returns true if the existing atom has been submitted later than the
- * ready atom. It is used to understand if an atom that is ready has been
- * submitted earlier than the currently running atom, so that the currently
- * running atom should be preempted to allow the ready atom to run.
- */
-static inline bool kbase_is_existing_atom_submitted_later_than_ready(u64 ready, u64 existing)
-{
- /* No seq_nr set? */
- if (!ready || !existing)
- return false;
-
- /* Efficiently handle the unlikely case of wrapping.
- * The following code assumes that the delta between the sequence number
- * of the two atoms is less than INT64_MAX.
- * In the extremely unlikely case where the delta is higher, the comparison
- * defaults for no preemption.
- * The code also assumes that the conversion from unsigned to signed types
- * works because the signed integers are 2's complement.
- */
- return (s64)(ready - existing) < 0;
-}
-
void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
struct kbase_jd_atom *target_katom)
{
struct kbase_device *kbdev;
- int js = target_katom->slot_nr;
- int priority = target_katom->sched_priority;
- int seq_nr = target_katom->seq_nr;
+ int target_js = target_katom->slot_nr;
int i;
bool stop_sent = false;
@@ -768,26 +721,21 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
lockdep_assert_held(&kbdev->hwaccess_lock);
- for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
- struct kbase_jd_atom *katom;
-
- katom = kbase_gpu_inspect(kbdev, js, i);
- if (!katom)
- continue;
+ for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, target_js); i++) {
+ struct kbase_jd_atom *slot_katom;
- if ((kbdev->js_ctx_scheduling_mode ==
- KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE) &&
- (katom->kctx != kctx))
+ slot_katom = kbase_gpu_inspect(kbdev, target_js, i);
+ if (!slot_katom)
continue;
- if ((katom->sched_priority > priority) ||
- (katom->kctx == kctx && kbase_is_existing_atom_submitted_later_than_ready(seq_nr, katom->seq_nr))) {
+ if (kbase_js_atom_runs_before(kbdev, target_katom, slot_katom,
+ KBASE_ATOM_ORDERING_FLAG_SEQNR)) {
if (!stop_sent)
KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED(
kbdev,
target_katom);
- kbase_job_slot_softstop(kbdev, js, katom);
+ kbase_job_slot_softstop(kbdev, target_js, slot_katom);
stop_sent = true;
}
}
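
The removed helper above is worth preserving as an idiom: ordering two u64 sequence numbers via a signed difference handles wrap-around without extra branches. The equivalent check is now expected to sit behind kbase_js_atom_runs_before() with KBASE_ATOM_ORDERING_FLAG_SEQNR, whose implementation is not visible in this hunk; the sketch below merely restates the removed logic in isolation.

#include <linux/types.h>

/* Wrap-safe "was 'existing' submitted after 'ready'?" check, as in the
 * removed kbase_is_existing_atom_submitted_later_than_ready() helper.
 */
static inline bool submitted_later_than_ready(u64 ready, u64 existing)
{
	/* No seq_nr assigned to one of the atoms? */
	if (!ready || !existing)
		return false;

	/* The 2's-complement signed difference orders correctly as long as
	 * the distance between the two values is below INT64_MAX; beyond
	 * that it conservatively reports "not later" (no preemption).
	 */
	return (s64)(ready - existing) < 0;
}
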
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
index b475d79..1906286 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
@@ -387,6 +387,9 @@ static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev,
{
lockdep_assert_held(&kbdev->hwaccess_lock);
+ KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_MARK_FOR_RETURN_TO_JS,
+ katom->kctx, katom, katom->jc,
+ katom->slot_nr, katom->event_code);
kbase_gpu_release_atom(kbdev, katom, NULL);
katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS;
}
@@ -564,7 +567,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
kbdev->protected_mode_transition = true;
/* ***TRANSITION TO HIGHER STATE*** */
- /* fallthrough */
+ fallthrough;
case KBASE_ATOM_ENTER_PROTECTED_HWCNT:
/* See if we can get away with disabling hwcnt atomically */
kbdev->protected_mode_hwcnt_desired = false;
@@ -607,7 +610,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
kbase_pm_update_cores_state_nolock(kbdev);
/* ***TRANSITION TO HIGHER STATE*** */
- /* fallthrough */
+ fallthrough;
case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2:
/* Avoid unnecessary waiting on non-ACE platforms. */
if (kbdev->system_coherency == COHERENCY_ACE) {
@@ -638,7 +641,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY;
/* ***TRANSITION TO HIGHER STATE*** */
- /* fallthrough */
+ fallthrough;
case KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY:
/*
* When entering into protected mode, we must ensure that the
@@ -671,7 +674,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
return -EAGAIN;
/* ***TRANSITION TO HIGHER STATE*** */
- /* fallthrough */
+ fallthrough;
case KBASE_ATOM_ENTER_PROTECTED_FINISHED:
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) {
/*
@@ -742,7 +745,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
kbase_pm_update_cores_state_nolock(kbdev);
/* ***TRANSITION TO HIGHER STATE*** */
- /* fallthrough */
+ fallthrough;
case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2:
if (kbdev->pm.backend.l2_state != KBASE_L2_OFF) {
/*
@@ -755,7 +758,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
KBASE_ATOM_EXIT_PROTECTED_RESET;
/* ***TRANSITION TO HIGHER STATE*** */
- /* fallthrough */
+ fallthrough;
case KBASE_ATOM_EXIT_PROTECTED_RESET:
/* Issue the reset to the GPU */
err = kbase_gpu_protected_mode_reset(kbdev);
@@ -797,7 +800,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT;
/* ***TRANSITION TO HIGHER STATE*** */
- /* fallthrough */
+ fallthrough;
case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT:
/* A GPU reset is issued when exiting protected mode. Once the
* reset is done all atoms' state will also be reset. For this
@@ -854,7 +857,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV;
/* ***TRANSITION TO HIGHER STATE*** */
- /* fallthrough */
+ fallthrough;
case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
if (kbase_gpu_check_secure_atoms(kbdev,
!kbase_jd_katom_is_protected(
@@ -874,7 +877,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION;
/* ***TRANSITION TO HIGHER STATE*** */
- /* fallthrough */
+ fallthrough;
case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION:
/*
@@ -909,7 +912,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE;
/* ***TRANSITION TO HIGHER STATE*** */
- /* fallthrough */
+ fallthrough;
case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
if (katom[idx]->will_fail_event_code) {
kbase_gpu_mark_atom_for_return(kbdev,
@@ -936,6 +939,11 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
if (katom[idx]->event_code ==
BASE_JD_EVENT_PM_EVENT) {
+ KBASE_KTRACE_ADD_JM_SLOT_INFO(
+ kbdev, JM_MARK_FOR_RETURN_TO_JS,
+ katom[idx]->kctx, katom[idx],
+ katom[idx]->jc, js,
+ katom[idx]->event_code);
katom[idx]->gpu_rb_state =
KBASE_ATOM_GPU_RB_RETURN_TO_JS;
break;
@@ -948,7 +956,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
KBASE_ATOM_GPU_RB_READY;
/* ***TRANSITION TO HIGHER STATE*** */
- /* fallthrough */
+ fallthrough;
case KBASE_ATOM_GPU_RB_READY:
if (idx == 1) {
@@ -994,7 +1002,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
KBASE_ATOM_GPU_RB_SUBMITTED;
/* ***TRANSITION TO HIGHER STATE*** */
- /* fallthrough */
+ fallthrough;
case KBASE_ATOM_GPU_RB_SUBMITTED:
/* Inform power management at start/finish of
@@ -1037,9 +1045,55 @@ void kbase_backend_run_atom(struct kbase_device *kbdev,
kbase_backend_slot_update(kbdev);
}
-#define HAS_DEP(katom) (katom->pre_dep || katom->atom_flags & \
- (KBASE_KATOM_FLAG_X_DEP_BLOCKED | KBASE_KATOM_FLAG_FAIL_BLOCKER))
+/**
+ * kbase_rb_atom_might_depend - determine if one atom in the slot ringbuffer
+ * might depend on another from the same kctx
+ * @katom_a: dependee atom
+ * @katom_b: atom to query
+ *
+ * This can be used on atoms that belong to different slot ringbuffers
+ *
+ * Return: true if @katom_b might depend on @katom_a, false if it cannot depend.
+ */
+static inline bool
+kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a,
+ const struct kbase_jd_atom *katom_b)
+{
+ if (katom_a->kctx != katom_b->kctx)
+ return false;
+ return (katom_b->pre_dep ||
+ (katom_b->atom_flags & (KBASE_KATOM_FLAG_X_DEP_BLOCKED |
+ KBASE_KATOM_FLAG_FAIL_BLOCKER)));
+}
+/**
+ * kbase_gpu_irq_evict - evict a slot's JSn_HEAD_NEXT atom from the HW if it is
+ * related to a failed JSn_HEAD atom
+ * @kbdev: kbase device
+ * @js: job slot to check
+ * @completion_code: completion code of the failed atom
+ *
+ * Note: 'STOPPED' atoms are considered 'failed', as they are in the HW, but
+ * unlike other failure codes we _can_ re-run them.
+ *
+ * This forms step 1 in a 2-step process of removing any related atoms from a
+ * slot's JSn_HEAD_NEXT (ringbuffer index 1), should there have
+ * been a 'failure' on an atom in JSn_HEAD (ringbuffer index 0).
+ *
+ * This step only removes the atoms from the HW, and marks them as
+ * (potentially) ready to run again.
+ *
+ * Step 2 is on marking the JSn_HEAD atom as complete
+ * (kbase_gpu_complete_hw()), to dequeue said atoms and return them to the JS
+ * as appropriate, or re-submit them.
+ *
+ * Hence, this function must evict at a minimum the atoms related to the atom
+ * in JSn_HEAD that kbase_gpu_complete_hw() will also dequeue. It is acceptable
+ * if this function evicts more atoms than kbase_gpu_complete_hw() dequeues, as
+ * the next kbase_backend_slot_update() will resubmit any remaining.
+ *
+ * Return: true if an atom was evicted, false otherwise.
+ */
bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
u32 completion_code)
{
@@ -1051,14 +1105,12 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
katom = kbase_gpu_inspect(kbdev, js, 0);
next_katom = kbase_gpu_inspect(kbdev, js, 1);
- if (next_katom && katom->kctx == next_katom->kctx &&
- next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED &&
- (HAS_DEP(next_katom) || next_katom->sched_priority ==
- katom->sched_priority) &&
- (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO))
- != 0 ||
- kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI))
- != 0)) {
+ if (next_katom &&
+ next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED &&
+ (kbase_rb_atom_might_depend(katom, next_katom) ||
+ kbase_js_atom_runs_before(kbdev, katom, next_katom, 0u)) &&
+ (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO)) != 0 ||
+ kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI)) != 0)) {
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
JS_COMMAND_NOP);
next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
@@ -1083,6 +1135,30 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
return false;
}
+/**
+ * kbase_gpu_complete_hw - complete the atom in a slot's JSn_HEAD
+ * @kbdev: kbase device
+ * @js: job slot to check
+ * @completion_code: completion code of the completed atom
+ * @job_tail: value read from JSn_TAIL, for STOPPED atoms
+ * @end_timestamp: pointer to approximate ktime value when the katom completed
+ *
+ * Among other operations, this also executes step 2 of a 2-step process of
+ * removing any related atoms from a slot's JSn_HEAD_NEXT (ringbuffer index 1),
+ * should there have been a 'failure' on an atom in JSn_HEAD (ringbuffer index
+ * 0). The first step is done in kbase_gpu_irq_evict().
+ *
+ * Note: 'STOPPED' atoms are considered 'failed', as they are in the HW, but
+ * unlike other failure codes we _can_ re-run them.
+ *
+ * When the JSn_HEAD atom is considered to be 'failed', then this will dequeue
+ * and return to the JS some (usually all) of the atoms evicted from the HW
+ * during the kbase_gpu_irq_evict() for that JSn_HEAD atom. If it dequeues an
+ * atom, that atom must not have been running or must already be evicted, as
+ * otherwise we would be in the incorrect state of having an atom both running
+ * on the HW and returned to the JS.
+ */
+
void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
u32 completion_code,
u64 job_tail,
@@ -1133,9 +1209,8 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
* registers by kbase_gpu_soft_hard_stop_slot(), to ensure that
* the atoms on this slot are returned in the correct order.
*/
- if (next_katom && katom->kctx == next_katom->kctx &&
- next_katom->sched_priority ==
- katom->sched_priority) {
+ if (next_katom &&
+ kbase_js_atom_runs_before(kbdev, katom, next_katom, 0u)) {
WARN_ON(next_katom->gpu_rb_state ==
KBASE_ATOM_GPU_RB_SUBMITTED);
kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
@@ -1145,12 +1220,14 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
int i;
- if (!kbase_ctx_flag(katom->kctx, KCTX_DYING))
+ if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) {
dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)",
js, completion_code,
kbase_gpu_exception_name(
completion_code));
+ }
+
#if KBASE_KTRACE_DUMP_ON_JOB_SLOT_ERROR != 0
KBASE_KTRACE_DUMP(kbdev);
#endif
@@ -1168,18 +1245,17 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
struct kbase_jd_atom *katom_idx1 =
kbase_gpu_inspect(kbdev, i, 1);
- if (katom_idx0 && katom_idx0->kctx == katom->kctx &&
- HAS_DEP(katom_idx0) &&
- katom_idx0->gpu_rb_state !=
- KBASE_ATOM_GPU_RB_SUBMITTED) {
+ if (katom_idx0 &&
+ kbase_rb_atom_might_depend(katom, katom_idx0) &&
+ katom_idx0->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_SUBMITTED) {
/* Dequeue katom_idx0 from ringbuffer */
kbase_gpu_dequeue_atom(kbdev, i, end_timestamp);
- if (katom_idx1 &&
- katom_idx1->kctx == katom->kctx
- && HAS_DEP(katom_idx1) &&
- katom_idx0->gpu_rb_state !=
- KBASE_ATOM_GPU_RB_SUBMITTED) {
+ if (katom_idx1 && kbase_rb_atom_might_depend(
+ katom, katom_idx1) &&
+ katom_idx0->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_SUBMITTED) {
/* Dequeue katom_idx1 from ringbuffer */
kbase_gpu_dequeue_atom(kbdev, i,
end_timestamp);
@@ -1192,11 +1268,10 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
katom_idx0->event_code = BASE_JD_EVENT_STOPPED;
kbase_jm_return_atom_to_js(kbdev, katom_idx0);
- } else if (katom_idx1 &&
- katom_idx1->kctx == katom->kctx &&
- HAS_DEP(katom_idx1) &&
- katom_idx1->gpu_rb_state !=
- KBASE_ATOM_GPU_RB_SUBMITTED) {
+ } else if (katom_idx1 && kbase_rb_atom_might_depend(
+ katom, katom_idx1) &&
+ katom_idx1->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_SUBMITTED) {
/* Can not dequeue this atom yet - will be
* dequeued when atom at idx0 completes
*/
@@ -1369,17 +1444,63 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
kbase_pm_protected_override_disable(kbdev);
}
+/**
+ * should_stop_next_atom - given a soft/hard stop action, determine if the next
+ * atom on a slot should be stopped
+ * @kbdev: kbase device
+ * @head_katom: atom currently in the JSn_HEAD
+ * @next_katom: atom currently in the JSn_HEAD_NEXT
+ * @action: JS_COMMAND_<...> action for soft/hard-stop
+ *
+ * This is used in cases where @head_katom is the target of the soft/hard-stop.
+ * It only makes sense to call this when @head_katom and @next_katom are from
+ * the same slot.
+ *
+ * Return: true if @next_katom should also be stopped with the given action,
+ * false otherwise
+ */
+static bool should_stop_next_atom(struct kbase_device *kbdev,
+ const struct kbase_jd_atom *head_katom,
+ const struct kbase_jd_atom *next_katom,
+ u32 action)
+{
+ bool ret = false;
+ u32 hw_action = action & JS_COMMAND_MASK;
+
+ switch (hw_action) {
+ case JS_COMMAND_SOFT_STOP:
+ ret = kbase_js_atom_runs_before(kbdev, head_katom, next_katom,
+ 0u);
+ break;
+ case JS_COMMAND_HARD_STOP:
+ /* Unlike soft-stop, a hard-stop targeting a particular atom
+ * should not cause atoms from unrelated contexts to be
+ * removed
+ */
+ ret = (head_katom->kctx == next_katom->kctx);
+ break;
+ default:
+ /* Other stop actions are possible, but the driver should not
+ * be generating them at this point in the call chain
+ */
+ WARN(1, "Unexpected stop action: 0x%.8x", hw_action);
+ break;
+ }
+ return ret;
+}
+
static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev,
int js,
struct kbase_jd_atom *katom,
u32 action)
{
+ struct kbase_context *kctx = katom->kctx;
u32 hw_action = action & JS_COMMAND_MASK;
kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom);
kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action,
katom->core_req, katom);
- katom->kctx->blocked_js[js][katom->sched_priority] = true;
+ kbase_jsctx_slot_prio_blocked_set(kctx, js, katom->sched_priority);
}
static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev,
@@ -1387,11 +1508,14 @@ static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev,
u32 action,
bool disjoint)
{
+ struct kbase_context *kctx = katom->kctx;
+
lockdep_assert_held(&kbdev->hwaccess_lock);
katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
kbase_gpu_mark_atom_for_return(kbdev, katom);
- katom->kctx->blocked_js[katom->slot_nr][katom->sched_priority] = true;
+ kbase_jsctx_slot_prio_blocked_set(kctx, katom->slot_nr,
+ katom->sched_priority);
if (disjoint)
kbase_job_check_enter_disjoint(kbdev, action, katom->core_req,
@@ -1419,7 +1543,9 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
u32 action)
{
struct kbase_jd_atom *katom_idx0;
+ struct kbase_context *kctx_idx0 = NULL;
struct kbase_jd_atom *katom_idx1;
+ struct kbase_context *kctx_idx1 = NULL;
bool katom_idx0_valid, katom_idx1_valid;
@@ -1433,30 +1559,32 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
katom_idx0 = kbase_gpu_inspect(kbdev, js, 0);
katom_idx1 = kbase_gpu_inspect(kbdev, js, 1);
- if (katom_idx0)
+ if (katom_idx0) {
+ kctx_idx0 = katom_idx0->kctx;
prio_idx0 = katom_idx0->sched_priority;
- if (katom_idx1)
+ }
+ if (katom_idx1) {
+ kctx_idx1 = katom_idx1->kctx;
prio_idx1 = katom_idx1->sched_priority;
+ }
if (katom) {
katom_idx0_valid = (katom_idx0 == katom);
- /* If idx0 is to be removed and idx1 is on the same context,
- * then idx1 must also be removed otherwise the atoms might be
- * returned out of order
- */
if (katom_idx1)
- katom_idx1_valid = (katom_idx1 == katom) ||
- (katom_idx0_valid &&
- (katom_idx0->kctx ==
- katom_idx1->kctx));
+ katom_idx1_valid = (katom_idx1 == katom);
else
katom_idx1_valid = false;
} else {
- katom_idx0_valid =
- (katom_idx0 && (!kctx || katom_idx0->kctx == kctx));
- katom_idx1_valid =
- (katom_idx1 && (!kctx || katom_idx1->kctx == kctx));
+ katom_idx0_valid = (katom_idx0 && (!kctx || kctx_idx0 == kctx));
+ katom_idx1_valid = (katom_idx1 && (!kctx || kctx_idx1 == kctx));
}
+ /* If there's an atom in JSn_HEAD_NEXT that we haven't already decided
+ * to stop, but we're stopping the JSn_HEAD atom, see if they are
+ * related/ordered in some way that would require the same stop action
+ */
+ if (!katom_idx1_valid && katom_idx0_valid && katom_idx1)
+ katom_idx1_valid = should_stop_next_atom(kbdev, katom_idx0,
+ katom_idx1, action);
if (katom_idx0_valid)
stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0);
@@ -1472,14 +1600,15 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
katom_idx1->event_code =
BASE_JD_EVENT_REMOVED_FROM_NEXT;
kbase_jm_return_atom_to_js(kbdev, katom_idx1);
- katom_idx1->kctx->blocked_js[js][prio_idx1] =
- true;
+ kbase_jsctx_slot_prio_blocked_set(kctx_idx1, js,
+ prio_idx1);
}
katom_idx0->event_code =
BASE_JD_EVENT_REMOVED_FROM_NEXT;
kbase_jm_return_atom_to_js(kbdev, katom_idx0);
- katom_idx0->kctx->blocked_js[js][prio_idx0] = true;
+ kbase_jsctx_slot_prio_blocked_set(kctx_idx0, js,
+ prio_idx0);
} else {
/* katom_idx0 is on GPU */
if (katom_idx1_valid && katom_idx1->gpu_rb_state ==
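
The new kerneldoc in this file describes a two-step recovery path when an atom in JSn_HEAD fails. The fragment below is a simplified, hypothetical caller that shows only the intended ordering of the two steps; the real dispatch lives in the job IRQ handler in mali_kbase_jm_hw.c and carries considerably more state.

/* Sketch of the 2-step failure handling (not the actual IRQ handler). */
static void handle_job_slot_done(struct kbase_device *kbdev, int js,
				 u32 completion_code, u64 job_tail,
				 ktime_t *end_timestamp)
{
	if (completion_code != BASE_JD_EVENT_DONE) {
		/* Step 1: evict any related atom from JSn_HEAD_NEXT so it can
		 * later be re-run or returned; this may evict more atoms than
		 * step 2 ends up dequeuing.
		 */
		kbase_gpu_irq_evict(kbdev, js, completion_code);
	}

	/* Step 2: complete the JSn_HEAD atom; on failure this also dequeues
	 * the atoms evicted above and hands them back to the job scheduler.
	 */
	kbase_gpu_complete_hw(kbdev, js, completion_code, job_tail,
			      end_timestamp);
}
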
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
index cc791df..5df7f67 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
@@ -32,6 +32,9 @@
#include <mali_kbase_hwaccess_jm.h>
#include <backend/gpu/mali_kbase_js_internal.h>
#include <backend/gpu/mali_kbase_jm_internal.h>
+#else
+#include <linux/pm_runtime.h>
+#include <mali_kbase_reset_gpu.h>
#endif /* !MALI_USE_CSF */
#include <mali_kbase_hwcnt_context.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
@@ -69,6 +72,10 @@ int kbase_pm_runtime_init(struct kbase_device *kbdev)
callbacks->power_runtime_idle_callback;
kbdev->pm.backend.callback_soft_reset =
callbacks->soft_reset_callback;
+ kbdev->pm.backend.callback_power_runtime_gpu_idle =
+ callbacks->power_runtime_gpu_idle_callback;
+ kbdev->pm.backend.callback_power_runtime_gpu_active =
+ callbacks->power_runtime_gpu_active_callback;
if (callbacks->power_runtime_init_callback)
return callbacks->power_runtime_init_callback(kbdev);
@@ -86,6 +93,8 @@ int kbase_pm_runtime_init(struct kbase_device *kbdev)
kbdev->pm.backend.callback_power_runtime_off = NULL;
kbdev->pm.backend.callback_power_runtime_idle = NULL;
kbdev->pm.backend.callback_soft_reset = NULL;
+ kbdev->pm.backend.callback_power_runtime_gpu_idle = NULL;
+ kbdev->pm.backend.callback_power_runtime_gpu_active = NULL;
return 0;
}
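
The two new backend pointers give platform integrations a hook to hold and drop a runtime-PM reference around the period in which kbase considers the GPU active. A possible platform-side sketch is shown below; mapping the callbacks onto pm_runtime_get_sync()/pm_runtime_put_autosuspend() is one plausible implementation and an assumption here, not something mandated by this patch.

#include <linux/pm_runtime.h>

/* Hypothetical platform callbacks: keep a runtime-PM reference while the
 * GPU is active from kbase's point of view.
 */
static void plat_runtime_gpu_active(struct kbase_device *kbdev)
{
	pm_runtime_get_sync(kbdev->dev);
}

static void plat_runtime_gpu_idle(struct kbase_device *kbdev)
{
	pm_runtime_mark_last_busy(kbdev->dev);
	pm_runtime_put_autosuspend(kbdev->dev);
}

struct kbase_pm_callback_conf pm_callbacks = {
	/* ... existing power on/off and runtime callbacks ... */
	.power_runtime_gpu_active_callback = plat_runtime_gpu_active,
	.power_runtime_gpu_idle_callback = plat_runtime_gpu_idle,
};
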
@@ -120,10 +129,10 @@ void kbase_pm_register_access_disable(struct kbase_device *kbdev)
callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+ kbdev->pm.backend.gpu_powered = false;
+
if (callbacks)
callbacks->power_off_callback(kbdev);
-
- kbdev->pm.backend.gpu_powered = false;
}
int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
@@ -193,6 +202,7 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
kbase_pm_hwcnt_disable_worker);
kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
+
if (IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED)) {
kbdev->pm.backend.l2_always_on = false;
kbdev->pm.backend.gpu_clock_slow_down_wa = false;
@@ -263,6 +273,76 @@ void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
*/
}
+static void pm_handle_power_off(struct kbase_device *kbdev)
+{
+ struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+#if MALI_USE_CSF
+ enum kbase_mcu_state mcu_state;
+#endif
+ unsigned long flags;
+
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ if (backend->poweron_required)
+ return;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
+	if (kbdev->pm.backend.gpu_wakeup_override) {
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ return;
+ }
+#endif
+ WARN_ON(backend->shaders_state !=
+ KBASE_SHADERS_OFF_CORESTACK_OFF ||
+ backend->l2_state != KBASE_L2_OFF);
+#if MALI_USE_CSF
+ mcu_state = backend->mcu_state;
+ WARN_ON(!kbase_pm_is_mcu_inactive(kbdev, mcu_state));
+#endif
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
+ if (backend->callback_power_runtime_gpu_idle) {
+ WARN_ON(backend->gpu_idled);
+ backend->callback_power_runtime_gpu_idle(kbdev);
+ backend->gpu_idled = true;
+ return;
+ }
+#endif
+
+ /* Disable interrupts and turn the clock off */
+ if (!kbase_pm_clock_off(kbdev)) {
+ /*
+ * Page/bus faults are pending, must drop locks to
+ * process. Interrupts are disabled so no more faults
+ * should be generated at this point.
+ */
+ kbase_pm_unlock(kbdev);
+ kbase_flush_mmu_wqs(kbdev);
+ kbase_pm_lock(kbdev);
+
+#ifdef CONFIG_MALI_ARBITER_SUPPORT
+ /* poweron_required may have changed while pm lock
+ * was released.
+ */
+ if (kbase_pm_is_gpu_lost(kbdev))
+ backend->poweron_required = false;
+#endif
+
+		/* Turn off clock now that faults have been handled. We
+ * dropped locks so poweron_required may have changed -
+ * power back on if this is the case (effectively only
+ * re-enabling of the interrupts would be done in this
+ * case, as the clocks to GPU were not withdrawn yet).
+ */
+ if (backend->poweron_required)
+ kbase_pm_clock_on(kbdev, false);
+ else
+ WARN_ON(!kbase_pm_clock_off(kbdev));
+ }
+}
+
static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
{
struct kbase_device *kbdev = container_of(data, struct kbase_device,
@@ -271,6 +351,8 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
struct kbase_pm_backend_data *backend = &pm->backend;
unsigned long flags;
+ KBASE_KTRACE_ADD(kbdev, PM_POWEROFF_WAIT_WQ, NULL, 0);
+
#if !MALI_USE_CSF
/* Wait for power transitions to complete. We do this with no locks held
* so that we don't deadlock with any pending workqueues.
@@ -285,46 +367,7 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
backend->poweron_required = false;
#endif
- if (!backend->poweron_required) {
- unsigned long flags;
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- WARN_ON(backend->shaders_state !=
- KBASE_SHADERS_OFF_CORESTACK_OFF ||
- backend->l2_state != KBASE_L2_OFF);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
- /* Disable interrupts and turn the clock off */
- if (!kbase_pm_clock_off(kbdev)) {
- /*
- * Page/bus faults are pending, must drop locks to
- * process. Interrupts are disabled so no more faults
- * should be generated at this point.
- */
- kbase_pm_unlock(kbdev);
- kbase_flush_mmu_wqs(kbdev);
- kbase_pm_lock(kbdev);
-
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
- /* poweron_required may have changed while pm lock
- * was released.
- */
- if (kbase_pm_is_gpu_lost(kbdev))
- backend->poweron_required = false;
-#endif
-
- /* Turn off clock now that fault have been handled. We
- * dropped locks so poweron_required may have changed -
- * power back on if this is the case (effectively only
- * re-enabling of the interrupts would be done in this
- * case, as the clocks to GPU were not withdrawn yet).
- */
- if (backend->poweron_required)
- kbase_pm_clock_on(kbdev, false);
- else
- WARN_ON(!kbase_pm_clock_off(kbdev));
- }
- }
+ pm_handle_power_off(kbdev);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
backend->poweroff_wait_in_progress = false;
@@ -512,6 +555,74 @@ static void kbase_pm_hwcnt_disable_worker(struct work_struct *data)
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
+/**
+ * kbase_pm_do_poweroff_sync - Do the synchronous power down of GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This function is called at the time of system suspend or device unload
+ * to power down the GPU synchronously. This is needed because the power down
+ * of the GPU usually happens from the runtime suspend callback (if gpu_active
+ * and gpu_idle callbacks are used), and runtime suspend is disabled when
+ * system suspend takes place.
+ * The function first waits for the @gpu_poweroff_wait_work to complete, which
+ * could have been enqueued after the last PM reference was released.
+ */
+static void kbase_pm_do_poweroff_sync(struct kbase_device *kbdev)
+{
+ struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+ unsigned long flags;
+
+ WARN_ON(kbdev->pm.active_count);
+
+ kbase_pm_wait_for_poweroff_work_complete(kbdev);
+
+ kbase_pm_lock(kbdev);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ WARN_ON(backend->poweroff_wait_in_progress);
+ if (backend->gpu_powered) {
+ int ret;
+
+ backend->mcu_desired = false;
+ backend->l2_desired = false;
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ ret = kbase_pm_wait_for_desired_state(kbdev);
+ if (ret) {
+ dev_warn(kbdev->dev, "Wait failed on synchronous power off");
+ kbase_pm_unlock(kbdev);
+ /* Wait for the completion of reset, triggered due to
+ * the previous failure.
+ */
+ kbase_reset_gpu_wait(kbdev);
+ /* Wait again for the poweroff work which could have
+ * been enqueued by the GPU reset worker.
+ */
+ kbase_pm_wait_for_poweroff_work_complete(kbdev);
+ kbase_pm_lock(kbdev);
+ }
+
+		/* Due to the power policy, the GPU could have been kept active
+		 * throughout, so the idle callback needs to be invoked before
+		 * the power down.
+ */
+ if (backend->callback_power_runtime_gpu_idle &&
+ !backend->gpu_idled) {
+ backend->callback_power_runtime_gpu_idle(kbdev);
+ backend->gpu_idled = true;
+ }
+
+ kbase_pm_clock_off(kbdev);
+ } else {
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ }
+
+ kbase_pm_unlock(kbdev);
+}
+#endif
+
void kbase_pm_do_poweroff(struct kbase_device *kbdev)
{
unsigned long flags;
@@ -561,12 +672,31 @@ static bool is_poweroff_in_progress(struct kbase_device *kbdev)
return ret;
}
-void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev)
+void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev)
{
wait_event_killable(kbdev->pm.backend.poweroff_wait,
is_poweroff_in_progress(kbdev));
}
-KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_complete);
+KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_work_complete);
+
+static bool is_gpu_powered_down(struct kbase_device *kbdev)
+{
+ bool ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ ret = !kbdev->pm.backend.gpu_powered;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ return ret;
+}
+
+void kbase_pm_wait_for_gpu_power_down(struct kbase_device *kbdev)
+{
+ wait_event_killable(kbdev->pm.backend.poweroff_wait,
+ is_gpu_powered_down(kbdev));
+}
+KBASE_EXPORT_TEST_API(kbase_pm_wait_for_gpu_power_down);
int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
unsigned int flags)
@@ -612,6 +742,15 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
* cores off
*/
kbdev->pm.active_count = 1;
+#if MALI_USE_CSF && KBASE_PM_RUNTIME
+ if (kbdev->pm.backend.callback_power_runtime_gpu_active) {
+ /* Take the RPM reference count to match with the internal
+ * PM reference count
+ */
+ kbdev->pm.backend.callback_power_runtime_gpu_active(kbdev);
+ WARN_ON(kbdev->pm.backend.gpu_idled);
+ }
+#endif
spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
irq_flags);
@@ -653,11 +792,15 @@ void kbase_hwaccess_pm_halt(struct kbase_device *kbdev)
{
KBASE_DEBUG_ASSERT(kbdev != NULL);
+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
+ kbase_pm_do_poweroff_sync(kbdev);
+#else
mutex_lock(&kbdev->pm.lock);
kbase_pm_do_poweroff(kbdev);
mutex_unlock(&kbdev->pm.lock);
- kbase_pm_wait_for_poweroff_complete(kbdev);
+ kbase_pm_wait_for_poweroff_work_complete(kbdev);
+#endif
}
KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt);
@@ -761,6 +904,9 @@ void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev)
void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
{
+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
+ kbase_pm_do_poweroff_sync(kbdev);
+#else
/* Force power off the GPU and all cores (regardless of policy), only
* after the PM active count reaches zero (otherwise, we risk turning it
* off prematurely)
@@ -775,7 +921,11 @@ void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
kbase_pm_unlock(kbdev);
- kbase_pm_wait_for_poweroff_complete(kbdev);
+ kbase_pm_wait_for_poweroff_work_complete(kbdev);
+#endif
+
+ WARN_ON(kbdev->pm.backend.gpu_powered);
+ WARN_ON(atomic_read(&kbdev->faults_pending));
if (kbdev->pm.backend.callback_power_suspend)
kbdev->pm.backend.callback_power_suspend(kbdev);
@@ -844,9 +994,12 @@ void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev)
/* Cancel any pending HWC dumps */
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
- kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
- kbdev->hwcnt.backend.triggered = 1;
- wake_up(&kbdev->hwcnt.backend.wait);
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING ||
+ kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT;
+ kbdev->hwcnt.backend.triggered = 1;
+ wake_up(&kbdev->hwcnt.backend.wait);
+ }
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
}
mutex_unlock(&arb_vm_state->vm_state_lock);
@@ -854,3 +1007,208 @@ void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev)
}
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
+
+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
+int kbase_pm_force_mcu_wakeup_after_sleep(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ /* Set the override flag to force the power up of L2 cache */
+ kbdev->pm.backend.gpu_wakeup_override = true;
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ return kbase_pm_wait_for_desired_state(kbdev);
+}
+
+static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ int ret;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ /* If there is no active CSG on a slot, powering up the L2 could be
+ * skipped and the GPU suspended directly.
+ * ToDo: the firmware has to be reloaded after wake-up, as no halt
+ * command was sent when the GPU was put into sleep mode.
+ */
+ if (!kbase_csf_scheduler_get_nr_active_csgs(kbdev))
+ dev_info(
+ kbdev->dev,
+ "No active CSGs. Can skip the power up of L2 and go for suspension directly");
+
+ ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev);
+ if (ret) {
+ dev_warn(kbdev->dev, "Wait for MCU wake up failed on runtime suspend");
+ return ret;
+ }
+
+ /* Check if a Doorbell mirror interrupt occurred meanwhile */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ if (kbdev->pm.backend.gpu_sleep_mode_active &&
+ kbdev->pm.backend.exit_gpu_sleep_mode) {
+ dev_dbg(kbdev->dev, "DB mirror interrupt occurred during runtime suspend after L2 power up");
+ kbdev->pm.backend.gpu_wakeup_override = false;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ return -EBUSY;
+ }
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ /* Need to release the kbdev->pm.lock to avoid lock ordering issue
+ * with kctx->reg.lock, which is taken if the sync wait condition is
+ * evaluated after the CSG suspend operation.
+ */
+ kbase_pm_unlock(kbdev);
+ ret = kbase_csf_scheduler_handle_runtime_suspend(kbdev);
+ kbase_pm_lock(kbdev);
+
+ /* Power down L2 cache */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbdev->pm.backend.gpu_wakeup_override = false;
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ /* After re-acquiring the kbdev->pm.lock, check if the device
+ * became active (or active then idle) meanwhile.
+ */
+ if (kbdev->pm.active_count ||
+ kbdev->pm.backend.poweroff_wait_in_progress) {
+ dev_dbg(kbdev->dev,
+ "Device became active on runtime suspend after suspending Scheduler");
+ ret = -EBUSY;
+ }
+
+ if (ret)
+ return ret;
+
+ ret = kbase_pm_wait_for_desired_state(kbdev);
+ if (ret)
+ dev_warn(kbdev->dev, "Wait for power down failed on runtime suspend");
+
+ return ret;
+}
+
+int kbase_pm_handle_runtime_suspend(struct kbase_device *kbdev)
+{
+ enum kbase_mcu_state mcu_state;
+ bool exit_early = false;
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ /* This check is needed for the case where Kbase had invoked the
+ * @power_off_callback directly.
+ */
+ if (!kbdev->pm.backend.gpu_powered) {
+ dev_dbg(kbdev->dev, "GPU already powered down on runtime suspend");
+ exit_early = true;
+ }
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ if (exit_early)
+ goto out;
+
+ ret = kbase_reset_gpu_try_prevent(kbdev);
+ if (ret == -ENOMEM) {
+ dev_dbg(kbdev->dev, "Quit runtime suspend as GPU is in bad state");
+ /* Finish the runtime suspend; no point in trying again as the GPU
+ * is in an irrecoverable bad state.
+ */
+ goto out;
+ } else if (ret) {
+ dev_dbg(kbdev->dev, "Quit runtime suspend for failing to prevent gpu reset");
+ ret = -EBUSY;
+ goto out;
+ }
+
+ kbase_csf_scheduler_lock(kbdev);
+ kbase_pm_lock(kbdev);
+
+ /*
+ * This is to handle the case where the GPU becomes active and idle
+ * very quickly whilst the runtime suspend callback is executing,
+ * as in the following scenario:
+ * - GPU goes idle and pm_callback_runtime_gpu_idle() is called.
+ * - Auto-suspend timer expires and kbase_device_runtime_suspend()
+ * is called.
+ * - GPU becomes active and pm_callback_runtime_gpu_active() calls
+ * pm_runtime_get().
+ * - Shortly after that GPU becomes idle again.
+ * - kbase_pm_handle_runtime_suspend() gets called.
+ * - pm_callback_runtime_gpu_idle() is called.
+ *
+ * We do not want to power down the GPU immediately after it goes idle.
+ * So if we notice that GPU had become active when the runtime suspend
+ * had already kicked in, we abort the runtime suspend.
+ * By aborting the runtime suspend, we defer the power down of GPU.
+ *
+ * This check also helps prevent warnings regarding L2 and MCU states
+ * inside the pm_handle_power_off() function. The warning stems from
+ * the fact that pm.lock is released before invoking the Scheduler
+ * function to suspend the CSGs.
+ */
+ if (kbdev->pm.active_count ||
+ kbdev->pm.backend.poweroff_wait_in_progress) {
+ dev_dbg(kbdev->dev, "Device became active on runtime suspend");
+ ret = -EBUSY;
+ goto unlock;
+ }
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ if (kbdev->pm.backend.gpu_sleep_mode_active &&
+ kbdev->pm.backend.exit_gpu_sleep_mode) {
+ dev_dbg(kbdev->dev, "DB mirror interrupt occurred during runtime suspend before L2 power up");
+ ret = -EBUSY;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ goto unlock;
+ }
+
+ mcu_state = kbdev->pm.backend.mcu_state;
+ WARN_ON(!kbase_pm_is_mcu_inactive(kbdev, mcu_state));
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ if (mcu_state == KBASE_MCU_IN_SLEEP) {
+ ret = pm_handle_mcu_sleep_on_runtime_suspend(kbdev);
+ if (ret)
+ goto unlock;
+ }
+
+ /* Disable interrupts and turn off the GPU clocks */
+ if (!kbase_pm_clock_off(kbdev)) {
+ dev_warn(kbdev->dev, "Failed to turn off GPU clocks on runtime suspend, MMU faults pending");
+
+ WARN_ON(!kbdev->poweroff_pending);
+ /* Previous call to kbase_pm_clock_off() would have disabled
+ * the interrupts and also synchronized with the interrupt
+ * handlers, so more fault work items can't be enqueued.
+ *
+ * Can't wait for the completion of MMU fault work items as
+ * there is a possibility of a deadlock since the fault work
+ * items would do the group termination which requires the
+ * Scheduler lock.
+ */
+ ret = -EBUSY;
+ goto unlock;
+ }
+
+ wake_up(&kbdev->pm.backend.poweroff_wait);
+ WARN_ON(kbdev->pm.backend.gpu_powered);
+ dev_dbg(kbdev->dev, "GPU power down complete");
+
+unlock:
+ kbase_pm_unlock(kbdev);
+ kbase_csf_scheduler_unlock(kbdev);
+ kbase_reset_gpu_allow(kbdev);
+out:
+ if (ret) {
+ ret = -EBUSY;
+ pm_runtime_mark_last_busy(kbdev->dev);
+ }
+
+ return ret;
+}
+#endif
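
As an illustration of how the new runtime power-off path is meant to be driven (this sketch is not part of the patch), a platform integration could wrap kbase_pm_handle_runtime_suspend() in its runtime-suspend handler and let the -EBUSY return defer the power down. The wrapper name and the use of dev_get_drvdata() to recover the kbase_device are assumptions; only kbase_pm_handle_runtime_suspend() itself comes from this change.

#include <linux/device.h>
#include <linux/pm_runtime.h>
#include <mali_kbase.h>	/* assumed to provide struct kbase_device and the PM declarations */

static int example_platform_runtime_suspend(struct device *dev)
{
	/* Assumption: the platform glue stored kbdev with dev_set_drvdata() at probe time */
	struct kbase_device *kbdev = dev_get_drvdata(dev);
	int ret;

	/* Suspend the on-slot CSGs, halt or sleep the MCU and turn the GPU
	 * clocks off. A -EBUSY return means the GPU became active again (or
	 * a doorbell was rung while it slept), so the power down is simply
	 * deferred; the function has already marked the device busy, so
	 * runtime PM will retry after the autosuspend delay.
	 */
	ret = kbase_pm_handle_runtime_suspend(kbdev);
	if (ret)
		return ret;

	/* Platform-specific clock/regulator handling would go here. */
	return 0;
}
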
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
index d9d3aa3..52877f5 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
@@ -29,6 +29,10 @@
#include "mali_kbase_pm_always_on.h"
#include "mali_kbase_pm_coarse_demand.h"
+#if defined(CONFIG_PM_RUNTIME) || defined(CONFIG_PM)
+#define KBASE_PM_RUNTIME 1
+#endif
+
/* Forward definition - see mali_kbase.h */
struct kbase_device;
struct kbase_jd_atom;
@@ -271,10 +275,18 @@ union kbase_pm_policy_data {
* &struct kbase_pm_callback_conf
* @callback_power_runtime_off: Callback when the GPU may be turned off. See
* &struct kbase_pm_callback_conf
- * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See
- * &struct kbase_pm_callback_conf
+ * @callback_power_runtime_idle: Optional callback invoked by runtime PM core
+ * when the GPU may be idle. See
+ * &struct kbase_pm_callback_conf
* @callback_soft_reset: Optional callback to software reset the GPU. See
* &struct kbase_pm_callback_conf
+ * @callback_power_runtime_gpu_idle: Callback invoked by Kbase when GPU has
+ * become idle.
+ * See &struct kbase_pm_callback_conf.
+ * @callback_power_runtime_gpu_active: Callback when GPU has become active and
+ * @callback_power_runtime_gpu_idle was
+ * called previously.
+ * See &struct kbase_pm_callback_conf.
* @ca_cores_enabled: Cores that are currently available
* @mcu_state: The current state of the micro-control unit, only applicable
* to GPUs that have such a component
@@ -312,6 +324,34 @@ union kbase_pm_policy_data {
* @policy_change_lock: Used to serialize the policy change calls. In CSF case,
* the change of policy may involve the scheduler to
* suspend running CSGs and then reconfigure the MCU.
+ * @gpu_sleep_supported: Flag to indicate whether the GPU sleep feature is
+ * supported by the kernel driver or not. If this
+ * flag is not set, then the HW state is saved directly
+ * when the GPU idle notification is received.
+ * @gpu_sleep_mode_active: Flag to indicate that the GPU needs to be in sleep
+ * mode. It is set when the GPU idle notification is
+ * received and is cleared when HW state has been
+ * saved in the runtime suspend callback function or
+ * when the GPU power down is aborted if GPU became
+ * active whilst it was in sleep mode. The flag is
+ * guarded with hwaccess_lock spinlock.
+ * @exit_gpu_sleep_mode: Flag to indicate the GPU can now exit the sleep
+ * mode due to the submission of work from Userspace.
+ * The flag is guarded with hwaccess_lock spinlock.
+ * The @gpu_sleep_mode_active flag is not immediately
+ * reset when this flag is set; this ensures that the
+ * MCU doesn't get disabled undesirably without the
+ * suspend of CSGs. That could happen when
+ * scheduler_pm_active() and scheduler_pm_idle() get
+ * called before the Scheduler gets reactivated.
+ * @gpu_idled: Flag to ensure that the gpu_idle & gpu_active callbacks are
+ * always called in pairs. The flag is guarded with the pm.lock mutex.
+ * @gpu_wakeup_override: Flag to force the power up of L2 cache & reactivation
+ * of MCU. This is set during the runtime suspend
+ * callback function, when the GPU needs to exit the
+ * sleep mode for saving the HW state before power down.
+ * @db_mirror_interrupt_enabled: Flag tracking if the Doorbell mirror interrupt
+ * is enabled or not.
* @in_reset: True if a GPU is resetting and normal power manager operation is
* suspended
* @partial_shaderoff: True if we want to partial power off shader cores,
@@ -398,6 +438,8 @@ struct kbase_pm_backend_data {
void (*callback_power_runtime_off)(struct kbase_device *kbdev);
int (*callback_power_runtime_idle)(struct kbase_device *kbdev);
int (*callback_soft_reset)(struct kbase_device *kbdev);
+ void (*callback_power_runtime_gpu_idle)(struct kbase_device *kbdev);
+ void (*callback_power_runtime_gpu_active)(struct kbase_device *kbdev);
u64 ca_cores_enabled;
@@ -413,6 +455,15 @@ struct kbase_pm_backend_data {
bool policy_change_clamp_state_to_off;
unsigned int csf_pm_sched_flags;
struct mutex policy_change_lock;
+
+#ifdef KBASE_PM_RUNTIME
+ bool gpu_sleep_supported;
+ bool gpu_sleep_mode_active;
+ bool exit_gpu_sleep_mode;
+ bool gpu_idled;
+ bool gpu_wakeup_override;
+ bool db_mirror_interrupt_enabled;
+#endif
#endif
bool l2_desired;
bool l2_always_on;
@@ -420,11 +471,13 @@ struct kbase_pm_backend_data {
bool in_reset;
+#if !MALI_USE_CSF
bool partial_shaderoff;
bool protected_entry_transition_override;
bool protected_transition_override;
int protected_l2_override;
+#endif
bool hwcnt_desired;
bool hwcnt_disabled;
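
The two callbacks documented above, @callback_power_runtime_gpu_idle and @callback_power_runtime_gpu_active, are expected to be invoked in pairs (the @gpu_idled flag enforces this). A minimal sketch of what a platform might plug in, assuming the standard Linux runtime PM API; the function names and the choice of get/put variants are assumptions, not part of the patch.

#include <linux/pm_runtime.h>
#include <mali_kbase.h>

static void example_pm_callback_runtime_gpu_active(struct kbase_device *kbdev)
{
	/* Take a runtime PM reference so the device is not runtime-suspended
	 * while Kbase considers the GPU active. Whether a _sync or an
	 * asynchronous get is appropriate depends on the platform and the
	 * context this ends up being called from.
	 */
	pm_runtime_get_sync(kbdev->dev);
}

static void example_pm_callback_runtime_gpu_idle(struct kbase_device *kbdev)
{
	/* Drop the reference and start the autosuspend timer; the actual
	 * power off happens later, in the runtime-suspend callback.
	 */
	pm_runtime_mark_last_busy(kbdev->dev);
	pm_runtime_put_autosuspend(kbdev->dev);
}
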
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
index bcada93..d65c684 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
@@ -40,6 +40,7 @@
#include <mali_kbase_reset_gpu.h>
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_hwcnt_context.h>
+#include <mali_kbase_pbha.h>
#include <backend/gpu/mali_kbase_cache_policy_backend.h>
#include <device/mali_kbase_device.h>
#include <backend/gpu/mali_kbase_irq_internal.h>
@@ -104,9 +105,15 @@ bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev)
if (unlikely(!kbdev->csf.firmware_inited))
return false;
- if (kbdev->csf.scheduler.pm_active_count)
+ if (kbdev->csf.scheduler.pm_active_count &&
+ kbdev->pm.backend.mcu_desired)
return true;
+#ifdef KBASE_PM_RUNTIME
+ if (kbdev->pm.backend.gpu_wakeup_override)
+ return true;
+#endif
+
/* MCU is supposed to be ON, only when scheduler.pm_active_count is
* non zero. But for always_on policy, the MCU needs to be kept on,
* unless policy changing transition needs it off.
@@ -120,6 +127,7 @@ bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev)
bool kbase_pm_is_l2_desired(struct kbase_device *kbdev)
{
+#if !MALI_USE_CSF
if (kbdev->pm.backend.protected_entry_transition_override)
return false;
@@ -130,15 +138,19 @@ bool kbase_pm_is_l2_desired(struct kbase_device *kbdev)
if (kbdev->pm.backend.protected_transition_override &&
!kbdev->pm.backend.shaders_desired)
return false;
-
-#if MALI_USE_CSF
- if (kbdev->pm.backend.policy_change_clamp_state_to_off)
+#else
+ if (unlikely(kbdev->pm.backend.policy_change_clamp_state_to_off))
return false;
+
+ /* Power up the L2 cache only when MCU is desired */
+ if (likely(kbdev->csf.firmware_inited))
+ return kbase_pm_is_mcu_desired(kbdev);
#endif
return kbdev->pm.backend.l2_desired;
}
+#if !MALI_USE_CSF
void kbase_pm_protected_override_enable(struct kbase_device *kbdev)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -204,6 +216,7 @@ void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override)
kbase_pm_update_state(kbdev);
}
+#endif
/**
* core_type_to_reg - Decode a core type and action to a register.
@@ -259,9 +272,8 @@ static void mali_cci_flush_l2(struct kbase_device *kbdev)
* to be called from.
*/
- kbase_reg_write(kbdev,
- GPU_CONTROL_REG(GPU_COMMAND),
- GPU_COMMAND_CLEAN_INV_CACHES);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+ GPU_COMMAND_CACHE_CLN_INV_L2);
raw = kbase_reg_read(kbdev,
GPU_CONTROL_REG(GPU_IRQ_RAWSTAT));
@@ -610,6 +622,35 @@ static inline bool kbase_pm_handle_mcu_core_attr_update(struct kbase_device *kbd
return (core_mask_update || timer_update);
}
+bool kbase_pm_is_mcu_inactive(struct kbase_device *kbdev,
+ enum kbase_mcu_state state)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ return ((state == KBASE_MCU_OFF) || (state == KBASE_MCU_IN_SLEEP));
+}
+
+#ifdef KBASE_PM_RUNTIME
+/**
+ * kbase_pm_enable_mcu_db_notification - Enable the Doorbell notification on
+ * MCU side
+ *
+ * @kbdev: Pointer to the device.
+ *
+ * This function is called to re-enable the Doorbell notification on MCU side
+ * when the MCU needs to become active again.
+ */
+static void kbase_pm_enable_mcu_db_notification(struct kbase_device *kbdev)
+{
+ u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_CONTROL));
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ val &= ~MCU_CNTRL_DOORBELL_DISABLE_MASK;
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), val);
+}
+#endif
+
static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
{
struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
@@ -618,12 +659,12 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
lockdep_assert_held(&kbdev->hwaccess_lock);
/*
- * Initial load of firmare should have been done to
+ * Initial load of firmware should have been done to
* exercise the MCU state machine.
*/
if (unlikely(!kbdev->csf.firmware_inited)) {
WARN_ON(backend->mcu_state != KBASE_MCU_OFF);
- return -EIO;
+ return 0;
}
do {
@@ -770,8 +811,15 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
if (!backend->hwcnt_disabled)
kbase_pm_trigger_hwcnt_disable(kbdev);
- if (backend->hwcnt_disabled)
- backend->mcu_state = KBASE_MCU_ON_HALT;
+
+ if (backend->hwcnt_disabled) {
+#ifdef KBASE_PM_RUNTIME
+ if (backend->gpu_sleep_mode_active)
+ backend->mcu_state = KBASE_MCU_ON_SLEEP_INITIATE;
+ else
+#endif
+ backend->mcu_state = KBASE_MCU_ON_HALT;
+ }
break;
case KBASE_MCU_ON_HALT:
@@ -816,7 +864,32 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
kbase_csf_firmware_disable_mcu_wait(kbdev);
backend->mcu_state = KBASE_MCU_OFF;
break;
+#ifdef KBASE_PM_RUNTIME
+ case KBASE_MCU_ON_SLEEP_INITIATE:
+ if (!kbase_pm_is_mcu_desired(kbdev)) {
+ kbase_csf_firmware_trigger_mcu_sleep(kbdev);
+ backend->mcu_state = KBASE_MCU_ON_PEND_SLEEP;
+ } else
+ backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE;
+ break;
+
+ case KBASE_MCU_ON_PEND_SLEEP:
+ if (kbase_csf_firmware_is_mcu_in_sleep(kbdev)) {
+ backend->mcu_state = KBASE_MCU_IN_SLEEP;
+ kbase_pm_enable_db_mirror_interrupt(kbdev);
+ kbase_csf_scheduler_reval_idleness_post_sleep(kbdev);
+ }
+ break;
+ case KBASE_MCU_IN_SLEEP:
+ if (kbase_pm_is_mcu_desired(kbdev) &&
+ backend->l2_state == KBASE_L2_ON) {
+ kbase_pm_enable_mcu_db_notification(kbdev);
+ kbase_pm_disable_db_mirror_interrupt(kbdev);
+ backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE;
+ }
+ break;
+#endif
case KBASE_MCU_RESET_WAIT:
/* Reset complete */
if (!backend->in_reset)
@@ -889,8 +962,24 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
#endif
backend->shaders_state =
KBASE_SHADERS_OFF_CORESTACK_OFF;
- backend->l2_state = KBASE_L2_OFF;
- dev_dbg(kbdev->dev, "GPU lost has occurred - L2 off\n");
+ backend->hwcnt_desired = false;
+ if (!backend->hwcnt_disabled) {
+ /* Don't progress until hw counters are disabled.
+ * This may involve waiting for a worker to complete.
+ * The HW counters backend disable code checks for the
+ * GPU removed case and will error out without touching
+ * the hardware. This step is needed to keep the HW
+ * counters in a consistent state after a GPU lost event.
+ */
+ backend->l2_state =
+ KBASE_L2_ON_HWCNT_DISABLE;
+ kbase_pm_trigger_hwcnt_disable(kbdev);
+ }
+
+ if (backend->hwcnt_disabled) {
+ backend->l2_state = KBASE_L2_OFF;
+ dev_dbg(kbdev->dev, "GPU lost has occurred - L2 off\n");
+ }
break;
}
@@ -911,6 +1000,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
* powering it on
*/
kbase_pm_l2_config_override(kbdev);
+ kbase_pbha_write_settings(kbdev);
#if !MALI_USE_CSF
/* L2 is required, power on. Powering on the
* tiler will also power the first L2 cache.
@@ -1027,7 +1117,8 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
break;
#else
/* Do not power off L2 until the MCU has been stopped */
- if (backend->mcu_state != KBASE_MCU_OFF)
+ if ((backend->mcu_state != KBASE_MCU_OFF) &&
+ (backend->mcu_state != KBASE_MCU_IN_SLEEP))
break;
#endif
@@ -1608,7 +1699,7 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev)
return 0;
}
-#endif
+#endif /* !MALI_USE_CSF */
static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev)
{
@@ -1635,7 +1726,8 @@ static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev)
kbdev->pm.backend.mcu_state != KBASE_MCU_ON)
in_desired_state = false;
else if (!kbase_pm_is_mcu_desired(kbdev) &&
- kbdev->pm.backend.mcu_state != KBASE_MCU_OFF)
+ (kbdev->pm.backend.mcu_state != KBASE_MCU_OFF) &&
+ (kbdev->pm.backend.mcu_state != KBASE_MCU_IN_SLEEP))
in_desired_state = false;
#endif
@@ -1734,8 +1826,8 @@ void kbase_pm_update_state(struct kbase_device *kbdev)
if (kbase_pm_mcu_update_state(kbdev))
return;
- if (prev_mcu_state != KBASE_MCU_OFF &&
- kbdev->pm.backend.mcu_state == KBASE_MCU_OFF) {
+ if (!kbase_pm_is_mcu_inactive(kbdev, prev_mcu_state) &&
+ kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state)) {
if (kbase_pm_l2_update_state(kbdev))
return;
}
@@ -1828,6 +1920,9 @@ void kbase_pm_reset_start_locked(struct kbase_device *kbdev)
*/
if (likely(kbdev->csf.firmware_inited)) {
backend->mcu_state = KBASE_MCU_RESET_WAIT;
+#ifdef KBASE_PM_RUNTIME
+ backend->exit_gpu_sleep_mode = true;
+#endif
kbdev->csf.firmware_reload_needed = true;
} else {
WARN_ON(backend->mcu_state != KBASE_MCU_OFF);
@@ -1865,6 +1960,9 @@ void kbase_pm_reset_complete(struct kbase_device *kbdev)
*/
kbase_gpu_cache_clean_wait_complete(kbdev);
backend->in_reset = false;
+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
+ backend->gpu_wakeup_override = false;
+#endif
kbase_pm_update_state(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -2098,6 +2196,7 @@ static void update_user_reg_page_mapping(struct kbase_device *kbdev)
*/
void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
{
+ struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
bool reset_required = is_resume;
unsigned long flags;
@@ -2115,7 +2214,13 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
}
#endif
- if (kbdev->pm.backend.gpu_powered) {
+ if (backend->gpu_powered) {
+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
+ if (backend->gpu_idled) {
+ backend->callback_power_runtime_gpu_active(kbdev);
+ backend->gpu_idled = false;
+ }
+#endif
/* Already turned on */
if (kbdev->poweroff_pending)
kbase_pm_enable_interrupts(kbdev);
@@ -2128,15 +2233,15 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
KBASE_KTRACE_ADD(kbdev, PM_GPU_ON, NULL, 0u);
- if (is_resume && kbdev->pm.backend.callback_power_resume) {
- kbdev->pm.backend.callback_power_resume(kbdev);
+ if (is_resume && backend->callback_power_resume) {
+ backend->callback_power_resume(kbdev);
return;
- } else if (kbdev->pm.backend.callback_power_on) {
- reset_required = kbdev->pm.backend.callback_power_on(kbdev);
+ } else if (backend->callback_power_on) {
+ reset_required = backend->callback_power_on(kbdev);
}
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- kbdev->pm.backend.gpu_powered = true;
+ backend->gpu_powered = true;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
#if MALI_USE_CSF
@@ -2194,8 +2299,8 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
/* Turn on the L2 caches */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- kbdev->pm.backend.gpu_ready = true;
- kbdev->pm.backend.l2_desired = true;
+ backend->gpu_ready = true;
+ backend->l2_desired = true;
#if MALI_USE_CSF
if (reset_required) {
/* GPU reset was done after the power on, so send the post
@@ -2209,6 +2314,17 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
#endif
kbase_pm_update_state(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
+ /* GPU is now powered up. Invoke the GPU active callback as GPU idle
+ * callback would have been invoked before the power down.
+ */
+ if (backend->gpu_idled) {
+ backend->callback_power_runtime_gpu_active(kbdev);
+ backend->gpu_idled = false;
+ }
+#endif
+
}
KBASE_EXPORT_TEST_API(kbase_pm_clock_on);
@@ -2252,19 +2368,22 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev)
kbase_ipa_control_handle_gpu_power_off(kbdev);
#endif
- kbdev->pm.backend.gpu_ready = false;
-
- /* The GPU power may be turned off from this point */
- kbdev->pm.backend.gpu_powered = false;
-
+ if (kbase_is_gpu_removed(kbdev)
#ifdef CONFIG_MALI_ARBITER_SUPPORT
- if (kbase_pm_is_gpu_lost(kbdev)) {
+ || kbase_pm_is_gpu_lost(kbdev)) {
+#else
+ ) {
+#endif
/* Ensure we unblock any threads that are stuck waiting
* for the GPU
*/
kbase_gpu_cache_clean_wait_complete(kbdev);
}
-#endif
+
+ kbdev->pm.backend.gpu_ready = false;
+
+ /* The GPU power may be turned off from this point */
+ kbdev->pm.backend.gpu_powered = false;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
index 70d009e..ef26c16 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
@@ -137,6 +137,10 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume);
* off. It should be modified during integration to perform the necessary
* actions to turn the clock off (if this is possible in the integration).
*
+ * If runtime PM is enabled and @power_runtime_gpu_idle_callback is used
+ * then this function would usually be invoked from the runtime suspend
+ * callback function.
+ *
* @kbdev: The kbase device structure for the device (must be a valid
* pointer)
*
@@ -242,7 +246,7 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
* NOTE: This may not wait until the correct state is reached if there is a
* power off in progress. To correctly wait for the desired state the caller
* must ensure that this is not the case by, for example, calling
- * kbase_pm_wait_for_poweroff_complete()
+ * kbase_pm_wait_for_poweroff_work_complete()
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
@@ -432,12 +436,25 @@ void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);
void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev);
/**
- * kbase_pm_wait_for_poweroff_complete - Wait for the poweroff workqueue to
- * complete
+ * kbase_pm_wait_for_poweroff_work_complete - Wait for the poweroff workqueue to
+ * complete
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This function effectively just waits for the @gpu_poweroff_wait_work work
+ * item to complete, if it was enqueued. GPU may not have been powered down
+ * before this function returns.
*/
-void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev);
+void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_wait_for_gpu_power_down - Wait for the GPU power down to complete
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This function waits for the actual GPU power down to complete.
+ */
+void kbase_pm_wait_for_gpu_power_down(struct kbase_device *kbdev);
/**
* kbase_pm_runtime_init - Initialize runtime-pm for Mali GPU platform device
@@ -635,6 +652,7 @@ void kbase_pm_reset_start_locked(struct kbase_device *kbdev);
*/
void kbase_pm_reset_complete(struct kbase_device *kbdev);
+#if !MALI_USE_CSF
/**
* kbase_pm_protected_override_enable - Enable the protected mode override
* @kbdev: Device pointer
@@ -707,6 +725,7 @@ int kbase_pm_protected_entry_override_enable(struct kbase_device *kbdev);
* to enter protected mode.
*/
void kbase_pm_protected_entry_override_disable(struct kbase_device *kbdev);
+#endif
/* If true, the driver should explicitly control corestack power management,
* instead of relying on the Power Domain Controller.
@@ -737,6 +756,21 @@ bool kbase_pm_is_l2_desired(struct kbase_device *kbdev);
bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev);
/**
+ * kbase_pm_is_mcu_inactive - Check if the MCU is inactive (i.e. either
+ * it is disabled or it is in sleep)
+ *
+ * @kbdev: kbase device
+ * @state: state of the MCU state machine.
+ *
+ * This function must be called with hwaccess_lock held.
+ * L2 cache can be turned off if this function returns true.
+ *
+ * Return: true if MCU is inactive
+ */
+bool kbase_pm_is_mcu_inactive(struct kbase_device *kbdev,
+ enum kbase_mcu_state state);
+
+/**
* kbase_pm_idle_groups_sched_suspendable - Check whether the scheduler can be
* suspended to low power state when all
* the CSGs are idle
@@ -818,4 +852,83 @@ static inline void kbase_pm_unlock(struct kbase_device *kbdev)
#endif /* !MALI_USE_CSF */
}
+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
+/**
+ * kbase_pm_gpu_sleep_allowed - Check if the GPU is allowed to be put in sleep
+ *
+ * @kbdev: Device pointer
+ *
+ * This function is called on GPU idle notification and if it returns false then
+ * GPU power down will be triggered by suspending the CSGs and halting the MCU.
+ *
+ * Return: true if the GPU is allowed to be in the sleep state.
+ */
+static inline bool kbase_pm_gpu_sleep_allowed(struct kbase_device *kbdev)
+{
+ /* If the autosuspend_delay has been set to 0 then it doesn't make
+ * sense to first put the GPU into the sleep state and then power it
+ * down; it would be better to power it down right away.
+ * Also need to do the same when autosuspend_delay is set to a negative
+ * value, which implies that runtime pm is effectively disabled by the
+ * kernel.
+ * A high positive value of autosuspend_delay can be used to keep the
+ * GPU in sleep state for a long time.
+ */
+ if (unlikely(!kbdev->dev->power.autosuspend_delay ||
+ (kbdev->dev->power.autosuspend_delay < 0)))
+ return false;
+
+ return kbdev->pm.backend.gpu_sleep_supported;
+}
+
+/**
+ * kbase_pm_enable_db_mirror_interrupt - Enable the doorbell mirror interrupt to
+ * detect the User doorbell rings.
+ *
+ * @kbdev: Device pointer
+ *
+ * This function is called just before sending the sleep request to MCU firmware
+ * so that User doorbell rings can be detected whilst GPU remains in the sleep
+ * state.
+ *
+ */
+static inline void kbase_pm_enable_db_mirror_interrupt(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ if (!kbdev->pm.backend.db_mirror_interrupt_enabled) {
+ u32 irq_mask = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(GPU_IRQ_MASK));
+
+ WARN_ON(irq_mask & DOORBELL_MIRROR);
+
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+ irq_mask | DOORBELL_MIRROR);
+ kbdev->pm.backend.db_mirror_interrupt_enabled = true;
+ }
+}
+
+/**
+ * kbase_pm_disable_db_mirror_interrupt - Disable the doorbell mirror interrupt.
+ *
+ * @kbdev: Device pointer
+ *
+ * This function is called when doorbell mirror interrupt is received or MCU
+ * needs to be reactivated by enabling the doorbell notification.
+ */
+static inline void kbase_pm_disable_db_mirror_interrupt(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ if (kbdev->pm.backend.db_mirror_interrupt_enabled) {
+ u32 irq_mask = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(GPU_IRQ_MASK));
+
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+ irq_mask & ~DOORBELL_MIRROR);
+ kbdev->pm.backend.db_mirror_interrupt_enabled = false;
+ }
+}
+#endif
+
#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */
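
kbase_pm_gpu_sleep_allowed() ties the sleep decision to the runtime PM autosuspend delay: a zero or negative delay means sleeping first is pointless and the GPU should be powered down directly. Below is a rough sketch of how a GPU idle notification handler might use it; the handler name and the contents of each branch are assumptions, while the helper, the gpu_sleep_mode_active flag and kbase_pm_update_state() come from the driver.

#include <linux/spinlock.h>
#include <mali_kbase.h>
#include <backend/gpu/mali_kbase_pm_internal.h>

static void example_handle_gpu_idle(struct kbase_device *kbdev)
{
	unsigned long flags;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	if (kbase_pm_gpu_sleep_allowed(kbdev)) {
		/* Keep the HW state resident and let the MCU sleep; the state
		 * is saved later, in the runtime-suspend callback.
		 */
		kbdev->pm.backend.gpu_sleep_mode_active = true;
		kbase_pm_update_state(kbdev);
	} else {
		/* Suspend the CSGs, halt the MCU and power down right away,
		 * e.g. via the existing kbase_pm_do_poweroff() path.
		 */
	}
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}

A platform would typically pair this with pm_runtime_use_autosuspend() and pm_runtime_set_autosuspend_delay() on kbdev->dev, so that the zero/negative-delay check in kbase_pm_gpu_sleep_allowed() behaves as intended.
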
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h b/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h
index 4e99928..96f196f 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h
@@ -42,6 +42,20 @@
* @POWER_DOWN: MCU halted operations, pending being disabled.
* @PEND_OFF: MCU is being disabled, pending on powering off.
* @RESET_WAIT: The GPU is resetting, MCU state is unknown.
+ * @HCTL_SHADERS_PEND_ON: Global configuration requests sent to the firmware
+ * have completed and shaders have been requested to
+ * power on.
+ * @HCTL_CORES_NOTIFY_PEND: Shader cores have powered up and firmware is being
+ * notified of the mask of enabled shader cores.
+ * @HCTL_MCU_ON_RECHECK: MCU is on and hwcnt disabling is triggered
+ * and checks are done to increase the number of
+ * enabled cores.
+ * @HCTL_SHADERS_READY_OFF: MCU has halted and cores need to be powered down
+ * @HCTL_SHADERS_PEND_OFF: Cores are transitioning to power down.
+ * @ON_SLEEP_INITIATE: MCU is on, hwcnt has been disabled, and the MCU
+ * is being put to sleep.
+ * @ON_PEND_SLEEP: MCU sleep is in progress.
+ * @IN_SLEEP: Sleep request is completed and MCU has halted.
*/
KBASEP_MCU_STATE(OFF)
KBASEP_MCU_STATE(PEND_ON_RELOAD)
@@ -61,3 +75,7 @@ KBASEP_MCU_STATE(HCTL_CORES_NOTIFY_PEND)
KBASEP_MCU_STATE(HCTL_MCU_ON_RECHECK)
KBASEP_MCU_STATE(HCTL_SHADERS_READY_OFF)
KBASEP_MCU_STATE(HCTL_SHADERS_PEND_OFF)
+/* Additional MCU states to support GPU sleep feature */
+KBASEP_MCU_STATE(ON_SLEEP_INITIATE)
+KBASEP_MCU_STATE(ON_PEND_SLEEP)
+KBASEP_MCU_STATE(IN_SLEEP)
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
index cf61ef8..7b126a1 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
@@ -183,7 +183,7 @@ void kbase_pm_update_dynamic_cores_onoff(struct kbase_device *kbdev)
void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
{
- bool shaders_desired;
+ bool shaders_desired = false;
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -192,6 +192,7 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
if (kbdev->pm.backend.poweroff_wait_in_progress)
return;
+#if !MALI_USE_CSF
if (kbdev->pm.backend.protected_transition_override)
/* We are trying to change in/out of protected mode - force all
* cores off so that the L2 powers down
@@ -199,15 +200,8 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
shaders_desired = false;
else
shaders_desired = kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev);
-
-#if MALI_USE_CSF
- /* On CSF GPUs, Host driver isn't supposed to do the power management
- * for shader cores. CSF firmware will power up the cores appropriately
- * and so from Driver's standpoint 'shaders_desired' flag shall always
- * remain 0.
- */
- shaders_desired = false;
#endif
+
if (kbdev->pm.backend.shaders_desired != shaders_desired) {
KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, kbdev->pm.backend.shaders_desired);
diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c
index d10e404..92a366b 100644
--- a/mali_kbase/backend/gpu/mali_kbase_time.c
+++ b/mali_kbase/backend/gpu/mali_kbase_time.c
@@ -23,6 +23,7 @@
#include <mali_kbase_hwaccess_time.h>
#include <device/mali_kbase_device.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <mali_kbase_config_defaults.h>
void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev,
u64 *cycle_counter,
@@ -31,18 +32,8 @@ void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev,
{
u32 hi1, hi2;
- if (cycle_counter) {
- /* Read hi, lo, hi to ensure a coherent u64 */
- do {
- hi1 = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(CYCLE_COUNT_HI));
- *cycle_counter = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(CYCLE_COUNT_LO));
- hi2 = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(CYCLE_COUNT_HI));
- } while (hi1 != hi2);
- *cycle_counter |= (((u64) hi1) << 32);
- }
+ if (cycle_counter)
+ *cycle_counter = kbase_backend_get_cycle_cnt(kbdev);
if (system_time) {
/* Read hi, lo, hi to ensure a coherent u64 */
@@ -107,3 +98,66 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
kbase_pm_release_gpu_cycle_counter(kbdev);
#endif
}
+
+unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
+ enum kbase_timeout_selector selector)
+{
+ /* Timeout calculation:
+ * dividing number of cycles by freq in KHz automatically gives value
+ * in milliseconds. nr_cycles will have to be multiplied by 1e3 to
+ * get result in microseconds, and 1e6 to get result in nanoseconds.
+ */
+
+ u64 timeout, nr_cycles = 0;
+ u64 freq_khz = kbdev->lowest_gpu_freq_khz;
+
+ WARN_ON(!freq_khz);
+
+ switch (selector) {
+ /* use Firmware timeout if invalid selection */
+ default:
+#if !MALI_USE_CSF
+ WARN(1, "Invalid timeout selector used! Using default value");
+ timeout = JM_DEFAULT_TIMEOUT_CYCLES;
+ CSTD_UNUSED(nr_cycles);
+#else
+ WARN(1,
+ "Invalid timeout selector used! Using CSF Firmware timeout");
+ fallthrough;
+ case CSF_FIRMWARE_TIMEOUT:
+ nr_cycles = CSF_FIRMWARE_TIMEOUT_CYCLES;
+ timeout = div_u64(nr_cycles, freq_khz);
+ /* cap CSF FW timeout to FIRMWARE_PING_INTERVAL_MS
+ * if calculated timeout exceeds it. This should be adapted to a
+ * direct timeout comparison once the FIRMWARE_PING_INTERVAL_MS
+ * option is added to this timeout function. A compile-time check
+ * such as BUILD_BUG_ON can also be done once the firmware ping
+ * interval in cycles becomes available as a macro.
+ */
+ if (timeout > FIRMWARE_PING_INTERVAL_MS) {
+ dev_dbg(kbdev->dev, "Capped CSF_FIRMWARE_TIMEOUT %llu to %d",
+ timeout, FIRMWARE_PING_INTERVAL_MS);
+ timeout = FIRMWARE_PING_INTERVAL_MS;
+ }
+#endif
+ break;
+ }
+ return (unsigned int)timeout;
+}
+
+u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev)
+{
+ u32 hi1, hi2, lo;
+
+ /* Read hi, lo, hi to ensure a coherent u64 */
+ do {
+ hi1 = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(CYCLE_COUNT_HI));
+ lo = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(CYCLE_COUNT_LO));
+ hi2 = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(CYCLE_COUNT_HI));
+ } while (hi1 != hi2);
+
+ return lo | (((u64) hi1) << 32);
+}
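
A worked example of the conversion performed by kbase_get_timeout_ms(): dividing a cycle count by the lowest GPU frequency in kHz yields milliseconds directly. The cycle budget and frequency below are made up for illustration; the real value of CSF_FIRMWARE_TIMEOUT_CYCLES is defined elsewhere in the driver.

#include <linux/math64.h>
#include <linux/types.h>

static u64 example_timeout_ms(void)
{
	const u64 nr_cycles = 200000000ULL;	/* hypothetical 200M-cycle budget */
	const u64 freq_khz = 100000;		/* hypothetical 100 MHz lowest GPU frequency */

	/* kHz is cycles per millisecond, so cycles / kHz gives milliseconds:
	 * 200,000,000 / 100,000 = 2000 ms.
	 */
	u64 timeout_ms = div_u64(nr_cycles, freq_khz);

	/* As in kbase_get_timeout_ms(), the CSF firmware timeout would then be
	 * capped to FIRMWARE_PING_INTERVAL_MS (8000 ms after this change);
	 * the cap does not bite here since 2000 ms < 8000 ms.
	 */
	return timeout_ms;
}
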
diff --git a/mali_kbase/build.bp b/mali_kbase/build.bp
index 979e06f..030af9d 100644
--- a/mali_kbase/build.bp
+++ b/mali_kbase/build.bp
@@ -154,7 +154,9 @@ bob_defaults {
// (catch-all for experimental CS code without separating it into
// different features).
"MALI_INCREMENTAL_RENDERING={{.incremental_rendering}}",
- "GPU_TIMESTAMP_CORRECTION={{.gpu_timestamp_correction}}",
+ "MALI_GPU_TIMESTAMP_CORRECTION={{.gpu_timestamp_correction}}",
+ "MALI_BASE_CSF_PERFORMANCE_TESTS={{.base_csf_performance_tests}}",
+ "MALI_GPU_TIMESTAMP_INTERPOLATION={{.gpu_timestamp_interpolation}}",
],
}
diff --git a/mali_kbase/context/mali_kbase_context.c b/mali_kbase/context/mali_kbase_context.c
index b2e7025..85f4c0a 100644
--- a/mali_kbase/context/mali_kbase_context.c
+++ b/mali_kbase/context/mali_kbase_context.c
@@ -283,7 +283,7 @@ int kbase_context_mmu_init(struct kbase_context *kctx)
{
return kbase_mmu_init(
kctx->kbdev, &kctx->mmu, kctx,
- base_context_mmu_group_id_get(kctx->create_flags));
+ kbase_context_mmu_group_id_get(kctx->create_flags));
}
void kbase_context_mmu_term(struct kbase_context *kctx)
diff --git a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c
index a62cafa..ce6d546 100644
--- a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c
+++ b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c
@@ -253,7 +253,7 @@ static inline void calc_prfcnt_delta(struct kbase_device *kbdev,
if (!WARN_ON_ONCE(kbdev->csf.ipa_control.cur_gpu_rate == 0))
if (prfcnt->gpu_norm)
- delta_value /= kbdev->csf.ipa_control.cur_gpu_rate;
+ delta_value = div_u64(delta_value, kbdev->csf.ipa_control.cur_gpu_rate);
prfcnt->latest_raw_value = raw_value;
@@ -300,17 +300,20 @@ kbase_ipa_control_rate_change_notify(struct kbase_clk_rate_listener *listener,
/* Interrupts are already disabled and interrupt state is also saved */
spin_lock(&ipa_ctrl->lock);
- for (i = 0; i < ipa_ctrl->num_active_sessions; i++) {
- size_t j;
+ for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) {
struct kbase_ipa_control_session *session = &ipa_ctrl->sessions[i];
- for (j = 0; j < session->num_prfcnts; j++) {
- struct kbase_ipa_control_prfcnt *prfcnt =
- &session->prfcnts[j];
+ if (session->active) {
+ size_t j;
+
+ for (j = 0; j < session->num_prfcnts; j++) {
+ struct kbase_ipa_control_prfcnt *prfcnt =
+ &session->prfcnts[j];
- if (prfcnt->gpu_norm)
- calc_prfcnt_delta(kbdev, prfcnt, true);
- }
+ if (prfcnt->gpu_norm)
+ calc_prfcnt_delta(kbdev, prfcnt, true);
+ }
+ }
}
ipa_ctrl->cur_gpu_rate = clk_rate_hz;
@@ -480,16 +483,21 @@ static int session_gpu_start(struct kbase_device *kbdev,
*/
if (!ret) {
if (session) {
+ /* On starting a session, a raw value read is required to
+ * initialize the IPA power model's calculation.
+ */
session_read_raw_values(kbdev, session);
} else {
size_t session_idx;
for (session_idx = 0;
- session_idx < ipa_ctrl->num_active_sessions;
- session_idx++)
- session_read_raw_values(
- kbdev,
- &ipa_ctrl->sessions[session_idx]);
+ session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS;
+ session_idx++) {
+ struct kbase_ipa_control_session *session_to_check = &ipa_ctrl->sessions[session_idx];
+
+ if (session_to_check->active)
+ session_read_raw_values(kbdev, session_to_check);
+ }
}
}
@@ -783,6 +791,12 @@ int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client,
ipa_ctrl = &kbdev->csf.ipa_control;
session = (struct kbase_ipa_control_session *)client;
+ if (WARN_ON(!session->active)) {
+ dev_err(kbdev->dev,
+ "%s: attempt to query inactive session", __func__);
+ return -EINVAL;
+ }
+
if (WARN_ON(num_values < session->num_prfcnts)) {
dev_err(kbdev->dev,
"%s: not enough space (%zu) to return all counter values (%zu)",
@@ -860,20 +874,23 @@ void kbase_ipa_control_handle_gpu_power_off(struct kbase_device *kbdev)
ret);
}
- for (session_idx = 0; session_idx < ipa_ctrl->num_active_sessions;
+ for (session_idx = 0; session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS;
session_idx++) {
+
struct kbase_ipa_control_session *session =
&ipa_ctrl->sessions[session_idx];
- size_t i;
- for (i = 0; i < session->num_prfcnts; i++) {
- struct kbase_ipa_control_prfcnt *prfcnt =
- &session->prfcnts[i];
+ if (session->active) {
+ size_t i;
- calc_prfcnt_delta(kbdev, prfcnt, true);
+ for (i = 0; i < session->num_prfcnts; i++) {
+ struct kbase_ipa_control_prfcnt *prfcnt =
+ &session->prfcnts[i];
+
+ calc_prfcnt_delta(kbdev, prfcnt, true);
+ }
}
}
-
spin_unlock(&ipa_ctrl->lock);
}
@@ -975,13 +992,17 @@ void kbase_ipa_control_protm_exited(struct kbase_device *kbdev)
lockdep_assert_held(&kbdev->hwaccess_lock);
- for (i = 0; i < ipa_ctrl->num_active_sessions; i++) {
+ for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) {
+
struct kbase_ipa_control_session *session =
&ipa_ctrl->sessions[i];
- u64 protm_time = time_now - MAX(session->last_query_time,
- ipa_ctrl->protm_start);
- session->protm_time += protm_time;
+ if (session->active) {
+ u64 protm_time = time_now - MAX(session->last_query_time,
+ ipa_ctrl->protm_start);
+
+ session->protm_time += protm_time;
+ }
}
/* Acknowledge the protected_mode bit in the IPA_CONTROL STATUS
diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c
index d49e343..142e5a8 100644
--- a/mali_kbase/csf/mali_kbase_csf.c
+++ b/mali_kbase/csf/mali_kbase_csf.c
@@ -32,6 +32,7 @@
#include <mmu/mali_kbase_mmu.h>
#include "mali_kbase_csf_timeout.h"
#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
+#include <mali_kbase_hwaccess_time.h>
#define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK)
#define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK)
@@ -140,7 +141,7 @@ static void gpu_munmap_user_io_pages(struct kbase_context *kctx,
WARN_ON(reg->flags & KBASE_REG_FREE);
mutex_lock(&kctx->kbdev->csf.reg_lock);
- kbase_remove_va_region(reg);
+ kbase_remove_va_region(kctx->kbdev, reg);
mutex_unlock(&kctx->kbdev->csf.reg_lock);
}
@@ -171,6 +172,11 @@ static int gpu_mmap_user_io_pages(struct kbase_device *kbdev,
const size_t num_pages = 2;
int ret;
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \
((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \
(KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE)))
@@ -195,19 +201,18 @@ static int gpu_mmap_user_io_pages(struct kbase_device *kbdev,
return ret;
/* Map input page */
- ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
- reg->start_pfn, &phys[0],
- 1, mem_flags, MCU_AS_NR,
- KBASE_MEM_GROUP_CSF_IO);
+ ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn,
+ &phys[0], 1, mem_flags, MCU_AS_NR,
+ KBASE_MEM_GROUP_CSF_IO, mmu_sync_info);
if (ret)
goto bad_insert;
/* Map output page, it needs rw access */
mem_flags |= KBASE_REG_GPU_WR;
ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
- reg->start_pfn + 1, &phys[1],
- 1, mem_flags, MCU_AS_NR,
- KBASE_MEM_GROUP_CSF_IO);
+ reg->start_pfn + 1, &phys[1], 1, mem_flags,
+ MCU_AS_NR, KBASE_MEM_GROUP_CSF_IO,
+ mmu_sync_info);
if (ret)
goto bad_insert_output_page;
@@ -218,7 +223,7 @@ bad_insert_output_page:
reg->start_pfn, 1, MCU_AS_NR);
bad_insert:
mutex_lock(&kbdev->csf.reg_lock);
- kbase_remove_va_region(reg);
+ kbase_remove_va_region(kbdev, reg);
mutex_unlock(&kbdev->csf.reg_lock);
return ret;
@@ -475,7 +480,7 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
/* Only one pointer expected, otherwise coding error */
if ((reg == NULL && reg_ex == NULL) || (reg && reg_ex)) {
- dev_err(kctx->kbdev->dev,
+ dev_dbg(kctx->kbdev->dev,
"Error, one and only one param-ptr expected!");
return -EINVAL;
}
@@ -1053,6 +1058,11 @@ static int create_normal_suspend_buffer(struct kbase_context *const kctx,
PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size);
int err = 0;
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
lockdep_assert_held(&kctx->csf.lock);
/* Allocate and initialize Region Object */
@@ -1090,9 +1100,9 @@ static int create_normal_suspend_buffer(struct kbase_context *const kctx,
/* Update MMU table */
err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
- reg->start_pfn, &s_buf->phy[0],
- nr_pages, mem_flags,
- MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW);
+ reg->start_pfn, &s_buf->phy[0], nr_pages,
+ mem_flags, MCU_AS_NR,
+ KBASE_MEM_GROUP_CSF_FW, mmu_sync_info);
if (err)
goto mmu_insert_failed;
@@ -1102,7 +1112,7 @@ static int create_normal_suspend_buffer(struct kbase_context *const kctx,
mmu_insert_failed:
mutex_lock(&kctx->kbdev->csf.reg_lock);
- WARN_ON(kbase_remove_va_region(reg));
+ kbase_remove_va_region(kctx->kbdev, reg);
mutex_unlock(&kctx->kbdev->csf.reg_lock);
add_va_region_failed:
@@ -1138,6 +1148,11 @@ static int create_protected_suspend_buffer(struct kbase_device *const kbdev,
PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
int err = 0;
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
/* Allocate and initialize Region Object */
reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
nr_pages, KBASE_REG_ZONE_MCU_SHARED);
@@ -1170,10 +1185,9 @@ static int create_protected_suspend_buffer(struct kbase_device *const kbdev,
goto add_va_region_failed;
/* Update MMU table */
- err = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
- reg->start_pfn, phys,
- nr_pages, mem_flags, MCU_AS_NR,
- KBASE_MEM_GROUP_CSF_FW);
+ err = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn,
+ phys, nr_pages, mem_flags, MCU_AS_NR,
+ KBASE_MEM_GROUP_CSF_FW, mmu_sync_info);
if (err)
goto mmu_insert_failed;
@@ -1183,7 +1197,7 @@ static int create_protected_suspend_buffer(struct kbase_device *const kbdev,
mmu_insert_failed:
mutex_lock(&kbdev->csf.reg_lock);
- WARN_ON(kbase_remove_va_region(reg));
+ kbase_remove_va_region(kbdev, reg);
mutex_unlock(&kbdev->csf.reg_lock);
add_va_region_failed:
@@ -1244,16 +1258,9 @@ static int create_suspend_buffers(struct kbase_context *const kctx,
*/
static u32 generate_group_uid(void)
{
- /* use first KBase device to store max UID */
- struct kbase_device *kbdev = kbase_find_device(-1);
- u32 uid = 1;
-
- if (kbdev)
- uid = (u32) atomic_inc_return(&kbdev->group_max_uid_in_devices);
- else
- WARN(1, "NULL kbase device pointer in group UID generation");
+ static atomic_t global_csg_uid = ATOMIC_INIT(0);
- return uid;
+ return (u32)atomic_inc_return(&global_csg_uid);
}
/**
@@ -1272,8 +1279,8 @@ static int create_queue_group(struct kbase_context *const kctx,
int group_handle = find_free_group_handle(kctx);
if (group_handle < 0) {
- dev_err(kctx->kbdev->dev,
- "All queue group handles are already in use\n");
+ dev_dbg(kctx->kbdev->dev,
+ "All queue group handles are already in use");
} else {
struct kbase_queue_group * const group =
kmalloc(sizeof(struct kbase_queue_group),
@@ -1349,16 +1356,16 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx,
if ((create->in.tiler_max > tiler_count) ||
(create->in.fragment_max > fragment_count) ||
(create->in.compute_max > compute_count)) {
- dev_err(kctx->kbdev->dev,
- "Invalid maximum number of endpoints for a queue group\n");
+ dev_dbg(kctx->kbdev->dev,
+ "Invalid maximum number of endpoints for a queue group");
err = -EINVAL;
} else if (create->in.priority >= BASE_QUEUE_GROUP_PRIORITY_COUNT) {
- dev_err(kctx->kbdev->dev, "Invalid queue group priority %u\n",
+ dev_dbg(kctx->kbdev->dev, "Invalid queue group priority %u",
(unsigned int)create->in.priority);
err = -EINVAL;
} else if (!iface_has_enough_streams(kctx->kbdev, create->in.cs_min)) {
- dev_err(kctx->kbdev->dev,
- "No CSG has at least %d CSs\n",
+ dev_dbg(kctx->kbdev->dev,
+ "No CSG has at least %d CSs",
create->in.cs_min);
err = -EINVAL;
} else {
@@ -1403,7 +1410,7 @@ static void term_normal_suspend_buffer(struct kbase_context *const kctx,
WARN_ON(s_buf->reg->flags & KBASE_REG_FREE);
mutex_lock(&kctx->kbdev->csf.reg_lock);
- WARN_ON(kbase_remove_va_region(s_buf->reg));
+ kbase_remove_va_region(kctx->kbdev, s_buf->reg);
mutex_unlock(&kctx->kbdev->csf.reg_lock);
kbase_mem_pool_free_pages(
@@ -1436,7 +1443,7 @@ static void term_protected_suspend_buffer(struct kbase_device *const kbdev,
WARN_ON(s_buf->reg->flags & KBASE_REG_FREE);
mutex_lock(&kbdev->csf.reg_lock);
- WARN_ON(kbase_remove_va_region(s_buf->reg));
+ kbase_remove_va_region(kbdev, s_buf->reg);
mutex_unlock(&kbdev->csf.reg_lock);
kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages);
@@ -1994,6 +2001,26 @@ bool kbase_csf_error_pending(struct kbase_context *kctx)
return event_pended;
}
+static void sync_update_notify_gpu(struct kbase_context *kctx)
+{
+ bool can_notify_gpu;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
+ can_notify_gpu = kctx->kbdev->pm.backend.gpu_powered;
+#ifdef KBASE_PM_RUNTIME
+ if (kctx->kbdev->pm.backend.gpu_sleep_mode_active)
+ can_notify_gpu = false;
+#endif
+
+ if (can_notify_gpu) {
+ kbase_csf_ring_doorbell(kctx->kbdev, CSF_KERNEL_DOORBELL_NR);
+ KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT_NOTIFY_GPU, kctx, 0u);
+ }
+
+ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
+}
+
void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu)
{
struct kbase_csf_event *event, *next_event;
@@ -2014,13 +2041,8 @@ void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu)
* synch object wait operations are re-evaluated on a write to any
* CS_DOORBELL/GLB_DOORBELL register.
*/
- if (notify_gpu) {
- spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
- if (kctx->kbdev->pm.backend.gpu_powered)
- kbase_csf_ring_doorbell(kctx->kbdev, CSF_KERNEL_DOORBELL_NR);
- KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT_NOTIFY_GPU, kctx, 0u);
- spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
- }
+ if (notify_gpu)
+ sync_update_notify_gpu(kctx);
/* Now invoke the callbacks registered on backend side.
* Allow item removal inside the loop, if requested by the callback.
@@ -2364,31 +2386,6 @@ static void protm_event_worker(struct work_struct *data)
group, 0u);
}
-static void report_queue_fatal_error(struct kbase_queue *const queue,
- u32 cs_fatal, u64 cs_fatal_info,
- u8 group_handle)
-{
- struct base_csf_notification error =
- { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
- .payload = {
- .csg_error = {
- .handle = group_handle,
- .error = {
- .error_type =
- BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL,
- .payload = {
- .fatal_queue = {
- .sideband =
- cs_fatal_info,
- .status = cs_fatal,
- .csi_index =
- queue->csi_index,
- } } } } } };
-
- add_error(queue->kctx, &queue->error, &error);
- kbase_event_wakeup(queue->kctx);
-}
-
/**
* handle_fault_event - Handler for CS fault.
*
@@ -2429,10 +2426,34 @@ handle_fault_event(struct kbase_queue *const queue,
kbase_gpu_exception_name(cs_fault_exception_type),
cs_fault_exception_data, cs_fault_info_exception_data);
- if (cs_fault_exception_type ==
- CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT)
- report_queue_fatal_error(queue, GPU_EXCEPTION_TYPE_SW_FAULT_2,
- 0, queue->group->handle);
+}
+
+static void report_queue_fatal_error(struct kbase_queue *const queue,
+ u32 cs_fatal, u64 cs_fatal_info,
+ u8 group_handle)
+{
+ struct base_csf_notification error = {
+ .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
+ .payload = {
+ .csg_error = {
+ .handle = group_handle,
+ .error = {
+ .error_type =
+ BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL,
+ .payload = {
+ .fatal_queue = {
+ .sideband = cs_fatal_info,
+ .status = cs_fatal,
+ .csi_index = queue->csi_index,
+ }
+ }
+ }
+ }
+ }
+ };
+
+ add_error(queue->kctx, &queue->error, &error);
+ kbase_event_wakeup(queue->kctx);
}
/**
@@ -2531,6 +2552,7 @@ handle_fatal_event(struct kbase_queue *const queue,
if (!queue_work(queue->kctx->csf.wq, &queue->fatal_event_work))
release_queue(queue);
}
+
}
/**
@@ -2757,9 +2779,14 @@ static void process_csg_interrupts(struct kbase_device *const kbdev,
group->handle, csg_nr);
/* Check if the scheduling tick can be advanced */
- if (kbase_csf_scheduler_all_csgs_idle(kbdev) &&
- !scheduler->gpu_idle_fw_timer_enabled) {
- kbase_csf_scheduler_advance_tick_nolock(kbdev);
+ if (kbase_csf_scheduler_all_csgs_idle(kbdev)) {
+ if (!scheduler->gpu_idle_fw_timer_enabled)
+ kbase_csf_scheduler_advance_tick_nolock(kbdev);
+ } else if (atomic_read(&scheduler->non_idle_offslot_grps)) {
+ /* If there are non-idle CSGs waiting for a slot, fire
+ * a tock for a replacement.
+ */
+ mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0);
}
}
@@ -2770,7 +2797,8 @@ static void process_csg_interrupts(struct kbase_device *const kbdev,
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_PROGRESS_TIMER_INTERRUPT,
group, req ^ ack);
dev_info(kbdev->dev,
- "Timeout notification received for group %u of ctx %d_%d on slot %d\n",
+ "[%llu] Iterator PROGRESS_TIMER timeout notification received for group %u of ctx %d_%d on slot %d\n",
+ kbase_backend_get_cycle_cnt(kbdev),
group->handle, group->kctx->tgid, group->kctx->id, csg_nr);
handle_progress_timer_event(group);
@@ -2868,6 +2896,79 @@ static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req,
}
}
+/**
+ * check_protm_enter_req_complete - Check if PROTM_ENTER request completed
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @glb_req: Global request register value.
+ * @glb_ack: Global acknowledge register value.
+ *
+ * This function checks if the PROTM_ENTER Global request has completed and,
+ * if so, sends notification about the protected mode entry to components
+ * like IPA, HWC and IPA_CONTROL.
+ */
+static inline void check_protm_enter_req_complete(struct kbase_device *kbdev,
+ u32 glb_req, u32 glb_ack)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+ if (likely(!kbdev->csf.scheduler.active_protm_grp))
+ return;
+
+ if (kbdev->protected_mode)
+ return;
+
+ if ((glb_req & GLB_REQ_PROTM_ENTER_MASK) !=
+ (glb_ack & GLB_REQ_PROTM_ENTER_MASK))
+ return;
+
+ dev_dbg(kbdev->dev, "Protected mode entry interrupt received");
+
+ kbdev->protected_mode = true;
+ kbase_ipa_protection_mode_switch_event(kbdev);
+ kbase_ipa_control_protm_entered(kbdev);
+ kbase_hwcnt_backend_csf_protm_entered(&kbdev->hwcnt_gpu_iface);
+}
+
+/**
+ * process_protm_exit - Handle the protected mode exit interrupt
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @glb_ack: Global acknowledge register value.
+ *
+ * This function handles the PROTM_EXIT interrupt and sends notification
+ * about the protected mode exit to components like HWC, IPA_CONTROL.
+ */
+static inline void process_protm_exit(struct kbase_device *kbdev, u32 glb_ack)
+{
+ const struct kbase_csf_global_iface *const global_iface =
+ &kbdev->csf.global_iface;
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+ dev_dbg(kbdev->dev, "Protected mode exit interrupt received");
+
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_ack,
+ GLB_REQ_PROTM_EXIT_MASK);
+
+ if (likely(scheduler->active_protm_grp)) {
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_EXIT_PROTM,
+ scheduler->active_protm_grp, 0u);
+ scheduler->active_protm_grp = NULL;
+ } else {
+ dev_warn(kbdev->dev, "PROTM_EXIT interrupt after no pmode group");
+ }
+
+ if (!WARN_ON(!kbdev->protected_mode)) {
+ kbdev->protected_mode = false;
+ kbase_ipa_control_protm_exited(kbdev);
+ kbase_hwcnt_backend_csf_protm_exited(&kbdev->hwcnt_gpu_iface);
+ }
+}
+
void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
{
unsigned long flags;
@@ -2898,19 +2999,10 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
global_iface, GLB_ACK);
KBASE_KTRACE_ADD(kbdev, GLB_REQ_ACQ, NULL, glb_req ^ glb_ack);
- if ((glb_req ^ glb_ack) & GLB_REQ_PROTM_EXIT_MASK) {
- dev_dbg(kbdev->dev, "Protected mode exit interrupt received");
- kbase_csf_firmware_global_input_mask(
- global_iface, GLB_REQ, glb_ack,
- GLB_REQ_PROTM_EXIT_MASK);
- WARN_ON(!kbase_csf_scheduler_protected_mode_in_use(kbdev));
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_EXIT_PROTM, scheduler->active_protm_grp, 0u);
- scheduler->active_protm_grp = NULL;
- kbdev->protected_mode = false;
- kbase_ipa_control_protm_exited(kbdev);
- kbase_hwcnt_backend_csf_protm_exited(
- &kbdev->hwcnt_gpu_iface);
- }
+ check_protm_enter_req_complete(kbdev, glb_req, glb_ack);
+
+ if ((glb_req ^ glb_ack) & GLB_REQ_PROTM_EXIT_MASK)
+ process_protm_exit(kbdev, glb_ack);
/* Handle IDLE Hysteresis notification event */
if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) {
@@ -3066,4 +3158,3 @@ u8 kbase_csf_priority_check(struct kbase_device *kbdev, u8 req_priority)
return out_priority;
}
-
diff --git a/mali_kbase/csf/mali_kbase_csf.h b/mali_kbase/csf/mali_kbase_csf.h
index e3bd436..640d2ed 100644
--- a/mali_kbase/csf/mali_kbase_csf.h
+++ b/mali_kbase/csf/mali_kbase_csf.h
@@ -39,10 +39,13 @@
*/
#define KBASEP_USER_DB_NR_INVALID ((s8)-1)
-#define FIRMWARE_PING_INTERVAL_MS (4000) /* 4 seconds */
+#define FIRMWARE_PING_INTERVAL_MS (8000) /* 8 seconds */
#define FIRMWARE_IDLE_HYSTERESIS_TIME_MS (10) /* Default 10 milliseconds */
+/* Idle hysteresis time can be scaled down when GPU sleep feature is used */
+#define FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER (5)
+
/**
* enum kbase_csf_event_callback_action - return type for CSF event callbacks.
*
diff --git a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c
index 14deb98..40bee79 100644
--- a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c
+++ b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c
@@ -24,10 +24,32 @@
#include <linux/seq_file.h>
#include <linux/delay.h>
#include <csf/mali_kbase_csf_trace_buffer.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
#if IS_ENABLED(CONFIG_DEBUG_FS)
#include "mali_kbase_csf_tl_reader.h"
+#define MAX_SCHED_STATE_STRING_LEN (16)
+static const char *scheduler_state_to_string(struct kbase_device *kbdev,
+ enum kbase_csf_scheduler_state sched_state)
+{
+ switch (sched_state) {
+ case SCHED_BUSY:
+ return "BUSY";
+ case SCHED_INACTIVE:
+ return "INACTIVE";
+ case SCHED_SUSPENDED:
+ return "SUSPENDED";
+#ifdef KBASE_PM_RUNTIME
+ case SCHED_SLEEPING:
+ return "SLEEPING";
+#endif
+ default:
+ dev_warn(kbdev->dev, "Unknown Scheduler state %d", sched_state);
+ return NULL;
+ }
+}
+
/**
* blocked_reason_to_string() - Convert blocking reason id to a string
*
@@ -142,10 +164,6 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
!queue->group))
return;
- /* Ring the doorbell to have firmware update CS_EXTRACT */
- kbase_csf_ring_cs_user_doorbell(queue->kctx->kbdev, queue);
- msleep(100);
-
addr = (u32 *)queue->user_io_addr;
cs_insert = addr[CS_INSERT_LO/4] | ((u64)addr[CS_INSERT_HI/4] << 32);
@@ -253,32 +271,68 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
/* Waiting timeout for STATUS_UPDATE acknowledgment, in milliseconds */
#define CSF_STATUS_UPDATE_TO_MS (100)
+static void update_active_group_status(struct seq_file *file,
+ struct kbase_queue_group *const group)
+{
+ struct kbase_device *const kbdev = group->kctx->kbdev;
+ struct kbase_csf_cmd_stream_group_info const *const ginfo =
+ &kbdev->csf.global_iface.groups[group->csg_nr];
+ long remaining =
+ kbase_csf_timeout_in_jiffies(CSF_STATUS_UPDATE_TO_MS);
+ unsigned long flags;
+
+ /* A Global doorbell ring for the CSG STATUS_UPDATE request, or a User
+ * doorbell ring for an Extract offset update, shall not be made when the
+ * MCU has been put to sleep, otherwise it will undesirably make the MCU
+ * exit the sleep state. It also isn't really needed, as the FW implicitly
+ * updates the status of all on-slot groups when the MCU sleep request is
+ * sent to it.
+ */
+ if (kbdev->csf.scheduler.state == SCHED_SLEEPING)
+ return;
+
+ /* Ring the User doorbell shared between the queues bound to this
+ * group, to have the FW update CS_EXTRACT for all the queues
+ * bound to the group. Ring early so that the FW gets adequate time
+ * for the handling.
+ */
+ kbase_csf_ring_doorbell(kbdev, group->doorbell_nr);
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ,
+ ~kbase_csf_firmware_csg_output(ginfo, CSG_ACK),
+ CSG_REQ_STATUS_UPDATE_MASK);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ kbase_csf_ring_csg_doorbell(kbdev, group->csg_nr);
+
+ remaining = wait_event_timeout(kbdev->csf.event_wait,
+ !((kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ) ^
+ kbase_csf_firmware_csg_output(ginfo, CSG_ACK)) &
+ CSG_REQ_STATUS_UPDATE_MASK), remaining);
+
+ if (!remaining) {
+ dev_err(kbdev->dev,
+ "Timed out for STATUS_UPDATE on group %d on slot %d",
+ group->handle, group->csg_nr);
+
+ seq_printf(file, "*** Warn: Timed out for STATUS_UPDATE on slot %d\n",
+ group->csg_nr);
+ seq_puts(file, "*** The following group-record is likely stale\n");
+ }
+}
+
static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
struct kbase_queue_group *const group)
{
if (kbase_csf_scheduler_group_get_slot(group) >= 0) {
struct kbase_device *const kbdev = group->kctx->kbdev;
- unsigned long flags;
u32 ep_c, ep_r;
char exclusive;
struct kbase_csf_cmd_stream_group_info const *const ginfo =
&kbdev->csf.global_iface.groups[group->csg_nr];
- long remaining =
- kbase_csf_timeout_in_jiffies(CSF_STATUS_UPDATE_TO_MS);
u8 slot_priority =
kbdev->csf.scheduler.csg_slots[group->csg_nr].priority;
- kbase_csf_scheduler_spin_lock(kbdev, &flags);
- kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ,
- ~kbase_csf_firmware_csg_output(ginfo, CSG_ACK),
- CSG_REQ_STATUS_UPDATE_MASK);
- kbase_csf_scheduler_spin_unlock(kbdev, flags);
- kbase_csf_ring_csg_doorbell(kbdev, group->csg_nr);
-
- remaining = wait_event_timeout(kbdev->csf.event_wait,
- !((kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ) ^
- kbase_csf_firmware_csg_output(ginfo, CSG_ACK)) &
- CSG_REQ_STATUS_UPDATE_MASK), remaining);
+ update_active_group_status(file, group);
ep_c = kbase_csf_firmware_csg_output(ginfo,
CSG_STATUS_EP_CURRENT);
@@ -291,16 +345,6 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
else
exclusive = '0';
- if (!remaining) {
- dev_err(kbdev->dev,
- "Timed out for STATUS_UPDATE on group %d on slot %d",
- group->handle, group->csg_nr);
-
- seq_printf(file, "*** Warn: Timed out for STATUS_UPDATE on slot %d\n",
- group->csg_nr);
- seq_printf(file, "*** The following group-record is likely stale\n");
- }
-
seq_puts(file, "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive\n");
seq_printf(file, "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c\n",
group->handle,
@@ -315,6 +359,10 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
CSG_STATUS_EP_CURRENT_TILER_EP_GET(ep_c),
CSG_STATUS_EP_REQ_TILER_EP_GET(ep_r),
exclusive);
+
+ /* Wait for the User doorbell ring to take effect */
+ if (kbdev->csf.scheduler.state != SCHED_SLEEPING)
+ msleep(100);
} else {
seq_puts(file, "GroupID, CSG NR, Run State, Priority\n");
seq_printf(file, "%7d, %6d, %9d, %8d\n",
@@ -362,6 +410,12 @@ static int kbasep_csf_queue_group_debugfs_show(struct seq_file *file,
mutex_lock(&kctx->csf.lock);
kbase_csf_scheduler_lock(kbdev);
+ if (kbdev->csf.scheduler.state == SCHED_SLEEPING) {
+ /* Wait for the MCU sleep request to complete. Please refer to the
+ * update_active_group_status() function for the explanation.
+ */
+ kbase_pm_wait_for_desired_state(kbdev);
+ }
for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) {
struct kbase_queue_group *const group =
kctx->csf.queue_groups[gr];
@@ -395,6 +449,12 @@ static int kbasep_csf_scheduler_dump_active_groups(struct seq_file *file,
MALI_CSF_CSG_DEBUGFS_VERSION);
kbase_csf_scheduler_lock(kbdev);
+ if (kbdev->csf.scheduler.state == SCHED_SLEEPING) {
+ /* Wait for the MCU sleep request to complete. Please refer to the
+ * update_active_group_status() function for the explanation.
+ */
+ kbase_pm_wait_for_desired_state(kbdev);
+ }
for (csg_nr = 0; csg_nr < num_groups; csg_nr++) {
struct kbase_queue_group *const group =
kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
@@ -502,59 +562,93 @@ DEFINE_SIMPLE_ATTRIBUTE(kbasep_csf_debugfs_scheduling_timer_kick_fops,
"%llu\n");
/**
- * kbase_csf_debugfs_scheduler_suspend_get() - get if the scheduler is suspended.
+ * kbase_csf_debugfs_scheduler_state_get() - Get the state of scheduler.
*
- * @data: The debugfs dentry private data, a pointer to kbase_device
- * @val: The debugfs output value, boolean: 1 suspended, 0 otherwise
+ * @file: Object of the file that is being read.
+ * @user_buf: User buffer that contains the string.
+ * @count: Length of user buffer
+ * @ppos: Offset within file object
*
- * Return: 0
+ * This function returns the current Scheduler state to Userspace. The
+ * Scheduler may have exited that state by the time the state string is
+ * received by the Userspace.
+ *
+ * Return: 0 if the Scheduler was found in an unexpected state, the size
+ * of the state string if it was copied successfully to the User
+ * buffer, or a negative value in case of an error.
*/
-static int kbase_csf_debugfs_scheduler_suspend_get(
- void *data, u64 *val)
+static ssize_t kbase_csf_debugfs_scheduler_state_get(struct file *file,
+ char __user *user_buf, size_t count, loff_t *ppos)
{
- struct kbase_device *kbdev = data;
+ struct kbase_device *kbdev = file->private_data;
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ const char *state_string;
kbase_csf_scheduler_lock(kbdev);
- *val = (scheduler->state == SCHED_SUSPENDED);
+ state_string = scheduler_state_to_string(kbdev, scheduler->state);
kbase_csf_scheduler_unlock(kbdev);
- return 0;
+ if (!state_string)
+ count = 0;
+
+ return simple_read_from_buffer(user_buf, count, ppos, state_string,
+ state_string ? strlen(state_string) : 0);
}
/**
- * kbase_csf_debugfs_scheduler_suspend_set() - set the scheduler to suspended.
+ * kbase_csf_debugfs_scheduler_state_set() - Set the state of scheduler.
*
- * @data: The debugfs dentry private data, a pointer to kbase_device
- * @val: The debugfs input value, boolean: 1 suspend, 0 otherwise
+ * @file: Object of the file that is being written to.
+ * @ubuf: User buffer that contains the string.
+ * @count: Length of user buffer
+ * @ppos: Offset within file object
*
- * Return: Negative value if already in requested state, 0 otherwise.
+ * This function will update the Scheduler state as per the state string
+ * passed by the Userspace. The Scheduler may or may not remain in the new
+ * state for long.
+ *
+ * Return: Negative value if the string doesn't correspond to a valid Scheduler
+ * state or if copy from user buffer failed, otherwise the length of
+ * the User buffer.
*/
-static int kbase_csf_debugfs_scheduler_suspend_set(
- void *data, u64 val)
+static ssize_t kbase_csf_debugfs_scheduler_state_set(struct file *file,
+ const char __user *ubuf, size_t count, loff_t *ppos)
{
- struct kbase_device *kbdev = data;
- struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
- enum kbase_csf_scheduler_state state;
+ struct kbase_device *kbdev = file->private_data;
+ char buf[MAX_SCHED_STATE_STRING_LEN];
+ ssize_t ret = count;
- kbase_csf_scheduler_lock(kbdev);
- state = scheduler->state;
- kbase_csf_scheduler_unlock(kbdev);
+ CSTD_UNUSED(ppos);
+
+ count = min_t(size_t, sizeof(buf) - 1, count);
+ if (copy_from_user(buf, ubuf, count))
+ return -EFAULT;
+
+ buf[count] = 0;
- if (val && (state != SCHED_SUSPENDED))
+ if (sysfs_streq(buf, "SUSPENDED"))
kbase_csf_scheduler_pm_suspend(kbdev);
- else if (!val && (state == SCHED_SUSPENDED))
- kbase_csf_scheduler_pm_resume(kbdev);
- else
- return -1;
+#ifdef KBASE_PM_RUNTIME
+ else if (sysfs_streq(buf, "SLEEPING"))
+ kbase_csf_scheduler_force_sleep(kbdev);
+#endif
+ else if (sysfs_streq(buf, "INACTIVE"))
+ kbase_csf_scheduler_force_wakeup(kbdev);
+ else {
+ dev_dbg(kbdev->dev, "Bad scheduler state %s", buf);
+ ret = -EINVAL;
+ }
- return 0;
+ return ret;
}
-DEFINE_SIMPLE_ATTRIBUTE(kbasep_csf_debugfs_scheduler_suspend_fops,
- &kbase_csf_debugfs_scheduler_suspend_get,
- &kbase_csf_debugfs_scheduler_suspend_set,
- "%llu\n");
+static const struct file_operations kbasep_csf_debugfs_scheduler_state_fops = {
+ .owner = THIS_MODULE,
+ .read = kbase_csf_debugfs_scheduler_state_get,
+ .write = kbase_csf_debugfs_scheduler_state_set,
+ .open = simple_open,
+ .llseek = default_llseek,
+};
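As a hedged usage sketch for the new scheduler_state file (assuming debugfs is mounted at /sys/kernel/debug and using an illustrative mali0 directory name; the exact path depends on the platform integration), a userspace test could drive it as follows:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Illustrative path; the real Mali debugfs directory name varies. */
	const char *path = "/sys/kernel/debug/mali0/scheduler_state";
	char buf[16] = { 0 };
	int fd = open(path, O_RDWR);

	if (fd < 0)
		return 1;

	/* Request the SUSPENDED state, then read back whatever state the
	 * Scheduler currently reports (it may already have moved on).
	 */
	if (write(fd, "SUSPENDED", strlen("SUSPENDED")) < 0)
		perror("write");
	if (read(fd, buf, sizeof(buf) - 1) > 0)
		printf("scheduler state: %s\n", buf);

	close(fd);
	return 0;
}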
void kbase_csf_debugfs_init(struct kbase_device *kbdev)
{
@@ -568,9 +662,9 @@ void kbase_csf_debugfs_init(struct kbase_device *kbdev)
debugfs_create_file("scheduling_timer_kick", 0200,
kbdev->mali_debugfs_directory, kbdev,
&kbasep_csf_debugfs_scheduling_timer_kick_fops);
- debugfs_create_file("scheduler_suspend", 0644,
+ debugfs_create_file("scheduler_state", 0644,
kbdev->mali_debugfs_directory, kbdev,
- &kbasep_csf_debugfs_scheduler_suspend_fops);
+ &kbasep_csf_debugfs_scheduler_state_fops);
kbase_csf_tl_reader_debugfs_init(kbdev);
kbase_csf_firmware_trace_buffer_debugfs_init(kbdev);
diff --git a/mali_kbase/csf/mali_kbase_csf_defs.h b/mali_kbase/csf/mali_kbase_csf_defs.h
index 53526ce..de471eb 100644
--- a/mali_kbase/csf/mali_kbase_csf_defs.h
+++ b/mali_kbase/csf/mali_kbase_csf_defs.h
@@ -219,11 +219,19 @@ enum kbase_csf_csg_slot_state {
* management reference. This can happen if the GPU
* becomes idle for a duration exceeding a threshold,
* or due to a system triggered suspend action.
+ * @SCHED_SLEEPING: The scheduler is in low-power mode with scheduling
+ * operations suspended and is not holding the power
+ * management reference. This state is set, only for
+ * GPUs that support the sleep feature, when the GPU
+ * idle notification is received. The state is changed
+ * to @SCHED_SUSPENDED from the runtime suspend callback
+ * function after the CSGs have been suspended.
*/
enum kbase_csf_scheduler_state {
SCHED_BUSY,
SCHED_INACTIVE,
SCHED_SUSPENDED,
+ SCHED_SLEEPING,
};
/**
@@ -561,7 +569,9 @@ struct kbase_csf_heap_context_allocator {
* @kbase_context. It is not the same as a heap context structure allocated by
* the kernel for use by the firmware.
*
- * @lock: Lock preventing concurrent access to the tiler heaps.
+ * @lock: Lock to prevent concurrent access to the tiler heaps (after
+ * initialization); a tiler heap can be terminated whilst an OoM
+ * event is being handled for it.
* @list: List of tiler heaps.
* @ctx_alloc: Allocator for heap context structures.
* @nr_of_heaps: Total number of tiler heaps that were added during the
@@ -802,6 +812,11 @@ struct kbase_csf_csg_slot {
* @active_protm_grp: Indicates if firmware has been permitted to let GPU
* enter protected mode with the given group. On exit
* from protected mode the pointer is reset to NULL.
+ * This pointer is set and PROTM_ENTER request is sent
+ * atomically with @interrupt_lock held.
+ * This pointer being set doesn't necessarily indicate
+ * that the GPU is in protected mode; kbdev->protected_mode
+ * needs to be checked for that.
* @gpu_idle_fw_timer_enabled: Whether the CSF scheduler has activiated the
* firmware idle hysteresis timer for preparing a
* GPU suspend on idle.
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.c b/mali_kbase/csf/mali_kbase_csf_firmware.c
index 1b31122..785555c 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware.c
@@ -27,12 +27,14 @@
#include "mali_kbase_reset_gpu.h"
#include "mali_kbase_ctx_sched.h"
#include "mali_kbase_csf_scheduler.h"
+#include <mali_kbase_hwaccess_time.h>
#include "device/mali_kbase_device.h"
#include "backend/gpu/mali_kbase_pm_internal.h"
#include "tl/mali_kbase_timeline_priv.h"
#include "mali_kbase_csf_tl_reader.h"
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
+#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h>
#include <linux/list.h>
#include <linux/slab.h>
@@ -47,7 +49,7 @@
#include <asm/arch_timer.h>
#define MALI_MAX_FIRMWARE_NAME_LEN ((size_t)20)
-
+#define ACK_TIMEOUT_MILLISECONDS 1000
static char fw_name[MALI_MAX_FIRMWARE_NAME_LEN] = "mali_csffw.bin";
module_param_string(fw_name, fw_name, sizeof(fw_name), 0644);
@@ -190,8 +192,10 @@ static int setup_shared_iface_static_region(struct kbase_device *kbdev)
reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
interface->num_pages, KBASE_REG_ZONE_MCU_SHARED);
if (reg) {
+ mutex_lock(&kbdev->csf.reg_lock);
ret = kbase_add_va_region_rbtree(kbdev, reg,
interface->virtual, interface->num_pages, 1);
+ mutex_unlock(&kbdev->csf.reg_lock);
if (ret)
kfree(reg);
else
@@ -1305,9 +1309,12 @@ static int wait_for_global_request(struct kbase_device *const kbdev,
wait_timeout);
if (!remaining) {
- dev_warn(kbdev->dev, "Timed out waiting for global request %x to complete",
+ dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for global request %x to complete",
+ kbase_backend_get_cycle_cnt(kbdev),
+ kbdev->csf.fw_timeout_ms,
req_mask);
err = -ETIMEDOUT;
+
}
return err;
@@ -1388,11 +1395,6 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)
kbase_csf_scheduler_spin_lock(kbdev, &flags);
- /* Set the coherency mode for protected mode execution */
- WARN_ON(kbdev->system_coherency == COHERENCY_ACE);
- kbase_csf_firmware_global_input(global_iface, GLB_PROTM_COHERENCY,
- kbdev->system_coherency);
-
/* Update shader core allocation enable mask */
enable_endpoints_global(global_iface, core_mask);
enable_shader_poweroff_timer(kbdev, global_iface);
@@ -1675,12 +1677,75 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32
return pwroff;
}
+/**
+ * kbase_device_csf_iterator_trace_init - Send request to enable iterator
+ * trace port.
+ * @kbdev: Kernel base device pointer
+ *
+ * Return: 0 on success (or if the enable request is not sent), or error
+ * code -EINVAL if the GPU fails to acknowledge the enable request.
+ */
+static int kbase_device_csf_iterator_trace_init(struct kbase_device *kbdev)
+{
+ /* Enable the iterator trace port if supported by the GPU.
+ * It requires the GPU to have a nonzero "iter_trace_enable"
+ * property in the device tree, and the FW must advertise
+ * this feature in GLB_FEATURES.
+ */
+ if (kbdev->pm.backend.gpu_powered) {
+ /* check device tree for iterator trace enable property */
+ const void *iter_trace_param = of_get_property(
+ kbdev->dev->of_node,
+ "iter_trace_enable", NULL);
+
+ const struct kbase_csf_global_iface *iface =
+ &kbdev->csf.global_iface;
+
+ if (iter_trace_param) {
+ u32 iter_trace_value = be32_to_cpup(iter_trace_param);
+
+ if ((iface->features &
+ GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK) &&
+ iter_trace_value) {
+ long ack_timeout;
+
+ ack_timeout = kbase_csf_timeout_in_jiffies(
+ ACK_TIMEOUT_MILLISECONDS);
+
+ /* write enable request to global input */
+ kbase_csf_firmware_global_input_mask(
+ iface, GLB_REQ,
+ GLB_REQ_ITER_TRACE_ENABLE_MASK,
+ GLB_REQ_ITER_TRACE_ENABLE_MASK);
+ /* Ring global doorbell */
+ kbase_csf_ring_doorbell(kbdev,
+ CSF_KERNEL_DOORBELL_NR);
+
+ ack_timeout = wait_event_timeout(
+ kbdev->csf.event_wait,
+ !((kbase_csf_firmware_global_input_read(
+ iface, GLB_REQ) ^
+ kbase_csf_firmware_global_output(
+ iface, GLB_ACK)) &
+ GLB_REQ_ITER_TRACE_ENABLE_MASK),
+ ack_timeout);
+
+ return ack_timeout ? 0 : -EINVAL;
+
+ }
+ }
+
+ }
+ return 0;
+}
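The enable decision above reduces to reading one big-endian u32 cell from the device tree and testing the firmware feature bit. A condensed, hedged sketch of that check (the helper name is hypothetical; of_property_read_u32() performs the be32 conversion that the function above open-codes with be32_to_cpup(), and the feature mask is assumed to come from the CSF registers header included by this file):

#include <linux/of.h>
#include <linux/types.h>

static bool iter_trace_requested(struct device_node *node, u32 glb_features)
{
	u32 value = 0;

	/* of_property_read_u32() returns non-zero if the property is
	 * absent or malformed, in which case the port stays disabled.
	 */
	if (of_property_read_u32(node, "iter_trace_enable", &value))
		return false;

	/* Both the DT opt-in and the firmware capability must be present. */
	return value &&
	       (glb_features & GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK);
}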
int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
{
init_waitqueue_head(&kbdev->csf.event_wait);
kbdev->csf.interrupt_received = false;
- kbdev->csf.fw_timeout_ms = CSF_FIRMWARE_TIMEOUT_MS;
+
+ kbdev->csf.fw_timeout_ms =
+ kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT);
INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
INIT_LIST_HEAD(&kbdev->csf.firmware_config);
@@ -1721,8 +1786,14 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
}
kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
+#ifdef KBASE_PM_RUNTIME
+ if (kbase_pm_gpu_sleep_allowed(kbdev))
+ kbdev->csf.gpu_idle_hysteresis_ms /=
+ FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
+#endif
+ WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
- kbdev, FIRMWARE_IDLE_HYSTERESIS_TIME_MS);
+ kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US;
kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count(
@@ -1851,6 +1922,9 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
if (ret != 0)
goto error;
+ ret = kbase_device_csf_iterator_trace_init(kbdev);
+ if (ret != 0)
+ goto error;
/* Firmware loaded successfully */
release_firmware(firmware);
@@ -2048,30 +2122,20 @@ int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev,
void kbase_csf_enter_protected_mode(struct kbase_device *kbdev)
{
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
- unsigned long flags;
- int err;
- kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
set_global_request(global_iface, GLB_REQ_PROTM_ENTER_MASK);
dev_dbg(kbdev->dev, "Sending request to enter protected mode");
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
- kbase_csf_scheduler_spin_unlock(kbdev, flags);
-
- err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK);
-
- if (!err) {
- unsigned long irq_flags;
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- kbdev->protected_mode = true;
- kbase_ipa_protection_mode_switch_event(kbdev);
- kbase_ipa_control_protm_entered(kbdev);
+}
- kbase_csf_scheduler_spin_lock(kbdev, &irq_flags);
- kbase_hwcnt_backend_csf_protm_entered(&kbdev->hwcnt_gpu_iface);
- kbase_csf_scheduler_spin_unlock(kbdev, irq_flags);
+void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
+{
+ int err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ if (err) {
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
+ kbase_reset_gpu(kbdev);
}
}
@@ -2081,12 +2145,38 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
unsigned long flags;
kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ /* Validate there are no on-slot groups when sending the
+ * halt request to firmware.
+ */
+ WARN_ON(kbase_csf_scheduler_get_nr_active_csgs_locked(kbdev));
set_global_request(global_iface, GLB_REQ_HALT_MASK);
dev_dbg(kbdev->dev, "Sending request to HALT MCU");
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
+#ifdef KBASE_PM_RUNTIME
+void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev)
+{
+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+ unsigned long flags;
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ set_global_request(global_iface, GLB_REQ_SLEEP_MASK);
+ dev_dbg(kbdev->dev, "Sending sleep request to MCU");
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
+
+bool kbase_csf_firmware_is_mcu_in_sleep(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ return (global_request_complete(kbdev, GLB_REQ_SLEEP_MASK) &&
+ kbase_csf_firmware_mcu_halted(kbdev));
+}
+#endif
+
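A hedged sketch of how the PM backend is expected to drive this sleep API: the request is fired once and completion is then polled from the MCU state machine under the hwaccess lock (the single-shot helper name below is hypothetical; in practice the check is repeated from the state machine rather than immediately after the trigger):

static bool example_mcu_sleep_step(struct kbase_device *kbdev)
{
	unsigned long flags;
	bool asleep;

	/* Ask the firmware to put the MCU into the sleep state. */
	kbase_csf_firmware_trigger_mcu_sleep(kbdev);

	/* Completion requires both the GLB_REQ_SLEEP acknowledgement and
	 * MCU_STATUS reporting the MCU as halted.
	 */
	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	asleep = kbase_csf_firmware_is_mcu_in_sleep(kbdev);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	return asleep;
}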
int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev)
{
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
@@ -2095,6 +2185,7 @@ int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev)
/* Ensure GPU is powered-up until we complete config update.*/
kbase_csf_scheduler_pm_active(kbdev);
+ kbase_csf_scheduler_wait_mcu_active(kbdev);
/* The 'reg_lock' is also taken and is held till the update is
* complete, to ensure the config update gets serialized.
@@ -2288,7 +2379,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
mmu_insert_pages_error:
mutex_lock(&kbdev->csf.reg_lock);
- kbase_remove_va_region(va_reg);
+ kbase_remove_va_region(kbdev, va_reg);
va_region_add_error:
kbase_free_alloced_region(va_reg);
mutex_unlock(&kbdev->csf.reg_lock);
@@ -2320,7 +2411,7 @@ void kbase_csf_firmware_mcu_shared_mapping_term(
{
if (csf_mapping->va_reg) {
mutex_lock(&kbdev->csf.reg_lock);
- kbase_remove_va_region(csf_mapping->va_reg);
+ kbase_remove_va_region(kbdev, csf_mapping->va_reg);
kbase_free_alloced_region(csf_mapping->va_reg);
mutex_unlock(&kbdev->csf.reg_lock);
}
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.h b/mali_kbase/csf/mali_kbase_csf_firmware.h
index 60d7065..0edcc30 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware.h
+++ b/mali_kbase/csf/mali_kbase_csf_firmware.h
@@ -78,9 +78,6 @@
/* Maximum CSs per csg. */
#define MAX_SUPPORTED_STREAMS_PER_GROUP 32
-/* Waiting timeout for status change acknowledgment, in milliseconds */
-#define CSF_FIRMWARE_TIMEOUT_MS (3000) /* Relaxed to 3000ms from 800ms due to Android */
-
struct kbase_device;
@@ -442,13 +439,27 @@ int kbase_csf_firmware_set_timeout(struct kbase_device *kbdev, u64 timeout);
/**
* kbase_csf_enter_protected_mode - Send the Global request to firmware to
- * enter protected mode and wait for its
- * completion.
+ * enter protected mode.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * The function must be called with kbdev->csf.scheduler.interrupt_lock held;
+ * it does not wait for the protected mode entry to complete.
*/
void kbase_csf_enter_protected_mode(struct kbase_device *kbdev);
+/**
+ * kbase_csf_wait_protected_mode_enter - Wait for the completion of PROTM_ENTER
+ * Global request sent to firmware.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * This function needs to be called after kbase_csf_enter_protected_mode()
+ * to wait for the protected mode entry to complete. GPU reset is triggered if
+ * the wait is unsuccessful.
+ */
+void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev);
+
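A hedged sketch of the call sequence implied by the split enter/wait API documented above (the caller name is hypothetical): the PROTM_ENTER request is made under the scheduler interrupt lock and the wait happens after the lock is dropped.

static void example_request_protected_mode(struct kbase_device *kbdev)
{
	unsigned long flags;

	/* Request protected mode entry with the interrupt lock held. */
	kbase_csf_scheduler_spin_lock(kbdev, &flags);
	kbase_csf_enter_protected_mode(kbdev);
	kbase_csf_scheduler_spin_unlock(kbdev, flags);

	/* Wait for completion outside the lock; a GPU reset is triggered
	 * internally if the wait times out.
	 */
	kbase_csf_wait_protected_mode_enter(kbdev);
}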
static inline bool kbase_csf_firmware_mcu_halted(struct kbase_device *kbdev)
{
return (kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS)) ==
@@ -497,6 +508,26 @@ static inline void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev)
*/
void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev);
+#ifdef KBASE_PM_RUNTIME
+/**
+ * kbase_csf_firmware_trigger_mcu_sleep - Send the command to put MCU in sleep
+ * state.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ */
+void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_firmware_is_mcu_in_sleep - Check if sleep request has completed
+ * and MCU has halted.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * Return: true if sleep request has completed, otherwise false.
+ */
+bool kbase_csf_firmware_is_mcu_in_sleep(struct kbase_device *kbdev);
+#endif
+
/**
* kbase_trigger_firmware_reload - Trigger the reboot of MCU firmware, for the
* cold boot case firmware image would be
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
index 33ae3f7..e99c968 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
@@ -27,6 +27,7 @@
#include "mali_kbase_reset_gpu.h"
#include "mali_kbase_ctx_sched.h"
#include "device/mali_kbase_device.h"
+#include <mali_kbase_hwaccess_time.h>
#include "backend/gpu/mali_kbase_pm_internal.h"
#include "mali_kbase_csf_scheduler.h"
#include "mmu/mali_kbase_mmu.h"
@@ -551,6 +552,8 @@ static int wait_for_global_request(struct kbase_device *const kbdev,
dev_warn(kbdev->dev, "Timed out waiting for global request %x to complete",
req_mask);
err = -ETIMEDOUT;
+
+
}
return err;
@@ -886,7 +889,9 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
{
init_waitqueue_head(&kbdev->csf.event_wait);
kbdev->csf.interrupt_received = false;
- kbdev->csf.fw_timeout_ms = CSF_FIRMWARE_TIMEOUT_MS;
+
+ kbdev->csf.fw_timeout_ms =
+ kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT);
INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
INIT_LIST_HEAD(&kbdev->csf.firmware_config);
@@ -920,8 +925,14 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
}
kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
+#ifdef KBASE_PM_RUNTIME
+ if (kbase_pm_gpu_sleep_allowed(kbdev))
+ kbdev->csf.gpu_idle_hysteresis_ms /=
+ FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
+#endif
+ WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
- kbdev, FIRMWARE_IDLE_HYSTERESIS_TIME_MS);
+ kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
ret = kbase_mcu_shared_interface_region_tracker_init(kbdev);
if (ret != 0) {
@@ -1110,15 +1121,21 @@ int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev,
void kbase_csf_enter_protected_mode(struct kbase_device *kbdev)
{
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
- unsigned long flags;
- kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
set_global_request(global_iface, GLB_REQ_PROTM_ENTER_MASK);
dev_dbg(kbdev->dev, "Sending request to enter protected mode");
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
- kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
+
+void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
+{
+ int err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK);
- wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK);
+ if (err) {
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
+ kbase_reset_gpu(kbdev);
+ }
}
void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
@@ -1127,12 +1144,38 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
unsigned long flags;
kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ /* Validate there are no on-slot groups when sending the
+ * halt request to firmware.
+ */
+ WARN_ON(kbase_csf_scheduler_get_nr_active_csgs_locked(kbdev));
set_global_request(global_iface, GLB_REQ_HALT_MASK);
dev_dbg(kbdev->dev, "Sending request to HALT MCU");
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
+#ifdef KBASE_PM_RUNTIME
+void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev)
+{
+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+ unsigned long flags;
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ set_global_request(global_iface, GLB_REQ_SLEEP_MASK);
+ dev_dbg(kbdev->dev, "Sending sleep request to MCU");
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
+
+bool kbase_csf_firmware_is_mcu_in_sleep(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ return (global_request_complete(kbdev, GLB_REQ_SLEEP_MASK) &&
+ kbase_csf_firmware_mcu_halted(kbdev));
+}
+#endif
+
int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev)
{
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
@@ -1331,7 +1374,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
mmu_insert_pages_error:
mutex_lock(&kbdev->csf.reg_lock);
- kbase_remove_va_region(va_reg);
+ kbase_remove_va_region(kbdev, va_reg);
va_region_add_error:
kbase_free_alloced_region(va_reg);
mutex_unlock(&kbdev->csf.reg_lock);
@@ -1363,7 +1406,7 @@ void kbase_csf_firmware_mcu_shared_mapping_term(
{
if (csf_mapping->va_reg) {
mutex_lock(&kbdev->csf.reg_lock);
- kbase_remove_va_region(csf_mapping->va_reg);
+ kbase_remove_va_region(kbdev, csf_mapping->va_reg);
kbase_free_alloced_region(csf_mapping->va_reg);
mutex_unlock(&kbdev->csf.reg_lock);
}
diff --git a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c
index 96746c6..1815a26 100644
--- a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c
+++ b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c
@@ -50,8 +50,8 @@ static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc)
MAX_TILER_HEAPS);
if (unlikely(heap_nr >= MAX_TILER_HEAPS)) {
- dev_err(kctx->kbdev->dev,
- "No free tiler heap contexts in the pool\n");
+ dev_dbg(kctx->kbdev->dev,
+ "No free tiler heap contexts in the pool");
return 0;
}
@@ -159,6 +159,11 @@ u64 kbase_csf_heap_context_allocator_alloc(
u64 nr_pages = PFN_UP(HEAP_CTX_REGION_SIZE);
u64 heap_gpu_va = 0;
+ /* Calls to this function are inherently asynchronous with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
#ifdef CONFIG_MALI_VECTOR_DUMP
flags |= BASE_MEM_PROT_CPU_RD;
#endif
@@ -169,13 +174,14 @@ u64 kbase_csf_heap_context_allocator_alloc(
* allocate it.
*/
if (!ctx_alloc->region) {
- ctx_alloc->region = kbase_mem_alloc(kctx, nr_pages, nr_pages,
- 0, &flags, &ctx_alloc->gpu_va);
+ ctx_alloc->region =
+ kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
+ &ctx_alloc->gpu_va, mmu_sync_info);
}
/* If the pool still isn't allocated then an error occurred. */
if (unlikely(!ctx_alloc->region)) {
- dev_err(kctx->kbdev->dev, "Failed to allocate a pool of tiler heap contexts\n");
+ dev_dbg(kctx->kbdev->dev, "Failed to allocate a pool of tiler heap contexts");
} else {
heap_gpu_va = sub_alloc(ctx_alloc);
}
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.c b/mali_kbase/csf/mali_kbase_csf_kcpu.c
index 4e26a49..8729307 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu.c
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu.c
@@ -34,7 +34,7 @@ static DEFINE_SPINLOCK(kbase_csf_fence_lock);
#endif
static void kcpu_queue_process(struct kbase_kcpu_command_queue *kcpu_queue,
- bool ignore_waits);
+ bool drain_queue);
static void kcpu_queue_process_worker(struct work_struct *data);
@@ -220,7 +220,7 @@ static int kbase_kcpu_jit_allocate_process(
for (i = 0; i < count; i++, info++) {
/* The JIT ID is still in use so fail the allocation */
if (kctx->jit_alloc[info->id]) {
- dev_warn(kctx->kbdev->dev, "JIT ID still in use\n");
+ dev_dbg(kctx->kbdev->dev, "JIT ID still in use");
return -EINVAL;
}
}
@@ -458,7 +458,7 @@ static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue,
int item_err = 0;
if (!kctx->jit_alloc[ids[i]]) {
- dev_warn(kctx->kbdev->dev, "invalid JIT free ID\n");
+ dev_dbg(kctx->kbdev->dev, "invalid JIT free ID");
rc = -EINVAL;
item_err = rc;
} else {
@@ -964,7 +964,7 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev,
sig_set = *evt > cqs_wait_operation->objs[i].val;
break;
default:
- dev_warn(kbdev->dev,
+ dev_dbg(kbdev->dev,
"Unsupported CQS wait operation %d", cqs_wait_operation->objs[i].operation);
kbase_phy_alloc_mapping_put(queue->kctx, mapping);
@@ -976,8 +976,9 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev,
/* Increment evt up to the error_state value depending on the CQS data type */
switch (cqs_wait_operation->objs[i].data_type) {
default:
- dev_warn(kbdev->dev, "Unreachable data_type=%d", cqs_wait_operation->objs[i].data_type);
- /* Fallthrough - hint to compiler that there's really only 2 options at present */
+ dev_dbg(kbdev->dev, "Unreachable data_type=%d", cqs_wait_operation->objs[i].data_type);
+ /* Fallthrough - hint to compiler that there's really only 2 options at present */
+ fallthrough;
case BASEP_CQS_DATA_TYPE_U32:
evt = (u64 *)((u8 *)evt + sizeof(u32));
break;
@@ -1100,7 +1101,7 @@ static void kbase_kcpu_cqs_set_operation_process(
*evt = cqs_set_operation->objs[i].val;
break;
default:
- dev_warn(kbdev->dev,
+ dev_dbg(kbdev->dev,
"Unsupported CQS set operation %d", cqs_set_operation->objs[i].operation);
queue->has_error = true;
break;
@@ -1109,8 +1110,9 @@ static void kbase_kcpu_cqs_set_operation_process(
/* Increment evt up to the error_state value depending on the CQS data type */
switch (cqs_set_operation->objs[i].data_type) {
default:
- dev_warn(kbdev->dev, "Unreachable data_type=%d", cqs_set_operation->objs[i].data_type);
- /* Fallthrough - hint to compiler that there's really only 2 options at present */
+ dev_dbg(kbdev->dev, "Unreachable data_type=%d", cqs_set_operation->objs[i].data_type);
+ /* Fallthrough - hint to compiler that there's really only 2 options at present */
+ fallthrough;
case BASEP_CQS_DATA_TYPE_U32:
evt = (u64 *)((u8 *)evt + sizeof(u32));
break;
@@ -1465,8 +1467,8 @@ static int delete_queue(struct kbase_context *kctx, u32 id)
kfree(queue);
} else {
- dev_warn(kctx->kbdev->dev,
- "Attempt to delete a non-existent KCPU queue\n");
+ dev_dbg(kctx->kbdev->dev,
+ "Attempt to delete a non-existent KCPU queue");
mutex_unlock(&kctx->csf.kcpu_queues.lock);
err = -EINVAL;
}
@@ -1525,7 +1527,7 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END(
}
static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
- bool ignore_waits)
+ bool drain_queue)
{
struct kbase_device *kbdev = queue->kctx->kbdev;
bool process_next = true;
@@ -1548,7 +1550,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
status = 0;
#if IS_ENABLED(CONFIG_SYNC_FILE)
- if (ignore_waits) {
+ if (drain_queue) {
kbase_kcpu_fence_wait_cancel(queue,
&cmd->info.fence);
} else {
@@ -1601,7 +1603,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
status = kbase_kcpu_cqs_wait_process(kbdev, queue,
&cmd->info.cqs_wait);
- if (!status && !ignore_waits) {
+ if (!status && !drain_queue) {
process_next = false;
} else {
/* Either all CQS objects were signaled or
@@ -1623,7 +1625,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
status = kbase_kcpu_cqs_wait_operation_process(kbdev, queue,
&cmd->info.cqs_wait_operation);
- if (!status && !ignore_waits) {
+ if (!status && !drain_queue) {
process_next = false;
} else {
/* Either all CQS objects were signaled or
@@ -1651,22 +1653,25 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: {
struct kbase_ctx_ext_res_meta *meta = NULL;
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START(
- kbdev, queue);
+ if (!drain_queue) {
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START(
+ kbdev, queue);
- kbase_gpu_vm_lock(queue->kctx);
- meta = kbase_sticky_resource_acquire(
- queue->kctx, cmd->info.import.gpu_va);
- kbase_gpu_vm_unlock(queue->kctx);
+ kbase_gpu_vm_lock(queue->kctx);
+ meta = kbase_sticky_resource_acquire(
+ queue->kctx, cmd->info.import.gpu_va);
+ kbase_gpu_vm_unlock(queue->kctx);
- if (meta == NULL) {
- queue->has_error = true;
- dev_warn(kbdev->dev,
- "failed to map an external resource\n");
- }
+ if (meta == NULL) {
+ queue->has_error = true;
+ dev_dbg(
+ kbdev->dev,
+ "failed to map an external resource");
+ }
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END(
- kbdev, queue, meta ? 0 : 1);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END(
+ kbdev, queue, meta ? 0 : 1);
+ }
break;
}
case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: {
@@ -1682,8 +1687,8 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
if (!ret) {
queue->has_error = true;
- dev_warn(kbdev->dev,
- "failed to release the reference. resource not found\n");
+ dev_dbg(kbdev->dev,
+ "failed to release the reference. resource not found");
}
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END(
@@ -1703,8 +1708,8 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
if (!ret) {
queue->has_error = true;
- dev_warn(kbdev->dev,
- "failed to release the reference. resource not found\n");
+ dev_dbg(kbdev->dev,
+ "failed to release the reference. resource not found");
}
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END(
@@ -1713,24 +1718,32 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
}
case BASE_KCPU_COMMAND_TYPE_JIT_ALLOC:
{
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START(
- kbdev, queue);
-
- status = kbase_kcpu_jit_allocate_process(queue, cmd);
- if (status == -EAGAIN) {
- process_next = false;
+ if (drain_queue) {
+ /* We still need to call this function to clean the JIT alloc info up */
+ kbase_kcpu_jit_allocate_finish(queue, cmd);
} else {
- if (status != 0)
- queue->has_error = true;
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START(
+ kbdev, queue);
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO(
- kbdev, queue, &cmd->info.jit_alloc,
- status);
+ status = kbase_kcpu_jit_allocate_process(queue,
+ cmd);
+ if (status == -EAGAIN) {
+ process_next = false;
+ } else {
+ if (status != 0)
+ queue->has_error = true;
- kbase_kcpu_jit_allocate_finish(queue, cmd);
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO(
+ kbdev, queue,
+ &cmd->info.jit_alloc, status);
+
+ kbase_kcpu_jit_allocate_finish(queue,
+ cmd);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(
kbdev, queue);
+ }
}
+
break;
}
case BASE_KCPU_COMMAND_TYPE_JIT_FREE:
@@ -1748,56 +1761,39 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
struct kbase_suspend_copy_buffer *sus_buf =
cmd->info.suspend_buf_copy.sus_buf;
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START(
- kbdev, queue);
+ if (!drain_queue) {
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START(
+ kbdev, queue);
- status = kbase_csf_queue_group_suspend_process(
+ status = kbase_csf_queue_group_suspend_process(
queue->kctx, sus_buf,
cmd->info.suspend_buf_copy.group_handle);
- if (status)
- queue->has_error = true;
+ if (status)
+ queue->has_error = true;
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END(
- kbdev, queue, status);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END(
+ kbdev, queue, status);
- if (!sus_buf->cpu_alloc) {
- int i;
+ if (!sus_buf->cpu_alloc) {
+ int i;
- for (i = 0; i < sus_buf->nr_pages; i++)
- put_page(sus_buf->pages[i]);
- } else {
- kbase_mem_phy_alloc_kernel_unmapped(
- sus_buf->cpu_alloc);
- kbase_mem_phy_alloc_put(sus_buf->cpu_alloc);
+ for (i = 0; i < sus_buf->nr_pages; i++)
+ put_page(sus_buf->pages[i]);
+ } else {
+ kbase_mem_phy_alloc_kernel_unmapped(
+ sus_buf->cpu_alloc);
+ kbase_mem_phy_alloc_put(
+ sus_buf->cpu_alloc);
+ }
}
kfree(sus_buf->pages);
kfree(sus_buf);
break;
}
-#if MALI_UNIT_TEST
- case BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME: {
- u64 time = ktime_get_raw_ns();
- void *target_page = kmap(*cmd->info.sample_time.page);
-
- if (target_page) {
- memcpy(target_page +
- cmd->info.sample_time.page_offset,
- &time, sizeof(time));
- kunmap(*cmd->info.sample_time.page);
- } else {
- dev_warn(kbdev->dev,
- "Could not kmap target page\n");
- queue->has_error = true;
- }
- put_page(*cmd->info.sample_time.page);
- kfree(cmd->info.sample_time.page);
- break;
- }
-#endif /* MALI_UNIT_TEST */
default:
- dev_warn(kbdev->dev,
- "Unrecognized command type\n");
+ dev_dbg(kbdev->dev,
+ "Unrecognized command type");
break;
} /* switch */
@@ -1933,14 +1929,6 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
kbdev, queue, cmd->info.suspend_buf_copy.sus_buf,
cmd->info.suspend_buf_copy.group_handle);
break;
-#if MALI_UNIT_TEST
- case BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME:
- /*
- * This is test-only KCPU command, no need to have a timeline
- * entry
- */
- break;
-#endif /* MALI_UNIT_TEST */
}
}
@@ -1966,8 +1954,8 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
* in the set.
*/
if (enq->nr_commands != 1) {
- dev_err(kctx->kbdev->dev,
- "More than one commands enqueued\n");
+ dev_dbg(kctx->kbdev->dev,
+ "More than one commands enqueued");
return -EINVAL;
}
@@ -2081,40 +2069,9 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
&command.info.suspend_buf_copy,
kcpu_cmd);
break;
-#if MALI_UNIT_TEST
- case BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME: {
- int const page_cnt = 1;
-
- kcpu_cmd->type = BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME;
- kcpu_cmd->info.sample_time.page_addr =
- command.info.sample_time.time & PAGE_MASK;
- kcpu_cmd->info.sample_time.page_offset =
- command.info.sample_time.time & ~PAGE_MASK;
- kcpu_cmd->info.sample_time.page = kcalloc(
- page_cnt, sizeof(struct page *), GFP_KERNEL);
- if (!kcpu_cmd->info.sample_time.page) {
- ret = -ENOMEM;
- } else {
- int pinned_pages = get_user_pages_fast(
- kcpu_cmd->info.sample_time.page_addr,
- page_cnt, 1,
- kcpu_cmd->info.sample_time.page);
-
- if (pinned_pages < 0) {
- ret = pinned_pages;
- kfree(kcpu_cmd->info.sample_time.page);
- } else if (pinned_pages != page_cnt) {
- ret = -EINVAL;
- kfree(kcpu_cmd->info.sample_time.page);
- }
- }
-
- break;
- }
-#endif /* MALI_UNIT_TEST */
default:
- dev_warn(queue->kctx->kbdev->dev,
- "Unknown command type %u\n", command.type);
+ dev_dbg(queue->kctx->kbdev->dev,
+ "Unknown command type %u", command.type);
ret = -EINVAL;
break;
}
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.h b/mali_kbase/csf/mali_kbase_csf_kcpu.h
index 9964f20..6300569 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu.h
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu.h
@@ -196,13 +196,6 @@ struct kbase_kcpu_command_group_suspend_info {
u8 group_handle;
};
-#if MALI_UNIT_TEST
-struct kbase_kcpu_command_sample_time_info {
- u64 page_addr;
- u64 page_offset;
- struct page **page;
-};
-#endif /* MALI_UNIT_TEST */
/**
* struct kbase_cpu_command - Command which is to be part of the kernel
@@ -235,9 +228,6 @@ struct kbase_kcpu_command {
struct kbase_kcpu_command_jit_alloc_info jit_alloc;
struct kbase_kcpu_command_jit_free_info jit_free;
struct kbase_kcpu_command_group_suspend_info suspend_buf_copy;
-#if MALI_UNIT_TEST
- struct kbase_kcpu_command_sample_time_info sample_time;
-#endif /* MALI_UNIT_TEST */
} info;
};
diff --git a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
index f6d61d7..7b63132 100644
--- a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
+++ b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
@@ -461,11 +461,14 @@ static void kbase_csf_reset_gpu_worker(struct work_struct *data)
{
struct kbase_device *kbdev = container_of(data, struct kbase_device,
csf.reset.work);
+ bool gpu_sleep_mode_active = false;
bool firmware_inited;
unsigned long flags;
int err = 0;
const enum kbase_csf_reset_gpu_state initial_reset_state =
atomic_read(&kbdev->csf.reset.state);
+ const bool silent =
+ kbase_csf_reset_state_is_silent(initial_reset_state);
/* Ensure any threads (e.g. executing the CSF scheduler) have finished
* using the HW
@@ -474,14 +477,30 @@ static void kbase_csf_reset_gpu_worker(struct work_struct *data)
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
firmware_inited = kbdev->csf.firmware_inited;
+#ifdef KBASE_PM_RUNTIME
+ gpu_sleep_mode_active = kbdev->pm.backend.gpu_sleep_mode_active;
+#endif
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
- if (!kbase_pm_context_active_handle_suspend(kbdev,
- KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
- bool silent =
- kbase_csf_reset_state_is_silent(initial_reset_state);
+ if (unlikely(gpu_sleep_mode_active)) {
+#ifdef KBASE_PM_RUNTIME
+ /* As prior to GPU reset all on-slot groups are suspended,
+ * need to wake up the MCU from sleep.
+ * No pm active reference is taken here since GPU is in sleep
+ * state and both runtime & system suspend synchronize with the
+ * GPU reset before they wake up the GPU to suspend on-slot
+ * groups. GPUCORE-29850 would add the proper handling.
+ */
+ kbase_pm_lock(kbdev);
+ if (kbase_pm_force_mcu_wakeup_after_sleep(kbdev))
+ dev_warn(kbdev->dev, "Wait for MCU wake up failed on GPU reset");
+ kbase_pm_unlock(kbdev);
err = kbase_csf_reset_gpu_now(kbdev, firmware_inited, silent);
+#endif
+ } else if (!kbase_pm_context_active_handle_suspend(kbdev,
+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+ err = kbase_csf_reset_gpu_now(kbdev, firmware_inited, silent);
kbase_pm_context_idle(kbdev);
}
@@ -599,6 +618,8 @@ int kbase_reset_gpu_wait(struct kbase_device *kbdev)
if (!remaining) {
dev_warn(kbdev->dev, "Timed out waiting for the GPU reset to complete");
+
+
return -ETIMEDOUT;
} else if (atomic_read(&kbdev->csf.reset.state) ==
KBASE_CSF_RESET_GPU_FAILED) {
diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c
index 8109570..f22a5d7 100644
--- a/mali_kbase/csf/mali_kbase_csf_scheduler.c
+++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c
@@ -30,14 +30,16 @@
#include <linux/export.h>
#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h>
#include <uapi/gpu/arm/midgard/mali_base_kernel.h>
+#include <mali_kbase_hwaccess_time.h>
/* Value to indicate that a queue group is not groups_to_schedule list */
#define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX)
-/* Waiting timeout for scheduler state change for descheduling a CSG */
-#define CSG_SCHED_STOP_TIMEOUT_MS (50)
-
-#define CSG_SUSPEND_ON_RESET_WAIT_TIMEOUT_MS DEFAULT_RESET_TIMEOUT_MS
+/* This decides the upper limit on the waiting time for the Scheduler
+ * to exit the sleep state. Usually the value of autosuspend_delay is
+ * expected to be around 100 milliseconds.
+ */
+#define MAX_AUTO_SUSPEND_DELAY_MS (5000)
/* Maximum number of endpoints which may run tiler jobs. */
#define CSG_TILER_MAX ((u8)1)
@@ -75,10 +77,8 @@
/* CS suspended and is wait for a CQS condition */
#define CS_WAIT_SYNC_FLAG (1 << 1)
-/* 2 GPU address space slots are reserved for MCU and privileged context for HW
- * counter dumping. TODO remove the slot reserved for latter in GPUCORE-26293.
- */
-#define NUM_RESERVED_AS_SLOTS (2)
+/* A GPU address space slot is reserved for MCU. */
+#define NUM_RESERVED_AS_SLOTS (1)
static int scheduler_group_schedule(struct kbase_queue_group *group);
static void remove_group_from_idle_wait(struct kbase_queue_group *const group);
@@ -94,14 +94,116 @@ static struct kbase_queue_group *get_tock_top_group(
static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev);
static int suspend_active_queue_groups(struct kbase_device *kbdev,
unsigned long *slot_mask);
+static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
+ bool system_suspend);
static void schedule_in_cycle(struct kbase_queue_group *group, bool force);
#define kctx_as_enabled(kctx) (!kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT))
+#ifdef KBASE_PM_RUNTIME
+/**
+ * wait_for_scheduler_to_exit_sleep() - Wait for Scheduler to exit the
+ * sleeping state.
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function waits until the Scheduler has exited the sleep state; it
+ * is called when an on-slot group is terminated or when the suspend
+ * buffer of an on-slot group needs to be captured.
+ *
+ * Return: 0 when the wait is successful, otherwise an error code.
+ */
+static int wait_for_scheduler_to_exit_sleep(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ int autosuspend_delay = kbdev->dev->power.autosuspend_delay;
+ unsigned int sleep_exit_wait_time;
+ long remaining;
+ int ret = 0;
+
+ lockdep_assert_held(&scheduler->lock);
+ WARN_ON(scheduler->state != SCHED_SLEEPING);
+
+ /* No point in waiting if the autosuspend_delay value is negative.
+ * For a negative autosuspend_delay the Driver will directly go for
+ * the suspend of the Scheduler, but the autosuspend_delay value
+ * could have been changed after the sleep was initiated.
+ */
+ if (autosuspend_delay < 0)
+ return -EINVAL;
+
+ if (autosuspend_delay > MAX_AUTO_SUSPEND_DELAY_MS)
+ autosuspend_delay = MAX_AUTO_SUSPEND_DELAY_MS;
+
+ /* Usually the Scheduler would remain in the sleeping state until the
+ * auto-suspend timer expires and all active CSGs are suspended.
+ */
+ sleep_exit_wait_time = autosuspend_delay + kbdev->reset_timeout_ms;
+
+ remaining = kbase_csf_timeout_in_jiffies(sleep_exit_wait_time);
+
+ while ((scheduler->state == SCHED_SLEEPING) && !ret) {
+ mutex_unlock(&scheduler->lock);
+ remaining = wait_event_timeout(
+ kbdev->csf.event_wait,
+ (scheduler->state != SCHED_SLEEPING),
+ remaining);
+ mutex_lock(&scheduler->lock);
+ if (!remaining && (scheduler->state == SCHED_SLEEPING))
+ ret = -ETIMEDOUT;
+ }
+
+ return ret;
+}
+
+/**
+ * force_scheduler_to_exit_sleep() - Force scheduler to exit sleep state
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function will force the Scheduler to exit the sleep state by waking
+ * up the MCU and suspending the on-slot groups. It is called at the time of
+ * system suspend.
+ */
+static void force_scheduler_to_exit_sleep(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ unsigned long flags;
+ int ret;
+
+ lockdep_assert_held(&scheduler->lock);
+ WARN_ON(scheduler->state != SCHED_SLEEPING);
+ WARN_ON(!kbdev->pm.backend.gpu_sleep_mode_active);
+
+ kbase_pm_lock(kbdev);
+ ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev);
+ if (ret)
+ dev_warn(kbdev->dev, "[%llu] Wait for MCU wake up failed on forced scheduler suspend",
+ kbase_backend_get_cycle_cnt(kbdev));
+ kbase_pm_unlock(kbdev);
+
+ suspend_active_groups_on_powerdown(kbdev, true);
+
+ kbase_pm_lock(kbdev);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbdev->pm.backend.gpu_sleep_mode_active = false;
+ kbdev->pm.backend.gpu_wakeup_override = false;
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ ret = kbase_pm_wait_for_desired_state(kbdev);
+ if (ret)
+ dev_warn(kbdev->dev, "[%llu] Wait for pm state change failed on forced scheduler suspend",
+ kbase_backend_get_cycle_cnt(kbdev));
+ kbase_pm_unlock(kbdev);
+
+ scheduler->state = SCHED_SUSPENDED;
+}
+#endif
+
/**
* tick_timer_callback() - Callback function for the scheduling tick hrtimer
*
- * @timer: Pointer to the device
+ * @timer: Pointer to the scheduling tick hrtimer
*
* This function will enqueue the scheduling tick work item for immediate
* execution, if it has not been queued already.
@@ -173,14 +275,10 @@ static void cancel_tick_timer(struct kbase_device *kbdev)
static void enqueue_tick_work(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
- unsigned long flags;
lockdep_assert_held(&scheduler->lock);
- spin_lock_irqsave(&scheduler->interrupt_lock, flags);
- WARN_ON(scheduler->tick_timer_active);
- queue_work(scheduler->wq, &scheduler->tick_work);
- spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+ kbase_csf_scheduler_invoke_tick(kbdev);
}
static void release_doorbell(struct kbase_device *kbdev, int doorbell_nr)
@@ -288,11 +386,11 @@ static void scheduler_doorbell_init(struct kbase_device *kbdev)
WARN_ON(doorbell_nr != CSF_KERNEL_DOORBELL_NR);
}
-static u32 get_nr_active_csgs(struct kbase_device *kbdev)
+u32 kbase_csf_scheduler_get_nr_active_csgs_locked(struct kbase_device *kbdev)
{
u32 nr_active_csgs;
- lockdep_assert_held(&kbdev->csf.scheduler.lock);
+ lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock);
nr_active_csgs = bitmap_weight(kbdev->csf.scheduler.csg_inuse_bitmap,
kbdev->csf.global_iface.group_num);
@@ -300,27 +398,16 @@ static u32 get_nr_active_csgs(struct kbase_device *kbdev)
return nr_active_csgs;
}
-/**
- * csgs_active - returns true if any of CSG slots are in use
- *
- * @kbdev: Instance of a GPU platform device that implements a CSF interface.
- *
- * Return: the interface is actively engaged flag.
- */
-static bool csgs_active(struct kbase_device *kbdev)
+u32 kbase_csf_scheduler_get_nr_active_csgs(struct kbase_device *kbdev)
{
u32 nr_active_csgs;
+ unsigned long flags;
- mutex_lock(&kbdev->csf.scheduler.lock);
- nr_active_csgs = get_nr_active_csgs(kbdev);
- mutex_unlock(&kbdev->csf.scheduler.lock);
+ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
+ nr_active_csgs = kbase_csf_scheduler_get_nr_active_csgs_locked(kbdev);
+ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
- /* Right now if any of the CSG interfaces are in use
- * then we need to assume that there is some work pending.
- * In future when we have IDLE notifications from firmware implemented
- * then we would have a better idea of the pending work.
- */
- return (nr_active_csgs != 0);
+ return nr_active_csgs;
}
/**
@@ -395,7 +482,9 @@ static void scheduler_wait_protm_quit(struct kbase_device *kbdev)
!kbase_csf_scheduler_protected_mode_in_use(kbdev), wt);
if (!remaining)
- dev_warn(kbdev->dev, "Timeout, protm_quit wait skipped");
+ dev_warn(kbdev->dev, "[%llu] Timeout (%d ms), protm_quit wait skipped",
+ kbase_backend_get_cycle_cnt(kbdev),
+ kbdev->csf.fw_timeout_ms);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_WAIT_PROTM_QUIT_DONE, NULL,
jiffies_to_msecs(remaining));
@@ -483,20 +572,198 @@ static void disable_gpu_idle_fw_timer(struct kbase_device *kbdev)
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
}
+/**
+ * scheduler_pm_active_handle_suspend() - Acquire the PM reference count for
+ * Scheduler
+ *
+ * @kbdev: Pointer to the device
+ * @suspend_handler: Handler code for how to handle a suspend that might occur.
+ *
+ * This function is usually called when the Scheduler needs to be activated.
+ * The PM reference count is acquired for the Scheduler and the power on
+ * of the GPU is initiated.
+ *
+ * Return: zero when the PM reference was taken, otherwise the error code
+ *         returned by the PM activation request.
+ */
+static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev,
+ enum kbase_pm_suspend_handler suspend_handler)
+{
+ unsigned long flags;
+ u32 prev_count;
+ int ret = 0;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ prev_count = kbdev->csf.scheduler.pm_active_count;
+ if (!WARN_ON(prev_count == U32_MAX))
+ kbdev->csf.scheduler.pm_active_count++;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ /* On 0 => 1, make a pm_ctx_active request */
+ if (!prev_count) {
+ ret = kbase_pm_context_active_handle_suspend(kbdev,
+ suspend_handler);
+ if (ret) {
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbdev->csf.scheduler.pm_active_count--;
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ }
+ }
+
+ return ret;
+}
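The 0 => 1 transition above is the only point at which the Scheduler requests GPU power on, and a failed request is rolled back under the same lock before the error is returned. Below is a minimal standalone sketch of that take-and-roll-back pattern, with a pthread mutex standing in for the hwaccess spinlock and a stub in place of kbase_pm_context_active_handle_suspend(); none of these names come from the driver.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static uint32_t pm_active_count;

/* Stub standing in for the PM activation request; 0 means success. */
static int activate_gpu(int should_fail)
{
	return should_fail ? -1 : 0;
}

/* Take a Scheduler PM reference; roll the count back if activation fails. */
static int scheduler_pm_active(int should_fail)
{
	uint32_t prev;
	int ret = 0;

	pthread_mutex_lock(&lock);
	prev = pm_active_count++;
	pthread_mutex_unlock(&lock);

	/* 0 => 1 is the only transition that requests GPU power on. */
	if (!prev) {
		ret = activate_gpu(should_fail);
		if (ret) {
			pthread_mutex_lock(&lock);
			pm_active_count--; /* undo the count on failure */
			pthread_mutex_unlock(&lock);
		}
	}
	return ret;
}

int main(void)
{
	int ret = scheduler_pm_active(0);

	printf("first ref : %d (count=%u)\n", ret, pm_active_count);
	ret = scheduler_pm_active(0);
	printf("second ref: %d (count=%u)\n", ret, pm_active_count);
	return 0;
}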
+
+#ifdef KBASE_PM_RUNTIME
+/**
+ * scheduler_pm_active_after_sleep() - Acquire the PM reference count for
+ * Scheduler
+ *
+ * @kbdev: Pointer to the device
+ * @flags: flags containing previous interrupt state
+ *
+ * This function is called when the Scheduler needs to be activated from the
+ * sleeping state.
+ * The PM reference count is acquired for the Scheduler and the wake up of
+ * the MCU is initiated. It resets the flag that indicates to the MCU state
+ * machine that the MCU needs to be put in sleep state.
+ *
+ * Note: This function shall be called with the hwaccess lock held and it will
+ * release that lock.
+ *
+ * Return: zero when the PM reference was taken and non-zero when the
+ * system is suspending or already suspended.
+ */
+static int scheduler_pm_active_after_sleep(struct kbase_device *kbdev,
+ unsigned long flags)
+{
+ u32 prev_count;
+ int ret = 0;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ prev_count = kbdev->csf.scheduler.pm_active_count;
+ if (!WARN_ON(prev_count == U32_MAX))
+ kbdev->csf.scheduler.pm_active_count++;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ /* On 0 => 1, make a pm_ctx_active request */
+ if (!prev_count) {
+ ret = kbase_pm_context_active_handle_suspend(kbdev,
+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ if (ret)
+ kbdev->csf.scheduler.pm_active_count--;
+ else
+ kbdev->pm.backend.gpu_sleep_mode_active = false;
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ }
+
+ return ret;
+}
+#endif
+
+/**
+ * scheduler_pm_idle() - Release the PM reference count held by Scheduler
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function is usually called after the Scheduler is suspended.
+ * The PM reference count held by the Scheduler is released to trigger the
+ * power down of the GPU.
+ */
+static void scheduler_pm_idle(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ u32 prev_count;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ prev_count = kbdev->csf.scheduler.pm_active_count;
+ if (!WARN_ON(prev_count == 0))
+ kbdev->csf.scheduler.pm_active_count--;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ if (prev_count == 1)
+ kbase_pm_context_idle(kbdev);
+}
+
+#ifdef KBASE_PM_RUNTIME
+/**
+ * scheduler_pm_idle_before_sleep() - Release the PM reference count and
+ * trigger the transition to sleep state.
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function is called on the GPU idle notification. It releases the
+ * Scheduler's PM reference count and sets the flag to indicate to the
+ * MCU state machine that the MCU needs to be put in sleep state.
+ */
+static void scheduler_pm_idle_before_sleep(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ u32 prev_count;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ prev_count = kbdev->csf.scheduler.pm_active_count;
+ if (!WARN_ON(prev_count == 0))
+ kbdev->csf.scheduler.pm_active_count--;
+ kbdev->pm.backend.gpu_sleep_mode_active = true;
+ kbdev->pm.backend.exit_gpu_sleep_mode = false;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ if (prev_count == 1)
+ kbase_pm_context_idle(kbdev);
+}
+#endif
+
static void scheduler_wakeup(struct kbase_device *kbdev, bool kick)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ int ret;
lockdep_assert_held(&scheduler->lock);
+ if ((scheduler->state != SCHED_SUSPENDED) &&
+ (scheduler->state != SCHED_SLEEPING))
+ return;
+
if (scheduler->state == SCHED_SUSPENDED) {
- dev_dbg(kbdev->dev, "Re-activating the Scheduler");
- kbase_csf_scheduler_pm_active(kbdev);
- scheduler->state = SCHED_INACTIVE;
+ dev_dbg(kbdev->dev,
+ "Re-activating the Scheduler after suspend");
+ ret = scheduler_pm_active_handle_suspend(kbdev,
+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
+ } else {
+#ifdef KBASE_PM_RUNTIME
+ unsigned long flags;
- if (kick)
- scheduler_enable_tick_timer_nolock(kbdev);
+ dev_dbg(kbdev->dev,
+ "Re-activating the Scheduler out of sleep");
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ ret = scheduler_pm_active_after_sleep(kbdev, flags);
+ /* hwaccess_lock is released in the previous function call. */
+#endif
}
+
+ if (ret) {
+ /* GPUCORE-29850 would add the handling for the case where
+ * Scheduler could not be activated due to system suspend.
+ */
+ dev_info(kbdev->dev,
+ "Couldn't wakeup Scheduler due to system suspend");
+ return;
+ }
+
+ scheduler->state = SCHED_INACTIVE;
+
+ if (kick)
+ scheduler_enable_tick_timer_nolock(kbdev);
}
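scheduler_wakeup() now distinguishes resuming from a full suspend and resuming from sleep, and only moves to SCHED_INACTIVE once the relevant PM activation has succeeded. The following compact sketch shows that decision under stated assumptions: the state enum and the two stub activation helpers are illustrative, not the driver's.

#include <stdbool.h>
#include <stdio.h>

enum sched_state { SCHED_SUSPENDED, SCHED_SLEEPING, SCHED_INACTIVE, SCHED_BUSY };

/* Stubs for the two PM activation paths; 0 means success. */
static int pm_active_after_suspend(void) { return 0; }
static int pm_active_after_sleep(void)   { return 0; }

static void scheduler_wakeup(enum sched_state *state, bool kick)
{
	int ret;

	/* Nothing to do unless the Scheduler is suspended or sleeping. */
	if (*state != SCHED_SUSPENDED && *state != SCHED_SLEEPING)
		return;

	ret = (*state == SCHED_SUSPENDED) ? pm_active_after_suspend()
					  : pm_active_after_sleep();
	if (ret)
		return; /* e.g. system suspend in progress */

	*state = SCHED_INACTIVE;
	if (kick)
		printf("tick timer enabled\n");
}

int main(void)
{
	enum sched_state s = SCHED_SLEEPING;

	scheduler_wakeup(&s, true);
	printf("state=%d (SCHED_INACTIVE=%d)\n", s, SCHED_INACTIVE);
	return 0;
}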
static void scheduler_suspend(struct kbase_device *kbdev)
@@ -507,7 +774,7 @@ static void scheduler_suspend(struct kbase_device *kbdev)
if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) {
dev_dbg(kbdev->dev, "Suspending the Scheduler");
- kbase_csf_scheduler_pm_idle(kbdev);
+ scheduler_pm_idle(kbdev);
scheduler->state = SCHED_SUSPENDED;
}
}
@@ -542,11 +809,30 @@ static void update_idle_suspended_group_state(struct kbase_queue_group *group)
/* If scheduler is not suspended and the given group's
* static priority (reflected by the scan_seq_num) is inside
- * the current tick slot-range, schedules an async tock.
+ * the current tick slot-range, or there are some on_slot
+ * idle groups, schedule an async tock.
*/
- if (scheduler->state != SCHED_SUSPENDED &&
- group->scan_seq_num < scheduler->num_csg_slots_for_tick)
- schedule_in_cycle(group, true);
+ if (scheduler->state != SCHED_SUSPENDED) {
+ unsigned long flags;
+ int n_idle;
+ int n_used;
+ int n_slots =
+ group->kctx->kbdev->csf.global_iface.group_num;
+
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+ n_idle = bitmap_weight(scheduler->csg_slots_idle_mask,
+ n_slots);
+ n_used = bitmap_weight(scheduler->csg_inuse_bitmap,
+ n_slots);
+ spin_unlock_irqrestore(&scheduler->interrupt_lock,
+ flags);
+
+ if (n_idle ||
+ n_used < scheduler->num_csg_slots_for_tick ||
+ group->scan_seq_num <
+ scheduler->num_csg_slots_for_tick)
+ schedule_in_cycle(group, true);
+ }
} else
return;
@@ -586,6 +872,14 @@ int kbase_csf_scheduler_group_get_slot(struct kbase_queue_group *group)
return slot_num;
}
+/* kbasep_csf_scheduler_group_is_on_slot_locked() - Check if CSG is on slot.
+ *
+ * @group: GPU queue group to be checked
+ *
+ * This function needs to be called with scheduler's lock held
+ *
+ * Return: true if @group is on slot.
+ */
static bool kbasep_csf_scheduler_group_is_on_slot_locked(
struct kbase_queue_group *group)
{
@@ -653,11 +947,13 @@ static int halt_stream_sync(struct kbase_queue *queue)
== CS_ACK_STATE_START), remaining);
if (!remaining) {
- dev_warn(kbdev->dev, "Timed out waiting for queue to start on csi %d bound to group %d on slot %d",
+ dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for queue to start on csi %d bound to group %d on slot %d",
+ kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
csi_index, group->handle, group->csg_nr);
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
+
return -ETIMEDOUT;
}
@@ -678,7 +974,8 @@ static int halt_stream_sync(struct kbase_queue *queue)
== CS_ACK_STATE_STOP), remaining);
if (!remaining) {
- dev_warn(kbdev->dev, "Timed out waiting for queue to stop on csi %d bound to group %d on slot %d",
+ dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for queue to stop on csi %d bound to group %d on slot %d",
+ kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
queue->csi_index, group->handle, group->csg_nr);
/* TODO GPUCORE-25328: The CSG can't be terminated, the GPU
@@ -686,6 +983,8 @@ static int halt_stream_sync(struct kbase_queue *queue)
*/
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
+
+
}
return (remaining) ? 0 : -ETIMEDOUT;
}
@@ -739,6 +1038,8 @@ static int sched_halt_stream(struct kbase_queue *queue)
long remaining;
int slot;
int err = 0;
+ const u32 group_schedule_timeout =
+ 20 * kbdev->csf.scheduler.csg_scheduling_period_ms;
if (WARN_ON(!group))
return -EINVAL;
@@ -782,8 +1083,7 @@ retry:
*/
remaining = wait_event_timeout(
kbdev->csf.event_wait, can_halt_stream(kbdev, group),
- kbase_csf_timeout_in_jiffies(
- 20 * kbdev->csf.scheduler.csg_scheduling_period_ms));
+ kbase_csf_timeout_in_jiffies(group_schedule_timeout));
mutex_lock(&scheduler->lock);
@@ -845,26 +1145,62 @@ retry:
kbase_csf_firmware_cs_output(
stream, CS_ACK)) ==
CS_ACK_STATE_STOP),
- kbdev->csf.fw_timeout_ms);
+ kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms));
if (!remaining) {
dev_warn(kbdev->dev,
- "Timed out waiting for queue stop ack on csi %d bound to group %d on slot %d",
+ "[%llu] Timeout (%d ms) waiting for queue stop ack on csi %d bound to group %d on slot %d",
+ kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
queue->csi_index,
group->handle, group->csg_nr);
+
+
err = -ETIMEDOUT;
}
}
}
} else if (!remaining) {
- dev_warn(kbdev->dev, "Group-%d failed to get a slot for stopping the queue on csi %d",
- group->handle, queue->csi_index);
+ dev_warn(kbdev->dev, "[%llu] Group-%d failed to get a slot for stopping the queue on csi %d (timeout %d ms)",
+ kbase_backend_get_cycle_cnt(kbdev),
+ group->handle, queue->csi_index,
+ group_schedule_timeout);
+
+
err = -ETIMEDOUT;
}
return err;
}
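The wait in sched_halt_stream() is bounded by 20 CSG scheduling periods, converted to jiffies once so the same millisecond value can also be reported in the timeout warning. A rough standalone sketch of that conversion is shown below, assuming an example HZ value and a hypothetical timeout_in_jiffies() helper in place of kbase_csf_timeout_in_jiffies().

#include <stdio.h>

#define HZ 250U /* example tick rate; the real value is kernel-config dependent */

/* Rough stand-in for a millisecond-to-jiffies conversion, rounding up. */
static unsigned long timeout_in_jiffies(unsigned int ms)
{
	return ((unsigned long)ms * HZ + 999U) / 1000U;
}

int main(void)
{
	const unsigned int csg_scheduling_period_ms = 100;
	const unsigned int group_schedule_timeout = 20 * csg_scheduling_period_ms;

	printf("timeout: %u ms = %lu jiffies at HZ=%u\n",
	       group_schedule_timeout,
	       timeout_in_jiffies(group_schedule_timeout), HZ);
	return 0;
}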
+/**
+ * scheduler_activate_on_queue_stop() - Activate the Scheduler when the GPU
+ * queue needs to be stopped.
+ *
+ * @queue: Pointer to the GPU command queue
+ *
+ * This function is called when the CSI to which the GPU queue is bound needs
+ * to be stopped. For that, the corresponding queue group needs to be resident
+ * on a CSG slot and the MCU firmware should be running, so this function makes
+ * the Scheduler exit the sleeping or suspended state.
+ */
+static void scheduler_activate_on_queue_stop(struct kbase_queue *queue)
+{
+ struct kbase_device *kbdev = queue->kctx->kbdev;
+
+ scheduler_wakeup(kbdev, true);
+
+ /* Wait for MCU firmware to start running */
+ if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
+ dev_warn(
+ kbdev->dev,
+ "[%llu] Wait for MCU active failed for stopping queue on csi %d bound to group %d of context %d_%d on slot %d",
+ kbase_backend_get_cycle_cnt(kbdev),
+ queue->csi_index, queue->group->handle,
+ queue->kctx->tgid, queue->kctx->id,
+ queue->group->csg_nr);
+ }
+}
+
int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue)
{
struct kbase_device *kbdev = queue->kctx->kbdev;
@@ -890,7 +1226,7 @@ int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue)
/* Since the group needs to be resumed in order to stop the queue,
* check if GPU needs to be powered up.
*/
- scheduler_wakeup(kbdev, true);
+ scheduler_activate_on_queue_stop(queue);
if ((slot >= 0) &&
(atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING))
@@ -1228,7 +1564,9 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend)
csg_slot_running(kbdev, slot), remaining);
if (!remaining)
dev_warn(kbdev->dev,
- "slot %d timed out on up-running\n", slot);
+ "[%llu] slot %d timeout (%d ms) on up-running\n",
+ kbase_backend_get_cycle_cnt(kbdev),
+ slot, kbdev->csf.fw_timeout_ms);
}
if (csg_slot_running(kbdev, slot)) {
@@ -1251,6 +1589,8 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend)
csg_slot[slot].trigger_jiffies = jiffies;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP, group, halt_cmd);
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG(
+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot);
kbase_csf_ring_csg_doorbell(kbdev, slot);
}
}
@@ -1399,37 +1739,6 @@ bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo,
return is_waiting;
}
-/**
- * Calculate how far in the future an event should be scheduled.
- *
- * The objective of this function is making sure that a minimum period of
- * time is guaranteed between handling two consecutive events.
- *
- * This function guarantees a minimum period of time between two consecutive
- * events: given the minimum period and the distance between the current time
- * and the last event, the function returns the difference between the two.
- * However, if more time than the minimum period has already elapsed
- * since the last event, the function will return 0 to schedule work to handle
- * the event with the lowest latency possible.
- *
- * @last_event: Timestamp of the last event, in jiffies.
- * @time_now: Timestamp of the new event to handle, in jiffies.
- * Must be successive to last_event.
- * @period: Minimum period between two events, in jiffies.
- *
- * Return: Time to delay work to handle the current event, in jiffies
- */
-static unsigned long get_schedule_delay(unsigned long last_event,
- unsigned long time_now,
- unsigned long period)
-{
- const unsigned long t_distance = time_now - last_event;
- const unsigned long delay_t = (t_distance < period) ?
- (period - t_distance) : 0;
-
- return delay_t;
-}
-
static void schedule_in_cycle(struct kbase_queue_group *group, bool force)
{
struct kbase_context *kctx = group->kctx;
@@ -1446,13 +1755,10 @@ static void schedule_in_cycle(struct kbase_queue_group *group, bool force)
*/
if ((likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) &&
!scheduler->tock_pending_request) {
- const unsigned long delay =
- get_schedule_delay(scheduler->last_schedule, jiffies,
- CSF_SCHEDULER_TIME_TOCK_JIFFIES);
scheduler->tock_pending_request = true;
dev_dbg(kbdev->dev, "Kicking async for group %d\n",
group->handle);
- mod_delayed_work(scheduler->wq, &scheduler->tock_work, delay);
+ mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0);
}
}
@@ -1494,7 +1800,8 @@ void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler,
if (likely(scheduler_timer_is_enabled_nolock(kbdev)) &&
(scheduler->total_runnable_grps == 1 ||
- scheduler->state == SCHED_SUSPENDED)) {
+ scheduler->state == SCHED_SUSPENDED ||
+ scheduler->state == SCHED_SLEEPING)) {
dev_dbg(kbdev->dev, "Kicking scheduler on first runnable group\n");
/* Fire a scheduling to start the time-slice */
enqueue_tick_work(kbdev);
@@ -1516,6 +1823,7 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler,
struct kbase_queue_group *new_head_grp;
struct list_head *list =
&kctx->csf.sched.runnable_groups[group->priority];
+ unsigned long flags;
lockdep_assert_held(&scheduler->lock);
@@ -1524,6 +1832,30 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler,
group->run_state = run_state;
list_del_init(&group->link);
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+ /* The below condition will be true when the group running in protected
+ * mode is being terminated but the protected mode exit interrupt wasn't
+ * received. This can happen if the FW got stuck during protected mode
+ * for some reason (like GPU page fault or some internal error).
+ * In normal cases FW is expected to send the protected mode exit
+ * interrupt before it handles the CSG termination request.
+ */
+ if (unlikely(scheduler->active_protm_grp == group)) {
+ /* CSG slot cleanup should have happened for the pmode group */
+ WARN_ON(kbasep_csf_scheduler_group_is_on_slot_locked(group));
+ WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE);
+ /* Initiate a GPU reset, in case it wasn't initiated yet,
+ * in order to rectify the anomaly.
+ */
+ if (kbase_prepare_to_reset_gpu(kctx->kbdev, RESET_FLAGS_NONE))
+ kbase_reset_gpu(kctx->kbdev);
+
+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_EXIT_PROTM,
+ scheduler->active_protm_grp, 0u);
+ scheduler->active_protm_grp = NULL;
+ }
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+
if (scheduler->top_grp == group) {
/*
* Note: this disables explicit rotation in the next scheduling
@@ -2025,6 +2357,9 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_OTHER,
tiler_mask & U32_MAX);
+ /* Register group UID with firmware */
+ kbase_csf_firmware_csg_input(ginfo, CSG_ITER_TRACE_CONFIG,
+ group->group_uid);
ep_cfg = CSG_EP_REQ_COMPUTE_EP_SET(ep_cfg, compute_max);
ep_cfg = CSG_EP_REQ_FRAGMENT_EP_SET(ep_cfg, fragment_max);
@@ -2077,8 +2412,9 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
csg_slot->priority = prio;
/* Trace the programming of the CSG on the slot */
- KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG(kbdev,
- kbdev->gpu_props.props.raw_props.gpu_id, group->handle, slot);
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG(
+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, group->kctx->id,
+ group->handle, slot);
dev_dbg(kbdev->dev, "Starting group %d of context %d_%d on slot %d with priority %u\n",
group->handle, kctx->tgid, kctx->id, slot, prio);
@@ -2175,11 +2511,14 @@ static int term_group_sync(struct kbase_queue_group *group)
csg_slot_stopped_locked(kbdev, group->csg_nr), remaining);
if (!remaining) {
- dev_warn(kbdev->dev, "term request timed out for group %d of context %d_%d on slot %d",
+ dev_warn(kbdev->dev, "[%llu] term request timeout (%d ms) for group %d of context %d_%d on slot %d",
+ kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
group->handle, group->kctx->tgid,
group->kctx->id, group->csg_nr);
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
+
+
err = -ETIMEDOUT;
}
@@ -2190,46 +2529,70 @@ void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group)
{
struct kbase_device *kbdev = group->kctx->kbdev;
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
- long remaining =
- kbase_csf_timeout_in_jiffies(CSG_SCHED_STOP_TIMEOUT_MS);
- bool force = false;
+ bool on_slot;
kbase_reset_gpu_assert_failed_or_prevented(kbdev);
lockdep_assert_held(&group->kctx->csf.lock);
mutex_lock(&scheduler->lock);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_DESCHEDULE, group, group->run_state);
- while (queue_group_scheduled_locked(group)) {
- u32 saved_state = scheduler->state;
-
- if (!kbasep_csf_scheduler_group_is_on_slot_locked(group)) {
- sched_evict_group(group, false, true);
- } else if (saved_state == SCHED_INACTIVE || force) {
- bool as_faulty;
-
- term_group_sync(group);
- /* Treat the csg been terminated */
- as_faulty = cleanup_csg_slot(group);
- /* remove from the scheduler list */
- sched_evict_group(group, as_faulty, false);
- }
+ if (!queue_group_scheduled_locked(group))
+ goto unlock;
- /* waiting scheduler state to change */
- if (queue_group_scheduled_locked(group)) {
- mutex_unlock(&scheduler->lock);
- remaining = wait_event_timeout(
- kbdev->csf.event_wait,
- saved_state != scheduler->state,
- remaining);
- if (!remaining) {
- dev_warn(kbdev->dev, "Scheduler state change wait timed out for group %d on slot %d",
- group->handle, group->csg_nr);
- force = true;
- }
- mutex_lock(&scheduler->lock);
+ on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
+
+#ifdef KBASE_PM_RUNTIME
+ /* If the queue group is on slot and Scheduler is in SLEEPING state,
+ * then we need to wait here for Scheduler to exit the sleep state
+ * (i.e. wait for the runtime suspend or power down of GPU). This would
+ * be better than aborting the power down. The group will be suspended
+ * anyway on power down, so we won't have to send the CSG termination
+ * request to FW.
+ */
+ if (on_slot && (scheduler->state == SCHED_SLEEPING)) {
+ if (wait_for_scheduler_to_exit_sleep(kbdev)) {
+ dev_warn(
+ kbdev->dev,
+ "Wait for scheduler to exit sleep state timed out when terminating group %d of context %d_%d on slot %d",
+ group->handle, group->kctx->tgid,
+ group->kctx->id, group->csg_nr);
+
+ scheduler_wakeup(kbdev, true);
+
+ /* Wait for MCU firmware to start running */
+ if (kbase_csf_scheduler_wait_mcu_active(kbdev))
+ dev_warn(
+ kbdev->dev,
+ "[%llu] Wait for MCU active failed when terminating group %d of context %d_%d on slot %d",
+ kbase_backend_get_cycle_cnt(kbdev),
+ group->handle, group->kctx->tgid,
+ group->kctx->id, group->csg_nr);
}
+
+ /* Check the group state again as scheduler lock would have been
+ * released when waiting for the exit from SLEEPING state.
+ */
+ if (!queue_group_scheduled_locked(group))
+ goto unlock;
+
+ on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
+ }
+#endif
+ if (!on_slot) {
+ sched_evict_group(group, false, true);
+ } else {
+ bool as_faulty;
+
+ term_group_sync(group);
+ /* Treat the CSG as terminated */
+ as_faulty = cleanup_csg_slot(group);
+ /* remove from the scheduler list */
+ sched_evict_group(group, as_faulty, false);
}
+ WARN_ON(queue_group_scheduled_locked(group));
+
+unlock:
mutex_unlock(&scheduler->lock);
}
@@ -2684,9 +3047,11 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
*/
dev_warn(
kbdev->dev,
- "Group %d of context %d_%d on slot %u failed to suspend",
+ "[%llu] Group %d of context %d_%d on slot %u failed to suspend (timeout %d ms)",
+ kbase_backend_get_cycle_cnt(kbdev),
group->handle, group->kctx->tgid,
- group->kctx->id, i);
+ group->kctx->id, i,
+ kbdev->csf.fw_timeout_ms);
/* The group has failed suspension, stop
* further examination.
@@ -2784,7 +3149,9 @@ static void wait_csg_slots_start(struct kbase_device *kbdev)
group->run_state = KBASE_CSF_GROUP_RUNNABLE;
}
} else {
- dev_warn(kbdev->dev, "Timed out waiting for CSG slots to start, slots: 0x%*pb\n",
+ dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for CSG slots to start, slots: 0x%*pb\n",
+ kbase_backend_get_cycle_cnt(kbdev),
+ kbdev->csf.fw_timeout_ms,
num_groups, slot_mask);
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
@@ -2904,9 +3271,12 @@ static int wait_csg_slots_handshake_ack(struct kbase_device *kbdev,
if (remaining)
bitmap_andnot(slot_mask, slot_mask, dones, num_groups);
- else
+ else {
+
+
/* Timed-out on the wait */
return -ETIMEDOUT;
+ }
}
return 0;
@@ -2929,7 +3299,9 @@ static void wait_csg_slots_finish_prio_update(struct kbase_device *kbdev)
*/
dev_warn(
kbdev->dev,
- "Timeout on CSG_REQ:EP_CFG, skipping the update wait: slot mask=0x%lx",
+ "[%llu] Timeout (%d ms) on CSG_REQ:EP_CFG, skipping the update wait: slot mask=0x%lx",
+ kbase_backend_get_cycle_cnt(kbdev),
+ kbdev->csf.fw_timeout_ms,
slot_mask[0]);
}
}
@@ -3075,7 +3447,11 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
- protm_in_use = kbase_csf_scheduler_protected_mode_in_use(kbdev);
+ /* Check if the previous transition to enter & exit the protected
+ * mode has completed or not.
+ */
+ protm_in_use = kbase_csf_scheduler_protected_mode_in_use(kbdev) ||
+ kbdev->protected_mode;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_CHECK_PROTM_ENTER, input_grp,
protm_in_use);
@@ -3123,8 +3499,10 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_ENTER_PROTM,
input_grp, 0u);
- spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
kbase_csf_enter_protected_mode(kbdev);
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+
+ kbase_csf_wait_protected_mode_enter(kbdev);
return;
}
}
@@ -3433,7 +3811,9 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
CSG_REQ_STATUS_UPDATE_MASK, csg_bitmap, wt)) {
dev_warn(
kbdev->dev,
- "Timeout on CSG_REQ:STATUS_UPDATE, treat groups as not idle: slot mask=0x%lx",
+ "[%llu] Timeout (%d ms) on CSG_REQ:STATUS_UPDATE, treat groups as not idle: slot mask=0x%lx",
+ kbase_backend_get_cycle_cnt(kbdev),
+ kbdev->csf.fw_timeout_ms,
csg_bitmap[0]);
/* Store the bitmap of timed out slots */
@@ -3576,7 +3956,7 @@ static struct kbase_queue_group *get_tock_top_group(
}
static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
- bool is_suspend)
+ bool system_suspend)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 };
@@ -3587,15 +3967,19 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
/* The suspend of CSGs failed, trigger the GPU reset and wait
* for it to complete to be in a deterministic state.
*/
- dev_warn(kbdev->dev, "Timed out waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n",
+ dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n",
+ kbase_backend_get_cycle_cnt(kbdev),
+ kbdev->csf.fw_timeout_ms,
kbdev->csf.global_iface.group_num, slot_mask);
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
- if (is_suspend) {
+ if (system_suspend) {
mutex_unlock(&scheduler->lock);
+ kbase_reset_gpu_allow(kbdev);
kbase_reset_gpu_wait(kbdev);
+ kbase_reset_gpu_prevent_and_wait(kbdev);
mutex_lock(&scheduler->lock);
}
return -1;
@@ -3604,7 +3988,7 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
/* Check if the groups became active whilst the suspend was ongoing,
* but only for the case where the system suspend is not in progress
*/
- if (!is_suspend && atomic_read(&scheduler->non_idle_offslot_grps))
+ if (!system_suspend && atomic_read(&scheduler->non_idle_offslot_grps))
return -1;
return 0;
@@ -3618,7 +4002,8 @@ static bool scheduler_idle_suspendable(struct kbase_device *kbdev)
lockdep_assert_held(&scheduler->lock);
- if (scheduler->state == SCHED_SUSPENDED)
+ if ((scheduler->state == SCHED_SUSPENDED) ||
+ (scheduler->state == SCHED_SLEEPING))
return false;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -3639,12 +4024,66 @@ static bool scheduler_idle_suspendable(struct kbase_device *kbdev)
return suspend;
}
+#ifdef KBASE_PM_RUNTIME
+/**
+ * scheduler_sleep_on_idle - Put the Scheduler in sleeping state on GPU
+ * becoming idle.
+ *
+ * @kbdev: Pointer to the device.
+ *
+ * This function is called on the GPU idle notification to trigger the
+ * transition of the GPU to sleep state, where the MCU firmware pauses
+ * execution and the L2 cache is turned off. The Scheduler's state is changed
+ * to sleeping and all the active queue
+ * groups remain on the CSG slots.
+ */
+static void scheduler_sleep_on_idle(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+
+ lockdep_assert_held(&scheduler->lock);
+
+ dev_dbg(kbdev->dev,
+ "Scheduler to be put to sleep on GPU becoming idle");
+ cancel_tick_timer(kbdev);
+ scheduler_pm_idle_before_sleep(kbdev);
+ scheduler->state = SCHED_SLEEPING;
+}
+#endif
+
+/**
+ * scheduler_suspend_on_idle - Put the Scheduler in suspended state on GPU
+ * becoming idle.
+ *
+ * @kbdev: Pointer to the device.
+ *
+ * This function is called on the GPU idle notification to trigger the power
+ * down of the GPU. The Scheduler's state is changed to suspended and all the
+ * active queue groups are suspended before halting the MCU firmware.
+ *
+ * Return: true if all the active queue groups were suspended and the Scheduler
+ *         was put in the suspended state, otherwise false.
+ */
+static bool scheduler_suspend_on_idle(struct kbase_device *kbdev)
+{
+ int ret = suspend_active_groups_on_powerdown(kbdev, false);
+
+ if (ret) {
+ dev_dbg(kbdev->dev, "Aborting suspend scheduler (grps: %d)",
+ atomic_read(
+ &kbdev->csf.scheduler.non_idle_offslot_grps));
+ /* Bring forward the next tick */
+ kbase_csf_scheduler_advance_tick(kbdev);
+ return false;
+ }
+
+ dev_dbg(kbdev->dev, "Scheduler to be suspended on GPU becoming idle");
+ scheduler_suspend(kbdev);
+ cancel_tick_timer(kbdev);
+ return true;
+}
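gpu_idle_worker() chooses between these two helpers: sleep is preferred when runtime GPU sleep is allowed and runnable groups remain, otherwise a full suspend is attempted and aborted if the group suspension fails. The small sketch below captures that decision with stub predicates standing in for kbase_pm_gpu_sleep_allowed() and the group-suspend path; the strings simply name the resulting state.

#include <stdbool.h>
#include <stdio.h>

/* Stubs for the sleep-allowed check and the group-suspend path. */
static bool gpu_sleep_allowed = true;
static bool suspend_groups(void) { return true; } /* true: all suspended */

static const char *on_gpu_idle(unsigned int total_runnable_grps)
{
	if (gpu_sleep_allowed && total_runnable_grps)
		return "SCHED_SLEEPING"; /* keep CSGs on slots, pause MCU */

	return suspend_groups() ? "SCHED_SUSPENDED"
				: "abort, advance next tick";
}

int main(void)
{
	printf("idle with 3 runnable groups -> %s\n", on_gpu_idle(3));
	printf("idle with 0 runnable groups -> %s\n", on_gpu_idle(0));
	return 0;
}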
+
static void gpu_idle_worker(struct work_struct *work)
{
struct kbase_device *kbdev = container_of(
work, struct kbase_device, csf.scheduler.gpu_idle_work);
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
- bool reset_active = false;
bool scheduler_is_idle_suspendable = false;
bool all_groups_suspended = false;
@@ -3664,27 +4103,22 @@ static void gpu_idle_worker(struct work_struct *work)
/* Cycle completed, disable the firmware idle timer */
disable_gpu_idle_fw_timer(kbdev);
scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev);
- reset_active = kbase_reset_gpu_is_active(kbdev);
- if (scheduler_is_idle_suspendable && !reset_active) {
- all_groups_suspended =
- !suspend_active_groups_on_powerdown(kbdev, false);
-
- if (all_groups_suspended) {
- dev_dbg(kbdev->dev, "Scheduler becomes idle suspended now");
- scheduler_suspend(kbdev);
- cancel_tick_timer(kbdev);
- } else {
- dev_dbg(kbdev->dev, "Aborting suspend scheduler (grps: %d)",
- atomic_read(&scheduler->non_idle_offslot_grps));
- /* Bring forward the next tick */
- kbase_csf_scheduler_advance_tick(kbdev);
- }
+ if (scheduler_is_idle_suspendable) {
+#ifdef KBASE_PM_RUNTIME
+ if (kbase_pm_gpu_sleep_allowed(kbdev) &&
+ scheduler->total_runnable_grps)
+ scheduler_sleep_on_idle(kbdev);
+ else
+#endif
+ all_groups_suspended = scheduler_suspend_on_idle(kbdev);
}
mutex_unlock(&scheduler->lock);
kbase_reset_gpu_allow(kbdev);
KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_END, NULL,
- __ENCODE_KTRACE_INFO(reset_active, scheduler_is_idle_suspendable, all_groups_suspended));
+ __ENCODE_KTRACE_INFO(false,
+ scheduler_is_idle_suspendable,
+ all_groups_suspended));
#undef __ENCODE_KTRACE_INFO
}
@@ -3777,32 +4211,151 @@ static void scheduler_handle_idle_timer_onoff(struct kbase_device *kbdev)
enable_gpu_idle_fw_timer(kbdev);
}
-static void schedule_actions(struct kbase_device *kbdev)
+/**
+ * keep_lru_on_slots() - Check whether the condition is met for keeping the
+ *                       Last-Recent-Use (LRU) arrangement on slots.
+ *
+ * This function tries to maintain the Last-Recent-Use arrangement on the
+ * slots when the scheduler has no non-idle off-slot CSGs to consider as
+ * replacements. This effectively extends the previous scheduling result to
+ * the new one: the most recently used CSGs are retained on their slots for
+ * the new tick/tock action.
+ *
+ * @kbdev: Pointer to the device.
+ *
+ * Return: true to avoid on-slot CSG changes (i.e. keep the existing LRU
+ *         arrangement), otherwise false.
+ */
+static bool keep_lru_on_slots(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ bool keep_lru = false;
+ int on_slots = bitmap_weight(scheduler->csg_inuse_bitmap,
+ kbdev->csf.global_iface.group_num);
+
+ lockdep_assert_held(&scheduler->lock);
+
+ if (on_slots && !atomic_read(&scheduler->non_idle_offslot_grps)) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+ /* All on-slots are idle, no non-idle off-slot CSGs available
+ * for considering a meaningful change. Set keep_lru.
+ */
+ keep_lru = kbase_csf_scheduler_all_csgs_idle(kbdev);
+
+ if (keep_lru && !scheduler->gpu_idle_fw_timer_enabled) {
+ scheduler->gpu_idle_fw_timer_enabled = true;
+ kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
+ }
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+
+ dev_dbg(kbdev->dev, "Keep_LRU: %d, CSGs on-slots: %d\n",
+ keep_lru, on_slots);
+ }
+
+ return keep_lru;
+}
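The keep-LRU decision reduces to two bitmap weights plus the non-idle off-slot counter. A standalone approximation follows, assuming __builtin_popcountl() in place of bitmap_weight() and a single-word bitmap; the exact all-CSGs-idle test in the driver sits behind kbase_csf_scheduler_all_csgs_idle(), so the comparison below is only a stand-in for it.

#include <stdbool.h>
#include <stdio.h>

/* Decide whether the previous on-slot arrangement can be kept as-is. */
static bool keep_lru(unsigned long inuse_bitmap, unsigned long idle_bitmap,
		     int non_idle_offslot_grps)
{
	int on_slots = __builtin_popcountl(inuse_bitmap);

	if (!on_slots || non_idle_offslot_grps)
		return false; /* a replacement may be meaningful */

	/* Keep LRU only if every in-use slot is also flagged idle. */
	return __builtin_popcountl(idle_bitmap & inuse_bitmap) == on_slots;
}

int main(void)
{
	printf("%d\n", keep_lru(0x7UL, 0x7UL, 0)); /* 1: all on-slot CSGs idle */
	printf("%d\n", keep_lru(0x7UL, 0x3UL, 0)); /* 0: one CSG still busy */
	printf("%d\n", keep_lru(0x7UL, 0x7UL, 2)); /* 0: off-slot work waiting */
	return 0;
}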
+
+/**
+ * prepare_fast_local_tock() - Prepare for exercising a fast local tock
+ *                             inside schedule_actions().
+ *
+ * A fast local tock is an equivalent tock action performed without dropping
+ * the scheduler lock, and it is desired when there are idle on-slot CSGs. As
+ * a preparation, this function updates the run-state of the affected CSGs.
+ * It should only be called from inside schedule_actions(), where the
+ * previously recorded idle flags are still considered reflective: they
+ * follow the earlier idle confirmation operation, plus any CSGs that newly
+ * became idle during the committing steps of the scheduling action.
+ *
+ * @kbdev: Pointer to the GPU device.
+ *
+ * Return: number of on-slot CSGs that can be considered for replacement.
+ */
+static int prepare_fast_local_tock(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ u32 num_groups = kbdev->csf.global_iface.group_num;
+ unsigned long flags, i;
+ DECLARE_BITMAP(csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 };
+
+ lockdep_assert_held(&scheduler->lock);
+
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+ bitmap_copy(csg_bitmap, scheduler->csg_slots_idle_mask, num_groups);
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+
+ /* Mark the flagged idle CSGs' run state as IDLE, so the
+ * intended fast local tock can replace them with off-slot
+ * non-idle CSGs.
+ */
+ for_each_set_bit(i, csg_bitmap, num_groups) {
+ struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i];
+ struct kbase_queue_group *group = csg_slot->resident_group;
+
+ if (!queue_group_idle_locked(group))
+ group->run_state = KBASE_CSF_GROUP_IDLE;
+ }
+
+ /* Return the number of idle slots for potential replacement */
+ return bitmap_weight(csg_bitmap, num_groups);
+}
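The snapshot-then-iterate pattern used above, copying the idle mask under the interrupt lock and then walking its set bits without holding it, can be shown in miniature with a plain loop replacing for_each_set_bit(); the slot count and bitmap value below are made up for illustration.

#include <stdio.h>

#define MAX_SLOTS 32U

enum run_state { GROUP_RUNNABLE, GROUP_IDLE };

int main(void)
{
	enum run_state groups[MAX_SLOTS] = { GROUP_RUNNABLE };
	unsigned long idle_snapshot = 0x15UL; /* slots 0, 2 and 4 flagged idle */
	unsigned int i, idle_slots = 0;

	/* Walk the set bits of the snapshot, as for_each_set_bit() would. */
	for (i = 0; i < MAX_SLOTS; i++) {
		if (!(idle_snapshot & (1UL << i)))
			continue;
		groups[i] = GROUP_IDLE; /* candidate for replacement */
		idle_slots++;
	}

	printf("%u slots available (slot 0 state=%d)\n", idle_slots, groups[0]);
	return 0;
}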
+
+static void schedule_actions(struct kbase_device *kbdev, bool is_tick)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
unsigned long flags;
struct kbase_queue_group *protm_grp;
int ret;
+ bool skip_scheduling_actions;
bool skip_idle_slots_update;
bool new_protm_top_grp = false;
+ int local_tock_slots = 0;
kbase_reset_gpu_assert_prevented(kbdev);
lockdep_assert_held(&scheduler->lock);
- ret = kbase_pm_wait_for_desired_state(kbdev);
+ ret = kbase_csf_scheduler_wait_mcu_active(kbdev);
if (ret) {
- dev_err(kbdev->dev, "Wait for MCU power on failed");
+ dev_err(kbdev->dev,
+ "Wait for MCU power on failed on scheduling tick/tock");
return;
}
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
skip_idle_slots_update = kbase_csf_scheduler_protected_mode_in_use(kbdev);
+ skip_scheduling_actions =
+ !skip_idle_slots_update && kbdev->protected_mode;
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
- /* Skip updating on-slot idle CSGs if GPU is in protected mode. */
- if (!skip_idle_slots_update)
+ /* Skip scheduling actions as GPU reset hasn't been performed yet to
+ * rectify the anomaly that happened when pmode exit interrupt wasn't
+ * received before the termination of group running in pmode.
+ */
+ if (unlikely(skip_scheduling_actions)) {
+ dev_info(kbdev->dev,
+ "Scheduling actions skipped due to anomaly in pmode");
+ return;
+ }
+
+ if (!skip_idle_slots_update) {
+ /* Updating on-slot idle CSGs when not in protected mode. */
scheduler_handle_idle_slots(kbdev);
+ /* Determine whether the condition is met for keeping the
+ * Last-Recent-Use arrangement. If so, skip the remaining
+ * action steps and thus extend the previous tick's
+ * arrangement; in particular, make no alterations to the
+ * on-slot CSGs.
+ */
+ if (keep_lru_on_slots(kbdev))
+ return;
+ }
+
+ if (is_tick)
+ scheduler_rotate(kbdev);
+
+redo_local_tock:
scheduler_prepare(kbdev);
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
protm_grp = scheduler->active_protm_grp;
@@ -3866,6 +4419,21 @@ static void schedule_actions(struct kbase_device *kbdev)
if (new_protm_top_grp) {
scheduler_group_check_protm_enter(kbdev,
scheduler->top_grp);
+ } else if (!local_tock_slots &&
+ atomic_read(&scheduler->non_idle_offslot_grps)) {
+ /* If there are non-idle off-slot CSGs waiting and some
+ * new idle slots emerged during the committed action
+ * steps, trigger a one-off fast local tock.
+ */
+ local_tock_slots = prepare_fast_local_tock(kbdev);
+
+ if (local_tock_slots) {
+ dev_dbg(kbdev->dev,
+ "In-cycle %d idle slots available\n",
+ local_tock_slots);
+ goto redo_local_tock;
+ }
}
return;
@@ -3875,13 +4443,66 @@ static void schedule_actions(struct kbase_device *kbdev)
return;
}
+/**
+ * can_skip_scheduling() - Check if the scheduling actions can be skipped.
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function is called on a scheduling tick or tock to determine if the
+ * scheduling actions can be skipped.
+ * If the Scheduler is in the sleeping state and exit from the sleep state is
+ * allowed, then activation of the MCU will be triggered. The tick or tock
+ * work item could have been in flight when the state of the Scheduler was
+ * changed to sleeping.
+ *
+ * Return: true if the scheduling actions can be skipped.
+ */
+static bool can_skip_scheduling(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+
+ lockdep_assert_held(&scheduler->lock);
+
+ if (scheduler->state == SCHED_SUSPENDED)
+ return true;
+
+#ifdef KBASE_PM_RUNTIME
+ if (scheduler->state == SCHED_SLEEPING) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ if (kbdev->pm.backend.exit_gpu_sleep_mode) {
+ int ret = scheduler_pm_active_after_sleep(kbdev, flags);
+ /* hwaccess_lock is released in the previous function
+ * call.
+ */
+ if (!ret) {
+ scheduler->state = SCHED_INACTIVE;
+ return false;
+ }
+
+ dev_info(kbdev->dev,
+ "Skip scheduling due to system suspend");
+ return true;
+ }
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ return true;
+ }
+#endif
+
+ return false;
+}
+
static void schedule_on_tock(struct work_struct *work)
{
struct kbase_device *kbdev = container_of(work, struct kbase_device,
csf.scheduler.tock_work.work);
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ int err;
- int err = kbase_reset_gpu_try_prevent(kbdev);
+ /* Tock work item is serviced */
+ scheduler->tock_pending_request = false;
+
+ err = kbase_reset_gpu_try_prevent(kbdev);
/* Regardless of whether reset failed or is currently happening, exit
* early
*/
@@ -3889,7 +4510,7 @@ static void schedule_on_tock(struct work_struct *work)
return;
mutex_lock(&scheduler->lock);
- if (scheduler->state == SCHED_SUSPENDED)
+ if (can_skip_scheduling(kbdev))
goto exit_no_schedule_unlock;
WARN_ON(!(scheduler->state == SCHED_INACTIVE));
@@ -3897,15 +4518,14 @@ static void schedule_on_tock(struct work_struct *work)
/* Undertaking schedule action steps */
KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK, NULL, 0u);
- schedule_actions(kbdev);
+ schedule_actions(kbdev, false);
- /* Record time information */
+ /* Record time information on a non-skipped tock */
scheduler->last_schedule = jiffies;
- /* Tock is serviced */
- scheduler->tock_pending_request = false;
-
scheduler->state = SCHED_INACTIVE;
+ if (!scheduler->total_runnable_grps)
+ queue_work(system_wq, &scheduler->gpu_idle_work);
mutex_unlock(&scheduler->lock);
kbase_reset_gpu_allow(kbdev);
@@ -3936,17 +4556,15 @@ static void schedule_on_tick(struct work_struct *work)
mutex_lock(&scheduler->lock);
WARN_ON(scheduler->tick_timer_active);
- if (scheduler->state == SCHED_SUSPENDED)
+ if (can_skip_scheduling(kbdev))
goto exit_no_schedule_unlock;
scheduler->state = SCHED_BUSY;
- /* Do scheduling stuff */
- scheduler_rotate(kbdev);
/* Undertaking schedule action steps */
KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK, NULL,
scheduler->total_runnable_grps);
- schedule_actions(kbdev);
+ schedule_actions(kbdev, true);
/* Record time information */
scheduler->last_schedule = jiffies;
@@ -3958,7 +4576,8 @@ static void schedule_on_tick(struct work_struct *work)
dev_dbg(kbdev->dev,
"scheduling for next tick, num_runnable_groups:%u\n",
scheduler->total_runnable_grps);
- }
+ } else if (!scheduler->total_runnable_grps)
+ queue_work(system_wq, &scheduler->gpu_idle_work);
scheduler->state = SCHED_INACTIVE;
mutex_unlock(&scheduler->lock);
@@ -4024,8 +4643,11 @@ static int wait_csg_slots_suspend(struct kbase_device *kbdev,
}
}
} else {
- dev_warn(kbdev->dev, "Timed out waiting for CSG slots to suspend, slot_mask: 0x%*pb\n",
+ dev_warn(kbdev->dev, "[%llu] Timeout waiting for CSG slots to suspend, slot_mask: 0x%*pb\n",
+ kbase_backend_get_cycle_cnt(kbdev),
num_groups, slot_mask_local);
+
+
err = -ETIMEDOUT;
}
}
@@ -4069,7 +4691,7 @@ static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev)
ret = suspend_active_queue_groups(kbdev, slot_mask);
if (ret) {
- dev_warn(kbdev->dev, "Timed out waiting for CSG slots to suspend before reset, slot_mask: 0x%*pb\n",
+ dev_warn(kbdev->dev, "Timeout waiting for CSG slots to suspend before reset, slot_mask: 0x%*pb\n",
kbdev->csf.global_iface.group_num, slot_mask);
}
@@ -4088,7 +4710,8 @@ static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev)
ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev,
kbdev->reset_timeout_ms);
if (ret2) {
- dev_warn(kbdev->dev, "Timed out waiting for cache clean to complete before reset");
+ dev_warn(kbdev->dev, "[%llu] Timeout waiting for cache clean to complete before reset",
+ kbase_backend_get_cycle_cnt(kbdev));
if (!ret)
ret = ret2;
}
@@ -4125,7 +4748,8 @@ static bool scheduler_handle_reset_in_protected_mode(struct kbase_device *kbdev)
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
u32 const num_groups = kbdev->csf.global_iface.group_num;
struct kbase_queue_group *protm_grp;
- bool suspend_on_slot_groups;
+ bool suspend_on_slot_groups = true;
+ bool pmode_active;
unsigned long flags;
u32 csg_nr;
@@ -4133,20 +4757,51 @@ static bool scheduler_handle_reset_in_protected_mode(struct kbase_device *kbdev)
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
protm_grp = scheduler->active_protm_grp;
+ pmode_active = kbdev->protected_mode;
+
+ if (likely(!protm_grp && !pmode_active)) {
+ /* Case 1: GPU is not in protected mode or it successfully
+ * exited protected mode. All on-slot groups can be suspended in
+ * the regular way before reset.
+ */
+ suspend_on_slot_groups = true;
+ } else if (protm_grp && pmode_active) {
+ /* Case 2: GPU went successfully into protected mode and hasn't
+ * exited from it yet and the protected mode group is still
+ * active. If there was no fault for the protected mode group
+ * then it can be suspended in the regular way before reset.
+ * The other normal mode on-slot groups were already implicitly
+ * suspended on entry to protected mode so they can be marked as
+ * suspended right away.
+ */
+ suspend_on_slot_groups = !protm_grp->faulted;
+ } else if (!protm_grp && pmode_active) {
+ /* Case 3: GPU went successfully into protected mode and hasn't
+ * exited from it yet but the protected mode group got deleted.
+ * This would have happened if the FW got stuck during protected
+ * mode for some reason (like GPU page fault or some internal
+ * error). In normal cases FW is expected to send the pmode exit
+ * interrupt before it handles the CSG termination request.
+ * The other normal mode on-slot groups would already have been
+ * implicitly suspended on entry to protected mode so they can be
+ * marked as suspended right away.
+ */
+ suspend_on_slot_groups = false;
+ } else if (protm_grp && !pmode_active) {
+ /* Case 4: GPU couldn't successfully enter protected mode, i.e.
+ * PROTM_ENTER request had timed out.
+ * All the on-slot groups need to be suspended in the regular
+ * way before reset.
+ */
+ suspend_on_slot_groups = true;
+ }
- /* If GPU wasn't in protected mode or had exited it before the GPU reset
- * then all the on-slot groups can be suspended in the regular way by
- * sending CSG SUSPEND requests to FW.
- * If there wasn't a fault for protected mode group, then it would
- * also need to be suspended in the regular way before the reset.
- */
- suspend_on_slot_groups = !(protm_grp && protm_grp->faulted);
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
- if (!protm_grp)
+ if (likely(!pmode_active))
goto unlock;
- /* GPU is in protected mode, so all the on-slot groups barring the
+ /* GPU hasn't exited protected mode, so all the on-slot groups barring
* the protected mode group can be marked as suspended right away.
*/
for (csg_nr = 0; csg_nr < num_groups; csg_nr++) {
@@ -4174,19 +4829,25 @@ unlock:
return suspend_on_slot_groups;
}
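The four commented cases above collapse into a small decision table over (protm_grp, pmode_active, faulted). The plain-C distillation below is only a hedged restatement of those comments, with the boolean result corresponding to suspend_on_slot_groups.

#include <stdbool.h>
#include <stdio.h>

/* true => on-slot groups are suspended the regular way before reset. */
static bool suspend_regularly(bool protm_grp, bool pmode_active, bool faulted)
{
	if (!pmode_active)
		return true;  /* cases 1 and 4 */
	if (!protm_grp)
		return false; /* case 3: pmode group already gone */
	return !faulted;      /* case 2 */
}

int main(void)
{
	printf("case 1: %d\n", suspend_regularly(false, false, false)); /* 1 */
	printf("case 2: %d\n", suspend_regularly(true,  true,  true));  /* 0 */
	printf("case 3: %d\n", suspend_regularly(false, true,  false)); /* 0 */
	printf("case 4: %d\n", suspend_regularly(true,  false, false)); /* 1 */
	return 0;
}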
+static void cancel_tock_work(struct kbase_csf_scheduler *const scheduler)
+{
+ cancel_delayed_work_sync(&scheduler->tock_work);
+ scheduler->tock_pending_request = false;
+}
+
static void scheduler_inner_reset(struct kbase_device *kbdev)
{
u32 const num_groups = kbdev->csf.global_iface.group_num;
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
unsigned long flags;
- WARN_ON(csgs_active(kbdev));
+ WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev));
/* Cancel any potential queued delayed work(s) */
cancel_work_sync(&kbdev->csf.scheduler.gpu_idle_work);
cancel_tick_timer(kbdev);
cancel_work_sync(&scheduler->tick_work);
- cancel_delayed_work_sync(&scheduler->tock_work);
+ cancel_tock_work(scheduler);
cancel_delayed_work_sync(&scheduler->ping_work);
mutex_lock(&scheduler->lock);
@@ -4292,10 +4953,11 @@ static void firmware_aliveness_monitor(struct work_struct *work)
}
#endif
- if (kbdev->csf.scheduler.state == SCHED_SUSPENDED)
+ if (kbdev->csf.scheduler.state == SCHED_SUSPENDED ||
+ kbdev->csf.scheduler.state == SCHED_SLEEPING)
goto exit;
- if (get_nr_active_csgs(kbdev) != 1)
+ if (kbase_csf_scheduler_get_nr_active_csgs(kbdev) != 1)
goto exit;
if (kbase_csf_scheduler_protected_mode_in_use(kbdev))
@@ -4307,7 +4969,7 @@ static void firmware_aliveness_monitor(struct work_struct *work)
goto exit;
}
- kbase_pm_wait_for_desired_state(kbdev);
+ kbase_csf_scheduler_wait_mcu_active(kbdev);
err = kbase_csf_firmware_ping_wait(kbdev);
@@ -4318,7 +4980,7 @@ static void firmware_aliveness_monitor(struct work_struct *work)
if (kbase_prepare_to_reset_gpu(
kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
kbase_reset_gpu(kbdev);
- } else if (get_nr_active_csgs(kbdev) == 1) {
+ } else if (kbase_csf_scheduler_get_nr_active_csgs(kbdev) == 1) {
queue_delayed_work(system_long_wq,
&kbdev->csf.scheduler.ping_work,
msecs_to_jiffies(FIRMWARE_PING_INTERVAL_MS));
@@ -4337,13 +4999,42 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
struct kbase_context *const kctx = group->kctx;
struct kbase_device *const kbdev = kctx->kbdev;
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ bool on_slot;
int err = 0;
kbase_reset_gpu_assert_prevented(kbdev);
lockdep_assert_held(&kctx->csf.lock);
mutex_lock(&scheduler->lock);
- if (kbasep_csf_scheduler_group_is_on_slot_locked(group)) {
+ on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
+
+#ifdef KBASE_PM_RUNTIME
+ if (on_slot && (scheduler->state == SCHED_SLEEPING)) {
+ if (wait_for_scheduler_to_exit_sleep(kbdev)) {
+ dev_warn(
+ kbdev->dev,
+ "Wait for scheduler to exit sleep state timed out when copying suspend buffer for group %d of ctx %d_%d on slot %d",
+ group->handle, group->kctx->tgid,
+ group->kctx->id, group->csg_nr);
+
+ scheduler_wakeup(kbdev, true);
+
+ /* Wait for MCU firmware to start running */
+ if (kbase_csf_scheduler_wait_mcu_active(kbdev))
+ dev_warn(
+ kbdev->dev,
+ "Wait for MCU active failed when copying suspend buffer for group %d of ctx %d_%d on slot %d",
+ group->handle, group->kctx->tgid,
+ group->kctx->id, group->csg_nr);
+ }
+
+ /* Check the group state again as scheduler lock would have been
+ * released when waiting for the exit from SLEEPING state.
+ */
+ on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
+ }
+#endif
+ if (on_slot) {
DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0};
set_bit(kbase_csf_scheduler_group_get_slot(group), slot_mask);
@@ -4353,8 +5044,9 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
err = wait_csg_slots_suspend(kbdev, slot_mask,
kbdev->csf.fw_timeout_ms);
if (err) {
- dev_warn(kbdev->dev, "Timed out waiting for the group %d to suspend on slot %d",
- group->handle, group->csg_nr);
+ dev_warn(kbdev->dev, "[%llu] Timeout waiting for the group %d to suspend on slot %d",
+ kbase_backend_get_cycle_cnt(kbdev),
+ group->handle, group->csg_nr);
goto exit;
}
}
@@ -4547,20 +5239,22 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group)
}
/**
- * check_sync_update_for_idle_group_protm() - Check the sync wait condition
- * for all the queues bound to
- * the given group.
+ * check_sync_update_for_on_slot_group() - Check the sync wait condition
+ * for all the queues bound to
+ * the given on-slot group.
*
- * @group: Pointer to the group that requires evaluation.
+ * @group: Pointer to the on-slot group that requires evaluation.
*
* This function is called if the GPU is in protected mode and there are on
- * slot idle groups with higher priority than the active protected mode group.
+ * slot idle groups with higher priority than the active protected mode group,
+ * or when a CQS object is signaled whilst the GPU is in sleep state.
* This function will evaluate the sync condition, if any, of all the queues
* bound to the given group.
*
* Return true if the sync condition of at least one queue has been satisfied.
*/
-static bool check_sync_update_for_idle_group_protm(
+static bool check_sync_update_for_on_slot_group(
struct kbase_queue_group *group)
{
struct kbase_device *const kbdev = group->kctx->kbdev;
@@ -4680,7 +5374,7 @@ static bool check_sync_update_for_idle_groups_protm(struct kbase_device *kbdev)
* has a higher priority than the protm group, then we
* need to exit protected mode.
*/
- if (check_sync_update_for_idle_group_protm(group))
+ if (check_sync_update_for_on_slot_group(group))
exit_protm = true;
}
}
@@ -4688,6 +5382,28 @@ static bool check_sync_update_for_idle_groups_protm(struct kbase_device *kbdev)
return exit_protm;
}
+static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ u32 const num_groups = kbdev->csf.global_iface.group_num;
+ u32 csg_nr;
+
+ lockdep_assert_held(&scheduler->lock);
+
+ for (csg_nr = 0; csg_nr < num_groups; csg_nr++) {
+ struct kbase_queue_group *const group =
+ kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
+
+ if (!group)
+ continue;
+
+ if (check_sync_update_for_on_slot_group(group)) {
+ scheduler_wakeup(kbdev, true);
+ return;
+ }
+ }
+}
+
/**
* check_group_sync_update_worker() - Check the sync wait condition for all the
* blocked queue groups
@@ -4709,6 +5425,7 @@ static void check_group_sync_update_worker(struct work_struct *work)
struct kbase_context, csf.sched.sync_update_work);
struct kbase_device *const kbdev = kctx->kbdev;
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ bool sync_updated = false;
mutex_lock(&scheduler->lock);
@@ -4719,6 +5436,7 @@ static void check_group_sync_update_worker(struct work_struct *work)
list_for_each_entry_safe(group, temp,
&kctx->csf.sched.idle_wait_groups, link) {
if (group_sync_updated(group)) {
+ sync_updated = true;
/* Move this group back in to the runnable
* groups list of the context.
*/
@@ -4730,8 +5448,17 @@ static void check_group_sync_update_worker(struct work_struct *work)
WARN_ON(!list_empty(&kctx->csf.sched.idle_wait_groups));
}
- if (check_sync_update_for_idle_groups_protm(kbdev))
+ if (check_sync_update_for_idle_groups_protm(kbdev)) {
scheduler_force_protm_exit(kbdev);
+ sync_updated = true;
+ }
+
+ /* If scheduler is in sleep or suspended state, re-activate it
+ * to serve on-slot CSGs blocked on CQS which has been signaled.
+ */
+ if (!sync_updated && (scheduler->state == SCHED_SLEEPING))
+ check_sync_update_in_sleep_mode(kbdev);
+
KBASE_KTRACE_ADD(kbdev, GROUP_SYNC_UPDATE_WORKER_END, kctx, 0u);
mutex_unlock(&scheduler->lock);
@@ -4829,7 +5556,6 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
INIT_DEFERRABLE_WORK(&scheduler->tock_work, schedule_on_tock);
INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor);
- BUILD_BUG_ON(CSF_FIRMWARE_TIMEOUT_MS >= FIRMWARE_PING_INTERVAL_MS);
mutex_init(&scheduler->lock);
spin_lock_init(&scheduler->interrupt_lock);
@@ -4869,16 +5595,22 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev)
{
if (kbdev->csf.scheduler.csg_slots) {
WARN_ON(atomic_read(&kbdev->csf.scheduler.non_idle_offslot_grps));
- WARN_ON(csgs_active(kbdev));
+ /* The unload of Driver can take place only when all contexts have
+ * been terminated. The groups that were not terminated by the User
+ * are terminated on context termination. So no CSGs are expected
+ * to be active at the time of Driver unload.
+ */
+ WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev));
flush_work(&kbdev->csf.scheduler.gpu_idle_work);
mutex_lock(&kbdev->csf.scheduler.lock);
+
if (WARN_ON(kbdev->csf.scheduler.state != SCHED_SUSPENDED))
scheduler_suspend(kbdev);
mutex_unlock(&kbdev->csf.scheduler.lock);
cancel_delayed_work_sync(&kbdev->csf.scheduler.ping_work);
cancel_tick_timer(kbdev);
cancel_work_sync(&kbdev->csf.scheduler.tick_work);
- cancel_delayed_work_sync(&kbdev->csf.scheduler.tock_work);
+ cancel_tock_work(&kbdev->csf.scheduler);
mutex_destroy(&kbdev->csf.scheduler.lock);
kfree(kbdev->csf.scheduler.csg_slots);
kbdev->csf.scheduler.csg_slots = NULL;
@@ -4911,7 +5643,8 @@ static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev)
return;
WARN_ON((scheduler->state != SCHED_INACTIVE) &&
- (scheduler->state != SCHED_SUSPENDED));
+ (scheduler->state != SCHED_SUSPENDED) &&
+ (scheduler->state != SCHED_SLEEPING));
if (scheduler->total_runnable_grps > 0) {
enqueue_tick_work(kbdev);
@@ -4953,6 +5686,7 @@ void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev,
scheduler->timer_enabled = false;
cancel_tick_timer(kbdev);
cancel_delayed_work(&scheduler->tock_work);
+ scheduler->tock_pending_request = false;
mutex_unlock(&scheduler->lock);
/* The non-sync version to cancel the normal work item is not
* available, so need to drop the lock before cancellation.
@@ -4990,7 +5724,7 @@ void kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
/* Cancel any potential queued delayed work(s) */
cancel_work_sync(&scheduler->tick_work);
- cancel_delayed_work_sync(&scheduler->tock_work);
+ cancel_tock_work(scheduler);
if (kbase_reset_gpu_prevent_and_wait(kbdev)) {
dev_warn(kbdev->dev,
@@ -5002,6 +5736,15 @@ void kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
disable_gpu_idle_fw_timer(kbdev);
+#ifdef KBASE_PM_RUNTIME
+ /* If scheduler is in sleeping state, then MCU needs to be activated
+ * to suspend CSGs.
+ */
+ if (scheduler->state == SCHED_SLEEPING) {
+ dev_info(kbdev->dev, "Activating MCU out of sleep on system suspend");
+ force_scheduler_to_exit_sleep(kbdev);
+ }
+#endif
if (scheduler->state != SCHED_SUSPENDED) {
suspend_active_groups_on_powerdown(kbdev, true);
dev_info(kbdev->dev, "Scheduler PM suspend");
@@ -5019,9 +5762,8 @@ void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev)
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
mutex_lock(&scheduler->lock);
-
- if (scheduler->total_runnable_grps > 0) {
- WARN_ON(scheduler->state != SCHED_SUSPENDED);
+ if ((scheduler->total_runnable_grps > 0) &&
+ (scheduler->state == SCHED_SUSPENDED)) {
dev_info(kbdev->dev, "Scheduler PM resume");
scheduler_wakeup(kbdev, true);
}
@@ -5031,33 +5773,141 @@ KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_resume);
void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev)
{
+ /* Here the lock is taken to synchronize against the runtime suspend
+ * callback function, which may need to wake up the MCU for suspending
+ * the CSGs before powering down the GPU.
+ */
+ mutex_lock(&kbdev->csf.scheduler.lock);
+ scheduler_pm_active_handle_suspend(kbdev,
+ KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE);
+ mutex_unlock(&kbdev->csf.scheduler.lock);
+}
+KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_active);
+
+void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev)
+{
+ /* Here the lock is taken just to maintain symmetry with
+ * kbase_csf_scheduler_pm_active().
+ */
+ mutex_lock(&kbdev->csf.scheduler.lock);
+ scheduler_pm_idle(kbdev);
+ mutex_unlock(&kbdev->csf.scheduler.lock);
+}
+KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_idle);
+
+int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
unsigned long flags;
- u32 prev_count;
+ int err;
+ kbase_pm_lock(kbdev);
+ WARN_ON(!kbdev->pm.active_count);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- prev_count = kbdev->csf.scheduler.pm_active_count++;
+ WARN_ON(!scheduler->pm_active_count);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ kbase_pm_unlock(kbdev);
- /* On 0 => 1, make a pm_ctx_active request */
- if (!prev_count)
- kbase_pm_context_active(kbdev);
- else
- WARN_ON(prev_count == U32_MAX);
+ kbase_pm_wait_for_poweroff_work_complete(kbdev);
+
+ err = kbase_pm_wait_for_desired_state(kbdev);
+ if (!err) {
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_ON);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ }
+
+ return err;
}
-KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_active);
+KBASE_EXPORT_TEST_API(kbase_csf_scheduler_wait_mcu_active);
-void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev)
+#ifdef KBASE_PM_RUNTIME
+int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev)
{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
unsigned long flags;
- u32 prev_count;
+ int ret;
+
+ dev_dbg(kbdev->dev, "Handling runtime suspend");
+
+ kbase_reset_gpu_assert_prevented(kbdev);
+ lockdep_assert_held(&scheduler->lock);
+ WARN_ON(scheduler->pm_active_count);
+
+ if (scheduler->state == SCHED_SUSPENDED) {
+ WARN_ON(kbdev->pm.backend.gpu_sleep_mode_active);
+ return 0;
+ }
+
+ ret = suspend_active_groups_on_powerdown(kbdev, false);
+
+ if (ret) {
+ dev_dbg(kbdev->dev, "Aborting runtime suspend (grps: %d)",
+ atomic_read(&scheduler->non_idle_offslot_grps));
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbdev->pm.backend.exit_gpu_sleep_mode = true;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ kbase_csf_scheduler_invoke_tick(kbdev);
+ return ret;
+ }
+
+ scheduler->state = SCHED_SUSPENDED;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- prev_count = kbdev->csf.scheduler.pm_active_count--;
+ kbdev->pm.backend.gpu_sleep_mode_active = false;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
- if (prev_count == 1)
- kbase_pm_context_idle(kbdev);
- else
- WARN_ON(prev_count == 0);
+ wake_up_all(&kbdev->csf.event_wait);
+ return 0;
+}
+
+void kbase_csf_scheduler_reval_idleness_post_sleep(struct kbase_device *kbdev)
+{
+ u32 csg_nr;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_IN_SLEEP);
+
+ for (csg_nr = 0; csg_nr < kbdev->csf.global_iface.group_num; csg_nr++) {
+ struct kbase_csf_cmd_stream_group_info *ginfo =
+ &kbdev->csf.global_iface.groups[csg_nr];
+ bool csg_idle;
+
+ if (!kbdev->csf.scheduler.csg_slots[csg_nr].resident_group)
+ continue;
+
+ csg_idle =
+ kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
+ CSG_STATUS_STATE_IDLE_MASK;
+ if (!csg_idle) {
+ dev_dbg(kbdev->dev,
+ "Re-activate Scheduler after MCU sleep");
+ kbdev->pm.backend.exit_gpu_sleep_mode = true;
+ kbase_csf_scheduler_invoke_tick(kbdev);
+ break;
+ }
+ }
+}
+
+void kbase_csf_scheduler_force_sleep(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+
+ mutex_lock(&scheduler->lock);
+ if (kbase_pm_gpu_sleep_allowed(kbdev) &&
+ (scheduler->state == SCHED_INACTIVE))
+ scheduler_sleep_on_idle(kbdev);
+ mutex_unlock(&scheduler->lock);
+}
+#endif
+
+void kbase_csf_scheduler_force_wakeup(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+
+ mutex_lock(&scheduler->lock);
+ scheduler_wakeup(kbdev, true);
+ mutex_unlock(&scheduler->lock);
}
-KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_idle);
diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.h b/mali_kbase/csf/mali_kbase_csf_scheduler.h
index 428ecbe..73ebb66 100644
--- a/mali_kbase/csf/mali_kbase_csf_scheduler.h
+++ b/mali_kbase/csf/mali_kbase_csf_scheduler.h
@@ -374,7 +374,11 @@ static inline bool kbase_csf_scheduler_protected_mode_in_use(
* kbase_csf_scheduler_pm_active - Perform scheduler power active operation
*
* Note: This function will increase the scheduler's internal pm_active_count
- * value, ensuring that both GPU and MCU are powered for access.
+ * value, ensuring that both GPU and MCU are powered for access. The MCU may
+ * not have actually become active when this function returns, so callers
+ * must call kbase_csf_scheduler_wait_mcu_active() to guarantee that.
+ *
+ * This function should not be called with the global scheduler lock held.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
@@ -384,13 +388,27 @@ void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev);
* kbase_csf_scheduler_pm_idle - Perform the scheduler power idle operation
*
* Note: This function will decrease the scheduler's internal pm_active_count
- * value. On reaching 0, the MCU and GPU could be powered off.
+ * value. On reaching 0, the MCU and GPU could be powered off. This function
+ * should not be called with the global scheduler lock held.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev);
/**
+ * kbase_csf_scheduler_wait_mcu_active - Wait for the MCU to actually become active
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * This function waits for the MCU to actually become active. It is meant to
+ * be called after kbase_csf_scheduler_pm_active(), since that function may
+ * not make the MCU active right away.
+ *
+ * Return: 0 if the MCU was successfully activated, otherwise an error code.
+ */
+int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev);
+
+/**
* kbase_csf_scheduler_pm_resume - Reactivate the scheduler on system resume
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
@@ -472,6 +490,26 @@ static inline void kbase_csf_scheduler_advance_tick(struct kbase_device *kbdev)
}
/**
+ * kbase_csf_scheduler_invoke_tick() - Invoke the scheduling tick
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function will queue the scheduling tick work item for immediate
+ * execution if the tick timer is not active. It can be called from interrupt
+ * context to resume scheduling after the GPU was put to sleep.
+ */
+static inline void kbase_csf_scheduler_invoke_tick(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ unsigned long flags;
+
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+ if (!scheduler->tick_timer_active)
+ queue_work(scheduler->wq, &scheduler->tick_work);
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+}
+
+/**
* kbase_csf_scheduler_queue_has_trace() - report whether the queue has been
* configured to operate with the
* cs_trace feature.
@@ -491,4 +529,97 @@ static inline bool kbase_csf_scheduler_queue_has_trace(struct kbase_queue *queue
return (queue->trace_buffer_size && queue->trace_buffer_base);
}
+#ifdef KBASE_PM_RUNTIME
+/**
+ * kbase_csf_scheduler_reval_idleness_post_sleep() - Check GPU's idleness after
+ * putting MCU to sleep state
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function re-evaluates the idleness of on-slot queue groups after the
+ * MCU was put to the sleep state and invokes the scheduling tick if any of
+ * the on-slot queue groups became non-idle.
+ * The CSG_OUTPUT_BLOCK.CSG_STATUS_STATE.IDLE bit, which is updated by the MCU
+ * firmware on handling of the sleep request, is checked to determine idleness.
+ *
+ * This function is needed to detect whether more work was flushed in the
+ * window between the GPU idle notification and the enabling of the Doorbell
+ * mirror interrupt (from the MCU state machine). Once the Doorbell mirror
+ * interrupt is enabled, the Host can receive notifications of User doorbell
+ * rings.
+ */
+void kbase_csf_scheduler_reval_idleness_post_sleep(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_scheduler_handle_runtime_suspend() - Handle runtime suspend by
+ * suspending CSGs.
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function is called from the runtime suspend callback function for
+ * suspending all the on-slot queue groups. If any of the groups is found to
+ * be non-idle after the completion of the CSG suspend operation, or if the
+ * suspend operation times out, then the scheduling tick is invoked and an
+ * error is returned so that the GPU power down can be aborted.
+ *
+ * Return: 0 if all the CSGs were suspended, otherwise an error code.
+ */
+int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev);
+#endif
+
+/**
+ * kbase_csf_scheduler_get_nr_active_csgs() - Get the number of active CSGs
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function calculates the number of CSG slots that have a queue group
+ * resident on them.
+ *
+ * Note: This function should not be used if the interrupt_lock is held. Use
+ * kbase_csf_scheduler_get_nr_active_csgs_locked() instead.
+ *
+ * Return: number of active CSGs.
+ */
+u32 kbase_csf_scheduler_get_nr_active_csgs(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_scheduler_get_nr_active_csgs_locked() - Get the number of active
+ * CSGs
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function calculates the number of CSG slots that have a queue group
+ * resident on them.
+ *
+ * Note: This function should be called with interrupt_lock held.
+ *
+ * Return: number of active CSGs.
+ */
+u32 kbase_csf_scheduler_get_nr_active_csgs_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_scheduler_force_wakeup() - Forcefully resume the scheduling of CSGs
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function is called to forcefully resume the scheduling of CSGs, even
+ * when no work has been submitted for them.
+ * This function is only used for testing purposes.
+ */
+void kbase_csf_scheduler_force_wakeup(struct kbase_device *kbdev);
+
+#ifdef KBASE_PM_RUNTIME
+/**
+ * kbase_csf_scheduler_force_sleep() - Forcefully put the Scheduler to sleeping
+ * state.
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function is called to forcefully put the Scheduler into the sleeping
+ * state and trigger the sleep of the MCU. If the CSGs are not idle, then the
+ * Scheduler will be reactivated again immediately.
+ * This function is only used for testing purposes.
+ */
+void kbase_csf_scheduler_force_sleep(struct kbase_device *kbdev);
+#endif
+
#endif /* _KBASE_CSF_SCHEDULER_H_ */
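Taken together, the kbase_csf_scheduler_pm_active() / kbase_csf_scheduler_wait_mcu_active() /
kbase_csf_scheduler_pm_idle() documentation above implies a three-step calling pattern for
code that needs the MCU powered (the progress_timeout_store() hunk later in this patch
switches to it). A minimal caller-side sketch; do_work_with_mcu() is a hypothetical helper
and the work/error handling are placeholders, not part of this patch:

/* Illustrative only: not driver code from this patch. */
static int do_work_with_mcu(struct kbase_device *kbdev)
{
	int err;

	/* Take a PM reference; the MCU may not be active yet on return. */
	kbase_csf_scheduler_pm_active(kbdev);

	/* Block until the MCU has actually become active. */
	err = kbase_csf_scheduler_wait_mcu_active(kbdev);
	if (!err) {
		/* ... access firmware/MCU-backed state here ... */
	}

	/* Drop the PM reference; the MCU/GPU may now power off or sleep. */
	kbase_csf_scheduler_pm_idle(kbdev);

	return err;
}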
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
index 8ecf235..06a7824 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
@@ -66,8 +66,6 @@ static u64 encode_chunk_ptr(u32 const chunk_size, u64 const chunk_addr)
static struct kbase_csf_tiler_heap_chunk *get_last_chunk(
struct kbase_csf_tiler_heap *const heap)
{
- lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
-
if (list_empty(&heap->chunks_list))
return NULL;
@@ -176,7 +174,7 @@ static int init_chunk(struct kbase_csf_tiler_heap *const heap,
* Return: 0 if successful or a negative error code on failure.
*/
static int create_chunk(struct kbase_csf_tiler_heap *const heap,
- bool link_with_prev)
+ bool link_with_prev)
{
int err = 0;
struct kbase_context *const kctx = heap->kctx;
@@ -186,14 +184,17 @@ static int create_chunk(struct kbase_csf_tiler_heap *const heap,
BASE_MEM_COHERENT_LOCAL;
struct kbase_csf_tiler_heap_chunk *chunk = NULL;
- flags |= base_mem_group_id_set(kctx->jit_group_id);
+ /* Calls to this function are inherently synchronous with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;
+
+ flags |= kbase_mem_group_id_set(kctx->jit_group_id);
#if defined(CONFIG_MALI_DEBUG) || defined(CONFIG_MALI_VECTOR_DUMP)
flags |= BASE_MEM_PROT_CPU_RD;
#endif
- lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
-
chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
if (unlikely(!chunk)) {
dev_err(kctx->kbdev->dev,
@@ -203,8 +204,8 @@ static int create_chunk(struct kbase_csf_tiler_heap *const heap,
/* Allocate GPU memory for the new chunk. */
INIT_LIST_HEAD(&chunk->link);
- chunk->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0,
- &flags, &chunk->gpu_va);
+ chunk->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
+ &chunk->gpu_va, mmu_sync_info);
if (unlikely(!chunk->region)) {
dev_err(kctx->kbdev->dev,
@@ -251,8 +252,6 @@ static void delete_chunk(struct kbase_csf_tiler_heap *const heap,
{
struct kbase_context *const kctx = heap->kctx;
- lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
-
kbase_gpu_vm_lock(kctx);
chunk->region->flags &= ~KBASE_REG_NO_USER_FREE;
kbase_mem_free_region(kctx, chunk->region);
@@ -273,9 +272,6 @@ static void delete_chunk(struct kbase_csf_tiler_heap *const heap,
static void delete_all_chunks(struct kbase_csf_tiler_heap *heap)
{
struct list_head *entry = NULL, *tmp = NULL;
- struct kbase_context *const kctx = heap->kctx;
-
- lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
list_for_each_safe(entry, tmp, &heap->chunks_list) {
struct kbase_csf_tiler_heap_chunk *chunk = list_entry(
@@ -429,6 +425,9 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx,
"Creating a tiler heap with %u chunks (limit: %u) of size %u\n",
initial_chunks, max_chunks, chunk_size);
+ if (!kbase_mem_allow_alloc(kctx))
+ return -EINVAL;
+
if (chunk_size == 0)
return -EINVAL;
@@ -459,11 +458,9 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx,
heap->gpu_va = kbase_csf_heap_context_allocator_alloc(ctx_alloc);
- mutex_lock(&kctx->csf.tiler_heaps.lock);
-
if (unlikely(!heap->gpu_va)) {
- dev_err(kctx->kbdev->dev,
- "Failed to allocate a tiler heap context\n");
+ dev_dbg(kctx->kbdev->dev,
+ "Failed to allocate a tiler heap context");
err = -ENOMEM;
} else {
err = create_initial_chunks(heap, initial_chunks);
@@ -480,13 +477,14 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx,
list_first_entry(&heap->chunks_list,
struct kbase_csf_tiler_heap_chunk, link);
+ *heap_gpu_va = heap->gpu_va;
+ *first_chunk_va = first_chunk->gpu_va;
+
+ mutex_lock(&kctx->csf.tiler_heaps.lock);
kctx->csf.tiler_heaps.nr_of_heaps++;
heap->heap_id = kctx->csf.tiler_heaps.nr_of_heaps;
list_add(&heap->link, &kctx->csf.tiler_heaps.list);
- *heap_gpu_va = heap->gpu_va;
- *first_chunk_va = first_chunk->gpu_va;
-
KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(
kctx->kbdev, kctx->id, heap->heap_id,
PFN_UP(heap->chunk_size * heap->max_chunks),
@@ -496,10 +494,9 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx,
dev_dbg(kctx->kbdev->dev, "Created tiler heap 0x%llX\n",
heap->gpu_va);
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
}
- mutex_unlock(&kctx->csf.tiler_heaps.lock);
-
return err;
}
diff --git a/mali_kbase/csf/mali_kbase_csf_timeout.c b/mali_kbase/csf/mali_kbase_csf_timeout.c
index 4d93fe5..f52cbab 100644
--- a/mali_kbase/csf/mali_kbase_csf_timeout.c
+++ b/mali_kbase/csf/mali_kbase_csf_timeout.c
@@ -100,7 +100,7 @@ static ssize_t progress_timeout_store(struct device * const dev,
if (!err) {
kbase_csf_scheduler_pm_active(kbdev);
- err = kbase_pm_wait_for_desired_state(kbdev);
+ err = kbase_csf_scheduler_wait_mcu_active(kbdev);
if (!err)
err = kbase_csf_firmware_set_timeout(kbdev, timeout);
diff --git a/mali_kbase/csf/mali_kbase_csf_tl_reader.c b/mali_kbase/csf/mali_kbase_csf_tl_reader.c
index 1824c2d..563faec 100644
--- a/mali_kbase/csf/mali_kbase_csf_tl_reader.c
+++ b/mali_kbase/csf/mali_kbase_csf_tl_reader.c
@@ -171,13 +171,12 @@ static int kbase_ts_converter_init(
*
* Return: The CPU timestamp.
*/
-static void kbase_ts_converter_convert(
- const struct kbase_ts_converter *self,
- u64 *gpu_ts)
+void kbase_ts_converter_convert(const struct kbase_ts_converter *self,
+ u64 *gpu_ts)
{
u64 old_gpu_ts = *gpu_ts;
- *gpu_ts = div64_u64(old_gpu_ts * self->multiplier,
- self->divisor) + self->offset;
+ *gpu_ts = div64_u64(old_gpu_ts * self->multiplier, self->divisor) +
+ self->offset;
}
/**
@@ -256,6 +255,7 @@ static void tl_reader_reset(struct kbase_csf_tl_reader *self)
self->tl_header.btc = 0;
}
+
int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
{
int ret = 0;
@@ -280,6 +280,7 @@ int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
return -EBUSY;
}
+
/* Copying the whole buffer in a single shot. We assume
* that the buffer will not contain partially written messages.
*/
@@ -330,9 +331,8 @@ int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
{
struct kbase_csffw_tl_message *msg =
(struct kbase_csffw_tl_message *) csffw_data_it;
- kbase_ts_converter_convert(
- &self->ts_converter,
- &msg->timestamp);
+ kbase_ts_converter_convert(&self->ts_converter,
+ &msg->timestamp);
}
/* Copy the message out to the tl_stream. */
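For reference, the conversion performed by kbase_ts_converter_convert() above is
cpu_ts = gpu_ts * multiplier / divisor + offset. The values below are purely
illustrative; the real multiplier/divisor/offset come from kbase_ts_converter_init(),
which is not shown in this hunk:

/* Standalone illustration (not driver code): 52,000,000 ticks of an assumed
 * 26 MHz system timer convert to 2,000,000,000 ns with a zero offset.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t multiplier = 1000000000ull; /* ns per second */
	const uint64_t divisor = 26000000ull;      /* assumed timer frequency */
	const uint64_t offset = 0;                 /* assumed CPU/GPU offset */
	uint64_t gpu_ts = 52000000ull;             /* 2 seconds worth of ticks */
	uint64_t cpu_ts = gpu_ts * multiplier / divisor + offset;

	printf("%llu ns\n", (unsigned long long)cpu_ts); /* prints 2000000000 */
	return 0;
}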
diff --git a/mali_kbase/csf/mali_kbase_csf_tl_reader.h b/mali_kbase/csf/mali_kbase_csf_tl_reader.h
index 1b0fcd7..891a8f3 100644
--- a/mali_kbase/csf/mali_kbase_csf_tl_reader.h
+++ b/mali_kbase/csf/mali_kbase_csf_tl_reader.h
@@ -43,9 +43,9 @@ struct kbase_device;
* struct kbase_ts_converter -
* System timestamp to CPU timestamp converter state.
*
- * @multiplier: Numerator of the converter's fraction.
- * @divisor: Denominator of the converter's fraction.
- * @offset: Converter's offset term.
+ * @multiplier: Numerator of the converter's fraction.
+ * @divisor: Denominator of the converter's fraction.
+ * @offset: Converter's offset term.
*
* According to Generic timer spec, system timer:
* - Increments at a fixed frequency
diff --git a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_jm.h b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_jm.h
index f419f70..6ba98b7 100644
--- a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_jm.h
+++ b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_jm.h
@@ -56,6 +56,14 @@ int dummy_array[] = {
*/
/* info_val==exit code; gpu_addr==chain gpuaddr */
KBASE_KTRACE_CODE_MAKE_CODE(JM_JOB_DONE),
+ /* gpu_addr==JS_HEAD read
+ * info_val==event code
+ */
+ KBASE_KTRACE_CODE_MAKE_CODE(JM_RETURN_ATOM_TO_JS),
+ /* gpu_addr==JS_HEAD read
+ * info_val==event code
+ */
+ KBASE_KTRACE_CODE_MAKE_CODE(JM_MARK_FOR_RETURN_TO_JS),
/* gpu_addr==JS_HEAD_NEXT written, info_val==lower 32 bits of
* affinity
*/
@@ -120,6 +128,13 @@ int dummy_array[] = {
KBASE_KTRACE_CODE_MAKE_CODE(JS_ADD_JOB),
/* gpu_addr==last value written/would be written to JS_HEAD */
KBASE_KTRACE_CODE_MAKE_CODE(JS_REMOVE_JOB),
+ /* gpu_addr==value to write into JS_HEAD
+ * info_val==priority of atom as a KBASE_JS_ATOM_SCHED_PRIO_<...> value
+ * (0 highest)
+ */
+ KBASE_KTRACE_CODE_MAKE_CODE(JS_PULL_JOB),
+ /* gpu_addr==value that would be written to JS_HEAD if run again */
+ KBASE_KTRACE_CODE_MAKE_CODE(JS_UNPULL_JOB),
KBASE_KTRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX),
/* gpu_addr==value to write into JS_HEAD */
KBASE_KTRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB),
@@ -146,6 +161,25 @@ int dummy_array[] = {
KBASE_KTRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX),
/* info_val == the ctx attribute now off runpool */
KBASE_KTRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_KTRACE_CODE_MAKE_CODE(JS_RETURN_WORKER),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_KTRACE_CODE_MAKE_CODE(JS_RETURN_WORKER_END),
+ /* info_val==priority level blocked (0 highest) */
+ KBASE_KTRACE_CODE_MAKE_CODE(JS_SLOT_PRIO_BLOCKED),
+ /* info_val==priority level unblocked (0 highest)
+ * note that the priority level may still be blocked on higher levels
+ */
+ KBASE_KTRACE_CODE_MAKE_CODE(JS_SLOT_PRIO_UNBLOCKED),
+ /* gpu_addr==value to write into JS_HEAD
+ * info_val==priority level unblocked - priorities at this and higher
+ * are unblocked (0 highest)
+ */
+ KBASE_KTRACE_CODE_MAKE_CODE(JS_SLOT_PRIO_AND_HIGHER_UNBLOCKED),
+ /* gpu_addr==value to write into JS_HEAD
+ * info_val==priority level blocked (0 highest)
+ */
+ KBASE_KTRACE_CODE_MAKE_CODE(JS_SLOT_PRIO_IS_BLOCKED),
/*
* Scheduler Policy events
*/
diff --git a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_defs_jm.h b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_defs_jm.h
index c01f930..efa8ab0 100644
--- a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_defs_jm.h
+++ b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_defs_jm.h
@@ -45,9 +45,12 @@
*
* ftrace backend now outputs kctx field (as %d_%u format).
*
+ * 2.2:
+ * Add tracing codes for pulling, unpulling, and returning atoms to JS, for
+ * diagnosing soft-stop path and preemption problems.
*/
#define KBASE_KTRACE_VERSION_MAJOR 2
-#define KBASE_KTRACE_VERSION_MINOR 1
+#define KBASE_KTRACE_VERSION_MINOR 2
#endif /* KBASE_KTRACE_TARGET_RBUF */
/*
diff --git a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_jm.c b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_jm.c
index fed9c1f..05d1677 100644
--- a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_jm.c
+++ b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_jm.c
@@ -71,10 +71,11 @@ void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg,
}
void kbasep_ktrace_add_jm(struct kbase_device *kbdev,
- enum kbase_ktrace_code code, struct kbase_context *kctx,
- struct kbase_jd_atom *katom, u64 gpu_addr,
- kbase_ktrace_flag_t flags, int refcount, int jobslot,
- u64 info_val)
+ enum kbase_ktrace_code code,
+ struct kbase_context *kctx,
+ const struct kbase_jd_atom *katom, u64 gpu_addr,
+ kbase_ktrace_flag_t flags, int refcount, int jobslot,
+ u64 info_val)
{
unsigned long irqflags;
struct kbase_ktrace_msg *trace_msg;
diff --git a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_jm.h b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_jm.h
index 8b09d05..ffae8d4 100644
--- a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_jm.h
+++ b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_jm.h
@@ -41,10 +41,11 @@
* PRIVATE: do not use directly. Use KBASE_KTRACE_ADD_JM() instead.
*/
void kbasep_ktrace_add_jm(struct kbase_device *kbdev,
- enum kbase_ktrace_code code, struct kbase_context *kctx,
- struct kbase_jd_atom *katom, u64 gpu_addr,
- kbase_ktrace_flag_t flags, int refcount, int jobslot,
- u64 info_val);
+ enum kbase_ktrace_code code,
+ struct kbase_context *kctx,
+ const struct kbase_jd_atom *katom, u64 gpu_addr,
+ kbase_ktrace_flag_t flags, int refcount, int jobslot,
+ u64 info_val);
#define KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, gpu_addr, flags, \
refcount, jobslot, info_val) \
diff --git a/mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_jm.h b/mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_jm.h
index 2e88e69..8fa4e2a 100644
--- a/mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_jm.h
+++ b/mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_jm.h
@@ -50,6 +50,8 @@ DECLARE_EVENT_CLASS(mali_jm_slot_template,
DEFINE_EVENT(mali_jm_slot_template, mali_##name, \
TP_PROTO(struct kbase_context *kctx, int jobslot, u64 info_val), \
TP_ARGS(kctx, jobslot, info_val))
+DEFINE_MALI_JM_SLOT_EVENT(JM_RETURN_ATOM_TO_JS);
+DEFINE_MALI_JM_SLOT_EVENT(JM_MARK_FOR_RETURN_TO_JS);
DEFINE_MALI_JM_SLOT_EVENT(JM_SUBMIT);
DEFINE_MALI_JM_SLOT_EVENT(JM_JOB_DONE);
DEFINE_MALI_JM_SLOT_EVENT(JM_UPDATE_HEAD);
@@ -68,6 +70,7 @@ DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED);
DEFINE_MALI_JM_SLOT_EVENT(JS_AFFINITY_SUBMIT_TO_BLOCKED);
DEFINE_MALI_JM_SLOT_EVENT(JS_AFFINITY_CURRENT);
DEFINE_MALI_JM_SLOT_EVENT(JD_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_JM_SLOT_EVENT(JS_PULL_JOB);
DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_REQUEST_CORES_FAILED);
DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_REGISTER_INUSE_FAILED);
DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED);
@@ -76,6 +79,10 @@ DEFINE_MALI_JM_SLOT_EVENT(JS_JOB_DONE_TRY_RUN_NEXT_JOB);
DEFINE_MALI_JM_SLOT_EVENT(JS_JOB_DONE_RETRY_NEEDED);
DEFINE_MALI_JM_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB);
DEFINE_MALI_JM_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ);
+DEFINE_MALI_JM_SLOT_EVENT(JS_SLOT_PRIO_BLOCKED);
+DEFINE_MALI_JM_SLOT_EVENT(JS_SLOT_PRIO_UNBLOCKED);
+DEFINE_MALI_JM_SLOT_EVENT(JS_SLOT_PRIO_AND_HIGHER_UNBLOCKED);
+DEFINE_MALI_JM_SLOT_EVENT(JS_SLOT_PRIO_IS_BLOCKED);
#undef DEFINE_MALI_JM_SLOT_EVENT
DECLARE_EVENT_CLASS(mali_jm_refcount_template,
@@ -152,10 +159,13 @@ DEFINE_MALI_JM_ADD_EVENT(JM_ZAP_SCHEDULED);
DEFINE_MALI_JM_ADD_EVENT(JM_ZAP_DONE);
DEFINE_MALI_JM_ADD_EVENT(JM_SUBMIT_AFTER_RESET);
DEFINE_MALI_JM_ADD_EVENT(JM_JOB_COMPLETE);
+DEFINE_MALI_JM_ADD_EVENT(JS_UNPULL_JOB);
DEFINE_MALI_JM_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL);
DEFINE_MALI_JM_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL);
DEFINE_MALI_JM_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX);
DEFINE_MALI_JM_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_CTX);
+DEFINE_MALI_JM_ADD_EVENT(JS_RETURN_WORKER);
+DEFINE_MALI_JM_ADD_EVENT(JS_RETURN_WORKER_END);
DEFINE_MALI_JM_ADD_EVENT(JS_POLICY_TIMER_END);
DEFINE_MALI_JM_ADD_EVENT(JS_POLICY_TIMER_START);
DEFINE_MALI_JM_ADD_EVENT(JS_POLICY_ENQUEUE_JOB);
diff --git a/mali_kbase/debug/mali_kbase_debug_ktrace_codes.h b/mali_kbase/debug/mali_kbase_debug_ktrace_codes.h
index 3309834..1c6b4cd 100644
--- a/mali_kbase/debug/mali_kbase_debug_ktrace_codes.h
+++ b/mali_kbase/debug/mali_kbase_debug_ktrace_codes.h
@@ -138,6 +138,10 @@ int dummy_array[] = {
/* info_val == policy number */
KBASE_KTRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM),
+ KBASE_KTRACE_CODE_MAKE_CODE(PM_POWEROFF_WAIT_WQ),
+ KBASE_KTRACE_CODE_MAKE_CODE(PM_RUNTIME_SUSPEND_CALLBACK),
+ KBASE_KTRACE_CODE_MAKE_CODE(PM_RUNTIME_RESUME_CALLBACK),
+
/*
* Context Scheduler events
*/
diff --git a/mali_kbase/debug/mali_kbase_debug_linux_ktrace.h b/mali_kbase/debug/mali_kbase_debug_linux_ktrace.h
index b56dec4..5fac763 100644
--- a/mali_kbase/debug/mali_kbase_debug_linux_ktrace.h
+++ b/mali_kbase/debug/mali_kbase_debug_linux_ktrace.h
@@ -95,6 +95,9 @@ DEFINE_MALI_ADD_EVENT(PM_CA_SET_POLICY);
DEFINE_MALI_ADD_EVENT(PM_CONTEXT_ACTIVE);
DEFINE_MALI_ADD_EVENT(PM_CONTEXT_IDLE);
DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS);
+DEFINE_MALI_ADD_EVENT(PM_POWEROFF_WAIT_WQ);
+DEFINE_MALI_ADD_EVENT(PM_RUNTIME_SUSPEND_CALLBACK);
+DEFINE_MALI_ADD_EVENT(PM_RUNTIME_RESUME_CALLBACK);
DEFINE_MALI_ADD_EVENT(SCHED_RETAIN_CTX_NOLOCK);
DEFINE_MALI_ADD_EVENT(SCHED_RELEASE_CTX);
#ifdef CONFIG_MALI_ARBITER_SUPPORT
diff --git a/mali_kbase/device/backend/mali_kbase_device_csf.c b/mali_kbase/device/backend/mali_kbase_device_csf.c
index 0c5052b..7b37a96 100644
--- a/mali_kbase/device/backend/mali_kbase_device_csf.c
+++ b/mali_kbase/device/backend/mali_kbase_device_csf.c
@@ -37,6 +37,7 @@
#include <backend/gpu/mali_kbase_clk_rate_trace_mgr.h>
#include <csf/mali_kbase_csf_csg_debugfs.h>
#include <mali_kbase_hwcnt_virtualizer.h>
+#include <mali_kbase_kinstr_prfcnt.h>
#include <mali_kbase_vinstr.h>
/**
@@ -51,6 +52,7 @@
static void kbase_device_firmware_hwcnt_term(struct kbase_device *kbdev)
{
if (kbdev->csf.firmware_inited) {
+ kbase_kinstr_prfcnt_term(kbdev->kinstr_prfcnt_ctx);
kbase_vinstr_term(kbdev->vinstr_ctx);
kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt);
kbase_hwcnt_backend_csf_metadata_term(&kbdev->hwcnt_gpu_iface);
@@ -266,6 +268,8 @@ static const struct kbase_device_init dev_init[] = {
"Timeline stream initialization failed" },
{ kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term,
"Clock rate trace manager initialization failed" },
+ { kbase_lowest_gpu_freq_init, NULL,
+ "Lowest freq initialization failed" },
{ kbase_device_hwcnt_backend_csf_if_init,
kbase_device_hwcnt_backend_csf_if_term,
"GPU hwcnt backend CSF interface creation failed" },
@@ -390,8 +394,19 @@ static int kbase_device_hwcnt_csf_deferred_init(struct kbase_device *kbdev)
goto vinstr_fail;
}
+ ret = kbase_kinstr_prfcnt_init(kbdev->hwcnt_gpu_virt,
+ &kbdev->kinstr_prfcnt_ctx);
+ if (ret) {
+ dev_err(kbdev->dev,
+ "Performance counter instrumentation initialization failed");
+ goto kinstr_prfcnt_fail;
+ }
+
return ret;
+kinstr_prfcnt_fail:
+ kbase_vinstr_term(kbdev->vinstr_ctx);
+
vinstr_fail:
kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt);
@@ -418,8 +433,6 @@ static int kbase_csf_firmware_deferred_init(struct kbase_device *kbdev)
lockdep_assert_held(&kbdev->fw_load_lock);
- kbase_pm_context_active(kbdev);
-
err = kbase_csf_firmware_init(kbdev);
if (!err) {
unsigned long flags;
@@ -432,8 +445,6 @@ static int kbase_csf_firmware_deferred_init(struct kbase_device *kbdev)
dev_err(kbdev->dev, "Firmware initialization failed");
}
- kbase_pm_context_idle(kbdev);
-
return err;
}
@@ -444,6 +455,8 @@ int kbase_device_firmware_init_once(struct kbase_device *kbdev)
mutex_lock(&kbdev->fw_load_lock);
if (!kbdev->csf.firmware_inited) {
+ kbase_pm_context_active(kbdev);
+
ret = kbase_csf_firmware_deferred_init(kbdev);
if (ret)
goto out;
@@ -455,9 +468,10 @@ int kbase_device_firmware_init_once(struct kbase_device *kbdev)
}
kbase_csf_debugfs_init(kbdev);
+out:
+ kbase_pm_context_idle(kbdev);
}
-out:
mutex_unlock(&kbdev->fw_load_lock);
return ret;
diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
index 8427edb..ae6dc1b 100644
--- a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
+++ b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
@@ -80,6 +80,7 @@ static void kbase_gpu_fault_interrupt(struct kbase_device *kbdev)
}
} else
kbase_report_gpu_fault(kbdev, status, as_nr, as_valid);
+
}
void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
@@ -124,6 +125,9 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
if (kbase_prepare_to_reset_gpu(
kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
kbase_reset_gpu(kbdev);
+
+ /* Defer the clearing to the GPU reset sequence */
+ val &= ~GPU_PROTECTED_FAULT;
}
if (val & RESET_COMPLETED)
@@ -132,6 +136,20 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val);
+#ifdef KBASE_PM_RUNTIME
+ if (val & DOORBELL_MIRROR) {
+ unsigned long flags;
+
+ dev_dbg(kbdev->dev, "Doorbell mirror interrupt received");
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ WARN_ON(!kbase_csf_scheduler_get_nr_active_csgs(kbdev));
+ kbase_pm_disable_db_mirror_interrupt(kbdev);
+ kbdev->pm.backend.exit_gpu_sleep_mode = true;
+ kbase_csf_scheduler_invoke_tick(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ }
+#endif
+
/* kbase_pm_check_transitions (called by kbase_pm_power_changed) must
* be called after the IRQ has been cleared. This is because it might
* trigger further power transitions and we don't want to miss the
@@ -160,3 +178,60 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val);
}
+
+#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
+static bool kbase_is_register_accessible(u32 offset)
+{
+#ifdef CONFIG_MALI_DEBUG
+ if (((offset >= MCU_SUBSYSTEM_BASE) && (offset < IPA_CONTROL_BASE)) ||
+ ((offset >= GPU_CONTROL_MCU_BASE) && (offset < USER_BASE))) {
+ WARN(1, "Invalid register offset 0x%x", offset);
+ return false;
+ }
+#endif
+
+ return true;
+}
+
+void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value)
+{
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+ KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+
+ if (!kbase_is_register_accessible(offset))
+ return;
+
+ writel(value, kbdev->reg + offset);
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ if (unlikely(kbdev->io_history.enabled))
+ kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
+ value, 1);
+#endif /* CONFIG_DEBUG_FS */
+ dev_dbg(kbdev->dev, "w: reg %08x val %08x", offset, value);
+}
+KBASE_EXPORT_TEST_API(kbase_reg_write);
+
+u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
+{
+ u32 val;
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+ KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+
+ if (!kbase_is_register_accessible(offset))
+ return 0;
+
+ val = readl(kbdev->reg + offset);
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ if (unlikely(kbdev->io_history.enabled))
+ kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
+ val, 0);
+#endif /* CONFIG_DEBUG_FS */
+ dev_dbg(kbdev->dev, "r: reg %08x val %08x", offset, val);
+
+ return val;
+}
+KBASE_EXPORT_TEST_API(kbase_reg_read);
+#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_jm.c b/mali_kbase/device/backend/mali_kbase_device_hw_jm.c
index c4e6eb8..e8f8953 100644
--- a/mali_kbase/device/backend/mali_kbase_device_hw_jm.c
+++ b/mali_kbase/device/backend/mali_kbase_device_hw_jm.c
@@ -51,6 +51,7 @@ static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple)
address);
if (multiple)
dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n");
+
}
void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
@@ -96,3 +97,41 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val);
}
+
+#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
+void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value)
+{
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+ KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+
+ writel(value, kbdev->reg + offset);
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ if (unlikely(kbdev->io_history.enabled))
+ kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
+ value, 1);
+#endif /* CONFIG_DEBUG_FS */
+ dev_dbg(kbdev->dev, "w: reg %08x val %08x", offset, value);
+}
+KBASE_EXPORT_TEST_API(kbase_reg_write);
+
+u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
+{
+ u32 val;
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+ KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+
+ val = readl(kbdev->reg + offset);
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ if (unlikely(kbdev->io_history.enabled))
+ kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
+ val, 0);
+#endif /* CONFIG_DEBUG_FS */
+ dev_dbg(kbdev->dev, "r: reg %08x val %08x", offset, val);
+
+ return val;
+}
+KBASE_EXPORT_TEST_API(kbase_reg_read);
+#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
diff --git a/mali_kbase/device/backend/mali_kbase_device_jm.c b/mali_kbase/device/backend/mali_kbase_device_jm.c
index 6a6ab60..7288e8e 100644
--- a/mali_kbase/device/backend/mali_kbase_device_jm.c
+++ b/mali_kbase/device/backend/mali_kbase_device_jm.c
@@ -185,6 +185,8 @@ static const struct kbase_device_init dev_init[] = {
"Timeline stream initialization failed" },
{ kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term,
"Clock rate trace manager initialization failed" },
+ { kbase_lowest_gpu_freq_init, NULL,
+ "Lowest freq initialization failed" },
{ kbase_instr_backend_init, kbase_instr_backend_term,
"Instrumentation backend initialization failed" },
{ kbase_device_hwcnt_backend_jm_init,
@@ -197,6 +199,8 @@ static const struct kbase_device_init dev_init[] = {
"GPU hwcnt virtualizer initialization failed" },
{ kbase_device_vinstr_init, kbase_device_vinstr_term,
"Virtual instrumentation initialization failed" },
+ { kbase_device_kinstr_prfcnt_init, kbase_device_kinstr_prfcnt_term,
+ "Performance counter instrumentation initialization failed" },
{ kbase_backend_late_init, kbase_backend_late_term,
"Late backend initialization failed" },
#ifdef MALI_KBASE_BUILD
diff --git a/mali_kbase/device/mali_kbase_device.c b/mali_kbase/device/mali_kbase_device.c
index 0f992c3..518aaf9 100644
--- a/mali_kbase/device/mali_kbase_device.c
+++ b/mali_kbase/device/mali_kbase_device.c
@@ -40,6 +40,7 @@
#include <linux/priority_control_manager.h>
#include <tl/mali_kbase_timeline.h>
+#include "mali_kbase_kinstr_prfcnt.h"
#include "mali_kbase_vinstr.h"
#include "mali_kbase_hwcnt_context.h"
#include "mali_kbase_hwcnt_virtualizer.h"
@@ -49,6 +50,7 @@
#include "backend/gpu/mali_kbase_pm_internal.h"
#include "backend/gpu/mali_kbase_irq_internal.h"
#include "mali_kbase_regs_history_debugfs.h"
+#include "mali_kbase_pbha.h"
#ifdef CONFIG_MALI_ARBITER_SUPPORT
#include "arbiter/mali_kbase_arbiter_pm.h"
@@ -273,6 +275,14 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
if (err)
goto dma_set_mask_failed;
+ /* There is no limit for Mali, so set to max. We only do this if dma_parms
+ * is already allocated by the platform.
+ */
+ if (kbdev->dev->dma_parms)
+ err = dma_set_max_seg_size(kbdev->dev, UINT_MAX);
+ if (err)
+ goto dma_set_mask_failed;
+
kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces;
err = kbase_device_all_as_init(kbdev);
@@ -282,6 +292,9 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
err = kbase_ktrace_init(kbdev);
if (err)
goto term_as;
+ err = kbase_pbha_read_dtb(kbdev);
+ if (err)
+ goto term_ktrace;
init_waitqueue_head(&kbdev->cache_clean_wait);
@@ -309,6 +322,8 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
}
return 0;
+term_ktrace:
+ kbase_ktrace_term(kbdev);
term_as:
kbase_device_all_as_term(kbdev);
dma_set_mask_failed:
@@ -395,6 +410,17 @@ void kbase_device_vinstr_term(struct kbase_device *kbdev)
kbase_vinstr_term(kbdev->vinstr_ctx);
}
+int kbase_device_kinstr_prfcnt_init(struct kbase_device *kbdev)
+{
+ return kbase_kinstr_prfcnt_init(kbdev->hwcnt_gpu_virt,
+ &kbdev->kinstr_prfcnt_ctx);
+}
+
+void kbase_device_kinstr_prfcnt_term(struct kbase_device *kbdev)
+{
+ kbase_kinstr_prfcnt_term(kbdev->kinstr_prfcnt_ctx);
+}
+
int kbase_device_io_history_init(struct kbase_device *kbdev)
{
return kbase_io_history_init(&kbdev->io_history,
@@ -461,6 +487,11 @@ int kbase_device_early_init(struct kbase_device *kbdev)
if (err)
goto fail_runtime_pm;
+ /* This spinlock is initialized before doing the first access to GPU
+ * registers and installing interrupt handlers.
+ */
+ spin_lock_init(&kbdev->hwaccess_lock);
+
/* Ensure we can access the GPU registers */
kbase_pm_register_access_enable(kbdev);
@@ -470,10 +501,6 @@ int kbase_device_early_init(struct kbase_device *kbdev)
/* We're done accessing the GPU registers for now. */
kbase_pm_register_access_disable(kbdev);
- /* This spinlock has to be initialized before installing interrupt
- * handlers that require to hold it to process interrupts.
- */
- spin_lock_init(&kbdev->hwaccess_lock);
#ifdef CONFIG_MALI_ARBITER_SUPPORT
if (kbdev->arb.arb_if)
err = kbase_arbiter_pm_install_interrupts(kbdev);
diff --git a/mali_kbase/device/mali_kbase_device_hw.c b/mali_kbase/device/mali_kbase_device_hw.c
index e80559a..4c98ae1 100644
--- a/mali_kbase/device/mali_kbase_device_hw.c
+++ b/mali_kbase/device/mali_kbase_device_hw.c
@@ -28,44 +28,6 @@
#include <mmu/mali_kbase_mmu.h>
#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
-void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value)
-{
- KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
- KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
-
- writel(value, kbdev->reg + offset);
-
-#if IS_ENABLED(CONFIG_DEBUG_FS)
- if (unlikely(kbdev->io_history.enabled))
- kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
- value, 1);
-#endif /* CONFIG_DEBUG_FS */
- dev_dbg(kbdev->dev, "w: reg %08x val %08x", offset, value);
-}
-
-KBASE_EXPORT_TEST_API(kbase_reg_write);
-
-u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
-{
- u32 val;
-
- KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
- KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
-
- val = readl(kbdev->reg + offset);
-
-#if IS_ENABLED(CONFIG_DEBUG_FS)
- if (unlikely(kbdev->io_history.enabled))
- kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
- val, 0);
-#endif /* CONFIG_DEBUG_FS */
- dev_dbg(kbdev->dev, "r: reg %08x val %08x", offset, val);
-
- return val;
-}
-
-KBASE_EXPORT_TEST_API(kbase_reg_read);
-
bool kbase_is_gpu_removed(struct kbase_device *kbdev)
{
u32 val;
@@ -99,7 +61,7 @@ void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev)
KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, 0);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
- GPU_COMMAND_CLEAN_INV_CACHES);
+ GPU_COMMAND_CACHE_CLN_INV_L2);
kbdev->cache_clean_in_progress = true;
}
@@ -134,7 +96,7 @@ void kbase_clean_caches_done(struct kbase_device *kbdev)
KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, 0);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
- GPU_COMMAND_CLEAN_INV_CACHES);
+ GPU_COMMAND_CACHE_CLN_INV_L2);
} else {
/* Disable interrupt */
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
diff --git a/mali_kbase/device/mali_kbase_device_internal.h b/mali_kbase/device/mali_kbase_device_internal.h
index d422407..d4f6875 100644
--- a/mali_kbase/device/mali_kbase_device_internal.h
+++ b/mali_kbase/device/mali_kbase_device_internal.h
@@ -39,6 +39,9 @@ struct kbase_device_init {
int kbase_device_vinstr_init(struct kbase_device *kbdev);
void kbase_device_vinstr_term(struct kbase_device *kbdev);
+int kbase_device_kinstr_prfcnt_init(struct kbase_device *kbdev);
+void kbase_device_kinstr_prfcnt_term(struct kbase_device *kbdev);
+
int kbase_device_timeline_init(struct kbase_device *kbdev);
void kbase_device_timeline_term(struct kbase_device *kbdev);
diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c
index f9d4c14..7499729 100644
--- a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c
+++ b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c
@@ -42,15 +42,19 @@ const char *kbase_gpu_exception_name(u32 const exception_code)
case CS_FATAL_EXCEPTION_TYPE_CS_ENDPOINT_FAULT:
e = "FATAL_CS_ENDPOINT_FAULT";
break;
- case CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT:
- e = "FATAL_CS_BUS_FAULT";
- break;
case CS_FATAL_EXCEPTION_TYPE_CS_INVALID_INSTRUCTION:
e = "FATAL_CS_INVALID_INSTRUCTION";
break;
case CS_FATAL_EXCEPTION_TYPE_CS_CALL_STACK_OVERFLOW:
e = "FATAL_CS_CALL_STACK_OVERFLOW";
break;
+ /*
+ * CS_FAULT_EXCEPTION_TYPE_CS_BUS_FAULT and CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT
+ * share the same error code. The type of CS_BUS_FAULT will be differentiated
+ * by the CSF exception handler.
+ */
+ case CS_FAULT_EXCEPTION_TYPE_CS_BUS_FAULT:
+ e = "CS_BUS_FAULT";
+ break;
/* Shader exceptions */
case CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_PC:
e = "INSTR_INVALID_PC";
@@ -61,6 +65,10 @@ const char *kbase_gpu_exception_name(u32 const exception_code)
case CS_FAULT_EXCEPTION_TYPE_INSTR_BARRIER_FAULT:
e = "INSTR_BARRIER_FAULT";
break;
+ /* Iterator exceptions */
+ case CS_FAULT_EXCEPTION_TYPE_KABOOM:
+ e = "KABOOM";
+ break;
/* Misc exceptions */
case CS_FAULT_EXCEPTION_TYPE_DATA_INVALID_FAULT:
e = "DATA_INVALID_FAULT";
diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.c
index 4737b0e..e240117 100644
--- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.c
+++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.c
@@ -44,8 +44,9 @@ static inline u32 kbase_ipa_read_hwcnt(
u32 offset)
{
u8 *p = (u8 *)model_data->dump_buf.dump_buf;
+ u64 val = *(u64 *)&p[offset];
- return *(u32 *)&p[offset];
+ return (val > U32_MAX) ? U32_MAX : (u32)val;
}
static inline s64 kbase_ipa_add_saturate(s64 a, s64 b)
diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h
index 3486a9b..faf08ef 100644
--- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h
+++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h
@@ -30,7 +30,7 @@
#define KBASE_IPA_MAX_GROUP_DEF_NUM 16
/* Number of bytes per hardware counter in a vinstr_buffer. */
-#define KBASE_IPA_NR_BYTES_PER_CNT 4
+#define KBASE_IPA_NR_BYTES_PER_CNT (sizeof(u64))
/* Number of hardware counters per block in a vinstr_buffer. */
#define KBASE_IPA_NR_CNT_PER_BLOCK 64
diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c
index 1852c3c..a47699c 100644
--- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c
+++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c
@@ -25,14 +25,18 @@
/* MEMSYS counter block offsets */
#define L2_RD_MSG_IN (16)
#define L2_WR_MSG_IN (18)
+#define L2_RD_MSG_OUT (22)
#define L2_READ_LOOKUP (26)
#define L2_EXT_WRITE_NOSNP_FULL (43)
/* SC counter block offsets */
+#define FRAG_STARVING (8)
+#define FRAG_PARTIAL_QUADS_RAST (10)
#define FRAG_QUADS_EZS_UPDATE (13)
#define FULL_QUAD_WARPS (21)
#define EXEC_INSTR_FMA (27)
#define EXEC_INSTR_CVT (28)
+#define EXEC_INSTR_MSG (30)
#define TEX_FILT_NUM_OPS (39)
#define LS_MEM_READ_SHORT (45)
#define LS_MEM_WRITE_SHORT (47)
@@ -44,6 +48,8 @@
#define VFETCH_POS_READ_WAIT (29)
#define VFETCH_VERTEX_WAIT (30)
#define IDVS_VAR_SHAD_STALL (38)
+#define ITER_STALL (40)
+#define PMGR_PTR_RD_STALL (48)
#define COUNTER_DEF(cnt_name, coeff, cnt_idx, block_type) \
{ \
@@ -80,6 +86,33 @@ static const struct kbase_ipa_counter ipa_top_level_cntrs_def_todx[] = {
TILER_COUNTER_DEF("vfetch_pos_read_wait", -119118, VFETCH_POS_READ_WAIT),
};
+static const struct kbase_ipa_counter ipa_top_level_cntrs_def_tgrx[] = {
+ MEMSYS_COUNTER_DEF("l2_rd_msg_in", 295631, L2_RD_MSG_IN),
+ MEMSYS_COUNTER_DEF("l2_ext_write_nosnp_ull", 325168, L2_EXT_WRITE_NOSNP_FULL),
+
+ TILER_COUNTER_DEF("prefetch_stall", 145435, PREFETCH_STALL),
+ TILER_COUNTER_DEF("idvs_var_shad_stall", -171917, IDVS_VAR_SHAD_STALL),
+ TILER_COUNTER_DEF("idvs_pos_shad_stall", 109980, IDVS_POS_SHAD_STALL),
+ TILER_COUNTER_DEF("vfetch_pos_read_wait", -119118, VFETCH_POS_READ_WAIT),
+};
+
+static const struct kbase_ipa_counter ipa_top_level_cntrs_def_tvax[] = {
+ MEMSYS_COUNTER_DEF("l2_rd_msg_out", 491414, L2_RD_MSG_OUT),
+ MEMSYS_COUNTER_DEF("l2_wr_msg_in", 408645, L2_WR_MSG_IN),
+
+ TILER_COUNTER_DEF("iter_stall", 893324, ITER_STALL),
+ TILER_COUNTER_DEF("pmgr_ptr_rd_stall", -975117, PMGR_PTR_RD_STALL),
+ TILER_COUNTER_DEF("idvs_pos_shad_stall", 22555, IDVS_POS_SHAD_STALL),
+};
+
+static const struct kbase_ipa_counter ipa_top_level_cntrs_def_ttux[] = {
+ MEMSYS_COUNTER_DEF("l2_rd_msg_in", 800836, L2_RD_MSG_IN),
+ MEMSYS_COUNTER_DEF("l2_wr_msg_in", 415579, L2_WR_MSG_IN),
+ MEMSYS_COUNTER_DEF("l2_read_lookup", -198124, L2_READ_LOOKUP),
+
+ TILER_COUNTER_DEF("idvs_pos_shad_stall", 117358, IDVS_POS_SHAD_STALL),
+ TILER_COUNTER_DEF("vfetch_vertex_wait", -391964, VFETCH_VERTEX_WAIT),
+};
/* These tables provide a description of each performance counter
* used by the shader cores counter model for energy estimation.
@@ -93,6 +126,32 @@ static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_todx[] = {
SC_COUNTER_DEF("vary_slot_16", 181069, VARY_SLOT_16),
};
+static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_tgrx[] = {
+ SC_COUNTER_DEF("exec_instr_fma", 505449, EXEC_INSTR_FMA),
+ SC_COUNTER_DEF("tex_filt_num_operations", 574869, TEX_FILT_NUM_OPS),
+ SC_COUNTER_DEF("ls_mem_read_short", 60917, LS_MEM_READ_SHORT),
+ SC_COUNTER_DEF("frag_quads_ezs_update", 694555, FRAG_QUADS_EZS_UPDATE),
+ SC_COUNTER_DEF("ls_mem_write_short", 698290, LS_MEM_WRITE_SHORT),
+ SC_COUNTER_DEF("vary_slot_16", 181069, VARY_SLOT_16),
+};
+
+static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_tvax[] = {
+ SC_COUNTER_DEF("tex_filt_num_operations", 142536, TEX_FILT_NUM_OPS),
+ SC_COUNTER_DEF("exec_instr_fma", 243497, EXEC_INSTR_FMA),
+ SC_COUNTER_DEF("exec_instr_msg", 1344410, EXEC_INSTR_MSG),
+ SC_COUNTER_DEF("vary_slot_16", -119612, VARY_SLOT_16),
+ SC_COUNTER_DEF("frag_partial_quads_rast", 676201, FRAG_PARTIAL_QUADS_RAST),
+ SC_COUNTER_DEF("frag_starving", 62421, FRAG_STARVING),
+};
+
+static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttux[] = {
+ SC_COUNTER_DEF("exec_instr_fma", 457012, EXEC_INSTR_FMA),
+ SC_COUNTER_DEF("tex_filt_num_operations", 441911, TEX_FILT_NUM_OPS),
+ SC_COUNTER_DEF("ls_mem_read_short", 322525, LS_MEM_READ_SHORT),
+ SC_COUNTER_DEF("full_quad_warps", 844124, FULL_QUAD_WARPS),
+ SC_COUNTER_DEF("exec_instr_cvt", 226411, EXEC_INSTR_CVT),
+ SC_COUNTER_DEF("frag_quads_ezs_update",372032, FRAG_QUADS_EZS_UPDATE),
+};
#define IPA_POWER_MODEL_OPS(gpu, init_token) \
const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \
@@ -128,13 +187,21 @@ static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_todx[] = {
/* Reference voltage value is 750 mV.
*/
STANDARD_POWER_MODEL(todx, 750);
+STANDARD_POWER_MODEL(tgrx, 750);
+STANDARD_POWER_MODEL(tvax, 750);
+STANDARD_POWER_MODEL(ttux, 750);
/* Assuming LODX is an alias of TODX for IPA */
ALIAS_POWER_MODEL(lodx, todx);
+/* Assuming LTUX is an alias of TTUX for IPA */
+ALIAS_POWER_MODEL(ltux, ttux);
+
static const struct kbase_ipa_model_ops *ipa_counter_model_ops[] = {
&kbase_todx_ipa_model_ops, &kbase_lodx_ipa_model_ops,
+ &kbase_tgrx_ipa_model_ops, &kbase_tvax_ipa_model_ops,
+ &kbase_ttux_ipa_model_ops, &kbase_ltux_ipa_model_ops
};
const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find(
@@ -165,6 +232,14 @@ const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id)
return "mali-todx-power-model";
case GPU_ID2_PRODUCT_LODX:
return "mali-lodx-power-model";
+ case GPU_ID2_PRODUCT_TGRX:
+ return "mali-tgrx-power-model";
+ case GPU_ID2_PRODUCT_TVAX:
+ return "mali-tvax-power-model";
+ case GPU_ID2_PRODUCT_TTUX:
+ return "mali-ttux-power-model";
+ case GPU_ID2_PRODUCT_LTUX:
+ return "mali-ltux-power-model";
default:
return NULL;
}
diff --git a/mali_kbase/ipa/mali_kbase_ipa_debugfs.c b/mali_kbase/ipa/mali_kbase_ipa_debugfs.c
index 5976389..14df542 100644
--- a/mali_kbase/ipa/mali_kbase_ipa_debugfs.c
+++ b/mali_kbase/ipa/mali_kbase_ipa_debugfs.c
@@ -247,7 +247,7 @@ static void kbase_ipa_model_debugfs_init(struct kbase_ipa_model *model)
dir = debugfs_create_dir(model->ops->name,
model->kbdev->mali_debugfs_directory);
- if (!dir) {
+ if (IS_ERR_OR_NULL(dir)) {
dev_err(model->kbdev->dev,
"Couldn't create mali debugfs %s directory",
model->ops->name);
diff --git a/mali_kbase/jm/mali_kbase_jm_defs.h b/mali_kbase/jm/mali_kbase_jm_defs.h
index c490f1c..cb1c276 100644
--- a/mali_kbase/jm/mali_kbase_jm_defs.h
+++ b/mali_kbase/jm/mali_kbase_jm_defs.h
@@ -87,8 +87,6 @@
#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8)
/* Atom is currently in the list of atoms blocked on cross-slot dependencies */
#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9)
-/* Atom is currently holding a context reference */
-#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10)
/* Atom requires GPU to be in protected mode */
#define KBASE_KATOM_FLAG_PROTECTED (1<<11)
/* Atom has been stored in runnable_tree */
@@ -176,7 +174,7 @@ struct kbase_jd_atom_dependency {
static inline const struct kbase_jd_atom *
kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
{
- LOCAL_ASSERT(dep != NULL);
+ KBASE_DEBUG_ASSERT(dep != NULL);
return (const struct kbase_jd_atom *)(dep->atom);
}
@@ -191,7 +189,7 @@ kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
static inline u8 kbase_jd_katom_dep_type(
const struct kbase_jd_atom_dependency *dep)
{
- LOCAL_ASSERT(dep != NULL);
+ KBASE_DEBUG_ASSERT(dep != NULL);
return dep->dep_type;
}
@@ -209,7 +207,7 @@ static inline void kbase_jd_katom_dep_set(
{
struct kbase_jd_atom_dependency *dep;
- LOCAL_ASSERT(const_dep != NULL);
+ KBASE_DEBUG_ASSERT(const_dep != NULL);
dep = (struct kbase_jd_atom_dependency *)const_dep;
@@ -227,7 +225,7 @@ static inline void kbase_jd_katom_dep_clear(
{
struct kbase_jd_atom_dependency *dep;
- LOCAL_ASSERT(const_dep != NULL);
+ KBASE_DEBUG_ASSERT(const_dep != NULL);
dep = (struct kbase_jd_atom_dependency *)const_dep;
@@ -653,6 +651,48 @@ static inline bool kbase_jd_katom_is_protected(
return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED);
}
+/**
+ * kbase_jd_atom_is_younger - query if one atom is younger by age than another
+ * @katom_a: the first atom
+ * @katom_b: the second atom
+ *
+ * Return: true if the first atom is strictly younger than the second, false
+ * otherwise.
+ */
+static inline bool kbase_jd_atom_is_younger(const struct kbase_jd_atom *katom_a,
+ const struct kbase_jd_atom *katom_b)
+{
+ return ((s32)(katom_a->age - katom_b->age) < 0);
+}
+
+/**
+ * kbase_jd_atom_is_earlier - query if one atom was submitted earlier than another
+ * @katom_a: the first atom
+ * @katom_b: the second atom
+ *
+ * Return: true if the first atom has been submitted earlier than the
+ * second atom. It is used to understand if an atom that is ready has been
+ * submitted earlier than the currently running atom, so that the currently
+ * running atom should be preempted to allow the ready atom to run.
+ */
+static inline bool kbase_jd_atom_is_earlier(const struct kbase_jd_atom *katom_a,
+ const struct kbase_jd_atom *katom_b)
+{
+ /* No seq_nr set? */
+ if (!katom_a->seq_nr || !katom_b->seq_nr)
+ return false;
+
+ /* Efficiently handle the unlikely case of wrapping.
+ * The following code assumes that the delta between the sequence numbers
+ * of the two atoms is less than INT64_MAX.
+ * In the extremely unlikely case where the delta is higher, the comparison
+ * defaults to no preemption.
+ * The code also assumes that the conversion from unsigned to signed types
+ * works because signed integers are 2's complement.
+ */
+ return (s64)(katom_a->seq_nr - katom_b->seq_nr) < 0;
+}
+
/*
* Theory of operations:
*
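The wrap handling in kbase_jd_atom_is_earlier() above can be checked in isolation.
The following is a standalone userspace sketch of the same unsigned-subtract-then-
reinterpret idiom, not driver code:

#include <assert.h>
#include <stdint.h>

/* Mirrors the (s64)(a - b) < 0 comparison used above for 64-bit seq_nr. */
static int is_earlier(uint64_t seq_a, uint64_t seq_b)
{
	return (int64_t)(seq_a - seq_b) < 0;
}

int main(void)
{
	assert(is_earlier(5, 9));              /* plain case: 5 was assigned first */
	assert(!is_earlier(9, 5));
	/* Across a wrap: UINT64_MAX - 1 was assigned just before the counter
	 * wrapped to small values, so it still compares as earlier than 3.
	 */
	assert(is_earlier(UINT64_MAX - 1, 3));
	return 0;
}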
diff --git a/mali_kbase/jm/mali_kbase_jm_js.h b/mali_kbase/jm/mali_kbase_jm_js.h
index 5e0c4bc..5a972a5 100644
--- a/mali_kbase/jm/mali_kbase_jm_js.h
+++ b/mali_kbase/jm/mali_kbase_jm_js.h
@@ -108,6 +108,52 @@ int kbasep_js_kctx_init(struct kbase_context *const kctx);
*/
void kbasep_js_kctx_term(struct kbase_context *kctx);
+/* kbase_jsctx_slot_prio_blocked_set - Set a context as being blocked for a job
+ * slot at and below a given priority level
+ * @kctx: The kbase_context
+ * @js: The job slot
+ * @sched_prio: The priority level that the context is blocked at for @js (all
+ * priority levels at this level and below will be blocked)
+ *
+ * To preserve ordering and dependencies of atoms on soft-stopping (both within
+ * and between priority levels), a context must be marked as blocked for that
+ * atom's job slot, for all priority levels at or below the atom's priority.
+ *
+ * This must only be called due to an atom that was pulled from the context,
+ * otherwise there will be no way of unblocking the context when the atom is
+ * completed/unpulled.
+ *
+ * Atoms of higher priority might still be able to be pulled from the context
+ * on @js. This helps with starting a high priority atom as soon as possible.
+ */
+static inline void kbase_jsctx_slot_prio_blocked_set(struct kbase_context *kctx,
+ int js, int sched_prio)
+{
+ struct kbase_jsctx_slot_tracking *slot_tracking =
+ &kctx->slot_tracking[js];
+
+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+ WARN(!slot_tracking->atoms_pulled_pri[sched_prio],
+ "When marking slot %d as blocked for priority %d on a kctx, no atoms were pulled - the slot cannot become unblocked",
+ js, sched_prio);
+
+ slot_tracking->blocked |= ((kbase_js_prio_bitmap_t)1) << sched_prio;
+ KBASE_KTRACE_ADD_JM_SLOT_INFO(kctx->kbdev, JS_SLOT_PRIO_BLOCKED, kctx,
+ NULL, 0, js, (unsigned int)sched_prio);
+}
+
+/* kbase_jsctx_atoms_pulled - Return number of atoms pulled on a context
+ * @kctx: The kbase_context
+ *
+ * Having atoms pulled indicates the context is not idle.
+ *
+ * Return: the number of atoms pulled on @kctx
+ */
+static inline int kbase_jsctx_atoms_pulled(struct kbase_context *kctx)
+{
+ return atomic_read(&kctx->atoms_pulled_all_slots);
+}
+
/**
* kbasep_js_add_job - Add a job chain to the Job Scheduler,
* and take necessary actions to
@@ -947,7 +993,38 @@ static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio)
*
* Return: The same or lower priority than requested.
*/
-
base_jd_prio kbase_js_priority_check(struct kbase_device *kbdev, base_jd_prio priority);
+/**
+ * kbase_js_atom_runs_before - determine if atoms for the same slot have an
+ * ordering relation
+ * @kbdev: kbase device
+ * @katom_a: the first atom
+ * @katom_b: the second atom.
+ * @order_flags: combination of KBASE_ATOM_ORDERING_FLAG_<...> for the ordering
+ * relation
+ *
+ * This is for making consistent decisions about the ordering of atoms when we
+ * need to do pre-emption on a slot, which includes stopping existing atoms
+ * when a new atom is ready to run, and also deciding which other atoms to
+ * remove from the slot when the atom in JSn_HEAD is being pre-empted.
+ *
+ * This only handles @katom_a and @katom_b being for the same job slot, as
+ * pre-emption only operates within a slot.
+ *
+ * Note: there is currently no use-case for this as a sorting comparison
+ * function, hence only a boolean is returned instead of an int -1/0/+1. If
+ * required in future, a modification to do so would be better than calling
+ * twice with katom_a and katom_b swapped.
+ *
+ * Return:
+ * true if @katom_a should run before @katom_b, false otherwise.
+ * A false return value does not distinguish between "no ordering relation" and
+ * "@katom_a should run after @katom_b".
+ */
+bool kbase_js_atom_runs_before(struct kbase_device *kbdev,
+ const struct kbase_jd_atom *katom_a,
+ const struct kbase_jd_atom *katom_b,
+ const kbase_atom_ordering_flag_t order_flags);
+
#endif /* _KBASE_JM_JS_H_ */
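The "blocked at this level and below" semantics documented for
kbase_jsctx_slot_prio_blocked_set() above map onto simple bitmap arithmetic
(bit 0 = highest priority). The corresponding query is not part of this hunk, so
the sketch below is an assumption about how such a check could look, expressed as
standalone userspace code rather than the driver's implementation:

#include <assert.h>
#include <stdint.h>

typedef uint8_t prio_bitmap_t; /* stand-in for kbase_js_prio_bitmap_t */

/* Hypothetical query (not in this patch): level 'prio' counts as blocked if a
 * block was recorded at that level or at any higher-priority (lower-numbered)
 * level, matching the "at this level and below" wording above.
 */
static int prio_is_blocked(prio_bitmap_t blocked, int prio)
{
	prio_bitmap_t this_and_higher = (prio_bitmap_t)((1u << (prio + 1)) - 1u);

	return (blocked & this_and_higher) != 0;
}

int main(void)
{
	prio_bitmap_t blocked = 0;

	blocked |= (prio_bitmap_t)(1u << 1);   /* block recorded at level 1 */
	assert(!prio_is_blocked(blocked, 0));  /* highest level still pullable */
	assert(prio_is_blocked(blocked, 1));   /* blocked at the recorded level */
	assert(prio_is_blocked(blocked, 2));   /* ...and at all lower levels */
	return 0;
}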
diff --git a/mali_kbase/jm/mali_kbase_js_defs.h b/mali_kbase/jm/mali_kbase_js_defs.h
index 75152fb..a1d40ba 100644
--- a/mali_kbase/jm/mali_kbase_js_defs.h
+++ b/mali_kbase/jm/mali_kbase_js_defs.h
@@ -187,6 +187,33 @@ enum {
*/
#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
+/* Atom priority bitmaps, where bit 0 is the highest priority, and higher bits
+ * indicate successively lower KBASE_JS_ATOM_SCHED_PRIO_<...> levels.
+ *
+ * The type must be strictly wider than the number of bits needed to represent
+ * a bitmap of priorities, so that we can do calculations such as:
+ * (1 << KBASE_JS_ATOM_SCHED_PRIO_COUNT) - 1
+ * ...without causing undefined behavior due to a shift beyond the width of
+ * the type.
+ *
+ * If KBASE_JS_ATOM_SCHED_PRIO_COUNT starts requiring 32 bits, then it's worth
+ * moving to DECLARE_BITMAP()
+ */
+typedef u8 kbase_js_prio_bitmap_t;
+
+/* Ordering modification for kbase_js_atom_runs_before() */
+typedef u32 kbase_atom_ordering_flag_t;
+
+/* Atoms of the same context and priority should have their ordering decided by
+ * their seq_nr instead of their age.
+ *
+ * seq_nr is used as a more slowly changing variant of age - it increases once
+ * per group of related atoms, as determined by user-space. Hence, it can be
+ * used to limit re-ordering decisions (such as pre-emption) to only re-order
+ * between such groups, rather than re-order within those groups of atoms.
+ */
+#define KBASE_ATOM_ORDERING_FLAG_SEQNR (((kbase_atom_ordering_flag_t)1) << 0)
+
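As a small worked example (assuming, say, four priority levels), the bitmap type and the calculation mentioned above combine like this; the variable name is purely illustrative:

/* Sketch: a mask covering every priority level, e.g. to mark a slot as
 * blocked at all priorities. With 4 priority levels this evaluates to 0x0f,
 * which fits comfortably in the u8-backed kbase_js_prio_bitmap_t.
 */
kbase_js_prio_bitmap_t all_prios_blocked =
	(1 << KBASE_JS_ATOM_SCHED_PRIO_COUNT) - 1;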
/**
* struct kbasep_js_device_data - KBase Device Data Job Scheduler sub-structure
* @runpool_irq: Sub-structure to collect together Job Scheduling data used in
@@ -393,4 +420,23 @@ struct kbasep_js_atom_retained_state {
*/
#define KBASEP_JS_TICK_RESOLUTION_US 1
+/**
+ * struct kbase_jsctx_slot_tracking - Job Scheduling tracking of a context's
+ * use of a job slot
+ * @blocked: bitmap of priorities that this slot is blocked at
+ * @atoms_pulled: counts of atoms that have been pulled from this slot,
+ * across all priority levels
+ * @atoms_pulled_pri: counts of atoms that have been pulled from this slot, per
+ * priority level
+ *
+ * Controls how a slot from the &struct kbase_context's jsctx_queue is managed,
+ * for example to ensure correct ordering of atoms when atoms of different
+ * priorities are unpulled.
+ */
+struct kbase_jsctx_slot_tracking {
+ kbase_js_prio_bitmap_t blocked;
+ atomic_t atoms_pulled;
+ int atoms_pulled_pri[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+};
+
#endif /* _KBASE_JS_DEFS_H_ */
diff --git a/mali_kbase/mali_base_hwconfig_features.h b/mali_kbase/mali_base_hwconfig_features.h
index 93cd05f..2e81cb1 100644
--- a/mali_kbase/mali_base_hwconfig_features.h
+++ b/mali_kbase/mali_base_hwconfig_features.h
@@ -28,26 +28,7 @@
#define _BASE_HWCONFIG_FEATURES_H_
enum base_hw_feature {
- BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
- BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
- BASE_HW_FEATURE_XAFFINITY,
- BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
- BASE_HW_FEATURE_MRT,
- BASE_HW_FEATURE_BRNDOUT_CC,
- BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
- BASE_HW_FEATURE_LD_ST_TILEBUFFER,
- BASE_HW_FEATURE_MSAA_16X,
- BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
- BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
- BASE_HW_FEATURE_T7XX_PAIRING_RULES,
- BASE_HW_FEATURE_LD_ST_LEA_TEX,
- BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
- BASE_HW_FEATURE_TEST4_DATUM_MODE,
- BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
- BASE_HW_FEATURE_BRNDOUT_KILL,
- BASE_HW_FEATURE_WARPING,
BASE_HW_FEATURE_FLUSH_REDUCTION,
- BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_TLS_HASHING,
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
@@ -55,6 +36,7 @@ enum base_hw_feature {
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_L2_CONFIG,
BASE_HW_FEATURE_ASN_HASH,
+ BASE_HW_FEATURE_GPU_SLEEP,
BASE_HW_FEATURE_END
};
@@ -63,240 +45,69 @@ static const enum base_hw_feature base_hw_features_generic[] = {
};
static const enum base_hw_feature base_hw_features_tMIx[] = {
- BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
- BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
- BASE_HW_FEATURE_XAFFINITY,
- BASE_HW_FEATURE_WARPING,
- BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
- BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
- BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
- BASE_HW_FEATURE_BRNDOUT_CC,
- BASE_HW_FEATURE_BRNDOUT_KILL,
- BASE_HW_FEATURE_LD_ST_LEA_TEX,
- BASE_HW_FEATURE_LD_ST_TILEBUFFER,
- BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
- BASE_HW_FEATURE_MRT,
- BASE_HW_FEATURE_MSAA_16X,
- BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
- BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
- BASE_HW_FEATURE_T7XX_PAIRING_RULES,
- BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_FLUSH_REDUCTION,
- BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_tHEx[] = {
- BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
- BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
- BASE_HW_FEATURE_XAFFINITY,
- BASE_HW_FEATURE_WARPING,
- BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
- BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
- BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
- BASE_HW_FEATURE_BRNDOUT_CC,
- BASE_HW_FEATURE_BRNDOUT_KILL,
- BASE_HW_FEATURE_LD_ST_LEA_TEX,
- BASE_HW_FEATURE_LD_ST_TILEBUFFER,
- BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
- BASE_HW_FEATURE_MRT,
- BASE_HW_FEATURE_MSAA_16X,
- BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
- BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
- BASE_HW_FEATURE_T7XX_PAIRING_RULES,
- BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
- BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_tSIx[] = {
- BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
- BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
- BASE_HW_FEATURE_XAFFINITY,
- BASE_HW_FEATURE_WARPING,
- BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
- BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
- BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
- BASE_HW_FEATURE_BRNDOUT_CC,
- BASE_HW_FEATURE_BRNDOUT_KILL,
- BASE_HW_FEATURE_LD_ST_LEA_TEX,
- BASE_HW_FEATURE_LD_ST_TILEBUFFER,
- BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
- BASE_HW_FEATURE_MRT,
- BASE_HW_FEATURE_MSAA_16X,
- BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
- BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
- BASE_HW_FEATURE_T7XX_PAIRING_RULES,
- BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
- BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_tDVx[] = {
- BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
- BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
- BASE_HW_FEATURE_XAFFINITY,
- BASE_HW_FEATURE_WARPING,
- BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
- BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
- BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
- BASE_HW_FEATURE_BRNDOUT_CC,
- BASE_HW_FEATURE_BRNDOUT_KILL,
- BASE_HW_FEATURE_LD_ST_LEA_TEX,
- BASE_HW_FEATURE_LD_ST_TILEBUFFER,
- BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
- BASE_HW_FEATURE_MRT,
- BASE_HW_FEATURE_MSAA_16X,
- BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
- BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
- BASE_HW_FEATURE_T7XX_PAIRING_RULES,
- BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
- BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_tNOx[] = {
- BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
- BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
- BASE_HW_FEATURE_XAFFINITY,
- BASE_HW_FEATURE_WARPING,
- BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
- BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
- BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
- BASE_HW_FEATURE_BRNDOUT_CC,
- BASE_HW_FEATURE_BRNDOUT_KILL,
- BASE_HW_FEATURE_LD_ST_LEA_TEX,
- BASE_HW_FEATURE_LD_ST_TILEBUFFER,
- BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
- BASE_HW_FEATURE_MRT,
- BASE_HW_FEATURE_MSAA_16X,
- BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
- BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
- BASE_HW_FEATURE_T7XX_PAIRING_RULES,
- BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
- BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_TLS_HASHING,
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_tGOx[] = {
- BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
- BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
- BASE_HW_FEATURE_XAFFINITY,
- BASE_HW_FEATURE_WARPING,
- BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
- BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
- BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
- BASE_HW_FEATURE_BRNDOUT_CC,
- BASE_HW_FEATURE_BRNDOUT_KILL,
- BASE_HW_FEATURE_LD_ST_LEA_TEX,
- BASE_HW_FEATURE_LD_ST_TILEBUFFER,
- BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
- BASE_HW_FEATURE_MRT,
- BASE_HW_FEATURE_MSAA_16X,
- BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
- BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
- BASE_HW_FEATURE_T7XX_PAIRING_RULES,
- BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
- BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_TLS_HASHING,
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_tTRx[] = {
- BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
- BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
- BASE_HW_FEATURE_XAFFINITY,
- BASE_HW_FEATURE_WARPING,
- BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
- BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
- BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
- BASE_HW_FEATURE_BRNDOUT_CC,
- BASE_HW_FEATURE_BRNDOUT_KILL,
- BASE_HW_FEATURE_LD_ST_LEA_TEX,
- BASE_HW_FEATURE_LD_ST_TILEBUFFER,
- BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
- BASE_HW_FEATURE_MRT,
- BASE_HW_FEATURE_MSAA_16X,
- BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
- BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
- BASE_HW_FEATURE_T7XX_PAIRING_RULES,
- BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
- BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_tNAx[] = {
- BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
- BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
- BASE_HW_FEATURE_XAFFINITY,
- BASE_HW_FEATURE_WARPING,
- BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
- BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
- BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
- BASE_HW_FEATURE_BRNDOUT_CC,
- BASE_HW_FEATURE_BRNDOUT_KILL,
- BASE_HW_FEATURE_LD_ST_LEA_TEX,
- BASE_HW_FEATURE_LD_ST_TILEBUFFER,
- BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
- BASE_HW_FEATURE_MRT,
- BASE_HW_FEATURE_MSAA_16X,
- BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
- BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
- BASE_HW_FEATURE_T7XX_PAIRING_RULES,
- BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
- BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_tBEx[] = {
- BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
- BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
- BASE_HW_FEATURE_XAFFINITY,
- BASE_HW_FEATURE_WARPING,
- BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
- BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
- BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
- BASE_HW_FEATURE_BRNDOUT_CC,
- BASE_HW_FEATURE_BRNDOUT_KILL,
- BASE_HW_FEATURE_LD_ST_LEA_TEX,
- BASE_HW_FEATURE_LD_ST_TILEBUFFER,
- BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
- BASE_HW_FEATURE_MRT,
- BASE_HW_FEATURE_MSAA_16X,
- BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
- BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
- BASE_HW_FEATURE_T7XX_PAIRING_RULES,
- BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
- BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_L2_CONFIG,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
@@ -304,27 +115,8 @@ static const enum base_hw_feature base_hw_features_tBEx[] = {
};
static const enum base_hw_feature base_hw_features_tBAx[] = {
- BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
- BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
- BASE_HW_FEATURE_XAFFINITY,
- BASE_HW_FEATURE_WARPING,
- BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
- BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
- BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
- BASE_HW_FEATURE_BRNDOUT_CC,
- BASE_HW_FEATURE_BRNDOUT_KILL,
- BASE_HW_FEATURE_LD_ST_LEA_TEX,
- BASE_HW_FEATURE_LD_ST_TILEBUFFER,
- BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
- BASE_HW_FEATURE_MRT,
- BASE_HW_FEATURE_MSAA_16X,
- BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
- BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
- BASE_HW_FEATURE_T7XX_PAIRING_RULES,
- BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
- BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_L2_CONFIG,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
@@ -332,27 +124,8 @@ static const enum base_hw_feature base_hw_features_tBAx[] = {
};
static const enum base_hw_feature base_hw_features_tDUx[] = {
- BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
- BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
- BASE_HW_FEATURE_XAFFINITY,
- BASE_HW_FEATURE_WARPING,
- BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
- BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
- BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
- BASE_HW_FEATURE_BRNDOUT_CC,
- BASE_HW_FEATURE_BRNDOUT_KILL,
- BASE_HW_FEATURE_LD_ST_LEA_TEX,
- BASE_HW_FEATURE_LD_ST_TILEBUFFER,
- BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
- BASE_HW_FEATURE_MRT,
- BASE_HW_FEATURE_MSAA_16X,
- BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
- BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
- BASE_HW_FEATURE_T7XX_PAIRING_RULES,
- BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
- BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_L2_CONFIG,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
@@ -360,85 +133,37 @@ static const enum base_hw_feature base_hw_features_tDUx[] = {
};
static const enum base_hw_feature base_hw_features_tODx[] = {
- BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
- BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
- BASE_HW_FEATURE_XAFFINITY,
- BASE_HW_FEATURE_WARPING,
- BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
- BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
- BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
- BASE_HW_FEATURE_BRNDOUT_CC,
- BASE_HW_FEATURE_BRNDOUT_KILL,
- BASE_HW_FEATURE_LD_ST_LEA_TEX,
- BASE_HW_FEATURE_LD_ST_TILEBUFFER,
- BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
- BASE_HW_FEATURE_MRT,
- BASE_HW_FEATURE_MSAA_16X,
- BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
- BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
- BASE_HW_FEATURE_T7XX_PAIRING_RULES,
- BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
- BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_L2_CONFIG,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_tGRx[] = {
- BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
- BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
- BASE_HW_FEATURE_XAFFINITY,
- BASE_HW_FEATURE_WARPING,
- BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
- BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
- BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
- BASE_HW_FEATURE_BRNDOUT_CC,
- BASE_HW_FEATURE_BRNDOUT_KILL,
- BASE_HW_FEATURE_LD_ST_LEA_TEX,
- BASE_HW_FEATURE_LD_ST_TILEBUFFER,
- BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
- BASE_HW_FEATURE_MRT,
- BASE_HW_FEATURE_MSAA_16X,
- BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
- BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
- BASE_HW_FEATURE_T7XX_PAIRING_RULES,
- BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
- BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_L2_CONFIG,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_tVAx[] = {
- BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
- BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
- BASE_HW_FEATURE_XAFFINITY,
- BASE_HW_FEATURE_WARPING,
- BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
- BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
- BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
- BASE_HW_FEATURE_BRNDOUT_CC,
- BASE_HW_FEATURE_BRNDOUT_KILL,
- BASE_HW_FEATURE_LD_ST_LEA_TEX,
- BASE_HW_FEATURE_LD_ST_TILEBUFFER,
- BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
- BASE_HW_FEATURE_MRT,
- BASE_HW_FEATURE_MSAA_16X,
- BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
- BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
- BASE_HW_FEATURE_T7XX_PAIRING_RULES,
- BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
- BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_L2_CONFIG,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
BASE_HW_FEATURE_END
};
+static const enum base_hw_feature base_hw_features_tTUx[] = {
+ BASE_HW_FEATURE_FLUSH_REDUCTION,
+ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+ BASE_HW_FEATURE_L2_CONFIG,
+ BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+ BASE_HW_FEATURE_ASN_HASH,
+ BASE_HW_FEATURE_END
+};
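Each per-product table above is terminated by BASE_HW_FEATURE_END and selected in kbase_hw_set_features_mask() (see the mali_kbase_hw.c hunk further down). A minimal sketch of how such a table can be scanned, using a hypothetical helper name:

/* Sketch only: linear scan of a BASE_HW_FEATURE_END-terminated table. */
static bool example_feature_in_table(const enum base_hw_feature *features,
				     enum base_hw_feature feature)
{
	size_t i;

	for (i = 0; features[i] != BASE_HW_FEATURE_END; i++)
		if (features[i] == feature)
			return true;

	return false;
}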
+
#endif /* _BASE_HWCONFIG_FEATURES_H_ */
diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h
index beda1e4..d188120 100644
--- a/mali_kbase/mali_base_hwconfig_issues.h
+++ b/mali_kbase/mali_base_hwconfig_issues.h
@@ -59,6 +59,7 @@ enum base_hw_issue {
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TTRX_3485,
BASE_HW_ISSUE_GPU2019_3212,
+ BASE_HW_ISSUE_TURSEHW_1997,
BASE_HW_ISSUE_END
};
@@ -637,5 +638,21 @@ static const enum base_hw_issue base_hw_issues_model_tVAx[] = {
BASE_HW_ISSUE_END
};
+static const enum base_hw_issue base_hw_issues_model_tTUx[] = {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_TSIX_2033,
+ BASE_HW_ISSUE_TTRX_1337,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = {
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_TSIX_2033,
+ BASE_HW_ISSUE_TTRX_1337,
+ BASE_HW_ISSUE_TURSEHW_1997,
+ BASE_HW_ISSUE_END
+};
+
#endif /* _BASE_HWCONFIG_ISSUES_H_ */
diff --git a/mali_kbase/mali_kbase.h b/mali_kbase/mali_kbase.h
index b4e50ae..6bcb754 100644
--- a/mali_kbase/mali_kbase.h
+++ b/mali_kbase/mali_kbase.h
@@ -491,6 +491,46 @@ void kbase_pm_metrics_start(struct kbase_device *kbdev);
*/
void kbase_pm_metrics_stop(struct kbase_device *kbdev);
+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
+/**
+ * kbase_pm_handle_runtime_suspend - Handle the runtime suspend of GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This function is called from the runtime suspend callback to save the HW
+ * state and power down the GPU, if the GPU was in sleep state.
+ * It performs the following steps:
+ * - Powers up the L2 cache and re-activates the MCU.
+ * - Suspends the CSGs.
+ * - Halts the MCU.
+ * - Powers down the L2 cache.
+ * - Invokes the power_off callback to power down the GPU.
+ *
+ * Return: 0 if the GPU was already powered down or no error was encountered
+ * in the power down, otherwise an error code.
+ */
+int kbase_pm_handle_runtime_suspend(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_force_mcu_wakeup_after_sleep - Force the wake up of MCU from sleep
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This function forces the MCU to wake up from sleep state and waits for
+ * it to become active.
+ * It is usually called from the runtime suspend callback. It is also called
+ * from the GPU reset handler, at the time of system suspend, or when user
+ * space tries to terminate/suspend an on-slot group.
+ *
+ * Note: this function sets the @gpu_wakeup_override flag that forces the
+ * reactivation of the MCU; it is the caller's responsibility to clear
+ * the flag.
+ *
+ * Return: 0 if the wake up was successful.
+ */
+int kbase_pm_force_mcu_wakeup_after_sleep(struct kbase_device *kbdev);
+#endif
+
#if !MALI_USE_CSF
/**
* Return the atom's ID, as was originally supplied by userspace in
@@ -498,7 +538,8 @@ void kbase_pm_metrics_stop(struct kbase_device *kbdev);
* @kctx: KBase context pointer
 * @katom: Atom for which to return ID
*/
-static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+static inline int kbase_jd_atom_id(struct kbase_context *kctx,
+ const struct kbase_jd_atom *katom)
{
int result;
diff --git a/mali_kbase/mali_kbase_as_fault_debugfs.c b/mali_kbase/mali_kbase_as_fault_debugfs.c
index 027eb8c..deb412c 100644
--- a/mali_kbase/mali_kbase_as_fault_debugfs.c
+++ b/mali_kbase/mali_kbase_as_fault_debugfs.c
@@ -93,7 +93,10 @@ void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
debugfs_directory = debugfs_create_dir("address_spaces",
kbdev->mali_debugfs_directory);
- if (debugfs_directory) {
+ if (IS_ERR_OR_NULL(debugfs_directory)) {
+ dev_warn(kbdev->dev,
+ "unable to create address_spaces debugfs directory");
+ } else {
for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i);
debugfs_create_file(as_name, S_IRUGO,
@@ -101,9 +104,6 @@ void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
(void *)(uintptr_t)i,
&as_fault_fops);
}
- } else {
- dev_warn(kbdev->dev,
- "unable to create address_spaces debugfs directory");
}
#endif /* CONFIG_MALI_DEBUG */
diff --git a/mali_kbase/mali_kbase_config.h b/mali_kbase/mali_kbase_config.h
index e7eb334..8b7ee13 100644
--- a/mali_kbase/mali_kbase_config.h
+++ b/mali_kbase/mali_kbase_config.h
@@ -170,6 +170,12 @@ struct kbase_pm_callback_conf {
* the clocks to the GPU, or to completely power down the GPU.
* The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
* platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+ *
+ * If runtime PM is enabled and @power_runtime_gpu_idle_callback is used,
+ * then this callback should power off the GPU (or switch off the clocks
+ * to the GPU) immediately. If @power_runtime_gpu_idle_callback is not used,
+ * then this callback can set the autosuspend timeout (if desired) and
+ * let the GPU be powered down later.
*/
void (*power_off_callback)(struct kbase_device *kbdev);
@@ -289,6 +295,49 @@ struct kbase_pm_callback_conf {
* be raised. On error, return the corresponding OS error code.
*/
int (*soft_reset_callback)(struct kbase_device *kbdev);
+
+ /*
+ * Optional callback invoked after GPU becomes idle, not supported on
+ * JM GPUs.
+ *
+ * This callback will be invoked by Kbase when the GPU becomes idle.
+ * For JM GPUs or when runtime PM is disabled, Kbase will not invoke
+ * this callback and @power_off_callback will be invoked directly.
+ *
+ * This callback is supposed to decrement the runtime PM core reference
+ * count to zero and trigger the auto-suspend timer, which implies that
+ * @power_off_callback shouldn't initiate the runtime suspend.
+ *
+ * GPU registers still remain accessible until @power_off_callback gets
+ * invoked later on the expiry of auto-suspend timer.
+ *
+ * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use
+ * this feature.
+ */
+ void (*power_runtime_gpu_idle_callback)(struct kbase_device *kbdev);
+
+ /*
+ * Optional callback invoked to change the runtime PM core state to
+ * active.
+ *
+ * This callback will be invoked by Kbase when the GPU needs to be
+ * reactivated, but only if @power_runtime_gpu_idle_callback was invoked
+ * previously. So both @power_runtime_gpu_idle_callback and this
+ * callback need to be implemented at the same time.
+ *
+ * If the GPU was previously powered down, Kbase will invoke
+ * @power_on_callback before this callback; otherwise it will invoke this
+ * callback directly.
+ *
+ * This callback is supposed to increment the runtime PM core reference
+ * count to 1, which implies that @power_on_callback shouldn't initiate
+ * the runtime resume. The runtime resume may not happen synchronously
+ * to avoid a potential deadlock due to the runtime suspend happening
+ * simultaneously from some other thread.
+ *
+ * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use
+ * this feature.
+ */
+ void (*power_runtime_gpu_active_callback)(struct kbase_device *kbdev);
};
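A platform integration might wire the two new callbacks to the standard Linux runtime-PM helpers roughly as sketched below; this assumes an autosuspend-based platform and is not a reference implementation:

#include <linux/pm_runtime.h>

/* Sketch: drop the runtime-PM reference and start the autosuspend timer
 * once Kbase reports the GPU idle.
 */
static void example_runtime_gpu_idle(struct kbase_device *kbdev)
{
	pm_runtime_mark_last_busy(kbdev->dev);
	pm_runtime_put_autosuspend(kbdev->dev);
}

/* Sketch: take the reference back when Kbase reactivates the GPU. The
 * asynchronous pm_runtime_get() matches the note above about not resuming
 * synchronously.
 */
static void example_runtime_gpu_active(struct kbase_device *kbdev)
{
	pm_runtime_get(kbdev->dev);
}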
/* struct kbase_gpu_clk_notifier_data - Data for clock rate change notifier.
diff --git a/mali_kbase/mali_kbase_config_defaults.h b/mali_kbase/mali_kbase_config_defaults.h
index 63c36e2..8d64184 100644
--- a/mali_kbase/mali_kbase_config_defaults.h
+++ b/mali_kbase/mali_kbase_config_defaults.h
@@ -177,6 +177,19 @@ enum {
*/
#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
+/* Waiting timeout for a status change acknowledgment, in clock cycles.
+ * Based on a 3000 ms timeout at a nominal 100 MHz, as required for Android,
+ * scaled up from a 50 MHz GPU system.
+ */
+#define DEFAULT_REF_TIMEOUT_FREQ_KHZ (100000)
+#define CSF_FIRMWARE_TIMEOUT_CYCLES (300000000)
+
+/* A default timeout to be used when an invalid timeout selector is
+ * used to retrieve the timeout, on JM GPUs. CSF GPUs use the Firmware
+ * timeout as the default.
+ */
+#define JM_DEFAULT_TIMEOUT_CYCLES (150000000)
+
/**
* Default timeslice that a context is scheduled in for, in nanoseconds.
*
diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c
index e7fc41e..2472c7c 100644
--- a/mali_kbase/mali_kbase_core_linux.c
+++ b/mali_kbase/mali_kbase_core_linux.c
@@ -53,6 +53,7 @@
#include "mali_kbase_hwcnt_context.h"
#include "mali_kbase_hwcnt_virtualizer.h"
#include "mali_kbase_hwcnt_legacy.h"
+#include "mali_kbase_kinstr_prfcnt.h"
#include "mali_kbase_vinstr.h"
#if MALI_USE_CSF
#include "csf/mali_kbase_csf_firmware.h"
@@ -71,6 +72,9 @@
#endif
#include "backend/gpu/mali_kbase_pm_internal.h"
#include "mali_kbase_dvfs_debugfs.h"
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+#include "mali_kbase_pbha_debugfs.h"
+#endif
#include <linux/module.h>
#include <linux/init.h>
@@ -403,6 +407,22 @@ static int kbase_api_handshake_dummy(struct kbase_file *kfile,
return -EPERM;
}
+static int kbase_api_kinstr_prfcnt_enum_info(
+ struct kbase_file *kfile,
+ struct kbase_ioctl_kinstr_prfcnt_enum_info *prfcnt_enum_info)
+{
+ return kbase_kinstr_prfcnt_enum_info(kfile->kbdev->kinstr_prfcnt_ctx,
+ prfcnt_enum_info);
+}
+
+static int kbase_api_kinstr_prfcnt_setup(
+ struct kbase_file *kfile,
+ union kbase_ioctl_kinstr_prfcnt_setup *prfcnt_setup)
+{
+ return kbase_kinstr_prfcnt_setup(kfile->kbdev->kinstr_prfcnt_ctx,
+ prfcnt_setup);
+}
+
static struct kbase_device *to_kbase_device(struct device *dev)
{
return dev_get_drvdata(dev);
@@ -808,16 +828,13 @@ static int kbase_api_mem_alloc(struct kbase_context *kctx,
u64 flags = alloc->in.flags;
u64 gpu_va;
- rcu_read_lock();
- /* Don't allow memory allocation until user space has set up the
- * tracking page (which sets kctx->process_mm). Also catches when we've
- * forked.
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
*/
- if (rcu_dereference(kctx->process_mm) != current->mm) {
- rcu_read_unlock();
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
+ if (!kbase_mem_allow_alloc(kctx))
return -EINVAL;
- }
- rcu_read_unlock();
if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY)
return -ENOMEM;
@@ -849,7 +866,8 @@ static int kbase_api_mem_alloc(struct kbase_context *kctx,
#endif
reg = kbase_mem_alloc(kctx, alloc->in.va_pages, alloc->in.commit_pages,
- alloc->in.extension, &flags, &gpu_va);
+ alloc->in.extension, &flags, &gpu_va,
+ mmu_sync_info);
if (!reg)
return -ENOMEM;
@@ -1643,6 +1661,20 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
struct kbase_ioctl_set_flags,
kfile);
break;
+
+ case KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO:
+ KBASE_HANDLE_IOCTL_INOUT(
+ KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO,
+ kbase_api_kinstr_prfcnt_enum_info,
+ struct kbase_ioctl_kinstr_prfcnt_enum_info, kfile);
+ break;
+
+ case KBASE_IOCTL_KINSTR_PRFCNT_SETUP:
+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_KINSTR_PRFCNT_SETUP,
+ kbase_api_kinstr_prfcnt_setup,
+ union kbase_ioctl_kinstr_prfcnt_setup,
+ kfile);
+ break;
}
kctx = kbase_file_get_kctx_if_setup_complete(kfile);
@@ -3097,6 +3129,10 @@ static ssize_t kbase_show_gpuinfo(struct device *dev,
.name = "Mali-G510" },
{ .id = GPU_ID2_PRODUCT_TVAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-G310" },
+ { .id = GPU_ID2_PRODUCT_TTUX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+ .name = "Mali-TTUX" },
+ { .id = GPU_ID2_PRODUCT_LTUX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+ .name = "Mali-LTUX" },
};
const char *product_name = "(Unknown Mali GPU)";
struct kbase_device *kbdev;
@@ -4574,25 +4610,31 @@ MAKE_QUIRK_ACCESSORS(tiler);
MAKE_QUIRK_ACCESSORS(mmu);
MAKE_QUIRK_ACCESSORS(gpu);
-static ssize_t kbase_device_debugfs_reset_write(struct file *file,
- const char __user *ubuf, size_t count, loff_t *ppos)
+/**
+ * kbase_device_debugfs_reset_write() - Reset the GPU
+ *
+ * @data: Pointer to the Kbase device.
+ * @wait_for_reset: Value written to the file.
+ *
+ * This function will perform the GPU reset, and if the value written to
+ * the file is 1 it will also wait for the reset to complete.
+ *
+ * Return: 0 on success, otherwise a negative error code.
+ */
+static int kbase_device_debugfs_reset_write(void *data, u64 wait_for_reset)
{
- struct kbase_device *kbdev = file->private_data;
- CSTD_UNUSED(ubuf);
- CSTD_UNUSED(count);
- CSTD_UNUSED(ppos);
+ struct kbase_device *kbdev = data;
trigger_reset(kbdev);
- return count;
+ if (wait_for_reset == 1)
+ return kbase_reset_gpu_wait(kbdev);
+
+ return 0;
}
-static const struct file_operations fops_trigger_reset = {
- .owner = THIS_MODULE,
- .open = simple_open,
- .write = kbase_device_debugfs_reset_write,
- .llseek = default_llseek,
-};
+DEFINE_SIMPLE_ATTRIBUTE(fops_trigger_reset,
+ NULL, &kbase_device_debugfs_reset_write, "%llu\n");
/**
* debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read
@@ -4692,7 +4734,7 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev)
kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname,
NULL);
- if (!kbdev->mali_debugfs_directory) {
+ if (IS_ERR_OR_NULL(kbdev->mali_debugfs_directory)) {
dev_err(kbdev->dev,
"Couldn't create mali debugfs directory: %s\n",
kbdev->devname);
@@ -4702,7 +4744,7 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev)
kbdev->debugfs_ctx_directory = debugfs_create_dir("ctx",
kbdev->mali_debugfs_directory);
- if (!kbdev->debugfs_ctx_directory) {
+ if (IS_ERR_OR_NULL(kbdev->debugfs_ctx_directory)) {
dev_err(kbdev->dev, "Couldn't create mali debugfs ctx directory\n");
err = -ENOMEM;
goto out;
@@ -4710,7 +4752,7 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev)
kbdev->debugfs_instr_directory = debugfs_create_dir("instrumentation",
kbdev->mali_debugfs_directory);
- if (!kbdev->debugfs_instr_directory) {
+ if (IS_ERR_OR_NULL(kbdev->debugfs_instr_directory)) {
dev_err(kbdev->dev, "Couldn't create mali debugfs instrumentation directory\n");
err = -ENOMEM;
goto out;
@@ -4718,7 +4760,7 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev)
debugfs_ctx_defaults_directory = debugfs_create_dir("defaults",
kbdev->debugfs_ctx_directory);
- if (!debugfs_ctx_defaults_directory) {
+ if (IS_ERR_OR_NULL(debugfs_ctx_defaults_directory)) {
dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n");
err = -ENOMEM;
goto out;
@@ -4735,6 +4777,8 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev)
#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
kbase_instr_backend_debugfs_init(kbdev);
#endif
+ kbase_pbha_debugfs_init(kbdev);
+
/* fops_* variables created by invocations of macro
* MAKE_QUIRK_ACCESSORS() above.
*/
@@ -5293,11 +5337,19 @@ static int kbase_device_resume(struct device *dev)
static int kbase_device_runtime_suspend(struct device *dev)
{
struct kbase_device *kbdev = to_kbase_device(dev);
+ int ret = 0;
if (!kbdev)
return -ENODEV;
dev_dbg(dev, "Callback %s\n", __func__);
+ KBASE_KTRACE_ADD(kbdev, PM_RUNTIME_SUSPEND_CALLBACK, NULL, 0);
+
+#if MALI_USE_CSF
+ ret = kbase_pm_handle_runtime_suspend(kbdev);
+ if (ret)
+ return ret;
+#endif
#ifdef CONFIG_MALI_MIDGARD_DVFS
kbase_pm_metrics_stop(kbdev);
@@ -5312,7 +5364,7 @@ static int kbase_device_runtime_suspend(struct device *dev)
kbdev->pm.backend.callback_power_runtime_off(kbdev);
dev_dbg(dev, "runtime suspend\n");
}
- return 0;
+ return ret;
}
#endif /* KBASE_PM_RUNTIME */
@@ -5336,6 +5388,7 @@ static int kbase_device_runtime_resume(struct device *dev)
return -ENODEV;
dev_dbg(dev, "Callback %s\n", __func__);
+ KBASE_KTRACE_ADD(kbdev, PM_RUNTIME_RESUME_CALLBACK, NULL, 0);
if (kbdev->pm.backend.callback_power_runtime_on) {
ret = kbdev->pm.backend.callback_power_runtime_on(kbdev);
dev_dbg(dev, "runtime resume\n");
diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h
index 146695c..5b1fdd3 100644
--- a/mali_kbase/mali_kbase_defs.h
+++ b/mali_kbase/mali_kbase_defs.h
@@ -71,10 +71,6 @@
#include <linux/regulator/consumer.h>
#include <linux/memory_group_manager.h>
-#if defined(CONFIG_PM_RUNTIME) || defined(CONFIG_PM)
-#define KBASE_PM_RUNTIME 1
-#endif
-
#include "debug/mali_kbase_debug_ktrace_defs.h"
/** Number of milliseconds before we time out on a GPU soft/hard reset */
@@ -111,12 +107,12 @@
/**
* Maximum size in bytes of a MMU lock region, as a logarithm
*/
-#define KBASE_LOCK_REGION_MAX_SIZE_LOG2 (64)
+#define KBASE_LOCK_REGION_MAX_SIZE_LOG2 (48) /* 256 TB */
/**
* Minimum size in bytes of a MMU lock region, as a logarithm
*/
-#define KBASE_LOCK_REGION_MIN_SIZE_LOG2 (15)
+#define KBASE_LOCK_REGION_MIN_SIZE_LOG2 (15) /* 32 kB */
/**
* Maximum number of GPU memory region zones
@@ -269,6 +265,21 @@ struct kbase_mmu_table {
struct kbase_context *kctx;
};
+/**
+ * struct kbase_reg_zone - Information about GPU memory region zones
+ * @base_pfn: Page Frame Number in GPU virtual address space for the start of
+ * the Zone
+ * @va_size_pages: Size of the Zone in pages
+ *
+ * Track information about a zone, as used by KBASE_REG_ZONE() and related
+ * macros. In future, this could also store the &rb_root fields that are
+ * currently in &kbase_context and &kbase_csf_device.
+ */
+struct kbase_reg_zone {
+ u64 base_pfn;
+ u64 va_size_pages;
+};
+
#if MALI_USE_CSF
#include "csf/mali_kbase_csf_defs.h"
#else
@@ -363,6 +374,12 @@ struct kbase_clk_rate_trace_manager {
* that some code paths keep shaders/the tiler powered whilst this is 0.
* Use kbase_pm_is_active() instead to check for such cases.
* @suspending: Flag indicating suspending/suspended
+ * @runtime_active: Flag to track if the GPU is in runtime suspended or active
+ * state. This ensures that runtime_put and runtime_get
+ * functions are called in pairs. For example if runtime_get
+ * has already been called from the power_on callback, then
+ * the call to it from runtime_gpu_active callback can be
+ * skipped.
* @gpu_lost: Flag indicating gpu lost
* This structure contains data for the power management framework. There
* is one instance of this structure per device in the system.
@@ -388,6 +405,9 @@ struct kbase_pm_device_data {
struct mutex lock;
int active_count;
bool suspending;
+#if MALI_USE_CSF
+ bool runtime_active;
+#endif
#ifdef CONFIG_MALI_ARBITER_SUPPORT
atomic_t gpu_lost;
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
@@ -529,8 +549,11 @@ struct kbase_devfreq_opp {
* @entry_set_ate: program the pte to be a valid address translation entry to
* encode the physical address of the actual page being mapped.
* @entry_set_pte: program the pte to be a valid entry to encode the physical
- * address of the next lower level page table.
+ * address of the next lower level page table and also update
+ * the number of valid entries.
* @entry_invalidate: clear out or invalidate the pte.
+ * @get_num_valid_entries: returns the number of valid entries for a specific pgd.
+ * @set_num_valid_entries: sets the number of valid entries for a specific pgd.
* @flags: bitmask of MMU mode flags. Refer to KBASE_MMU_MODE_ constants.
*/
struct kbase_mmu_mode {
@@ -545,8 +568,11 @@ struct kbase_mmu_mode {
int (*pte_is_valid)(u64 pte, int level);
void (*entry_set_ate)(u64 *entry, struct tagged_addr phy,
unsigned long flags, int level);
- void (*entry_set_pte)(u64 *entry, phys_addr_t phy);
+ void (*entry_set_pte)(u64 *pgd, u64 vpfn, phys_addr_t phy);
void (*entry_invalidate)(u64 *entry);
+ unsigned int (*get_num_valid_entries)(u64 *pgd);
+ void (*set_num_valid_entries)(u64 *pgd,
+ unsigned int num_of_valid_entries);
unsigned long flags;
};
@@ -722,6 +748,7 @@ struct kbase_process {
* kbase_hwcnt_context_enable() with @hwcnt_gpu_ctx.
* @hwcnt_gpu_virt: Virtualizer for GPU hardware counters.
* @vinstr_ctx: vinstr context created per device.
+ * @kinstr_prfcnt_ctx: kinstr_prfcnt context created per device.
* @timeline_flags: Bitmask defining which sets of timeline tracepoints
* are enabled. If zero, there is no timeline client and
* therefore timeline is disabled.
@@ -738,6 +765,8 @@ struct kbase_process {
* @reset_timeout_ms: Number of milliseconds to wait for the soft stop to
* complete for the GPU jobs before proceeding with the
* GPU reset.
+ * @lowest_gpu_freq_khz: Lowest frequency in kHz that the GPU can run at. Used
+ * to calculate suitable timeouts for wait operations.
* @cache_clean_in_progress: Set when a cache clean has been started, and
* cleared when it has finished. This prevents multiple
* cache cleans being done simultaneously.
@@ -752,8 +781,6 @@ struct kbase_process {
* including any contexts that might be created for
* hardware counters.
* @kctx_list_lock: Lock protecting concurrent accesses to @kctx_list.
- * @group_max_uid_in_devices: Max value of any queue group UID in any kernel
- * context in the kbase device.
* @devfreq_profile: Describes devfreq profile for the Mali GPU device, passed
* to devfreq_add_device() to add devfreq feature to Mali
* GPU device.
@@ -891,6 +918,10 @@ struct kbase_process {
* @l2_hash_override: Used to set L2 cache hash via device tree blob
* @l2_hash_values_override: true if @l2_hash_values is valid.
* @l2_hash_values: Used to set L2 asn_hash via device tree blob
+ * @sysc_alloc: Array containing values to be programmed into
+ * SYSC_ALLOC[0..7] GPU registers on L2 cache
+ * power down. These come from either DTB or
+ * via DebugFS (if it is available in kernel).
* @process_root: rb_tree root node for maintaining a rb_tree of
* kbase_process based on key tgid(thread group ID).
* @dma_buf_root: rb_tree root node for maintaining a rb_tree of
@@ -993,6 +1024,7 @@ struct kbase_device {
struct kbase_hwcnt_context *hwcnt_gpu_ctx;
struct kbase_hwcnt_virtualizer *hwcnt_gpu_virt;
struct kbase_vinstr_context *vinstr_ctx;
+ struct kbase_kinstr_prfcnt_context *kinstr_prfcnt_ctx;
atomic_t timeline_flags;
struct kbase_timeline *timeline;
@@ -1002,6 +1034,8 @@ struct kbase_device {
#endif
u32 reset_timeout_ms;
+ u64 lowest_gpu_freq_khz;
+
bool cache_clean_in_progress;
bool cache_clean_queued;
wait_queue_head_t cache_clean_wait;
@@ -1010,7 +1044,6 @@ struct kbase_device {
struct list_head kctx_list;
struct mutex kctx_list_lock;
- atomic_t group_max_uid_in_devices;
#ifdef CONFIG_MALI_DEVFREQ
struct devfreq_dev_profile devfreq_profile;
@@ -1129,6 +1162,8 @@ struct kbase_device {
bool l2_hash_values_override;
u32 l2_hash_values[ASN_HASH_COUNT];
+ u32 sysc_alloc[SYSC_ALLOC_COUNT];
+
struct mutex fw_load_lock;
#if MALI_USE_CSF
/* CSF object for the GPU device. */
@@ -1396,21 +1431,6 @@ struct kbase_sub_alloc {
};
/**
- * struct kbase_reg_zone - Information about GPU memory region zones
- * @base_pfn: Page Frame Number in GPU virtual address space for the start of
- * the Zone
- * @va_size_pages: Size of the Zone in pages
- *
- * Track information about a zone KBASE_REG_ZONE() and related macros.
- * In future, this could also store the &rb_root that are currently in
- * &kbase_context
- */
-struct kbase_reg_zone {
- u64 base_pfn;
- u64 va_size_pages;
-};
-
-/**
* struct kbase_context - Kernel base context
*
* @filp: Pointer to the struct file corresponding to device file
@@ -1561,17 +1581,10 @@ struct kbase_reg_zone {
* of RB-tree holding currently runnable atoms on the job slot
* and the head item of the linked list of atoms blocked on
* cross-slot dependencies.
- * @atoms_pulled: Total number of atoms currently pulled from the context.
- * @atoms_pulled_slot: Per slot count of the number of atoms currently pulled
- * from the context.
- * @atoms_pulled_slot_pri: Per slot & priority count of the number of atoms currently
- * pulled from the context. hwaccess_lock shall be held when
- * accessing it.
- * @blocked_js: Indicates if the context is blocked from submitting atoms
- * on a slot at a given priority. This is set to true, when
- * the atom corresponding to context is soft/hard stopped or
- * removed from the HEAD_NEXT register in response to
- * soft/hard stop.
+ * @slot_tracking: Tracking and control of this context's use of all job
+ * slots
+ * @atoms_pulled_all_slots: Total number of atoms currently pulled from the
+ * context, across all slots.
* @slots_pullable: Bitmask of slots, indicating the slots for which the
* context has pullable atoms in the runnable tree.
* @work: Work structure used for deferred ASID assignment.
@@ -1717,17 +1730,14 @@ struct kbase_context {
struct kbase_jd_context jctx;
struct jsctx_queue jsctx_queue
[KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS];
+ struct kbase_jsctx_slot_tracking slot_tracking[BASE_JM_MAX_NR_SLOTS];
+ atomic_t atoms_pulled_all_slots;
struct list_head completed_jobs;
atomic_t work_count;
struct timer_list soft_job_timeout;
- atomic_t atoms_pulled;
- atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS];
- int atoms_pulled_slot_pri[BASE_JM_MAX_NR_SLOTS][
- KBASE_JS_ATOM_SCHED_PRIO_COUNT];
int priority;
- bool blocked_js[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT];
s16 atoms_count[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
u32 slots_pullable;
u32 age_count;
@@ -1888,6 +1898,13 @@ enum kbase_share_attr_bits {
};
/**
+ * enum kbase_timeout_selector - The choice of which timeout to get scaled
+ * using current GPU frequency.
+ * @CSF_FIRMWARE_TIMEOUT: Response timeout from CSF firmware.
+ */
+enum kbase_timeout_selector { CSF_FIRMWARE_TIMEOUT };
+
+/**
* kbase_device_is_cpu_coherent - Returns if the device is CPU coherent.
* @kbdev: kbase device
*
diff --git a/mali_kbase/mali_kbase_dma_fence.c b/mali_kbase/mali_kbase_dma_fence.c
index 69ff8cc..bf2d9cc 100644
--- a/mali_kbase/mali_kbase_dma_fence.c
+++ b/mali_kbase/mali_kbase_dma_fence.c
@@ -249,8 +249,10 @@ kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom,
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
err = reservation_object_get_fences_rcu(
-#else
+#elif (KERNEL_VERSION(5, 14, 0) > LINUX_VERSION_CODE)
err = dma_resv_get_fences_rcu(
+#else
+ err = dma_resv_get_fences(
#endif
resv,
&excl_fence,
diff --git a/mali_kbase/mali_kbase_dummy_job_wa.c b/mali_kbase/mali_kbase_dummy_job_wa.c
index 1e91ba0..bdc5d6d 100644
--- a/mali_kbase/mali_kbase_dummy_job_wa.c
+++ b/mali_kbase/mali_kbase_dummy_job_wa.c
@@ -281,6 +281,11 @@ int kbase_dummy_job_wa_load(struct kbase_device *kbdev)
int err;
struct kbase_context *kctx;
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
lockdep_assert_held(&kbdev->fw_load_lock);
if (!wa_blob_load_needed(kbdev))
@@ -375,8 +380,8 @@ int kbase_dummy_job_wa_load(struct kbase_device *kbdev)
nr_pages = PFN_UP(blob->size);
flags = blob->map_flags | BASE_MEM_FLAG_MAP_FIXED;
- va_region = kbase_mem_alloc(kctx, nr_pages, nr_pages,
- 0, &flags, &gpu_va);
+ va_region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
+ &gpu_va, mmu_sync_info);
if (!va_region) {
dev_err(kbdev->dev, "Failed to allocate for blob\n");
diff --git a/mali_kbase/mali_kbase_gpuprops.c b/mali_kbase/mali_kbase_gpuprops.c
index e4d52c9..967c08e 100644
--- a/mali_kbase/mali_kbase_gpuprops.c
+++ b/mali_kbase/mali_kbase_gpuprops.c
@@ -661,6 +661,19 @@ int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev)
dev_info(kbdev->dev, "Reflected L2_CONFIG is 0x%08x\n",
regdump.l2_config);
+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH)) {
+ int idx;
+ const bool asn_he = regdump.l2_config &
+ L2_CONFIG_ASN_HASH_ENABLE_MASK;
+ if (!asn_he && kbdev->l2_hash_values_override)
+ dev_err(kbdev->dev,
+ "Failed to use requested ASN_HASH, fallback to default");
+ for (idx = 0; idx < ASN_HASH_COUNT; idx++)
+ dev_info(kbdev->dev,
+ "%s ASN_HASH[%d] is [0x%08x]\n",
+ asn_he ? "Overridden" : "Default", idx,
+ regdump.l2_asn_hash[idx]);
+ }
/* Update gpuprops with reflected L2_FEATURES */
gpu_props->raw_props.l2_features = regdump.l2_features;
diff --git a/mali_kbase/mali_kbase_gpuprops_types.h b/mali_kbase/mali_kbase_gpuprops_types.h
index 02705a0..67a4d7d 100644
--- a/mali_kbase/mali_kbase_gpuprops_types.h
+++ b/mali_kbase/mali_kbase_gpuprops_types.h
@@ -35,6 +35,7 @@ struct kbase_gpuprops_regdump {
u32 gpu_id;
u32 l2_features;
u32 l2_config;
+ u32 l2_asn_hash[ASN_HASH_COUNT];
u32 core_features;
u32 tiler_features;
u32 mem_features;
diff --git a/mali_kbase/mali_kbase_hw.c b/mali_kbase/mali_kbase_hw.c
index 7ad583c..183fd18 100644
--- a/mali_kbase/mali_kbase_hw.c
+++ b/mali_kbase/mali_kbase_hw.c
@@ -81,6 +81,10 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev)
case GPU_ID2_PRODUCT_TVAX:
features = base_hw_features_tVAx;
break;
+ case GPU_ID2_PRODUCT_TTUX:
+ case GPU_ID2_PRODUCT_LTUX:
+ features = base_hw_features_tTUx;
+ break;
default:
features = base_hw_features_generic;
break;
@@ -225,6 +229,15 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
{ GPU_ID2_PRODUCT_TVAX,
{ { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tVAx_r0p0 },
{ U32_MAX, NULL } } },
+
+ { GPU_ID2_PRODUCT_TTUX,
+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 },
+ { U32_MAX, NULL } } },
+
+ { GPU_ID2_PRODUCT_LTUX,
+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 },
+ { U32_MAX, NULL } } },
+
};
u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
@@ -380,6 +393,11 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
case GPU_ID2_PRODUCT_TVAX:
issues = base_hw_issues_model_tVAx;
break;
+ case GPU_ID2_PRODUCT_TTUX:
+ case GPU_ID2_PRODUCT_LTUX:
+ issues = base_hw_issues_model_tTUx;
+ break;
+
default:
dev_err(kbdev->dev,
"Unknown GPU ID %x", gpu_id);
diff --git a/mali_kbase/mali_kbase_hwaccess_time.h b/mali_kbase/mali_kbase_hwaccess_time.h
index 8a4ece4..27e2cb7 100644
--- a/mali_kbase/mali_kbase_hwaccess_time.h
+++ b/mali_kbase/mali_kbase_hwaccess_time.h
@@ -48,3 +48,25 @@ void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev,
struct timespec64 *ts);
#endif /* _KBASE_BACKEND_TIME_H_ */
+
+/**
+ * kbase_get_timeout_ms - Get a timeout, in milliseconds, scaled using the
+ * GPU frequency, for the timeout selected by
+ * kbase_timeout_selector.
+ *
+ * @kbdev: KBase device pointer.
+ * @selector: Value from the kbase_timeout_selector enum.
+ *
+ * Return: Timeout in milliseconds, as an unsigned integer.
+ */
+unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
+ enum kbase_timeout_selector selector);
+
+/**
+ * kbase_backend_get_cycle_cnt - Reads the GPU cycle counter
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * Return: Snapshot of the GPU cycle count register.
+ */
+u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev);
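The cycle-count timeouts defined in mali_kbase_config_defaults.h convert to milliseconds by dividing by a frequency in kHz (kHz is cycles per millisecond). A sketch of that scaling, assuming @lowest_gpu_freq_khz has been populated; the real kbase_get_timeout_ms() may differ:

#include <linux/math64.h>

/* Sketch: 300000000 cycles at 100000 kHz (100 MHz) gives 3000 ms, matching
 * the CSF firmware timeout quoted in the defaults header.
 */
static unsigned int example_timeout_ms(struct kbase_device *kbdev,
				       u64 timeout_cycles)
{
	u64 freq_khz = kbdev->lowest_gpu_freq_khz ?
			       kbdev->lowest_gpu_freq_khz :
			       DEFAULT_REF_TIMEOUT_FREQ_KHZ;

	return (unsigned int)div64_u64(timeout_cycles, freq_khz);
}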
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf.c b/mali_kbase/mali_kbase_hwcnt_backend_csf.c
index 58b5e72..7ba1671 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_csf.c
+++ b/mali_kbase/mali_kbase_hwcnt_backend_csf.c
@@ -157,19 +157,20 @@ struct kbase_hwcnt_backend_csf_info {
* @shader_cnt: Shader Core block count.
* @block_cnt: Total block count (sum of all other block counts).
* @shader_avail_mask: Bitmap of all shader cores in the system.
- * @offset_enable_mask: Offset of enable mask in the block.
+ * @enable_mask_offset: Offset, in array elements, of the enable mask within
+ * each block, counted from the start of the block.
* @headers_per_block: Header size per block.
* @counters_per_block: Counters size per block.
* @values_per_block: Total size per block.
*/
struct kbase_hwcnt_csf_physical_layout {
- size_t fe_cnt;
- size_t tiler_cnt;
- size_t mmu_l2_cnt;
- size_t shader_cnt;
- size_t block_cnt;
+ u8 fe_cnt;
+ u8 tiler_cnt;
+ u8 mmu_l2_cnt;
+ u8 shader_cnt;
+ u8 block_cnt;
u64 shader_avail_mask;
- size_t offset_enable_mask;
+ size_t enable_mask_offset;
size_t headers_per_block;
size_t counters_per_block;
size_t values_per_block;
@@ -184,11 +185,13 @@ struct kbase_hwcnt_csf_physical_layout {
* to accumulate up to.
* @enable_state_waitq: Wait queue object used to notify the enable
* changing flag is done.
- * @to_user_buf: HWC sample buffer for client user.
+ * @to_user_buf: HWC sample buffer for client user, size
+ * metadata.dump_buf_bytes.
* @accum_buf: HWC sample buffer used as an internal
- * accumulator.
+ * accumulator, size metadata.dump_buf_bytes.
* @old_sample_buf: HWC sample buffer to save the previous values
- * for delta calculation.
+ * for delta calculation, size
+ * prfcnt_info.dump_bytes.
* @ring_buf: Opaque pointer for ring buffer object.
* @ring_buf_cpu_base: CPU base address of the allocated ring buffer.
* @clk_enable_map: The enable map specifying enabled clock domains.
@@ -213,8 +216,8 @@ struct kbase_hwcnt_backend_csf {
enum kbase_hwcnt_backend_csf_enable_state enable_state;
u32 insert_index_to_accumulate;
wait_queue_head_t enable_state_waitq;
- u32 *to_user_buf;
- u32 *accum_buf;
+ u64 *to_user_buf;
+ u64 *accum_buf;
u32 *old_sample_buf;
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf;
void *ring_buf_cpu_base;
@@ -333,34 +336,40 @@ static void kbasep_hwcnt_backend_csf_init_layout(
const struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info,
struct kbase_hwcnt_csf_physical_layout *phys_layout)
{
+ u8 shader_core_cnt;
+ size_t values_per_block;
+
WARN_ON(!prfcnt_info);
WARN_ON(!phys_layout);
- phys_layout->fe_cnt = 1;
- phys_layout->tiler_cnt = 1;
- phys_layout->mmu_l2_cnt = prfcnt_info->l2_count;
- phys_layout->shader_cnt = fls64(prfcnt_info->core_mask);
- phys_layout->block_cnt = phys_layout->fe_cnt + phys_layout->tiler_cnt +
- phys_layout->mmu_l2_cnt +
- phys_layout->shader_cnt;
-
- phys_layout->shader_avail_mask = prfcnt_info->core_mask;
-
- phys_layout->headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
- phys_layout->values_per_block =
- prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_BYTES;
- phys_layout->counters_per_block =
- phys_layout->values_per_block - phys_layout->headers_per_block;
- phys_layout->offset_enable_mask = KBASE_HWCNT_V5_PRFCNT_EN_HEADER;
+ shader_core_cnt = fls64(prfcnt_info->core_mask);
+ values_per_block =
+ prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES;
+
+ *phys_layout = (struct kbase_hwcnt_csf_physical_layout){
+ .fe_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT,
+ .tiler_cnt = KBASE_HWCNT_V5_TILER_BLOCK_COUNT,
+ .mmu_l2_cnt = prfcnt_info->l2_count,
+ .shader_cnt = shader_core_cnt,
+ .block_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT +
+ KBASE_HWCNT_V5_TILER_BLOCK_COUNT +
+ prfcnt_info->l2_count + shader_core_cnt,
+ .shader_avail_mask = prfcnt_info->core_mask,
+ .headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ .values_per_block = values_per_block,
+ .counters_per_block =
+ values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ .enable_mask_offset = KBASE_HWCNT_V5_PRFCNT_EN_HEADER,
+ };
}
static void kbasep_hwcnt_backend_csf_reset_internal_buffers(
struct kbase_hwcnt_backend_csf *backend_csf)
{
- memset(backend_csf->to_user_buf, 0,
- backend_csf->info->prfcnt_info.dump_bytes);
- memset(backend_csf->accum_buf, 0,
- backend_csf->info->prfcnt_info.dump_bytes);
+ size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes;
+
+ memset(backend_csf->to_user_buf, 0, user_buf_bytes);
+ memset(backend_csf->accum_buf, 0, user_buf_bytes);
memset(backend_csf->old_sample_buf, 0,
backend_csf->info->prfcnt_info.dump_bytes);
}
@@ -376,7 +385,7 @@ static void kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(
for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) {
block_buf = sample + block_idx * phys_layout->values_per_block;
- block_buf[phys_layout->offset_enable_mask] = 0;
+ block_buf[phys_layout->enable_mask_offset] = 0;
}
}
@@ -400,33 +409,35 @@ static void kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(
static void kbasep_hwcnt_backend_csf_update_user_sample(
struct kbase_hwcnt_backend_csf *backend_csf)
{
+ size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes;
+
/* Copy the data into the sample and wait for the user to get it. */
memcpy(backend_csf->to_user_buf, backend_csf->accum_buf,
- backend_csf->info->prfcnt_info.dump_bytes);
+ user_buf_bytes);
/* After copied data into user sample, clear the accumulator values to
* prepare for the next accumulator, such as the next request or
* threshold.
*/
- memset(backend_csf->accum_buf, 0,
- backend_csf->info->prfcnt_info.dump_bytes);
+ memset(backend_csf->accum_buf, 0, user_buf_bytes);
}
static void kbasep_hwcnt_backend_csf_accumulate_sample(
const struct kbase_hwcnt_csf_physical_layout *phys_layout,
- size_t dump_bytes, u32 *accum_buf, const u32 *old_sample_buf,
+ size_t dump_bytes, u64 *accum_buf, const u32 *old_sample_buf,
const u32 *new_sample_buf, bool clearing_samples)
{
- size_t block_idx, ctr_idx;
+ size_t block_idx;
const u32 *old_block = old_sample_buf;
const u32 *new_block = new_sample_buf;
- u32 *acc_block = accum_buf;
+ u64 *acc_block = accum_buf;
+ const size_t values_per_block = phys_layout->values_per_block;
for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) {
const u32 old_enable_mask =
- old_block[phys_layout->offset_enable_mask];
+ old_block[phys_layout->enable_mask_offset];
const u32 new_enable_mask =
- new_block[phys_layout->offset_enable_mask];
+ new_block[phys_layout->enable_mask_offset];
if (new_enable_mask == 0) {
/* Hardware block was unavailable or we didn't turn on
@@ -436,11 +447,14 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample(
/* Hardware block was available and it had some counters
* enabled. We need to update the accumulation buffer.
*/
+ size_t ctr_idx;
/* Unconditionally copy the headers. */
- memcpy(acc_block, new_block,
- phys_layout->headers_per_block *
- KBASE_HWCNT_VALUE_BYTES);
+ for (ctr_idx = 0;
+ ctr_idx < phys_layout->headers_per_block;
+ ctr_idx++) {
+ acc_block[ctr_idx] = new_block[ctr_idx];
+ }
/* Accumulate counter samples
*
@@ -470,8 +484,7 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample(
for (ctr_idx =
phys_layout
->headers_per_block;
- ctr_idx <
- phys_layout->values_per_block;
+ ctr_idx < values_per_block;
ctr_idx++) {
acc_block[ctr_idx] +=
new_block[ctr_idx];
@@ -484,8 +497,7 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample(
for (ctr_idx =
phys_layout
->headers_per_block;
- ctr_idx <
- phys_layout->values_per_block;
+ ctr_idx < values_per_block;
ctr_idx++) {
acc_block[ctr_idx] +=
new_block[ctr_idx] -
@@ -494,23 +506,23 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample(
}
} else {
for (ctr_idx = phys_layout->headers_per_block;
- ctr_idx < phys_layout->values_per_block;
- ctr_idx++) {
+ ctr_idx < values_per_block; ctr_idx++) {
acc_block[ctr_idx] +=
new_block[ctr_idx];
}
}
}
- old_block += phys_layout->values_per_block;
- new_block += phys_layout->values_per_block;
- acc_block += phys_layout->values_per_block;
+ old_block += values_per_block;
+ new_block += values_per_block;
+ acc_block += values_per_block;
}
WARN_ON(old_block !=
- old_sample_buf + dump_bytes / KBASE_HWCNT_VALUE_BYTES);
+ old_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
WARN_ON(new_block !=
- new_sample_buf + dump_bytes / KBASE_HWCNT_VALUE_BYTES);
- WARN_ON(acc_block != accum_buf + dump_bytes / KBASE_HWCNT_VALUE_BYTES);
+ new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
+ WARN_ON(acc_block !=
+ accum_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
(void)dump_bytes;
}
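The branch conditions above are abbreviated by the hunk, but in outline the per-counter rule is: when the hardware clears counters on each sample, or when the block has only just been enabled, the new value is already the delta; otherwise the previous sample must be subtracted. A reduced sketch for a single counter, widened to the new u64 accumulator:

/* Sketch: one-counter version of the accumulation rule used above. */
static inline void example_accumulate_one(u64 *acc, u32 old_val, u32 new_val,
					  bool clearing_samples,
					  bool was_enabled)
{
	if (clearing_samples || !was_enabled)
		*acc += new_val;
	else
		*acc += new_val - old_val;
}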
@@ -1218,7 +1230,7 @@ kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info,
&backend_csf->phys_layout);
backend_csf->accum_buf =
- kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL);
+ kzalloc(csf_info->metadata->dump_buf_bytes, GFP_KERNEL);
if (!backend_csf->accum_buf)
goto err_alloc_acc_buf;
@@ -1228,7 +1240,7 @@ kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info,
goto err_alloc_pre_sample_buf;
backend_csf->to_user_buf =
- kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL);
+ kzalloc(csf_info->metadata->dump_buf_bytes, GFP_KERNEL);
if (!backend_csf->to_user_buf)
goto err_alloc_user_sample_buf;
@@ -1237,6 +1249,7 @@ kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info,
&backend_csf->ring_buf_cpu_base, &backend_csf->ring_buf);
if (errcode)
goto err_ring_buf_alloc;
+ errcode = -ENOMEM;
/* Zero all performance enable header to prepare for first enable. */
kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(backend_csf);
@@ -1787,17 +1800,17 @@ int kbase_hwcnt_backend_csf_metadata_init(
gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt;
gpu_info.prfcnt_values_per_block =
csf_info->prfcnt_info.prfcnt_block_size /
- KBASE_HWCNT_VALUE_BYTES;
+ KBASE_HWCNT_VALUE_HW_BYTES;
errcode = kbase_hwcnt_csf_metadata_create(
&gpu_info, csf_info->counter_set, &csf_info->metadata);
if (errcode)
return errcode;
/*
- * Dump abstraction size should be exactly the same size and layout as
- * the physical dump size, for backwards compatibility.
+ * The dump abstraction size should be exactly twice the physical dump
+ * size, with the same layout, since the metadata uses 64 bits per value.
*/
- WARN_ON(csf_info->prfcnt_info.dump_bytes !=
+ WARN_ON(csf_info->prfcnt_info.dump_bytes * 2 !=
csf_info->metadata->dump_buf_bytes);
return 0;
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c
index 78a8dc0..124224d 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c
+++ b/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c
@@ -223,7 +223,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
u32 prfcnt_hw_size = 0;
u32 prfcnt_fw_size = 0;
u32 prfcnt_block_size = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK *
- KBASE_HWCNT_VALUE_BYTES;
+ KBASE_HWCNT_VALUE_HW_BYTES;
WARN_ON(!ctx);
WARN_ON(!prfcnt_info);
@@ -235,6 +235,16 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
prfcnt_fw_size = (prfcnt_size >> 16) << 8;
fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size;
+	/* Read the block size if the GPU has the PRFCNT_FEATURES register,
+ * which was introduced in architecture version 11.x.7.
+ */
+ if ((kbdev->gpu_props.props.raw_props.gpu_id & GPU_ID2_PRODUCT_MODEL) >=
+ GPU_ID2_PRODUCT_TTUX) {
+ prfcnt_block_size =
+ PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(kbase_reg_read(
+ kbdev, GPU_CONTROL_REG(PRFCNT_FEATURES)))
+ << 8;
+ }
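	/* Illustrative note (not part of the patch): the << 8 above implies the
	 * COUNTER_BLOCK_SIZE field is expressed in units of 256 bytes. For
	 * example, a field value of 1 would give 1 << 8 = 256 bytes per block,
	 * i.e. 256 / KBASE_HWCNT_VALUE_HW_BYTES = 64 hardware counter values.
	 */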
prfcnt_info->dump_bytes = fw_ctx->buf_bytes;
prfcnt_info->prfcnt_block_size = prfcnt_block_size;
@@ -246,7 +256,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
prfcnt_info->clearing_samples = true;
/* Block size must be multiple of counter size. */
- WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_BYTES) !=
+ WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) !=
0);
/* Total size must be multiple of block size. */
WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) !=
@@ -274,6 +284,11 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+	/* Calls to this function are inherently asynchronous with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
WARN_ON(!ctx);
WARN_ON(!cpu_dump_base);
WARN_ON(!out_ring_buf);
@@ -322,7 +337,8 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
/* Update MMU table */
ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
gpu_va_base >> PAGE_SHIFT, phys, num_pages,
- flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW);
+ flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW,
+ mmu_sync_info);
if (ret)
goto mmu_insert_failed;
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_jm.c b/mali_kbase/mali_kbase_hwcnt_backend_jm.c
index 64001b1..56bb1b6 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_jm.c
+++ b/mali_kbase/mali_kbase_hwcnt_backend_jm.c
@@ -35,17 +35,47 @@
/**
* struct kbase_hwcnt_backend_jm_info - Information used to create an instance
* of a JM hardware counter backend.
- * @kbdev: KBase device.
- * @counter_set: The performance counter set to use.
- * @metadata: Hardware counter metadata.
- * @dump_bytes: Bytes of GPU memory required to perform a
- * hardware counter dump.
+ * @kbdev: KBase device.
+ * @counter_set: The performance counter set to use.
+ * @metadata: Hardware counter metadata.
+ * @dump_bytes: Bytes of GPU memory required to perform a
+ * hardware counter dump.
+ * @hwcnt_gpu_info: Hardware counter block information.
*/
struct kbase_hwcnt_backend_jm_info {
struct kbase_device *kbdev;
enum kbase_hwcnt_set counter_set;
const struct kbase_hwcnt_metadata *metadata;
size_t dump_bytes;
+ struct kbase_hwcnt_gpu_info hwcnt_gpu_info;
+};
+
+/**
+ * struct kbase_hwcnt_jm_physical_layout - HWC sample memory physical layout
+ * information.
+ * @fe_cnt: Front end block count.
+ * @tiler_cnt: Tiler block count.
+ * @mmu_l2_cnt: Memory system (MMU and L2 cache) block count.
+ * @shader_cnt: Shader Core block count.
+ * @block_cnt: Total block count (sum of all other block counts).
+ * @shader_avail_mask: Bitmap of all shader cores in the system.
+ * @enable_mask_offset: Offset, in array elements, of the enable mask within
+ * each block, counted from the start of the block.
+ * @headers_per_block: Number of header values per block.
+ * @counters_per_block: Number of counter values per block.
+ * @values_per_block: Total number of values (headers plus counters) per block.
+ */
+struct kbase_hwcnt_jm_physical_layout {
+ u8 fe_cnt;
+ u8 tiler_cnt;
+ u8 mmu_l2_cnt;
+ u8 shader_cnt;
+ u8 block_cnt;
+ u64 shader_avail_mask;
+ size_t enable_mask_offset;
+ size_t headers_per_block;
+ size_t counters_per_block;
+ size_t values_per_block;
};
/**
@@ -56,11 +86,13 @@ struct kbase_hwcnt_backend_jm_info {
* @gpu_dump_va: GPU hardware counter dump buffer virtual address.
* @cpu_dump_va: CPU mapping of gpu_dump_va.
* @vmap: Dump buffer vmap.
+ * @to_user_buf: HWC sample buffer for the user client; its size is
+ * metadata.dump_buf_bytes.
* @enabled: True if dumping has been enabled, else false.
* @pm_core_mask: PM state sync-ed shaders core mask for the enabled
* dumping.
- * @curr_config: Current allocated hardware resources to correctly map the src
- * raw dump buffer to the dst dump buffer.
+ * @curr_config: Currently allocated hardware resources, used to correctly map
+ * the source raw dump buffer to the destination dump buffer.
* @clk_enable_map: The enable map specifying enabled clock domains.
* @cycle_count_elapsed:
* Cycle count elapsed for a given sample period.
@@ -71,6 +103,7 @@ struct kbase_hwcnt_backend_jm_info {
* sample period.
* @rate_listener: Clock rate listener callback state.
* @ccswe_shader_cores: Shader cores cycle count software estimator.
+ * @phys_layout: Physical memory layout information of the HWC sample buffer.
*/
struct kbase_hwcnt_backend_jm {
const struct kbase_hwcnt_backend_jm_info *info;
@@ -78,6 +111,7 @@ struct kbase_hwcnt_backend_jm {
u64 gpu_dump_va;
void *cpu_dump_va;
struct kbase_vmap_struct *vmap;
+ u64 *to_user_buf;
bool enabled;
u64 pm_core_mask;
struct kbase_hwcnt_curr_config curr_config;
@@ -86,6 +120,7 @@ struct kbase_hwcnt_backend_jm {
u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS];
struct kbase_clk_rate_listener rate_listener;
struct kbase_ccswe ccswe_shader_cores;
+ struct kbase_hwcnt_jm_physical_layout phys_layout;
};
/**
@@ -127,6 +162,63 @@ kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
return 0;
}
+static void kbasep_hwcnt_backend_jm_init_layout(
+ const struct kbase_hwcnt_gpu_info *gpu_info,
+ struct kbase_hwcnt_jm_physical_layout *phys_layout)
+{
+ u8 shader_core_cnt;
+
+ WARN_ON(!gpu_info);
+ WARN_ON(!phys_layout);
+
+ shader_core_cnt = fls64(gpu_info->core_mask);
+
+ *phys_layout = (struct kbase_hwcnt_jm_physical_layout){
+ .fe_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT,
+ .tiler_cnt = KBASE_HWCNT_V5_TILER_BLOCK_COUNT,
+ .mmu_l2_cnt = gpu_info->l2_count,
+ .shader_cnt = shader_core_cnt,
+ .block_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT +
+ KBASE_HWCNT_V5_TILER_BLOCK_COUNT +
+ gpu_info->l2_count + shader_core_cnt,
+ .shader_avail_mask = gpu_info->core_mask,
+ .headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ .values_per_block = gpu_info->prfcnt_values_per_block,
+ .counters_per_block = gpu_info->prfcnt_values_per_block -
+ KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ .enable_mask_offset = KBASE_HWCNT_V5_PRFCNT_EN_HEADER,
+ };
+}
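/* Worked example (illustrative, not part of the patch): with core_mask 0xF
 * (fls64() == 4 shader cores) and l2_count == 2, the layout above gives
 * block_cnt = 1 (FE) + 1 (tiler) + 2 (L2) + 4 (shader) = 8 blocks.
 */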
+
+static void kbasep_hwcnt_backend_jm_dump_sample(
+ const struct kbase_hwcnt_backend_jm *const backend_jm)
+{
+ size_t block_idx;
+ const u32 *new_sample_buf = backend_jm->cpu_dump_va;
+ const u32 *new_block = new_sample_buf;
+ u64 *dst_buf = backend_jm->to_user_buf;
+ u64 *dst_block = dst_buf;
+ const size_t values_per_block =
+ backend_jm->phys_layout.values_per_block;
+ const size_t dump_bytes = backend_jm->info->dump_bytes;
+
+ for (block_idx = 0; block_idx < backend_jm->phys_layout.block_cnt;
+ block_idx++) {
+ size_t ctr_idx;
+
+ for (ctr_idx = 0; ctr_idx < values_per_block; ctr_idx++)
+ dst_block[ctr_idx] = new_block[ctr_idx];
+
+ new_block += values_per_block;
+ dst_block += values_per_block;
+ }
+
+ WARN_ON(new_block !=
+ new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
+ WARN_ON(dst_block !=
+ dst_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
+}
+
/**
* kbasep_hwcnt_backend_jm_on_freq_change() - On freq change callback
*
@@ -487,6 +579,9 @@ static int kbasep_hwcnt_backend_jm_dump_get(
kbase_sync_mem_regions(
backend_jm->kctx, backend_jm->vmap, KBASE_SYNC_TO_CPU);
+ /* Dump sample to the internal 64-bit user buffer. */
+ kbasep_hwcnt_backend_jm_dump_sample(backend_jm);
+
kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) {
if (!kbase_hwcnt_clk_enable_map_enabled(
dst_enable_map->clk_enable_map, clk))
@@ -496,7 +591,7 @@ static int kbasep_hwcnt_backend_jm_dump_get(
dst->clk_cnt_buf[clk] = backend_jm->cycle_count_elapsed[clk];
}
- return kbase_hwcnt_jm_dump_get(dst, backend_jm->cpu_dump_va,
+ return kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf,
dst_enable_map, backend_jm->pm_core_mask,
&backend_jm->curr_config, accumulate);
}
@@ -519,6 +614,11 @@ static int kbasep_hwcnt_backend_jm_dump_alloc(
u64 flags;
u64 nr_pages;
+	/* Calls to this function are inherently asynchronous with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
WARN_ON(!info);
WARN_ON(!kctx);
WARN_ON(!gpu_dump_va);
@@ -531,7 +631,8 @@ static int kbasep_hwcnt_backend_jm_dump_alloc(
nr_pages = PFN_UP(info->dump_bytes);
- reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va);
+ reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va,
+ mmu_sync_info);
if (!reg)
return -ENOMEM;
@@ -580,6 +681,8 @@ static void kbasep_hwcnt_backend_jm_destroy(
kbase_destroy_context(kctx);
}
+ kfree(backend->to_user_buf);
+
kfree(backend);
}
@@ -608,6 +711,8 @@ static int kbasep_hwcnt_backend_jm_create(
goto alloc_error;
backend->info = info;
+ kbasep_hwcnt_backend_jm_init_layout(&info->hwcnt_gpu_info,
+ &backend->phys_layout);
backend->kctx = kbase_create_context(kbdev, true,
BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL);
@@ -623,7 +728,12 @@ static int kbasep_hwcnt_backend_jm_create(
backend->cpu_dump_va = kbase_phy_alloc_mapping_get(backend->kctx,
backend->gpu_dump_va, &backend->vmap);
- if (!backend->cpu_dump_va)
+ if (!backend->cpu_dump_va || !backend->vmap)
+ goto alloc_error;
+
+ backend->to_user_buf =
+ kzalloc(info->metadata->dump_buf_bytes, GFP_KERNEL);
+ if (!backend->to_user_buf)
goto alloc_error;
kbase_ccswe_init(&backend->ccswe_shader_cores);
@@ -710,19 +820,14 @@ static int kbasep_hwcnt_backend_jm_info_create(
const struct kbase_hwcnt_backend_jm_info **out_info)
{
int errcode = -ENOMEM;
- struct kbase_hwcnt_gpu_info hwcnt_gpu_info;
struct kbase_hwcnt_backend_jm_info *info = NULL;
WARN_ON(!kbdev);
WARN_ON(!out_info);
- errcode = kbasep_hwcnt_backend_jm_gpu_info_init(kbdev, &hwcnt_gpu_info);
- if (errcode)
- return errcode;
-
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
- goto error;
+ return errcode;
info->kbdev = kbdev;
@@ -735,7 +840,12 @@ static int kbasep_hwcnt_backend_jm_info_create(
info->counter_set = KBASE_HWCNT_SET_PRIMARY;
#endif
- errcode = kbase_hwcnt_jm_metadata_create(&hwcnt_gpu_info,
+ errcode = kbasep_hwcnt_backend_jm_gpu_info_init(kbdev,
+ &info->hwcnt_gpu_info);
+ if (errcode)
+ goto error;
+
+ errcode = kbase_hwcnt_jm_metadata_create(&info->hwcnt_gpu_info,
info->counter_set,
&info->metadata,
&info->dump_bytes);
diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.c b/mali_kbase/mali_kbase_hwcnt_gpu.c
index 2975269..97a7511 100644
--- a/mali_kbase/mali_kbase_hwcnt_gpu.c
+++ b/mali_kbase/mali_kbase_hwcnt_gpu.c
@@ -223,7 +223,7 @@ kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_info *gpu_info)
WARN_ON(!gpu_info);
return (2 + gpu_info->l2_count + fls64(gpu_info->core_mask)) *
- gpu_info->prfcnt_values_per_block * KBASE_HWCNT_VALUE_BYTES;
+ gpu_info->prfcnt_values_per_block * KBASE_HWCNT_VALUE_HW_BYTES;
}
int kbase_hwcnt_jm_metadata_create(
@@ -253,10 +253,11 @@ int kbase_hwcnt_jm_metadata_create(
return errcode;
/*
- * Dump abstraction size should be exactly the same size and layout as
- * the physical dump size, for backwards compatibility.
+	 * The physical dump size should be half the dump abstraction size in the
+	 * metadata, since the physical HW uses 32 bits per value but the metadata
+	 * specifies 64 bits per value.
*/
- WARN_ON(dump_bytes != metadata->dump_buf_bytes);
+ WARN_ON(dump_bytes * 2 != metadata->dump_buf_bytes);
*out_metadata = metadata;
*out_dump_bytes = dump_bytes;
@@ -302,127 +303,6 @@ void kbase_hwcnt_csf_metadata_destroy(
kbase_hwcnt_metadata_destroy(metadata);
}
-int kbase_hwcnt_gpu_metadata_create_truncate_64(
- const struct kbase_hwcnt_metadata **dst_md,
- const struct kbase_hwcnt_metadata *src_md)
-{
- struct kbase_hwcnt_description desc;
- struct kbase_hwcnt_group_description group;
- struct kbase_hwcnt_block_description
- blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
- size_t prfcnt_values_per_block;
- size_t blk;
-
- if (!dst_md || !src_md || !src_md->grp_metadata ||
- !src_md->grp_metadata[0].blk_metadata)
- return -EINVAL;
-
- /* Only support 1 group count and KBASE_HWCNT_V5_BLOCK_TYPE_COUNT block
- * count in the metadata.
- */
- if ((kbase_hwcnt_metadata_group_count(src_md) != 1) ||
- (kbase_hwcnt_metadata_block_count(src_md, 0) !=
- KBASE_HWCNT_V5_BLOCK_TYPE_COUNT))
- return -EINVAL;
-
- /* Get the values count in the first block. */
- prfcnt_values_per_block =
- kbase_hwcnt_metadata_block_values_count(src_md, 0, 0);
-
- /* check all blocks should have same values count. */
- for (blk = 0; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) {
- size_t val_cnt =
- kbase_hwcnt_metadata_block_values_count(src_md, 0, blk);
- if (val_cnt != prfcnt_values_per_block)
- return -EINVAL;
- }
-
- /* Only support 64 and 128 entries per block. */
- if ((prfcnt_values_per_block != 64) && (prfcnt_values_per_block != 128))
- return -EINVAL;
-
- if (prfcnt_values_per_block == 64) {
- /* If the values per block is 64, no need to truncate. */
- *dst_md = NULL;
- return 0;
- }
-
- /* Truncate from 128 to 64 entries per block to keep API backward
- * compatibility.
- */
- prfcnt_values_per_block = 64;
-
- for (blk = 0; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) {
- blks[blk].type =
- kbase_hwcnt_metadata_block_type(src_md, 0, blk);
- blks[blk].inst_cnt = kbase_hwcnt_metadata_block_instance_count(
- src_md, 0, blk);
- blks[blk].hdr_cnt = kbase_hwcnt_metadata_block_headers_count(
- src_md, 0, blk);
- blks[blk].ctr_cnt = prfcnt_values_per_block - blks[blk].hdr_cnt;
- }
-
- group.type = kbase_hwcnt_metadata_group_type(src_md, 0);
- group.blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT;
- group.blks = blks;
-
- desc.grp_cnt = kbase_hwcnt_metadata_group_count(src_md);
- desc.avail_mask = src_md->avail_mask;
- desc.clk_cnt = src_md->clk_cnt;
- desc.grps = &group;
-
- return kbase_hwcnt_metadata_create(&desc, dst_md);
-}
-
-void kbase_hwcnt_dump_buffer_copy_strict_narrow(
- struct kbase_hwcnt_dump_buffer *dst,
- const struct kbase_hwcnt_dump_buffer *src,
- const struct kbase_hwcnt_enable_map *dst_enable_map)
-{
- const struct kbase_hwcnt_metadata *metadata;
- size_t grp, blk, blk_inst;
- size_t clk;
-
- if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) ||
- WARN_ON(dst == src) || WARN_ON(dst->metadata == src->metadata) ||
- WARN_ON(dst->metadata->grp_cnt != src->metadata->grp_cnt) ||
- WARN_ON(src->metadata->grp_cnt != 1) ||
- WARN_ON(dst->metadata->grp_metadata[0].blk_cnt !=
- src->metadata->grp_metadata[0].blk_cnt) ||
- WARN_ON(dst->metadata->grp_metadata[0].blk_cnt != 4) ||
- WARN_ON(dst->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt >
- src->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt))
- return;
-
- /* Don't use src metadata since src buffer is bigger than dst buffer. */
- metadata = dst->metadata;
-
- kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
- u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
- dst, grp, blk, blk_inst);
- const u32 *src_blk = kbase_hwcnt_dump_buffer_block_instance(
- src, grp, blk, blk_inst);
- const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
- dst_enable_map, grp, blk, blk_inst);
- size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
- metadata, grp, blk);
- /* Align upwards to include padding bytes */
- val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
- val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
- KBASE_HWCNT_VALUE_BYTES));
-
- kbase_hwcnt_dump_buffer_block_copy_strict(dst_blk, src_blk,
- blk_em, val_cnt);
- }
-
- kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
- bool clk_enabled = kbase_hwcnt_clk_enable_map_enabled(
- dst_enable_map->clk_enable_map, clk);
-
- dst->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0;
- }
-}
-
static bool is_block_type_shader(
const u64 grp_type,
const u64 blk_type,
@@ -462,28 +342,26 @@ static bool is_block_type_l2_cache(
return is_l2_cache;
}
-int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src,
+int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
const struct kbase_hwcnt_enable_map *dst_enable_map,
u64 pm_core_mask,
const struct kbase_hwcnt_curr_config *curr_config,
bool accumulate)
{
const struct kbase_hwcnt_metadata *metadata;
- const u32 *dump_src;
- size_t src_offset, grp, blk, blk_inst;
+ size_t grp, blk, blk_inst;
+ const u64 *dump_src = src;
+ size_t src_offset = 0;
u64 core_mask = pm_core_mask;
/* Variables to deal with the current configuration */
int l2_count = 0;
- bool hw_res_available = true;
if (!dst || !src || !dst_enable_map ||
(dst_enable_map->metadata != dst->metadata))
return -EINVAL;
metadata = dst->metadata;
- dump_src = (const u32 *)src;
- src_offset = 0;
kbase_hwcnt_metadata_for_each_block(
metadata, grp, blk, blk_inst) {
@@ -501,6 +379,7 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src,
const bool is_l2_cache = is_block_type_l2_cache(
kbase_hwcnt_metadata_group_type(metadata, grp),
blk_type);
+ bool hw_res_available = true;
/*
* If l2 blocks is greater than the current allocated number of
@@ -525,14 +404,13 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src,
}
/*
- * Early out if no values in the dest block are enabled or if
- * the resource target of the block is not available in the HW.
+ * Skip block if no values in the destination block are enabled.
*/
if (kbase_hwcnt_enable_map_block_enabled(
dst_enable_map, grp, blk, blk_inst)) {
- u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+ u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
dst, grp, blk, blk_inst);
- const u32 *src_blk = dump_src + src_offset;
+ const u64 *src_blk = dump_src + src_offset;
if ((!is_shader_core || (core_mask & 1)) && hw_res_available) {
if (accumulate) {
@@ -560,21 +438,20 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src,
return 0;
}
-int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src,
+int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
const struct kbase_hwcnt_enable_map *dst_enable_map,
bool accumulate)
{
const struct kbase_hwcnt_metadata *metadata;
- const u32 *dump_src;
- size_t src_offset, grp, blk, blk_inst;
+ const u64 *dump_src = src;
+ size_t src_offset = 0;
+ size_t grp, blk, blk_inst;
if (!dst || !src || !dst_enable_map ||
(dst_enable_map->metadata != dst->metadata))
return -EINVAL;
metadata = dst->metadata;
- dump_src = (const u32 *)src;
- src_offset = 0;
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(
@@ -583,12 +460,14 @@ int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src,
kbase_hwcnt_metadata_block_counters_count(metadata, grp,
blk);
- /* Early out if no values in the dest block are enabled */
+ /*
+ * Skip block if no values in the destination block are enabled.
+ */
if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp,
blk, blk_inst)) {
- u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+ u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
dst, grp, blk, blk_inst);
- const u32 *src_blk = dump_src + src_offset;
+ const u64 *src_blk = dump_src + src_offset;
if (accumulate) {
kbase_hwcnt_dump_buffer_block_accumulate(
@@ -606,48 +485,6 @@ int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src,
}
/**
- * kbasep_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block
- * enable map abstraction to
- * a physical block enable
- * map.
- * @lo: Low 64 bits of block enable map abstraction.
- * @hi: High 64 bits of block enable map abstraction.
- *
- * The abstraction uses 128 bits to enable 128 block values, whereas the
- * physical uses just 32 bits, as bit n enables values [n*4, n*4+3].
- * Therefore, this conversion is lossy.
- *
- * Return: 32-bit physical block enable map.
- */
-static inline u32 kbasep_hwcnt_backend_gpu_block_map_to_physical(
- u64 lo,
- u64 hi)
-{
- u32 phys = 0;
- u64 dwords[2] = {lo, hi};
- size_t dword_idx;
-
- for (dword_idx = 0; dword_idx < 2; dword_idx++) {
- const u64 dword = dwords[dword_idx];
- u16 packed = 0;
-
- size_t hword_bit;
-
- for (hword_bit = 0; hword_bit < 16; hword_bit++) {
- const size_t dword_bit = hword_bit * 4;
- const u16 mask =
- ((dword >> (dword_bit + 0)) & 0x1) |
- ((dword >> (dword_bit + 1)) & 0x1) |
- ((dword >> (dword_bit + 2)) & 0x1) |
- ((dword >> (dword_bit + 3)) & 0x1);
- packed |= (mask << hword_bit);
- }
- phys |= ((u32)packed) << (16 * dword_idx);
- }
- return phys;
-}
-
-/**
* kbasep_hwcnt_backend_gpu_block_map_from_physical() - Convert from a physical
* block enable map to a
* block enable map
@@ -746,14 +583,13 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
}
}
- dst->fe_bm =
- kbasep_hwcnt_backend_gpu_block_map_to_physical(fe_bm, 0);
+ dst->fe_bm = kbase_hwcnt_backend_gpu_block_map_to_physical(fe_bm, 0);
dst->shader_bm =
- kbasep_hwcnt_backend_gpu_block_map_to_physical(shader_bm, 0);
+ kbase_hwcnt_backend_gpu_block_map_to_physical(shader_bm, 0);
dst->tiler_bm =
- kbasep_hwcnt_backend_gpu_block_map_to_physical(tiler_bm, 0);
+ kbase_hwcnt_backend_gpu_block_map_to_physical(tiler_bm, 0);
dst->mmu_l2_bm =
- kbasep_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm, 0);
+ kbase_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm, 0);
}
void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst,
@@ -857,12 +693,12 @@ void kbase_hwcnt_gpu_patch_dump_headers(
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
const u64 grp_type =
kbase_hwcnt_metadata_group_type(metadata, grp);
- u32 *buf_blk = kbase_hwcnt_dump_buffer_block_instance(
+ u64 *buf_blk = kbase_hwcnt_dump_buffer_block_instance(
buf, grp, blk, blk_inst);
const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(
enable_map, grp, blk, blk_inst);
const u32 prfcnt_en =
- kbasep_hwcnt_backend_gpu_block_map_to_physical(
+ kbase_hwcnt_backend_gpu_block_map_to_physical(
blk_map[0], 0);
if ((enum kbase_hwcnt_gpu_group_type)grp_type ==
diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.h b/mali_kbase/mali_kbase_hwcnt_gpu.h
index 50ae80d..648f85f 100644
--- a/mali_kbase/mali_kbase_hwcnt_gpu.h
+++ b/mali_kbase/mali_kbase_hwcnt_gpu.h
@@ -29,15 +29,25 @@ struct kbase_hwcnt_metadata;
struct kbase_hwcnt_enable_map;
struct kbase_hwcnt_dump_buffer;
+/* Hardware counter version 5 definitions; V5 is the only supported version. */
#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4
#define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4
#define KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK 60
#define KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK \
(KBASE_HWCNT_V5_HEADERS_PER_BLOCK + \
KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK)
-/** Index of the PRFCNT_EN header into a V5 counter block */
+
+/* FrontEnd block count in V5 GPU hardware counter. */
+#define KBASE_HWCNT_V5_FE_BLOCK_COUNT 1
+/* Tiler block count in V5 GPU hardware counter. */
+#define KBASE_HWCNT_V5_TILER_BLOCK_COUNT 1
+
+/* Index of the PRFCNT_EN header into a V5 counter block */
#define KBASE_HWCNT_V5_PRFCNT_EN_HEADER 2
+/* Number of bytes for each counter value in hardware. */
+#define KBASE_HWCNT_VALUE_HW_BYTES (sizeof(u32))
+
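/* Worked example (illustrative): with the defaults above, a block holds
 * KBASE_HWCNT_V5_HEADERS_PER_BLOCK (4) + 60 counters = 64 values, i.e.
 * 64 * KBASE_HWCNT_VALUE_HW_BYTES = 256 bytes in the physical hardware dump.
 */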
/**
* enum kbase_hwcnt_gpu_group_type - GPU hardware counter group types, used to
* identify metadata groups.
@@ -84,11 +94,13 @@ enum kbase_hwcnt_gpu_v5_block_type {
* @KBASE_HWCNT_SET_PRIMARY: The Primary set of counters
* @KBASE_HWCNT_SET_SECONDARY: The Secondary set of counters
* @KBASE_HWCNT_SET_TERTIARY: The Tertiary set of counters
+ * @KBASE_HWCNT_SET_UNDEFINED: Undefined set of counters
*/
enum kbase_hwcnt_set {
KBASE_HWCNT_SET_PRIMARY,
KBASE_HWCNT_SET_SECONDARY,
KBASE_HWCNT_SET_TERTIARY,
+ KBASE_HWCNT_SET_UNDEFINED = 255,
};
/**
@@ -225,61 +237,19 @@ void kbase_hwcnt_csf_metadata_destroy(
const struct kbase_hwcnt_metadata *metadata);
/**
- * kbase_hwcnt_gpu_metadata_create_truncate_64() - Create HWC metadata with HWC
- * block entries truncated
- * to 64.
- *
- * @dst_md: Non-NULL pointer to where created metadata is stored on success.
- * @src_md: Non-NULL pointer to the HWC metadata used as the source to create
- * dst_md.
- *
- * If the total block entries in src_md is 64, metadata dst_md returns NULL
- * since no need to truncate.
- * if the total block entries in src_md is 128, then a new metadata with block
- * entries truncated to 64 will be created for dst_md, which keeps the interface
- * to user clients backward compatible.
- * If the total block entries in src_md is other values, function returns error
- * since it's not supported.
- *
- * Return: 0 on success, else error code.
- */
-int kbase_hwcnt_gpu_metadata_create_truncate_64(
- const struct kbase_hwcnt_metadata **dst_md,
- const struct kbase_hwcnt_metadata *src_md);
-
-/**
- * kbase_hwcnt_dump_buffer_copy_strict_narrow() - Copy all enabled values from
- * src to dst.
- *
- * @dst: Non-NULL pointer to dst dump buffer.
- * @src: Non-NULL pointer to src dump buffer.
- * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
- *
- * After the operation, all non-enabled values (including padding bytes) will be
- * zero.
- *
- * The dst and src have different metadata, and the dst metadata is narrower
- * than src metadata.
- */
-void kbase_hwcnt_dump_buffer_copy_strict_narrow(
- struct kbase_hwcnt_dump_buffer *dst,
- const struct kbase_hwcnt_dump_buffer *src,
- const struct kbase_hwcnt_enable_map *dst_enable_map);
-
-/**
* kbase_hwcnt_jm_dump_get() - Copy or accumulate enabled counters from the raw
* dump buffer in src into the dump buffer
* abstraction in dst.
- * @dst: Non-NULL pointer to dst dump buffer.
- * @src: Non-NULL pointer to src raw dump buffer, of same length
- * as returned in out_dump_bytes parameter of
- * kbase_hwcnt_jm_metadata_create.
+ * @dst: Non-NULL pointer to destination dump buffer.
+ * @src: Non-NULL pointer to source raw dump buffer, of same length
+ * as dump_buf_bytes in the metadata of destination dump
+ * buffer.
* @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
* @pm_core_mask: PM state synchronized shaders core mask with the dump.
* @curr_config: Current allocated hardware resources to correctly map the
- * src raw dump buffer to the dst dump buffer.
- * @accumulate: True if counters in src should be accumulated into dst,
- * rather than copied.
+ * source raw dump buffer to the destination dump buffer.
+ * @accumulate: True if counters in source should be accumulated into
+ * destination, rather than copied.
*
* The dst and dst_enable_map MUST have been created from the same metadata as
* returned from the call to kbase_hwcnt_jm_metadata_create as was used to get
@@ -287,7 +257,7 @@ void kbase_hwcnt_dump_buffer_copy_strict_narrow(
*
* Return: 0 on success, else error code.
*/
-int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src,
+int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
const struct kbase_hwcnt_enable_map *dst_enable_map,
const u64 pm_core_mask,
const struct kbase_hwcnt_curr_config *curr_config,
@@ -297,13 +267,12 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src,
* kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw
* dump buffer in src into the dump buffer
* abstraction in dst.
- * @dst: Non-NULL pointer to dst dump buffer.
- * @src: Non-NULL pointer to src raw dump buffer, of same length
- * as returned in out_dump_bytes parameter of
- * kbase_hwcnt_csf_metadata_create.
+ * @dst: Non-NULL pointer to destination dump buffer.
+ * @src: Non-NULL pointer to source raw dump buffer, of same length
+ * as dump_buf_bytes in the metadata of dst dump buffer.
* @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
- * @accumulate: True if counters in src should be accumulated into dst,
- * rather than copied.
+ * @accumulate: True if counters in src should be accumulated into
+ * destination, rather than copied.
*
* The dst and dst_enable_map MUST have been created from the same metadata as
* returned from the call to kbase_hwcnt_csf_metadata_create as was used to get
@@ -311,15 +280,54 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src,
*
* Return: 0 on success, else error code.
*/
-int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src,
+int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
const struct kbase_hwcnt_enable_map *dst_enable_map,
bool accumulate);
/**
+ * kbase_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block
+ * enable map abstraction to
+ * a physical block enable
+ * map.
+ * @lo: Low 64 bits of block enable map abstraction.
+ * @hi: High 64 bits of block enable map abstraction.
+ *
+ * The abstraction uses 128 bits to enable 128 block values, whereas the
+ * physical uses just 32 bits, as bit n enables values [n*4, n*4+3].
+ * Therefore, this conversion is lossy.
+ *
+ * Return: 32-bit physical block enable map.
+ */
+static inline u32 kbase_hwcnt_backend_gpu_block_map_to_physical(u64 lo, u64 hi)
+{
+ u32 phys = 0;
+ u64 dwords[2] = { lo, hi };
+ size_t dword_idx;
+
+ for (dword_idx = 0; dword_idx < 2; dword_idx++) {
+ const u64 dword = dwords[dword_idx];
+ u16 packed = 0;
+
+ size_t hword_bit;
+
+ for (hword_bit = 0; hword_bit < 16; hword_bit++) {
+ const size_t dword_bit = hword_bit * 4;
+ const u16 mask = ((dword >> (dword_bit + 0)) & 0x1) |
+ ((dword >> (dword_bit + 1)) & 0x1) |
+ ((dword >> (dword_bit + 2)) & 0x1) |
+ ((dword >> (dword_bit + 3)) & 0x1);
+ packed |= (mask << hword_bit);
+ }
+ phys |= ((u32)packed) << (16 * dword_idx);
+ }
+ return phys;
+}
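/* Usage sketch (illustrative, not part of the patch): because physical bit n
 * covers abstraction values [n*4, n*4+3], enabling only value 5 still sets
 * physical bit 1, and a later conversion back from physical would re-enable
 * values 4 to 7, which is why the conversion above is described as lossy.
 *
 *	u32 phys = kbase_hwcnt_backend_gpu_block_map_to_physical(1ULL << 5, 0);
 *	// phys == 0x2
 */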
+
+/**
* kbase_hwcnt_gpu_enable_map_to_physical() - Convert an enable map abstraction
* into a physical enable map.
- * @dst: Non-NULL pointer to dst physical enable map.
- * @src: Non-NULL pointer to src enable map abstraction.
+ * @dst: Non-NULL pointer to destination physical enable map.
+ * @src: Non-NULL pointer to source enable map abstraction.
*
* The src must have been created from a metadata returned from a call to
* kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create.
@@ -336,8 +344,8 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
* kbase_hwcnt_gpu_set_to_physical() - Map counter set selection to physical
* SET_SELECT value.
*
- * @dst: Non-NULL pointer to dst physical SET_SELECT value.
- * @src: Non-NULL pointer to src counter set selection.
+ * @dst: Non-NULL pointer to destination physical SET_SELECT value.
+ * @src: Non-NULL pointer to source counter set selection.
*/
void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst,
enum kbase_hwcnt_set src);
@@ -345,8 +353,8 @@ void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst,
/**
* kbase_hwcnt_gpu_enable_map_from_physical() - Convert a physical enable map to
* an enable map abstraction.
- * @dst: Non-NULL pointer to dst enable map abstraction.
- * @src: Non-NULL pointer to src physical enable map.
+ * @dst: Non-NULL pointer to destination enable map abstraction.
+ * @src: Non-NULL pointer to source physical enable map.
*
* The dst must have been created from a metadata returned from a call to
* kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create.
diff --git a/mali_kbase/mali_kbase_hwcnt_gpu_narrow.c b/mali_kbase/mali_kbase_hwcnt_gpu_narrow.c
new file mode 100644
index 0000000..e2caa1c
--- /dev/null
+++ b/mali_kbase/mali_kbase_hwcnt_gpu_narrow.c
@@ -0,0 +1,329 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "mali_kbase_hwcnt_gpu.h"
+#include "mali_kbase_hwcnt_gpu_narrow.h"
+
+#include <linux/bug.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+int kbase_hwcnt_gpu_metadata_narrow_create(
+ const struct kbase_hwcnt_metadata_narrow **dst_md_narrow,
+ const struct kbase_hwcnt_metadata *src_md)
+{
+ struct kbase_hwcnt_description desc;
+ struct kbase_hwcnt_group_description group;
+ struct kbase_hwcnt_block_description
+ blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
+ size_t prfcnt_values_per_block;
+ size_t blk;
+ int err;
+ struct kbase_hwcnt_metadata_narrow *metadata_narrow;
+
+ if (!dst_md_narrow || !src_md || !src_md->grp_metadata ||
+ !src_md->grp_metadata[0].blk_metadata)
+ return -EINVAL;
+
+ /* Only support 1 group count and KBASE_HWCNT_V5_BLOCK_TYPE_COUNT block
+ * count in the metadata.
+ */
+ if ((kbase_hwcnt_metadata_group_count(src_md) != 1) ||
+ (kbase_hwcnt_metadata_block_count(src_md, 0) !=
+ KBASE_HWCNT_V5_BLOCK_TYPE_COUNT))
+ return -EINVAL;
+
+ /* Get the values count in the first block. */
+ prfcnt_values_per_block =
+ kbase_hwcnt_metadata_block_values_count(src_md, 0, 0);
+
+	/* Check that all blocks have the same number of values. */
+ for (blk = 1; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) {
+ size_t val_cnt =
+ kbase_hwcnt_metadata_block_values_count(src_md, 0, blk);
+ if (val_cnt != prfcnt_values_per_block)
+ return -EINVAL;
+ }
+
+ /* Only support 64 and 128 entries per block. */
+ if ((prfcnt_values_per_block != 64) && (prfcnt_values_per_block != 128))
+ return -EINVAL;
+
+ metadata_narrow = kmalloc(sizeof(*metadata_narrow), GFP_KERNEL);
+ if (!metadata_narrow)
+ return -ENOMEM;
+
+	/* Narrow to 64 entries per block to keep the API backward compatible. */
+ prfcnt_values_per_block = 64;
+
+ for (blk = 0; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) {
+ size_t blk_hdr_cnt = kbase_hwcnt_metadata_block_headers_count(
+ src_md, 0, blk);
+ blks[blk] = (struct kbase_hwcnt_block_description){
+ .type = kbase_hwcnt_metadata_block_type(src_md, 0, blk),
+ .inst_cnt = kbase_hwcnt_metadata_block_instance_count(
+ src_md, 0, blk),
+ .hdr_cnt = blk_hdr_cnt,
+ .ctr_cnt = prfcnt_values_per_block - blk_hdr_cnt,
+ };
+ }
+
+ group = (struct kbase_hwcnt_group_description){
+ .type = kbase_hwcnt_metadata_group_type(src_md, 0),
+ .blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT,
+ .blks = blks,
+ };
+
+ desc = (struct kbase_hwcnt_description){
+ .grp_cnt = kbase_hwcnt_metadata_group_count(src_md),
+ .avail_mask = src_md->avail_mask,
+ .clk_cnt = src_md->clk_cnt,
+ .grps = &group,
+ };
+
+ err = kbase_hwcnt_metadata_create(&desc, &metadata_narrow->metadata);
+ if (!err) {
+		/* Halve the buffer size: the narrowed metadata only supports
+		 * 32-bit block entries, whereas the metadata just created uses
+		 * 64-bit entries.
+ */
+ metadata_narrow->dump_buf_bytes =
+ metadata_narrow->metadata->dump_buf_bytes >> 1;
+ *dst_md_narrow = metadata_narrow;
+ } else {
+ kfree(metadata_narrow);
+ }
+
+ return err;
+}
+
+void kbase_hwcnt_gpu_metadata_narrow_destroy(
+ const struct kbase_hwcnt_metadata_narrow *md_narrow)
+{
+ if (!md_narrow)
+ return;
+
+ kbase_hwcnt_metadata_destroy(md_narrow->metadata);
+ kfree(md_narrow);
+}
+
+int kbase_hwcnt_dump_buffer_narrow_alloc(
+ const struct kbase_hwcnt_metadata_narrow *md_narrow,
+ struct kbase_hwcnt_dump_buffer_narrow *dump_buf)
+{
+ size_t dump_buf_bytes;
+ size_t clk_cnt_buf_bytes;
+ u8 *buf;
+
+ if (!md_narrow || !dump_buf)
+ return -EINVAL;
+
+ dump_buf_bytes = md_narrow->dump_buf_bytes;
+ clk_cnt_buf_bytes =
+ sizeof(*dump_buf->clk_cnt_buf) * md_narrow->metadata->clk_cnt;
+
+ /* Make a single allocation for both dump_buf and clk_cnt_buf. */
+ buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ *dump_buf = (struct kbase_hwcnt_dump_buffer_narrow){
+ .md_narrow = md_narrow,
+ .dump_buf = (u32 *)buf,
+ .clk_cnt_buf = (u64 *)(buf + dump_buf_bytes),
+ };
+
+ return 0;
+}
+
+void kbase_hwcnt_dump_buffer_narrow_free(
+ struct kbase_hwcnt_dump_buffer_narrow *dump_buf_narrow)
+{
+ if (!dump_buf_narrow)
+ return;
+
+ kfree(dump_buf_narrow->dump_buf);
+ *dump_buf_narrow = (struct kbase_hwcnt_dump_buffer_narrow){ 0 };
+}
+
+int kbase_hwcnt_dump_buffer_narrow_array_alloc(
+ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t n,
+ struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs)
+{
+ struct kbase_hwcnt_dump_buffer_narrow *buffers;
+ size_t buf_idx;
+ unsigned int order;
+ unsigned long addr;
+ size_t dump_buf_bytes;
+ size_t clk_cnt_buf_bytes;
+ size_t total_dump_buf_size;
+
+ if (!md_narrow || !dump_bufs)
+ return -EINVAL;
+
+ dump_buf_bytes = md_narrow->dump_buf_bytes;
+ clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) *
+ md_narrow->metadata->clk_cnt;
+
+ /* Allocate memory for the dump buffer struct array */
+ buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL);
+ if (!buffers)
+ return -ENOMEM;
+
+ /* Allocate pages for the actual dump buffers, as they tend to be fairly
+ * large.
+ */
+ order = get_order((dump_buf_bytes + clk_cnt_buf_bytes) * n);
+ addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+
+ if (!addr) {
+ kfree(buffers);
+ return -ENOMEM;
+ }
+
+ *dump_bufs = (struct kbase_hwcnt_dump_buffer_narrow_array){
+ .page_addr = addr,
+ .page_order = order,
+ .buf_cnt = n,
+ .bufs = buffers,
+ };
+
+ total_dump_buf_size = dump_buf_bytes * n;
+ /* Set the buffer of each dump buf */
+ for (buf_idx = 0; buf_idx < n; buf_idx++) {
+ const size_t dump_buf_offset = dump_buf_bytes * buf_idx;
+ const size_t clk_cnt_buf_offset =
+ total_dump_buf_size + (clk_cnt_buf_bytes * buf_idx);
+
+ buffers[buf_idx] = (struct kbase_hwcnt_dump_buffer_narrow){
+ .md_narrow = md_narrow,
+ .dump_buf = (u32 *)(addr + dump_buf_offset),
+ .clk_cnt_buf = (u64 *)(addr + clk_cnt_buf_offset),
+ };
+ }
+
+ return 0;
+}
+
+void kbase_hwcnt_dump_buffer_narrow_array_free(
+ struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs)
+{
+ if (!dump_bufs)
+ return;
+
+ kfree(dump_bufs->bufs);
+ free_pages(dump_bufs->page_addr, dump_bufs->page_order);
+ memset(dump_bufs, 0, sizeof(*dump_bufs));
+}
+
+void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk,
+ const u64 *src_blk,
+ const u64 *blk_em,
+ size_t val_cnt)
+{
+ size_t val;
+
+ for (val = 0; val < val_cnt; val++) {
+ bool val_enabled =
+ kbase_hwcnt_enable_map_block_value_enabled(blk_em, val);
+ u32 src_val =
+ (src_blk[val] > U32_MAX) ? U32_MAX : (u32)src_blk[val];
+
+ dst_blk[val] = val_enabled ? src_val : 0;
+ }
+}
+
+void kbase_hwcnt_dump_buffer_copy_strict_narrow(
+ struct kbase_hwcnt_dump_buffer_narrow *dst_narrow,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+ const struct kbase_hwcnt_metadata_narrow *metadata_narrow;
+ size_t grp;
+ size_t clk;
+
+ if (WARN_ON(!dst_narrow) || WARN_ON(!src) || WARN_ON(!dst_enable_map) ||
+ WARN_ON(dst_narrow->md_narrow->metadata == src->metadata) ||
+ WARN_ON(dst_narrow->md_narrow->metadata->grp_cnt !=
+ src->metadata->grp_cnt) ||
+ WARN_ON(src->metadata->grp_cnt != 1) ||
+ WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt !=
+ src->metadata->grp_metadata[0].blk_cnt) ||
+ WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt !=
+ KBASE_HWCNT_V5_BLOCK_TYPE_COUNT) ||
+ WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0]
+ .blk_metadata[0]
+ .ctr_cnt >
+ src->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt))
+ return;
+
+ /* Don't use src metadata since src buffer is bigger than dst buffer. */
+ metadata_narrow = dst_narrow->md_narrow;
+
+ for (grp = 0;
+ grp < kbase_hwcnt_metadata_narrow_group_count(metadata_narrow);
+ grp++) {
+ size_t blk;
+ size_t blk_cnt = kbase_hwcnt_metadata_narrow_block_count(
+ metadata_narrow, grp);
+
+ for (blk = 0; blk < blk_cnt; blk++) {
+ size_t blk_inst;
+ size_t blk_inst_cnt =
+ kbase_hwcnt_metadata_narrow_block_instance_count(
+ metadata_narrow, grp, blk);
+
+ for (blk_inst = 0; blk_inst < blk_inst_cnt;
+ blk_inst++) {
+ /* The narrowed down buffer is only 32-bit. */
+ u32 *dst_blk =
+ kbase_hwcnt_dump_buffer_narrow_block_instance(
+ dst_narrow, grp, blk, blk_inst);
+ const u64 *src_blk =
+ kbase_hwcnt_dump_buffer_block_instance(
+ src, grp, blk, blk_inst);
+ const u64 *blk_em =
+ kbase_hwcnt_enable_map_block_instance(
+ dst_enable_map, grp, blk,
+ blk_inst);
+ size_t val_cnt =
+ kbase_hwcnt_metadata_narrow_block_values_count(
+ metadata_narrow, grp, blk);
+ /* Align upwards to include padding bytes */
+ val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
+ val_cnt,
+ (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
+ KBASE_HWCNT_VALUE_BYTES));
+
+ kbase_hwcnt_dump_buffer_block_copy_strict_narrow(
+ dst_blk, src_blk, blk_em, val_cnt);
+ }
+ }
+ }
+
+ for (clk = 0; clk < metadata_narrow->metadata->clk_cnt; clk++) {
+ bool clk_enabled = kbase_hwcnt_clk_enable_map_enabled(
+ dst_enable_map->clk_enable_map, clk);
+
+ dst_narrow->clk_cnt_buf[clk] =
+ clk_enabled ? src->clk_cnt_buf[clk] : 0;
+ }
+}
diff --git a/mali_kbase/mali_kbase_hwcnt_gpu_narrow.h b/mali_kbase/mali_kbase_hwcnt_gpu_narrow.h
new file mode 100644
index 0000000..af6fa19
--- /dev/null
+++ b/mali_kbase/mali_kbase_hwcnt_gpu_narrow.h
@@ -0,0 +1,347 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_HWCNT_GPU_NARROW_H_
+#define _KBASE_HWCNT_GPU_NARROW_H_
+
+#include "mali_kbase_hwcnt_types.h"
+#include <linux/types.h>
+
+struct kbase_device;
+struct kbase_hwcnt_metadata;
+struct kbase_hwcnt_enable_map;
+struct kbase_hwcnt_dump_buffer;
+
+/**
+ * struct kbase_hwcnt_metadata_narrow - Narrow metadata describing the physical
+ * layout of narrow dump buffers.
+ * For backward compatibility, the narrow
+ * metadata only supports 64 counters per
+ * block and 32-bit per block entry.
+ * @metadata: Non-NULL pointer to the metadata before narrowing to 32-bit
+ * block entries; it has 64 counters per block and 64 bits per value.
+ * @dump_buf_bytes: The dump buffer size in bytes after narrowing each block
+ * entry from 64-bit to 32-bit.
+ */
+struct kbase_hwcnt_metadata_narrow {
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t dump_buf_bytes;
+};
+
+/**
+ * struct kbase_hwcnt_dump_buffer_narrow - Hardware counter narrow dump buffer.
+ * @md_narrow: Non-NULL pointer to narrow metadata used to identify, and to
+ * describe the layout of the narrow dump buffer.
+ * @dump_buf: Non-NULL pointer to an array of u32 values, the array size
+ * is md_narrow->dump_buf_bytes.
+ * @clk_cnt_buf: A pointer to an array of u64 values for cycle count elapsed
+ * for each clock domain.
+ */
+struct kbase_hwcnt_dump_buffer_narrow {
+ const struct kbase_hwcnt_metadata_narrow *md_narrow;
+ u32 *dump_buf;
+ u64 *clk_cnt_buf;
+};
+
+/**
+ * struct kbase_hwcnt_dump_buffer_narrow_array - Hardware counter narrow dump
+ * buffer array.
+ * @page_addr: Address of first allocated page. A single allocation is used for
+ * all narrow dump buffers in the array.
+ * @page_order: The allocation order of the pages; the order is the base-2
+ * logarithm of the number of allocated pages.
+ * @buf_cnt: The number of allocated dump buffers.
+ * @bufs: Non-NULL pointer to the array of narrow dump buffer descriptors.
+ */
+struct kbase_hwcnt_dump_buffer_narrow_array {
+ unsigned long page_addr;
+ unsigned int page_order;
+ size_t buf_cnt;
+ struct kbase_hwcnt_dump_buffer_narrow *bufs;
+};
+
+/**
+ * kbase_hwcnt_metadata_narrow_group_count() - Get the number of groups from
+ * narrow metadata.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ *
+ * Return: Number of hardware counter groups described by narrow metadata.
+ */
+static inline size_t kbase_hwcnt_metadata_narrow_group_count(
+ const struct kbase_hwcnt_metadata_narrow *md_narrow)
+{
+ return kbase_hwcnt_metadata_group_count(md_narrow->metadata);
+}
+
+/**
+ * kbase_hwcnt_metadata_narrow_group_type() - Get the arbitrary type of a group
+ * from narrow metadata.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @grp: Index of the group in the narrow metadata.
+ *
+ * Return: Type of the group grp.
+ */
+static inline u64 kbase_hwcnt_metadata_narrow_group_type(
+ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp)
+{
+ return kbase_hwcnt_metadata_group_type(md_narrow->metadata, grp);
+}
+
+/**
+ * kbase_hwcnt_metadata_narrow_block_count() - Get the number of blocks in a
+ * group from narrow metadata.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @grp: Index of the group in the narrow metadata.
+ *
+ * Return: Number of blocks in group grp.
+ */
+static inline size_t kbase_hwcnt_metadata_narrow_block_count(
+ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp)
+{
+ return kbase_hwcnt_metadata_block_count(md_narrow->metadata, grp);
+}
+
+/**
+ * kbase_hwcnt_metadata_narrow_block_instance_count() - Get the number of
+ * instances of a block
+ * from narrow metadata.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @grp: Index of the group in the narrow metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Number of instances of block blk in group grp.
+ */
+static inline size_t kbase_hwcnt_metadata_narrow_block_instance_count(
+ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp,
+ size_t blk)
+{
+ return kbase_hwcnt_metadata_block_instance_count(md_narrow->metadata,
+ grp, blk);
+}
+
+/**
+ * kbase_hwcnt_metadata_narrow_block_headers_count() - Get the number of counter
+ * headers from narrow
+ * metadata.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @grp: Index of the group in the narrow metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Number of counter headers in each instance of block blk in group grp.
+ */
+static inline size_t kbase_hwcnt_metadata_narrow_block_headers_count(
+ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp,
+ size_t blk)
+{
+ return kbase_hwcnt_metadata_block_headers_count(md_narrow->metadata,
+ grp, blk);
+}
+
+/**
+ * kbase_hwcnt_metadata_narrow_block_counters_count() - Get the number of
+ * counters from narrow
+ * metadata.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @grp: Index of the group in the narrow metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Number of counters in each instance of block blk in group grp.
+ */
+static inline size_t kbase_hwcnt_metadata_narrow_block_counters_count(
+ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp,
+ size_t blk)
+{
+ return kbase_hwcnt_metadata_block_counters_count(md_narrow->metadata,
+ grp, blk);
+}
+
+/**
+ * kbase_hwcnt_metadata_narrow_block_values_count() - Get the number of values
+ * from narrow metadata.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @grp: Index of the group in the narrow metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Number of headers plus counters in each instance of block blk
+ * in group grp.
+ */
+static inline size_t kbase_hwcnt_metadata_narrow_block_values_count(
+ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp,
+ size_t blk)
+{
+ return kbase_hwcnt_metadata_narrow_block_counters_count(md_narrow, grp,
+ blk) +
+ kbase_hwcnt_metadata_narrow_block_headers_count(md_narrow, grp,
+ blk);
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_narrow_block_instance() - Get the pointer to a
+ * narrowed block instance's
+ * dump buffer.
+ * @buf: Non-NULL pointer to narrow dump buffer.
+ * @grp: Index of the group in the narrow metadata.
+ * @blk: Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ *
+ * Return: u32* to the dump buffer for the block instance.
+ */
+static inline u32 *kbase_hwcnt_dump_buffer_narrow_block_instance(
+ const struct kbase_hwcnt_dump_buffer_narrow *buf, size_t grp,
+ size_t blk, size_t blk_inst)
+{
+ return buf->dump_buf +
+ buf->md_narrow->metadata->grp_metadata[grp].dump_buf_index +
+ buf->md_narrow->metadata->grp_metadata[grp]
+ .blk_metadata[blk]
+ .dump_buf_index +
+ (buf->md_narrow->metadata->grp_metadata[grp]
+ .blk_metadata[blk]
+ .dump_buf_stride *
+ blk_inst);
+}
+
+/**
+ * kbase_hwcnt_gpu_metadata_narrow_create() - Create HWC metadata with HWC
+ * entries per block truncated to
+ * 64 entries and block entry size
+ * narrowed down to 32-bit.
+ *
+ * @dst_md_narrow: Non-NULL pointer to where created narrow metadata is stored
+ * on success.
+ * @src_md: Non-NULL pointer to the HWC metadata used as the source to
+ * create dst_md_narrow.
+ *
+ * For backward compatibility of the interface to user clients, a new metadata
+ * with entries per block truncated to 64 and block entry size narrowed down
+ * to 32-bit will be created for dst_md_narrow.
+ * The total entries per block in src_md must be 64 or 128; for any other
+ * value the function returns an error, since other sizes are not supported.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_gpu_metadata_narrow_create(
+ const struct kbase_hwcnt_metadata_narrow **dst_md_narrow,
+ const struct kbase_hwcnt_metadata *src_md);
+
+/**
+ * kbase_hwcnt_gpu_metadata_narrow_destroy() - Destroy a hardware counter narrow
+ * metadata object.
+ * @md_narrow: Pointer to hardware counter narrow metadata.
+ */
+void kbase_hwcnt_gpu_metadata_narrow_destroy(
+ const struct kbase_hwcnt_metadata_narrow *md_narrow);
+
+/**
+ * kbase_hwcnt_dump_buffer_narrow_alloc() - Allocate a narrow dump buffer.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @dump_buf: Non-NULL pointer to narrow dump buffer to be initialised. Will be
+ * initialised to undefined values, so must be used as a copy
+ * destination, or cleared before use.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_dump_buffer_narrow_alloc(
+ const struct kbase_hwcnt_metadata_narrow *md_narrow,
+ struct kbase_hwcnt_dump_buffer_narrow *dump_buf);
+
+/**
+ * kbase_hwcnt_dump_buffer_narrow_free() - Free a narrow dump buffer.
+ * @dump_buf: Dump buffer to be freed.
+ *
+ * Can be safely called on an all-zeroed narrow dump buffer structure, or on an
+ * already freed narrow dump buffer.
+ */
+void kbase_hwcnt_dump_buffer_narrow_free(
+ struct kbase_hwcnt_dump_buffer_narrow *dump_buf);
+
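/* Minimal lifecycle sketch for the narrow API (illustrative, not part of the
 * patch). The source metadata pointer is assumed to come from
 * kbase_hwcnt_jm_metadata_create() or kbase_hwcnt_csf_metadata_create().
 *
 *	const struct kbase_hwcnt_metadata_narrow *md_narrow;
 *	struct kbase_hwcnt_dump_buffer_narrow buf;
 *	int err;
 *
 *	err = kbase_hwcnt_gpu_metadata_narrow_create(&md_narrow, metadata);
 *	if (err)
 *		return err;
 *
 *	err = kbase_hwcnt_dump_buffer_narrow_alloc(md_narrow, &buf);
 *	if (!err) {
 *		// populate buf, e.g. via
 *		// kbase_hwcnt_dump_buffer_copy_strict_narrow(&buf, src, map)
 *		kbase_hwcnt_dump_buffer_narrow_free(&buf);
 *	}
 *	kbase_hwcnt_gpu_metadata_narrow_destroy(md_narrow);
 *	return err;
 */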
+/**
+ * kbase_hwcnt_dump_buffer_narrow_array_alloc() - Allocate an array of narrow
+ * dump buffers.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @n: Number of narrow dump buffers to allocate
+ * @dump_bufs: Non-NULL pointer to a kbase_hwcnt_dump_buffer_narrow_array
+ * object to be initialised.
+ *
+ * A single zeroed contiguous page allocation will be used for all of the
+ * buffers inside the object, where:
+ * dump_bufs->bufs[n].dump_buf == page_addr + n * md_narrow.dump_buf_bytes
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_dump_buffer_narrow_array_alloc(
+ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t n,
+ struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs);
+
+/**
+ * kbase_hwcnt_dump_buffer_narrow_array_free() - Free a narrow dump buffer
+ * array.
+ * @dump_bufs: Narrow dump buffer array to be freed.
+ *
+ * Can be safely called on an all-zeroed narrow dump buffer array structure, or
+ * on an already freed narrow dump buffer array.
+ */
+void kbase_hwcnt_dump_buffer_narrow_array_free(
+ struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_copy_strict_narrow() - Copy all enabled block
+ * values from source to
+ * destination.
+ * @dst_blk: Non-NULL pointer to destination block obtained from a call to
+ * kbase_hwcnt_dump_buffer_narrow_block_instance.
+ * @src_blk: Non-NULL pointer to source block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to
+ * kbase_hwcnt_enable_map_block_instance.
+ * @val_cnt: Number of values in the block.
+ *
+ * After the copy, any disabled values in the destination will be zero. Each
+ * enabled value in the destination will be saturated at U32_MAX if the
+ * corresponding source value is bigger than U32_MAX, otherwise the source
+ * value is copied unchanged.
+ */
+void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk,
+ const u64 *src_blk,
+ const u64 *blk_em,
+ size_t val_cnt);
+
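The saturating narrowing rule described above can be demonstrated with a standalone userspace sketch (not kernel code); the helper below mimics only the per-value behaviour:

#include <assert.h>
#include <stdint.h>

/* Mimics the per-value narrowing rule: copy, saturating at UINT32_MAX. */
static uint32_t narrow_value(uint64_t v)
{
	return (v > UINT32_MAX) ? UINT32_MAX : (uint32_t)v;
}

int main(void)
{
	assert(narrow_value(42) == 42);
	assert(narrow_value((uint64_t)UINT32_MAX + 1) == UINT32_MAX);
	return 0;
}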
+/**
+ * kbase_hwcnt_dump_buffer_copy_strict_narrow() - Copy all enabled values to a
+ * narrow dump buffer.
+ * @dst_narrow: Non-NULL pointer to destination dump buffer.
+ * @src: Non-NULL pointer to source dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * After the operation, all non-enabled values (including padding bytes) will be
+ * zero. Slower than the non-strict variant.
+ *
+ * The enabled values in dst_narrow will be saturated at U32_MAX if the
+ * corresponding source value is bigger than U32_MAX, otherwise the source
+ * value is copied unchanged.
+ */
+void kbase_hwcnt_dump_buffer_copy_strict_narrow(
+ struct kbase_hwcnt_dump_buffer_narrow *dst_narrow,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+#endif /* _KBASE_HWCNT_GPU_NARROW_H_ */
diff --git a/mali_kbase/mali_kbase_hwcnt_legacy.c b/mali_kbase/mali_kbase_hwcnt_legacy.c
index 0687253..5ca4c51 100644
--- a/mali_kbase/mali_kbase_hwcnt_legacy.c
+++ b/mali_kbase/mali_kbase_hwcnt_legacy.c
@@ -23,6 +23,7 @@
#include "mali_kbase_hwcnt_virtualizer.h"
#include "mali_kbase_hwcnt_types.h"
#include "mali_kbase_hwcnt_gpu.h"
+#include "mali_kbase_hwcnt_gpu_narrow.h"
#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h>
#include <linux/slab.h>
@@ -32,14 +33,22 @@
* struct kbase_hwcnt_legacy_client - Legacy hardware counter client.
* @user_dump_buf: Pointer to a non-NULL user buffer, where dumps are returned.
* @enable_map: Counter enable map.
- * @dump_buf: Dump buffer used to manipulate dumps before copied to user.
+ * @dump_buf: Dump buffer used to manipulate dumps from virtualizer.
* @hvcli: Hardware counter virtualizer client.
+ * @dump_buf_user: Narrow dump buffer used to manipulate dumps before they are
+ * copied to user.
+ * @metadata_user: For compatibility with the user driver interface, this
+ * contains a narrowed version of the hardware counter metadata
+ * which is limited to 64 entries per block and 32-bit for each
+ * entry.
*/
struct kbase_hwcnt_legacy_client {
void __user *user_dump_buf;
struct kbase_hwcnt_enable_map enable_map;
struct kbase_hwcnt_dump_buffer dump_buf;
struct kbase_hwcnt_virtualizer_client *hvcli;
+ struct kbase_hwcnt_dump_buffer_narrow dump_buf_user;
+ const struct kbase_hwcnt_metadata_narrow *metadata_user;
};
int kbase_hwcnt_legacy_client_create(
@@ -61,6 +70,16 @@ int kbase_hwcnt_legacy_client_create(
if (!hlcli)
return -ENOMEM;
+ errcode = kbase_hwcnt_gpu_metadata_narrow_create(&hlcli->metadata_user,
+ metadata);
+ if (errcode)
+ goto error;
+
+ errcode = kbase_hwcnt_dump_buffer_narrow_alloc(hlcli->metadata_user,
+ &hlcli->dump_buf_user);
+ if (errcode)
+ goto error;
+
hlcli->user_dump_buf = (void __user *)(uintptr_t)enable->dump_buffer;
errcode = kbase_hwcnt_enable_map_alloc(metadata, &hlcli->enable_map);
@@ -99,6 +118,8 @@ void kbase_hwcnt_legacy_client_destroy(struct kbase_hwcnt_legacy_client *hlcli)
kbase_hwcnt_virtualizer_client_destroy(hlcli->hvcli);
kbase_hwcnt_dump_buffer_free(&hlcli->dump_buf);
kbase_hwcnt_enable_map_free(&hlcli->enable_map);
+ kbase_hwcnt_dump_buffer_narrow_free(&hlcli->dump_buf_user);
+ kbase_hwcnt_gpu_metadata_narrow_destroy(hlcli->metadata_user);
kfree(hlcli);
}
@@ -123,13 +144,20 @@ int kbase_hwcnt_legacy_client_dump(struct kbase_hwcnt_legacy_client *hlcli)
kbase_hwcnt_gpu_patch_dump_headers(
&hlcli->dump_buf, &hlcli->enable_map);
- /* Zero all non-enabled counters (current values are undefined) */
- kbase_hwcnt_dump_buffer_zero_non_enabled(
- &hlcli->dump_buf, &hlcli->enable_map);
+ /* Copy the dump buffer to the userspace visible buffer. The strict
+ * variant will explicitly zero any non-enabled counters to ensure
+ * nothing except exactly what the user asked for is made visible.
+ *
+ * A narrow copy is required since the virtualizer has a bigger buffer
+ * but the user only needs part of it.
+ */
+ kbase_hwcnt_dump_buffer_copy_strict_narrow(
+ &hlcli->dump_buf_user, &hlcli->dump_buf, &hlcli->enable_map);
/* Copy into the user's buffer */
- errcode = copy_to_user(hlcli->user_dump_buf, hlcli->dump_buf.dump_buf,
- hlcli->dump_buf.metadata->dump_buf_bytes);
+ errcode = copy_to_user(hlcli->user_dump_buf,
+ hlcli->dump_buf_user.dump_buf,
+ hlcli->dump_buf_user.md_narrow->dump_buf_bytes);
/* Non-zero errcode implies user buf was invalid or too small */
if (errcode)
return -EFAULT;
diff --git a/mali_kbase/mali_kbase_hwcnt_types.c b/mali_kbase/mali_kbase_hwcnt_types.c
index 492f572..d925ed7 100644
--- a/mali_kbase/mali_kbase_hwcnt_types.c
+++ b/mali_kbase/mali_kbase_hwcnt_types.c
@@ -32,7 +32,7 @@ int kbase_hwcnt_metadata_create(
struct kbase_hwcnt_group_metadata *grp_mds;
size_t grp;
size_t enable_map_count; /* Number of u64 bitfields (inc padding) */
- size_t dump_buf_count; /* Number of u32 values (inc padding) */
+ size_t dump_buf_count; /* Number of u64 values (inc padding) */
size_t avail_mask_bits; /* Number of availability mask bits */
size_t size;
@@ -220,7 +220,7 @@ int kbase_hwcnt_dump_buffer_alloc(
return -ENOMEM;
dump_buf->metadata = metadata;
- dump_buf->dump_buf = (u32 *)buf;
+ dump_buf->dump_buf = (u64 *)buf;
dump_buf->clk_cnt_buf = (u64 *)(buf + dump_buf_bytes);
return 0;
@@ -282,7 +282,7 @@ int kbase_hwcnt_dump_buffer_array_alloc(
(dump_buf_bytes * n) + (clk_cnt_buf_bytes * buf_idx);
buffers[buf_idx].metadata = metadata;
- buffers[buf_idx].dump_buf = (u32 *)(addr + dump_buf_offset);
+ buffers[buf_idx].dump_buf = (u64 *)(addr + dump_buf_offset);
buffers[buf_idx].clk_cnt_buf =
(u64 *)(addr + clk_cnt_buf_offset);
}
@@ -316,7 +316,7 @@ void kbase_hwcnt_dump_buffer_zero(
metadata = dst->metadata;
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
- u32 *dst_blk;
+ u64 *dst_blk;
size_t val_cnt;
if (!kbase_hwcnt_enable_map_block_enabled(
@@ -362,7 +362,7 @@ void kbase_hwcnt_dump_buffer_zero_non_enabled(
metadata = dst->metadata;
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
- u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+ u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
dst, grp, blk, blk_inst);
const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
dst_enable_map, grp, blk, blk_inst);
@@ -406,8 +406,8 @@ void kbase_hwcnt_dump_buffer_copy(
metadata = dst->metadata;
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
- u32 *dst_blk;
- const u32 *src_blk;
+ u64 *dst_blk;
+ const u64 *src_blk;
size_t val_cnt;
if (!kbase_hwcnt_enable_map_block_enabled(
@@ -451,9 +451,9 @@ void kbase_hwcnt_dump_buffer_copy_strict(
metadata = dst->metadata;
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
- u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+ u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
dst, grp, blk, blk_inst);
- const u32 *src_blk = kbase_hwcnt_dump_buffer_block_instance(
+ const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance(
src, grp, blk, blk_inst);
const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
dst_enable_map, grp, blk, blk_inst);
@@ -497,8 +497,8 @@ void kbase_hwcnt_dump_buffer_accumulate(
metadata = dst->metadata;
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
- u32 *dst_blk;
- const u32 *src_blk;
+ u64 *dst_blk;
+ const u64 *src_blk;
size_t hdr_cnt;
size_t ctr_cnt;
@@ -546,9 +546,9 @@ void kbase_hwcnt_dump_buffer_accumulate_strict(
metadata = dst->metadata;
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
- u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+ u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
dst, grp, blk, blk_inst);
- const u32 *src_blk = kbase_hwcnt_dump_buffer_block_instance(
+ const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance(
src, grp, blk, blk_inst);
const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
dst_enable_map, grp, blk, blk_inst);
diff --git a/mali_kbase/mali_kbase_hwcnt_types.h b/mali_kbase/mali_kbase_hwcnt_types.h
index 6b7985b..f04c0ec 100644
--- a/mali_kbase/mali_kbase_hwcnt_types.h
+++ b/mali_kbase/mali_kbase_hwcnt_types.h
@@ -61,7 +61,7 @@
* An array of u64 bitfields, where each bit either enables exactly one
* block value, or is unused (padding).
* Dump Buffer:
- * An array of u32 values, where each u32 corresponds either to one block
+ * An array of u64 values, where each u64 corresponds either to one block
* value, or is unused (padding).
* Availability Mask:
* A bitfield, where each bit corresponds to whether a block instance is
@@ -81,6 +81,7 @@
#define _KBASE_HWCNT_TYPES_H_
#include <linux/bitops.h>
+#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/types.h>
@@ -91,8 +92,11 @@
/* Number of bits in each bitfield */
#define KBASE_HWCNT_BITFIELD_BITS (KBASE_HWCNT_BITFIELD_BYTES * BITS_PER_BYTE)
-/* Number of bytes for each counter value */
-#define KBASE_HWCNT_VALUE_BYTES (sizeof(u32))
+/* Number of bytes for each counter value.
+ * The driver uses 64 bits per counter to avoid overflowing the 32-bit
+ * values read from hardware registers after long periods of accumulation.
+ */
+#define KBASE_HWCNT_VALUE_BYTES (sizeof(u64))
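To put the change to 64-bit counter storage in perspective: assuming a counter
that increments once per cycle on a 1 GHz GPU clock, a 32-bit accumulator wraps
after 2^32 / 10^9 ≈ 4.3 seconds of accumulated counting, whereas a 64-bit
accumulator at the same rate would take roughly 585 years to overflow, which is
why saturation handling can move out of the accumulation path entirely.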
/* Number of bits in an availability mask (i.e. max total number of block
* instances supported in a Hardware Counter System)
@@ -119,8 +123,8 @@
* contiguous, Hardware Counter Blocks.
* @type: The arbitrary identifier used to identify the type of the block.
* @inst_cnt: The number of Instances of the block.
- * @hdr_cnt: The number of 32-bit Block Headers in the block.
- * @ctr_cnt: The number of 32-bit Block Counters in the block.
+ * @hdr_cnt: The number of 64-bit Block Headers in the block.
+ * @ctr_cnt: The number of 64-bit Block Counters in the block.
*/
struct kbase_hwcnt_block_description {
u64 type;
@@ -165,17 +169,17 @@ struct kbase_hwcnt_description {
* @type: The arbitrary identifier used to identify the type of the
* block.
* @inst_cnt: The number of Instances of the block.
- * @hdr_cnt: The number of 32-bit Block Headers in the block.
- * @ctr_cnt: The number of 32-bit Block Counters in the block.
+ * @hdr_cnt: The number of 64-bit Block Headers in the block.
+ * @ctr_cnt: The number of 64-bit Block Counters in the block.
* @enable_map_index: Index in u64s into the parent's Enable Map where the
* Enable Map bitfields of the Block Instances described by
* this metadata start.
* @enable_map_stride: Stride in u64s between the Enable Maps of each of the
* Block Instances described by this metadata.
- * @dump_buf_index: Index in u32s into the parent's Dump Buffer where the
+ * @dump_buf_index: Index in u64s into the parent's Dump Buffer where the
* Dump Buffers of the Block Instances described by this
* metadata start.
- * @dump_buf_stride: Stride in u32s between the Dump Buffers of each of the
+ * @dump_buf_stride: Stride in u64s between the Dump Buffers of each of the
* Block Instances described by this metadata.
* @avail_mask_index: Index in bits into the parent's Availability Mask where
* the Availability Masks of the Block Instances described
@@ -208,7 +212,7 @@ struct kbase_hwcnt_block_metadata {
* @enable_map_index: Index in u64s into the parent's Enable Map where the
* Enable Maps of the blocks within the group described by
* this metadata start.
- * @dump_buf_index: Index in u32s into the parent's Dump Buffer where the
+ * @dump_buf_index: Index in u64s into the parent's Dump Buffer where the
* Dump Buffers of the blocks within the group described by
* metadata start.
* @avail_mask_index: Index in bits into the parent's Availability Mask where
@@ -225,7 +229,7 @@ struct kbase_hwcnt_group_metadata {
};
/**
- * struct kbase_hwcnt_metadata - Metadata describing the physical layout
+ * struct kbase_hwcnt_metadata - Metadata describing the memory layout
* of Dump Buffers and Enable Maps within a
* Hardware Counter System.
* @grp_cnt: The number of Hardware Counter Groups.
@@ -264,18 +268,17 @@ struct kbase_hwcnt_enable_map {
};
/**
- * struct kbase_hwcnt_dump_buffer - Hardware Counter Dump Buffer. Array of u32
- * values.
+ * struct kbase_hwcnt_dump_buffer - Hardware Counter Dump Buffer.
* @metadata: Non-NULL pointer to metadata used to identify, and to describe
* the layout of the Dump Buffer.
- * @dump_buf: Non-NULL pointer of size metadata->dump_buf_bytes to an array
- * of u32 values.
+ * @dump_buf: Non-NULL pointer to an array of u64 values, the array size is
+ * metadata->dump_buf_bytes.
* @clk_cnt_buf: A pointer to an array of u64 values for cycle count elapsed
* for each clock domain.
*/
struct kbase_hwcnt_dump_buffer {
const struct kbase_hwcnt_metadata *metadata;
- u32 *dump_buf;
+ u64 *dump_buf;
u64 *clk_cnt_buf;
};
@@ -283,7 +286,8 @@ struct kbase_hwcnt_dump_buffer {
* struct kbase_hwcnt_dump_buffer_array - Hardware Counter Dump Buffer array.
* @page_addr: Address of allocated pages. A single allocation is used for all
* Dump Buffers in the array.
- * @page_order: The allocation order of the pages.
+ * @page_order: The allocation order of the pages, i.e. log2 of the number of
+ *              pages allocated.
* @buf_cnt: The number of allocated Dump Buffers.
* @bufs: Non-NULL pointer to the array of Dump Buffers.
*/
@@ -319,8 +323,14 @@ void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
*
* Return: Number of hardware counter groups described by metadata.
*/
-#define kbase_hwcnt_metadata_group_count(metadata) \
- ((metadata)->grp_cnt)
+static inline size_t
+kbase_hwcnt_metadata_group_count(const struct kbase_hwcnt_metadata *metadata)
+{
+ if (WARN_ON(!metadata))
+ return 0;
+
+ return metadata->grp_cnt;
+}
/**
* kbase_hwcnt_metadata_group_type() - Get the arbitrary type of a group.
@@ -329,8 +339,15 @@ void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
*
* Return: Type of the group grp.
*/
-#define kbase_hwcnt_metadata_group_type(metadata, grp) \
- ((metadata)->grp_metadata[(grp)].type)
+static inline u64
+kbase_hwcnt_metadata_group_type(const struct kbase_hwcnt_metadata *metadata,
+ size_t grp)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt))
+ return 0;
+
+ return metadata->grp_metadata[grp].type;
+}
/**
* kbase_hwcnt_metadata_block_count() - Get the number of blocks in a group.
@@ -339,8 +356,15 @@ void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
*
* Return: Number of blocks in group grp.
*/
-#define kbase_hwcnt_metadata_block_count(metadata, grp) \
- ((metadata)->grp_metadata[(grp)].blk_cnt)
+static inline size_t
+kbase_hwcnt_metadata_block_count(const struct kbase_hwcnt_metadata *metadata,
+ size_t grp)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt))
+ return 0;
+
+ return metadata->grp_metadata[grp].blk_cnt;
+}
/**
* kbase_hwcnt_metadata_block_type() - Get the arbitrary type of a block.
@@ -350,8 +374,16 @@ void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
*
* Return: Type of the block blk in group grp.
*/
-#define kbase_hwcnt_metadata_block_type(metadata, grp, blk) \
- ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].type)
+static inline u64
+kbase_hwcnt_metadata_block_type(const struct kbase_hwcnt_metadata *metadata,
+ size_t grp, size_t blk)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ return 0;
+
+ return metadata->grp_metadata[grp].blk_metadata[blk].type;
+}
/**
* kbase_hwcnt_metadata_block_instance_count() - Get the number of instances of
@@ -362,8 +394,15 @@ void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
*
* Return: Number of instances of block blk in group grp.
*/
-#define kbase_hwcnt_metadata_block_instance_count(metadata, grp, blk) \
- ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].inst_cnt)
+static inline size_t kbase_hwcnt_metadata_block_instance_count(
+ const struct kbase_hwcnt_metadata *metadata, size_t grp, size_t blk)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ return 0;
+
+ return metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt;
+}
/**
* kbase_hwcnt_metadata_block_headers_count() - Get the number of counter
@@ -374,8 +413,15 @@ void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
*
* Return: Number of counter headers in each instance of block blk in group grp.
*/
-#define kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk) \
- ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].hdr_cnt)
+static inline size_t kbase_hwcnt_metadata_block_headers_count(
+ const struct kbase_hwcnt_metadata *metadata, size_t grp, size_t blk)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ return 0;
+
+ return metadata->grp_metadata[grp].blk_metadata[blk].hdr_cnt;
+}
/**
* kbase_hwcnt_metadata_block_counters_count() - Get the number of counters.
@@ -385,8 +431,15 @@ void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
*
* Return: Number of counters in each instance of block blk in group grp.
*/
-#define kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk) \
- ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].ctr_cnt)
+static inline size_t kbase_hwcnt_metadata_block_counters_count(
+ const struct kbase_hwcnt_metadata *metadata, size_t grp, size_t blk)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ return 0;
+
+ return metadata->grp_metadata[grp].blk_metadata[blk].ctr_cnt;
+}
/**
* kbase_hwcnt_metadata_block_enable_map_stride() - Get the enable map stride.
@@ -396,8 +449,15 @@ void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
*
* Return: enable map stride in each instance of block blk in group grp.
*/
-#define kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk) \
- ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_stride)
+static inline size_t kbase_hwcnt_metadata_block_enable_map_stride(
+ const struct kbase_hwcnt_metadata *metadata, size_t grp, size_t blk)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ return 0;
+
+ return metadata->grp_metadata[grp].blk_metadata[blk].enable_map_stride;
+}
/**
* kbase_hwcnt_metadata_block_values_count() - Get the number of values.
@@ -408,9 +468,16 @@ void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
* Return: Number of headers plus counters in each instance of block blk
* in group grp.
*/
-#define kbase_hwcnt_metadata_block_values_count(metadata, grp, blk) \
- (kbase_hwcnt_metadata_block_counters_count((metadata), (grp), (blk)) \
- + kbase_hwcnt_metadata_block_headers_count((metadata), (grp), (blk)))
+static inline size_t kbase_hwcnt_metadata_block_values_count(
+ const struct kbase_hwcnt_metadata *metadata, size_t grp, size_t blk)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ return 0;
+
+ return kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk) +
+ kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
+}
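Taken together, the macro-to-inline-function conversions above give the
metadata accessors real type checking and defensive WARN_ON() bounds checks. A
hypothetical helper (the name and use case are invented for illustration) shows
how they compose; a NULL metadata pointer or an out-of-range index now simply
contributes zero instead of dereferencing garbage:

static size_t sketch_total_dump_values(const struct kbase_hwcnt_metadata *metadata)
{
	size_t grp, blk, total = 0;

	for (grp = 0; grp < kbase_hwcnt_metadata_group_count(metadata); grp++) {
		for (blk = 0; blk < kbase_hwcnt_metadata_block_count(metadata, grp); blk++) {
			/* values per instance times number of instances */
			total += kbase_hwcnt_metadata_block_values_count(metadata, grp, blk) *
				 kbase_hwcnt_metadata_block_instance_count(metadata, grp, blk);
		}
	}

	return total;
}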
/**
* kbase_hwcnt_metadata_for_each_block() - Iterate over each block instance in
@@ -496,19 +563,28 @@ void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map);
/**
* kbase_hwcnt_enable_map_block_instance() - Get the pointer to a block
* instance's enable map.
- * @map: Non-NULL pointer to (const) enable map.
+ * @map: Non-NULL pointer to enable map.
* @grp: Index of the group in the metadata.
* @blk: Index of the block in the group.
* @blk_inst: Index of the block instance in the block.
*
- * Return: (const) u64* to the bitfield(s) used as the enable map for the
+ * Return: u64* to the bitfield(s) used as the enable map for the
* block instance.
*/
-#define kbase_hwcnt_enable_map_block_instance(map, grp, blk, blk_inst) \
- ((map)->hwcnt_enable_map + \
- (map)->metadata->grp_metadata[(grp)].enable_map_index + \
- (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_index + \
- (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_stride * (blk_inst))
+static inline u64 *
+kbase_hwcnt_enable_map_block_instance(const struct kbase_hwcnt_enable_map *map,
+ size_t grp, size_t blk, size_t blk_inst)
+{
+ return map->hwcnt_enable_map +
+ map->metadata->grp_metadata[grp].enable_map_index +
+ map->metadata->grp_metadata[grp]
+ .blk_metadata[blk]
+ .enable_map_index +
+ (map->metadata->grp_metadata[grp]
+ .blk_metadata[blk]
+ .enable_map_stride *
+ blk_inst);
+}
/**
* kbase_hwcnt_bitfield_count() - Calculate the number of u64 bitfields required
@@ -827,18 +903,24 @@ void kbase_hwcnt_dump_buffer_array_free(
/**
* kbase_hwcnt_dump_buffer_block_instance() - Get the pointer to a block
* instance's dump buffer.
- * @buf: Non-NULL pointer to (const) dump buffer.
+ * @buf: Non-NULL pointer to dump buffer.
* @grp: Index of the group in the metadata.
* @blk: Index of the block in the group.
* @blk_inst: Index of the block instance in the block.
*
- * Return: (const) u32* to the dump buffer for the block instance.
+ * Return: u64* to the dump buffer for the block instance.
*/
-#define kbase_hwcnt_dump_buffer_block_instance(buf, grp, blk, blk_inst) \
- ((buf)->dump_buf + \
- (buf)->metadata->grp_metadata[(grp)].dump_buf_index + \
- (buf)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].dump_buf_index + \
- (buf)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].dump_buf_stride * (blk_inst))
+static inline u64 *kbase_hwcnt_dump_buffer_block_instance(
+ const struct kbase_hwcnt_dump_buffer *buf, size_t grp, size_t blk,
+ size_t blk_inst)
+{
+ return buf->dump_buf + buf->metadata->grp_metadata[grp].dump_buf_index +
+ buf->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_index +
+ (buf->metadata->grp_metadata[grp]
+ .blk_metadata[blk]
+ .dump_buf_stride *
+ blk_inst);
+}
/**
* kbase_hwcnt_dump_buffer_zero() - Zero all enabled values in dst.
@@ -859,9 +941,8 @@ void kbase_hwcnt_dump_buffer_zero(
* kbase_hwcnt_dump_buffer_block_instance.
* @val_cnt: Number of values in the block.
*/
-static inline void kbase_hwcnt_dump_buffer_block_zero(
- u32 *dst_blk,
- size_t val_cnt)
+static inline void kbase_hwcnt_dump_buffer_block_zero(u64 *dst_blk,
+ size_t val_cnt)
{
memset(dst_blk, 0, (val_cnt * KBASE_HWCNT_VALUE_BYTES));
}
@@ -904,10 +985,9 @@ void kbase_hwcnt_dump_buffer_zero_non_enabled(
* kbase_hwcnt_enable_map_block_instance.
* @val_cnt: Number of values in the block.
*/
-static inline void kbase_hwcnt_dump_buffer_block_zero_non_enabled(
- u32 *dst_blk,
- const u64 *blk_em,
- size_t val_cnt)
+static inline void
+kbase_hwcnt_dump_buffer_block_zero_non_enabled(u64 *dst_blk, const u64 *blk_em,
+ size_t val_cnt)
{
size_t val;
@@ -941,10 +1021,9 @@ void kbase_hwcnt_dump_buffer_copy(
* kbase_hwcnt_dump_buffer_block_instance.
* @val_cnt: Number of values in the block.
*/
-static inline void kbase_hwcnt_dump_buffer_block_copy(
- u32 *dst_blk,
- const u32 *src_blk,
- size_t val_cnt)
+static inline void kbase_hwcnt_dump_buffer_block_copy(u64 *dst_blk,
+ const u64 *src_blk,
+ size_t val_cnt)
{
/* Copy all the counters in the block instance.
* Values of non-enabled counters are undefined.
@@ -987,11 +1066,10 @@ void kbase_hwcnt_dump_buffer_copy_strict(
*
* After the copy, any disabled values in dst will be zero.
*/
-static inline void kbase_hwcnt_dump_buffer_block_copy_strict(
- u32 *dst_blk,
- const u32 *src_blk,
- const u64 *blk_em,
- size_t val_cnt)
+static inline void kbase_hwcnt_dump_buffer_block_copy_strict(u64 *dst_blk,
+ const u64 *src_blk,
+ const u64 *blk_em,
+ size_t val_cnt)
{
size_t val;
@@ -1032,11 +1110,10 @@ void kbase_hwcnt_dump_buffer_accumulate(
* @hdr_cnt: Number of headers in the block.
* @ctr_cnt: Number of counters in the block.
*/
-static inline void kbase_hwcnt_dump_buffer_block_accumulate(
- u32 *dst_blk,
- const u32 *src_blk,
- size_t hdr_cnt,
- size_t ctr_cnt)
+static inline void kbase_hwcnt_dump_buffer_block_accumulate(u64 *dst_blk,
+ const u64 *src_blk,
+ size_t hdr_cnt,
+ size_t ctr_cnt)
{
size_t ctr;
/* Copy all the headers in the block instance.
@@ -1047,21 +1124,8 @@ static inline void kbase_hwcnt_dump_buffer_block_accumulate(
/* Accumulate all the counters in the block instance.
* Values of non-enabled counters are undefined.
*/
- for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) {
- u32 *dst_ctr = dst_blk + ctr;
- const u32 *src_ctr = src_blk + ctr;
-
- const u32 src_counter = *src_ctr;
- const u32 dst_counter = *dst_ctr;
-
- /* Saturating add */
- u32 accumulated = src_counter + dst_counter;
-
- if (accumulated < src_counter)
- accumulated = U32_MAX;
-
- *dst_ctr = accumulated;
- }
+ for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++)
+ dst_blk[ctr] += src_blk[ctr];
}
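With 64-bit accumulators, the per-add saturation removed above is no longer
needed; overflow is instead handled once, when narrowing for the 32-bit user
interface. A small userspace-style illustration (not driver code) of the
behavioural difference when accumulating three dumps of three billion events
each:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t sat32 = 0;	/* old behaviour: 32-bit saturating add */
	uint64_t acc64 = 0;	/* new behaviour: plain 64-bit accumulation */
	int i;

	for (i = 0; i < 3; i++) {
		uint64_t sample = 3000000000ull; /* one dump's counter value */

		sat32 = (sample > UINT32_MAX - sat32) ?
				UINT32_MAX : sat32 + (uint32_t)sample;
		acc64 += sample;
	}

	/* Prints 4294967295 (stuck at U32_MAX) versus 9000000000 */
	printf("u32 saturating: %u\nu64 accumulate: %llu\n",
	       sat32, (unsigned long long)acc64);
	return 0;
}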
/**
@@ -1103,10 +1167,7 @@ void kbase_hwcnt_dump_buffer_accumulate_strict(
* @ctr_cnt: Number of counters in the block.
*/
static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict(
- u32 *dst_blk,
- const u32 *src_blk,
- const u64 *blk_em,
- size_t hdr_cnt,
+ u64 *dst_blk, const u64 *src_blk, const u64 *blk_em, size_t hdr_cnt,
size_t ctr_cnt)
{
size_t ctr;
@@ -1118,25 +1179,16 @@ static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict(
bool ctr_enabled = kbase_hwcnt_enable_map_block_value_enabled(
blk_em, ctr);
- u32 *dst_ctr = dst_blk + ctr;
- const u32 *src_ctr = src_blk + ctr;
-
- const u32 src_counter = *src_ctr;
- const u32 dst_counter = *dst_ctr;
-
- /* Saturating add */
- u32 accumulated = src_counter + dst_counter;
-
- if (accumulated < src_counter)
- accumulated = U32_MAX;
-
- *dst_ctr = ctr_enabled ? accumulated : 0;
+ if (ctr_enabled)
+ dst_blk[ctr] += src_blk[ctr];
+ else
+ dst_blk[ctr] = 0;
}
}
-/*
- * Iterate over each clock domain in the metadata.
- *
+/**
+ * kbase_hwcnt_metadata_for_each_clock() - Iterate over each clock domain in the
+ * metadata.
* @md: Non-NULL pointer to metadata.
* @clk: size_t variable used as clock iterator.
*/
diff --git a/mali_kbase/mali_kbase_jd.c b/mali_kbase/mali_kbase_jd.c
index 2b071dd..c892455 100644
--- a/mali_kbase/mali_kbase_jd.c
+++ b/mali_kbase/mali_kbase_jd.c
@@ -76,6 +76,7 @@ static void jd_mark_atom_complete(struct kbase_jd_atom *katom)
kbase_kinstr_jm_atom_complete(katom);
dev_dbg(katom->kctx->kbdev->dev, "Atom %pK status to completed\n",
(void *)katom);
+ KBASE_TLSTREAM_TL_JD_ATOM_COMPLETE(katom->kctx->kbdev, katom);
}
/* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs
@@ -139,7 +140,13 @@ void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom)
/* katom dep complete, attempt to run it */
bool resched = false;
+ KBASE_TLSTREAM_TL_RUN_ATOM_START(
+ katom->kctx->kbdev, katom,
+ kbase_jd_atom_id(katom->kctx, katom));
resched = jd_run_atom(katom);
+ KBASE_TLSTREAM_TL_RUN_ATOM_END(katom->kctx->kbdev, katom,
+ kbase_jd_atom_id(katom->kctx,
+ katom));
if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
/* The atom has already finished */
@@ -715,6 +722,8 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
bool need_to_try_schedule_context = false;
int i;
+ KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_START(kctx->kbdev, katom);
+
INIT_LIST_HEAD(&completed_jobs);
INIT_LIST_HEAD(&runnable_jobs);
@@ -736,6 +745,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
}
jd_mark_atom_complete(katom);
+
list_add_tail(&katom->jd_item, &completed_jobs);
while (!list_empty(&completed_jobs)) {
@@ -767,7 +777,13 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
if (node->status != KBASE_JD_ATOM_STATE_COMPLETED &&
!kbase_ctx_flag(kctx, KCTX_DYING)) {
+ KBASE_TLSTREAM_TL_RUN_ATOM_START(
+ kctx->kbdev, node,
+ kbase_jd_atom_id(kctx, node));
need_to_try_schedule_context |= jd_run_atom(node);
+ KBASE_TLSTREAM_TL_RUN_ATOM_END(
+ kctx->kbdev, node,
+ kbase_jd_atom_id(kctx, node));
} else {
node->event_code = katom->event_code;
@@ -811,7 +827,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
*/
wake_up(&kctx->jctx.zero_jobs_wait);
}
-
+ KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_END(kctx->kbdev, katom);
return need_to_try_schedule_context;
}
@@ -984,7 +1000,6 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
* dependencies.
*/
jd_trace_atom_submit(kctx, katom, NULL);
-
return jd_done_nolock(katom, NULL);
}
}
@@ -1049,7 +1064,6 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
if (err >= 0)
kbase_finish_soft_job(katom);
}
-
return jd_done_nolock(katom, NULL);
}
@@ -1378,10 +1392,10 @@ while (false)
}
mutex_lock(&jctx->lock);
}
-
+ KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_START(kbdev, katom);
need_to_try_schedule_context |= jd_submit_atom(kctx, &user_atom,
&user_jc_incr, katom);
-
+ KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_END(kbdev, katom);
/* Register a completed job as a disjoint event when the GPU is in a disjoint state
* (ie. being reset).
*/
@@ -1479,7 +1493,6 @@ void kbase_jd_done_worker(struct work_struct *data)
kbasep_js_remove_job(kbdev, kctx, katom);
mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
mutex_unlock(&js_devdata->queue_mutex);
- katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
/* jd_done_nolock() requires the jsctx_mutex lock to be dropped */
jd_done_nolock(katom, &kctx->completed_jobs);
@@ -1498,22 +1511,23 @@ void kbase_jd_done_worker(struct work_struct *data)
* drop our reference. But do not call kbase_jm_idle_ctx(), as
* the context is active and fast-starting is allowed.
*
- * If an atom has been fast-started then kctx->atoms_pulled will
- * be non-zero but KCTX_ACTIVE will still be false (as the
- * previous pm reference has been inherited). Do NOT drop our
- * reference, as it has been re-used, and leave the context as
- * active.
+ * If an atom has been fast-started then
+ * kbase_jsctx_atoms_pulled(kctx) will return non-zero but
+ * KCTX_ACTIVE will still be false (as the previous pm
+ * reference has been inherited). Do NOT drop our reference, as
+ * it has been re-used, and leave the context as active.
*
- * If no new atoms have been started then KCTX_ACTIVE will still
- * be false and atoms_pulled will be zero, so drop the reference
- * and call kbase_jm_idle_ctx().
+ * If no new atoms have been started then KCTX_ACTIVE will
+ * still be false and kbase_jsctx_atoms_pulled(kctx) will
+ * return zero, so drop the reference and call
+ * kbase_jm_idle_ctx().
*
* As the checks are done under both the queue_mutex and
* hwaccess_lock it should be impossible for this to race
* with the scheduler code.
*/
if (kbase_ctx_flag(kctx, KCTX_ACTIVE) ||
- !atomic_read(&kctx->atoms_pulled)) {
+ !kbase_jsctx_atoms_pulled(kctx)) {
/* Calling kbase_jm_idle_ctx() here will ensure that
* atoms are not fast-started when we drop the
* hwaccess_lock. This is not performed if
diff --git a/mali_kbase/mali_kbase_jm.c b/mali_kbase/mali_kbase_jm.c
index 6995050..898606b 100644
--- a/mali_kbase/mali_kbase_jm.c
+++ b/mali_kbase/mali_kbase_jm.c
@@ -132,6 +132,9 @@ struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
dev_dbg(kbdev->dev, "Atom %pK is returning with event code 0x%x\n",
(void *)katom, katom->event_code);
+ KBASE_KTRACE_ADD_JM(kbdev, JM_RETURN_ATOM_TO_JS, katom->kctx, katom,
+ katom->jc, katom->event_code);
+
if (katom->event_code != BASE_JD_EVENT_STOPPED &&
katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) {
return kbase_js_complete_atom(katom, NULL);
diff --git a/mali_kbase/mali_kbase_jm.h b/mali_kbase/mali_kbase_jm.h
index c6b28f3..eeafcb6 100644
--- a/mali_kbase/mali_kbase_jm.h
+++ b/mali_kbase/mali_kbase_jm.h
@@ -84,7 +84,7 @@ void kbase_jm_try_kick_all(struct kbase_device *kbdev);
* by kbase_js_use_ctx().
*
* The context should have no atoms currently pulled from it
- * (kctx->atoms_pulled == 0).
+ * (kbase_jsctx_atoms_pulled(kctx) == 0).
*
* Caller must hold the hwaccess_lock
*/
diff --git a/mali_kbase/mali_kbase_js.c b/mali_kbase/mali_kbase_js.c
index 3682486..799c7e5 100644
--- a/mali_kbase/mali_kbase_js.c
+++ b/mali_kbase/mali_kbase_js.c
@@ -372,8 +372,6 @@ jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
rb_erase(&katom->runnable_tree_node, &rb->runnable_tree);
}
-#define LESS_THAN_WRAP(a, b) ((s32)(a - b) < 0)
-
static void
jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom)
{
@@ -393,7 +391,7 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom)
struct kbase_jd_atom, runnable_tree_node);
parent = *new;
- if (LESS_THAN_WRAP(katom->age, entry->age))
+ if (kbase_jd_atom_is_younger(katom, entry))
new = &((*new)->rb_left);
else
new = &((*new)->rb_right);
@@ -421,6 +419,9 @@ jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
{
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+ KBASE_KTRACE_ADD_JM(kctx->kbdev, JS_UNPULL_JOB, kctx, katom, katom->jc,
+ 0u);
+
jsctx_tree_add(kctx, katom);
}
@@ -434,6 +435,67 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
struct kbase_context *kctx,
int js);
+typedef bool(katom_ordering_func)(const struct kbase_jd_atom *,
+ const struct kbase_jd_atom *);
+
+bool kbase_js_atom_runs_before(struct kbase_device *kbdev,
+ const struct kbase_jd_atom *katom_a,
+ const struct kbase_jd_atom *katom_b,
+ const kbase_atom_ordering_flag_t order_flags)
+{
+ struct kbase_context *kctx_a = katom_a->kctx;
+ struct kbase_context *kctx_b = katom_b->kctx;
+ katom_ordering_func *samectxatomprio_ordering_func =
+ kbase_jd_atom_is_younger;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ if (order_flags & KBASE_ATOM_ORDERING_FLAG_SEQNR)
+ samectxatomprio_ordering_func = kbase_jd_atom_is_earlier;
+
+ /* It only makes sense to make this test for atoms on the same slot */
+ WARN_ON(katom_a->slot_nr != katom_b->slot_nr);
+
+ if (kbdev->js_ctx_scheduling_mode ==
+ KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE) {
+ /* In local priority mode, querying either way around for "a
+ * should run before b" and "b should run before a" should
+ * always be false when they're from different contexts
+ */
+ if (kctx_a != kctx_b)
+ return false;
+ } else {
+ /* In system priority mode, ordering is done first strictly by
+ * context priority, even when katom_b might be lower priority
+ * than katom_a. This is due to scheduling of contexts in order
+ * of highest priority first, regardless of whether the atoms
+ * for a particular slot from such contexts have the highest
+ * priority or not.
+ */
+ if (kctx_a != kctx_b) {
+ if (kctx_a->priority < kctx_b->priority)
+ return true;
+ if (kctx_a->priority > kctx_b->priority)
+ return false;
+ }
+ }
+
+ /* For same contexts/contexts with the same context priority (in system
+ * priority mode), ordering is next done by atom priority
+ */
+ if (katom_a->sched_priority < katom_b->sched_priority)
+ return true;
+ if (katom_a->sched_priority > katom_b->sched_priority)
+ return false;
+ /* For atoms of same priority on the same kctx, they are
+ * ordered by seq_nr/age (dependent on caller)
+ */
+ if (kctx_a == kctx_b && samectxatomprio_ordering_func(katom_a, katom_b))
+ return true;
+
+ return false;
+}
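The decision order in kbase_js_atom_runs_before() can be summarised
independently of the driver structures. The following is a simplified
standalone sketch for illustration (plain integers replace the context and
atom structures, and smaller numbers mean higher priority, as in the driver);
the real function additionally warns if the two atoms are on different slots
and chooses between age and sequence-number ordering via order_flags:

static int sketch_runs_before(int same_ctx, int local_prio_mode,
			      int ctx_prio_a, int ctx_prio_b,
			      int atom_prio_a, int atom_prio_b,
			      unsigned int age_a, unsigned int age_b)
{
	if (!same_ctx) {
		/* Local priority mode never orders atoms across contexts */
		if (local_prio_mode)
			return 0;
		/* System priority mode orders strictly by context priority */
		if (ctx_prio_a != ctx_prio_b)
			return ctx_prio_a < ctx_prio_b;
	}
	/* Same context, or equal context priority: order by atom priority */
	if (atom_prio_a != atom_prio_b)
		return atom_prio_a < atom_prio_b;
	/* Same context and same atom priority: the earlier atom (smaller
	 * age, allowing for wrap-around) runs first
	 */
	return same_ctx && ((int)(age_a - age_b) < 0);
}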
+
/*
* Functions private to KBase ('Protected' functions)
*/
@@ -475,6 +537,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev)
jsdd->hard_stop_ticks_dumping = DEFAULT_JS_HARD_STOP_TICKS_DUMPING;
jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS;
jsdd->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL;
+
jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING;
jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS;
atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT);
@@ -662,6 +725,147 @@ void kbasep_js_kctx_term(struct kbase_context *kctx)
}
}
+/*
+ * Priority blocking management functions
+ */
+
+/* Should not normally be used directly - use kbase_jsctx_slot_atom_pulled_dec() instead */
+static void kbase_jsctx_slot_prio_blocked_clear(struct kbase_context *kctx,
+ int js, int sched_prio)
+{
+ struct kbase_jsctx_slot_tracking *slot_tracking =
+ &kctx->slot_tracking[js];
+
+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+ slot_tracking->blocked &= ~(((kbase_js_prio_bitmap_t)1) << sched_prio);
+ KBASE_KTRACE_ADD_JM_SLOT_INFO(kctx->kbdev, JS_SLOT_PRIO_UNBLOCKED, kctx,
+ NULL, 0, js, (unsigned int)sched_prio);
+}
+
+static int kbase_jsctx_slot_atoms_pulled(struct kbase_context *kctx, int js)
+{
+ return atomic_read(&kctx->slot_tracking[js].atoms_pulled);
+}
+
+/*
+ * A priority level on a slot is blocked when:
+ * - that priority level is blocked
+ * - or, any higher priority level is blocked
+ */
+static bool kbase_jsctx_slot_prio_is_blocked(struct kbase_context *kctx, int js,
+ int sched_prio)
+{
+ struct kbase_jsctx_slot_tracking *slot_tracking =
+ &kctx->slot_tracking[js];
+ kbase_js_prio_bitmap_t prio_bit, higher_prios_mask;
+
+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+ /* done in two separate shifts to prevent future undefined behavior
+ * should the number of priority levels == (bit width of the type)
+ */
+ prio_bit = (((kbase_js_prio_bitmap_t)1) << sched_prio);
+ /* all bits of sched_prio or higher, with sched_prio = 0 being the
+ * highest priority
+ */
+ higher_prios_mask = (prio_bit << 1) - 1u;
+ return (slot_tracking->blocked & higher_prios_mask) != 0u;
+}
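As a worked example of the mask construction above, with priority level 0
being the highest: for sched_prio = 2, prio_bit = 1 << 2 = 0b100 and
higher_prios_mask = (0b100 << 1) - 1 = 0b111, so the query reports the level
as blocked whenever any of bits 0, 1 or 2 is set in slot_tracking->blocked,
i.e. when this level or any more important level is currently blocked.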
+
+/**
+ * kbase_jsctx_slot_atom_pulled_inc - Increase counts of atoms that have been
+ * pulled for a slot from a ctx, based on
+ * this atom
+ * @kctx: kbase context
+ * @katom: atom pulled
+ *
+ * Manages counts of atoms pulled (including per-priority-level counts), for
+ * later determining when a ctx can become unblocked on a slot.
+ *
+ * Once a slot has been blocked at @katom's priority level, it should not be
+ * pulled from, hence this function should not be called in that case.
+ *
+ * The return value is to aid tracking of when @kctx becomes runnable.
+ *
+ * Return: new total count of atoms pulled from all slots on @kctx
+ */
+static int kbase_jsctx_slot_atom_pulled_inc(struct kbase_context *kctx,
+ const struct kbase_jd_atom *katom)
+{
+ int js = katom->slot_nr;
+ int sched_prio = katom->sched_priority;
+ struct kbase_jsctx_slot_tracking *slot_tracking =
+ &kctx->slot_tracking[js];
+ int nr_atoms_pulled;
+
+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+ WARN(kbase_jsctx_slot_prio_is_blocked(kctx, js, sched_prio),
+ "Should not have pulled atoms for slot %d from a context that is blocked at priority %d or higher",
+ js, sched_prio);
+
+ nr_atoms_pulled = atomic_inc_return(&kctx->atoms_pulled_all_slots);
+ atomic_inc(&slot_tracking->atoms_pulled);
+ slot_tracking->atoms_pulled_pri[sched_prio]++;
+
+ return nr_atoms_pulled;
+}
+
+/**
+ * kbase_jsctx_slot_atom_pulled_dec - Decrease counts of atoms that have been
+ * pulled for a slot from a ctx, and
+ * re-evaluate whether a context is blocked
+ * on this slot
+ * @kctx: kbase context
+ * @katom: atom that has just been removed from a job slot
+ *
+ * @kctx can become unblocked on a slot for a priority level when it no longer
+ * has any pulled atoms at that priority level on that slot, and all higher
+ * (numerically lower) priority levels are also unblocked for @kctx on that
+ * slot. The latter condition is to retain priority ordering within @kctx.
+ *
+ * Return: true if the slot was previously blocked but has now become unblocked
+ * at @katom's priority level, false otherwise.
+ */
+static bool kbase_jsctx_slot_atom_pulled_dec(struct kbase_context *kctx,
+ const struct kbase_jd_atom *katom)
+{
+ int js = katom->slot_nr;
+ int sched_prio = katom->sched_priority;
+ int atoms_pulled_pri;
+ struct kbase_jsctx_slot_tracking *slot_tracking =
+ &kctx->slot_tracking[js];
+ bool slot_prio_became_unblocked = false;
+
+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+ atomic_dec(&kctx->atoms_pulled_all_slots);
+ atomic_dec(&slot_tracking->atoms_pulled);
+
+ atoms_pulled_pri = --(slot_tracking->atoms_pulled_pri[sched_prio]);
+
+ /* We can safely clear this priority level's blocked status even if
+ * higher priority levels are still blocked: a subsequent query to
+ * kbase_jsctx_slot_prio_is_blocked() will still return true
+ */
+ if (!atoms_pulled_pri &&
+ kbase_jsctx_slot_prio_is_blocked(kctx, js, sched_prio)) {
+ kbase_jsctx_slot_prio_blocked_clear(kctx, js, sched_prio);
+
+ if (!kbase_jsctx_slot_prio_is_blocked(kctx, js, sched_prio))
+ slot_prio_became_unblocked = true;
+ }
+
+ if (slot_prio_became_unblocked)
+ KBASE_KTRACE_ADD_JM_SLOT_INFO(kctx->kbdev,
+ JS_SLOT_PRIO_AND_HIGHER_UNBLOCKED,
+ kctx, katom, katom->jc, js,
+ (unsigned int)sched_prio);
+
+ return slot_prio_became_unblocked;
+}
+
/**
* kbase_js_ctx_list_add_pullable_nolock - Variant of
* kbase_jd_ctx_list_add_pullable()
@@ -694,7 +898,7 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
if (!kctx->slots_pullable) {
kbdev->js_data.nr_contexts_pullable++;
ret = true;
- if (!atomic_read(&kctx->atoms_pulled)) {
+ if (!kbase_jsctx_atoms_pulled(kctx)) {
WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
atomic_inc(&kbdev->js_data.nr_contexts_runnable);
@@ -736,7 +940,7 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock(
if (!kctx->slots_pullable) {
kbdev->js_data.nr_contexts_pullable++;
ret = true;
- if (!atomic_read(&kctx->atoms_pulled)) {
+ if (!kbase_jsctx_atoms_pulled(kctx)) {
WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
atomic_inc(&kbdev->js_data.nr_contexts_runnable);
@@ -809,7 +1013,7 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
if (kctx->slots_pullable == (1 << js)) {
kbdev->js_data.nr_contexts_pullable--;
ret = true;
- if (!atomic_read(&kctx->atoms_pulled)) {
+ if (!kbase_jsctx_atoms_pulled(kctx)) {
WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
atomic_dec(&kbdev->js_data.nr_contexts_runnable);
@@ -851,7 +1055,7 @@ static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev,
if (kctx->slots_pullable == (1 << js)) {
kbdev->js_data.nr_contexts_pullable--;
ret = true;
- if (!atomic_read(&kctx->atoms_pulled)) {
+ if (!kbase_jsctx_atoms_pulled(kctx)) {
WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
atomic_dec(&kbdev->js_data.nr_contexts_runnable);
@@ -958,9 +1162,12 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
(void *)kctx, js);
return false; /* No pullable atoms */
}
- if (kctx->blocked_js[js][katom->sched_priority]) {
+ if (kbase_jsctx_slot_prio_is_blocked(kctx, js, katom->sched_priority)) {
+ KBASE_KTRACE_ADD_JM_SLOT_INFO(
+ kctx->kbdev, JS_SLOT_PRIO_IS_BLOCKED, kctx, katom,
+ katom->jc, js, (unsigned int)katom->sched_priority);
dev_dbg(kbdev->dev,
- "JS: kctx %pK is blocked from submitting atoms at priority %d (s:%d)\n",
+ "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%d)\n",
(void *)kctx, katom->sched_priority, js);
return false;
}
@@ -2493,9 +2700,9 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
(void *)kctx, js);
return NULL;
}
- if (kctx->blocked_js[js][katom->sched_priority]) {
+ if (kbase_jsctx_slot_prio_is_blocked(kctx, js, katom->sched_priority)) {
dev_dbg(kbdev->dev,
- "JS: kctx %pK is blocked from submitting atoms at priority %d (s:%d)\n",
+ "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%d)\n",
(void *)kctx, katom->sched_priority, js);
return NULL;
}
@@ -2509,7 +2716,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
* not allow multiple runs of fail-dep atoms from the same context to be
* present on the same slot
*/
- if (katom->pre_dep && atomic_read(&kctx->atoms_pulled_slot[js])) {
+ if (katom->pre_dep && kbase_jsctx_slot_atoms_pulled(kctx, js)) {
struct kbase_jd_atom *prev_atom =
kbase_backend_inspect_tail(kbdev, js);
@@ -2535,23 +2742,21 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
}
}
+ KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JS_PULL_JOB, kctx, katom,
+ katom->jc, js, katom->sched_priority);
kbase_ctx_flag_set(kctx, KCTX_PULLED);
kbase_ctx_flag_set(kctx, (KCTX_PULLED_SINCE_ACTIVE_JS0 << js));
- pulled = atomic_inc_return(&kctx->atoms_pulled);
+ pulled = kbase_jsctx_slot_atom_pulled_inc(kctx, katom);
if (pulled == 1 && !kctx->slots_pullable) {
WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
atomic_inc(&kbdev->js_data.nr_contexts_runnable);
}
- atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]);
- kctx->atoms_pulled_slot_pri[katom->slot_nr][katom->sched_priority]++;
jsctx_rb_pull(kctx, katom);
kbase_ctx_sched_retain_ctx_refcount(kctx);
- katom->atom_flags |= KBASE_KATOM_FLAG_HOLDING_CTX_REF;
-
katom->ticks = 0;
dev_dbg(kbdev->dev, "JS: successfully pulled atom %pK from kctx %pK (s:%d)\n",
@@ -2773,15 +2978,18 @@ static void js_return_worker(struct work_struct *data)
struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
struct kbasep_js_atom_retained_state retained_state;
int js = katom->slot_nr;
- int prio = katom->sched_priority;
+ bool slot_became_unblocked;
bool timer_sync = false;
bool context_idle = false;
unsigned long flags;
base_jd_core_req core_req = katom->core_req;
+ u64 cache_jc = katom->jc;
dev_dbg(kbdev->dev, "%s for atom %pK with event code 0x%x\n",
__func__, (void *)katom, katom->event_code);
+ KBASE_KTRACE_ADD_JM(kbdev, JS_RETURN_WORKER, kctx, katom, katom->jc, 0);
+
if (katom->event_code != BASE_JD_EVENT_END_RP_DONE)
KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(kbdev, katom);
@@ -2792,37 +3000,27 @@ static void js_return_worker(struct work_struct *data)
mutex_lock(&js_devdata->queue_mutex);
mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
- atomic_dec(&kctx->atoms_pulled);
- atomic_dec(&kctx->atoms_pulled_slot[js]);
-
if (katom->event_code != BASE_JD_EVENT_END_RP_DONE)
atomic_dec(&katom->blocked);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- kctx->atoms_pulled_slot_pri[js][katom->sched_priority]--;
+ slot_became_unblocked = kbase_jsctx_slot_atom_pulled_dec(kctx, katom);
- if (!atomic_read(&kctx->atoms_pulled_slot[js]) &&
- jsctx_rb_none_to_pull(kctx, js))
+ if (!kbase_jsctx_slot_atoms_pulled(kctx, js) &&
+ jsctx_rb_none_to_pull(kctx, js))
timer_sync |= kbase_js_ctx_list_remove_nolock(kbdev, kctx, js);
- /* If this slot has been blocked due to soft-stopped atoms, and all
- * atoms have now been processed, then unblock the slot
+ /* If the context has become unblocked on this slot now that its
+ * soft-stopped atoms have been processed, only mark it as pullable
+ * on this slot if it is not idle
*/
- if (!kctx->atoms_pulled_slot_pri[js][prio] &&
- kctx->blocked_js[js][prio]) {
- kctx->blocked_js[js][prio] = false;
+ if (slot_became_unblocked && kbase_jsctx_atoms_pulled(kctx) &&
+ kbase_js_ctx_pullable(kctx, js, true))
+ timer_sync |=
+ kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, js);
- /* Only mark the slot as pullable if the context is not idle -
- * that case is handled below
- */
- if (atomic_read(&kctx->atoms_pulled) &&
- kbase_js_ctx_pullable(kctx, js, true))
- timer_sync |= kbase_js_ctx_list_add_pullable_nolock(
- kbdev, kctx, js);
- }
-
- if (!atomic_read(&kctx->atoms_pulled)) {
+ if (!kbase_jsctx_atoms_pulled(kctx)) {
dev_dbg(kbdev->dev,
"No atoms currently pulled from context %pK\n",
(void *)kctx);
@@ -2890,7 +3088,6 @@ static void js_return_worker(struct work_struct *data)
mutex_unlock(&kctx->jctx.lock);
}
- katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
dev_dbg(kbdev->dev, "JS: retained state %s finished",
kbasep_js_has_atom_finished(&retained_state) ?
"has" : "hasn't");
@@ -2904,6 +3101,9 @@ static void js_return_worker(struct work_struct *data)
kbase_backend_complete_wq_post_sched(kbdev, core_req);
+ KBASE_KTRACE_ADD_JM(kbdev, JS_RETURN_WORKER_END, kctx, NULL, cache_jc,
+ 0);
+
dev_dbg(kbdev->dev, "Leaving %s for atom %pK\n",
__func__, (void *)katom);
}
@@ -3113,15 +3313,16 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) {
+ bool slot_became_unblocked;
+
dev_dbg(kbdev->dev, "Atom %pK is in runnable_tree\n",
(void *)katom);
- context_idle = !atomic_dec_return(&kctx->atoms_pulled);
- atomic_dec(&kctx->atoms_pulled_slot[atom_slot]);
- kctx->atoms_pulled_slot_pri[atom_slot][prio]--;
+ slot_became_unblocked =
+ kbase_jsctx_slot_atom_pulled_dec(kctx, katom);
+ context_idle = !kbase_jsctx_atoms_pulled(kctx);
- if (!atomic_read(&kctx->atoms_pulled) &&
- !kctx->slots_pullable) {
+ if (!kbase_jsctx_atoms_pulled(kctx) && !kctx->slots_pullable) {
WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
atomic_dec(&kbdev->js_data.nr_contexts_runnable);
@@ -3129,15 +3330,14 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
}
/* If this slot has been blocked due to soft-stopped atoms, and
- * all atoms have now been processed, then unblock the slot
+ * all atoms have now been processed at this priority level and
+ * higher, then unblock the slot
*/
- if (!kctx->atoms_pulled_slot_pri[atom_slot][prio]
- && kctx->blocked_js[atom_slot][prio]) {
+ if (slot_became_unblocked) {
dev_dbg(kbdev->dev,
- "kctx %pK is no longer blocked from submitting on slot %d at priority %d\n",
+ "kctx %pK is no longer blocked from submitting on slot %d at priority %d or higher\n",
(void *)kctx, atom_slot, prio);
- kctx->blocked_js[atom_slot][prio] = false;
if (kbase_js_ctx_pullable(kctx, atom_slot, true))
timer_sync |=
kbase_js_ctx_list_add_pullable_nolock(
@@ -3146,8 +3346,8 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
}
WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE));
- if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) &&
- jsctx_rb_none_to_pull(kctx, atom_slot)) {
+ if (!kbase_jsctx_slot_atoms_pulled(kctx, atom_slot) &&
+ jsctx_rb_none_to_pull(kctx, atom_slot)) {
if (!list_empty(
&kctx->jctx.sched_info.ctx.ctx_list_entry[atom_slot]))
timer_sync |= kbase_js_ctx_list_remove_nolock(
@@ -3160,8 +3360,8 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
* re-enable submission so that context can be scheduled again.
*/
if (!kbasep_js_is_submit_allowed(js_devdata, kctx) &&
- !atomic_read(&kctx->atoms_pulled) &&
- !kbase_ctx_flag(kctx, KCTX_DYING)) {
+ !kbase_jsctx_atoms_pulled(kctx) &&
+ !kbase_ctx_flag(kctx, KCTX_DYING)) {
int js;
kbasep_js_set_submit_allowed(js_devdata, kctx);
@@ -3297,7 +3497,9 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
trace_sysgraph_gpu(SGR_COMPLETE, kctx->id,
kbase_jd_atom_id(katom->kctx, katom), katom->slot_nr);
+ KBASE_TLSTREAM_TL_JD_DONE_START(kbdev, katom);
kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0);
+ KBASE_TLSTREAM_TL_JD_DONE_END(kbdev, katom);
/* Unblock cross dependency if present */
if (x_dep && (katom->event_code == BASE_JD_EVENT_DONE ||
@@ -3405,6 +3607,8 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
bool ctx_waiting[BASE_JM_MAX_NR_SLOTS];
int js;
+ KBASE_TLSTREAM_TL_JS_SCHED_START(kbdev, 0);
+
dev_dbg(kbdev->dev, "%s kbdev %pK mask 0x%x\n",
__func__, (void *)kbdev, (unsigned int)js_mask);
@@ -3460,6 +3664,8 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
&kctx->jctx.sched_info.ctx.jsctx_mutex);
mutex_unlock(&js_devdata->queue_mutex);
up(&js_devdata->schedule_sem);
+ KBASE_TLSTREAM_TL_JS_SCHED_END(kbdev,
+ 0);
return;
}
kbase_ctx_flag_set(kctx, KCTX_ACTIVE);
@@ -3604,6 +3810,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
mutex_unlock(&js_devdata->queue_mutex);
up(&js_devdata->schedule_sem);
+ KBASE_TLSTREAM_TL_JS_SCHED_END(kbdev, 0);
}
void kbase_js_zap_context(struct kbase_context *kctx)
diff --git a/mali_kbase/mali_kbase_kinstr_prfcnt.c b/mali_kbase/mali_kbase_kinstr_prfcnt.c
new file mode 100644
index 0000000..ce996ca
--- /dev/null
+++ b/mali_kbase/mali_kbase_kinstr_prfcnt.c
@@ -0,0 +1,1184 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "mali_kbase_kinstr_prfcnt.h"
+#include "mali_kbase_hwcnt_virtualizer.h"
+#include "mali_kbase_hwcnt_types.h"
+#include <uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h>
+#include "mali_kbase_hwcnt_gpu.h"
+#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h>
+#include "mali_malisw.h"
+#include "mali_kbase_debug.h"
+
+#include <linux/anon_inodes.h>
+#include <linux/fcntl.h>
+#include <linux/fs.h>
+#include <linux/hrtimer.h>
+#include <linux/log2.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+
+/* The minimum allowed interval between dumps, in nanoseconds
+ * (equivalent to 10KHz)
+ */
+#define DUMP_INTERVAL_MIN_NS (100 * NSEC_PER_USEC)
+
+/* The minimum allowed interval between dumps, in microseconds
+ * (equivalent to 10KHz)
+ */
+#define DUMP_INTERVAL_MIN_US (DUMP_INTERVAL_MIN_NS / 1000)
+
+/* The maximum allowed buffers per client */
+#define MAX_BUFFER_COUNT 32
+
+/**
+ * struct kbase_kinstr_prfcnt_context - IOCTL interface for userspace hardware
+ * counters.
+ * @hvirt: Hardware counter virtualizer used by kinstr_prfcnt.
+ * @info_item_count: Number of metadata elements.
+ * @metadata: Hardware counter metadata provided by virtualizer.
+ * @lock: Lock protecting kinstr_prfcnt state.
+ * @suspend_count: Suspend reference count. If non-zero, timer and worker
+ * are prevented from being re-scheduled.
+ * @client_count: Number of kinstr_prfcnt clients.
+ * @clients: List of kinstr_prfcnt clients.
+ * @dump_timer: Timer that enqueues dump_work to a workqueue.
+ * @dump_work: Worker for performing periodic counter dumps.
+ */
+struct kbase_kinstr_prfcnt_context {
+ struct kbase_hwcnt_virtualizer *hvirt;
+ u32 info_item_count;
+ const struct kbase_hwcnt_metadata *metadata;
+ struct mutex lock;
+ size_t suspend_count;
+ size_t client_count;
+ struct list_head clients;
+ struct hrtimer dump_timer;
+ struct work_struct dump_work;
+};
+
+/**
+ * struct kbase_kinstr_prfcnt_sample - Buffer and descriptor for sample data.
+ * @sample_meta: Pointer to sample metadata.
+ * @dump_buf: Dump buffer containing sample data.
+ */
+struct kbase_kinstr_prfcnt_sample {
+ u64 *sample_meta;
+ struct kbase_hwcnt_dump_buffer dump_buf;
+};
+
+/**
+ * struct kbase_kinstr_prfcnt_sample_array - Array of sample data.
+ * @page_addr: Address of allocated pages. A single allocation is used
+ * for all Dump Buffers in the array.
+ * @page_order: The allocation order of the pages.
+ * @sample_count: Number of allocated samples.
+ * @samples: Non-NULL pointer to the array of Dump Buffers.
+ */
+struct kbase_kinstr_prfcnt_sample_array {
+ u64 page_addr;
+ unsigned int page_order;
+ size_t sample_count;
+ struct kbase_kinstr_prfcnt_sample *samples;
+};
+
+/**
+ * struct kbase_kinstr_prfcnt_client_config - Client session configuration.
+ * @prfcnt_mode: Sampling mode: either manual or periodic.
+ * @counter_set: Set of performance counter blocks.
+ * @buffer_count: Number of buffers used to store samples.
+ * @period_us: Sampling period, in microseconds, or 0 if manual mode.
+ * @phys_em: Enable map used by the GPU.
+ */
+struct kbase_kinstr_prfcnt_client_config {
+ u8 prfcnt_mode;
+ u8 counter_set;
+ u16 buffer_count;
+ u64 period_us;
+ struct kbase_hwcnt_physical_enable_map phys_em;
+};
+
+/**
+ * struct kbase_kinstr_prfcnt_client - A kinstr_prfcnt client attached
+ * to a kinstr_prfcnt context.
+ * @kinstr_ctx: kinstr_prfcnt context client is attached to.
+ * @hvcli: Hardware counter virtualizer client.
+ * @node: Node used to attach this client to list in kinstr_prfcnt
+ * context.
+ * @next_dump_time_ns: Time in ns when this client's next periodic dump must
+ * occur. If 0, not a periodic client.
+ * @dump_interval_ns: Interval between periodic dumps. If 0, not a periodic
+ * client.
+ * @config: Configuration of the client session.
+ * @enable_map: Counters enable map.
+ * @tmp_buf: Temporary buffer to use before handing over dump to
+ * client.
+ * @sample_arr: Array of dump buffers allocated by this client.
+ * @dump_bufs_meta: Metadata of dump buffers.
+ * @meta_idx: Index of metadata being accessed by userspace.
+ * @read_idx: Index of buffer read by userspace.
+ * @write_idx: Index of buffer being written by dump worker.
+ * @waitq: Client's notification queue.
+ * @sample_size: Size of the data required for one sample, in bytes.
+ * @sample_count: Number of samples the client is able to capture.
+ */
+struct kbase_kinstr_prfcnt_client {
+ struct kbase_kinstr_prfcnt_context *kinstr_ctx;
+ struct kbase_hwcnt_virtualizer_client *hvcli;
+ struct list_head node;
+ u64 next_dump_time_ns;
+ u32 dump_interval_ns;
+ struct kbase_kinstr_prfcnt_client_config config;
+ struct kbase_hwcnt_enable_map enable_map;
+ struct kbase_hwcnt_dump_buffer tmp_buf;
+ struct kbase_kinstr_prfcnt_sample_array sample_arr;
+ struct kbase_hwcnt_reader_metadata *dump_bufs_meta;
+ atomic_t meta_idx;
+ atomic_t read_idx;
+ atomic_t write_idx;
+ wait_queue_head_t waitq;
+ size_t sample_size;
+ size_t sample_count;
+};
+
+static struct prfcnt_enum_item kinstr_prfcnt_supported_requests[] = {
+ {
+ /* Request description for MODE request */
+ .hdr = {
+ .item_type = PRFCNT_ENUM_TYPE_REQUEST,
+ .item_version = PRFCNT_READER_API_VERSION,
+ },
+ .u.request = {
+ .request_item_type = PRFCNT_REQUEST_MODE,
+ .versions_mask = 0x1,
+ },
+ },
+ {
+ /* Request description for ENABLE request */
+ .hdr = {
+ .item_type = PRFCNT_ENUM_TYPE_REQUEST,
+ .item_version = PRFCNT_READER_API_VERSION,
+ },
+ .u.request = {
+ .request_item_type = PRFCNT_REQUEST_ENABLE,
+ .versions_mask = 0x1,
+ },
+ },
+};
+
+/**
+ * kbasep_kinstr_prfcnt_hwcnt_reader_buffer_ready() - Check if client has ready
+ * buffers.
+ * @cli: Non-NULL pointer to kinstr_prfcnt client.
+ *
+ * Return: Non-zero if the client has at least one dump buffer filled that has
+ * not yet been notified to the user.
+ */
+static int kbasep_kinstr_prfcnt_hwcnt_reader_buffer_ready(
+ struct kbase_kinstr_prfcnt_client *cli)
+{
+ WARN_ON(!cli);
+ return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx);
+}
+
+/**
+ * kbasep_kinstr_prfcnt_hwcnt_reader_poll() - hwcnt reader's poll.
+ * @filp: Non-NULL pointer to file structure.
+ * @wait: Non-NULL pointer to poll table.
+ *
+ * Return: POLLIN if data can be read without blocking, 0 if data can not be
+ * read without blocking, else error code.
+ */
+static unsigned int kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp,
+ poll_table *wait)
+{
+ struct kbase_kinstr_prfcnt_client *cli;
+
+ if (!filp || !wait)
+ return -EINVAL;
+
+ cli = filp->private_data;
+
+ if (!cli)
+ return -EINVAL;
+
+ poll_wait(filp, &cli->waitq, wait);
+
+ if (kbasep_kinstr_prfcnt_hwcnt_reader_buffer_ready(cli))
+ return POLLIN;
+
+ return 0;
+}
+
+/**
+ * kbasep_kinstr_prfcnt_hwcnt_reader_ioctl() - hwcnt reader's ioctl.
+ * @filp: Non-NULL pointer to file structure.
+ * @cmd: User command.
+ * @arg: Command's argument.
+ *
+ * Return: 0 on success, else error code.
+ */
+static long kbasep_kinstr_prfcnt_hwcnt_reader_ioctl(struct file *filp,
+ unsigned int cmd,
+ unsigned long arg)
+{
+ long rcode;
+ struct kbase_kinstr_prfcnt_client *cli;
+
+ if (!filp || (_IOC_TYPE(cmd) != KBASE_HWCNT_READER))
+ return -EINVAL;
+
+ cli = filp->private_data;
+
+ if (!cli)
+ return -EINVAL;
+
+ switch (_IOC_NR(cmd)) {
+ default:
+ pr_warn("Unknown HWCNT ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd));
+ rcode = -EINVAL;
+ break;
+ }
+
+ return rcode;
+}
+
+/**
+ * kbasep_kinstr_prfcnt_hwcnt_reader_mmap() - hwcnt reader's mmap.
+ * @filp: Non-NULL pointer to file structure.
+ * @vma: Non-NULL pointer to vma structure.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_kinstr_prfcnt_hwcnt_reader_mmap(struct file *filp,
+ struct vm_area_struct *vma)
+{
+ struct kbase_kinstr_prfcnt_client *cli;
+ unsigned long vm_size, size, addr, pfn, offset;
+
+ if (!filp || !vma)
+ return -EINVAL;
+
+ cli = filp->private_data;
+
+ if (!cli)
+ return -EINVAL;
+
+ vm_size = vma->vm_end - vma->vm_start;
+
+ /* The mapping is allowed to span the entirety of the page allocation,
+ * not just the chunk where the dump buffers are allocated.
+ * This accommodates the corner case where the combined size of the
+ * dump buffers is smaller than a single page.
+ * This does not pose a security risk as the pages are zeroed on
+ * allocation, and anything out of bounds of the dump buffers is never
+ * written to.
+ */
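+ /* For example (illustrative): with page_order = 3 the allocation
+ * spans 8 pages, so an mmap request with vm_pgoff = 2 and a 4-page
+ * vm_size passes the checks below, while a 7-page vm_size at the
+ * same offset does not.
+ */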
+ size = (1ull << cli->sample_arr.page_order) * PAGE_SIZE;
+
+ if (vma->vm_pgoff > (size >> PAGE_SHIFT))
+ return -EINVAL;
+
+ offset = vma->vm_pgoff << PAGE_SHIFT;
+
+ if (vm_size > size - offset)
+ return -EINVAL;
+
+ addr = __pa(cli->sample_arr.page_addr + offset);
+ pfn = addr >> PAGE_SHIFT;
+
+ return remap_pfn_range(vma, vma->vm_start, pfn, vm_size,
+ vma->vm_page_prot);
+}
+
+static void kbasep_kinstr_prfcnt_sample_array_free(
+ struct kbase_kinstr_prfcnt_sample_array *sample_arr)
+{
+ if (!sample_arr)
+ return;
+
+ kfree((void *)sample_arr->samples);
+ kfree((void *)(size_t)sample_arr->page_addr);
+ memset(sample_arr, 0, sizeof(*sample_arr));
+}
+
+/**
+ * kbasep_kinstr_prfcnt_client_destroy() - Destroy a kinstr_prfcnt client.
+ * @cli: kinstr_prfcnt client. Must not be attached to a kinstr_prfcnt context.
+ */
+static void
+kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli)
+{
+ if (!cli)
+ return;
+
+ kbase_hwcnt_virtualizer_client_destroy(cli->hvcli);
+ kfree(cli->dump_bufs_meta);
+ kbasep_kinstr_prfcnt_sample_array_free(&cli->sample_arr);
+ kbase_hwcnt_dump_buffer_free(&cli->tmp_buf);
+ kbase_hwcnt_enable_map_free(&cli->enable_map);
+ kfree(cli);
+}
+
+/**
+ * kbasep_kinstr_prfcnt_hwcnt_reader_release() - hwcnt reader's release.
+ * @inode: Non-NULL pointer to inode structure.
+ * @filp: Non-NULL pointer to file structure.
+ *
+ * Return: 0 always.
+ */
+static int kbasep_kinstr_prfcnt_hwcnt_reader_release(struct inode *inode,
+ struct file *filp)
+{
+ struct kbase_kinstr_prfcnt_client *cli = filp->private_data;
+
+ mutex_lock(&cli->kinstr_ctx->lock);
+
+ WARN_ON(cli->kinstr_ctx->client_count == 0);
+ if (cli->kinstr_ctx->client_count > 0)
+ cli->kinstr_ctx->client_count--;
+ list_del(&cli->node);
+
+ mutex_unlock(&cli->kinstr_ctx->lock);
+
+ kbasep_kinstr_prfcnt_client_destroy(cli);
+
+ return 0;
+}
+
+/* kinstr_prfcnt client file operations */
+static const struct file_operations kinstr_prfcnt_client_fops = {
+ .owner = THIS_MODULE,
+ .poll = kbasep_kinstr_prfcnt_hwcnt_reader_poll,
+ .unlocked_ioctl = kbasep_kinstr_prfcnt_hwcnt_reader_ioctl,
+ .compat_ioctl = kbasep_kinstr_prfcnt_hwcnt_reader_ioctl,
+ .mmap = kbasep_kinstr_prfcnt_hwcnt_reader_mmap,
+ .release = kbasep_kinstr_prfcnt_hwcnt_reader_release,
+};
+
+static size_t kbasep_kinstr_prfcnt_get_sample_size(
+ const struct kbase_hwcnt_metadata *metadata,
+ struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ size_t dump_buf_bytes;
+ size_t clk_cnt_buf_bytes;
+ size_t sample_meta_bytes;
+ size_t block_count = 0;
+ size_t grp, blk, blk_inst;
+
+ if (!metadata)
+ return 0;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ block_count++;
+
+ /* Reserve one for last sentinel item. */
+ block_count++;
+
+ sample_meta_bytes = sizeof(struct prfcnt_metadata) * block_count;
+ dump_buf_bytes = metadata->dump_buf_bytes;
+ clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * metadata->clk_cnt;
+
+ return (sample_meta_bytes + dump_buf_bytes + clk_cnt_buf_bytes);
+}
+
+/**
+ * kbasep_kinstr_prfcnt_dump_worker() - Dump worker that dumps all periodic
+ * clients that are due, then reschedules itself.
+ * @work: Work structure.
+ */
+static void kbasep_kinstr_prfcnt_dump_worker(struct work_struct *work)
+{
+ /* Do nothing. */
+}
+
+/**
+ * kbasep_kinstr_prfcnt_dump_timer() - Dump timer that schedules the dump worker for
+ * execution as soon as possible.
+ * @timer: Timer structure.
+ */
+static enum hrtimer_restart
+kbasep_kinstr_prfcnt_dump_timer(struct hrtimer *timer)
+{
+ return HRTIMER_NORESTART;
+}
+
+int kbase_kinstr_prfcnt_init(struct kbase_hwcnt_virtualizer *hvirt,
+ struct kbase_kinstr_prfcnt_context **out_kinstr_ctx)
+{
+ struct kbase_kinstr_prfcnt_context *kinstr_ctx;
+ const struct kbase_hwcnt_metadata *metadata;
+
+ if (!hvirt || !out_kinstr_ctx)
+ return -EINVAL;
+
+ metadata = kbase_hwcnt_virtualizer_metadata(hvirt);
+
+ if (!metadata)
+ return -EINVAL;
+
+ kinstr_ctx = kzalloc(sizeof(*kinstr_ctx), GFP_KERNEL);
+
+ if (!kinstr_ctx)
+ return -ENOMEM;
+
+ kinstr_ctx->hvirt = hvirt;
+ kinstr_ctx->metadata = metadata;
+
+ mutex_init(&kinstr_ctx->lock);
+ INIT_LIST_HEAD(&kinstr_ctx->clients);
+ hrtimer_init(&kinstr_ctx->dump_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+ kinstr_ctx->dump_timer.function = kbasep_kinstr_prfcnt_dump_timer;
+ INIT_WORK(&kinstr_ctx->dump_work, kbasep_kinstr_prfcnt_dump_worker);
+
+ *out_kinstr_ctx = kinstr_ctx;
+ return 0;
+}
+
+void kbase_kinstr_prfcnt_term(struct kbase_kinstr_prfcnt_context *kinstr_ctx)
+{
+ if (!kinstr_ctx)
+ return;
+
+ cancel_work_sync(&kinstr_ctx->dump_work);
+
+ /* Non-zero client count implies client leak */
+ if (WARN_ON(kinstr_ctx->client_count > 0)) {
+ struct kbase_kinstr_prfcnt_client *pos, *n;
+
+ list_for_each_entry_safe(pos, n, &kinstr_ctx->clients, node) {
+ list_del(&pos->node);
+ kinstr_ctx->client_count--;
+ kbasep_kinstr_prfcnt_client_destroy(pos);
+ }
+ }
+
+ WARN_ON(kinstr_ctx->client_count > 0);
+ kfree(kinstr_ctx);
+}
+
+void kbase_kinstr_prfcnt_suspend(struct kbase_kinstr_prfcnt_context *kinstr_ctx)
+{
+ if (WARN_ON(!kinstr_ctx))
+ return;
+
+ mutex_lock(&kinstr_ctx->lock);
+
+ if (!WARN_ON(kinstr_ctx->suspend_count == SIZE_MAX))
+ kinstr_ctx->suspend_count++;
+
+ mutex_unlock(&kinstr_ctx->lock);
+
+ /* Always sync cancel the timer and then the worker, regardless of the
+ * new suspend count.
+ *
+ * This ensures concurrent calls to kbase_kinstr_prfcnt_suspend() always block
+ * until kinstr_prfcnt is fully suspended.
+ *
+ * The timer is canceled before the worker, as the timer
+ * unconditionally re-enqueues the worker, but the worker checks the
+ * suspend_count that we just incremented before rescheduling the timer.
+ *
+ * Therefore if we cancel the worker first, the timer might re-enqueue
+ * the worker before we cancel the timer, but the opposite is not
+ * possible.
+ */
+ hrtimer_cancel(&kinstr_ctx->dump_timer);
+ cancel_work_sync(&kinstr_ctx->dump_work);
+}
+
+void kbase_kinstr_prfcnt_resume(struct kbase_kinstr_prfcnt_context *kinstr_ctx)
+{
+ if (WARN_ON(!kinstr_ctx))
+ return;
+
+ mutex_lock(&kinstr_ctx->lock);
+
+ if (!WARN_ON(kinstr_ctx->suspend_count == 0)) {
+ kinstr_ctx->suspend_count--;
+
+ /* Last resume, so re-enqueue the worker if we have any periodic
+ * clients.
+ */
+ if (kinstr_ctx->suspend_count == 0) {
+ struct kbase_kinstr_prfcnt_client *pos;
+ bool has_periodic_clients = false;
+
+ list_for_each_entry(pos, &kinstr_ctx->clients, node) {
+ if (pos->dump_interval_ns != 0) {
+ has_periodic_clients = true;
+ break;
+ }
+ }
+
+ if (has_periodic_clients)
+ kbase_hwcnt_virtualizer_queue_work(
+ kinstr_ctx->hvirt,
+ &kinstr_ctx->dump_work);
+ }
+ }
+
+ mutex_unlock(&kinstr_ctx->lock);
+}
+
+static int kbasep_kinstr_prfcnt_sample_array_alloc(
+ const struct kbase_hwcnt_metadata *metadata, size_t n,
+ struct kbase_kinstr_prfcnt_sample_array *sample_arr)
+{
+ struct kbase_kinstr_prfcnt_sample *samples;
+ size_t sample_idx;
+ u64 addr;
+ unsigned int order;
+ size_t dump_buf_bytes;
+ size_t clk_cnt_buf_bytes;
+ size_t sample_meta_bytes;
+ size_t block_count = 0;
+ size_t sample_size;
+ size_t grp, blk, blk_inst;
+
+ if (!metadata || !sample_arr)
+ return -EINVAL;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ block_count++;
+
+ /* Reserve one for last sentinel item. */
+ block_count++;
+
+ sample_meta_bytes = sizeof(struct prfcnt_metadata) * block_count;
+ dump_buf_bytes = metadata->dump_buf_bytes;
+ clk_cnt_buf_bytes =
+ sizeof(*samples->dump_buf.clk_cnt_buf) * metadata->clk_cnt;
+ sample_size = sample_meta_bytes + dump_buf_bytes + clk_cnt_buf_bytes;
+
+ samples = kmalloc_array(n, sizeof(*samples), GFP_KERNEL);
+
+ if (!samples)
+ return -ENOMEM;
+
+ order = get_order(sample_size * n);
+ addr = (u64)(uintptr_t)kzalloc(sample_size * n, GFP_KERNEL);
+
+ if (!addr) {
+ kfree((void *)samples);
+ return -ENOMEM;
+ }
+
+ sample_arr->page_addr = addr;
+ sample_arr->page_order = order;
+ sample_arr->sample_count = n;
+ sample_arr->samples = samples;
+
+ for (sample_idx = 0; sample_idx < n; sample_idx++) {
+ const size_t sample_meta_offset = sample_size * sample_idx;
+ const size_t dump_buf_offset =
+ sample_meta_offset + sample_meta_bytes;
+ const size_t clk_cnt_buf_offset =
+ dump_buf_offset + dump_buf_bytes;
+
+ /* Internal layout in a sample buffer: [sample metadata, dump_buf, clk_cnt_buf]. */
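+ /* For example (illustrative): sample i starts at
+ * addr + i * sample_size; its dump_buf begins sample_meta_bytes
+ * after that, and its clk_cnt_buf begins dump_buf_bytes after the
+ * dump_buf, matching the offsets computed above.
+ */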
+ samples[sample_idx].dump_buf.metadata = metadata;
+ samples[sample_idx].sample_meta =
+ (u64 *)(uintptr_t)(addr + sample_meta_offset);
+ samples[sample_idx].dump_buf.dump_buf =
+ (u64 *)(uintptr_t)(addr + dump_buf_offset);
+ samples[sample_idx].dump_buf.clk_cnt_buf =
+ (u64 *)(uintptr_t)(addr + clk_cnt_buf_offset);
+ }
+
+ return 0;
+}
+
+static bool prfcnt_mode_supported(u8 mode)
+{
+ return (mode == PRFCNT_MODE_MANUAL) || (mode == PRFCNT_MODE_PERIODIC);
+}
+
+static void
+kbasep_kinstr_prfcnt_block_enable_to_physical(uint32_t *phys_em,
+ const uint64_t *enable_mask)
+{
+ *phys_em |= kbase_hwcnt_backend_gpu_block_map_to_physical(
+ enable_mask[0], enable_mask[1]);
+}
+
+/**
+ * kbasep_kinstr_prfcnt_parse_request_enable - Parse an enable request
+ * @req_enable: Performance counters enable request to parse.
+ * @config: Client object the session configuration should be written to.
+ *
+ * This function parses a performance counters enable request.
+ * This type of request specifies a bitmask of HW counters to enable
+ * for one performance counters block type. In addition to that,
+ * a performance counters enable request may also set "global"
+ * configuration properties that affect the whole session, like the
+ * performance counters set, which must be consistent with the value
+ * set by other performance request items.
+ *
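+ * For example (illustrative): two ENABLE requests for the shader core
+ * block with enable masks 0x000f and 0x00f0 result in a combined
+ * shader core enable mask of 0x00ff.
+ *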
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_kinstr_prfcnt_parse_request_enable(
+ const struct prfcnt_request_enable *req_enable,
+ struct kbase_kinstr_prfcnt_client_config *config)
+{
+ int err = 0;
+ u8 req_set = KBASE_HWCNT_SET_UNDEFINED, default_set;
+
+ switch (req_enable->set) {
+ case PRFCNT_SET_PRIMARY:
+ req_set = KBASE_HWCNT_SET_PRIMARY;
+ break;
+ case PRFCNT_SET_SECONDARY:
+ req_set = KBASE_HWCNT_SET_SECONDARY;
+ break;
+ case PRFCNT_SET_TERTIARY:
+ req_set = KBASE_HWCNT_SET_TERTIARY;
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ /* The performance counter set is a "global" property that affects
+ * the whole session. Either this is the first request that sets
+ * the value, or it shall be identical to all previous requests.
+ */
+ if (!err) {
+ if (config->counter_set == KBASE_HWCNT_SET_UNDEFINED)
+ config->counter_set = req_set;
+ else if (config->counter_set != req_set)
+ err = -EINVAL;
+ }
+
+ /* For now, the requested set cannot differ from the default set,
+ * as it is the only one supported. This will change in the future.
+ */
+#if defined(CONFIG_MALI_PRFCNT_SET_SECONDARY)
+ default_set = KBASE_HWCNT_SET_SECONDARY;
+#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY)
+ default_set = KBASE_HWCNT_SET_TERTIARY;
+#else
+ /* Default to primary */
+ default_set = KBASE_HWCNT_SET_PRIMARY;
+#endif
+
+ if (req_set != default_set)
+ err = -EINVAL;
+
+ if (err < 0)
+ return err;
+
+ /* Enable the performance counters based on the bitmask provided
+ * by the user space client.
+ * It is possible to receive multiple requests for the same counter
+ * block, in which case the bitmask will be a logical OR of all the
+ * bitmasks given by the client.
+ */
+ switch (req_enable->block_type) {
+ case PRFCNT_BLOCK_TYPE_FE:
+ kbasep_kinstr_prfcnt_block_enable_to_physical(
+ &config->phys_em.fe_bm, req_enable->enable_mask);
+ break;
+ case PRFCNT_BLOCK_TYPE_TILER:
+ kbasep_kinstr_prfcnt_block_enable_to_physical(
+ &config->phys_em.tiler_bm, req_enable->enable_mask);
+ break;
+ case PRFCNT_BLOCK_TYPE_MEMORY:
+ kbasep_kinstr_prfcnt_block_enable_to_physical(
+ &config->phys_em.mmu_l2_bm, req_enable->enable_mask);
+ break;
+ case PRFCNT_BLOCK_TYPE_SHADER_CORE:
+ kbasep_kinstr_prfcnt_block_enable_to_physical(
+ &config->phys_em.shader_bm, req_enable->enable_mask);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
+}
+
+/**
+ * kbasep_kinstr_prfcnt_parse_setup - Parse session setup
+ * @kinstr_ctx: Pointer to the kinstr_prfcnt context.
+ * @setup: Session setup information to parse.
+ * @config: Client object the session configuration should be written to.
+ *
+ * This function parses the list of "request" items sent by the user space
+ * client, and writes the configuration for the new client to be created
+ * for the session.
+ *
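+ * An illustrative minimal request list: one MODE item selecting
+ * periodic capture with a sample period, one or more ENABLE items
+ * selecting counters per block type, terminated by an item whose
+ * header type is FLEX_LIST_TYPE_NONE.
+ *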
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_kinstr_prfcnt_parse_setup(
+ struct kbase_kinstr_prfcnt_context *kinstr_ctx,
+ union kbase_ioctl_kinstr_prfcnt_setup *setup,
+ struct kbase_kinstr_prfcnt_client_config *config)
+{
+ uint32_t i;
+ struct prfcnt_request_item *req_arr;
+ int err = 0;
+
+ if (!setup->in.requests_ptr || (setup->in.request_item_count == 0) ||
+ (setup->in.request_item_size == 0)) {
+ return -EINVAL;
+ }
+
+ req_arr =
+ (struct prfcnt_request_item *)(uintptr_t)setup->in.requests_ptr;
+
+ if (req_arr[setup->in.request_item_count - 1].hdr.item_type !=
+ FLEX_LIST_TYPE_NONE) {
+ return -EINVAL;
+ }
+
+ if (req_arr[setup->in.request_item_count - 1].hdr.item_version != 0)
+ return -EINVAL;
+
+ /* The session configuration can only feature one value for some
+ * properties (like capture mode and block counter set), but the client
+ * may potentially issue multiple requests and try to set more than one
+ * value for those properties. While issuing multiple requests for the
+ * same property is allowed by the protocol, asking for different values
+ * is illegal. Leaving these properties as undefined is illegal, too.
+ */
+ config->prfcnt_mode = PRFCNT_MODE_RESERVED;
+ config->counter_set = KBASE_HWCNT_SET_UNDEFINED;
+
+ for (i = 0; i < setup->in.request_item_count - 1; i++) {
+ if (req_arr[i].hdr.item_version > PRFCNT_READER_API_VERSION) {
+ err = -EINVAL;
+ break;
+ }
+
+ switch (req_arr[i].hdr.item_type) {
+ /* Capture mode is initialized as undefined.
+ * The first request of this type sets the capture mode.
+ * The protocol allows the client to send redundant requests,
+ * but only if they replicate the same value that has already
+ * been set by the first request.
+ */
+ case PRFCNT_REQUEST_TYPE_MODE:
+ if (!prfcnt_mode_supported(req_arr[i].u.req_mode.mode))
+ err = -EINVAL;
+ else if (config->prfcnt_mode == PRFCNT_MODE_RESERVED)
+ config->prfcnt_mode =
+ req_arr[i].u.req_mode.mode;
+ else if (req_arr[i].u.req_mode.mode !=
+ config->prfcnt_mode)
+ err = -EINVAL;
+
+ if (err < 0)
+ break;
+
+ if (config->prfcnt_mode == PRFCNT_MODE_PERIODIC) {
+ config->period_us =
+ req_arr[i]
+ .u.req_mode.mode_config.periodic
+ .period_us;
+
+ if ((config->period_us != 0) &&
+ (config->period_us <
+ DUMP_INTERVAL_MIN_US)) {
+ config->period_us =
+ DUMP_INTERVAL_MIN_US;
+ }
+ }
+ break;
+
+ case PRFCNT_REQUEST_TYPE_ENABLE:
+ err = kbasep_kinstr_prfcnt_parse_request_enable(
+ &req_arr[i].u.req_enable, config);
+ break;
+
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ if (err < 0)
+ break;
+ }
+
+ /* Verify that properties (like capture mode and block counter set)
+ * have been defined by the user space client.
+ */
+ if (config->prfcnt_mode == PRFCNT_MODE_RESERVED)
+ err = -EINVAL;
+
+ if (config->counter_set == KBASE_HWCNT_SET_UNDEFINED)
+ err = -EINVAL;
+
+ return err;
+}
+
+/**
+ * kbasep_kinstr_prfcnt_client_create() - Create a kinstr_prfcnt client.
+ * Does not attach to the kinstr_prfcnt
+ * context.
+ * @kinstr_ctx: Non-NULL pointer to kinstr_prfcnt context.
+ * @setup: Non-NULL pointer to hardware counter ioctl setup structure.
+ * @out_vcli: Non-NULL pointer to where created client will be stored on
+ * success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_kinstr_prfcnt_client_create(
+ struct kbase_kinstr_prfcnt_context *kinstr_ctx,
+ union kbase_ioctl_kinstr_prfcnt_setup *setup,
+ struct kbase_kinstr_prfcnt_client **out_vcli)
+{
+ int err;
+ struct kbase_kinstr_prfcnt_client *cli;
+ struct kbase_hwcnt_physical_enable_map phys_em;
+
+ WARN_ON(!kinstr_ctx);
+ WARN_ON(!setup);
+
+ cli = kzalloc(sizeof(*cli), GFP_KERNEL);
+
+ if (!cli)
+ return -ENOMEM;
+
+ cli->kinstr_ctx = kinstr_ctx;
+ err = kbasep_kinstr_prfcnt_parse_setup(kinstr_ctx, setup, &cli->config);
+
+ if (err < 0)
+ goto error;
+
+ cli->config.buffer_count = MAX_BUFFER_COUNT;
+ cli->dump_interval_ns = cli->config.period_us * NSEC_PER_USEC;
+ cli->next_dump_time_ns = 0;
+ err = kbase_hwcnt_enable_map_alloc(kinstr_ctx->metadata,
+ &cli->enable_map);
+
+ if (err < 0)
+ goto error;
+
+ phys_em.fe_bm = 0;
+ phys_em.shader_bm = 0;
+ phys_em.tiler_bm = 0;
+ phys_em.mmu_l2_bm = 0;
+
+ kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &phys_em);
+
+ cli->sample_count = cli->config.buffer_count;
+ cli->sample_size = kbasep_kinstr_prfcnt_get_sample_size(
+ kinstr_ctx->metadata, &cli->tmp_buf);
+
+ /* Use the virtualizer's metadata to allocate the tmp buffer, which
+ * interacts with the HWC virtualizer.
+ */
+ err = kbase_hwcnt_dump_buffer_alloc(kinstr_ctx->metadata,
+ &cli->tmp_buf);
+
+ if (err < 0)
+ goto error;
+
+ /* Enable all the available clk_enable_map. */
+ cli->enable_map.clk_enable_map =
+ (1ull << kinstr_ctx->metadata->clk_cnt) - 1;
+
+ /* Use metadata from virtualizer to allocate dump buffers if
+ * kinstr_prfcnt doesn't have the truncated metadata.
+ */
+ err = kbasep_kinstr_prfcnt_sample_array_alloc(kinstr_ctx->metadata,
+ cli->config.buffer_count,
+ &cli->sample_arr);
+
+ if (err < 0)
+ goto error;
+
+ err = -ENOMEM;
+
+ cli->dump_bufs_meta =
+ kmalloc_array(cli->config.buffer_count,
+ sizeof(*cli->dump_bufs_meta), GFP_KERNEL);
+
+ if (!cli->dump_bufs_meta)
+ goto error;
+
+ err = kbase_hwcnt_virtualizer_client_create(
+ kinstr_ctx->hvirt, &cli->enable_map, &cli->hvcli);
+
+ if (err < 0)
+ goto error;
+
+ init_waitqueue_head(&cli->waitq);
+ *out_vcli = cli;
+
+ return 0;
+
+error:
+ kbasep_kinstr_prfcnt_client_destroy(cli);
+ return err;
+}
+
+static size_t kbasep_kinstr_prfcnt_get_block_info_count(
+ const struct kbase_hwcnt_metadata *metadata)
+{
+ size_t grp;
+ size_t block_info_count = 0;
+
+ if (!metadata)
+ return 0;
+
+ for (grp = 0; grp < kbase_hwcnt_metadata_group_count(metadata); grp++) {
+ block_info_count +=
+ kbase_hwcnt_metadata_block_count(metadata, grp);
+ }
+
+ return block_info_count;
+}
+
+static void kbasep_kinstr_prfcnt_get_request_info_list(
+ struct kbase_kinstr_prfcnt_context *kinstr_ctx,
+ struct prfcnt_enum_item *item_arr, size_t *arr_idx)
+{
+ memcpy(&item_arr[*arr_idx], kinstr_prfcnt_supported_requests,
+ sizeof(kinstr_prfcnt_supported_requests));
+ *arr_idx += ARRAY_SIZE(kinstr_prfcnt_supported_requests);
+}
+
+static enum prfcnt_block_type
+kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(u64 type)
+{
+ enum prfcnt_block_type block_type;
+
+ switch (type) {
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:
+ block_type = PRFCNT_BLOCK_TYPE_FE;
+ break;
+
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
+ block_type = PRFCNT_BLOCK_TYPE_TILER;
+ break;
+
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:
+ block_type = PRFCNT_BLOCK_TYPE_SHADER_CORE;
+ break;
+
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
+ block_type = PRFCNT_BLOCK_TYPE_MEMORY;
+ break;
+
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED:
+ default:
+ block_type = PRFCNT_BLOCK_TYPE_RESERVED;
+ break;
+ }
+
+ return block_type;
+}
+
+static int kbasep_kinstr_prfcnt_get_block_info_list(
+ const struct kbase_hwcnt_metadata *metadata, size_t block_set,
+ struct prfcnt_enum_item *item_arr, size_t *arr_idx)
+{
+ size_t grp;
+ size_t blk;
+
+ if (!metadata || !item_arr || !arr_idx)
+ return -EINVAL;
+
+ for (grp = 0; grp < kbase_hwcnt_metadata_group_count(metadata); grp++) {
+ for (blk = 0;
+ blk < kbase_hwcnt_metadata_block_count(metadata, grp);
+ blk++, (*arr_idx)++) {
+ item_arr[*arr_idx].hdr.item_type =
+ PRFCNT_ENUM_TYPE_BLOCK;
+ item_arr[*arr_idx].hdr.item_version =
+ PRFCNT_READER_API_VERSION;
+ item_arr[*arr_idx].u.block_counter.set = block_set;
+
+ item_arr[*arr_idx].u.block_counter.block_type =
+ kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(
+ kbase_hwcnt_metadata_block_type(
+ metadata, grp, blk));
+ item_arr[*arr_idx].u.block_counter.num_instances =
+ kbase_hwcnt_metadata_block_instance_count(
+ metadata, grp, blk);
+ item_arr[*arr_idx].u.block_counter.num_values =
+ kbase_hwcnt_metadata_block_values_count(
+ metadata, grp, blk);
+
+ /* The bitmask of available counters should be dynamic.
+ * For now it is set to U64_MAX, until the required
+ * functionality becomes available in the future.
+ */
+ item_arr[*arr_idx].u.block_counter.counter_mask[0] =
+ U64_MAX;
+ item_arr[*arr_idx].u.block_counter.counter_mask[1] =
+ U64_MAX;
+ }
+ }
+
+ return 0;
+}
+
+static int kbasep_kinstr_prfcnt_enum_info_count(
+ struct kbase_kinstr_prfcnt_context *kinstr_ctx,
+ struct kbase_ioctl_kinstr_prfcnt_enum_info *enum_info)
+{
+ int err = 0;
+ uint32_t count = 0;
+ size_t block_info_count = 0;
+ const struct kbase_hwcnt_metadata *metadata;
+
+ count = ARRAY_SIZE(kinstr_prfcnt_supported_requests);
+ metadata = kbase_hwcnt_virtualizer_metadata(kinstr_ctx->hvirt);
+ block_info_count = kbasep_kinstr_prfcnt_get_block_info_count(metadata);
+ count += block_info_count;
+
+ /* Reserve one for the last sentinel item. */
+ count++;
+ enum_info->info_item_count = count;
+ enum_info->info_item_size = sizeof(struct prfcnt_enum_item);
+ kinstr_ctx->info_item_count = count;
+
+ return err;
+}
+
+static int kbasep_kinstr_prfcnt_enum_info_list(
+ struct kbase_kinstr_prfcnt_context *kinstr_ctx,
+ struct kbase_ioctl_kinstr_prfcnt_enum_info *enum_info)
+{
+ struct prfcnt_enum_item *prfcnt_item_arr;
+ size_t arr_idx = 0;
+ int err = 0;
+ size_t block_info_count = 0;
+ const struct kbase_hwcnt_metadata *metadata;
+
+ if ((enum_info->info_item_size == 0) ||
+ (enum_info->info_item_count == 0) || !enum_info->info_list_ptr)
+ return -EINVAL;
+
+ if (enum_info->info_item_count != kinstr_ctx->info_item_count)
+ return -EINVAL;
+
+ prfcnt_item_arr =
+ (struct prfcnt_enum_item *)(uintptr_t)enum_info->info_list_ptr;
+ kbasep_kinstr_prfcnt_get_request_info_list(kinstr_ctx, prfcnt_item_arr,
+ &arr_idx);
+ metadata = kbase_hwcnt_virtualizer_metadata(kinstr_ctx->hvirt);
+ block_info_count = kbasep_kinstr_prfcnt_get_block_info_count(metadata);
+
+ if (arr_idx + block_info_count >= enum_info->info_item_count)
+ err = -EINVAL;
+
+ if (!err) {
+ size_t counter_set;
+
+#if defined(CONFIG_MALI_PRFCNT_SET_SECONDARY)
+ counter_set = KBASE_HWCNT_SET_SECONDARY;
+#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY)
+ counter_set = KBASE_HWCNT_SET_TERTIARY;
+#else
+ /* Default to primary */
+ counter_set = KBASE_HWCNT_SET_PRIMARY;
+#endif
+ kbasep_kinstr_prfcnt_get_block_info_list(
+ metadata, counter_set, prfcnt_item_arr, &arr_idx);
+ if (arr_idx != enum_info->info_item_count - 1)
+ err = -EINVAL;
+ }
+
+ /* The last sentinel item. */
+ prfcnt_item_arr[enum_info->info_item_count - 1].hdr.item_type =
+ FLEX_LIST_TYPE_NONE;
+ prfcnt_item_arr[enum_info->info_item_count - 1].hdr.item_version = 0;
+
+ return err;
+}
+
+int kbase_kinstr_prfcnt_enum_info(
+ struct kbase_kinstr_prfcnt_context *kinstr_ctx,
+ struct kbase_ioctl_kinstr_prfcnt_enum_info *enum_info)
+{
+ int err;
+
+ if (!kinstr_ctx || !enum_info)
+ return -EINVAL;
+
+ if (!enum_info->info_list_ptr)
+ err = kbasep_kinstr_prfcnt_enum_info_count(kinstr_ctx,
+ enum_info);
+ else
+ err = kbasep_kinstr_prfcnt_enum_info_list(kinstr_ctx,
+ enum_info);
+
+ return err;
+}
+
+int kbase_kinstr_prfcnt_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx,
+ union kbase_ioctl_kinstr_prfcnt_setup *setup)
+{
+ int err;
+ struct kbase_kinstr_prfcnt_client *cli = NULL;
+
+ if (!kinstr_ctx || !setup)
+ return -EINVAL;
+
+ err = kbasep_kinstr_prfcnt_client_create(kinstr_ctx, setup, &cli);
+
+ if (err < 0)
+ goto error;
+
+ mutex_lock(&kinstr_ctx->lock);
+ kinstr_ctx->client_count++;
+ list_add(&cli->node, &kinstr_ctx->clients);
+ mutex_unlock(&kinstr_ctx->lock);
+
+ setup->out.prfcnt_metadata_item_size = sizeof(struct prfcnt_metadata);
+ setup->out.prfcnt_mmap_size_bytes =
+ cli->sample_size * cli->sample_count;
+
+ /* Expose to user-space only once the client is fully initialized */
+ err = anon_inode_getfd("[mali_kinstr_prfcnt_desc]",
+ &kinstr_prfcnt_client_fops, cli,
+ O_RDONLY | O_CLOEXEC);
+
+ if (err < 0)
+ goto client_installed_error;
+
+ return err;
+
+client_installed_error:
+ mutex_lock(&kinstr_ctx->lock);
+ kinstr_ctx->client_count--;
+ list_del(&cli->node);
+ mutex_unlock(&kinstr_ctx->lock);
+error:
+ kbasep_kinstr_prfcnt_client_destroy(cli);
+ return err;
+}
diff --git a/mali_kbase/mali_kbase_kinstr_prfcnt.h b/mali_kbase/mali_kbase_kinstr_prfcnt.h
new file mode 100644
index 0000000..83d76be
--- /dev/null
+++ b/mali_kbase/mali_kbase_kinstr_prfcnt.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Kinstr_prfcnt, used to provide an ioctl for userspace access to
+ * performance counters.
+ */
+#ifndef _KBASE_KINSTR_PRFCNT_H_
+#define _KBASE_KINSTR_PRFCNT_H_
+
+struct kbase_kinstr_prfcnt_context;
+struct kbase_hwcnt_virtualizer;
+struct kbase_ioctl_hwcnt_reader_setup;
+struct kbase_ioctl_kinstr_prfcnt_enum_info;
+union kbase_ioctl_kinstr_prfcnt_setup;
+
+/**
+ * kbase_kinstr_prfcnt_init() - Initialize a kinstr_prfcnt context.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ * @out_kinstr_ctx: Non-NULL pointer to where the pointer to the created
+ * kinstr_prfcnt context will be stored on success.
+ *
+ * On creation, the suspend count of the context will be 0.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_kinstr_prfcnt_init(
+ struct kbase_hwcnt_virtualizer *hvirt,
+ struct kbase_kinstr_prfcnt_context **out_kinstr_ctx);
+
+/**
+ * kbase_kinstr_prfcnt_term() - Terminate a kinstr_prfcnt context.
+ * @kinstr_ctx: Pointer to the kinstr_prfcnt context to be terminated.
+ */
+void kbase_kinstr_prfcnt_term(struct kbase_kinstr_prfcnt_context *kinstr_ctx);
+
+/**
+ * kbase_kinstr_prfcnt_suspend() - Increment the suspend count of the context.
+ * @kinstr_ctx: Non-NULL pointer to the kinstr_prfcnt context to be suspended.
+ *
+ * After this function call returns, it is guaranteed that all timers and
+ * workers in kinstr_prfcnt will be canceled, and will not be re-triggered until
+ * after the context has been resumed. In effect, this means no new counter
+ * dumps will occur for any existing or subsequently added periodic clients.
+ */
+void kbase_kinstr_prfcnt_suspend(struct kbase_kinstr_prfcnt_context *kinstr_ctx);
+
+/**
+ * kbase_kinstr_prfcnt_resume() - Decrement the suspend count of the context.
+ * @kinstr_ctx: Non-NULL pointer to the kinstr_prfcnt context to be resumed.
+ *
+ * If a call to this function decrements the suspend count from 1 to 0, then
+ * normal operation of kinstr_prfcnt will be resumed (i.e. counter dumps will once
+ * again be automatically triggered for all periodic clients).
+ *
+ * It is only valid to call this function once for each prior call to
+ * kbase_kinstr_prfcnt_suspend() that has returned.
+ */
+void kbase_kinstr_prfcnt_resume(struct kbase_kinstr_prfcnt_context *kinstr_ctx);
+
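+/*
+ * An illustrative pairing around a GPU power transition (assuming the
+ * caller holds a valid kinstr_ctx):
+ *
+ *   kbase_kinstr_prfcnt_suspend(kinstr_ctx);
+ *   ...power down or reset the GPU...
+ *   kbase_kinstr_prfcnt_resume(kinstr_ctx);
+ *
+ * Calls may nest: periodic dumping only restarts once the suspend count
+ * drops back to zero.
+ */
+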
+/**
+ * kbase_kinstr_prfcnt_enum_info - Enumerate performance counter information.
+ * @kinstr_ctx: Non-NULL pointer to the kinstr_prfcnt context.
+ * @enum_info: Non-NULL pointer to the enumeration information.
+ *
+ * Enumerate which counter blocks and banks exist, and what counters are
+ * available within them.
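+ *
+ * Enumeration is expected to be a two-step handshake: a first call with
+ * info_list_ptr set to 0 fills in info_item_count and info_item_size,
+ * after which the caller allocates info_item_count * info_item_size
+ * bytes and calls again with info_list_ptr pointing at that buffer.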
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_kinstr_prfcnt_enum_info(
+ struct kbase_kinstr_prfcnt_context *kinstr_ctx,
+ struct kbase_ioctl_kinstr_prfcnt_enum_info *enum_info);
+
+/**
+ * kbase_kinstr_prfcnt_setup() - Set up a new hardware counter reader client.
+ * @kinstr_ctx: Non-NULL pointer to the kinstr_prfcnt context.
+ * @setup: Non-NULL pointer to the hwcnt reader configuration.
+ *
+ * Start a session between a user client and the kinstr_prfcnt component.
+ * A file descriptor shall be provided to the client as a handle to the
+ * hardware counter reader client that represents the session.
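+ *
+ * An illustrative session flow: enumerate counters with
+ * kbase_kinstr_prfcnt_enum_info(), create a client with this function to
+ * obtain a file descriptor, mmap() prfcnt_mmap_size_bytes of that
+ * descriptor to access the sample buffers, then poll() it to wait for
+ * samples.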
+ *
+ * Return: file descriptor on success, else error code.
+ */
+int kbase_kinstr_prfcnt_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx,
+ union kbase_ioctl_kinstr_prfcnt_setup *setup);
+
+#endif /* _KBASE_KINSTR_PRFCNT_H_ */
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index a68e4ea..320ffef 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -351,6 +351,7 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(
/**
* Remove a region object from the global list.
+ * @kbdev: The kbase device
* @reg: Region object to remove
*
* The region reg is removed, possibly by merging with other free and
@@ -358,7 +359,8 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(
* region lock held. The associated memory is not released (see
* kbase_free_alloced_region). Internal use only.
*/
-int kbase_remove_va_region(struct kbase_va_region *reg)
+void kbase_remove_va_region(struct kbase_device *kbdev,
+ struct kbase_va_region *reg)
{
struct rb_node *rbprev;
struct kbase_va_region *prev = NULL;
@@ -368,20 +370,26 @@ int kbase_remove_va_region(struct kbase_va_region *reg)
int merged_front = 0;
int merged_back = 0;
- int err = 0;
reg_rbtree = reg->rbtree;
+ if (WARN_ON(RB_EMPTY_ROOT(reg_rbtree)))
+ return;
+
/* Try to merge with the previous block first */
rbprev = rb_prev(&(reg->rblink));
if (rbprev) {
prev = rb_entry(rbprev, struct kbase_va_region, rblink);
if (prev->flags & KBASE_REG_FREE) {
/* We're compatible with the previous VMA, merge with
- * it
+ * it, handling any gaps for robustness.
*/
+ u64 prev_end_pfn = prev->start_pfn + prev->nr_pages;
+
WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) !=
(reg->flags & KBASE_REG_ZONE_MASK));
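+ /* Illustrative example: if prev ends at PFN 0x110 and reg starts
+ * at PFN 0x112, the two-page gap is absorbed into prev before
+ * reg's own pages are added.
+ */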
+ if (!WARN_ON(reg->start_pfn < prev_end_pfn))
+ prev->nr_pages += reg->start_pfn - prev_end_pfn;
prev->nr_pages += reg->nr_pages;
rb_erase(&(reg->rblink), reg_rbtree);
reg = prev;
@@ -393,11 +401,17 @@ int kbase_remove_va_region(struct kbase_va_region *reg)
/* Note we do the lookup here as the tree may have been rebalanced. */
rbnext = rb_next(&(reg->rblink));
if (rbnext) {
- /* We're compatible with the next VMA, merge with it */
next = rb_entry(rbnext, struct kbase_va_region, rblink);
if (next->flags & KBASE_REG_FREE) {
+ /* We're compatible with the next VMA, merge with it,
+ * handling any gaps for robustness.
+ */
+ u64 reg_end_pfn = reg->start_pfn + reg->nr_pages;
+
WARN_ON((next->flags & KBASE_REG_ZONE_MASK) !=
(reg->flags & KBASE_REG_ZONE_MASK));
+ if (!WARN_ON(next->start_pfn < reg_end_pfn))
+ next->nr_pages += next->start_pfn - reg_end_pfn;
next->start_pfn = reg->start_pfn;
next->nr_pages += reg->nr_pages;
rb_erase(&(reg->rblink), reg_rbtree);
@@ -412,8 +426,8 @@ int kbase_remove_va_region(struct kbase_va_region *reg)
/* If we failed to merge then we need to add a new block */
if (!(merged_front || merged_back)) {
/*
- * We didn't merge anything. Add a new free
- * placeholder and remove the original one.
+ * We didn't merge anything. Try to add a new free
+ * placeholder, and in any case, remove the original one.
*/
struct kbase_va_region *free_reg;
@@ -421,14 +435,37 @@ int kbase_remove_va_region(struct kbase_va_region *reg)
reg->start_pfn, reg->nr_pages,
reg->flags & KBASE_REG_ZONE_MASK);
if (!free_reg) {
- err = -ENOMEM;
+ /* In case of failure, we cannot allocate a replacement
+ * free region, so we will be left with a 'gap' in the
+ * region tracker's address range (though, the rbtree
+ * will itself still be correct after erasing
+ * 'reg').
+ *
+ * The gap will be rectified when an adjacent region is
+ * removed by one of the above merging paths. Other
+ * paths will gracefully fail to allocate if they try
+ * to allocate in the gap.
+ *
+ * There is nothing that the caller can do, since free
+ * paths must not fail. The existing 'reg' cannot be
+ * repurposed as the free region as callers must have
+ * freedom of use with it by virtue of it being owned
+ * by them, not the region tracker insert/remove code.
+ */
+ dev_warn(
+ kbdev->dev,
+ "Could not alloc a replacement free region for 0x%.16llx..0x%.16llx",
+ (unsigned long long)reg->start_pfn << PAGE_SHIFT,
+ (unsigned long long)(reg->start_pfn + reg->nr_pages) << PAGE_SHIFT);
+ rb_erase(&(reg->rblink), reg_rbtree);
+
goto out;
}
rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree);
}
- out:
- return err;
+out:
+ return;
}
KBASE_EXPORT_TEST_API(kbase_remove_va_region);
@@ -456,6 +493,9 @@ static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg,
KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages));
/* at least nr_pages from start_pfn should be contained within at_reg */
KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages);
+ /* having at_reg means the rb_tree should not be empty */
+ if (WARN_ON(RB_EMPTY_ROOT(reg_rbtree)))
+ return -ENOMEM;
new_reg->start_pfn = start_pfn;
new_reg->nr_pages = nr_pages;
@@ -862,6 +902,8 @@ static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx)
unsigned long zone_bits = KBASE_REG_ZONE(zone_idx);
unsigned long reg_zone;
+ if (!kbase_is_ctx_reg_zone(zone_bits))
+ continue;
zone = kbase_ctx_reg_zone_get(kctx, zone_bits);
zone_base_addr = zone->base_pfn << PAGE_SHIFT;
@@ -1457,7 +1499,9 @@ void kbase_free_alloced_region(struct kbase_va_region *reg)
KBASE_EXPORT_TEST_API(kbase_free_alloced_region);
-int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align)
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
+ u64 addr, size_t nr_pages, size_t align,
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
{
int err;
size_t i = 0;
@@ -1494,14 +1538,16 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64
KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
for (i = 0; i < alloc->imported.alias.nents; i++) {
if (alloc->imported.alias.aliased[i].alloc) {
- err = kbase_mmu_insert_pages(kctx->kbdev,
- &kctx->mmu,
- reg->start_pfn + (i * stride),
- alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset,
- alloc->imported.alias.aliased[i].length,
- reg->flags & gwt_mask,
- kctx->as_nr,
- group_id);
+ err = kbase_mmu_insert_pages(
+ kctx->kbdev, &kctx->mmu,
+ reg->start_pfn + (i * stride),
+ alloc->imported.alias.aliased[i]
+ .alloc->pages +
+ alloc->imported.alias.aliased[i]
+ .offset,
+ alloc->imported.alias.aliased[i].length,
+ reg->flags & gwt_mask, kctx->as_nr,
+ group_id, mmu_sync_info);
if (err)
goto bad_insert;
@@ -1509,26 +1555,24 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64
* creation time
*/
} else {
- err = kbase_mmu_insert_single_page(kctx,
- reg->start_pfn + i * stride,
+ err = kbase_mmu_insert_single_page(
+ kctx, reg->start_pfn + i * stride,
kctx->aliasing_sink_page,
alloc->imported.alias.aliased[i].length,
(reg->flags & mask & gwt_mask) | attr,
- group_id);
+ group_id, mmu_sync_info);
if (err)
goto bad_insert;
}
}
} else {
- err = kbase_mmu_insert_pages(kctx->kbdev,
- &kctx->mmu,
- reg->start_pfn,
- kbase_get_gpu_phy_pages(reg),
- kbase_reg_current_backed_size(reg),
- reg->flags & gwt_mask,
- kctx->as_nr,
- group_id);
+ err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu,
+ reg->start_pfn,
+ kbase_get_gpu_phy_pages(reg),
+ kbase_reg_current_backed_size(reg),
+ reg->flags & gwt_mask, kctx->as_nr,
+ group_id, mmu_sync_info);
if (err)
goto bad_insert;
kbase_mem_phy_alloc_gpu_mapped(alloc);
@@ -1548,13 +1592,12 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64
* Assume reg->gpu_alloc->nents is the number of actual pages
* in the dma-buf memory.
*/
- err = kbase_mmu_insert_single_page(kctx,
- reg->start_pfn + reg->gpu_alloc->nents,
- kctx->aliasing_sink_page,
- reg->nr_pages - reg->gpu_alloc->nents,
- (reg->flags | KBASE_REG_GPU_RD) &
- ~KBASE_REG_GPU_WR,
- KBASE_MEM_GROUP_SINK);
+ err = kbase_mmu_insert_single_page(
+ kctx, reg->start_pfn + reg->gpu_alloc->nents,
+ kctx->aliasing_sink_page,
+ reg->nr_pages - reg->gpu_alloc->nents,
+ (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR,
+ KBASE_MEM_GROUP_SINK, mmu_sync_info);
if (err)
goto bad_insert;
}
@@ -1566,7 +1609,7 @@ bad_insert:
reg->start_pfn, reg->nr_pages,
kctx->as_nr);
- kbase_remove_va_region(reg);
+ kbase_remove_va_region(kctx->kbdev, reg);
return err;
}
@@ -1588,7 +1631,28 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
/* Tear down down GPU page tables, depending on memory type. */
switch (reg->gpu_alloc->type) {
- case KBASE_MEM_TYPE_ALIAS: /* Fall-through */
+ case KBASE_MEM_TYPE_ALIAS: {
+ size_t i = 0;
+ struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
+
+ /* Due to the way the number of valid PTEs and ATEs is tracked
+ * currently, only the GPU virtual range that is backed & mapped
+ * should be passed to the kbase_mmu_teardown_pages() function,
+ * hence individual aliased regions need to be unmapped
+ * separately.
+ */
+ for (i = 0; i < alloc->imported.alias.nents; i++) {
+ if (alloc->imported.alias.aliased[i].alloc) {
+ err = kbase_mmu_teardown_pages(
+ kctx->kbdev, &kctx->mmu,
+ reg->start_pfn +
+ (i *
+ alloc->imported.alias.stride),
+ alloc->imported.alias.aliased[i].length,
+ kctx->as_nr);
+ }
+ }
+ } break;
case KBASE_MEM_TYPE_IMPORTED_UMM:
err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
reg->start_pfn, reg->nr_pages, kctx->as_nr);
@@ -1622,7 +1686,7 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
}
}
}
- /* Fall-through */
+ fallthrough;
default:
kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc);
break;
@@ -3698,7 +3762,8 @@ static size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx,
static int kbase_jit_grow(struct kbase_context *kctx,
const struct base_jit_alloc_info *info,
struct kbase_va_region *reg,
- struct kbase_sub_alloc **prealloc_sas)
+ struct kbase_sub_alloc **prealloc_sas,
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
{
size_t delta;
size_t pages_required;
@@ -3795,7 +3860,7 @@ static int kbase_jit_grow(struct kbase_context *kctx,
spin_unlock(&kctx->mem_partials_lock);
ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages,
- old_size);
+ old_size, mmu_sync_info);
/*
* The grow failed so put the allocation back in the
* pool and return failure.
@@ -4010,6 +4075,11 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL };
int i;
+ /* Calls to this function are inherently synchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;
+
#if MALI_USE_CSF
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
#else
@@ -4102,7 +4172,8 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
* so any state protected by that lock might need to be
* re-evaluated if more code is added here in future.
*/
- ret = kbase_jit_grow(kctx, info, reg, prealloc_sas);
+ ret = kbase_jit_grow(kctx, info, reg, prealloc_sas,
+ mmu_sync_info);
#if MALI_JIT_PRESSURE_LIMIT_BASE
if (!ignore_pressure_limit)
@@ -4150,7 +4221,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
flags |= BASE_MEM_TILER_ALIGN_TOP;
#endif /* !MALI_USE_CSF */
- flags |= base_mem_group_id_set(kctx->jit_group_id);
+ flags |= kbase_mem_group_id_set(kctx->jit_group_id);
#if MALI_JIT_PRESSURE_LIMIT_BASE
if (!ignore_pressure_limit) {
flags |= BASEP_MEM_PERFORM_JIT_TRIM;
@@ -4166,7 +4237,8 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
kbase_gpu_vm_unlock(kctx);
reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
- info->extension, &flags, &gpu_addr);
+ info->extension, &flags, &gpu_addr,
+ mmu_sync_info);
if (!reg) {
/* Most likely not enough GPU virtual space left for
* the new JIT allocation.
@@ -4455,6 +4527,15 @@ void kbase_jit_report_update_pressure(struct kbase_context *kctx,
}
#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
+void kbase_unpin_user_buf_page(struct page *page)
+{
+#if KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE
+ put_page(page);
+#else
+ unpin_user_page(page);
+#endif
+}
+
#if MALI_USE_CSF
static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc)
{
@@ -4465,7 +4546,7 @@ static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc)
WARN_ON(alloc->nents != alloc->imported.user_buf.nr_pages);
for (i = 0; i < alloc->nents; i++)
- put_page(pages[i]);
+ kbase_unpin_user_buf_page(pages[i]);
}
}
#endif
@@ -4524,11 +4605,10 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
pages, NULL, NULL);
#else
- pinned_pages = get_user_pages_remote(mm,
- address,
- alloc->imported.user_buf.nr_pages,
- reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
- pages, NULL, NULL);
+ pinned_pages = pin_user_pages_remote(
+ mm, address, alloc->imported.user_buf.nr_pages,
+ reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, pages, NULL,
+ NULL);
#endif
if (pinned_pages <= 0)
@@ -4536,7 +4616,7 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
if (pinned_pages != alloc->imported.user_buf.nr_pages) {
for (i = 0; i < pinned_pages; i++)
- put_page(pages[i]);
+ kbase_unpin_user_buf_page(pages[i]);
return -ENOMEM;
}
@@ -4560,6 +4640,11 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
unsigned long gwt_mask = ~0;
int err = kbase_jd_user_buf_pin_pages(kctx, reg);
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
if (err)
return err;
@@ -4596,9 +4681,9 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
#endif
err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
- pa, kbase_reg_current_backed_size(reg),
- reg->flags & gwt_mask, kctx->as_nr,
- alloc->group_id);
+ pa, kbase_reg_current_backed_size(reg),
+ reg->flags & gwt_mask, kctx->as_nr,
+ alloc->group_id, mmu_sync_info);
if (err == 0)
return 0;
@@ -4612,7 +4697,7 @@ unwind:
}
while (++i < pinned_pages) {
- put_page(pages[i]);
+ kbase_unpin_user_buf_page(pages[i]);
pages[i] = NULL;
}
@@ -4642,7 +4727,7 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
if (writeable)
set_page_dirty_lock(pages[i]);
#if !MALI_USE_CSF
- put_page(pages[i]);
+ kbase_unpin_user_buf_page(pages[i]);
pages[i] = NULL;
#endif
diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h
index e9ac809..95533f5 100644
--- a/mali_kbase/mali_kbase_mem.h
+++ b/mali_kbase/mali_kbase_mem.h
@@ -506,6 +506,21 @@ struct kbase_va_region {
int va_refcnt;
};
+/**
+ * kbase_is_ctx_reg_zone - determine whether a KBASE_REG_ZONE_<...> is for a
+ * context or for a device
+ * @zone_bits: A KBASE_REG_ZONE_<...> to query
+ *
+ * Return: True if the zone for @zone_bits is a context zone, False otherwise
+ */
+static inline bool kbase_is_ctx_reg_zone(unsigned long zone_bits)
+{
+ WARN_ON((zone_bits & KBASE_REG_ZONE_MASK) != zone_bits);
+ return (zone_bits == KBASE_REG_ZONE_SAME_VA ||
+ zone_bits == KBASE_REG_ZONE_CUSTOM_VA ||
+ zone_bits == KBASE_REG_ZONE_EXEC_VA);
+}
+
/* Special marker for failed JIT allocations that still must be marked as
* in-use
*/
@@ -529,12 +544,14 @@ static inline bool kbase_is_region_invalid_or_free(struct kbase_va_region *reg)
return (kbase_is_region_invalid(reg) || kbase_is_region_free(reg));
}
-int kbase_remove_va_region(struct kbase_va_region *reg);
-static inline void kbase_region_refcnt_free(struct kbase_va_region *reg)
+void kbase_remove_va_region(struct kbase_device *kbdev,
+ struct kbase_va_region *reg);
+static inline void kbase_region_refcnt_free(struct kbase_device *kbdev,
+ struct kbase_va_region *reg)
{
/* If region was mapped then remove va region*/
if (reg->start_pfn)
- kbase_remove_va_region(reg);
+ kbase_remove_va_region(kbdev, reg);
/* To detect use-after-free in debug builds */
KBASE_DEBUG_CODE(reg->flags |= KBASE_REG_FREE);
@@ -569,7 +586,7 @@ static inline struct kbase_va_region *kbase_va_region_alloc_put(
dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %pK\n",
region->va_refcnt, (void *)region);
if (!region->va_refcnt)
- kbase_region_refcnt_free(region);
+ kbase_region_refcnt_free(kctx->kbdev, region);
return NULL;
}
@@ -1167,10 +1184,13 @@ int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size
* @addr: the address to insert the region at
* @nr_pages: the number of pages in the region
* @align: the minimum alignment in pages
+ * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops.
*
* Call kbase_add_va_region() and map the region on the GPU.
*/
-int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
+ u64 addr, size_t nr_pages, size_t align,
+ enum kbase_caller_mmu_sync_info mmu_sync_info);
/**
* Remove the region from the GPU and unregister it.
@@ -1798,6 +1818,11 @@ struct kbase_mem_phy_alloc *kbase_map_external_resource(
void kbase_unmap_external_resource(struct kbase_context *kctx,
struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc);
+/**
+ * kbase_unpin_user_buf_page - Unpin a page of a user buffer.
+ * @page: page to unpin
+ */
+void kbase_unpin_user_buf_page(struct page *page);
/**
* kbase_jd_user_buf_pin_pages - Pin the pages of a user buffer.
@@ -2025,7 +2050,7 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages,
unsigned int *target_page_nr, size_t offset);
/**
- * kbase_ctx_reg_zone_end_pfn - return the end Page Frame Number of @zone
+ * kbase_reg_zone_end_pfn - return the end Page Frame Number of @zone
* @zone: zone to query
*
* Return: The end of the zone corresponding to @zone
@@ -2050,7 +2075,7 @@ static inline void kbase_ctx_reg_zone_init(struct kbase_context *kctx,
struct kbase_reg_zone *zone;
lockdep_assert_held(&kctx->reg_lock);
- WARN_ON((zone_bits & KBASE_REG_ZONE_MASK) != zone_bits);
+ WARN_ON(!kbase_is_ctx_reg_zone(zone_bits));
zone = &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)];
*zone = (struct kbase_reg_zone){
@@ -2073,7 +2098,7 @@ static inline struct kbase_reg_zone *
kbase_ctx_reg_zone_get_nolock(struct kbase_context *kctx,
unsigned long zone_bits)
{
- WARN_ON((zone_bits & KBASE_REG_ZONE_MASK) != zone_bits);
+ WARN_ON(!kbase_is_ctx_reg_zone(zone_bits));
return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)];
}
@@ -2091,9 +2116,60 @@ static inline struct kbase_reg_zone *
kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits)
{
lockdep_assert_held(&kctx->reg_lock);
- WARN_ON((zone_bits & KBASE_REG_ZONE_MASK) != zone_bits);
+ WARN_ON(!kbase_is_ctx_reg_zone(zone_bits));
return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)];
}
+/**
+ * kbase_mem_allow_alloc - Check if allocation of GPU memory is allowed
+ * @kctx: Pointer to kbase context
+ *
+ * Allocation of GPU memory is not allowed until user space has set up the
+ * tracking page (which sets kctx->process_mm). It is also disallowed if the
+ * ioctl has been issued from a forked child process using the mali device
+ * file fd inherited from the parent process.
+ *
+ * Return: true if allocation is allowed, false otherwise.
+ */
+static inline bool kbase_mem_allow_alloc(struct kbase_context *kctx)
+{
+ bool allow_alloc = true;
+
+ rcu_read_lock();
+ allow_alloc = (rcu_dereference(kctx->process_mm) == current->mm);
+ rcu_read_unlock();
+
+ return allow_alloc;
+}
+
+/**
+ * kbase_mem_group_id_get - Get group ID from flags
+ * @flags: Flags to pass to base_mem_alloc
+ *
+ * This inline function extracts the encoded group ID from flags
+ * and converts it into a numeric value (0~15).
+ *
+ * Return: group ID (0~15) extracted from the parameter
+ */
+static inline int kbase_mem_group_id_get(base_mem_alloc_flags flags)
+{
+ KBASE_DEBUG_ASSERT((flags & ~BASE_MEM_FLAGS_INPUT_MASK) == 0);
+ return (int)BASE_MEM_GROUP_ID_GET(flags);
+}
+
+/**
+ * kbase_mem_group_id_set - Set group ID into base_mem_alloc_flags
+ * @id: group ID (0~15) to encode
+ *
+ * This inline function encodes a specific group ID into base_mem_alloc_flags.
+ * Parameter 'id' should lie in the range 0 to 15.
+ *
+ * Return: base_mem_alloc_flags with the group ID (id) encoded
+ *
+ * The return value can be combined with other flags against base_mem_alloc
+ * to identify a specific memory group.
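+ *
+ * For example (illustrative):
+ *   flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD |
+ *           kbase_mem_group_id_set(3);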
+ */
+static inline base_mem_alloc_flags kbase_mem_group_id_set(int id)
+{
+ return BASE_MEM_GROUP_ID_SET(id);
+}
#endif /* _KBASE_MEM_H_ */
diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c
index 21302c1..527bec4 100644
--- a/mali_kbase/mali_kbase_mem_linux.c
+++ b/mali_kbase/mali_kbase_mem_linux.c
@@ -291,9 +291,10 @@ void kbase_phy_alloc_mapping_put(struct kbase_context *kctx,
*/
}
-struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
- u64 va_pages, u64 commit_pages,
- u64 extension, u64 *flags, u64 *gpu_va)
+struct kbase_va_region *
+kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages,
+ u64 extension, u64 *flags, u64 *gpu_va,
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
{
int zone;
struct kbase_va_region *reg;
@@ -387,7 +388,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
goto invalid_flags;
if (kbase_reg_prepare_native(reg, kctx,
- base_mem_group_id_get(*flags)) != 0) {
+ kbase_mem_group_id_get(*flags)) != 0) {
dev_err(dev, "Failed to prepare region");
goto prepare_failed;
}
@@ -469,7 +470,8 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
*gpu_va = (u64) cookie;
} else /* we control the VA */ {
- if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, 1) != 0) {
+ if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, 1,
+ mmu_sync_info) != 0) {
dev_warn(dev, "Failed to map memory on GPU");
kbase_gpu_vm_unlock(kctx);
goto no_mmap;
@@ -604,7 +606,7 @@ int kbase_mem_query(struct kbase_context *kctx,
if (KBASE_REG_GPU_VA_SAME_4GB_PAGE & reg->flags)
*out |= BASE_MEM_GPU_VA_SAME_4GB_PAGE;
- *out |= base_mem_group_id_set(reg->cpu_alloc->group_id);
+ *out |= kbase_mem_group_id_set(reg->cpu_alloc->group_id);
WARN(*out & ~BASE_MEM_FLAGS_QUERYABLE,
"BASE_MEM_FLAGS_QUERYABLE needs updating\n");
@@ -827,6 +829,11 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc)
struct kbase_context *kctx = gpu_alloc->imported.native.kctx;
int err = 0;
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
lockdep_assert_held(&kctx->reg_lock);
mutex_lock(&kctx->jit_evict_lock);
@@ -856,9 +863,9 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc)
* pre-eviction size.
*/
if (!err)
- err = kbase_mem_grow_gpu_mapping(kctx,
- gpu_alloc->reg,
- gpu_alloc->evicted, 0);
+ err = kbase_mem_grow_gpu_mapping(
+ kctx, gpu_alloc->reg,
+ gpu_alloc->evicted, 0, mmu_sync_info);
gpu_alloc->evicted = 0;
}
@@ -1215,6 +1222,11 @@ int kbase_mem_umm_map(struct kbase_context *kctx,
struct kbase_mem_phy_alloc *alloc;
unsigned long gwt_mask = ~0;
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
lockdep_assert_held(&kctx->reg_lock);
alloc = reg->gpu_alloc;
@@ -1241,14 +1253,11 @@ int kbase_mem_umm_map(struct kbase_context *kctx,
gwt_mask = ~KBASE_REG_GPU_WR;
#endif
- err = kbase_mmu_insert_pages(kctx->kbdev,
- &kctx->mmu,
- reg->start_pfn,
+ err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
kbase_get_gpu_phy_pages(reg),
kbase_reg_current_backed_size(reg),
- reg->flags & gwt_mask,
- kctx->as_nr,
- alloc->group_id);
+ reg->flags & gwt_mask, kctx->as_nr,
+ alloc->group_id, mmu_sync_info);
if (err)
goto bad_insert;
@@ -1261,13 +1270,11 @@ int kbase_mem_umm_map(struct kbase_context *kctx,
* Assume alloc->nents is the number of actual pages in the
* dma-buf memory.
*/
- err = kbase_mmu_insert_single_page(kctx,
- reg->start_pfn + alloc->nents,
- kctx->aliasing_sink_page,
- reg->nr_pages - alloc->nents,
- (reg->flags | KBASE_REG_GPU_RD) &
- ~KBASE_REG_GPU_WR,
- KBASE_MEM_GROUP_SINK);
+ err = kbase_mmu_insert_single_page(
+ kctx, reg->start_pfn + alloc->nents,
+ kctx->aliasing_sink_page, reg->nr_pages - alloc->nents,
+ (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR,
+ KBASE_MEM_GROUP_SINK, mmu_sync_info);
if (err)
goto bad_pad_insert;
}
@@ -1640,9 +1647,12 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
#elif KERNEL_VERSION(4, 9, 0) > LINUX_VERSION_CODE
faulted_pages = get_user_pages(address, *va_pages,
write, 0, pages, NULL);
-#else
+#elif KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE
faulted_pages = get_user_pages(address, *va_pages,
write ? FOLL_WRITE : 0, pages, NULL);
+#else
+ faulted_pages = pin_user_pages(address, *va_pages,
+ write ? FOLL_WRITE : 0, pages, NULL);
#endif
up_read(kbase_mem_get_process_mmap_lock());
@@ -1694,7 +1704,7 @@ unwind_dma_map:
fault_mismatch:
if (pages) {
for (i = 0; i < faulted_pages; i++)
- put_page(pages[i]);
+ kbase_unpin_user_buf_page(pages[i]);
}
no_page_array:
invalid_flags:
@@ -1718,6 +1728,11 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
size_t i;
bool coherent;
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
KBASE_DEBUG_ASSERT(kctx);
KBASE_DEBUG_ASSERT(flags);
KBASE_DEBUG_ASSERT(ai);
@@ -1891,7 +1906,8 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
#else
if (1) {
#endif
- if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1) != 0) {
+ if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1,
+ mmu_sync_info) != 0) {
dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU");
goto no_mmap;
}
@@ -1936,6 +1952,11 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
{
struct kbase_va_region *reg;
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
KBASE_DEBUG_ASSERT(kctx);
KBASE_DEBUG_ASSERT(gpu_va);
KBASE_DEBUG_ASSERT(va_pages);
@@ -2035,7 +2056,8 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
} else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES) {
/* we control the VA, mmap now to the GPU */
- if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1) != 0)
+ if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1, mmu_sync_info) !=
+ 0)
goto no_gpu_va;
/* return real GPU VA */
*gpu_va = reg->start_pfn << PAGE_SHIFT;
@@ -2069,8 +2091,9 @@ bad_flags:
}
int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
- struct kbase_va_region *reg,
- u64 new_pages, u64 old_pages)
+ struct kbase_va_region *reg, u64 new_pages,
+ u64 old_pages,
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
{
struct tagged_addr *phy_pages;
u64 delta = new_pages - old_pages;
@@ -2081,8 +2104,10 @@ int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
/* Map the new pages into the GPU */
phy_pages = kbase_get_gpu_phy_pages(reg);
ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu,
- reg->start_pfn + old_pages, phy_pages + old_pages, delta,
- reg->flags, kctx->as_nr, reg->gpu_alloc->group_id);
+ reg->start_pfn + old_pages,
+ phy_pages + old_pages, delta, reg->flags,
+ kctx->as_nr, reg->gpu_alloc->group_id,
+ mmu_sync_info);
return ret;
}
@@ -2136,6 +2161,11 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages)
struct kbase_va_region *reg;
bool read_locked = false;
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
KBASE_DEBUG_ASSERT(kctx);
KBASE_DEBUG_ASSERT(gpu_addr != 0);
@@ -2227,8 +2257,8 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages)
/* No update required for CPU mappings, that's done on fault. */
/* Update GPU mapping. */
- res = kbase_mem_grow_gpu_mapping(kctx, reg,
- new_pages, old_pages);
+ res = kbase_mem_grow_gpu_mapping(kctx, reg, new_pages,
+ old_pages, mmu_sync_info);
/* On error free the new pages */
if (res) {
@@ -2647,6 +2677,11 @@ static int kbasep_reg_mmap(struct kbase_context *kctx,
struct kbase_va_region *reg;
int err = 0;
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
*aligned_offset = 0;
dev_dbg(kctx->kbdev->dev, "in kbasep_reg_mmap\n");
@@ -2681,7 +2716,7 @@ static int kbasep_reg_mmap(struct kbase_context *kctx,
*nr_pages = kbase_reg_current_backed_size(reg);
if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset,
- reg->nr_pages, 1) != 0) {
+ reg->nr_pages, 1, mmu_sync_info) != 0) {
dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__);
/* Unable to map in GPU space. */
WARN_ON(1);
@@ -2747,17 +2782,10 @@ int kbase_context_mmap(struct kbase_context *const kctx,
goto out_unlock;
}
- /* if not the MTP, verify that the MTP has been mapped */
- rcu_read_lock();
- /* catches both when the special page isn't present or
- * when we've forked
- */
- if (rcu_dereference(kctx->process_mm) != current->mm) {
+ if (!kbase_mem_allow_alloc(kctx)) {
err = -EINVAL;
- rcu_read_unlock();
goto out_unlock;
}
- rcu_read_unlock();
switch (vma->vm_pgoff) {
case PFN_DOWN(BASEP_MEM_INVALID_HANDLE):
diff --git a/mali_kbase/mali_kbase_mem_linux.h b/mali_kbase/mali_kbase_mem_linux.h
index 36159c1..f123d17 100644
--- a/mali_kbase/mali_kbase_mem_linux.h
+++ b/mali_kbase/mali_kbase_mem_linux.h
@@ -45,12 +45,14 @@ struct kbase_hwc_dma_mapping {
* properties for the new allocation.
* @gpu_va: Start address of the memory region which was allocated from GPU
* virtual address space.
+ * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops.
*
* Return: 0 on success or error code
*/
-struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
- u64 va_pages, u64 commit_pages,
- u64 extension, u64 *flags, u64 *gpu_va);
+struct kbase_va_region *
+kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages,
+ u64 extension, u64 *flags, u64 *gpu_va,
+ enum kbase_caller_mmu_sync_info mmu_sync_info);
/**
* kbase_mem_query - Query properties of a GPU memory region
@@ -169,6 +171,7 @@ void kbase_mem_evictable_deinit(struct kbase_context *kctx);
* @reg: The GPU region
* @new_pages: The number of pages after the grow
* @old_pages: The number of pages before the grow
+ * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops.
*
* Return: 0 on success, -errno on error.
*
@@ -178,8 +181,9 @@ void kbase_mem_evictable_deinit(struct kbase_context *kctx);
* Note: Caller must be holding the region lock.
*/
int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
- struct kbase_va_region *reg,
- u64 new_pages, u64 old_pages);
+ struct kbase_va_region *reg, u64 new_pages,
+ u64 old_pages,
+ enum kbase_caller_mmu_sync_info mmu_sync_info);
/**
* kbase_mem_evictable_make - Make a physical allocation eligible for eviction
diff --git a/mali_kbase/mali_kbase_mem_profile_debugfs.c b/mali_kbase/mali_kbase_mem_profile_debugfs.c
index 201ff51..7e77963 100644
--- a/mali_kbase/mali_kbase_mem_profile_debugfs.c
+++ b/mali_kbase/mali_kbase_mem_profile_debugfs.c
@@ -84,9 +84,9 @@ int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
if (!kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) {
if (IS_ERR_OR_NULL(kctx->kctx_dentry)) {
err = -ENOMEM;
- } else if (!debugfs_create_file("mem_profile", mode,
- kctx->kctx_dentry, kctx,
- &kbasep_mem_profile_debugfs_fops)) {
+ } else if (IS_ERR_OR_NULL(debugfs_create_file("mem_profile",
+ mode, kctx->kctx_dentry, kctx,
+ &kbasep_mem_profile_debugfs_fops))) {
err = -EAGAIN;
} else {
kbase_ctx_flag_set(kctx,
diff --git a/mali_kbase/mali_kbase_mem_profile_debugfs_buf_size.h b/mali_kbase/mali_kbase_mem_profile_debugfs_buf_size.h
index 3184a98..1210ed5 100644
--- a/mali_kbase/mali_kbase_mem_profile_debugfs_buf_size.h
+++ b/mali_kbase/mali_kbase_mem_profile_debugfs_buf_size.h
@@ -30,8 +30,7 @@
* The size of the buffer to accumulate the histogram report text in
* @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT
*/
-#define KBASE_MEM_PROFILE_MAX_BUF_SIZE \
- ((size_t) (64 + ((80 + (56 * 64)) * 53) + 56))
+#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t)(64 + ((80 + (56 * 64)) * 54) + 56))
#endif /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/
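
For reference, the factor change from 53 to 54 adds one more (80 + 56 * 64)-byte row to the worst-case profile buffer. Spelled out (the macro names below are illustrative only, not part of the driver):

    /* 56 * 64 = 3584; 80 + 3584 = 3664 bytes per row */
    #define MEM_PROFILE_BUF_53_ROWS ((size_t)(64 + (3664 * 53) + 56)) /* 194312 bytes */
    #define MEM_PROFILE_BUF_54_ROWS ((size_t)(64 + (3664 * 54) + 56)) /* 197976 bytes */
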
diff --git a/mali_kbase/mali_kbase_pbha.c b/mali_kbase/mali_kbase_pbha.c
new file mode 100644
index 0000000..3e58a7b
--- /dev/null
+++ b/mali_kbase/mali_kbase_pbha.c
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "mali_kbase_pbha.h"
+
+#include <device/mali_kbase_device.h>
+#include <mali_kbase.h>
+#define DTB_SET_SIZE 2
+
+static bool read_setting_valid(unsigned int id, unsigned int read_setting)
+{
+ switch (id) {
+ /* Valid ID - fall through all */
+ case SYSC_ALLOC_ID_R_OTHER:
+ case SYSC_ALLOC_ID_R_CSF:
+ case SYSC_ALLOC_ID_R_MMU:
+ case SYSC_ALLOC_ID_R_TILER_VERT:
+ case SYSC_ALLOC_ID_R_TILER_PTR:
+ case SYSC_ALLOC_ID_R_TILER_INDEX:
+ case SYSC_ALLOC_ID_R_TILER_OTHER:
+ case SYSC_ALLOC_ID_R_IC:
+ case SYSC_ALLOC_ID_R_ATTR:
+ case SYSC_ALLOC_ID_R_SCM:
+ case SYSC_ALLOC_ID_R_FSDC:
+ case SYSC_ALLOC_ID_R_VL:
+ case SYSC_ALLOC_ID_R_PLR:
+ case SYSC_ALLOC_ID_R_TEX:
+ case SYSC_ALLOC_ID_R_LSC:
+ switch (read_setting) {
+ /* Valid setting value - fall through all */
+ case SYSC_ALLOC_L2_ALLOC:
+ case SYSC_ALLOC_NEVER_ALLOC:
+ case SYSC_ALLOC_ALWAYS_ALLOC:
+ case SYSC_ALLOC_PTL_ALLOC:
+ case SYSC_ALLOC_L2_PTL_ALLOC:
+ return true;
+ default:
+ return false;
+ }
+ default:
+ return false;
+ }
+
+ /* Unreachable */
+ return false;
+}
+
+static bool write_setting_valid(unsigned int id, unsigned int write_setting)
+{
+ switch (id) {
+ /* Valid ID - fall through all */
+ case SYSC_ALLOC_ID_W_OTHER:
+ case SYSC_ALLOC_ID_W_CSF:
+ case SYSC_ALLOC_ID_W_PCB:
+ case SYSC_ALLOC_ID_W_TILER_PTR:
+ case SYSC_ALLOC_ID_W_TILER_VERT_PLIST:
+ case SYSC_ALLOC_ID_W_TILER_OTHER:
+ case SYSC_ALLOC_ID_W_L2_EVICT:
+ case SYSC_ALLOC_ID_W_L2_FLUSH:
+ case SYSC_ALLOC_ID_W_TIB_COLOR:
+ case SYSC_ALLOC_ID_W_TIB_COLOR_AFBCH:
+ case SYSC_ALLOC_ID_W_TIB_COLOR_AFBCB:
+ case SYSC_ALLOC_ID_W_TIB_CRC:
+ case SYSC_ALLOC_ID_W_TIB_DS:
+ case SYSC_ALLOC_ID_W_TIB_DS_AFBCH:
+ case SYSC_ALLOC_ID_W_TIB_DS_AFBCB:
+ case SYSC_ALLOC_ID_W_LSC:
+ switch (write_setting) {
+ /* Valid setting value - fall through all */
+ case SYSC_ALLOC_L2_ALLOC:
+ case SYSC_ALLOC_NEVER_ALLOC:
+ case SYSC_ALLOC_ALWAYS_ALLOC:
+ case SYSC_ALLOC_PTL_ALLOC:
+ case SYSC_ALLOC_L2_PTL_ALLOC:
+ return true;
+ default:
+ return false;
+ }
+ default:
+ return false;
+ }
+
+ /* Unreachable */
+ return false;
+}
+
+static bool settings_valid(unsigned int id, unsigned int read_setting,
+ unsigned int write_setting)
+{
+ bool settings_valid = false;
+
+ if (id < SYSC_ALLOC_COUNT * sizeof(u32)) {
+ settings_valid = read_setting_valid(id, read_setting) &&
+ write_setting_valid(id, write_setting);
+ }
+
+ return settings_valid;
+}
+
+bool kbasep_pbha_supported(struct kbase_device *kbdev)
+{
+ const u32 arch_maj_rev =
+ ARCH_MAJOR_REV_REG(kbdev->gpu_props.props.raw_props.gpu_id);
+
+ return (arch_maj_rev >= GPU_ID2_ARCH_MAJOR_REV_MAKE(11, 3));
+}
+
+int kbase_pbha_record_settings(struct kbase_device *kbdev, bool runtime,
+ unsigned int id, unsigned int read_setting,
+ unsigned int write_setting)
+{
+ bool const valid = settings_valid(id, read_setting, write_setting);
+
+ if (valid) {
+ unsigned int const sysc_alloc_num = id / sizeof(u32);
+ u32 modified_reg;
+ if (runtime) {
+ int i;
+
+ kbase_pm_context_active(kbdev);
+ /* Ensure host copy of SYSC_ALLOC is up to date */
+ for (i = 0; i < SYSC_ALLOC_COUNT; i++)
+ kbdev->sysc_alloc[i] = kbase_reg_read(
+ kbdev, GPU_CONTROL_REG(SYSC_ALLOC(i)));
+ kbase_pm_context_idle(kbdev);
+ }
+
+ modified_reg = kbdev->sysc_alloc[sysc_alloc_num];
+
+ switch (id % sizeof(u32)) {
+ case 0:
+ modified_reg = SYSC_ALLOC_R_SYSC_ALLOC0_SET(
+ modified_reg, read_setting);
+ modified_reg = SYSC_ALLOC_W_SYSC_ALLOC0_SET(
+ modified_reg, write_setting);
+ break;
+ case 1:
+ modified_reg = SYSC_ALLOC_R_SYSC_ALLOC1_SET(
+ modified_reg, read_setting);
+ modified_reg = SYSC_ALLOC_W_SYSC_ALLOC1_SET(
+ modified_reg, write_setting);
+ break;
+ case 2:
+ modified_reg = SYSC_ALLOC_R_SYSC_ALLOC2_SET(
+ modified_reg, read_setting);
+ modified_reg = SYSC_ALLOC_W_SYSC_ALLOC2_SET(
+ modified_reg, write_setting);
+ break;
+ case 3:
+ modified_reg = SYSC_ALLOC_R_SYSC_ALLOC3_SET(
+ modified_reg, read_setting);
+ modified_reg = SYSC_ALLOC_W_SYSC_ALLOC3_SET(
+ modified_reg, write_setting);
+ break;
+ }
+
+ kbdev->sysc_alloc[sysc_alloc_num] = modified_reg;
+ }
+
+ return valid ? 0 : -EINVAL;
+}
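
kbase_pbha_record_settings() packs four source IDs into each 32-bit SYSC_ALLOC register, one per ALLOC0..ALLOC3 read/write field pair. A worked example of the indexing, as a hypothetical helper that is not part of the driver:

    /* id 9: 9 / sizeof(u32) == 2 -> kbdev->sysc_alloc[2],
     *       9 % sizeof(u32) == 1 -> the SYSC_ALLOC1 read/write fields.
     */
    static void pbha_locate(unsigned int id, unsigned int *reg_idx,
                            unsigned int *field)
    {
            *reg_idx = id / sizeof(u32); /* which SYSC_ALLOC register */
            *field = id % sizeof(u32);   /* which of its four field pairs */
    }
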
+
+void kbase_pbha_write_settings(struct kbase_device *kbdev)
+{
+ if (kbasep_pbha_supported(kbdev)) {
+ int i;
+ for (i = 0; i < SYSC_ALLOC_COUNT; ++i)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(SYSC_ALLOC(i)),
+ kbdev->sysc_alloc[i]);
+ }
+}
+
+int kbase_pbha_read_dtb(struct kbase_device *kbdev)
+{
+ u32 dtb_data[SYSC_ALLOC_COUNT * sizeof(u32) * DTB_SET_SIZE];
+ const struct device_node *pbha_node;
+ int sz, i;
+ bool valid = true;
+
+ if (!kbasep_pbha_supported(kbdev))
+ return 0;
+
+ pbha_node = of_get_child_by_name(kbdev->dev->of_node, "pbha");
+ if (!pbha_node)
+ return 0;
+
+ sz = of_property_count_elems_of_size(pbha_node, "int_id_override",
+ sizeof(u32));
+ if (sz <= 0 || (sz % DTB_SET_SIZE != 0)) {
+ dev_err(kbdev->dev, "Bad DTB format: pbha.int_id_override\n");
+ return -EINVAL;
+ }
+ if (of_property_read_u32_array(pbha_node, "int_id_override", dtb_data,
+ sz) != 0) {
+ dev_err(kbdev->dev,
+ "Failed to read DTB pbha.int_id_override\n");
+ return -EINVAL;
+ }
+
+ for (i = 0; valid && i < sz; i = i + DTB_SET_SIZE) {
+ unsigned int rdset =
+ SYSC_ALLOC_R_SYSC_ALLOC0_GET(dtb_data[i + 1]);
+ unsigned int wrset =
+ SYSC_ALLOC_W_SYSC_ALLOC0_GET(dtb_data[i + 1]);
+ valid = valid &&
+ (kbase_pbha_record_settings(kbdev, false, dtb_data[i],
+ rdset, wrset) == 0);
+ if (valid)
+ dev_info(kbdev->dev,
+ "pbha.int_id_override 0x%x r0x%x w0x%x\n",
+ dtb_data[i], rdset, wrset);
+ }
+ if (i != sz || (!valid)) {
+ dev_err(kbdev->dev,
+ "Failed recording DTB data (pbha.int_id_override)\n");
+ return -EINVAL;
+ }
+ return 0;
+}
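
kbase_pbha_read_dtb() consumes the "int_id_override" property of a "pbha" child node as flat pairs of cells: a source ID followed by a settings word whose read and write fields are extracted with the ALLOC0 accessors above. A hypothetical example of such data, with invented values, assuming the read field occupies the low bits and the write field the next ones as the _GET helpers suggest:

    /* What of_property_read_u32_array() would hand back for a node such as
     *   pbha { int_id_override = <2 0x22>, <9 0x32>; };
     * (values invented for illustration only).
     */
    static const u32 example_int_id_override[] = {
            2, 0x22, /* id 2: read setting 0x2, write setting 0x2 */
            9, 0x32, /* id 9: read setting 0x2, write setting 0x3 */
    };
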
diff --git a/mali_kbase/mali_kbase_pbha.h b/mali_kbase/mali_kbase_pbha.h
new file mode 100644
index 0000000..6861773
--- /dev/null
+++ b/mali_kbase/mali_kbase_pbha.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_PBHA_H
+#define _KBASE_PBHA_H
+
+#include <mali_kbase.h>
+
+/**
+ * kbasep_pbha_supported - check whether PBHA registers are
+ * available
+ *
+ * Should only be used in mali_kbase_pbha* files - thus the
+ * kbase[p] prefix.
+ *
+ * @kbdev: Device pointer
+ *
+ * Return: True if pbha is supported, false otherwise
+ */
+bool kbasep_pbha_supported(struct kbase_device *kbdev);
+
+/**
+ * kbase_pbha_record_settings - record PBHA settings to be applied when
+ * L2 is powered down
+ *
+ * @kbdev: Device pointer
+ * @runtime: true if it's called at runtime and false if it's called on init.
+ * @id: memory access source ID
+ * @read_setting: Read setting
+ * @write_setting: Write setting
+ *
+ * Return: 0 on success, otherwise error code.
+ */
+int kbase_pbha_record_settings(struct kbase_device *kbdev, bool runtime,
+ unsigned int id, unsigned int read_setting,
+ unsigned int write_setting);
+
+/**
+ * kbase_pbha_write_settings - write recorded PBHA settings to GPU
+ * registers
+ *
+ * Only valid to call this function when L2 is powered down, otherwise
+ * this will not affect PBHA settings.
+ *
+ * @kbdev: Device pointer
+ */
+void kbase_pbha_write_settings(struct kbase_device *kbdev);
+
+/**
+ * kbase_pbha_read_dtb - read PBHA settings from DTB and record it to be
+ * applied when L2 is powered down
+ *
+ * @kbdev: Device pointer
+ *
+ * Return: 0 on success, otherwise error code.
+ */
+int kbase_pbha_read_dtb(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PBHA_H */
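
Taken together, the header describes a record-then-apply flow. A minimal sketch of how a caller might use it, assuming the usual probe-time ordering (the real call sites are elsewhere in the driver and not part of this patch):

    /* Sketch only: record settings (from DT at probe, or from debugfs at
     * runtime), then write them out while the L2 is powered down.
     */
    static int example_pbha_setup(struct kbase_device *kbdev)
    {
            int err = kbase_pbha_read_dtb(kbdev);

            if (err)
                    return err;

            /* ... later, once the L2 has been powered down ... */
            kbase_pbha_write_settings(kbdev);
            return 0;
    }
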
diff --git a/mali_kbase/mali_kbase_pbha_debugfs.c b/mali_kbase/mali_kbase_pbha_debugfs.c
new file mode 100644
index 0000000..47eab63
--- /dev/null
+++ b/mali_kbase/mali_kbase_pbha_debugfs.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "mali_kbase_pbha_debugfs.h"
+
+#include "mali_kbase_pbha.h"
+
+#include <device/mali_kbase_device.h>
+#include <mali_kbase_reset_gpu.h>
+#include <mali_kbase.h>
+
+static int int_id_overrides_show(struct seq_file *sfile, void *data)
+{
+ struct kbase_device *kbdev = sfile->private;
+ int i;
+
+ kbase_pm_context_active(kbdev);
+
+ /* Minimal header for readability */
+ seq_puts(sfile, "// R W\n");
+ for (i = 0; i < SYSC_ALLOC_COUNT; ++i) {
+ int j;
+ u32 reg = kbase_reg_read(kbdev, GPU_CONTROL_REG(SYSC_ALLOC(i)));
+
+ for (j = 0; j < sizeof(u32); ++j) {
+ u8 r_val;
+ u8 w_val;
+
+ switch (j) {
+ case 0:
+ r_val = SYSC_ALLOC_R_SYSC_ALLOC0_GET(reg);
+ w_val = SYSC_ALLOC_W_SYSC_ALLOC0_GET(reg);
+ break;
+ case 1:
+ r_val = SYSC_ALLOC_R_SYSC_ALLOC1_GET(reg);
+ w_val = SYSC_ALLOC_W_SYSC_ALLOC1_GET(reg);
+ break;
+ case 2:
+ r_val = SYSC_ALLOC_R_SYSC_ALLOC2_GET(reg);
+ w_val = SYSC_ALLOC_W_SYSC_ALLOC2_GET(reg);
+ break;
+ case 3:
+ r_val = SYSC_ALLOC_R_SYSC_ALLOC3_GET(reg);
+ w_val = SYSC_ALLOC_W_SYSC_ALLOC3_GET(reg);
+ break;
+ }
+ seq_printf(sfile, "%2zu 0x%x 0x%x\n",
+ (i * sizeof(u32)) + j, r_val, w_val);
+ }
+ }
+ kbase_pm_context_idle(kbdev);
+
+ return 0;
+}
+
+static ssize_t int_id_overrides_write(struct file *file,
+ const char __user *ubuf, size_t count,
+ loff_t *ppos)
+{
+ struct seq_file *sfile = file->private_data;
+ struct kbase_device *kbdev = sfile->private;
+ char raw_str[128];
+ unsigned int id;
+ unsigned int r_val;
+ unsigned int w_val;
+
+ if (count >= sizeof(raw_str))
+ return -E2BIG;
+ if (copy_from_user(raw_str, ubuf, count))
+ return -EINVAL;
+ raw_str[count] = '\0';
+
+ if (sscanf(raw_str, "%u %x %x", &id, &r_val, &w_val) != 3)
+ return -EINVAL;
+
+ if (kbase_pbha_record_settings(kbdev, true, id, r_val, w_val))
+ return -EINVAL;
+
+ /* This is a debugfs config write, so reset GPU such that changes take effect ASAP */
+ kbase_pm_context_active(kbdev);
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
+ kbase_reset_gpu(kbdev);
+ kbase_pm_context_idle(kbdev);
+
+ return count;
+}
+
+static int int_id_overrides_open(struct inode *in, struct file *file)
+{
+ return single_open(file, int_id_overrides_show, in->i_private);
+}
+
+static const struct file_operations pbha_int_id_overrides_fops = {
+ .owner = THIS_MODULE,
+ .open = int_id_overrides_open,
+ .read = seq_read,
+ .write = int_id_overrides_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+void kbase_pbha_debugfs_init(struct kbase_device *kbdev)
+{
+ if (kbasep_pbha_supported(kbdev)) {
+#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
+ /* the debug file system is only considered safe on newer kernel versions */
+ const mode_t mode = 0644;
+#else
+ const mode_t mode = 0600;
+#endif
+ struct dentry *debugfs_pbha_dir = debugfs_create_dir(
+ "pbha", kbdev->mali_debugfs_directory);
+ if (IS_ERR_OR_NULL(debugfs_pbha_dir)) {
+ dev_err(kbdev->dev,
+ "Couldn't create mali debugfs page-based hardware attributes directory\n");
+ return;
+ }
+
+ debugfs_create_file("int_id_overrides", mode, debugfs_pbha_dir,
+ kbdev, &pbha_int_id_overrides_fops);
+ }
+}
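
The write handler accepts a single line of "<id> <read setting> <write setting>" with the settings in hex, then resets the GPU so the new values reach the hardware. A hypothetical user-space helper; the debugfs mount point and "mali0" directory name are assumptions based on the names created above:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    static int pbha_override(unsigned int id, unsigned int rd, unsigned int wr)
    {
            char line[32];
            int len, fd;

            fd = open("/sys/kernel/debug/mali0/pbha/int_id_overrides", O_WRONLY);
            if (fd < 0)
                    return -1;
            len = snprintf(line, sizeof(line), "%u %x %x", id, rd, wr);
            if (write(fd, line, len) != len) {
                    close(fd);
                    return -1;
            }
            return close(fd);
    }
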
diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_control_registers.h b/mali_kbase/mali_kbase_pbha_debugfs.h
index b62a8b0..3f477b4 100644
--- a/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_control_registers.h
+++ b/mali_kbase/mali_kbase_pbha_debugfs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -19,14 +19,16 @@
*
*/
-/*
- * This header was autogenerated, it should not be edited.
- */
+#ifndef _KBASE_PBHA_DEBUGFS_H
+#define _KBASE_PBHA_DEBUGFS_H
-#ifndef _UAPI_GPU_CSF_CONTROL_REGISTERS_H_
-#define _UAPI_GPU_CSF_CONTROL_REGISTERS_H_
+#include <mali_kbase.h>
-/* GPU_REGISTERS register offsets */
-#define GPU_CONTROL_MCU 0x3000 /* () MCU control registers */
+/**
+ * kbase_pbha_debugfs_init - Initialize pbha debugfs directory
+ *
+ * @kbdev: Device pointer
+ */
+void kbase_pbha_debugfs_init(struct kbase_device *kbdev);
-#endif /* _UAPI_GPU_CSF_CONTROL_REGISTERS_H_ */
+#endif /* _KBASE_PBHA_DEBUGFS_H */
diff --git a/mali_kbase/mali_kbase_pm.c b/mali_kbase/mali_kbase_pm.c
index de100dd..4078da1 100644
--- a/mali_kbase/mali_kbase_pm.c
+++ b/mali_kbase/mali_kbase_pm.c
@@ -26,6 +26,7 @@
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
#include <mali_kbase_vinstr.h>
+#include <mali_kbase_kinstr_prfcnt.h>
#include <mali_kbase_hwcnt_context.h>
#include <mali_kbase_pm.h>
@@ -76,13 +77,13 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev,
case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE:
if (kbdev->pm.active_count != 0)
break;
- /* FALLTHROUGH */
+ fallthrough;
case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE:
kbase_pm_unlock(kbdev);
return 1;
case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE:
- /* FALLTHROUGH */
+ fallthrough;
default:
KBASE_DEBUG_ASSERT_MSG(false, "unreachable");
break;
@@ -147,10 +148,11 @@ void kbase_pm_driver_suspend(struct kbase_device *kbdev)
{
KBASE_DEBUG_ASSERT(kbdev);
- /* Suspend vinstr. This blocks until the vinstr worker and timer are
- * no longer running.
+ /* Suspend HW counter intermediaries. This blocks until workers and timers
+ * are no longer running.
*/
kbase_vinstr_suspend(kbdev->vinstr_ctx);
+ kbase_kinstr_prfcnt_suspend(kbdev->kinstr_prfcnt_ctx);
/* Disable GPU hardware counters.
* This call will block until counters are disabled.
@@ -266,8 +268,9 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start)
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
#endif
- /* Resume vinstr */
+ /* Resume HW counter intermediaries. */
kbase_vinstr_resume(kbdev->vinstr_ctx);
+ kbase_kinstr_prfcnt_resume(kbdev->kinstr_prfcnt_ctx);
}
void kbase_pm_suspend(struct kbase_device *kbdev)
diff --git a/mali_kbase/mali_kbase_regs_history_debugfs.h b/mali_kbase/mali_kbase_regs_history_debugfs.h
index 3b181d3..26decb4 100644
--- a/mali_kbase/mali_kbase_regs_history_debugfs.h
+++ b/mali_kbase/mali_kbase_regs_history_debugfs.h
@@ -70,6 +70,15 @@ void kbase_io_history_dump(struct kbase_device *kbdev);
void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev);
#else /* defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_NO_MALI) */
+
+#define kbase_io_history_init(...) ((int)0)
+
+#define kbase_io_history_term CSTD_NOP
+
+#define kbase_io_history_dump CSTD_NOP
+
+#define kbasep_regs_history_debugfs_init CSTD_NOP
+
#endif /* defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_NO_MALI) */
#endif /*_KBASE_REGS_HISTORY_DEBUGFS_H*/
diff --git a/mali_kbase/mali_kbase_reset_gpu.h b/mali_kbase/mali_kbase_reset_gpu.h
index 897b732..7502fe8 100644
--- a/mali_kbase/mali_kbase_reset_gpu.h
+++ b/mali_kbase/mali_kbase_reset_gpu.h
@@ -91,7 +91,8 @@ int kbase_reset_gpu_prevent_and_wait(struct kbase_device *kbdev);
* Refer to kbase_reset_gpu_prevent_and_wait() for more information.
*
* Return: 0 on success. -EAGAIN if a reset is currently happening. Other
- * negative error codes on failure.
+ * negative error codes on failure, where -ENOMEM indicates that the GPU
+ * reset failed.
*/
int kbase_reset_gpu_try_prevent(struct kbase_device *kbdev);
diff --git a/mali_kbase/mali_kbase_vinstr.c b/mali_kbase/mali_kbase_vinstr.c
index d00bc00..6a1e782 100644
--- a/mali_kbase/mali_kbase_vinstr.c
+++ b/mali_kbase/mali_kbase_vinstr.c
@@ -24,6 +24,7 @@
#include "mali_kbase_hwcnt_types.h"
#include <uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h>
#include "mali_kbase_hwcnt_gpu.h"
+#include "mali_kbase_hwcnt_gpu_narrow.h"
#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h>
#include "mali_malisw.h"
#include "mali_kbase_debug.h"
@@ -55,8 +56,8 @@
* @metadata: Hardware counter metadata provided by virtualizer.
* @metadata_user: API compatible hardware counter metadata provided by vinstr.
* For compatibility with the user driver interface, this
- * contains a "truncated" version of the HWCNT metadata limited
- * to 64 entries per block. NULL when not required.
+ * contains a narrowed version of the HWCNT metadata limited
+ * to 64 entries per block of 32 bits each.
* @lock: Lock protecting all vinstr state.
* @suspend_count: Suspend reference count. If non-zero, timer and worker are
* prevented from being re-scheduled.
@@ -68,7 +69,7 @@
struct kbase_vinstr_context {
struct kbase_hwcnt_virtualizer *hvirt;
const struct kbase_hwcnt_metadata *metadata;
- const struct kbase_hwcnt_metadata *metadata_user;
+ const struct kbase_hwcnt_metadata_narrow *metadata_user;
struct mutex lock;
size_t suspend_count;
size_t client_count;
@@ -89,8 +90,8 @@ struct kbase_vinstr_context {
* occur. If 0, not a periodic client.
* @enable_map: Counters enable map.
* @tmp_buf: Temporary buffer to use before handing dump to client.
- * @dump_bufs: Array of dump buffers allocated by this client.
- * @dump_bufs_meta: Metadata of dump buffers.
+ * @dump_bufs: Array of narrow dump buffers allocated by this client.
+ * @dump_bufs_meta: Metadata of hwcnt reader client buffers.
* @meta_idx: Index of metadata being accessed by userspace.
* @read_idx: Index of buffer read by userspace.
* @write_idx: Index of buffer being written by dump worker.
@@ -104,7 +105,7 @@ struct kbase_vinstr_client {
u32 dump_interval_ns;
struct kbase_hwcnt_enable_map enable_map;
struct kbase_hwcnt_dump_buffer tmp_buf;
- struct kbase_hwcnt_dump_buffer_array dump_bufs;
+ struct kbase_hwcnt_dump_buffer_narrow_array dump_bufs;
struct kbase_hwcnt_reader_metadata *dump_bufs_meta;
atomic_t meta_idx;
atomic_t read_idx;
@@ -190,7 +191,7 @@ static int kbasep_vinstr_client_dump(
unsigned int write_idx;
unsigned int read_idx;
struct kbase_hwcnt_dump_buffer *tmp_buf;
- struct kbase_hwcnt_dump_buffer *dump_buf;
+ struct kbase_hwcnt_dump_buffer_narrow *dump_buf;
struct kbase_hwcnt_reader_metadata *meta;
u8 clk_cnt;
@@ -223,17 +224,11 @@ static int kbasep_vinstr_client_dump(
* variant will explicitly zero any non-enabled counters to ensure
* nothing except exactly what the user asked for is made visible.
*
- * If the metadata in vinstr (vctx->metadata_user) is not NULL, it means
- * vinstr has the truncated metadata, so do a narrow copy since
- * virtualizer has a bigger buffer but user only needs part of it.
- * otherwise we do a full copy.
+ * A narrow copy is required since the virtualizer has a bigger buffer
+ * but the user only needs part of it.
*/
- if (vcli->vctx->metadata_user)
- kbase_hwcnt_dump_buffer_copy_strict_narrow(dump_buf, tmp_buf,
- &vcli->enable_map);
- else
- kbase_hwcnt_dump_buffer_copy_strict(dump_buf, tmp_buf,
- &vcli->enable_map);
+ kbase_hwcnt_dump_buffer_copy_strict_narrow(dump_buf, tmp_buf,
+ &vcli->enable_map);
clk_cnt = vcli->vctx->metadata->clk_cnt;
@@ -388,7 +383,7 @@ static void kbasep_vinstr_client_destroy(struct kbase_vinstr_client *vcli)
kbase_hwcnt_virtualizer_client_destroy(vcli->hvcli);
kfree(vcli->dump_bufs_meta);
- kbase_hwcnt_dump_buffer_array_free(&vcli->dump_bufs);
+ kbase_hwcnt_dump_buffer_narrow_array_free(&vcli->dump_bufs);
kbase_hwcnt_dump_buffer_free(&vcli->tmp_buf);
kbase_hwcnt_enable_map_free(&vcli->enable_map);
kfree(vcli);
@@ -446,20 +441,11 @@ static int kbasep_vinstr_client_create(
/* Enable all the available clk_enable_map. */
vcli->enable_map.clk_enable_map = (1ull << vctx->metadata->clk_cnt) - 1;
- if (vctx->metadata_user)
- /* Use vinstr's truncated metadata to alloc dump buffers which
- * interact with clients.
- */
- errcode =
- kbase_hwcnt_dump_buffer_array_alloc(vctx->metadata_user,
- setup->buffer_count,
- &vcli->dump_bufs);
- else
- /* Use metadata from virtualizer to allocate dump buffers if
- * vinstr doesn't have the truncated metadata.
- */
- errcode = kbase_hwcnt_dump_buffer_array_alloc(
- vctx->metadata, setup->buffer_count, &vcli->dump_bufs);
+ /* Use vinstr's narrowed metadata to alloc narrow dump buffers which
+ * interact with clients.
+ */
+ errcode = kbase_hwcnt_dump_buffer_narrow_array_alloc(
+ vctx->metadata_user, setup->buffer_count, &vcli->dump_bufs);
if (errcode)
goto error;
@@ -504,9 +490,8 @@ int kbase_vinstr_init(
vctx->hvirt = hvirt;
vctx->metadata = metadata;
- vctx->metadata_user = NULL;
- errcode = kbase_hwcnt_gpu_metadata_create_truncate_64(
- &vctx->metadata_user, metadata);
+ errcode = kbase_hwcnt_gpu_metadata_narrow_create(&vctx->metadata_user,
+ metadata);
if (errcode)
goto err_metadata_create;
@@ -543,8 +528,7 @@ void kbase_vinstr_term(struct kbase_vinstr_context *vctx)
}
}
- if (vctx->metadata_user)
- kbase_hwcnt_metadata_destroy(vctx->metadata_user);
+ kbase_hwcnt_gpu_metadata_narrow_destroy(vctx->metadata_user);
WARN_ON(vctx->client_count != 0);
kfree(vctx);
@@ -1007,14 +991,8 @@ static long kbasep_vinstr_hwcnt_reader_ioctl(
cli, (u32 __user *)arg);
break;
case _IOC_NR(KBASE_HWCNT_READER_GET_BUFFER_SIZE):
- if (cli->vctx->metadata_user)
- rcode = put_user(
- (u32)cli->vctx->metadata_user->dump_buf_bytes,
- (u32 __user *)arg);
- else
- rcode = put_user(
- (u32)cli->vctx->metadata->dump_buf_bytes,
- (u32 __user *)arg);
+ rcode = put_user((u32)cli->vctx->metadata_user->dump_buf_bytes,
+ (u32 __user *)arg);
break;
case _IOC_NR(KBASE_HWCNT_READER_DUMP):
rcode = kbasep_vinstr_hwcnt_reader_ioctl_dump(cli);
diff --git a/mali_kbase/mali_malisw.h b/mali_kbase/mali_malisw.h
index c0649f2..3ddfcd9 100644
--- a/mali_kbase/mali_malisw.h
+++ b/mali_kbase/mali_malisw.h
@@ -96,4 +96,9 @@
*/
#define CSTD_STR2(x) CSTD_STR1(x)
+/* LINUX_VERSION_CODE < 5.4 */
+#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
+#define fallthrough CSTD_NOP(...) /* fallthrough */
+#endif
+
#endif /* _MALISW_H_ */
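
The shim above lets the kbase_pm.c hunk earlier in this patch use the fallthrough keyword even on kernels older than 5.4, which do not define it. A standalone sketch of how it behaves, with CSTD_NOP standing in for the real definition in mali_malisw.h:

    #define CSTD_NOP(...) ((void)0)

    #if !defined(fallthrough)
    #define fallthrough CSTD_NOP(...) /* fallthrough */
    #endif

    static int classify(int x)
    {
            switch (x) {
            case 0:
                    x += 1;
                    fallthrough;    /* a harmless no-op when the shim is in effect */
            case 1:
                    return x;
            default:
                    return -1;
            }
    }
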
diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
index 05253ae..c9ba3fc 100644
--- a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
+++ b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
@@ -130,6 +130,7 @@ void kbase_mmu_report_mcu_as_fault_and_reset(struct kbase_device *kbdev,
if (kbase_prepare_to_reset_gpu(kbdev,
RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
kbase_reset_gpu(kbdev);
+
}
KBASE_EXPORT_TEST_API(kbase_mmu_report_mcu_as_fault_and_reset);
@@ -482,8 +483,6 @@ static void kbase_mmu_gpu_fault_worker(struct work_struct *data)
kbase_csf_ctx_handle_fault(kctx, fault);
kbase_ctx_sched_release_ctx_lock(kctx);
- atomic_dec(&kbdev->faults_pending);
-
/* A work for GPU fault is complete.
* Till reaching here, no further GPU fault will be reported.
* Now clear the GPU fault to allow next GPU fault interrupt report.
@@ -492,6 +491,8 @@ static void kbase_mmu_gpu_fault_worker(struct work_struct *data)
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
GPU_COMMAND_CLEAR_FAULT);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ atomic_dec(&kbdev->faults_pending);
}
/**
diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
index 01ca419..b050be8 100644
--- a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
+++ b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
@@ -185,6 +185,7 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
kbase_mmu_hw_enable_fault(kbdev, as,
KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+
}
/**
diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c
index e3c5b15..5f6cc7a 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.c
+++ b/mali_kbase/mmu/mali_kbase_mmu.c
@@ -43,7 +43,6 @@
#include <device/mali_kbase_device.h>
#include <mali_kbase_trace_gpu_mem.h>
-#define KBASE_MMU_PAGE_ENTRIES 512
/**
* kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches.
@@ -62,9 +61,12 @@
* If sync is set then accesses in the flushed region will be drained
* before data is flush and invalidated through L1, L2 and into memory,
* after which point this function will return.
+ * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops.
*/
-static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
- u64 vpfn, size_t nr, bool sync);
+static void
+kbase_mmu_flush_invalidate(struct kbase_context *kctx, u64 vpfn, size_t nr,
+ bool sync,
+ enum kbase_caller_mmu_sync_info mmu_sync_info);
/**
* kbase_mmu_flush_invalidate_no_ctx() - Flush and invalidate the GPU caches.
@@ -73,11 +75,13 @@ static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
* @nr: The number of pages to flush.
* @sync: Set if the operation should be synchronous or not.
* @as_nr: GPU address space number for which flush + invalidate is required.
+ * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops.
*
* This is used for MMU tables which do not belong to a user space context.
*/
-static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev,
- u64 vpfn, size_t nr, bool sync, int as_nr);
+static void kbase_mmu_flush_invalidate_no_ctx(
+ struct kbase_device *kbdev, u64 vpfn, size_t nr, bool sync, int as_nr,
+ enum kbase_caller_mmu_sync_info mmu_sync_info);
/**
* kbase_mmu_sync_pgd() - sync page directory to memory when needed.
@@ -112,6 +116,31 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
unsigned long flags, int group_id);
/**
+ * kbase_mmu_update_and_free_parent_pgds() - Update number of valid entries and
+ * free memory of the page directories
+ *
+ * @kbdev: Device pointer.
+ * @mmut: GPU MMU page table.
+ * @pgds: Physical addresses of page directories to be freed.
+ * @vpfn: The virtual page frame number.
+ * @level: The level of MMU page table.
+ */
+static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut,
+ phys_addr_t *pgds, u64 vpfn,
+ int level);
+/**
+ * kbase_mmu_free_pgd() - Free memory of the page directory
+ *
+ * @kbdev: Device pointer.
+ * @mmut: GPU MMU page table.
+ * @pgd: Physical address of page directory to be freed.
+ * @dirty: Flag to indicate whether the page may be dirty in the cache.
+ */
+static void kbase_mmu_free_pgd(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut, phys_addr_t pgd,
+ bool dirty);
+/**
* reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to
* a region on a GPU page fault
* @kbdev: KBase device
@@ -191,17 +220,31 @@ static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev,
}
#ifdef CONFIG_MALI_CINSTR_GWT
-static void kbase_gpu_mmu_handle_write_faulting_as(
- struct kbase_device *kbdev,
- struct kbase_as *faulting_as,
- u64 start_pfn, size_t nr, u32 op)
+static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev,
+ struct kbase_as *faulting_as,
+ u64 start_pfn, size_t nr,
+ u32 kctx_id)
{
+ /* Calls to this function are inherently synchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;
+ struct kbase_mmu_hw_op_param op_param;
+
mutex_lock(&kbdev->mmu_hw_mutex);
kbase_mmu_hw_clear_fault(kbdev, faulting_as,
KBASE_MMU_FAULT_TYPE_PAGE);
- kbase_mmu_hw_do_operation(kbdev, faulting_as, start_pfn,
- nr, op, 1);
+
+ /* flush L2 and unlock the VA (resumes the MMU) */
+ op_param = (struct kbase_mmu_hw_op_param){
+ .vpfn = start_pfn,
+ .nr = nr,
+ .op = KBASE_MMU_OP_FLUSH_PT,
+ .kctx_id = kctx_id,
+ .mmu_sync_info = mmu_sync_info,
+ };
+ kbase_mmu_hw_do_operation(kbdev, faulting_as, &op_param);
mutex_unlock(&kbdev->mmu_hw_mutex);
@@ -217,7 +260,6 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx,
struct kbase_device *kbdev;
struct kbase_fault *fault;
u64 fault_pfn, pfn_offset;
- u32 op;
int ret;
int as_no;
@@ -280,11 +322,8 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx,
&kbase_get_gpu_phy_pages(region)[pfn_offset],
1, region->flags, region->gpu_alloc->group_id);
- /* flush L2 and unlock the VA (resumes the MMU) */
- op = AS_COMMAND_FLUSH_PT;
-
- kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as,
- fault_pfn, 1, op);
+ kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, fault_pfn, 1,
+ kctx->id);
kbase_gpu_vm_unlock(kctx);
}
@@ -554,6 +593,11 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
size_t pages_trimmed = 0;
#endif
+ /* Calls to this function are inherently synchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;
+
faulting_as = container_of(data, struct kbase_as, work_pagefault);
fault = &faulting_as->pf_data;
fault_pfn = fault->addr >> PAGE_SHIFT;
@@ -720,6 +764,8 @@ page_fault_retry:
current_backed_size = kbase_reg_current_backed_size(region);
if (fault_rel_pfn < current_backed_size) {
+ struct kbase_mmu_hw_op_param op_param;
+
dev_dbg(kbdev->dev,
"Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
fault->addr, region->start_pfn,
@@ -738,8 +784,14 @@ page_fault_retry:
* transaction (which should cause the other page fault to be
* raised again).
*/
- kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0,
- AS_COMMAND_UNLOCK, 1);
+ op_param = (struct kbase_mmu_hw_op_param){
+ .vpfn = 0,
+ .nr = 0,
+ .op = KBASE_MMU_OP_UNLOCK,
+ .kctx_id = kctx->id,
+ .mmu_sync_info = mmu_sync_info,
+ };
+ kbase_mmu_hw_do_operation(kbdev, faulting_as, &op_param);
mutex_unlock(&kbdev->mmu_hw_mutex);
@@ -758,14 +810,23 @@ page_fault_retry:
new_pages);
if (new_pages == 0) {
+ struct kbase_mmu_hw_op_param op_param;
+
mutex_lock(&kbdev->mmu_hw_mutex);
/* Duplicate of a fault we've already handled, nothing to do */
kbase_mmu_hw_clear_fault(kbdev, faulting_as,
KBASE_MMU_FAULT_TYPE_PAGE);
+
/* See comment [1] about UNLOCK usage */
- kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0,
- AS_COMMAND_UNLOCK, 1);
+ op_param = (struct kbase_mmu_hw_op_param){
+ .vpfn = 0,
+ .nr = 0,
+ .op = KBASE_MMU_OP_UNLOCK,
+ .kctx_id = kctx->id,
+ .mmu_sync_info = mmu_sync_info,
+ };
+ kbase_mmu_hw_do_operation(kbdev, faulting_as, &op_param);
mutex_unlock(&kbdev->mmu_hw_mutex);
@@ -791,7 +852,7 @@ page_fault_retry:
if (grown) {
u64 pfn_offset;
- u32 op;
+ struct kbase_mmu_hw_op_param op_param;
/* alloc success */
WARN_ON(kbase_reg_current_backed_size(region) >
@@ -854,9 +915,6 @@ page_fault_retry:
/* AS transaction begin */
mutex_lock(&kbdev->mmu_hw_mutex);
- /* flush L2 and unlock the VA (resumes the MMU) */
- op = AS_COMMAND_FLUSH_PT;
-
/* clear MMU interrupt - this needs to be done after updating
* the page tables but before issuing a FLUSH command. The
* FLUSH cmd has a side effect that it restarts stalled memory
@@ -868,9 +926,15 @@ page_fault_retry:
kbase_mmu_hw_clear_fault(kbdev, faulting_as,
KBASE_MMU_FAULT_TYPE_PAGE);
- kbase_mmu_hw_do_operation(kbdev, faulting_as,
- fault->addr >> PAGE_SHIFT,
- new_pages, op, 1);
+ /* flush L2 and unlock the VA (resumes the MMU) */
+ op_param = (struct kbase_mmu_hw_op_param){
+ .vpfn = fault->addr >> PAGE_SHIFT,
+ .nr = new_pages,
+ .op = KBASE_MMU_OP_FLUSH_PT,
+ .kctx_id = kctx->id,
+ .mmu_sync_info = mmu_sync_info,
+ };
+ kbase_mmu_hw_do_operation(kbdev, faulting_as, &op_param);
mutex_unlock(&kbdev->mmu_hw_mutex);
/* AS transaction end */
@@ -1073,7 +1137,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev,
return -ENOMEM;
}
- kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd);
+ kbdev->mmu_mode->entry_set_pte(page, vpfn, target_pgd);
kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE);
/* Rely on the caller to update the address space flags. */
@@ -1149,6 +1213,8 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
unsigned int left = to_vpfn - vpfn;
int level;
u64 *page;
+ register unsigned int num_of_valid_entries;
+ phys_addr_t pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
if (count > left)
count = left;
@@ -1159,6 +1225,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
for (level = MIDGARD_MMU_TOPLEVEL;
level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
idx = (vpfn >> ((3 - level) * 9)) & 0x1FF;
+ pgds[level] = pgd;
page = kmap(phys_to_page(pgd));
if (mmu_mode->ate_is_valid(page[idx], level))
break; /* keep the mapping */
@@ -1181,15 +1248,33 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
goto next;
}
+ num_of_valid_entries = mmu_mode->get_num_valid_entries(page);
+ if (WARN_ON_ONCE(num_of_valid_entries < pcount))
+ num_of_valid_entries = 0;
+ else
+ num_of_valid_entries -= pcount;
+
+ if (!num_of_valid_entries) {
+ kunmap(phys_to_page(pgd));
+
+ kbase_mmu_free_pgd(kbdev, mmut, pgd, true);
+
+ kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds,
+ vpfn, level);
+ vpfn += count;
+ continue;
+ }
+
/* Invalidate the entries we added */
for (i = 0; i < pcount; i++)
mmu_mode->entry_invalidate(&page[idx + i]);
+ mmu_mode->set_num_valid_entries(page, num_of_valid_entries);
+
kbase_mmu_sync_pgd(kbdev,
kbase_dma_addr(phys_to_page(pgd)) + 8 * idx,
8 * pcount);
kunmap(phys_to_page(pgd));
-
next:
vpfn += count;
}
@@ -1199,8 +1284,9 @@ next:
* Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn'
*/
int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
- struct tagged_addr phys, size_t nr,
- unsigned long flags, int const group_id)
+ struct tagged_addr phys, size_t nr,
+ unsigned long flags, int const group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
{
phys_addr_t pgd;
u64 *pgd_page;
@@ -1233,12 +1319,13 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
unsigned int index = vpfn & 0x1FF;
unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
struct page *p;
+ register unsigned int num_of_valid_entries;
if (count > remain)
count = remain;
/*
- * Repeatedly calling mmu_get_bottom_pte() is clearly
+ * Repeatedly calling mmu_get_bottom_pgd() is clearly
* suboptimal. We don't have to re-parse the whole tree
* each time (just cache the l0-l2 sequence).
* On the other hand, it's only a gain when we map more than
@@ -1264,7 +1351,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
mutex_lock(&kctx->mmu.mmu_lock);
} while (!err);
if (err) {
- dev_warn(kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
+ dev_warn(kbdev->dev, "%s: mmu_get_bottom_pgd failure\n",
+ __func__);
if (recover_required) {
/* Invalidate the pages we have partially
* completed
@@ -1280,7 +1368,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
p = pfn_to_page(PFN_DOWN(pgd));
pgd_page = kmap(p);
if (!pgd_page) {
- dev_warn(kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
+ dev_warn(kbdev->dev, "%s: kmap failure\n", __func__);
if (recover_required) {
/* Invalidate the pages we have partially
* completed
@@ -1294,6 +1382,9 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
goto fail_unlock;
}
+ num_of_valid_entries =
+ kbdev->mmu_mode->get_num_valid_entries(pgd_page);
+
for (i = 0; i < count; i++) {
unsigned int ofs = index + i;
@@ -1304,6 +1395,9 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
phys, flags, MIDGARD_MMU_BOTTOMLEVEL, group_id);
}
+ kbdev->mmu_mode->set_num_valid_entries(
+ pgd_page, num_of_valid_entries + count);
+
vpfn += count;
remain -= count;
@@ -1320,38 +1414,41 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
recover_count += count;
}
mutex_unlock(&kctx->mmu.mmu_lock);
- kbase_mmu_flush_invalidate(kctx, start_vpfn, nr, false);
+ kbase_mmu_flush_invalidate(kctx, start_vpfn, nr, false, mmu_sync_info);
return 0;
fail_unlock:
mutex_unlock(&kctx->mmu.mmu_lock);
- kbase_mmu_flush_invalidate(kctx, start_vpfn, nr, false);
+ kbase_mmu_flush_invalidate(kctx, start_vpfn, nr, false, mmu_sync_info);
return err;
}
-static inline void cleanup_empty_pte(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut, u64 *pte)
+static void kbase_mmu_free_pgd(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut, phys_addr_t pgd,
+ bool dirty)
{
- phys_addr_t tmp_pgd;
- struct page *tmp_p;
+ struct page *p;
+
+ lockdep_assert_held(&mmut->mmu_lock);
+
+ p = pfn_to_page(PFN_DOWN(pgd));
- tmp_pgd = kbdev->mmu_mode->pte_to_phy_addr(*pte);
- tmp_p = phys_to_page(tmp_pgd);
#ifdef CONFIG_MALI_2MB_ALLOC
kbase_mem_pool_free(&kbdev->mem_pools.large[mmut->group_id],
#else
kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id],
#endif
- tmp_p, false);
+ p, dirty);
+
+ atomic_sub(1, &kbdev->memdev.used_pages);
- /* If the MMU tables belong to a context then we accounted the memory
- * usage to that context, so decrement here.
+ /* If MMU tables belong to a context then pages will have been accounted
+ * against it, so we must decrement the usage counts here.
*/
if (mmut->kctx) {
kbase_process_page_usage_dec(mmut->kctx, 1);
atomic_sub(1, &mmut->kctx->used_pages);
}
- atomic_sub(1, &kbdev->memdev.used_pages);
kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1);
}
@@ -1399,6 +1496,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex;
struct page *p;
int cur_level;
+ register unsigned int num_of_valid_entries;
if (count > remain)
count = remain;
@@ -1463,14 +1561,25 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
goto fail_unlock;
}
+ num_of_valid_entries =
+ mmu_mode->get_num_valid_entries(pgd_page);
+
if (cur_level == MIDGARD_MMU_LEVEL(2)) {
int level_index = (insert_vpfn >> 9) & 0x1FF;
u64 *target = &pgd_page[level_index];
- if (mmu_mode->pte_is_valid(*target, cur_level))
- cleanup_empty_pte(kbdev, mmut, target);
+ if (mmu_mode->pte_is_valid(*target, cur_level)) {
+ kbase_mmu_free_pgd(
+ kbdev, mmut,
+ kbdev->mmu_mode->pte_to_phy_addr(
+ *target),
+ false);
+ num_of_valid_entries--;
+ }
*target = kbase_mmu_create_ate(kbdev, *phys, flags,
cur_level, group_id);
+
+ num_of_valid_entries++;
} else {
for (i = 0; i < count; i++) {
unsigned int ofs = vindex + i;
@@ -1488,8 +1597,11 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
*target = kbase_mmu_create_ate(kbdev,
phys[i], flags, cur_level, group_id);
}
+ num_of_valid_entries += count;
}
+ mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries);
+
phys += count;
insert_vpfn += count;
remain -= count;
@@ -1513,9 +1625,10 @@ fail_unlock:
* number 'as_nr'.
*/
int kbase_mmu_insert_pages(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut, u64 vpfn,
- struct tagged_addr *phys, size_t nr,
- unsigned long flags, int as_nr, int const group_id)
+ struct kbase_mmu_table *mmut, u64 vpfn,
+ struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int as_nr, int const group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
{
int err;
@@ -1523,10 +1636,11 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev,
phys, nr, flags, group_id);
if (mmut->kctx)
- kbase_mmu_flush_invalidate(mmut->kctx, vpfn, nr, false);
+ kbase_mmu_flush_invalidate(mmut->kctx, vpfn, nr, false,
+ mmu_sync_info);
else
- kbase_mmu_flush_invalidate_no_ctx(kbdev, vpfn, nr, false,
- as_nr);
+ kbase_mmu_flush_invalidate_no_ctx(kbdev, vpfn, nr, false, as_nr,
+ mmu_sync_info);
return err;
}
@@ -1539,30 +1653,36 @@ KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
* @kctx: The KBase context.
* @vpfn: The virtual page frame number to start the flush on.
* @nr: The number of pages to flush.
- * @sync: Set if the operation should be synchronous or not.
*
* As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any
* other locking.
*/
static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx,
- u64 vpfn, size_t nr, bool sync)
+ u64 vpfn, size_t nr)
{
struct kbase_device *kbdev = kctx->kbdev;
+ struct kbase_mmu_hw_op_param op_param;
int err;
- u32 op;
+
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
/* Early out if there is nothing to do */
if (nr == 0)
return;
- if (sync)
- op = AS_COMMAND_FLUSH_MEM;
- else
- op = AS_COMMAND_FLUSH_PT;
-
- err = kbase_mmu_hw_do_operation(kbdev,
- &kbdev->as[kctx->as_nr],
- vpfn, nr, op, 0);
+ /* flush L2 and unlock the VA (resumes the MMU) */
+ op_param = (struct kbase_mmu_hw_op_param){
+ .vpfn = vpfn,
+ .nr = nr,
+ .op = KBASE_MMU_OP_FLUSH_MEM,
+ .kctx_id = kctx->id,
+ .mmu_sync_info = mmu_sync_info,
+ };
+ err = kbase_mmu_hw_do_operation(kbdev, &kbdev->as[kctx->as_nr],
+ &op_param);
if (err) {
/* Flush failed to complete, assume the
* GPU has hung and perform a reset to recover
@@ -1576,14 +1696,15 @@ static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx,
/* Perform a flush/invalidate on a particular address space
*/
-static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev,
- struct kbase_as *as,
- u64 vpfn, size_t nr, bool sync)
+static void
+kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as,
+ u64 vpfn, size_t nr, bool sync, u32 kctx_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
{
int err;
- u32 op;
bool gpu_powered;
unsigned long flags;
+ struct kbase_mmu_hw_op_param op_param;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
gpu_powered = kbdev->pm.backend.gpu_powered;
@@ -1611,13 +1732,19 @@ static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev,
/* AS transaction begin */
mutex_lock(&kbdev->mmu_hw_mutex);
+ op_param = (struct kbase_mmu_hw_op_param){
+ .vpfn = vpfn,
+ .nr = nr,
+ .kctx_id = kctx_id,
+ .mmu_sync_info = mmu_sync_info,
+ };
+
if (sync)
- op = AS_COMMAND_FLUSH_MEM;
+ op_param.op = KBASE_MMU_OP_FLUSH_MEM;
else
- op = AS_COMMAND_FLUSH_PT;
+ op_param.op = KBASE_MMU_OP_FLUSH_PT;
- err = kbase_mmu_hw_do_operation(kbdev,
- as, vpfn, nr, op, 0);
+ err = kbase_mmu_hw_do_operation(kbdev, as, &op_param);
if (err) {
/* Flush failed to complete, assume the GPU has hung and
@@ -1636,18 +1763,23 @@ static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev,
kbase_pm_context_idle(kbdev);
}
-static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev,
- u64 vpfn, size_t nr, bool sync, int as_nr)
+static void
+kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev, u64 vpfn,
+ size_t nr, bool sync, int as_nr,
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
{
/* Skip if there is nothing to do */
if (nr) {
kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[as_nr], vpfn,
- nr, sync);
+ nr, sync, 0xFFFFFFFF,
+ mmu_sync_info);
}
}
-static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
- u64 vpfn, size_t nr, bool sync)
+static void
+kbase_mmu_flush_invalidate(struct kbase_context *kctx, u64 vpfn, size_t nr,
+ bool sync,
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
{
struct kbase_device *kbdev;
bool ctx_is_in_runpool;
@@ -1669,7 +1801,8 @@ static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr],
- vpfn, nr, sync);
+ vpfn, nr, sync, kctx->id,
+ mmu_sync_info);
release_ctx(kbdev, kctx);
}
@@ -1714,17 +1847,58 @@ void kbase_mmu_disable(struct kbase_context *kctx)
* The job scheduler code will already be holding the locks and context
* so just do the flush.
*/
- kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true);
+ kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0);
kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
}
KBASE_EXPORT_TEST_API(kbase_mmu_disable);
+static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut,
+ phys_addr_t *pgds, u64 vpfn,
+ int level)
+{
+ int current_level;
+
+ lockdep_assert_held(&mmut->mmu_lock);
+
+ for (current_level = level - 1; current_level >= MIDGARD_MMU_LEVEL(0);
+ current_level--) {
+ u64 *current_page = kmap(phys_to_page(pgds[current_level]));
+ unsigned int current_valid_entries =
+ kbdev->mmu_mode->get_num_valid_entries(current_page);
+
+ if (current_valid_entries == 1 &&
+ current_level != MIDGARD_MMU_LEVEL(0)) {
+ kunmap(phys_to_page(pgds[current_level]));
+
+ kbase_mmu_free_pgd(kbdev, mmut, pgds[current_level],
+ true);
+ } else {
+ int index = (vpfn >> ((3 - current_level) * 9)) & 0x1FF;
+
+ kbdev->mmu_mode->entry_invalidate(&current_page[index]);
+
+ current_valid_entries--;
+
+ kbdev->mmu_mode->set_num_valid_entries(
+ current_page, current_valid_entries);
+
+ kbase_mmu_sync_pgd(kbdev,
+ kbase_dma_addr(phys_to_page(
+ pgds[current_level])) +
+ 8 * index,
+ 8 * 1);
+
+ kunmap(phys_to_page(pgds[current_level]));
+ break;
+ }
+ }
+}
+
/*
- * We actually only discard the ATE, and not the page table
- * pages. There is a potential DoS here, as we'll leak memory by
- * having PTEs that are potentially unused. Will require physical
- * page accounting, so MMU pages are part of the process allocation.
+ * We actually discard the ATE and free the page table pages if no valid
+ * entries remain in the PGD.
*
* IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
* currently scheduled into the runpool, and so potentially uses a lot of locks.
@@ -1741,6 +1915,11 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
struct kbase_mmu_mode const *mmu_mode;
int err = -EFAULT;
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
if (nr == 0) {
/* early out if nothing to do */
return 0;
@@ -1757,6 +1936,8 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
unsigned int pcount;
int level;
u64 *page;
+ phys_addr_t pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
+ register unsigned int num_of_valid_entries;
if (count > nr)
count = nr;
@@ -1793,6 +1974,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
goto next;
}
next_pgd = mmu_mode->pte_to_phy_addr(page[index]);
+ pgds[level] = pgd;
kunmap(phys_to_page(pgd));
pgd = next_pgd;
}
@@ -1829,14 +2011,34 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
continue;
}
+ num_of_valid_entries = mmu_mode->get_num_valid_entries(page);
+ if (WARN_ON_ONCE(num_of_valid_entries < pcount))
+ num_of_valid_entries = 0;
+ else
+ num_of_valid_entries -= pcount;
+
+ if (!num_of_valid_entries) {
+ kunmap(phys_to_page(pgd));
+
+ kbase_mmu_free_pgd(kbdev, mmut, pgd, true);
+
+ kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds,
+ vpfn, level);
+
+ vpfn += count;
+ nr -= count;
+ continue;
+ }
+
/* Invalidate the entries we added */
for (i = 0; i < pcount; i++)
mmu_mode->entry_invalidate(&page[index + i]);
- kbase_mmu_sync_pgd(kbdev,
- kbase_dma_addr(phys_to_page(pgd)) +
- 8 * index, 8*pcount);
+ mmu_mode->set_num_valid_entries(page, num_of_valid_entries);
+ kbase_mmu_sync_pgd(
+ kbdev, kbase_dma_addr(phys_to_page(pgd)) + 8 * index,
+ 8 * pcount);
next:
kunmap(phys_to_page(pgd));
vpfn += count;
@@ -1848,10 +2050,11 @@ out:
if (mmut->kctx)
kbase_mmu_flush_invalidate(mmut->kctx, start_vpfn, requested_nr,
- true);
+ true, mmu_sync_info);
else
- kbase_mmu_flush_invalidate_no_ctx(kbdev, start_vpfn, requested_nr,
- true, as_nr);
+ kbase_mmu_flush_invalidate_no_ctx(kbdev, start_vpfn,
+ requested_nr, true, as_nr,
+ mmu_sync_info);
return err;
}
@@ -1903,6 +2106,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
unsigned int index = vpfn & 0x1FF;
size_t count = KBASE_MMU_PAGE_ENTRIES - index;
struct page *p;
+ register unsigned int num_of_valid_entries;
if (count > nr)
count = nr;
@@ -1940,10 +2144,22 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
goto fail_unlock;
}
- for (i = 0; i < count; i++)
+ num_of_valid_entries =
+ kbdev->mmu_mode->get_num_valid_entries(pgd_page);
+
+ for (i = 0; i < count; i++) {
+#ifdef CONFIG_MALI_DEBUG
+ WARN_ON_ONCE(!kbdev->mmu_mode->ate_is_valid(
+ pgd_page[index + i],
+ MIDGARD_MMU_BOTTOMLEVEL));
+#endif
pgd_page[index + i] = kbase_mmu_create_ate(kbdev,
phys[i], flags, MIDGARD_MMU_BOTTOMLEVEL,
group_id);
+ }
+
+ kbdev->mmu_mode->set_num_valid_entries(pgd_page,
+ num_of_valid_entries);
phys += count;
vpfn += count;
@@ -1970,9 +2186,14 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
{
int err;
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
err = kbase_mmu_update_pages_no_flush(kctx, vpfn, phys, nr, flags,
group_id);
- kbase_mmu_flush_invalidate(kctx, vpfn, nr, true);
+ kbase_mmu_flush_invalidate(kctx, vpfn, nr, true, mmu_sync_info);
return err;
}
@@ -1981,13 +2202,18 @@ static void mmu_teardown_level(struct kbase_device *kbdev,
int level, u64 *pgd_page_buffer)
{
phys_addr_t target_pgd;
- struct page *p;
u64 *pgd_page;
int i;
struct kbase_mmu_mode const *mmu_mode;
lockdep_assert_held(&mmut->mmu_lock);
+ /* Early-out. No need to kmap to check entries for L3 PGD. */
+ if (level == MIDGARD_MMU_BOTTOMLEVEL) {
+ kbase_mmu_free_pgd(kbdev, mmut, pgd, true);
+ return;
+ }
+
pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
/* kmap_atomic should NEVER fail. */
if (WARN_ON(pgd_page == NULL))
@@ -2015,25 +2241,7 @@ static void mmu_teardown_level(struct kbase_device *kbdev,
}
}
- p = pfn_to_page(PFN_DOWN(pgd));
-#ifdef CONFIG_MALI_2MB_ALLOC
- kbase_mem_pool_free(&kbdev->mem_pools.large[mmut->group_id],
-#else
- kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id],
-#endif
- p, true);
-
- atomic_sub(1, &kbdev->memdev.used_pages);
-
- /* If MMU tables belong to a context then pages will have been accounted
- * against it, so we must decrement the usage counts here.
- */
- if (mmut->kctx) {
- kbase_process_page_usage_dec(mmut->kctx, 1);
- atomic_sub(1, &mmut->kctx->used_pages);
- }
-
- kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1);
+ kbase_mmu_free_pgd(kbdev, mmut, pgd, true);
}
int kbase_mmu_init(struct kbase_device *const kbdev,
@@ -2293,6 +2501,13 @@ void kbase_mmu_bus_fault_worker(struct work_struct *data)
}
+#if MALI_USE_CSF
+	/* Before the GPU is powered off, we wait for in-flight MMU fault
+	 * work items to complete, so the GPU is expected to remain powered
+	 * up whilst the bus fault is being handled.
+ */
+ kbase_gpu_report_bus_fault_and_kill(kctx, faulting_as, fault);
+#else
/* NOTE: If GPU already powered off for suspend,
* we don't need to switch to unmapped
*/
@@ -2301,6 +2516,7 @@ void kbase_mmu_bus_fault_worker(struct work_struct *data)
kbase_gpu_report_bus_fault_and_kill(kctx, faulting_as, fault);
kbase_pm_context_idle(kbdev);
}
+#endif
release_ctx(kbdev, kctx);
diff --git a/mali_kbase/mmu/mali_kbase_mmu.h b/mali_kbase/mmu/mali_kbase_mmu.h
index a2d1a8e..45a628c 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.h
+++ b/mali_kbase/mmu/mali_kbase_mmu.h
@@ -22,6 +22,29 @@
#ifndef _KBASE_MMU_H_
#define _KBASE_MMU_H_
+#include <uapi/gpu/arm/midgard/mali_base_kernel.h>
+
+#define KBASE_MMU_PAGE_ENTRIES 512
+
+struct kbase_context;
+struct kbase_mmu_table;
+
+/**
+ * enum kbase_caller_mmu_sync_info - MMU-synchronous caller info
+ *
+ * A value of this type is passed down from the outer-most callers in the
+ * kbase module, where it is known whether the call flow is synchronous or
+ * asynchronous with respect to MMU operations, i.e. whether it relates to
+ * existing GPU work or comes from requests (such as ioctl) from user-space,
+ * power management, etc.
+ */
+enum kbase_caller_mmu_sync_info {
+	/* Default value must be invalid to avoid the accidental choice of a 'valid' value. */
+	CALLER_MMU_UNSET_SYNCHRONICITY,
+	/* Arbitrary value for 'synchronous' that isn't easy to choose by accident. */
+ CALLER_MMU_SYNC = 0x02,
+ /* Also hard to choose by accident */
+ CALLER_MMU_ASYNC
+};
+
/**
* kbase_mmu_as_init() - Initialising GPU address space object.
*
@@ -111,10 +134,12 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
int kbase_mmu_insert_pages(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut, u64 vpfn,
struct tagged_addr *phys, size_t nr,
- unsigned long flags, int as_nr, int group_id);
+ unsigned long flags, int as_nr, int group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info);
int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
- struct tagged_addr phys, size_t nr,
- unsigned long flags, int group_id);
+ struct tagged_addr phys, size_t nr,
+ unsigned long flags, int group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info);
int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut, u64 vpfn,
@@ -152,4 +177,22 @@ int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev, u32 status,
void kbase_mmu_gpu_fault_interrupt(struct kbase_device *kbdev, u32 status,
u32 as_nr, u64 address, bool as_valid);
+/**
+ * kbase_context_mmu_group_id_get - Decode a memory group ID from
+ * base_context_create_flags
+ *
+ * Memory allocated for GPU page tables will come from the returned group.
+ *
+ * @flags: Bitmask of flags to pass to base_context_init.
+ *
+ * Return: Physical memory group ID. Valid range is 0..(BASE_MEM_GROUP_COUNT-1).
+ */
+static inline int
+kbase_context_mmu_group_id_get(base_context_create_flags const flags)
+{
+ KBASE_DEBUG_ASSERT(flags ==
+ (flags & BASEP_CONTEXT_CREATE_ALLOWED_FLAGS));
+ return (int)BASE_CONTEXT_MMU_GROUP_ID_GET(flags);
+}
+
#endif /* _KBASE_MMU_H_ */
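
/* Editor's note: illustrative sketch, not part of the patch. It shows how a
 * caller at the ioctl boundary would tag the call flow and pass the new
 * mmu_sync_info argument down to the kbase_mmu_insert_pages() prototype
 * declared above. The wrapper function itself is hypothetical.
 */
static int example_insert_pages_from_ioctl(struct kbase_device *kbdev,
					   struct kbase_mmu_table *mmut,
					   u64 vpfn, struct tagged_addr *phys,
					   size_t nr, unsigned long flags,
					   int as_nr, int group_id)
{
	/* Requests originating from user-space (e.g. ioctl) are asynchronous
	 * with respect to MMU operations, so CALLER_MMU_ASYNC is used.
	 */
	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;

	return kbase_mmu_insert_pages(kbdev, mmut, vpfn, phys, nr, flags,
				      as_nr, group_id, mmu_sync_info);
}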
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw.h b/mali_kbase/mmu/mali_kbase_mmu_hw.h
index d1f1ff2..7c0e95e 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw.h
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw.h
@@ -31,6 +31,8 @@
#ifndef _KBASE_MMU_HW_H_
#define _KBASE_MMU_HW_H_
+#include "mali_kbase_mmu.h"
+
/* Forward declarations */
struct kbase_device;
struct kbase_as;
@@ -53,6 +55,42 @@ enum kbase_mmu_fault_type {
};
/**
+ * enum kbase_mmu_op_type - enum for MMU operations
+ * @KBASE_MMU_OP_NONE: To help catch uninitialized struct
+ * @KBASE_MMU_OP_FIRST: The lower boundary of enum
+ * @KBASE_MMU_OP_LOCK: Lock memory region
+ * @KBASE_MMU_OP_UNLOCK: Unlock memory region
+ * @KBASE_MMU_OP_FLUSH_PT: Flush page table (CLN+INV L2 only)
+ * @KBASE_MMU_OP_FLUSH_MEM: Flush memory (CLN+INV L2+LSC)
+ * @KBASE_MMU_OP_COUNT: The upper boundary of enum
+ */
+enum kbase_mmu_op_type {
+ KBASE_MMU_OP_NONE = 0, /* Must be zero */
+ KBASE_MMU_OP_FIRST, /* Must be the first non-zero op */
+ KBASE_MMU_OP_LOCK = KBASE_MMU_OP_FIRST,
+ KBASE_MMU_OP_UNLOCK,
+ KBASE_MMU_OP_FLUSH_PT,
+ KBASE_MMU_OP_FLUSH_MEM,
+ KBASE_MMU_OP_COUNT /* Must be the last in enum */
+};
+
+/**
+ * struct kbase_mmu_hw_op_param - parameters for kbase_mmu_hw_do_operation()
+ * @vpfn: MMU Virtual Page Frame Number to start the operation on.
+ * @nr: Number of pages to work on.
+ * @op: Operation type (written to ASn_COMMAND).
+ * @kctx_id: Kernel context ID for MMU command tracepoint.
+ * @mmu_sync_info: Indicates whether this call is synchronous with respect to MMU operations.
+ */
+struct kbase_mmu_hw_op_param {
+ u64 vpfn;
+ u32 nr;
+ enum kbase_mmu_op_type op;
+ u32 kctx_id;
+ enum kbase_caller_mmu_sync_info mmu_sync_info;
+};
+
+/**
* kbase_mmu_hw_configure - Configure an address space for use.
* @kbdev: kbase device to configure.
* @as: address space to configure.
@@ -67,11 +105,7 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev,
* kbase_mmu_hw_do_operation - Issue an operation to the MMU.
* @kbdev: kbase device to issue the MMU operation on.
* @as: address space to issue the MMU operation on.
- * @vpfn: MMU Virtual Page Frame Number to start the operation on.
- * @nr: Number of pages to work on.
- * @type: Operation type (written to ASn_COMMAND).
- * @handling_irq: Is this operation being called during the handling
- * of an interrupt?
+ * @op_param: parameters for the operation.
*
* Issue an operation (MMU invalidate, MMU flush, etc) on the address space that
* is associated with the provided kbase_context over the specified range
@@ -79,8 +113,7 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev,
* Return: Zero if the operation was successful, non-zero otherwise.
*/
int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
- u64 vpfn, u32 nr, u32 type,
- unsigned int handling_irq);
+ struct kbase_mmu_hw_op_param *op_param);
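
/* Editor's note: illustrative sketch, not part of the patch. It shows a flush
 * over a page range being issued through the new parameter struct; the fields
 * and the call mirror the declarations above, while the wrapper function and
 * its arguments are hypothetical.
 */
static int example_flush_mem_range(struct kbase_device *kbdev,
				   struct kbase_as *as, u32 kctx_id,
				   u64 vpfn, u32 nr,
				   enum kbase_caller_mmu_sync_info sync_info)
{
	struct kbase_mmu_hw_op_param op_param = {
		.vpfn = vpfn,
		.nr = nr,
		.op = KBASE_MMU_OP_FLUSH_MEM,	/* CLN+INV L2+LSC */
		.kctx_id = kctx_id,
		.mmu_sync_info = sync_info,
	};

	/* Locks the region, issues FLUSH_MEM, then waits for completion */
	return kbase_mmu_hw_do_operation(kbdev, as, &op_param);
}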
/**
* kbase_mmu_hw_clear_fault - Clear a fault that has been previously reported by
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
index a99b988..6306946 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
@@ -19,12 +19,13 @@
*
*/
+#include <device/mali_kbase_device.h>
#include <linux/bitops.h>
#include <mali_kbase.h>
+#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_mem.h>
#include <mmu/mali_kbase_mmu_hw.h>
#include <tl/mali_kbase_tracepoints.h>
-#include <device/mali_kbase_device.h>
/**
* lock_region() - Generate lockaddr to lock memory region in MMU
@@ -35,47 +36,87 @@
* The lockaddr value is a combination of the starting address and
* the size of the region that encompasses all the memory pages to lock.
*
- * The size is expressed as a logarithm: it is represented in a way
- * that is compatible with the HW specification and it also determines
- * how many of the lowest bits of the address are cleared.
+ * Bits 5:0 are used to represent the size, which must be a power of 2.
+ * The smallest amount of memory to be locked corresponds to 32 kB,
+ * i.e. 8 memory pages, because an MMU cache line is made of 64 bytes
+ * and every page table entry is 8 bytes. Therefore, it is not possible
+ * to lock less than 8 memory pages at a time.
+ *
+ * The size is expressed as a base-2 logarithm minus one:
+ * - A value of 14 is thus interpreted as log2(32 kB) = 15, where 32 kB
+ *   is the smallest possible size.
+ * - Likewise, a value of 47 is interpreted as log2(256 TB) = 48, where 256 TB
+ *   is the largest possible size (implementation-defined value according
+ *   to the HW spec).
+ *
+ * Bits 11:6 are reserved.
+ *
+ * Bits 63:12 are used to represent the base address of the region to lock.
+ * Only the upper bits of the address are used; lowest bits are cleared
+ * to avoid confusion.
+ *
+ * The address is aligned to a multiple of the region size. This has profound
+ * implications for the region size itself: often the MMU will lock a region
+ * larger than the given number of pages, because the lock region cannot start
+ * from any arbitrary address.
*
* Return: 0 if success, or an error code on failure.
*/
static int lock_region(u64 pfn, u32 num_pages, u64 *lockaddr)
{
const u64 lockaddr_base = pfn << PAGE_SHIFT;
- u64 lockaddr_size_log2, region_frame_number_start,
- region_frame_number_end;
+ const u64 lockaddr_end = ((pfn + num_pages) << PAGE_SHIFT) - 1;
+ u64 lockaddr_size_log2;
if (num_pages == 0)
return -EINVAL;
- /* The size is expressed as a logarithm and should take into account
- * the possibility that some pages might spill into the next region.
+ /* The MMU lock region is a self-aligned region whose size
+ * is a power of 2 and that contains both start and end
+ * of the address range determined by pfn and num_pages.
+	 * The size of the MMU lock region can be defined as the
+	 * smallest power of 2 that yields the same result when both
+	 * start and end addresses are divided by it.
+ *
+	 * For instance: pfn=0x4F and num_pages=2 describe the
+	 * address range between 0x4F000 and 0x50FFF, i.e. only
+	 * 2 memory pages. However, there isn't a single lock region
+ * of 8 kB that encompasses both addresses because 0x4F000
+ * would fall into the [0x4E000, 0x4FFFF] region while
+ * 0x50000 would fall into the [0x50000, 0x51FFF] region.
+ * The minimum lock region size that includes the entire
+ * address range is 128 kB, and the region would be
+ * [0x40000, 0x5FFFF].
+ *
+ * The region size can be found by comparing the desired
+ * start and end addresses and finding the highest bit
+ * that differs. The smallest naturally aligned region
+ * must include this bit change, hence the desired region
+ * starts with this bit (and subsequent bits) set to 0
+ * and ends with the bit (and subsequent bits) set to 1.
+ *
+ * In the example above: 0x4F000 ^ 0x50FFF = 0x1FFFF
+ * therefore the highest bit that differs is bit #16
+ * and the region size (as a logarithm) is 16 + 1 = 17, i.e. 128 kB.
*/
- lockaddr_size_log2 = fls(num_pages) + PAGE_SHIFT - 1;
-
- /* Round up if the number of pages is not a power of 2. */
- if (num_pages != ((u32)1 << (lockaddr_size_log2 - PAGE_SHIFT)))
- lockaddr_size_log2 += 1;
-
- /* Round up if some memory pages spill into the next region. */
- region_frame_number_start = pfn >> (lockaddr_size_log2 - PAGE_SHIFT);
- region_frame_number_end =
- (pfn + num_pages - 1) >> (lockaddr_size_log2 - PAGE_SHIFT);
-
- if (region_frame_number_start < region_frame_number_end)
- lockaddr_size_log2 += 1;
-
- /* Represent the size according to the HW specification. */
- lockaddr_size_log2 = MAX(lockaddr_size_log2,
- KBASE_LOCK_REGION_MIN_SIZE_LOG2);
+ lockaddr_size_log2 = fls(lockaddr_base ^ lockaddr_end);
+ /* Cap the size against minimum and maximum values allowed. */
if (lockaddr_size_log2 > KBASE_LOCK_REGION_MAX_SIZE_LOG2)
return -EINVAL;
- /* The lowest bits are cleared and then set to size - 1 to represent
- * the size in a way that is compatible with the HW specification.
+ lockaddr_size_log2 =
+ MAX(lockaddr_size_log2, KBASE_LOCK_REGION_MIN_SIZE_LOG2);
+
+	/* Represent the result in a way that is compatible with the HW spec.
+	 *
+	 * Upper bits hold the base address, whose lower bits are cleared to
+	 * avoid confusion because they are going to be ignored by the MMU
+	 * anyway: lock regions shall be aligned to a multiple of their size
+	 * and cannot start from an arbitrary address.
+	 *
+	 * Lower bits hold the size, represented as the base-2 logarithm of
+	 * the actual size, minus one.
*/
*lockaddr = lockaddr_base & ~((1ull << lockaddr_size_log2) - 1);
*lockaddr |= lockaddr_size_log2 - 1;
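
/* Editor's note: illustrative sketch, not part of the patch. A stand-alone
 * model of the LOCKADDR encoding computed above, using the same XOR + fls()
 * idea. The 32 kB minimum (log2 = 15) follows the comment; the kernel
 * constant KBASE_LOCK_REGION_MIN_SIZE_LOG2 is assumed to match.
 */
#include <stdint.h>
#include <stdio.h>

static unsigned int fls64_model(uint64_t v)
{
	unsigned int r = 0;

	while (v) {		/* 1-based position of the highest set bit */
		v >>= 1;
		r++;
	}
	return r;
}

int main(void)
{
	const uint64_t base = 0x4F000;	/* start of the range to lock      */
	const uint64_t end = 0x50FFF;	/* inclusive end (2 x 4 kB pages)  */
	uint64_t size_log2 = fls64_model(base ^ end);	/* = 17 here       */
	uint64_t lockaddr;

	if (size_log2 < 15)		/* clamp to the 32 kB minimum      */
		size_log2 = 15;

	lockaddr = base & ~((1ULL << size_log2) - 1);	/* aligned base    */
	lockaddr |= size_log2 - 1;			/* log2 minus one  */

	/* Prints 0x40010: a 128 kB region based at 0x40000, size field 16 */
	printf("lockaddr = 0x%llx\n", (unsigned long long)lockaddr);
	return 0;
}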
@@ -170,20 +211,30 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as)
}
int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
- u64 vpfn, u32 nr, u32 op,
- unsigned int handling_irq)
+ struct kbase_mmu_hw_op_param *op_param)
{
int ret;
+ u64 lock_addr = 0x0;
lockdep_assert_held(&kbdev->mmu_hw_mutex);
- if (op == AS_COMMAND_UNLOCK) {
+ if (op_param->op == KBASE_MMU_OP_UNLOCK) {
/* Unlock doesn't require a lock first */
ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK);
- } else {
- u64 lock_addr;
- ret = lock_region(vpfn, nr, &lock_addr);
+ /* Wait for UNLOCK command to complete */
+ ret = wait_ready(kbdev, as->number);
+
+ if (!ret) {
+ /* read MMU_AS_CONTROL.LOCKADDR register */
+ lock_addr |= (u64)kbase_reg_read(kbdev,
+ MMU_AS_REG(as->number, AS_LOCKADDR_HI)) << 32;
+ lock_addr |= (u64)kbase_reg_read(kbdev,
+ MMU_AS_REG(as->number, AS_LOCKADDR_LO));
+ }
+ } else if (op_param->op >= KBASE_MMU_OP_FIRST &&
+ op_param->op < KBASE_MMU_OP_COUNT) {
+ ret = lock_region(op_param->vpfn, op_param->nr, &lock_addr);
if (!ret) {
/* Lock the region that needs to be updated */
@@ -195,12 +246,49 @@ int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
(lock_addr >> 32) & 0xFFFFFFFFUL);
write_cmd(kbdev, as->number, AS_COMMAND_LOCK);
- /* Run the MMU operation */
- write_cmd(kbdev, as->number, op);
-
- /* Wait for the flush to complete */
+ /* Translate and send operation to HW */
+ switch (op_param->op) {
+ case KBASE_MMU_OP_FLUSH_PT:
+ write_cmd(kbdev, as->number,
+ AS_COMMAND_FLUSH_PT);
+ break;
+ case KBASE_MMU_OP_FLUSH_MEM:
+ write_cmd(kbdev, as->number,
+ AS_COMMAND_FLUSH_MEM);
+ break;
+ case KBASE_MMU_OP_LOCK:
+ /* No further operation. */
+ break;
+ default:
+ dev_warn(kbdev->dev,
+ "Unsupported MMU operation (op=%d).\n",
+ op_param->op);
+ return -EINVAL;
+ };
+
+ /* Wait for the command to complete */
ret = wait_ready(kbdev, as->number);
}
+ } else {
+ /* Code should not reach here. */
+ dev_warn(kbdev->dev, "Invalid mmu operation (op=%d).\n",
+ op_param->op);
+ return -EINVAL;
+ }
+
+ /* MMU command instrumentation */
+ if (!ret) {
+ u64 lock_addr_base = AS_LOCKADDR_LOCKADDR_BASE_GET(lock_addr);
+ u32 lock_addr_size = AS_LOCKADDR_LOCKADDR_SIZE_GET(lock_addr);
+
+ bool is_mmu_synchronous = false;
+
+ if (op_param->mmu_sync_info == CALLER_MMU_SYNC)
+ is_mmu_synchronous = true;
+
+ KBASE_TLSTREAM_AUX_MMU_COMMAND(kbdev, op_param->kctx_id,
+ op_param->op, is_mmu_synchronous,
+ lock_addr_base, lock_addr_size);
}
return ret;
diff --git a/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c b/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c
index 16b928d..6ef4c9d 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c
+++ b/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c
@@ -42,6 +42,9 @@
#define ENTRY_ACCESS_BIT (1ULL << 10)
#define ENTRY_NX_BIT (1ULL << 54)
+#define UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR (55)
+#define VALID_ENTRY_MASK ((u64)0xF << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR)
+
/* Helper Function to perform assignment of page table entries, to
* ensure the use of strd, which is required on LPAE systems.
*/
@@ -85,6 +88,7 @@ static phys_addr_t pte_to_phy_addr(u64 entry)
if (!(entry & 1))
return 0;
+ entry &= ~VALID_ENTRY_MASK;
return entry & ~0xFFF;
}
@@ -151,10 +155,48 @@ static void entry_set_ate(u64 *entry,
ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L02);
}
-static void entry_set_pte(u64 *entry, phys_addr_t phy)
+static unsigned int get_num_valid_entries(u64 *pgd)
+{
+ register unsigned int num_of_valid_entries;
+
+ num_of_valid_entries =
+ (unsigned int)((pgd[2] & VALID_ENTRY_MASK) >>
+ (UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR - 8));
+ num_of_valid_entries |=
+ (unsigned int)((pgd[1] & VALID_ENTRY_MASK) >>
+ (UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR - 4));
+ num_of_valid_entries |=
+ (unsigned int)((pgd[0] & VALID_ENTRY_MASK) >>
+ (UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR));
+
+ return num_of_valid_entries;
+}
+
+static void set_num_valid_entries(u64 *pgd, unsigned int num_of_valid_entries)
+{
+ WARN_ON_ONCE(num_of_valid_entries > KBASE_MMU_PAGE_ENTRIES);
+
+ pgd[0] &= ~VALID_ENTRY_MASK;
+ pgd[0] |= ((u64)(num_of_valid_entries & 0xF)
+ << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR);
+
+ pgd[1] &= ~VALID_ENTRY_MASK;
+ pgd[1] |= ((u64)((num_of_valid_entries >> 4) & 0xF)
+ << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR);
+
+ pgd[2] &= ~VALID_ENTRY_MASK;
+ pgd[2] |= ((u64)((num_of_valid_entries >> 8) & 0xF)
+ << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR);
+}
+
+static void entry_set_pte(u64 *pgd, u64 vpfn, phys_addr_t phy)
{
- page_table_entry_set(entry, (phy & PAGE_MASK) |
- ENTRY_ACCESS_BIT | ENTRY_IS_PTE);
+ unsigned int nr_entries = get_num_valid_entries(pgd);
+
+ page_table_entry_set(&pgd[vpfn], (phy & PAGE_MASK) | ENTRY_ACCESS_BIT |
+ ENTRY_IS_PTE);
+
+ set_num_valid_entries(pgd, nr_entries + 1);
}
static void entry_invalidate(u64 *entry)
@@ -172,6 +214,8 @@ static struct kbase_mmu_mode const aarch64_mode = {
.entry_set_ate = entry_set_ate,
.entry_set_pte = entry_set_pte,
.entry_invalidate = entry_invalidate,
+ .get_num_valid_entries = get_num_valid_entries,
+ .set_num_valid_entries = set_num_valid_entries,
.flags = KBASE_MMU_MODE_HAS_NON_CACHEABLE
};
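
/* Editor's note: illustrative sketch, not part of the patch. A stand-alone
 * model of how the valid-entry counter is spread across the otherwise unused
 * bits 58:55 of the first three descriptors of a PGD page; the bit position
 * and mask mirror the #defines added above.
 */
#include <assert.h>
#include <stdint.h>

#define UNUSED_BIT_POS 55
#define NIBBLE_MASK ((uint64_t)0xF << UNUSED_BIT_POS)

static void model_set_count(uint64_t *pgd, unsigned int count)
{
	/* Nibble 0 (bits 3:0 of the count) lives in pgd[0], nibble 1 in
	 * pgd[1] and nibble 2 in pgd[2]; 512 entries fit in 10 bits.
	 */
	pgd[0] = (pgd[0] & ~NIBBLE_MASK) |
		 ((uint64_t)(count & 0xF) << UNUSED_BIT_POS);
	pgd[1] = (pgd[1] & ~NIBBLE_MASK) |
		 ((uint64_t)((count >> 4) & 0xF) << UNUSED_BIT_POS);
	pgd[2] = (pgd[2] & ~NIBBLE_MASK) |
		 ((uint64_t)((count >> 8) & 0xF) << UNUSED_BIT_POS);
}

static unsigned int model_get_count(const uint64_t *pgd)
{
	return (unsigned int)(((pgd[0] & NIBBLE_MASK) >> UNUSED_BIT_POS) |
			      ((pgd[1] & NIBBLE_MASK) >> (UNUSED_BIT_POS - 4)) |
			      ((pgd[2] & NIBBLE_MASK) >> (UNUSED_BIT_POS - 8)));
}

int main(void)
{
	uint64_t pgd[512] = { 0 };

	model_set_count(pgd, 300);	/* 300 = 0x12C -> nibbles 0xC, 0x2, 0x1 */
	assert(model_get_count(pgd) == 300);
	return 0;
}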
diff --git a/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c b/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c
index 3b84d74..9ae2c02 100644
--- a/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c
+++ b/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c
@@ -77,13 +77,28 @@ static int pm_callback_power_on(struct kbase_device *kbdev)
{
int ret = 1; /* Assume GPU has been powered off */
int error;
+ unsigned long flags;
- dev_dbg(kbdev->dev, "pm_callback_power_on %p\n",
+ dev_dbg(kbdev->dev, "%s %p\n", __func__,
(void *)kbdev->dev->pm_domain);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ WARN_ON(kbdev->pm.backend.gpu_powered);
+#if MALI_USE_CSF
+ if (likely(kbdev->csf.firmware_inited)) {
+ WARN_ON(!kbdev->pm.active_count);
+ WARN_ON(kbdev->pm.runtime_active);
+ }
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
enable_gpu_power_control(kbdev);
+ CSTD_UNUSED(error);
+#else
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ enable_gpu_power_control(kbdev);
error = pm_runtime_get_sync(kbdev->dev);
+
if (error == 1) {
/*
* Let core know that the chip has not been
@@ -93,22 +108,93 @@ static int pm_callback_power_on(struct kbase_device *kbdev)
}
dev_dbg(kbdev->dev, "pm_runtime_get_sync returned %d\n", error);
+#endif /* MALI_USE_CSF */
return ret;
}
static void pm_callback_power_off(struct kbase_device *kbdev)
{
- dev_dbg(kbdev->dev, "pm_callback_power_off\n");
+ unsigned long flags;
+
+ dev_dbg(kbdev->dev, "%s\n", __func__);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ WARN_ON(kbdev->pm.backend.gpu_powered);
+#if MALI_USE_CSF
+ if (likely(kbdev->csf.firmware_inited)) {
+ WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev));
+ WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_OFF);
+ }
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ /* Power down the GPU immediately */
+ disable_gpu_power_control(kbdev);
+#else /* MALI_USE_CSF */
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#ifdef KBASE_PM_RUNTIME
pm_runtime_mark_last_busy(kbdev->dev);
pm_runtime_put_autosuspend(kbdev->dev);
-
-#ifndef KBASE_PM_RUNTIME
+#else
+ /* Power down the GPU immediately as runtime PM is disabled */
disable_gpu_power_control(kbdev);
#endif
+#endif /* MALI_USE_CSF */
+}
+
+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
+static void pm_callback_runtime_gpu_active(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ int error;
+
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ WARN_ON(!kbdev->pm.backend.gpu_powered);
+ WARN_ON(!kbdev->pm.active_count);
+ WARN_ON(kbdev->pm.runtime_active);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ if (pm_runtime_status_suspended(kbdev->dev)) {
+ error = pm_runtime_get_sync(kbdev->dev);
+ dev_dbg(kbdev->dev, "pm_runtime_get_sync returned %d", error);
+ } else {
+ /* Call the async version here, otherwise there could be
+ * a deadlock if the runtime suspend operation is ongoing.
+ * Caller would have taken the kbdev->pm.lock and/or the
+ * scheduler lock, and the runtime suspend callback function
+ * will also try to acquire the same lock(s).
+ */
+ error = pm_runtime_get(kbdev->dev);
+ dev_dbg(kbdev->dev, "pm_runtime_get returned %d", error);
+ }
+
+ kbdev->pm.runtime_active = true;
}
+static void pm_callback_runtime_gpu_idle(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ dev_dbg(kbdev->dev, "%s", __func__);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ WARN_ON(!kbdev->pm.backend.gpu_powered);
+ WARN_ON(kbdev->pm.backend.l2_state != KBASE_L2_OFF);
+ WARN_ON(kbdev->pm.active_count);
+ WARN_ON(!kbdev->pm.runtime_active);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ pm_runtime_mark_last_busy(kbdev->dev);
+ pm_runtime_put_autosuspend(kbdev->dev);
+ kbdev->pm.runtime_active = false;
+}
+#endif
+
#ifdef KBASE_PM_RUNTIME
static int kbase_device_runtime_init(struct kbase_device *kbdev)
{
@@ -124,7 +210,12 @@ static int kbase_device_runtime_init(struct kbase_device *kbdev)
if (!pm_runtime_enabled(kbdev->dev)) {
dev_warn(kbdev->dev, "pm_runtime not enabled");
- ret = -ENOSYS;
+ ret = -EINVAL;
+ } else if (atomic_read(&kbdev->dev->power.usage_count)) {
+ dev_warn(kbdev->dev,
+ "%s: Device runtime usage count unexpectedly non zero %d",
+ __func__, atomic_read(&kbdev->dev->power.usage_count));
+ ret = -EINVAL;
}
return ret;
@@ -133,9 +224,15 @@ static int kbase_device_runtime_init(struct kbase_device *kbdev)
static void kbase_device_runtime_disable(struct kbase_device *kbdev)
{
dev_dbg(kbdev->dev, "kbase_device_runtime_disable\n");
+
+ if (atomic_read(&kbdev->dev->power.usage_count))
+ dev_warn(kbdev->dev,
+ "%s: Device runtime usage count unexpectedly non zero %d",
+ __func__, atomic_read(&kbdev->dev->power.usage_count));
+
pm_runtime_disable(kbdev->dev);
}
-#endif
+#endif /* KBASE_PM_RUNTIME */
static int pm_callback_runtime_on(struct kbase_device *kbdev)
{
@@ -180,6 +277,14 @@ struct kbase_pm_callback_conf pm_callbacks = {
.power_runtime_on_callback = NULL,
.power_runtime_off_callback = NULL,
#endif /* KBASE_PM_RUNTIME */
+
+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
+ .power_runtime_gpu_idle_callback = pm_callback_runtime_gpu_idle,
+ .power_runtime_gpu_active_callback = pm_callback_runtime_gpu_active,
+#else
+ .power_runtime_gpu_idle_callback = NULL,
+ .power_runtime_gpu_active_callback = NULL,
+#endif
};
diff --git a/mali_kbase/tests/include/kutf/kutf_helpers.h b/mali_kbase/tests/include/kutf/kutf_helpers.h
index c4c713c..79b1eac 100644
--- a/mali_kbase/tests/include/kutf/kutf_helpers.h
+++ b/mali_kbase/tests/include/kutf/kutf_helpers.h
@@ -81,4 +81,17 @@ int kutf_helper_input_enqueue(struct kutf_context *context,
*/
void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context);
+/* kutf_helper_external_reset_gpu() - Mimic power-on-reset using external reset
+ *
+ * Reset GPU using FPGA SYSCTL register.
+ *
+ * Note that:
+ * - It must be called on a platform that has the FPGA SYSCTL
+ *   register available, such as a Juno board.
+ * - It does not reinitialize GPU-related settings for kbase, such as interrupts.
+ *
+ * Return: 0 on success, negative value otherwise.
+ */
+int kutf_helper_external_reset_gpu(void);
+
#endif /* _KERNEL_UTF_HELPERS_H_ */
diff --git a/mali_kbase/tests/kutf/kutf_helpers.c b/mali_kbase/tests/kutf/kutf_helpers.c
index c075428..d76cebe 100644
--- a/mali_kbase/tests/kutf/kutf_helpers.c
+++ b/mali_kbase/tests/kutf/kutf_helpers.c
@@ -21,7 +21,6 @@
/* Kernel UTF test helpers */
#include <kutf/kutf_helpers.h>
-
#include <linux/err.h>
#include <linux/jiffies.h>
#include <linux/sched.h>
@@ -29,6 +28,10 @@
#include <linux/wait.h>
#include <linux/uaccess.h>
#include <linux/export.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+#include "gpu/mali_kbase_gpu_regmap.h"
+#include <device/mali_kbase_device.h>
static DEFINE_SPINLOCK(kutf_input_lock);
@@ -128,3 +131,44 @@ void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context)
{
kutf_helper_input_enqueue(context, NULL, 0);
}
+
+/* Values are taken from juno-fpga.dtsi */
+#define FPGA_SYSCTL_START_ADDR ((resource_size_t)0x6f020000)
+#define FPGA_SYSCTL_SIZE ((size_t)0xCC)
+
+/* Offset of the GPU reset register within the FPGA SYSCTL block */
+#define FPGA_SYSCTL_GPU_RESET_REG 0x64
+#define GPU_RESET_HIGH 0x1
+#define GPU_RESET_LOW 0x0
+
+int kutf_helper_external_reset_gpu(void)
+{
+ void __iomem *regs = NULL;
+ void __iomem *gpu_reset_reg = NULL;
+ int error = -ENXIO;
+ int repeat = 100;
+
+ regs = ioremap(FPGA_SYSCTL_START_ADDR, FPGA_SYSCTL_SIZE);
+ if (!regs)
+ return -ENOMEM;
+
+	/* Reset the GPU via SYSCTL_GPU_RESET by raising and then lowering the reset signal */
+ gpu_reset_reg = regs + FPGA_SYSCTL_GPU_RESET_REG;
+ while (error && repeat--) {
+ writel(GPU_RESET_HIGH, gpu_reset_reg);
+ if (readl(gpu_reset_reg) == GPU_RESET_HIGH) {
+ mdelay(100);
+ writel(GPU_RESET_LOW, gpu_reset_reg);
+ mdelay(100);
+
+			/* GPU was reset successfully */
+ if (readl(gpu_reset_reg) == GPU_RESET_LOW)
+ error = 0;
+ }
+ }
+
+ iounmap(regs);
+
+ return error;
+}
+EXPORT_SYMBOL(kutf_helper_external_reset_gpu);
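
/* Editor's note: illustrative sketch, not part of the patch. It shows how a
 * KUTF test body might use the new helper; the test function is hypothetical,
 * while kutf_test_fail() is the existing KUTF result-reporting call.
 */
static void example_external_reset_test(struct kutf_context *context)
{
	/* Only meaningful on platforms exposing the FPGA SYSCTL block,
	 * e.g. a Juno board.
	 */
	if (kutf_helper_external_reset_gpu())
		kutf_test_fail(context, "External GPU reset failed");
}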
diff --git a/mali_kbase/tests/kutf/kutf_suite.c b/mali_kbase/tests/kutf/kutf_suite.c
index 6745299..d45d9df 100644
--- a/mali_kbase/tests/kutf/kutf_suite.c
+++ b/mali_kbase/tests/kutf/kutf_suite.c
@@ -582,7 +582,7 @@ static int create_fixture_variant(struct kutf_test_function *test_func,
snprintf(name, sizeof(name), "%d", fixture_index);
test_fix->dir = debugfs_create_dir(name, test_func->dir);
- if (!test_func->dir) {
+ if (IS_ERR_OR_NULL(test_func->dir)) {
pr_err("Failed to create debugfs directory when adding fixture\n");
/* Might not be the right error, we don't get it passed back to us */
err = -EEXIST;
@@ -591,7 +591,7 @@ static int create_fixture_variant(struct kutf_test_function *test_func,
tmp = debugfs_create_file("type", S_IROTH, test_fix->dir, "fixture\n",
&kutf_debugfs_const_string_ops);
- if (!tmp) {
+ if (IS_ERR_OR_NULL(tmp)) {
pr_err("Failed to create debugfs file \"type\" when adding fixture\n");
/* Might not be the right error, we don't get it passed back to us */
err = -EEXIST;
@@ -606,7 +606,7 @@ static int create_fixture_variant(struct kutf_test_function *test_func,
"run", 0600, test_fix->dir,
test_fix,
&kutf_debugfs_run_ops);
- if (!tmp) {
+ if (IS_ERR_OR_NULL(tmp)) {
pr_err("Failed to create debugfs file \"run\" when adding fixture\n");
/* Might not be the right error, we don't get it passed back to us */
err = -EEXIST;
@@ -666,14 +666,14 @@ void kutf_add_test_with_filters_and_data(
INIT_LIST_HEAD(&test_func->variant_list);
test_func->dir = debugfs_create_dir(name, suite->dir);
- if (!test_func->dir) {
+ if (IS_ERR_OR_NULL(test_func->dir)) {
pr_err("Failed to create debugfs directory when adding test %s\n", name);
goto fail_dir;
}
tmp = debugfs_create_file("type", S_IROTH, test_func->dir, "test\n",
&kutf_debugfs_const_string_ops);
- if (!tmp) {
+ if (IS_ERR_OR_NULL(tmp)) {
pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name);
goto fail_file;
}
@@ -686,7 +686,7 @@ void kutf_add_test_with_filters_and_data(
tmp = debugfs_create_x32("filters", S_IROTH, test_func->dir,
&test_func->filters);
#endif
- if (!tmp) {
+ if (IS_ERR_OR_NULL(tmp)) {
pr_err("Failed to create debugfs file \"filters\" when adding test %s\n", name);
goto fail_file;
}
@@ -698,7 +698,7 @@ void kutf_add_test_with_filters_and_data(
#else
tmp = debugfs_create_u32("test_id", S_IROTH, test_func->dir,
&test_func->test_id);
- if (!tmp) {
+ if (IS_ERR_OR_NULL(tmp)) {
pr_err("Failed to create debugfs file \"test_id\" when adding test %s\n", name);
goto fail_file;
}
@@ -805,14 +805,14 @@ struct kutf_suite *kutf_create_suite_with_filters_and_data(
}
suite->dir = debugfs_create_dir(name, app->dir);
- if (!suite->dir) {
+ if (IS_ERR_OR_NULL(suite->dir)) {
pr_err("Failed to create debugfs directory when adding test %s\n", name);
goto fail_debugfs;
}
tmp = debugfs_create_file("type", S_IROTH, suite->dir, "suite\n",
&kutf_debugfs_const_string_ops);
- if (!tmp) {
+ if (IS_ERR_OR_NULL(tmp)) {
pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name);
goto fail_file;
}
@@ -913,14 +913,14 @@ struct kutf_application *kutf_create_application(const char *name)
}
app->dir = debugfs_create_dir(name, base_dir);
- if (!app->dir) {
+ if (IS_ERR_OR_NULL(app->dir)) {
pr_err("Failed to create debugfs direcotry when creating application %s\n", name);
goto fail_debugfs;
}
tmp = debugfs_create_file("type", S_IROTH, app->dir, "application\n",
&kutf_debugfs_const_string_ops);
- if (!tmp) {
+ if (IS_ERR_OR_NULL(tmp)) {
pr_err("Failed to create debugfs file \"type\" when creating application %s\n", name);
goto fail_file;
}
@@ -1172,7 +1172,7 @@ static int __init init_kutf_core(void)
return -ENOMEM;
base_dir = debugfs_create_dir("kutf_tests", NULL);
- if (!base_dir) {
+ if (IS_ERR_OR_NULL(base_dir)) {
destroy_workqueue(kutf_workq);
kutf_workq = NULL;
return -ENOMEM;
diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
index 5e9a2e7..87bcb31 100644
--- a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
+++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
@@ -530,7 +530,7 @@ static bool kutf_clk_trace_process_portal_cmd(struct kutf_context *context,
errmsg = kutf_clk_trace_do_get_platform(context, cmd);
break;
case PORTAL_CMD_GET_CLK_RATE_MGR:
- /* Fall through */
+ fallthrough;
case PORTAL_CMD_GET_CLK_RATE_TRACE:
errmsg = kutf_clk_trace_do_get_rate(context, cmd);
break;
@@ -538,7 +538,7 @@ static bool kutf_clk_trace_process_portal_cmd(struct kutf_context *context,
errmsg = kutf_clk_trace_do_get_snapshot(context, cmd);
break;
case PORTAL_CMD_INC_PM_CTX_CNT:
- /* Fall through */
+ fallthrough;
case PORTAL_CMD_DEC_PM_CTX_CNT:
errmsg = kutf_clk_trace_do_change_pm_ctx(context, cmd);
break;
diff --git a/mali_kbase/tl/backend/mali_kbase_timeline_csf.c b/mali_kbase/tl/backend/mali_kbase_timeline_csf.c
index a2868da..c101563 100644
--- a/mali_kbase/tl/backend/mali_kbase_timeline_csf.c
+++ b/mali_kbase/tl/backend/mali_kbase_timeline_csf.c
@@ -25,6 +25,8 @@
#include <mali_kbase.h>
+#define GPU_FEATURES_CROSS_STREAM_SYNC_MASK (1ull << 3ull)
+
void kbase_create_timeline_objects(struct kbase_device *kbdev)
{
unsigned int as_nr;
@@ -33,6 +35,15 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev)
struct kbase_timeline *timeline = kbdev->timeline;
struct kbase_tlstream *summary =
&kbdev->timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY];
+ u32 const kbdev_has_cross_stream_sync =
+ (kbdev->gpu_props.props.raw_props.gpu_features &
+ GPU_FEATURES_CROSS_STREAM_SYNC_MASK) ?
+ 1 :
+ 0;
+ u32 const arch_maj = (kbdev->gpu_props.props.raw_props.gpu_id &
+ GPU_ID2_ARCH_MAJOR) >>
+ GPU_ID2_ARCH_MAJOR_SHIFT;
+ u32 const num_sb_entries = arch_maj >= 11 ? 16 : 8;
/* Summarize the Address Space objects. */
for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
@@ -51,10 +62,11 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev)
kbdev);
/* Trace the creation of a new kbase device and set its properties. */
- __kbase_tlstream_tl_kbase_new_device(summary,
- kbdev->gpu_props.props.raw_props.gpu_id,
+ __kbase_tlstream_tl_kbase_new_device(
+ summary, kbdev->gpu_props.props.raw_props.gpu_id,
kbdev->gpu_props.num_cores, kbdev->csf.global_iface.group_num,
- kbdev->nr_hw_address_spaces);
+ kbdev->nr_hw_address_spaces, num_sb_entries,
+ kbdev_has_cross_stream_sync);
/* Lock the context list, to ensure no changes to the list are made
* while we're summarizing the contexts and their contents.
@@ -74,9 +86,10 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev)
kbdev->csf.scheduler.csg_slots[slot_i].resident_group;
if (group)
- __kbase_tlstream_tl_kbase_device_program_csg(summary,
+ __kbase_tlstream_tl_kbase_device_program_csg(
+ summary,
kbdev->gpu_props.props.raw_props.gpu_id,
- group->handle, slot_i);
+ group->kctx->id, group->handle, slot_i);
}
/* Reset body stream buffers while holding the kctx lock.
diff --git a/mali_kbase/tl/mali_kbase_timeline.c b/mali_kbase/tl/mali_kbase_timeline.c
index 09818a5..af10cf5 100644
--- a/mali_kbase/tl/mali_kbase_timeline.c
+++ b/mali_kbase/tl/mali_kbase_timeline.c
@@ -224,13 +224,6 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags)
timeline->obj_header_btc = obj_desc_header_size;
timeline->aux_header_btc = aux_desc_header_size;
- /* Start autoflush timer. */
- atomic_set(&timeline->autoflush_timer_active, 1);
- rcode = mod_timer(
- &timeline->autoflush_timer,
- jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
- CSTD_UNUSED(rcode);
-
#if !MALI_USE_CSF
/* If job dumping is enabled, readjust the software event's
* timeout as the default value of 3 seconds is often
@@ -258,6 +251,16 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags)
kbase_tlstream_current_devfreq_target(kbdev);
#endif /* CONFIG_MALI_DEVFREQ */
+ /* Start the autoflush timer.
+ * We must do this after creating timeline objects to ensure we
+ * don't auto-flush the streams which will be reset during the
+ * summarization process.
+ */
+ atomic_set(&timeline->autoflush_timer_active, 1);
+ rcode = mod_timer(&timeline->autoflush_timer,
+ jiffies +
+ msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+ CSTD_UNUSED(rcode);
} else {
ret = -EBUSY;
}
diff --git a/mali_kbase/tl/mali_kbase_tracepoints.c b/mali_kbase/tl/mali_kbase_tracepoints.c
index 2c0de01..54e51f8 100644
--- a/mali_kbase/tl/mali_kbase_tracepoints.c
+++ b/mali_kbase/tl/mali_kbase_tracepoints.c
@@ -74,6 +74,7 @@ enum tl_msg_id_obj {
KBASE_TL_KBASE_NEW_DEVICE,
KBASE_TL_KBASE_DEVICE_PROGRAM_CSG,
KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG,
+ KBASE_TL_KBASE_DEVICE_HALT_CSG,
KBASE_TL_KBASE_NEW_CTX,
KBASE_TL_KBASE_DEL_CTX,
KBASE_TL_KBASE_CTX_ASSIGN_AS,
@@ -121,6 +122,17 @@ enum tl_msg_id_obj {
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END,
KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW,
KBASE_TL_KBASE_CSFFW_RESET,
+ KBASE_TL_JS_SCHED_START,
+ KBASE_TL_JS_SCHED_END,
+ KBASE_TL_JD_SUBMIT_ATOM_START,
+ KBASE_TL_JD_SUBMIT_ATOM_END,
+ KBASE_TL_JD_DONE_NO_LOCK_START,
+ KBASE_TL_JD_DONE_NO_LOCK_END,
+ KBASE_TL_JD_DONE_START,
+ KBASE_TL_JD_DONE_END,
+ KBASE_TL_JD_ATOM_COMPLETE,
+ KBASE_TL_RUN_ATOM_START,
+ KBASE_TL_RUN_ATOM_END,
KBASE_OBJ_MSG_COUNT,
};
@@ -137,6 +149,7 @@ enum tl_msg_id_aux {
KBASE_AUX_JIT_STATS,
KBASE_AUX_TILER_HEAP_STATS,
KBASE_AUX_EVENT_JOB_SLOT,
+ KBASE_AUX_MMU_COMMAND,
KBASE_AUX_MSG_COUNT,
};
@@ -299,16 +312,20 @@ enum tl_msg_id_aux {
"gpu") \
TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_DEVICE, \
"New KBase Device", \
- "@IIII", \
- "kbase_device_id,kbase_device_gpu_core_count,kbase_device_max_num_csgs,kbase_device_as_count") \
+ "@IIIIII", \
+ "kbase_device_id,kbase_device_gpu_core_count,kbase_device_max_num_csgs,kbase_device_as_count,kbase_device_sb_entry_count,kbase_device_has_cross_stream_sync") \
TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, \
"CSG is programmed to a slot", \
- "@III", \
- "kbase_device_id,gpu_cmdq_grp_handle,kbase_device_csg_slot_index") \
+ "@IIII", \
+ "kbase_device_id,kernel_ctx_id,gpu_cmdq_grp_handle,kbase_device_csg_slot_index") \
TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, \
"CSG is deprogrammed from a slot", \
"@II", \
"kbase_device_id,kbase_device_csg_slot_index") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_HALT_CSG, \
+ "CSG is halted", \
+ "@II", \
+ "kbase_device_id,kbase_device_csg_slot_index") \
TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_CTX, \
"New KBase Context", \
"@II", \
@@ -497,6 +514,50 @@ enum tl_msg_id_aux {
"A reset has happened with the CSFFW", \
"@L", \
"csffw_cycle") \
+ TRACEPOINT_DESC(KBASE_TL_JS_SCHED_START, \
+ "Scheduling starts", \
+ "@I", \
+ "dummy") \
+ TRACEPOINT_DESC(KBASE_TL_JS_SCHED_END, \
+ "Scheduling ends", \
+ "@I", \
+ "dummy") \
+ TRACEPOINT_DESC(KBASE_TL_JD_SUBMIT_ATOM_START, \
+ "Submitting an atom starts", \
+ "@p", \
+ "atom") \
+ TRACEPOINT_DESC(KBASE_TL_JD_SUBMIT_ATOM_END, \
+ "Submitting an atom ends", \
+ "@p", \
+ "atom") \
+ TRACEPOINT_DESC(KBASE_TL_JD_DONE_NO_LOCK_START, \
+ "Within function jd_done_nolock", \
+ "@p", \
+ "atom") \
+ TRACEPOINT_DESC(KBASE_TL_JD_DONE_NO_LOCK_END, \
+ "Within function jd_done_nolock - end", \
+ "@p", \
+ "atom") \
+ TRACEPOINT_DESC(KBASE_TL_JD_DONE_START, \
+ "Start of kbase_jd_done", \
+ "@p", \
+ "atom") \
+ TRACEPOINT_DESC(KBASE_TL_JD_DONE_END, \
+ "End of kbase_jd_done", \
+ "@p", \
+ "atom") \
+ TRACEPOINT_DESC(KBASE_TL_JD_ATOM_COMPLETE, \
+ "Atom marked complete", \
+ "@p", \
+ "atom") \
+ TRACEPOINT_DESC(KBASE_TL_RUN_ATOM_START, \
+ "Running of atom starts", \
+ "@pI", \
+ "atom,atom_nr") \
+ TRACEPOINT_DESC(KBASE_TL_RUN_ATOM_END, \
+ "Running of atom ends", \
+ "@pI", \
+ "atom,atom_nr") \
#define MIPE_HEADER_BLOB_VAR_NAME __obj_desc_header
#define MIPE_HEADER_STREAM_ID TL_STREAM_ID_KERNEL
@@ -554,6 +615,10 @@ const size_t obj_desc_header_size = sizeof(__obj_desc_header);
"event on a given job slot", \
"@pIII", \
"ctx,slot_nr,atom_nr,event") \
+ TRACEPOINT_DESC(KBASE_AUX_MMU_COMMAND, \
+ "mmu commands with synchronicity info", \
+ "@IIILI", \
+ "kernel_ctx_id,mmu_cmd_id,mmu_synchronicity,mmu_lock_addr,mmu_lock_page_num") \
#define MIPE_HEADER_BLOB_VAR_NAME __aux_desc_header
#define MIPE_HEADER_STREAM_ID TL_STREAM_ID_KERNEL
@@ -1936,12 +2001,52 @@ void __kbase_tlstream_aux_event_job_slot(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
+void __kbase_tlstream_aux_mmu_command(
+ struct kbase_tlstream *stream,
+ u32 kernel_ctx_id,
+ u32 mmu_cmd_id,
+ u32 mmu_synchronicity,
+ u64 mmu_lock_addr,
+ u32 mmu_lock_page_num)
+{
+ const u32 msg_id = KBASE_AUX_MMU_COMMAND;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kernel_ctx_id)
+ + sizeof(mmu_cmd_id)
+ + sizeof(mmu_synchronicity)
+ + sizeof(mmu_lock_addr)
+ + sizeof(mmu_lock_page_num)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kernel_ctx_id, sizeof(kernel_ctx_id));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &mmu_cmd_id, sizeof(mmu_cmd_id));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &mmu_synchronicity, sizeof(mmu_synchronicity));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &mmu_lock_addr, sizeof(mmu_lock_addr));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &mmu_lock_page_num, sizeof(mmu_lock_page_num));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
void __kbase_tlstream_tl_kbase_new_device(
struct kbase_tlstream *stream,
u32 kbase_device_id,
u32 kbase_device_gpu_core_count,
u32 kbase_device_max_num_csgs,
- u32 kbase_device_as_count)
+ u32 kbase_device_as_count,
+ u32 kbase_device_sb_entry_count,
+ u32 kbase_device_has_cross_stream_sync)
{
const u32 msg_id = KBASE_TL_KBASE_NEW_DEVICE;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -1949,6 +2054,8 @@ void __kbase_tlstream_tl_kbase_new_device(
+ sizeof(kbase_device_gpu_core_count)
+ sizeof(kbase_device_max_num_csgs)
+ sizeof(kbase_device_as_count)
+ + sizeof(kbase_device_sb_entry_count)
+ + sizeof(kbase_device_has_cross_stream_sync)
;
char *buffer;
unsigned long acq_flags;
@@ -1966,6 +2073,10 @@ void __kbase_tlstream_tl_kbase_new_device(
pos, &kbase_device_max_num_csgs, sizeof(kbase_device_max_num_csgs));
pos = kbasep_serialize_bytes(buffer,
pos, &kbase_device_as_count, sizeof(kbase_device_as_count));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kbase_device_sb_entry_count, sizeof(kbase_device_sb_entry_count));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kbase_device_has_cross_stream_sync, sizeof(kbase_device_has_cross_stream_sync));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
@@ -1973,12 +2084,14 @@ void __kbase_tlstream_tl_kbase_new_device(
void __kbase_tlstream_tl_kbase_device_program_csg(
struct kbase_tlstream *stream,
u32 kbase_device_id,
+ u32 kernel_ctx_id,
u32 gpu_cmdq_grp_handle,
u32 kbase_device_csg_slot_index)
{
const u32 msg_id = KBASE_TL_KBASE_DEVICE_PROGRAM_CSG;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kbase_device_id)
+ + sizeof(kernel_ctx_id)
+ sizeof(gpu_cmdq_grp_handle)
+ sizeof(kbase_device_csg_slot_index)
;
@@ -1993,6 +2106,8 @@ void __kbase_tlstream_tl_kbase_device_program_csg(
pos = kbasep_serialize_bytes(buffer,
pos, &kbase_device_id, sizeof(kbase_device_id));
pos = kbasep_serialize_bytes(buffer,
+ pos, &kernel_ctx_id, sizeof(kernel_ctx_id));
+ pos = kbasep_serialize_bytes(buffer,
pos, &gpu_cmdq_grp_handle, sizeof(gpu_cmdq_grp_handle));
pos = kbasep_serialize_bytes(buffer,
pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index));
@@ -2026,6 +2141,32 @@ void __kbase_tlstream_tl_kbase_device_deprogram_csg(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
+void __kbase_tlstream_tl_kbase_device_halt_csg(
+ struct kbase_tlstream *stream,
+ u32 kbase_device_id,
+ u32 kbase_device_csg_slot_index)
+{
+ const u32 msg_id = KBASE_TL_KBASE_DEVICE_HALT_CSG;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kbase_device_id)
+ + sizeof(kbase_device_csg_slot_index)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kbase_device_id, sizeof(kbase_device_id));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
void __kbase_tlstream_tl_kbase_new_ctx(
struct kbase_tlstream *stream,
u32 kernel_ctx_id,
@@ -3216,4 +3357,254 @@ void __kbase_tlstream_tl_kbase_csffw_reset(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
+void __kbase_tlstream_tl_js_sched_start(
+ struct kbase_tlstream *stream,
+ u32 dummy)
+{
+ const u32 msg_id = KBASE_TL_JS_SCHED_START;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(dummy)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &dummy, sizeof(dummy));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_js_sched_end(
+ struct kbase_tlstream *stream,
+ u32 dummy)
+{
+ const u32 msg_id = KBASE_TL_JS_SCHED_END;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(dummy)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &dummy, sizeof(dummy));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_jd_submit_atom_start(
+ struct kbase_tlstream *stream,
+ const void *atom)
+{
+ const u32 msg_id = KBASE_TL_JD_SUBMIT_ATOM_START;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(atom)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &atom, sizeof(atom));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_jd_submit_atom_end(
+ struct kbase_tlstream *stream,
+ const void *atom)
+{
+ const u32 msg_id = KBASE_TL_JD_SUBMIT_ATOM_END;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(atom)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &atom, sizeof(atom));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_jd_done_no_lock_start(
+ struct kbase_tlstream *stream,
+ const void *atom)
+{
+ const u32 msg_id = KBASE_TL_JD_DONE_NO_LOCK_START;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(atom)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &atom, sizeof(atom));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_jd_done_no_lock_end(
+ struct kbase_tlstream *stream,
+ const void *atom)
+{
+ const u32 msg_id = KBASE_TL_JD_DONE_NO_LOCK_END;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(atom)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &atom, sizeof(atom));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_jd_done_start(
+ struct kbase_tlstream *stream,
+ const void *atom)
+{
+ const u32 msg_id = KBASE_TL_JD_DONE_START;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(atom)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &atom, sizeof(atom));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_jd_done_end(
+ struct kbase_tlstream *stream,
+ const void *atom)
+{
+ const u32 msg_id = KBASE_TL_JD_DONE_END;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(atom)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &atom, sizeof(atom));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_jd_atom_complete(
+ struct kbase_tlstream *stream,
+ const void *atom)
+{
+ const u32 msg_id = KBASE_TL_JD_ATOM_COMPLETE;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(atom)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &atom, sizeof(atom));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_run_atom_start(
+ struct kbase_tlstream *stream,
+ const void *atom,
+ u32 atom_nr)
+{
+ const u32 msg_id = KBASE_TL_RUN_ATOM_START;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(atom)
+ + sizeof(atom_nr)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &atom, sizeof(atom));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &atom_nr, sizeof(atom_nr));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_run_atom_end(
+ struct kbase_tlstream *stream,
+ const void *atom,
+ u32 atom_nr)
+{
+ const u32 msg_id = KBASE_TL_RUN_ATOM_END;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(atom)
+ + sizeof(atom_nr)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &atom, sizeof(atom));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &atom_nr, sizeof(atom_nr));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
/* clang-format on */
diff --git a/mali_kbase/tl/mali_kbase_tracepoints.h b/mali_kbase/tl/mali_kbase_tracepoints.h
index 887a1aa..3fc871c 100644
--- a/mali_kbase/tl/mali_kbase_tracepoints.h
+++ b/mali_kbase/tl/mali_kbase_tracepoints.h
@@ -296,21 +296,35 @@ void __kbase_tlstream_aux_event_job_slot(
u32 slot_nr,
u32 atom_nr,
u32 event);
+void __kbase_tlstream_aux_mmu_command(
+ struct kbase_tlstream *stream,
+ u32 kernel_ctx_id,
+ u32 mmu_cmd_id,
+ u32 mmu_synchronicity,
+ u64 mmu_lock_addr,
+ u32 mmu_lock_page_num);
void __kbase_tlstream_tl_kbase_new_device(
struct kbase_tlstream *stream,
u32 kbase_device_id,
u32 kbase_device_gpu_core_count,
u32 kbase_device_max_num_csgs,
- u32 kbase_device_as_count);
+ u32 kbase_device_as_count,
+ u32 kbase_device_sb_entry_count,
+ u32 kbase_device_has_cross_stream_sync);
void __kbase_tlstream_tl_kbase_device_program_csg(
struct kbase_tlstream *stream,
u32 kbase_device_id,
+ u32 kernel_ctx_id,
u32 gpu_cmdq_grp_handle,
u32 kbase_device_csg_slot_index);
void __kbase_tlstream_tl_kbase_device_deprogram_csg(
struct kbase_tlstream *stream,
u32 kbase_device_id,
u32 kbase_device_csg_slot_index);
+void __kbase_tlstream_tl_kbase_device_halt_csg(
+ struct kbase_tlstream *stream,
+ u32 kbase_device_id,
+ u32 kbase_device_csg_slot_index);
void __kbase_tlstream_tl_kbase_new_ctx(
struct kbase_tlstream *stream,
u32 kernel_ctx_id,
@@ -491,6 +505,41 @@ void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow(
void __kbase_tlstream_tl_kbase_csffw_reset(
struct kbase_tlstream *stream,
u64 csffw_cycle);
+void __kbase_tlstream_tl_js_sched_start(
+ struct kbase_tlstream *stream,
+ u32 dummy);
+void __kbase_tlstream_tl_js_sched_end(
+ struct kbase_tlstream *stream,
+ u32 dummy);
+void __kbase_tlstream_tl_jd_submit_atom_start(
+ struct kbase_tlstream *stream,
+ const void *atom);
+void __kbase_tlstream_tl_jd_submit_atom_end(
+ struct kbase_tlstream *stream,
+ const void *atom);
+void __kbase_tlstream_tl_jd_done_no_lock_start(
+ struct kbase_tlstream *stream,
+ const void *atom);
+void __kbase_tlstream_tl_jd_done_no_lock_end(
+ struct kbase_tlstream *stream,
+ const void *atom);
+void __kbase_tlstream_tl_jd_done_start(
+ struct kbase_tlstream *stream,
+ const void *atom);
+void __kbase_tlstream_tl_jd_done_end(
+ struct kbase_tlstream *stream,
+ const void *atom);
+void __kbase_tlstream_tl_jd_atom_complete(
+ struct kbase_tlstream *stream,
+ const void *atom);
+void __kbase_tlstream_tl_run_atom_start(
+ struct kbase_tlstream *stream,
+ const void *atom,
+ u32 atom_nr);
+void __kbase_tlstream_tl_run_atom_end(
+ struct kbase_tlstream *stream,
+ const void *atom,
+ u32 atom_nr);
struct kbase_tlstream;
@@ -1593,14 +1642,48 @@ struct kbase_tlstream;
} while (0)
/**
+ * KBASE_TLSTREAM_AUX_MMU_COMMAND -
+ * mmu commands with synchronicity info
+ *
+ * @kbdev: Kbase device
+ * @kernel_ctx_id: Unique ID for the KBase Context
+ * @mmu_cmd_id: MMU Command ID (e.g. AS_COMMAND_UPDATE)
+ * @mmu_synchronicity: Indicates whether the command is related to the currently
+ *                     running job that needs to be resolved for it to progress
+ *                     (synchronous, e.g. grow on page fault, JIT) or not
+ *                     (asynchronous, e.g. IOCTL calls from user-space). This
+ *                     parameter is 0 for an asynchronous operation.
+ * @mmu_lock_addr: Start address of the region to be locked/unlocked/invalidated
+ * @mmu_lock_page_num: Number of pages to be locked/unlocked/invalidated
+ */
+#define KBASE_TLSTREAM_AUX_MMU_COMMAND( \
+ kbdev, \
+ kernel_ctx_id, \
+ mmu_cmd_id, \
+ mmu_synchronicity, \
+ mmu_lock_addr, \
+ mmu_lock_page_num \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & TLSTREAM_ENABLED) \
+ __kbase_tlstream_aux_mmu_command( \
+ __TL_DISPATCH_STREAM(kbdev, aux), \
+ kernel_ctx_id, mmu_cmd_id, mmu_synchronicity, mmu_lock_addr, mmu_lock_page_num); \
+ } while (0)
+
+/**
* KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE -
* New KBase Device
*
* @kbdev: Kbase device
- * @kbase_device_id: The id of the physical hardware
+ * @kbase_device_id: The ID of the physical hardware
* @kbase_device_gpu_core_count: The number of gpu cores in the physical hardware
* @kbase_device_max_num_csgs: The max number of CSGs the physical hardware supports
* @kbase_device_as_count: The number of address spaces the physical hardware has available
+ * @kbase_device_sb_entry_count: The number of entries each scoreboard set in the
+ * physical hardware has available
+ * @kbase_device_has_cross_stream_sync: Whether cross-stream synchronization is supported
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE( \
@@ -1608,14 +1691,16 @@ struct kbase_tlstream;
kbase_device_id, \
kbase_device_gpu_core_count, \
kbase_device_max_num_csgs, \
- kbase_device_as_count \
+ kbase_device_as_count, \
+ kbase_device_sb_entry_count, \
+ kbase_device_has_cross_stream_sync \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_new_device( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kbase_device_id, kbase_device_gpu_core_count, kbase_device_max_num_csgs, kbase_device_as_count); \
+ kbase_device_id, kbase_device_gpu_core_count, kbase_device_max_num_csgs, kbase_device_as_count, kbase_device_sb_entry_count, kbase_device_has_cross_stream_sync); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE( \
@@ -1623,7 +1708,9 @@ struct kbase_tlstream;
kbase_device_id, \
kbase_device_gpu_core_count, \
kbase_device_max_num_csgs, \
- kbase_device_as_count \
+ kbase_device_as_count, \
+ kbase_device_sb_entry_count, \
+ kbase_device_has_cross_stream_sync \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
@@ -1633,7 +1720,8 @@ struct kbase_tlstream;
* CSG is programmed to a slot
*
* @kbdev: Kbase device
- * @kbase_device_id: The id of the physical hardware
+ * @kbase_device_id: The ID of the physical hardware
+ * @kernel_ctx_id: Unique ID for the KBase Context
* @gpu_cmdq_grp_handle: GPU Command Queue Group handle which will match userspace
* @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed
*/
@@ -1641,6 +1729,7 @@ struct kbase_tlstream;
#define KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( \
kbdev, \
kbase_device_id, \
+ kernel_ctx_id, \
gpu_cmdq_grp_handle, \
kbase_device_csg_slot_index \
) \
@@ -1649,12 +1738,13 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_device_program_csg( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kbase_device_id, gpu_cmdq_grp_handle, kbase_device_csg_slot_index); \
+ kbase_device_id, kernel_ctx_id, gpu_cmdq_grp_handle, kbase_device_csg_slot_index); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( \
kbdev, \
kbase_device_id, \
+ kernel_ctx_id, \
gpu_cmdq_grp_handle, \
kbase_device_csg_slot_index \
) \
@@ -1666,7 +1756,7 @@ struct kbase_tlstream;
* CSG is deprogrammed from a slot
*
* @kbdev: Kbase device
- * @kbase_device_id: The id of the physical hardware
+ * @kbase_device_id: The ID of the physical hardware
* @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed
*/
#if MALI_USE_CSF
@@ -1692,12 +1782,33 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
+ * KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG -
+ * CSG is halted
+ *
+ * @kbdev: Kbase device
+ * @kbase_device_id: The ID of the physical hardware
+ * @kbase_device_csg_slot_index: The index of the slot in the scheduler being halted
+ */
+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG( \
+ kbdev, \
+ kbase_device_id, \
+ kbase_device_csg_slot_index \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & TLSTREAM_ENABLED) \
+ __kbase_tlstream_tl_kbase_device_halt_csg( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kbase_device_id, kbase_device_csg_slot_index); \
+ } while (0)
+
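
A hedged sketch of how the programming and halt tracepoints might be paired by a caller; none of this is in the patch, the function and parameter names are invented, and the types and values are illustrative only.

/* Illustrative sketch: report programming a CSG (now including the owning
 * kernel context ID) and, later, its halt on the same slot.
 */
static void example_program_and_halt_csg(struct kbase_device *kbdev,
					 u32 kernel_ctx_id, u32 group_handle,
					 u32 slot)
{
	const u32 device_id = 0;	/* placeholder device ID */

	KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG(kbdev, device_id,
		kernel_ctx_id, group_handle, slot);

	/* ... later, when the scheduler halts the group on that slot ... */
	KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG(kbdev, device_id, slot);
}
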
+/**
* KBASE_TLSTREAM_TL_KBASE_NEW_CTX -
* New KBase Context
*
* @kbdev: Kbase device
* @kernel_ctx_id: Unique ID for the KBase Context
- * @kbase_device_id: The id of the physical hardware
+ * @kbase_device_id: The ID of the physical hardware
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_NEW_CTX( \
@@ -1935,7 +2046,7 @@ struct kbase_tlstream;
* @cqs_obj_gpu_addr: CQS Object GPU pointer
* @cqs_obj_compare_value: Semaphore value that should be exceeded
* for the WAIT to pass
- * @cqs_obj_inherit_error: Indicates the error state should be inherited into the queue or not
+ * @cqs_obj_inherit_error: Flag which indicates if the CQS object error state should be inherited by the queue
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT( \
@@ -3091,6 +3202,219 @@ struct kbase_tlstream;
do { } while (0)
#endif /* MALI_USE_CSF */
+/**
+ * KBASE_TLSTREAM_TL_JS_SCHED_START -
+ * Scheduling starts
+ *
+ * @kbdev: Kbase device
+ * @dummy: dummy argument
+ */
+#define KBASE_TLSTREAM_TL_JS_SCHED_START( \
+ kbdev, \
+ dummy \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & TLSTREAM_ENABLED) \
+ __kbase_tlstream_tl_js_sched_start( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ dummy); \
+ } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_JS_SCHED_END -
+ * Scheduling ends
+ *
+ * @kbdev: Kbase device
+ * @dummy: dummy argument
+ */
+#define KBASE_TLSTREAM_TL_JS_SCHED_END( \
+ kbdev, \
+ dummy \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & TLSTREAM_ENABLED) \
+ __kbase_tlstream_tl_js_sched_end( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ dummy); \
+ } while (0)
+
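
A brief usage sketch for the scheduling tracepoints follows; it is not part of the patch and the surrounding function is hypothetical. The dummy argument carries no information, so 0 is passed.

/* Illustrative sketch: bracket one job-scheduler pass. */
static void example_scheduler_pass(struct kbase_device *kbdev)
{
	KBASE_TLSTREAM_TL_JS_SCHED_START(kbdev, 0); /* dummy argument */

	/* ... run one scheduling pass (placeholder) ... */

	KBASE_TLSTREAM_TL_JS_SCHED_END(kbdev, 0);   /* dummy argument */
}
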
+/**
+ * KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_START -
+ * Submitting an atom starts
+ *
+ * @kbdev: Kbase device
+ * @atom: Atom identifier
+ */
+#define KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_START( \
+ kbdev, \
+ atom \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & TLSTREAM_ENABLED) \
+ __kbase_tlstream_tl_jd_submit_atom_start( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ atom); \
+ } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_END -
+ * Submitting an atom ends
+ *
+ * @kbdev: Kbase device
+ * @atom: Atom identifier
+ */
+#define KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_END( \
+ kbdev, \
+ atom \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & TLSTREAM_ENABLED) \
+ __kbase_tlstream_tl_jd_submit_atom_end( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ atom); \
+ } while (0)
+
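
For reference, a hedged sketch of bracketing atom submission with the START/END pair; the helper is invented and the atom is passed as an opaque identifier, which is an assumption made for illustration.

/* Illustrative sketch: trace the start and end of submitting one atom. */
static void example_submit_atom(struct kbase_device *kbdev, const void *atom)
{
	KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_START(kbdev, atom);

	/* ... validate and queue the atom (placeholder) ... */

	KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_END(kbdev, atom);
}
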
+/**
+ * KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_START -
+ * Within function jd_done_nolock - start
+ *
+ * @kbdev: Kbase device
+ * @atom: Atom identifier
+ */
+#define KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_START( \
+ kbdev, \
+ atom \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & TLSTREAM_ENABLED) \
+ __kbase_tlstream_tl_jd_done_no_lock_start( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ atom); \
+ } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_END -
+ * Within function jd_done_nolock - end
+ *
+ * @kbdev: Kbase device
+ * @atom: Atom identifier
+ */
+#define KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_END( \
+ kbdev, \
+ atom \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & TLSTREAM_ENABLED) \
+ __kbase_tlstream_tl_jd_done_no_lock_end( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ atom); \
+ } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_JD_DONE_START -
+ * Start of kbase_jd_done
+ *
+ * @kbdev: Kbase device
+ * @atom: Atom identifier
+ */
+#define KBASE_TLSTREAM_TL_JD_DONE_START( \
+ kbdev, \
+ atom \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & TLSTREAM_ENABLED) \
+ __kbase_tlstream_tl_jd_done_start( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ atom); \
+ } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_JD_DONE_END -
+ * End of kbase_jd_done
+ *
+ * @kbdev: Kbase device
+ * @atom: Atom identifier
+ */
+#define KBASE_TLSTREAM_TL_JD_DONE_END( \
+ kbdev, \
+ atom \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & TLSTREAM_ENABLED) \
+ __kbase_tlstream_tl_jd_done_end( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ atom); \
+ } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_JD_ATOM_COMPLETE -
+ * Atom marked complete
+ *
+ * @kbdev: Kbase device
+ * @atom: Atom identifier
+ */
+#define KBASE_TLSTREAM_TL_JD_ATOM_COMPLETE( \
+ kbdev, \
+ atom \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & TLSTREAM_ENABLED) \
+ __kbase_tlstream_tl_jd_atom_complete( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ atom); \
+ } while (0)
+
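
A hedged sketch of the completion path, showing how the DONE bracket and the ATOM_COMPLETE event might relate; the helper is invented and the placement of ATOM_COMPLETE inside the bracket is an assumption for illustration, not a statement about kbase_jd_done() itself.

/* Illustrative sketch: trace kbase_jd_done() and mark the atom complete. */
static void example_complete_atom(struct kbase_device *kbdev, const void *atom)
{
	KBASE_TLSTREAM_TL_JD_DONE_START(kbdev, atom);

	/* ... completion processing runs here (placeholder) ... */
	KBASE_TLSTREAM_TL_JD_ATOM_COMPLETE(kbdev, atom);

	KBASE_TLSTREAM_TL_JD_DONE_END(kbdev, atom);
}
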
+/**
+ * KBASE_TLSTREAM_TL_RUN_ATOM_START -
+ * Running of atom starts
+ *
+ * @kbdev: Kbase device
+ * @atom: Atom identifier
+ * @atom_nr: Sequential number of an atom
+ */
+#define KBASE_TLSTREAM_TL_RUN_ATOM_START( \
+ kbdev, \
+ atom, \
+ atom_nr \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & TLSTREAM_ENABLED) \
+ __kbase_tlstream_tl_run_atom_start( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ atom, atom_nr); \
+ } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_RUN_ATOM_END -
+ * Running of atom ends
+ *
+ * @kbdev: Kbase device
+ * @atom: Atom identifier
+ * @atom_nr: Sequential number of an atom
+ */
+#define KBASE_TLSTREAM_TL_RUN_ATOM_END( \
+ kbdev, \
+ atom, \
+ atom_nr \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & TLSTREAM_ENABLED) \
+ __kbase_tlstream_tl_run_atom_end( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ atom, atom_nr); \
+ } while (0)
+
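
Finally, a hedged sketch of the run bracket; the helper is invented, the atom is an opaque identifier, and atom_nr is whatever sequential number the caller tracks.

/* Illustrative sketch: bracket the hardware execution of one atom. */
static void example_run_atom(struct kbase_device *kbdev, const void *atom,
			     u32 atom_nr)
{
	KBASE_TLSTREAM_TL_RUN_ATOM_START(kbdev, atom, atom_nr);

	/* ... hand the atom to the hardware and wait for it (placeholder) ... */

	KBASE_TLSTREAM_TL_RUN_ATOM_END(kbdev, atom, atom_nr);
}
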
/* Gator tracepoints are hooked into TLSTREAM interface.
* When the following tracepoints are called, corresponding