author     Siddharth Kapoor <ksiddharth@google.com>    2022-01-07 19:09:01 +0800
committer  Siddharth Kapoor <ksiddharth@google.com>    2022-01-07 19:09:01 +0800
commit     0207d6c3b7a2002f15c60d08617e956faf5ba90c (patch)
tree       eae0afe608a70b25f64e959db2b782fb33f89160
parent     0c596dc70431fa2c70021fa1685e3efc969a852d (diff)
download   gpu-0207d6c3b7a2002f15c60d08617e956faf5ba90c.tar.gz
Mali Valhall Android DDK r35p0 KMD
Provenance: 3e260085ac (collaborate/EAC/v_r35p0)
VX504X08X-BU-00000-r35p0-01eac0 - Valhall Android DDK
VX504X08X-BU-60000-r35p0-01eac0 - Valhall Android Document Bundle
VX504X08X-DC-11001-r35p0-01eac0 - Valhall Android DDK Software Errata
VX504X08X-SW-99006-r35p0-01eac0 - Valhall Android Renderscript AOSP parts
Signed-off-by: Siddharth Kapoor <ksiddharth@google.com>
Change-Id: Id9ef73da49680e2935a827c40d54169545f7162e
119 files changed, 7469 insertions, 1916 deletions
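Note on the user-facing interface changes in the diff below: this drop removes the legacy hardware-counter ioctls (KBASE_IOCTL_HWCNT_ENABLE/DUMP/CLEAR) in favour of the kinstr_prfcnt interface, and bumps the user/kernel interface versions accordingly (CSF 1.5 -> 1.8, JM 11.31 -> 11.33; see the changelog hunks in mali_kbase_csf_ioctl.h and mali_kbase_jm_ioctl.h). A minimal, hypothetical userspace sketch of gating on the negotiated version follows; KBASE_IOCTL_VERSION_CHECK and the field layout of struct kbase_ioctl_version_check are taken from the kbase uapi headers and are assumptions here, not something this diff shows.

#include <sys/ioctl.h>
#include "mali_kbase_ioctl.h"   /* kbase uapi header; include path assumed */

/* Sketch only: returns non-zero if the kernel reports a CSF UK version new
 * enough to provide the new HW performance counters interface (added at
 * 1.6 per the changelog below). KBASE_IOCTL_VERSION_CHECK is assumed from
 * the uapi headers and is not part of this diff.
 */
static int kbase_supports_kinstr_prfcnt(int kbase_fd)
{
	struct kbase_ioctl_version_check vc = { .major = 1, .minor = 8 };

	/* The kernel writes back the version it actually implements. */
	if (ioctl(kbase_fd, KBASE_IOCTL_VERSION_CHECK, &vc) < 0)
		return 0;

	return vc.major == 1 && vc.minor >= 6;
}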
diff --git a/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h new file mode 100644 index 0000000..9d677ca --- /dev/null +++ b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * Dummy Model interface + */ + +#ifndef _UAPI_KBASE_MODEL_DUMMY_H_ +#define _UAPI_KBASE_MODEL_DUMMY_H_ + +#include <linux/types.h> + +#define KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS (4) +#define KBASE_DUMMY_MODEL_COUNTER_PER_CORE (60) +#define KBASE_DUMMY_MODEL_COUNTERS_PER_BIT (4) +#define KBASE_DUMMY_MODEL_COUNTER_ENABLED(enable_mask, ctr_idx) \ + (enable_mask & (1 << (ctr_idx / KBASE_DUMMY_MODEL_COUNTERS_PER_BIT))) + +#define KBASE_DUMMY_MODEL_HEADERS_PER_BLOCK 4 +#define KBASE_DUMMY_MODEL_COUNTERS_PER_BLOCK 60 +#define KBASE_DUMMY_MODEL_VALUES_PER_BLOCK \ + (KBASE_DUMMY_MODEL_COUNTERS_PER_BLOCK + \ + KBASE_DUMMY_MODEL_HEADERS_PER_BLOCK) +#define KBASE_DUMMY_MODEL_BLOCK_SIZE \ + (KBASE_DUMMY_MODEL_VALUES_PER_BLOCK * sizeof(__u32)) +#define KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS 8 +#define KBASE_DUMMY_MODEL_MAX_SHADER_CORES 32 +#define KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS \ + (1 + 1 + KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS + KBASE_DUMMY_MODEL_MAX_SHADER_CORES) +#define KBASE_DUMMY_MODEL_COUNTER_TOTAL \ + (KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * \ + KBASE_DUMMY_MODEL_COUNTER_PER_CORE) + +#define DUMMY_IMPLEMENTATION_SHADER_PRESENT (0xFull) +#define DUMMY_IMPLEMENTATION_TILER_PRESENT (0x1ull) +#define DUMMY_IMPLEMENTATION_L2_PRESENT (0x1ull) +#define DUMMY_IMPLEMENTATION_STACK_PRESENT (0xFull) + +#endif /* _UAPI_KBASE_MODEL_DUMMY_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h b/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h index a5dc745..1d15f57 100644 --- a/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h +++ b/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h @@ -251,8 +251,20 @@ /* CS_KERNEL_INPUT_BLOCK register set definitions */ /* GLB_VERSION register */ #define GLB_VERSION_PATCH_SHIFT (0) +#define GLB_VERSION_PATCH_MASK ((0xFFFF) << GLB_VERSION_PATCH_SHIFT) +#define GLB_VERSION_PATCH_GET(reg_val) (((reg_val)&GLB_VERSION_PATCH_MASK) >> GLB_VERSION_PATCH_SHIFT) +#define GLB_VERSION_PATCH_SET(reg_val, value) \ + (((reg_val) & ~GLB_VERSION_PATCH_MASK) | (((value) << GLB_VERSION_PATCH_SHIFT) & GLB_VERSION_PATCH_MASK)) #define GLB_VERSION_MINOR_SHIFT (16) +#define GLB_VERSION_MINOR_MASK ((0xFF) << GLB_VERSION_MINOR_SHIFT) +#define GLB_VERSION_MINOR_GET(reg_val) (((reg_val)&GLB_VERSION_MINOR_MASK) >> GLB_VERSION_MINOR_SHIFT) +#define GLB_VERSION_MINOR_SET(reg_val, value) \ 
+ (((reg_val) & ~GLB_VERSION_MINOR_MASK) | (((value) << GLB_VERSION_MINOR_SHIFT) & GLB_VERSION_MINOR_MASK)) #define GLB_VERSION_MAJOR_SHIFT (24) +#define GLB_VERSION_MAJOR_MASK ((0xFF) << GLB_VERSION_MAJOR_SHIFT) +#define GLB_VERSION_MAJOR_GET(reg_val) (((reg_val)&GLB_VERSION_MAJOR_MASK) >> GLB_VERSION_MAJOR_SHIFT) +#define GLB_VERSION_MAJOR_SET(reg_val, value) \ + (((reg_val) & ~GLB_VERSION_MAJOR_MASK) | (((value) << GLB_VERSION_MAJOR_SHIFT) & GLB_VERSION_MAJOR_MASK)) /* CS_REQ register */ #define CS_REQ_STATE_SHIFT 0 @@ -935,6 +947,7 @@ (((reg_val) & ~CSG_PROTM_SUSPEND_BUF_POINTER_MASK) | \ (((value) << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) & CSG_PROTM_SUSPEND_BUF_POINTER_MASK)) + /* End of CSG_INPUT_BLOCK register set definitions */ /* CSG_OUTPUT_BLOCK register set definitions */ diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h index ec4870c..3df8a01 100644 --- a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h @@ -46,10 +46,14 @@ * trace configurations with CSF trace_command. * 1.6: * - Added new HW performance counters interface to all GPUs. + * 1.7: + * - Added reserved field to QUEUE_GROUP_CREATE ioctl for future use + * 1.8: + * - Removed Kernel legacy HWC interface */ #define BASE_UK_VERSION_MAJOR 1 -#define BASE_UK_VERSION_MINOR 5 +#define BASE_UK_VERSION_MINOR 8 /** * struct kbase_ioctl_version_check - Check version compatibility between @@ -179,6 +183,50 @@ struct kbase_ioctl_cs_queue_terminate { _IOW(KBASE_IOCTL_TYPE, 41, struct kbase_ioctl_cs_queue_terminate) /** + * union kbase_ioctl_cs_queue_group_create_1_6 - Create a GPU command queue + * group + * @in: Input parameters + * @in.tiler_mask: Mask of tiler endpoints the group is allowed to use. + * @in.fragment_mask: Mask of fragment endpoints the group is allowed to use. + * @in.compute_mask: Mask of compute endpoints the group is allowed to use. + * @in.cs_min: Minimum number of CSs required. + * @in.priority: Queue group's priority within a process. + * @in.tiler_max: Maximum number of tiler endpoints the group is allowed + * to use. + * @in.fragment_max: Maximum number of fragment endpoints the group is + * allowed to use. + * @in.compute_max: Maximum number of compute endpoints the group is allowed + * to use. + * @in.padding: Currently unused, must be zero + * @out: Output parameters + * @out.group_handle: Handle of a newly created queue group. + * @out.padding: Currently unused, must be zero + * @out.group_uid: UID of the queue group available to base. + */ +union kbase_ioctl_cs_queue_group_create_1_6 { + struct { + __u64 tiler_mask; + __u64 fragment_mask; + __u64 compute_mask; + __u8 cs_min; + __u8 priority; + __u8 tiler_max; + __u8 fragment_max; + __u8 compute_max; + __u8 padding[3]; + + } in; + struct { + __u8 group_handle; + __u8 padding[3]; + __u32 group_uid; + } out; +}; + +#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_6 \ + _IOWR(KBASE_IOCTL_TYPE, 42, union kbase_ioctl_cs_queue_group_create_1_6) + +/** * union kbase_ioctl_cs_queue_group_create - Create a GPU command queue group * @in: Input parameters * @in.tiler_mask: Mask of tiler endpoints the group is allowed to use. 
@@ -209,7 +257,7 @@ union kbase_ioctl_cs_queue_group_create { __u8 fragment_max; __u8 compute_max; __u8 padding[3]; - + __u64 reserved; } in; struct { __u8 group_handle; @@ -218,8 +266,8 @@ union kbase_ioctl_cs_queue_group_create { } out; }; -#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE \ - _IOWR(KBASE_IOCTL_TYPE, 42, union kbase_ioctl_cs_queue_group_create) +#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE \ + _IOWR(KBASE_IOCTL_TYPE, 58, union kbase_ioctl_cs_queue_group_create) /** * struct kbase_ioctl_cs_queue_group_term - Terminate a GPU command queue group diff --git a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h index 4001a4c..b1720ed 100644 --- a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h +++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h @@ -250,6 +250,17 @@ GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN_INVALIDATE | \ GPU_COMMAND_FLUSH_PAYLOAD_OTHER_NONE)) +/* Clean and invalidate L2, LSC, and Other caches */ +#define GPU_COMMAND_CACHE_CLN_INV_FULL \ + GPU_COMMAND_CODE_PAYLOAD( \ + GPU_COMMAND_CODE_FLUSH_CACHES, \ + (GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE | \ + GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN_INVALIDATE | \ + GPU_COMMAND_FLUSH_PAYLOAD_OTHER_INVALIDATE)) + +/* Merge cache flush commands */ +#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) ((cmd1) | (cmd2)) + /* Places the GPU in protected mode */ #define GPU_COMMAND_SET_PROTECTED_MODE \ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_SET_PROTECTED_MODE, 0) diff --git a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h index dcadcc7..ecf812c 100644 --- a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h +++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h @@ -175,6 +175,7 @@ /* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */ #define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0) #define JS_CONFIG_START_FLUSH_CLEAN (1u << 8) +#define JS_CONFIG_START_FLUSH_INV_SHADER_OTHER (2u << 8) #define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8) #define JS_CONFIG_START_MMU (1u << 10) #define JS_CONFIG_JOB_CHAIN_FLAG (1u << 11) @@ -264,6 +265,11 @@ /* GPU_COMMAND cache flush alias to CSF command payload */ #define GPU_COMMAND_CACHE_CLN_INV_L2 GPU_COMMAND_CLEAN_INV_CACHES #define GPU_COMMAND_CACHE_CLN_INV_L2_LSC GPU_COMMAND_CLEAN_INV_CACHES +#define GPU_COMMAND_CACHE_CLN_INV_FULL GPU_COMMAND_CLEAN_INV_CACHES + +/* Merge cache flush commands */ +#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) \ + ((cmd1) > (cmd2) ? (cmd1) : (cmd2)) /* IRQ flags */ #define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ diff --git a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h index 2598e20..d957dea 100644 --- a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h @@ -121,9 +121,11 @@ * - Added ioctl 55: set_limited_core_count. * 11.32: * - Added new HW performance counters interface to all GPUs. 
+ * 11.33: + * - Removed Kernel legacy HWC interface */ #define BASE_UK_VERSION_MAJOR 11 -#define BASE_UK_VERSION_MINOR 31 +#define BASE_UK_VERSION_MINOR 33 /** * struct kbase_ioctl_version_check - Check version compatibility between diff --git a/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h b/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h index 15843ee..2cdd29c 100644 --- a/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h +++ b/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h @@ -126,6 +126,7 @@ enum prfcnt_list_type { #define PRFCNT_REQUEST_TYPE_MODE FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_REQUEST, 0) #define PRFCNT_REQUEST_TYPE_ENABLE FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_REQUEST, 1) +#define PRFCNT_REQUEST_TYPE_SCOPE FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_REQUEST, 2) #define PRFCNT_SAMPLE_META_TYPE_SAMPLE \ FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_SAMPLE_META, 0) @@ -150,6 +151,7 @@ struct prfcnt_item_header { * @PRFCNT_BLOCK_TYPE_TILER: Tiler. * @PRFCNT_BLOCK_TYPE_MEMORY: Memory System. * @PRFCNT_BLOCK_TYPE_SHADER_CORE: Shader Core. + * @PRFCNT_BLOCK_TYPE_RESERVED: Reserved. */ enum prfcnt_block_type { PRFCNT_BLOCK_TYPE_FE, @@ -160,10 +162,11 @@ enum prfcnt_block_type { }; /** - * enum prfcnt_block_set - Type of performance counter block set. + * enum prfcnt_set - Type of performance counter block set. * @PRFCNT_SET_PRIMARY: Primary. * @PRFCNT_SET_SECONDARY: Secondary. * @PRFCNT_SET_TERTIARY: Tertiary. + * @PRFCNT_SET_RESERVED: Reserved. */ enum prfcnt_set { PRFCNT_SET_PRIMARY, @@ -176,19 +179,19 @@ enum prfcnt_set { * struct prfcnt_enum_block_counter - Performance counter block descriptor. * @block_type: Type of performance counter block. * @set: Which SET this represents: primary, secondary or tertiary. + * @pad: Padding bytes. * @num_instances: How many instances of this block type exist in the hardware. * @num_values: How many entries in the values array there are for samples * from this block. - * @pad: Padding bytes. * @counter_mask: Bitmask that indicates the availability of counters in this * block. */ struct prfcnt_enum_block_counter { __u8 block_type; __u8 set; - __u8 num_instances; - __u8 num_values; - __u8 pad[4]; + __u8 pad[2]; + __u16 num_instances; + __u16 num_values; __u64 counter_mask[2]; }; @@ -206,12 +209,14 @@ struct prfcnt_enum_request { /** * struct prfcnt_enum_item - Performance counter enumeration item. - * @hdr: Header describing the type of item in the list. - * @block_counter: Performance counter block descriptor. - * @request: Request descriptor. + * @hdr: Header describing the type of item in the list. + * @u: Structure containing discriptor for enumeration item type. + * @u.block_counter: Performance counter block descriptor. + * @u.request: Request descriptor. */ struct prfcnt_enum_item { struct prfcnt_item_header hdr; + /** union u - union of block_counter and request */ union { struct prfcnt_enum_block_counter block_counter; struct prfcnt_enum_request request; @@ -222,6 +227,7 @@ struct prfcnt_enum_item { * enum prfcnt_mode - Capture mode for counter sampling. * @PRFCNT_MODE_MANUAL: Manual sampling mode. * @PRFCNT_MODE_PERIODIC: Periodic sampling mode. + * @PRFCNT_MODE_RESERVED: Reserved. */ enum prfcnt_mode { PRFCNT_MODE_MANUAL, @@ -231,16 +237,19 @@ enum prfcnt_mode { /** * struct prfcnt_request_mode - Mode request descriptor. - * @mode: Capture mode for the session, either manual or periodic. - * @pad: Padding bytes. - * @period_us: Period in microseconds, for periodic mode. 
+ * @mode: Capture mode for the session, either manual or periodic. + * @pad: Padding bytes. + * @mode_config: Structure containing configuration for periodic mode. + * @mode_config.period: Periodic config. + * @mode_config.period.period_ns: Period in nanoseconds, for periodic mode. */ struct prfcnt_request_mode { __u8 mode; __u8 pad[7]; + /** union mode_config - request mode configuration*/ union { struct { - __u64 period_us; + __u64 period_ns; } periodic; } mode_config; }; @@ -261,16 +270,40 @@ struct prfcnt_request_enable { }; /** + * enum prfcnt_scope - Scope of performance counters. + * @PRFCNT_SCOPE_GLOBAL: Global scope. + * @PRFCNT_SCOPE_RESERVED: Reserved. + */ +enum prfcnt_scope { + PRFCNT_SCOPE_GLOBAL, + PRFCNT_SCOPE_RESERVED = 255, +}; + +/** + * struct prfcnt_request_scope - Scope request descriptor. + * @scope: Scope of the performance counters to capture. + * @pad: Padding bytes. + */ +struct prfcnt_request_scope { + __u8 scope; + __u8 pad[7]; +}; + +/** * struct prfcnt_request_item - Performance counter request item. - * @hdr: Header describing the type of item in the list. - * @req_mode: Mode request descriptor. - * @req_enable: Enable request descriptor. + * @hdr: Header describing the type of item in the list. + * @u: Structure containing descriptor for request type. + * @u.req_mode: Mode request descriptor. + * @u.req_enable: Enable request descriptor. + * @u.req_scope: Scope request descriptor. */ struct prfcnt_request_item { struct prfcnt_item_header hdr; + /** union u - union on req_mode and req_enable */ union { struct prfcnt_request_mode req_mode; struct prfcnt_request_enable req_enable; + struct prfcnt_request_scope req_scope; } u; }; @@ -278,12 +311,19 @@ struct prfcnt_request_item { * enum prfcnt_request_type - Type of request descriptor. * @PRFCNT_REQUEST_MODE: Specify the capture mode to be used for the session. * @PRFCNT_REQUEST_ENABLE: Specify which performance counters to capture. + * @PRFCNT_REQUEST_SCOPE: Specify the scope of the performance counters. */ enum prfcnt_request_type { PRFCNT_REQUEST_MODE, PRFCNT_REQUEST_ENABLE, + PRFCNT_REQUEST_SCOPE, }; +/* This sample contains overflows from dump duration stretch because the sample buffer was full */ +#define SAMPLE_FLAG_OVERFLOW (1u << 0) +/* This sample has had an error condition for sample duration */ +#define SAMPLE_FLAG_ERROR (1u << 30) + /** * struct prfcnt_sample_metadata - Metadata for counter sample data. * @timestamp_start: Earliest timestamp that values in this sample represent. @@ -292,6 +332,7 @@ enum prfcnt_request_type { * GET_SAMPLE. * @user_data: User data provided to HWC_CMD_START or HWC_CMD_SAMPLE_* * @flags: Property flags. + * @pad: Padding bytes. */ struct prfcnt_sample_metadata { __u64 timestamp_start; @@ -302,18 +343,25 @@ struct prfcnt_sample_metadata { __u32 pad; }; +/* Maximum number of domains a metadata for clock cycles can refer to */ +#define MAX_REPORTED_DOMAINS (4) + /** * struct prfcnt_clock_metadata - Metadata for clock cycles. * @num_domains: Number of domains this metadata refers to. + * @pad: Padding bytes. * @cycles: Number of cycles elapsed in each counter domain between - * timestamp_start and timestamp_end. + * timestamp_start and timestamp_end. Valid only for the + * first @p num_domains. 
*/ struct prfcnt_clock_metadata { __u32 num_domains; __u32 pad; - __u64 *cycles; + __u64 cycles[MAX_REPORTED_DOMAINS]; }; +/* This block state is unknown */ +#define BLOCK_STATE_UNKNOWN (0) /* This block was powered on for at least some portion of the sample */ #define BLOCK_STATE_ON (1 << 0) /* This block was powered off for at least some portion of the sample */ @@ -336,10 +384,12 @@ struct prfcnt_clock_metadata { * @block_type: Type of performance counter block. * @block_idx: Index of performance counter block. * @set: Set of performance counter block. + * @pad_u8: Padding bytes. * @block_state: Bits set indicate the states which the block is known * to have operated in during this sample. * @values_offset: Offset from the start of the mmapped region, to the values * for this block. The values themselves are an array of __u64. + * @pad_u32: Padding bytes. */ struct prfcnt_block_metadata { __u8 block_type; @@ -351,6 +401,14 @@ struct prfcnt_block_metadata { __u32 pad_u32; }; +/** + * struct prfcnt_metadata - Performance counter metadata item. + * @hdr: Header describing the type of item in the list. + * @u: Structure containing descriptor for metadata type. + * @u.sample_md: Counter sample data metadata descriptor. + * @u.clock_md: Clock cycles metadata descriptor. + * @u.block_md: Counter block metadata descriptor. + */ struct prfcnt_metadata { struct prfcnt_item_header hdr; union { @@ -360,5 +418,67 @@ struct prfcnt_metadata { } u; }; +/** + * enum prfcnt_control_cmd_code - Control command code for client session. + * @PRFCNT_CONTROL_CMD_START: Start the counter data dump run for + * the calling client session. + * @PRFCNT_CONTROL_CMD_STOP: Stop the counter data dump run for the + * calling client session. + * @PRFCNT_CONTROL_CMD_SAMPLE_SYNC: Trigger a synchronous manual sample. + * @PRFCNT_CONTROL_CMD_SAMPLE_ASYNC: Trigger an asynchronous manual sample. + * @PRFCNT_CONTROL_CMD_DISCARD: Discard all samples which have not yet + * been consumed by userspace. Note that + * this can race with new samples if + * HWC_CMD_STOP is not called first. + */ +enum prfcnt_control_cmd_code { + PRFCNT_CONTROL_CMD_START = 1, + PRFCNT_CONTROL_CMD_STOP, + PRFCNT_CONTROL_CMD_SAMPLE_SYNC, + PRFCNT_CONTROL_CMD_SAMPLE_ASYNC, + PRFCNT_CONTROL_CMD_DISCARD, +}; + +/** struct prfcnt_control_cmd - Control command + * @cmd: Control command for the session. + * @pad: Padding bytes. + * @user_data: Pointer to user data, which will be returned as part of + * sample metadata. It only affects a single sample if used + * with CMD_SAMPLE_SYNC or CMD_SAMPLE_ASYNC. It affects all + * samples between CMD_START and CMD_STOP if used with the + * periodic sampling. + */ +struct prfcnt_control_cmd { + __u16 cmd; + __u16 pad[3]; + __u64 user_data; +}; + +/** struct prfcnt_sample_access - Metadata to access a sample. + * @sequence: Sequence number for the sample. + * For GET_SAMPLE, it will be set by the kernel. + * For PUT_SAMPLE, it shall be equal to the same value + * provided by the kernel for GET_SAMPLE. + * @sample_offset_bytes: Offset from the start of the mapped area to the first + * entry in the metadata list (sample_metadata) for this + * sample. 
+ */ +struct prfcnt_sample_access { + __u64 sequence; + __u64 sample_offset_bytes; +}; + +/* The ids of ioctl commands, on a reader file descriptor, magic number */ +#define KBASE_KINSTR_PRFCNT_READER 0xBF +/* Ioctl ID for issuing a session operational command */ +#define KBASE_IOCTL_KINSTR_PRFCNT_CMD \ + _IOW(KBASE_KINSTR_PRFCNT_READER, 0x00, struct prfcnt_control_cmd) +/* Ioctl ID for fetching a dumpped sample */ +#define KBASE_IOCTL_KINSTR_PRFCNT_GET_SAMPLE \ + _IOR(KBASE_KINSTR_PRFCNT_READER, 0x01, struct prfcnt_sample_access) +/* Ioctl ID for release internal buffer of the previously fetched sample */ +#define KBASE_IOCTL_KINSTR_PRFCNT_PUT_SAMPLE \ + _IOW(KBASE_KINSTR_PRFCNT_READER, 0x10, struct prfcnt_sample_access) + #endif /* _UAPI_KBASE_HWCNT_READER_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h b/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h index 8e1ed55..63dd3c8 100644 --- a/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h @@ -171,34 +171,6 @@ struct kbase_ioctl_hwcnt_reader_setup { _IOW(KBASE_IOCTL_TYPE, 8, struct kbase_ioctl_hwcnt_reader_setup) /** - * struct kbase_ioctl_hwcnt_enable - Enable hardware counter collection - * @dump_buffer: GPU address to write counters to - * @fe_bm: counters selection bitmask (Front end) - * @shader_bm: counters selection bitmask (Shader) - * @tiler_bm: counters selection bitmask (Tiler) - * @mmu_l2_bm: counters selection bitmask (MMU_L2) - */ -struct kbase_ioctl_hwcnt_enable { - __u64 dump_buffer; - __u32 fe_bm; - __u32 shader_bm; - __u32 tiler_bm; - __u32 mmu_l2_bm; -}; - -/* This IOCTL is deprecated as of R33, and will be removed in R35. */ -#define KBASE_IOCTL_HWCNT_ENABLE \ - _IOW(KBASE_IOCTL_TYPE, 9, struct kbase_ioctl_hwcnt_enable) - -/* This IOCTL is deprecated as of R33, and will be removed in R35. */ -#define KBASE_IOCTL_HWCNT_DUMP \ - _IO(KBASE_IOCTL_TYPE, 10) - -/* This IOCTL is deprecated as of R33, and will be removed in R35. */ -#define KBASE_IOCTL_HWCNT_CLEAR \ - _IO(KBASE_IOCTL_TYPE, 11) - -/** * struct kbase_ioctl_hwcnt_values - Values to set dummy the dummy counters to. * @data: Counter samples for the dummy model. * @size: Size of the counter sample data. diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild index e253f1c..afc0f83 100644 --- a/mali_kbase/Kbuild +++ b/mali_kbase/Kbuild @@ -71,7 +71,7 @@ endif # # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= '"r34p0-00dev1"' +MALI_RELEASE_NAME ?= '"r35p0-01eac0"' # Set up defaults if not defined by build system ifeq ($(CONFIG_MALI_DEBUG), y) MALI_UNIT_TEST = 1 @@ -82,8 +82,6 @@ else endif MALI_COVERAGE ?= 0 -CONFIG_MALI_PLATFORM_NAME ?= "devicetree" - # Kconfig passes in the name with quotes for in-tree builds - remove them. 
MALI_PLATFORM_DIR := $(shell echo $(CONFIG_MALI_PLATFORM_NAME)) @@ -122,7 +120,6 @@ ccflags-y = \ -DMALI_RELEASE_NAME=$(MALI_RELEASE_NAME) \ -DMALI_JIT_PRESSURE_LIMIT_BASE=$(MALI_JIT_PRESSURE_LIMIT_BASE) \ -DMALI_INCREMENTAL_RENDERING=$(MALI_INCREMENTAL_RENDERING) \ - -DMALI_KBASE_BUILD \ -DMALI_PLATFORM_DIR=$(MALI_PLATFORM_DIR) @@ -166,7 +163,6 @@ mali_kbase-y := \ mali_kbase_hwcnt.o \ mali_kbase_hwcnt_gpu.o \ mali_kbase_hwcnt_gpu_narrow.o \ - mali_kbase_hwcnt_legacy.o \ mali_kbase_hwcnt_types.o \ mali_kbase_hwcnt_virtualizer.o \ mali_kbase_softjobs.o \ @@ -206,6 +202,7 @@ mali_kbase-$(CONFIG_SYNC_FILE) += \ ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) mali_kbase-y += \ mali_kbase_hwcnt_backend_csf.o \ + mali_kbase_hwcnt_watchdog_if_timer.o \ mali_kbase_hwcnt_backend_csf_if_fw.o else mali_kbase-y += \ diff --git a/mali_kbase/Makefile b/mali_kbase/Makefile index 099da33..850b257 100644 --- a/mali_kbase/Makefile +++ b/mali_kbase/Makefile @@ -34,10 +34,19 @@ endif CONFIG_MALI_MIDGARD ?= m ifeq ($(CONFIG_MALI_MIDGARD),m) + CONFIG_MALI_PLATFORM_NAME ?= "devicetree" CONFIG_MALI_GATOR_SUPPORT ?= y CONFIG_MALI_ARBITRATION ?= n CONFIG_MALI_PARTITION_MANAGER ?= n + ifeq ($(origin CONFIG_MALI_ABITER_MODULES), undefined) + CONFIG_MALI_ARBITER_MODULES := $(CONFIG_MALI_ARBITRATION) + endif + + ifeq ($(origin CONFIG_MALI_GPU_POWER_MODULES), undefined) + CONFIG_MALI_GPU_POWER_MODULES := $(CONFIG_MALI_ARBITRATION) + endif + ifneq ($(CONFIG_MALI_NO_MALI),y) # Prevent misuse when CONFIG_MALI_NO_MALI=y CONFIG_MALI_REAL_HW ?= y @@ -135,6 +144,8 @@ ifeq ($(CONFIG_MALI_MIDGARD),m) else # Prevent misuse when CONFIG_MALI_MIDGARD=n CONFIG_MALI_ARBITRATION = n + CONFIG_MALI_ARBITER_MODULES = n + CONFIG_MALI_GPU_POWER_MODULES = n CONFIG_MALI_KUTF = n CONFIG_MALI_KUTF_IRQ_TEST = n CONFIG_MALI_KUTF_CLK_RATE_TRACE = n @@ -148,6 +159,8 @@ CONFIGS := \ CONFIG_MALI_DMA_FENCE \ CONFIG_MALI_ARBITER_SUPPORT \ CONFIG_MALI_ARBITRATION \ + CONFIG_MALI_ARBITER_MODULES \ + CONFIG_MALI_GPU_POWER_MODULES \ CONFIG_MALI_PARTITION_MANAGER \ CONFIG_MALI_REAL_HW \ CONFIG_MALI_GEM5_BUILD \ @@ -191,6 +204,8 @@ MAKE_ARGS := $(foreach config,$(CONFIGS), \ $(value config)=$(value $(value config)), \ $(value config)=n)) +MAKE_ARGS += CONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME) + # # EXTRA_CFLAGS to define the custom CONFIGs on out-of-tree build # @@ -201,6 +216,8 @@ EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \ $(if $(filter y m,$(value $(value config))), \ -D$(value config)=1)) +EXTRA_CFLAGS += -DCONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME) + # # KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions # diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_pm.c b/mali_kbase/arbiter/mali_kbase_arbiter_pm.c index 62ff4fd..5425f2b 100644 --- a/mali_kbase/arbiter/mali_kbase_arbiter_pm.c +++ b/mali_kbase/arbiter/mali_kbase_arbiter_pm.c @@ -1053,8 +1053,8 @@ void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq, mutex_lock(&arb_freq->arb_freq_lock); if (arb_freq->arb_freq != freq) { - ndata.new_rate = freq * KHZ_TO_HZ; - ndata.old_rate = arb_freq->arb_freq * KHZ_TO_HZ; + ndata.new_rate = (unsigned long)freq * KHZ_TO_HZ; + ndata.old_rate = (unsigned long)arb_freq->arb_freq * KHZ_TO_HZ; ndata.gpu_clk_handle = arb_freq; arb_freq->arb_freq = freq; arb_freq->freq_updated = true; diff --git a/mali_kbase/arbitration/Kconfig b/mali_kbase/arbitration/Kconfig index b4d6202..1935c81 100644 --- a/mali_kbase/arbitration/Kconfig +++ b/mali_kbase/arbitration/Kconfig @@ -27,5 +27,23 @@ config MALI_XEN virtualization setup 
for Mali If unsure, say N. +config MALI_ARBITER_MODULES + tristate "Enable mali arbiter modules" + depends on MALI_ARBITRATION + default y + help + Enables the build of the arbiter modules used in the reference + virtualization setup for Mali + If unsure, say N + +config MALI_GPU_POWER_MODULES + tristate "Enable gpu power modules" + depends on MALI_ARBITRATION + default y + help + Enables the build of the gpu power modules used in the reference + virtualization setup for Mali + If unsure, say N + source "drivers/gpu/arm/midgard/arbitration/ptm/Kconfig" diff --git a/mali_kbase/backend/gpu/Kbuild b/mali_kbase/backend/gpu/Kbuild index 5dbcff3..90bf6cd 100644 --- a/mali_kbase/backend/gpu/Kbuild +++ b/mali_kbase/backend/gpu/Kbuild @@ -47,3 +47,8 @@ endif mali_kbase-$(CONFIG_MALI_DEVFREQ) += \ backend/gpu/mali_kbase_devfreq.o +# Dummy model +mali_kbase-$(CONFIG_MALI_NO_MALI) += backend/gpu/mali_kbase_model_dummy.o +mali_kbase-$(CONFIG_MALI_NO_MALI) += backend/gpu/mali_kbase_model_linux.o +# HW error simulation +mali_kbase-$(CONFIG_MALI_NO_MALI) += backend/gpu/mali_kbase_model_error_generator.o diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h index df30b63..a6ee959 100644 --- a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h +++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h @@ -64,13 +64,12 @@ int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev); * kbase_init_lowest_gpu_freq() - Find the lowest frequency that the GPU can * run as using the device tree, and save this * within kbdev. + * @kbdev: Pointer to kbase device. * * This function could be called from kbase_clk_rate_trace_manager_init, * but is left separate as it can be called as soon as * dev_pm_opp_of_add_table() has been called to initialize the OPP table. * - * @kbdev: Pointer to kbase device. - * * Return: 0 in any case. */ int kbase_lowest_gpu_freq_init(struct kbase_device *kbdev); diff --git a/mali_kbase/backend/gpu/mali_kbase_devfreq.c b/mali_kbase/backend/gpu/mali_kbase_devfreq.c index b117e57..a7110b3 100644 --- a/mali_kbase/backend/gpu/mali_kbase_devfreq.c +++ b/mali_kbase/backend/gpu/mali_kbase_devfreq.c @@ -43,7 +43,7 @@ * This function will be called only when the opp table which is compatible with * "operating-points-v2-mali", is not present in the devicetree for GPU device. * - * Return: Voltage value in milli volts, 0 in case of error. + * Return: Voltage value in micro volts, 0 in case of error. 
*/ static unsigned long get_voltage(struct kbase_device *kbdev, unsigned long freq) { @@ -69,8 +69,8 @@ static unsigned long get_voltage(struct kbase_device *kbdev, unsigned long freq) rcu_read_unlock(); #endif - /* Return the voltage in milli volts */ - return voltage / 1000; + /* Return the voltage in micro volts */ + return voltage; } void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq, @@ -116,6 +116,9 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) struct dev_pm_opp *opp; unsigned long nominal_freq; unsigned long freqs[BASE_MAX_NR_CLOCKS_REGULATORS] = {0}; +#if IS_ENABLED(CONFIG_REGULATOR) + unsigned long original_freqs[BASE_MAX_NR_CLOCKS_REGULATORS] = {0}; +#endif unsigned long volts[BASE_MAX_NR_CLOCKS_REGULATORS] = {0}; unsigned int i; u64 core_mask; @@ -187,6 +190,9 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) err = clk_set_rate(kbdev->clocks[i], freqs[i]); if (!err) { +#if IS_ENABLED(CONFIG_REGULATOR) + original_freqs[i] = kbdev->current_freqs[i]; +#endif kbdev->current_freqs[i] = freqs[i]; } else { dev_err(dev, "Failed to set clock %lu (target %lu)\n", @@ -200,7 +206,7 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) for (i = 0; i < kbdev->nr_clocks; i++) { if (kbdev->regulators[i] && kbdev->current_voltages[i] != volts[i] && - kbdev->current_freqs[i] > freqs[i]) { + original_freqs[i] > freqs[i]) { int err; err = regulator_set_voltage(kbdev->regulators[i], diff --git a/mali_kbase/backend/gpu/mali_kbase_devfreq.h b/mali_kbase/backend/gpu/mali_kbase_devfreq.h index 901827e..ac88b02 100644 --- a/mali_kbase/backend/gpu/mali_kbase_devfreq.h +++ b/mali_kbase/backend/gpu/mali_kbase_devfreq.h @@ -55,6 +55,7 @@ void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, * This function will only perform translation if an operating-points-v2-mali * table is present in devicetree. If one is not present then it will return an * untranslated frequency (and corresponding voltage) and all cores enabled. + * The voltages returned are in micro Volts (uV). 
*/ void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq, u64 *core_mask, unsigned long *freqs, unsigned long *volts); diff --git a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c index 7b04286..268a888 100644 --- a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c @@ -46,10 +46,10 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev, registers.core_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(CORE_FEATURES)); #else /* !MALI_USE_CSF */ - if (((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) == - GPU_ID2_PRODUCT_TGRX) || - ((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) == - GPU_ID2_PRODUCT_TVAX)) + if (!(((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) == + GPU_ID2_PRODUCT_TDUX) || + ((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) == + GPU_ID2_PRODUCT_TODX))) registers.core_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(CORE_FEATURES)); #endif /* MALI_USE_CSF */ diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c index 90cc537..1691a87 100644 --- a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c @@ -119,29 +119,62 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, return err; } +static void kbasep_instr_hwc_disable_hw_prfcnt(struct kbase_device *kbdev) +{ + u32 irq_mask; + + lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->hwcnt.lock); + + if (kbase_is_gpu_removed(kbdev)) + /* GPU has been removed by Arbiter */ + return; + + /* Disable interrupt */ + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~PRFCNT_SAMPLE_COMPLETED); + + /* Disable the counters */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0); + + kbdev->hwcnt.kctx = NULL; + kbdev->hwcnt.addr = 0ULL; + kbdev->hwcnt.addr_bytes = 0ULL; +} + int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) { unsigned long flags, pm_flags; int err = -EINVAL; - u32 irq_mask; struct kbase_device *kbdev = kctx->kbdev; while (1) { spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_UNRECOVERABLE_ERROR) { + /* Instrumentation is in unrecoverable error state, + * there is nothing for us to do. + */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + /* Already disabled, return no error. 
*/ + return 0; + } + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) { /* Instrumentation is not enabled */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); - goto out; + return err; } if (kbdev->hwcnt.kctx != kctx) { /* Instrumentation has been setup for another context */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); - goto out; + return err; } if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) @@ -158,25 +191,7 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; kbdev->hwcnt.backend.triggered = 0; - if (kbase_is_gpu_removed(kbdev)) { - /* GPU has been removed by Arbiter */ - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); - err = 0; - goto out; - } - - /* Disable interrupt */ - irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), - irq_mask & ~PRFCNT_SAMPLE_COMPLETED); - - /* Disable the counters */ - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0); - - kbdev->hwcnt.kctx = NULL; - kbdev->hwcnt.addr = 0ULL; - kbdev->hwcnt.addr_bytes = 0ULL; + kbasep_instr_hwc_disable_hw_prfcnt(kbdev); spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); @@ -184,9 +199,7 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) dev_dbg(kbdev->dev, "HW counters dumping disabled for context %pK", kctx); - err = 0; - out: - return err; + return 0; } int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) @@ -204,7 +217,7 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) { /* HW counters are disabled or another dump is ongoing, or we're - * resetting + * resetting, or we are in unrecoverable error state. */ goto unlock; } @@ -274,6 +287,10 @@ void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev) spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + /* If the state is in unrecoverable error, we already wake_up the waiter + * and don't need to do any action when sample is done. + */ + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { kbdev->hwcnt.backend.triggered = 1; wake_up(&kbdev->hwcnt.backend.wait); @@ -302,6 +319,8 @@ int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx) if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { err = -EINVAL; kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_UNRECOVERABLE_ERROR) { + err = -EIO; } else { /* Dump done */ KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == @@ -322,8 +341,8 @@ int kbase_instr_hwcnt_clear(struct kbase_context *kctx) spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - /* Check it's the context previously set up and we're not already - * dumping + /* Check it's the context previously set up and we're not in IDLE + * state. */ if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) @@ -347,6 +366,48 @@ out: } KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear); +void kbase_instr_hwcnt_on_unrecoverable_error(struct kbase_device *kbdev) +{ + unsigned long flags; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + /* If we already in unrecoverable error state, early return. 
*/ + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_UNRECOVERABLE_ERROR) { + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + return; + } + + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_UNRECOVERABLE_ERROR; + + /* Need to disable HW if it's not disabled yet. */ + if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) + kbasep_instr_hwc_disable_hw_prfcnt(kbdev); + + /* Wake up any waiters. */ + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_on_unrecoverable_error); + +void kbase_instr_hwcnt_on_before_reset(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + /* A reset is the only way to exit the unrecoverable error state */ + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_UNRECOVERABLE_ERROR) + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_on_before_reset); + int kbase_instr_backend_init(struct kbase_device *kbdev) { spin_lock_init(&kbdev->hwcnt.lock); diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_defs.h b/mali_kbase/backend/gpu/mali_kbase_instr_defs.h index e356348..7190f42 100644 --- a/mali_kbase/backend/gpu/mali_kbase_instr_defs.h +++ b/mali_kbase/backend/gpu/mali_kbase_instr_defs.h @@ -38,8 +38,12 @@ enum kbase_instr_state { KBASE_INSTR_STATE_IDLE, /* Hardware is currently dumping a frame. */ KBASE_INSTR_STATE_DUMPING, - /* An error has occured during DUMPING (page fault). */ - KBASE_INSTR_STATE_FAULT + /* An error has occurred during DUMPING (page fault). */ + KBASE_INSTR_STATE_FAULT, + /* An unrecoverable error has occurred, a reset is the only way to exit + * from unrecoverable error state. + */ + KBASE_INSTR_STATE_UNRECOVERABLE_ERROR, }; /* Structure used for instrumentation and HW counters dumping */ diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_defs.h b/mali_kbase/backend/gpu/mali_kbase_jm_defs.h index e29ace7..3ce3903 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_defs.h +++ b/mali_kbase/backend/gpu/mali_kbase_jm_defs.h @@ -38,10 +38,31 @@ struct rb_entry { struct kbase_jd_atom *katom; }; +/* SLOT_RB_TAG_PURGED assumes a value that is different from + * NULL (SLOT_RB_NULL_TAG_VAL) and will not be the result of + * any valid pointer via macro translation: SLOT_RB_TAG_KCTX(x). + */ +#define SLOT_RB_TAG_PURGED ((u64)(1 << 1)) +#define SLOT_RB_NULL_TAG_VAL ((u64)0) + +/** + * SLOT_RB_TAG_KCTX() - a function-like macro for converting a pointer to a + * u64 for serving as tagged value. + */ +#define SLOT_RB_TAG_KCTX(kctx) (u64)((uintptr_t)(kctx)) /** * struct slot_rb - Slot ringbuffer * @entries: Ringbuffer entries - * @last_context: The last context to submit a job on this slot + * @last_kctx_tagged: The last context that submitted a job to the slot's + * HEAD_NEXT register. The value is a tagged variant so + * must not be dereferenced. It is used in operation to + * track when shader core L1 caches might contain a + * previous context's data, and so must only be set to + * SLOT_RB_NULL_TAG_VAL after reset/powerdown of the + * cores. In slot job submission, if there is a kctx + * change, and the relevant katom is configured with + * BASE_JD_REQ_SKIP_CACHE_START, a L1 read only cache + * maintenace operation is enforced. 
* @read_idx: Current read index of buffer * @write_idx: Current write index of buffer * @job_chain_flag: Flag used to implement jobchain disambiguation @@ -49,7 +70,7 @@ struct rb_entry { struct slot_rb { struct rb_entry entries[SLOT_RB_SIZE]; - struct kbase_context *last_context; + u64 last_kctx_tagged; u8 read_idx; u8 write_idx; diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c index 001efd9..ec3b906 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c @@ -33,6 +33,7 @@ #include <mali_kbase_reset_gpu.h> #include <mali_kbase_ctx_sched.h> #include <mali_kbase_kinstr_jm.h> +#include <mali_kbase_hwaccess_instr.h> #include <mali_kbase_hwcnt_context.h> #include <device/mali_kbase_device.h> #include <backend/gpu/mali_kbase_irq_internal.h> @@ -198,7 +199,9 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, u32 cfg; u64 const jc_head = select_job_chain(katom); u64 affinity; + struct slot_rb *ptr_slot_rb = &kbdev->hwaccess.backend.slot_rb[js]; + lockdep_assert_held(&kbdev->hwaccess_lock); KBASE_DEBUG_ASSERT(kbdev); KBASE_DEBUG_ASSERT(katom); @@ -227,9 +230,23 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)) cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION; - if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START)) - cfg |= JS_CONFIG_START_FLUSH_NO_ACTION; - else + if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START)) { + /* Force a cache maintenance operation if the newly submitted + * katom to the slot is from a different kctx. For a JM GPU + * that has the feature BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, + * applies a FLUSH_INV_SHADER_OTHER. Otherwise, do a + * FLUSH_CLEAN_INVALIDATE. + */ + u64 tagged_kctx = ptr_slot_rb->last_kctx_tagged; + + if (tagged_kctx != SLOT_RB_NULL_TAG_VAL && tagged_kctx != SLOT_RB_TAG_KCTX(kctx)) { + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER)) + cfg |= JS_CONFIG_START_FLUSH_INV_SHADER_OTHER; + else + cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE; + } else + cfg |= JS_CONFIG_START_FLUSH_NO_ACTION; + } else cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE; if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) && @@ -246,13 +263,13 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, (katom->core_req & BASE_JD_REQ_END_RENDERPASS)) cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK; - if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) { + if (!ptr_slot_rb->job_chain_flag) { cfg |= JS_CONFIG_JOB_CHAIN_FLAG; katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN; - kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = true; + ptr_slot_rb->job_chain_flag = true; } else { katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN; - kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = false; + ptr_slot_rb->job_chain_flag = false; } kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg); @@ -290,6 +307,10 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, &kbdev->gpu_props.props.raw_props.js_features[js], "ctx_nr,atom_nr"); kbase_kinstr_jm_atom_hw_submit(katom); + + /* Update the slot's last katom submission kctx */ + ptr_slot_rb->last_kctx_tagged = SLOT_RB_TAG_KCTX(kctx); + #if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) if (!kbase_backend_nr_atoms_submitted(kbdev, js)) { /* If this is the only job on the slot, trace it as starting */ @@ -300,7 +321,6 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, sizeof(js_string)), ktime_to_ns(katom->start_timestamp), (u32)katom->kctx->id, 0, katom->work_id); - 
kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx; } #endif @@ -823,7 +843,7 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) if (timeout != 0) goto exit; - if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) { + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) { dev_err(kbdev->dev, "Issuing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n", ZAP_TIMEOUT); @@ -938,6 +958,7 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js, target_katom, JS_COMMAND_HARD_STOP); + CSTD_UNUSED(stopped); } /** @@ -1177,6 +1198,13 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) kbase_pm_metrics_update(kbdev, NULL); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + /* Tell hardware counters a reset is about to occur. + * If the instr backend is in an unrecoverable error state (e.g. due to + * HW being unresponsive), this will transition the backend out of + * it, on the assumption a reset will fix whatever problem there was. + */ + kbase_instr_hwcnt_on_before_reset(kbdev); + /* Reset the GPU */ kbase_pm_init_hw(kbdev, 0); @@ -1309,7 +1337,7 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev) * @kbdev: kbase device * @flags: Bitfield indicating impact of reset (see flag defines) * - * This function just soft-stops all the slots to ensure that as many jobs as + * This function soft-stops all the slots to ensure that as many jobs as * possible are saved. * * Return: @@ -1323,7 +1351,6 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, { int i; - CSTD_UNUSED(flags); KBASE_DEBUG_ASSERT(kbdev); #ifdef CONFIG_MALI_ARBITER_SUPPORT @@ -1335,6 +1362,9 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, } #endif + if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR) + kbase_instr_hwcnt_on_unrecoverable_error(kbdev); + if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING, KBASE_RESET_GPU_PREPARED) != diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c index 1906286..0f2f296 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c @@ -760,6 +760,13 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, /* ***TRANSITION TO HIGHER STATE*** */ fallthrough; case KBASE_ATOM_EXIT_PROTECTED_RESET: + /* L2 cache has been turned off (which is needed prior to the reset of GPU + * to exit the protected mode), so the override flag can be safely cleared. + * Even if L2 cache is powered up again before the actual reset, it should + * not be an issue (there are no jobs running on the GPU). 
+ */ + kbase_pm_protected_override_disable(kbdev); + /* Issue the reset to the GPU */ err = kbase_gpu_protected_mode_reset(kbdev); @@ -768,7 +775,6 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, if (err) { kbdev->protected_mode_transition = false; - kbase_pm_protected_override_disable(kbdev); /* Failed to exit protected mode, fail atom */ katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; @@ -1069,9 +1075,9 @@ kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a, /** * kbase_gpu_irq_evict - evict a slot's JSn_HEAD_NEXT atom from the HW if it is * related to a failed JSn_HEAD atom - * @kbdev kbase device - * @js job slot to check - * @completion_code completion code of the failed atom + * @kbdev: kbase device + * @js: job slot to check + * @completion_code: completion code of the failed atom * * Note: 'STOPPED' atoms are considered 'failed', as they are in the HW, but * unlike other failure codes we _can_ re-run them. @@ -1129,6 +1135,14 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, if (next_katom->core_req & BASE_JD_REQ_PERMON) kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + /* On evicting the next_katom, the last submission kctx on the + * given job slot then reverts back to the one that owns katom. + * The aim is to enable the next submission that can determine + * if the read only shader core L1 cache should be invalidated. + */ + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = + SLOT_RB_TAG_KCTX(katom->kctx); + return true; } @@ -1137,11 +1151,11 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, /** * kbase_gpu_complete_hw - complete the atom in a slot's JSn_HEAD - * @kbdev kbase device - * @js job slot to check - * @completion_code completion code of the completed atom - * @job_tail value read from JSn_TAIL, for STOPPED atoms - * @end_timestamp pointer to approximate ktime value when the katom completed + * @kbdev: kbase device + * @js: job slot to check + * @completion_code: completion code of the completed atom + * @job_tail: value read from JSn_TAIL, for STOPPED atoms + * @end_timestamp: pointer to approximate ktime value when the katom completed * * Among other operations, this also executes step 2 of a 2-step process of * removing any related atoms from a slot's JSn_HEAD_NEXT (ringbuffer index 1), @@ -1323,8 +1337,6 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, ktime_to_ns(*end_timestamp), (u32)next_katom->kctx->id, 0, next_katom->work_id); - kbdev->hwaccess.backend.slot_rb[js].last_context = - next_katom->kctx; } else { char js_string[16]; @@ -1333,7 +1345,6 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, sizeof(js_string)), ktime_to_ns(ktime_get()), 0, 0, 0); - kbdev->hwaccess.backend.slot_rb[js].last_context = 0; } } #endif @@ -1427,6 +1438,9 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; kbase_jm_complete(kbdev, katom, end_timestamp); } + + /* Clear the slot's last katom submission kctx on reset */ + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = SLOT_RB_NULL_TAG_VAL; } /* Re-enable GPU hardware counters if we're resetting from protected @@ -1649,6 +1663,11 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, kbase_gpu_remove_atom(kbdev, katom_idx1, action, true); + /* Revert the last_context. 
*/ + kbdev->hwaccess.backend.slot_rb[js] + .last_kctx_tagged = + SLOT_RB_TAG_KCTX(katom_idx0->kctx); + stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1); @@ -1724,6 +1743,10 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, kbase_gpu_remove_atom(kbdev, katom_idx1, action, false); + /* Revert the last_context, or mark as purged */ + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = + kctx_idx0 ? SLOT_RB_TAG_KCTX(katom_idx0->kctx) : + SLOT_RB_TAG_PURGED; } else { /* idx0 has already completed - stop * idx1 @@ -1753,7 +1776,8 @@ void kbase_backend_cache_clean(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { if (katom->need_cache_flush_cores_retained) { - kbase_gpu_start_cache_clean(kbdev); + kbase_gpu_start_cache_clean(kbdev, + GPU_COMMAND_CACHE_CLN_INV_FULL); kbase_gpu_wait_cache_clean(kbdev); katom->need_cache_flush_cores_retained = false; @@ -1811,3 +1835,34 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } + +void kbase_backend_slot_kctx_purge_locked(struct kbase_device *kbdev, struct kbase_context *kctx) +{ + int js; + bool tracked = false; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + u64 tagged_kctx = kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged; + + if (tagged_kctx == SLOT_RB_TAG_KCTX(kctx)) { + /* Marking the slot kctx tracking field is purged */ + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = SLOT_RB_TAG_PURGED; + tracked = true; + } + } + + if (tracked) { + /* The context had run some jobs before the purge, other slots + * in SLOT_RB_NULL_TAG_VAL condition needs to be marked as + * purged as well. + */ + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + if (kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged == + SLOT_RB_NULL_TAG_VAL) + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = + SLOT_RB_TAG_PURGED; + } + } +} diff --git a/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c b/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c index 7131546..c2d7a26 100644 --- a/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c +++ b/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c @@ -26,7 +26,7 @@ #include "mali_kbase_l2_mmu_config.h" /** - * struct l2_mmu_config_limit_region + * struct l2_mmu_config_limit_region - L2 MMU limit field * * @value: The default value to load into the L2_MMU_CONFIG register * @mask: The shifted mask of the field in the L2_MMU_CONFIG register @@ -39,7 +39,7 @@ struct l2_mmu_config_limit_region { }; /** - * struct l2_mmu_config_limit + * struct l2_mmu_config_limit - L2 MMU read and write limit * * @product_model: The GPU for which this entry applies * @read: Values for the read limit field diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c new file mode 100644 index 0000000..ccf0e7c --- /dev/null +++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c @@ -0,0 +1,2008 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* NOTES: + * - A default GPU can be compiled in during the build, by defining + * CONFIG_MALI_NO_MALI_DEFAULT_GPU. SCons sets this, which means that + * insmod'ing mali_kbase.ko with no arguments after a build with "scons + * gpu=tXYZ" will yield the expected GPU ID for tXYZ. This can always be + * overridden by passing the 'no_mali_gpu' argument to insmod. + * + * - if CONFIG_MALI_ERROR_INJECT is defined the error injection system is + * activated. + */ + +/* Implementation of failure injection system: + * + * Error conditions are generated by gpu_generate_error(). + * According to CONFIG_MALI_ERROR_INJECT definition gpu_generate_error() either + * generates an error HW condition randomly (CONFIG_MALI_ERROR_INJECT_RANDOM) or + * checks if there is (in error_track_list) an error configuration to be set for + * the current job chain (CONFIG_MALI_ERROR_INJECT_RANDOM not defined). + * Each error condition will trigger a specific "state" for a certain set of + * registers as per Midgard Architecture Specifications doc. + * + * According to Midgard Architecture Specifications doc the following registers + * are always affected by error conditions: + * + * JOB Exception: + * JOB_IRQ_RAWSTAT + * JOB<n> STATUS AREA + * + * MMU Exception: + * MMU_IRQ_RAWSTAT + * AS<n>_FAULTSTATUS + * AS<n>_FAULTADDRESS + * + * GPU Exception: + * GPU_IRQ_RAWSTAT + * GPU_FAULTSTATUS + * GPU_FAULTADDRESS + * + * For further clarification on the model behaviour upon specific error + * conditions the user may refer to the Midgard Architecture Specification + * document + */ +#include <mali_kbase.h> +#include <gpu/mali_kbase_gpu_regmap.h> +#include <backend/gpu/mali_kbase_model_dummy.h> +#include <mali_kbase_mem_linux.h> + +#if MALI_USE_CSF +#include <csf/mali_kbase_csf_firmware.h> + +/* Index of the last value register for each type of core, with the 1st value + * register being at index 0. 
+ */ +#define IPA_CTL_MAX_VAL_CNT_IDX (KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS - 1) + +/* Array for storing the value of SELECT register for each type of core */ +static u64 ipa_ctl_select_config[KBASE_IPA_CORE_TYPE_NUM]; +static bool ipa_control_timer_enabled; +#endif + +#define LO_MASK(M) ((M) & 0xFFFFFFFF) + +static u32 get_implementation_register(u32 reg) +{ + switch (reg) { + case GPU_CONTROL_REG(SHADER_PRESENT_LO): + return LO_MASK(DUMMY_IMPLEMENTATION_SHADER_PRESENT); + case GPU_CONTROL_REG(TILER_PRESENT_LO): + return LO_MASK(DUMMY_IMPLEMENTATION_TILER_PRESENT); + case GPU_CONTROL_REG(L2_PRESENT_LO): + return LO_MASK(DUMMY_IMPLEMENTATION_L2_PRESENT); + case GPU_CONTROL_REG(STACK_PRESENT_LO): + return LO_MASK(DUMMY_IMPLEMENTATION_STACK_PRESENT); + + case GPU_CONTROL_REG(SHADER_PRESENT_HI): + case GPU_CONTROL_REG(TILER_PRESENT_HI): + case GPU_CONTROL_REG(L2_PRESENT_HI): + case GPU_CONTROL_REG(STACK_PRESENT_HI): + /* *** FALLTHROUGH *** */ + default: + return 0; + } +} + +struct { + unsigned long prfcnt_base; + u32 *prfcnt_base_cpu; + struct kbase_device *kbdev; + struct tagged_addr *pages; + size_t page_count; + + u32 time; + + struct { + u32 jm; + u32 tiler; + u32 l2; + u32 shader; + } prfcnt_en; + + u64 l2_present; + u64 shader_present; + +#if !MALI_USE_CSF + u64 jm_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; +#else + u64 cshw_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; +#endif /* !MALI_USE_CSF */ + u64 tiler_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; + u64 l2_counters[KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS * + KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; + u64 shader_counters[KBASE_DUMMY_MODEL_MAX_SHADER_CORES * + KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; + +} performance_counters = { + .l2_present = DUMMY_IMPLEMENTATION_L2_PRESENT, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, +}; + +struct job_slot { + int job_active; + int job_queued; + int job_complete_irq_asserted; + int job_irq_mask; + int job_disabled; +}; + +/** + * struct control_reg_values_t - control register values specific to the GPU being 'emulated' + * @name: GPU name + * @gpu_id: GPU ID to report + * @as_present: Bitmap of address spaces present + * @thread_max_threads: Maximum number of threads per core + * @thread_max_workgroup_size: Maximum number of threads per workgroup + * @thread_max_barrier_size: Maximum number of threads per barrier + * @thread_features: Thread features, NOT INCLUDING the 2 + * most-significant bits, which are always set to + * IMPLEMENTATION_MODEL. 
+ * @core_features: Core features + * @tiler_features: Tiler features + * @mmu_features: MMU features + * @gpu_features_lo: GPU features (low) + * @gpu_features_hi: GPU features (high) + */ +struct control_reg_values_t { + const char *name; + u32 gpu_id; + u32 as_present; + u32 thread_max_threads; + u32 thread_max_workgroup_size; + u32 thread_max_barrier_size; + u32 thread_features; + u32 core_features; + u32 tiler_features; + u32 mmu_features; + u32 gpu_features_lo; + u32 gpu_features_hi; +}; + +struct dummy_model_t { + int reset_completed; + int reset_completed_mask; + int prfcnt_sample_completed; + int power_changed_mask; /* 2bits: _ALL,_SINGLE */ + int power_changed; /* 1bit */ + bool clean_caches_completed; + bool clean_caches_completed_irq_enabled; + int power_on; /* 6bits: SHADER[4],TILER,L2 */ + u32 stack_power_on_lo; + u32 coherency_enable; + unsigned int job_irq_js_state; + struct job_slot slots[NUM_SLOTS]; + const struct control_reg_values_t *control_reg_values; + u32 l2_config; + void *data; +}; + +void gpu_device_set_data(void *model, void *data) +{ + struct dummy_model_t *dummy = (struct dummy_model_t *)model; + + dummy->data = data; +} + +void *gpu_device_get_data(void *model) +{ + struct dummy_model_t *dummy = (struct dummy_model_t *)model; + + return dummy->data; +} + +#define signal_int(m, s) m->slots[(s)].job_complete_irq_asserted = 1 + +/* SCons should pass in a default GPU, but other ways of building (e.g. + * in-tree) won't, so define one here in case. + */ +#ifndef CONFIG_MALI_NO_MALI_DEFAULT_GPU +#define CONFIG_MALI_NO_MALI_DEFAULT_GPU "tMIx" +#endif + +static char *no_mali_gpu = CONFIG_MALI_NO_MALI_DEFAULT_GPU; +module_param(no_mali_gpu, charp, 0000); +MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as"); + +/* Construct a value for the THREAD_FEATURES register, *except* the two most + * significant bits, which are set to IMPLEMENTATION_MODEL in + * midgard_model_read_reg(). + */ +#if MALI_USE_CSF +#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \ + ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 24)) +#else +#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \ + ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 16) | ((MAX_TG_SPLIT) << 24)) +#endif + +/* Array associating GPU names with control register values. The first + * one is used in the case of no match. 
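
As a worked illustration of the THREAD_FEATURES_PARTIAL() packing above (JM layout), the value used by the tMIx entry below expands as:

	/* THREAD_FEATURES_PARTIAL(0x6000, 4, 10)
	 *   == 0x6000 | (4 << 16) | (10 << 24)
	 *   == 0x0a046000
	 * midgard_model_read_reg() later ORs (IMPLEMENTATION_MODEL << 30) into
	 * this value when THREAD_FEATURES is actually read.
	 */
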
+ */ +static const struct control_reg_values_t all_control_reg_values[] = { + { + .name = "tMIx", + .gpu_id = GPU_ID2_MAKE(6, 0, 10, 0, 0, 1, 0), + .as_present = 0xFF, + .thread_max_threads = 0x180, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + }, + { + .name = "tHEx", + .gpu_id = GPU_ID2_MAKE(6, 2, 0, 1, 0, 3, 0), + .as_present = 0xFF, + .thread_max_threads = 0x180, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + }, + { + .name = "tSIx", + .gpu_id = GPU_ID2_MAKE(7, 0, 0, 0, 1, 1, 0), + .as_present = 0xFF, + .thread_max_threads = 0x300, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), + .tiler_features = 0x209, + .mmu_features = 0x2821, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + }, + { + .name = "tDVx", + .gpu_id = GPU_ID2_MAKE(7, 0, 0, 3, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x300, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), + .tiler_features = 0x209, + .mmu_features = 0x2821, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + }, + { + .name = "tNOx", + .gpu_id = GPU_ID2_MAKE(7, 2, 1, 1, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x180, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + }, + { + .name = "tGOx_r0p0", + .gpu_id = GPU_ID2_MAKE(7, 2, 2, 2, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x180, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + }, + { + .name = "tGOx_r1p0", + .gpu_id = GPU_ID2_MAKE(7, 4, 0, 2, 1, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x180, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), + .core_features = 0x2, + .tiler_features = 0x209, + .mmu_features = 0x2823, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + }, + { + .name = "tTRx", + .gpu_id = GPU_ID2_MAKE(9, 0, 8, 0, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x180, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + }, + { + .name = "tNAx", + .gpu_id = GPU_ID2_MAKE(9, 0, 8, 1, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x180, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + }, + { + .name = "tBEx", + .gpu_id = GPU_ID2_MAKE(9, 2, 0, 2, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x180, + .thread_max_workgroup_size = 0x180, + 
.thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + }, + { + .name = "tBAx", + .gpu_id = GPU_ID2_MAKE(9, 14, 4, 5, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x180, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + }, + { + .name = "tDUx", + .gpu_id = GPU_ID2_MAKE(10, 2, 0, 1, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x180, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + }, + { + .name = "tODx", + .gpu_id = GPU_ID2_MAKE(10, 8, 0, 2, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x180, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + }, + { + .name = "tGRx", + .gpu_id = GPU_ID2_MAKE(10, 10, 0, 3, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x180, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), + .core_features = 0x0, /* core_1e16fma2tex */ + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + }, + { + .name = "tVAx", + .gpu_id = GPU_ID2_MAKE(10, 12, 0, 4, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x180, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), + .core_features = 0x0, /* core_1e16fma2tex */ + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + }, + { + .name = "tTUx", + .gpu_id = GPU_ID2_MAKE(11, 8, 5, 2, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x800, + .thread_max_workgroup_size = 0x400, + .thread_max_barrier_size = 0x400, + .thread_features = THREAD_FEATURES_PARTIAL(0x10000, 4, 0), + .core_features = 0x0, /* core_1e32fma2tex */ + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0xf, + .gpu_features_hi = 0, + }, +}; + +struct error_status_t hw_error_status; + +#if MALI_USE_CSF +static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, + u32 cnt_idx, bool is_low_word) +{ + u64 *counters_data; + u32 core_count = 0; + u32 event_index; + u64 value = 0; + u32 core; + + if (WARN_ON(core_type >= KBASE_IPA_CORE_TYPE_NUM)) + return 0; + + if (WARN_ON(cnt_idx >= KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS)) + return 0; + + event_index = + (ipa_ctl_select_config[core_type] >> (cnt_idx * 8)) & 0xFF; + + /* Currently only primary counter blocks are supported */ + if (WARN_ON(event_index >= 64)) + return 0; + + /* The actual events start index 4 onwards. Spec also says PRFCNT_EN, + * TIMESTAMP_LO or TIMESTAMP_HI pseudo-counters do not make sense for + * IPA counters. If selected, the value returned for them will be zero. 
+ */ + if (WARN_ON(event_index <= 3)) + return 0; + + event_index -= 4; + + switch (core_type) { + case KBASE_IPA_CORE_TYPE_CSHW: + core_count = 1; + counters_data = performance_counters.cshw_counters; + break; + case KBASE_IPA_CORE_TYPE_MEMSYS: + core_count = hweight64(performance_counters.l2_present); + counters_data = performance_counters.l2_counters; + break; + case KBASE_IPA_CORE_TYPE_TILER: + core_count = 1; + counters_data = performance_counters.tiler_counters; + break; + case KBASE_IPA_CORE_TYPE_SHADER: + core_count = hweight64(performance_counters.shader_present); + counters_data = performance_counters.shader_counters; + break; + default: + WARN(1, "Invalid core_type %d\n", core_type); + break; + } + + for (core = 0; core < core_count; core++) { + value += counters_data[event_index]; + event_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE; + } + + if (is_low_word) + return (value & U32_MAX); + else + return (value >> 32); +} + +void gpu_model_clear_prfcnt_values(void) +{ + memset(performance_counters.cshw_counters, 0, + sizeof(performance_counters.cshw_counters)); + + memset(performance_counters.tiler_counters, 0, + sizeof(performance_counters.tiler_counters)); + + memset(performance_counters.l2_counters, 0, + sizeof(performance_counters.l2_counters)); + + memset(performance_counters.shader_counters, 0, + sizeof(performance_counters.shader_counters)); +} +KBASE_EXPORT_TEST_API(gpu_model_clear_prfcnt_values); +#endif + +/** + * gpu_model_dump_prfcnt_blocks() - Dump performance counter values to buffer + * + * @values: Array of values to be written out + * @out_index: Index into performance counter buffer + * @block_count: Number of blocks to dump + * @prfcnt_enable_mask: Counter enable mask + * @blocks_present: Available blocks bit mask + */ +static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index, + u32 block_count, + u32 prfcnt_enable_mask, + u64 blocks_present) +{ + u32 block_idx, counter; + u32 counter_value = 0; + u32 *prfcnt_base; + u32 index = 0; + + prfcnt_base = performance_counters.prfcnt_base_cpu; + + for (block_idx = 0; block_idx < block_count; block_idx++) { + /* only dump values if core is present */ + if (!(blocks_present & (1 << block_idx))) { +#if MALI_USE_CSF + /* if CSF dump zeroed out block */ + memset(&prfcnt_base[*out_index], 0, + KBASE_DUMMY_MODEL_BLOCK_SIZE); + *out_index += KBASE_DUMMY_MODEL_VALUES_PER_BLOCK; +#endif /* MALI_USE_CSF */ + continue; + } + + /* write the header */ + prfcnt_base[*out_index] = performance_counters.time++; + prfcnt_base[*out_index+2] = prfcnt_enable_mask; + *out_index += KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS; + + /* write the counters */ + for (counter = 0; + counter < KBASE_DUMMY_MODEL_COUNTER_PER_CORE; + counter++) { + /* HW counter values retrieved through + * PRFCNT_SAMPLE request are of 32 bits only. + */ + counter_value = (u32)values[index++]; + if (KBASE_DUMMY_MODEL_COUNTER_ENABLED( + prfcnt_enable_mask, (counter + + KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS))) { + prfcnt_base[*out_index + counter] = + counter_value; + } + } + *out_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE; + } +} + +/** + * gpu_model_sync_dummy_prfcnt() - Synchronize dumped performance counter values + * + * Used to ensure counter values are not lost if cache invalidation is performed + * prior to reading. 
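
The resulting dump is a flat array of 64-dword blocks: 4 header dwords (timestamp first, enable mask at offset 2) followed by 60 counter values. A hypothetical helper, not part of the driver, for locating a counter in that buffer, under the assumption that every block is written out (absent blocks are zero-filled only on CSF builds and skipped otherwise):

	static inline u32 dummy_prfcnt_dword_offset(u32 blk, u32 ctr)
	{
		/* blk counts blocks in dump order (JM/CSHW, tiler, memsys,
		 * shaders); ctr is the counter index within the block (0..59).
		 */
		return blk * KBASE_DUMMY_MODEL_VALUES_PER_BLOCK +
		       KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS + ctr;
	}
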
+ */ +static void gpu_model_sync_dummy_prfcnt(void) +{ + int i; + struct page *pg; + + for (i = 0; i < performance_counters.page_count; i++) { + pg = as_page(performance_counters.pages[i]); + kbase_sync_single_for_device(performance_counters.kbdev, + kbase_dma_addr(pg), PAGE_SIZE, + DMA_BIDIRECTIONAL); + } +} + +static void midgard_model_dump_prfcnt(void) +{ + u32 index = 0; + +#if !MALI_USE_CSF + gpu_model_dump_prfcnt_blocks(performance_counters.jm_counters, &index, + 1, 0xffffffff, 0x1); +#else + gpu_model_dump_prfcnt_blocks(performance_counters.cshw_counters, &index, + 1, 0xffffffff, 0x1); +#endif /* !MALI_USE_CSF */ + gpu_model_dump_prfcnt_blocks(performance_counters.tiler_counters, + &index, 1, + performance_counters.prfcnt_en.tiler, + DUMMY_IMPLEMENTATION_TILER_PRESENT); + gpu_model_dump_prfcnt_blocks(performance_counters.l2_counters, &index, + KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS, + performance_counters.prfcnt_en.l2, + performance_counters.l2_present); + gpu_model_dump_prfcnt_blocks(performance_counters.shader_counters, + &index, KBASE_DUMMY_MODEL_MAX_SHADER_CORES, + performance_counters.prfcnt_en.shader, + performance_counters.shader_present); + + gpu_model_sync_dummy_prfcnt(); + + /* simulate a 'long' time between samples */ + performance_counters.time += 10; +} + +static void init_register_statuses(struct dummy_model_t *dummy) +{ + int i; + + hw_error_status.errors_mask = 0; + hw_error_status.gpu_error_irq = 0; + hw_error_status.gpu_fault_status = 0; + hw_error_status.job_irq_rawstat = 0; + hw_error_status.job_irq_status = 0; + hw_error_status.mmu_irq_rawstat = 0; + hw_error_status.mmu_irq_mask = 0; + + for (i = 0; i < NUM_SLOTS; i++) { + hw_error_status.js_status[i] = 0; + hw_error_status.job_irq_rawstat |= + (dummy->slots[i].job_complete_irq_asserted) << i; + hw_error_status.job_irq_status |= + (dummy->slots[i].job_complete_irq_asserted) << i; + } + for (i = 0; i < NUM_MMU_AS; i++) { + hw_error_status.as_command[i] = 0; + hw_error_status.as_faultstatus[i] = 0; + hw_error_status.mmu_irq_mask |= 1 << i; + } + + performance_counters.time = 0; +} + +static void update_register_statuses(struct dummy_model_t *dummy, int job_slot) +{ + if (hw_error_status.errors_mask & IS_A_JOB_ERROR) { + if (job_slot == hw_error_status.current_job_slot) { +#if !MALI_USE_CSF + if (hw_error_status.js_status[job_slot] == 0) { + /* status reg is clean; it can be written */ + + switch (hw_error_status.errors_mask & + IS_A_JOB_ERROR) { + case KBASE_JOB_INTERRUPTED: + hw_error_status.js_status[job_slot] = + JS_STATUS_INTERRUPTED; + break; + + case KBASE_JOB_STOPPED: + hw_error_status.js_status[job_slot] = + JS_STATUS_STOPPED; + break; + + case KBASE_JOB_TERMINATED: + hw_error_status.js_status[job_slot] = + JS_STATUS_TERMINATED; + break; + + case KBASE_JOB_CONFIG_FAULT: + hw_error_status.js_status[job_slot] = + JS_STATUS_CONFIG_FAULT; + break; + + case KBASE_JOB_POWER_FAULT: + hw_error_status.js_status[job_slot] = + JS_STATUS_POWER_FAULT; + break; + + case KBASE_JOB_READ_FAULT: + hw_error_status.js_status[job_slot] = + JS_STATUS_READ_FAULT; + break; + + case KBASE_JOB_WRITE_FAULT: + hw_error_status.js_status[job_slot] = + JS_STATUS_WRITE_FAULT; + break; + + case KBASE_JOB_AFFINITY_FAULT: + hw_error_status.js_status[job_slot] = + JS_STATUS_AFFINITY_FAULT; + break; + + case KBASE_JOB_BUS_FAULT: + hw_error_status.js_status[job_slot] = + JS_STATUS_BUS_FAULT; + break; + + case KBASE_INSTR_INVALID_PC: + hw_error_status.js_status[job_slot] = + JS_STATUS_INSTR_INVALID_PC; + break; + + case KBASE_INSTR_INVALID_ENC: + 
hw_error_status.js_status[job_slot] = + JS_STATUS_INSTR_INVALID_ENC; + break; + + case KBASE_INSTR_TYPE_MISMATCH: + hw_error_status.js_status[job_slot] = + JS_STATUS_INSTR_TYPE_MISMATCH; + break; + + case KBASE_INSTR_OPERAND_FAULT: + hw_error_status.js_status[job_slot] = + JS_STATUS_INSTR_OPERAND_FAULT; + break; + + case KBASE_INSTR_TLS_FAULT: + hw_error_status.js_status[job_slot] = + JS_STATUS_INSTR_TLS_FAULT; + break; + + case KBASE_INSTR_BARRIER_FAULT: + hw_error_status.js_status[job_slot] = + JS_STATUS_INSTR_BARRIER_FAULT; + break; + + case KBASE_INSTR_ALIGN_FAULT: + hw_error_status.js_status[job_slot] = + JS_STATUS_INSTR_ALIGN_FAULT; + break; + + case KBASE_DATA_INVALID_FAULT: + hw_error_status.js_status[job_slot] = + JS_STATUS_DATA_INVALID_FAULT; + break; + + case KBASE_TILE_RANGE_FAULT: + hw_error_status.js_status[job_slot] = + JS_STATUS_TILE_RANGE_FAULT; + break; + + case KBASE_ADDR_RANGE_FAULT: + hw_error_status.js_status[job_slot] = + JS_STATUS_ADDRESS_RANGE_FAULT; + break; + + case KBASE_OUT_OF_MEMORY: + hw_error_status.js_status[job_slot] = + JS_STATUS_OUT_OF_MEMORY; + break; + + case KBASE_UNKNOWN: + hw_error_status.js_status[job_slot] = + JS_STATUS_UNKNOWN; + break; + + default: + model_error_log(KBASE_CORE, + "\nAtom Chain 0x%llx: Invalid Error Mask!", + hw_error_status.current_jc); + break; + } + } +#endif /* !MALI_USE_CSF */ + + /* we set JOB_FAIL_<n> */ + hw_error_status.job_irq_rawstat |= + (dummy->slots[job_slot].job_complete_irq_asserted) << + (job_slot + 16); + hw_error_status.job_irq_status |= + (((dummy->slots[job_slot].job_complete_irq_asserted) << + (job_slot)) & + (dummy->slots[job_slot].job_irq_mask << + job_slot)) << 16; + } else { + hw_error_status.job_irq_rawstat |= + (dummy->slots[job_slot].job_complete_irq_asserted) << + job_slot; + hw_error_status.job_irq_status |= + ((dummy->slots[job_slot].job_complete_irq_asserted) << + (job_slot)) & + (dummy->slots[job_slot].job_irq_mask << + job_slot); + } + } else { + hw_error_status.job_irq_rawstat |= + (dummy->slots[job_slot].job_complete_irq_asserted) << + job_slot; + hw_error_status.job_irq_status |= + ((dummy->slots[job_slot].job_complete_irq_asserted) << + (job_slot)) & + (dummy->slots[job_slot].job_irq_mask << job_slot); + } /* end of job register statuses */ + + if (hw_error_status.errors_mask & IS_A_MMU_ERROR) { + int i; + + for (i = 0; i < NUM_MMU_AS; i++) { + if (i == hw_error_status.faulty_mmu_as) { + if (hw_error_status.as_faultstatus[i] == 0) { + u32 status = + hw_error_status.as_faultstatus[i]; + /* status reg is clean; it can be + * written + */ + switch (hw_error_status.errors_mask & + IS_A_MMU_ERROR) { + case KBASE_TRANSLATION_FAULT: + /* 0xCm means TRANSLATION FAULT + * (m is mmu_table_level) + */ + status = + ((1 << 7) | (1 << 6) | + hw_error_status.mmu_table_level + ); + break; + + case KBASE_PERMISSION_FAULT: + /*0xC8 means PERMISSION FAULT */ + status = ((1 << 7) | (1 << 6) | + (1 << 3)); + break; + + case KBASE_TRANSTAB_BUS_FAULT: + /* 0xDm means TRANSITION TABLE + * BUS FAULT (m is + * mmu_table_level) + */ + status = ((1 << 7) | (1 << 6) | + (1 << 4) | + hw_error_status.mmu_table_level + ); + break; + + case KBASE_ACCESS_FLAG: + /* 0xD8 means ACCESS FLAG */ + status = ((1 << 7) | (1 << 6) | + (1 << 4) | (1 << 3)); + break; + + default: + model_error_log(KBASE_CORE, + "\nAtom Chain 0x%llx: Invalid Error Mask!", + hw_error_status.current_jc); + break; + } + hw_error_status.as_faultstatus[i] = + status; + } + + if (hw_error_status.errors_mask & + KBASE_TRANSTAB_BUS_FAULT) + 
hw_error_status.mmu_irq_rawstat |= + 1 << (16 + i); /* bus error */ + else + hw_error_status.mmu_irq_rawstat |= + 1 << i; /* page fault */ + } + } + } /*end of mmu register statuses */ + if (hw_error_status.errors_mask & IS_A_GPU_ERROR) { + if (hw_error_status.gpu_fault_status) { + /* not the first GPU error reported */ + hw_error_status.gpu_error_irq |= (1 << 7); + } else { + hw_error_status.gpu_error_irq |= 1; + switch (hw_error_status.errors_mask & IS_A_GPU_ERROR) { + case KBASE_DELAYED_BUS_FAULT: + hw_error_status.gpu_fault_status = (1 << 7); + break; + + case KBASE_SHAREABILITY_FAULT: + hw_error_status.gpu_fault_status = (1 << 7) | + (1 << 3); + break; + + default: + model_error_log(KBASE_CORE, + "\nAtom Chain 0x%llx: Invalid Error Mask!", + hw_error_status.current_jc); + break; + } + } + } + hw_error_status.errors_mask = 0; /*clear error mask */ +} + +#if !MALI_USE_CSF +static void update_job_irq_js_state(struct dummy_model_t *dummy, int mask) +{ + int i; + + pr_debug("%s", "Updating the JS_ACTIVE register"); + + for (i = 0; i < NUM_SLOTS; i++) { + int slot_active = dummy->slots[i].job_active; + int next_busy = dummy->slots[i].job_queued; + + if ((mask & (1 << i)) || (mask & (1 << (i + 16)))) { + /* clear the bits we're updating */ + dummy->job_irq_js_state &= ~((1 << (16 + i)) | + (1 << i)); + if (hw_error_status.js_status[i]) { + dummy->job_irq_js_state |= next_busy << + (i + 16); + if (mask & (1 << (i + 16))) { + /* clear job slot status */ + hw_error_status.js_status[i] = 0; + /* continue execution of jobchain */ + dummy->slots[i].job_active = + dummy->slots[i].job_queued; + } + } else { + /* set bits if needed */ + dummy->job_irq_js_state |= ((slot_active << i) | + (next_busy << (i + 16))); + } + } + } + pr_debug("The new snapshot is 0x%08X\n", dummy->job_irq_js_state); +} +#endif /* !MALI_USE_CSF */ + +/** + * find_control_reg_values() - Look up constant control register values. + * @gpu: GPU name + * + * Look up the GPU name to find the correct set of control register values for + * that GPU. If not found, warn and use the first values in the array. + * + * Return: Pointer to control register values for that GPU. 
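
The gpu name passed here normally comes from the no_mali_gpu module parameter declared earlier in this file, so the emulated GPU can be chosen at load time; an unrecognised name falls back to the first table entry with the warning below. Illustrative usage only:

	/* e.g.: insmod mali_kbase.ko no_mali_gpu=tTUx */
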
+ */ +static const struct control_reg_values_t *find_control_reg_values(const char *gpu) +{ + size_t i; + const struct control_reg_values_t *ret = NULL; + + for (i = 0; i < ARRAY_SIZE(all_control_reg_values); ++i) { + const struct control_reg_values_t * const fcrv = &all_control_reg_values[i]; + + if (!strcmp(fcrv->name, gpu)) { + ret = fcrv; + pr_debug("Found control register values for %s\n", gpu); + break; + } + } + + if (!ret) { + ret = &all_control_reg_values[0]; + pr_warn("Couldn't find control register values for GPU %s; using default %s\n", + gpu, ret->name); + } + + return ret; +} + +void *midgard_model_create(const void *config) +{ + struct dummy_model_t *dummy = NULL; + + dummy = kzalloc(sizeof(*dummy), GFP_KERNEL); + + if (dummy) { + dummy->job_irq_js_state = 0; + init_register_statuses(dummy); + dummy->control_reg_values = find_control_reg_values(no_mali_gpu); + } + return dummy; +} + +void midgard_model_destroy(void *h) +{ + kfree((void *)h); +} + +static void midgard_model_get_outputs(void *h) +{ + struct dummy_model_t *dummy = (struct dummy_model_t *)h; + + if (hw_error_status.job_irq_status) + gpu_device_raise_irq(dummy, GPU_DUMMY_JOB_IRQ); + + if ((dummy->power_changed && dummy->power_changed_mask) || + (dummy->reset_completed & dummy->reset_completed_mask) || + hw_error_status.gpu_error_irq || + (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled) || + dummy->prfcnt_sample_completed) + gpu_device_raise_irq(dummy, GPU_DUMMY_GPU_IRQ); + + if (hw_error_status.mmu_irq_rawstat & hw_error_status.mmu_irq_mask) + gpu_device_raise_irq(dummy, GPU_DUMMY_MMU_IRQ); +} + +static void midgard_model_update(void *h) +{ + struct dummy_model_t *dummy = (struct dummy_model_t *)h; + int i; + + for (i = 0; i < NUM_SLOTS; i++) { + if (!dummy->slots[i].job_active) + continue; + + if (dummy->slots[i].job_disabled) { + update_register_statuses(dummy, i); + continue; + } + + /* If there are any pending interrupts that have not + * been cleared we cannot run the job in the next register + * as we will overwrite the register status of the job in + * the head registers - which has not yet been read + */ + if ((hw_error_status.job_irq_rawstat & (1 << (i + 16))) || + (hw_error_status.job_irq_rawstat & (1 << i))) { + continue; + } + + /*this job is done assert IRQ lines */ + signal_int(dummy, i); +#ifdef CONFIG_MALI_ERROR_INJECT + midgard_set_error(i); +#endif /* CONFIG_MALI_ERROR_INJECT */ + update_register_statuses(dummy, i); + /*if this job slot returned failures we cannot use it */ + if (hw_error_status.job_irq_rawstat & (1 << (i + 16))) { + dummy->slots[i].job_active = 0; + continue; + } + /*process next job */ + dummy->slots[i].job_active = dummy->slots[i].job_queued; + dummy->slots[i].job_queued = 0; + if (dummy->slots[i].job_active) { + if (hw_error_status.job_irq_rawstat & (1 << (i + 16))) + model_error_log(KBASE_CORE, + "\natom %lld running a job on a dirty slot", + hw_error_status.current_jc); + } + } +} + +static void invalidate_active_jobs(struct dummy_model_t *dummy) +{ + int i; + + for (i = 0; i < NUM_SLOTS; i++) { + if (dummy->slots[i].job_active) { + hw_error_status.job_irq_rawstat |= (1 << (16 + i)); + + hw_error_status.js_status[i] = 0x7f; /*UNKNOWN*/ + } + } +} + +u8 midgard_model_write_reg(void *h, u32 addr, u32 value) +{ + struct dummy_model_t *dummy = (struct dummy_model_t *)h; +#if !MALI_USE_CSF + if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) && + (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) { + int slot_idx = (addr >> 7) & 0xf; + + 
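	/* Worked example: job-slot registers are spaced 0x80 bytes apart, so a
	 * write to slot 2's JS_COMMAND_NEXT register decodes back to
	 * slot_idx == 2 here via (addr >> 7) & 0xf.
	 */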
KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS); + if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_LO)) { + hw_error_status.current_jc &= + ~((u64) (0xFFFFFFFF)); + hw_error_status.current_jc |= (u64) value; + } + if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_HI)) { + hw_error_status.current_jc &= (u64) 0xFFFFFFFF; + hw_error_status.current_jc |= + ((u64) value) << 32; + } + if (addr == JOB_SLOT_REG(slot_idx, JS_COMMAND_NEXT) && + value == 1) { + pr_debug("%s", "start detected"); + KBASE_DEBUG_ASSERT(!dummy->slots[slot_idx].job_active || + !dummy->slots[slot_idx].job_queued); + if ((dummy->slots[slot_idx].job_active) || + (hw_error_status.job_irq_rawstat & + (1 << (slot_idx + 16)))) { + pr_debug("~~~~~~~~~~~ Start: job slot is already active or there are IRQ pending ~~~~~~~~~" + ); + dummy->slots[slot_idx].job_queued = 1; + } else { + dummy->slots[slot_idx].job_active = 1; + } + } + + if (addr == JOB_SLOT_REG(slot_idx, JS_COMMAND_NEXT) && value == + 0) + dummy->slots[slot_idx].job_queued = 0; + + if ((addr == JOB_SLOT_REG(slot_idx, JS_COMMAND)) && + (value == JS_COMMAND_SOFT_STOP || + value == JS_COMMAND_HARD_STOP)) { + /*dummy->slots[slot_idx].job_active = 0; */ + hw_error_status.current_job_slot = slot_idx; + if (value == JS_COMMAND_SOFT_STOP) { + hw_error_status.errors_mask = KBASE_JOB_STOPPED; + } else { /*value == 3 */ + + if (dummy->slots[slot_idx].job_disabled != 0) { + pr_debug("enabling slot after HARD_STOP" + ); + dummy->slots[slot_idx].job_disabled = 0; + } + hw_error_status.errors_mask = + KBASE_JOB_TERMINATED; + } + } + } else if (addr == JOB_CONTROL_REG(JOB_IRQ_CLEAR)) { + int i; + + for (i = 0; i < NUM_SLOTS; i++) { + if (value & ((1 << i) | (1 << (i + 16)))) + dummy->slots[i].job_complete_irq_asserted = 0; + /* hw_error_status.js_status[i] is cleared in + * update_job_irq_js_state + */ + } + pr_debug("%s", "job irq cleared"); + update_job_irq_js_state(dummy, value); + /*remove error condition for JOB */ + hw_error_status.job_irq_rawstat &= ~(value); + hw_error_status.job_irq_status &= ~(value); + } else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) { + int i; + + for (i = 0; i < NUM_SLOTS; i++) + dummy->slots[i].job_irq_mask = (value >> i) & 0x01; + pr_debug("job irq mask to value %x", value); + } else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { +#else /* !MALI_USE_CSF */ + if (addr == JOB_CONTROL_REG(JOB_IRQ_CLEAR)) { + pr_debug("%s", "job irq cleared"); + + hw_error_status.job_irq_rawstat &= ~(value); + hw_error_status.job_irq_status &= ~(value); + } else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) { + /* ignore JOB_IRQ_MASK as it is handled by CSFFW */ + } else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { +#endif /* !MALI_USE_CSF */ + pr_debug("GPU_IRQ_MASK set to 0x%x", value); + dummy->reset_completed_mask = (value >> 8) & 0x01; + dummy->power_changed_mask = (value >> 9) & 0x03; + dummy->clean_caches_completed_irq_enabled = (value & (1u << 17)) != 0u; + } else if (addr == GPU_CONTROL_REG(COHERENCY_ENABLE)) { + dummy->coherency_enable = value; + } else if (addr == GPU_CONTROL_REG(GPU_IRQ_CLEAR)) { + if (value & (1 << 8)) { + pr_debug("%s", "gpu RESET_COMPLETED irq cleared"); + dummy->reset_completed = 0; + } + if (value & (3 << 9)) + dummy->power_changed = 0; + + if (value & (1 << 17)) + dummy->clean_caches_completed = false; + if (value & (1 << 16)) + dummy->prfcnt_sample_completed = 0; + + /*update error status */ + hw_error_status.gpu_error_irq &= ~(value); + } else if (addr == GPU_CONTROL_REG(GPU_COMMAND)) { + switch (value) { + case GPU_COMMAND_SOFT_RESET: + case 
GPU_COMMAND_HARD_RESET: + pr_debug("gpu reset (%d) requested", value); + /* no more fault status */ + hw_error_status.gpu_fault_status = 0; + /* completed reset instantly */ + dummy->reset_completed = 1; + break; +#if MALI_USE_CSF + case GPU_COMMAND_CACHE_CLN_INV_L2: + case GPU_COMMAND_CACHE_CLN_INV_L2_LSC: + case GPU_COMMAND_CACHE_CLN_INV_FULL: +#else + case GPU_COMMAND_CLEAN_CACHES: + case GPU_COMMAND_CLEAN_INV_CACHES: +#endif + pr_debug("clean caches requested"); + dummy->clean_caches_completed = true; + break; + case GPU_COMMAND_PRFCNT_SAMPLE: + midgard_model_dump_prfcnt(); + dummy->prfcnt_sample_completed = 1; + default: + break; + } + } else if (addr == GPU_CONTROL_REG(L2_CONFIG)) { + dummy->l2_config = value; + } +#if MALI_USE_CSF + else if (addr >= GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET) && + addr < GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET + + (CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE))) { + if (addr == GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET)) + hw_error_status.job_irq_status = JOB_IRQ_GLOBAL_IF; + } else if (addr == IPA_CONTROL_REG(COMMAND)) { + pr_debug("Received IPA_CONTROL command"); + } else if (addr == IPA_CONTROL_REG(TIMER)) { + ipa_control_timer_enabled = value ? true : false; + } else if ((addr >= IPA_CONTROL_REG(SELECT_CSHW_LO)) && + (addr <= IPA_CONTROL_REG(SELECT_SHADER_HI))) { + enum kbase_ipa_core_type core_type = (enum kbase_ipa_core_type)( + (addr - IPA_CONTROL_REG(SELECT_CSHW_LO)) >> 3); + bool is_low_word = + !((addr - IPA_CONTROL_REG(SELECT_CSHW_LO)) & 7); + + if (is_low_word) { + ipa_ctl_select_config[core_type] &= ~(u64)U32_MAX; + ipa_ctl_select_config[core_type] |= value; + } else { + ipa_ctl_select_config[core_type] &= U32_MAX; + ipa_ctl_select_config[core_type] |= ((u64)value << 32); + } + } +#endif + else if (addr == MMU_REG(MMU_IRQ_MASK)) { + hw_error_status.mmu_irq_mask = value; + } else if (addr == MMU_REG(MMU_IRQ_CLEAR)) { + hw_error_status.mmu_irq_rawstat &= (~value); + } else if ((addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)) && + (addr <= MMU_AS_REG(15, AS_STATUS))) { + int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO)) + >> 6; + + switch (addr & 0x3F) { + case AS_COMMAND: + switch (value) { + case AS_COMMAND_NOP: + hw_error_status.as_command[mem_addr_space] = + value; + break; + + case AS_COMMAND_UPDATE: + hw_error_status.as_command[mem_addr_space] = + value; + if ((hw_error_status.as_faultstatus[ + mem_addr_space]) + && ((hw_error_status.as_transtab[ + mem_addr_space] & 0x3) != 0)) { + model_error_log(KBASE_CORE, + "\n ERROR: AS_COMMAND issued UPDATE on error condition before AS_TRANSTAB been set to unmapped\n" + ); + } else if ((hw_error_status.as_faultstatus[ + mem_addr_space]) + && ((hw_error_status.as_transtab[ + mem_addr_space] & 0x3) == 0)) { + + /*invalidate all active jobs */ + invalidate_active_jobs(dummy); + /* error handled */ + hw_error_status.as_faultstatus[ + mem_addr_space] = 0; + } + break; + + case AS_COMMAND_LOCK: + case AS_COMMAND_UNLOCK: + hw_error_status.as_command[mem_addr_space] = + value; + break; + + case AS_COMMAND_FLUSH_PT: + case AS_COMMAND_FLUSH_MEM: + if (hw_error_status.as_command[mem_addr_space] + != AS_COMMAND_LOCK) + model_error_log(KBASE_CORE, + "\n ERROR: AS_COMMAND issued FLUSH without LOCKING before\n" + ); + else /* error handled if any */ + hw_error_status.as_faultstatus[ + mem_addr_space] = 0; + hw_error_status.as_command[mem_addr_space] = + value; + break; + + default: + model_error_log(KBASE_CORE, + "\n WARNING: UNRECOGNIZED AS_COMMAND 0x%x\n", + value); + break; + } + break; + + case 
AS_TRANSTAB_LO: + hw_error_status.as_transtab[mem_addr_space] &= + ~((u64) (0xffffffff)); + hw_error_status.as_transtab[mem_addr_space] |= + (u64) value; + break; + + case AS_TRANSTAB_HI: + hw_error_status.as_transtab[mem_addr_space] &= + (u64) 0xffffffff; + hw_error_status.as_transtab[mem_addr_space] |= + ((u64) value) << 32; + break; + + case AS_LOCKADDR_LO: + case AS_LOCKADDR_HI: + case AS_MEMATTR_LO: + case AS_MEMATTR_HI: + case AS_TRANSCFG_LO: + case AS_TRANSCFG_HI: + /* Writes ignored */ + break; + + default: + model_error_log(KBASE_CORE, + "Dummy model register access: Writing unsupported MMU #%d register 0x%x value 0x%x\n", + mem_addr_space, addr, value); + break; + } + } else if (addr >= GPU_CONTROL_REG(PRFCNT_BASE_LO) && + addr <= GPU_CONTROL_REG(PRFCNT_MMU_L2_EN)) { + switch (addr) { + case PRFCNT_BASE_LO: + performance_counters.prfcnt_base |= value; + break; + case PRFCNT_BASE_HI: + performance_counters.prfcnt_base |= ((u64) value) << 32; + break; +#if !MALI_USE_CSF + case PRFCNT_JM_EN: + performance_counters.prfcnt_en.jm = value; + break; +#endif /* !MALI_USE_CSF */ + case PRFCNT_SHADER_EN: + performance_counters.prfcnt_en.shader = value; + break; + case PRFCNT_TILER_EN: + performance_counters.prfcnt_en.tiler = value; + break; + case PRFCNT_MMU_L2_EN: + performance_counters.prfcnt_en.l2 = value; + break; + } + } else { + switch (addr) { + case TILER_PWRON_LO: + dummy->power_on |= (value & 1) << 1; + /* Also ensure L2 is powered on */ + dummy->power_on |= value & 1; + dummy->power_changed = 1; + break; + case SHADER_PWRON_LO: + dummy->power_on |= (value & 0xF) << 2; + dummy->power_changed = 1; + break; + case L2_PWRON_LO: + dummy->power_on |= value & 1; + dummy->power_changed = 1; + break; + case STACK_PWRON_LO: + dummy->stack_power_on_lo |= value; + dummy->power_changed = 1; + break; + case TILER_PWROFF_LO: + dummy->power_on &= ~((value & 1) << 1); + dummy->power_changed = 1; + break; + case SHADER_PWROFF_LO: + dummy->power_on &= ~((value & 0xF) << 2); + dummy->power_changed = 1; + break; + case L2_PWROFF_LO: + dummy->power_on &= ~(value & 1); + /* Also ensure tiler is powered off */ + dummy->power_on &= ~((value & 1) << 1); + dummy->power_changed = 1; + break; + case STACK_PWROFF_LO: + dummy->stack_power_on_lo &= ~value; + dummy->power_changed = 1; + break; + + case TILER_PWROFF_HI: + case SHADER_PWROFF_HI: + case L2_PWROFF_HI: + case PWR_KEY: + case PWR_OVERRIDE0: +#if !MALI_USE_CSF + case JM_CONFIG: +#else /* !MALI_USE_CSF */ + case CSF_CONFIG: +#endif /* !MALI_USE_CSF */ + case SHADER_CONFIG: + case TILER_CONFIG: + case L2_MMU_CONFIG: + /* Writes ignored */ + break; + default: + model_error_log(KBASE_CORE, + "Dummy model register access: Writing unsupported register 0x%x value 0x%x\n", + addr, value); + break; + } + } + + midgard_model_update(dummy); + midgard_model_get_outputs(dummy); + + return 1; +} + +u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) +{ + struct dummy_model_t *dummy = (struct dummy_model_t *)h; + *value = 0; /* 0 by default */ +#if !MALI_USE_CSF + if (addr == JOB_CONTROL_REG(JOB_IRQ_JS_STATE)) { + pr_debug("%s", "JS_ACTIVE being read"); + + *value = dummy->job_irq_js_state; + } else if (addr == GPU_CONTROL_REG(GPU_ID)) { +#else /* !MALI_USE_CSF */ + if (addr == GPU_CONTROL_REG(GPU_ID)) { +#endif /* !MALI_USE_CSF */ + + *value = dummy->control_reg_values->gpu_id; + } else if (addr == JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)) { + *value = hw_error_status.job_irq_rawstat; + pr_debug("%s", "JS_IRQ_RAWSTAT being read"); + } else if (addr == 
JOB_CONTROL_REG(JOB_IRQ_STATUS)) { + *value = hw_error_status.job_irq_status; + pr_debug("JS_IRQ_STATUS being read %x", *value); + } +#if !MALI_USE_CSF + else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) { + int i; + + *value = 0; + for (i = 0; i < NUM_SLOTS; i++) + *value |= dummy->slots[i].job_irq_mask << i; + pr_debug("JS_IRQ_MASK being read %x", *value); + } +#else /* !MALI_USE_CSF */ + else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) { + /* ignore JOB_IRQ_MASK as it is handled by CSFFW */ + } +#endif /* !MALI_USE_CSF */ + else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { + *value = (dummy->reset_completed_mask << 8) | + (dummy->power_changed_mask << 9) | (1 << 7) | 1; + pr_debug("GPU_IRQ_MASK read %x", *value); + } else if (addr == GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) { + *value = (dummy->power_changed << 9) | (dummy->power_changed << 10) | + (dummy->reset_completed << 8) | + ((dummy->clean_caches_completed ? 1u : 0u) << 17) | + (dummy->prfcnt_sample_completed << 16) | hw_error_status.gpu_error_irq; + pr_debug("GPU_IRQ_RAWSTAT read %x", *value); + } else if (addr == GPU_CONTROL_REG(GPU_IRQ_STATUS)) { + *value = ((dummy->power_changed && (dummy->power_changed_mask & 0x1)) << 9) | + ((dummy->power_changed && (dummy->power_changed_mask & 0x2)) << 10) | + ((dummy->reset_completed & dummy->reset_completed_mask) << 8) | + (((dummy->clean_caches_completed && + dummy->clean_caches_completed_irq_enabled) ? + 1u : + 0u) + << 17) | + (dummy->prfcnt_sample_completed << 16) | hw_error_status.gpu_error_irq; + pr_debug("GPU_IRQ_STAT read %x", *value); + } else if (addr == GPU_CONTROL_REG(GPU_STATUS)) { + *value = 0; +#if !MALI_USE_CSF + } else if (addr == GPU_CONTROL_REG(LATEST_FLUSH)) { + *value = 0; +#endif + } else if (addr == GPU_CONTROL_REG(GPU_FAULTSTATUS)) { + *value = hw_error_status.gpu_fault_status; + } else if (addr == GPU_CONTROL_REG(L2_CONFIG)) { + *value = dummy->l2_config; + } else if ((addr >= GPU_CONTROL_REG(SHADER_PRESENT_LO)) && + (addr <= GPU_CONTROL_REG(L2_MMU_CONFIG))) { + switch (addr) { + case GPU_CONTROL_REG(SHADER_PRESENT_LO): + case GPU_CONTROL_REG(SHADER_PRESENT_HI): + case GPU_CONTROL_REG(TILER_PRESENT_LO): + case GPU_CONTROL_REG(TILER_PRESENT_HI): + case GPU_CONTROL_REG(L2_PRESENT_LO): + case GPU_CONTROL_REG(L2_PRESENT_HI): + case GPU_CONTROL_REG(STACK_PRESENT_LO): + case GPU_CONTROL_REG(STACK_PRESENT_HI): + *value = get_implementation_register(addr); + break; + case GPU_CONTROL_REG(SHADER_READY_LO): + *value = (dummy->power_on >> 0x02) & + get_implementation_register( + GPU_CONTROL_REG(SHADER_PRESENT_LO)); + break; + case GPU_CONTROL_REG(TILER_READY_LO): + *value = (dummy->power_on >> 0x01) & + get_implementation_register( + GPU_CONTROL_REG(TILER_PRESENT_LO)); + break; + case GPU_CONTROL_REG(L2_READY_LO): + *value = dummy->power_on & + get_implementation_register( + GPU_CONTROL_REG(L2_PRESENT_LO)); + break; + case GPU_CONTROL_REG(STACK_READY_LO): + *value = dummy->stack_power_on_lo & + get_implementation_register( + GPU_CONTROL_REG(STACK_PRESENT_LO)); + break; + + case GPU_CONTROL_REG(SHADER_READY_HI): + case GPU_CONTROL_REG(TILER_READY_HI): + case GPU_CONTROL_REG(L2_READY_HI): + case GPU_CONTROL_REG(STACK_READY_HI): + *value = 0; + break; + + case GPU_CONTROL_REG(SHADER_PWRTRANS_LO): + case GPU_CONTROL_REG(SHADER_PWRTRANS_HI): + case GPU_CONTROL_REG(TILER_PWRTRANS_LO): + case GPU_CONTROL_REG(TILER_PWRTRANS_HI): + case GPU_CONTROL_REG(L2_PWRTRANS_LO): + case GPU_CONTROL_REG(L2_PWRTRANS_HI): + case GPU_CONTROL_REG(STACK_PWRTRANS_LO): + case GPU_CONTROL_REG(STACK_PWRTRANS_HI): 
+ *value = 0; + break; + + case GPU_CONTROL_REG(SHADER_PWRACTIVE_LO): + case GPU_CONTROL_REG(SHADER_PWRACTIVE_HI): + case GPU_CONTROL_REG(TILER_PWRACTIVE_LO): + case GPU_CONTROL_REG(TILER_PWRACTIVE_HI): + case GPU_CONTROL_REG(L2_PWRACTIVE_LO): + case GPU_CONTROL_REG(L2_PWRACTIVE_HI): + *value = 0; + break; + +#if !MALI_USE_CSF + case GPU_CONTROL_REG(JM_CONFIG): +#else /* !MALI_USE_CSF */ + case GPU_CONTROL_REG(CSF_CONFIG): +#endif /* !MALI_USE_CSF */ + + case GPU_CONTROL_REG(SHADER_CONFIG): + case GPU_CONTROL_REG(TILER_CONFIG): + case GPU_CONTROL_REG(L2_MMU_CONFIG): + *value = 0; + break; + + case GPU_CONTROL_REG(COHERENCY_FEATURES): + *value = BIT(0) | BIT(1); /* ace_lite and ace, respectively. */ + break; + case GPU_CONTROL_REG(COHERENCY_ENABLE): + *value = dummy->coherency_enable; + break; + + case GPU_CONTROL_REG(THREAD_TLS_ALLOC): + *value = 0; + break; + + default: + model_error_log(KBASE_CORE, + "Dummy model register access: Reading unknown control reg 0x%x\n", + addr); + break; + } +#if !MALI_USE_CSF + } else if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) && + (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) { + int slot_idx = (addr >> 7) & 0xf; + int sub_reg = addr & 0x7F; + + KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS); + switch (sub_reg) { + case JS_HEAD_NEXT_LO: + *value = (u32) ((hw_error_status.current_jc) & + 0xFFFFFFFF); + break; + case JS_HEAD_NEXT_HI: + *value = (u32) (hw_error_status.current_jc >> 32); + break; + case JS_STATUS: + if (hw_error_status.js_status[slot_idx]) + *value = hw_error_status.js_status[slot_idx]; + else /* 0x08 means active, 0x00 idle */ + *value = (dummy->slots[slot_idx].job_active) + << 3; + break; + case JS_COMMAND_NEXT: + *value = dummy->slots[slot_idx].job_queued; + break; + + /* The dummy model does not implement these registers + * avoid printing error messages + */ + case JS_HEAD_HI: + case JS_HEAD_LO: + case JS_TAIL_HI: + case JS_TAIL_LO: + case JS_FLUSH_ID_NEXT: + break; + + default: + model_error_log(KBASE_CORE, + "Dummy model register access: unknown job slot reg 0x%02X being read\n", + sub_reg); + break; + } +#endif /* !MALI_USE_CSF */ + } else if (addr == GPU_CONTROL_REG(AS_PRESENT)) { + *value = dummy->control_reg_values->as_present; +#if !MALI_USE_CSF + } else if (addr == GPU_CONTROL_REG(JS_PRESENT)) { + *value = 0x7; +#endif /* !MALI_USE_CSF */ + } else if (addr >= GPU_CONTROL_REG(TEXTURE_FEATURES_0) && + addr <= GPU_CONTROL_REG(TEXTURE_FEATURES_3)) { + switch (addr) { + case GPU_CONTROL_REG(TEXTURE_FEATURES_0): + *value = 0xfffff; + break; + + case GPU_CONTROL_REG(TEXTURE_FEATURES_1): + *value = 0xffff; + break; + + case GPU_CONTROL_REG(TEXTURE_FEATURES_2): + *value = 0x9f81ffff; + break; + + case GPU_CONTROL_REG(TEXTURE_FEATURES_3): + *value = 0; + break; + } +#if !MALI_USE_CSF + } else if (addr >= GPU_CONTROL_REG(JS0_FEATURES) && + addr <= GPU_CONTROL_REG(JS15_FEATURES)) { + switch (addr) { + case GPU_CONTROL_REG(JS0_FEATURES): + *value = 0x20e; + break; + + case GPU_CONTROL_REG(JS1_FEATURES): + *value = 0x1fe; + break; + + case GPU_CONTROL_REG(JS2_FEATURES): + *value = 0x7e; + break; + + default: + *value = 0; + break; + } +#endif /* !MALI_USE_CSF */ + } else if (addr >= GPU_CONTROL_REG(L2_FEATURES) + && addr <= GPU_CONTROL_REG(MMU_FEATURES)) { + switch (addr) { + case GPU_CONTROL_REG(L2_FEATURES): + *value = 0x6100206; + break; + + case GPU_CONTROL_REG(CORE_FEATURES): + *value = dummy->control_reg_values->core_features; + break; + + case GPU_CONTROL_REG(TILER_FEATURES): + *value = dummy->control_reg_values->tiler_features; + break; + + 
case GPU_CONTROL_REG(MEM_FEATURES): + /* Bit 0: Core group is coherent */ + *value = 0x01; + /* Bits 11:8: L2 slice count - 1 */ + *value |= (hweight64(DUMMY_IMPLEMENTATION_L2_PRESENT) - 1) << 8; + break; + + case GPU_CONTROL_REG(MMU_FEATURES): + *value = dummy->control_reg_values->mmu_features; + break; + } + } else if (addr >= GPU_CONTROL_REG(THREAD_MAX_THREADS) + && addr <= GPU_CONTROL_REG(THREAD_FEATURES)) { + switch (addr) { + case GPU_CONTROL_REG(THREAD_FEATURES): + *value = dummy->control_reg_values->thread_features + | (IMPLEMENTATION_MODEL << 30); + break; + case GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE): + *value = dummy->control_reg_values->thread_max_barrier_size; + break; + case GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE): + *value = dummy->control_reg_values->thread_max_workgroup_size; + break; + case GPU_CONTROL_REG(THREAD_MAX_THREADS): + *value = dummy->control_reg_values->thread_max_threads; + break; + } + } else if (addr >= GPU_CONTROL_REG(CYCLE_COUNT_LO) + && addr <= GPU_CONTROL_REG(TIMESTAMP_HI)) { + *value = 0; + } else if (addr >= MMU_AS_REG(0, AS_TRANSTAB_LO) + && addr <= MMU_AS_REG(15, AS_STATUS)) { + int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO)) + >> 6; + + switch (addr & 0x3F) { + case AS_TRANSTAB_LO: + *value = (u32) + (hw_error_status.as_transtab[mem_addr_space] & + 0xffffffff); + break; + + case AS_TRANSTAB_HI: + *value = (u32) + (hw_error_status.as_transtab[mem_addr_space] >> + 32); + break; + + case AS_STATUS: + *value = 0; + break; + + case AS_FAULTSTATUS: + if (mem_addr_space == hw_error_status.faulty_mmu_as) + *value = hw_error_status.as_faultstatus[ + hw_error_status.faulty_mmu_as]; + else + *value = 0; + break; + + case AS_LOCKADDR_LO: + case AS_LOCKADDR_HI: + case AS_MEMATTR_LO: + case AS_MEMATTR_HI: + case AS_TRANSCFG_LO: + case AS_TRANSCFG_HI: + /* Read ignored */ + *value = 0; + break; + + default: + model_error_log(KBASE_CORE, + "Dummy model register access: Reading unsupported MMU #%d register 0x%x. 
Returning 0\n", + mem_addr_space, addr); + *value = 0; + break; + } + } else if (addr == MMU_REG(MMU_IRQ_MASK)) { + *value = hw_error_status.mmu_irq_mask; + } else if (addr == MMU_REG(MMU_IRQ_RAWSTAT)) { + *value = hw_error_status.mmu_irq_rawstat; + } else if (addr == MMU_REG(MMU_IRQ_STATUS)) { + *value = hw_error_status.mmu_irq_mask & + hw_error_status.mmu_irq_rawstat; + } +#if MALI_USE_CSF + else if (addr == IPA_CONTROL_REG(STATUS)) { + *value = (ipa_control_timer_enabled << 31); + } else if ((addr >= IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) && + (addr <= IPA_CONTROL_REG(VALUE_CSHW_REG_HI( + IPA_CTL_MAX_VAL_CNT_IDX)))) { + u32 counter_index = + (addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) >> 3; + bool is_low_word = + !((addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) & 7); + + *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_CSHW, + counter_index, is_low_word); + } else if ((addr >= IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) && + (addr <= IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI( + IPA_CTL_MAX_VAL_CNT_IDX)))) { + u32 counter_index = + (addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) >> 3; + bool is_low_word = + !((addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) & 7); + + *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_MEMSYS, + counter_index, is_low_word); + } else if ((addr >= IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) && + (addr <= IPA_CONTROL_REG(VALUE_TILER_REG_HI( + IPA_CTL_MAX_VAL_CNT_IDX)))) { + u32 counter_index = + (addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) >> 3; + bool is_low_word = + !((addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) & 7); + + *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_TILER, + counter_index, is_low_word); + } else if ((addr >= IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) && + (addr <= IPA_CONTROL_REG(VALUE_SHADER_REG_HI( + IPA_CTL_MAX_VAL_CNT_IDX)))) { + u32 counter_index = + (addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) >> 3; + bool is_low_word = + !((addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) & 7); + + *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER, + counter_index, is_low_word); + } +#endif + else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) { + *value = dummy->control_reg_values->gpu_features_lo; + } else if (addr == GPU_CONTROL_REG(GPU_FEATURES_HI)) { + *value = dummy->control_reg_values->gpu_features_hi; + } else { + model_error_log(KBASE_CORE, + "Dummy model register access: Reading unsupported register 0x%x. 
Returning 0\n", + addr); + *value = 0; + } + + CSTD_UNUSED(dummy); + + return 1; +} + +static u32 set_user_sample_core_type(u64 *counters, + u32 *usr_data_start, u32 usr_data_offset, + u32 usr_data_size, u32 core_count) +{ + u32 sample_size; + u32 *usr_data = NULL; + + sample_size = + core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u32); + + if ((usr_data_size >= usr_data_offset) && + (sample_size <= usr_data_size - usr_data_offset)) + usr_data = usr_data_start + (usr_data_offset / sizeof(u32)); + + if (!usr_data) + model_error_log(KBASE_CORE, "Unable to set counter sample 1"); + else { + u32 loop_cnt = core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE; + u32 i; + + for (i = 0; i < loop_cnt; i++) { + if (copy_from_user(&counters[i], &usr_data[i], + sizeof(u32))) { + model_error_log(KBASE_CORE, "Unable to set counter sample 2"); + break; + } + } + } + + return usr_data_offset + sample_size; +} + +static u32 set_kernel_sample_core_type(u64 *counters, + u64 *usr_data_start, u32 usr_data_offset, + u32 usr_data_size, u32 core_count) +{ + u32 sample_size; + u64 *usr_data = NULL; + + sample_size = + core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u64); + + if ((usr_data_size >= usr_data_offset) && + (sample_size <= usr_data_size - usr_data_offset)) + usr_data = usr_data_start + (usr_data_offset / sizeof(u64)); + + if (!usr_data) + model_error_log(KBASE_CORE, "Unable to set kernel counter sample 1"); + else + memcpy(counters, usr_data, sample_size); + + return usr_data_offset + sample_size; +} + +/* Counter values injected through ioctl are of 32 bits */ +void gpu_model_set_dummy_prfcnt_sample(u32 *usr_data, u32 usr_data_size) +{ + u32 offset = 0; + +#if !MALI_USE_CSF + offset = set_user_sample_core_type(performance_counters.jm_counters, + usr_data, offset, usr_data_size, 1); +#else + offset = set_user_sample_core_type(performance_counters.cshw_counters, + usr_data, offset, usr_data_size, 1); +#endif /* !MALI_USE_CSF */ + offset = set_user_sample_core_type(performance_counters.tiler_counters, + usr_data, offset, usr_data_size, + hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT)); + offset = set_user_sample_core_type(performance_counters.l2_counters, + usr_data, offset, usr_data_size, + KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS); + offset = set_user_sample_core_type(performance_counters.shader_counters, + usr_data, offset, usr_data_size, + KBASE_DUMMY_MODEL_MAX_SHADER_CORES); +} + +/* Counter values injected through kutf are of 64 bits */ +void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *usr_data, u32 usr_data_size) +{ + u32 offset = 0; + +#if !MALI_USE_CSF + offset = set_kernel_sample_core_type(performance_counters.jm_counters, + usr_data, offset, usr_data_size, 1); +#else + offset = set_kernel_sample_core_type(performance_counters.cshw_counters, + usr_data, offset, usr_data_size, 1); +#endif /* !MALI_USE_CSF */ + offset = set_kernel_sample_core_type(performance_counters.tiler_counters, + usr_data, offset, usr_data_size, + hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT)); + offset = set_kernel_sample_core_type(performance_counters.l2_counters, + usr_data, offset, usr_data_size, + hweight64(performance_counters.l2_present)); + offset = set_kernel_sample_core_type(performance_counters.shader_counters, + usr_data, offset, usr_data_size, + hweight64(performance_counters.shader_present)); +} +KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_kernel_sample); + +void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev, + u64 *l2_present, u64 *shader_present) +{ + if (shader_present) + 
*shader_present = performance_counters.shader_present; + if (l2_present) + *l2_present = performance_counters.l2_present; +} +KBASE_EXPORT_TEST_API(gpu_model_get_dummy_prfcnt_cores); + +void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev, + u64 l2_present, u64 shader_present) +{ + if (WARN_ON(!l2_present || !shader_present + || hweight64(l2_present) > KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS + || hweight64(shader_present) > KBASE_DUMMY_MODEL_MAX_SHADER_CORES)) + return; + + performance_counters.l2_present = l2_present; + performance_counters.shader_present = shader_present; + + /* Update the GPU properties used by vinstr to calculate the counter + * dump buffer size. + */ + kbdev->gpu_props.props.l2_props.num_l2_slices = hweight64(l2_present); + kbdev->gpu_props.props.coherency_info.group[0].core_mask = shader_present; + kbdev->gpu_props.curr_config.l2_slices = hweight64(l2_present); + kbdev->gpu_props.curr_config.shader_present = shader_present; +} +KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_cores); + +void gpu_model_set_dummy_prfcnt_base_cpu(u32 *base, struct kbase_device *kbdev, + struct tagged_addr *pages, + size_t page_count) +{ + performance_counters.prfcnt_base_cpu = base; + performance_counters.kbdev = kbdev; + performance_counters.pages = pages; + performance_counters.page_count = page_count; +} + +int gpu_model_control(void *model, + struct kbase_model_control_params *params) +{ + struct dummy_model_t *dummy = (struct dummy_model_t *)model; + int i; + + if (params->command == KBASE_MC_DISABLE_JOBS) { + for (i = 0; i < NUM_SLOTS; i++) + dummy->slots[i].job_disabled = params->value; + } else { + return -EINVAL; + } + + midgard_model_update(dummy); + midgard_model_get_outputs(dummy); + + return 0; +} diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.h b/mali_kbase/backend/gpu/mali_kbase_model_dummy.h new file mode 100644 index 0000000..e092134 --- /dev/null +++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.h @@ -0,0 +1,177 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * Dummy Model interface + */ + +#ifndef _KBASE_MODEL_DUMMY_H_ +#define _KBASE_MODEL_DUMMY_H_ + +#include <uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h> + +#define model_error_log(module, ...) 
pr_err(__VA_ARGS__) + +#define NUM_SLOTS 4 /*number of job slots */ + +/*Errors Mask Codes*/ +/* each bit of errors_mask is associated to a specific error: + * NON FAULT STATUS CODES: only the following are implemented since the others + * represent normal working statuses + */ +#define KBASE_JOB_INTERRUPTED (1<<0) +#define KBASE_JOB_STOPPED (1<<1) +#define KBASE_JOB_TERMINATED (1<<2) + +/* JOB EXCEPTIONS: */ +#define KBASE_JOB_CONFIG_FAULT (1<<3) +#define KBASE_JOB_POWER_FAULT (1<<4) +#define KBASE_JOB_READ_FAULT (1<<5) +#define KBASE_JOB_WRITE_FAULT (1<<6) +#define KBASE_JOB_AFFINITY_FAULT (1<<7) +#define KBASE_JOB_BUS_FAULT (1<<8) +#define KBASE_INSTR_INVALID_PC (1<<9) +#define KBASE_INSTR_INVALID_ENC (1<<10) +#define KBASE_INSTR_TYPE_MISMATCH (1<<11) +#define KBASE_INSTR_OPERAND_FAULT (1<<12) +#define KBASE_INSTR_TLS_FAULT (1<<13) +#define KBASE_INSTR_BARRIER_FAULT (1<<14) +#define KBASE_INSTR_ALIGN_FAULT (1<<15) +#define KBASE_DATA_INVALID_FAULT (1<<16) +#define KBASE_TILE_RANGE_FAULT (1<<17) +#define KBASE_ADDR_RANGE_FAULT (1<<18) +#define KBASE_OUT_OF_MEMORY (1<<19) +#define KBASE_UNKNOWN (1<<20) + +/* GPU EXCEPTIONS:*/ +#define KBASE_DELAYED_BUS_FAULT (1<<21) +#define KBASE_SHAREABILITY_FAULT (1<<22) + +/* MMU EXCEPTIONS:*/ +#define KBASE_TRANSLATION_FAULT (1<<23) +#define KBASE_PERMISSION_FAULT (1<<24) +#define KBASE_TRANSTAB_BUS_FAULT (1<<25) +#define KBASE_ACCESS_FLAG (1<<26) + +/* generic useful bitmasks */ +#define IS_A_JOB_ERROR ((KBASE_UNKNOWN << 1) - KBASE_JOB_INTERRUPTED) +#define IS_A_MMU_ERROR ((KBASE_ACCESS_FLAG << 1) - KBASE_TRANSLATION_FAULT) +#define IS_A_GPU_ERROR (KBASE_DELAYED_BUS_FAULT|KBASE_SHAREABILITY_FAULT) + +/* number of possible MMU address spaces */ +#define NUM_MMU_AS 16 /* total number of MMU address spaces as in + * MMU_IRQ_RAWSTAT register + */ + +/* Forward declaration */ +struct kbase_device; + +/* + * the function below is used to trigger the simulation of a faulty + * HW condition for a specific job chain atom + */ + +struct kbase_error_params { + u64 jc; + u32 errors_mask; + u32 mmu_table_level; + u16 faulty_mmu_as; + u16 padding[3]; +}; + +enum kbase_model_control_command { + /* Disable/Enable job completion in the dummy model */ + KBASE_MC_DISABLE_JOBS +}; + +/* struct to control dummy model behavior */ +struct kbase_model_control_params { + s32 command; + s32 value; +}; + +/* struct to track faulty atoms */ +struct kbase_error_atom { + struct kbase_error_params params; + struct kbase_error_atom *next; +}; + +/*struct to track the system error state*/ +struct error_status_t { + u32 errors_mask; + u32 mmu_table_level; + int faulty_mmu_as; + + u64 current_jc; + int current_job_slot; + + u32 job_irq_rawstat; + u32 job_irq_status; + u32 js_status[NUM_SLOTS]; + + u32 mmu_irq_mask; + u32 mmu_irq_rawstat; + + u32 gpu_error_irq; + u32 gpu_fault_status; + + u32 as_faultstatus[NUM_MMU_AS]; + u32 as_command[NUM_MMU_AS]; + u64 as_transtab[NUM_MMU_AS]; +}; + +void *midgard_model_create(const void *config); +void midgard_model_destroy(void *h); +u8 midgard_model_write_reg(void *h, u32 addr, u32 value); +u8 midgard_model_read_reg(void *h, u32 addr, + u32 * const value); +void gpu_generate_error(void); +void midgard_set_error(int job_slot); +int job_atom_inject_error(struct kbase_error_params *params); +int gpu_model_control(void *h, + struct kbase_model_control_params *params); + +void gpu_model_set_dummy_prfcnt_sample(u32 *usr_data, u32 usr_data_size); +void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *usr_data, u32 usr_data_size); +void 
gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev, + u64 *l2_present, u64 *shader_present); +void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev, + u64 l2_present, u64 shader_present); +void gpu_model_set_dummy_prfcnt_base_cpu(u32 *base, struct kbase_device *kbdev, + struct tagged_addr *pages, + size_t page_count); +/* Clear the counter values array maintained by the dummy model */ +void gpu_model_clear_prfcnt_values(void); + +enum gpu_dummy_irq { + GPU_DUMMY_JOB_IRQ, + GPU_DUMMY_GPU_IRQ, + GPU_DUMMY_MMU_IRQ +}; + +void gpu_device_raise_irq(void *model, + enum gpu_dummy_irq irq); +void gpu_device_set_data(void *model, void *data); +void *gpu_device_get_data(void *model); + +extern struct error_status_t hw_error_status; + +#endif diff --git a/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c b/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c new file mode 100644 index 0000000..dfa7f62 --- /dev/null +++ b/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include <mali_kbase.h> +#include <linux/random.h> +#include "backend/gpu/mali_kbase_model_dummy.h" + +/* all the error conditions supported by the model */ +#define TOTAL_FAULTS 27 +/* maximum number of levels in the MMU translation table tree */ +#define MAX_MMU_TABLE_LEVEL 4 +/* worst case scenario is <1 MMU fault + 1 job fault + 2 GPU faults> */ +#define MAX_CONCURRENT_FAULTS 3 + +static struct kbase_error_atom *error_track_list; + +unsigned int rand_seed; + +/*following error probability are set quite high in order to stress the driver*/ +unsigned int error_probability = 50; /* to be set between 0 and 100 */ +/* probability to have multiple error give that there is an error */ +unsigned int multiple_error_probability = 50; + +void gpu_generate_error(void) +{ + unsigned int errors_num = 0; + + /*is there at least one error? */ + if ((prandom_u32() % 100) < error_probability) { + /* pick up a faulty mmu address space */ + hw_error_status.faulty_mmu_as = prandom_u32() % NUM_MMU_AS; + /* pick up an mmu table level */ + hw_error_status.mmu_table_level = + 1 + (prandom_u32() % MAX_MMU_TABLE_LEVEL); + hw_error_status.errors_mask = + (u32)(1 << (prandom_u32() % TOTAL_FAULTS)); + + /*is there also one or more errors? 
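If so, add up to MAX_CONCURRENT_FAULTS - 1 extra faults, each drawn from an error category not already present in the mask.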
*/ + if ((prandom_u32() % 100) < multiple_error_probability) { + errors_num = 1 + (prandom_u32() % + (MAX_CONCURRENT_FAULTS - 1)); + while (errors_num-- > 0) { + u32 temp_mask; + + temp_mask = (u32)( + 1 << (prandom_u32() % TOTAL_FAULTS)); + /* below we check that no bit of the same error + * type is set again in the error mask + */ + if ((temp_mask & IS_A_JOB_ERROR) && + (hw_error_status.errors_mask & + IS_A_JOB_ERROR)) { + errors_num++; + continue; + } + if ((temp_mask & IS_A_MMU_ERROR) && + (hw_error_status.errors_mask & + IS_A_MMU_ERROR)) { + errors_num++; + continue; + } + if ((temp_mask & IS_A_GPU_ERROR) && + (hw_error_status.errors_mask & + IS_A_GPU_ERROR)) { + errors_num++; + continue; + } + /* this error mask is already set */ + if ((hw_error_status.errors_mask | temp_mask) == + hw_error_status.errors_mask) { + errors_num++; + continue; + } + hw_error_status.errors_mask |= temp_mask; + } + } + } +} + +int job_atom_inject_error(struct kbase_error_params *params) +{ + struct kbase_error_atom *new_elem; + + KBASE_DEBUG_ASSERT(params); + + new_elem = kzalloc(sizeof(*new_elem), GFP_KERNEL); + + if (!new_elem) { + model_error_log(KBASE_CORE, + "\njob_atom_inject_error: kzalloc failed for new_elem\n" + ); + return -ENOMEM; + } + new_elem->params.jc = params->jc; + new_elem->params.errors_mask = params->errors_mask; + new_elem->params.mmu_table_level = params->mmu_table_level; + new_elem->params.faulty_mmu_as = params->faulty_mmu_as; + + /*circular list below */ + if (error_track_list == NULL) { /*no elements */ + error_track_list = new_elem; + new_elem->next = error_track_list; + } else { + struct kbase_error_atom *walker = error_track_list; + + while (walker->next != error_track_list) + walker = walker->next; + + new_elem->next = error_track_list; + walker->next = new_elem; + } + return 0; +} + +void midgard_set_error(int job_slot) +{ +#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM + gpu_generate_error(); +#else + struct kbase_error_atom *walker, *auxiliar; + + if (error_track_list != NULL) { + walker = error_track_list->next; + auxiliar = error_track_list; + do { + if (walker->params.jc == hw_error_status.current_jc) { + /* found a faulty atom matching with the + * current one + */ + hw_error_status.errors_mask = + walker->params.errors_mask; + hw_error_status.mmu_table_level = + walker->params.mmu_table_level; + hw_error_status.faulty_mmu_as = + walker->params.faulty_mmu_as; + hw_error_status.current_job_slot = job_slot; + + if (walker->next == walker) { + /* only one element */ + kfree(error_track_list); + error_track_list = NULL; + } else { + auxiliar->next = walker->next; + if (walker == error_track_list) + error_track_list = walker->next; + + kfree(walker); + } + break; + } + auxiliar = walker; + walker = walker->next; + } while (auxiliar->next != error_track_list); + } +#endif /* CONFIG_MALI_ERROR_INJECT_RANDOM */ +} diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.c b/mali_kbase/backend/gpu/mali_kbase_model_linux.c new file mode 100644 index 0000000..ed5d4ce --- /dev/null +++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.c @@ -0,0 +1,254 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2010, 2012-2015, 2017-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * Model interface + */ + +#include <mali_kbase.h> +#include <gpu/mali_kbase_gpu_regmap.h> +#include <backend/gpu/mali_kbase_model_dummy.h> +#include "backend/gpu/mali_kbase_model_linux.h" +#include "device/mali_kbase_device.h" +#include "mali_kbase_irq_internal.h" + +#include <linux/kthread.h> + +struct model_irq_data { + struct kbase_device *kbdev; + struct work_struct work; +}; + +static void serve_job_irq(struct work_struct *work) +{ + struct model_irq_data *data = container_of(work, struct model_irq_data, + work); + struct kbase_device *kbdev = data->kbdev; + + /* Make sure no worker is already serving this IRQ */ + while (atomic_cmpxchg(&kbdev->serving_job_irq, 1, 0) == 1) { + u32 val; + + while ((val = kbase_reg_read(kbdev, + JOB_CONTROL_REG(JOB_IRQ_STATUS)))) { + unsigned long flags; + + /* Handle the IRQ */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); +#if MALI_USE_CSF + kbase_csf_interrupt(kbdev, val); +#else + kbase_job_done(kbdev, val); +#endif + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + } + + kmem_cache_free(kbdev->irq_slab, data); +} + +static void serve_gpu_irq(struct work_struct *work) +{ + struct model_irq_data *data = container_of(work, struct model_irq_data, + work); + struct kbase_device *kbdev = data->kbdev; + + /* Make sure no worker is already serving this IRQ */ + while (atomic_cmpxchg(&kbdev->serving_gpu_irq, 1, 0) == 1) { + u32 val; + + while ((val = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_IRQ_STATUS)))) { + /* Handle the IRQ */ + kbase_gpu_interrupt(kbdev, val); + } + } + + kmem_cache_free(kbdev->irq_slab, data); +} + +static void serve_mmu_irq(struct work_struct *work) +{ + struct model_irq_data *data = container_of(work, struct model_irq_data, + work); + struct kbase_device *kbdev = data->kbdev; + + /* Make sure no worker is already serving this IRQ */ + if (atomic_cmpxchg(&kbdev->serving_mmu_irq, 1, 0) == 1) { + u32 val; + + while ((val = kbase_reg_read(kbdev, + MMU_REG(MMU_IRQ_STATUS)))) { + /* Handle the IRQ */ + kbase_mmu_interrupt(kbdev, val); + } + } + + kmem_cache_free(kbdev->irq_slab, data); +} + +void gpu_device_raise_irq(void *model, + enum gpu_dummy_irq irq) +{ + struct model_irq_data *data; + struct kbase_device *kbdev = gpu_device_get_data(model); + + KBASE_DEBUG_ASSERT(kbdev); + + data = kmem_cache_alloc(kbdev->irq_slab, GFP_ATOMIC); + if (data == NULL) + return; + + data->kbdev = kbdev; + + switch (irq) { + case GPU_DUMMY_JOB_IRQ: + INIT_WORK(&data->work, serve_job_irq); + atomic_set(&kbdev->serving_job_irq, 1); + break; + case GPU_DUMMY_GPU_IRQ: + INIT_WORK(&data->work, serve_gpu_irq); + atomic_set(&kbdev->serving_gpu_irq, 1); + break; + case GPU_DUMMY_MMU_IRQ: + INIT_WORK(&data->work, serve_mmu_irq); + atomic_set(&kbdev->serving_mmu_irq, 1); + break; + default: + dev_warn(kbdev->dev, "Unknown IRQ"); + kmem_cache_free(kbdev->irq_slab, data); + } + queue_work(kbdev->irq_workq, &data->work); +} + +void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->reg_op_lock, flags); + 
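/* No real GPU is present in this configuration: the write is forwarded to the dummy model under reg_op_lock. */ +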
midgard_model_write_reg(kbdev->model, offset, value); + spin_unlock_irqrestore(&kbdev->reg_op_lock, flags); +} + +KBASE_EXPORT_TEST_API(kbase_reg_write); + +u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) +{ + unsigned long flags; + u32 val; + + spin_lock_irqsave(&kbdev->reg_op_lock, flags); + midgard_model_read_reg(kbdev->model, offset, &val); + spin_unlock_irqrestore(&kbdev->reg_op_lock, flags); + + return val; +} + +KBASE_EXPORT_TEST_API(kbase_reg_read); + +/** + * kbase_is_gpu_removed - Has the GPU been removed. + * @kbdev: Kbase device pointer + * + * This function would return true if the GPU has been removed. + * It is stubbed here + * Return: Always false + */ +bool kbase_is_gpu_removed(struct kbase_device *kbdev) +{ + return false; +} + +int kbase_install_interrupts(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev); + + atomic_set(&kbdev->serving_job_irq, 0); + atomic_set(&kbdev->serving_gpu_irq, 0); + atomic_set(&kbdev->serving_mmu_irq, 0); + + kbdev->irq_workq = alloc_ordered_workqueue("dummy irq queue", 0); + if (kbdev->irq_workq == NULL) + return -ENOMEM; + + kbdev->irq_slab = kmem_cache_create("dummy_irq_slab", + sizeof(struct model_irq_data), 0, 0, NULL); + if (kbdev->irq_slab == NULL) { + destroy_workqueue(kbdev->irq_workq); + return -ENOMEM; + } + + return 0; +} + +void kbase_release_interrupts(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev); + destroy_workqueue(kbdev->irq_workq); + kmem_cache_destroy(kbdev->irq_slab); +} + +void kbase_synchronize_irqs(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev); + flush_workqueue(kbdev->irq_workq); +} + +KBASE_EXPORT_TEST_API(kbase_synchronize_irqs); + +int kbase_set_custom_irq_handler(struct kbase_device *kbdev, + irq_handler_t custom_handler, + int irq_type) +{ + return 0; +} + +KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler); + +irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val) +{ + if (!val) + return IRQ_NONE; + + return IRQ_HANDLED; +} + +KBASE_EXPORT_TEST_API(kbase_gpu_irq_test_handler); + +int kbase_gpu_device_create(struct kbase_device *kbdev) +{ + kbdev->model = midgard_model_create(NULL); + if (kbdev->model == NULL) + return -ENOMEM; + + gpu_device_set_data(kbdev->model, kbdev); + + spin_lock_init(&kbdev->reg_op_lock); + + dev_warn(kbdev->dev, "Using Dummy Model"); + + return 0; +} + +void kbase_gpu_device_destroy(struct kbase_device *kbdev) +{ + midgard_model_destroy(kbdev->model); +} diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.h b/mali_kbase/backend/gpu/mali_kbase_model_linux.h new file mode 100644 index 0000000..dcb2e7c --- /dev/null +++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +/* + * Model interface + */ + +#ifndef _KBASE_MODEL_LINUX_H_ +#define _KBASE_MODEL_LINUX_H_ + +int kbase_gpu_device_create(struct kbase_device *kbdev); +void kbase_gpu_device_destroy(struct kbase_device *kbdev); + +#endif /* _KBASE_MODEL_LINUX_H_ */ diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c index 5df7f67..8711a6c 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c @@ -568,11 +568,14 @@ static void kbase_pm_hwcnt_disable_worker(struct work_struct *data) * when system suspend takes place. * The function first waits for the @gpu_poweroff_wait_work to complete, which * could have been enqueued after the last PM reference was released. + * + * Return: 0 on success, negative value otherwise. */ -static void kbase_pm_do_poweroff_sync(struct kbase_device *kbdev) +static int kbase_pm_do_poweroff_sync(struct kbase_device *kbdev) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; unsigned long flags; + int ret = 0; WARN_ON(kbdev->pm.active_count); @@ -581,8 +584,8 @@ static void kbase_pm_do_poweroff_sync(struct kbase_device *kbdev) kbase_pm_lock(kbdev); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); WARN_ON(backend->poweroff_wait_in_progress); + WARN_ON(backend->gpu_sleep_mode_active); if (backend->gpu_powered) { - int ret; backend->mcu_desired = false; backend->l2_desired = false; @@ -591,17 +594,11 @@ static void kbase_pm_do_poweroff_sync(struct kbase_device *kbdev) ret = kbase_pm_wait_for_desired_state(kbdev); if (ret) { - dev_warn(kbdev->dev, "Wait failed on synchronous power off"); - kbase_pm_unlock(kbdev); - /* Wait for the completion of reset, triggered due to - * the previous failure. - */ - kbase_reset_gpu_wait(kbdev); - /* Wait again for the poweroff work which could have - * been enqueued by the GPU reset worker. 
- */ - kbase_pm_wait_for_poweroff_work_complete(kbdev); - kbase_pm_lock(kbdev); + dev_warn( + kbdev->dev, + "Wait for pm state change failed on synchronous power off"); + ret = -EBUSY; + goto out; } /* Due to the power policy, GPU could have been kept active @@ -614,12 +611,19 @@ static void kbase_pm_do_poweroff_sync(struct kbase_device *kbdev) backend->gpu_idled = true; } - kbase_pm_clock_off(kbdev); + if (!kbase_pm_clock_off(kbdev)) { + dev_warn( + kbdev->dev, + "Failed to turn off GPU clocks on synchronous power off, MMU faults pending"); + ret = -EBUSY; + } } else { spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } +out: kbase_pm_unlock(kbdev); + return ret; } #endif @@ -793,7 +797,7 @@ void kbase_hwaccess_pm_halt(struct kbase_device *kbdev) KBASE_DEBUG_ASSERT(kbdev != NULL); #if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) - kbase_pm_do_poweroff_sync(kbdev); + WARN_ON(kbase_pm_do_poweroff_sync(kbdev)); #else mutex_lock(&kbdev->pm.lock); kbase_pm_do_poweroff(kbdev); @@ -902,10 +906,14 @@ void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev) kbase_pm_update_active(kbdev); } -void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) +int kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) { + int ret = 0; + #if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) - kbase_pm_do_poweroff_sync(kbdev); + ret = kbase_pm_do_poweroff_sync(kbdev); + if (ret) + return ret; #else /* Force power off the GPU and all cores (regardless of policy), only * after the PM active count reaches zero (otherwise, we risk turning it @@ -929,6 +937,8 @@ void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) if (kbdev->pm.backend.callback_power_suspend) kbdev->pm.backend.callback_power_suspend(kbdev); + + return ret; } void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) @@ -1044,7 +1054,12 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev) ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev); if (ret) { - dev_warn(kbdev->dev, "Wait for MCU wake up failed on runtime suspend"); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + dev_warn( + kbdev->dev, + "Waiting for MCU to wake up failed on runtime suspend"); + kbdev->pm.backend.gpu_wakeup_override = false; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return ret; } diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c index efc620f..803ba4d 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c @@ -26,6 +26,9 @@ #include <mali_kbase.h> #include <mali_kbase_pm.h> #include <backend/gpu/mali_kbase_pm_internal.h> +#if IS_ENABLED(CONFIG_MALI_NO_MALI) +#include <backend/gpu/mali_kbase_model_dummy.h> +#endif /* CONFIG_MALI_NO_MALI */ #include <mali_kbase_dummy_job_wa.h> int kbase_pm_ca_init(struct kbase_device *kbdev) @@ -120,7 +123,9 @@ u64 kbase_pm_ca_get_instr_core_mask(struct kbase_device *kbdev) { lockdep_assert_held(&kbdev->hwaccess_lock); -#if MALI_USE_CSF +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + return (((1ull) << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1); +#elif MALI_USE_CSF return kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); #else return kbdev->pm.backend.pm_shaders_core_mask; diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.h b/mali_kbase/backend/gpu/mali_kbase_pm_ca.h index 8d169c3..90dcaf5 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.h @@ -29,10 +29,10 @@ /** * kbase_pm_ca_init - Initialize core availability framework * - * Must be called before 
calling any other core availability function - * * @kbdev: The kbase device structure for the device (must be a valid pointer) * + * Must be called before calling any other core availability function + * * Return: 0 if the core availability framework was successfully initialized, * -errno otherwise */ diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca_devfreq.h b/mali_kbase/backend/gpu/mali_kbase_pm_ca_devfreq.h index 41f3c14..d1e4b53 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_ca_devfreq.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca_devfreq.h @@ -30,12 +30,12 @@ /** * struct kbasep_pm_ca_policy_devfreq - Private structure for devfreq ca policy * - * This contains data that is private to the devfreq core availability - * policy. - * * @cores_desired: Cores that the policy wants to be available * @cores_enabled: Cores that the policy is currently returning as available * @cores_used: Cores currently powered or transitioning + * + * This contains data that is private to the devfreq core availability + * policy. */ struct kbasep_pm_ca_policy_devfreq { u64 cores_desired; diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_coarse_demand.h b/mali_kbase/backend/gpu/mali_kbase_pm_coarse_demand.h index 5e3f17e..a947e8f 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_coarse_demand.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_coarse_demand.h @@ -52,10 +52,8 @@ /** * struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand * policy - * - * This contains data that is private to the coarse demand power policy. - * * @dummy: Dummy member - no state needed + * This contains data that is private to the coarse demand power policy. */ struct kbasep_pm_policy_coarse_demand { int dummy; diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h index 52877f5..c7efe23 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h @@ -40,6 +40,11 @@ struct kbase_jd_atom; /** * enum kbase_pm_core_type - The types of core in a GPU. * + * @KBASE_PM_CORE_L2: The L2 cache + * @KBASE_PM_CORE_SHADER: Shader cores + * @KBASE_PM_CORE_TILER: Tiler cores + * @KBASE_PM_CORE_STACK: Core stacks + * * These enumerated values are used in calls to * - kbase_pm_get_present_cores() * - kbase_pm_get_active_cores() @@ -49,11 +54,6 @@ struct kbase_jd_atom; * They specify which type of core should be acted on. These values are set in * a manner that allows core_type_to_reg() function to be simpler and more * efficient. - * - * @KBASE_PM_CORE_L2: The L2 cache - * @KBASE_PM_CORE_SHADER: Shader cores - * @KBASE_PM_CORE_TILER: Tiler cores - * @KBASE_PM_CORE_STACK: Core stacks */ enum kbase_pm_core_type { KBASE_PM_CORE_L2 = L2_PRESENT_LO, @@ -215,9 +215,6 @@ union kbase_pm_policy_data { /** * struct kbase_pm_backend_data - Data stored per device for power management. * - * This structure contains data for the power management framework. There is one - * instance of this structure per device in the system. - * * @pm_current_policy: The policy that is currently actively controlling the * power state. * @pm_policy_data: Private data for current PM policy. This is automatically @@ -324,6 +321,10 @@ union kbase_pm_policy_data { * @policy_change_lock: Used to serialize the policy change calls. In CSF case, * the change of policy may involve the scheduler to * suspend running CSGs and then reconfigure the MCU. + * @core_idle_wq: Workqueue for executing the @core_idle_work. 
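+ * It is allocated as a high priority, unbound workqueue in kbase_pm_state_machine_init().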
+ * @core_idle_work: Work item used to wait for undesired cores to become inactive. + * The work item is enqueued when Host controls the power for + * shader cores and down scaling of cores is performed. * @gpu_sleep_supported: Flag to indicate that if GPU sleep feature can be * supported by the kernel driver or not. If this * flag is not set, then HW state is directly saved @@ -389,6 +390,9 @@ union kbase_pm_policy_data { * @gpu_clock_control_work: work item to set GPU clock during L2 power cycle * using gpu_clock_control * + * This structure contains data for the power management framework. There is one + * instance of this structure per device in the system. + * * Note: * During an IRQ, @pm_current_policy can be NULL when the policy is being * changed with kbase_pm_set_policy(). The change is protected under @@ -455,6 +459,8 @@ struct kbase_pm_backend_data { bool policy_change_clamp_state_to_off; unsigned int csf_pm_sched_flags; struct mutex policy_change_lock; + struct workqueue_struct *core_idle_wq; + struct work_struct core_idle_work; #ifdef KBASE_PM_RUNTIME bool gpu_sleep_supported; @@ -547,9 +553,6 @@ enum kbase_pm_policy_event { /** * struct kbase_pm_policy - Power policy structure. * - * Each power policy exposes a (static) instance of this structure which - * contains function pointers to the policy's methods. - * * @name: The name of this policy * @init: Function called when the policy is selected * @term: Function called when the policy is unselected @@ -567,6 +570,8 @@ enum kbase_pm_policy_event { * Pre-defined required flags exist for each of the * ARM released policies, such as 'always_on', 'coarse_demand' * and etc. + * Each power policy exposes a (static) instance of this structure which + * contains function pointers to the policy's methods. */ struct kbase_pm_policy { char *name; diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c index d65c684..81c922f 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c @@ -54,6 +54,10 @@ #include <csf/ipa_control/mali_kbase_csf_ipa_control.h> #endif +#if MALI_USE_CSF +#include <linux/delay.h> +#endif + #include <linux/of.h> #ifdef CONFIG_MALI_CORESTACK @@ -72,16 +76,16 @@ KBASE_EXPORT_TEST_API(corestack_driver_control); /** * enum kbasep_pm_action - Actions that can be performed on a core. * - * This enumeration is private to the file. Its values are set to allow - * core_type_to_reg() function, which decodes this enumeration, to be simpler - * and more efficient. - * * @ACTION_PRESENT: The cores that are present * @ACTION_READY: The cores that are ready * @ACTION_PWRON: Power on the cores specified * @ACTION_PWROFF: Power off the cores specified * @ACTION_PWRTRANS: The cores that are transitioning * @ACTION_PWRACTIVE: The cores that are active + * + * This enumeration is private to the file. Its values are set to allow + * core_type_to_reg() function, which decodes this enumeration, to be simpler + * and more efficient. */ enum kbasep_pm_action { ACTION_PRESENT = 0, @@ -221,14 +225,14 @@ void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override) /** * core_type_to_reg - Decode a core type and action to a register. * + * @core_type: The type of core + * @action: The type of action + * * Given a core type (defined by kbase_pm_core_type) and an action (defined * by kbasep_pm_action) this function will return the register offset that * will perform the action on the core type. 
The register returned is the _LO * register and an offset must be applied to use the _HI register. * - * @core_type: The type of core - * @action: The type of action - * * Return: The register offset of the _LO register that performs an action of * type @action on a core of type @core_type. */ @@ -291,14 +295,14 @@ static void mali_cci_flush_l2(struct kbase_device *kbdev) /** * kbase_pm_invoke - Invokes an action on a core set * - * This function performs the action given by @action on a set of cores of a - * type given by @core_type. It is a static function used by - * kbase_pm_transition_core_type() - * * @kbdev: The kbase device structure of the device * @core_type: The type of core that the action should be performed on * @cores: A bit mask of cores to perform the action on (low 32 bits) * @action: The action to perform on the cores + * + * This function performs the action given by @action on a set of cores of a + * type given by @core_type. It is a static function used by + * kbase_pm_transition_core_type() */ static void kbase_pm_invoke(struct kbase_device *kbdev, enum kbase_pm_core_type core_type, @@ -376,15 +380,15 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, /** * kbase_pm_get_state - Get information about a core set * + * @kbdev: The kbase device structure of the device + * @core_type: The type of core that the should be queried + * @action: The property of the cores to query + * * This function gets information (chosen by @action) about a set of cores of * a type given by @core_type. It is a static function used by * kbase_pm_get_active_cores(), kbase_pm_get_trans_cores() and * kbase_pm_get_ready_cores(). * - * @kbdev: The kbase device structure of the device - * @core_type: The type of core that the should be queried - * @action: The property of the cores to query - * * Return: A bit mask specifying the state of the cores */ static u64 kbase_pm_get_state(struct kbase_device *kbdev, @@ -753,17 +757,17 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) if (!kbase_pm_is_mcu_desired(kbdev)) backend->mcu_state = KBASE_MCU_ON_HWCNT_DISABLE; else if (kbdev->csf.firmware_hctl_core_pwr) { - /* Host control add additional Cores to be active */ - if (backend->shaders_desired_mask & ~shaders_ready) { + /* Host control scale up/down cores as needed */ + if (backend->shaders_desired_mask != shaders_ready) { backend->hwcnt_desired = false; if (!backend->hwcnt_disabled) kbase_pm_trigger_hwcnt_disable(kbdev); backend->mcu_state = KBASE_MCU_HCTL_MCU_ON_RECHECK; } - } else if (kbase_pm_handle_mcu_core_attr_update(kbdev)) - kbdev->pm.backend.mcu_state = - KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND; + } else if (kbase_pm_handle_mcu_core_attr_update(kbdev)) { + backend->mcu_state = KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND; + } break; case KBASE_MCU_HCTL_MCU_ON_RECHECK: @@ -787,16 +791,54 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) ACTION_PWRON); backend->mcu_state = KBASE_MCU_HCTL_SHADERS_PEND_ON; + + } else if (~backend->shaders_desired_mask & shaders_ready) { + kbase_csf_firmware_update_core_attr(kbdev, false, true, + backend->shaders_desired_mask); + backend->mcu_state = KBASE_MCU_HCTL_CORES_DOWN_SCALE_NOTIFY_PEND; } else { backend->mcu_state = KBASE_MCU_HCTL_SHADERS_PEND_ON; } break; + case KBASE_MCU_HCTL_CORES_DOWN_SCALE_NOTIFY_PEND: + if (kbase_csf_firmware_core_attr_updated(kbdev)) { + /* wait in queue until cores idle */ + queue_work(backend->core_idle_wq, &backend->core_idle_work); + backend->mcu_state = KBASE_MCU_HCTL_CORE_INACTIVE_PEND; + } + 
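/* core_idle_worker() will poll until the undesired cores become inactive. */ +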
break; + + case KBASE_MCU_HCTL_CORE_INACTIVE_PEND: + { + u64 active_cores = kbase_pm_get_active_cores( + kbdev, + KBASE_PM_CORE_SHADER); + u64 cores_to_disable = shaders_ready & + ~backend->shaders_desired_mask; + + if (!(cores_to_disable & active_cores)) { + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, + cores_to_disable, + ACTION_PWROFF); + backend->shaders_avail = backend->shaders_desired_mask; + backend->mcu_state = KBASE_MCU_HCTL_SHADERS_CORE_OFF_PEND; + } + } + break; + + case KBASE_MCU_HCTL_SHADERS_CORE_OFF_PEND: + if (!shaders_trans && shaders_ready == backend->shaders_avail) { + /* Cores now stable */ + backend->pm_shaders_core_mask = shaders_ready; + backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; + } + break; + case KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND: if (kbase_csf_firmware_core_attr_updated(kbdev)) { - backend->shaders_avail = - backend->shaders_desired_mask; + backend->shaders_avail = backend->shaders_desired_mask; backend->mcu_state = KBASE_MCU_ON; } break; @@ -832,6 +874,8 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) case KBASE_MCU_ON_PEND_HALT: if (kbase_csf_firmware_mcu_halted(kbdev)) { + KBASE_KTRACE_ADD(kbdev, MCU_HALTED, NULL, + kbase_csf_ktrace_gpu_cycle_cnt(kbdev)); if (kbdev->csf.firmware_hctl_core_pwr) backend->mcu_state = KBASE_MCU_HCTL_SHADERS_READY_OFF; @@ -875,6 +919,8 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) case KBASE_MCU_ON_PEND_SLEEP: if (kbase_csf_firmware_is_mcu_in_sleep(kbdev)) { + KBASE_KTRACE_ADD(kbdev, MCU_IN_SLEEP, NULL, + kbase_csf_ktrace_gpu_cycle_cnt(kbdev)); backend->mcu_state = KBASE_MCU_IN_SLEEP; kbase_pm_enable_db_mirror_interrupt(kbdev); kbase_csf_scheduler_reval_idleness_post_sleep(kbdev); @@ -884,6 +930,8 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) case KBASE_MCU_IN_SLEEP: if (kbase_pm_is_mcu_desired(kbdev) && backend->l2_state == KBASE_L2_ON) { + KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP( + kbdev, kbase_backend_get_cycle_cnt(kbdev)); kbase_pm_enable_mcu_db_notification(kbdev); kbase_pm_disable_db_mirror_interrupt(kbdev); backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; @@ -910,6 +958,33 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) return 0; } + +static void core_idle_worker(struct work_struct *work) +{ + struct kbase_device *kbdev = + container_of(work, struct kbase_device, pm.backend.core_idle_work); + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + while (backend->gpu_powered && (backend->mcu_state == KBASE_MCU_HCTL_CORE_INACTIVE_PEND)) { + const unsigned int core_inactive_wait_ms = 1; + u64 active_cores = kbase_pm_get_active_cores(kbdev, KBASE_PM_CORE_SHADER); + u64 shaders_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); + u64 cores_to_disable = shaders_ready & ~backend->shaders_desired_mask; + + if (!(cores_to_disable & active_cores)) { + kbase_pm_update_state(kbdev); + break; + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + msleep(core_inactive_wait_ms); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} #endif static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state) @@ -925,6 +1000,23 @@ static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state) return strings[state]; } +#if !MALI_USE_CSF +/* On powering on the L2, the tracked kctx becomes stale and can be cleared. 
+ * This enables the backend to spare the START_FLUSH.INV_SHADER_OTHER + * operation on the first submitted katom after the L2 powering on. + */ +static void kbase_pm_l2_clear_backend_slot_submit_kctx(struct kbase_device *kbdev) +{ + int js; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* Clear the slots' last katom submission kctx */ + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = SLOT_RB_NULL_TAG_VAL; +} +#endif + static int kbase_pm_l2_update_state(struct kbase_device *kbdev) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; @@ -1015,6 +1107,8 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_present & ~1, ACTION_PWRON); + /* Clear backend slot submission kctx */ + kbase_pm_l2_clear_backend_slot_submit_kctx(kbdev); #else /* With CSF firmware, Host driver doesn't need to * handle power management with both shader and tiler cores. @@ -1217,7 +1311,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) * powered off. */ kbase_gpu_start_cache_clean_nolock( - kbdev); + kbdev, GPU_COMMAND_CACHE_CLN_INV_L2); #if !MALI_USE_CSF KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, 0u); #else @@ -1594,10 +1688,12 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) break; case KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON: - shader_poweroff_timer_queue_cancel(kbdev); + if (!backend->partial_shaderoff) + shader_poweroff_timer_queue_cancel(kbdev); if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) { - kbase_gpu_start_cache_clean_nolock(kbdev); + kbase_gpu_start_cache_clean_nolock( + kbdev, GPU_COMMAND_CACHE_CLN_INV_L2); backend->shaders_state = KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON; } else { @@ -1895,11 +1991,24 @@ int kbase_pm_state_machine_init(struct kbase_device *kbdev) stt->default_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER; stt->configured_ticks = stt->default_ticks; +#if MALI_USE_CSF + kbdev->pm.backend.core_idle_wq = alloc_workqueue("coreoff_wq", WQ_HIGHPRI | WQ_UNBOUND, 1); + if (!kbdev->pm.backend.core_idle_wq) { + destroy_workqueue(stt->wq); + return -ENOMEM; + } + + INIT_WORK(&kbdev->pm.backend.core_idle_work, core_idle_worker); +#endif + return 0; } void kbase_pm_state_machine_term(struct kbase_device *kbdev) { +#if MALI_USE_CSF + destroy_workqueue(kbdev->pm.backend.core_idle_wq); +#endif hrtimer_cancel(&kbdev->pm.backend.shader_tick_timer.timer); destroy_workqueue(kbdev->pm.backend.shader_tick_timer.wq); } @@ -2419,9 +2528,9 @@ void kbase_pm_reset_done(struct kbase_device *kbdev) /** * kbase_pm_wait_for_reset - Wait for a reset to happen * - * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state. - * * @kbdev: Kbase device + * + * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state. */ static void kbase_pm_wait_for_reset(struct kbase_device *kbdev) { @@ -2889,6 +2998,7 @@ exit: /** * kbase_pm_request_gpu_cycle_counter_do_request - Request cycle counters + * @kbdev: The kbase device structure of the device * * Increase the count of cycle counter users and turn the cycle counters on if * they were previously off @@ -2899,8 +3009,6 @@ exit: * * When this function is called the l2 cache must be on - i.e., the GPU must be * on. 
- * - * @kbdev: The kbase device structure of the device */ static void kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev) @@ -2918,11 +3026,13 @@ kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev) /* This might happen after GPU reset. * Then counter needs to be kicked. */ +#if !IS_ENABLED(CONFIG_MALI_NO_MALI) if (!(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & GPU_STATUS_CYCLE_COUNT_ACTIVE)) { kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_CYCLE_COUNT_START); } +#endif } spin_unlock_irqrestore( diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h index ef26c16..97e8607 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h @@ -35,18 +35,18 @@ /** * kbase_pm_dev_idle - The GPU is idle. * - * The OS may choose to turn off idle devices - * * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * The OS may choose to turn off idle devices */ void kbase_pm_dev_idle(struct kbase_device *kbdev); /** * kbase_pm_dev_activate - The GPU is active. * - * The OS should avoid opportunistically turning off the GPU while it is active - * * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * The OS should avoid opportunistically turning off the GPU while it is active */ void kbase_pm_dev_activate(struct kbase_device *kbdev); @@ -54,14 +54,14 @@ void kbase_pm_dev_activate(struct kbase_device *kbdev); * kbase_pm_get_present_cores - Get details of the cores that are present in * the device. * - * This function can be called by the active power policy to return a bitmask of - * the cores (of a specified type) present in the GPU device and also a count of - * the number of cores. - * * @kbdev: The kbase device structure for the device (must be a valid * pointer) * @type: The type of core (see the enum kbase_pm_core_type enumeration) * + * This function can be called by the active power policy to return a bitmask of + * the cores (of a specified type) present in the GPU device and also a count of + * the number of cores. + * * Return: The bit mask of cores present */ u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, @@ -71,13 +71,13 @@ u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, * kbase_pm_get_active_cores - Get details of the cores that are currently * active in the device. * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * * This function can be called by the active power policy to return a bitmask of * the cores (of a specified type) that are actively processing work (i.e. * turned on *and* busy). * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * @type: The type of core (see the enum kbase_pm_core_type enumeration) - * * Return: The bit mask of active cores */ u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, @@ -87,13 +87,13 @@ u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, * kbase_pm_get_trans_cores - Get details of the cores that are currently * transitioning between power states. 
* + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * * This function can be called by the active power policy to return a bitmask of * the cores (of a specified type) that are currently transitioning between * power states. * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * @type: The type of core (see the enum kbase_pm_core_type enumeration) - * * Return: The bit mask of transitioning cores */ u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, @@ -103,13 +103,13 @@ u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, * kbase_pm_get_ready_cores - Get details of the cores that are currently * powered and ready for jobs. * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * * This function can be called by the active power policy to return a bitmask of * the cores (of a specified type) that are powered and ready for jobs (they may * or may not be currently executing jobs). * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * @type: The type of core (see the enum kbase_pm_core_type enumeration) - * * Return: The bit mask of ready cores */ u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, @@ -119,13 +119,13 @@ u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, * kbase_pm_clock_on - Turn the clock for the device on, and enable device * interrupts. * - * This function can be used by a power policy to turn the clock for the GPU on. - * It should be modified during integration to perform the necessary actions to - * ensure that the GPU is fully powered and clocked. - * * @kbdev: The kbase device structure for the device (must be a valid * pointer) * @is_resume: true if clock on due to resume after suspend, false otherwise + * + * This function can be used by a power policy to turn the clock for the GPU on. + * It should be modified during integration to perform the necessary actions to + * ensure that the GPU is fully powered and clocked. */ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume); @@ -133,6 +133,9 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume); * kbase_pm_clock_off - Disable device interrupts, and turn the clock for the * device off. * + * @kbdev: The kbase device structure for the device (must be a valid + * pointer) + * * This function can be used by a power policy to turn the clock for the GPU * off. It should be modified during integration to perform the necessary * actions to turn the clock off (if this is possible in the integration). @@ -141,9 +144,6 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume); * then this function would usually be invoked from the runtime suspend * callback function. * - * @kbdev: The kbase device structure for the device (must be a valid - * pointer) - * * Return: true if clock was turned off, or * false if clock can not be turned off due to pending page/bus fault * workers. Caller must flush MMU workqueues and retry @@ -153,22 +153,22 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev); /** * kbase_pm_enable_interrupts - Enable interrupts on the device. * - * Interrupts are also enabled after a call to kbase_pm_clock_on(). - * * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Interrupts are also enabled after a call to kbase_pm_clock_on(). 
*/ void kbase_pm_enable_interrupts(struct kbase_device *kbdev); /** * kbase_pm_disable_interrupts - Disable interrupts on the device. * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * * This prevents delivery of Power Management interrupts to the CPU so that * kbase_pm_update_state() will not be called from the IRQ handler * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called. * * Interrupts are also disabled after a call to kbase_pm_clock_off(). - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_disable_interrupts(struct kbase_device *kbdev); @@ -176,9 +176,9 @@ void kbase_pm_disable_interrupts(struct kbase_device *kbdev); * kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts() * that does not take the hwaccess_lock * - * Caller must hold the hwaccess_lock. - * * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Caller must hold the hwaccess_lock. */ void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev); @@ -197,12 +197,11 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags); /** * kbase_pm_reset_done - The GPU has been reset successfully. + * @kbdev: The kbase device structure for the device (must be a valid pointer) * * This function must be called by the GPU interrupt handler when the * RESET_COMPLETED bit is set. It signals to the power management initialization * code that the GPU has been successfully reset. - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_reset_done(struct kbase_device *kbdev); @@ -210,6 +209,7 @@ void kbase_pm_reset_done(struct kbase_device *kbdev); /** * kbase_pm_wait_for_desired_state - Wait for the desired power state to be * reached + * @kbdev: The kbase device structure for the device (must be a valid pointer) * * Wait for the L2 and MCU state machines to reach the states corresponding * to the values of 'kbase_pm_is_l2_desired' and 'kbase_pm_is_mcu_desired'. @@ -224,8 +224,6 @@ void kbase_pm_reset_done(struct kbase_device *kbdev); * power off in progress and kbase_pm_context_active() was called instead of * kbase_csf_scheduler_pm_active(). * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * * Return: 0 on success, error code on error */ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); @@ -233,6 +231,7 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); /** * kbase_pm_wait_for_desired_state - Wait for the desired power state to be * reached + * @kbdev: The kbase device structure for the device (must be a valid pointer) * * Wait for the L2 and shader power state machines to reach the states * corresponding to the values of 'l2_desired' and 'shaders_desired'. 
@@ -248,8 +247,6 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); * must ensure that this is not the case by, for example, calling * kbase_pm_wait_for_poweroff_work_complete() * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * * Return: 0 on success, error code on error */ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); @@ -258,6 +255,8 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); /** * kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * * Wait for the L2 to be powered on, and for the L2 and the state machines of * its dependent stack components to stabilise. * @@ -266,8 +265,6 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock, * because this function will take that lock itself. * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * * Return: 0 on success, error code on error */ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev); @@ -276,13 +273,12 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev); * kbase_pm_update_dynamic_cores_onoff - Update the L2 and shader power state * machines after changing shader core * availability + * @kbdev: The kbase device structure for the device (must be a valid pointer) * * It can be called in any status, so need to check the l2 and shader core * power status in this function or it will break shader/l2 state machine * * Caller must hold hwaccess_lock - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_update_dynamic_cores_onoff(struct kbase_device *kbdev); @@ -318,22 +314,21 @@ void kbase_pm_state_machine_term(struct kbase_device *kbdev); * kbase_pm_update_cores_state - Update the desired state of shader cores from * the Power Policy, and begin any power * transitions. + * @kbdev: The kbase device structure for the device (must be a valid pointer) * * This function will update the desired_xx_state members of * struct kbase_pm_device_data by calling into the current Power Policy. It will * then begin power transitions to make the hardware acheive the desired shader * core state. - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_update_cores_state(struct kbase_device *kbdev); /** * kbasep_pm_metrics_init - Initialize the metrics gathering framework. + * @kbdev: The kbase device structure for the device (must be a valid pointer) * * This must be called before other metric gathering APIs are called. * - * @kbdev: The kbase device structure for the device (must be a valid pointer) * * Return: 0 on success, error code on error */ @@ -341,29 +336,27 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev); /** * kbasep_pm_metrics_term - Terminate the metrics gathering framework. + * @kbdev: The kbase device structure for the device (must be a valid pointer) * * This must be called when metric gathering is no longer required. It is an * error to call any metrics gathering function (other than * kbasep_pm_metrics_init()) after calling this function. - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbasep_pm_metrics_term(struct kbase_device *kbdev); /** * kbase_pm_report_vsync - Function to be called by the frame buffer driver to * update the vsync metric. 
+ * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + * @buffer_updated: True if the buffer has been updated on this VSync, + * false otherwise * * This function should be called by the frame buffer driver to update whether * the system is hitting the vsync target or not. buffer_updated should be true * if the vsync corresponded with a new frame being displayed, otherwise it * should be false. This function does not need to be called every vsync, but * only when the value of @buffer_updated differs from a previous call. - * - * @kbdev: The kbase device structure for the device (must be a - * valid pointer) - * @buffer_updated: True if the buffer has been updated on this VSync, - * false otherwise */ void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated); @@ -381,6 +374,7 @@ void kbase_pm_get_dvfs_action(struct kbase_device *kbdev); /** * kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is * needed + * @kbdev: The kbase device structure for the device (must be a valid pointer) * * If the caller is the first caller then the GPU cycle counters will be enabled * along with the l2 cache @@ -388,13 +382,13 @@ void kbase_pm_get_dvfs_action(struct kbase_device *kbdev); * The GPU must be powered when calling this function (i.e. * kbase_pm_context_active() must have been called). * - * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev); /** * kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is * needed (l2 cache already on) + * @kbdev: The kbase device structure for the device (must be a valid pointer) * * This is a version of the above function * (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the @@ -405,14 +399,13 @@ void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev); * The GPU must be powered when calling this function (i.e. * kbase_pm_context_active() must have been called) and the l2 cache must be * powered on. - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev); /** * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no * longer in use + * @kbdev: The kbase device structure for the device (must be a valid pointer) * * If the caller is the last caller then the GPU cycle counters will be * disabled. A request must have been made before a call to this. @@ -420,18 +413,15 @@ void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev); * Caller must not hold the hwaccess_lock, as it will be taken in this function. * If the caller is already holding this lock then * kbase_pm_release_gpu_cycle_counter_nolock() must be used instead. - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev); /** * kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter() * that does not take hwaccess_lock + * @kbdev: The kbase device structure for the device (must be a valid pointer) * * Caller must hold the hwaccess_lock. 
- * - * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev); @@ -458,12 +448,11 @@ void kbase_pm_wait_for_gpu_power_down(struct kbase_device *kbdev); /** * kbase_pm_runtime_init - Initialize runtime-pm for Mali GPU platform device + * @kbdev: The kbase device structure for the device (must be a valid pointer) * * Setup the power management callbacks and initialize/enable the runtime-pm * for the Mali GPU platform device, using the callback function. This must be * called before the kbase_pm_register_access_enable() function. - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) */ int kbase_pm_runtime_init(struct kbase_device *kbdev); @@ -476,6 +465,7 @@ void kbase_pm_runtime_term(struct kbase_device *kbdev); /** * kbase_pm_register_access_enable - Enable access to GPU registers + * @kbdev: The kbase device structure for the device (must be a valid pointer) * * Enables access to the GPU registers before power management has powered up * the GPU with kbase_pm_powerup(). @@ -486,13 +476,12 @@ void kbase_pm_runtime_term(struct kbase_device *kbdev); * * This should only be used before power management is powered up with * kbase_pm_powerup() - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_register_access_enable(struct kbase_device *kbdev); /** * kbase_pm_register_access_disable - Disable early register access + * @kbdev: The kbase device structure for the device (must be a valid pointer) * * Disables access to the GPU registers enabled earlier by a call to * kbase_pm_register_access_enable(). @@ -503,8 +492,6 @@ void kbase_pm_register_access_enable(struct kbase_device *kbdev); * * This should only be used before power management is powered up with * kbase_pm_powerup() - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_register_access_disable(struct kbase_device *kbdev); @@ -515,6 +502,7 @@ void kbase_pm_register_access_disable(struct kbase_device *kbdev); /** * kbase_pm_metrics_is_active - Check if the power management metrics * collection is active. + * @kbdev: The kbase device structure for the device (must be a valid pointer) * * Note that this returns if the power management metrics collection was * active at the time of calling, it is possible that after the call the metrics @@ -522,7 +510,6 @@ void kbase_pm_register_access_disable(struct kbase_device *kbdev); * * The caller must handle the consequence that the state may have changed. * - * @kbdev: The kbase device structure for the device (must be a valid pointer) * Return: true if metrics collection was active else false. */ bool kbase_pm_metrics_is_active(struct kbase_device *kbdev); @@ -558,12 +545,13 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, /** * kbase_platform_dvfs_event - Report utilisation to DVFS code for CSF GPU * - * Function provided by platform specific code when DVFS is enabled to allow - * the power management metrics system to report utilisation. - * * @kbdev: The kbase device structure for the device (must be a * valid pointer) * @utilisation: The current calculated utilisation by the metrics system. + * + * Function provided by platform specific code when DVFS is enabled to allow + * the power management metrics system to report utilisation. + * * Return: Returns 0 on failure and non zero on success. 
*/ int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation); @@ -571,15 +559,15 @@ int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation); /** * kbase_platform_dvfs_event - Report utilisation to DVFS code for JM GPU * - * Function provided by platform specific code when DVFS is enabled to allow - * the power management metrics system to report utilisation. - * * @kbdev: The kbase device structure for the device (must be a * valid pointer) * @utilisation: The current calculated utilisation by the metrics system. * @util_gl_share: The current calculated gl share of utilisation. * @util_cl_share: The current calculated cl share of utilisation per core * group. + * Function provided by platform specific code when DVFS is enabled to allow + * the power management metrics system to report utilisation. + * * Return: Returns 0 on failure and non zero on success. */ int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h b/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h index 96f196f..5e57c9d 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h @@ -25,37 +25,47 @@ * this header file. This header file can be included multiple times in the * same compilation unit with different definitions of KBASEP_MCU_STATE(). * - * @OFF: The MCU is powered off. - * @PEND_ON_RELOAD: The warm boot of MCU or cold boot of MCU (with - * firmware reloading) is in progress. - * @ON_GLB_REINIT_PEND: The MCU is enabled and Global configuration - * requests have been sent to the firmware. - * @ON_HWCNT_ENABLE: The Global requests have completed and MCU is now - * ready for use and hwcnt is being enabled. - * @ON: The MCU is active and hwcnt has been enabled. - * @ON_CORE_ATTR_UPDATE_PEND: The MCU is active and mask of enabled shader cores - * is being updated. - * @ON_HWCNT_DISABLE: The MCU is on and hwcnt is being disabled. - * @ON_HALT: The MCU is on and hwcnt has been disabled, MCU - * halt would be triggered. - * @ON_PEND_HALT: MCU halt in progress, confirmation pending. - * @POWER_DOWN: MCU halted operations, pending being disabled. - * @PEND_OFF: MCU is being disabled, pending on powering off. - * @RESET_WAIT: The GPU is resetting, MCU state is unknown. - * @HCTL_SHADERS_PEND_ON: Global configuration requests sent to the firmware - * have completed and shaders have been requested to - * power on. - * @HCTL_CORES_NOTIFY_PEND: Shader cores have powered up and firmware is being - * notified of the mask of enabled shader cores. - * @HCTL_MCU_ON_RECHECK: MCU is on and hwcnt disabling is triggered - * and checks are done to increase the number of - * enabled cores. - * @HCTL_SHADERS_READY_OFF: MCU has halted and cores need to be powered down - * @HCTL_SHADERS_PEND_OFF: Cores are transitioning to power down. - * @ON_SLEEP_INITIATE: MCU is on and hwcnt has been disabled and MCU - * is being put to sleep. - * @ON_PEND_SLEEP: MCU sleep is in progress. - * @IN_SLEEP: Sleep request is completed and MCU has halted. + * @OFF: The MCU is powered off. + * @PEND_ON_RELOAD: The warm boot of MCU or cold boot of MCU (with + * firmware reloading) is in progress. + * @ON_GLB_REINIT_PEND: The MCU is enabled and Global configuration + * requests have been sent to the firmware. + * @ON_HWCNT_ENABLE: The Global requests have completed and MCU is now + * ready for use and hwcnt is being enabled. + * @ON: The MCU is active and hwcnt has been enabled. 
+ * @ON_CORE_ATTR_UPDATE_PEND: The MCU is active and mask of enabled shader cores + * is being updated. + * @ON_HWCNT_DISABLE: The MCU is on and hwcnt is being disabled. + * @ON_HALT: The MCU is on and hwcnt has been disabled, MCU + * halt would be triggered. + * @ON_PEND_HALT: MCU halt in progress, confirmation pending. + * @POWER_DOWN: MCU halted operations, pending being disabled. + * @PEND_OFF: MCU is being disabled, pending on powering off. + * @RESET_WAIT: The GPU is resetting, MCU state is unknown. + * @HCTL_SHADERS_PEND_ON: Global configuration requests sent to the firmware + * have completed and shaders have been requested to + * power on. + * @HCTL_CORES_NOTIFY_PEND: Shader cores have powered up and firmware is being + * notified of the mask of enabled shader cores. + * @HCTL_MCU_ON_RECHECK: MCU is on and hwcnt disabling is triggered + * and checks are done to update the number of + * enabled cores. + * @HCTL_SHADERS_READY_OFF: MCU has halted and cores need to be powered down + * @HCTL_SHADERS_PEND_OFF: Cores are transitioning to power down. + * @HCTL_CORES_DOWN_SCALE_NOTIFY_PEND: Firmware has been informed to stop using + * specific cores, due to core_mask change request. + * After the ACK from FW, the wait will be done for + * undesired cores to become inactive. + * @HCTL_CORE_INACTIVE_PEND: Waiting for specific cores to become inactive. + * Once the cores become inactive their power down + * will be initiated. + * @HCTL_SHADERS_CORE_OFF_PEND: Waiting for specific cores to complete the + * transition to power down. Once powered down, + * HW counters will be re-enabled. + * @ON_SLEEP_INITIATE: MCU is on and hwcnt has been disabled and MCU + * is being put to sleep. + * @ON_PEND_SLEEP: MCU sleep is in progress. + * @IN_SLEEP: Sleep request is completed and MCU has halted. 
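The KBASEP_MCU_STATE() convention documented here is a standard X-macro: the same header is included more than once with different definitions of the macro, as seen in the list of KBASEP_MCU_STATE() entries that follows. A small self-contained sketch of the idiom, using illustrative file and symbol names rather than the driver's real generated symbols:

/* mcu_states_example.h -- hypothetical, unguarded X-macro list */
KBASEP_MCU_STATE(OFF)
KBASEP_MCU_STATE(ON)
KBASEP_MCU_STATE(RESET_WAIT)

/* consumer.c -- include the list twice with different expansions */
enum example_mcu_state {
#define KBASEP_MCU_STATE(n) EXAMPLE_MCU_##n,
#include "mcu_states_example.h"
#undef KBASEP_MCU_STATE
};

static const char *const example_mcu_state_names[] = {
#define KBASEP_MCU_STATE(n) #n,
#include "mcu_states_example.h"
#undef KBASEP_MCU_STATE
};

The first expansion builds the state enum, the second a matching string table, so the two can never drift apart when a new state such as HCTL_CORES_DOWN_SCALE_NOTIFY_PEND is added to the list.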
*/ KBASEP_MCU_STATE(OFF) KBASEP_MCU_STATE(PEND_ON_RELOAD) @@ -75,6 +85,9 @@ KBASEP_MCU_STATE(HCTL_CORES_NOTIFY_PEND) KBASEP_MCU_STATE(HCTL_MCU_ON_RECHECK) KBASEP_MCU_STATE(HCTL_SHADERS_READY_OFF) KBASEP_MCU_STATE(HCTL_SHADERS_PEND_OFF) +KBASEP_MCU_STATE(HCTL_CORES_DOWN_SCALE_NOTIFY_PEND) +KBASEP_MCU_STATE(HCTL_CORE_INACTIVE_PEND) +KBASEP_MCU_STATE(HCTL_SHADERS_CORE_OFF_PEND) /* Additional MCU states to support GPU sleep feature */ KBASEP_MCU_STATE(ON_SLEEP_INITIATE) KBASEP_MCU_STATE(ON_PEND_SLEEP) diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c index 7b126a1..bc05bd7 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c @@ -36,8 +36,13 @@ #include <linux/of.h> static const struct kbase_pm_policy *const all_policy_list[] = { +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + &kbase_pm_always_on_policy_ops, &kbase_pm_coarse_demand_policy_ops, - &kbase_pm_always_on_policy_ops +#else /* CONFIG_MALI_NO_MALI */ + &kbase_pm_coarse_demand_policy_ops, + &kbase_pm_always_on_policy_ops, +#endif /* CONFIG_MALI_NO_MALI */ }; void kbase_pm_policy_init(struct kbase_device *kbdev) diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c index 92a366b..51812ee 100644 --- a/mali_kbase/backend/gpu/mali_kbase_time.c +++ b/mali_kbase/backend/gpu/mali_kbase_time.c @@ -67,6 +67,9 @@ void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, */ static bool timedwait_cycle_count_active(struct kbase_device *kbdev) { +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + return true; +#else bool success = false; const unsigned int timeout = 100; const unsigned long remaining = jiffies + msecs_to_jiffies(timeout); @@ -79,6 +82,7 @@ static bool timedwait_cycle_count_active(struct kbase_device *kbdev) } } return success; +#endif } #endif diff --git a/mali_kbase/context/backend/mali_kbase_context_csf.c b/mali_kbase/context/backend/mali_kbase_context_csf.c index 1ce806f..7d45a08 100644 --- a/mali_kbase/context/backend/mali_kbase_context_csf.c +++ b/mali_kbase/context/backend/mali_kbase_context_csf.c @@ -48,6 +48,7 @@ void kbase_context_debugfs_init(struct kbase_context *const kctx) kbase_csf_queue_group_debugfs_init(kctx); kbase_csf_kcpu_debugfs_init(kctx); kbase_csf_tiler_heap_debugfs_init(kctx); + kbase_csf_tiler_heap_total_debugfs_init(kctx); kbase_csf_cpu_queue_debugfs_init(kctx); } KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init); diff --git a/mali_kbase/context/mali_kbase_context.c b/mali_kbase/context/mali_kbase_context.c index 85f4c0a..9eaf69a 100644 --- a/mali_kbase/context/mali_kbase_context.c +++ b/mali_kbase/context/mali_kbase_context.c @@ -163,8 +163,6 @@ int kbase_context_common_init(struct kbase_context *kctx) kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1; - mutex_init(&kctx->legacy_hwcnt_lock); - mutex_lock(&kctx->kbdev->kctx_list_lock); err = kbase_insert_kctx_to_process(kctx); diff --git a/mali_kbase/csf/Kbuild b/mali_kbase/csf/Kbuild index 765e419..29983fb 100644 --- a/mali_kbase/csf/Kbuild +++ b/mali_kbase/csf/Kbuild @@ -33,10 +33,12 @@ mali_kbase-y += \ csf/mali_kbase_csf_kcpu_debugfs.o \ csf/mali_kbase_csf_protected_memory.o \ csf/mali_kbase_csf_tiler_heap_debugfs.o \ - csf/mali_kbase_csf_cpu_queue_debugfs.o + csf/mali_kbase_csf_cpu_queue_debugfs.o \ + csf/mali_kbase_csf_event.o mali_kbase-$(CONFIG_MALI_REAL_HW) += csf/mali_kbase_csf_firmware.o +mali_kbase-$(CONFIG_MALI_NO_MALI) += csf/mali_kbase_csf_firmware_no_mali.o ifeq ($(KBUILD_EXTMOD),) # 
in-tree diff --git a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c index ce6d546..546e18d 100644 --- a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c +++ b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c @@ -43,7 +43,7 @@ #define COMMAND_PROTECTED_ACK ((u32)4) #define COMMAND_RESET_ACK ((u32)5) -/** +/* * Default value for the TIMER register of the IPA Control interface, * expressed in milliseconds. * @@ -53,22 +53,22 @@ */ #define TIMER_DEFAULT_VALUE_MS ((u32)10) /* 10 milliseconds */ -/** +/* * Number of timer events per second. */ #define TIMER_EVENTS_PER_SECOND ((u32)1000 / TIMER_DEFAULT_VALUE_MS) -/** +/* * Maximum number of loops polling the GPU before we assume the GPU has hung. */ #define IPA_INACTIVE_MAX_LOOPS ((unsigned int)8000000) -/** +/* * Number of bits used to configure a performance counter in SELECT registers. */ #define IPA_CONTROL_SELECT_BITS_PER_CNT ((u64)8) -/** +/* * Maximum value of a performance counter. */ #define MAX_PRFCNT_VALUE (((u64)1 << 48) - 1) @@ -251,9 +251,15 @@ static inline void calc_prfcnt_delta(struct kbase_device *kbdev, delta_value *= prfcnt->scaling_factor; - if (!WARN_ON_ONCE(kbdev->csf.ipa_control.cur_gpu_rate == 0)) - if (prfcnt->gpu_norm) - delta_value = div_u64(delta_value, kbdev->csf.ipa_control.cur_gpu_rate); + if (kbdev->csf.ipa_control.cur_gpu_rate == 0) { + static bool warned; + + if (!warned) { + dev_warn(kbdev->dev, "%s: GPU freq is unexpectedly 0", __func__); + warned = true; + } + } else if (prfcnt->gpu_norm) + delta_value = div_u64(delta_value, kbdev->csf.ipa_control.cur_gpu_rate); prfcnt->latest_raw_value = raw_value; @@ -791,7 +797,7 @@ int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client, ipa_ctrl = &kbdev->csf.ipa_control; session = (struct kbase_ipa_control_session *)client; - if (WARN_ON(!session->active)) { + if (!session->active) { dev_err(kbdev->dev, "%s: attempt to query inactive session", __func__); return -EINVAL; diff --git a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.h b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.h index 348a52f..0469c48 100644 --- a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.h +++ b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.h @@ -24,7 +24,7 @@ #include <mali_kbase.h> -/** +/* * Maximum index accepted to configure an IPA Control performance counter. */ #define KBASE_IPA_CONTROL_CNT_MAX_IDX ((u8)64 * 3) diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c index 142e5a8..8b70349 100644 --- a/mali_kbase/csf/mali_kbase_csf.c +++ b/mali_kbase/csf/mali_kbase_csf.c @@ -33,30 +33,12 @@ #include "mali_kbase_csf_timeout.h" #include <csf/ipa_control/mali_kbase_csf_ipa_control.h> #include <mali_kbase_hwaccess_time.h> +#include "mali_kbase_csf_event.h" #define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK) #define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK) #define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1) -/** - * struct kbase_csf_event - CSF event callback. - * - * This structure belongs to the list of events which is part of a Kbase - * context, and describes a callback function with a custom parameter to pass - * to it when a CSF event is signalled. - * - * @link: Link to the rest of the list. - * @kctx: Pointer to the Kbase context this event belongs to. - * @callback: Callback function to call when a CSF event is signalled. - * @param: Parameter to pass to the callback function. 
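The calc_prfcnt_delta() change above keeps the existing normalisation arithmetic and only replaces the WARN_ON_ONCE with a one-time dev_warn when the cached GPU rate is zero. The arithmetic itself reduces to the following simplified sketch; everything except div_u64() is an illustrative name, and counter wrap handling is omitted.

#include <linux/types.h>
#include <linux/math64.h>

/* Scale the raw counter delta and, when GPU-cycle normalisation is
 * requested, divide by the sampled GPU clock rate. div_u64() keeps the
 * 64-bit division safe on 32-bit kernels.
 */
static u64 example_prfcnt_delta(u64 raw_value, u64 latest_raw_value,
				u64 scaling_factor, bool gpu_norm,
				u32 gpu_rate)
{
	u64 delta = raw_value - latest_raw_value;

	delta *= scaling_factor;

	if (gpu_norm && gpu_rate)
		delta = div_u64(delta, gpu_rate);

	return delta;
}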
- */ -struct kbase_csf_event { - struct list_head link; - struct kbase_context *kctx; - kbase_csf_event_callback *callback; - void *param; -}; - const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT] = { KBASE_QUEUE_GROUP_PRIORITY_HIGH, KBASE_QUEUE_GROUP_PRIORITY_MEDIUM, @@ -530,24 +512,24 @@ static int csf_queue_register_internal(struct kbase_context *kctx, if (reg_ex && reg_ex->ex_buffer_size) { int buf_pages = (reg_ex->ex_buffer_size + (1 << PAGE_SHIFT) - 1) >> PAGE_SHIFT; + struct kbase_va_region *region_ex = + kbase_region_tracker_find_region_enclosing_address(kctx, + reg_ex->ex_buffer_base); - region = kbase_region_tracker_find_region_enclosing_address( - kctx, reg_ex->ex_buffer_base); - if (kbase_is_region_invalid_or_free(region)) { + if (kbase_is_region_invalid_or_free(region_ex)) { ret = -ENOENT; goto out_unlock_vm; } - if (buf_pages > (region->nr_pages - - ((reg_ex->ex_buffer_base >> PAGE_SHIFT) - - region->start_pfn))) { + if (buf_pages > (region_ex->nr_pages - + ((reg_ex->ex_buffer_base >> PAGE_SHIFT) - region_ex->start_pfn))) { ret = -EINVAL; goto out_unlock_vm; } - region = kbase_region_tracker_find_region_enclosing_address( - kctx, reg_ex->ex_offset_var_addr); - if (kbase_is_region_invalid_or_free(region)) { + region_ex = kbase_region_tracker_find_region_enclosing_address( + kctx, reg_ex->ex_offset_var_addr); + if (kbase_is_region_invalid_or_free(region_ex)) { ret = -ENOENT; goto out_unlock_vm; } @@ -582,6 +564,8 @@ static int csf_queue_register_internal(struct kbase_context *kctx, queue->sb_status = 0; queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED; + atomic_set(&queue->pending, 0); + INIT_LIST_HEAD(&queue->link); INIT_LIST_HEAD(&queue->error.link); INIT_WORK(&queue->oom_event_work, oom_event_worker); @@ -589,6 +573,7 @@ static int csf_queue_register_internal(struct kbase_context *kctx, list_add(&queue->link, &kctx->csf.queue_list); region->flags |= KBASE_REG_NO_USER_FREE; + region->user_data = queue; /* Initialize the cs_trace configuration parameters, When buffer_size * is 0, trace is disabled. Here we only update the fields when @@ -669,8 +654,6 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx, queue = find_queue(kctx, term->buffer_gpu_addr); if (queue) { - unsigned long flags; - /* As the GPU queue has been terminated by the * user space, undo the actions that were performed when the * queue was registered i.e. remove the queue from the per @@ -687,19 +670,18 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx, /* After this the Userspace would be able to free the * memory for GPU queue. In case the Userspace missed * terminating the queue, the cleanup will happen on - * context termination where teardown of region tracker + * context termination where tear down of region tracker * would free up the GPU queue memory. */ queue->queue_reg->flags &= ~KBASE_REG_NO_USER_FREE; + queue->queue_reg->user_data = NULL; } kbase_gpu_vm_unlock(kctx); - spin_lock_irqsave(&kctx->csf.event_lock, flags); dev_dbg(kctx->kbdev->dev, "Remove any pending command queue fatal from context %pK\n", (void *)kctx); - list_del_init(&queue->error.link); - spin_unlock_irqrestore(&kctx->csf.event_lock, flags); + kbase_csf_event_remove_error(kctx, &queue->error); release_queue(queue); } @@ -781,6 +763,48 @@ static struct kbase_queue_group *get_bound_queue_group( return group; } +/** + * pending_submission_worker() - Work item to process pending kicked GPU command queues. + * + * @work: Pointer to pending_submission_work. 
+ * + * This function starts all pending queues, for which the work + * was previously submitted via ioctl call from application thread. + * If the queue is already scheduled and resident, it will be started + * right away, otherwise once the group is made resident. + */ +static void pending_submission_worker(struct work_struct *work) +{ + struct kbase_context *kctx = + container_of(work, struct kbase_context, csf.pending_submission_work); + struct kbase_device *kbdev = kctx->kbdev; + struct kbase_queue *queue; + int err = kbase_reset_gpu_prevent_and_wait(kbdev); + + if (err) { + dev_err(kbdev->dev, "Unsuccessful GPU reset detected when kicking queue "); + return; + } + + mutex_lock(&kctx->csf.lock); + + /* Iterate through the queue list and schedule the pending ones for submission. */ + list_for_each_entry(queue, &kctx->csf.queue_list, link) { + if (atomic_cmpxchg(&queue->pending, 1, 0) == 1) { + struct kbase_queue_group *group = get_bound_queue_group(queue); + + if (!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND) + dev_dbg(kbdev->dev, "queue is not bound to a group"); + else + WARN_ON(kbase_csf_scheduler_queue_start(queue)); + } + } + + mutex_unlock(&kctx->csf.lock); + + kbase_reset_gpu_allow(kbdev); +} + void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot) { if (WARN_ON(slot < 0)) @@ -846,40 +870,44 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, kbase_csf_ring_csg_doorbell(kbdev, csg_nr); } +static void enqueue_gpu_submission_work(struct kbase_context *const kctx) +{ + queue_work(system_highpri_wq, &kctx->csf.pending_submission_work); +} + int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_kick *kick) { struct kbase_device *kbdev = kctx->kbdev; - struct kbase_queue_group *group; - struct kbase_queue *queue; + bool trigger_submission = false; + struct kbase_va_region *region; int err = 0; - err = kbase_reset_gpu_prevent_and_wait(kbdev); - if (err) { - dev_warn( - kbdev->dev, - "Unsuccessful GPU reset detected when kicking queue (buffer_addr=0x%.16llx)", - kick->buffer_gpu_addr); - return err; - } - - mutex_lock(&kctx->csf.lock); - queue = find_queue(kctx, kick->buffer_gpu_addr); - if (!queue) - err = -EINVAL; + /* GPU work submission happening asynchronously to prevent the contention with + * scheduler lock and as the result blocking application thread. For this reason, + * the vm_lock is used here to get the reference to the queue based on its buffer_gpu_addr + * from the context list of active va_regions. + * Once the target queue is found the pending flag is set to one atomically avoiding + * a race between submission ioctl thread and the work item. 
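The submission rework above relies on a simple lock-free handshake: the kick ioctl marks a queue as pending with atomic_cmpxchg() and schedules a work item, and the worker claims each pending queue with the reverse atomic_cmpxchg() before starting it, so repeated kicks collapse into a single submission. A stripped-down sketch of that pattern, with hypothetical types outside the driver:

#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/workqueue.h>

struct example_queue {
	atomic_t pending;
	struct work_struct submit_work;
};

/* Producer (e.g. an ioctl handler): flag the queue and kick the worker.
 * The 0 -> 1 exchange makes repeated kicks idempotent.
 */
static void example_kick(struct example_queue *q)
{
	atomic_cmpxchg(&q->pending, 0, 1);
	queue_work(system_highpri_wq, &q->submit_work);
}

/* Consumer (work item): claim the flag back with a 1 -> 0 exchange;
 * only the caller that wins the exchange actually submits the queue.
 */
static void example_submit_worker(struct work_struct *work)
{
	struct example_queue *q =
		container_of(work, struct example_queue, submit_work);

	if (atomic_cmpxchg(&q->pending, 1, 0) == 1) {
		/* start the queue here */
	}
}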
+ */ + kbase_gpu_vm_lock(kctx); + region = kbase_region_tracker_find_region_enclosing_address(kctx, kick->buffer_gpu_addr); + if (!kbase_is_region_invalid_or_free(region)) { + struct kbase_queue *queue = region->user_data; - if (!err) { - group = get_bound_queue_group(queue); - if (!group) { - dev_err(kctx->kbdev->dev, "queue not bound\n"); - err = -EINVAL; + if (queue) { + atomic_cmpxchg(&queue->pending, 0, 1); + trigger_submission = true; } + } else { + dev_dbg(kbdev->dev, + "Attempt to kick GPU queue without a valid command buffer region"); + err = -EFAULT; } + kbase_gpu_vm_unlock(kctx); - if (!err) - err = kbase_csf_scheduler_queue_start(queue); - mutex_unlock(&kctx->csf.lock); - kbase_reset_gpu_allow(kbdev); + if (likely(trigger_submission)) + enqueue_gpu_submission_work(kctx); return err; } @@ -1310,6 +1338,7 @@ static int create_queue_group(struct kbase_context *const kctx, group->doorbell_nr = KBASEP_USER_DB_NR_INVALID; group->faulted = false; + group->group_uid = generate_group_uid(); create->out.group_uid = group->group_uid; @@ -1343,6 +1372,7 @@ static int create_queue_group(struct kbase_context *const kctx, return group_handle; } + int kbase_csf_queue_group_create(struct kbase_context *const kctx, union kbase_ioctl_cs_queue_group_create *const create) { @@ -1368,6 +1398,9 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx, "No CSG has at least %d CSs", create->in.cs_min); err = -EINVAL; + } else if (create->in.reserved) { + dev_warn(kctx->kbdev->dev, "Reserved field was set to non-0"); + err = -EINVAL; } else { /* For the CSG which satisfies the condition for having * the needed number of CSs, check whether it also conforms @@ -1517,6 +1550,19 @@ static void cancel_queue_group_events(struct kbase_queue_group *group) cancel_work_sync(&group->protm_event_work); } +static void remove_pending_group_fatal_error(struct kbase_queue_group *group) +{ + struct kbase_context *kctx = group->kctx; + + dev_dbg(kctx->kbdev->dev, + "Remove any pending group fatal error from context %pK\n", + (void *)group->kctx); + + kbase_csf_event_remove_error(kctx, &group->error_tiler_oom); + kbase_csf_event_remove_error(kctx, &group->error_timeout); + kbase_csf_event_remove_error(kctx, &group->error_fatal); +} + void kbase_csf_queue_group_terminate(struct kbase_context *kctx, u8 group_handle) { @@ -1539,19 +1585,7 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx, group = find_queue_group(kctx, group_handle); if (group) { - unsigned long flags; - - spin_lock_irqsave(&kctx->csf.event_lock, flags); - - dev_dbg(kbdev->dev, - "Remove any pending group fatal error from context %pK\n", - (void *)group->kctx); - - list_del_init(&group->error_tiler_oom.link); - list_del_init(&group->error_timeout.link); - list_del_init(&group->error_fatal.link); - spin_unlock_irqrestore(&kctx->csf.event_lock, flags); - + remove_pending_group_fatal_error(group); term_queue_group(group); kctx->csf.queue_groups[group_handle] = NULL; } @@ -1603,48 +1637,6 @@ int kbase_csf_queue_group_suspend(struct kbase_context *kctx, return err; } -/** - * add_error() - Add an error to the list of errors to report to user space - * - * @kctx: Address of a base context associated with a GPU address space. - * @error: Address of the item to be added to the context's pending error list. - * @data: Error data to be returned to userspace. - * - * Does not wake up the event queue blocking a user thread in kbase_poll. This - * is to make it more efficient to add multiple errors. 
- * - * The added error must not already be on the context's list of errors waiting - * to be reported (e.g. because a previous error concerning the same object has - * not yet been reported). - */ -static void add_error(struct kbase_context *const kctx, - struct kbase_csf_notification *const error, - struct base_csf_notification const *const data) -{ - unsigned long flags; - - if (WARN_ON(!kctx)) - return; - - if (WARN_ON(!error)) - return; - - if (WARN_ON(!data)) - return; - - spin_lock_irqsave(&kctx->csf.event_lock, flags); - - if (!WARN_ON(!list_empty(&error->link))) { - error->data = *data; - list_add_tail(&error->link, &kctx->csf.error_list); - dev_dbg(kctx->kbdev->dev, - "Added error %pK of type %d in context %pK\n", - (void *)error, data->type, (void *)kctx); - } - - spin_unlock_irqrestore(&kctx->csf.event_lock, flags); -} - void kbase_csf_add_group_fatal_error( struct kbase_queue_group *const group, struct base_gpu_queue_group_error const *const err_payload) @@ -1667,7 +1659,7 @@ void kbase_csf_add_group_fatal_error( } }; - add_error(group->kctx, &group->error_fatal, &error); + kbase_csf_event_add_error(group->kctx, &group->error_fatal, &error); } void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev, @@ -1708,12 +1700,11 @@ int kbase_csf_ctx_init(struct kbase_context *kctx) struct kbase_device *kbdev = kctx->kbdev; int err = -ENOMEM; - INIT_LIST_HEAD(&kctx->csf.event_callback_list); INIT_LIST_HEAD(&kctx->csf.queue_list); INIT_LIST_HEAD(&kctx->csf.link); - INIT_LIST_HEAD(&kctx->csf.error_list); - spin_lock_init(&kctx->csf.event_lock); + kbase_csf_event_init(kctx); + kctx->csf.user_reg_vma = NULL; mutex_lock(&kbdev->pm.lock); /* The inode information for /dev/malixx file is not available at the @@ -1744,9 +1735,11 @@ int kbase_csf_ctx_init(struct kbase_context *kctx) if (likely(!err)) { err = kbase_csf_tiler_heap_context_init(kctx); - if (likely(!err)) + if (likely(!err)) { mutex_init(&kctx->csf.lock); - else + INIT_WORK(&kctx->csf.pending_submission_work, + pending_submission_worker); + } else kbase_csf_kcpu_queue_context_term(kctx); } @@ -1829,7 +1822,6 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) * for queue groups & kcpu queues, hence no need to explicitly remove * those debugfs files. */ - kbase_csf_event_wait_remove_all(kctx); /* Wait for a GPU reset if it is happening, prevent it if not happening */ err = kbase_reset_gpu_prevent_and_wait(kbdev); @@ -1841,13 +1833,20 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) else reset_prevented = true; + cancel_work_sync(&kctx->csf.pending_submission_work); + mutex_lock(&kctx->csf.lock); + /* Iterate through the queue groups that were not terminated by * userspace and issue the term request to firmware for them. 
*/ for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) { - if (kctx->csf.queue_groups[i]) - term_queue_group(kctx->csf.queue_groups[i]); + struct kbase_queue_group *group = kctx->csf.queue_groups[i]; + + if (group) { + remove_pending_group_fatal_error(group); + term_queue_group(group); + } } mutex_unlock(&kctx->csf.lock); @@ -1910,185 +1909,19 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) kbase_csf_tiler_heap_context_term(kctx); kbase_csf_kcpu_queue_context_term(kctx); kbase_csf_scheduler_context_term(kctx); + kbase_csf_event_term(kctx); mutex_destroy(&kctx->csf.lock); } -int kbase_csf_event_wait_add(struct kbase_context *kctx, - kbase_csf_event_callback *callback, void *param) -{ - int err = -ENOMEM; - struct kbase_csf_event *event = - kzalloc(sizeof(struct kbase_csf_event), GFP_KERNEL); - - if (event) { - unsigned long flags; - - event->kctx = kctx; - event->callback = callback; - event->param = param; - - spin_lock_irqsave(&kctx->csf.event_lock, flags); - list_add_tail(&event->link, &kctx->csf.event_callback_list); - dev_dbg(kctx->kbdev->dev, - "Added event handler %pK with param %pK\n", event, - event->param); - spin_unlock_irqrestore(&kctx->csf.event_lock, flags); - - err = 0; - } - - return err; -} - -void kbase_csf_event_wait_remove(struct kbase_context *kctx, - kbase_csf_event_callback *callback, void *param) -{ - struct kbase_csf_event *event; - unsigned long flags; - - spin_lock_irqsave(&kctx->csf.event_lock, flags); - - list_for_each_entry(event, &kctx->csf.event_callback_list, link) { - if ((event->callback == callback) && (event->param == param)) { - list_del(&event->link); - dev_dbg(kctx->kbdev->dev, - "Removed event handler %pK with param %pK\n", - event, event->param); - kfree(event); - break; - } - } - spin_unlock_irqrestore(&kctx->csf.event_lock, flags); -} - -bool kbase_csf_read_error(struct kbase_context *kctx, - struct base_csf_notification *event_data) -{ - bool got_event = true; - struct kbase_csf_notification *error_data = NULL; - unsigned long flags; - - spin_lock_irqsave(&kctx->csf.event_lock, flags); - - if (likely(!list_empty(&kctx->csf.error_list))) { - error_data = list_first_entry(&kctx->csf.error_list, - struct kbase_csf_notification, link); - list_del_init(&error_data->link); - *event_data = error_data->data; - dev_dbg(kctx->kbdev->dev, "Dequeued error %pK in context %pK\n", - (void *)error_data, (void *)kctx); - } else { - got_event = false; - } - - spin_unlock_irqrestore(&kctx->csf.event_lock, flags); - - return got_event; -} - -bool kbase_csf_error_pending(struct kbase_context *kctx) -{ - bool event_pended = false; - unsigned long flags; - - spin_lock_irqsave(&kctx->csf.event_lock, flags); - event_pended = !list_empty(&kctx->csf.error_list); - dev_dbg(kctx->kbdev->dev, "%s error is pending in context %pK\n", - event_pended ? 
"An" : "No", (void *)kctx); - spin_unlock_irqrestore(&kctx->csf.event_lock, flags); - - return event_pended; -} - -static void sync_update_notify_gpu(struct kbase_context *kctx) -{ - bool can_notify_gpu; - unsigned long flags; - - spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); - can_notify_gpu = kctx->kbdev->pm.backend.gpu_powered; -#ifdef KBASE_PM_RUNTIME - if (kctx->kbdev->pm.backend.gpu_sleep_mode_active) - can_notify_gpu = false; -#endif - - if (can_notify_gpu) { - kbase_csf_ring_doorbell(kctx->kbdev, CSF_KERNEL_DOORBELL_NR); - KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT_NOTIFY_GPU, kctx, 0u); - } - - spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); -} - -void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu) -{ - struct kbase_csf_event *event, *next_event; - unsigned long flags; - - dev_dbg(kctx->kbdev->dev, - "Signal event (%s GPU notify) for context %pK\n", - notify_gpu ? "with" : "without", (void *)kctx); - - /* First increment the signal count and wake up event thread. - */ - atomic_set(&kctx->event_count, 1); - kbase_event_wakeup(kctx); - - /* Signal the CSF firmware. This is to ensure that pending command - * stream synch object wait operations are re-evaluated. - * Write to GLB_DOORBELL would suffice as spec says that all pending - * synch object wait operations are re-evaluated on a write to any - * CS_DOORBELL/GLB_DOORBELL register. - */ - if (notify_gpu) - sync_update_notify_gpu(kctx); - - /* Now invoke the callbacks registered on backend side. - * Allow item removal inside the loop, if requested by the callback. - */ - spin_lock_irqsave(&kctx->csf.event_lock, flags); - - list_for_each_entry_safe( - event, next_event, &kctx->csf.event_callback_list, link) { - enum kbase_csf_event_callback_action action; - - dev_dbg(kctx->kbdev->dev, - "Calling event handler %pK with param %pK\n", - (void *)event, event->param); - action = event->callback(event->param); - if (action == KBASE_CSF_EVENT_CALLBACK_REMOVE) { - list_del(&event->link); - kfree(event); - } - } - - spin_unlock_irqrestore(&kctx->csf.event_lock, flags); -} - -void kbase_csf_event_wait_remove_all(struct kbase_context *kctx) -{ - struct kbase_csf_event *event, *next_event; - unsigned long flags; - - spin_lock_irqsave(&kctx->csf.event_lock, flags); - - list_for_each_entry_safe( - event, next_event, &kctx->csf.event_callback_list, link) { - list_del(&event->link); - dev_dbg(kctx->kbdev->dev, - "Removed event handler %pK with param %pK\n", - (void *)event, event->param); - kfree(event); - } - - spin_unlock_irqrestore(&kctx->csf.event_lock, flags); -} - /** * handle_oom_event - Handle the OoM event generated by the firmware for the * CSI. * + * @kctx: Pointer to the kbase context in which the tiler heap was initialized. + * @stream: Pointer to the structure containing info provided by the firmware + * about the CSI. + * * This function will handle the OoM event request from the firmware for the * CS. It will retrieve the address of heap context and heap's * statistics (like number of render passes in-flight) from the CS's kernel @@ -2097,10 +1930,6 @@ void kbase_csf_event_wait_remove_all(struct kbase_context *kctx) * It will also update the CS's kernel input page with the address * of a new chunk that was allocated. * - * @kctx: Pointer to the kbase context in which the tiler heap was initialized. - * @stream: Pointer to the structure containing info provided by the firmware - * about the CSI. 
- * * Return: 0 if successfully handled the request, otherwise a negative error * code on failure. */ @@ -2171,7 +2000,9 @@ static void report_tiler_oom_error(struct kbase_queue_group *group) BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM, } } } }; - add_error(group->kctx, &group->error_tiler_oom, &error); + kbase_csf_event_add_error(group->kctx, + &group->error_tiler_oom, + &error); kbase_event_wakeup(group->kctx); } @@ -2316,7 +2147,7 @@ static void report_group_timeout_error(struct kbase_queue_group *const group) "Notify the event notification thread, forward progress timeout (%llu cycles)\n", kbase_csf_timeout_get(group->kctx->kbdev)); - add_error(group->kctx, &group->error_timeout, &error); + kbase_csf_event_add_error(group->kctx, &group->error_timeout, &error); kbase_event_wakeup(group->kctx); } @@ -2452,7 +2283,7 @@ static void report_queue_fatal_error(struct kbase_queue *const queue, } }; - add_error(queue->kctx, &queue->error, &error); + kbase_csf_event_add_error(queue->kctx, &queue->error, &error); kbase_event_wakeup(queue->kctx); } @@ -3008,6 +2839,7 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) { int non_idle_offslot_grps; bool can_suspend_on_idle; + dev_dbg(kbdev->dev, "Idle-hysteresis event flagged"); kbase_csf_firmware_global_input_mask( global_iface, GLB_REQ, glb_ack, diff --git a/mali_kbase/csf/mali_kbase_csf.h b/mali_kbase/csf/mali_kbase_csf.h index 640d2ed..e3db81d 100644 --- a/mali_kbase/csf/mali_kbase_csf.h +++ b/mali_kbase/csf/mali_kbase_csf.h @@ -26,6 +26,7 @@ #include "mali_kbase_csf_scheduler.h" #include "mali_kbase_csf_firmware.h" #include "mali_kbase_csf_protected_memory.h" +#include "mali_kbase_hwaccess_time.h" /* Indicate invalid CS h/w interface */ @@ -47,129 +48,6 @@ #define FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER (5) /** - * enum kbase_csf_event_callback_action - return type for CSF event callbacks. - * - * @KBASE_CSF_EVENT_CALLBACK_FIRST: Never set explicitly. - * It doesn't correspond to any action or type of event callback. - * - * @KBASE_CSF_EVENT_CALLBACK_KEEP: The callback will remain registered. - * - * @KBASE_CSF_EVENT_CALLBACK_REMOVE: The callback will be removed - * immediately upon return. - * - * @KBASE_CSF_EVENT_CALLBACK_LAST: Never set explicitly. - * It doesn't correspond to any action or type of event callback. - */ -enum kbase_csf_event_callback_action { - KBASE_CSF_EVENT_CALLBACK_FIRST = 0, - KBASE_CSF_EVENT_CALLBACK_KEEP, - KBASE_CSF_EVENT_CALLBACK_REMOVE, - KBASE_CSF_EVENT_CALLBACK_LAST, -}; - -/** - * kbase_csf_event_callback_action - type for callback functions to be - * called upon CSF events. - * - * This is the type of callback functions that can be registered - * for CSF events. These function calls shall be triggered by any call - * to kbase_csf_event_signal. - * - * @param: Generic parameter to pass to the callback function. - * - * Return: KBASE_CSF_EVENT_CALLBACK_KEEP if the callback should remain - * registered, or KBASE_CSF_EVENT_CALLBACK_REMOVE if it should be removed. - */ -typedef enum kbase_csf_event_callback_action kbase_csf_event_callback(void *param); - -/** - * kbase_csf_event_wait_add - Add a CSF event callback - * - * This function adds an event callback to the list of CSF event callbacks - * belonging to a given Kbase context, to be triggered when a CSF event is - * signalled by kbase_csf_event_signal. - * - * @kctx: The Kbase context the @callback should be registered to. - * @callback: The callback function to register. 
- * @param: Custom parameter to be passed to the @callback function. - * - * Return: 0 on success, or negative on failure. - */ -int kbase_csf_event_wait_add(struct kbase_context *kctx, - kbase_csf_event_callback *callback, void *param); - -/** - * kbase_csf_event_wait_remove - Remove a CSF event callback - * - * This function removes an event callback from the list of CSF event callbacks - * belonging to a given Kbase context. - * - * @kctx: The kbase context the @callback should be removed from. - * @callback: The callback function to remove. - * @param: Custom parameter that would have been passed to the @p callback - * function. - */ -void kbase_csf_event_wait_remove(struct kbase_context *kctx, - kbase_csf_event_callback *callback, void *param); - -/** - * kbase_csf_event_wait_remove_all - Removes all CSF event callbacks - * - * This function empties the list of CSF event callbacks belonging to a given - * Kbase context. - * - * @kctx: The kbase context for which CSF event callbacks have to be removed. - */ -void kbase_csf_event_wait_remove_all(struct kbase_context *kctx); - -/** - * kbase_csf_read_error - Read CS fatal error - * - * This function takes the CS fatal error from context's ordered - * error_list, copies its contents to @event_data. - * - * @kctx: The kbase context to read fatal error from - * @event_data: Caller-provided buffer to copy the fatal error to - * - * Return: true if fatal error is read successfully. - */ -bool kbase_csf_read_error(struct kbase_context *kctx, - struct base_csf_notification *event_data); - -/** - * kbase_csf_error_pending - Check whether fatal error is pending - * - * @kctx: The kbase context to check fatal error upon. - * - * Return: true if fatal error is pending. - */ -bool kbase_csf_error_pending(struct kbase_context *kctx); - -/** - * kbase_csf_event_signal - Signal a CSF event - * - * This function triggers all the CSF event callbacks that are registered to - * a given Kbase context, and also signals the event handling thread of - * userspace driver waiting for the CSF event. - * - * @kctx: The kbase context whose CSF event callbacks shall be triggered. - * @notify_gpu: Flag to indicate if CSF firmware should be notified of the - * signaling of event that happened on the Driver side, either - * the signal came from userspace or from kcpu queues. - */ -void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu); - -static inline void kbase_csf_event_signal_notify_gpu(struct kbase_context *kctx) -{ - kbase_csf_event_signal(kctx, true); -} - -static inline void kbase_csf_event_signal_cpu_only(struct kbase_context *kctx) -{ - kbase_csf_event_signal(kctx, false); -} - -/** * kbase_csf_ctx_init - Initialize the CSF interface for a GPU address space. * * @kctx: Pointer to the kbase context which is being initialized. @@ -182,11 +60,11 @@ int kbase_csf_ctx_init(struct kbase_context *kctx); * kbase_csf_ctx_handle_fault - Terminate queue groups & notify fault upon * GPU bus fault, MMU page fault or similar. * - * This function terminates all GPU command queue groups in the context and - * notifies the event notification thread of the fault. - * * @kctx: Pointer to faulty kbase context. * @fault: Pointer to the fault. + * + * This function terminates all GPU command queue groups in the context and + * notifies the event notification thread of the fault. 
*/ void kbase_csf_ctx_handle_fault(struct kbase_context *kctx, struct kbase_fault *fault); @@ -194,10 +72,10 @@ void kbase_csf_ctx_handle_fault(struct kbase_context *kctx, /** * kbase_csf_ctx_term - Terminate the CSF interface for a GPU address space. * + * @kctx: Pointer to the kbase context which is being terminated. + * * This function terminates any remaining CSGs and CSs which weren't destroyed * before context termination. - * - * @kctx: Pointer to the kbase context which is being terminated. */ void kbase_csf_ctx_term(struct kbase_context *kctx); @@ -246,14 +124,14 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx, * kbase_csf_alloc_command_stream_user_pages - Allocate resources for a * GPU command queue. * - * This function allocates a pair of User mode input/output pages for a - * GPU command queue and maps them in the shared interface segment of MCU - * firmware address space. Also reserves a hardware doorbell page for the queue. - * * @kctx: Pointer to the kbase context within which the resources * for the queue are being allocated. * @queue: Pointer to the queue for which to allocate resources. * + * This function allocates a pair of User mode input/output pages for a + * GPU command queue and maps them in the shared interface segment of MCU + * firmware address space. Also reserves a hardware doorbell page for the queue. + * * Return: 0 on success, or negative on failure. */ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, @@ -294,9 +172,9 @@ void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue); /** * kbase_csf_queue_kick - Schedule a GPU command queue on the firmware * - * @kctx: The kbase context. - * @kick: Pointer to the struct which specifies the queue - * that needs to be scheduled. + * @kctx: The kbase context. + * @kick: Pointer to the struct which specifies the queue + * that needs to be scheduled. * * Return: 0 on success, or negative on failure. */ @@ -307,12 +185,12 @@ int kbase_csf_queue_kick(struct kbase_context *kctx, * kbase_csf_queue_group_handle_is_valid - Find if the given queue group handle * is valid. * - * This function is used to determine if the queue group handle is valid. - * * @kctx: The kbase context under which the queue group exists. * @group_handle: Handle for the group which uniquely identifies it within * the context with which it was created. * + * This function is used to determine if the queue group handle is valid. + * * Return: 0 on success, or negative on failure. */ int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx, @@ -359,8 +237,6 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group); /** * kbase_csf_queue_group_suspend - Suspend a GPU command queue group * - * This function is used to suspend a queue group and copy the suspend buffer. - * * @kctx: The kbase context for which the queue group is to be * suspended. * @sus_buf: Pointer to the structure which contains details of the @@ -368,6 +244,8 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group); * @group_handle: Handle for the group which uniquely identifies it within * the context within which it was created. * + * This function is used to suspend a queue group and copy the suspend buffer. + * * Return: 0 on success or negative value if failed to suspend * queue group and copy suspend buffer contents. */ @@ -397,12 +275,12 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val); * the update of userspace mapping of HW * doorbell page. 
* + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * * The function creates a file and allocates a dummy page to facilitate the * update of userspace mapping to point to the dummy page instead of the real * HW doorbell page after the suspend of queue group. * - * @kbdev: Instance of a GPU platform device that implements a CSF interface. - * * Return: 0 on success, or negative on failure. */ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev); @@ -420,14 +298,14 @@ void kbase_csf_doorbell_mapping_term(struct kbase_device *kbdev); * instead of the User register page after * the GPU power down. * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * * The function allocates a dummy page which is used to replace the User * register page in the userspace mapping after the power down of GPU. * On the power up of GPU, the mapping is updated to point to the real * User register page. The mapping is used to allow access to LATEST_FLUSH * register from userspace. * - * @kbdev: Instance of a GPU platform device that implements a CSF interface. - * * Return: 0 on success, or negative on failure. */ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev); @@ -443,10 +321,10 @@ void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev); /** * kbase_csf_ring_csg_doorbell - ring the doorbell for a CSG interface. * - * The function kicks a notification on the CSG interface to firmware. - * * @kbdev: Instance of a GPU platform device that implements a CSF interface. * @slot: Index of CSG interface for ringing the door-bell. + * + * The function kicks a notification on the CSG interface to firmware. */ void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot); @@ -454,10 +332,10 @@ void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot); * kbase_csf_ring_csg_slots_doorbell - ring the doorbell for a set of CSG * interfaces. * - * The function kicks a notification on a set of CSG interfaces to firmware. - * * @kbdev: Instance of a GPU platform device that implements a CSF interface. * @slot_bitmap: bitmap for the given slots, slot-0 on bit-0, etc. + * + * The function kicks a notification on a set of CSG interfaces to firmware. */ void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev, u32 slot_bitmap); @@ -466,9 +344,6 @@ void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev, * kbase_csf_ring_cs_kernel_doorbell - ring the kernel doorbell for a CSI * assigned to a GPU queue * - * The function sends a doorbell interrupt notification to the firmware for - * a CSI assigned to a GPU queue. - * * @kbdev: Instance of a GPU platform device that implements a CSF interface. * @csi_index: ID of the CSI assigned to the GPU queue. * @csg_nr: Index of the CSG slot assigned to the queue @@ -479,6 +354,9 @@ void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev, * The flag is supposed be false only when the input page * for bound GPU queues is programmed at the time of * starting/resuming the group on a CSG slot. + * + * The function sends a doorbell interrupt notification to the firmware for + * a CSI assigned to a GPU queue. */ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, int csi_index, int csg_nr, @@ -488,11 +366,11 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, * kbase_csf_ring_cs_user_doorbell - ring the user doorbell allocated for a * queue. 
* - * The function kicks a notification to the firmware on the doorbell assigned - * to the queue. - * * @kbdev: Instance of a GPU platform device that implements a CSF interface. * @queue: Pointer to the queue for ringing the door-bell. + * + * The function kicks a notification to the firmware on the doorbell assigned + * to the queue. */ void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev, struct kbase_queue *queue); @@ -563,5 +441,23 @@ static inline u8 kbase_csf_priority_queue_group_priority_to_relative(u8 priority return kbasep_csf_queue_group_priority_to_relative[priority]; } - +/** + * kbase_csf_ktrace_gpu_cycle_cnt - Wrapper to retreive the GPU cycle counter + * value for Ktrace purpose. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function is just a wrapper to retreive the GPU cycle counter value, to + * avoid any overhead on Release builds where Ktrace is disabled by default. + * + * Return: Snapshot of the GPU cycle count register. + */ +static inline u64 kbase_csf_ktrace_gpu_cycle_cnt(struct kbase_device *kbdev) +{ +#if KBASE_KTRACE_ENABLE + return kbase_backend_get_cycle_cnt(kbdev); +#else + return 0; +#endif +} #endif /* _KBASE_CSF_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_defs.h b/mali_kbase/csf/mali_kbase_csf_defs.h index de471eb..0712648 100644 --- a/mali_kbase/csf/mali_kbase_csf_defs.h +++ b/mali_kbase/csf/mali_kbase_csf_defs.h @@ -30,6 +30,7 @@ #include <linux/wait.h> #include "mali_kbase_csf_firmware.h" +#include "mali_kbase_csf_event.h" /* Maximum number of KCPU command queues to be created per GPU address space. */ @@ -331,6 +332,7 @@ struct kbase_csf_notification { * queue. * @cs_fatal_info: Records additional information about the CS fatal event. * @cs_fatal: Records information about the CS fatal event. + * @pending: Indicating whether the queue has new submitted work. */ struct kbase_queue { struct kbase_context *kctx; @@ -364,6 +366,7 @@ struct kbase_queue { struct work_struct fatal_event_work; u64 cs_fatal_info; u32 cs_fatal; + atomic_t pending; }; /** @@ -487,6 +490,7 @@ struct kbase_queue_group { struct kbase_csf_notification error_tiler_oom; struct work_struct timer_event_work; + }; /** @@ -538,10 +542,6 @@ struct kbase_csf_cpu_queue_context { /** * struct kbase_csf_heap_context_allocator - Allocator of heap contexts * - * Heap context structures are allocated by the kernel for use by the firmware. - * The current implementation subdivides a single GPU memory region for use as - * a sparse array. - * * @kctx: Pointer to the kbase context with which this allocator is * associated. * @region: Pointer to a GPU memory region from which heap context structures @@ -552,6 +552,10 @@ struct kbase_csf_cpu_queue_context { * @lock: Lock preventing concurrent access to the @in_use bitmap. * @in_use: Bitmap that indicates which heap context structures are currently * allocated (in @region). + * + * Heap context structures are allocated by the kernel for use by the firmware. + * The current implementation subdivides a single GPU memory region for use as + * a sparse array. */ struct kbase_csf_heap_context_allocator { struct kbase_context *kctx; @@ -565,10 +569,6 @@ struct kbase_csf_heap_context_allocator { * struct kbase_csf_tiler_heap_context - Object representing the tiler heaps * context for a GPU address space. * - * This contains all of the CSF state relating to chunked tiler heaps for one - * @kbase_context. 
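The heap context allocator described above (a single GPU memory region subdivided into fixed-size slots, tracked by an @in_use bitmap under a lock) follows a common kernel sub-allocation pattern. A generic sketch of that approach, with illustrative names and sizes rather than the driver's real ones:

#include <linux/types.h>
#include <linux/bitmap.h>
#include <linux/mutex.h>

#define EXAMPLE_NR_SLOTS	64	/* hypothetical number of heap contexts */

struct example_ctx_alloc {
	struct mutex lock;
	u64 region_gpu_va;	/* base address of the backing region */
	size_t slot_size;	/* size of one heap context structure */
	DECLARE_BITMAP(in_use, EXAMPLE_NR_SLOTS);
};

/* Return the GPU VA of a free slot, or 0 if the region is exhausted. */
static u64 example_ctx_alloc_slot(struct example_ctx_alloc *alloc)
{
	u64 gpu_va = 0;
	unsigned long slot;

	mutex_lock(&alloc->lock);
	slot = find_first_zero_bit(alloc->in_use, EXAMPLE_NR_SLOTS);
	if (slot < EXAMPLE_NR_SLOTS) {
		set_bit(slot, alloc->in_use);
		gpu_va = alloc->region_gpu_va + slot * alloc->slot_size;
	}
	mutex_unlock(&alloc->lock);

	return gpu_va;
}

static void example_ctx_free_slot(struct example_ctx_alloc *alloc, u64 gpu_va)
{
	unsigned long slot = (gpu_va - alloc->region_gpu_va) / alloc->slot_size;

	mutex_lock(&alloc->lock);
	clear_bit(slot, alloc->in_use);
	mutex_unlock(&alloc->lock);
}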
It is not the same as a heap context structure allocated by - * the kernel for use by the firmware. - * * @lock: Lock to prevent the concurrent access to tiler heaps (after the * initialization), a tiler heap can be terminated whilst an OoM * event is being handled for it. @@ -576,6 +576,10 @@ struct kbase_csf_heap_context_allocator { * @ctx_alloc: Allocator for heap context structures. * @nr_of_heaps: Total number of tiler heaps that were added during the * life time of the context. + * + * This contains all of the CSF state relating to chunked tiler heaps for one + * @kbase_context. It is not the same as a heap context structure allocated by + * the kernel for use by the firmware. */ struct kbase_csf_tiler_heap_context { struct mutex lock; @@ -617,6 +621,43 @@ struct kbase_csf_scheduler_context { }; /** + * enum kbase_csf_event_callback_action - return type for CSF event callbacks. + * + * @KBASE_CSF_EVENT_CALLBACK_FIRST: Never set explicitly. + * It doesn't correspond to any action or type of event callback. + * + * @KBASE_CSF_EVENT_CALLBACK_KEEP: The callback will remain registered. + * + * @KBASE_CSF_EVENT_CALLBACK_REMOVE: The callback will be removed + * immediately upon return. + * + * @KBASE_CSF_EVENT_CALLBACK_LAST: Never set explicitly. + * It doesn't correspond to any action or type of event callback. + */ +enum kbase_csf_event_callback_action { + KBASE_CSF_EVENT_CALLBACK_FIRST = 0, + KBASE_CSF_EVENT_CALLBACK_KEEP, + KBASE_CSF_EVENT_CALLBACK_REMOVE, + KBASE_CSF_EVENT_CALLBACK_LAST, +}; + +/** + * struct kbase_csf_event - Object representing CSF event and error + * + * @callback_list: List of callbacks which are registered to serve CSF + * events. + * @error_list: List for CS fatal errors in CSF context. + * Link of fatal error is &struct_kbase_csf_notification.link. + * @lock: Lock protecting access to @callback_list and + * @error_list. + */ +struct kbase_csf_event { + struct list_head callback_list; + struct list_head error_list; + spinlock_t lock; +}; + +/** * struct kbase_csf_context - Object representing CSF for a GPU address space. * * @event_pages_head: A list of pages allocated for the event memory used by @@ -647,10 +688,7 @@ struct kbase_csf_scheduler_context { * userspace mapping created for them on bind operation * hasn't been removed. * @kcpu_queues: Kernel CPU command queues. - * @event_lock: Lock protecting access to @event_callback_list and - * @error_list. - * @event_callback_list: List of callbacks which are registered to serve CSF - * events. + * @event: CSF event object. * @tiler_heaps: Chunked tiler memory heaps. * @wq: Dedicated workqueue to process work items corresponding * to the OoM events raised for chunked tiler heaps being @@ -661,10 +699,7 @@ struct kbase_csf_scheduler_context { * of the USER register page. Currently used only for sanity * checking. * @sched: Object representing the scheduler's context - * @error_list: List for CS fatal errors in this context. - * Link of fatal error is - * &struct_kbase_csf_notification.link. - * @event_lock needs to be held to access this list. + * @pending_submission_work: Work item to process pending kicked GPU command queues. * @cpu_queue: CPU queue information. Only be available when DEBUG_FS * is enabled. 
*/ @@ -677,14 +712,13 @@ struct kbase_csf_context { struct kbase_queue_group *queue_groups[MAX_QUEUE_GROUP_NUM]; struct list_head queue_list; struct kbase_csf_kcpu_queue_context kcpu_queues; - spinlock_t event_lock; - struct list_head event_callback_list; + struct kbase_csf_event event; struct kbase_csf_tiler_heap_context tiler_heaps; struct workqueue_struct *wq; struct list_head link; struct vm_area_struct *user_reg_vma; struct kbase_csf_scheduler_context sched; - struct list_head error_list; + struct work_struct pending_submission_work; #if IS_ENABLED(CONFIG_DEBUG_FS) struct kbase_csf_cpu_queue_context cpu_queue; #endif @@ -882,12 +916,12 @@ struct kbase_csf_scheduler { bool tick_timer_active; }; -/** +/* * Number of GPU cycles per unit of the global progress timeout. */ #define GLB_PROGRESS_TIMER_TIMEOUT_SCALE ((u64)1024) -/** +/* * Maximum value of the global progress timeout. */ #define GLB_PROGRESS_TIMER_TIMEOUT_MAX \ @@ -895,12 +929,12 @@ struct kbase_csf_scheduler { GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) * \ GLB_PROGRESS_TIMER_TIMEOUT_SCALE) -/** +/* * Default GLB_PWROFF_TIMER_TIMEOUT value in unit of micro-seconds. */ #define DEFAULT_GLB_PWROFF_TIMEOUT_US (800) -/** +/* * In typical operations, the management of the shader core power transitions * is delegated to the MCU/firmware. However, if the host driver is configured * to take direct control, one needs to disable the MCU firmware GLB_PWROFF @@ -911,7 +945,7 @@ struct kbase_csf_scheduler { /* Index of the GPU_ACTIVE counter within the CSHW counter block */ #define GPU_ACTIVE_CNT_IDX (4) -/** +/* * Maximum number of sessions that can be managed by the IPA Control component. */ #if MALI_UNIT_TEST @@ -937,13 +971,13 @@ enum kbase_ipa_core_type { KBASE_IPA_CORE_TYPE_NUM }; -/** +/* * Number of configurable counters per type of block on the IPA Control * interface. */ #define KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS ((size_t)8) -/** +/* * Total number of configurable counters existing on the IPA Control interface. */ #define KBASE_IPA_CONTROL_MAX_COUNTERS \ diff --git a/mali_kbase/csf/mali_kbase_csf_event.c b/mali_kbase/csf/mali_kbase_csf_event.c new file mode 100644 index 0000000..5c86688 --- /dev/null +++ b/mali_kbase/csf/mali_kbase_csf_event.c @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ +#include <mali_kbase.h> +#include "mali_kbase_csf_event.h" + +/** + * struct kbase_csf_event_cb - CSF event callback. + * + * @link: Link to the rest of the list. + * @kctx: Pointer to the Kbase context this event belongs to. + * @callback: Callback function to call when a CSF event is signalled. + * @param: Parameter to pass to the callback function. 
+ * + * This structure belongs to the list of events which is part of a Kbase + * context, and describes a callback function with a custom parameter to pass + * to it when a CSF event is signalled. + */ +struct kbase_csf_event_cb { + struct list_head link; + struct kbase_context *kctx; + kbase_csf_event_callback *callback; + void *param; +}; + +int kbase_csf_event_wait_add(struct kbase_context *kctx, + kbase_csf_event_callback *callback, void *param) +{ + int err = -ENOMEM; + struct kbase_csf_event_cb *event_cb = + kzalloc(sizeof(struct kbase_csf_event_cb), GFP_KERNEL); + + if (event_cb) { + unsigned long flags; + + event_cb->kctx = kctx; + event_cb->callback = callback; + event_cb->param = param; + + spin_lock_irqsave(&kctx->csf.event.lock, flags); + list_add_tail(&event_cb->link, &kctx->csf.event.callback_list); + dev_dbg(kctx->kbdev->dev, + "Added event handler %pK with param %pK\n", event_cb, + event_cb->param); + spin_unlock_irqrestore(&kctx->csf.event.lock, flags); + + err = 0; + } + + return err; +} + +void kbase_csf_event_wait_remove(struct kbase_context *kctx, + kbase_csf_event_callback *callback, void *param) +{ + struct kbase_csf_event_cb *event_cb; + unsigned long flags; + + spin_lock_irqsave(&kctx->csf.event.lock, flags); + + list_for_each_entry(event_cb, &kctx->csf.event.callback_list, link) { + if ((event_cb->callback == callback) && (event_cb->param == param)) { + list_del(&event_cb->link); + dev_dbg(kctx->kbdev->dev, + "Removed event handler %pK with param %pK\n", + event_cb, event_cb->param); + kfree(event_cb); + break; + } + } + spin_unlock_irqrestore(&kctx->csf.event.lock, flags); +} + +static void sync_update_notify_gpu(struct kbase_context *kctx) +{ + bool can_notify_gpu; + unsigned long flags; + + spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); + can_notify_gpu = kctx->kbdev->pm.backend.gpu_powered; +#ifdef KBASE_PM_RUNTIME + if (kctx->kbdev->pm.backend.gpu_sleep_mode_active) + can_notify_gpu = false; +#endif + + if (can_notify_gpu) { + kbase_csf_ring_doorbell(kctx->kbdev, CSF_KERNEL_DOORBELL_NR); + KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT_NOTIFY_GPU, kctx, 0u); + } + + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); +} + +void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu) +{ + struct kbase_csf_event_cb *event_cb, *next_event_cb; + unsigned long flags; + + dev_dbg(kctx->kbdev->dev, + "Signal event (%s GPU notify) for context %pK\n", + notify_gpu ? "with" : "without", (void *)kctx); + + /* First increment the signal count and wake up event thread. + */ + atomic_set(&kctx->event_count, 1); + kbase_event_wakeup(kctx); + + /* Signal the CSF firmware. This is to ensure that pending command + * stream synch object wait operations are re-evaluated. + * Write to GLB_DOORBELL would suffice as spec says that all pending + * synch object wait operations are re-evaluated on a write to any + * CS_DOORBELL/GLB_DOORBELL register. + */ + if (notify_gpu) + sync_update_notify_gpu(kctx); + + /* Now invoke the callbacks registered on backend side. + * Allow item removal inside the loop, if requested by the callback. 
+ */ + spin_lock_irqsave(&kctx->csf.event.lock, flags); + + list_for_each_entry_safe( + event_cb, next_event_cb, &kctx->csf.event.callback_list, link) { + enum kbase_csf_event_callback_action action; + + dev_dbg(kctx->kbdev->dev, + "Calling event handler %pK with param %pK\n", + (void *)event_cb, event_cb->param); + action = event_cb->callback(event_cb->param); + if (action == KBASE_CSF_EVENT_CALLBACK_REMOVE) { + list_del(&event_cb->link); + kfree(event_cb); + } + } + + spin_unlock_irqrestore(&kctx->csf.event.lock, flags); +} + +void kbase_csf_event_term(struct kbase_context *kctx) +{ + struct kbase_csf_event_cb *event_cb, *next_event_cb; + unsigned long flags; + + spin_lock_irqsave(&kctx->csf.event.lock, flags); + + list_for_each_entry_safe( + event_cb, next_event_cb, &kctx->csf.event.callback_list, link) { + list_del(&event_cb->link); + dev_warn(kctx->kbdev->dev, + "Removed event handler %pK with param %pK\n", + (void *)event_cb, event_cb->param); + kfree(event_cb); + } + + WARN_ON(!list_empty(&kctx->csf.event.error_list)); + + spin_unlock_irqrestore(&kctx->csf.event.lock, flags); +} + +void kbase_csf_event_init(struct kbase_context *const kctx) +{ + INIT_LIST_HEAD(&kctx->csf.event.callback_list); + INIT_LIST_HEAD(&kctx->csf.event.error_list); + spin_lock_init(&kctx->csf.event.lock); +} + +void kbase_csf_event_remove_error(struct kbase_context *kctx, + struct kbase_csf_notification *error) +{ + unsigned long flags; + + spin_lock_irqsave(&kctx->csf.event.lock, flags); + list_del_init(&error->link); + spin_unlock_irqrestore(&kctx->csf.event.lock, flags); +} + +bool kbase_csf_event_read_error(struct kbase_context *kctx, + struct base_csf_notification *event_data) +{ + struct kbase_csf_notification *error_data = NULL; + unsigned long flags; + + spin_lock_irqsave(&kctx->csf.event.lock, flags); + if (likely(!list_empty(&kctx->csf.event.error_list))) { + error_data = list_first_entry(&kctx->csf.event.error_list, + struct kbase_csf_notification, link); + list_del_init(&error_data->link); + *event_data = error_data->data; + dev_dbg(kctx->kbdev->dev, "Dequeued error %pK in context %pK\n", + (void *)error_data, (void *)kctx); + } + spin_unlock_irqrestore(&kctx->csf.event.lock, flags); + return !!error_data; +} + +void kbase_csf_event_add_error(struct kbase_context *const kctx, + struct kbase_csf_notification *const error, + struct base_csf_notification const *const data) +{ + unsigned long flags; + + if (WARN_ON(!kctx)) + return; + + if (WARN_ON(!error)) + return; + + if (WARN_ON(!data)) + return; + + spin_lock_irqsave(&kctx->csf.event.lock, flags); + if (!WARN_ON(!list_empty(&error->link))) { + error->data = *data; + list_add_tail(&error->link, &kctx->csf.event.error_list); + dev_dbg(kctx->kbdev->dev, + "Added error %pK of type %d in context %pK\n", + (void *)error, data->type, (void *)kctx); + } + spin_unlock_irqrestore(&kctx->csf.event.lock, flags); +} + +bool kbase_csf_event_error_pending(struct kbase_context *kctx) +{ + bool error_pending = false; + unsigned long flags; + + spin_lock_irqsave(&kctx->csf.event.lock, flags); + error_pending = !list_empty(&kctx->csf.event.error_list); + + dev_dbg(kctx->kbdev->dev, "%s error is pending in context %pK\n", + error_pending ? 
"An" : "No", (void *)kctx); + + spin_unlock_irqrestore(&kctx->csf.event.lock, flags); + + return error_pending; +} diff --git a/mali_kbase/csf/mali_kbase_csf_event.h b/mali_kbase/csf/mali_kbase_csf_event.h new file mode 100644 index 0000000..1270ef6 --- /dev/null +++ b/mali_kbase/csf/mali_kbase_csf_event.h @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_EVENT_H_ +#define _KBASE_CSF_EVENT_H_ + +#include <linux/types.h> +#include <linux/wait.h> + +struct kbase_context; +struct kbase_csf_event; +enum kbase_csf_event_callback_action; + +/** + * kbase_csf_event_callback_action - type for callback functions to be + * called upon CSF events. + * @param: Generic parameter to pass to the callback function. + * + * This is the type of callback functions that can be registered + * for CSF events. These function calls shall be triggered by any call + * to kbase_csf_event_signal. + * + * Return: KBASE_CSF_EVENT_CALLBACK_KEEP if the callback should remain + * registered, or KBASE_CSF_EVENT_CALLBACK_REMOVE if it should be removed. + */ +typedef enum kbase_csf_event_callback_action kbase_csf_event_callback(void *param); + +/** + * kbase_csf_event_wait_add - Add a CSF event callback + * + * @kctx: The Kbase context the @callback should be registered to. + * @callback: The callback function to register. + * @param: Custom parameter to be passed to the @callback function. + * + * This function adds an event callback to the list of CSF event callbacks + * belonging to a given Kbase context, to be triggered when a CSF event is + * signalled by kbase_csf_event_signal. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_event_wait_add(struct kbase_context *kctx, + kbase_csf_event_callback *callback, void *param); + +/** + * kbase_csf_event_wait_remove - Remove a CSF event callback + * + * @kctx: The kbase context the @callback should be removed from. + * @callback: The callback function to remove. + * @param: Custom parameter that would have been passed to the @p callback + * function. + * + * This function removes an event callback from the list of CSF event callbacks + * belonging to a given Kbase context. + */ +void kbase_csf_event_wait_remove(struct kbase_context *kctx, + kbase_csf_event_callback *callback, void *param); + +/** + * kbase_csf_event_term - Removes all CSF event callbacks + * + * @kctx: The kbase context for which CSF event callbacks have to be removed. + * + * This function empties the list of CSF event callbacks belonging to a given + * Kbase context. + */ +void kbase_csf_event_term(struct kbase_context *kctx); + +/** + * kbase_csf_event_signal - Signal a CSF event + * + * @kctx: The kbase context whose CSF event callbacks shall be triggered. 
+ * @notify_gpu: Flag to indicate if CSF firmware should be notified of the + * signaling of event that happened on the Driver side, either + * the signal came from userspace or from kcpu queues. + * + * This function triggers all the CSF event callbacks that are registered to + * a given Kbase context, and also signals the event handling thread of + * userspace driver waiting for the CSF event. + */ +void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu); + +static inline void kbase_csf_event_signal_notify_gpu(struct kbase_context *kctx) +{ + kbase_csf_event_signal(kctx, true); +} + +static inline void kbase_csf_event_signal_cpu_only(struct kbase_context *kctx) +{ + kbase_csf_event_signal(kctx, false); +} + +/** + * kbase_csf_event_init - Initialize event object + * + * This function initializes the event object. + * + * @kctx: The kbase context whose event object will be initialized. + */ +void kbase_csf_event_init(struct kbase_context *const kctx); + +struct kbase_csf_notification; +struct base_csf_notification; +/** + * kbase_csf_event_read_error - Read and remove an error from error list in event + * + * @kctx: The kbase context. + * @event_data: Caller-provided buffer to copy the fatal error to + * + * This function takes the CS fatal error from context's ordered + * error_list, copies its contents to @event_data. + * + * Return: true if error is read out or false if there is no error in error list. + */ +bool kbase_csf_event_read_error(struct kbase_context *kctx, + struct base_csf_notification *event_data); + +/** + * kbase_csf_event_add_error - Add an error into event error list + * + * @kctx: Address of a base context associated with a GPU address space. + * @error: Address of the item to be added to the context's pending error list. + * @data: Error data to be returned to userspace. + * + * Does not wake up the event queue blocking a user thread in kbase_poll. This + * is to make it more efficient to add multiple errors. + * + * The added error must not already be on the context's list of errors waiting + * to be reported (e.g. because a previous error concerning the same object has + * not yet been reported). + * + */ +void kbase_csf_event_add_error(struct kbase_context *const kctx, + struct kbase_csf_notification *const error, + struct base_csf_notification const *const data); + +/** + * kbase_csf_event_remove_error - Remove an error from event error list + * + * @kctx: Address of a base context associated with a GPU address space. + * @error: Address of the item to be removed from the context's event error list. + */ +void kbase_csf_event_remove_error(struct kbase_context *kctx, + struct kbase_csf_notification *error); + +/** + * kbase_csf_event_error_pending - Check the error pending status + * + * @kctx: The kbase context to check fatal error upon. + * + * Return: true if there is error in the list. 
+ */ +bool kbase_csf_event_error_pending(struct kbase_context *kctx); +#endif /* _KBASE_CSF_EVENT_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.c b/mali_kbase/csf/mali_kbase_csf_firmware.c index 785555c..202c677 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware.c @@ -31,6 +31,7 @@ #include "device/mali_kbase_device.h" #include "backend/gpu/mali_kbase_pm_internal.h" #include "tl/mali_kbase_timeline_priv.h" +#include "tl/mali_kbase_tracepoints.h" #include "mali_kbase_csf_tl_reader.h" #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" #include <csf/ipa_control/mali_kbase_csf_ipa_control.h> @@ -157,8 +158,7 @@ static bool entry_optional(u32 header) } /** - * struct firmware_timeline_metadata - - * Timeline metadata item within the MCU firmware + * struct firmware_timeline_metadata - Timeline metadata item within the MCU firmware * * @node: List head linking all timeline metadata to * kbase_device:csf.firmware_timeline_metadata. @@ -217,10 +217,11 @@ static int wait_mcu_status_value(struct kbase_device *kbdev, u32 val) return (max_loops == 0) ? -1 : 0; } -void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev) +void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev) { - if (wait_mcu_status_value(kbdev, MCU_CNTRL_DISABLE) < 0) - dev_err(kbdev->dev, "MCU failed to get disabled"); + KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_DISABLING(kbdev, kbase_backend_get_cycle_cnt(kbdev)); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_DISABLE); } static void wait_for_firmware_stop(struct kbase_device *kbdev) @@ -229,6 +230,13 @@ static void wait_for_firmware_stop(struct kbase_device *kbdev) /* This error shall go away once MIDJM-2371 is closed */ dev_err(kbdev->dev, "Firmware failed to stop"); } + + KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_OFF(kbdev, kbase_backend_get_cycle_cnt(kbdev)); +} + +void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev) +{ + wait_for_firmware_stop(kbdev); } static void stop_csf_firmware(struct kbase_device *kbdev) @@ -463,16 +471,16 @@ out: /** * parse_memory_setup_entry() - Process an "interface memory setup" section * + * @kbdev: Kbase device structure + * @fw: The firmware image containing the section + * @entry: Pointer to the start of the section + * @size: Size (in bytes) of the section + * * Read an "interface memory setup" section from the firmware image and create * the necessary memory region including the MMU page tables. If successful * the interface will be added to the kbase_device:csf.firmware_interfaces list. * * Return: 0 if successful, negative error code on failure - * - * @kbdev: Kbase device structure - * @fw: The firmware image containing the section - * @entry: Pointer to the start of the section - * @size: Size (in bytes) of the section */ static int parse_memory_setup_entry(struct kbase_device *kbdev, const struct firmware *fw, @@ -724,6 +732,11 @@ static int parse_timeline_metadata_entry(struct kbase_device *kbdev, /** * load_firmware_entry() - Process an entry from a firmware image * + * @kbdev: Kbase device + * @fw: Firmware image containing the entry + * @offset: Byte offset within the image of the entry to load + * @header: Header word of the entry + * * Read an entry from a firmware image and do any necessary work (e.g. loading * the data into page accessible to the MCU). 
* @@ -731,11 +744,6 @@ static int parse_timeline_metadata_entry(struct kbase_device *kbdev, * otherwise the function will fail with -EINVAL * * Return: 0 if successful, negative error code on failure - * - * @kbdev: Kbase device - * @fw: Firmware image containing the entry - * @offset: Byte offset within the image of the entry to load - * @header: Header word of the entry */ static int load_firmware_entry(struct kbase_device *kbdev, const struct firmware *fw, @@ -784,18 +792,6 @@ static int load_firmware_entry(struct kbase_device *kbdev, } return kbase_csf_firmware_cfg_option_entry_parse( kbdev, fw, entry, size, updatable); - case CSF_FIRMWARE_ENTRY_TYPE_FUTF_TEST: -#ifndef MALI_KBASE_BUILD - /* FW UTF option */ - if (size < 2*sizeof(*entry)) { - dev_err(kbdev->dev, "FW UTF entry too short (size=%u)\n", - size); - return -EINVAL; - } - return mali_kutf_process_fw_utf_entry(kbdev, fw->data, - fw->size, entry); -#endif - break; case CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER: /* Trace buffer */ if (size < TRACE_BUFFER_ENTRY_NAME_OFFSET + sizeof(*entry)) { @@ -1170,6 +1166,7 @@ u32 kbase_csf_firmware_csg_output( dev_dbg(kbdev->dev, "csg output r: reg %08x val %08x\n", offset, val); return val; } +KBASE_EXPORT_TEST_API(kbase_csf_firmware_csg_output); void kbase_csf_firmware_global_input( const struct kbase_csf_global_iface *const iface, const u32 offset, @@ -1180,6 +1177,7 @@ void kbase_csf_firmware_global_input( dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x\n", offset, value); input_page_write(iface->input, offset, value); } +KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input); void kbase_csf_firmware_global_input_mask( const struct kbase_csf_global_iface *const iface, const u32 offset, @@ -1191,6 +1189,7 @@ void kbase_csf_firmware_global_input_mask( offset, value, mask); input_page_partial_write(iface->input, offset, value, mask); } +KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input_mask); u32 kbase_csf_firmware_global_input_read( const struct kbase_csf_global_iface *const iface, const u32 offset) @@ -1211,6 +1210,7 @@ u32 kbase_csf_firmware_global_output( dev_dbg(kbdev->dev, "glob output r: reg %08x val %08x\n", offset, val); return val; } +KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_output); /** * handle_internal_firmware_fatal - Handler for CS internal firmware fault. @@ -1484,8 +1484,7 @@ bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev) } /** - * kbase_csf_firmware_reload_worker() - - * reload the fw image and re-enable the MCU + * kbase_csf_firmware_reload_worker() - reload the fw image and re-enable the MCU * @work: CSF Work item for reloading the firmware. * * This helper function will reload the firmware image and re-enable the MCU. @@ -1505,6 +1504,8 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work) dev_info(kbdev->dev, "reloading firmware"); + KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING(kbdev, kbase_backend_get_cycle_cnt(kbdev)); + /* Reload just the data sections from firmware binary image */ err = reload_fw_data_sections(kbdev); if (err) @@ -2017,10 +2018,6 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev) kfree(metadata); } -#ifndef MALI_KBASE_BUILD - mali_kutf_fw_utf_entry_cleanup(kbdev); -#endif - /* This will also free up the region allocated for the shared interface * entry parsed from the firmware image. 
*/ @@ -2144,6 +2141,8 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev) struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; unsigned long flags; + KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_HALT(kbdev, kbase_backend_get_cycle_cnt(kbdev)); + kbase_csf_scheduler_spin_lock(kbdev, &flags); /* Validate there are no on-slot groups when sending the * halt request to firmware. @@ -2155,12 +2154,25 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev) kbase_csf_scheduler_spin_unlock(kbdev, flags); } +void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev) +{ + KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_ENABLING(kbdev, kbase_backend_get_cycle_cnt(kbdev)); + + /* Trigger the boot of MCU firmware, Use the AUTO mode as + * otherwise on fast reset, to exit protected mode, MCU will + * not reboot by itself to enter normal mode. + */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_AUTO); +} + #ifdef KBASE_PM_RUNTIME void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev) { struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; unsigned long flags; + KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_SLEEP(kbdev, kbase_backend_get_cycle_cnt(kbdev)); + kbase_csf_scheduler_spin_lock(kbdev, &flags); set_global_request(global_iface, GLB_REQ_SLEEP_MASK); dev_dbg(kbdev->dev, "Sending sleep request to MCU"); diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.h b/mali_kbase/csf/mali_kbase_csf_firmware.h index 0edcc30..f4ce33c 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware.h +++ b/mali_kbase/csf/mali_kbase_csf_firmware.h @@ -346,14 +346,14 @@ static inline void kbase_csf_ring_doorbell(struct kbase_device *kbdev, /** * kbase_csf_read_firmware_memory - Read a value in a GPU address * + * @kbdev: Device pointer + * @gpu_addr: GPU address to read + * @value: output pointer to which the read value will be written. + * * This function read a value in a GPU address that belongs to * a private firmware memory region. The function assumes that the location * is not permanently mapped on the CPU address space, therefore it maps it * and then unmaps it to access it independently. - * - * @kbdev: Device pointer - * @gpu_addr: GPU address to read - * @value: output pointer to which the read value will be written. */ void kbase_csf_read_firmware_memory(struct kbase_device *kbdev, u32 gpu_addr, u32 *value); @@ -361,14 +361,14 @@ void kbase_csf_read_firmware_memory(struct kbase_device *kbdev, /** * kbase_csf_update_firmware_memory - Write a value in a GPU address * + * @kbdev: Device pointer + * @gpu_addr: GPU address to write + * @value: Value to write + * * This function writes a given value in a GPU address that belongs to * a private firmware memory region. The function assumes that the destination * is not permanently mapped on the CPU address space, therefore it maps it * and then unmaps it to access it independently. - * - * @kbdev: Device pointer - * @gpu_addr: GPU address to write - * @value: Value to write */ void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, u32 gpu_addr, u32 value); @@ -404,20 +404,20 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev); /** * kbase_csf_firmware_ping - Send the ping request to firmware. * - * The function sends the ping request to firmware. - * * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * The function sends the ping request to firmware. 
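Alongside these requests, the patch turns kbase_csf_firmware_enable_mcu() and kbase_csf_firmware_disable_mcu() into out-of-line functions so that firmware state tracepoints can be emitted around the MCU_CONTROL writes. A sketch of how the two halves of the disable path pair up; the call sites shown are illustrative assumptions, not taken from this hunk:

/* Power down: request the MCU to stop (emits the FW_DISABLING tracepoint
 * and writes MCU_CNTRL_DISABLE), then wait until the firmware has stopped
 * (emits FW_OFF on completion).
 */
static void example_mcu_power_down(struct kbase_device *kbdev)
{
        kbase_csf_firmware_disable_mcu(kbdev);
        kbase_csf_firmware_disable_mcu_wait(kbdev);
}

/* Power up: MCU_CNTRL_AUTO is written rather than a plain enable so the
 * MCU can reboot by itself to re-enter normal mode after leaving
 * protected mode on a fast reset.
 */
static void example_mcu_power_up(struct kbase_device *kbdev)
{
        kbase_csf_firmware_enable_mcu(kbdev);
}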
*/ void kbase_csf_firmware_ping(struct kbase_device *kbdev); /** * kbase_csf_firmware_ping_wait - Send the ping request to firmware and waits. * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * * The function sends the ping request to firmware and waits to confirm it is * alive. * - * @kbdev: Instance of a GPU platform device that implements a CSF interface. - * * Return: 0 on success, or negative on failure. */ int kbase_csf_firmware_ping_wait(struct kbase_device *kbdev); @@ -462,8 +462,12 @@ void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev); static inline bool kbase_csf_firmware_mcu_halted(struct kbase_device *kbdev) { +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + return true; +#else return (kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS)) == MCU_STATUS_HALTED); +#endif /* CONFIG_MALI_NO_MALI */ } /** @@ -481,24 +485,14 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev); * * @kbdev: Instance of a GPU platform device that implements a CSF interface. */ -static inline void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev) -{ - /* Trigger the boot of MCU firmware, Use the AUTO mode as - * otherwise on fast reset, to exit protected mode, MCU will - * not reboot by itself to enter normal mode. - */ - kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_AUTO); -} +void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev); /** * kbase_csf_firmware_disable_mcu - Send the command to disable MCU * * @kbdev: Instance of a GPU platform device that implements a CSF interface. */ -static inline void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev) -{ - kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_DISABLE); -} +void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev); /** * kbase_csf_firmware_disable_mcu_wait - Wait for the MCU to reach disabled @@ -560,9 +554,9 @@ void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev, * requests, sent after the reboot of MCU firmware, have * completed or not. * - * Return: true if the Global configuration requests completed otherwise false. - * * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * Return: true if the Global configuration requests completed otherwise false. */ bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev); @@ -587,17 +581,16 @@ void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev, * request has completed or not, that was sent to update * the core attributes. * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * * Return: true if the Global configuration request to update the core * attributes has completed, otherwise false. - * - * @kbdev: Instance of a GPU platform device that implements a CSF interface. */ bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev); /** - * Request the global control block of CSF interface capabilities - * - * Return: Total number of CSs, summed across all groups. + * kbase_csf_firmware_get_glb_iface - Request the global control block of CSF + * interface capabilities * * @kbdev: Kbase device. * @group_data: Pointer where to store all the group data @@ -620,6 +613,8 @@ bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev); * @instr_features: Instrumentation features. Bits 7:4 hold the max size * of events. Bits 3:0 hold the offset update rate. * (csf >= 1,1,0) + * + * Return: Total number of CSs, summed across all groups. 
*/ u32 kbase_csf_firmware_get_glb_iface( struct kbase_device *kbdev, struct basep_cs_group_control *group_data, @@ -628,20 +623,26 @@ u32 kbase_csf_firmware_get_glb_iface( u32 *group_num, u32 *prfcnt_size, u32 *instr_features); /** - * Get CSF firmware header timeline metadata content - * - * Return: The firmware timeline metadata content which match @p name. + * kbase_csf_firmware_get_timeline_metadata - Get CSF firmware header timeline + * metadata content * * @kbdev: Kbase device. * @name: Name of the metadata which metadata content to be returned. * @size: Metadata size if specified metadata found. + * + * Return: The firmware timeline metadata content which match @p name. */ const char *kbase_csf_firmware_get_timeline_metadata(struct kbase_device *kbdev, const char *name, size_t *size); /** - * kbase_csf_firmware_mcu_shared_mapping_init - - * Allocate and map MCU shared memory. + * kbase_csf_firmware_mcu_shared_mapping_init - Allocate and map MCU shared memory. + * + * @kbdev: Kbase device the memory mapping shall belong to. + * @num_pages: Number of memory pages to map. + * @cpu_map_properties: Either PROT_READ or PROT_WRITE. + * @gpu_map_properties: Either KBASE_REG_GPU_RD or KBASE_REG_GPU_WR. + * @csf_mapping: Object where to write metadata for the memory mapping. * * This helper function allocates memory and maps it on both the CPU * and the GPU address spaces. Most of the properties of the mapping @@ -653,12 +654,6 @@ const char *kbase_csf_firmware_get_timeline_metadata(struct kbase_device *kbdev, * will be ignored by the function. * * Return: 0 if success, or an error code on failure. - * - * @kbdev: Kbase device the memory mapping shall belong to. - * @num_pages: Number of memory pages to map. - * @cpu_map_properties: Either PROT_READ or PROT_WRITE. - * @gpu_map_properties: Either KBASE_REG_GPU_RD or KBASE_REG_GPU_WR. - * @csf_mapping: Object where to write metadata for the memory mapping. */ int kbase_csf_firmware_mcu_shared_mapping_init( struct kbase_device *kbdev, @@ -676,35 +671,6 @@ int kbase_csf_firmware_mcu_shared_mapping_init( void kbase_csf_firmware_mcu_shared_mapping_term( struct kbase_device *kbdev, struct kbase_csf_mapping *csf_mapping); -#ifndef MALI_KBASE_BUILD -/** - * mali_kutf_process_fw_utf_entry() - Process the "Firmware UTF tests" section - * - * Read "Firmware UTF tests" section from the firmware image and create - * necessary kutf app+suite+tests. - * - * Return: 0 if successful, negative error code on failure. In both cases - * caller will have to invoke mali_kutf_fw_utf_entry_cleanup for the cleanup - * - * @kbdev: Kbase device structure - * @fw_data: Pointer to the start of firmware binary image loaded from disk - * @fw_size: Size (in bytes) of the firmware image - * @entry: Pointer to the start of the section - */ -int mali_kutf_process_fw_utf_entry(struct kbase_device *kbdev, - const void *fw_data, size_t fw_size, const u32 *entry); - -/** - * mali_kutf_fw_utf_entry_cleanup() - Remove the Fw UTF tests debugfs entries - * - * Destroy the kutf apps+suites+tests created on parsing "Firmware UTF tests" - * section from the firmware image. 
- * - * @kbdev: Kbase device structure - */ -void mali_kutf_fw_utf_entry_cleanup(struct kbase_device *kbdev); -#endif - #ifdef CONFIG_MALI_DEBUG extern bool fw_debug; #endif @@ -722,11 +688,11 @@ static inline long kbase_csf_timeout_in_jiffies(const unsigned int msecs) * kbase_csf_firmware_enable_gpu_idle_timer() - Activate the idle hysteresis * monitoring operation * + * @kbdev: Kbase device structure + * * Program the firmware interface with its configured hysteresis count value * and enable the firmware to act on it. The Caller is * assumed to hold the kbdev->csf.scheduler.interrupt_lock. - * - * @kbdev: Kbase device structure */ void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev); @@ -734,10 +700,10 @@ void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev); * kbase_csf_firmware_disable_gpu_idle_timer() - Disable the idle time * hysteresis monitoring operation * + * @kbdev: Kbase device structure + * * Program the firmware interface to disable the idle hysteresis timer. The * Caller is assumed to hold the kbdev->csf.scheduler.interrupt_lock. - * - * @kbdev: Kbase device structure */ void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev); diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c index f00acb1..70bf26a 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c @@ -29,10 +29,6 @@ /** * struct firmware_config - Configuration item within the MCU firmware * - * The firmware may expose configuration options. Each option has a name, the - * address where the option is controlled and the minimum and maximum values - * that the option can take. - * * @node: List head linking all options to * kbase_device:csf.firmware_config * @kbdev: Pointer to the Kbase device @@ -47,6 +43,10 @@ * @min: The lowest legal value of the configuration option * @max: The maximum legal value of the configuration option * @cur_val: The current value of the configuration option + * + * The firmware may expose configuration options. Each option has a name, the + * address where the option is controlled and the minimum and maximum values + * that the option can take. */ struct firmware_config { struct list_head node; diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h index 080c154..c2d2fc5 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h +++ b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h @@ -32,12 +32,12 @@ * kbase_csf_firmware_cfg_init - Create the sysfs directory for configuration * options present in firmware image. * + * @kbdev: Pointer to the Kbase device + * * This function would create a sysfs directory and populate it with a * sub-directory, that would contain a file per attribute, for every * configuration option parsed from firmware image. * - * @kbdev: Pointer to the Kbase device - * * Return: The initialization error code. */ int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev); @@ -55,16 +55,16 @@ void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev); * kbase_csf_firmware_cfg_option_entry_parse() - Process a * "configuration option" section. * - * Read a "configuration option" section adding it to the - * kbase_device:csf.firmware_config list. 
- * - * Return: 0 if successful, negative error code on failure - * * @kbdev: Kbase device structure * @fw: Firmware image containing the section * @entry: Pointer to the section * @size: Size (in bytes) of the section * @updatable: Indicates if entry can be updated with FIRMWARE_CONFIG_UPDATE + * + * Read a "configuration option" section adding it to the + * kbase_device:csf.firmware_config list. + * + * Return: 0 if successful, negative error code on failure */ int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev, const struct firmware *fw, diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c index e99c968..6f61631 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c @@ -136,13 +136,13 @@ static inline void output_page_write(u32 *const output, const u32 offset, /** * invent_memory_setup_entry() - Invent an "interface memory setup" section * + * @kbdev: Kbase device structure + * * Invent an "interface memory setup" section similar to one from a firmware * image. If successful the interface will be added to the * kbase_device:csf.firmware_interfaces list. * * Return: 0 if successful, negative error code on failure - * - * @kbdev: Kbase device structure */ static int invent_memory_setup_entry(struct kbase_device *kbdev) { @@ -371,6 +371,7 @@ u32 kbase_csf_firmware_csg_output( dev_dbg(kbdev->dev, "csg output r: reg %08x val %08x\n", offset, val); return val; } +KBASE_EXPORT_TEST_API(kbase_csf_firmware_csg_output); static void csf_firmware_prfcnt_process(const struct kbase_csf_global_iface *const iface, @@ -418,6 +419,7 @@ void kbase_csf_firmware_global_input( output_page_write(iface->output, GLB_ACK, value); } } +KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input); void kbase_csf_firmware_global_input_mask( const struct kbase_csf_global_iface *const iface, const u32 offset, @@ -431,6 +433,7 @@ void kbase_csf_firmware_global_input_mask( /* NO_MALI: Go through kbase_csf_firmware_global_input to capture writes */ kbase_csf_firmware_global_input(iface, offset, (input_page_read(iface->input, offset) & ~mask) | (value & mask)); } +KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input_mask); u32 kbase_csf_firmware_global_input_read( const struct kbase_csf_global_iface *const iface, const u32 offset) @@ -451,6 +454,7 @@ u32 kbase_csf_firmware_global_output( dev_dbg(kbdev->dev, "glob output r: reg %08x val %08x\n", offset, val); return val; } +KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_output); /** * handle_internal_firmware_fatal - Handler for CS internal firmware fault. @@ -1020,10 +1024,6 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev) /* NO_MALI: No trace buffers to terminate */ -#ifndef MALI_KBASE_BUILD - mali_kutf_fw_utf_entry_cleanup(kbdev); -#endif - mutex_destroy(&kbdev->csf.reg_lock); /* This will also free up the region allocated for the shared interface @@ -1154,6 +1154,15 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev) kbase_csf_scheduler_spin_unlock(kbdev, flags); } +void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev) +{ + /* Trigger the boot of MCU firmware, Use the AUTO mode as + * otherwise on fast reset, to exit protected mode, MCU will + * not reboot by itself to enter normal mode. 
+ */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_AUTO); +} + #ifdef KBASE_PM_RUNTIME void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev) { @@ -1290,6 +1299,11 @@ const char *kbase_csf_firmware_get_timeline_metadata( return NULL; } +void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev) +{ + kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_DISABLE); +} + void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev) { /* NO_MALI: Nothing to do here */ diff --git a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.h b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.h index 993db63..9aab7ab 100644 --- a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.h +++ b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.h @@ -47,11 +47,11 @@ void kbase_csf_heap_context_allocator_term( /** * kbase_csf_heap_context_allocator_alloc - Allocate a heap context structure * + * @ctx_alloc: Pointer to the heap context allocator. + * * If this function is successful then it returns the address of a * zero-initialized heap context structure for use by the firmware. * - * @ctx_alloc: Pointer to the heap context allocator. - * * Return: GPU virtual address of the allocated heap context or 0 on failure. */ u64 kbase_csf_heap_context_allocator_alloc( @@ -60,13 +60,13 @@ u64 kbase_csf_heap_context_allocator_alloc( /** * kbase_csf_heap_context_allocator_free - Free a heap context structure * - * This function returns a heap context structure to the free pool of unused - * contexts for possible reuse by a future call to - * @kbase_csf_heap_context_allocator_alloc. - * * @ctx_alloc: Pointer to the heap context allocator. * @heap_gpu_va: The GPU virtual address of a heap context structure that * was allocated for the firmware. + * + * This function returns a heap context structure to the free pool of unused + * contexts for possible reuse by a future call to + * @kbase_csf_heap_context_allocator_alloc. */ void kbase_csf_heap_context_allocator_free( struct kbase_csf_heap_context_allocator *const ctx_alloc, diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.c b/mali_kbase/csf/mali_kbase_csf_kcpu.c index 8729307..05a4fa0 100644 --- a/mali_kbase/csf/mali_kbase_csf_kcpu.c +++ b/mali_kbase/csf/mali_kbase_csf_kcpu.c @@ -190,6 +190,12 @@ static void kbase_jit_add_to_pending_alloc_list( * * @queue: The queue containing this JIT allocation * @cmd: The JIT allocation command + * + * Return: + * * 0 - allocation OK + * * -EINVAL - missing info or JIT ID still in use + * * -EAGAIN - Retry + * * -ENOMEM - no memory. unable to allocate */ static int kbase_kcpu_jit_allocate_process( struct kbase_kcpu_command_queue *queue, @@ -289,8 +295,8 @@ static int kbase_kcpu_jit_allocate_process( * Write the address of the JIT allocation to the user provided * GPU allocation. 
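The hunk that follows switches this write from kbase_vmap() to kbase_vmap_prot() so that CPU write access is validated before the JIT result is stored. A sketch of that mapping pattern; kbase_vunmap() as the release step and the -ENOMEM error value are assumptions based on the surrounding code rather than this hunk:

/* Map a single result slot with an explicit CPU-write permission check,
 * store the value, then drop the temporary mapping. kbase_vmap_prot()
 * returns NULL if the region cannot be mapped with the requested access.
 */
static int example_write_back(struct kbase_context *kctx, u64 gpu_va, u64 value)
{
        struct kbase_vmap_struct mapping;
        u64 *ptr;

        ptr = kbase_vmap_prot(kctx, gpu_va, sizeof(*ptr), KBASE_REG_CPU_WR,
                              &mapping);
        if (!ptr)
                return -ENOMEM;

        *ptr = value;
        kbase_vunmap(kctx, &mapping);

        return 0;
}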
*/ - ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr), - &mapping); + ptr = kbase_vmap_prot(kctx, info->gpu_alloc_addr, sizeof(*ptr), + KBASE_REG_CPU_WR, &mapping); if (!ptr) { ret = -ENOMEM; goto fail; @@ -570,9 +576,11 @@ static int kbase_csf_queue_group_suspend_prepare( { struct kbase_context *const kctx = kcpu_queue->kctx; struct kbase_suspend_copy_buffer *sus_buf = NULL; + const u32 csg_suspend_buf_size = + kctx->kbdev->csf.global_iface.groups[0].suspend_size; u64 addr = suspend_buf->buffer; u64 page_addr = addr & PAGE_MASK; - u64 end_addr = addr + suspend_buf->size - 1; + u64 end_addr = addr + csg_suspend_buf_size - 1; u64 last_page_addr = end_addr & PAGE_MASK; int nr_pages = (last_page_addr - page_addr) / PAGE_SIZE + 1; int pinned_pages = 0, ret = 0; @@ -580,8 +588,7 @@ static int kbase_csf_queue_group_suspend_prepare( lockdep_assert_held(&kctx->csf.kcpu_queues.lock); - if (suspend_buf->size < - kctx->kbdev->csf.global_iface.groups[0].suspend_size) + if (suspend_buf->size < csg_suspend_buf_size) return -EINVAL; ret = kbase_csf_queue_group_handle_is_valid(kctx, @@ -593,7 +600,7 @@ static int kbase_csf_queue_group_suspend_prepare( if (!sus_buf) return -ENOMEM; - sus_buf->size = suspend_buf->size; + sus_buf->size = csg_suspend_buf_size; sus_buf->nr_pages = nr_pages; sus_buf->offset = addr & ~PAGE_MASK; diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.h b/mali_kbase/csf/mali_kbase_csf_kcpu.h index 6300569..3edb4de 100644 --- a/mali_kbase/csf/mali_kbase_csf_kcpu.h +++ b/mali_kbase/csf/mali_kbase_csf_kcpu.h @@ -294,6 +294,8 @@ struct kbase_kcpu_command_queue { * queue will be created. * @newq: Pointer to the structure which contains information about * the new KCPU command queue to be created. + * + * Return: 0 if successful or a negative error code on failure. */ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_ioctl_kcpu_queue_new *newq); @@ -307,6 +309,8 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, * queue is to be deleted. * @del: Pointer to the structure which specifies the KCPU command * queue to be deleted. + * + * Return: 0 if successful or a negative error code on failure. */ int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx, struct kbase_ioctl_kcpu_queue_delete *del); @@ -320,6 +324,8 @@ int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx, * @enq: Pointer to the structure which specifies the KCPU command * as well as the KCPU command queue into which the command * is to be enqueued. + * + * Return: 0 if successful or a negative error code on failure. */ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, struct kbase_ioctl_kcpu_queue_enqueue *enq); @@ -337,11 +343,11 @@ int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx); /** * kbase_csf_kcpu_queue_context_term - Terminate the kernel CPU queues context * for a GPU address space + * @kctx: Pointer to the kbase context being terminated. * * This function deletes any kernel CPU queues that weren't deleted before * context termination. * - * @kctx: Pointer to the kbase context being terminated. 
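Back in kbase_csf_queue_group_suspend_prepare() above, the number of pages to pin is now derived from the firmware-reported CSG suspend buffer size rather than the user-supplied size. A small worked example of that page-span arithmetic, assuming 4 KiB pages:

/* Mirror of the nr_pages computation in the hunk above.
 * Example: addr = 0x10000FF0, size = 0x2000 (two 4 KiB pages of data)
 *   page_addr      = addr & PAGE_MASK     = 0x10000000
 *   end_addr       = addr + size - 1      = 0x10002FEF
 *   last_page_addr = end_addr & PAGE_MASK = 0x10002000
 *   nr_pages       = (0x10002000 - 0x10000000) / 0x1000 + 1 = 3
 * An unaligned two-page buffer therefore spans three pages to pin.
 */
static int example_nr_pages(u64 addr, u32 size)
{
        u64 page_addr = addr & PAGE_MASK;
        u64 end_addr = addr + size - 1;
        u64 last_page_addr = end_addr & PAGE_MASK;

        return (last_page_addr - page_addr) / PAGE_SIZE + 1;
}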
*/ void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx); diff --git a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c index 7b63132..d5d8318 100644 --- a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c +++ b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c @@ -576,6 +576,7 @@ int kbase_reset_gpu_silent(struct kbase_device *kbdev) return 0; } +KBASE_EXPORT_TEST_API(kbase_reset_gpu_silent); bool kbase_reset_gpu_is_active(struct kbase_device *kbdev) { diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c index f22a5d7..cd87027 100644 --- a/mali_kbase/csf/mali_kbase_csf_scheduler.c +++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c @@ -164,12 +164,14 @@ static int wait_for_scheduler_to_exit_sleep(struct kbase_device *kbdev) * This function will force the Scheduler to exit the sleep state by doing the * wake up of MCU and suspension of on-slot groups. It is called at the time of * system suspend. + * + * Return: 0 on success. */ -static void force_scheduler_to_exit_sleep(struct kbase_device *kbdev) +static int force_scheduler_to_exit_sleep(struct kbase_device *kbdev) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; unsigned long flags; - int ret; + int ret = 0; lockdep_assert_held(&scheduler->lock); WARN_ON(scheduler->state != SCHED_SLEEPING); @@ -177,12 +179,16 @@ static void force_scheduler_to_exit_sleep(struct kbase_device *kbdev) kbase_pm_lock(kbdev); ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev); - if (ret) - dev_warn(kbdev->dev, "[%llu] Wait for MCU wake up failed on forced scheduler suspend", - kbase_backend_get_cycle_cnt(kbdev)); kbase_pm_unlock(kbdev); + if (ret) { + dev_warn(kbdev->dev, + "[%llu] Wait for MCU wake up failed on forced scheduler suspend", + kbase_backend_get_cycle_cnt(kbdev)); + goto out; + } - suspend_active_groups_on_powerdown(kbdev, true); + if (suspend_active_groups_on_powerdown(kbdev, true)) + goto out; kbase_pm_lock(kbdev); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -191,12 +197,26 @@ static void force_scheduler_to_exit_sleep(struct kbase_device *kbdev) kbase_pm_update_state(kbdev); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ret = kbase_pm_wait_for_desired_state(kbdev); - if (ret) - dev_warn(kbdev->dev, "[%llu] Wait for pm state change failed on forced scheduler suspend", - kbase_backend_get_cycle_cnt(kbdev)); kbase_pm_unlock(kbdev); + if (ret) { + dev_warn(kbdev->dev, + "[%llu] Wait for pm state change failed on forced scheduler suspend", + kbase_backend_get_cycle_cnt(kbdev)); + goto out; + } scheduler->state = SCHED_SUSPENDED; + + return 0; + +out: + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.exit_gpu_sleep_mode = true; + kbdev->pm.backend.gpu_wakeup_override = false; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_csf_scheduler_invoke_tick(kbdev); + + return ret; } #endif @@ -445,6 +465,13 @@ static bool queue_group_idle_locked(struct kbase_queue_group *group) group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE); } +static bool on_slot_group_idle_locked(struct kbase_queue_group *group) +{ + lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); + + return (group->run_state == KBASE_CSF_GROUP_IDLE); +} + static bool queue_group_scheduled(struct kbase_queue_group *group) { return (group->run_state != KBASE_CSF_GROUP_INACTIVE && @@ -582,6 +609,8 @@ static void disable_gpu_idle_fw_timer(struct kbase_device *kbdev) * This function is usually called when Scheduler needs to be activated. 
* The PM reference count is acquired for the Scheduler and the power on * of GPU is initiated. + * + * Return: 0 if successful or a negative error code on failure. */ static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler) @@ -1243,8 +1272,16 @@ int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue) static void update_hw_active(struct kbase_queue *queue, bool active) { +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + if (queue && queue->enabled) { + u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); + + output_addr[CS_ACTIVE / sizeof(u32)] = active; + } +#else CSTD_UNUSED(queue); CSTD_UNUSED(active); +#endif } static void program_cs_extract_init(struct kbase_queue *queue) @@ -2099,6 +2136,10 @@ static void save_csg_slot(struct kbase_queue_group *group) bool sync_wait = false; bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) & CSG_STATUS_STATE_IDLE_MASK; +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + for (i = 0; i < max_streams; i++) + update_hw_active(group->bound_queues[i], false); +#endif /* CONFIG_MALI_NO_MALI */ for (i = 0; idle && i < max_streams; i++) { struct kbase_queue *const queue = group->bound_queues[i]; @@ -2385,6 +2426,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, protm_suspend_buf >> 32); } + /* Enable all interrupts for now */ kbase_csf_firmware_csg_input(ginfo, CSG_ACK_IRQ_MASK, ~((u32)0)); @@ -2414,7 +2456,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, /* Trace the programming of the CSG on the slot */ KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( kbdev, kbdev->gpu_props.props.raw_props.gpu_id, group->kctx->id, - group->handle, slot); + group->handle, slot, (state == CSG_REQ_STATE_RESUME) ? 1 : 0); dev_dbg(kbdev->dev, "Starting group %d of context %d_%d on slot %d with priority %u\n", group->handle, kctx->tgid, kctx->id, slot, prio); @@ -3166,15 +3208,15 @@ static void wait_csg_slots_start(struct kbase_device *kbdev) * flagged after the completion of a CSG status * update command * + * @kbdev: Pointer to the GPU device. + * @slot: The given slot for checking an occupying resident group's idle + * state. + * * This function is called at the start of scheduling tick to check the * idle status of a queue group resident on a CSG slot. * The caller must make sure the corresponding status update command has * been called and completed before checking this status. * - * @kbdev: Pointer to the GPU device. - * @slot: The given slot for checking an occupying resident group's idle - * state. - * * Return: true if the group resident on slot is idle, otherwise false. */ static bool group_on_slot_is_idle(struct kbase_device *kbdev, @@ -3194,16 +3236,16 @@ static bool group_on_slot_is_idle(struct kbase_device *kbdev, * slots_update_state_changed() - Check the handshake state of a subset of * command group slots. * - * Checks the state of a subset of slots selected through the slots_mask - * bit_map. Records which slots' handshake completed and send it back in the - * slots_done bit_map. - * * @kbdev: The GPU device. * @field_mask: The field mask for checking the state in the csg_req/ack. * @slots_mask: A bit_map specifying the slots to check. * @slots_done: A cleared bit_map for returning the slots that * have finished update. * + * Checks the state of a subset of slots selected through the slots_mask + * bit_map. Records which slots' handshake completed and send it back in the + * slots_done bit_map. 
+ * * Return: true if the slots_done is set for at least one slot. * Otherwise false. */ @@ -3237,10 +3279,6 @@ bool slots_update_state_changed(struct kbase_device *kbdev, u32 field_mask, * wait_csg_slots_handshake_ack - Wait the req/ack handshakes to complete on * the specified groups. * - * This function waits for the acknowledgement of the request that have - * already been placed for the CSG slots by the caller. Currently used for - * the CSG priority update and status update requests. - * * @kbdev: Pointer to the GPU device. * @field_mask: The field mask for checking the state in the csg_req/ack. * @slot_mask: Bitmap reflecting the slots, the function will modify @@ -3248,6 +3286,10 @@ bool slots_update_state_changed(struct kbase_device *kbdev, u32 field_mask, * bits. * @wait_in_jiffies: Wait duration in jiffies, controlling the time-out. * + * This function waits for the acknowledgment of the request that have + * already been placed for the CSG slots by the caller. Currently used for + * the CSG priority update and status update requests. + * * Return: 0 on all specified slots acknowledged; otherwise -ETIMEDOUT. For * timed out condition with unacknowledged slots, their bits remain * set in the slot_mask. @@ -3349,14 +3391,14 @@ void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev, * scheduler_slot_protm_ack - Acknowledging the protected region requests * from the resident group on a given slot. * - * The function assumes that the given slot is in stable running state and - * has already been judged by the caller on that any pending protected region - * requests of the resident group should be acknowledged. - * * @kbdev: Pointer to the GPU device. * @group: Pointer to the resident group on the given slot. * @slot: The slot that the given group is actively operating on. * + * The function assumes that the given slot is in stable running state and + * has already been judged by the caller on that any pending protected region + * requests of the resident group should be acknowledged. + * * Return: true if the group has pending protm request(s) and is acknowledged. * The caller should arrange to enter the protected mode for servicing * it. Otherwise return false, indicating the group has no pending protm @@ -3426,15 +3468,15 @@ static bool scheduler_slot_protm_ack(struct kbase_device *const kbdev, * scheduler_group_check_protm_enter - Request the given group to be evaluated * for triggering the protected mode. * + * @kbdev: Pointer to the GPU device. + * @input_grp: Pointer to the GPU queue group. + * * The function assumes the given group is either an active running group or * the scheduler internally maintained field scheduler->top_grp. * * If the GPU is not already running in protected mode and the input group * has protected region requests from its bound queues, the requests are * acknowledged and the GPU is instructed to enter the protected mode. - * - * @kbdev: Pointer to the GPU device. - * @input_grp: Pointer to the GPU queue group. 
*/ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, struct kbase_queue_group *const input_grp) @@ -3538,7 +3580,7 @@ static void scheduler_apply(struct kbase_device *kbdev) } } - /* Initialize the remaining avialable csg slots for the tick/tock */ + /* Initialize the remaining available csg slots for the tick/tock */ scheduler->remaining_tick_slots = available_csg_slots; /* If there are spare slots, apply heads in the list */ @@ -3615,8 +3657,9 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, group->scan_seq_num = scheduler->csg_scan_count_for_tick++; if (queue_group_idle_locked(group)) { - list_add_tail(&group->link_to_schedule, - &scheduler->idle_groups_to_schedule); + if (on_slot_group_idle_locked(group)) + list_add_tail(&group->link_to_schedule, + &scheduler->idle_groups_to_schedule); continue; } @@ -3640,6 +3683,8 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, * fairness of scheduling within a single * kbase_context. * + * @kbdev: Pointer to the GPU device. + * * Since only kbase_csf_scheduler's top_grp (i.e. the queue group assigned * the highest slot priority) is guaranteed to get the resources that it * needs we only rotate the kbase_context corresponding to it - @@ -3678,8 +3723,6 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, * the kbase_csf_scheduler's groups_to_schedule list. In this example, it will * be for a group in the next lowest priority level or in absence of those the * next kbase_context's queue groups. - * - * @kbdev: Pointer to the GPU device. */ static void scheduler_rotate_groups(struct kbase_device *kbdev) { @@ -3750,17 +3793,17 @@ static void scheduler_rotate_ctxs(struct kbase_device *kbdev) * slots for which the IDLE notification was received * previously. * - * This function sends a CSG status update request for all the CSG slots - * present in the bitmap scheduler->csg_slots_idle_mask and wait for the - * request to complete. - * The bits set in the scheduler->csg_slots_idle_mask bitmap are cleared by - * this function. - * * @kbdev: Pointer to the GPU device. * @csg_bitmap: Bitmap of the CSG slots for which * the status update request completed successfully. * @failed_csg_bitmap: Bitmap of the CSG slots for which * the status update request timedout. + * + * This function sends a CSG status update request for all the CSG slots + * present in the bitmap scheduler->csg_slots_idle_mask and wait for the + * request to complete. + * The bits set in the scheduler->csg_slots_idle_mask bitmap are cleared by + * this function. */ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, unsigned long *csg_bitmap, unsigned long *failed_csg_bitmap) @@ -3832,6 +3875,8 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, * resident on CSG slots for which the * IDLE notification was received previously. * + * @kbdev: Pointer to the GPU device. + * * This function is called at the start of scheduling tick/tock to reconfirm * the idle status of queue groups resident on CSG slots for * which idle notification was received previously, i.e. all the CSG slots @@ -3845,8 +3890,6 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, * updated accordingly. * The bits corresponding to slots for which the status update request timedout * remain set in scheduler->csg_slots_idle_mask. - * - * @kbdev: Pointer to the GPU device. 
*/ static void scheduler_handle_idle_slots(struct kbase_device *kbdev) { @@ -3901,7 +3944,7 @@ static void scheduler_scan_idle_groups(struct kbase_device *kbdev) list_for_each_entry_safe(group, n, &scheduler->idle_groups_to_schedule, link_to_schedule) { - WARN_ON(!queue_group_idle_locked(group)); + WARN_ON(!on_slot_group_idle_locked(group)); if (!scheduler->ngrp_to_schedule) { /* keep the top csg's origin */ @@ -3955,6 +3998,18 @@ static struct kbase_queue_group *get_tock_top_group( return NULL; } +/** + * suspend_active_groups_on_powerdown() - Suspend active CSG groups upon + * suspend or GPU IDLE. + * + * @kbdev: Pointer to the device + * @system_suspend: Flag to indicate it's for system suspend. + * + * This function will suspend all active CSG groups upon either + * system suspend, runtime suspend or GPU IDLE. + * + * Return: 0 on success, -1 otherwise. + */ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, bool system_suspend) { @@ -3964,8 +4019,8 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, int ret = suspend_active_queue_groups(kbdev, slot_mask); if (ret) { - /* The suspend of CSGs failed, trigger the GPU reset and wait - * for it to complete to be in a deterministic state. + /* The suspend of CSGs failed, + * trigger the GPU reset to be in a deterministic state. */ dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n", kbase_backend_get_cycle_cnt(kbdev), @@ -3975,13 +4030,6 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); - if (system_suspend) { - mutex_unlock(&scheduler->lock); - kbase_reset_gpu_allow(kbdev); - kbase_reset_gpu_wait(kbdev); - kbase_reset_gpu_prevent_and_wait(kbdev); - mutex_lock(&scheduler->lock); - } return -1; } @@ -4059,6 +4107,8 @@ static void scheduler_sleep_on_idle(struct kbase_device *kbdev) * This function is called on GPU idle notification to trigger the power down of * GPU. Scheduler's state is changed to suspended and all the active queue * groups are suspended before halting the MCU firmware. + * + * Return: true if scheduler will be suspended or false if suspend is aborted. */ static bool scheduler_suspend_on_idle(struct kbase_device *kbdev) { @@ -4104,6 +4154,8 @@ static void gpu_idle_worker(struct work_struct *work) disable_gpu_idle_fw_timer(kbdev); scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev); if (scheduler_is_idle_suspendable) { + KBASE_KTRACE_ADD(kbdev, GPU_IDLE_HANDLING_START, NULL, + kbase_csf_ktrace_gpu_cycle_cnt(kbdev)); #ifdef KBASE_PM_RUNTIME if (kbase_pm_gpu_sleep_allowed(kbdev) && scheduler->total_runnable_grps) @@ -4174,8 +4226,7 @@ static int scheduler_prepare(struct kbase_device *kbdev) /* Adds those idle but runnable groups to the scanout list */ scheduler_scan_idle_groups(kbdev); - /* After adding the idle CSGs, the two counts should be the same */ - WARN_ON(scheduler->csg_scan_count_for_tick != scheduler->ngrp_to_schedule); + WARN_ON(scheduler->csg_scan_count_for_tick < scheduler->ngrp_to_schedule); KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp, scheduler->num_active_address_spaces | @@ -4705,8 +4756,11 @@ static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev) * due to the extra context ref-count, which prevents the * L2 powering down cache clean operation in the non racing * case. 
+ * LSC is being flushed together to cover buslogging usecase, + * where GPU reset is done regularly to avoid the log buffer + * overflow. */ - kbase_gpu_start_cache_clean(kbdev); + kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC); ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev, kbdev->reset_timeout_ms); if (ret2) { @@ -5055,13 +5109,18 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, unsigned int target_page_nr = 0, i = 0; u64 offset = sus_buf->offset; size_t to_copy = sus_buf->size; + const u32 csg_suspend_buf_nr_pages = + PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); if (scheduler->state != SCHED_SUSPENDED) { /* Similar to the case of HW counters, need to flush - * the GPU cache before reading from the suspend buffer + * the GPU L2 cache before reading from the suspend buffer * pages as they are mapped and cached on GPU side. + * Flushing LSC is not done here, since only the flush of + * CSG suspend buffer contents is needed from the L2 cache. */ - kbase_gpu_start_cache_clean(kbdev); + kbase_gpu_start_cache_clean( + kbdev, GPU_COMMAND_CACHE_CLN_INV_L2); kbase_gpu_wait_cache_clean(kbdev); } else { /* Make sure power down transitions have completed, @@ -5073,7 +5132,7 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, kbase_pm_wait_for_desired_state(kbdev); } - for (i = 0; i < PFN_UP(sus_buf->size) && + for (i = 0; i < csg_suspend_buf_nr_pages && target_page_nr < sus_buf->nr_pages; i++) { struct page *pg = as_page(group->normal_suspend_buf.phy[i]); @@ -5252,7 +5311,7 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group) * This function will evaluate the sync condition, if any, of all the queues * bound to the given group. * - * Return true if the sync condition of at least one queue has been satisfied. + * Return: true if the sync condition of at least one queue has been satisfied. */ static bool check_sync_update_for_on_slot_group( struct kbase_queue_group *group) @@ -5341,7 +5400,7 @@ static bool check_sync_update_for_on_slot_group( * protected mode that has a higher priority than the active protected mode * group. * - * Return true if the sync condition of at least one queue in a group has been + * Return: true if the sync condition of at least one queue in a group has been * satisfied. */ static bool check_sync_update_for_idle_groups_protm(struct kbase_device *kbdev) @@ -5604,8 +5663,14 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev) flush_work(&kbdev->csf.scheduler.gpu_idle_work); mutex_lock(&kbdev->csf.scheduler.lock); - if (WARN_ON(kbdev->csf.scheduler.state != SCHED_SUSPENDED)) + if (kbdev->csf.scheduler.state != SCHED_SUSPENDED) { + /* The power policy could prevent the Scheduler from + * getting suspended when GPU becomes idle. + */ + WARN_ON(kbase_pm_idle_groups_sched_suspendable(kbdev)); scheduler_suspend(kbdev); + } + mutex_unlock(&kbdev->csf.scheduler.lock); cancel_delayed_work_sync(&kbdev->csf.scheduler.ping_work); cancel_tick_timer(kbdev); @@ -5692,12 +5757,16 @@ void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev, * available, so need to drop the lock before cancellation. 
*/ cancel_work_sync(&scheduler->tick_work); - } else if (!currently_enabled && enable) { + return; + } + + if (!currently_enabled && enable) { scheduler->timer_enabled = true; scheduler_enable_tick_timer_nolock(kbdev); - mutex_unlock(&scheduler->lock); } + + mutex_unlock(&scheduler->lock); } void kbase_csf_scheduler_kick(struct kbase_device *kbdev) @@ -5718,18 +5787,20 @@ out: mutex_unlock(&scheduler->lock); } -void kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev) +int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev) { + int result = 0; struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; /* Cancel any potential queued delayed work(s) */ cancel_work_sync(&scheduler->tick_work); cancel_tock_work(scheduler); - if (kbase_reset_gpu_prevent_and_wait(kbdev)) { + result = kbase_reset_gpu_prevent_and_wait(kbdev); + if (result) { dev_warn(kbdev->dev, "Stop PM suspending for failing to prevent gpu reset.\n"); - return; + return result; } mutex_lock(&scheduler->lock); @@ -5742,18 +5813,31 @@ void kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev) */ if (scheduler->state == SCHED_SLEEPING) { dev_info(kbdev->dev, "Activating MCU out of sleep on system suspend"); - force_scheduler_to_exit_sleep(kbdev); + result = force_scheduler_to_exit_sleep(kbdev); + if (result) { + dev_warn(kbdev->dev, "Scheduler failed to exit from sleep"); + goto exit; + } } #endif if (scheduler->state != SCHED_SUSPENDED) { - suspend_active_groups_on_powerdown(kbdev, true); - dev_info(kbdev->dev, "Scheduler PM suspend"); - scheduler_suspend(kbdev); - cancel_tick_timer(kbdev); + result = suspend_active_groups_on_powerdown(kbdev, true); + if (result) { + dev_warn(kbdev->dev, "failed to suspend active groups"); + goto exit; + } else { + dev_info(kbdev->dev, "Scheduler PM suspend"); + scheduler_suspend(kbdev); + cancel_tick_timer(kbdev); + } } + +exit: mutex_unlock(&scheduler->lock); kbase_reset_gpu_allow(kbdev); + + return result; } KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_suspend); diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.h b/mali_kbase/csf/mali_kbase_csf_scheduler.h index 73ebb66..068a45b 100644 --- a/mali_kbase/csf/mali_kbase_csf_scheduler.h +++ b/mali_kbase/csf/mali_kbase_csf_scheduler.h @@ -23,6 +23,7 @@ #define _KBASE_CSF_SCHEDULER_H_ #include "mali_kbase_csf.h" +#include "mali_kbase_csf_event.h" /** * kbase_csf_scheduler_queue_start() - Enable the running of GPU command queue @@ -250,14 +251,14 @@ void kbase_csf_scheduler_enable_tick_timer(struct kbase_device *kbdev); * kbase_csf_scheduler_group_copy_suspend_buf - Suspend a queue * group and copy suspend buffer. * - * This function is called to suspend a queue group and copy the suspend_buffer - * contents to the input buffer provided. - * * @group: Pointer to the queue group to be suspended. * @sus_buf: Pointer to the structure which contains details of the * user buffer and its kernel pinned pages to which we need to copy * the group suspend buffer. * + * This function is called to suspend a queue group and copy the suspend_buffer + * contents to the input buffer provided. + * * Return: 0 on success, or negative on failure. */ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, @@ -425,8 +426,10 @@ void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev); * * This function will make the scheduler suspend all the running queue groups * and drop its power managemenet reference. + * + * Return: 0 on success. 
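With kbase_csf_scheduler_pm_suspend() now returning an error code instead of void, callers on the suspend path are expected to check the result. A hedged sketch of such a call site; the real PM caller sits outside this hunk and the example_ name is hypothetical:

static int example_device_suspend(struct kbase_device *kbdev)
{
        int err = kbase_csf_scheduler_pm_suspend(kbdev);

        if (err) {
                /* Abort the system suspend: the scheduler could not suspend
                 * its active groups or exit GPU sleep.
                 */
                dev_warn(kbdev->dev, "Scheduler PM suspend failed (%d)", err);
                return err;
        }

        return 0;
}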
*/ -void kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev); +int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev); /** * kbase_csf_scheduler_all_csgs_idle() - Check if the scheduler internal diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c index 06a7824..62fb241 100644 --- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c +++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c @@ -28,13 +28,13 @@ /** * encode_chunk_ptr - Encode the address and size of a chunk as an integer. * + * @chunk_size: Size of a tiler heap chunk, in bytes. + * @chunk_addr: GPU virtual address of the same tiler heap chunk. + * * The size and address of the next chunk in a list are packed into a single * 64-bit value for storage in a chunk's header. This function returns that * value. * - * @chunk_size: Size of a tiler heap chunk, in bytes. - * @chunk_addr: GPU virtual address of the same tiler heap chunk. - * * Return: Next chunk pointer suitable for writing into a chunk header. */ static u64 encode_chunk_ptr(u32 const chunk_size, u64 const chunk_addr) @@ -76,14 +76,14 @@ static struct kbase_csf_tiler_heap_chunk *get_last_chunk( /** * link_chunk - Link a chunk into a tiler heap * + * @heap: Pointer to the tiler heap. + * @chunk: Pointer to the heap chunk to be linked. + * * Unless the @chunk is the first in the kernel's list of chunks belonging to * a given tiler heap, this function stores the size and address of the @chunk * in the header of the preceding chunk. This requires the GPU memory region * containing the header to be be mapped temporarily, which can fail. * - * @heap: Pointer to the tiler heap. - * @chunk: Pointer to the heap chunk to be linked. - * * Return: 0 if successful or a negative error code on failure. */ static int link_chunk(struct kbase_csf_tiler_heap *const heap, @@ -118,15 +118,15 @@ static int link_chunk(struct kbase_csf_tiler_heap *const heap, /** * init_chunk - Initialize and link a tiler heap chunk * - * Zero-initialize a new chunk's header (including its pointer to the next - * chunk, which doesn't exist yet) and then update the previous chunk's - * header to link the new chunk into the chunk list. - * * @heap: Pointer to the tiler heap. * @chunk: Pointer to the heap chunk to be initialized and linked. * @link_with_prev: Flag to indicate if the new chunk needs to be linked with * the previously allocated chunk. * + * Zero-initialize a new chunk's header (including its pointer to the next + * chunk, which doesn't exist yet) and then update the previous chunk's + * header to link the new chunk into the chunk list. + * * Return: 0 if successful or a negative error code on failure. */ static int init_chunk(struct kbase_csf_tiler_heap *const heap, @@ -163,14 +163,14 @@ static int init_chunk(struct kbase_csf_tiler_heap *const heap, /** * create_chunk - Create a tiler heap chunk * - * This function allocates a chunk of memory for a tiler heap and adds it to - * the end of the list of chunks associated with that heap. The size of the - * chunk is not a parameter because it is configured per-heap not per-chunk. - * * @heap: Pointer to the tiler heap for which to allocate memory. * @link_with_prev: Flag to indicate if the chunk to be allocated needs to be * linked with the previously allocated chunk. * + * This function allocates a chunk of memory for a tiler heap and adds it to + * the end of the list of chunks associated with that heap. The size of the + * chunk is not a parameter because it is configured per-heap not per-chunk. 
+ * * Return: 0 if successful or a negative error code on failure. */ static int create_chunk(struct kbase_csf_tiler_heap *const heap, @@ -237,15 +237,15 @@ static int create_chunk(struct kbase_csf_tiler_heap *const heap, /** * delete_chunk - Delete a tiler heap chunk * + * @heap: Pointer to the tiler heap for which @chunk was allocated. + * @chunk: Pointer to a chunk to be deleted. + * * This function frees a tiler heap chunk previously allocated by @create_chunk * and removes it from the list of chunks associated with the heap. * * WARNING: The deleted chunk is not unlinked from the list of chunks used by * the GPU, therefore it is only safe to use this function when * deleting a heap. - * - * @heap: Pointer to the tiler heap for which @chunk was allocated. - * @chunk: Pointer to a chunk to be deleted. */ static void delete_chunk(struct kbase_csf_tiler_heap *const heap, struct kbase_csf_tiler_heap_chunk *const chunk) @@ -264,10 +264,10 @@ static void delete_chunk(struct kbase_csf_tiler_heap *const heap, /** * delete_all_chunks - Delete all chunks belonging to a tiler heap * + * @heap: Pointer to a tiler heap. + * * This function empties the list of chunks associated with a tiler heap by * freeing all chunks previously allocated by @create_chunk. - * - * @heap: Pointer to a tiler heap. */ static void delete_all_chunks(struct kbase_csf_tiler_heap *heap) { @@ -284,12 +284,12 @@ static void delete_all_chunks(struct kbase_csf_tiler_heap *heap) /** * create_initial_chunks - Create the initial list of chunks for a tiler heap * - * This function allocates a given number of chunks for a tiler heap and - * adds them to the list of chunks associated with that heap. - * * @heap: Pointer to the tiler heap for which to allocate memory. * @nchunks: Number of chunks to create. * + * This function allocates a given number of chunks for a tiler heap and + * adds them to the list of chunks associated with that heap. + * * Return: 0 if successful or a negative error code on failure. */ static int create_initial_chunks(struct kbase_csf_tiler_heap *const heap, @@ -310,12 +310,12 @@ static int create_initial_chunks(struct kbase_csf_tiler_heap *const heap, /** * delete_heap - Delete a tiler heap * + * @heap: Pointer to a tiler heap to be deleted. + * * This function frees any chunks allocated for a tiler heap previously * initialized by @kbase_csf_tiler_heap_init and removes it from the list of * heaps associated with the kbase context. The heap context structure used by * the firmware is also freed. - * - * @heap: Pointer to a tiler heap to be deleted. */ static void delete_heap(struct kbase_csf_tiler_heap *heap) { @@ -346,15 +346,15 @@ static void delete_heap(struct kbase_csf_tiler_heap *heap) /** * find_tiler_heap - Find a tiler heap from the address of its heap context * + * @kctx: Pointer to the kbase context to search for a tiler heap. + * @heap_gpu_va: GPU virtual address of a heap context structure. + * * Each tiler heap managed by the kernel has an associated heap context * structure used by the firmware. This function finds a tiler heap object from * the GPU virtual address of its associated heap context. The heap context * should have been allocated by @kbase_csf_heap_context_allocator_alloc in the * same @kctx. * - * @kctx: Pointer to the kbase context to search for a tiler heap. - * @heap_gpu_va: GPU virtual address of a heap context structure. - * * Return: pointer to the tiler heap object, or NULL if not found. 
*/ static struct kbase_csf_tiler_heap *find_tiler_heap( @@ -495,8 +495,11 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, dev_dbg(kctx->kbdev->dev, "Created tiler heap 0x%llX\n", heap->gpu_va); mutex_unlock(&kctx->csf.tiler_heaps.lock); + kctx->running_total_tiler_heap_nr_chunks += heap->chunk_count; + kctx->running_total_tiler_heap_memory += heap->chunk_size * heap->chunk_count; + if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory) + kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory; } - return err; } @@ -505,27 +508,36 @@ int kbase_csf_tiler_heap_term(struct kbase_context *const kctx, { int err = 0; struct kbase_csf_tiler_heap *heap = NULL; + u32 chunk_count = 0; + u64 heap_size = 0; mutex_lock(&kctx->csf.tiler_heaps.lock); heap = find_tiler_heap(kctx, heap_gpu_va); - if (likely(heap)) + if (likely(heap)) { + chunk_count = heap->chunk_count; + heap_size = heap->chunk_size * chunk_count; delete_heap(heap); - else + } else err = -EINVAL; mutex_unlock(&kctx->csf.tiler_heaps.lock); - + if (likely(kctx->running_total_tiler_heap_memory >= heap_size)) + kctx->running_total_tiler_heap_memory -= heap_size; + else + dev_warn(kctx->kbdev->dev, + "Running total tiler heap memory lower than expected!"); + if (likely(kctx->running_total_tiler_heap_nr_chunks >= chunk_count)) + kctx->running_total_tiler_heap_nr_chunks -= chunk_count; + else + dev_warn(kctx->kbdev->dev, + "Running total tiler chunk count lower than expected!"); return err; } /** * alloc_new_chunk - Allocate a new chunk for the tiler heap. * - * This function will allocate a new chunk for the chunked tiler heap depending - * on the settings provided by userspace when the heap was created and the - * heap's statistics (like number of render passes in-flight). - * * @heap: Pointer to the tiler heap. * @nr_in_flight: Number of render passes that are in-flight, must not be zero. * @pending_frag_count: Number of render passes in-flight with completed vertex/tiler stage. @@ -534,6 +546,10 @@ int kbase_csf_tiler_heap_term(struct kbase_context *const kctx, * @new_chunk_ptr: Where to store the GPU virtual address & size of the new * chunk allocated for the heap. * + * This function will allocate a new chunk for the chunked tiler heap depending + * on the settings provided by userspace when the heap was created and the + * heap's statistics (like number of render passes in-flight). + * * Return: 0 if a new chunk was allocated otherwise an appropriate negative * error code. */ diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.h b/mali_kbase/csf/mali_kbase_csf_tiler_heap.h index 04c27f7..4031ad4 100644 --- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.h +++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.h @@ -38,10 +38,10 @@ int kbase_csf_tiler_heap_context_init(struct kbase_context *kctx); * kbase_csf_tiler_heap_context_term - Terminate the tiler heaps context for a * GPU address space * + * @kctx: Pointer to the kbase context being terminated. + * * This function deletes any chunked tiler heaps that weren't deleted before * context termination. - * - * @kctx: Pointer to the kbase context being terminated. */ void kbase_csf_tiler_heap_context_term(struct kbase_context *kctx); @@ -74,15 +74,15 @@ int kbase_csf_tiler_heap_init(struct kbase_context *kctx, /** * kbasep_cs_tiler_heap_term - Terminate a chunked tiler memory heap. * + * @kctx: Pointer to the kbase context in which the tiler heap was initialized. 
+ * @gpu_heap_va: The GPU virtual address of the context that was set up for the + * tiler heap. + * * This function will terminate a chunked tiler heap and cause all the chunks * (initial and those added during out-of-memory processing) to be freed. * It is the caller's responsibility to ensure no further operations on this * heap will happen before calling this function. * - * @kctx: Pointer to the kbase context in which the tiler heap was initialized. - * @gpu_heap_va: The GPU virtual address of the context that was set up for the - * tiler heap. - * * Return: 0 if successful or a negative error code on failure. */ int kbase_csf_tiler_heap_term(struct kbase_context *kctx, u64 gpu_heap_va); @@ -90,12 +90,6 @@ int kbase_csf_tiler_heap_term(struct kbase_context *kctx, u64 gpu_heap_va); /** * kbase_csf_tiler_heap_alloc_new_chunk - Allocate a new chunk for tiler heap. * - * This function will allocate a new chunk for the chunked tiler heap depending - * on the settings provided by userspace when the heap was created and the - * heap's statistics (like number of render passes in-flight). - * It would return an appropriate error code if a new chunk couldn't be - * allocated. - * * @kctx: Pointer to the kbase context in which the tiler heap was initialized. * @gpu_heap_va: GPU virtual address of the heap context. * @nr_in_flight: Number of render passes that are in-flight, must not be zero. @@ -105,6 +99,12 @@ int kbase_csf_tiler_heap_term(struct kbase_context *kctx, u64 gpu_heap_va); * @new_chunk_ptr: Where to store the GPU virtual address & size of the new * chunk allocated for the heap. * + * This function will allocate a new chunk for the chunked tiler heap depending + * on the settings provided by userspace when the heap was created and the + * heap's statistics (like number of render passes in-flight). + * It would return an appropriate error code if a new chunk couldn't be + * allocated. + * * Return: 0 if a new chunk was allocated otherwise an appropriate negative * error code (like -EBUSY when a free chunk is expected to be * available upon completion of a render pass and -EINVAL when diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap_debugfs.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap_debugfs.c index f46beed..96e0f28 100644 --- a/mali_kbase/csf/mali_kbase_csf_tiler_heap_debugfs.c +++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap_debugfs.c @@ -32,7 +32,7 @@ * @file: The seq_file for printing to * @data: The debugfs dentry private data, a pointer to kbase_context * - * Return: Negative error code or 0 on success. + * Return: 0 in any case. */ static int kbasep_csf_tiler_heap_debugfs_show(struct seq_file *file, void *data) { @@ -65,11 +65,41 @@ static int kbasep_csf_tiler_heap_debugfs_show(struct seq_file *file, void *data) return 0; } +/** + * kbasep_csf_tiler_heap_total_debugfs_show() - Print the total memory allocated + * for all tiler heaps in a context. + * + * @file: The seq_file for printing to + * @data: The debugfs dentry private data, a pointer to kbase_context + * + * Return: 0 in any case. 
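For reference, the totals reported by the new tiler_heaps_total debugfs file are maintained by the kbase_csf_tiler_heap_init()/kbase_csf_tiler_heap_term() changes above. A condensed sketch of that accounting, with locking and error handling omitted, is shown here; it is a restatement for clarity, not a literal copy of the driver code.

/* Condensed restatement of the per-context tiler heap accounting added in
 * this patch, using the fields introduced on struct kbase_context.
 */
static void example_account_heap_created(struct kbase_context *kctx,
					 u64 chunk_size, u32 chunk_count)
{
	kctx->running_total_tiler_heap_nr_chunks += chunk_count;
	kctx->running_total_tiler_heap_memory += chunk_size * chunk_count;

	/* The peak only ever grows; it is reported separately in debugfs. */
	if (kctx->running_total_tiler_heap_memory >
	    kctx->peak_total_tiler_heap_memory)
		kctx->peak_total_tiler_heap_memory =
			kctx->running_total_tiler_heap_memory;
}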
+ */ +static int kbasep_csf_tiler_heap_total_debugfs_show(struct seq_file *file, void *data) +{ + struct kbase_context *kctx = file->private; + + seq_printf(file, "MALI_CSF_TILER_HEAP_DEBUGFS_VERSION: v%u\n", + MALI_CSF_TILER_HEAP_DEBUGFS_VERSION); + seq_printf(file, "Total number of chunks of all heaps in the context: %lu\n", + (unsigned long)kctx->running_total_tiler_heap_nr_chunks); + seq_printf(file, "Total allocated memory of all heaps in the context: %llu\n", + (unsigned long long)kctx->running_total_tiler_heap_memory); + seq_printf(file, "Peak allocated tiler heap memory in the context: %llu\n", + (unsigned long long)kctx->peak_total_tiler_heap_memory); + + return 0; +} + static int kbasep_csf_tiler_heap_debugfs_open(struct inode *in, struct file *file) { return single_open(file, kbasep_csf_tiler_heap_debugfs_show, in->i_private); } +static int kbasep_csf_tiler_heap_total_debugfs_open(struct inode *in, struct file *file) +{ + return single_open(file, kbasep_csf_tiler_heap_total_debugfs_show, in->i_private); +} + static const struct file_operations kbasep_csf_tiler_heap_debugfs_fops = { .open = kbasep_csf_tiler_heap_debugfs_open, .read = seq_read, @@ -77,6 +107,13 @@ static const struct file_operations kbasep_csf_tiler_heap_debugfs_fops = { .release = single_release, }; +static const struct file_operations kbasep_csf_tiler_heap_total_debugfs_fops = { + .open = kbasep_csf_tiler_heap_total_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx) { struct dentry *file; @@ -93,6 +130,21 @@ void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx) } } +void kbase_csf_tiler_heap_total_debugfs_init(struct kbase_context *kctx) +{ + struct dentry *file; + + if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) + return; + + file = debugfs_create_file("tiler_heaps_total", 0444, kctx->kctx_dentry, + kctx, &kbasep_csf_tiler_heap_total_debugfs_fops); + + if (IS_ERR_OR_NULL(file)) { + dev_warn(kctx->kbdev->dev, + "Unable to create total tiler heap allocated memory debugfs entry"); + } +} #else /* @@ -102,5 +154,9 @@ void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx) { } +void kbase_csf_tiler_heap_total_debugfs_init(struct kbase_context *kctx) +{ +} + #endif /* CONFIG_DEBUG_FS */ diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap_debugfs.h b/mali_kbase/csf/mali_kbase_csf_tiler_heap_debugfs.h index 92ae91a..27a9074 100644 --- a/mali_kbase/csf/mali_kbase_csf_tiler_heap_debugfs.h +++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap_debugfs.h @@ -34,4 +34,11 @@ struct kbase_context; */ void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx); +/** + * kbase_csf_tiler_heap_total_debugfs_init() - Create a debugfs entry for per context tiler heap + * + * @kctx: The kbase_context for which to create the debugfs entry + */ +void kbase_csf_tiler_heap_total_debugfs_init(struct kbase_context *kctx); + #endif /* _KBASE_CSF_TILER_HEAP_DEBUGFS_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_tl_reader.c b/mali_kbase/csf/mali_kbase_csf_tl_reader.c index 563faec..b01ac29 100644 --- a/mali_kbase/csf/mali_kbase_csf_tl_reader.c +++ b/mali_kbase/csf/mali_kbase_csf_tl_reader.c @@ -171,8 +171,8 @@ static int kbase_ts_converter_init( * * Return: The CPU timestamp. 
*/ -void kbase_ts_converter_convert(const struct kbase_ts_converter *self, - u64 *gpu_ts) +static void __maybe_unused +kbase_ts_converter_convert(const struct kbase_ts_converter *self, u64 *gpu_ts) { u64 old_gpu_ts = *gpu_ts; *gpu_ts = div64_u64(old_gpu_ts * self->multiplier, self->divisor) + @@ -477,7 +477,14 @@ int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self, return 0; if (tl_reader_init_late(self, kbdev)) { +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + dev_warn( + kbdev->dev, + "CSFFW timeline is not available for MALI_NO_MALI builds!"); + return 0; +#else return -EINVAL; +#endif } tl_reader_reset(self); @@ -521,14 +528,5 @@ void kbase_csf_tl_reader_stop(struct kbase_csf_tl_reader *self) void kbase_csf_tl_reader_reset(struct kbase_csf_tl_reader *self) { - u64 gpu_cycle = 0; - struct kbase_device *kbdev = self->kbdev; - - if (!kbdev) - return; - kbase_csf_tl_reader_flush_buffer(self); - - get_cpu_gpu_time(kbdev, NULL, NULL, &gpu_cycle); - KBASE_TLSTREAM_TL_KBASE_CSFFW_RESET(kbdev, gpu_cycle); } diff --git a/mali_kbase/csf/mali_kbase_csf_tl_reader.h b/mali_kbase/csf/mali_kbase_csf_tl_reader.h index 891a8f3..4523ba2 100644 --- a/mali_kbase/csf/mali_kbase_csf_tl_reader.h +++ b/mali_kbase/csf/mali_kbase_csf_tl_reader.h @@ -40,8 +40,7 @@ struct kbase_tlstream; struct kbase_device; /** - * struct kbase_ts_converter - - * System timestamp to CPU timestamp converter state. + * struct kbase_ts_converter - System timestamp to CPU timestamp converter state. * * @multiplier: Numerator of the converter's fraction. * @divisor: Denominator of the converter's fraction. @@ -145,8 +144,7 @@ void kbase_csf_tl_reader_term(struct kbase_csf_tl_reader *self); int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self); /** - * kbase_csf_tl_reader_start() - - * Start asynchronous copying of CSFFW timeline stream. + * kbase_csf_tl_reader_start() - Start asynchronous copying of CSFFW timeline stream. * * @self: CSFFW TL Reader instance. * @kbdev: Kbase device. @@ -157,8 +155,7 @@ int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self, struct kbase_device *kbdev); /** - * kbase_csf_tl_reader_stop() - - * Stop asynchronous copying of CSFFW timeline stream. + * kbase_csf_tl_reader_stop() - Stop asynchronous copying of CSFFW timeline stream. * * @self: CSFFW TL Reader instance. */ @@ -166,8 +163,7 @@ void kbase_csf_tl_reader_stop(struct kbase_csf_tl_reader *self); #if IS_ENABLED(CONFIG_DEBUG_FS) /** - * kbase_csf_tl_reader_debugfs_init() - - * Initialize debugfs for CSFFW Timelime Stream Reader. + * kbase_csf_tl_reader_debugfs_init() - Initialize debugfs for CSFFW Timelime Stream Reader. * * @kbdev: Kbase device. */ @@ -175,8 +171,7 @@ void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev); #endif /** - * kbase_csf_tl_reader_reset() - - * Reset CSFFW timeline reader, it should be called before reset CSFFW. + * kbase_csf_tl_reader_reset() - Reset CSFFW timeline reader, it should be called before reset CSFFW. * * @self: CSFFW TL Reader instance. */ diff --git a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c index a6343c8..0c72f00 100644 --- a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c +++ b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c @@ -38,12 +38,6 @@ /** * struct firmware_trace_buffer - Trace Buffer within the MCU firmware * - * The firmware relays information to the host by writing on memory buffers - * which are allocated and partially configured by the host. 
These buffers - * are called Trace Buffers: each of them has a specific purpose and is - * identified by a name and a set of memory addresses where the host can - * set pointers to host-allocated structures. - * * @kbdev: Pointer to the Kbase device. * @node: List head linking all trace buffers to * kbase_device:csf.firmware_trace_buffers @@ -73,6 +67,12 @@ * @num_pages: Size of the data buffer, in pages. * @trace_enable_init_mask: Initial value for the trace enable bit mask. * @name: NULL terminated string which contains the name of the trace buffer. + * + * The firmware relays information to the host by writing on memory buffers + * which are allocated and partially configured by the host. These buffers + * are called Trace Buffers: each of them has a specific purpose and is + * identified by a name and a set of memory addresses where the host can + * set pointers to host-allocated structures. */ struct firmware_trace_buffer { struct kbase_device *kbdev; @@ -100,14 +100,14 @@ struct firmware_trace_buffer { /** * struct firmware_trace_buffer_data - Configuration data for trace buffers * - * Describe how to set up a trace buffer interface. - * Trace buffers are identified by name and they require a data buffer and - * an initial mask of values for the trace enable bits. - * * @name: Name identifier of the trace buffer * @trace_enable_init_mask: Initial value to assign to the trace enable bits * @size: Size of the data buffer to allocate for the trace buffer, in pages. * The size of a data buffer must always be a power of 2. + * + * Describe how to set up a trace buffer interface. + * Trace buffers are identified by name and they require a data buffer and + * an initial mask of values for the trace enable bits. */ struct firmware_trace_buffer_data { char name[64]; @@ -121,14 +121,13 @@ struct firmware_trace_buffer_data { * This table contains the configuration data for the trace buffers that are * expected to be parsed from the firmware. */ -static const struct firmware_trace_buffer_data -trace_buffer_data[] = { -#ifndef MALI_KBASE_BUILD - { "fwutf", {0}, 1 }, +static const struct firmware_trace_buffer_data trace_buffer_data[] = { +#if MALI_UNIT_TEST + { "fwutf", { 0 }, 1 }, #endif - { FW_TRACE_BUF_NAME, {0}, 4 }, - { "benchmark", {0}, 2 }, - { "timeline", {0}, KBASE_CSF_TL_BUFFER_NR_PAGES }, + { FW_TRACE_BUF_NAME, { 0 }, 4 }, + { "benchmark", { 0 }, 2 }, + { "timeline", { 0 }, KBASE_CSF_TL_BUFFER_NR_PAGES }, }; int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev) diff --git a/mali_kbase/csf/mali_kbase_csf_trace_buffer.h b/mali_kbase/csf/mali_kbase_csf_trace_buffer.h index b9f481d..823ace7 100644 --- a/mali_kbase/csf/mali_kbase_csf_trace_buffer.h +++ b/mali_kbase/csf/mali_kbase_csf_trace_buffer.h @@ -34,6 +34,8 @@ struct kbase_device; /** * kbase_csf_firmware_trace_buffers_init - Initialize trace buffers * + * @kbdev: Device pointer + * * Allocate resources for trace buffers. In particular: * - One memory page of GPU-readable, CPU-writable memory is used for * the Extract variables of all trace buffers. @@ -52,8 +54,6 @@ struct kbase_device; * populated with data from the firmware image parsing. * * Return: 0 if success, or an error code on failure. 
- * - * @kbdev: Device pointer */ int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev); @@ -67,6 +67,11 @@ void kbase_csf_firmware_trace_buffers_term(struct kbase_device *kbdev); /** * kbase_csf_firmware_parse_trace_buffer_entry - Process a "trace buffer" section * + * @kbdev: Kbase device structure + * @entry: Pointer to the section + * @size: Size (in bytes) of the section + * @updatable: Indicates whether config items can be updated with FIRMWARE_CONFIG_UPDATE + * * Read a "trace buffer" section adding metadata for the related trace buffer * to the kbase_device:csf.firmware_trace_buffers list. * @@ -74,11 +79,6 @@ void kbase_csf_firmware_trace_buffers_term(struct kbase_device *kbdev); * will not be initialized. * * Return: 0 if successful, negative error code on failure. - * - * @kbdev: Kbase device structure - * @entry: Pointer to the section - * @size: Size (in bytes) of the section - * @updatable: Indicates whether config items can be updated with FIRMWARE_CONFIG_UPDATE */ int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev, const u32 *entry, @@ -86,8 +86,9 @@ int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev, bool updatable); /** - * kbase_csf_firmware_reload_trace_buffers_data - - * Reload trace buffers data for firmware reboot + * kbase_csf_firmware_reload_trace_buffers_data - Reload trace buffers data for firmware reboot + * + * @kbdev: Device pointer * * Helper function used when rebooting the firmware to reload the initial setup * for all the trace buffers which have been previously parsed and initialized. @@ -99,44 +100,40 @@ int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev, * * In other words, the re-initialization done by this function will be * equivalent but not necessarily identical to the original initialization. - * - * @kbdev: Device pointer */ void kbase_csf_firmware_reload_trace_buffers_data(struct kbase_device *kbdev); /** * kbase_csf_firmware_get_trace_buffer - Get a trace buffer * - * Return: handle to a trace buffer, given the name, or NULL if a trace buffer - * with that name couldn't be found. - * * @kbdev: Device pointer * @name: Name of the trace buffer to find + * + * Return: handle to a trace buffer, given the name, or NULL if a trace buffer + * with that name couldn't be found. */ struct firmware_trace_buffer *kbase_csf_firmware_get_trace_buffer( struct kbase_device *kbdev, const char *name); /** - * kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count - - * Get number of trace enable bits for a trace buffer - * - * Return: Number of trace enable bits in a trace buffer. + * kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count - Get number of trace enable bits for a trace buffer * * @trace_buffer: Trace buffer handle + * + * Return: Number of trace enable bits in a trace buffer. */ unsigned int kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count( const struct firmware_trace_buffer *trace_buffer); /** - * kbase_csf_firmware_trace_buffer_update_trace_enable_bit - - * Update a trace enable bit - * - * Update the value of a given trace enable bit. + * kbase_csf_firmware_trace_buffer_update_trace_enable_bit - Update a trace enable bit * * @trace_buffer: Trace buffer handle * @bit: Bit to update * @value: New value for the given bit * + * Update the value of a given trace enable bit. + * * Return: 0 if successful, negative error code on failure. 
*/ int kbase_csf_firmware_trace_buffer_update_trace_enable_bit( @@ -146,9 +143,9 @@ int kbase_csf_firmware_trace_buffer_update_trace_enable_bit( /** * kbase_csf_firmware_trace_buffer_is_empty - Empty trace buffer predicate * - * Return: True if the trace buffer is empty, or false otherwise. - * * @trace_buffer: Trace buffer handle + * + * Return: True if the trace buffer is empty, or false otherwise. */ bool kbase_csf_firmware_trace_buffer_is_empty( const struct firmware_trace_buffer *trace_buffer); @@ -156,14 +153,14 @@ bool kbase_csf_firmware_trace_buffer_is_empty( /** * kbase_csf_firmware_trace_buffer_read_data - Read data from a trace buffer * + * @trace_buffer: Trace buffer handle + * @data: Pointer to a client-allocated where data shall be written. + * @num_bytes: Maximum number of bytes to read from the trace buffer. + * * Read available data from a trace buffer. The client provides a data buffer * of a given size and the maximum number of bytes to read. * * Return: Number of bytes read from the trace buffer. - * - * @trace_buffer: Trace buffer handle - * @data: Pointer to a client-allocated where data shall be written. - * @num_bytes: Maximum number of bytes to read from the trace buffer. */ unsigned int kbase_csf_firmware_trace_buffer_read_data( struct firmware_trace_buffer *trace_buffer, u8 *data, unsigned int num_bytes); diff --git a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_csf.h b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_csf.h index d05f802..2506ce1 100644 --- a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_csf.h +++ b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_codes_csf.h @@ -97,6 +97,13 @@ int dummy_array[] = { /* info_val = bitmask of slots that gave an ACK for STATUS_UPDATE */ KBASE_KTRACE_CODE_MAKE_CODE(SLOTS_STATUS_UPDATE_ACK), + /* info_val[63:0] = GPU cycle counter, used mainly for benchmarking + * purpose. + */ + KBASE_KTRACE_CODE_MAKE_CODE(GPU_IDLE_HANDLING_START), + KBASE_KTRACE_CODE_MAKE_CODE(MCU_HALTED), + KBASE_KTRACE_CODE_MAKE_CODE(MCU_IN_SLEEP), + /* * Group events */ diff --git a/mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_csf.h b/mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_csf.h index 4b23fc9..9ee7f81 100644 --- a/mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_csf.h +++ b/mali_kbase/debug/backend/mali_kbase_debug_linux_ktrace_csf.h @@ -58,6 +58,9 @@ DEFINE_MALI_ADD_EVENT(IDLE_WORKER_END); DEFINE_MALI_ADD_EVENT(GROUP_SYNC_UPDATE_WORKER_BEGIN); DEFINE_MALI_ADD_EVENT(GROUP_SYNC_UPDATE_WORKER_END); DEFINE_MALI_ADD_EVENT(SLOTS_STATUS_UPDATE_ACK); +DEFINE_MALI_ADD_EVENT(GPU_IDLE_HANDLING_START); +DEFINE_MALI_ADD_EVENT(MCU_HALTED); +DEFINE_MALI_ADD_EVENT(MCU_IN_SLEEP); DECLARE_EVENT_CLASS(mali_csf_grp_q_template, TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group, diff --git a/mali_kbase/debug/mali_kbase_debug_ktrace.h b/mali_kbase/debug/mali_kbase_debug_ktrace.h index f943696..f1e6d3d 100644 --- a/mali_kbase/debug/mali_kbase_debug_ktrace.h +++ b/mali_kbase/debug/mali_kbase_debug_ktrace.h @@ -49,6 +49,7 @@ /** * kbase_ktrace_init - initialize kbase ktrace. * @kbdev: kbase device + * Return: 0 if successful or a negative error code on failure. 
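A short usage sketch for the trace buffer API documented in the mali_kbase_csf_trace_buffer.h hunks above; the buffer name ("benchmark", one of the entries in trace_buffer_data[]) and the scratch size are arbitrary choices for the example, and the helper itself is hypothetical.

/* Illustrative consumer: look up a firmware trace buffer by name and drain
 * whatever data is currently available.
 */
static void example_drain_trace_buffer(struct kbase_device *kbdev)
{
	u8 scratch[256];
	struct firmware_trace_buffer *tb =
		kbase_csf_firmware_get_trace_buffer(kbdev, "benchmark");

	if (!tb)
		return;

	while (!kbase_csf_firmware_trace_buffer_is_empty(tb)) {
		unsigned int nread = kbase_csf_firmware_trace_buffer_read_data(
			tb, scratch, sizeof(scratch));

		if (!nread)
			break;
		/* Process 'nread' bytes from 'scratch' here. */
	}
}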
*/ int kbase_ktrace_init(struct kbase_device *kbdev); diff --git a/mali_kbase/debug/mali_kbase_debug_ktrace_internal.h b/mali_kbase/debug/mali_kbase_debug_ktrace_internal.h index d9bd351..ba93f29 100644 --- a/mali_kbase/debug/mali_kbase_debug_ktrace_internal.h +++ b/mali_kbase/debug/mali_kbase_debug_ktrace_internal.h @@ -63,6 +63,8 @@ void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, * @ktrace: kbase device's ktrace * * This may also empty the oldest entry in the ringbuffer to make space. + * + * Return: ktrace message */ struct kbase_ktrace_msg *kbasep_ktrace_reserve(struct kbase_ktrace *ktrace); diff --git a/mali_kbase/device/backend/mali_kbase_device_csf.c b/mali_kbase/device/backend/mali_kbase_device_csf.c index 7b37a96..8a4d2e2 100644 --- a/mali_kbase/device/backend/mali_kbase_device_csf.c +++ b/mali_kbase/device/backend/mali_kbase_device_csf.c @@ -24,11 +24,15 @@ #include <mali_kbase_hwaccess_backend.h> #include <mali_kbase_hwcnt_backend_csf_if_fw.h> +#include <mali_kbase_hwcnt_watchdog_if_timer.h> #include <mali_kbase_ctx_sched.h> #include <mali_kbase_reset_gpu.h> #include <csf/mali_kbase_csf.h> #include <csf/ipa_control/mali_kbase_csf_ipa_control.h> +#if IS_ENABLED(CONFIG_MALI_NO_MALI) +#include <backend/gpu/mali_kbase_model_linux.h> +#endif #include <mali_kbase.h> #include <backend/gpu/mali_kbase_irq_internal.h> @@ -196,9 +200,31 @@ static void kbase_csf_early_term(struct kbase_device *kbdev) } /** + * kbase_device_hwcnt_watchdog_if_init - Create hardware counter watchdog + * interface. + * @kbdev: Device pointer + */ +static int kbase_device_hwcnt_watchdog_if_init(struct kbase_device *kbdev) +{ + return kbase_hwcnt_watchdog_if_timer_create( + &kbdev->hwcnt_watchdog_timer); +} + +/** + * kbase_device_hwcnt_watchdog_if_term - Terminate hardware counter watchdog + * interface. + * @kbdev: Device pointer + */ +static void kbase_device_hwcnt_watchdog_if_term(struct kbase_device *kbdev) +{ + kbase_hwcnt_watchdog_if_timer_destroy(&kbdev->hwcnt_watchdog_timer); +} + +/** * kbase_device_hwcnt_backend_csf_if_init - Create hardware counter backend * firmware interface. * @kbdev: Device pointer + * Return: 0 if successful or a negative error code on failure. 
*/ static int kbase_device_hwcnt_backend_csf_if_init(struct kbase_device *kbdev) { @@ -226,7 +252,7 @@ static int kbase_device_hwcnt_backend_csf_init(struct kbase_device *kbdev) return kbase_hwcnt_backend_csf_create( &kbdev->hwcnt_backend_csf_if_fw, KBASE_HWCNT_BACKEND_CSF_RING_BUFFER_COUNT, - &kbdev->hwcnt_gpu_iface); + &kbdev->hwcnt_watchdog_timer, &kbdev->hwcnt_gpu_iface); } /** @@ -239,8 +265,13 @@ static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev) } static const struct kbase_device_init dev_init[] = { +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + { kbase_gpu_device_create, kbase_gpu_device_destroy, + "Dummy model initialization failed" }, +#else { assign_irqs, NULL, "IRQ search failed" }, { registers_map, registers_unmap, "Register map failed" }, +#endif { power_control_init, power_control_term, "Power control initialization failed" }, { kbase_device_io_history_init, kbase_device_io_history_term, @@ -270,6 +301,9 @@ static const struct kbase_device_init dev_init[] = { "Clock rate trace manager initialization failed" }, { kbase_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" }, + { kbase_device_hwcnt_watchdog_if_init, + kbase_device_hwcnt_watchdog_if_term, + "GPU hwcnt backend watchdog interface creation failed" }, { kbase_device_hwcnt_backend_csf_if_init, kbase_device_hwcnt_backend_csf_if_term, "GPU hwcnt backend CSF interface creation failed" }, @@ -283,7 +317,6 @@ static const struct kbase_device_init dev_init[] = { { kbase_csf_early_init, kbase_csf_early_term, "Early CSF initialization failed" }, { NULL, kbase_device_firmware_hwcnt_term, NULL }, -#ifdef MALI_KBASE_BUILD { kbase_device_debugfs_init, kbase_device_debugfs_term, "DebugFS initialization failed" }, /* Sysfs init needs to happen before registering the device with @@ -305,7 +338,6 @@ static const struct kbase_device_init dev_init[] = { "GPU property population failed" }, { kbase_device_late_init, kbase_device_late_term, "Late device initialization failed" }, -#endif }; static void kbase_device_term_partial(struct kbase_device *kbdev, @@ -476,3 +508,4 @@ out: return ret; } +KBASE_EXPORT_TEST_API(kbase_device_firmware_init_once); diff --git a/mali_kbase/device/backend/mali_kbase_device_jm.c b/mali_kbase/device/backend/mali_kbase_device_jm.c index 7288e8e..2e022eb 100644 --- a/mali_kbase/device/backend/mali_kbase_device_jm.c +++ b/mali_kbase/device/backend/mali_kbase_device_jm.c @@ -28,6 +28,9 @@ #include <mali_kbase_ctx_sched.h> #include <mali_kbase_reset_gpu.h> +#if IS_ENABLED(CONFIG_MALI_NO_MALI) +#include <backend/gpu/mali_kbase_model_linux.h> +#endif /* CONFIG_MALI_NO_MALI */ #ifdef CONFIG_MALI_ARBITER_SUPPORT #include <arbiter/mali_kbase_arbiter_pm.h> @@ -156,8 +159,13 @@ static void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev) } static const struct kbase_device_init dev_init[] = { +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + { kbase_gpu_device_create, kbase_gpu_device_destroy, + "Dummy model initialization failed" }, +#else { assign_irqs, NULL, "IRQ search failed" }, { registers_map, registers_unmap, "Register map failed" }, +#endif { kbase_device_io_history_init, kbase_device_io_history_term, "Register access history initialization failed" }, { kbase_device_pm_init, kbase_device_pm_term, @@ -203,7 +211,6 @@ static const struct kbase_device_init dev_init[] = { "Performance counter instrumentation initialization failed" }, { kbase_backend_late_init, kbase_backend_late_term, "Late backend initialization failed" }, -#ifdef MALI_KBASE_BUILD { kbase_debug_job_fault_dev_init, 
kbase_debug_job_fault_dev_term, "Job fault debug initialization failed" }, { kbase_device_debugfs_init, kbase_device_debugfs_term, @@ -225,7 +232,6 @@ static const struct kbase_device_init dev_init[] = { "Misc device registration failed" }, { kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer, "GPU property population failed" }, -#endif { NULL, kbase_dummy_job_wa_cleanup, NULL }, { kbase_device_late_init, kbase_device_late_term, "Late device initialization failed" }, diff --git a/mali_kbase/device/mali_kbase_device.c b/mali_kbase/device/mali_kbase_device.c index 518aaf9..dc53c43 100644 --- a/mali_kbase/device/mali_kbase_device.c +++ b/mali_kbase/device/mali_kbase_device.c @@ -275,6 +275,7 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) if (err) goto dma_set_mask_failed; + /* There is no limit for Mali, so set to max. We only do this if dma_parms * is already allocated by the platform. */ @@ -345,6 +346,7 @@ void kbase_device_misc_term(struct kbase_device *kbdev) kbase_device_all_as_term(kbdev); + if (kbdev->oom_notifier_block.notifier_call) unregister_oom_notifier(&kbdev->oom_notifier_block); } diff --git a/mali_kbase/device/mali_kbase_device.h b/mali_kbase/device/mali_kbase_device.h index 517c16b..22ceca0 100644 --- a/mali_kbase/device/mali_kbase_device.h +++ b/mali_kbase/device/mali_kbase_device.h @@ -118,22 +118,42 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset); bool kbase_is_gpu_removed(struct kbase_device *kbdev); /** + * kbase_gpu_cache_flush_and_busy_wait - Start a cache flush and busy wait + * @kbdev: Kbase device + * @flush_op: Flush command register value to be sent to HW + * + * Issue a cache flush command to hardware, then busy wait an irq status. + * This function will clear CLEAN_CACHES_COMPLETED irq mask bit set by other + * threads through kbase_gpu_start_cache_clean(), and wake them up manually + * after the busy-wait is done. Any pended cache flush commands raised by + * other thread are handled in this function. + * hwaccess_lock must be held by the caller. + * + * Return: 0 if successful or a negative error code on failure. + */ +int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev, + u32 flush_op); + +/** * kbase_gpu_start_cache_clean - Start a cache clean * @kbdev: Kbase device + * @flush_op: Flush command register value to be sent to HW * - * Issue a cache clean and invalidate command to hardware. This function will - * take hwaccess_lock. + * Issue a given cache flush command to hardware. + * This function will take hwaccess_lock. */ -void kbase_gpu_start_cache_clean(struct kbase_device *kbdev); +void kbase_gpu_start_cache_clean(struct kbase_device *kbdev, u32 flush_op); /** * kbase_gpu_start_cache_clean_nolock - Start a cache clean * @kbdev: Kbase device + * @flush_op: Flush command register value to be sent to HW * - * Issue a cache clean and invalidate command to hardware. hwaccess_lock - * must be held by the caller. + * Issue a given cache flush command to hardware. + * hwaccess_lock must be held by the caller. 
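A minimal sketch of how a caller might use the new busy-wait flush API declared above; the helper name is hypothetical, and GPU_COMMAND_CACHE_CLN_INV_L2 is the L2-only flush command already used elsewhere in this patch.

/* Illustrative only: flush and invalidate the L2 cache and busy-wait for
 * completion. Must be called with hwaccess_lock held, as required by the
 * kerneldoc above.
 */
static int example_flush_l2_and_wait(struct kbase_device *kbdev)
{
	lockdep_assert_held(&kbdev->hwaccess_lock);

	return kbase_gpu_cache_flush_and_busy_wait(kbdev,
						   GPU_COMMAND_CACHE_CLN_INV_L2);
}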
*/ -void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev); +void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev, + u32 flush_op); /** * kbase_gpu_wait_cache_clean - Wait for cache cleaning to finish diff --git a/mali_kbase/device/mali_kbase_device_hw.c b/mali_kbase/device/mali_kbase_device_hw.c index 4c98ae1..beacc7c 100644 --- a/mali_kbase/device/mali_kbase_device_hw.c +++ b/mali_kbase/device/mali_kbase_device_hw.c @@ -38,7 +38,98 @@ bool kbase_is_gpu_removed(struct kbase_device *kbdev) } #endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */ -void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev) +static int busy_wait_cache_clean_irq(struct kbase_device *kbdev) +{ + /* Previously MMU-AS command was used for L2 cache flush on page-table update. + * And we're using the same max-loops count for GPU command, because amount of + * L2 cache flush overhead are same between them. + */ + unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; + + /* Wait for the GPU cache clean operation to complete */ + while (--max_loops && + !(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & + CLEAN_CACHES_COMPLETED)) { + ; + } + + /* reset gpu if time-out occurred */ + if (max_loops == 0) { + dev_err(kbdev->dev, + "CLEAN_CACHES_COMPLETED bit stuck, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n"); + if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) + kbase_reset_gpu_locked(kbdev); + return -EBUSY; + } + + /* Clear the interrupt CLEAN_CACHES_COMPLETED bit. */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), + CLEAN_CACHES_COMPLETED); + + return 0; +} + +int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev, + u32 flush_op) +{ + u32 irq_mask; + int need_to_wake_up = 0; + int ret = 0; + + /* hwaccess_lock must be held to avoid any sync issue with + * kbase_gpu_start_cache_clean() / kbase_clean_caches_done() + */ + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* 1. Check if CLEAN_CACHES_COMPLETED irq mask bit is set. + * If it is set, it means there are threads waiting for + * CLEAN_CACHES_COMPLETED irq to be raised. + * We'll clear the irq mask bit and busy-wait for the cache + * clean operation to complete before submitting the cache + * clean command required after the GPU page table update. + * Pended flush commands will be merged to requested command. + */ + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); + if (irq_mask & CLEAN_CACHES_COMPLETED) { + /* disable irq first */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), + irq_mask & ~CLEAN_CACHES_COMPLETED); + + /* busy wait irq status to be enabled */ + ret = busy_wait_cache_clean_irq(kbdev); + if (ret) + return ret; + + /* merge pended command if there's any */ + flush_op = GPU_COMMAND_FLUSH_CACHE_MERGE( + kbdev->cache_clean_queued, flush_op); + + /* enable wake up notify flag */ + need_to_wake_up = 1; + } else { + /* Clear the interrupt CLEAN_CACHES_COMPLETED bit. */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), + CLEAN_CACHES_COMPLETED); + } + + /* 2. Issue GPU_CONTROL.COMMAND.FLUSH_CACHE operation. */ + KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, flush_op); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op); + + /* 3. Busy-wait irq status to be enabled. */ + ret = busy_wait_cache_clean_irq(kbdev); + if (ret) + return ret; + + /* 4. Wake-up blocked threads when there is any. 
*/ + if (need_to_wake_up) + kbase_gpu_cache_clean_wait_complete(kbdev); + + return ret; +} + +void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev, + u32 flush_op) { u32 irq_mask; @@ -47,10 +138,11 @@ void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev) if (kbdev->cache_clean_in_progress) { /* If this is called while another clean is in progress, we * can't rely on the current one to flush any new changes in - * the cache. Instead, trigger another cache clean immediately - * after this one finishes. + * the cache. Instead, accumulate all cache clean operations + * and trigger that immediately after this one finishes. */ - kbdev->cache_clean_queued = true; + kbdev->cache_clean_queued = GPU_COMMAND_FLUSH_CACHE_MERGE( + kbdev->cache_clean_queued, flush_op); return; } @@ -59,19 +151,18 @@ void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev) kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | CLEAN_CACHES_COMPLETED); - KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, 0); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CACHE_CLN_INV_L2); + KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, flush_op); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op); kbdev->cache_clean_in_progress = true; } -void kbase_gpu_start_cache_clean(struct kbase_device *kbdev) +void kbase_gpu_start_cache_clean(struct kbase_device *kbdev, u32 flush_op) { unsigned long flags; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_gpu_start_cache_clean_nolock(kbdev); + kbase_gpu_start_cache_clean_nolock(kbdev, flush_op); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } @@ -79,7 +170,7 @@ void kbase_gpu_cache_clean_wait_complete(struct kbase_device *kbdev) { lockdep_assert_held(&kbdev->hwaccess_lock); - kbdev->cache_clean_queued = false; + kbdev->cache_clean_queued = 0; kbdev->cache_clean_in_progress = false; wake_up(&kbdev->cache_clean_wait); } @@ -92,11 +183,14 @@ void kbase_clean_caches_done(struct kbase_device *kbdev) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (kbdev->cache_clean_queued) { - kbdev->cache_clean_queued = false; + u32 pended_flush_op = kbdev->cache_clean_queued; + + kbdev->cache_clean_queued = 0; - KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, 0); + KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, + pended_flush_op); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CACHE_CLN_INV_L2); + pended_flush_op); } else { /* Disable interrupt */ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c index 2f4c9d9..e095986 100644 --- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c +++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c @@ -24,6 +24,9 @@ #include "mali_kbase_ipa_counter_common_jm.h" #include "mali_kbase.h" +#if IS_ENABLED(CONFIG_MALI_NO_MALI) +#include <backend/gpu/mali_kbase_model_dummy.h> +#endif /* CONFIG_MALI_NO_MALI */ /* Performance counter blocks base offsets */ #define JM_BASE (0 * KBASE_IPA_NR_BYTES_PER_BLOCK) @@ -94,9 +97,15 @@ static u32 kbase_g7x_power_model_get_memsys_counter(struct kbase_ipa_model_vinst static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data, u32 counter_block_offset) { +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + const u32 sc_base = MEMSYS_BASE + + (KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS * + KBASE_IPA_NR_BYTES_PER_BLOCK); +#else const u32 sc_base = 
MEMSYS_BASE + (model_data->kbdev->gpu_props.props.l2_props.num_l2_slices * KBASE_IPA_NR_BYTES_PER_BLOCK); +#endif return sc_base + counter_block_offset; } diff --git a/mali_kbase/ipa/mali_kbase_ipa.c b/mali_kbase/ipa/mali_kbase_ipa.c index 8b05e68..c0c0cbb 100644 --- a/mali_kbase/ipa/mali_kbase_ipa.c +++ b/mali_kbase/ipa/mali_kbase_ipa.c @@ -537,18 +537,34 @@ static void opp_translate_freq_voltage(struct kbase_device *kbdev, unsigned long *freqs, unsigned long *volts) { +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + /* An arbitrary voltage and frequency value can be chosen for testing + * in no mali configuration which may not match with any OPP level. + */ + freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL] = nominal_freq; + volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL] = nominal_voltage; + + freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = nominal_freq; + volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = nominal_voltage; +#else u64 core_mask; + unsigned int i; kbase_devfreq_opp_translate(kbdev, nominal_freq, &core_mask, freqs, volts); CSTD_UNUSED(core_mask); + /* Convert micro volts to milli volts */ + for (i = 0; i < kbdev->nr_clocks; i++) + volts[i] /= 1000; + if (kbdev->nr_clocks == 1) { freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]; volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]; } +#endif } #if KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE diff --git a/mali_kbase/jm/mali_kbase_jm_defs.h b/mali_kbase/jm/mali_kbase_jm_defs.h index cb1c276..ac8f89b 100644 --- a/mali_kbase/jm/mali_kbase_jm_defs.h +++ b/mali_kbase/jm/mali_kbase_jm_defs.h @@ -653,8 +653,8 @@ static inline bool kbase_jd_katom_is_protected( /** * kbase_atom_is_younger - query if one atom is younger by age than another - * @katom_a the first atom - * @katom_a the second atom + * @katom_a: the first atom + * @katom_a: the second atom * * Return: true if the first atom is strictly younger than the second, false * otherwise. 
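The IPA hunks above derive the shader core counter block base differently depending on the build: real hardware follows the reported number of L2 slices, while MALI_NO_MALI builds use the dummy model's fixed maximum number of memory system blocks. A side-by-side sketch of that computation is below; the constant names are those used in this patch, and the helper itself is illustrative rather than driver code.

/* Illustrative helper contrasting the two ways the shader-core counter
 * block base offset is computed in this patch.
 */
static u32 example_sc_counter_base(u32 num_l2_slices, bool no_mali_build)
{
	const u32 memsys_blocks = no_mali_build ?
		KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS : num_l2_slices;

	return MEMSYS_BASE + memsys_blocks * KBASE_IPA_NR_BYTES_PER_BLOCK;
}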
diff --git a/mali_kbase/mali_base_hwconfig_features.h b/mali_kbase/mali_base_hwconfig_features.h index 2e81cb1..0f2b106 100644 --- a/mali_kbase/mali_base_hwconfig_features.h +++ b/mali_kbase/mali_base_hwconfig_features.h @@ -37,41 +37,42 @@ enum base_hw_feature { BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_ASN_HASH, BASE_HW_FEATURE_GPU_SLEEP, + BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_generic[] = { +__attribute__((unused)) static const enum base_hw_feature base_hw_features_generic[] = { BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tMIx[] = { +__attribute__((unused)) static const enum base_hw_feature base_hw_features_tMIx[] = { BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tHEx[] = { +__attribute__((unused)) static const enum base_hw_feature base_hw_features_tHEx[] = { BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tSIx[] = { +__attribute__((unused)) static const enum base_hw_feature base_hw_features_tSIx[] = { BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tDVx[] = { +__attribute__((unused)) static const enum base_hw_feature base_hw_features_tDVx[] = { BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tNOx[] = { +__attribute__((unused)) static const enum base_hw_feature base_hw_features_tNOx[] = { BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, @@ -80,7 +81,7 @@ static const enum base_hw_feature base_hw_features_tNOx[] = { BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tGOx[] = { +__attribute__((unused)) static const enum base_hw_feature base_hw_features_tGOx[] = { BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, @@ -89,50 +90,55 @@ static const enum base_hw_feature base_hw_features_tGOx[] = { BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tTRx[] = { +__attribute__((unused)) static const enum base_hw_feature base_hw_features_tTRx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tNAx[] = { +__attribute__((unused)) static const enum base_hw_feature base_hw_features_tNAx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tBEx[] = { +__attribute__((unused)) static const enum base_hw_feature base_hw_features_tBEx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tBAx[] = { 
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tBAx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tDUx[] = { +__attribute__((unused)) static const enum base_hw_feature base_hw_features_tDUx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tODx[] = { +__attribute__((unused)) static const enum base_hw_feature base_hw_features_tODx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_L2_CONFIG, @@ -140,7 +146,7 @@ static const enum base_hw_feature base_hw_features_tODx[] = { BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tGRx[] = { +__attribute__((unused)) static const enum base_hw_feature base_hw_features_tGRx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_L2_CONFIG, @@ -148,7 +154,7 @@ static const enum base_hw_feature base_hw_features_tGRx[] = { BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tVAx[] = { +__attribute__((unused)) static const enum base_hw_feature base_hw_features_tVAx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_L2_CONFIG, @@ -156,7 +162,7 @@ static const enum base_hw_feature base_hw_features_tVAx[] = { BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tTUx[] = { +__attribute__((unused)) static const enum base_hw_feature base_hw_features_tTUx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_L2_CONFIG, diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h index d188120..ad45325 100644 --- a/mali_kbase/mali_base_hwconfig_issues.h +++ b/mali_kbase/mali_base_hwconfig_issues.h @@ -63,11 +63,11 @@ enum base_hw_issue { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_generic[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_generic[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_11054, @@ -87,7 +87,7 @@ static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_11054, @@ -107,7 +107,7 @@ static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_11054, @@ -127,7 +127,7 @@ static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_model_tMIx[] = { +__attribute__((unused)) static const enum base_hw_issue 
base_hw_issues_model_tMIx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_7891, @@ -142,7 +142,7 @@ static const enum base_hw_issue base_hw_issues_model_tMIx[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_11054, @@ -155,7 +155,7 @@ static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_11054, @@ -168,7 +168,7 @@ static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_11054, @@ -181,7 +181,7 @@ static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_TMIX_7891, @@ -193,7 +193,7 @@ static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_model_tHEx[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tHEx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_7891, @@ -203,7 +203,7 @@ static const enum base_hw_issue base_hw_issues_model_tHEx[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_11054, BASE_HW_ISSUE_TMIX_8133, @@ -216,7 +216,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_11054, BASE_HW_ISSUE_TMIX_8133, @@ -229,7 +229,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_11054, BASE_HW_ISSUE_TMIX_8133, @@ -241,7 +241,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, @@ -252,7 +252,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_model_tSIx[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tSIx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, @@ -262,7 +262,7 @@ static const enum base_hw_issue base_hw_issues_model_tSIx[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = { +__attribute__((unused)) static const enum base_hw_issue 
base_hw_issues_tDVx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, @@ -273,7 +273,7 @@ static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_model_tDVx[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tDVx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, @@ -283,7 +283,7 @@ static const enum base_hw_issue base_hw_issues_model_tDVx[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, @@ -295,7 +295,7 @@ static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_model_tNOx[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNOx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, @@ -305,7 +305,7 @@ static const enum base_hw_issue base_hw_issues_model_tNOx[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, @@ -317,7 +317,7 @@ static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, @@ -329,7 +329,7 @@ static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_model_tGOx[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGOx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, @@ -339,7 +339,7 @@ static const enum base_hw_issue base_hw_issues_model_tGOx[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, @@ -355,7 +355,7 @@ static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tTRx_r0p1[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p1[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, @@ -371,7 +371,7 @@ static const enum base_hw_issue base_hw_issues_tTRx_r0p1[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tTRx_r0p2[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p2[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, @@ -386,7 +386,7 @@ static const enum base_hw_issue base_hw_issues_tTRx_r0p2[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_model_tTRx[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTRx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, @@ -398,7 +398,7 @@ static const enum base_hw_issue base_hw_issues_model_tTRx[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = { +__attribute__((unused)) static 
const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, @@ -414,7 +414,7 @@ static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tNAx_r0p1[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p1[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, @@ -429,7 +429,7 @@ static const enum base_hw_issue base_hw_issues_tNAx_r0p1[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_model_tNAx[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNAx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, @@ -441,7 +441,7 @@ static const enum base_hw_issue base_hw_issues_model_tNAx[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, @@ -455,7 +455,7 @@ static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tBEx_r0p1[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p1[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, @@ -468,7 +468,7 @@ static const enum base_hw_issue base_hw_issues_tBEx_r0p1[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tBEx_r1p0[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, @@ -481,7 +481,7 @@ static const enum base_hw_issue base_hw_issues_tBEx_r1p0[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tBEx_r1p1[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p1[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, @@ -494,7 +494,7 @@ static const enum base_hw_issue base_hw_issues_tBEx_r1p1[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_model_tBEx[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBEx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, @@ -506,7 +506,7 @@ static const enum base_hw_issue base_hw_issues_model_tBEx[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_lBEx_r1p0[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, @@ -520,7 +520,7 @@ static const enum base_hw_issue base_hw_issues_lBEx_r1p0[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_lBEx_r1p1[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p1[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, @@ -533,7 +533,7 @@ static const enum base_hw_issue base_hw_issues_lBEx_r1p1[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tBAx_r0p0[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, @@ -546,7 +546,7 @@ static const enum base_hw_issue base_hw_issues_tBAx_r0p0[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tBAx_r1p0[] = { 
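The hwconfig feature and issue tables being touched here are static const arrays defined in headers, so every translation unit that includes them gets its own private copy; tagging them __attribute__((unused)) most likely keeps -Wunused-const-variable quiet in files that include the header but never read a given table. A standalone sketch of the same pattern, with hypothetical names not taken from the driver:

/* Sketch: a static const lookup table in a shared header, marked unused
 * so including it without referencing it does not warn.
 */
enum hw_feature { HW_FEATURE_A, HW_FEATURE_B, HW_FEATURE_END };

__attribute__((unused)) static const enum hw_feature hw_features_demo[] = {
    HW_FEATURE_A,
    HW_FEATURE_B,
    HW_FEATURE_END
};

int main(void)
{
    return 0; /* the table is deliberately never read here */
}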
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r1p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, @@ -559,7 +559,7 @@ static const enum base_hw_issue base_hw_issues_tBAx_r1p0[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_model_tBAx[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBAx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, @@ -571,7 +571,7 @@ static const enum base_hw_issue base_hw_issues_model_tBAx[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, @@ -581,7 +581,7 @@ static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_model_tDUx[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tDUx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, @@ -591,7 +591,7 @@ static const enum base_hw_issue base_hw_issues_model_tDUx[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, @@ -599,7 +599,7 @@ static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_model_tODx[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tODx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, @@ -608,14 +608,14 @@ static const enum base_hw_issue base_hw_issues_model_tODx[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_model_tGRx[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGRx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, @@ -623,14 +623,14 @@ static const enum base_hw_issue base_hw_issues_model_tGRx[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_model_tVAx[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVAx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, @@ -638,7 +638,7 @@ static const enum base_hw_issue base_hw_issues_model_tVAx[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_model_tTUx[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTUx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, @@ -646,7 +646,7 @@ static const enum base_hw_issue base_hw_issues_model_tTUx[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = { +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, 
BASE_HW_ISSUE_TTRX_1337, diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c index 2472c7c..0cbbf44 100644 --- a/mali_kbase/mali_kbase_core_linux.c +++ b/mali_kbase/mali_kbase_core_linux.c @@ -31,6 +31,10 @@ #include <ipa/mali_kbase_ipa_debugfs.h> #endif /* CONFIG_DEVFREQ_THERMAL */ #endif /* CONFIG_MALI_DEVFREQ */ +#if IS_ENABLED(CONFIG_MALI_NO_MALI) +#include "backend/gpu/mali_kbase_model_linux.h" +#include <backend/gpu/mali_kbase_model_dummy.h> +#endif /* CONFIG_MALI_NO_MALI */ #include "mali_kbase_mem_profile_debugfs_buf_size.h" #include "mali_kbase_mem.h" #include "mali_kbase_mem_pool_debugfs.h" @@ -52,7 +56,6 @@ #endif #include "mali_kbase_hwcnt_context.h" #include "mali_kbase_hwcnt_virtualizer.h" -#include "mali_kbase_hwcnt_legacy.h" #include "mali_kbase_kinstr_prfcnt.h" #include "mali_kbase_vinstr.h" #if MALI_USE_CSF @@ -60,6 +63,7 @@ #include "csf/mali_kbase_csf_tiler_heap.h" #include "csf/mali_kbase_csf_csg_debugfs.h" #include "csf/mali_kbase_csf_cpu_queue_debugfs.h" +#include "csf/mali_kbase_csf_event.h" #endif #ifdef CONFIG_MALI_ARBITER_SUPPORT #include "arbiter/mali_kbase_arbiter_pm.h" @@ -342,15 +346,6 @@ static void kbase_file_delete(struct kbase_file *const kfile) #if IS_ENABLED(CONFIG_DEBUG_FS) kbasep_mem_profile_debugfs_remove(kctx); #endif - - mutex_lock(&kctx->legacy_hwcnt_lock); - /* If this client was performing hardware counter dumping and - * did not explicitly detach itself, destroy it now - */ - kbase_hwcnt_legacy_client_destroy(kctx->legacy_hwcnt_cli); - kctx->legacy_hwcnt_cli = NULL; - mutex_unlock(&kctx->legacy_hwcnt_lock); - kbase_context_debugfs_term(kctx); kbase_destroy_context(kctx); @@ -905,62 +900,6 @@ static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx, return kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, setup); } -static int kbase_api_hwcnt_enable(struct kbase_context *kctx, - struct kbase_ioctl_hwcnt_enable *enable) -{ - int ret; - - mutex_lock(&kctx->legacy_hwcnt_lock); - if (enable->dump_buffer != 0) { - /* Non-zero dump buffer, so user wants to create the client */ - if (kctx->legacy_hwcnt_cli == NULL) { - ret = kbase_hwcnt_legacy_client_create( - kctx->kbdev->hwcnt_gpu_virt, - enable, - &kctx->legacy_hwcnt_cli); - } else { - /* This context already has a client */ - ret = -EBUSY; - } - } else { - /* Zero dump buffer, so user wants to destroy the client */ - if (kctx->legacy_hwcnt_cli != NULL) { - kbase_hwcnt_legacy_client_destroy( - kctx->legacy_hwcnt_cli); - kctx->legacy_hwcnt_cli = NULL; - ret = 0; - } else { - /* This context has no client to destroy */ - ret = -EINVAL; - } - } - mutex_unlock(&kctx->legacy_hwcnt_lock); - - return ret; -} - -static int kbase_api_hwcnt_dump(struct kbase_context *kctx) -{ - int ret; - - mutex_lock(&kctx->legacy_hwcnt_lock); - ret = kbase_hwcnt_legacy_client_dump(kctx->legacy_hwcnt_cli); - mutex_unlock(&kctx->legacy_hwcnt_lock); - - return ret; -} - -static int kbase_api_hwcnt_clear(struct kbase_context *kctx) -{ - int ret; - - mutex_lock(&kctx->legacy_hwcnt_lock); - ret = kbase_hwcnt_legacy_client_clear(kctx->legacy_hwcnt_cli); - mutex_unlock(&kctx->legacy_hwcnt_lock); - - return ret; -} - static int kbase_api_get_cpu_gpu_timeinfo(struct kbase_context *kctx, union kbase_ioctl_get_cpu_gpu_timeinfo *timeinfo) { @@ -992,6 +931,17 @@ static int kbase_api_get_cpu_gpu_timeinfo(struct kbase_context *kctx, return 0; } +#if IS_ENABLED(CONFIG_MALI_NO_MALI) +static int kbase_api_hwcnt_set(struct kbase_context *kctx, + struct kbase_ioctl_hwcnt_values *values) +{ + 
gpu_model_set_dummy_prfcnt_sample( + (u32 __user *)(uintptr_t)values->data, + values->size); + + return 0; +} +#endif /* CONFIG_MALI_NO_MALI */ static int kbase_api_disjoint_query(struct kbase_context *kctx, struct kbase_ioctl_disjoint_query *query) @@ -1415,6 +1365,30 @@ static int kbasep_cs_queue_kick(struct kbase_context *kctx, return kbase_csf_queue_kick(kctx, kick); } +static int kbasep_cs_queue_group_create_1_6( + struct kbase_context *kctx, + union kbase_ioctl_cs_queue_group_create_1_6 *create) +{ + union kbase_ioctl_cs_queue_group_create + new_create = { .in = { + .tiler_mask = create->in.tiler_mask, + .fragment_mask = + create->in.fragment_mask, + .compute_mask = create->in.compute_mask, + .cs_min = create->in.cs_min, + .priority = create->in.priority, + .tiler_max = create->in.tiler_max, + .fragment_max = create->in.fragment_max, + .compute_max = create->in.compute_max, + } }; + + int ret = kbase_csf_queue_group_create(kctx, &new_create); + + create->out.group_handle = new_create.out.group_handle; + create->out.group_uid = new_create.out.group_uid; + + return ret; +} static int kbasep_cs_queue_group_create(struct kbase_context *kctx, union kbase_ioctl_cs_queue_group_create *create) { @@ -1873,28 +1847,20 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct kbase_ioctl_hwcnt_reader_setup, kctx); break; - case KBASE_IOCTL_HWCNT_ENABLE: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_ENABLE, - kbase_api_hwcnt_enable, - struct kbase_ioctl_hwcnt_enable, - kctx); - break; - case KBASE_IOCTL_HWCNT_DUMP: - KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_DUMP, - kbase_api_hwcnt_dump, - kctx); - break; - case KBASE_IOCTL_HWCNT_CLEAR: - KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_CLEAR, - kbase_api_hwcnt_clear, - kctx); - break; case KBASE_IOCTL_GET_CPU_GPU_TIMEINFO: KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_GET_CPU_GPU_TIMEINFO, kbase_api_get_cpu_gpu_timeinfo, union kbase_ioctl_get_cpu_gpu_timeinfo, kctx); break; +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + case KBASE_IOCTL_HWCNT_SET: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_SET, + kbase_api_hwcnt_set, + struct kbase_ioctl_hwcnt_values, + kctx); + break; +#endif /* CONFIG_MALI_NO_MALI */ #ifdef CONFIG_MALI_CINSTR_GWT case KBASE_IOCTL_CINSTR_GWT_START: KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_START, @@ -1949,6 +1915,12 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct kbase_ioctl_cs_queue_kick, kctx); break; + case KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_6: + KBASE_HANDLE_IOCTL_INOUT( + KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_6, + kbasep_cs_queue_group_create_1_6, + union kbase_ioctl_cs_queue_group_create_1_6, kctx); + break; case KBASE_IOCTL_CS_QUEUE_GROUP_CREATE: KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_QUEUE_GROUP_CREATE, kbasep_cs_queue_group_create, @@ -2048,7 +2020,7 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof if (atomic_read(&kctx->event_count)) read_event = true; else - read_error = kbase_csf_read_error(kctx, &event_data); + read_error = kbase_csf_event_read_error(kctx, &event_data); if (!read_event && !read_error) { bool dump = kbase_csf_cpu_queue_read_dump_req(kctx, @@ -2153,7 +2125,7 @@ int kbase_event_pending(struct kbase_context *ctx) WARN_ON_ONCE(!ctx); return (atomic_read(&ctx->event_count) != 0) || - kbase_csf_error_pending(ctx) || + kbase_csf_event_error_pending(ctx) || kbase_csf_cpu_queue_dump_needed(ctx); } #else @@ -3910,8 +3882,6 @@ static DEVICE_ATTR(js_ctx_scheduling_mode, S_IRUGO | S_IWUSR, show_js_ctx_scheduling_mode, 
set_js_ctx_scheduling_mode); -#ifdef MALI_KBASE_BUILD - /* Number of entries in serialize_jobs_settings[] */ #define NR_SERIALIZE_JOBS_SETTINGS 5 /* Maximum string length in serialize_jobs_settings[].name */ @@ -4126,7 +4096,6 @@ static ssize_t store_serialize_jobs_sysfs(struct device *dev, static DEVICE_ATTR(serialize_jobs, 0600, show_serialize_jobs_sysfs, store_serialize_jobs_sysfs); -#endif /* MALI_KBASE_BUILD */ #endif /* !MALI_USE_CSF */ static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data) @@ -4222,6 +4191,15 @@ void kbase_protected_mode_term(struct kbase_device *kbdev) kfree(kbdev->protected_dev); } +#if IS_ENABLED(CONFIG_MALI_NO_MALI) +static int kbase_common_reg_map(struct kbase_device *kbdev) +{ + return 0; +} +static void kbase_common_reg_unmap(struct kbase_device * const kbdev) +{ +} +#else /* CONFIG_MALI_NO_MALI */ static int kbase_common_reg_map(struct kbase_device *kbdev) { int err = 0; @@ -4257,6 +4235,7 @@ static void kbase_common_reg_unmap(struct kbase_device * const kbdev) kbdev->reg_size = 0; } } +#endif /* CONFIG_MALI_NO_MALI */ int registers_map(struct kbase_device * const kbdev) { @@ -4574,7 +4553,6 @@ void power_control_term(struct kbase_device *kbdev) #endif } -#ifdef MALI_KBASE_BUILD #if IS_ENABLED(CONFIG_DEBUG_FS) static void trigger_reset(struct kbase_device *kbdev) @@ -4847,7 +4825,6 @@ void kbase_device_debugfs_term(struct kbase_device *kbdev) debugfs_remove_recursive(kbdev->mali_debugfs_directory); } #endif /* CONFIG_DEBUG_FS */ -#endif /* MALI_KBASE_BUILD */ int kbase_device_coherency_init(struct kbase_device *kbdev) { @@ -5238,10 +5215,8 @@ static int kbase_platform_device_probe(struct platform_device *pdev) dev_set_drvdata(kbdev->dev, NULL); kbase_device_free(kbdev); } else { -#ifdef MALI_KBASE_BUILD dev_info(kbdev->dev, "Probed as %s\n", dev_name(kbdev->mdev.this_device)); -#endif /* MALI_KBASE_BUILD */ kbase_increment_device_id(); #ifdef CONFIG_MALI_ARBITER_SUPPORT mutex_lock(&kbdev->pm.lock); @@ -5262,7 +5237,7 @@ static int kbase_platform_device_probe(struct platform_device *pdev) * * @dev: The device to suspend * - * Return: A standard Linux error code + * Return: A standard Linux error code on failure, 0 otherwise. */ static int kbase_device_suspend(struct device *dev) { @@ -5271,7 +5246,10 @@ static int kbase_device_suspend(struct device *dev) if (!kbdev) return -ENODEV; - kbase_pm_suspend(kbdev); + if (kbase_pm_suspend(kbdev)) { + dev_warn(kbdev->dev, "Abort suspend as GPU suspension failed"); + return -EBUSY; + } #ifdef CONFIG_MALI_MIDGARD_DVFS kbase_pm_metrics_stop(kbdev); @@ -5512,6 +5490,7 @@ MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \ __stringify(BASE_UK_VERSION_MAJOR) "." 
\ __stringify(BASE_UK_VERSION_MINOR) ")"); MODULE_SOFTDEP("pre: memory_group_manager"); +MODULE_INFO(import_ns, "DMA_BUF"); #define CREATE_TRACE_POINTS /* Create the trace points (otherwise we just get code to call a tracepoint) */ diff --git a/mali_kbase/mali_kbase_ctx_sched.c b/mali_kbase/mali_kbase_ctx_sched.c index d06380d..8026e7f 100644 --- a/mali_kbase/mali_kbase_ctx_sched.c +++ b/mali_kbase/mali_kbase_ctx_sched.c @@ -23,6 +23,9 @@ #include <mali_kbase_defs.h> #include "mali_kbase_ctx_sched.h" #include "tl/mali_kbase_tracepoints.h" +#if !MALI_USE_CSF +#include <mali_kbase_hwaccess_jm.h> +#endif /* Helper for ktrace */ #if KBASE_KTRACE_ENABLE @@ -124,7 +127,6 @@ int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx) kbdev, prev_kctx->id); prev_kctx->as_nr = KBASEP_AS_NR_INVALID; } - kctx->as_nr = free_as; kbdev->as_to_kctx[free_as] = kctx; KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS( @@ -173,6 +175,9 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx) kbdev->as_to_kctx[kctx->as_nr] = NULL; kctx->as_nr = KBASEP_AS_NR_INVALID; kbase_ctx_flag_clear(kctx, KCTX_AS_DISABLED_ON_FAULT); +#if !MALI_USE_CSF + kbase_backend_slot_kctx_purge_locked(kbdev, kctx); +#endif } } diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h index 5b1fdd3..86e4042 100644 --- a/mali_kbase/mali_kbase_defs.h +++ b/mali_kbase/mali_kbase_defs.h @@ -742,6 +742,7 @@ struct kbase_process { * @hwcnt.addr: HW counter address * @hwcnt.addr_bytes: HW counter size in bytes * @hwcnt.backend: Kbase instrumentation backend + * @hwcnt_watchdog_timer: Hardware counter watchdog interface. * @hwcnt_gpu_iface: Backend interface for GPU hardware counter access. * @hwcnt_gpu_ctx: Context for GPU hardware counter access. * @hwaccess_lock must be held when calling @@ -770,8 +771,8 @@ struct kbase_process { * @cache_clean_in_progress: Set when a cache clean has been started, and * cleared when it has finished. This prevents multiple * cache cleans being done simultaneously. - * @cache_clean_queued: Set if a cache clean is invoked while another is in - * progress. If this happens, another cache clean needs + * @cache_clean_queued: Pended cache clean operations invoked while another is + * in progress. If this is not 0, another cache clean needs * to be triggered immediately after completion of the * current one. * @cache_clean_wait: Signalled when a cache clean has finished. @@ -979,6 +980,15 @@ struct kbase_device { char devname[DEVNAME_SIZE]; u32 id; +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + void *model; + struct kmem_cache *irq_slab; + struct workqueue_struct *irq_workq; + atomic_t serving_job_irq; + atomic_t serving_gpu_irq; + atomic_t serving_mmu_irq; + spinlock_t reg_op_lock; +#endif /* CONFIG_MALI_NO_MALI */ struct kbase_pm_device_data pm; struct kbase_mem_pool_group mem_pools; @@ -1008,6 +1018,7 @@ struct kbase_device { #if MALI_USE_CSF struct kbase_hwcnt_backend_csf_if hwcnt_backend_csf_if_fw; + struct kbase_hwcnt_watchdog_interface hwcnt_watchdog_timer; #else struct kbase_hwcnt { spinlock_t lock; @@ -1037,7 +1048,7 @@ struct kbase_device { u64 lowest_gpu_freq_khz; bool cache_clean_in_progress; - bool cache_clean_queued; + u32 cache_clean_queued; wait_queue_head_t cache_clean_wait; void *platform_context; @@ -1205,6 +1216,7 @@ struct kbase_device { struct priority_control_manager_device *pcm_dev; struct notifier_block oom_notifier_block; + }; /** @@ -1562,6 +1574,12 @@ struct kbase_sub_alloc { * pages used for GPU allocations, done for the context, * to the memory consumed by the process. 
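The kbase_device change above turns cache_clean_queued from a bool into a count, so cache-clean requests that arrive while a flush is already in progress are all remembered instead of being collapsed into one pending flag. A minimal userspace sketch of that counter pattern follows; the types and functions are toy stand-ins, not the driver's locking or hardware path:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Toy model of the cache_clean_in_progress / cache_clean_queued pair,
 * with the queued field widened to a counter as in the diff.
 */
struct cache_clean_state {
    bool in_progress;
    uint32_t queued;
};

static void request_clean(struct cache_clean_state *s)
{
    if (s->in_progress)
        s->queued++;            /* remember every overlapping request */
    else
        s->in_progress = true;  /* start the (simulated) flush */
}

static void clean_done(struct cache_clean_state *s)
{
    if (s->queued) {
        s->queued--;            /* trigger the next queued clean */
        return;
    }
    s->in_progress = false;
}

int main(void)
{
    struct cache_clean_state s = { 0 };

    request_clean(&s);
    request_clean(&s);
    request_clean(&s);
    printf("in_progress=%d queued=%u\n", s.in_progress, (unsigned)s.queued);
    clean_done(&s);
    clean_done(&s);
    clean_done(&s);
    printf("in_progress=%d queued=%u\n", s.in_progress, (unsigned)s.queued);
    return 0;
}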
* @gpu_va_end: End address of the GPU va space (in 4KB page units) + * @running_total_tiler_heap_nr_chunks: Running total of number of chunks in all + * tiler heaps of the kbase context. + * @running_total_tiler_heap_memory: Running total of the tiler heap memory in the + * kbase context. + * @peak_total_tiler_heap_memory: Peak value of the total tiler heap memory in the + * kbase context. * @jit_va: Indicates if a JIT_VA zone has been created. * @mem_profile_data: Buffer containing the profiling information provided by * Userspace, can be read through the mem_profile debugfs file. @@ -1588,11 +1606,6 @@ struct kbase_sub_alloc { * @slots_pullable: Bitmask of slots, indicating the slots for which the * context has pullable atoms in the runnable tree. * @work: Work structure used for deferred ASID assignment. - * @legacy_hwcnt_cli: Pointer to the legacy userspace hardware counters - * client, there can be only such client per kbase - * context. - * @legacy_hwcnt_lock: Lock used to prevent concurrent access to - * @legacy_hwcnt_cli. * @completed_jobs: List containing completed atoms for which base_jd_event is * to be posted. * @work_count: Number of work items, corresponding to atoms, currently @@ -1775,6 +1788,11 @@ struct kbase_context { spinlock_t mm_update_lock; struct mm_struct __rcu *process_mm; u64 gpu_va_end; +#if MALI_USE_CSF + u32 running_total_tiler_heap_nr_chunks; + u64 running_total_tiler_heap_memory; + u64 peak_total_tiler_heap_memory; +#endif bool jit_va; #if IS_ENABLED(CONFIG_DEBUG_FS) @@ -1788,10 +1806,6 @@ struct kbase_context { struct list_head job_fault_resume_event_list; #endif /* CONFIG_DEBUG_FS */ - - struct kbase_hwcnt_legacy_client *legacy_hwcnt_cli; - struct mutex legacy_hwcnt_lock; - struct kbase_va_region *jit_alloc[1 + BASE_JIT_ALLOC_COUNT]; u8 jit_max_allocations; u8 jit_current_allocations; diff --git a/mali_kbase/mali_kbase_gpuprops.c b/mali_kbase/mali_kbase_gpuprops.c index 967c08e..b5ba642 100644 --- a/mali_kbase/mali_kbase_gpuprops.c +++ b/mali_kbase/mali_kbase_gpuprops.c @@ -371,6 +371,7 @@ static void kbase_gpuprops_calculate_props( gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; #if MALI_USE_CSF + CSTD_UNUSED(gpu_id); gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 22); diff --git a/mali_kbase/mali_kbase_hwaccess_instr.h b/mali_kbase/mali_kbase_hwaccess_instr.h index 819ca13..3766310 100644 --- a/mali_kbase/mali_kbase_hwaccess_instr.h +++ b/mali_kbase/mali_kbase_hwaccess_instr.h @@ -144,4 +144,27 @@ void kbase_instr_backend_term(struct kbase_device *kbdev); void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev); #endif +/** + * kbase_instr_hwcnt_on_unrecoverable_error() - JM HWC instr backend function + * called when unrecoverable errors + * are detected. + * @kbdev: Kbase device + * + * This should be called on encountering errors that can only be recovered from + * with reset, or that may put HWC logic in state that could result in hang. For + * example, when HW becomes unresponsive. + * + * Caller requires kbdev->hwaccess_lock held. + */ +void kbase_instr_hwcnt_on_unrecoverable_error(struct kbase_device *kbdev); + +/** + * kbase_instr_hwcnt_on_before_reset() - JM HWC instr backend function to be + * called immediately before a reset. + * Takes us out of the unrecoverable + * error state, if we were in it. 
+ * @kbdev: Kbase device + */ +void kbase_instr_hwcnt_on_before_reset(struct kbase_device *kbdev); + #endif /* _KBASE_HWACCESS_INSTR_H_ */ diff --git a/mali_kbase/mali_kbase_hwaccess_jm.h b/mali_kbase/mali_kbase_hwaccess_jm.h index 8689647..d0207f7 100644 --- a/mali_kbase/mali_kbase_hwaccess_jm.h +++ b/mali_kbase/mali_kbase_hwaccess_jm.h @@ -299,4 +299,21 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, */ bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev); +/** + * kbase_backend_slot_kctx_purge_locked - Perform a purge on the slot_rb tracked + * kctx + * + * @kbdev: Device pointer + * @kctx: The kbase context that needs to be purged from slot_rb[] + * + * For JM GPUs, the L1 read only caches may need a start_flush invalidation, + * potentially on all slots (even if the kctx was only using a single slot), + * following a context termination or address-space ID recycle. This function + * performs a clean-up purge on the given kctx which if it has been tracked by + * slot_rb[] objects. + * + * Caller must hold kbase_device->hwaccess_lock. + */ +void kbase_backend_slot_kctx_purge_locked(struct kbase_device *kbdev, struct kbase_context *kctx); + #endif /* _KBASE_HWACCESS_JM_H_ */ diff --git a/mali_kbase/mali_kbase_hwaccess_pm.h b/mali_kbase/mali_kbase_hwaccess_pm.h index 36bbe2d..a8e4b95 100644 --- a/mali_kbase/mali_kbase_hwaccess_pm.h +++ b/mali_kbase/mali_kbase_hwaccess_pm.h @@ -85,8 +85,10 @@ void kbase_hwaccess_pm_halt(struct kbase_device *kbdev); * Perform any backend-specific actions to suspend the GPU * * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: 0 if suspend was successful. */ -void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev); +int kbase_hwaccess_pm_suspend(struct kbase_device *kbdev); /** * Perform any backend-specific actions to resume the GPU from a suspend diff --git a/mali_kbase/mali_kbase_hwcnt.c b/mali_kbase/mali_kbase_hwcnt.c index ea4893d..1fa6640 100644 --- a/mali_kbase/mali_kbase_hwcnt.c +++ b/mali_kbase/mali_kbase_hwcnt.c @@ -158,7 +158,6 @@ int kbase_hwcnt_context_init( return 0; - destroy_workqueue(hctx->wq); err_alloc_workqueue: kfree(hctx); err_alloc_hctx: diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf.c b/mali_kbase/mali_kbase_hwcnt_backend_csf.c index 7ba1671..4602138 100644 --- a/mali_kbase/mali_kbase_hwcnt_backend_csf.c +++ b/mali_kbase/mali_kbase_hwcnt_backend_csf.c @@ -36,16 +36,24 @@ #define BASE_MAX_NR_CLOCKS_REGULATORS 2 #endif +/* Backend watch dog timer interval in milliseconds: 1 second. */ +#define HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS ((u32)1000) + /** * enum kbase_hwcnt_backend_csf_dump_state - HWC CSF backend dumping states. * * @KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE: Initial state, or the state if there is * an error. * - * @KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED: A dump has been requested and we are - * waiting for an ACK, this ACK could come from either PRFCNT_ACK, + * @KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED: A user dump has been requested and + * we are waiting for an ACK, this ACK could come from either PRFCNT_ACK, * PROTMODE_ENTER_ACK, or if an error occurs. * + * @KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED: A watchdog dump has been + * requested and we're waiting for an ACK - this ACK could come from either + * PRFCNT_ACK, or if an error occurs, PROTMODE_ENTER_ACK is not applied here + * since watchdog request can't be triggered in protected mode. 
+ * * @KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT: Checking the insert * immediately after receiving the ACK, so we know which index corresponds to * the buffer we requested. @@ -60,18 +68,25 @@ * @KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED: The dump completed successfully. * * Valid state transitions: - * IDLE -> REQUESTED (on dump request) - * REQUESTED -> QUERYING_INSERT (on dump ack) + * IDLE -> REQUESTED (on user dump request) + * IDLE -> WATCHDOG_REQUESTED (on watchdog request) + * IDLE -> QUERYING_INSERT (on user dump request in protected mode) + * REQUESTED -> QUERYING_INSERT (on dump acknowledged from firmware) + * WATCHDOG_REQUESTED -> REQUESTED (on user dump request) + * WATCHDOG_REQUESTED -> COMPLETED (on dump acknowledged from firmware for watchdog request) * QUERYING_INSERT -> WORKER_LAUNCHED (on worker submission) * WORKER_LAUNCHED -> ACCUMULATING (while the worker is accumulating) * ACCUMULATING -> COMPLETED (on accumulation completion) - * COMPLETED -> REQUESTED (on dump request) + * COMPLETED -> QUERYING_INSERT (on user dump request in protected mode) + * COMPLETED -> REQUESTED (on user dump request) + * COMPLETED -> WATCHDOG_REQUESTED (on watchdog request) * COMPLETED -> IDLE (on disable) * ANY -> IDLE (on error) */ enum kbase_hwcnt_backend_csf_dump_state { KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE, KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED, + KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED, KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT, KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED, KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING, @@ -136,6 +151,7 @@ enum kbase_hwcnt_backend_csf_enable_state { * @counter_set: The performance counter set to use. * @metadata: Hardware counter metadata. * @prfcnt_info: Performance counter information. + * @watchdog_if: Watchdog interface object pointer. */ struct kbase_hwcnt_backend_csf_info { struct kbase_hwcnt_backend_csf *backend; @@ -146,6 +162,7 @@ struct kbase_hwcnt_backend_csf_info { enum kbase_hwcnt_set counter_set; const struct kbase_hwcnt_metadata *metadata; struct kbase_hwcnt_backend_csf_if_prfcnt_info prfcnt_info; + struct kbase_hwcnt_watchdog_interface *watchdog_if; }; /** @@ -192,6 +209,10 @@ struct kbase_hwcnt_csf_physical_layout { * @old_sample_buf: HWC sample buffer to save the previous values * for delta calculation, size * prfcnt_info.dump_bytes. + * @watchdog_last_seen_insert_idx: The insert index which watchdog has last + * seen, to check any new firmware automatic + * samples generated during the watchdog + * period. * @ring_buf: Opaque pointer for ring buffer object. * @ring_buf_cpu_base: CPU base address of the allocated ring buffer. * @clk_enable_map: The enable map specifying enabled clock domains. @@ -204,6 +225,8 @@ struct kbase_hwcnt_csf_physical_layout { * it is completed accumulating up to the * insert_index_to_accumulate. * Should be initialized to the "complete" state. + * @user_requested: Flag to indicate a dump_request called from + * user. * @hwc_dump_workq: Single threaded work queue for HWC workers * execution. * @hwc_dump_work: Worker to accumulate samples. 
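The "Valid state transitions" list documented above reads as a small state machine. The sketch below encodes exactly those documented transitions in a hypothetical checker function that is not part of the driver, with the fallback of any state to IDLE on error included:

#include <stdbool.h>
#include <stdio.h>

enum dump_state {
    DUMP_IDLE,
    DUMP_REQUESTED,
    DUMP_WATCHDOG_REQUESTED,
    DUMP_QUERYING_INSERT,
    DUMP_WORKER_LAUNCHED,
    DUMP_ACCUMULATING,
    DUMP_COMPLETED,
};

/* Returns true if the documented transition list allows from -> to. */
static bool dump_transition_is_valid(enum dump_state from, enum dump_state to)
{
    if (to == DUMP_IDLE)
        return true; /* ANY -> IDLE (on error or disable) */

    switch (from) {
    case DUMP_IDLE:
        return to == DUMP_REQUESTED || to == DUMP_WATCHDOG_REQUESTED ||
               to == DUMP_QUERYING_INSERT;
    case DUMP_REQUESTED:
        return to == DUMP_QUERYING_INSERT;
    case DUMP_WATCHDOG_REQUESTED:
        return to == DUMP_REQUESTED || to == DUMP_COMPLETED;
    case DUMP_QUERYING_INSERT:
        return to == DUMP_WORKER_LAUNCHED;
    case DUMP_WORKER_LAUNCHED:
        return to == DUMP_ACCUMULATING;
    case DUMP_ACCUMULATING:
        return to == DUMP_COMPLETED;
    case DUMP_COMPLETED:
        return to == DUMP_QUERYING_INSERT || to == DUMP_REQUESTED ||
               to == DUMP_WATCHDOG_REQUESTED;
    default:
        return false;
    }
}

int main(void)
{
    /* A user request may take over a pending watchdog dump... */
    printf("%d\n", dump_transition_is_valid(DUMP_WATCHDOG_REQUESTED, DUMP_REQUESTED));
    /* ...but a user request cannot jump straight to COMPLETED. */
    printf("%d\n", dump_transition_is_valid(DUMP_REQUESTED, DUMP_COMPLETED));
    return 0;
}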
@@ -219,6 +242,7 @@ struct kbase_hwcnt_backend_csf { u64 *to_user_buf; u64 *accum_buf; u32 *old_sample_buf; + u32 watchdog_last_seen_insert_idx; struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf; void *ring_buf_cpu_base; u64 clk_enable_map; @@ -226,6 +250,7 @@ struct kbase_hwcnt_backend_csf { u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS]; struct kbase_hwcnt_csf_physical_layout phys_layout; struct completion dump_completed; + bool user_requested; struct workqueue_struct *hwc_dump_workq; struct work_struct hwc_dump_work; struct work_struct hwc_threshold_work; @@ -594,6 +619,10 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples( backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); backend_csf->info->csf_if->set_extract_index( backend_csf->info->csf_if->ctx, insert_index_to_stop); + /* Update the watchdog last seen index to check any new FW auto samples + * in next watchdog callback. + */ + backend_csf->watchdog_last_seen_insert_idx = insert_index_to_stop; backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); } @@ -612,6 +641,67 @@ static void kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( } } +static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info) +{ + struct kbase_hwcnt_backend_csf_info *csf_info = info; + struct kbase_hwcnt_backend_csf *backend_csf; + unsigned long flags; + + csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags); + + if (WARN_ON(!kbasep_hwcnt_backend_csf_backend_exists(csf_info))) { + csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); + return; + } + + backend_csf = csf_info->backend; + + /* Only do watchdog request when all conditions are met: */ + if (/* 1. Backend is enabled. */ + (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) && + /* 2. FW is not in protected mode. */ + (!csf_info->fw_in_protected_mode) && + /* 3. dump state indicates no other dumping is in progress. */ + ((backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) || + (backend_csf->dump_state == + KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED))) { + u32 extract_index; + u32 insert_index; + + /* Read the raw extract and insert indexes from the CSF interface. */ + csf_info->csf_if->get_indexes(csf_info->csf_if->ctx, + &extract_index, &insert_index); + + /* Do watchdog request if no new FW auto samples. */ + if (insert_index == + backend_csf->watchdog_last_seen_insert_idx) { + /* Trigger the watchdog request. */ + csf_info->csf_if->dump_request(csf_info->csf_if->ctx); + + /* A watchdog dump is required, change the state to + * start the request process. + */ + backend_csf->dump_state = + KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED; + } + } + + /* Must schedule another callback when in the transitional state because + * this function can be called for the first time before the performance + * counter enabled interrupt. + */ + if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) || + (backend_csf->enable_state == + KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED)) { + /* Reschedule the timer for next watchdog callback. */ + csf_info->watchdog_if->modify( + csf_info->watchdog_if->timer, + HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS); + } + + csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); +} + /** * kbasep_hwcnt_backend_csf_dump_worker() - HWC dump worker. * @work: Work structure. 
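The new watchdog callback only requests a counter dump when the backend is enabled, the firmware is outside protected mode, no other dump is mid-flight, and the ring-buffer insert index has not advanced since the previous tick (i.e. no automatic firmware sample arrived on its own). A self-contained sketch of that decision, with plain arguments standing in for the locked backend state rather than the driver's real structures:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Decide whether a watchdog tick should ask firmware for a sample. */
static bool watchdog_needs_dump(uint32_t insert_idx, uint32_t last_seen_idx,
                                bool enabled, bool in_protected_mode,
                                bool dump_in_flight)
{
    if (!enabled || in_protected_mode || dump_in_flight)
        return false;

    /* Only request when no new firmware auto sample has appeared. */
    return insert_idx == last_seen_idx;
}

int main(void)
{
    /* Insert index unchanged since last tick: request a watchdog dump. */
    printf("%d\n", watchdog_needs_dump(7, 7, true, false, false));
    /* Firmware already produced a sample on its own: nothing to do. */
    printf("%d\n", watchdog_needs_dump(8, 7, true, false, false));
    return 0;
}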
@@ -826,6 +916,7 @@ static int kbasep_hwcnt_backend_csf_dump_enable_nolock( struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; struct kbase_hwcnt_backend_csf_if_enable enable; + int err; if (!backend_csf || !enable_map || (enable_map->metadata != backend_csf->info->metadata)) @@ -841,6 +932,13 @@ static int kbasep_hwcnt_backend_csf_dump_enable_nolock( if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED) return -EIO; + err = backend_csf->info->watchdog_if->enable( + backend_csf->info->watchdog_if->timer, + HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS, + kbasep_hwcnt_backend_watchdog_timer_cb, backend_csf->info); + if (err) + return err; + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; WARN_ON(!completion_done(&backend_csf->dump_completed)); kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( @@ -948,6 +1046,13 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); + /* Deregister the timer and block until any timer callback has completed. + * We've transitioned out of the ENABLED state so we can guarantee it + * won't reschedule itself. + */ + backend_csf->info->watchdog_if->disable( + backend_csf->info->watchdog_if->timer); + /* Block until any async work has completed. We have transitioned out of * the ENABLED state so we can guarantee no new work will concurrently * be submitted. @@ -978,6 +1083,9 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) break; } + backend_csf->user_requested = false; + backend_csf->watchdog_last_seen_insert_idx = 0; + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); @@ -1006,6 +1114,7 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend, struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; bool do_request = false; + bool watchdog_dumping = false; if (!backend_csf) return -EINVAL; @@ -1022,6 +1131,7 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend, KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend); kbasep_hwcnt_backend_csf_cc_update(backend_csf); + backend_csf->user_requested = true; backend_csf->info->csf_if->unlock( backend_csf->info->csf_if->ctx, flags); return 0; @@ -1035,11 +1145,21 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend, } /* Make sure that this is either the first request since enable or the - * previous dump has completed, so we can avoid midway through a dump. + * previous user dump has completed or a watchdog dump is in progress, + * so we can avoid midway through a user dump. + * If user request comes while a watchdog dumping is in progress, + * the user request takes the ownership of the watchdog dumping sample by + * changing the dump_state so the interrupt for the watchdog + * request can be processed instead of ignored. */ if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) && (backend_csf->dump_state != - KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)) { + KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) && + (backend_csf->dump_state != + KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED)) { + /* HWC is disabled or another user dump is ongoing, + * or we're on fault. 
+ */ backend_csf->info->csf_if->unlock( backend_csf->info->csf_if->ctx, flags); /* HWC is disabled or another dump is ongoing, or we are on @@ -1051,6 +1171,10 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend, /* Reset the completion so dump_wait() has something to wait on. */ reinit_completion(&backend_csf->dump_completed); + if (backend_csf->dump_state == + KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED) + watchdog_dumping = true; + if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) && !backend_csf->info->fw_in_protected_mode) { /* Only do the request if we are fully enabled and not in @@ -1078,15 +1202,29 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend, *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend); kbasep_hwcnt_backend_csf_cc_update(backend_csf); + backend_csf->user_requested = true; - if (do_request) - backend_csf->info->csf_if->dump_request( - backend_csf->info->csf_if->ctx); - else + if (do_request) { + /* If a watchdog dumping is in progress, don't need to do + * another request, just update the dump_state and take the + * ownership of the sample which watchdog requested. + */ + if (!watchdog_dumping) + backend_csf->info->csf_if->dump_request( + backend_csf->info->csf_if->ctx); + } else kbase_hwcnt_backend_csf_submit_dump_worker(backend_csf->info); backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); + + /* Modify watchdog timer to delay the regular check time since + * just requested. + */ + backend_csf->info->watchdog_if->modify( + backend_csf->info->watchdog_if->timer, + HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS); + return 0; } @@ -1105,11 +1243,18 @@ kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend) wait_for_completion(&backend_csf->dump_completed); backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); - /* Make sure the last dump actually succeeded. */ - errcode = (backend_csf->dump_state == - KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) ? - 0 : - -EIO; + /* Make sure the last dump actually succeeded when user requested is + * set. + */ + if (backend_csf->user_requested && + ((backend_csf->dump_state == + KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) || + (backend_csf->dump_state == + KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED))) + errcode = 0; + else + errcode = -EIO; + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); @@ -1155,13 +1300,16 @@ static int kbasep_hwcnt_backend_csf_dump_get( (dst_enable_map->metadata != dst->metadata)) return -EINVAL; + /* Extract elapsed cycle count for each clock domain if enabled. */ kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) { if (!kbase_hwcnt_clk_enable_map_enabled( dst_enable_map->clk_enable_map, clk)) continue; - /* Extract elapsed cycle count for each clock domain. */ - dst->clk_cnt_buf[clk] = backend_csf->cycle_count_elapsed[clk]; + /* Reset the counter to zero if accumulation is off. 
*/ + if (!accumulate) + dst->clk_cnt_buf[clk] = 0; + dst->clk_cnt_buf[clk] += backend_csf->cycle_count_elapsed[clk]; } /* We just return the user buffer without checking the current state, @@ -1279,6 +1427,8 @@ kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info, backend_csf->enable_state = KBASE_HWCNT_BACKEND_CSF_DISABLED; backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; complete_all(&backend_csf->dump_completed); + backend_csf->user_requested = false; + backend_csf->watchdog_last_seen_insert_idx = 0; *out_backend = backend_csf; return 0; @@ -1401,38 +1551,41 @@ static void kbasep_hwcnt_backend_csf_info_destroy( * used to create backend interface. * @ring_buf_cnt: The buffer count of the CSF hwcnt backend ring buffer. * MUST be power of 2. + * @watchdog_if: Non-NULL pointer to a hwcnt watchdog interface structure used to create + * backend interface. * @out_info: Non-NULL pointer to where info is stored on success. * @return 0 on success, else error code. */ static int kbasep_hwcnt_backend_csf_info_create( struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt, + struct kbase_hwcnt_watchdog_interface *watchdog_if, const struct kbase_hwcnt_backend_csf_info **out_info) { struct kbase_hwcnt_backend_csf_info *info = NULL; - WARN_ON(!csf_if); - WARN_ON(!out_info); - WARN_ON(!is_power_of_2(ring_buf_cnt)); + if (WARN_ON(!csf_if) || WARN_ON(!watchdog_if) || WARN_ON(!out_info) || + WARN_ON(!is_power_of_2(ring_buf_cnt))) + return -EINVAL; - info = kzalloc(sizeof(*info), GFP_KERNEL); + info = kmalloc(sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; + *info = (struct kbase_hwcnt_backend_csf_info) + { #if defined(CONFIG_MALI_PRFCNT_SET_SECONDARY) - info->counter_set = KBASE_HWCNT_SET_SECONDARY; + .counter_set = KBASE_HWCNT_SET_SECONDARY, #elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY) - info->counter_set = KBASE_HWCNT_SET_TERTIARY; + .counter_set = KBASE_HWCNT_SET_TERTIARY, #else - /* Default to primary */ - info->counter_set = KBASE_HWCNT_SET_PRIMARY; + /* Default to primary */ + .counter_set = KBASE_HWCNT_SET_PRIMARY, #endif - - info->backend = NULL; - info->csf_if = csf_if; - info->ring_buf_cnt = ring_buf_cnt; - info->fw_in_protected_mode = false; - info->unrecoverable_error_happened = false; - + .backend = NULL, .csf_if = csf_if, .ring_buf_cnt = ring_buf_cnt, + .fw_in_protected_mode = false, + .unrecoverable_error_happened = false, + .watchdog_if = watchdog_if, + }; *out_info = info; return 0; @@ -1653,6 +1806,14 @@ void kbase_hwcnt_backend_csf_on_prfcnt_sample( return; backend_csf = csf_info->backend; + /* Skip the dump_work if it's a watchdog request. */ + if (backend_csf->dump_state == + KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED) { + backend_csf->dump_state = + KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; + return; + } + /* If the current state is not REQUESTED, this HWC sample will be * skipped and processed in next dump_request. 
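The dump_get paths now clear each enabled clock domain's cycle counter before adding the elapsed value when accumulation is off, so a single += statement serves both the accumulate and overwrite cases. A small standalone sketch of that handling; the array size and values here are made up for illustration:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NUM_CLK_DOMAINS 2

/* Mirror of the clk_cnt_buf update pattern: reset first when not
 * accumulating, then add the elapsed cycle count in either case.
 */
static void extract_cycle_counts(uint64_t *dst, const uint64_t *elapsed,
                                 bool accumulate)
{
    unsigned clk;

    for (clk = 0; clk < NUM_CLK_DOMAINS; clk++) {
        if (!accumulate)
            dst[clk] = 0;
        dst[clk] += elapsed[clk];
    }
}

int main(void)
{
    uint64_t dst[NUM_CLK_DOMAINS] = { 100, 200 };
    const uint64_t elapsed[NUM_CLK_DOMAINS] = { 10, 20 };

    extract_cycle_counts(dst, elapsed, true);  /* accumulate: 110, 220 */
    extract_cycle_counts(dst, elapsed, false); /* overwrite: 10, 20 */
    printf("%llu %llu\n", (unsigned long long)dst[0], (unsigned long long)dst[1]);
    return 0;
}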
*/ @@ -1831,14 +1992,15 @@ void kbase_hwcnt_backend_csf_metadata_term( } } -int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, - u32 ring_buf_cnt, - struct kbase_hwcnt_backend_interface *iface) +int kbase_hwcnt_backend_csf_create( + struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt, + struct kbase_hwcnt_watchdog_interface *watchdog_if, + struct kbase_hwcnt_backend_interface *iface) { int errcode; const struct kbase_hwcnt_backend_csf_info *info = NULL; - if (!iface || !csf_if) + if (!iface || !csf_if || !watchdog_if) return -EINVAL; /* The buffer count must be power of 2 */ @@ -1846,7 +2008,7 @@ int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, return -EINVAL; errcode = kbasep_hwcnt_backend_csf_info_create(csf_if, ring_buf_cnt, - &info); + watchdog_if, &info); if (errcode) return errcode; diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf.h b/mali_kbase/mali_kbase_hwcnt_backend_csf.h index bfdf140..e0cafbe 100644 --- a/mali_kbase/mali_kbase_hwcnt_backend_csf.h +++ b/mali_kbase/mali_kbase_hwcnt_backend_csf.h @@ -29,6 +29,7 @@ #include "mali_kbase_hwcnt_backend.h" #include "mali_kbase_hwcnt_backend_csf_if.h" +#include "mali_kbase_hwcnt_watchdog_if.h" /** * kbase_hwcnt_backend_csf_create() - Create a CSF hardware counter backend @@ -37,6 +38,8 @@ * used to create backend interface. * @ring_buf_cnt: The buffer count of CSF hwcnt backend, used when allocate ring * buffer, MUST be power of 2. + * @watchdog_if: Non-NULL pointer to a hwcnt watchdog interface structure used + * to create backend interface. * @iface: Non-NULL pointer to backend interface structure that is filled * in on creation success. * @@ -44,9 +47,10 @@ * * Return: 0 on success, else error code. */ -int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, - u32 ring_buf_cnt, - struct kbase_hwcnt_backend_interface *iface); +int kbase_hwcnt_backend_csf_create( + struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt, + struct kbase_hwcnt_watchdog_interface *watchdog_if, + struct kbase_hwcnt_backend_interface *iface); /** * kbase_hwcnt_backend_csf_metadata_init() - Initialize the metadata for a CSF diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c index 124224d..40cf6bb 100644 --- a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c +++ b/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c @@ -38,6 +38,9 @@ #include <linux/log2.h> #include "mali_kbase_ccswe.h" +#if IS_ENABLED(CONFIG_MALI_NO_MALI) +#include <backend/gpu/mali_kbase_model_dummy.h> +#endif /* CONFIG_MALI_NO_MALI */ /** The number of nanoseconds in a second. 
*/ #define NSECS_IN_SEC 1000000000ull /* ns */ @@ -217,6 +220,26 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( struct kbase_hwcnt_backend_csf_if_ctx *ctx, struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info) { +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + size_t dummy_model_blk_count; + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = + (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + + prfcnt_info->l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS; + prfcnt_info->core_mask = + (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1; + /* 1 FE block + 1 Tiler block + l2_count blocks + shader_core blocks */ + dummy_model_blk_count = + 2 + prfcnt_info->l2_count + fls64(prfcnt_info->core_mask); + prfcnt_info->dump_bytes = + dummy_model_blk_count * KBASE_DUMMY_MODEL_BLOCK_SIZE; + prfcnt_info->prfcnt_block_size = + KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK * + KBASE_HWCNT_VALUE_HW_BYTES; + prfcnt_info->clk_cnt = 1; + prfcnt_info->clearing_samples = true; + fw_ctx->buf_bytes = prfcnt_info->dump_bytes; +#else struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; struct kbase_device *kbdev; u32 prfcnt_size; @@ -261,6 +284,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( /* Total size must be multiple of block size. */ WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) != 0); +#endif } static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( @@ -355,6 +379,11 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( *out_ring_buf = (struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf; +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + /* The dummy model needs the CPU mapping. */ + gpu_model_set_dummy_prfcnt_base_cpu(fw_ring_buf->cpu_dump_base, kbdev, + phys, num_pages); +#endif /* CONFIG_MALI_NO_MALI */ return 0; diff --git a/mali_kbase/mali_kbase_hwcnt_backend_jm.c b/mali_kbase/mali_kbase_hwcnt_backend_jm.c index 56bb1b6..d041391 100644 --- a/mali_kbase/mali_kbase_hwcnt_backend_jm.c +++ b/mali_kbase/mali_kbase_hwcnt_backend_jm.c @@ -28,6 +28,9 @@ #include "mali_kbase_hwaccess_time.h" #include "mali_kbase_ccswe.h" +#if IS_ENABLED(CONFIG_MALI_NO_MALI) +#include "backend/gpu/mali_kbase_model_dummy.h" +#endif /* CONFIG_MALI_NO_MALI */ #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" #include "backend/gpu/mali_kbase_pm_internal.h" @@ -140,6 +143,11 @@ kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev, if (!kbdev || !info) return -EINVAL; +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + info->l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS; + info->core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1; + info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK; +#else /* CONFIG_MALI_NO_MALI */ { const struct base_gpu_props *props = &kbdev->gpu_props.props; const size_t l2_count = props->l2_props.num_l2_slices; @@ -151,6 +159,7 @@ kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev, info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK; } +#endif /* CONFIG_MALI_NO_MALI */ /* Determine the number of available clock domains. 
*/ for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) { @@ -569,6 +578,11 @@ static int kbasep_hwcnt_backend_jm_dump_get( struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; size_t clk; +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + struct kbase_device *kbdev; + unsigned long flags; + int errcode; +#endif /* CONFIG_MALI_NO_MALI */ if (!backend_jm || !dst || !dst_enable_map || (backend_jm->info->metadata != dst->metadata) || @@ -582,15 +596,32 @@ static int kbasep_hwcnt_backend_jm_dump_get( /* Dump sample to the internal 64-bit user buffer. */ kbasep_hwcnt_backend_jm_dump_sample(backend_jm); + /* Extract elapsed cycle count for each clock domain if enabled. */ kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) { if (!kbase_hwcnt_clk_enable_map_enabled( dst_enable_map->clk_enable_map, clk)) continue; - /* Extract elapsed cycle count for each clock domain. */ - dst->clk_cnt_buf[clk] = backend_jm->cycle_count_elapsed[clk]; + /* Reset the counter to zero if accumulation is off. */ + if (!accumulate) + dst->clk_cnt_buf[clk] = 0; + dst->clk_cnt_buf[clk] += backend_jm->cycle_count_elapsed[clk]; } +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + kbdev = backend_jm->kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + /* Update the current configuration information. */ + errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, + &backend_jm->curr_config); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (errcode) + return errcode; +#endif /* CONFIG_MALI_NO_MALI */ return kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf, dst_enable_map, backend_jm->pm_core_mask, &backend_jm->curr_config, accumulate); @@ -700,6 +731,9 @@ static int kbasep_hwcnt_backend_jm_create( int errcode; struct kbase_device *kbdev; struct kbase_hwcnt_backend_jm *backend = NULL; +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + size_t page_count; +#endif WARN_ON(!info); WARN_ON(!out_backend); @@ -739,6 +773,13 @@ static int kbasep_hwcnt_backend_jm_create( kbase_ccswe_init(&backend->ccswe_shader_cores); backend->rate_listener.notify = kbasep_hwcnt_backend_jm_on_freq_change; +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + /* The dummy model needs the CPU mapping. */ + page_count = PFN_UP(info->dump_bytes); + gpu_model_set_dummy_prfcnt_base_cpu(backend->cpu_dump_va, kbdev, + backend->vmap->cpu_pages, + page_count); +#endif /* CONFIG_MALI_NO_MALI */ *out_backend = backend; return 0; diff --git a/mali_kbase/mali_kbase_hwcnt_legacy.c b/mali_kbase/mali_kbase_hwcnt_legacy.c deleted file mode 100644 index 5ca4c51..0000000 --- a/mali_kbase/mali_kbase_hwcnt_legacy.c +++ /dev/null @@ -1,179 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -/* - * - * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - */ - -#include "mali_kbase_hwcnt_legacy.h" -#include "mali_kbase_hwcnt_virtualizer.h" -#include "mali_kbase_hwcnt_types.h" -#include "mali_kbase_hwcnt_gpu.h" -#include "mali_kbase_hwcnt_gpu_narrow.h" -#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h> - -#include <linux/slab.h> -#include <linux/uaccess.h> - -/** - * struct kbase_hwcnt_legacy_client - Legacy hardware counter client. - * @user_dump_buf: Pointer to a non-NULL user buffer, where dumps are returned. - * @enable_map: Counter enable map. - * @dump_buf: Dump buffer used to manipulate dumps from virtualizer. - * @hvcli: Hardware counter virtualizer client. - * @dump_buf_user: Narrow dump buffer used to manipulate dumps before they are - * copied to user. - * @metadata_user: For compatibility with the user driver interface, this - * contains a narrowed version of the hardware counter metadata - * which is limited to 64 entries per block and 32-bit for each - * entry. - */ -struct kbase_hwcnt_legacy_client { - void __user *user_dump_buf; - struct kbase_hwcnt_enable_map enable_map; - struct kbase_hwcnt_dump_buffer dump_buf; - struct kbase_hwcnt_virtualizer_client *hvcli; - struct kbase_hwcnt_dump_buffer_narrow dump_buf_user; - const struct kbase_hwcnt_metadata_narrow *metadata_user; -}; - -int kbase_hwcnt_legacy_client_create( - struct kbase_hwcnt_virtualizer *hvirt, - struct kbase_ioctl_hwcnt_enable *enable, - struct kbase_hwcnt_legacy_client **out_hlcli) -{ - int errcode; - struct kbase_hwcnt_legacy_client *hlcli; - const struct kbase_hwcnt_metadata *metadata; - struct kbase_hwcnt_physical_enable_map phys_em; - - if (!hvirt || !enable || !enable->dump_buffer || !out_hlcli) - return -EINVAL; - - metadata = kbase_hwcnt_virtualizer_metadata(hvirt); - - hlcli = kzalloc(sizeof(*hlcli), GFP_KERNEL); - if (!hlcli) - return -ENOMEM; - - errcode = kbase_hwcnt_gpu_metadata_narrow_create(&hlcli->metadata_user, - metadata); - if (errcode) - goto error; - - errcode = kbase_hwcnt_dump_buffer_narrow_alloc(hlcli->metadata_user, - &hlcli->dump_buf_user); - if (errcode) - goto error; - - hlcli->user_dump_buf = (void __user *)(uintptr_t)enable->dump_buffer; - - errcode = kbase_hwcnt_enable_map_alloc(metadata, &hlcli->enable_map); - if (errcode) - goto error; - - /* Translate from the ioctl enable map to the internal one */ - phys_em.fe_bm = enable->fe_bm; - phys_em.shader_bm = enable->shader_bm; - phys_em.tiler_bm = enable->tiler_bm; - phys_em.mmu_l2_bm = enable->mmu_l2_bm; - kbase_hwcnt_gpu_enable_map_from_physical(&hlcli->enable_map, &phys_em); - - errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hlcli->dump_buf); - if (errcode) - goto error; - - errcode = kbase_hwcnt_virtualizer_client_create( - hvirt, &hlcli->enable_map, &hlcli->hvcli); - if (errcode) - goto error; - - *out_hlcli = hlcli; - return 0; - -error: - kbase_hwcnt_legacy_client_destroy(hlcli); - return errcode; -} - -void kbase_hwcnt_legacy_client_destroy(struct kbase_hwcnt_legacy_client *hlcli) -{ - if (!hlcli) - return; - - kbase_hwcnt_virtualizer_client_destroy(hlcli->hvcli); - kbase_hwcnt_dump_buffer_free(&hlcli->dump_buf); - kbase_hwcnt_enable_map_free(&hlcli->enable_map); - kbase_hwcnt_dump_buffer_narrow_free(&hlcli->dump_buf_user); - kbase_hwcnt_gpu_metadata_narrow_destroy(hlcli->metadata_user); - kfree(hlcli); -} - -int kbase_hwcnt_legacy_client_dump(struct kbase_hwcnt_legacy_client *hlcli) -{ - int errcode; - u64 ts_start_ns; - u64 ts_end_ns; - - if (!hlcli) - return -EINVAL; - - /* Dump into the kernel buffer */ - errcode = 
kbase_hwcnt_virtualizer_client_dump(hlcli->hvcli, - &ts_start_ns, &ts_end_ns, &hlcli->dump_buf); - if (errcode) - return errcode; - - /* Patch the dump buf headers, to hide the counters that other hwcnt - * clients are using. - */ - kbase_hwcnt_gpu_patch_dump_headers( - &hlcli->dump_buf, &hlcli->enable_map); - - /* Copy the dump buffer to the userspace visible buffer. The strict - * variant will explicitly zero any non-enabled counters to ensure - * nothing except exactly what the user asked for is made visible. - * - * A narrow copy is required since virtualizer has a bigger buffer - * but user only needs part of it. - */ - kbase_hwcnt_dump_buffer_copy_strict_narrow( - &hlcli->dump_buf_user, &hlcli->dump_buf, &hlcli->enable_map); - - /* Copy into the user's buffer */ - errcode = copy_to_user(hlcli->user_dump_buf, - hlcli->dump_buf_user.dump_buf, - hlcli->dump_buf_user.md_narrow->dump_buf_bytes); - /* Non-zero errcode implies user buf was invalid or too small */ - if (errcode) - return -EFAULT; - - return 0; -} - -int kbase_hwcnt_legacy_client_clear(struct kbase_hwcnt_legacy_client *hlcli) -{ - u64 ts_start_ns; - u64 ts_end_ns; - - if (!hlcli) - return -EINVAL; - - /* Dump with a NULL buffer to clear this client's counters */ - return kbase_hwcnt_virtualizer_client_dump(hlcli->hvcli, - &ts_start_ns, &ts_end_ns, NULL); -} diff --git a/mali_kbase/mali_kbase_hwcnt_legacy.h b/mali_kbase/mali_kbase_hwcnt_legacy.h deleted file mode 100644 index 163ae8d..0000000 --- a/mali_kbase/mali_kbase_hwcnt_legacy.h +++ /dev/null @@ -1,93 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * - * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -/* - * Legacy hardware counter interface, giving userspace clients simple, - * synchronous access to hardware counters. - * - * Any functions operating on an single legacy hardware counter client instance - * must be externally synchronised. - * Different clients may safely be used concurrently. - */ - -#ifndef _KBASE_HWCNT_LEGACY_H_ -#define _KBASE_HWCNT_LEGACY_H_ - -struct kbase_hwcnt_legacy_client; -struct kbase_ioctl_hwcnt_enable; -struct kbase_hwcnt_virtualizer; - -/** - * kbase_hwcnt_legacy_client_create() - Create a legacy hardware counter client. - * @hvirt: Non-NULL pointer to hardware counter virtualizer the client - * should be attached to. - * @enable: Non-NULL pointer to hwcnt_enable structure, containing a valid - * pointer to a user dump buffer large enough to hold a dump, and - * the counters that should be enabled. - * @out_hlcli: Non-NULL pointer to where the pointer to the created client will - * be stored on success. - * - * Return: 0 on success, else error code. 
- */ -int kbase_hwcnt_legacy_client_create( - struct kbase_hwcnt_virtualizer *hvirt, - struct kbase_ioctl_hwcnt_enable *enable, - struct kbase_hwcnt_legacy_client **out_hlcli); - -/** - * kbase_hwcnt_legacy_client_destroy() - Destroy a legacy hardware counter - * client. - * @hlcli: Pointer to the legacy hardware counter client. - * - * Will safely destroy a client in any partial state of construction. - */ -void kbase_hwcnt_legacy_client_destroy(struct kbase_hwcnt_legacy_client *hlcli); - -/** - * kbase_hwcnt_legacy_client_dump() - Perform a hardware counter dump into the - * client's user buffer. - * @hlcli: Non-NULL pointer to the legacy hardware counter client. - * - * This function will synchronously dump hardware counters into the user buffer - * specified on client creation, with the counters specified on client creation. - * - * The counters are automatically cleared after each dump, such that the next - * dump performed will return the counter values accumulated between the time of - * this function call and the next dump. - * - * Return: 0 on success, else error code. - */ -int kbase_hwcnt_legacy_client_dump(struct kbase_hwcnt_legacy_client *hlcli); - -/** - * kbase_hwcnt_legacy_client_clear() - Perform and discard a hardware counter - * dump. - * @hlcli: Non-NULL pointer to the legacy hardware counter client. - * - * This function will synchronously clear the hardware counters, such that the - * next dump performed will return the counter values accumulated between the - * time of this function call and the next dump. - * - * Return: 0 on success, else error code. - */ -int kbase_hwcnt_legacy_client_clear(struct kbase_hwcnt_legacy_client *hlcli); - -#endif /* _KBASE_HWCNT_LEGACY_H_ */ diff --git a/mali_kbase/mali_kbase_hwcnt_watchdog_if.h b/mali_kbase/mali_kbase_hwcnt_watchdog_if.h new file mode 100644 index 0000000..1873318 --- /dev/null +++ b/mali_kbase/mali_kbase_hwcnt_watchdog_if.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * Virtual interface for hardware counter watchdog. + */ + +#ifndef _KBASE_HWCNT_WATCHDOG_IF_H_ +#define _KBASE_HWCNT_WATCHDOG_IF_H_ + +#include <linux/types.h> + +/* + * Opaque structure of information used to create a watchdog timer interface. + */ +struct kbase_hwcnt_watchdog_info; + +/** + * typedef kbase_hwcnt_watchdog_callback_fn - Callback function when watchdog timer is done + * + * @user_data: Pointer to the callback user data. 
+ */ +typedef void kbase_hwcnt_watchdog_callback_fn(void *user_data); + +/** + * typedef kbase_hwcnt_watchdog_enable_fn - Enable watchdog timer + * + * @timer: Non-NULL pointer to a watchdog timer interface context + * @period_ms: Period in milliseconds of the watchdog timer + * @callback: Non-NULL pointer to a watchdog callback function + * @user_data: Pointer to the user data, used when watchdog timer callback is called + * + * Return: 0 if the watchdog timer enabled successfully, error code otherwise. + */ +typedef int kbase_hwcnt_watchdog_enable_fn( + const struct kbase_hwcnt_watchdog_info *timer, u32 period_ms, + kbase_hwcnt_watchdog_callback_fn *callback, void *user_data); + +/** + * typedef kbase_hwcnt_watchdog_disable_fn - Disable watchdog timer + * + * @timer: Non-NULL pointer to a watchdog timer interface context + */ +typedef void +kbase_hwcnt_watchdog_disable_fn(const struct kbase_hwcnt_watchdog_info *timer); + +/** + * typedef kbase_hwcnt_watchdog_modify_fn - Modify watchdog timer's timeout + * + * @timer: Non-NULL pointer to a watchdog timer interface context + * @delay_ms: Watchdog timer expiration in milliseconds + */ +typedef void +kbase_hwcnt_watchdog_modify_fn(const struct kbase_hwcnt_watchdog_info *timer, + u32 delay_ms); + +/** + * struct kbase_hwcnt_watchdog_interface - Hardware counter watchdog virtual interface. + * + * @timer: Immutable watchdog timer info + * @enable: Function ptr to enable watchdog + * @disable: Function ptr to disable watchdog + * @modify: Function ptr to modify watchdog + */ +struct kbase_hwcnt_watchdog_interface { + const struct kbase_hwcnt_watchdog_info *timer; + kbase_hwcnt_watchdog_enable_fn *enable; + kbase_hwcnt_watchdog_disable_fn *disable; + kbase_hwcnt_watchdog_modify_fn *modify; +}; + +#endif /* _KBASE_HWCNT_WATCHDOG_IF_H_ */ diff --git a/mali_kbase/mali_kbase_hwcnt_watchdog_if_timer.c b/mali_kbase/mali_kbase_hwcnt_watchdog_if_timer.c new file mode 100644 index 0000000..4a03080 --- /dev/null +++ b/mali_kbase/mali_kbase_hwcnt_watchdog_if_timer.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include "mali_kbase.h" +#include "mali_kbase_hwcnt_watchdog_if.h" +#include "mali_kbase_hwcnt_watchdog_if_timer.h" + +#include <linux/timer.h> +#include <linux/slab.h> + +/** + * struct kbase_hwcnt_watchdog_if_timer_info - Timer information for watchdog + * interface. 
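The typedefs and struct kbase_hwcnt_watchdog_interface above form a small vtable: a consumer only ever goes through the timer, enable, disable and modify members. A hedged usage sketch against the timer-backed implementation this patch adds (the callback, the 1000/500 ms periods and the expired flag are illustrative assumptions, not code from the driver):

#include "mali_kbase_hwcnt_watchdog_if.h"
#include "mali_kbase_hwcnt_watchdog_if_timer.h"

/* Hypothetical callback: remembers that the period elapsed with no dump. */
static void example_wd_callback(void *user_data)
{
	bool *expired = user_data;

	*expired = true;
}

static int example_use_watchdog(void)
{
	struct kbase_hwcnt_watchdog_interface wd_if;
	bool expired = false;
	int err;

	err = kbase_hwcnt_watchdog_if_timer_create(&wd_if);
	if (err)
		return err;

	/* Arm the watchdog: the callback fires after 1000 ms of inactivity. */
	err = wd_if.enable(wd_if.timer, 1000, example_wd_callback, &expired);
	if (err)
		goto out_destroy;

	/* ... work happens; push the deadline out by another 500 ms ... */
	wd_if.modify(wd_if.timer, 500);

	/* End of session: stop the timer before tearing the interface down. */
	wd_if.disable(wd_if.timer);

out_destroy:
	kbase_hwcnt_watchdog_if_timer_destroy(&wd_if);
	return err;
}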
+ * + * @watchdog_timer: Watchdog timer + * @timer_enabled: True if watchdog timer enabled, otherwise false + * @callback: Watchdog callback function + * @user_data: Pointer to user data passed as argument to the callback + * function + */ +struct kbase_hwcnt_watchdog_if_timer_info { + struct timer_list watchdog_timer; + bool timer_enabled; + kbase_hwcnt_watchdog_callback_fn *callback; + void *user_data; +}; + +/** + * kbasep_hwcnt_watchdog_callback() - Watchdog timer callback + * + * @timer: Timer structure + * + * Function to be called when watchdog timer expires. Will call the callback + * function provided at enable(). + */ +static void kbasep_hwcnt_watchdog_callback(struct timer_list *const timer) +{ + struct kbase_hwcnt_watchdog_if_timer_info *const info = + container_of(timer, struct kbase_hwcnt_watchdog_if_timer_info, + watchdog_timer); + if (info->callback) + info->callback(info->user_data); +} + +static int kbasep_hwcnt_watchdog_if_timer_enable( + const struct kbase_hwcnt_watchdog_info *const timer, + u32 const period_ms, kbase_hwcnt_watchdog_callback_fn *const callback, + void *const user_data) +{ + struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = + (void *)timer; + + if (WARN_ON(!timer) || WARN_ON(!callback)) + return -EINVAL; + + timer_info->callback = callback; + timer_info->user_data = user_data; + + mod_timer(&timer_info->watchdog_timer, + jiffies + msecs_to_jiffies(period_ms)); + timer_info->timer_enabled = true; + + return 0; +} + +static void kbasep_hwcnt_watchdog_if_timer_disable( + const struct kbase_hwcnt_watchdog_info *const timer) +{ + struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = + (void *)timer; + + if (WARN_ON(!timer)) + return; + + if (!timer_info->timer_enabled) + return; + + del_timer_sync(&timer_info->watchdog_timer); + timer_info->timer_enabled = false; +} + +static void kbasep_hwcnt_watchdog_if_timer_modify( + const struct kbase_hwcnt_watchdog_info *const timer, u32 const delay_ms) +{ + struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = + (void *)timer; + + if (WARN_ON(!timer)) + return; + + mod_timer(&timer_info->watchdog_timer, + jiffies + msecs_to_jiffies(delay_ms)); +} + +void kbase_hwcnt_watchdog_if_timer_destroy( + struct kbase_hwcnt_watchdog_interface *const watchdog_if) +{ + struct kbase_hwcnt_watchdog_if_timer_info *timer_info; + + if (WARN_ON(!watchdog_if)) + return; + + timer_info = (void *)watchdog_if->timer; + + if (WARN_ON(!timer_info)) + return; + + del_timer_sync(&timer_info->watchdog_timer); + kfree(timer_info); + + memset(watchdog_if, 0, sizeof(*watchdog_if)); +} + +int kbase_hwcnt_watchdog_if_timer_create( + struct kbase_hwcnt_watchdog_interface *const watchdog_if) +{ + struct kbase_hwcnt_watchdog_if_timer_info *timer_info; + + if (WARN_ON(!watchdog_if)) + return -EINVAL; + + timer_info = kmalloc(sizeof(*timer_info), GFP_KERNEL); + if (!timer_info) + return -ENOMEM; + + *timer_info = + (struct kbase_hwcnt_watchdog_if_timer_info){ .timer_enabled = + false }; + + kbase_timer_setup(&timer_info->watchdog_timer, + kbasep_hwcnt_watchdog_callback); + + *watchdog_if = (struct kbase_hwcnt_watchdog_interface){ + .timer = (void *)timer_info, + .enable = kbasep_hwcnt_watchdog_if_timer_enable, + .disable = kbasep_hwcnt_watchdog_if_timer_disable, + .modify = kbasep_hwcnt_watchdog_if_timer_modify, + }; + + return 0; +} diff --git a/mali_kbase/mali_kbase_hwcnt_watchdog_if_timer.h b/mali_kbase/mali_kbase_hwcnt_watchdog_if_timer.h new file mode 100644 index 0000000..3bd69c3 --- /dev/null +++ 
b/mali_kbase/mali_kbase_hwcnt_watchdog_if_timer.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * Concrete implementation of kbase_hwcnt_watchdog_interface for HWC backend + */ + +#ifndef _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_ +#define _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_ + +struct kbase_hwcnt_watchdog_interface; + +/** + * kbase_hwcnt_watchdog_if_timer_create() - Create a watchdog interface of hardware counter backend. + * + * @watchdog_if: Non-NULL pointer to watchdog interface that is filled in on creation success + * + * Return: 0 on success, error otherwise. + */ +int kbase_hwcnt_watchdog_if_timer_create( + struct kbase_hwcnt_watchdog_interface *watchdog_if); + +/** + * kbase_hwcnt_watchdog_if_timer_destroy() - Destroy a watchdog interface of hardware counter + * backend. + * + * @watchdog_if: Pointer to watchdog interface to destroy + */ +void kbase_hwcnt_watchdog_if_timer_destroy( + struct kbase_hwcnt_watchdog_interface *watchdog_if); + +#endif /* _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_ */ diff --git a/mali_kbase/mali_kbase_jd.c b/mali_kbase/mali_kbase_jd.c index c892455..08824bd 100644 --- a/mali_kbase/mali_kbase_jd.c +++ b/mali_kbase/mali_kbase_jd.c @@ -619,8 +619,8 @@ static void jd_update_jit_usage(struct kbase_jd_atom *katom) else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) size_to_read = sizeof(u64[COUNT]); - ptr = kbase_vmap(kctx, reg->heap_info_gpu_addr, size_to_read, - &mapping); + ptr = kbase_vmap_prot(kctx, reg->heap_info_gpu_addr, size_to_read, + KBASE_REG_CPU_RD, &mapping); if (!ptr) { dev_warn(kctx->kbdev->dev, diff --git a/mali_kbase/mali_kbase_kinstr_prfcnt.c b/mali_kbase/mali_kbase_kinstr_prfcnt.c index ce996ca..27ff3bb 100644 --- a/mali_kbase/mali_kbase_kinstr_prfcnt.c +++ b/mali_kbase/mali_kbase_kinstr_prfcnt.c @@ -19,10 +19,10 @@ * */ +#include "mali_kbase.h" #include "mali_kbase_kinstr_prfcnt.h" #include "mali_kbase_hwcnt_virtualizer.h" #include "mali_kbase_hwcnt_types.h" -#include <uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h> #include "mali_kbase_hwcnt_gpu.h" #include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h> #include "mali_malisw.h" @@ -44,14 +44,12 @@ */ #define DUMP_INTERVAL_MIN_NS (100 * NSEC_PER_USEC) -/* The minimum allowed interval between dumps, in microseconds - * (equivalent to 10KHz) - */ -#define DUMP_INTERVAL_MIN_US (DUMP_INTERVAL_MIN_NS / 1000) - /* The maximum allowed buffers per client */ #define MAX_BUFFER_COUNT 32 +/* The module printing prefix */ +#define KINSTR_PRFCNT_PREFIX "mali_kbase_kinstr_prfcnt: " + /** * struct kbase_kinstr_prfcnt_context - IOCTL interface for userspace hardware * counters. @@ -80,11 +78,11 @@ struct kbase_kinstr_prfcnt_context { /** * struct kbase_kinstr_prfcnt_sample - Buffer and descriptor for sample data. 
- * @sample_meta: Pointer to samle metadata. + * @sample_meta: Pointer to sample metadata. * @dump_buf: Dump buffer containing sample data. */ struct kbase_kinstr_prfcnt_sample { - u64 *sample_meta; + struct prfcnt_metadata *sample_meta; struct kbase_hwcnt_dump_buffer dump_buf; }; @@ -92,7 +90,8 @@ struct kbase_kinstr_prfcnt_sample { * struct kbase_kinstr_prfcnt_sample_array - Array of sample data. * @page_addr: Address of allocated pages. A single allocation is used * for all Dump Buffers in the array. - * @page_order: The allocation order of the pages. + * @page_order: The allocation order of the pages, the order is on a + * logarithmic scale. * @sample_count: Number of allocated samples. * @samples: Non-NULL pointer to the array of Dump Buffers. */ @@ -107,59 +106,91 @@ struct kbase_kinstr_prfcnt_sample_array { * struct kbase_kinstr_prfcnt_client_config - Client session configuration. * @prfcnt_mode: Sampling mode: either manual or periodic. * @counter_set: Set of performance counter blocks. + * @scope: Scope of performance counters to capture. * @buffer_count: Number of buffers used to store samples. - * @period_us: Sampling period, in microseconds, or 0 if manual mode. + * @period_ns: Sampling period, in nanoseconds, or 0 if manual mode. * @phys_em: Enable map used by the GPU. */ struct kbase_kinstr_prfcnt_client_config { u8 prfcnt_mode; u8 counter_set; + u8 scope; u16 buffer_count; - u64 period_us; + u64 period_ns; struct kbase_hwcnt_physical_enable_map phys_em; }; /** + * struct kbase_kinstr_prfcnt_async - Asynchronous sampling operation to + * carry out for a kinstr_prfcnt_client. + * @dump_work: Worker for performing asynchronous counter dumps. + * @user_data: User data for asynchronous dump in progress. + * @ts_end_ns: End timestamp of most recent async dump. + */ +struct kbase_kinstr_prfcnt_async { + struct work_struct dump_work; + u64 user_data; + u64 ts_end_ns; +}; + +/** * struct kbase_kinstr_prfcnt_client - A kinstr_prfcnt client attached * to a kinstr_prfcnt context. - * @kinstr_ctx: kinstr_prfcnt context client is attached to. - * @hvcli: Hardware counter virtualizer client. - * @node: Node used to attach this client to list in kinstr_prfcnt - * context. - * @next_dump_time_ns: Time in ns when this client's next periodic dump must - * occur. If 0, not a periodic client. - * @dump_interval_ns: Interval between periodic dumps. If 0, not a periodic - * client. - * @config: Configuration of the client session. - * @enable_map: Counters enable map. - * @tmp_buf: Temporary buffer to use before handing over dump to - * client. - * @sample_arr: Array of dump buffers allocated by this client. - * @dump_bufs_meta: Metadata of dump buffers. - * @meta_idx: Index of metadata being accessed by userspace. - * @read_idx: Index of buffer read by userspace. - * @write_idx: Index of buffer being written by dump worker. - * @waitq: Client's notification queue. - * @sample_size: Size of the data required for one sample, in bytes. - * @sample_count: Number of samples the client is able to capture. + * @kinstr_ctx: kinstr_prfcnt context client is attached to. + * @hvcli: Hardware counter virtualizer client. + * @node: Node used to attach this client to list in + * kinstr_prfcnt context. + * @cmd_sync_lock: Lock coordinating the reader interface for commands + * that need interacting with the async sample dump + * worker thread. + * @next_dump_time_ns: Time in ns when this client's next periodic dump must + * occur. If 0, not a periodic client. 
+ * @dump_interval_ns: Interval between periodic dumps. If 0, not a periodic + * client. + * @sample_flags: Flags for the current active dumping sample, marking + * the conditions/events during the dump duration. + * @active: True if the client has been started. + * @config: Configuration of the client session. + * @enable_map: Counters enable map. + * @tmp_buf: Temporary buffer to use before handing over dump to + * client. + * @sample_arr: Array of dump buffers allocated by this client. + * @read_idx: Index of buffer read by userspace. + * @write_idx: Index of buffer being written by dump worker. + * @waitq: Client's notification queue. + * @sample_size: Size of the data required for one sample, in bytes. + * @sample_count: Number of samples the client is able to capture. + * @sync_sample_count: Number of available spaces for synchronous samples. + * It can differ from sample_count if asynchronous + * sample requests are reserving space in the buffer. + * @user_data: User data associated with the session. + * This is set when the session is started and stopped. + * This value is ignored for control commands that + * provide another value. + * @async: Asynchronous sampling operations to carry out in this + * client's session. */ struct kbase_kinstr_prfcnt_client { struct kbase_kinstr_prfcnt_context *kinstr_ctx; struct kbase_hwcnt_virtualizer_client *hvcli; struct list_head node; + struct mutex cmd_sync_lock; u64 next_dump_time_ns; u32 dump_interval_ns; + u32 sample_flags; + bool active; struct kbase_kinstr_prfcnt_client_config config; struct kbase_hwcnt_enable_map enable_map; struct kbase_hwcnt_dump_buffer tmp_buf; struct kbase_kinstr_prfcnt_sample_array sample_arr; - struct kbase_hwcnt_reader_metadata *dump_bufs_meta; - atomic_t meta_idx; atomic_t read_idx; atomic_t write_idx; wait_queue_head_t waitq; size_t sample_size; size_t sample_count; + atomic_t sync_sample_count; + u64 user_data; + struct kbase_kinstr_prfcnt_async async; }; static struct prfcnt_enum_item kinstr_prfcnt_supported_requests[] = { @@ -188,21 +219,6 @@ static struct prfcnt_enum_item kinstr_prfcnt_supported_requests[] = { }; /** - * kbasep_kinstr_prfcnt_hwcnt_reader_buffer_ready() - Check if client has ready - * buffers. - * @cli: Non-NULL pointer to kinstr_prfcnt client. - * - * Return: Non-zero if client has at least one dumping buffer filled that was - * not notified to user yet. - */ -static int kbasep_kinstr_prfcnt_hwcnt_reader_buffer_ready( - struct kbase_kinstr_prfcnt_client *cli) -{ - WARN_ON(!cli); - return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx); -} - -/** * kbasep_kinstr_prfcnt_hwcnt_reader_poll() - hwcnt reader's poll. * @filp: Non-NULL pointer to file structure. * @wait: Non-NULL pointer to poll table. @@ -210,8 +226,15 @@ static int kbasep_kinstr_prfcnt_hwcnt_reader_buffer_ready( * Return: POLLIN if data can be read without blocking, 0 if data can not be * read without blocking, else error code. 
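From userspace, the poll() implementation that follows simply reports POLLIN once write_idx has moved past read_idx. A minimal sketch of a client waiting on the reader file descriptor (the descriptor, its origin and the timeout are assumptions; only POLLIN is meaningful for this interface):

#include <poll.h>
#include <stdio.h>

/* fd is assumed to be the kinstr_prfcnt client file descriptor returned at
 * session setup; 1 means a sample is ready, 0 means timeout, -1 an error.
 */
static int wait_for_sample(int fd, int timeout_ms)
{
	struct pollfd pfd = { .fd = fd, .events = POLLIN };
	int ret = poll(&pfd, 1, timeout_ms);

	if (ret < 0)
		return -1;
	if (ret == 0)
		return 0;
	return (pfd.revents & POLLIN) ? 1 : -1;
}

int main(void)
{
	/* Illustrative call only; a real client would pass the setup fd. */
	printf("%d\n", wait_for_sample(-1, 0));
	return 0;
}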
*/ -static unsigned int kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp, - poll_table *wait) +#if KERNEL_VERSION(4, 16, 0) >= LINUX_VERSION_CODE +static unsigned int +kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp, + struct poll_table_struct *wait) +#else +static __poll_t +kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp, + struct poll_table_struct *wait) +#endif { struct kbase_kinstr_prfcnt_client *cli; @@ -225,13 +248,776 @@ static unsigned int kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp, poll_wait(filp, &cli->waitq, wait); - if (kbasep_kinstr_prfcnt_hwcnt_reader_buffer_ready(cli)) + if (atomic_read(&cli->write_idx) != atomic_read(&cli->read_idx)) return POLLIN; return 0; } /** + * kbasep_kinstr_prfcnt_next_dump_time_ns() - Calculate the next periodic + * dump time. + * @cur_ts_ns: Current time in nanoseconds. + * @interval: Interval between dumps in nanoseconds. + * + * Return: 0 if interval is 0 (i.e. a non-periodic client), or the next dump + * time that occurs after cur_ts_ns. + */ +static u64 kbasep_kinstr_prfcnt_next_dump_time_ns(u64 cur_ts_ns, u32 interval) +{ + /* Non-periodic client */ + if (interval == 0) + return 0; + + /* + * Return the next interval after the current time relative to t=0. + * This means multiple clients with the same period will synchronize, + * regardless of when they were started, allowing the worker to be + * scheduled less frequently. + */ + do_div(cur_ts_ns, interval); + + return (cur_ts_ns + 1) * interval; +} + +/** + * kbasep_kinstr_prfcnt_timestamp_ns() - Get the current time in nanoseconds. + * + * Return: Current time in nanoseconds. + */ +static u64 kbasep_kinstr_prfcnt_timestamp_ns(void) +{ + return ktime_get_raw_ns(); +} + +/** + * kbasep_kinstr_prfcnt_reschedule_worker() - Update next dump times for all + * periodic kinstr_prfcnt clients, + * then reschedule the dump worker + * appropriately. + * @kinstr_ctx: Non-NULL pointer to the kinstr_prfcnt context. + * + * If there are no periodic clients, then the dump worker will not be + * rescheduled. Else, the dump worker will be rescheduled for the next + * periodic client dump. + */ +static void kbasep_kinstr_prfcnt_reschedule_worker( + struct kbase_kinstr_prfcnt_context *kinstr_ctx) +{ + u64 cur_ts_ns; + u64 shortest_period_ns = U64_MAX; + struct kbase_kinstr_prfcnt_client *pos; + + WARN_ON(!kinstr_ctx); + lockdep_assert_held(&kinstr_ctx->lock); + cur_ts_ns = kbasep_kinstr_prfcnt_timestamp_ns(); + + /* + * This loop fulfills 2 separate tasks that don't affect each other: + * + * 1) Determine the shortest period. + * 2) Update the next dump time of clients that have already been + * dumped. It's important not to alter the next dump time of clients + * that haven't been dumped yet. + * + * For the sake of efficiency, the rescheduling decision ignores the time + * of the next dump and just uses the shortest period among all periodic + * clients. It is more efficient to serve multiple dump requests at once, + * rather than trying to reschedule the worker to serve each request + * individually. + */ + list_for_each_entry(pos, &kinstr_ctx->clients, node) { + /* Ignore clients that are not periodic or not active. */ + if (pos->active && pos->dump_interval_ns > 0) { + shortest_period_ns = + MIN(shortest_period_ns, pos->dump_interval_ns); + + /* Next dump should happen exactly one period after the last dump. + * If last dump was overdue and scheduled to happen more than one + * period ago, compensate for that by scheduling next dump in the + * immediate future. 
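kbasep_kinstr_prfcnt_next_dump_time_ns() above rounds the current time up to the next multiple of the interval measured from t = 0, so clients sharing a period wake on the same tick. The same maths in a small standalone sketch (plain 64-bit division instead of the kernel's do_div(), hypothetical numbers):

#include <stdint.h>
#include <stdio.h>

static uint64_t next_dump_time_ns(uint64_t cur_ts_ns, uint32_t interval)
{
	/* Non-periodic client */
	if (interval == 0)
		return 0;

	/* Equivalent to do_div(cur_ts_ns, interval); (cur_ts_ns + 1) * interval */
	return (cur_ts_ns / interval + 1) * interval;
}

int main(void)
{
	/* Now = 1234 ns, period = 500 ns -> next dump at 1500 ns. */
	printf("%llu\n", (unsigned long long)next_dump_time_ns(1234, 500));
	/* Exactly on a boundary still moves a full period forward (2000 ns). */
	printf("%llu\n", (unsigned long long)next_dump_time_ns(1500, 500));
	return 0;
}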
+ */ + if (pos->next_dump_time_ns < cur_ts_ns) + pos->next_dump_time_ns = + MAX(cur_ts_ns + 1, + pos->next_dump_time_ns + + pos->dump_interval_ns); + } + } + + /* Cancel the timer if it is already pending */ + hrtimer_cancel(&kinstr_ctx->dump_timer); + + /* Start the timer if there are periodic clients and kinstr_prfcnt is not + * suspended. + */ + if ((shortest_period_ns != U64_MAX) && + (kinstr_ctx->suspend_count == 0)) { + u64 next_schedule_time_ns = + kbasep_kinstr_prfcnt_next_dump_time_ns( + cur_ts_ns, shortest_period_ns); + hrtimer_start(&kinstr_ctx->dump_timer, + ns_to_ktime(next_schedule_time_ns - cur_ts_ns), + HRTIMER_MODE_REL); + } +} + +static enum prfcnt_block_type +kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(u64 type) +{ + enum prfcnt_block_type block_type; + + switch (type) { + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: + block_type = PRFCNT_BLOCK_TYPE_FE; + break; + + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: + block_type = PRFCNT_BLOCK_TYPE_TILER; + break; + + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: + block_type = PRFCNT_BLOCK_TYPE_SHADER_CORE; + break; + + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: + block_type = PRFCNT_BLOCK_TYPE_MEMORY; + break; + + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED: + default: + block_type = PRFCNT_BLOCK_TYPE_RESERVED; + break; + } + + return block_type; +} + +/** + * kbasep_kinstr_prfcnt_set_block_meta_items() - Populate a sample's block meta + * item array. + * @dst: Non-NULL pointer to the sample's dump buffer object. + * @block_meta_base: Non-NULL double pointer to the start of the block meta + * data items. + * @base_addr: Address of allocated pages for array of samples. Used + * to calculate offset of block values. + * @counter_set: The SET which blocks represent. + */ +int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_dump_buffer *dst, + struct prfcnt_metadata **block_meta_base, + u64 base_addr, u8 counter_set) +{ + size_t grp, blk, blk_inst; + struct prfcnt_metadata **ptr_md = block_meta_base; + const struct kbase_hwcnt_metadata *metadata; + + if (!dst || !*block_meta_base) + return -EINVAL; + + metadata = dst->metadata; + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u64 *dst_blk; + + /* Skip unused blocks */ + if (!kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst)) + continue; + + dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); + (*ptr_md)->hdr.item_type = PRFCNT_SAMPLE_META_TYPE_BLOCK; + (*ptr_md)->hdr.item_version = PRFCNT_READER_API_VERSION; + (*ptr_md)->u.block_md.block_type = + kbase_hwcnt_metadata_block_type_to_prfcnt_block_type( + kbase_hwcnt_metadata_block_type(metadata, grp, + blk)); + (*ptr_md)->u.block_md.block_idx = (u8)blk_inst; + (*ptr_md)->u.block_md.set = counter_set; + (*ptr_md)->u.block_md.block_state = BLOCK_STATE_UNKNOWN; + (*ptr_md)->u.block_md.values_offset = (u32)((u64)(uintptr_t)dst_blk - base_addr); + + /* update the buf meta data block pointer to next item */ + (*ptr_md)++; + } + + return 0; +} + +/** + * kbasep_kinstr_prfcnt_set_sample_metadata() - Set sample metadata for sample + * output. + * @cli: Non-NULL pointer to a kinstr_prfcnt client. + * @dump_buf: Non-NULL pointer to dump buffer where sample is stored. 
+ * @ptr_md: Non-NULL pointer to sample metadata. + */ +static void kbasep_kinstr_prfcnt_set_sample_metadata( + struct kbase_kinstr_prfcnt_client *cli, + struct kbase_hwcnt_dump_buffer *dump_buf, + struct prfcnt_metadata *ptr_md) +{ + u8 clk_cnt, i; + + clk_cnt = cli->kinstr_ctx->metadata->clk_cnt; + + /* PRFCNT_SAMPLE_META_TYPE_SAMPLE must be the first item */ + ptr_md->hdr.item_type = PRFCNT_SAMPLE_META_TYPE_SAMPLE; + ptr_md->hdr.item_version = PRFCNT_READER_API_VERSION; + ptr_md->u.sample_md.seq = atomic_read(&cli->write_idx); + ptr_md->u.sample_md.flags = cli->sample_flags; + + /* Place the PRFCNT_SAMPLE_META_TYPE_CLOCK optionally as the 2nd */ + ptr_md++; + if (clk_cnt > MAX_REPORTED_DOMAINS) + clk_cnt = MAX_REPORTED_DOMAINS; + + /* Handle the prfcnt_clock_metadata meta item */ + ptr_md->hdr.item_type = PRFCNT_SAMPLE_META_TYPE_CLOCK; + ptr_md->hdr.item_version = PRFCNT_READER_API_VERSION; + ptr_md->u.clock_md.num_domains = clk_cnt; + for (i = 0; i < clk_cnt; i++) + ptr_md->u.clock_md.cycles[i] = dump_buf->clk_cnt_buf[i]; + + /* Dealing with counter blocks */ + ptr_md++; + if (WARN_ON(kbasep_kinstr_prfcnt_set_block_meta_items( + dump_buf, &ptr_md, cli->sample_arr.page_addr, cli->config.counter_set))) + return; + + /* Handle the last sentinel item */ + ptr_md->hdr.item_type = FLEX_LIST_TYPE_NONE; + ptr_md->hdr.item_version = 0; +} + +/** + * kbasep_kinstr_prfcnt_client_output_empty_sample() - Assemble an empty sample + * for output. + * @cli: Non-NULL pointer to a kinstr_prfcnt client. + * @buf_idx: The index to the sample array for saving the sample. + */ +static void kbasep_kinstr_prfcnt_client_output_empty_sample( + struct kbase_kinstr_prfcnt_client *cli, unsigned int buf_idx) +{ + struct kbase_hwcnt_dump_buffer *dump_buf; + struct prfcnt_metadata *ptr_md; + + if (WARN_ON(buf_idx >= cli->sample_arr.sample_count)) + return; + + dump_buf = &cli->sample_arr.samples[buf_idx].dump_buf; + ptr_md = cli->sample_arr.samples[buf_idx].sample_meta; + + kbase_hwcnt_dump_buffer_zero(dump_buf, &cli->enable_map); + + /* Use end timestamp from most recent async dump */ + ptr_md->u.sample_md.timestamp_start = cli->async.ts_end_ns; + ptr_md->u.sample_md.timestamp_end = cli->async.ts_end_ns; + + kbasep_kinstr_prfcnt_set_sample_metadata(cli, dump_buf, ptr_md); +} + +/** + * kbasep_kinstr_prfcnt_client_output_sample() - Assemble a sample for output. + * @cli: Non-NULL pointer to a kinstr_prfcnt client. + * @buf_idx: The index to the sample array for saving the sample. + * @user_data: User data to return to the user. + * @ts_start_ns: Time stamp for the start point of the sample dump. + * @ts_end_ns: Time stamp for the end point of the sample dump. + */ +static void kbasep_kinstr_prfcnt_client_output_sample( + struct kbase_kinstr_prfcnt_client *cli, unsigned int buf_idx, + u64 user_data, u64 ts_start_ns, u64 ts_end_ns) +{ + struct kbase_hwcnt_dump_buffer *dump_buf; + struct kbase_hwcnt_dump_buffer *tmp_buf = &cli->tmp_buf; + struct prfcnt_metadata *ptr_md; + + if (WARN_ON(buf_idx >= cli->sample_arr.sample_count)) + return; + + dump_buf = &cli->sample_arr.samples[buf_idx].dump_buf; + ptr_md = cli->sample_arr.samples[buf_idx].sample_meta; + + /* Patch the dump buf headers, to hide the counters that other hwcnt + * clients are using. + */ + kbase_hwcnt_gpu_patch_dump_headers(tmp_buf, &cli->enable_map); + + /* Copy the temp buffer to the userspace visible buffer. The strict + * variant will explicitly zero any non-enabled counters to ensure + * nothing except exactly what the user asked for is made visible. 
+ */ + kbase_hwcnt_dump_buffer_copy_strict(dump_buf, tmp_buf, + &cli->enable_map); + + /* PRFCNT_SAMPLE_META_TYPE_SAMPLE must be the first item. + * Set timestamp and user data for real dump. + */ + ptr_md->u.sample_md.timestamp_start = ts_start_ns; + ptr_md->u.sample_md.timestamp_end = ts_end_ns; + ptr_md->u.sample_md.user_data = user_data; + + kbasep_kinstr_prfcnt_set_sample_metadata(cli, dump_buf, ptr_md); +} + +/** + * kbasep_kinstr_prfcnt_client_dump() - Perform a dump for a client. + * @cli: Non-NULL pointer to a kinstr_prfcnt client. + * @event_id: Event type that triggered the dump. + * @user_data: User data to return to the user. + * @async_dump: Whether this is an asynchronous dump or not. + * @empty_sample: Sample block data will be 0 if this is true. + * + * Return: 0 on success, else error code. + */ +static int +kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli, + enum base_hwcnt_reader_event event_id, + u64 user_data, bool async_dump, + bool empty_sample) +{ + int ret; + u64 ts_start_ns = 0; + u64 ts_end_ns = 0; + unsigned int write_idx; + unsigned int read_idx; + size_t available_samples_count; + + WARN_ON(!cli); + lockdep_assert_held(&cli->kinstr_ctx->lock); + + write_idx = atomic_read(&cli->write_idx); + read_idx = atomic_read(&cli->read_idx); + + /* Check if there is a place to copy HWC block into. Calculate the + * number of available samples count, by taking into account the type + * of dump. + * Asynchronous dumps have the ability to reserve space in the samples + * array for future dumps, unlike synchronous dumps. Because of that, + * the samples count for synchronous dumps is managed by a variable + * called sync_sample_count, that originally is defined as equal to the + * size of the whole array but later decreases every time an + * asynchronous dump request is pending and then re-increased every + * time an asynchronous dump request is completed. + */ + available_samples_count = async_dump ? + cli->sample_arr.sample_count : + atomic_read(&cli->sync_sample_count); + if (write_idx - read_idx == available_samples_count) { + /* For periodic sampling, the current active dump + * will be accumulated in the next sample, when + * a buffer becomes available. + */ + if (event_id == BASE_HWCNT_READER_EVENT_PERIODIC) + cli->sample_flags |= SAMPLE_FLAG_OVERFLOW; + return -EBUSY; + } + + /* For the rest of the function, use the actual sample_count + * that represents the real size of the array. + */ + write_idx %= cli->sample_arr.sample_count; + + if (!empty_sample) { + ret = kbase_hwcnt_virtualizer_client_dump( + cli->hvcli, &ts_start_ns, &ts_end_ns, &cli->tmp_buf); + /* HWC dump error, set the sample with error flag */ + if (ret) + cli->sample_flags |= SAMPLE_FLAG_ERROR; + + /* Make the sample ready and copy it to the userspace mapped buffer */ + kbasep_kinstr_prfcnt_client_output_sample( + cli, write_idx, user_data, ts_start_ns, ts_end_ns); + } else { + if (!async_dump) { + struct prfcnt_metadata *ptr_md; + /* User data will not be updated for empty samples. */ + ptr_md = cli->sample_arr.samples[write_idx].sample_meta; + ptr_md->u.sample_md.user_data = user_data; + } + + /* Make the sample ready and copy it to the userspace mapped buffer */ + kbasep_kinstr_prfcnt_client_output_empty_sample(cli, write_idx); + } + + /* Notify client. Make sure all changes to memory are visible. 
*/ + wmb(); + atomic_inc(&cli->write_idx); + if (async_dump) { + /* Remember the end timestamp of async dump for empty samples */ + if (!empty_sample) + cli->async.ts_end_ns = ts_end_ns; + + atomic_inc(&cli->sync_sample_count); + } + wake_up_interruptible(&cli->waitq); + /* Reset the flags for the next sample dump */ + cli->sample_flags = 0; + + return 0; +} + +static int +kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli, + u64 user_data) +{ + int ret; + u64 tm_start, tm_end; + + WARN_ON(!cli); + lockdep_assert_held(&cli->cmd_sync_lock); + + /* If the client is already started, the command is a no-op */ + if (cli->active) + return 0; + + kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, + &cli->config.phys_em); + + mutex_lock(&cli->kinstr_ctx->lock); + /* Enable HWC from the configuration of the client creation */ + ret = kbase_hwcnt_virtualizer_client_set_counters( + cli->hvcli, &cli->enable_map, &tm_start, &tm_end, NULL); + + if (!ret) { + atomic_set(&cli->sync_sample_count, cli->sample_count); + cli->active = true; + cli->user_data = user_data; + cli->sample_flags = 0; + + if (cli->dump_interval_ns) + kbasep_kinstr_prfcnt_reschedule_worker(cli->kinstr_ctx); + } + + mutex_unlock(&cli->kinstr_ctx->lock); + + return ret; +} + +static int kbasep_kinstr_prfcnt_client_wait_async_done( + struct kbase_kinstr_prfcnt_client *cli) +{ + lockdep_assert_held(&cli->cmd_sync_lock); + + return wait_event_interruptible(cli->waitq, + atomic_read(&cli->sync_sample_count) == + cli->sample_count); +} + +static int +kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli, + u64 user_data) +{ + int ret; + u64 tm_start = 0; + u64 tm_end = 0; + struct kbase_hwcnt_physical_enable_map phys_em; + struct kbase_hwcnt_dump_buffer *tmp_buf = NULL; + unsigned int write_idx; + unsigned int read_idx; + + WARN_ON(!cli); + lockdep_assert_held(&cli->cmd_sync_lock); + + /* If the client is not started, the command is invalid */ + if (!cli->active) + return -EINVAL; + + /* Wait until pending async sample operation done */ + ret = kbasep_kinstr_prfcnt_client_wait_async_done(cli); + + if (ret < 0) + return -ERESTARTSYS; + + phys_em.fe_bm = 0; + phys_em.tiler_bm = 0; + phys_em.mmu_l2_bm = 0; + phys_em.shader_bm = 0; + + kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &phys_em); + + mutex_lock(&cli->kinstr_ctx->lock); + + /* Check whether one has the buffer to hold the last sample */ + write_idx = atomic_read(&cli->write_idx); + read_idx = atomic_read(&cli->read_idx); + + /* Check if there is a place to save the last stop produced sample */ + if (write_idx - read_idx < cli->sample_arr.sample_count) + tmp_buf = &cli->tmp_buf; + + ret = kbase_hwcnt_virtualizer_client_set_counters(cli->hvcli, + &cli->enable_map, + &tm_start, &tm_end, + &cli->tmp_buf); + /* If the last stop sample is in error, set the sample flag */ + if (ret) + cli->sample_flags |= SAMPLE_FLAG_ERROR; + + if (tmp_buf) { + write_idx %= cli->sample_arr.sample_count; + /* Handle the last stop sample */ + kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, + &cli->config.phys_em); + /* As this is a stop sample, mark it as MANUAL */ + kbasep_kinstr_prfcnt_client_output_sample( + cli, write_idx, user_data, tm_start, tm_end); + /* Notify client. Make sure all changes to memory are visible. 
*/ + wmb(); + atomic_inc(&cli->write_idx); + wake_up_interruptible(&cli->waitq); + } + + cli->active = false; + cli->user_data = user_data; + + if (cli->dump_interval_ns) + kbasep_kinstr_prfcnt_reschedule_worker(cli->kinstr_ctx); + + mutex_unlock(&cli->kinstr_ctx->lock); + + return ret; +} + +static int +kbasep_kinstr_prfcnt_client_sync_dump(struct kbase_kinstr_prfcnt_client *cli, + u64 user_data) +{ + int ret; + bool empty_sample = false; + + lockdep_assert_held(&cli->cmd_sync_lock); + + /* If the client is not started, or not manual, the command invalid */ + if (!cli->active || cli->dump_interval_ns) + return -EINVAL; + + /* Wait until pending async sample operation done, this is required to + * satisfy the stated sample sequence following their issuing order, + * reflected by the sample start timestamp. + */ + if (atomic_read(&cli->sync_sample_count) != cli->sample_count) { + /* Return empty sample instead of performing real dump. + * As there is an async dump currently in-flight which will + * have the desired information. + */ + empty_sample = true; + ret = kbasep_kinstr_prfcnt_client_wait_async_done(cli); + + if (ret < 0) + return -ERESTARTSYS; + } + + mutex_lock(&cli->kinstr_ctx->lock); + + ret = kbasep_kinstr_prfcnt_client_dump(cli, + BASE_HWCNT_READER_EVENT_MANUAL, + user_data, false, empty_sample); + + mutex_unlock(&cli->kinstr_ctx->lock); + + return ret; +} + +static int +kbasep_kinstr_prfcnt_client_async_dump(struct kbase_kinstr_prfcnt_client *cli, + u64 user_data) +{ + unsigned int write_idx; + unsigned int read_idx; + unsigned int active_async_dumps; + unsigned int new_async_buf_idx; + int ret; + + lockdep_assert_held(&cli->cmd_sync_lock); + + /* If the client is not started, or not manual, the command invalid */ + if (!cli->active || cli->dump_interval_ns) + return -EINVAL; + + mutex_lock(&cli->kinstr_ctx->lock); + + write_idx = atomic_read(&cli->write_idx); + read_idx = atomic_read(&cli->read_idx); + active_async_dumps = + cli->sample_count - atomic_read(&cli->sync_sample_count); + new_async_buf_idx = write_idx + active_async_dumps; + + /* Check if there is a place to copy HWC block into. + * If successful, reserve space in the buffer for the asynchronous + * operation to make sure that it can actually take place. + * Because we reserve space for asynchronous dumps we need to take that + * in consideration here. + */ + ret = (new_async_buf_idx - read_idx == cli->sample_arr.sample_count) ? + -EBUSY : + 0; + + if (ret == -EBUSY) { + mutex_unlock(&cli->kinstr_ctx->lock); + return ret; + } + + if (active_async_dumps > 0) { + struct prfcnt_metadata *ptr_md; + unsigned int buf_idx = + new_async_buf_idx % cli->sample_arr.sample_count; + /* Instead of storing user_data, write it directly to future + * empty sample. 
+ */ + ptr_md = cli->sample_arr.samples[buf_idx].sample_meta; + ptr_md->u.sample_md.user_data = user_data; + + atomic_dec(&cli->sync_sample_count); + } else { + cli->async.user_data = user_data; + atomic_dec(&cli->sync_sample_count); + + kbase_hwcnt_virtualizer_queue_work(cli->kinstr_ctx->hvirt, + &cli->async.dump_work); + } + + mutex_unlock(&cli->kinstr_ctx->lock); + + return ret; +} + +static int +kbasep_kinstr_prfcnt_client_discard(struct kbase_kinstr_prfcnt_client *cli) +{ + WARN_ON(!cli); + lockdep_assert_held(&cli->cmd_sync_lock); + + mutex_lock(&cli->kinstr_ctx->lock); + + /* Discard (Clear) all internally buffered samples */ + atomic_set(&cli->read_idx, atomic_read(&cli->write_idx)); + + mutex_unlock(&cli->kinstr_ctx->lock); + + return 0; +} + +/** + * kbasep_kinstr_prfcnt_cmd() - Execute command for a client session. + * @cli: Non-NULL pointer to kinstr_prfcnt client. + * @control_cmd: Control command to execute. + * + * Return: 0 on success, else error code. + */ +static int kbasep_kinstr_prfcnt_cmd(struct kbase_kinstr_prfcnt_client *cli, + struct prfcnt_control_cmd *control_cmd) +{ + int ret = 0; + + mutex_lock(&cli->cmd_sync_lock); + + switch (control_cmd->cmd) { + case PRFCNT_CONTROL_CMD_START: + ret = kbasep_kinstr_prfcnt_client_start(cli, + control_cmd->user_data); + break; + case PRFCNT_CONTROL_CMD_STOP: + ret = kbasep_kinstr_prfcnt_client_stop(cli, + control_cmd->user_data); + break; + case PRFCNT_CONTROL_CMD_SAMPLE_SYNC: + ret = kbasep_kinstr_prfcnt_client_sync_dump( + cli, control_cmd->user_data); + break; + case PRFCNT_CONTROL_CMD_SAMPLE_ASYNC: + ret = kbasep_kinstr_prfcnt_client_async_dump( + cli, control_cmd->user_data); + break; + case PRFCNT_CONTROL_CMD_DISCARD: + ret = kbasep_kinstr_prfcnt_client_discard(cli); + break; + default: + ret = -EINVAL; + break; + } + + mutex_unlock(&cli->cmd_sync_lock); + + return ret; +} + +static int +kbasep_kinstr_prfcnt_get_sample(struct kbase_kinstr_prfcnt_client *cli, + struct prfcnt_sample_access *sample_access) +{ + unsigned int write_idx; + unsigned int read_idx; + u64 sample_offset_bytes; + struct prfcnt_metadata *sample_meta; + + write_idx = atomic_read(&cli->write_idx); + read_idx = atomic_read(&cli->read_idx); + + if (write_idx == read_idx) + return -EINVAL; + + read_idx %= cli->sample_arr.sample_count; + sample_offset_bytes = + (u64)(uintptr_t)cli->sample_arr.samples[read_idx].sample_meta - + (u64)(uintptr_t)cli->sample_arr.page_addr; + sample_meta = + (struct prfcnt_metadata *)cli->sample_arr.samples[read_idx] + .sample_meta; + + /* Verify that a valid sample has been dumped in the read_idx. + * There are situations where this may not be the case, + * for instance if the client is trying to get an asynchronous + * sample which has not been dumped yet. + */ + if (sample_meta->hdr.item_type != PRFCNT_SAMPLE_META_TYPE_SAMPLE) + return -EINVAL; + if (sample_meta->hdr.item_version != PRFCNT_READER_API_VERSION) + return -EINVAL; + + sample_access->sequence = sample_meta->u.sample_md.seq; + sample_access->sample_offset_bytes = sample_offset_bytes; + + /* read_idx is not incremented here, because the interface allows + * only one sample to be "in flight" between kernel space and user space. 
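The sample exchange above is a single-producer ring: write_idx and read_idx are free-running atomics, the dump path publishes a slot with a write barrier followed by atomic_inc(&cli->write_idx), GET_SAMPLE peeks at the slot addressed by read_idx without advancing it, and PUT_SAMPLE releases it by incrementing read_idx. A standalone sketch of just the index arithmetic (plain C, names and the 32-slot size are stand-ins):

#include <stdbool.h>
#include <stdio.h>

#define SAMPLE_COUNT 32 /* mirrors MAX_BUFFER_COUNT in this file */

struct ring {
	unsigned int write_idx; /* free running, only ever incremented */
	unsigned int read_idx;  /* free running, only ever incremented */
};

/* Full when the producer is a whole buffer ahead of the consumer. */
static bool ring_full(const struct ring *r)
{
	return r->write_idx - r->read_idx == SAMPLE_COUNT;
}

/* Empty when both indices match, i.e. nothing published and unread. */
static bool ring_empty(const struct ring *r)
{
	return r->write_idx == r->read_idx;
}

int main(void)
{
	struct ring r = { 0 };
	unsigned int slot;

	/* Producer: pick the slot, fill it, then publish by incrementing. */
	slot = r.write_idx % SAMPLE_COUNT;
	printf("write slot %u, full=%d\n", slot, ring_full(&r));
	r.write_idx++;

	/* Consumer: GET_SAMPLE reads the oldest slot, PUT_SAMPLE releases it. */
	slot = r.read_idx % SAMPLE_COUNT;
	printf("read slot %u, empty=%d\n", slot, ring_empty(&r));
	r.read_idx++;

	return 0;
}

Because both indices only ever grow, unsigned wrap-around keeps write_idx - read_idx correct, and taking the index modulo the sample count maps it onto the fixed array.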
+ */ + + return 0; +} + +static int +kbasep_kinstr_prfcnt_put_sample(struct kbase_kinstr_prfcnt_client *cli, + struct prfcnt_sample_access *sample_access) +{ + unsigned int write_idx; + unsigned int read_idx; + u64 sample_offset_bytes; + + write_idx = atomic_read(&cli->write_idx); + read_idx = atomic_read(&cli->read_idx); + + if (write_idx == read_idx) + return -EINVAL; + + if (sample_access->sequence != read_idx) + return -EINVAL; + + read_idx %= cli->sample_arr.sample_count; + sample_offset_bytes = + (u64)(uintptr_t)cli->sample_arr.samples[read_idx].sample_meta - + (u64)(uintptr_t)cli->sample_arr.page_addr; + + if (sample_access->sample_offset_bytes != sample_offset_bytes) + return -EINVAL; + + atomic_inc(&cli->read_idx); + + return 0; +} + +/** * kbasep_kinstr_prfcnt_hwcnt_reader_ioctl() - hwcnt reader's ioctl. * @filp: Non-NULL pointer to file structure. * @cmd: User command. @@ -243,10 +1029,11 @@ static long kbasep_kinstr_prfcnt_hwcnt_reader_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - long rcode; + long rcode = 0; struct kbase_kinstr_prfcnt_client *cli; + void __user *uarg = (void __user *)arg; - if (!filp || (_IOC_TYPE(cmd) != KBASE_HWCNT_READER)) + if (!filp) return -EINVAL; cli = filp->private_data; @@ -255,8 +1042,36 @@ static long kbasep_kinstr_prfcnt_hwcnt_reader_ioctl(struct file *filp, return -EINVAL; switch (_IOC_NR(cmd)) { + case _IOC_NR(KBASE_IOCTL_KINSTR_PRFCNT_CMD): { + struct prfcnt_control_cmd control_cmd; + int err; + + err = copy_from_user(&control_cmd, uarg, sizeof(control_cmd)); + if (err) + return -EFAULT; + rcode = kbasep_kinstr_prfcnt_cmd(cli, &control_cmd); + } break; + case _IOC_NR(KBASE_IOCTL_KINSTR_PRFCNT_GET_SAMPLE): { + struct prfcnt_sample_access sample_access; + int err; + + memset(&sample_access, 0, sizeof(sample_access)); + rcode = kbasep_kinstr_prfcnt_get_sample(cli, &sample_access); + err = copy_to_user(uarg, &sample_access, sizeof(sample_access)); + if (err) + return -EFAULT; + } break; + case _IOC_NR(KBASE_IOCTL_KINSTR_PRFCNT_PUT_SAMPLE): { + struct prfcnt_sample_access sample_access; + int err; + + err = copy_from_user(&sample_access, uarg, + sizeof(sample_access)); + if (err) + return -EFAULT; + rcode = kbasep_kinstr_prfcnt_put_sample(cli, &sample_access); + } break; default: - pr_warn("Unknown HWCNT ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd)); rcode = -EINVAL; break; } @@ -279,7 +1094,6 @@ static int kbasep_kinstr_prfcnt_hwcnt_reader_mmap(struct file *filp, if (!filp || !vma) return -EINVAL; - cli = filp->private_data; if (!cli) @@ -334,10 +1148,10 @@ kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli) return; kbase_hwcnt_virtualizer_client_destroy(cli->hvcli); - kfree(cli->dump_bufs_meta); kbasep_kinstr_prfcnt_sample_array_free(&cli->sample_arr); kbase_hwcnt_dump_buffer_free(&cli->tmp_buf); kbase_hwcnt_enable_map_free(&cli->enable_map); + mutex_destroy(&cli->cmd_sync_lock); kfree(cli); } @@ -377,6 +1191,31 @@ static const struct file_operations kinstr_prfcnt_client_fops = { .release = kbasep_kinstr_prfcnt_hwcnt_reader_release, }; +size_t kbasep_kinstr_prfcnt_get_sample_md_count(const struct kbase_hwcnt_metadata *metadata) +{ + size_t grp, blk, blk_inst; + size_t md_count = 0; + + if (!metadata) + return 0; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + /* Skip unused blocks */ + if (!kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst)) + continue; + + md_count++; + } + + /* add counts for clock_meta and sample meta, respectively */ + md_count += 2; + + /* 
Reserve one for last sentinel item. */ + md_count++; + + return md_count; +} + static size_t kbasep_kinstr_prfcnt_get_sample_size( const struct kbase_hwcnt_metadata *metadata, struct kbase_hwcnt_dump_buffer *dump_buf) @@ -384,19 +1223,12 @@ static size_t kbasep_kinstr_prfcnt_get_sample_size( size_t dump_buf_bytes; size_t clk_cnt_buf_bytes; size_t sample_meta_bytes; - size_t block_count = 0; - size_t grp, blk, blk_inst; + size_t md_count = kbasep_kinstr_prfcnt_get_sample_md_count(metadata); if (!metadata) return 0; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) - block_count++; - - /* Reserve one for last sentinel item. */ - block_count++; - - sample_meta_bytes = sizeof(struct prfcnt_metadata) * block_count; + sample_meta_bytes = sizeof(struct prfcnt_metadata) * md_count; dump_buf_bytes = metadata->dump_buf_bytes; clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * metadata->clk_cnt; @@ -411,7 +1243,68 @@ static size_t kbasep_kinstr_prfcnt_get_sample_size( */ static void kbasep_kinstr_prfcnt_dump_worker(struct work_struct *work) { - /* Do nothing. */ + struct kbase_kinstr_prfcnt_context *kinstr_ctx = container_of( + work, struct kbase_kinstr_prfcnt_context, dump_work); + struct kbase_kinstr_prfcnt_client *pos; + u64 cur_time_ns; + + mutex_lock(&kinstr_ctx->lock); + + cur_time_ns = kbasep_kinstr_prfcnt_timestamp_ns(); + + list_for_each_entry(pos, &kinstr_ctx->clients, node) { + if (pos->active && (pos->next_dump_time_ns != 0) && + (pos->next_dump_time_ns < cur_time_ns)) + kbasep_kinstr_prfcnt_client_dump( + pos, BASE_HWCNT_READER_EVENT_PERIODIC, + pos->user_data, false, false); + } + + kbasep_kinstr_prfcnt_reschedule_worker(kinstr_ctx); + + mutex_unlock(&kinstr_ctx->lock); +} + +/** + * kbasep_kinstr_prfcnt_async_dump_worker()- Dump worker for a manual client + * to take a single asynchronous + * sample. + * @work: Work structure. + */ +static void kbasep_kinstr_prfcnt_async_dump_worker(struct work_struct *work) +{ + struct kbase_kinstr_prfcnt_async *cli_async = + container_of(work, struct kbase_kinstr_prfcnt_async, dump_work); + struct kbase_kinstr_prfcnt_client *cli = container_of( + cli_async, struct kbase_kinstr_prfcnt_client, async); + + mutex_lock(&cli->kinstr_ctx->lock); + /* While the async operation is in flight, a sync stop might have been + * executed, for which the dump should be skipped. Further as we are + * doing an async dump, we expect that there is reserved buffer for + * this to happen. This is to avoid the rare corner case where the + * user side has issued a stop/start pair before the async work item + * get the chance to execute. + */ + if (cli->active && + (atomic_read(&cli->sync_sample_count) < cli->sample_count)) + kbasep_kinstr_prfcnt_client_dump(cli, + BASE_HWCNT_READER_EVENT_MANUAL, + cli->async.user_data, true, + false); + + /* While the async operation is in flight, more async dump requests + * may have been submitted. In this case, no more async dumps work + * will be queued. Instead space will be reserved for that dump and + * an empty sample will be return after handling the current async + * dump. 
+ */ + while (cli->active && + (atomic_read(&cli->sync_sample_count) < cli->sample_count)) { + kbasep_kinstr_prfcnt_client_dump( + cli, BASE_HWCNT_READER_EVENT_MANUAL, 0, true, true); + } + mutex_unlock(&cli->kinstr_ctx->lock); } /** @@ -422,6 +1315,17 @@ static void kbasep_kinstr_prfcnt_dump_worker(struct work_struct *work) static enum hrtimer_restart kbasep_kinstr_prfcnt_dump_timer(struct hrtimer *timer) { + struct kbase_kinstr_prfcnt_context *kinstr_ctx = container_of( + timer, struct kbase_kinstr_prfcnt_context, dump_timer); + + /* We don't need to check kinstr_ctx->suspend_count here. + * Suspend and resume functions already ensure that the worker + * is cancelled when the driver is suspended, and resumed when + * the suspend_count reaches 0. + */ + kbase_hwcnt_virtualizer_queue_work(kinstr_ctx->hvirt, + &kinstr_ctx->dump_work); + return HRTIMER_NORESTART; } @@ -555,20 +1459,14 @@ static int kbasep_kinstr_prfcnt_sample_array_alloc( size_t dump_buf_bytes; size_t clk_cnt_buf_bytes; size_t sample_meta_bytes; - size_t block_count = 0; + size_t md_count; size_t sample_size; - size_t grp, blk, blk_inst; if (!metadata || !sample_arr) return -EINVAL; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) - block_count++; - - /* Reserve one for last sentinel item. */ - block_count++; - - sample_meta_bytes = sizeof(struct prfcnt_metadata) * block_count; + md_count = kbasep_kinstr_prfcnt_get_sample_md_count(metadata); + sample_meta_bytes = sizeof(struct prfcnt_metadata) * md_count; dump_buf_bytes = metadata->dump_buf_bytes; clk_cnt_buf_bytes = sizeof(*samples->dump_buf.clk_cnt_buf) * metadata->clk_cnt; @@ -602,7 +1500,8 @@ static int kbasep_kinstr_prfcnt_sample_array_alloc( /* Internal layout in a sample buffer: [sample metadata, dump_buf, clk_cnt_buf]. */ samples[sample_idx].dump_buf.metadata = metadata; samples[sample_idx].sample_meta = - (u64 *)(uintptr_t)(addr + sample_meta_offset); + (struct prfcnt_metadata *)(uintptr_t)( + addr + sample_meta_offset); samples[sample_idx].dump_buf.dump_buf = (u64 *)(uintptr_t)(addr + dump_buf_offset); samples[sample_idx].dump_buf.clk_cnt_buf = @@ -724,6 +1623,31 @@ static int kbasep_kinstr_prfcnt_parse_request_enable( } /** + * kbasep_kinstr_prfcnt_parse_request_scope - Parse a scope request + * @req_scope: Performance counters scope request to parse. + * @config: Client object the session configuration should be written to. + * + * This function parses a performance counters scope request. + * There are only 2 acceptable outcomes: either the client leaves the scope + * as undefined, or all the scope requests are set to the same value. + * + * Return: 0 on success, else error code. + */ +static int kbasep_kinstr_prfcnt_parse_request_scope( + const struct prfcnt_request_scope *req_scope, + struct kbase_kinstr_prfcnt_client_config *config) +{ + int err = 0; + + if (config->scope == PRFCNT_SCOPE_RESERVED) + config->scope = req_scope->scope; + else if (config->scope != req_scope->scope) + err = -EINVAL; + + return err; +} + +/** * kbasep_kinstr_prfcnt_parse_setup - Parse session setup * @kinstr_ctx: Pointer to the kinstr_prfcnt context. * @setup: Session setup information to parse. 
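kbasep_kinstr_prfcnt_get_sample_md_count() and kbasep_kinstr_prfcnt_get_sample_size() above, together with the sample array allocation, define one sample as [metadata items, dump buffer, clock counters] laid out back to back, where the metadata item count is the number of available blocks plus a sample item, a clock item and a sentinel. A worked sketch of that arithmetic with made-up block counts and sizes (the real values come from the hwcnt metadata):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical GPU layout: 1 FE + 1 tiler + 4 memsys + 8 shader cores. */
	size_t avail_blocks = 1 + 1 + 4 + 8;
	size_t md_item_bytes = 32;   /* stand-in for sizeof(struct prfcnt_metadata) */
	size_t dump_buf_bytes = avail_blocks * 64 * sizeof(uint64_t);
	size_t clk_cnt = 2;

	/* available blocks + sample item + clock item + sentinel */
	size_t md_count = avail_blocks + 2 + 1;

	size_t sample_meta_bytes = md_item_bytes * md_count;
	size_t clk_cnt_buf_bytes = sizeof(uint64_t) * clk_cnt;
	size_t sample_size = sample_meta_bytes + dump_buf_bytes + clk_cnt_buf_bytes;

	/* Internal layout in a sample buffer: [sample metadata, dump_buf, clk_cnt_buf] */
	size_t dump_buf_offset = sample_meta_bytes;
	size_t clk_cnt_buf_offset = dump_buf_offset + dump_buf_bytes;

	printf("md_count=%zu sample_size=%zu dump_buf@%zu clk_cnt_buf@%zu\n",
	       md_count, sample_size, dump_buf_offset, clk_cnt_buf_offset);
	return 0;
}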
@@ -742,35 +1666,48 @@ static int kbasep_kinstr_prfcnt_parse_setup( { uint32_t i; struct prfcnt_request_item *req_arr; + unsigned int item_count = setup->in.request_item_count; + unsigned long bytes; int err = 0; - if (!setup->in.requests_ptr || (setup->in.request_item_count == 0) || - (setup->in.request_item_size == 0)) { + /* Limiting the request items to 2x of the expected: acommodating + * moderate duplications but rejecting excessive abuses. + */ + if (!setup->in.requests_ptr || (item_count < 2) || + (setup->in.request_item_size == 0) || + item_count > 2 * kinstr_ctx->info_item_count) { return -EINVAL; } - req_arr = - (struct prfcnt_request_item *)(uintptr_t)setup->in.requests_ptr; + bytes = item_count * sizeof(*req_arr); + req_arr = kmalloc(bytes, GFP_KERNEL); + if (!req_arr) + return -ENOMEM; - if (req_arr[setup->in.request_item_count - 1].hdr.item_type != - FLEX_LIST_TYPE_NONE) { - return -EINVAL; + if (copy_from_user(req_arr, u64_to_user_ptr(setup->in.requests_ptr), + bytes)) { + err = -EFAULT; + goto free_buf; } - if (req_arr[setup->in.request_item_count - 1].hdr.item_version != 0) - return -EINVAL; + if (req_arr[item_count - 1].hdr.item_type != FLEX_LIST_TYPE_NONE || + req_arr[item_count - 1].hdr.item_version != 0) { + err = -EINVAL; + goto free_buf; + } /* The session configuration can only feature one value for some - * properties (like capture mode and block counter set), but the client - * may potential issue multiple requests and try to set more than one - * value for those properties. While issuing multiple requests for the + * properties (like capture mode, block counter set and scope), but the + * client may potential issue multiple requests and try to set more than + * one value for those properties. While issuing multiple requests for the * same property is allowed by the protocol, asking for different values * is illegal. Leaving these properties as undefined is illegal, too. */ config->prfcnt_mode = PRFCNT_MODE_RESERVED; config->counter_set = KBASE_HWCNT_SET_UNDEFINED; + config->scope = PRFCNT_SCOPE_RESERVED; - for (i = 0; i < setup->in.request_item_count - 1; i++) { + for (i = 0; i < item_count - 1; i++) { if (req_arr[i].hdr.item_version > PRFCNT_READER_API_VERSION) { err = -EINVAL; break; @@ -797,17 +1734,20 @@ static int kbasep_kinstr_prfcnt_parse_setup( break; if (config->prfcnt_mode == PRFCNT_MODE_PERIODIC) { - config->period_us = + config->period_ns = req_arr[i] .u.req_mode.mode_config.periodic - .period_us; + .period_ns; - if ((config->period_us != 0) && - (config->period_us < - DUMP_INTERVAL_MIN_US)) { - config->period_us = - DUMP_INTERVAL_MIN_US; + if ((config->period_ns != 0) && + (config->period_ns < + DUMP_INTERVAL_MIN_NS)) { + config->period_ns = + DUMP_INTERVAL_MIN_NS; } + + if (config->period_ns == 0) + err = -EINVAL; } break; @@ -816,6 +1756,11 @@ static int kbasep_kinstr_prfcnt_parse_setup( &req_arr[i].u.req_enable, config); break; + case PRFCNT_REQUEST_TYPE_SCOPE: + err = kbasep_kinstr_prfcnt_parse_request_scope( + &req_arr[i].u.req_scope, config); + break; + default: err = -EINVAL; break; @@ -825,14 +1770,19 @@ static int kbasep_kinstr_prfcnt_parse_setup( break; } - /* Verify that properties (like capture mode and block counter set) - * have been defined by the user space client. 
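The parse_setup rework above replaces direct use of a userspace pointer with the standard kernel pattern: bound the element count, allocate a kernel-side copy, copy_from_user() into it, validate the sentinel item, and free the copy on every exit path. A hedged sketch of that shape (kernel C; the item struct, the max_items bound and the sentinel test are placeholders, not the driver's exact checks):

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/uaccess.h>

struct example_item {
	u32 type;
	u32 value;
};

static int example_copy_requests(u64 user_ptr, unsigned int item_count,
				 unsigned int max_items)
{
	struct example_item *arr;
	unsigned long bytes;
	int err = 0;

	/* Reject counts that could overrun or oversize the allocation. */
	if (!user_ptr || item_count < 2 || item_count > max_items)
		return -EINVAL;

	bytes = item_count * sizeof(*arr);
	arr = kmalloc(bytes, GFP_KERNEL);
	if (!arr)
		return -ENOMEM;

	if (copy_from_user(arr, u64_to_user_ptr(user_ptr), bytes)) {
		err = -EFAULT;
		goto free_buf;
	}

	/* Only trust the array after checking the terminating item. */
	if (arr[item_count - 1].type != 0)
		err = -EINVAL;

free_buf:
	kfree(arr);
	return err;
}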
- */ - if (config->prfcnt_mode == PRFCNT_MODE_RESERVED) - err = -EINVAL; +free_buf: + kfree(req_arr); - if (config->counter_set == KBASE_HWCNT_SET_UNDEFINED) - err = -EINVAL; + if (!err) { + /* Verify that properties (like capture mode and block counter + * set) have been defined by the user space client. + */ + if (config->prfcnt_mode == PRFCNT_MODE_RESERVED) + err = -EINVAL; + + if (config->counter_set == KBASE_HWCNT_SET_UNDEFINED) + err = -EINVAL; + } return err; } @@ -872,8 +1822,12 @@ static int kbasep_kinstr_prfcnt_client_create( goto error; cli->config.buffer_count = MAX_BUFFER_COUNT; - cli->dump_interval_ns = cli->config.period_us * NSEC_PER_USEC; + cli->dump_interval_ns = cli->config.period_ns; cli->next_dump_time_ns = 0; + cli->active = false; + atomic_set(&cli->write_idx, 0); + atomic_set(&cli->read_idx, 0); + err = kbase_hwcnt_enable_map_alloc(kinstr_ctx->metadata, &cli->enable_map); @@ -888,6 +1842,7 @@ static int kbasep_kinstr_prfcnt_client_create( kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &phys_em); cli->sample_count = cli->config.buffer_count; + atomic_set(&cli->sync_sample_count, cli->sample_count); cli->sample_size = kbasep_kinstr_prfcnt_get_sample_size( kinstr_ctx->metadata, &cli->tmp_buf); @@ -914,15 +1869,6 @@ static int kbasep_kinstr_prfcnt_client_create( if (err < 0) goto error; - err = -ENOMEM; - - cli->dump_bufs_meta = - kmalloc_array(cli->config.buffer_count, - sizeof(*cli->dump_bufs_meta), GFP_KERNEL); - - if (!cli->dump_bufs_meta) - goto error; - err = kbase_hwcnt_virtualizer_client_create( kinstr_ctx->hvirt, &cli->enable_map, &cli->hvcli); @@ -930,6 +1876,9 @@ static int kbasep_kinstr_prfcnt_client_create( goto error; init_waitqueue_head(&cli->waitq); + INIT_WORK(&cli->async.dump_work, + kbasep_kinstr_prfcnt_async_dump_worker); + mutex_init(&cli->cmd_sync_lock); *out_vcli = cli; return 0; @@ -965,48 +1914,11 @@ static void kbasep_kinstr_prfcnt_get_request_info_list( *arr_idx += ARRAY_SIZE(kinstr_prfcnt_supported_requests); } -static enum prfcnt_block_type -kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(u64 type) -{ - enum prfcnt_block_type block_type; - - switch (type) { - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: - block_type = PRFCNT_BLOCK_TYPE_FE; - break; - - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: - block_type = PRFCNT_BLOCK_TYPE_TILER; - break; - - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: - block_type = PRFCNT_BLOCK_TYPE_SHADER_CORE; - break; - - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: - block_type = PRFCNT_BLOCK_TYPE_MEMORY; - break; - - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED: - default: - block_type = PRFCNT_BLOCK_TYPE_RESERVED; - break; - } - - return block_type; -} - -static int kbasep_kinstr_prfcnt_get_block_info_list( - const struct kbase_hwcnt_metadata *metadata, size_t block_set, - struct prfcnt_enum_item *item_arr, size_t *arr_idx) +int kbasep_kinstr_prfcnt_get_block_info_list(const struct kbase_hwcnt_metadata *metadata, + size_t block_set, struct prfcnt_enum_item *item_arr, + size_t *arr_idx) { - size_t grp; - size_t blk; + size_t grp, blk; if (!metadata || !item_arr || !arr_idx) return -EINVAL; @@ -1015,19 +1927,30 @@ static int kbasep_kinstr_prfcnt_get_block_info_list( for (blk = 0; blk < kbase_hwcnt_metadata_block_count(metadata, grp); blk++, 
(*arr_idx)++) { + size_t blk_inst; + size_t unused_blk_inst_count = 0; + size_t blk_inst_count = + kbase_hwcnt_metadata_block_instance_count(metadata, grp, blk); + item_arr[*arr_idx].hdr.item_type = PRFCNT_ENUM_TYPE_BLOCK; item_arr[*arr_idx].hdr.item_version = PRFCNT_READER_API_VERSION; item_arr[*arr_idx].u.block_counter.set = block_set; - item_arr[*arr_idx].u.block_counter.block_type = kbase_hwcnt_metadata_block_type_to_prfcnt_block_type( kbase_hwcnt_metadata_block_type( metadata, grp, blk)); + + /* Count number of unused blocks to updated number of instances */ + for (blk_inst = 0; blk_inst < blk_inst_count; blk_inst++) { + if (!kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, + blk_inst)) + unused_blk_inst_count++; + } + item_arr[*arr_idx].u.block_counter.num_instances = - kbase_hwcnt_metadata_block_instance_count( - metadata, grp, blk); + blk_inst_count - unused_blk_inst_count; item_arr[*arr_idx].u.block_counter.num_values = kbase_hwcnt_metadata_block_values_count( metadata, grp, blk); @@ -1086,8 +2009,11 @@ static int kbasep_kinstr_prfcnt_enum_info_list( if (enum_info->info_item_count != kinstr_ctx->info_item_count) return -EINVAL; - prfcnt_item_arr = - (struct prfcnt_enum_item *)(uintptr_t)enum_info->info_list_ptr; + prfcnt_item_arr = kcalloc(enum_info->info_item_count, + sizeof(*prfcnt_item_arr), GFP_KERNEL); + if (!prfcnt_item_arr) + return -ENOMEM; + kbasep_kinstr_prfcnt_get_request_info_list(kinstr_ctx, prfcnt_item_arr, &arr_idx); metadata = kbase_hwcnt_virtualizer_metadata(kinstr_ctx->hvirt); @@ -1118,6 +2044,16 @@ static int kbasep_kinstr_prfcnt_enum_info_list( FLEX_LIST_TYPE_NONE; prfcnt_item_arr[enum_info->info_item_count - 1].hdr.item_version = 0; + if (!err) { + unsigned long bytes = + enum_info->info_item_count * sizeof(*prfcnt_item_arr); + + if (copy_to_user(u64_to_user_ptr(enum_info->info_list_ptr), + prfcnt_item_arr, bytes)) + err = -EFAULT; + } + + kfree(prfcnt_item_arr); return err; } diff --git a/mali_kbase/mali_kbase_kinstr_prfcnt.h b/mali_kbase/mali_kbase_kinstr_prfcnt.h index 83d76be..c42408b 100644 --- a/mali_kbase/mali_kbase_kinstr_prfcnt.h +++ b/mali_kbase/mali_kbase_kinstr_prfcnt.h @@ -26,6 +26,8 @@ #ifndef _KBASE_KINSTR_PRFCNT_H_ #define _KBASE_KINSTR_PRFCNT_H_ +#include <uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h> + struct kbase_kinstr_prfcnt_context; struct kbase_hwcnt_virtualizer; struct kbase_ioctl_hwcnt_reader_setup; @@ -76,6 +78,49 @@ void kbase_kinstr_prfcnt_suspend(struct kbase_kinstr_prfcnt_context *kinstr_ctx) */ void kbase_kinstr_prfcnt_resume(struct kbase_kinstr_prfcnt_context *kinstr_ctx); +#if MALI_KERNEL_TEST_API +/** + * kbasep_kinstr_prfcnt_get_block_info_list() - Get list of all block types + * with their information. + * @metadata: Non-NULL pointer to the hardware counter metadata. + * @block_set: Which SET the blocks will represent. + * @item_arr: Non-NULL pointer to array of enumeration items to populate. + * @arr_idx: Non-NULL pointer to index of array @item_arr. + * + * Populate list of counter blocks with information for enumeration. + * + * Return: 0 on success, else error code. + */ +int kbasep_kinstr_prfcnt_get_block_info_list(const struct kbase_hwcnt_metadata *metadata, + size_t block_set, struct prfcnt_enum_item *item_arr, + size_t *arr_idx); + +/** + * kbasep_kinstr_prfcnt_get_sample_md_count() - Get count of sample + * metadata items. + * @metadata: Non-NULL pointer to the hardware counter metadata. + * + * Return: Number of metadata items for available blocks in each sample. 
+ */ +size_t kbasep_kinstr_prfcnt_get_sample_md_count(const struct kbase_hwcnt_metadata *metadata); + +/** + * kbasep_kinstr_prfcnt_set_block_meta_items() - Populate a sample's block meta + * item array. + * @dst: Non-NULL pointer to the sample's dump buffer object. + * @block_meta_base: Non-NULL double pointer to the start of the block meta + * data items. + * @base_addr: Address of allocated pages for array of samples. Used + * to calculate offset of block values. + * @counter_set: The SET which blocks represent. + * + * Return: 0 on success, else error code. + */ +int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_dump_buffer *dst, + struct prfcnt_metadata **block_meta_base, + u64 base_addr, u8 counter_set); +#endif /* MALI_KERNEL_TEST_API */ + /** * kbase_kinstr_prfcnt_enum_info - Enumerate performance counter information. * @kinstr_ctx: Non-NULL pointer to the kinstr_prfcnt context. diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c index 320ffef..de854f3 100644 --- a/mali_kbase/mali_kbase_mem.c +++ b/mali_kbase/mali_kbase_mem.c @@ -4468,8 +4468,8 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, addr_start = reg->heap_info_gpu_addr - jit_report_gpu_mem_offset; - ptr = kbase_vmap(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE, - &mapping); + ptr = kbase_vmap_prot(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE, + KBASE_REG_CPU_RD, &mapping); if (!ptr) { dev_warn(kctx->kbdev->dev, "%s: JIT start=0x%llx unable to map memory near end pointer %llx\n", diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h index 95533f5..9cb4088 100644 --- a/mali_kbase/mali_kbase_mem.h +++ b/mali_kbase/mali_kbase_mem.h @@ -287,6 +287,8 @@ static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_m * that triggered incremental rendering by growing too much. * @rbtree: Backlink to the red-black tree of memory regions. * @start_pfn: The Page Frame Number in GPU virtual address space. + * @user_data: The address of GPU command queue when VA region represents + * a ring buffer. * @nr_pages: The size of the region in pages. * @initial_commit: Initial commit, for aligning the start address and * correctly growing KBASE_REG_TILER_ALIGN_TOP regions. @@ -324,6 +326,7 @@ struct kbase_va_region { struct list_head link; struct rb_root *rbtree; u64 start_pfn; + void *user_data; size_t nr_pages; size_t initial_commit; size_t threshold_pages; @@ -476,6 +479,7 @@ struct kbase_va_region { struct list_head jit_node; u16 jit_usage_id; u8 jit_bin_id; + #if MALI_JIT_PRESSURE_LIMIT_BASE /* Pointer to an object in GPU memory defining an end of an allocated * region diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c index 527bec4..d252373 100644 --- a/mali_kbase/mali_kbase_mem_linux.c +++ b/mali_kbase/mali_kbase_mem_linux.c @@ -1029,7 +1029,7 @@ int kbase_mem_do_sync_imported(struct kbase_context *kctx, struct kbase_va_region *reg, enum kbase_sync_type sync_fn) { int ret = -EINVAL; - struct dma_buf *dma_buf; + struct dma_buf __maybe_unused *dma_buf; enum dma_data_direction dir = DMA_BIDIRECTIONAL; lockdep_assert_held(&kctx->reg_lock); @@ -3214,8 +3214,12 @@ static unsigned long get_queue_doorbell_pfn(struct kbase_device *kbdev, * assigned one, otherwise a dummy page. Always return the * dummy page in no mali builds. 
*/ +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + return PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_db_page)); +#else if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID) return PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_db_page)); +#endif return (PFN_DOWN(kbdev->reg_start + CSF_HW_DOORBELL_PAGE_OFFSET + (u64)queue->doorbell_nr * CSF_HW_DOORBELL_PAGE_SIZE)); } @@ -3461,8 +3465,12 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf) /* Don't map in the actual register page if GPU is powered down. * Always map in the dummy page in no mali builds. */ +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_user_reg_page)); +#else if (!kbdev->pm.backend.gpu_powered) pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_user_reg_page)); +#endif ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, KBASE_MEM_GROUP_CSF_FW, vma, diff --git a/mali_kbase/mali_kbase_pm.c b/mali_kbase/mali_kbase_pm.c index 4078da1..af154d5 100644 --- a/mali_kbase/mali_kbase_pm.c +++ b/mali_kbase/mali_kbase_pm.c @@ -144,7 +144,7 @@ void kbase_pm_context_idle(struct kbase_device *kbdev) KBASE_EXPORT_TEST_API(kbase_pm_context_idle); -void kbase_pm_driver_suspend(struct kbase_device *kbdev) +int kbase_pm_driver_suspend(struct kbase_device *kbdev) { KBASE_DEBUG_ASSERT(kbdev); @@ -162,7 +162,7 @@ void kbase_pm_driver_suspend(struct kbase_device *kbdev) mutex_lock(&kbdev->pm.lock); if (WARN_ON(kbase_pm_is_suspending(kbdev))) { mutex_unlock(&kbdev->pm.lock); - return; + return 0; } kbdev->pm.suspending = true; mutex_unlock(&kbdev->pm.lock); @@ -193,7 +193,12 @@ void kbase_pm_driver_suspend(struct kbase_device *kbdev) */ kbasep_js_suspend(kbdev); #else - kbase_csf_scheduler_pm_suspend(kbdev); + if (kbase_csf_scheduler_pm_suspend(kbdev)) { + mutex_lock(&kbdev->pm.lock); + kbdev->pm.suspending = false; + mutex_unlock(&kbdev->pm.lock); + return -1; + } #endif /* Wait for the active count to reach zero. 
This is not the same as @@ -209,7 +214,12 @@ void kbase_pm_driver_suspend(struct kbase_device *kbdev) /* NOTE: We synchronize with anything that was just finishing a * kbase_pm_context_idle() call by locking the pm.lock below */ - kbase_hwaccess_pm_suspend(kbdev); + if (kbase_hwaccess_pm_suspend(kbdev)) { + mutex_lock(&kbdev->pm.lock); + kbdev->pm.suspending = false; + mutex_unlock(&kbdev->pm.lock); + return -1; + } #ifdef CONFIG_MALI_ARBITER_SUPPORT if (kbdev->arb.arb_if) { @@ -218,6 +228,8 @@ void kbase_pm_driver_suspend(struct kbase_device *kbdev) mutex_unlock(&kbdev->pm.arb_vm_state->vm_state_lock); } #endif /* CONFIG_MALI_ARBITER_SUPPORT */ + + return 0; } void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start) @@ -273,16 +285,19 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start) kbase_kinstr_prfcnt_resume(kbdev->kinstr_prfcnt_ctx); } -void kbase_pm_suspend(struct kbase_device *kbdev) +int kbase_pm_suspend(struct kbase_device *kbdev) { + int result = 0; #ifdef CONFIG_MALI_ARBITER_SUPPORT if (kbdev->arb.arb_if) kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_OS_SUSPEND_EVENT); else - kbase_pm_driver_suspend(kbdev); + result = kbase_pm_driver_suspend(kbdev); #else - kbase_pm_driver_suspend(kbdev); + result = kbase_pm_driver_suspend(kbdev); #endif /* CONFIG_MALI_ARBITER_SUPPORT */ + + return result; } void kbase_pm_resume(struct kbase_device *kbdev) diff --git a/mali_kbase/mali_kbase_pm.h b/mali_kbase/mali_kbase_pm.h index 980a8d1..730feea 100644 --- a/mali_kbase/mali_kbase_pm.h +++ b/mali_kbase/mali_kbase_pm.h @@ -165,8 +165,10 @@ void kbase_pm_context_idle(struct kbase_device *kbdev); * @note the mechanisms used here rely on all user-space threads being frozen * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up * the GPU e.g. via atom submission. + * + * Return: 0 on success. */ -void kbase_pm_suspend(struct kbase_device *kbdev); +int kbase_pm_suspend(struct kbase_device *kbdev); /** * Resume the GPU, allow register accesses to it, and resume running atoms on @@ -207,8 +209,10 @@ void kbase_pm_vsync_callback(int buffer_updated, void *data); * @note the mechanisms used here rely on all user-space threads being frozen * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up * the GPU e.g. via atom submission. + * + * Return: 0 on success. */ -void kbase_pm_driver_suspend(struct kbase_device *kbdev); +int kbase_pm_driver_suspend(struct kbase_device *kbdev); /** * kbase_pm_driver_resume() - Put GPU and driver in resume diff --git a/mali_kbase/mali_kbase_regs_history_debugfs.h b/mali_kbase/mali_kbase_regs_history_debugfs.h index 26decb4..1b4196d 100644 --- a/mali_kbase/mali_kbase_regs_history_debugfs.h +++ b/mali_kbase/mali_kbase_regs_history_debugfs.h @@ -69,7 +69,7 @@ void kbase_io_history_dump(struct kbase_device *kbdev); */ void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev); -#else /* defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_NO_MALI) */ +#else /* !defined(CONFIG_DEBUG_FS) || IS_ENABLED(CONFIG_MALI_NO_MALI) */ #define kbase_io_history_init(...) 
((int)0) diff --git a/mali_kbase/mali_kbase_softjobs.c b/mali_kbase/mali_kbase_softjobs.c index bee3513..df34854 100644 --- a/mali_kbase/mali_kbase_softjobs.c +++ b/mali_kbase/mali_kbase_softjobs.c @@ -95,7 +95,8 @@ static int kbasep_read_soft_event_status( unsigned char *mapped_evt; struct kbase_vmap_struct map; - mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map); + mapped_evt = kbase_vmap_prot(kctx, evt, sizeof(*mapped_evt), + KBASE_REG_CPU_RD, &map); if (!mapped_evt) return -EFAULT; @@ -116,7 +117,8 @@ static int kbasep_write_soft_event_status( (new_status != BASE_JD_SOFT_EVENT_RESET)) return -EINVAL; - mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map); + mapped_evt = kbase_vmap_prot(kctx, evt, sizeof(*mapped_evt), + KBASE_REG_CPU_WR, &map); if (!mapped_evt) return -EFAULT; @@ -1203,8 +1205,8 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) * Write the address of the JIT allocation to the user provided * GPU allocation. */ - ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr), - &mapping); + ptr = kbase_vmap_prot(kctx, info->gpu_alloc_addr, sizeof(*ptr), + KBASE_REG_CPU_WR, &mapping); if (!ptr) { /* * Leave the allocations "live" as the JIT free atom @@ -1649,7 +1651,12 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) if (copy_to_user((__user void *)(uintptr_t)katom->jc, &fence, sizeof(fence)) != 0) { kbase_sync_fence_out_remove(katom); - kbase_sync_fence_close_fd(fd); + /* fd should have been closed here, but there's + * no good way of doing that. Since + * copy_to_user() very rarely fails, and the fd + * will get closed on process termination this + * won't be a problem. + */ fence.basep.fd = -EINVAL; return -EINVAL; } diff --git a/mali_kbase/mali_kbase_sync.h b/mali_kbase/mali_kbase_sync.h index ad05cdf..11cb8b9 100644 --- a/mali_kbase/mali_kbase_sync.h +++ b/mali_kbase/mali_kbase_sync.h @@ -157,21 +157,6 @@ void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom); void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom); #endif /* !MALI_USE_CSF */ -/** - * kbase_sync_fence_close_fd() - Close a file descriptor representing a fence - * @fd: File descriptor to close - */ -static inline void kbase_sync_fence_close_fd(int fd) -{ -#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE - close_fd(fd); -#elif KERNEL_VERSION(4, 17, 0) <= LINUX_VERSION_CODE - ksys_close(fd); -#else - sys_close(fd); -#endif -} - #if !MALI_USE_CSF /** * kbase_sync_fence_in_info_get() - Retrieves information about input fence diff --git a/mali_kbase/mali_malisw.h b/mali_kbase/mali_malisw.h index 3ddfcd9..92c8d31 100644 --- a/mali_kbase/mali_malisw.h +++ b/mali_kbase/mali_malisw.h @@ -98,7 +98,14 @@ /* LINUX_VERSION_CODE < 5.4 */ #if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) -#define fallthrough CSTD_NOP(...) /* fallthrough */ -#endif +#if defined(GCC_VERSION) && GCC_VERSION >= 70000 +#ifndef __fallthrough +#define __fallthrough __attribute__((fallthrough)) +#endif /* __fallthrough */ +#define fallthrough __fallthrough +#else +#define fallthrough CSTD_NOP(...) 
/* fallthrough */ +#endif /* GCC_VERSION >= 70000 */ +#endif /* KERNEL_VERSION(5, 4, 0) */ #endif /* _MALISW_H_ */ diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c index 5f6cc7a..a450d38 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.c +++ b/mali_kbase/mmu/mali_kbase_mmu.c @@ -41,10 +41,91 @@ #include <mmu/mali_kbase_mmu_internal.h> #include <mali_kbase_cs_experimental.h> #include <device/mali_kbase_device.h> +#include <uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h> +#if !MALI_USE_CSF +#include <mali_kbase_hwaccess_jm.h> +#endif #include <mali_kbase_trace_gpu_mem.h> /** + * mmu_flush_cache_on_gpu_ctrl() - Check if cache flush needs to be done + * through GPU_CONTROL interface + * @kbdev: kbase device to check GPU model ID on. + * + * This function returns whether a cache flush for page table update should + * run through GPU_CONTROL interface or MMU_AS_CONTROL interface. + * + * Return: True if cache flush should be done on GPU command. + */ +static bool mmu_flush_cache_on_gpu_ctrl(struct kbase_device *kbdev) +{ + uint32_t const arch_maj_cur = (kbdev->gpu_props.props.raw_props.gpu_id & + GPU_ID2_ARCH_MAJOR) >> + GPU_ID2_ARCH_MAJOR_SHIFT; + + return arch_maj_cur > 11; +} + +/** + * mmu_flush_invalidate_on_gpu_ctrl() - Flush and invalidate the GPU caches + * through GPU_CONTROL interface. + * @kbdev: kbase device to issue the MMU operation on. + * @as: address space to issue the MMU operation on. + * @op_param: parameters for the operation. + * + * This wrapper function alternates AS_COMMAND_FLUSH_PT and AS_COMMAND_FLUSH_MEM + * to equivalent GPU_CONTROL command FLUSH_CACHES. + * The function first issue LOCK to MMU-AS with kbase_mmu_hw_do_operation(). + * And issues cache-flush with kbase_gpu_cache_flush_and_busy_wait() function + * then issue UNLOCK to MMU-AS with kbase_mmu_hw_do_operation(). + * + * Return: Zero if the operation was successful, non-zero otherwise. + */ +static int +mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, + struct kbase_as *as, + struct kbase_mmu_hw_op_param *op_param) +{ + u32 flush_op; + int ret, ret2; + + if (WARN_ON(kbdev == NULL) || + WARN_ON(as == NULL) || + WARN_ON(op_param == NULL)) + return -EINVAL; + + lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->mmu_hw_mutex); + + /* Translate operation to command */ + if (op_param->op == KBASE_MMU_OP_FLUSH_PT) { + flush_op = GPU_COMMAND_CACHE_CLN_INV_L2; + } else if (op_param->op == KBASE_MMU_OP_FLUSH_MEM) { + flush_op = GPU_COMMAND_CACHE_CLN_INV_L2_LSC; + } else { + dev_warn(kbdev->dev, "Invalid flush request (op = %d)\n", + op_param->op); + return -EINVAL; + } + + /* 1. Issue MMU_AS_CONTROL.COMMAND.LOCK operation. */ + op_param->op = KBASE_MMU_OP_LOCK; + ret = kbase_mmu_hw_do_operation(kbdev, as, op_param); + if (ret) + return ret; + + /* 2. Issue GPU_CONTROL.COMMAND.FLUSH_CACHES operation */ + ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, flush_op); + + /* 3. Issue MMU_AS_CONTROL.COMMAND.UNLOCK operation. */ + op_param->op = KBASE_MMU_OP_UNLOCK; + ret2 = kbase_mmu_hw_do_operation(kbdev, as, op_param); + + return ret ?: ret2; +} + +/** * kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches. * @kctx: The KBase context. * @vpfn: The virtual page frame number to start the flush on. 
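Two small idioms carry the new flush helpers above: the GPU_CONTROL path is selected by pulling the architecture major version out of the GPU ID with a mask-and-shift (GPU_ID2_ARCH_MAJOR) and comparing it against 11, and the LOCK / FLUSH_CACHES / UNLOCK sequence reports the flush error in preference to the unlock error (the GNU "ret ?: ret2" shorthand). A compact sketch of both idioms, with made-up mask and shift values rather than the real GPU_ID2 layout:

#include <stdint.h>
#include <stdio.h>

/* Illustrative only - not the real GPU_ID2_ARCH_MAJOR field layout. */
#define EX_ARCH_MAJOR_SHIFT 28
#define EX_ARCH_MAJOR_MASK  (0xFu << EX_ARCH_MAJOR_SHIFT)

static unsigned int arch_major(uint32_t gpu_id)
{
        return (gpu_id & EX_ARCH_MAJOR_MASK) >> EX_ARCH_MAJOR_SHIFT;
}

/* Mimics the lock/flush/unlock error handling: the flush error, if any,
 * takes precedence over the unlock error, as in "return ret ?: ret2".
 */
static int do_flush_sequence(int lock_err, int flush_err, int unlock_err)
{
        if (lock_err)
                return lock_err;
        return flush_err ? flush_err : unlock_err;
}

int main(void)
{
        uint32_t gpu_id = 0xC0001234u; /* top nibble 0xC -> arch major 12 */

        printf("arch major %u -> %s GPU_CONTROL flush\n", arch_major(gpu_id),
               arch_major(gpu_id) > 11 ? "use" : "skip");
        printf("sequence result: %d\n", do_flush_sequence(0, -5, -22));
        return 0;
}

Spelling the conditional out keeps the sketch portable; the driver code itself uses the GCC "?:" extension.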
@@ -244,7 +325,11 @@ static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev, .kctx_id = kctx_id, .mmu_sync_info = mmu_sync_info, }; - kbase_mmu_hw_do_operation(kbdev, faulting_as, &op_param); + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { + mmu_flush_invalidate_on_gpu_ctrl(kbdev, faulting_as, &op_param); + } else { + kbase_mmu_hw_do_operation(kbdev, faulting_as, &op_param); + } mutex_unlock(&kbdev->mmu_hw_mutex); @@ -934,7 +1019,13 @@ page_fault_retry: .kctx_id = kctx->id, .mmu_sync_info = mmu_sync_info, }; - kbase_mmu_hw_do_operation(kbdev, faulting_as, &op_param); + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { + mmu_flush_invalidate_on_gpu_ctrl(kbdev, faulting_as, + &op_param); + } else { + kbase_mmu_hw_do_operation(kbdev, faulting_as, + &op_param); + } mutex_unlock(&kbdev->mmu_hw_mutex); /* AS transaction end */ @@ -1046,11 +1137,7 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, int i; struct page *p; -#ifdef CONFIG_MALI_2MB_ALLOC - p = kbase_mem_pool_alloc(&kbdev->mem_pools.large[mmut->group_id]); -#else /* CONFIG_MALI_2MB_ALLOC */ p = kbase_mem_pool_alloc(&kbdev->mem_pools.small[mmut->group_id]); -#endif /* CONFIG_MALI_2MB_ALLOC */ if (!p) return 0; @@ -1087,12 +1174,7 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, return page_to_phys(p); alloc_free: - -#ifdef CONFIG_MALI_2MB_ALLOC - kbase_mem_pool_free(&kbdev->mem_pools.large[mmut->group_id], p, false); -#else /* CONFIG_MALI_2MB_ALLOC */ kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, false); -#endif /* CONFIG_MALI_2MB_ALLOC */ return 0; } @@ -1341,11 +1423,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, */ mutex_unlock(&kctx->mmu.mmu_lock); err = kbase_mem_pool_grow( -#ifdef CONFIG_MALI_2MB_ALLOC - &kbdev->mem_pools.large[ -#else &kbdev->mem_pools.small[ -#endif kctx->mmu.group_id], MIDGARD_MMU_BOTTOMLEVEL); mutex_lock(&kctx->mmu.mmu_lock); @@ -1433,11 +1511,7 @@ static void kbase_mmu_free_pgd(struct kbase_device *kbdev, p = pfn_to_page(PFN_DOWN(pgd)); -#ifdef CONFIG_MALI_2MB_ALLOC - kbase_mem_pool_free(&kbdev->mem_pools.large[mmut->group_id], -#else kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], -#endif p, dirty); atomic_sub(1, &kbdev->memdev.used_pages); @@ -1523,11 +1597,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, */ mutex_unlock(&mmut->mmu_lock); err = kbase_mem_pool_grow( -#ifdef CONFIG_MALI_2MB_ALLOC - &kbdev->mem_pools.large[mmut->group_id], -#else &kbdev->mem_pools.small[mmut->group_id], -#endif cur_level); mutex_lock(&mmut->mmu_lock); } while (!err); @@ -1681,8 +1751,15 @@ static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx, .kctx_id = kctx->id, .mmu_sync_info = mmu_sync_info, }; - err = kbase_mmu_hw_do_operation(kbdev, &kbdev->as[kctx->as_nr], - &op_param); + + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { + err = mmu_flush_invalidate_on_gpu_ctrl( + kbdev, &kbdev->as[kctx->as_nr], &op_param); + } else { + err = kbase_mmu_hw_do_operation(kbdev, &kbdev->as[kctx->as_nr], + &op_param); + } + if (err) { /* Flush failed to complete, assume the * GPU has hung and perform a reset to recover @@ -1744,7 +1821,13 @@ kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as, else op_param.op = KBASE_MMU_OP_FLUSH_PT; - err = kbase_mmu_hw_do_operation(kbdev, as, &op_param); + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + err = mmu_flush_invalidate_on_gpu_ctrl(kbdev, as, &op_param); + 
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } else { + err = kbase_mmu_hw_do_operation(kbdev, as, &op_param); + } if (err) { /* Flush failed to complete, assume the GPU has hung and @@ -1850,6 +1933,15 @@ void kbase_mmu_disable(struct kbase_context *kctx) kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0); kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr); +#if !MALI_USE_CSF + /* + * JM GPUs has some L1 read only caches that need to be invalidated + * with START_FLUSH configuration. Purge the MMU disabled kctx from + * the slot_rb tracking field so such invalidation is performed when + * a new katom is executed on the affected slots. + */ + kbase_backend_slot_kctx_purge_locked(kctx->kbdev, kctx); +#endif } KBASE_EXPORT_TEST_API(kbase_mmu_disable); @@ -2271,11 +2363,7 @@ int kbase_mmu_init(struct kbase_device *const kbdev, int err; err = kbase_mem_pool_grow( -#ifdef CONFIG_MALI_2MB_ALLOC - &kbdev->mem_pools.large[mmut->group_id], -#else &kbdev->mem_pools.small[mmut->group_id], -#endif MIDGARD_MMU_BOTTOMLEVEL); if (err) { kbase_mmu_term(kbdev, mmut); diff --git a/mali_kbase/mmu/mali_kbase_mmu.h b/mali_kbase/mmu/mali_kbase_mmu.h index 45a628c..fe721fc 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.h +++ b/mali_kbase/mmu/mali_kbase_mmu.h @@ -30,8 +30,9 @@ struct kbase_context; struct kbase_mmu_table; /** - * MMU-synchronous caller info. A pointer to this type is passed down from the outer-most callers - * in the kbase module - where the information resides as to the synchronous / asynchronous + * enum kbase_caller_mmu_sync_info - MMU-synchronous caller info. + * A pointer to this type is passed down from the outer-most callers in the kbase + * module - where the information resides as to the synchronous / asynchronous * nature of the call flow, with respect to MMU operations. ie - does the call flow relate to * existing GPU work does it come from requests (like ioctl) from user-space, power management, * etc. diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw.h b/mali_kbase/mmu/mali_kbase_mmu_hw.h index 7c0e95e..7cdf426 100644 --- a/mali_kbase/mmu/mali_kbase_mmu_hw.h +++ b/mali_kbase/mmu/mali_kbase_mmu_hw.h @@ -78,7 +78,7 @@ enum kbase_mmu_op_type { * struct kbase_mmu_hw_op_param - parameters for kbase_mmu_hw_do_operation() * @vpfn: MMU Virtual Page Frame Number to start the operation on. * @nr: Number of pages to work on. - * @type: Operation type (written to ASn_COMMAND). + * @op: Operation type (written to ASn_COMMAND). * @kctx_id: Kernel context ID for MMU command tracepoint * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops. */ diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c index 6306946..0ebc1bc 100644 --- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c +++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c @@ -128,23 +128,19 @@ static int wait_ready(struct kbase_device *kbdev, unsigned int as_nr) { unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; - u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)); - /* Wait for the MMU status to indicate there is no active command, in - * case one is pending. Do not log remaining register accesses. - */ - while (--max_loops && (val & AS_STATUS_AS_ACTIVE)) - val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)); + /* Wait for the MMU status to indicate there is no active command. 
*/ + while (--max_loops && + kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)) & + AS_STATUS_AS_ACTIVE) { + ; + } if (max_loops == 0) { dev_err(kbdev->dev, "AS_ACTIVE bit stuck, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n"); return -1; } - /* If waiting in loop was performed, log last read value. */ - if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops) - kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)); - return 0; } @@ -216,6 +212,11 @@ int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, int ret; u64 lock_addr = 0x0; + if (WARN_ON(kbdev == NULL) || + WARN_ON(as == NULL) || + WARN_ON(op_param == NULL)) + return -EINVAL; + lockdep_assert_held(&kbdev->mmu_hw_mutex); if (op_param->op == KBASE_MMU_OP_UNLOCK) { diff --git a/mali_kbase/tests/include/kutf/kutf_utils.h b/mali_kbase/tests/include/kutf/kutf_utils.h index 18dcc3d..5f6d769 100644 --- a/mali_kbase/tests/include/kutf/kutf_utils.h +++ b/mali_kbase/tests/include/kutf/kutf_utils.h @@ -54,6 +54,7 @@ * Return: Returns pointer to allocated string, or NULL on error. */ const char *kutf_dsprintf(struct kutf_mempool *pool, - const char *fmt, ...); + const char *fmt, ...) __printf(2, 3); + #endif /* _KERNEL_UTF_UTILS_H_ */ diff --git a/mali_kbase/tl/backend/mali_kbase_timeline_csf.c b/mali_kbase/tl/backend/mali_kbase_timeline_csf.c index c101563..567c5f1 100644 --- a/mali_kbase/tl/backend/mali_kbase_timeline_csf.c +++ b/mali_kbase/tl/backend/mali_kbase_timeline_csf.c @@ -44,6 +44,12 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev) GPU_ID2_ARCH_MAJOR) >> GPU_ID2_ARCH_MAJOR_SHIFT; u32 const num_sb_entries = arch_maj >= 11 ? 16 : 8; + u32 const supports_gpu_sleep = +#ifdef KBASE_PM_RUNTIME + kbdev->pm.backend.gpu_sleep_supported; +#else + false; +#endif /* KBASE_PM_RUNTIME */ /* Summarize the Address Space objects. */ for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) @@ -62,11 +68,11 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev) kbdev); /* Trace the creation of a new kbase device and set its properties. */ - __kbase_tlstream_tl_kbase_new_device( - summary, kbdev->gpu_props.props.raw_props.gpu_id, - kbdev->gpu_props.num_cores, kbdev->csf.global_iface.group_num, - kbdev->nr_hw_address_spaces, num_sb_entries, - kbdev_has_cross_stream_sync); + __kbase_tlstream_tl_kbase_new_device(summary, kbdev->gpu_props.props.raw_props.gpu_id, + kbdev->gpu_props.num_cores, + kbdev->csf.global_iface.group_num, + kbdev->nr_hw_address_spaces, num_sb_entries, + kbdev_has_cross_stream_sync, supports_gpu_sleep); /* Lock the context list, to ensure no changes to the list are made * while we're summarizing the contexts and their contents. @@ -89,7 +95,7 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev) __kbase_tlstream_tl_kbase_device_program_csg( summary, kbdev->gpu_props.props.raw_props.gpu_id, - group->kctx->id, group->handle, slot_i); + group->kctx->id, group->handle, slot_i, 0); } /* Reset body stream buffers while holding the kctx lock. 
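The timeline updates here and in the tracepoint implementation that follows share one recipe: new information (whether GPU sleep is supported, whether a CSG slot is being resumed, the finer-grained firmware state events) is appended as a trailing serialized field, so fields that existing decoders already know keep their byte offsets, and values that depend on a config option collapse to a constant when the option is off (as with KBASE_PM_RUNTIME above). A self-contained sketch of that append-only layout, where serialize_bytes() only mimics the role of kbasep_serialize_bytes() and EXAMPLE_PM_RUNTIME is a hypothetical stand-in:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Append-only serialization: each value is copied at the current position
 * and the position advances, so adding a field at the end never disturbs
 * the offsets of earlier fields.
 */
static size_t serialize_bytes(char *buf, size_t pos, const void *val, size_t len)
{
        memcpy(buf + pos, val, len);
        return pos + len;
}

int main(void)
{
        char buf[64];
        size_t pos = 0;
        uint32_t msg_id = 1;
        uint32_t has_cross_stream_sync = 1;
        uint32_t supports_gpu_sleep =
#ifdef EXAMPLE_PM_RUNTIME
                1;      /* real capability when the feature is compiled in */
#else
                0;      /* folded to false otherwise */
#endif

        pos = serialize_bytes(buf, pos, &msg_id, sizeof(msg_id));
        pos = serialize_bytes(buf, pos, &has_cross_stream_sync,
                              sizeof(has_cross_stream_sync));
        /* New trailing field - decoders unaware of it still parse
         * everything before this point correctly.
         */
        pos = serialize_bytes(buf, pos, &supports_gpu_sleep,
                              sizeof(supports_gpu_sleep));
        printf("message size: %zu bytes\n", pos);
        return 0;
}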
diff --git a/mali_kbase/tl/mali_kbase_tracepoints.c b/mali_kbase/tl/mali_kbase_tracepoints.c index 54e51f8..abbed05 100644 --- a/mali_kbase/tl/mali_kbase_tracepoints.c +++ b/mali_kbase/tl/mali_kbase_tracepoints.c @@ -120,8 +120,14 @@ enum tl_msg_id_obj { KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER, KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START, KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END, + KBASE_TL_KBASE_CSFFW_FW_RELOADING, + KBASE_TL_KBASE_CSFFW_FW_ENABLING, + KBASE_TL_KBASE_CSFFW_FW_REQUEST_SLEEP, + KBASE_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP, + KBASE_TL_KBASE_CSFFW_FW_REQUEST_HALT, + KBASE_TL_KBASE_CSFFW_FW_DISABLING, + KBASE_TL_KBASE_CSFFW_FW_OFF, KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW, - KBASE_TL_KBASE_CSFFW_RESET, KBASE_TL_JS_SCHED_START, KBASE_TL_JS_SCHED_END, KBASE_TL_JD_SUBMIT_ATOM_START, @@ -312,12 +318,12 @@ enum tl_msg_id_aux { "gpu") \ TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_DEVICE, \ "New KBase Device", \ - "@IIIIII", \ - "kbase_device_id,kbase_device_gpu_core_count,kbase_device_max_num_csgs,kbase_device_as_count,kbase_device_sb_entry_count,kbase_device_has_cross_stream_sync") \ + "@IIIIIII", \ + "kbase_device_id,kbase_device_gpu_core_count,kbase_device_max_num_csgs,kbase_device_as_count,kbase_device_sb_entry_count,kbase_device_has_cross_stream_sync,kbase_device_supports_gpu_sleep") \ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, \ "CSG is programmed to a slot", \ - "@IIII", \ - "kbase_device_id,kernel_ctx_id,gpu_cmdq_grp_handle,kbase_device_csg_slot_index") \ + "@IIIII", \ + "kbase_device_id,kernel_ctx_id,gpu_cmdq_grp_handle,kbase_device_csg_slot_index,kbase_device_csg_slot_resumed") \ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, \ "CSG is deprogrammed from a slot", \ "@II", \ @@ -506,14 +512,38 @@ enum tl_msg_id_aux { "KCPU Queue ends a group suspend", \ "@pI", \ "kcpu_queue,execute_error") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_RELOADING, \ + "CSF FW is being reloaded", \ + "@L", \ + "csffw_cycle") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_ENABLING, \ + "CSF FW is being enabled", \ + "@L", \ + "csffw_cycle") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_REQUEST_SLEEP, \ + "CSF FW sleep is requested", \ + "@L", \ + "csffw_cycle") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP, \ + "CSF FW wake up is requested", \ + "@L", \ + "csffw_cycle") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_REQUEST_HALT, \ + "CSF FW halt is requested", \ + "@L", \ + "csffw_cycle") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_DISABLING, \ + "CSF FW is being disabled", \ + "@L", \ + "csffw_cycle") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_OFF, \ + "CSF FW is off", \ + "@L", \ + "csffw_cycle") \ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW, \ "An overflow has happened with the CSFFW Timeline stream", \ "@LL", \ "csffw_timestamp,csffw_cycle") \ - TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_RESET, \ - "A reset has happened with the CSFFW", \ - "@L", \ - "csffw_cycle") \ TRACEPOINT_DESC(KBASE_TL_JS_SCHED_START, \ "Scheduling starts", \ "@I", \ @@ -2046,7 +2076,8 @@ void __kbase_tlstream_tl_kbase_new_device( u32 kbase_device_max_num_csgs, u32 kbase_device_as_count, u32 kbase_device_sb_entry_count, - u32 kbase_device_has_cross_stream_sync) + u32 kbase_device_has_cross_stream_sync, + u32 kbase_device_supports_gpu_sleep) { const u32 msg_id = KBASE_TL_KBASE_NEW_DEVICE; const size_t msg_size = sizeof(msg_id) + sizeof(u64) @@ -2056,6 +2087,7 @@ void __kbase_tlstream_tl_kbase_new_device( + sizeof(kbase_device_as_count) + sizeof(kbase_device_sb_entry_count) + 
sizeof(kbase_device_has_cross_stream_sync) + + sizeof(kbase_device_supports_gpu_sleep) ; char *buffer; unsigned long acq_flags; @@ -2077,6 +2109,8 @@ void __kbase_tlstream_tl_kbase_new_device( pos, &kbase_device_sb_entry_count, sizeof(kbase_device_sb_entry_count)); pos = kbasep_serialize_bytes(buffer, pos, &kbase_device_has_cross_stream_sync, sizeof(kbase_device_has_cross_stream_sync)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_supports_gpu_sleep, sizeof(kbase_device_supports_gpu_sleep)); kbase_tlstream_msgbuf_release(stream, acq_flags); } @@ -2086,7 +2120,8 @@ void __kbase_tlstream_tl_kbase_device_program_csg( u32 kbase_device_id, u32 kernel_ctx_id, u32 gpu_cmdq_grp_handle, - u32 kbase_device_csg_slot_index) + u32 kbase_device_csg_slot_index, + u32 kbase_device_csg_slot_resumed) { const u32 msg_id = KBASE_TL_KBASE_DEVICE_PROGRAM_CSG; const size_t msg_size = sizeof(msg_id) + sizeof(u64) @@ -2094,6 +2129,7 @@ void __kbase_tlstream_tl_kbase_device_program_csg( + sizeof(kernel_ctx_id) + sizeof(gpu_cmdq_grp_handle) + sizeof(kbase_device_csg_slot_index) + + sizeof(kbase_device_csg_slot_resumed) ; char *buffer; unsigned long acq_flags; @@ -2111,6 +2147,8 @@ void __kbase_tlstream_tl_kbase_device_program_csg( pos, &gpu_cmdq_grp_handle, sizeof(gpu_cmdq_grp_handle)); pos = kbasep_serialize_bytes(buffer, pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_csg_slot_resumed, sizeof(kbase_device_csg_slot_resumed)); kbase_tlstream_msgbuf_release(stream, acq_flags); } @@ -3309,14 +3347,12 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( +void __kbase_tlstream_tl_kbase_csffw_fw_reloading( struct kbase_tlstream *stream, - u64 csffw_timestamp, u64 csffw_cycle) { - const u32 msg_id = KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW; + const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_RELOADING; const size_t msg_size = sizeof(msg_id) + sizeof(u64) - + sizeof(csffw_timestamp) + sizeof(csffw_cycle) ; char *buffer; @@ -3328,18 +3364,104 @@ void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); pos = kbasep_serialize_timestamp(buffer, pos); pos = kbasep_serialize_bytes(buffer, - pos, &csffw_timestamp, sizeof(csffw_timestamp)); + pos, &csffw_cycle, sizeof(csffw_cycle)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_csffw_fw_enabling( + struct kbase_tlstream *stream, + u64 csffw_cycle) +{ + const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_ENABLING; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(csffw_cycle) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &csffw_cycle, sizeof(csffw_cycle)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_csffw_fw_request_sleep( + struct kbase_tlstream *stream, + u64 csffw_cycle) +{ + const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_REQUEST_SLEEP; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(csffw_cycle) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); 
+ + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &csffw_cycle, sizeof(csffw_cycle)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_csffw_fw_request_wakeup( + struct kbase_tlstream *stream, + u64 csffw_cycle) +{ + const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(csffw_cycle) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &csffw_cycle, sizeof(csffw_cycle)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_csffw_fw_request_halt( + struct kbase_tlstream *stream, + u64 csffw_cycle) +{ + const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_REQUEST_HALT; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(csffw_cycle) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); pos = kbasep_serialize_bytes(buffer, pos, &csffw_cycle, sizeof(csffw_cycle)); kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_kbase_csffw_reset( +void __kbase_tlstream_tl_kbase_csffw_fw_disabling( struct kbase_tlstream *stream, u64 csffw_cycle) { - const u32 msg_id = KBASE_TL_KBASE_CSFFW_RESET; + const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_DISABLING; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(csffw_cycle) ; @@ -3357,6 +3479,54 @@ void __kbase_tlstream_tl_kbase_csffw_reset( kbase_tlstream_msgbuf_release(stream, acq_flags); } +void __kbase_tlstream_tl_kbase_csffw_fw_off( + struct kbase_tlstream *stream, + u64 csffw_cycle) +{ + const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_OFF; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(csffw_cycle) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &csffw_cycle, sizeof(csffw_cycle)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( + struct kbase_tlstream *stream, + u64 csffw_timestamp, + u64 csffw_cycle) +{ + const u32 msg_id = KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(csffw_timestamp) + + sizeof(csffw_cycle) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &csffw_timestamp, sizeof(csffw_timestamp)); + pos = kbasep_serialize_bytes(buffer, + pos, &csffw_cycle, sizeof(csffw_cycle)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + void __kbase_tlstream_tl_js_sched_start( struct kbase_tlstream *stream, u32 dummy) diff --git 
a/mali_kbase/tl/mali_kbase_tracepoints.h b/mali_kbase/tl/mali_kbase_tracepoints.h index 3fc871c..aa10bc0 100644 --- a/mali_kbase/tl/mali_kbase_tracepoints.h +++ b/mali_kbase/tl/mali_kbase_tracepoints.h @@ -310,13 +310,15 @@ void __kbase_tlstream_tl_kbase_new_device( u32 kbase_device_max_num_csgs, u32 kbase_device_as_count, u32 kbase_device_sb_entry_count, - u32 kbase_device_has_cross_stream_sync); + u32 kbase_device_has_cross_stream_sync, + u32 kbase_device_supports_gpu_sleep); void __kbase_tlstream_tl_kbase_device_program_csg( struct kbase_tlstream *stream, u32 kbase_device_id, u32 kernel_ctx_id, u32 gpu_cmdq_grp_handle, - u32 kbase_device_csg_slot_index); + u32 kbase_device_csg_slot_index, + u32 kbase_device_csg_slot_resumed); void __kbase_tlstream_tl_kbase_device_deprogram_csg( struct kbase_tlstream *stream, u32 kbase_device_id, @@ -498,13 +500,31 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end( struct kbase_tlstream *stream, const void *kcpu_queue, u32 execute_error); -void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( +void __kbase_tlstream_tl_kbase_csffw_fw_reloading( + struct kbase_tlstream *stream, + u64 csffw_cycle); +void __kbase_tlstream_tl_kbase_csffw_fw_enabling( + struct kbase_tlstream *stream, + u64 csffw_cycle); +void __kbase_tlstream_tl_kbase_csffw_fw_request_sleep( + struct kbase_tlstream *stream, + u64 csffw_cycle); +void __kbase_tlstream_tl_kbase_csffw_fw_request_wakeup( + struct kbase_tlstream *stream, + u64 csffw_cycle); +void __kbase_tlstream_tl_kbase_csffw_fw_request_halt( struct kbase_tlstream *stream, - u64 csffw_timestamp, u64 csffw_cycle); -void __kbase_tlstream_tl_kbase_csffw_reset( +void __kbase_tlstream_tl_kbase_csffw_fw_disabling( struct kbase_tlstream *stream, u64 csffw_cycle); +void __kbase_tlstream_tl_kbase_csffw_fw_off( + struct kbase_tlstream *stream, + u64 csffw_cycle); +void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( + struct kbase_tlstream *stream, + u64 csffw_timestamp, + u64 csffw_cycle); void __kbase_tlstream_tl_js_sched_start( struct kbase_tlstream *stream, u32 dummy); @@ -1684,6 +1704,7 @@ struct kbase_tlstream; * @kbase_device_sb_entry_count: The number of entries each scoreboard set in the * physical hardware has available * @kbase_device_has_cross_stream_sync: Whether cross-stream synchronization is supported + * @kbase_device_supports_gpu_sleep: Whether GPU sleep is supported */ #if MALI_USE_CSF #define KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE( \ @@ -1693,14 +1714,15 @@ struct kbase_tlstream; kbase_device_max_num_csgs, \ kbase_device_as_count, \ kbase_device_sb_entry_count, \ - kbase_device_has_cross_stream_sync \ + kbase_device_has_cross_stream_sync, \ + kbase_device_supports_gpu_sleep \ ) \ do { \ int enabled = atomic_read(&kbdev->timeline_flags); \ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ __kbase_tlstream_tl_kbase_new_device( \ __TL_DISPATCH_STREAM(kbdev, obj), \ - kbase_device_id, kbase_device_gpu_core_count, kbase_device_max_num_csgs, kbase_device_as_count, kbase_device_sb_entry_count, kbase_device_has_cross_stream_sync); \ + kbase_device_id, kbase_device_gpu_core_count, kbase_device_max_num_csgs, kbase_device_as_count, kbase_device_sb_entry_count, kbase_device_has_cross_stream_sync, kbase_device_supports_gpu_sleep); \ } while (0) #else #define KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE( \ @@ -1710,7 +1732,8 @@ struct kbase_tlstream; kbase_device_max_num_csgs, \ kbase_device_as_count, \ kbase_device_sb_entry_count, \ - kbase_device_has_cross_stream_sync \ + kbase_device_has_cross_stream_sync, \ + 
kbase_device_supports_gpu_sleep \ ) \ do { } while (0) #endif /* MALI_USE_CSF */ @@ -1724,6 +1747,7 @@ struct kbase_tlstream; * @kernel_ctx_id: Unique ID for the KBase Context * @gpu_cmdq_grp_handle: GPU Command Queue Group handle which will match userspace * @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed + * @kbase_device_csg_slot_resumed: Whether the csg is being resumed */ #if MALI_USE_CSF #define KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( \ @@ -1731,14 +1755,15 @@ struct kbase_tlstream; kbase_device_id, \ kernel_ctx_id, \ gpu_cmdq_grp_handle, \ - kbase_device_csg_slot_index \ + kbase_device_csg_slot_index, \ + kbase_device_csg_slot_resumed \ ) \ do { \ int enabled = atomic_read(&kbdev->timeline_flags); \ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ __kbase_tlstream_tl_kbase_device_program_csg( \ __TL_DISPATCH_STREAM(kbdev, obj), \ - kbase_device_id, kernel_ctx_id, gpu_cmdq_grp_handle, kbase_device_csg_slot_index); \ + kbase_device_id, kernel_ctx_id, gpu_cmdq_grp_handle, kbase_device_csg_slot_index, kbase_device_csg_slot_resumed); \ } while (0) #else #define KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( \ @@ -1746,7 +1771,8 @@ struct kbase_tlstream; kbase_device_id, \ kernel_ctx_id, \ gpu_cmdq_grp_handle, \ - kbase_device_csg_slot_index \ + kbase_device_csg_slot_index, \ + kbase_device_csg_slot_resumed \ ) \ do { } while (0) #endif /* MALI_USE_CSF */ @@ -3146,59 +3172,221 @@ struct kbase_tlstream; #endif /* MALI_USE_CSF */ /** - * KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW - - * An overflow has happened with the CSFFW Timeline stream + * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING - + * CSF FW is being reloaded * * @kbdev: Kbase device - * @csffw_timestamp: Timestamp of a CSFFW event * @csffw_cycle: Cycle number of a CSFFW event */ #if MALI_USE_CSF -#define KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( \ +#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING( \ kbdev, \ - csffw_timestamp, \ csffw_cycle \ ) \ do { \ int enabled = atomic_read(&kbdev->timeline_flags); \ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ - __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( \ + __kbase_tlstream_tl_kbase_csffw_fw_reloading( \ __TL_DISPATCH_STREAM(kbdev, obj), \ - csffw_timestamp, csffw_cycle); \ + csffw_cycle); \ } while (0) #else -#define KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( \ +#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING( \ kbdev, \ - csffw_timestamp, \ csffw_cycle \ ) \ do { } while (0) #endif /* MALI_USE_CSF */ /** - * KBASE_TLSTREAM_TL_KBASE_CSFFW_RESET - - * A reset has happened with the CSFFW + * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_ENABLING - + * CSF FW is being enabled * * @kbdev: Kbase device * @csffw_cycle: Cycle number of a CSFFW event */ #if MALI_USE_CSF -#define KBASE_TLSTREAM_TL_KBASE_CSFFW_RESET( \ +#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_ENABLING( \ kbdev, \ csffw_cycle \ ) \ do { \ int enabled = atomic_read(&kbdev->timeline_flags); \ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ - __kbase_tlstream_tl_kbase_csffw_reset( \ + __kbase_tlstream_tl_kbase_csffw_fw_enabling( \ __TL_DISPATCH_STREAM(kbdev, obj), \ csffw_cycle); \ } while (0) #else -#define KBASE_TLSTREAM_TL_KBASE_CSFFW_RESET( \ +#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_ENABLING( \ + kbdev, \ + csffw_cycle \ + ) \ + do { } while (0) +#endif /* MALI_USE_CSF */ + +/** + * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_SLEEP - + * CSF FW sleep is requested + * + * @kbdev: Kbase device + * @csffw_cycle: Cycle number of a CSFFW event + */ +#if 
MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_SLEEP( \ + kbdev, \ + csffw_cycle \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_csffw_fw_request_sleep( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + csffw_cycle); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_SLEEP( \ + kbdev, \ + csffw_cycle \ + ) \ + do { } while (0) +#endif /* MALI_USE_CSF */ + +/** + * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP - + * CSF FW wake up is requested + * + * @kbdev: Kbase device + * @csffw_cycle: Cycle number of a CSFFW event + */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP( \ + kbdev, \ + csffw_cycle \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_csffw_fw_request_wakeup( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + csffw_cycle); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP( \ + kbdev, \ + csffw_cycle \ + ) \ + do { } while (0) +#endif /* MALI_USE_CSF */ + +/** + * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_HALT - + * CSF FW halt is requested + * + * @kbdev: Kbase device + * @csffw_cycle: Cycle number of a CSFFW event + */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_HALT( \ kbdev, \ csffw_cycle \ ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_csffw_fw_request_halt( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + csffw_cycle); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_HALT( \ + kbdev, \ + csffw_cycle \ + ) \ + do { } while (0) +#endif /* MALI_USE_CSF */ + +/** + * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_DISABLING - + * CSF FW is being disabled + * + * @kbdev: Kbase device + * @csffw_cycle: Cycle number of a CSFFW event + */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_DISABLING( \ + kbdev, \ + csffw_cycle \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_csffw_fw_disabling( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + csffw_cycle); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_DISABLING( \ + kbdev, \ + csffw_cycle \ + ) \ + do { } while (0) +#endif /* MALI_USE_CSF */ + +/** + * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_OFF - + * CSF FW is off + * + * @kbdev: Kbase device + * @csffw_cycle: Cycle number of a CSFFW event + */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_OFF( \ + kbdev, \ + csffw_cycle \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_csffw_fw_off( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + csffw_cycle); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_OFF( \ + kbdev, \ + csffw_cycle \ + ) \ + do { } while (0) +#endif /* MALI_USE_CSF */ + +/** + * KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW - + * An overflow has happened with the CSFFW Timeline stream + * + * @kbdev: Kbase device + * @csffw_timestamp: Timestamp of a CSFFW event + * @csffw_cycle: Cycle number of a CSFFW event + */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( \ + kbdev, \ + csffw_timestamp, \ + csffw_cycle \ + ) \ + do { \ + int enabled = 
atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + csffw_timestamp, csffw_cycle); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( \ + kbdev, \ + csffw_timestamp, \ + csffw_cycle \ + ) \ do { } while (0) #endif /* MALI_USE_CSF */
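All of the wrapper macros above share the same double gate: when MALI_USE_CSF is not set they reduce to an empty do { } while (0) so call sites still parse as a single statement, and when it is set they test the runtime timeline_flags word before calling the emitter. A stripped-down, stand-alone version of that pattern, with hypothetical names (EXAMPLE_USE_CSF, TRACE_FW_EVENT and emit_fw_event() are not kbase symbols):

#include <stdio.h>

#define EXAMPLE_USE_CSF 1                 /* compile-time gate */
#define EXAMPLE_ENABLE_FW_TRACE (1u << 0) /* runtime enable bit */

static unsigned int example_trace_flags = EXAMPLE_ENABLE_FW_TRACE;

static void emit_fw_event(const char *name, unsigned long long cycle)
{
        printf("%s @ cycle %llu\n", name, cycle);
}

#if EXAMPLE_USE_CSF
#define TRACE_FW_EVENT(name, cycle)                                    \
        do {                                                           \
                if (example_trace_flags & EXAMPLE_ENABLE_FW_TRACE)     \
                        emit_fw_event(name, cycle);                    \
        } while (0)
#else
/* Compiles away entirely, but still parses as one statement. */
#define TRACE_FW_EVENT(name, cycle) do { } while (0)
#endif

int main(void)
{
        TRACE_FW_EVENT("fw_request_sleep", 1024ULL);
        return 0;
}

The do { } while (0) wrapper is what lets the disabled form sit safely inside an if/else without introducing a dangling-semicolon problem.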