From 0207d6c3b7a2002f15c60d08617e956faf5ba90c Mon Sep 17 00:00:00 2001 From: Siddharth Kapoor Date: Fri, 7 Jan 2022 19:09:01 +0800 Subject: Mali Valhall Android DDK r35p0 KMD Provenance: 3e260085ac (collaborate/EAC/v_r35p0) VX504X08X-BU-00000-r35p0-01eac0 - Valhall Android DDK VX504X08X-BU-60000-r35p0-01eac0 - Valhall Android Document Bundle VX504X08X-DC-11001-r35p0-01eac0 - Valhall Android DDK Software Errata VX504X08X-SW-99006-r35p0-01eac0 - Valhall Android Renderscript AOSP parts Signed-off-by: Siddharth Kapoor Change-Id: Id9ef73da49680e2935a827c40d54169545f7162e --- .../midgard/backend/gpu/mali_kbase_model_dummy.h | 57 ++++++++ .../gpu/arm/midgard/csf/mali_gpu_csf_registers.h | 13 ++ .../gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h | 56 +++++++- .../gpu/backend/mali_kbase_gpu_regmap_csf.h | 11 ++ .../midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h | 6 + .../uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h | 4 +- .../uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h | 154 ++++++++++++++++++--- .../uapi/gpu/arm/midgard/mali_kbase_ioctl.h | 28 ---- 8 files changed, 279 insertions(+), 50 deletions(-) create mode 100644 common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h (limited to 'common') diff --git a/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h new file mode 100644 index 0000000..9d677ca --- /dev/null +++ b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * Dummy Model interface + */ + +#ifndef _UAPI_KBASE_MODEL_DUMMY_H_ +#define _UAPI_KBASE_MODEL_DUMMY_H_ + +#include + +#define KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS (4) +#define KBASE_DUMMY_MODEL_COUNTER_PER_CORE (60) +#define KBASE_DUMMY_MODEL_COUNTERS_PER_BIT (4) +#define KBASE_DUMMY_MODEL_COUNTER_ENABLED(enable_mask, ctr_idx) \ + (enable_mask & (1 << (ctr_idx / KBASE_DUMMY_MODEL_COUNTERS_PER_BIT))) + +#define KBASE_DUMMY_MODEL_HEADERS_PER_BLOCK 4 +#define KBASE_DUMMY_MODEL_COUNTERS_PER_BLOCK 60 +#define KBASE_DUMMY_MODEL_VALUES_PER_BLOCK \ + (KBASE_DUMMY_MODEL_COUNTERS_PER_BLOCK + \ + KBASE_DUMMY_MODEL_HEADERS_PER_BLOCK) +#define KBASE_DUMMY_MODEL_BLOCK_SIZE \ + (KBASE_DUMMY_MODEL_VALUES_PER_BLOCK * sizeof(__u32)) +#define KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS 8 +#define KBASE_DUMMY_MODEL_MAX_SHADER_CORES 32 +#define KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS \ + (1 + 1 + KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS + KBASE_DUMMY_MODEL_MAX_SHADER_CORES) +#define KBASE_DUMMY_MODEL_COUNTER_TOTAL \ + (KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * \ + KBASE_DUMMY_MODEL_COUNTER_PER_CORE) + +#define DUMMY_IMPLEMENTATION_SHADER_PRESENT (0xFull) +#define DUMMY_IMPLEMENTATION_TILER_PRESENT (0x1ull) +#define DUMMY_IMPLEMENTATION_L2_PRESENT (0x1ull) +#define DUMMY_IMPLEMENTATION_STACK_PRESENT (0xFull) + +#endif /* _UAPI_KBASE_MODEL_DUMMY_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h b/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h index a5dc745..1d15f57 100644 --- 
a/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h +++ b/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h @@ -251,8 +251,20 @@ /* CS_KERNEL_INPUT_BLOCK register set definitions */ /* GLB_VERSION register */ #define GLB_VERSION_PATCH_SHIFT (0) +#define GLB_VERSION_PATCH_MASK ((0xFFFF) << GLB_VERSION_PATCH_SHIFT) +#define GLB_VERSION_PATCH_GET(reg_val) (((reg_val)&GLB_VERSION_PATCH_MASK) >> GLB_VERSION_PATCH_SHIFT) +#define GLB_VERSION_PATCH_SET(reg_val, value) \ + (((reg_val) & ~GLB_VERSION_PATCH_MASK) | (((value) << GLB_VERSION_PATCH_SHIFT) & GLB_VERSION_PATCH_MASK)) #define GLB_VERSION_MINOR_SHIFT (16) +#define GLB_VERSION_MINOR_MASK ((0xFF) << GLB_VERSION_MINOR_SHIFT) +#define GLB_VERSION_MINOR_GET(reg_val) (((reg_val)&GLB_VERSION_MINOR_MASK) >> GLB_VERSION_MINOR_SHIFT) +#define GLB_VERSION_MINOR_SET(reg_val, value) \ + (((reg_val) & ~GLB_VERSION_MINOR_MASK) | (((value) << GLB_VERSION_MINOR_SHIFT) & GLB_VERSION_MINOR_MASK)) #define GLB_VERSION_MAJOR_SHIFT (24) +#define GLB_VERSION_MAJOR_MASK ((0xFF) << GLB_VERSION_MAJOR_SHIFT) +#define GLB_VERSION_MAJOR_GET(reg_val) (((reg_val)&GLB_VERSION_MAJOR_MASK) >> GLB_VERSION_MAJOR_SHIFT) +#define GLB_VERSION_MAJOR_SET(reg_val, value) \ + (((reg_val) & ~GLB_VERSION_MAJOR_MASK) | (((value) << GLB_VERSION_MAJOR_SHIFT) & GLB_VERSION_MAJOR_MASK)) /* CS_REQ register */ #define CS_REQ_STATE_SHIFT 0 @@ -935,6 +947,7 @@ (((reg_val) & ~CSG_PROTM_SUSPEND_BUF_POINTER_MASK) | \ (((value) << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) & CSG_PROTM_SUSPEND_BUF_POINTER_MASK)) + /* End of CSG_INPUT_BLOCK register set definitions */ /* CSG_OUTPUT_BLOCK register set definitions */ diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h index ec4870c..3df8a01 100644 --- a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h @@ -46,10 +46,14 @@ * trace 
configurations with CSF trace_command. * 1.6: * - Added new HW performance counters interface to all GPUs. + * 1.7: + * - Added reserved field to QUEUE_GROUP_CREATE ioctl for future use + * 1.8: + * - Removed Kernel legacy HWC interface */ #define BASE_UK_VERSION_MAJOR 1 -#define BASE_UK_VERSION_MINOR 5 +#define BASE_UK_VERSION_MINOR 8 /** * struct kbase_ioctl_version_check - Check version compatibility between @@ -178,6 +182,50 @@ struct kbase_ioctl_cs_queue_terminate { #define KBASE_IOCTL_CS_QUEUE_TERMINATE \ _IOW(KBASE_IOCTL_TYPE, 41, struct kbase_ioctl_cs_queue_terminate) +/** + * union kbase_ioctl_cs_queue_group_create_1_6 - Create a GPU command queue + * group + * @in: Input parameters + * @in.tiler_mask: Mask of tiler endpoints the group is allowed to use. + * @in.fragment_mask: Mask of fragment endpoints the group is allowed to use. + * @in.compute_mask: Mask of compute endpoints the group is allowed to use. + * @in.cs_min: Minimum number of CSs required. + * @in.priority: Queue group's priority within a process. + * @in.tiler_max: Maximum number of tiler endpoints the group is allowed + * to use. + * @in.fragment_max: Maximum number of fragment endpoints the group is + * allowed to use. + * @in.compute_max: Maximum number of compute endpoints the group is allowed + * to use. + * @in.padding: Currently unused, must be zero + * @out: Output parameters + * @out.group_handle: Handle of a newly created queue group. + * @out.padding: Currently unused, must be zero + * @out.group_uid: UID of the queue group available to base. 
+ */ +union kbase_ioctl_cs_queue_group_create_1_6 { + struct { + __u64 tiler_mask; + __u64 fragment_mask; + __u64 compute_mask; + __u8 cs_min; + __u8 priority; + __u8 tiler_max; + __u8 fragment_max; + __u8 compute_max; + __u8 padding[3]; + + } in; + struct { + __u8 group_handle; + __u8 padding[3]; + __u32 group_uid; + } out; +}; + +#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_6 \ + _IOWR(KBASE_IOCTL_TYPE, 42, union kbase_ioctl_cs_queue_group_create_1_6) + /** * union kbase_ioctl_cs_queue_group_create - Create a GPU command queue group * @in: Input parameters @@ -209,7 +257,7 @@ union kbase_ioctl_cs_queue_group_create { __u8 fragment_max; __u8 compute_max; __u8 padding[3]; - + __u64 reserved; } in; struct { __u8 group_handle; @@ -218,8 +266,8 @@ union kbase_ioctl_cs_queue_group_create { } out; }; -#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE \ - _IOWR(KBASE_IOCTL_TYPE, 42, union kbase_ioctl_cs_queue_group_create) +#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE \ + _IOWR(KBASE_IOCTL_TYPE, 58, union kbase_ioctl_cs_queue_group_create) /** * struct kbase_ioctl_cs_queue_group_term - Terminate a GPU command queue group diff --git a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h index 4001a4c..b1720ed 100644 --- a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h +++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h @@ -250,6 +250,17 @@ GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN_INVALIDATE | \ GPU_COMMAND_FLUSH_PAYLOAD_OTHER_NONE)) +/* Clean and invalidate L2, LSC, and Other caches */ +#define GPU_COMMAND_CACHE_CLN_INV_FULL \ + GPU_COMMAND_CODE_PAYLOAD( \ + GPU_COMMAND_CODE_FLUSH_CACHES, \ + (GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE | \ + GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN_INVALIDATE | \ + GPU_COMMAND_FLUSH_PAYLOAD_OTHER_INVALIDATE)) + +/* Merge cache flush commands */ +#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) 
((cmd1) | (cmd2)) + /* Places the GPU in protected mode */ #define GPU_COMMAND_SET_PROTECTED_MODE \ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_SET_PROTECTED_MODE, 0) diff --git a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h index dcadcc7..ecf812c 100644 --- a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h +++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h @@ -175,6 +175,7 @@ /* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */ #define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0) #define JS_CONFIG_START_FLUSH_CLEAN (1u << 8) +#define JS_CONFIG_START_FLUSH_INV_SHADER_OTHER (2u << 8) #define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8) #define JS_CONFIG_START_MMU (1u << 10) #define JS_CONFIG_JOB_CHAIN_FLAG (1u << 11) @@ -264,6 +265,11 @@ /* GPU_COMMAND cache flush alias to CSF command payload */ #define GPU_COMMAND_CACHE_CLN_INV_L2 GPU_COMMAND_CLEAN_INV_CACHES #define GPU_COMMAND_CACHE_CLN_INV_L2_LSC GPU_COMMAND_CLEAN_INV_CACHES +#define GPU_COMMAND_CACHE_CLN_INV_FULL GPU_COMMAND_CLEAN_INV_CACHES + +/* Merge cache flush commands */ +#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) \ + ((cmd1) > (cmd2) ? (cmd1) : (cmd2)) /* IRQ flags */ #define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ diff --git a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h index 2598e20..d957dea 100644 --- a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h @@ -121,9 +121,11 @@ * - Added ioctl 55: set_limited_core_count. * 11.32: * - Added new HW performance counters interface to all GPUs. 
+ * 11.33: + * - Removed Kernel legacy HWC interface */ #define BASE_UK_VERSION_MAJOR 11 -#define BASE_UK_VERSION_MINOR 31 +#define BASE_UK_VERSION_MINOR 33 /** * struct kbase_ioctl_version_check - Check version compatibility between diff --git a/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h b/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h index 15843ee..2cdd29c 100644 --- a/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h +++ b/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h @@ -126,6 +126,7 @@ enum prfcnt_list_type { #define PRFCNT_REQUEST_TYPE_MODE FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_REQUEST, 0) #define PRFCNT_REQUEST_TYPE_ENABLE FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_REQUEST, 1) +#define PRFCNT_REQUEST_TYPE_SCOPE FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_REQUEST, 2) #define PRFCNT_SAMPLE_META_TYPE_SAMPLE \ FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_SAMPLE_META, 0) @@ -150,6 +151,7 @@ struct prfcnt_item_header { * @PRFCNT_BLOCK_TYPE_TILER: Tiler. * @PRFCNT_BLOCK_TYPE_MEMORY: Memory System. * @PRFCNT_BLOCK_TYPE_SHADER_CORE: Shader Core. + * @PRFCNT_BLOCK_TYPE_RESERVED: Reserved. */ enum prfcnt_block_type { PRFCNT_BLOCK_TYPE_FE, @@ -160,10 +162,11 @@ enum prfcnt_block_type { }; /** - * enum prfcnt_block_set - Type of performance counter block set. + * enum prfcnt_set - Type of performance counter block set. * @PRFCNT_SET_PRIMARY: Primary. * @PRFCNT_SET_SECONDARY: Secondary. * @PRFCNT_SET_TERTIARY: Tertiary. + * @PRFCNT_SET_RESERVED: Reserved. */ enum prfcnt_set { PRFCNT_SET_PRIMARY, @@ -176,19 +179,19 @@ enum prfcnt_set { * struct prfcnt_enum_block_counter - Performance counter block descriptor. * @block_type: Type of performance counter block. * @set: Which SET this represents: primary, secondary or tertiary. + * @pad: Padding bytes. * @num_instances: How many instances of this block type exist in the hardware. * @num_values: How many entries in the values array there are for samples * from this block. - * @pad: Padding bytes. 
* @counter_mask: Bitmask that indicates the availability of counters in this * block. */ struct prfcnt_enum_block_counter { __u8 block_type; __u8 set; - __u8 num_instances; - __u8 num_values; - __u8 pad[4]; + __u8 pad[2]; + __u16 num_instances; + __u16 num_values; __u64 counter_mask[2]; }; @@ -206,12 +209,14 @@ struct prfcnt_enum_request { /** * struct prfcnt_enum_item - Performance counter enumeration item. - * @hdr: Header describing the type of item in the list. - * @block_counter: Performance counter block descriptor. - * @request: Request descriptor. + * @hdr: Header describing the type of item in the list. + * @u: Structure containing descriptor for enumeration item type. + * @u.block_counter: Performance counter block descriptor. + * @u.request: Request descriptor. */ struct prfcnt_enum_item { struct prfcnt_item_header hdr; + /** union u - union of block_counter and request */ union { struct prfcnt_enum_block_counter block_counter; struct prfcnt_enum_request request; @@ -222,6 +227,7 @@ struct prfcnt_enum_item { * enum prfcnt_mode - Capture mode for counter sampling. * @PRFCNT_MODE_MANUAL: Manual sampling mode. * @PRFCNT_MODE_PERIODIC: Periodic sampling mode. + * @PRFCNT_MODE_RESERVED: Reserved. */ enum prfcnt_mode { PRFCNT_MODE_MANUAL, @@ -231,16 +237,19 @@ enum prfcnt_mode { /** * struct prfcnt_request_mode - Mode request descriptor. - * @mode: Capture mode for the session, either manual or periodic. - * @pad: Padding bytes. - * @period_us: Period in microseconds, for periodic mode. + * @mode: Capture mode for the session, either manual or periodic. + * @pad: Padding bytes. + * @mode_config: Structure containing configuration for periodic mode. + * @mode_config.periodic: Periodic config. + * @mode_config.periodic.period_ns: Period in nanoseconds, for periodic mode. 
*/ struct prfcnt_request_mode { __u8 mode; __u8 pad[7]; + /** union mode_config - request mode configuration */ union { struct { - __u64 period_us; + __u64 period_ns; } periodic; } mode_config; }; @@ -260,17 +269,41 @@ struct prfcnt_request_enable { __u64 enable_mask[2]; }; +/** + * enum prfcnt_scope - Scope of performance counters. + * @PRFCNT_SCOPE_GLOBAL: Global scope. + * @PRFCNT_SCOPE_RESERVED: Reserved. + */ +enum prfcnt_scope { + PRFCNT_SCOPE_GLOBAL, + PRFCNT_SCOPE_RESERVED = 255, +}; + +/** + * struct prfcnt_request_scope - Scope request descriptor. + * @scope: Scope of the performance counters to capture. + * @pad: Padding bytes. + */ +struct prfcnt_request_scope { + __u8 scope; + __u8 pad[7]; +}; + /** * struct prfcnt_request_item - Performance counter request item. - * @hdr: Header describing the type of item in the list. - * @req_mode: Mode request descriptor. - * @req_enable: Enable request descriptor. + * @hdr: Header describing the type of item in the list. + * @u: Structure containing descriptor for request type. + * @u.req_mode: Mode request descriptor. + * @u.req_enable: Enable request descriptor. + * @u.req_scope: Scope request descriptor. */ struct prfcnt_request_item { struct prfcnt_item_header hdr; + /** union u - union of req_mode, req_enable and req_scope */ union { struct prfcnt_request_mode req_mode; struct prfcnt_request_enable req_enable; + struct prfcnt_request_scope req_scope; } u; }; @@ -278,12 +311,19 @@ struct prfcnt_request_item { * enum prfcnt_request_type - Type of request descriptor. * @PRFCNT_REQUEST_MODE: Specify the capture mode to be used for the session. * @PRFCNT_REQUEST_ENABLE: Specify which performance counters to capture. + * @PRFCNT_REQUEST_SCOPE: Specify the scope of the performance counters. 
*/ enum prfcnt_request_type { PRFCNT_REQUEST_MODE, PRFCNT_REQUEST_ENABLE, + PRFCNT_REQUEST_SCOPE, }; +/* This sample contains overflows from dump duration stretch because the sample buffer was full */ +#define SAMPLE_FLAG_OVERFLOW (1u << 0) +/* This sample has had an error condition for sample duration */ +#define SAMPLE_FLAG_ERROR (1u << 30) + /** * struct prfcnt_sample_metadata - Metadata for counter sample data. * @timestamp_start: Earliest timestamp that values in this sample represent. @@ -292,6 +332,7 @@ enum prfcnt_request_type { * GET_SAMPLE. * @user_data: User data provided to HWC_CMD_START or HWC_CMD_SAMPLE_* * @flags: Property flags. + * @pad: Padding bytes. */ struct prfcnt_sample_metadata { __u64 timestamp_start; @@ -302,18 +343,25 @@ struct prfcnt_sample_metadata { __u32 pad; }; +/* Maximum number of domains a metadata for clock cycles can refer to */ +#define MAX_REPORTED_DOMAINS (4) + /** * struct prfcnt_clock_metadata - Metadata for clock cycles. * @num_domains: Number of domains this metadata refers to. + * @pad: Padding bytes. * @cycles: Number of cycles elapsed in each counter domain between - * timestamp_start and timestamp_end. + * timestamp_start and timestamp_end. Valid only for the + * first @p num_domains. */ struct prfcnt_clock_metadata { __u32 num_domains; __u32 pad; - __u64 *cycles; + __u64 cycles[MAX_REPORTED_DOMAINS]; }; +/* This block state is unknown */ +#define BLOCK_STATE_UNKNOWN (0) /* This block was powered on for at least some portion of the sample */ #define BLOCK_STATE_ON (1 << 0) /* This block was powered off for at least some portion of the sample */ @@ -336,10 +384,12 @@ struct prfcnt_clock_metadata { * @block_type: Type of performance counter block. * @block_idx: Index of performance counter block. * @set: Set of performance counter block. + * @pad_u8: Padding bytes. * @block_state: Bits set indicate the states which the block is known * to have operated in during this sample. 
* @values_offset: Offset from the start of the mmapped region, to the values * for this block. The values themselves are an array of __u64. + * @pad_u32: Padding bytes. */ struct prfcnt_block_metadata { __u8 block_type; @@ -351,6 +401,14 @@ struct prfcnt_block_metadata { __u32 pad_u32; }; +/** + * struct prfcnt_metadata - Performance counter metadata item. + * @hdr: Header describing the type of item in the list. + * @u: Structure containing descriptor for metadata type. + * @u.sample_md: Counter sample data metadata descriptor. + * @u.clock_md: Clock cycles metadata descriptor. + * @u.block_md: Counter block metadata descriptor. + */ struct prfcnt_metadata { struct prfcnt_item_header hdr; union { @@ -360,5 +418,67 @@ struct prfcnt_metadata { } u; }; +/** + * enum prfcnt_control_cmd_code - Control command code for client session. + * @PRFCNT_CONTROL_CMD_START: Start the counter data dump run for + * the calling client session. + * @PRFCNT_CONTROL_CMD_STOP: Stop the counter data dump run for the + * calling client session. + * @PRFCNT_CONTROL_CMD_SAMPLE_SYNC: Trigger a synchronous manual sample. + * @PRFCNT_CONTROL_CMD_SAMPLE_ASYNC: Trigger an asynchronous manual sample. + * @PRFCNT_CONTROL_CMD_DISCARD: Discard all samples which have not yet + * been consumed by userspace. Note that + * this can race with new samples if + * HWC_CMD_STOP is not called first. + */ +enum prfcnt_control_cmd_code { + PRFCNT_CONTROL_CMD_START = 1, + PRFCNT_CONTROL_CMD_STOP, + PRFCNT_CONTROL_CMD_SAMPLE_SYNC, + PRFCNT_CONTROL_CMD_SAMPLE_ASYNC, + PRFCNT_CONTROL_CMD_DISCARD, +}; + +/** struct prfcnt_control_cmd - Control command + * @cmd: Control command for the session. + * @pad: Padding bytes. + * @user_data: Pointer to user data, which will be returned as part of + * sample metadata. It only affects a single sample if used + * with CMD_SAMPLE_SYNC or CMD_SAMPLE_ASYNC. It affects all + * samples between CMD_START and CMD_STOP if used with the + * periodic sampling. 
+ */ +struct prfcnt_control_cmd { + __u16 cmd; + __u16 pad[3]; + __u64 user_data; +}; + +/** struct prfcnt_sample_access - Metadata to access a sample. + * @sequence: Sequence number for the sample. + * For GET_SAMPLE, it will be set by the kernel. + * For PUT_SAMPLE, it shall be equal to the same value + * provided by the kernel for GET_SAMPLE. + * @sample_offset_bytes: Offset from the start of the mapped area to the first + * entry in the metadata list (sample_metadata) for this + * sample. + */ +struct prfcnt_sample_access { + __u64 sequence; + __u64 sample_offset_bytes; +}; + +/* The ids of ioctl commands, on a reader file descriptor, magic number */ +#define KBASE_KINSTR_PRFCNT_READER 0xBF +/* Ioctl ID for issuing a session operational command */ +#define KBASE_IOCTL_KINSTR_PRFCNT_CMD \ + _IOW(KBASE_KINSTR_PRFCNT_READER, 0x00, struct prfcnt_control_cmd) +/* Ioctl ID for fetching a dumped sample */ +#define KBASE_IOCTL_KINSTR_PRFCNT_GET_SAMPLE \ + _IOR(KBASE_KINSTR_PRFCNT_READER, 0x01, struct prfcnt_sample_access) +/* Ioctl ID for releasing the internal buffer of the previously fetched sample */ +#define KBASE_IOCTL_KINSTR_PRFCNT_PUT_SAMPLE \ + _IOW(KBASE_KINSTR_PRFCNT_READER, 0x10, struct prfcnt_sample_access) + #endif /* _UAPI_KBASE_HWCNT_READER_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h b/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h index 8e1ed55..63dd3c8 100644 --- a/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h @@ -170,34 +170,6 @@ struct kbase_ioctl_hwcnt_reader_setup { #define KBASE_IOCTL_HWCNT_READER_SETUP \ _IOW(KBASE_IOCTL_TYPE, 8, struct kbase_ioctl_hwcnt_reader_setup) -/** - * struct kbase_ioctl_hwcnt_enable - Enable hardware counter collection - * @dump_buffer: GPU address to write counters to - * @fe_bm: counters selection bitmask (Front end) - * @shader_bm: counters selection bitmask (Shader) - * @tiler_bm: counters selection bitmask (Tiler) 
- * @mmu_l2_bm: counters selection bitmask (MMU_L2) - */ -struct kbase_ioctl_hwcnt_enable { - __u64 dump_buffer; - __u32 fe_bm; - __u32 shader_bm; - __u32 tiler_bm; - __u32 mmu_l2_bm; -}; - -/* This IOCTL is deprecated as of R33, and will be removed in R35. */ -#define KBASE_IOCTL_HWCNT_ENABLE \ - _IOW(KBASE_IOCTL_TYPE, 9, struct kbase_ioctl_hwcnt_enable) - -/* This IOCTL is deprecated as of R33, and will be removed in R35. */ -#define KBASE_IOCTL_HWCNT_DUMP \ - _IO(KBASE_IOCTL_TYPE, 10) - -/* This IOCTL is deprecated as of R33, and will be removed in R35. */ -#define KBASE_IOCTL_HWCNT_CLEAR \ - _IO(KBASE_IOCTL_TYPE, 11) - /** * struct kbase_ioctl_hwcnt_values - Values to set dummy the dummy counters to. * @data: Counter samples for the dummy model. -- cgit v1.2.3