From c30533582604fe0365bc3ce4e9e8e19dec3109da Mon Sep 17 00:00:00 2001 From: Jack Diver Date: Fri, 2 Sep 2022 11:38:04 +0000 Subject: Mali Valhall Android DDK r38p1-01eac0 VX504X08X-BU-00000-r38p1-01eac0 - Valhall Android DDK VX504X08X-BU-60000-r38p1-01eac0 - Valhall Android Document Bundle VX504X08X-DC-11001-r38p1-01eac0 - Valhall Android DDK Software Errata VX504X08X-SW-99006-r38p1-01eac0 - Valhall Android Renderscript AOSP parts Signed-off-by: Jack Diver Change-Id: I242060ad8ddc14475bda657cbbbe6b6c26ecfd57 --- common/include/linux/memory_group_manager.h | 23 ++ common/include/linux/version_compat_defs.h | 31 +++ .../midgard/backend/gpu/mali_kbase_model_dummy.h | 11 +- .../gpu/arm/midgard/csf/mali_base_csf_kernel.h | 240 +++++-------------- .../gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h | 80 ++++++- .../gpu/backend/mali_kbase_gpu_regmap_csf.h | 30 +++ .../gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h | 6 +- .../uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h | 263 +++++---------------- .../uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h | 4 +- .../uapi/gpu/arm/midgard/mali_base_common_kernel.h | 231 ++++++++++++++++++ .../uapi/gpu/arm/midgard/mali_base_kernel.h | 70 +----- .../uapi/gpu/arm/midgard/mali_base_mem_priv.h | 5 +- .../uapi/gpu/arm/midgard/mali_kbase_ioctl.h | 5 +- common/include/uapi/gpu/arm/midgard/mali_uk.h | 70 ------ 14 files changed, 530 insertions(+), 539 deletions(-) create mode 100644 common/include/linux/version_compat_defs.h create mode 100644 common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h create mode 100644 common/include/uapi/gpu/arm/midgard/mali_base_common_kernel.h delete mode 100644 common/include/uapi/gpu/arm/midgard/mali_uk.h (limited to 'common/include') diff --git a/common/include/linux/memory_group_manager.h b/common/include/linux/memory_group_manager.h index efa35f5..b0609df 100644 --- a/common/include/linux/memory_group_manager.h +++ b/common/include/linux/memory_group_manager.h @@ -43,6 +43,8 @@ struct 
memory_group_manager_import_data; * @mgm_free_page: Callback to free physical memory in a group * @mgm_get_import_memory_id: Callback to get the group ID for imported memory * @mgm_update_gpu_pte: Callback to modify a GPU page table entry + * @mgm_pte_to_original_pte: Callback to get the original PTE entry as given + * to mgm_update_gpu_pte * @mgm_vmf_insert_pfn_prot: Callback to map a physical memory page for the CPU */ struct memory_group_manager_ops { @@ -127,6 +129,27 @@ struct memory_group_manager_ops { u64 (*mgm_update_gpu_pte)(struct memory_group_manager_device *mgm_dev, int group_id, int mmu_level, u64 pte); + /* + * mgm_pte_to_original_pte - Undo any modification done during mgm_update_gpu_pte() + * + * @mgm_dev: The memory group manager through which the request + * is being made. + * @group_id: A physical memory group ID. The meaning of this is + * defined by the systems integrator. Its valid range is + * 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. + * @mmu_level: The level of the page table entry in @ate. + * @pte: The page table entry to restore the original representation for, + * in LPAE or AArch64 format (depending on the driver's configuration). + * + * Undo any modifications done during mgm_update_gpu_pte(). + * This function allows getting back the original PTE entry as given + * to mgm_update_gpu_pte(). + * + * Return: PTE entry as originally specified to mgm_update_gpu_pte() + */ + u64 (*mgm_pte_to_original_pte)(struct memory_group_manager_device *mgm_dev, int group_id, + int mmu_level, u64 pte); + /* * mgm_vmf_insert_pfn_prot - Map a physical page in a group for the CPU * diff --git a/common/include/linux/version_compat_defs.h b/common/include/linux/version_compat_defs.h new file mode 100644 index 0000000..a8e0874 --- /dev/null +++ b/common/include/linux/version_compat_defs.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _VERSION_COMPAT_DEFS_H_ +#define _VERSION_COMPAT_DEFS_H_ + +#include + +#if KERNEL_VERSION(4, 16, 0) >= LINUX_VERSION_CODE +typedef unsigned int __poll_t; +#endif + +#endif /* _VERSION_COMPAT_DEFS_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h index 9d677ca..613eb1f 100644 --- a/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h +++ b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,11 +43,18 @@ (KBASE_DUMMY_MODEL_VALUES_PER_BLOCK * sizeof(__u32)) #define KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS 8 #define KBASE_DUMMY_MODEL_MAX_SHADER_CORES 32 -#define KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS \ +#define KBASE_DUMMY_MODEL_MAX_FIRMWARE_BLOCKS 0 +#define KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS \ (1 + 1 + KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS + KBASE_DUMMY_MODEL_MAX_SHADER_CORES) +#define KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS \ + (KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS + KBASE_DUMMY_MODEL_MAX_FIRMWARE_BLOCKS) #define KBASE_DUMMY_MODEL_COUNTER_TOTAL \ (KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * \ KBASE_DUMMY_MODEL_COUNTER_PER_CORE) +#define KBASE_DUMMY_MODEL_MAX_VALUES_PER_SAMPLE \ + (KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * KBASE_DUMMY_MODEL_VALUES_PER_BLOCK) +#define KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE \ + (KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE) #define DUMMY_IMPLEMENTATION_SHADER_PRESENT (0xFull) #define DUMMY_IMPLEMENTATION_TILER_PRESENT (0x1ull) diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h index 7f7b9dd..3b02350 100644 --- a/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h +++ b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h @@ -23,99 +23,16 @@ #define _UAPI_BASE_CSF_KERNEL_H_ #include +#include "../mali_base_common_kernel.h" -/* Memory allocation, access/hint flags. +/* Memory allocation, access/hint flags & mask specific to CSF GPU. * * See base_mem_alloc_flags. 
*/ -/* IN */ -/* Read access CPU side - */ -#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0) - -/* Write access CPU side - */ -#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1) - -/* Read access GPU side - */ -#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2) - -/* Write access GPU side - */ -#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3) - -/* Execute allowed on the GPU side - */ -#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4) - -/* Will be permanently mapped in kernel space. - * Flag is only allowed on allocations originating from kbase. - */ -#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5) - -/* The allocation will completely reside within the same 4GB chunk in the GPU - * virtual space. - * Since this flag is primarily required only for the TLS memory which will - * not be used to contain executable code and also not used for Tiler heap, - * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags. - */ -#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6) - -/* Userspace is not allowed to free this memory. - * Flag is only allowed on allocations originating from kbase. - */ -#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7) - /* Must be FIXED memory. 
*/ #define BASE_MEM_FIXED ((base_mem_alloc_flags)1 << 8) -/* Grow backing store on GPU Page Fault - */ -#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9) - -/* Page coherence Outer shareable, if available - */ -#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10) - -/* Page coherence Inner shareable - */ -#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11) - -/* IN/OUT */ -/* Should be cached on the CPU, returned if actually cached - */ -#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12) - -/* IN/OUT */ -/* Must have same VA on both the GPU and the CPU - */ -#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13) - -/* OUT */ -/* Must call mmap to acquire a GPU address for the alloc - */ -#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14) - -/* IN */ -/* Page coherence Outer shareable, required. - */ -#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15) - -/* Protected memory - */ -#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16) - -/* Not needed physical memory - */ -#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17) - -/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the - * addresses to be the same - */ -#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18) - /* CSF event memory * * If Outer shareable coherence is not specified or not available, then on @@ -131,46 +48,15 @@ #define BASE_MEM_RESERVED_BIT_20 ((base_mem_alloc_flags)1 << 20) -/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu - * mode. Some components within the GPU might only be able to access memory - * that is GPU cacheable. Refer to the specific GPU implementation for more - * details. The 3 shareability flags will be ignored for GPU uncached memory. - * If used while importing USER_BUFFER type memory, then the import will fail - * if the memory is not aligned to GPU and CPU cache line width. 
- */ -#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21) - -/* - * Bits [22:25] for group_id (0~15). - * - * base_mem_group_id_set() should be used to pack a memory group ID into a - * base_mem_alloc_flags value instead of accessing the bits directly. - * base_mem_group_id_get() should be used to extract the memory group ID from - * a base_mem_alloc_flags value. - */ -#define BASEP_MEM_GROUP_ID_SHIFT 22 -#define BASE_MEM_GROUP_ID_MASK \ - ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT) - -/* Must do CPU cache maintenance when imported memory is mapped/unmapped - * on GPU. Currently applicable to dma-buf type only. - */ -#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26) - -/* OUT */ -/* Kernel side cache sync ops required */ -#define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28) /* Must be FIXABLE memory: its GPU VA will be determined at a later point, * at which time it will be at a fixed GPU VA. */ #define BASE_MEM_FIXABLE ((base_mem_alloc_flags)1 << 29) -/* Number of bits used as flags for base memory management - * - * Must be kept in sync with the base_mem_alloc_flags flags +/* Note that the number of bits used for base_mem_alloc_flags + * must be less than BASE_MEM_FLAGS_NR_BITS !!! */ -#define BASE_MEM_FLAGS_NR_BITS 30 /* A mask of all the flags which are only valid for allocations within kbase, * and may not be passed from user space. @@ -178,62 +64,23 @@ #define BASEP_MEM_FLAGS_KERNEL_ONLY \ (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE) -/* A mask for all output bits, excluding IN/OUT bits. - */ -#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP - -/* A mask for all input bits, including IN/OUT bits. 
- */ -#define BASE_MEM_FLAGS_INPUT_MASK \ - (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) - /* A mask of all currently reserved flags */ #define BASE_MEM_FLAGS_RESERVED BASE_MEM_RESERVED_BIT_20 -#define BASEP_MEM_INVALID_HANDLE (0ul) -#define BASE_MEM_MMU_DUMP_HANDLE (1ul << LOCAL_PAGE_SHIFT) -#define BASE_MEM_TRACE_BUFFER_HANDLE (2ul << LOCAL_PAGE_SHIFT) -#define BASE_MEM_MAP_TRACKING_HANDLE (3ul << LOCAL_PAGE_SHIFT) -#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ul << LOCAL_PAGE_SHIFT) -/* reserved handles ..-47< for future special handles */ +/* Special base mem handles specific to CSF. + */ #define BASEP_MEM_CSF_USER_REG_PAGE_HANDLE (47ul << LOCAL_PAGE_SHIFT) #define BASEP_MEM_CSF_USER_IO_PAGES_HANDLE (48ul << LOCAL_PAGE_SHIFT) -#define BASE_MEM_COOKIE_BASE (64ul << LOCAL_PAGE_SHIFT) -#define BASE_MEM_FIRST_FREE_ADDRESS \ - ((BITS_PER_LONG << LOCAL_PAGE_SHIFT) + BASE_MEM_COOKIE_BASE) #define KBASE_CSF_NUM_USER_IO_PAGES_HANDLE \ ((BASE_MEM_COOKIE_BASE - BASEP_MEM_CSF_USER_IO_PAGES_HANDLE) >> \ LOCAL_PAGE_SHIFT) -/** - * Valid set of just-in-time memory allocation flags - */ +/* Valid set of just-in-time memory allocation flags */ #define BASE_JIT_ALLOC_VALID_FLAGS ((__u8)0) -/* Flags to pass to ::base_context_init. - * Flags can be ORed together to enable multiple things. - * - * These share the same space as BASEP_CONTEXT_FLAG_*, and so must - * not collide with them. - */ -typedef __u32 base_context_create_flags; - -/* No flags set */ -#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0) - -/* Base context is embedded in a cctx object (flag used for CINSTR - * software counter macros) - */ -#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0) - -/* Base context is a 'System Monitor' context for Hardware counters. - * - * One important side effect of this is that job submission is disabled. 
- */ -#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \ - ((base_context_create_flags)1 << 1) +/* flags for base context specific to CSF */ /* Base context creates a CSF event notification thread. * @@ -242,22 +89,6 @@ typedef __u32 base_context_create_flags; */ #define BASE_CONTEXT_CSF_EVENT_THREAD ((base_context_create_flags)1 << 2) -/* Bit-shift used to encode a memory group ID in base_context_create_flags - */ -#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3) - -/* Bitmask used to encode a memory group ID in base_context_create_flags - */ -#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \ - ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) - -/* Bitpattern describing the base_context_create_flags that can be - * passed to the kernel - */ -#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \ - (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \ - BASEP_CONTEXT_MMU_GROUP_ID_MASK) - /* Bitpattern describing the ::base_context_create_flags that can be * passed to base_context_init() */ @@ -266,15 +97,7 @@ typedef __u32 base_context_create_flags; BASE_CONTEXT_CSF_EVENT_THREAD | \ BASEP_CONTEXT_CREATE_KERNEL_FLAGS) -/* Enable additional tracepoints for latency measurements (TL_ATOM_READY, - * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) - */ -#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0) - -/* Indicate that job dumping is enabled. This could affect certain timers - * to account for the performance impact. 
- */ -#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1) +/* Flags for base tracepoint specific to CSF */ /* Enable KBase tracepoints for CSF builds */ #define BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS (1 << 2) @@ -306,6 +129,10 @@ typedef __u32 base_context_create_flags; */ #define BASEP_KCPU_CQS_MAX_NUM_OBJS ((size_t)32) +/* CSF CSI EXCEPTION_HANDLER_FLAGS */ +#define BASE_CSF_TILER_OOM_EXCEPTION_FLAG (1u << 0) +#define BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK (BASE_CSF_TILER_OOM_EXCEPTION_FLAG) + /** * enum base_kcpu_command_type - Kernel CPU queue command type. * @BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: fence_signal, @@ -725,4 +552,45 @@ struct base_csf_notification { } payload; }; +/** + * struct mali_base_gpu_core_props - GPU core props info + * + * @product_id: Pro specific value. + * @version_status: Status of the GPU release. No defined values, but starts at + * 0 and increases by one for each release status (alpha, beta, EAC, etc.). + * 4 bit values (0-15). + * @minor_revision: Minor release number of the GPU. "P" part of an "RnPn" + * release number. + * 8 bit values (0-255). + * @major_revision: Major release number of the GPU. "R" part of an "RnPn" + * release number. + * 4 bit values (0-15). + * @padding: padding to align to 8-byte + * @gpu_freq_khz_max: The maximum GPU frequency. Reported to applications by + * clGetDeviceInfo() + * @log2_program_counter_size: Size of the shader program counter, in bits. + * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU. This + * is a bitpattern where a set bit indicates that the format is supported. + * Before using a texture format, it is recommended that the corresponding + * bit be checked. + * @gpu_available_memory_size: Theoretical maximum memory available to the GPU. + * It is unlikely that a client will be able to allocate all of this memory + * for their own purposes, but this at least provides an upper bound on the + * memory available to the GPU. 
+ * This is required for OpenCL's clGetDeviceInfo() call when + * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The + * client will not be expecting to allocate anywhere near this value. + */ +struct mali_base_gpu_core_props { + __u32 product_id; + __u16 version_status; + __u16 minor_revision; + __u16 major_revision; + __u16 padding; + __u32 gpu_freq_khz_max; + __u32 log2_program_counter_size; + __u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; + __u64 gpu_available_memory_size; +}; + #endif /* _UAPI_BASE_CSF_KERNEL_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h index 1794ddc..cbb7310 100644 --- a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h @@ -56,10 +56,18 @@ * - Added new Base memory allocation interface * 1.10: * - First release of new HW performance counters interface. + * 1.11: + * - Dummy model (no mali) backend will now clear HWC values after each sample + * 1.12: + * - Added support for incremental rendering flag in CSG create call + * 1.13: + * - Added ioctl to query a register of USER page. + * 1.14: + * - Added support for passing down the buffer descriptor VA in tiler heap init */ #define BASE_UK_VERSION_MAJOR 1 -#define BASE_UK_VERSION_MINOR 10 +#define BASE_UK_VERSION_MINOR 14 /** * struct kbase_ioctl_version_check - Check version compatibility between @@ -245,6 +253,9 @@ union kbase_ioctl_cs_queue_group_create_1_6 { * allowed to use. * @in.compute_max: Maximum number of compute endpoints the group is allowed * to use. + * @in.csi_handlers: Flags to signal that the application intends to use CSI + * exception handlers in some linear buffers to deal with + * the given exception types. * @in.padding: Currently unused, must be zero * @out: Output parameters * @out.group_handle: Handle of a newly created queue group. 
@@ -261,9 +272,10 @@ union kbase_ioctl_cs_queue_group_create { __u8 tiler_max; __u8 fragment_max; __u8 compute_max; - __u8 padding[3]; + __u8 csi_handlers; + __u8 padding[2]; /** - * @reserved: Reserved + * @in.reserved: Reserved */ __u64 reserved; } in; @@ -353,6 +365,7 @@ struct kbase_ioctl_kcpu_queue_enqueue { * allowed. * @in.group_id: Group ID to be used for physical allocations. * @in.padding: Padding + * @in.buf_desc_va: Buffer descriptor GPU VA for tiler heap reclaims. * @out: Output parameters * @out.gpu_heap_va: GPU VA (virtual address) of Heap context that was set up * for the heap. @@ -368,6 +381,7 @@ union kbase_ioctl_cs_tiler_heap_init { __u16 target_in_flight; __u8 group_id; __u8 padding; + __u64 buf_desc_va; } in; struct { __u64 gpu_heap_va; @@ -378,6 +392,43 @@ union kbase_ioctl_cs_tiler_heap_init { #define KBASE_IOCTL_CS_TILER_HEAP_INIT \ _IOWR(KBASE_IOCTL_TYPE, 48, union kbase_ioctl_cs_tiler_heap_init) +/** + * union kbase_ioctl_cs_tiler_heap_init_1_13 - Initialize chunked tiler memory heap, + * earlier version upto 1.13 + * @in: Input parameters + * @in.chunk_size: Size of each chunk. + * @in.initial_chunks: Initial number of chunks that heap will be created with. + * @in.max_chunks: Maximum number of chunks that the heap is allowed to use. + * @in.target_in_flight: Number of render-passes that the driver should attempt to + * keep in flight for which allocation of new chunks is + * allowed. + * @in.group_id: Group ID to be used for physical allocations. + * @in.padding: Padding + * @out: Output parameters + * @out.gpu_heap_va: GPU VA (virtual address) of Heap context that was set up + * for the heap. + * @out.first_chunk_va: GPU VA of the first chunk allocated for the heap, + * actually points to the header of heap chunk and not to + * the low address of free memory in the chunk. 
+ */ +union kbase_ioctl_cs_tiler_heap_init_1_13 { + struct { + __u32 chunk_size; + __u32 initial_chunks; + __u32 max_chunks; + __u16 target_in_flight; + __u8 group_id; + __u8 padding; + } in; + struct { + __u64 gpu_heap_va; + __u64 first_chunk_va; + } out; +}; + +#define KBASE_IOCTL_CS_TILER_HEAP_INIT_1_13 \ + _IOWR(KBASE_IOCTL_TYPE, 48, union kbase_ioctl_cs_tiler_heap_init_1_13) + /** * struct kbase_ioctl_cs_tiler_heap_term - Terminate a chunked tiler heap * instance @@ -479,6 +530,29 @@ union kbase_ioctl_mem_alloc_ex { #define KBASE_IOCTL_MEM_ALLOC_EX _IOWR(KBASE_IOCTL_TYPE, 59, union kbase_ioctl_mem_alloc_ex) +/** + * union kbase_ioctl_read_user_page - Read a register of USER page + * + * @in: Input parameters. + * @in.offset: Register offset in USER page. + * @in.padding: Padding to round up to a multiple of 8 bytes, must be zero. + * @out: Output parameters. + * @out.val_lo: Value of 32bit register or the 1st half of 64bit register to be read. + * @out.val_hi: Value of the 2nd half of 64bit register to be read. + */ +union kbase_ioctl_read_user_page { + struct { + __u32 offset; + __u32 padding; + } in; + struct { + __u32 val_lo; + __u32 val_hi; + } out; +}; + +#define KBASE_IOCTL_READ_USER_PAGE _IOWR(KBASE_IOCTL_TYPE, 60, union kbase_ioctl_read_user_page) + /*************** * test ioctls * ***************/ diff --git a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h new file mode 100644 index 0000000..75ae6a1 --- /dev/null +++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _UAPI_KBASE_GPU_REGMAP_CSF_H_ +#define _UAPI_KBASE_GPU_REGMAP_CSF_H_ + +/* IPA control registers */ +#define IPA_CONTROL_BASE 0x40000 +#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE + (r)) +#define STATUS 0x004 /* (RO) Status register */ + +#endif /* _UAPI_KBASE_GPU_REGMAP_CSF_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h index deca665..ebe3b3e 100644 --- a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h +++ b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,7 +22,9 @@ #ifndef _UAPI_KBASE_GPU_REGMAP_H_ #define _UAPI_KBASE_GPU_REGMAP_H_ -#if !MALI_USE_CSF +#if MALI_USE_CSF +#include "backend/mali_kbase_gpu_regmap_csf.h" +#else #include "backend/mali_kbase_gpu_regmap_jm.h" #endif /* !MALI_USE_CSF */ diff --git a/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h b/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h index 94f4dc7..ae43908 100644 --- a/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h +++ b/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h @@ -23,100 +23,16 @@ #define _UAPI_BASE_JM_KERNEL_H_ #include +#include "../mali_base_common_kernel.h" -/* Memory allocation, access/hint flags. +/* Memory allocation, access/hint flags & mask specific to JM GPU. * * See base_mem_alloc_flags. */ -/* IN */ -/* Read access CPU side - */ -#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0) - -/* Write access CPU side - */ -#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1) - -/* Read access GPU side - */ -#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2) - -/* Write access GPU side - */ -#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3) - -/* Execute allowed on the GPU side - */ -#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4) - -/* Will be permanently mapped in kernel space. - * Flag is only allowed on allocations originating from kbase. - */ -#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5) - -/* The allocation will completely reside within the same 4GB chunk in the GPU - * virtual space. - * Since this flag is primarily required only for the TLS memory which will - * not be used to contain executable code and also not used for Tiler heap, - * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags. 
- */ -#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6) - -/* Userspace is not allowed to free this memory. - * Flag is only allowed on allocations originating from kbase. - */ -#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7) - -/* Used as BASE_MEM_FIXED in other backends - */ +/* Used as BASE_MEM_FIXED in other backends */ #define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8) -/* Grow backing store on GPU Page Fault - */ -#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9) - -/* Page coherence Outer shareable, if available - */ -#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10) - -/* Page coherence Inner shareable - */ -#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11) - -/* IN/OUT */ -/* Should be cached on the CPU, returned if actually cached - */ -#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12) - -/* IN/OUT */ -/* Must have same VA on both the GPU and the CPU - */ -#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13) - -/* OUT */ -/* Must call mmap to acquire a GPU address for the allocation - */ -#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14) - -/* IN */ -/* Page coherence Outer shareable, required. - */ -#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15) - -/* Protected memory - */ -#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16) - -/* Not needed physical memory - */ -#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17) - -/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the - * addresses to be the same - */ -#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18) - /** * BASE_MEM_RESERVED_BIT_19 - Bit 19 is reserved. * @@ -131,47 +47,15 @@ */ #define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20) -/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu - * mode. Some components within the GPU might only be able to access memory - * that is GPU cacheable. 
Refer to the specific GPU implementation for more - * details. The 3 shareability flags will be ignored for GPU uncached memory. - * If used while importing USER_BUFFER type memory, then the import will fail - * if the memory is not aligned to GPU and CPU cache line width. - */ -#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21) - -/* - * Bits [22:25] for group_id (0~15). - * - * base_mem_group_id_set() should be used to pack a memory group ID into a - * base_mem_alloc_flags value instead of accessing the bits directly. - * base_mem_group_id_get() should be used to extract the memory group ID from - * a base_mem_alloc_flags value. - */ -#define BASEP_MEM_GROUP_ID_SHIFT 22 -#define BASE_MEM_GROUP_ID_MASK \ - ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT) - -/* Must do CPU cache maintenance when imported memory is mapped/unmapped - * on GPU. Currently applicable to dma-buf type only. - */ -#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26) - /* Use the GPU VA chosen by the kernel client */ #define BASE_MEM_FLAG_MAP_FIXED ((base_mem_alloc_flags)1 << 27) -/* OUT */ -/* Kernel side cache sync ops required */ -#define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28) - /* Force trimming of JIT allocations when creating a new allocation */ #define BASEP_MEM_PERFORM_JIT_TRIM ((base_mem_alloc_flags)1 << 29) -/* Number of bits used as flags for base memory management - * - * Must be kept in sync with the base_mem_alloc_flags flags +/* Note that the number of bits used for base_mem_alloc_flags + * must be less than BASE_MEM_FLAGS_NR_BITS !!! */ -#define BASE_MEM_FLAGS_NR_BITS 30 /* A mask of all the flags which are only valid for allocations within kbase, * and may not be passed from user space. @@ -180,29 +64,11 @@ (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | \ BASE_MEM_FLAG_MAP_FIXED | BASEP_MEM_PERFORM_JIT_TRIM) -/* A mask for all output bits, excluding IN/OUT bits. 
- */ -#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP - -/* A mask for all input bits, including IN/OUT bits. - */ -#define BASE_MEM_FLAGS_INPUT_MASK \ - (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) - /* A mask of all currently reserved flags */ #define BASE_MEM_FLAGS_RESERVED \ (BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_19) -#define BASEP_MEM_INVALID_HANDLE (0ul) -#define BASE_MEM_MMU_DUMP_HANDLE (1ul << LOCAL_PAGE_SHIFT) -#define BASE_MEM_TRACE_BUFFER_HANDLE (2ul << LOCAL_PAGE_SHIFT) -#define BASE_MEM_MAP_TRACKING_HANDLE (3ul << LOCAL_PAGE_SHIFT) -#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ul << LOCAL_PAGE_SHIFT) -/* reserved handles ..-47< for future special handles */ -#define BASE_MEM_COOKIE_BASE (64ul << LOCAL_PAGE_SHIFT) -#define BASE_MEM_FIRST_FREE_ADDRESS \ - ((BITS_PER_LONG << LOCAL_PAGE_SHIFT) + BASE_MEM_COOKIE_BASE) /* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the * initial commit is aligned to 'extension' pages, where 'extension' must be a power @@ -227,47 +93,6 @@ #define BASE_JIT_ALLOC_VALID_FLAGS \ (BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP | BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) -/** - * typedef base_context_create_flags - Flags to pass to ::base_context_init. - * - * Flags can be ORed together to enable multiple things. - * - * These share the same space as BASEP_CONTEXT_FLAG_*, and so must - * not collide with them. - */ -typedef __u32 base_context_create_flags; - -/* No flags set */ -#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0) - -/* Base context is embedded in a cctx object (flag used for CINSTR - * software counter macros) - */ -#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0) - -/* Base context is a 'System Monitor' context for Hardware counters. - * - * One important side effect of this is that job submission is disabled. 
- */ -#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \ - ((base_context_create_flags)1 << 1) - -/* Bit-shift used to encode a memory group ID in base_context_create_flags - */ -#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3) - -/* Bitmask used to encode a memory group ID in base_context_create_flags - */ -#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \ - ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) - -/* Bitpattern describing the base_context_create_flags that can be - * passed to the kernel - */ -#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \ - (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \ - BASEP_CONTEXT_MMU_GROUP_ID_MASK) - /* Bitpattern describing the ::base_context_create_flags that can be * passed to base_context_init() */ @@ -287,16 +112,7 @@ typedef __u32 base_context_create_flags; #define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED \ ((base_context_create_flags)(1 << 31)) -/* Enable additional tracepoints for latency measurements (TL_ATOM_READY, - * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) - */ -#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0) - -/* Indicate that job dumping is enabled. This could affect certain timers - * to account for the performance impact. - */ -#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1) - +/* Flags for base tracepoint specific to JM */ #define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \ BASE_TLSTREAM_JOB_DUMPING_ENABLED) /* @@ -509,9 +325,6 @@ typedef __u32 base_jd_core_req; * takes priority * * This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms. - * - * If the core availability policy is keeping the required core group turned - * off, then the job will fail with a BASE_JD_EVENT_PM_EVENT error code. 
*/ #define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11) @@ -770,6 +583,9 @@ typedef __u8 base_jd_prio; */ #define BASE_JD_PRIO_REALTIME ((base_jd_prio)3) +/* Invalid atom priority (max uint8_t value) */ +#define BASE_JD_PRIO_INVALID ((base_jd_prio)255) + /* Count of the number of priority levels. This itself is not a valid * base_jd_prio setting */ @@ -1016,11 +832,6 @@ enum { * BASE_JD_EVENT_JOB_CONFIG_FAULT, or if the * platform doesn't support the feature specified in * the atom. - * @BASE_JD_EVENT_PM_EVENT: TODO: remove as it's not used - * @BASE_JD_EVENT_TIMED_OUT: TODO: remove as it's not used - * @BASE_JD_EVENT_BAG_INVALID: TODO: remove as it's not used - * @BASE_JD_EVENT_PROGRESS_REPORT: TODO: remove as it's not used - * @BASE_JD_EVENT_BAG_DONE: TODO: remove as it's not used * @BASE_JD_EVENT_DRV_TERMINATED: this is a special event generated to indicate * to userspace that the KBase context has been * destroyed and Base should stop listening for @@ -1115,17 +926,10 @@ enum base_jd_event_code { /* SW defined exceptions */ BASE_JD_EVENT_MEM_GROWTH_FAILED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000, - BASE_JD_EVENT_TIMED_OUT = - BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001, BASE_JD_EVENT_JOB_CANCELLED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002, BASE_JD_EVENT_JOB_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003, - BASE_JD_EVENT_PM_EVENT = - BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004, - - BASE_JD_EVENT_BAG_INVALID = - BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003, BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_RESERVED | 0x3FF, @@ -1133,10 +937,6 @@ enum base_jd_event_code { BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000, - BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | - BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000, - BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | - BASE_JD_SW_EVENT_BAG | 0x000, 
BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000, @@ -1203,4 +1003,49 @@ struct base_dump_cpu_gpu_counters { __u8 padding[36]; }; +/** + * struct mali_base_gpu_core_props - GPU core props info + * + * @product_id: Pro specific value. + * @version_status: Status of the GPU release. No defined values, but starts at + * 0 and increases by one for each release status (alpha, beta, EAC, etc.). + * 4 bit values (0-15). + * @minor_revision: Minor release number of the GPU. "P" part of an "RnPn" + * release number. + * 8 bit values (0-255). + * @major_revision: Major release number of the GPU. "R" part of an "RnPn" + * release number. + * 4 bit values (0-15). + * @padding: padding to align to 8-byte + * @gpu_freq_khz_max: The maximum GPU frequency. Reported to applications by + * clGetDeviceInfo() + * @log2_program_counter_size: Size of the shader program counter, in bits. + * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU. This + * is a bitpattern where a set bit indicates that the format is supported. + * Before using a texture format, it is recommended that the corresponding + * bit be checked. + * @gpu_available_memory_size: Theoretical maximum memory available to the GPU. + * It is unlikely that a client will be able to allocate all of this memory + * for their own purposes, but this at least provides an upper bound on the + * memory available to the GPU. + * This is required for OpenCL's clGetDeviceInfo() call when + * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The + * client will not be expecting to allocate anywhere near this value. + * @num_exec_engines: The number of execution engines. Only valid for tGOX + * (Bifrost) GPUs, where GPU_HAS_REG_CORE_FEATURES is defined. Otherwise, + * this is always 0. 
+ */ +struct mali_base_gpu_core_props { + __u32 product_id; + __u16 version_status; + __u16 minor_revision; + __u16 major_revision; + __u16 padding; + __u32 gpu_freq_khz_max; + __u32 log2_program_counter_size; + __u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; + __u64 gpu_available_memory_size; + __u8 num_exec_engines; +}; + #endif /* _UAPI_BASE_JM_KERNEL_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h index 215f12d..20d931a 100644 --- a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h @@ -125,9 +125,11 @@ * - Removed Kernel legacy HWC interface * 11.34: * - First release of new HW performance counters interface. + * 11.35: + * - Dummy model (no mali) backend will now clear HWC values after each sample */ #define BASE_UK_VERSION_MAJOR 11 -#define BASE_UK_VERSION_MINOR 34 +#define BASE_UK_VERSION_MINOR 35 /** * struct kbase_ioctl_version_check - Check version compatibility between diff --git a/common/include/uapi/gpu/arm/midgard/mali_base_common_kernel.h b/common/include/uapi/gpu/arm/midgard/mali_base_common_kernel.h new file mode 100644 index 0000000..f837814 --- /dev/null +++ b/common/include/uapi/gpu/arm/midgard/mali_base_common_kernel.h @@ -0,0 +1,231 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _UAPI_BASE_COMMON_KERNEL_H_ +#define _UAPI_BASE_COMMON_KERNEL_H_ + +#include <linux/types.h> + +struct base_mem_handle { + struct { + __u64 handle; + } basep; +}; + +#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4 + +/* Memory allocation, access/hint flags & mask. + * + * See base_mem_alloc_flags. + */ + +/* IN */ +/* Read access CPU side + */ +#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0) + +/* Write access CPU side + */ +#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1) + +/* Read access GPU side + */ +#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2) + +/* Write access GPU side + */ +#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3) + +/* Execute allowed on the GPU side + */ +#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4) + +/* Will be permanently mapped in kernel space. + * Flag is only allowed on allocations originating from kbase. + */ +#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5) + +/* The allocation will completely reside within the same 4GB chunk in the GPU + * virtual space. + * Since this flag is primarily required only for the TLS memory which will + * not be used to contain executable code and also not used for Tiler heap, + * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags. + */ +#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6) + +/* Userspace is not allowed to free this memory. + * Flag is only allowed on allocations originating from kbase.
+ */ +#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7) + +/* Grow backing store on GPU Page Fault + */ +#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9) + +/* Page coherence Outer shareable, if available + */ +#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10) + +/* Page coherence Inner shareable + */ +#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11) + +/* IN/OUT */ +/* Should be cached on the CPU, returned if actually cached + */ +#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12) + +/* IN/OUT */ +/* Must have same VA on both the GPU and the CPU + */ +#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13) + +/* OUT */ +/* Must call mmap to acquire a GPU address for the allocation + */ +#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14) + +/* IN */ +/* Page coherence Outer shareable, required. + */ +#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15) + +/* Protected memory + */ +#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16) + +/* Not needed physical memory + */ +#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17) + +/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the + * addresses to be the same + */ +#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18) + +/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu + * mode. Some components within the GPU might only be able to access memory + * that is GPU cacheable. Refer to the specific GPU implementation for more + * details. The 3 shareability flags will be ignored for GPU uncached memory. + * If used while importing USER_BUFFER type memory, then the import will fail + * if the memory is not aligned to GPU and CPU cache line width. + */ +#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21) + +/* + * Bits [22:25] for group_id (0~15). 
+ * + * base_mem_group_id_set() should be used to pack a memory group ID into a + * base_mem_alloc_flags value instead of accessing the bits directly. + * base_mem_group_id_get() should be used to extract the memory group ID from + * a base_mem_alloc_flags value. + */ +#define BASEP_MEM_GROUP_ID_SHIFT 22 +#define BASE_MEM_GROUP_ID_MASK ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT) + +/* Must do CPU cache maintenance when imported memory is mapped/unmapped + * on GPU. Currently applicable to dma-buf type only. + */ +#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26) + +/* OUT */ +/* Kernel side cache sync ops required */ +#define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28) + +/* Number of bits used as flags for base memory management + * + * Must be kept in sync with the base_mem_alloc_flags flags + */ +#define BASE_MEM_FLAGS_NR_BITS 30 + +/* A mask for all output bits, excluding IN/OUT bits. + */ +#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP + +/* A mask for all input bits, including IN/OUT bits. + */ +#define BASE_MEM_FLAGS_INPUT_MASK \ + (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) + +/* Special base mem handles. + */ +#define BASEP_MEM_INVALID_HANDLE (0ul) +#define BASE_MEM_MMU_DUMP_HANDLE (1ul << LOCAL_PAGE_SHIFT) +#define BASE_MEM_TRACE_BUFFER_HANDLE (2ul << LOCAL_PAGE_SHIFT) +#define BASE_MEM_MAP_TRACKING_HANDLE (3ul << LOCAL_PAGE_SHIFT) +#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ul << LOCAL_PAGE_SHIFT) +/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */ +#define BASE_MEM_COOKIE_BASE (64ul << LOCAL_PAGE_SHIFT) +#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << LOCAL_PAGE_SHIFT) + BASE_MEM_COOKIE_BASE) + +/* Flags to pass to ::base_context_init. + * Flags can be ORed together to enable multiple things. + * + * These share the same space as BASEP_CONTEXT_FLAG_*, and so must + * not collide with them.
+ */ +typedef __u32 base_context_create_flags; + +/* Flags for base context */ + +/* No flags set */ +#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0) + +/* Base context is embedded in a cctx object (flag used for CINSTR + * software counter macros) + */ +#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0) + +/* Base context is a 'System Monitor' context for Hardware counters. + * + * One important side effect of this is that job submission is disabled. + */ +#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED ((base_context_create_flags)1 << 1) + +/* Bit-shift used to encode a memory group ID in base_context_create_flags + */ +#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3) + +/* Bitmask used to encode a memory group ID in base_context_create_flags + */ +#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \ + ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) + +/* Bitpattern describing the base_context_create_flags that can be + * passed to the kernel + */ +#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \ + (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | BASEP_CONTEXT_MMU_GROUP_ID_MASK) + +/* Flags for base tracepoint + */ + +/* Enable additional tracepoints for latency measurements (TL_ATOM_READY, + * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) + */ +#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0) + +/* Indicate that job dumping is enabled. This could affect certain timers + * to account for the performance impact. 
+ */ +#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1) + +#endif /* _UAPI_BASE_COMMON_KERNEL_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h b/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h index f3ffb36..6adbd81 100644 --- a/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h +++ b/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h @@ -27,19 +27,10 @@ #define _UAPI_BASE_KERNEL_H_ #include <linux/types.h> - -struct base_mem_handle { - struct { - __u64 handle; - } basep; -}; - #include "mali_base_mem_priv.h" #include "gpu/mali_kbase_gpu_id.h" #include "gpu/mali_kbase_gpu_coherency.h" -#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4 - #define BASE_MAX_COHERENT_GROUPS 16 #if defined(PAGE_MASK) && defined(PAGE_SHIFT) @@ -458,49 +449,6 @@ struct base_jd_debug_copy_buffer { * 16 coherent groups, since core groups are typically 4 cores. */ -/** - * struct mali_base_gpu_core_props - GPU core props info - * - * @product_id: Pro specific value. - * @version_status: Status of the GPU release. No defined values, but starts at - * 0 and increases by one for each release status (alpha, beta, EAC, etc.). - * 4 bit values (0-15). - * @minor_revision: Minor release number of the GPU. "P" part of an "RnPn" - * release number. - * 8 bit values (0-255). - * @major_revision: Major release number of the GPU. "R" part of an "RnPn" - * release number. - * 4 bit values (0-15). - * @padding: padding to allign to 8-byte - * @gpu_freq_khz_max: The maximum GPU frequency. Reported to applications by - * clGetDeviceInfo() - * @log2_program_counter_size: Size of the shader program counter, in bits. - * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU. This - * is a bitpattern where a set bit indicates that the format is supported. - * Before using a texture format, it is recommended that the corresponding - * bit be checked. - * @gpu_available_memory_size: Theoretical maximum memory available to the GPU.
- * It is unlikely that a client will be able to allocate all of this memory - * for their own purposes, but this at least provides an upper bound on the - * memory available to the GPU. - * This is required for OpenCL's clGetDeviceInfo() call when - * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The - * client will not be expecting to allocate anywhere near this value. - * @num_exec_engines: The number of execution engines. - */ -struct mali_base_gpu_core_props { - __u32 product_id; - __u16 version_status; - __u16 minor_revision; - __u16 major_revision; - __u16 padding; - __u32 gpu_freq_khz_max; - __u32 log2_program_counter_size; - __u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; - __u64 gpu_available_memory_size; - __u8 num_exec_engines; -}; - /* * More information is possible - but associativity and bus width are not * required by upper-level apis. @@ -531,7 +479,7 @@ struct mali_base_gpu_tiler_props { * field. * @impl_tech: 0 = Not specified, 1 = Silicon, 2 = FPGA, * 3 = SW Model/Emulation - * @padding: padding to allign to 8-byte + * @padding: padding to align to 8-byte * @tls_alloc: Number of threads per core that TLS must be * allocated for */ @@ -551,7 +499,7 @@ struct mali_base_gpu_thread_props { * struct mali_base_gpu_coherent_group - descriptor for a coherent group * @core_mask: Core restriction mask required for the group * @num_cores: Number of cores in the group - * @padding: padding to allign to 8-byte + * @padding: padding to align to 8-byte * * \c core_mask exposes all cores in that coherent group, and \c num_cores * provides a cached population-count for that mask. @@ -581,7 +529,7 @@ struct mali_base_gpu_coherent_group { * are in the group[] member. Use num_groups instead. 
* @coherency: Coherency features of the memory, accessed by gpu_mem_features * methods - * @padding: padding to allign to 8-byte + * @padding: padding to align to 8-byte * @group: Descriptors of coherent groups * * Note that the sizes of the members could be reduced. However, the \c group @@ -599,6 +547,12 @@ struct mali_base_gpu_coherent_group_info { struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS]; }; +#if MALI_USE_CSF +#include "csf/mali_base_csf_kernel.h" +#else +#include "jm/mali_base_jm_kernel.h" +#endif + /** * struct gpu_raw_gpu_props - A complete description of the GPU's Hardware * Configuration Discovery registers. @@ -696,12 +650,6 @@ struct base_gpu_props { struct mali_base_gpu_coherent_group_info coherency_info; }; -#if MALI_USE_CSF -#include "csf/mali_base_csf_kernel.h" -#else -#include "jm/mali_base_jm_kernel.h" -#endif - #define BASE_MEM_GROUP_ID_GET(flags) \ ((flags & BASE_MEM_GROUP_ID_MASK) >> BASEP_MEM_GROUP_ID_SHIFT) diff --git a/common/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h b/common/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h index 304a334..70f5b09 100644 --- a/common/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h +++ b/common/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2015, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015, 2020-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,8 +23,7 @@ #define _UAPI_BASE_MEM_PRIV_H_ #include <linux/types.h> - -#include "mali_base_kernel.h" +#include "mali_base_common_kernel.h" #define BASE_SYNCSET_OP_MSYNC (1U << 0) #define BASE_SYNCSET_OP_CSYNC (1U << 1) diff --git a/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h b/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h index 2b1c093..e691aea 100644 --- a/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h @@ -563,7 +563,8 @@ union kbase_ioctl_mem_find_gpu_start_and_offset { _IO(KBASE_IOCTL_TYPE, 34) /** - * union kbase_ioctl_gwt_dump - Used to collect all GPU write fault addresses. + * union kbase_ioctl_cinstr_gwt_dump - Used to collect all GPU write fault + * addresses. * @in: Input parameters * @in.addr_buffer: Address of buffer to hold addresses of gpu modified areas. * @in.size_buffer: Address of buffer to hold size of modified areas (in pages) @@ -683,7 +684,7 @@ struct kbase_ioctl_kinstr_prfcnt_enum_info { _IOWR(KBASE_IOCTL_TYPE, 56, struct kbase_ioctl_kinstr_prfcnt_enum_info) /** - * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader + * struct kbase_ioctl_kinstr_prfcnt_setup - Setup HWC dumper/reader * @in: input parameters. * @in.request_item_count: Number of requests in the requests array. * @in.request_item_size: Size in bytes of each request in the requests array. diff --git a/common/include/uapi/gpu/arm/midgard/mali_uk.h b/common/include/uapi/gpu/arm/midgard/mali_uk.h deleted file mode 100644 index 78946f6..0000000 --- a/common/include/uapi/gpu/arm/midgard/mali_uk.h +++ /dev/null @@ -1,70 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * - * (C) COPYRIGHT 2010, 2012-2015, 2018, 2020-2022 ARM Limited. All rights reserved.
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -/** - * DOC: Types and definitions that are common across OSs for both the user - * and kernel side of the User-Kernel interface. - */ - -#ifndef _UAPI_UK_H_ -#define _UAPI_UK_H_ - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -/** - * DOC: uk_api User-Kernel Interface API - * - * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device - * drivers developed as part of the Midgard DDK. Currently that includes the Base driver. - * - * It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent - * kernel-side API (UKK) via an OS-specific communication mechanism. - * - * This API is internal to the Midgard DDK and is not exposed to any applications. - * - */ - -/** - * enum uk_client_id - These are identifiers for kernel-side drivers - * implementing a UK interface, aka UKK clients. - * @UK_CLIENT_MALI_T600_BASE: Value used to identify the Base driver UK client. - * @UK_CLIENT_COUNT: The number of uk clients supported. This must be - * the last member of the enum - * - * The UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this - * identifier to select a UKK client to the uku_open() function. 
- * - * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id - * enumeration and the uku_open() implemenation for the various OS ports need to be updated to - * provide a mapping of the identifier to the OS specific device name. - * - */ -enum uk_client_id { - UK_CLIENT_MALI_T600_BASE, - UK_CLIENT_COUNT -}; - -#ifdef __cplusplus -} -#endif /* __cplusplus */ -#endif /* _UAPI_UK_H_ */ -- cgit v1.2.3