From 0c596dc70431fa2c70021fa1685e3efc969a852d Mon Sep 17 00:00:00 2001 From: Jesse Hall Date: Tue, 23 Nov 2021 14:38:46 -0800 Subject: Mali Valhall Android DDK r34p0-00dev1 Provenance: 046d23c969 (collaborate/google/android/v_r34p0-00dev1) VX504X08X-BU-00000-r34p0-00dev1 - Valhall Android DDK VX504X08X-SW-99006-r34p0-00dev1 - Valhall Android Renderscript AOSP parts Documentation from VX504X08X-BU-00000 omitted. Signed-off-by: Jesse Hall Change-Id: I4ebbb3a3af709bd39f883eed3b35bf4657a95797 --- .../gpu/arm/midgard/csf/mali_base_csf_kernel.h | 65 +----- .../midgard/csf/mali_gpu_csf_control_registers.h | 32 --- .../gpu/arm/midgard/csf/mali_gpu_csf_registers.h | 50 ++-- .../gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h | 2 + .../gpu/backend/mali_kbase_gpu_regmap_csf.h | 52 +++-- .../midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h | 4 + .../uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h | 16 ++ .../gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h | 163 ++++++++++++- .../uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h | 16 +- .../uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h | 2 + .../uapi/gpu/arm/midgard/mali_base_kernel.h | 104 ++------- .../uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h | 259 +++++++++++++++++++++ .../uapi/gpu/arm/midgard/mali_kbase_ioctl.h | 52 +++++ 13 files changed, 602 insertions(+), 215 deletions(-) delete mode 100644 common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_control_registers.h (limited to 'common') diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h index 78c328c..f5f859e 100644 --- a/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h +++ b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h @@ -186,17 +186,17 @@ #define BASE_MEM_FLAGS_RESERVED \ BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_20 -#define BASEP_MEM_INVALID_HANDLE (0ull << 12) -#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12) -#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12) -#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12) -#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) +#define BASEP_MEM_INVALID_HANDLE (0ul) +#define BASE_MEM_MMU_DUMP_HANDLE (1ul << LOCAL_PAGE_SHIFT) +#define BASE_MEM_TRACE_BUFFER_HANDLE (2ul << LOCAL_PAGE_SHIFT) +#define BASE_MEM_MAP_TRACKING_HANDLE (3ul << LOCAL_PAGE_SHIFT) +#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ul << LOCAL_PAGE_SHIFT) /* reserved handles ..-47< for future special handles */ -#define BASEP_MEM_CSF_USER_REG_PAGE_HANDLE (47ul << 12) -#define BASEP_MEM_CSF_USER_IO_PAGES_HANDLE (48ul << 12) -#define BASE_MEM_COOKIE_BASE (64ul << 12) -#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \ - BASE_MEM_COOKIE_BASE) +#define BASEP_MEM_CSF_USER_REG_PAGE_HANDLE (47ul << LOCAL_PAGE_SHIFT) +#define BASEP_MEM_CSF_USER_IO_PAGES_HANDLE (48ul << LOCAL_PAGE_SHIFT) +#define BASE_MEM_COOKIE_BASE (64ul << LOCAL_PAGE_SHIFT) +#define BASE_MEM_FIRST_FREE_ADDRESS \ + ((BITS_PER_LONG << LOCAL_PAGE_SHIFT) + BASE_MEM_COOKIE_BASE) #define KBASE_CSF_NUM_USER_IO_PAGES_HANDLE \ ((BASE_MEM_COOKIE_BASE - BASEP_MEM_CSF_USER_IO_PAGES_HANDLE) >> \ @@ -301,7 +301,6 @@ typedef __u32 base_context_create_flags; */ #define BASEP_KCPU_CQS_MAX_NUM_OBJS ((size_t)32) -#if MALI_UNIT_TEST /** * enum base_kcpu_command_type - Kernel CPU queue command type. 
* @BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: fence_signal, @@ -331,42 +330,8 @@ enum base_kcpu_command_type { BASE_KCPU_COMMAND_TYPE_JIT_ALLOC, BASE_KCPU_COMMAND_TYPE_JIT_FREE, BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND, - BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER, - BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME, + BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER }; -#else -/** - * enum base_kcpu_command_type - Kernel CPU queue command type. - * @BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: fence_signal, - * @BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: fence_wait, - * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT: cqs_wait, - * @BASE_KCPU_COMMAND_TYPE_CQS_SET: cqs_set, - * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: cqs_wait_operation, - * @BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: cqs_set_operation, - * @BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: map_import, - * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: unmap_import, - * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: unmap_import_force, - * @BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: jit_alloc, - * @BASE_KCPU_COMMAND_TYPE_JIT_FREE: jit_free, - * @BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: group_suspend, - * @BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: error_barrier, - */ -enum base_kcpu_command_type { - BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL, - BASE_KCPU_COMMAND_TYPE_FENCE_WAIT, - BASE_KCPU_COMMAND_TYPE_CQS_WAIT, - BASE_KCPU_COMMAND_TYPE_CQS_SET, - BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION, - BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION, - BASE_KCPU_COMMAND_TYPE_MAP_IMPORT, - BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT, - BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE, - BASE_KCPU_COMMAND_TYPE_JIT_ALLOC, - BASE_KCPU_COMMAND_TYPE_JIT_FREE, - BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND, - BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER, -}; -#endif /* MALI_UNIT_TEST */ /** * enum base_queue_group_priority - Priority of a GPU Command Queue Group. @@ -568,11 +533,6 @@ struct base_kcpu_command_group_suspend_info { __u8 padding[3]; }; -#if MALI_UNIT_TEST -struct base_kcpu_command_sample_time_info { - __u64 time; -}; -#endif /* MALI_UNIT_TEST */ /** * struct base_kcpu_command - kcpu command. @@ -603,9 +563,6 @@ struct base_kcpu_command { struct base_kcpu_command_jit_alloc_info jit_alloc; struct base_kcpu_command_jit_free_info jit_free; struct base_kcpu_command_group_suspend_info suspend_buf_copy; -#if MALI_UNIT_TEST - struct base_kcpu_command_sample_time_info sample_time; -#endif /* MALI_UNIT_TEST */ __u64 padding[2]; /* No sub-struct should be larger */ } info; }; diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_control_registers.h b/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_control_registers.h deleted file mode 100644 index b62a8b0..0000000 --- a/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_control_registers.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -/* - * This header was autogenerated, it should not be edited. - */ - -#ifndef _UAPI_GPU_CSF_CONTROL_REGISTERS_H_ -#define _UAPI_GPU_CSF_CONTROL_REGISTERS_H_ - -/* GPU_REGISTERS register offsets */ -#define GPU_CONTROL_MCU 0x3000 /* () MCU control registers */ - -#endif /* _UAPI_GPU_CSF_CONTROL_REGISTERS_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h b/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h index 06cc4c2..a5dc745 100644 --- a/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h +++ b/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h @@ -20,7 +20,8 @@ */ /* - * This header was autogenerated, it should not be edited. + * This header was originally autogenerated, but it is now ok (and + * expected) to have to add to it. */ #ifndef _UAPI_GPU_CSF_REGISTERS_H_ @@ -212,7 +213,6 @@ #define GLB_PWROFF_TIMER 0x0014 /* () Global shader core power off timer */ #define GLB_ALLOC_EN_LO 0x0018 /* () Global shader core allocation enable mask, low word */ #define GLB_ALLOC_EN_HI 0x001C /* () Global shader core allocation enable mask, high word */ -#define GLB_PROTM_COHERENCY 0x0020 /* () Configure COHERENCY_ENABLE register value to use in protected mode execution */ #define GLB_PRFCNT_JASID 0x0024 /* () Performance counter address space */ #define GLB_PRFCNT_BASE_LO 0x0028 /* () Performance counter buffer address, low word */ @@ -653,7 +653,9 @@ (((reg_val) & ~CS_FAULT_EXCEPTION_TYPE_MASK) | \ (((value) << CS_FAULT_EXCEPTION_TYPE_SHIFT) & CS_FAULT_EXCEPTION_TYPE_MASK)) /* CS_FAULT_EXCEPTION_TYPE values */ +#define CS_FAULT_EXCEPTION_TYPE_KABOOM 0x05 #define CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED 0x0F +#define CS_FAULT_EXCEPTION_TYPE_CS_BUS_FAULT 0x48 #define CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT 0x4B #define CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_PC 0x50 #define CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_ENC 0x51 @@ -1164,6 +1166,13 @@ (((reg_val) & ~GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) | \ (((value) << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) & \ GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK)) +#define GLB_REQ_SLEEP_SHIFT 12 +#define GLB_REQ_SLEEP_MASK (0x1 << GLB_REQ_SLEEP_SHIFT) +#define GLB_REQ_SLEEP_GET(reg_val) \ + (((reg_val) & GLB_REQ_SLEEP_MASK) >> GLB_REQ_SLEEP_SHIFT) +#define GLB_REQ_SLEEP_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_SLEEP_MASK) | \ + (((value) << GLB_REQ_SLEEP_SHIFT) & GLB_REQ_SLEEP_MASK)) #define GLB_REQ_INACTIVE_COMPUTE_SHIFT 20 #define GLB_REQ_INACTIVE_COMPUTE_MASK (0x1 << GLB_REQ_INACTIVE_COMPUTE_SHIFT) #define GLB_REQ_INACTIVE_COMPUTE_GET(reg_val) \ @@ -1391,19 +1400,6 @@ #define GLB_ALLOC_EN_MASK_SET(reg_val, value) \ (((reg_val) & ~GLB_ALLOC_EN_MASK_MASK) | (((value) << GLB_ALLOC_EN_MASK_SHIFT) & GLB_ALLOC_EN_MASK_MASK)) -/* GLB_PROTM_COHERENCY register */ -#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT 0 -#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK \ - (0xFFFFFFFF << GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT) -#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_GET(reg_val) \ - (((reg_val)&GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK) >> \ - GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT) -#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SET(reg_val, value) \ - (((reg_val) & ~GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK) | \ - (((value) << 
GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT) & \ - GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK)) -/* End of GLB_INPUT_BLOCK register set definitions */ - /* GLB_OUTPUT_BLOCK register set definitions */ /* GLB_ACK register */ @@ -1485,4 +1481,28 @@ (((reg_val) & ~CSG_STATUS_STATE_IDLE_MASK) | \ (((value) << CSG_STATUS_STATE_IDLE_SHIFT) & CSG_STATUS_STATE_IDLE_MASK)) +/* GLB_FEATURES_ITER_TRACE_SUPPORTED register */ +#define GLB_FEATURES_ITER_TRACE_SUPPORTED_SHIFT GPU_U(4) +#define GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK \ + (GPU_U(0x1) << GLB_FEATURES_ITER_TRACE_SUPPORTED_SHIFT) +#define GLB_FEATURES_ITER_TRACE_SUPPORTED_GET(reg_val) \ + (((reg_val)&GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK) >> \ + GLB_FEATURES_ITER_TRACE_SUPPORTED_SHIFT) +#define GLB_FEATURES_ITER_TRACE_SUPPORTED_SET(reg_val, value) \ + (((reg_val) & ~GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK) | \ + (((value) << GLB_FEATURES_ITER_TRACE_SUPPORTED_SHIFT) & \ + GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK)) + +/* GLB_REQ_ITER_TRACE_ENABLE register */ +#define GLB_REQ_ITER_TRACE_ENABLE_SHIFT GPU_U(11) +#define GLB_REQ_ITER_TRACE_ENABLE_MASK \ + (GPU_U(0x1) << GLB_REQ_ITER_TRACE_ENABLE_SHIFT) +#define GLB_REQ_ITER_TRACE_ENABLE_GET(reg_val) \ + (((reg_val)&GLB_REQ_ITER_TRACE_ENABLE_MASK) >> \ + GLB_REQ_ITER_TRACE_ENABLE_SHIFT) +#define GLB_REQ_ITER_TRACE_ENABLE_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_ITER_TRACE_ENABLE_MASK) | \ + (((value) << GLB_REQ_ITER_TRACE_ENABLE_SHIFT) & \ + GLB_REQ_ITER_TRACE_ENABLE_MASK)) + #endif /* _UAPI_GPU_CSF_REGISTERS_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h index d2d7ce2..ec4870c 100644 --- a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h @@ -44,6 +44,8 @@ * - Add ioctl 40: kbase_ioctl_cs_queue_register_ex, this is a new * queue registration call with extended format for supporting CS * trace configurations with CSF trace_command. + * 1.6: + * - Added new HW performance counters interface to all GPUs. 
*/ #define BASE_UK_VERSION_MAJOR 1 diff --git a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h index 2041739..4001a4c 100644 --- a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h +++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h @@ -28,8 +28,13 @@ #error "Cannot be compiled with JM" #endif -/* IPA control registers */ +/* GPU_CONTROL_MCU base address */ +#define GPU_CONTROL_MCU_BASE 0x3000 + +/* MCU_SUBSYSTEM base address */ +#define MCU_SUBSYSTEM_BASE 0x20000 +/* IPA control registers */ #define IPA_CONTROL_BASE 0x40000 #define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE+(r)) #define COMMAND 0x000 /* (WO) Command register */ @@ -63,8 +68,6 @@ #define VALUE_SHADER_REG_LO(n) (VALUE_SHADER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ #define VALUE_SHADER_REG_HI(n) (VALUE_SHADER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ -#include "../../csf/mali_gpu_csf_control_registers.h" - /* Set to implementation defined, outer caching */ #define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull /* Set to write back memory, outer caching */ @@ -117,6 +120,9 @@ #define MCU_CNTRL_AUTO (1 << 1) #define MCU_CNTRL_DISABLE (0) +#define MCU_CNTRL_DOORBELL_DISABLE_SHIFT (31) +#define MCU_CNTRL_DOORBELL_DISABLE_MASK (1 << MCU_CNTRL_DOORBELL_DISABLE_SHIFT) + #define MCU_STATUS_HALTED (1 << 1) #define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory @@ -181,11 +187,19 @@ #define GPU_COMMAND_TIME_DISABLE 0x00 /* Disable cycle counter */ #define GPU_COMMAND_TIME_ENABLE 0x01 /* Enable cycle counter */ -/* GPU_COMMAND_FLUSH_CACHES payloads */ -#define GPU_COMMAND_FLUSH_PAYLOAD_NONE 0x00 /* No flush */ -#define GPU_COMMAND_FLUSH_PAYLOAD_CLEAN 0x01 /* Clean the caches */ -#define GPU_COMMAND_FLUSH_PAYLOAD_INVALIDATE 0x02 /* Invalidate the caches */ -#define GPU_COMMAND_FLUSH_PAYLOAD_CLEAN_INVALIDATE 0x03 /* Clean and invalidate the caches */ +/* GPU_COMMAND_FLUSH_CACHES payloads bits for L2 caches */ +#define GPU_COMMAND_FLUSH_PAYLOAD_L2_NONE 0x000 /* No flush */ +#define GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN 0x001 /* CLN only */ +#define GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE 0x003 /* CLN + INV */ + +/* GPU_COMMAND_FLUSH_CACHES payloads bits for Load-store caches */ +#define GPU_COMMAND_FLUSH_PAYLOAD_LSC_NONE 0x000 /* No flush */ +#define GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN 0x010 /* CLN only */ +#define GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN_INVALIDATE 0x030 /* CLN + INV */ + +/* GPU_COMMAND_FLUSH_CACHES payloads bits for Other caches */ +#define GPU_COMMAND_FLUSH_PAYLOAD_OTHER_NONE 0x000 /* No flush */ +#define GPU_COMMAND_FLUSH_PAYLOAD_OTHER_INVALIDATE 0x200 /* INV only */ /* GPU_COMMAND command + payload */ #define GPU_COMMAND_CODE_PAYLOAD(opcode, payload) \ @@ -220,13 +234,21 @@ #define GPU_COMMAND_CYCLE_COUNT_STOP \ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_DISABLE) -/* Clean all caches */ -#define GPU_COMMAND_CLEAN_CACHES \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, GPU_COMMAND_FLUSH_PAYLOAD_CLEAN) - -/* Clean and invalidate all caches */ -#define GPU_COMMAND_CLEAN_INV_CACHES \ - GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, GPU_COMMAND_FLUSH_PAYLOAD_CLEAN_INVALIDATE) +/* Clean and invalidate L2 cache (Equivalent to FLUSH_PT) */ +#define GPU_COMMAND_CACHE_CLN_INV_L2 \ + GPU_COMMAND_CODE_PAYLOAD( \ + GPU_COMMAND_CODE_FLUSH_CACHES, \ + (GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE 
| \ + GPU_COMMAND_FLUSH_PAYLOAD_LSC_NONE | \ + GPU_COMMAND_FLUSH_PAYLOAD_OTHER_NONE)) + +/* Clean and invalidate L2 and LSC caches (Equivalent to FLUSH_MEM) */ +#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC \ + GPU_COMMAND_CODE_PAYLOAD( \ + GPU_COMMAND_CODE_FLUSH_CACHES, \ + (GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE | \ + GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN_INVALIDATE | \ + GPU_COMMAND_FLUSH_PAYLOAD_OTHER_NONE)) /* Places the GPU in protected mode */ #define GPU_COMMAND_SET_PROTECTED_MODE \ diff --git a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h index 1be3541..dcadcc7 100644 --- a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h +++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h @@ -261,6 +261,10 @@ #define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */ #define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */ +/* GPU_COMMAND cache flush alias to CSF command payload */ +#define GPU_COMMAND_CACHE_CLN_INV_L2 GPU_COMMAND_CLEAN_INV_CACHES +#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC GPU_COMMAND_CLEAN_INV_CACHES + /* IRQ flags */ #define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ #define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */ diff --git a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h index d093ce4..666b0af 100644 --- a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h +++ b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h @@ -53,6 +53,20 @@ GPU_ID2_VERSION_MINOR | \ GPU_ID2_VERSION_STATUS) +/* Helper macro to construct a value consisting of arch major and revision + * using the value of gpu_id. + */ +#define ARCH_MAJOR_REV_REG(gpu_id) \ + ((((__u32)gpu_id) & GPU_ID2_ARCH_MAJOR) | \ + (((__u32)gpu_id) & GPU_ID2_ARCH_REV)) + +/* Helper macro to create a partial GPU_ID (new format) that defines + * a arch major and revision. + */ +#define GPU_ID2_ARCH_MAJOR_REV_MAKE(arch_major, arch_rev) \ + ((((__u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ + (((__u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT)) + /* Helper macro to create a partial GPU_ID (new format) that defines * a product ignoring its version. 
*/ @@ -109,6 +123,8 @@ #define GPU_ID2_PRODUCT_TGRX GPU_ID2_MODEL_MAKE(10, 3) #define GPU_ID2_PRODUCT_TVAX GPU_ID2_MODEL_MAKE(10, 4) #define GPU_ID2_PRODUCT_LODX GPU_ID2_MODEL_MAKE(10, 7) +#define GPU_ID2_PRODUCT_TTUX GPU_ID2_MODEL_MAKE(11, 2) +#define GPU_ID2_PRODUCT_LTUX GPU_ID2_MODEL_MAKE(11, 3) /* Helper macro to create a GPU_ID assuming valid values for id, major, * minor, status diff --git a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h index 84fad8d..e223220 100644 --- a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h +++ b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h @@ -30,6 +30,13 @@ #include "backend/mali_kbase_gpu_regmap_jm.h" #endif +/* GPU_U definition */ +#ifdef __ASSEMBLER__ +#define GPU_U(x) x +#else +#define GPU_U(x) x##u +#endif /* __ASSEMBLER__ */ + /* Begin Register Offsets */ /* GPU control registers */ @@ -149,6 +156,10 @@ #define ASN_HASH(n) (ASN_HASH_0 + (n)*4) #define ASN_HASH_COUNT 3 +#define SYSC_ALLOC0 0x0340 /* (RW) System cache allocation hint from source ID */ +#define SYSC_ALLOC(n) (SYSC_ALLOC0 + (n)*4) +#define SYSC_ALLOC_COUNT 8 + #define STACK_PWRTRANS_LO 0xE40 /* (RO) Core stack power transition bitmap, low word */ #define STACK_PWRTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */ @@ -164,6 +175,7 @@ #define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ #define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */ + #define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */ #define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */ #define L2_MMU_CONFIG 0xF0C /* (RW) L2 cache and MMU configuration (implementation-specific) */ @@ -327,10 +339,6 @@ #define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */ #define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */ #define AS_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */ -/* Flush all L2 caches then issue a flush region command to all MMUs - * (deprecated - only for use with T60x) - */ -#define AS_COMMAND_FLUSH 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs */ #define AS_COMMAND_FLUSH_PT 0x04 /* Wait for memory accesses to complete, flush all the L1s cache then flush all @@ -338,6 +346,28 @@ */ #define AS_COMMAND_FLUSH_MEM 0x05 +/* AS_LOCKADDR register */ +#define AS_LOCKADDR_LOCKADDR_SIZE_SHIFT GPU_U(0) +#define AS_LOCKADDR_LOCKADDR_SIZE_MASK \ + (GPU_U(0x3F) << AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) +#define AS_LOCKADDR_LOCKADDR_SIZE_GET(reg_val) \ + (((reg_val)&AS_LOCKADDR_LOCKADDR_SIZE_MASK) >> \ + AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) +#define AS_LOCKADDR_LOCKADDR_SIZE_SET(reg_val, value) \ + (((reg_val) & ~AS_LOCKADDR_LOCKADDR_SIZE_MASK) | \ + (((value) << AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) & \ + AS_LOCKADDR_LOCKADDR_SIZE_MASK)) +#define AS_LOCKADDR_LOCKADDR_BASE_SHIFT GPU_U(12) +#define AS_LOCKADDR_LOCKADDR_BASE_MASK \ + (GPU_U(0xFFFFFFFFFFFFF) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT) +#define AS_LOCKADDR_LOCKADDR_BASE_GET(reg_val) \ + (((reg_val)&AS_LOCKADDR_LOCKADDR_BASE_MASK) >> \ + AS_LOCKADDR_LOCKADDR_BASE_SHIFT) +#define AS_LOCKADDR_LOCKADDR_BASE_SET(reg_val, value) \ + (((reg_val) & ~AS_LOCKADDR_LOCKADDR_BASE_MASK) | \ + (((value) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT) & \ + AS_LOCKADDR_LOCKADDR_BASE_MASK)) + /* GPU_STATUS values */ #define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the 
performance counters are active. */ #define GPU_STATUS_CYCLE_COUNT_ACTIVE (1 << 6) /* Set if the cycle counter is active. */ @@ -427,8 +457,133 @@ #define L2_CONFIG_ASN_HASH_ENABLE_MASK (1ul << L2_CONFIG_ASN_HASH_ENABLE_SHIFT) /* End L2_CONFIG register */ + /* IDVS_GROUP register */ #define IDVS_GROUP_SIZE_SHIFT (16) #define IDVS_GROUP_MAX_SIZE (0x3F) +/* SYSC_ALLOC read IDs */ +#define SYSC_ALLOC_ID_R_OTHER 0x00 +#define SYSC_ALLOC_ID_R_CSF 0x02 +#define SYSC_ALLOC_ID_R_MMU 0x04 +#define SYSC_ALLOC_ID_R_TILER_VERT 0x08 +#define SYSC_ALLOC_ID_R_TILER_PTR 0x09 +#define SYSC_ALLOC_ID_R_TILER_INDEX 0x0A +#define SYSC_ALLOC_ID_R_TILER_OTHER 0x0B +#define SYSC_ALLOC_ID_R_IC 0x10 +#define SYSC_ALLOC_ID_R_ATTR 0x11 +#define SYSC_ALLOC_ID_R_SCM 0x12 +#define SYSC_ALLOC_ID_R_FSDC 0x13 +#define SYSC_ALLOC_ID_R_VL 0x14 +#define SYSC_ALLOC_ID_R_PLR 0x15 +#define SYSC_ALLOC_ID_R_TEX 0x18 +#define SYSC_ALLOC_ID_R_LSC 0x1c + +/* SYSC_ALLOC write IDs */ +#define SYSC_ALLOC_ID_W_OTHER 0x00 +#define SYSC_ALLOC_ID_W_CSF 0x02 +#define SYSC_ALLOC_ID_W_PCB 0x07 +#define SYSC_ALLOC_ID_W_TILER_PTR 0x09 +#define SYSC_ALLOC_ID_W_TILER_VERT_PLIST 0x0A +#define SYSC_ALLOC_ID_W_TILER_OTHER 0x0B +#define SYSC_ALLOC_ID_W_L2_EVICT 0x0C +#define SYSC_ALLOC_ID_W_L2_FLUSH 0x0D +#define SYSC_ALLOC_ID_W_TIB_COLOR 0x10 +#define SYSC_ALLOC_ID_W_TIB_COLOR_AFBCH 0x11 +#define SYSC_ALLOC_ID_W_TIB_COLOR_AFBCB 0x12 +#define SYSC_ALLOC_ID_W_TIB_CRC 0x13 +#define SYSC_ALLOC_ID_W_TIB_DS 0x14 +#define SYSC_ALLOC_ID_W_TIB_DS_AFBCH 0x15 +#define SYSC_ALLOC_ID_W_TIB_DS_AFBCB 0x16 +#define SYSC_ALLOC_ID_W_LSC 0x1C + +/* SYSC_ALLOC values */ +#define SYSC_ALLOC_L2_ALLOC 0x0 +#define SYSC_ALLOC_NEVER_ALLOC 0x2 +#define SYSC_ALLOC_ALWAYS_ALLOC 0x3 +#define SYSC_ALLOC_PTL_ALLOC 0x4 +#define SYSC_ALLOC_L2_PTL_ALLOC 0x5 + +/* SYSC_ALLOC register */ +#define SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT (0) +#define SYSC_ALLOC_R_SYSC_ALLOC0_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT) +#define SYSC_ALLOC_R_SYSC_ALLOC0_GET(reg_val) \ + (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC0_MASK) >> \ + SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT) +#define SYSC_ALLOC_R_SYSC_ALLOC0_SET(reg_val, value) \ + (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC0_MASK) | \ + (((value) << SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT) & \ + SYSC_ALLOC_R_SYSC_ALLOC0_MASK)) +/* End of SYSC_ALLOC_R_SYSC_ALLOC0 values */ +#define SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT (4) +#define SYSC_ALLOC_W_SYSC_ALLOC0_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT) +#define SYSC_ALLOC_W_SYSC_ALLOC0_GET(reg_val) \ + (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC0_MASK) >> \ + SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT) +#define SYSC_ALLOC_W_SYSC_ALLOC0_SET(reg_val, value) \ + (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC0_MASK) | \ + (((value) << SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT) & \ + SYSC_ALLOC_W_SYSC_ALLOC0_MASK)) +/* End of SYSC_ALLOC_W_SYSC_ALLOC0 values */ +#define SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT (8) +#define SYSC_ALLOC_R_SYSC_ALLOC1_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT) +#define SYSC_ALLOC_R_SYSC_ALLOC1_GET(reg_val) \ + (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC1_MASK) >> \ + SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT) +#define SYSC_ALLOC_R_SYSC_ALLOC1_SET(reg_val, value) \ + (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC1_MASK) | \ + (((value) << SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT) & \ + SYSC_ALLOC_R_SYSC_ALLOC1_MASK)) +/* End of SYSC_ALLOC_R_SYSC_ALLOC1 values */ +#define SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT (12) +#define SYSC_ALLOC_W_SYSC_ALLOC1_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT) +#define SYSC_ALLOC_W_SYSC_ALLOC1_GET(reg_val) \ + (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC1_MASK) >> \ + 
SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT) +#define SYSC_ALLOC_W_SYSC_ALLOC1_SET(reg_val, value) \ + (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC1_MASK) | \ + (((value) << SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT) & \ + SYSC_ALLOC_W_SYSC_ALLOC1_MASK)) +/* End of SYSC_ALLOC_W_SYSC_ALLOC1 values */ +#define SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT (16) +#define SYSC_ALLOC_R_SYSC_ALLOC2_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT) +#define SYSC_ALLOC_R_SYSC_ALLOC2_GET(reg_val) \ + (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC2_MASK) >> \ + SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT) +#define SYSC_ALLOC_R_SYSC_ALLOC2_SET(reg_val, value) \ + (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC2_MASK) | \ + (((value) << SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT) & \ + SYSC_ALLOC_R_SYSC_ALLOC2_MASK)) +/* End of SYSC_ALLOC_R_SYSC_ALLOC2 values */ +#define SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT (20) +#define SYSC_ALLOC_W_SYSC_ALLOC2_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT) +#define SYSC_ALLOC_W_SYSC_ALLOC2_GET(reg_val) \ + (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC2_MASK) >> \ + SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT) +#define SYSC_ALLOC_W_SYSC_ALLOC2_SET(reg_val, value) \ + (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC2_MASK) | \ + (((value) << SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT) & \ + SYSC_ALLOC_W_SYSC_ALLOC2_MASK)) +/* End of SYSC_ALLOC_W_SYSC_ALLOC2 values */ +#define SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT (24) +#define SYSC_ALLOC_R_SYSC_ALLOC3_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT) +#define SYSC_ALLOC_R_SYSC_ALLOC3_GET(reg_val) \ + (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC3_MASK) >> \ + SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT) +#define SYSC_ALLOC_R_SYSC_ALLOC3_SET(reg_val, value) \ + (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC3_MASK) | \ + (((value) << SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT) & \ + SYSC_ALLOC_R_SYSC_ALLOC3_MASK)) +/* End of SYSC_ALLOC_R_SYSC_ALLOC3 values */ +#define SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT (28) +#define SYSC_ALLOC_W_SYSC_ALLOC3_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT) +#define SYSC_ALLOC_W_SYSC_ALLOC3_GET(reg_val) \ + (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC3_MASK) >> \ + SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT) +#define SYSC_ALLOC_W_SYSC_ALLOC3_SET(reg_val, value) \ + (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC3_MASK) | \ + (((value) << SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT) & \ + SYSC_ALLOC_W_SYSC_ALLOC3_MASK)) +/* End of SYSC_ALLOC_W_SYSC_ALLOC3 values */ + #endif /* _UAPI_KBASE_GPU_REGMAP_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h b/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h index 749e1fa..7a52fbf 100644 --- a/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h +++ b/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h @@ -192,15 +192,15 @@ #define BASE_MEM_FLAGS_RESERVED \ (BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_19) -#define BASEP_MEM_INVALID_HANDLE (0ull << 12) -#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12) -#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12) -#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12) -#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) +#define BASEP_MEM_INVALID_HANDLE (0ul) +#define BASE_MEM_MMU_DUMP_HANDLE (1ul << LOCAL_PAGE_SHIFT) +#define BASE_MEM_TRACE_BUFFER_HANDLE (2ul << LOCAL_PAGE_SHIFT) +#define BASE_MEM_MAP_TRACKING_HANDLE (3ul << LOCAL_PAGE_SHIFT) +#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ul << LOCAL_PAGE_SHIFT) /* reserved handles ..-47< for future special handles */ -#define BASE_MEM_COOKIE_BASE (64ul << 12) -#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \ - BASE_MEM_COOKIE_BASE) +#define BASE_MEM_COOKIE_BASE (64ul << LOCAL_PAGE_SHIFT) +#define BASE_MEM_FIRST_FREE_ADDRESS \ + 
((BITS_PER_LONG << LOCAL_PAGE_SHIFT) + BASE_MEM_COOKIE_BASE) /* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the * initial commit is aligned to 'extension' pages, where 'extension' must be a power diff --git a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h index 72d75cb..2598e20 100644 --- a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h @@ -119,6 +119,8 @@ * 11.31: * - Added BASE_JD_REQ_LIMITED_CORE_MASK. * - Added ioctl 55: set_limited_core_count. + * 11.32: + * - Added new HW performance counters interface to all GPUs. */ #define BASE_UK_VERSION_MAJOR 11 #define BASE_UK_VERSION_MINOR 31 diff --git a/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h b/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h index a46c41f..410d54e 100644 --- a/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h +++ b/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h @@ -42,18 +42,6 @@ struct base_mem_handle { #define BASE_MAX_COHERENT_GROUPS 16 -#if defined(CDBG_ASSERT) -#define LOCAL_ASSERT CDBG_ASSERT -#elif defined(KBASE_DEBUG_ASSERT) -#define LOCAL_ASSERT KBASE_DEBUG_ASSERT -#else -#if defined(__KERNEL__) -#error assert macro not defined! -#else -#define LOCAL_ASSERT(...) ((void)#__VA_ARGS__) -#endif -#endif - #if defined(PAGE_MASK) && defined(PAGE_SHIFT) #define LOCAL_PAGE_SHIFT PAGE_SHIFT #define LOCAL_PAGE_LSB ~PAGE_MASK @@ -635,7 +623,7 @@ struct mali_base_gpu_coherent_group_info { * @thread_max_barrier_size: Maximum number of threads per barrier * @thread_features: Thread features * @coherency_mode: Note: This is the _selected_ coherency mode rather than the - * available modes as exposed in the coherency_features register + * available modes as exposed in the coherency_features register * @thread_tls_alloc: Number of threads per core that TLS must be allocated for * @gpu_features: GPU features * @@ -699,7 +687,7 @@ struct gpu_raw_gpu_props { * values from which the value of the other members are derived. The derived * members exist to allow for efficient access and/or shielding the details * of the layout of the registers. - * */ + */ struct base_gpu_props { struct mali_base_gpu_core_props core_props; struct mali_base_gpu_l2_cache_props l2_props; @@ -716,82 +704,24 @@ struct base_gpu_props { #include "jm/mali_base_jm_kernel.h" #endif -/** - * base_mem_group_id_get() - Get group ID from flags - * @flags: Flags to pass to base_mem_alloc - * - * This inline function extracts the encoded group ID from flags - * and converts it into numeric value (0~15). - * - * Return: group ID(0~15) extracted from the parameter - */ -static __inline__ int base_mem_group_id_get(base_mem_alloc_flags flags) -{ - LOCAL_ASSERT((flags & ~BASE_MEM_FLAGS_INPUT_MASK) == 0); - return (int)((flags & BASE_MEM_GROUP_ID_MASK) >> - BASEP_MEM_GROUP_ID_SHIFT); -} - -/** - * base_mem_group_id_set() - Set group ID into base_mem_alloc_flags - * @id: group ID(0~15) you want to encode - * - * This inline function encodes specific group ID into base_mem_alloc_flags. - * Parameter 'id' should lie in-between 0 to 15. - * - * Return: base_mem_alloc_flags with the group ID (id) encoded - * - * The return value can be combined with other flags against base_mem_alloc - * to identify a specific memory group. 
- */ -static __inline__ base_mem_alloc_flags base_mem_group_id_set(int id) -{ - if ((id < 0) || (id >= BASE_MEM_GROUP_COUNT)) { - /* Set to default value when id is out of range. */ - id = BASE_MEM_GROUP_DEFAULT; - } +#define BASE_MEM_GROUP_ID_GET(flags) \ + ((flags & BASE_MEM_GROUP_ID_MASK) >> BASEP_MEM_GROUP_ID_SHIFT) - return ((base_mem_alloc_flags)id << BASEP_MEM_GROUP_ID_SHIFT) & - BASE_MEM_GROUP_ID_MASK; -} +#define BASE_MEM_GROUP_ID_SET(id) \ + (((base_mem_alloc_flags)((id < 0 || id >= BASE_MEM_GROUP_COUNT) ? \ + BASE_MEM_GROUP_DEFAULT : \ + id) \ + << BASEP_MEM_GROUP_ID_SHIFT) & \ + BASE_MEM_GROUP_ID_MASK) -/** - * base_context_mmu_group_id_set - Encode a memory group ID in - * base_context_create_flags - * - * Memory allocated for GPU page tables will come from the specified group. - * - * @group_id: Physical memory group ID. Range is 0..(BASE_MEM_GROUP_COUNT-1). - * - * Return: Bitmask of flags to pass to base_context_init. - */ -static __inline__ base_context_create_flags base_context_mmu_group_id_set( - int const group_id) -{ - LOCAL_ASSERT(group_id >= 0); - LOCAL_ASSERT(group_id < BASE_MEM_GROUP_COUNT); - return BASEP_CONTEXT_MMU_GROUP_ID_MASK & - ((base_context_create_flags)group_id << - BASEP_CONTEXT_MMU_GROUP_ID_SHIFT); -} +#define BASE_CONTEXT_MMU_GROUP_ID_SET(group_id) \ + (BASEP_CONTEXT_MMU_GROUP_ID_MASK & \ + ((base_context_create_flags)(group_id) \ + << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)) -/** - * base_context_mmu_group_id_get - Decode a memory group ID from - * base_context_create_flags - * - * Memory allocated for GPU page tables will come from the returned group. - * - * @flags: Bitmask of flags to pass to base_context_init. - * - * Return: Physical memory group ID. Valid range is 0..(BASE_MEM_GROUP_COUNT-1). - */ -static __inline__ int base_context_mmu_group_id_get( - base_context_create_flags const flags) -{ - LOCAL_ASSERT(flags == (flags & BASEP_CONTEXT_CREATE_ALLOWED_FLAGS)); - return (int)((flags & BASEP_CONTEXT_MMU_GROUP_ID_MASK) >> - BASEP_CONTEXT_MMU_GROUP_ID_SHIFT); -} +#define BASE_CONTEXT_MMU_GROUP_ID_GET(flags) \ + ((flags & BASEP_CONTEXT_MMU_GROUP_ID_MASK) >> \ + BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) /* * A number of bit flags are defined for requesting cpu_gpu_timeinfo. These diff --git a/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h b/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h index 9baaec1..15843ee 100644 --- a/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h +++ b/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h @@ -91,6 +91,7 @@ enum base_hwcnt_reader_event { #define KBASE_HWCNT_READER_API_VERSION_NO_FEATURE (0) #define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP (1 << 0) #define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES (1 << 1) + /** * struct kbase_hwcnt_reader_api_version - hwcnt reader API version * @version: API version @@ -101,5 +102,263 @@ struct kbase_hwcnt_reader_api_version { __u32 features; }; +/** Hardware counters reader API version */ +#define PRFCNT_READER_API_VERSION (0) + +/** + * enum prfcnt_list_type - Type of list item + * @PRFCNT_LIST_TYPE_ENUM: Enumeration of performance counters. + * @PRFCNT_LIST_TYPE_REQUEST: Request for configuration setup. + * @PRFCNT_LIST_TYPE_SAMPLE_META: Sample metadata. 
+ */ +enum prfcnt_list_type { + PRFCNT_LIST_TYPE_ENUM, + PRFCNT_LIST_TYPE_REQUEST, + PRFCNT_LIST_TYPE_SAMPLE_META, +}; + +#define FLEX_LIST_TYPE(type, subtype) \ + (__u16)(((type & 0xf) << 12) | (subtype & 0xfff)) +#define FLEX_LIST_TYPE_NONE FLEX_LIST_TYPE(0, 0) + +#define PRFCNT_ENUM_TYPE_BLOCK FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_ENUM, 0) +#define PRFCNT_ENUM_TYPE_REQUEST FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_ENUM, 1) + +#define PRFCNT_REQUEST_TYPE_MODE FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_REQUEST, 0) +#define PRFCNT_REQUEST_TYPE_ENABLE FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_REQUEST, 1) + +#define PRFCNT_SAMPLE_META_TYPE_SAMPLE \ + FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_SAMPLE_META, 0) +#define PRFCNT_SAMPLE_META_TYPE_CLOCK \ + FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_SAMPLE_META, 1) +#define PRFCNT_SAMPLE_META_TYPE_BLOCK \ + FLEX_LIST_TYPE(PRFCNT_LIST_TYPE_SAMPLE_META, 2) + +/** + * struct prfcnt_item_header - Header for an item of the list. + * @item_type: Type of item. + * @item_version: Protocol version. + */ +struct prfcnt_item_header { + __u16 item_type; + __u16 item_version; +}; + +/** + * enum prfcnt_block_type - Type of performance counter block. + * @PRFCNT_BLOCK_TYPE_FE: Front End. + * @PRFCNT_BLOCK_TYPE_TILER: Tiler. + * @PRFCNT_BLOCK_TYPE_MEMORY: Memory System. + * @PRFCNT_BLOCK_TYPE_SHADER_CORE: Shader Core. + */ +enum prfcnt_block_type { + PRFCNT_BLOCK_TYPE_FE, + PRFCNT_BLOCK_TYPE_TILER, + PRFCNT_BLOCK_TYPE_MEMORY, + PRFCNT_BLOCK_TYPE_SHADER_CORE, + PRFCNT_BLOCK_TYPE_RESERVED = 255, +}; + +/** + * enum prfcnt_block_set - Type of performance counter block set. + * @PRFCNT_SET_PRIMARY: Primary. + * @PRFCNT_SET_SECONDARY: Secondary. + * @PRFCNT_SET_TERTIARY: Tertiary. + */ +enum prfcnt_set { + PRFCNT_SET_PRIMARY, + PRFCNT_SET_SECONDARY, + PRFCNT_SET_TERTIARY, + PRFCNT_SET_RESERVED = 255, +}; + +/** + * struct prfcnt_enum_block_counter - Performance counter block descriptor. + * @block_type: Type of performance counter block. + * @set: Which SET this represents: primary, secondary or tertiary. + * @num_instances: How many instances of this block type exist in the hardware. + * @num_values: How many entries in the values array there are for samples + * from this block. + * @pad: Padding bytes. + * @counter_mask: Bitmask that indicates the availability of counters in this + * block. + */ +struct prfcnt_enum_block_counter { + __u8 block_type; + __u8 set; + __u8 num_instances; + __u8 num_values; + __u8 pad[4]; + __u64 counter_mask[2]; +}; + +/** + * struct prfcnt_enum_request - Request descriptor. + * @request_item_type: Type of request. + * @pad: Padding bytes. + * @versions_mask: Bitmask of versions that support this request. + */ +struct prfcnt_enum_request { + __u16 request_item_type; + __u16 pad; + __u32 versions_mask; +}; + +/** + * struct prfcnt_enum_item - Performance counter enumeration item. + * @hdr: Header describing the type of item in the list. + * @block_counter: Performance counter block descriptor. + * @request: Request descriptor. + */ +struct prfcnt_enum_item { + struct prfcnt_item_header hdr; + union { + struct prfcnt_enum_block_counter block_counter; + struct prfcnt_enum_request request; + } u; +}; + +/** + * enum prfcnt_mode - Capture mode for counter sampling. + * @PRFCNT_MODE_MANUAL: Manual sampling mode. + * @PRFCNT_MODE_PERIODIC: Periodic sampling mode. + */ +enum prfcnt_mode { + PRFCNT_MODE_MANUAL, + PRFCNT_MODE_PERIODIC, + PRFCNT_MODE_RESERVED = 255, +}; + +/** + * struct prfcnt_request_mode - Mode request descriptor. + * @mode: Capture mode for the session, either manual or periodic. 
+ * @pad: Padding bytes. + * @period_us: Period in microseconds, for periodic mode. + */ +struct prfcnt_request_mode { + __u8 mode; + __u8 pad[7]; + union { + struct { + __u64 period_us; + } periodic; + } mode_config; +}; + +/** + * struct prfcnt_request_enable - Enable request descriptor. + * @block_type: Type of performance counter block. + * @set: Which SET to use: primary, secondary or tertiary. + * @pad: Padding bytes. + * @enable_mask: Bitmask that indicates which performance counters to enable. + * Unavailable counters will be ignored. + */ +struct prfcnt_request_enable { + __u8 block_type; + __u8 set; + __u8 pad[6]; + __u64 enable_mask[2]; +}; + +/** + * struct prfcnt_request_item - Performance counter request item. + * @hdr: Header describing the type of item in the list. + * @req_mode: Mode request descriptor. + * @req_enable: Enable request descriptor. + */ +struct prfcnt_request_item { + struct prfcnt_item_header hdr; + union { + struct prfcnt_request_mode req_mode; + struct prfcnt_request_enable req_enable; + } u; +}; + +/** + * enum prfcnt_request_type - Type of request descriptor. + * @PRFCNT_REQUEST_MODE: Specify the capture mode to be used for the session. + * @PRFCNT_REQUEST_ENABLE: Specify which performance counters to capture. + */ +enum prfcnt_request_type { + PRFCNT_REQUEST_MODE, + PRFCNT_REQUEST_ENABLE, +}; + +/** + * struct prfcnt_sample_metadata - Metadata for counter sample data. + * @timestamp_start: Earliest timestamp that values in this sample represent. + * @timestamp_end: Latest timestamp that values in this sample represent. + * @seq: Sequence number of this sample. Must match the value from + * GET_SAMPLE. + * @user_data: User data provided to HWC_CMD_START or HWC_CMD_SAMPLE_* + * @flags: Property flags. + */ +struct prfcnt_sample_metadata { + __u64 timestamp_start; + __u64 timestamp_end; + __u64 seq; + __u64 user_data; + __u32 flags; + __u32 pad; +}; + +/** + * struct prfcnt_clock_metadata - Metadata for clock cycles. + * @num_domains: Number of domains this metadata refers to. + * @cycles: Number of cycles elapsed in each counter domain between + * timestamp_start and timestamp_end. + */ +struct prfcnt_clock_metadata { + __u32 num_domains; + __u32 pad; + __u64 *cycles; +}; + +/* This block was powered on for at least some portion of the sample */ +#define BLOCK_STATE_ON (1 << 0) +/* This block was powered off for at least some portion of the sample */ +#define BLOCK_STATE_OFF (1 << 1) +/* This block was available to this VM for at least some portion of the sample */ +#define BLOCK_STATE_AVAILABLE (1 << 2) +/* This block was not available to this VM for at least some portion of the sample + * Note that no data is collected when the block is not available to the VM. + */ +#define BLOCK_STATE_UNAVAILABLE (1 << 3) +/* This block was operating in "normal" (non-protected) mode for at least some portion of the sample */ +#define BLOCK_STATE_NORMAL (1 << 4) +/* This block was operating in "protected" mode for at least some portion of the sample. + * Note that no data is collected when the block is in protected mode. + */ +#define BLOCK_STATE_PROTECTED (1 << 5) + +/** + * struct prfcnt_block_metadata - Metadata for counter block. + * @block_type: Type of performance counter block. + * @block_idx: Index of performance counter block. + * @set: Set of performance counter block. + * @block_state: Bits set indicate the states which the block is known + * to have operated in during this sample. 
+ * @values_offset: Offset from the start of the mmapped region, to the values + * for this block. The values themselves are an array of __u64. + */ +struct prfcnt_block_metadata { + __u8 block_type; + __u8 block_idx; + __u8 set; + __u8 pad_u8; + __u32 block_state; + __u32 values_offset; + __u32 pad_u32; +}; + +struct prfcnt_metadata { + struct prfcnt_item_header hdr; + union { + struct prfcnt_sample_metadata sample_md; + struct prfcnt_clock_metadata clock_md; + struct prfcnt_block_metadata block_md; + } u; +}; + #endif /* _UAPI_KBASE_HWCNT_READER_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h b/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h index 29ff32a..8e1ed55 100644 --- a/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h @@ -186,12 +186,15 @@ struct kbase_ioctl_hwcnt_enable { __u32 mmu_l2_bm; }; +/* This IOCTL is deprecated as of R33, and will be removed in R35. */ #define KBASE_IOCTL_HWCNT_ENABLE \ _IOW(KBASE_IOCTL_TYPE, 9, struct kbase_ioctl_hwcnt_enable) +/* This IOCTL is deprecated as of R33, and will be removed in R35. */ #define KBASE_IOCTL_HWCNT_DUMP \ _IO(KBASE_IOCTL_TYPE, 10) +/* This IOCTL is deprecated as of R33, and will be removed in R35. */ #define KBASE_IOCTL_HWCNT_CLEAR \ _IO(KBASE_IOCTL_TYPE, 11) @@ -686,6 +689,55 @@ struct kbase_ioctl_set_limited_core_count { #define KBASE_IOCTL_SET_LIMITED_CORE_COUNT \ _IOW(KBASE_IOCTL_TYPE, 55, struct kbase_ioctl_set_limited_core_count) +/** + * struct kbase_ioctl_kinstr_prfcnt_enum_info - Enum Performance counter + * information + * @info_item_size: Performance counter item size in bytes. + * @info_item_count: Performance counter item count in the info_list_ptr. + * @info_list_ptr: Performance counter item list pointer which points to a + * list with info_item_count of items. + * + * On success: returns info_item_size and info_item_count if info_list_ptr is + * NULL, returns performance counter information if info_list_ptr is not NULL. + * On error: returns a negative error code. + */ +struct kbase_ioctl_kinstr_prfcnt_enum_info { + __u32 info_item_size; + __u32 info_item_count; + __u64 info_list_ptr; +}; + +#define KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO \ + _IOWR(KBASE_IOCTL_TYPE, 56, struct kbase_ioctl_kinstr_prfcnt_enum_info) + +/** + * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader + * @in: input parameters. + * @in.request_item_count: Number of requests in the requests array. + * @in.request_item_size: Size in bytes of each request in the requests array. + * @in.requests_ptr: Pointer to the requests array. + * @out: output parameters. + * @out.prfcnt_metadata_item_size: Size of each item in the metadata array for + * each sample. + * @out.prfcnt_mmap_size_bytes: Size in bytes that user-space should mmap + * for reading performance counter samples. + * + * A fd is returned from the ioctl if successful, or a negative value on error. + */ +union kbase_ioctl_kinstr_prfcnt_setup { + struct { + __u32 request_item_count; + __u32 request_item_size; + __u64 requests_ptr; + } in; + struct { + __u32 prfcnt_metadata_item_size; + __u32 prfcnt_mmap_size_bytes; + } out; +}; + +#define KBASE_IOCTL_KINSTR_PRFCNT_SETUP \ + _IOWR(KBASE_IOCTL_TYPE, 57, union kbase_ioctl_kinstr_prfcnt_setup) /*************** * test ioctls * -- cgit v1.2.3
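The headline addition in this patch is the new kinstr_prfcnt performance-counter interface: ioctls 56 (KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO) and 57 (KBASE_IOCTL_KINSTR_PRFCNT_SETUP) in mali_kbase_ioctl.h, plus the prfcnt_* item definitions in mali_kbase_hwcnt_reader.h. The following is a minimal user-space sketch of the call flow implied by the kernel-doc comments above: enumerate the available counter blocks with a two-pass ENUM_INFO call, then submit a request list (mode + enable) to SETUP to obtain a reader fd. It is illustrative only and not part of the DDK; the device node path, the omission of the usual version-check/set-flags handshake after open(), the choice of item_type/item_version values, the absence of a list terminator item, and all error handling are assumptions made for brevity.

/* Illustrative sketch only. Assumptions (not taken from this patch): the
 * device node is /dev/mali0, the mandatory version-check/set-flags handshake
 * is omitted, hdr.item_type uses the PRFCNT_REQUEST_TYPE_* constants and
 * hdr.item_version uses PRFCNT_READER_API_VERSION, and error handling is
 * reduced to early returns.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include "mali_kbase_ioctl.h"        /* install path of these UAPI headers is assumed */
#include "mali_kbase_hwcnt_reader.h"

int main(void)
{
	int dev = open("/dev/mali0", O_RDWR);   /* assumed device node */
	if (dev < 0)
		return 1;

	/* First ENUM_INFO call with a NULL list returns item size and count. */
	struct kbase_ioctl_kinstr_prfcnt_enum_info enum_info = { 0 };
	if (ioctl(dev, KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO, &enum_info))
		return 1;

	/* Second call fills a caller-allocated list of enumeration items.
	 * A real client should walk the list in info_item_size strides, since
	 * the kernel's item size may differ from sizeof(struct prfcnt_enum_item).
	 */
	struct prfcnt_enum_item *items =
		calloc(enum_info.info_item_count, enum_info.info_item_size);
	enum_info.info_list_ptr = (uintptr_t)items;
	if (ioctl(dev, KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO, &enum_info))
		return 1;

	/* Build a request list: manual sampling mode plus one enable mask.
	 * Note: some driver revisions may expect a trailing FLEX_LIST_TYPE_NONE
	 * item to terminate the list; check against the matching kbase version.
	 */
	struct prfcnt_request_item reqs[2];
	memset(reqs, 0, sizeof(reqs));

	reqs[0].hdr.item_type = PRFCNT_REQUEST_TYPE_MODE;
	reqs[0].hdr.item_version = PRFCNT_READER_API_VERSION;
	reqs[0].u.req_mode.mode = PRFCNT_MODE_MANUAL;

	reqs[1].hdr.item_type = PRFCNT_REQUEST_TYPE_ENABLE;
	reqs[1].hdr.item_version = PRFCNT_READER_API_VERSION;
	reqs[1].u.req_enable.block_type = PRFCNT_BLOCK_TYPE_SHADER_CORE;
	reqs[1].u.req_enable.set = PRFCNT_SET_PRIMARY;
	reqs[1].u.req_enable.enable_mask[0] = ~0ULL;
	reqs[1].u.req_enable.enable_mask[1] = ~0ULL;

	union kbase_ioctl_kinstr_prfcnt_setup setup = { 0 };
	setup.in.request_item_count = 2;
	setup.in.request_item_size = sizeof(struct prfcnt_request_item);
	setup.in.requests_ptr = (uintptr_t)reqs;

	/* On success the ioctl returns a new fd used to mmap and read samples
	 * (each sample is described by prfcnt_metadata items).
	 */
	int prfcnt_fd = ioctl(dev, KBASE_IOCTL_KINSTR_PRFCNT_SETUP, &setup);
	if (prfcnt_fd < 0)
		return 1;

	printf("metadata item size %u, mmap size %u bytes\n",
	       setup.out.prfcnt_metadata_item_size,
	       setup.out.prfcnt_mmap_size_bytes);

	close(prfcnt_fd);
	free(items);
	close(dev);
	return 0;
}

This sketch replaces the dumper flow served by the now-deprecated KBASE_IOCTL_HWCNT_ENABLE/DUMP/CLEAR ioctls; reading the actual counter values would proceed by mmapping prfcnt_mmap_size_bytes from the returned fd and interpreting the per-sample prfcnt_metadata items, which is outside the scope of this example.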