From fca8613cfcf585bf9113dca96a05daea9fd89794 Mon Sep 17 00:00:00 2001
From: Sidath Senanayake
Date: Tue, 15 Jun 2021 13:39:30 +0100
Subject: Mali Valhall DDK r31p0 KMD

Provenance: 2ea0ef9bd (collaborate/EAC/v_r31p0)

VX504X08X-BU-00000-r31p0-01eac0 - Valhall Android DDK
VX504X08X-BU-60000-r31p0-01eac0 - Valhall Android Document Bundle
VX504X08X-DC-11001-r31p0-01eac0 - Valhall Android DDK Software Errata
VX504X08X-SW-99006-r31p0-01eac0 - Valhall Android Renderscript AOSP parts

Signed-off-by: Sidath Senanayake
Change-Id: Ide9d5fdc6d9c95fa66a3546b01f619b43c09496d
---
 common/include/linux/priority_control_manager.h | 4 +-
 .../midgard/backend/gpu/mali_kbase_model_dummy.h | 53 +
 .../gpu/arm/midgard/csf/mali_base_csf_kernel.h | 765 +++++++++++
 .../midgard/csf/mali_gpu_csf_control_registers.h | 32 +
 .../gpu/arm/midgard/csf/mali_gpu_csf_registers.h | 1414 ++++++++++++++++++++
 .../gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h | 390 ++++++
 .../gpu/backend/mali_kbase_gpu_regmap_csf.h | 335 +++++
 .../midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h | 287 ++++
 .../gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h | 30 +
 .../uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h | 120 ++
 .../gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h | 424 ++++++
 .../uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h | 1200 +++++++++++++++++
 .../uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h | 223 +++
 .../uapi/gpu/arm/midgard/mali_base_kernel.h | 826 ++++++++++++
 .../uapi/gpu/arm/midgard/mali_base_mem_priv.h | 58 +
 .../uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h | 105 ++
 .../uapi/gpu/arm/midgard/mali_kbase_ioctl.h | 836 ++++++++++++
 .../gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h | 69 +
 common/include/uapi/gpu/arm/midgard/mali_uk.h | 70 +
 19 files changed, 7240 insertions(+), 1 deletion(-)
 create mode 100644 common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h
 create mode 100644 common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h
 create mode 100644 common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_control_registers.h
 create mode 100644 common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h
 create mode 100644 common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h
 create mode 100644 common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h
 create mode 100644 common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h
 create mode 100644 common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h
 create mode 100644 common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h
 create mode 100644 common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h
 create mode 100644 common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h
 create mode 100644 common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h
 create mode 100644 common/include/uapi/gpu/arm/midgard/mali_base_kernel.h
 create mode 100644 common/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h
 create mode 100644 common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
 create mode 100644 common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h
 create mode 100644 common/include/uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h
 create mode 100644 common/include/uapi/gpu/arm/midgard/mali_uk.h

diff --git a/common/include/linux/priority_control_manager.h b/common/include/linux/priority_control_manager.h
index d3e22f5..df3b3cd 100644
--- a/common/include/linux/priority_control_manager.h
+++ b/common/include/linux/priority_control_manager.h
@@ -53,7 +53,7 @@ struct priority_control_manager_ops {
  * Return: The priority that would actually be given, could be lower than requested_priority
  */
 	int (*pcm_scheduler_priority_check)(
-		struct priority_control_manager_device *mgm_dev,
+		struct priority_control_manager_device *pcm_dev,
 		struct task_struct *task, int requested_priority);
 };
@@ -62,6 +62,7 @@ struct priority_control_manager_ops {
  * control manager
  *
  * @ops:   Callbacks associated with this device
+ * @data:  Pointer to device private data
  * @owner: Pointer to the module owner
  *
  * This structure should be registered with the platform device using
@@ -69,6 +70,7 @@ struct priority_control_manager_ops {
  */
 struct priority_control_manager_device {
 	struct priority_control_manager_ops ops;
+	void *data;
 	struct module *owner;
 };
diff --git a/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h
new file mode 100644
index 0000000..61da071
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Dummy Model interface
+ */
+
+#ifndef _UAPI_KBASE_MODEL_DUMMY_H_
+#define _UAPI_KBASE_MODEL_DUMMY_H_
+
+#include <linux/types.h>
+
+#define KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS (4)
+#define KBASE_DUMMY_MODEL_COUNTER_PER_CORE (60)
+#define KBASE_DUMMY_MODEL_COUNTER_PER_CORE_TYPE \
+	(64*KBASE_DUMMY_MODEL_COUNTER_PER_CORE)
+#define KBASE_DUMMY_MODEL_COUNTERS_PER_BIT (4)
+#define KBASE_DUMMY_MODEL_COUNTER_ENABLED(enable_mask, ctr_idx) \
+	(enable_mask & (1 << (ctr_idx / KBASE_DUMMY_MODEL_COUNTERS_PER_BIT)))
+
+#define KBASE_DUMMY_MODEL_HEADERS_PER_BLOCK 4
+#define KBASE_DUMMY_MODEL_COUNTERS_PER_BLOCK 60
+#define KBASE_DUMMY_MODEL_VALUES_PER_BLOCK \
+	(KBASE_DUMMY_MODEL_COUNTERS_PER_BLOCK + \
+	 KBASE_DUMMY_MODEL_HEADERS_PER_BLOCK)
+#define KBASE_DUMMY_MODEL_BLOCK_SIZE \
+	(KBASE_DUMMY_MODEL_VALUES_PER_BLOCK * sizeof(__u32))
+#define KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS 8
+#define KBASE_DUMMY_MODEL_MAX_SHADER_CORES 32
+#define KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS \
+	(1 + 1 + KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS + KBASE_DUMMY_MODEL_MAX_SHADER_CORES)
+#define KBASE_DUMMY_MODEL_COUNTER_TOTAL \
+	(KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * KBASE_DUMMY_MODEL_COUNTER_PER_CORE_TYPE)
+
+#endif /* _UAPI_KBASE_MODEL_DUMMY_H_ */
diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h
new file mode 100644
index 0000000..7fa874b
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h
@@ -0,0 +1,765 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_BASE_CSF_KERNEL_H_
+#define _UAPI_BASE_CSF_KERNEL_H_
+
+#include <linux/types.h>
+
+/* Memory allocation, access/hint flags.
+ *
+ * See base_mem_alloc_flags.
+ */
+
+/* IN */
+/* Read access CPU side
+ */
+#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0)
+
+/* Write access CPU side
+ */
+#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1)
+
+/* Read access GPU side
+ */
+#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2)
+
+/* Write access GPU side
+ */
+#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3)
+
+/* Execute allowed on the GPU side
+ */
+#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4)
+
+/* Will be permanently mapped in kernel space.
+ * Flag is only allowed on allocations originating from kbase.
+ */
+#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5)
+
+/* The allocation will completely reside within the same 4GB chunk in the GPU
+ * virtual space.
+ * Since this flag is primarily required only for the TLS memory which will
+ * not be used to contain executable code and also not used for Tiler heap,
+ * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags.
+ */
+#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6)
+
+/* Userspace is not allowed to free this memory.
+ * Flag is only allowed on allocations originating from kbase.
+ */
+#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7)
+
+#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8)
+
+/* Grow backing store on GPU Page Fault
+ */
+#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9)
+
+/* Page coherence Outer shareable, if available
+ */
+#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10)
+
+/* Page coherence Inner shareable
+ */
+#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11)
+
+/* IN/OUT */
+/* Should be cached on the CPU, returned if actually cached
+ */
+#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12)
+
+/* IN/OUT */
+/* Must have same VA on both the GPU and the CPU
+ */
+#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13)
+
+/* OUT */
+/* Must call mmap to acquire a GPU address for the alloc
+ */
+#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14)
+
+/* IN */
+/* Page coherence Outer shareable, required.
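 *
 * An illustrative combination (a sketch, not text from the DDK header): a
 * buffer that must be fully coherent and accessible from both sides could
 * request this flag together with the protection bits defined above:
 *
 *   base_mem_alloc_flags flags =
 *           BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR |
 *           BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
 *           BASE_MEM_COHERENT_SYSTEM_REQUIRED;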
+ */ +#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15) + +/* Protected memory + */ +#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16) + +/* Not needed physical memory + */ +#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17) + +/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the + * addresses to be the same + */ +#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18) + +/* CSF event memory + * + * If Outer shareable coherence is not specified or not available, then on + * allocation kbase will automatically use the uncached GPU mapping. + * There is no need for the client to specify BASE_MEM_UNCACHED_GPU + * themselves when allocating memory with the BASE_MEM_CSF_EVENT flag. + * + * This memory requires a permanent mapping + * + * See also kbase_reg_needs_kernel_mapping() + */ +#define BASE_MEM_CSF_EVENT ((base_mem_alloc_flags)1 << 19) + +#define BASE_MEM_RESERVED_BIT_20 ((base_mem_alloc_flags)1 << 20) + +/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu + * mode. Some components within the GPU might only be able to access memory + * that is GPU cacheable. Refer to the specific GPU implementation for more + * details. The 3 shareability flags will be ignored for GPU uncached memory. + * If used while importing USER_BUFFER type memory, then the import will fail + * if the memory is not aligned to GPU and CPU cache line width. + */ +#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21) + +/* + * Bits [22:25] for group_id (0~15). + * + * base_mem_group_id_set() should be used to pack a memory group ID into a + * base_mem_alloc_flags value instead of accessing the bits directly. + * base_mem_group_id_get() should be used to extract the memory group ID from + * a base_mem_alloc_flags value. + */ +#define BASEP_MEM_GROUP_ID_SHIFT 22 +#define BASE_MEM_GROUP_ID_MASK \ + ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT) + +/* Must do CPU cache maintenance when imported memory is mapped/unmapped + * on GPU. Currently applicable to dma-buf type only. + */ +#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26) + +/* OUT */ +/* Kernel side cache sync ops required */ +#define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28) + +/* Number of bits used as flags for base memory management + * + * Must be kept in sync with the base_mem_alloc_flags flags + */ +#define BASE_MEM_FLAGS_NR_BITS 29 + +/* A mask of all the flags which are only valid for allocations within kbase, + * and may not be passed from user space. + */ +#define BASEP_MEM_FLAGS_KERNEL_ONLY \ + (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE) + +/* A mask for all output bits, excluding IN/OUT bits. + */ +#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP + +/* A mask for all input bits, including IN/OUT bits. 
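 *
 * Worked value (a sketch of the arithmetic, assuming the definitions above
 * are unchanged): with BASE_MEM_FLAGS_NR_BITS == 29 and the output mask
 * being just BASE_MEM_NEED_MMAP (bit 14), the define below evaluates to
 *
 *   ((1 << 29) - 1) & ~(1 << 14) == 0x1FFFFFFF & ~0x4000 == 0x1FFFBFFF
 *
 * i.e. every flag bit below bit 29 except the output-only NEED_MMAP bit.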
+ */
+#define BASE_MEM_FLAGS_INPUT_MASK \
+	(((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
+
+/* A mask of all currently reserved flags
+ */
+#define BASE_MEM_FLAGS_RESERVED \
+	(BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_20)
+
+#define BASEP_MEM_INVALID_HANDLE (0ull << 12)
+#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12)
+#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12)
+#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12)
+#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12)
+/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */
+#define BASEP_MEM_CSF_USER_REG_PAGE_HANDLE (47ul << 12)
+#define BASEP_MEM_CSF_USER_IO_PAGES_HANDLE (48ul << 12)
+#define BASE_MEM_COOKIE_BASE (64ul << 12)
+#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \
+	BASE_MEM_COOKIE_BASE)
+
+#define KBASE_CSF_NUM_USER_IO_PAGES_HANDLE \
+	((BASE_MEM_COOKIE_BASE - BASEP_MEM_CSF_USER_IO_PAGES_HANDLE) >> \
+	 LOCAL_PAGE_SHIFT)
+
+/**
+ * Valid set of just-in-time memory allocation flags
+ */
+#define BASE_JIT_ALLOC_VALID_FLAGS ((__u8)0)
+
+/* Flags to pass to ::base_context_init.
+ * Flags can be ORed together to enable multiple things.
+ *
+ * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
+ * not collide with them.
+ */
+typedef __u32 base_context_create_flags;
+
+/* No flags set */
+#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0)
+
+/* Base context is embedded in a cctx object (flag used for CINSTR
+ * software counter macros)
+ */
+#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0)
+
+/* Base context is a 'System Monitor' context for Hardware counters.
+ *
+ * One important side effect of this is that job submission is disabled.
+ */
+#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \
+	((base_context_create_flags)1 << 1)
+
+/* Base context creates a CSF event notification thread.
+ *
+ * The creation of a CSF event notification thread is conditional but
+ * mandatory for the handling of CSF events.
+ */
+#define BASE_CONTEXT_CSF_EVENT_THREAD ((base_context_create_flags)1 << 2)
+
+/* Bit-shift used to encode a memory group ID in base_context_create_flags
+ */
+#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3)
+
+/* Bitmask used to encode a memory group ID in base_context_create_flags
+ */
+#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \
+	((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)
+
+/* Bitpattern describing the base_context_create_flags that can be
+ * passed to the kernel
+ */
+#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \
+	(BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \
+	 BASEP_CONTEXT_MMU_GROUP_ID_MASK)
+
+/* Bitpattern describing the ::base_context_create_flags that can be
+ * passed to base_context_init()
+ */
+#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \
+	(BASE_CONTEXT_CCTX_EMBEDDED | \
+	 BASE_CONTEXT_CSF_EVENT_THREAD | \
+	 BASEP_CONTEXT_CREATE_KERNEL_FLAGS)
+
+/* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
+ * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST)
+ */
+#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0)
+
+/* Indicate that job dumping is enabled. This could affect certain timers
+ * to account for the performance impact.
+ */ +#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1) + +/* Enable KBase tracepoints for CSF builds */ +#define BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS (1 << 2) + +/* Enable additional CSF Firmware side tracepoints */ +#define BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS (1 << 3) + +#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \ + BASE_TLSTREAM_JOB_DUMPING_ENABLED | \ + BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS | \ + BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) + +/* Number of pages mapped into the process address space for a bound GPU + * command queue. A pair of input/output pages and a Hw doorbell page + * are mapped to enable direct submission of commands to Hw. + */ +#define BASEP_QUEUE_NR_MMAP_USER_PAGES ((size_t)3) + +#define BASE_QUEUE_MAX_PRIORITY (15U) + +/* CQS Sync object is an array of __u32 event_mem[2], error field index is 1 */ +#define BASEP_EVENT_VAL_INDEX (0U) +#define BASEP_EVENT_ERR_INDEX (1U) + +/* The upper limit for number of objects that could be waited/set per command. + * This limit is now enforced as internally the error inherit inputs are + * converted to 32-bit flags in a __u32 variable occupying a previously padding + * field. + */ +#define BASEP_KCPU_CQS_MAX_NUM_OBJS ((size_t)32) + +#if MALI_UNIT_TEST +/** + * enum base_kcpu_command_type - Kernel CPU queue command type. + * @BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: fence_signal, + * @BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: fence_wait, + * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT: cqs_wait, + * @BASE_KCPU_COMMAND_TYPE_CQS_SET: cqs_set, + * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: cqs_wait_operation, + * @BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: cqs_set_operation, + * @BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: map_import, + * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: unmap_import, + * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: unmap_import_force, + * @BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: jit_alloc, + * @BASE_KCPU_COMMAND_TYPE_JIT_FREE: jit_free, + * @BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: group_suspend, + * @BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: error_barrier, + * @BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME: sample_time, + */ +enum base_kcpu_command_type { + BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL, + BASE_KCPU_COMMAND_TYPE_FENCE_WAIT, + BASE_KCPU_COMMAND_TYPE_CQS_WAIT, + BASE_KCPU_COMMAND_TYPE_CQS_SET, + BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION, + BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION, + BASE_KCPU_COMMAND_TYPE_MAP_IMPORT, + BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT, + BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE, + BASE_KCPU_COMMAND_TYPE_JIT_ALLOC, + BASE_KCPU_COMMAND_TYPE_JIT_FREE, + BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND, + BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER, + BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME, +}; +#else +/** + * enum base_kcpu_command_type - Kernel CPU queue command type. 
+ * @BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: fence_signal, + * @BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: fence_wait, + * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT: cqs_wait, + * @BASE_KCPU_COMMAND_TYPE_CQS_SET: cqs_set, + * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: cqs_wait_operation, + * @BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: cqs_set_operation, + * @BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: map_import, + * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: unmap_import, + * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: unmap_import_force, + * @BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: jit_alloc, + * @BASE_KCPU_COMMAND_TYPE_JIT_FREE: jit_free, + * @BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: group_suspend, + * @BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: error_barrier, + */ +enum base_kcpu_command_type { + BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL, + BASE_KCPU_COMMAND_TYPE_FENCE_WAIT, + BASE_KCPU_COMMAND_TYPE_CQS_WAIT, + BASE_KCPU_COMMAND_TYPE_CQS_SET, + BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION, + BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION, + BASE_KCPU_COMMAND_TYPE_MAP_IMPORT, + BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT, + BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE, + BASE_KCPU_COMMAND_TYPE_JIT_ALLOC, + BASE_KCPU_COMMAND_TYPE_JIT_FREE, + BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND, + BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER, +}; +#endif /* MALI_UNIT_TEST */ + +/** + * enum base_queue_group_priority - Priority of a GPU Command Queue Group. + * @BASE_QUEUE_GROUP_PRIORITY_HIGH: GPU Command Queue Group is of high + * priority. + * @BASE_QUEUE_GROUP_PRIORITY_MEDIUM: GPU Command Queue Group is of medium + * priority. + * @BASE_QUEUE_GROUP_PRIORITY_LOW: GPU Command Queue Group is of low + * priority. + * @BASE_QUEUE_GROUP_PRIORITY_REALTIME: GPU Command Queue Group is of real-time + * priority. + * @BASE_QUEUE_GROUP_PRIORITY_COUNT: Number of GPU Command Queue Group + * priority levels. + * + * Currently this is in order of highest to lowest, but if new levels are added + * then those new levels may be out of order to preserve the ABI compatibility + * with previous releases. At that point, ensure assignment to + * the 'priority' member in &kbase_queue_group is updated to ensure it remains + * a linear ordering. + * + * There should be no gaps in the enum, otherwise use of + * BASE_QUEUE_GROUP_PRIORITY_COUNT in kbase must be updated. 
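 *
 * A minimal sketch (illustrative only, not part of this header) of the kind
 * of linear re-mapping the note above asks for, assuming REALTIME ranks
 * highest:
 *
 *   static const int example_priority_rank[BASE_QUEUE_GROUP_PRIORITY_COUNT] = {
 *           [BASE_QUEUE_GROUP_PRIORITY_REALTIME] = 0,
 *           [BASE_QUEUE_GROUP_PRIORITY_HIGH]     = 1,
 *           [BASE_QUEUE_GROUP_PRIORITY_MEDIUM]   = 2,
 *           [BASE_QUEUE_GROUP_PRIORITY_LOW]      = 3,
 *   };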
+ */
+enum base_queue_group_priority {
+	BASE_QUEUE_GROUP_PRIORITY_HIGH = 0,
+	BASE_QUEUE_GROUP_PRIORITY_MEDIUM,
+	BASE_QUEUE_GROUP_PRIORITY_LOW,
+	BASE_QUEUE_GROUP_PRIORITY_REALTIME,
+	BASE_QUEUE_GROUP_PRIORITY_COUNT
+};
+
+struct base_kcpu_command_fence_info {
+	__u64 fence;
+};
+
+struct base_cqs_wait_info {
+	__u64 addr;
+	__u32 val;
+	__u32 padding;
+};
+
+struct base_kcpu_command_cqs_wait_info {
+	__u64 objs;
+	__u32 nr_objs;
+	__u32 inherit_err_flags;
+};
+
+struct base_cqs_set {
+	__u64 addr;
+};
+
+struct base_kcpu_command_cqs_set_info {
+	__u64 objs;
+	__u32 nr_objs;
+	__u32 padding;
+};
+
+/**
+ * basep_cqs_data_type - Enumeration of CQS Data Types
+ *
+ * @BASEP_CQS_DATA_TYPE_U32: The Data Type of a CQS Object's value
+ *                           is an unsigned 32-bit integer
+ * @BASEP_CQS_DATA_TYPE_U64: The Data Type of a CQS Object's value
+ *                           is an unsigned 64-bit integer
+ */
+typedef enum PACKED {
+	BASEP_CQS_DATA_TYPE_U32 = 0,
+	BASEP_CQS_DATA_TYPE_U64 = 1,
+} basep_cqs_data_type;
+
+/**
+ * basep_cqs_wait_operation_op - Enumeration of CQS Object Wait
+ *                               Operation conditions
+ *
+ * @BASEP_CQS_WAIT_OPERATION_LE: CQS Wait Operation indicating that a
+ *                               wait will be satisfied when a CQS Object's
+ *                               value is Less than or Equal to
+ *                               the Wait Operation value
+ * @BASEP_CQS_WAIT_OPERATION_GT: CQS Wait Operation indicating that a
+ *                               wait will be satisfied when a CQS Object's
+ *                               value is Greater than the Wait Operation value
+ */
+typedef enum {
+	BASEP_CQS_WAIT_OPERATION_LE = 0,
+	BASEP_CQS_WAIT_OPERATION_GT = 1,
+} basep_cqs_wait_operation_op;
+
+struct base_cqs_wait_operation_info {
+	__u64 addr;
+	__u64 val;
+	__u8 operation;
+	__u8 data_type;
+	__u8 padding[6];
+};
+
+/**
+ * struct base_kcpu_command_cqs_wait_operation_info - structure which contains information
+ *		about the Timeline CQS wait objects
+ *
+ * @objs:              An array of Timeline CQS waits.
+ * @nr_objs:           Number of Timeline CQS waits in the array.
+ * @inherit_err_flags: Bit-pattern for the CQSs in the array whose error
+ *                     field is to be used as the source for importing into
+ *                     the queue's error-state.
+ */
+struct base_kcpu_command_cqs_wait_operation_info {
+	__u64 objs;
+	__u32 nr_objs;
+	__u32 inherit_err_flags;
+};
+
+/**
+ * basep_cqs_set_operation_op - Enumeration of CQS Set Operations
+ *
+ * @BASEP_CQS_SET_OPERATION_ADD: CQS Set operation for adding a value
+ *                               to a synchronization object
+ * @BASEP_CQS_SET_OPERATION_SET: CQS Set operation for setting the value
+ *                               of a synchronization object
+ */
+typedef enum {
+	BASEP_CQS_SET_OPERATION_ADD = 0,
+	BASEP_CQS_SET_OPERATION_SET = 1,
+} basep_cqs_set_operation_op;
+
+struct base_cqs_set_operation_info {
+	__u64 addr;
+	__u64 val;
+	__u8 operation;
+	__u8 data_type;
+	__u8 padding[6];
+};
+
+/**
+ * struct base_kcpu_command_cqs_set_operation_info - structure which contains information
+ *		about the Timeline CQS set objects
+ *
+ * @objs:    An array of Timeline CQS sets.
+ * @nr_objs: Number of Timeline CQS sets in the array.
+ * @padding: Structure padding, unused bytes.
+ */
+struct base_kcpu_command_cqs_set_operation_info {
+	__u64 objs;
+	__u32 nr_objs;
+	__u32 padding;
+};
+
+/**
+ * struct base_kcpu_command_import_info - structure which contains information
+ *		about the imported buffer.
+ *
+ * @handle: Address of imported user buffer.
+ */
+struct base_kcpu_command_import_info {
+	__u64 handle;
+};
+
+/**
+ * struct base_kcpu_command_jit_alloc_info - structure which contains
+ *		information about jit memory allocation.
+ * + * @info: An array of elements of the + * struct base_jit_alloc_info type. + * @count: The number of elements in the info array. + * @padding: Padding to a multiple of 64 bits. + */ +struct base_kcpu_command_jit_alloc_info { + __u64 info; + __u8 count; + __u8 padding[7]; +}; + +/** + * struct base_kcpu_command_jit_free_info - structure which contains + * information about jit memory which is to be freed. + * + * @ids: An array containing the JIT IDs to free. + * @count: The number of elements in the ids array. + * @padding: Padding to a multiple of 64 bits. + */ +struct base_kcpu_command_jit_free_info { + __u64 ids; + __u8 count; + __u8 padding[7]; +}; + +/** + * struct base_kcpu_command_group_suspend_info - structure which contains + * suspend buffer data captured for a suspended queue group. + * + * @buffer: Pointer to an array of elements of the type char. + * @size: Number of elements in the @buffer array. + * @group_handle: Handle to the mapping of CSG. + * @padding: padding to a multiple of 64 bits. + */ +struct base_kcpu_command_group_suspend_info { + __u64 buffer; + __u32 size; + __u8 group_handle; + __u8 padding[3]; +}; + +#if MALI_UNIT_TEST +struct base_kcpu_command_sample_time_info { + __u64 time; +}; +#endif /* MALI_UNIT_TEST */ + +/** + * struct base_kcpu_command - kcpu command. + * @type: type of the kcpu command, one enum base_kcpu_command_type + * @padding: padding to a multiple of 64 bits + * @info: structure which contains information about the kcpu command; + * actual type is determined by @p type + * @info.fence: Fence + * @info.cqs_wait: CQS wait + * @info.cqs_set: CQS set + * @info.import: import + * @info.jit_alloc: jit allocation + * @info.jit_free: jit deallocation + * @info.suspend_buf_copy: suspend buffer copy + * @info.sample_time: sample time + * @info.padding: padding + */ +struct base_kcpu_command { + __u8 type; + __u8 padding[sizeof(__u64) - sizeof(__u8)]; + union { + struct base_kcpu_command_fence_info fence; + struct base_kcpu_command_cqs_wait_info cqs_wait; + struct base_kcpu_command_cqs_set_info cqs_set; + struct base_kcpu_command_cqs_wait_operation_info cqs_wait_operation; + struct base_kcpu_command_cqs_set_operation_info cqs_set_operation; + struct base_kcpu_command_import_info import; + struct base_kcpu_command_jit_alloc_info jit_alloc; + struct base_kcpu_command_jit_free_info jit_free; + struct base_kcpu_command_group_suspend_info suspend_buf_copy; +#if MALI_UNIT_TEST + struct base_kcpu_command_sample_time_info sample_time; +#endif /* MALI_UNIT_TEST */ + __u64 padding[2]; /* No sub-struct should be larger */ + } info; +}; + +/** + * struct basep_cs_stream_control - CSI capabilities. + * + * @features: Features of this stream + * @padding: Padding to a multiple of 64 bits. + */ +struct basep_cs_stream_control { + __u32 features; + __u32 padding; +}; + +/** + * struct basep_cs_group_control - CSG interface capabilities. + * + * @features: Features of this group + * @stream_num: Number of streams in this group + * @suspend_size: Size in bytes of the suspend buffer for this group + * @padding: Padding to a multiple of 64 bits. + */ +struct basep_cs_group_control { + __u32 features; + __u32 stream_num; + __u32 suspend_size; + __u32 padding; +}; + +/** + * struct base_gpu_queue_group_error_fatal_payload - Unrecoverable fault + * error information associated with GPU command queue group. + * + * @sideband: Additional information of the unrecoverable fault. + * @status: Unrecoverable fault information. 
+ * This consists of exception type (least significant byte) and + * data (remaining bytes). One example of exception type is + * CS_INVALID_INSTRUCTION (0x49). + * @padding: Padding to make multiple of 64bits + */ +struct base_gpu_queue_group_error_fatal_payload { + __u64 sideband; + __u32 status; + __u32 padding; +}; + +/** + * struct base_gpu_queue_error_fatal_payload - Unrecoverable fault + * error information related to GPU command queue. + * + * @sideband: Additional information about this unrecoverable fault. + * @status: Unrecoverable fault information. + * This consists of exception type (least significant byte) and + * data (remaining bytes). One example of exception type is + * CS_INVALID_INSTRUCTION (0x49). + * @csi_index: Index of the CSF interface the queue is bound to. + * @padding: Padding to make multiple of 64bits + */ +struct base_gpu_queue_error_fatal_payload { + __u64 sideband; + __u32 status; + __u8 csi_index; + __u8 padding[3]; +}; + +/** + * enum base_gpu_queue_group_error_type - GPU Fatal error type. + * + * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL: Fatal error associated with GPU + * command queue group. + * @BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL: Fatal error associated with GPU + * command queue. + * @BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT: Fatal error associated with + * progress timeout. + * @BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM: Fatal error due to running out + * of tiler heap memory. + * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT: The number of fatal error types + * + * This type is used for &struct_base_gpu_queue_group_error.error_type. + */ +enum base_gpu_queue_group_error_type { + BASE_GPU_QUEUE_GROUP_ERROR_FATAL = 0, + BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, + BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT, + BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM, + BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT +}; + +/** + * struct base_gpu_queue_group_error - Unrecoverable fault information + * @error_type: Error type of @base_gpu_queue_group_error_type + * indicating which field in union payload is filled + * @padding: Unused bytes for 64bit boundary + * @payload: Input Payload + * @payload.fatal_group: Unrecoverable fault error associated with + * GPU command queue group + * @payload.fatal_queue: Unrecoverable fault error associated with command queue + */ +struct base_gpu_queue_group_error { + __u8 error_type; + __u8 padding[7]; + union { + struct base_gpu_queue_group_error_fatal_payload fatal_group; + struct base_gpu_queue_error_fatal_payload fatal_queue; + } payload; +}; + +/** + * enum base_csf_notification_type - Notification type + * + * @BASE_CSF_NOTIFICATION_EVENT: Notification with kernel event + * @BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR: Notification with GPU fatal + * error + * @BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP: Notification with dumping cpu + * queue + * @BASE_CSF_NOTIFICATION_COUNT: The number of notification type + * + * This type is used for &struct_base_csf_notification.type. 
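 *
 * Consumer sketch (illustrative only; read_notification() and
 * handle_group_error() are hypothetical placeholders for client code): after
 * reading a struct base_csf_notification from the kernel, dispatch on this
 * field:
 *
 *   struct base_csf_notification notif = read_notification();
 *
 *   switch (notif.type) {
 *   case BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR:
 *           handle_group_error(notif.payload.csg_error.handle,
 *                              &notif.payload.csg_error.error);
 *           break;
 *   default:
 *           break;
 *   }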
+ */ +enum base_csf_notification_type { + BASE_CSF_NOTIFICATION_EVENT = 0, + BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, + BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP, + BASE_CSF_NOTIFICATION_COUNT +}; + +/** + * struct base_csf_notification - Event or error notification + * + * @type: Notification type of @base_csf_notification_type + * @padding: Padding for 64bit boundary + * @payload: Input Payload + * @payload.align: To fit the struct into a 64-byte cache line + * @payload.csg_error: CSG error + * @payload.csg_error.handle: Handle of GPU command queue group associated with + * fatal error + * @payload.csg_error.padding: Padding + * @payload.csg_error.error: Unrecoverable fault error + * + */ +struct base_csf_notification { + __u8 type; + __u8 padding[7]; + union { + struct { + __u8 handle; + __u8 padding[7]; + struct base_gpu_queue_group_error error; + } csg_error; + + __u8 align[56]; + } payload; +}; + +#endif /* _UAPI_BASE_CSF_KERNEL_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_control_registers.h b/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_control_registers.h new file mode 100644 index 0000000..570cba8 --- /dev/null +++ b/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_control_registers.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * This header was autogenerated, it should not be edited. + */ + +#ifndef _UAPI_GPU_CSF_CONTROL_REGISTERS_H_ +#define _UAPI_GPU_CSF_CONTROL_REGISTERS_H_ + +/* GPU_REGISTERS register offsets */ +#define GPU_CONTROL_MCU 0x3000 /* () MCU control registers */ + +#endif /* _UAPI_GPU_CSF_CONTROL_REGISTERS_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h b/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h new file mode 100644 index 0000000..f233a0d --- /dev/null +++ b/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h @@ -0,0 +1,1414 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ *
+ */
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _UAPI_GPU_CSF_REGISTERS_H_
+#define _UAPI_GPU_CSF_REGISTERS_H_
+
+/*
+ * Begin register sets
+ */
+
+/* DOORBELLS base address */
+#define DOORBELLS_BASE 0x0080000
+#define DOORBELLS_REG(r) (DOORBELLS_BASE + (r))
+
+/* CS_KERNEL_INPUT_BLOCK base address */
+#define CS_KERNEL_INPUT_BLOCK_BASE 0x0000
+#define CS_KERNEL_INPUT_BLOCK_REG(r) (CS_KERNEL_INPUT_BLOCK_BASE + (r))
+
+/* CS_KERNEL_OUTPUT_BLOCK base address */
+#define CS_KERNEL_OUTPUT_BLOCK_BASE 0x0000
+#define CS_KERNEL_OUTPUT_BLOCK_REG(r) (CS_KERNEL_OUTPUT_BLOCK_BASE + (r))
+
+/* CS_USER_INPUT_BLOCK base address */
+#define CS_USER_INPUT_BLOCK_BASE 0x0000
+#define CS_USER_INPUT_BLOCK_REG(r) (CS_USER_INPUT_BLOCK_BASE + (r))
+
+/* CS_USER_OUTPUT_BLOCK base address */
+#define CS_USER_OUTPUT_BLOCK_BASE 0x0000
+#define CS_USER_OUTPUT_BLOCK_REG(r) (CS_USER_OUTPUT_BLOCK_BASE + (r))
+
+/* CSG_INPUT_BLOCK base address */
+#define CSG_INPUT_BLOCK_BASE 0x0000
+#define CSG_INPUT_BLOCK_REG(r) (CSG_INPUT_BLOCK_BASE + (r))
+
+/* CSG_OUTPUT_BLOCK base address */
+#define CSG_OUTPUT_BLOCK_BASE 0x0000
+#define CSG_OUTPUT_BLOCK_REG(r) (CSG_OUTPUT_BLOCK_BASE + (r))
+
+/* GLB_CONTROL_BLOCK base address */
+#define GLB_CONTROL_BLOCK_BASE 0x04000000
+#define GLB_CONTROL_BLOCK_REG(r) (GLB_CONTROL_BLOCK_BASE + (r))
+
+/* GLB_INPUT_BLOCK base address */
+#define GLB_INPUT_BLOCK_BASE 0x0000
+#define GLB_INPUT_BLOCK_REG(r) (GLB_INPUT_BLOCK_BASE + (r))
+
+/* GLB_OUTPUT_BLOCK base address */
+#define GLB_OUTPUT_BLOCK_BASE 0x0000
+#define GLB_OUTPUT_BLOCK_REG(r) (GLB_OUTPUT_BLOCK_BASE + (r))
+
+/* USER base address */
+#define USER_BASE 0x0010000
+#define USER_REG(r) (USER_BASE + (r))
+
+/* End register sets */
+
+/*
+ * Begin register offsets
+ */
+
+/* DOORBELLS register offsets */
+#define DOORBELL_0 0x0000 /* () Doorbell 0 register */
+#define DOORBELL(n) (DOORBELL_0 + (n)*65536)
+#define DOORBELL_REG(n, r) (DOORBELL(n) + DOORBELL_BLOCK_REG(r))
+#define DOORBELL_COUNT 1024
+
+/* DOORBELL_BLOCK register offsets */
+#define DB_BLK_DOORBELL 0x0000 /* (WO) Doorbell request */
+
+/* CS_KERNEL_INPUT_BLOCK register offsets */
+#define CS_REQ 0x0000 /* () CS request flags */
+#define CS_CONFIG 0x0004 /* () CS configuration */
+#define CS_ACK_IRQ_MASK 0x000C /* () Command stream interrupt mask */
+#define CS_BASE_LO 0x0010 /* () Base pointer for the ring buffer, low word */
+#define CS_BASE_HI 0x0014 /* () Base pointer for the ring buffer, high word */
+#define CS_SIZE 0x0018 /* () Size of the ring buffer */
+#define CS_TILER_HEAP_START_LO 0x0020 /* () Pointer to heap start, low word */
+#define CS_TILER_HEAP_START_HI 0x0024 /* () Pointer to heap start, high word */
+#define CS_TILER_HEAP_END_LO 0x0028 /* () Tiler heap descriptor address, low word */
+#define CS_TILER_HEAP_END_HI 0x002C /* () Tiler heap descriptor address, high word */
+#define CS_USER_INPUT_LO 0x0030 /* () CS user mode input page address, low word */
+#define CS_USER_INPUT_HI 0x0034 /* () CS user mode input page address, high word */
+#define CS_USER_OUTPUT_LO 0x0038 /* () CS user mode output page address, low word */
+#define CS_USER_OUTPUT_HI 0x003C /* () CS user mode output page address, high word */
+
+/* CS_KERNEL_OUTPUT_BLOCK register offsets */
+#define CS_ACK 0x0000 /* () CS acknowledge flags */
+#define CS_STATUS_CMD_PTR_LO 0x0040 /* () Program pointer current value, low word */
+#define CS_STATUS_CMD_PTR_HI 0x0044 /* () Program pointer current value, high word */
+#define CS_STATUS_WAIT 0x0048 /* () Wait condition status register */
+#define CS_STATUS_REQ_RESOURCE 0x004C /* () Indicates the resources requested by the CS */
+#define CS_STATUS_WAIT_SYNC_POINTER_LO 0x0050 /* () Sync object pointer, low word */
+#define CS_STATUS_WAIT_SYNC_POINTER_HI 0x0054 /* () Sync object pointer, high word */
+#define CS_STATUS_WAIT_SYNC_VALUE 0x0058 /* () Sync object test value */
+#define CS_STATUS_SCOREBOARDS 0x005C /* () Scoreboard status */
+#define CS_STATUS_BLOCKED_REASON 0x0060 /* () Blocked reason */
+#define CS_FAULT 0x0080 /* () Recoverable fault information */
+#define CS_FATAL 0x0084 /* () Unrecoverable fault information */
+#define CS_FAULT_INFO_LO 0x0088 /* () Additional information about a recoverable fault, low word */
+#define CS_FAULT_INFO_HI 0x008C /* () Additional information about a recoverable fault, high word */
+#define CS_FATAL_INFO_LO 0x0090 /* () Additional information about a non-recoverable fault, low word */
+#define CS_FATAL_INFO_HI 0x0094 /* () Additional information about a non-recoverable fault, high word */
+#define CS_HEAP_VT_START 0x00C0 /* () Number of vertex/tiling operations started */
+#define CS_HEAP_VT_END 0x00C4 /* () Number of vertex/tiling operations completed */
+#define CS_HEAP_FRAG_END 0x00CC /* () Number of fragments completed */
+#define CS_HEAP_ADDRESS_LO 0x00D0 /* () Heap address, low word */
+#define CS_HEAP_ADDRESS_HI 0x00D4 /* () Heap address, high word */
+
+/* CS_USER_INPUT_BLOCK register offsets */
+#define CS_INSERT_LO 0x0000 /* () Current insert offset for ring buffer, low word */
+#define CS_INSERT_HI 0x0004 /* () Current insert offset for ring buffer, high word */
+#define CS_EXTRACT_INIT_LO 0x0008 /* () Initial extract offset for ring buffer, low word */
+#define CS_EXTRACT_INIT_HI 0x000C /* () Initial extract offset for ring buffer, high word */
+
+/* CS_USER_OUTPUT_BLOCK register offsets */
+#define CS_EXTRACT_LO 0x0000 /* () Current extract offset for ring buffer, low word */
+#define CS_EXTRACT_HI 0x0004 /* () Current extract offset for ring buffer, high word */
+#define CS_ACTIVE 0x0008 /* () Initial extract offset when the CS is started */
+
+/* CSG_INPUT_BLOCK register offsets */
+#define CSG_REQ 0x0000 /* () CSG request */
+#define CSG_ACK_IRQ_MASK 0x0004 /* () Global acknowledge interrupt mask */
+#define CSG_DB_REQ 0x0008 /* () Global doorbell request */
+#define CSG_IRQ_ACK 0x000C /* () CS IRQ acknowledge */
+#define CSG_ALLOW_COMPUTE_LO 0x0020 /* () Allowed compute endpoints, low word */
+#define CSG_ALLOW_COMPUTE_HI 0x0024 /* () Allowed compute endpoints, high word */
+#define CSG_ALLOW_FRAGMENT_LO 0x0028 /* () Allowed fragment endpoints, low word */
+#define CSG_ALLOW_FRAGMENT_HI 0x002C /* () Allowed fragment endpoints, high word */
+#define CSG_ALLOW_OTHER 0x0030 /* () Allowed other endpoints */
+#define CSG_EP_REQ 0x0034 /* () Maximum number of endpoints allowed */
+#define CSG_SUSPEND_BUF_LO 0x0040 /* () Normal mode suspend buffer, low word */
+#define CSG_SUSPEND_BUF_HI 0x0044 /* () Normal mode suspend buffer, high word */
+#define CSG_PROTM_SUSPEND_BUF_LO 0x0048 /* () Protected mode suspend buffer, low word */
+#define CSG_PROTM_SUSPEND_BUF_HI 0x004C /* () Protected mode suspend buffer, high word */
+#define CSG_CONFIG 0x0050 /* () CSG configuration options */
+#define CSG_ITER_TRACE_CONFIG 0x0054 /* () CSG trace configuration */
+
+/* CSG_OUTPUT_BLOCK register offsets */
+#define CSG_ACK 0x0000 /* () CSG acknowledge flags */
+#define CSG_DB_ACK 0x0008 /* () CS kernel doorbell acknowledge flags */
+#define CSG_IRQ_REQ
0x000C /* () CS interrupt request flags */ +#define CSG_STATUS_EP_CURRENT 0x0010 /* () Endpoint allocation status register */ +#define CSG_STATUS_EP_REQ 0x0014 /* () Endpoint request status register */ +#define CSG_RESOURCE_DEP 0x001C /* () Current resource dependencies */ + +/* GLB_CONTROL_BLOCK register offsets */ +#define GLB_VERSION 0x0000 /* () Global interface version */ +#define GLB_FEATURES 0x0004 /* () Global interface features */ +#define GLB_INPUT_VA 0x0008 /* () Address of GLB_INPUT_BLOCK */ +#define GLB_OUTPUT_VA 0x000C /* () Address of GLB_OUTPUT_BLOCK */ +#define GLB_GROUP_NUM 0x0010 /* () Number of CSG interfaces */ +#define GLB_GROUP_STRIDE 0x0014 /* () Stride between CSG interfaces */ +#define GLB_PRFCNT_SIZE 0x0018 /* () Size of CSF performance counters */ +#define GLB_INSTR_FEATURES 0x001C /* () TRACE_POINT instrumentation features */ +#define GROUP_CONTROL_0 0x1000 /* () CSG control and capabilities */ +#define GROUP_CONTROL(n) (GROUP_CONTROL_0 + (n)*256) +#define GROUP_CONTROL_REG(n, r) (GROUP_CONTROL(n) + GROUP_CONTROL_BLOCK_REG(r)) +#define GROUP_CONTROL_COUNT 16 + +/* STREAM_CONTROL_BLOCK register offsets */ +#define STREAM_FEATURES 0x0000 /* () CSI features */ +#define STREAM_INPUT_VA 0x0004 /* () Address of CS_KERNEL_INPUT_BLOCK */ +#define STREAM_OUTPUT_VA 0x0008 /* () Address of CS_KERNEL_OUTPUT_BLOCK */ + +/* GROUP_CONTROL_BLOCK register offsets */ +#define GROUP_FEATURES 0x0000 /* () CSG interface features */ +#define GROUP_INPUT_VA 0x0004 /* () Address of CSG_INPUT_BLOCK */ +#define GROUP_OUTPUT_VA 0x0008 /* () Address of CSG_OUTPUT_BLOCK */ +#define GROUP_SUSPEND_SIZE 0x000C /* () Size of CSG suspend buffer */ +#define GROUP_PROTM_SUSPEND_SIZE 0x0010 /* () Size of CSG protected-mode suspend buffer */ +#define GROUP_STREAM_NUM 0x0014 /* () Number of CS interfaces */ +#define GROUP_STREAM_STRIDE 0x0018 /* () Stride between CS interfaces */ +#define STREAM_CONTROL_0 0x0040 /* () CS control and capabilities */ +#define STREAM_CONTROL(n) (STREAM_CONTROL_0 + (n)*12) +#define STREAM_CONTROL_REG(n, r) (STREAM_CONTROL(n) + STREAM_CONTROL_BLOCK_REG(r)) +#define STREAM_CONTROL_COUNT 16 + +/* GLB_INPUT_BLOCK register offsets */ +#define GLB_REQ 0x0000 /* () Global request */ +#define GLB_ACK_IRQ_MASK 0x0004 /* () Global acknowledge interrupt mask */ +#define GLB_DB_REQ 0x0008 /* () Global doorbell request */ +#define GLB_PROGRESS_TIMER 0x0010 /* () Global progress timeout */ +#define GLB_PWROFF_TIMER 0x0014 /* () Global shader core power off timer */ +#define GLB_ALLOC_EN_LO 0x0018 /* () Global shader core allocation enable mask, low word */ +#define GLB_ALLOC_EN_HI 0x001C /* () Global shader core allocation enable mask, high word */ +#define GLB_PROTM_COHERENCY 0x0020 /* () Configure COHERENCY_ENABLE register value to use in protected mode execution */ + +#define GLB_PRFCNT_JASID 0x0024 /* () Performance counter address space */ +#define GLB_PRFCNT_BASE_LO 0x0028 /* () Performance counter buffer address, low word */ +#define GLB_PRFCNT_BASE_HI 0x002C /* () Performance counter buffer address, high word */ +#define GLB_PRFCNT_EXTRACT 0x0030 /* () Performance counter buffer extract index */ +#define GLB_PRFCNT_CONFIG 0x0040 /* () Performance counter configuration */ +#define GLB_PRFCNT_CSG_SELECT 0x0044 /* () CSG performance counting enable */ +#define GLB_PRFCNT_FW_EN 0x0048 /* () Performance counter enable for firmware */ +#define GLB_PRFCNT_CSG_EN 0x004C /* () Performance counter enable for CSG */ +#define GLB_PRFCNT_CSF_EN 0x0050 /* () Performance counter enable for 
CSF */ +#define GLB_PRFCNT_SHADER_EN 0x0054 /* () Performance counter enable for shader cores */ +#define GLB_PRFCNT_TILER_EN 0x0058 /* () Performance counter enable for tiler */ +#define GLB_PRFCNT_MMU_L2_EN 0x005C /* () Performance counter enable for MMU/L2 cache */ + +#define GLB_DEBUG_FWUTF_DESTROY 0x0FE0 /* () Test fixture destroy function address */ +#define GLB_DEBUG_FWUTF_TEST 0x0FE4 /* () Test index */ +#define GLB_DEBUG_FWUTF_FIXTURE 0x0FE8 /* () Test fixture index */ +#define GLB_DEBUG_FWUTF_CREATE 0x0FEC /* () Test fixture create function address */ +#define GLB_DEBUG_ACK_IRQ_MASK 0x0FF8 /* () Global debug acknowledge interrupt mask */ +#define GLB_DEBUG_REQ 0x0FFC /* () Global debug request */ + +/* GLB_OUTPUT_BLOCK register offsets */ +#define GLB_ACK 0x0000 /* () Global acknowledge */ +#define GLB_DB_ACK 0x0008 /* () Global doorbell acknowledge */ +#define GLB_HALT_STATUS 0x0010 /* () Global halt status */ +#define GLB_PRFCNT_STATUS 0x0014 /* () Performance counter status */ +#define GLB_PRFCNT_INSERT 0x0018 /* () Performance counter buffer insert index */ +#define GLB_DEBUG_FWUTF_RESULT 0x0FE0 /* () Firmware debug test result */ +#define GLB_DEBUG_ACK 0x0FFC /* () Global debug acknowledge */ + +/* USER register offsets */ +#define LATEST_FLUSH 0x0000 /* () Flush ID of latest clean-and-invalidate operation */ + +/* End register offsets */ + +/* CS_KERNEL_INPUT_BLOCK register set definitions */ +/* GLB_VERSION register */ +#define GLB_VERSION_PATCH_SHIFT (0) +#define GLB_VERSION_MINOR_SHIFT (16) +#define GLB_VERSION_MAJOR_SHIFT (24) + +/* CS_REQ register */ +#define CS_REQ_STATE_SHIFT 0 +#define CS_REQ_STATE_MASK (0x7 << CS_REQ_STATE_SHIFT) +#define CS_REQ_STATE_GET(reg_val) (((reg_val)&CS_REQ_STATE_MASK) >> CS_REQ_STATE_SHIFT) +#define CS_REQ_STATE_SET(reg_val, value) \ + (((reg_val) & ~CS_REQ_STATE_MASK) | (((value) << CS_REQ_STATE_SHIFT) & CS_REQ_STATE_MASK)) +/* CS_REQ_STATE values */ +#define CS_REQ_STATE_STOP 0x0 +#define CS_REQ_STATE_START 0x1 +/* End of CS_REQ_STATE values */ +#define CS_REQ_EXTRACT_EVENT_SHIFT 4 +#define CS_REQ_EXTRACT_EVENT_MASK (0x1 << CS_REQ_EXTRACT_EVENT_SHIFT) +#define CS_REQ_EXTRACT_EVENT_GET(reg_val) (((reg_val)&CS_REQ_EXTRACT_EVENT_MASK) >> CS_REQ_EXTRACT_EVENT_SHIFT) +#define CS_REQ_EXTRACT_EVENT_SET(reg_val, value) \ + (((reg_val) & ~CS_REQ_EXTRACT_EVENT_MASK) | (((value) << CS_REQ_EXTRACT_EVENT_SHIFT) & CS_REQ_EXTRACT_EVENT_MASK)) + +#define CS_REQ_IDLE_SYNC_WAIT_SHIFT 8 +#define CS_REQ_IDLE_SYNC_WAIT_MASK (0x1 << CS_REQ_IDLE_SYNC_WAIT_SHIFT) +#define CS_REQ_IDLE_SYNC_WAIT_GET(reg_val) (((reg_val)&CS_REQ_IDLE_SYNC_WAIT_MASK) >> CS_REQ_IDLE_SYNC_WAIT_SHIFT) +#define CS_REQ_IDLE_SYNC_WAIT_SET(reg_val, value) \ + (((reg_val) & ~CS_REQ_IDLE_SYNC_WAIT_MASK) | \ + (((value) << CS_REQ_IDLE_SYNC_WAIT_SHIFT) & CS_REQ_IDLE_SYNC_WAIT_MASK)) +#define CS_REQ_IDLE_PROTM_PEND_SHIFT 9 +#define CS_REQ_IDLE_PROTM_PEND_MASK (0x1 << CS_REQ_IDLE_PROTM_PEND_SHIFT) +#define CS_REQ_IDLE_PROTM_PEND_GET(reg_val) (((reg_val)&CS_REQ_IDLE_PROTM_PEND_MASK) >> CS_REQ_IDLE_PROTM_PEND_SHIFT) +#define CS_REQ_IDLE_PROTM_PEND_SET(reg_val, value) \ + (((reg_val) & ~CS_REQ_IDLE_PROTM_PEND_MASK) | \ + (((value) << CS_REQ_IDLE_PROTM_PEND_SHIFT) & CS_REQ_IDLE_PROTM_PEND_MASK)) +#define CS_REQ_IDLE_EMPTY_SHIFT 10 +#define CS_REQ_IDLE_EMPTY_MASK (0x1 << CS_REQ_IDLE_EMPTY_SHIFT) +#define CS_REQ_IDLE_EMPTY_GET(reg_val) (((reg_val)&CS_REQ_IDLE_EMPTY_MASK) >> CS_REQ_IDLE_EMPTY_SHIFT) +#define CS_REQ_IDLE_EMPTY_SET(reg_val, value) \ + (((reg_val) & ~CS_REQ_IDLE_EMPTY_MASK) | (((value) 
<< CS_REQ_IDLE_EMPTY_SHIFT) & CS_REQ_IDLE_EMPTY_MASK)) +#define CS_REQ_IDLE_RESOURCE_REQ_SHIFT 11 +#define CS_REQ_IDLE_RESOURCE_REQ_MASK (0x1 << CS_REQ_IDLE_RESOURCE_REQ_SHIFT) +#define CS_REQ_IDLE_RESOURCE_REQ_GET(reg_val) \ + (((reg_val)&CS_REQ_IDLE_RESOURCE_REQ_MASK) >> CS_REQ_IDLE_RESOURCE_REQ_SHIFT) +#define CS_REQ_IDLE_RESOURCE_REQ_SET(reg_val, value) \ + (((reg_val) & ~CS_REQ_IDLE_RESOURCE_REQ_MASK) | \ + (((value) << CS_REQ_IDLE_RESOURCE_REQ_SHIFT) & CS_REQ_IDLE_RESOURCE_REQ_MASK)) +#define CS_REQ_TILER_OOM_SHIFT 26 +#define CS_REQ_TILER_OOM_MASK (0x1 << CS_REQ_TILER_OOM_SHIFT) +#define CS_REQ_TILER_OOM_GET(reg_val) (((reg_val)&CS_REQ_TILER_OOM_MASK) >> CS_REQ_TILER_OOM_SHIFT) +#define CS_REQ_TILER_OOM_SET(reg_val, value) \ + (((reg_val) & ~CS_REQ_TILER_OOM_MASK) | (((value) << CS_REQ_TILER_OOM_SHIFT) & CS_REQ_TILER_OOM_MASK)) +#define CS_REQ_PROTM_PEND_SHIFT 27 +#define CS_REQ_PROTM_PEND_MASK (0x1 << CS_REQ_PROTM_PEND_SHIFT) +#define CS_REQ_PROTM_PEND_GET(reg_val) (((reg_val)&CS_REQ_PROTM_PEND_MASK) >> CS_REQ_PROTM_PEND_SHIFT) +#define CS_REQ_PROTM_PEND_SET(reg_val, value) \ + (((reg_val) & ~CS_REQ_PROTM_PEND_MASK) | (((value) << CS_REQ_PROTM_PEND_SHIFT) & CS_REQ_PROTM_PEND_MASK)) +#define CS_REQ_FATAL_SHIFT 30 +#define CS_REQ_FATAL_MASK (0x1 << CS_REQ_FATAL_SHIFT) +#define CS_REQ_FATAL_GET(reg_val) (((reg_val)&CS_REQ_FATAL_MASK) >> CS_REQ_FATAL_SHIFT) +#define CS_REQ_FATAL_SET(reg_val, value) \ + (((reg_val) & ~CS_REQ_FATAL_MASK) | (((value) << CS_REQ_FATAL_SHIFT) & CS_REQ_FATAL_MASK)) +#define CS_REQ_FAULT_SHIFT 31 +#define CS_REQ_FAULT_MASK (0x1 << CS_REQ_FAULT_SHIFT) +#define CS_REQ_FAULT_GET(reg_val) (((reg_val)&CS_REQ_FAULT_MASK) >> CS_REQ_FAULT_SHIFT) +#define CS_REQ_FAULT_SET(reg_val, value) \ + (((reg_val) & ~CS_REQ_FAULT_MASK) | (((value) << CS_REQ_FAULT_SHIFT) & CS_REQ_FAULT_MASK)) + +/* CS_CONFIG register */ +#define CS_CONFIG_PRIORITY_SHIFT 0 +#define CS_CONFIG_PRIORITY_MASK (0xF << CS_CONFIG_PRIORITY_SHIFT) +#define CS_CONFIG_PRIORITY_GET(reg_val) (((reg_val)&CS_CONFIG_PRIORITY_MASK) >> CS_CONFIG_PRIORITY_SHIFT) +#define CS_CONFIG_PRIORITY_SET(reg_val, value) \ + (((reg_val) & ~CS_CONFIG_PRIORITY_MASK) | (((value) << CS_CONFIG_PRIORITY_SHIFT) & CS_CONFIG_PRIORITY_MASK)) +#define CS_CONFIG_USER_DOORBELL_SHIFT 8 +#define CS_CONFIG_USER_DOORBELL_MASK (0xFF << CS_CONFIG_USER_DOORBELL_SHIFT) +#define CS_CONFIG_USER_DOORBELL_GET(reg_val) (((reg_val)&CS_CONFIG_USER_DOORBELL_MASK) >> CS_CONFIG_USER_DOORBELL_SHIFT) +#define CS_CONFIG_USER_DOORBELL_SET(reg_val, value) \ + (((reg_val) & ~CS_CONFIG_USER_DOORBELL_MASK) | \ + (((value) << CS_CONFIG_USER_DOORBELL_SHIFT) & CS_CONFIG_USER_DOORBELL_MASK)) + +/* CS_ACK_IRQ_MASK register */ +#define CS_ACK_IRQ_MASK_STATE_SHIFT 0 +#define CS_ACK_IRQ_MASK_STATE_MASK (0x7 << CS_ACK_IRQ_MASK_STATE_SHIFT) +#define CS_ACK_IRQ_MASK_STATE_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_STATE_MASK) >> CS_ACK_IRQ_MASK_STATE_SHIFT) +#define CS_ACK_IRQ_MASK_STATE_SET(reg_val, value) \ + (((reg_val) & ~CS_ACK_IRQ_MASK_STATE_MASK) | \ + (((value) << CS_ACK_IRQ_MASK_STATE_SHIFT) & CS_ACK_IRQ_MASK_STATE_MASK)) +/* CS_ACK_IRQ_MASK_STATE values */ +#define CS_ACK_IRQ_MASK_STATE_DISABLED 0x0 +#define CS_ACK_IRQ_MASK_STATE_ENABLED 0x7 +/* End of CS_ACK_IRQ_MASK_STATE values */ +#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT 4 +#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK (0x1 << CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT) +#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_GET(reg_val) \ + (((reg_val)&CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK) >> CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT) +#define 
CS_ACK_IRQ_MASK_EXTRACT_EVENT_SET(reg_val, value) \ + (((reg_val) & ~CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK) | \ + (((value) << CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT) & CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK)) +#define CS_ACK_IRQ_MASK_TILER_OOM_SHIFT 26 +#define CS_ACK_IRQ_MASK_TILER_OOM_MASK (0x1 << CS_ACK_IRQ_MASK_TILER_OOM_SHIFT) +#define CS_ACK_IRQ_MASK_TILER_OOM_GET(reg_val) \ + (((reg_val)&CS_ACK_IRQ_MASK_TILER_OOM_MASK) >> CS_ACK_IRQ_MASK_TILER_OOM_SHIFT) +#define CS_ACK_IRQ_MASK_TILER_OOM_SET(reg_val, value) \ + (((reg_val) & ~CS_ACK_IRQ_MASK_TILER_OOM_MASK) | \ + (((value) << CS_ACK_IRQ_MASK_TILER_OOM_SHIFT) & CS_ACK_IRQ_MASK_TILER_OOM_MASK)) +#define CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT 27 +#define CS_ACK_IRQ_MASK_PROTM_PEND_MASK (0x1 << CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT) +#define CS_ACK_IRQ_MASK_PROTM_PEND_GET(reg_val) \ + (((reg_val)&CS_ACK_IRQ_MASK_PROTM_PEND_MASK) >> CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT) +#define CS_ACK_IRQ_MASK_PROTM_PEND_SET(reg_val, value) \ + (((reg_val) & ~CS_ACK_IRQ_MASK_PROTM_PEND_MASK) | \ + (((value) << CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT) & CS_ACK_IRQ_MASK_PROTM_PEND_MASK)) +#define CS_ACK_IRQ_MASK_FATAL_SHIFT 30 +#define CS_ACK_IRQ_MASK_FATAL_MASK (0x1 << CS_ACK_IRQ_MASK_FATAL_SHIFT) +#define CS_ACK_IRQ_MASK_FATAL_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_FATAL_MASK) >> CS_ACK_IRQ_MASK_FATAL_SHIFT) +#define CS_ACK_IRQ_MASK_FATAL_SET(reg_val, value) \ + (((reg_val) & ~CS_ACK_IRQ_MASK_FATAL_MASK) | \ + (((value) << CS_ACK_IRQ_MASK_FATAL_SHIFT) & CS_ACK_IRQ_MASK_FATAL_MASK)) +#define CS_ACK_IRQ_MASK_FAULT_SHIFT 31 +#define CS_ACK_IRQ_MASK_FAULT_MASK (0x1 << CS_ACK_IRQ_MASK_FAULT_SHIFT) +#define CS_ACK_IRQ_MASK_FAULT_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_FAULT_MASK) >> CS_ACK_IRQ_MASK_FAULT_SHIFT) +#define CS_ACK_IRQ_MASK_FAULT_SET(reg_val, value) \ + (((reg_val) & ~CS_ACK_IRQ_MASK_FAULT_MASK) | \ + (((value) << CS_ACK_IRQ_MASK_FAULT_SHIFT) & CS_ACK_IRQ_MASK_FAULT_MASK)) + +/* CS_BASE register */ +#define CS_BASE_POINTER_SHIFT 0 +#define CS_BASE_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_BASE_POINTER_SHIFT) +#define CS_BASE_POINTER_GET(reg_val) (((reg_val)&CS_BASE_POINTER_MASK) >> CS_BASE_POINTER_SHIFT) +#define CS_BASE_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CS_BASE_POINTER_MASK) | (((value) << CS_BASE_POINTER_SHIFT) & CS_BASE_POINTER_MASK)) + +/* CS_SIZE register */ +#define CS_SIZE_SIZE_SHIFT 0 +#define CS_SIZE_SIZE_MASK (0xFFFFFFFF << CS_SIZE_SIZE_SHIFT) +#define CS_SIZE_SIZE_GET(reg_val) (((reg_val)&CS_SIZE_SIZE_MASK) >> CS_SIZE_SIZE_SHIFT) +#define CS_SIZE_SIZE_SET(reg_val, value) \ + (((reg_val) & ~CS_SIZE_SIZE_MASK) | (((value) << CS_SIZE_SIZE_SHIFT) & CS_SIZE_SIZE_MASK)) + +/* CS_TILER_HEAP_START register */ +#define CS_TILER_HEAP_START_POINTER_SHIFT 0 +#define CS_TILER_HEAP_START_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_TILER_HEAP_START_POINTER_SHIFT) +#define CS_TILER_HEAP_START_POINTER_GET(reg_val) \ + (((reg_val)&CS_TILER_HEAP_START_POINTER_MASK) >> CS_TILER_HEAP_START_POINTER_SHIFT) +#define CS_TILER_HEAP_START_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CS_TILER_HEAP_START_POINTER_MASK) | \ + (((value) << CS_TILER_HEAP_START_POINTER_SHIFT) & CS_TILER_HEAP_START_POINTER_MASK)) +/* HeapChunkPointer nested in CS_TILER_HEAP_START_POINTER */ +/* End of HeapChunkPointer nested in CS_TILER_HEAP_START_POINTER */ + +/* CS_TILER_HEAP_END register */ +#define CS_TILER_HEAP_END_POINTER_SHIFT 0 +#define CS_TILER_HEAP_END_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_TILER_HEAP_END_POINTER_SHIFT) +#define CS_TILER_HEAP_END_POINTER_GET(reg_val) \ + 
(((reg_val)&CS_TILER_HEAP_END_POINTER_MASK) >> CS_TILER_HEAP_END_POINTER_SHIFT) +#define CS_TILER_HEAP_END_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CS_TILER_HEAP_END_POINTER_MASK) | \ + (((value) << CS_TILER_HEAP_END_POINTER_SHIFT) & CS_TILER_HEAP_END_POINTER_MASK)) +/* HeapChunkPointer nested in CS_TILER_HEAP_END_POINTER */ +/* End of HeapChunkPointer nested in CS_TILER_HEAP_END_POINTER */ + +/* CS_USER_INPUT register */ +#define CS_USER_INPUT_POINTER_SHIFT 0 +#define CS_USER_INPUT_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_USER_INPUT_POINTER_SHIFT) +#define CS_USER_INPUT_POINTER_GET(reg_val) (((reg_val)&CS_USER_INPUT_POINTER_MASK) >> CS_USER_INPUT_POINTER_SHIFT) +#define CS_USER_INPUT_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CS_USER_INPUT_POINTER_MASK) | \ + (((value) << CS_USER_INPUT_POINTER_SHIFT) & CS_USER_INPUT_POINTER_MASK)) + +/* CS_USER_OUTPUT register */ +#define CS_USER_OUTPUT_POINTER_SHIFT 0 +#define CS_USER_OUTPUT_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_USER_OUTPUT_POINTER_SHIFT) +#define CS_USER_OUTPUT_POINTER_GET(reg_val) (((reg_val)&CS_USER_OUTPUT_POINTER_MASK) >> CS_USER_OUTPUT_POINTER_SHIFT) +#define CS_USER_OUTPUT_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CS_USER_OUTPUT_POINTER_MASK) | \ + (((value) << CS_USER_OUTPUT_POINTER_SHIFT) & CS_USER_OUTPUT_POINTER_MASK)) +/* End of CS_KERNEL_INPUT_BLOCK register set definitions */ + +/* CS_KERNEL_OUTPUT_BLOCK register set definitions */ + +/* CS_ACK register */ +#define CS_ACK_STATE_SHIFT 0 +#define CS_ACK_STATE_MASK (0x7 << CS_ACK_STATE_SHIFT) +#define CS_ACK_STATE_GET(reg_val) (((reg_val)&CS_ACK_STATE_MASK) >> CS_ACK_STATE_SHIFT) +#define CS_ACK_STATE_SET(reg_val, value) \ + (((reg_val) & ~CS_ACK_STATE_MASK) | (((value) << CS_ACK_STATE_SHIFT) & CS_ACK_STATE_MASK)) +/* CS_ACK_STATE values */ +#define CS_ACK_STATE_STOP 0x0 +#define CS_ACK_STATE_START 0x1 +/* End of CS_ACK_STATE values */ +#define CS_ACK_EXTRACT_EVENT_SHIFT 4 +#define CS_ACK_EXTRACT_EVENT_MASK (0x1 << CS_ACK_EXTRACT_EVENT_SHIFT) +#define CS_ACK_EXTRACT_EVENT_GET(reg_val) (((reg_val)&CS_ACK_EXTRACT_EVENT_MASK) >> CS_ACK_EXTRACT_EVENT_SHIFT) +#define CS_ACK_EXTRACT_EVENT_SET(reg_val, value) \ + (((reg_val) & ~CS_ACK_EXTRACT_EVENT_MASK) | (((value) << CS_ACK_EXTRACT_EVENT_SHIFT) & CS_ACK_EXTRACT_EVENT_MASK)) +#define CS_ACK_TILER_OOM_SHIFT 26 +#define CS_ACK_TILER_OOM_MASK (0x1 << CS_ACK_TILER_OOM_SHIFT) +#define CS_ACK_TILER_OOM_GET(reg_val) (((reg_val)&CS_ACK_TILER_OOM_MASK) >> CS_ACK_TILER_OOM_SHIFT) +#define CS_ACK_TILER_OOM_SET(reg_val, value) \ + (((reg_val) & ~CS_ACK_TILER_OOM_MASK) | (((value) << CS_ACK_TILER_OOM_SHIFT) & CS_ACK_TILER_OOM_MASK)) +#define CS_ACK_PROTM_PEND_SHIFT 27 +#define CS_ACK_PROTM_PEND_MASK (0x1 << CS_ACK_PROTM_PEND_SHIFT) +#define CS_ACK_PROTM_PEND_GET(reg_val) (((reg_val)&CS_ACK_PROTM_PEND_MASK) >> CS_ACK_PROTM_PEND_SHIFT) +#define CS_ACK_PROTM_PEND_SET(reg_val, value) \ + (((reg_val) & ~CS_ACK_PROTM_PEND_MASK) | (((value) << CS_ACK_PROTM_PEND_SHIFT) & CS_ACK_PROTM_PEND_MASK)) +#define CS_ACK_FATAL_SHIFT 30 +#define CS_ACK_FATAL_MASK (0x1 << CS_ACK_FATAL_SHIFT) +#define CS_ACK_FATAL_GET(reg_val) (((reg_val)&CS_ACK_FATAL_MASK) >> CS_ACK_FATAL_SHIFT) +#define CS_ACK_FATAL_SET(reg_val, value) \ + (((reg_val) & ~CS_ACK_FATAL_MASK) | (((value) << CS_ACK_FATAL_SHIFT) & CS_ACK_FATAL_MASK)) +#define CS_ACK_FAULT_SHIFT 31 +#define CS_ACK_FAULT_MASK (0x1 << CS_ACK_FAULT_SHIFT) +#define CS_ACK_FAULT_GET(reg_val) (((reg_val)&CS_ACK_FAULT_MASK) >> CS_ACK_FAULT_SHIFT) +#define CS_ACK_FAULT_SET(reg_val, value) \ + (((reg_val) & 
~CS_ACK_FAULT_MASK) | (((value) << CS_ACK_FAULT_SHIFT) & CS_ACK_FAULT_MASK)) + +/* CS_STATUS_CMD_PTR register */ +#define CS_STATUS_CMD_PTR_POINTER_SHIFT 0 +#define CS_STATUS_CMD_PTR_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_STATUS_CMD_PTR_POINTER_SHIFT) +#define CS_STATUS_CMD_PTR_POINTER_GET(reg_val) \ + (((reg_val)&CS_STATUS_CMD_PTR_POINTER_MASK) >> CS_STATUS_CMD_PTR_POINTER_SHIFT) +#define CS_STATUS_CMD_PTR_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_CMD_PTR_POINTER_MASK) | \ + (((value) << CS_STATUS_CMD_PTR_POINTER_SHIFT) & CS_STATUS_CMD_PTR_POINTER_MASK)) + +/* CS_STATUS_WAIT register */ +#define CS_STATUS_WAIT_SB_MASK_SHIFT 0 +#define CS_STATUS_WAIT_SB_MASK_MASK (0xFFFF << CS_STATUS_WAIT_SB_MASK_SHIFT) +#define CS_STATUS_WAIT_SB_MASK_GET(reg_val) (((reg_val)&CS_STATUS_WAIT_SB_MASK_MASK) >> CS_STATUS_WAIT_SB_MASK_SHIFT) +#define CS_STATUS_WAIT_SB_MASK_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_WAIT_SB_MASK_MASK) | \ + (((value) << CS_STATUS_WAIT_SB_MASK_SHIFT) & CS_STATUS_WAIT_SB_MASK_MASK)) +#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT 24 +#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK (0xF << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) +#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(reg_val) \ + (((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) +#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK) | \ + (((value) << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK)) +/* CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */ +#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE 0x0 +#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT 0x1 +/* End of CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */ +#define CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT 28 +#define CS_STATUS_WAIT_PROGRESS_WAIT_MASK (0x1 << CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT) +#define CS_STATUS_WAIT_PROGRESS_WAIT_GET(reg_val) \ + (((reg_val)&CS_STATUS_WAIT_PROGRESS_WAIT_MASK) >> CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT) +#define CS_STATUS_WAIT_PROGRESS_WAIT_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_WAIT_PROGRESS_WAIT_MASK) | \ + (((value) << CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT) & CS_STATUS_WAIT_PROGRESS_WAIT_MASK)) +#define CS_STATUS_WAIT_PROTM_PEND_SHIFT 29 +#define CS_STATUS_WAIT_PROTM_PEND_MASK (0x1 << CS_STATUS_WAIT_PROTM_PEND_SHIFT) +#define CS_STATUS_WAIT_PROTM_PEND_GET(reg_val) \ + (((reg_val)&CS_STATUS_WAIT_PROTM_PEND_MASK) >> CS_STATUS_WAIT_PROTM_PEND_SHIFT) +#define CS_STATUS_WAIT_PROTM_PEND_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_WAIT_PROTM_PEND_MASK) | \ + (((value) << CS_STATUS_WAIT_PROTM_PEND_SHIFT) & CS_STATUS_WAIT_PROTM_PEND_MASK)) +#define CS_STATUS_WAIT_SYNC_WAIT_SHIFT 31 +#define CS_STATUS_WAIT_SYNC_WAIT_MASK (0x1 << CS_STATUS_WAIT_SYNC_WAIT_SHIFT) +#define CS_STATUS_WAIT_SYNC_WAIT_GET(reg_val) \ + (((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_SHIFT) +#define CS_STATUS_WAIT_SYNC_WAIT_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_MASK) | \ + (((value) << CS_STATUS_WAIT_SYNC_WAIT_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_MASK)) + +/* CS_STATUS_REQ_RESOURCE register */ +#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT 0 +#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT) +#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_GET(reg_val) \ + (((reg_val)&CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT) +#define 
CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK) | \ + (((value) << CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK)) +#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT 1 +#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT) +#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_GET(reg_val) \ + (((reg_val)&CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT) +#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK) | \ + (((value) << CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK)) +#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT 2 +#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT) +#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_GET(reg_val) \ + (((reg_val)&CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT) +#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK) | \ + (((value) << CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK)) +#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT 3 +#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) +#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_GET(reg_val) \ + (((reg_val)&CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) +#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK) | \ + (((value) << CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK)) + +/* CS_STATUS_WAIT_SYNC_POINTER register */ +#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT 0 +#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) +#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_GET(reg_val) \ + (((reg_val)&CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK) >> CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) +#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK) | \ + (((value) << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) & CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK)) + +/* CS_STATUS_WAIT_SYNC_VALUE register */ +#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT 0 +#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK (0xFFFFFFFF << CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT) +#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_GET(reg_val) \ + (((reg_val)&CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK) >> CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT) +#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK) | \ + (((value) << CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT) & CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK)) + +/* CS_STATUS_SCOREBOARDS register */ +#define CS_STATUS_SCOREBOARDS_NONZERO_SHIFT (0) +#define CS_STATUS_SCOREBOARDS_NONZERO_MASK \ + ((0xFFFF) << CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) +#define CS_STATUS_SCOREBOARDS_NONZERO_GET(reg_val) \ + (((reg_val)&CS_STATUS_SCOREBOARDS_NONZERO_MASK) >> \ + CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) +#define 
CS_STATUS_SCOREBOARDS_NONZERO_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_SCOREBOARDS_NONZERO_MASK) | \ + (((value) << CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) & \ + CS_STATUS_SCOREBOARDS_NONZERO_MASK)) + +/* CS_STATUS_BLOCKED_REASON register */ +#define CS_STATUS_BLOCKED_REASON_REASON_SHIFT (0) +#define CS_STATUS_BLOCKED_REASON_REASON_MASK \ + ((0xF) << CS_STATUS_BLOCKED_REASON_REASON_SHIFT) +#define CS_STATUS_BLOCKED_REASON_REASON_GET(reg_val) \ + (((reg_val)&CS_STATUS_BLOCKED_REASON_REASON_MASK) >> \ + CS_STATUS_BLOCKED_REASON_REASON_SHIFT) +#define CS_STATUS_BLOCKED_REASON_REASON_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_BLOCKED_REASON_REASON_MASK) | \ + (((value) << CS_STATUS_BLOCKED_REASON_REASON_SHIFT) & \ + CS_STATUS_BLOCKED_REASON_REASON_MASK)) +/* CS_STATUS_BLOCKED_REASON_reason values */ +#define CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED 0x0 +#define CS_STATUS_BLOCKED_REASON_REASON_WAIT 0x1 +#define CS_STATUS_BLOCKED_REASON_REASON_PROGRESS_WAIT 0x2 +#define CS_STATUS_BLOCKED_REASON_REASON_SYNC_WAIT 0x3 +#define CS_STATUS_BLOCKED_REASON_REASON_DEFERRED 0x4 +#define CS_STATUS_BLOCKED_REASON_REASON_RESOURCE 0x5 +#define CS_STATUS_BLOCKED_REASON_REASON_FLUSH 0x6 +/* End of CS_STATUS_BLOCKED_REASON_reason values */ + +/* CS_FAULT register */ +#define CS_FAULT_EXCEPTION_TYPE_SHIFT 0 +#define CS_FAULT_EXCEPTION_TYPE_MASK (0xFF << CS_FAULT_EXCEPTION_TYPE_SHIFT) +#define CS_FAULT_EXCEPTION_TYPE_GET(reg_val) (((reg_val)&CS_FAULT_EXCEPTION_TYPE_MASK) >> CS_FAULT_EXCEPTION_TYPE_SHIFT) +#define CS_FAULT_EXCEPTION_TYPE_SET(reg_val, value) \ + (((reg_val) & ~CS_FAULT_EXCEPTION_TYPE_MASK) | \ + (((value) << CS_FAULT_EXCEPTION_TYPE_SHIFT) & CS_FAULT_EXCEPTION_TYPE_MASK)) +/* CS_FAULT_EXCEPTION_TYPE values */ +#define CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED 0x0F +#define CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT 0x4B +#define CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_PC 0x50 +#define CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_ENC 0x51 +#define CS_FAULT_EXCEPTION_TYPE_INSTR_BARRIER_FAULT 0x55 +#define CS_FAULT_EXCEPTION_TYPE_DATA_INVALID_FAULT 0x58 +#define CS_FAULT_EXCEPTION_TYPE_TILE_RANGE_FAULT 0x59 +#define CS_FAULT_EXCEPTION_TYPE_ADDR_RANGE_FAULT 0x5A +#define CS_FAULT_EXCEPTION_TYPE_IMPRECISE_FAULT 0x5B +#define CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT 0x69 +/* End of CS_FAULT_EXCEPTION_TYPE values */ +#define CS_FAULT_EXCEPTION_DATA_SHIFT 8 +#define CS_FAULT_EXCEPTION_DATA_MASK (0xFFFFFF << CS_FAULT_EXCEPTION_DATA_SHIFT) +#define CS_FAULT_EXCEPTION_DATA_GET(reg_val) (((reg_val)&CS_FAULT_EXCEPTION_DATA_MASK) >> CS_FAULT_EXCEPTION_DATA_SHIFT) +#define CS_FAULT_EXCEPTION_DATA_SET(reg_val, value) \ + (((reg_val) & ~CS_FAULT_EXCEPTION_DATA_MASK) | \ + (((value) << CS_FAULT_EXCEPTION_DATA_SHIFT) & CS_FAULT_EXCEPTION_DATA_MASK)) + +/* CS_FATAL register */ +#define CS_FATAL_EXCEPTION_TYPE_SHIFT 0 +#define CS_FATAL_EXCEPTION_TYPE_MASK (0xFF << CS_FATAL_EXCEPTION_TYPE_SHIFT) +#define CS_FATAL_EXCEPTION_TYPE_GET(reg_val) (((reg_val)&CS_FATAL_EXCEPTION_TYPE_MASK) >> CS_FATAL_EXCEPTION_TYPE_SHIFT) +#define CS_FATAL_EXCEPTION_TYPE_SET(reg_val, value) \ + (((reg_val) & ~CS_FATAL_EXCEPTION_TYPE_MASK) | \ + (((value) << CS_FATAL_EXCEPTION_TYPE_SHIFT) & CS_FATAL_EXCEPTION_TYPE_MASK)) +/* CS_FATAL_EXCEPTION_TYPE values */ +#define CS_FATAL_EXCEPTION_TYPE_CS_CONFIG_FAULT 0x40 +#define CS_FATAL_EXCEPTION_TYPE_CS_ENDPOINT_FAULT 0x44 +#define CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT 0x48 +#define CS_FATAL_EXCEPTION_TYPE_CS_INVALID_INSTRUCTION 0x49 +#define CS_FATAL_EXCEPTION_TYPE_CS_CALL_STACK_OVERFLOW 
0x4A +#define CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR 0x68 +/* End of CS_FATAL_EXCEPTION_TYPE values */ +#define CS_FATAL_EXCEPTION_DATA_SHIFT 8 +#define CS_FATAL_EXCEPTION_DATA_MASK (0xFFFFFF << CS_FATAL_EXCEPTION_DATA_SHIFT) +#define CS_FATAL_EXCEPTION_DATA_GET(reg_val) (((reg_val)&CS_FATAL_EXCEPTION_DATA_MASK) >> CS_FATAL_EXCEPTION_DATA_SHIFT) +#define CS_FATAL_EXCEPTION_DATA_SET(reg_val, value) \ + (((reg_val) & ~CS_FATAL_EXCEPTION_DATA_MASK) | \ + (((value) << CS_FATAL_EXCEPTION_DATA_SHIFT) & CS_FATAL_EXCEPTION_DATA_MASK)) + +/* CS_FAULT_INFO register */ +#define CS_FAULT_INFO_EXCEPTION_DATA_SHIFT 0 +#define CS_FAULT_INFO_EXCEPTION_DATA_MASK (0xFFFFFFFFFFFFFFFF << CS_FAULT_INFO_EXCEPTION_DATA_SHIFT) +#define CS_FAULT_INFO_EXCEPTION_DATA_GET(reg_val) \ + (((reg_val)&CS_FAULT_INFO_EXCEPTION_DATA_MASK) >> CS_FAULT_INFO_EXCEPTION_DATA_SHIFT) +#define CS_FAULT_INFO_EXCEPTION_DATA_SET(reg_val, value) \ + (((reg_val) & ~CS_FAULT_INFO_EXCEPTION_DATA_MASK) | \ + (((value) << CS_FAULT_INFO_EXCEPTION_DATA_SHIFT) & CS_FAULT_INFO_EXCEPTION_DATA_MASK)) + +/* CS_FATAL_INFO register */ +#define CS_FATAL_INFO_EXCEPTION_DATA_SHIFT 0 +#define CS_FATAL_INFO_EXCEPTION_DATA_MASK (0xFFFFFFFFFFFFFFFF << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT) +#define CS_FATAL_INFO_EXCEPTION_DATA_GET(reg_val) \ + (((reg_val)&CS_FATAL_INFO_EXCEPTION_DATA_MASK) >> CS_FATAL_INFO_EXCEPTION_DATA_SHIFT) +#define CS_FATAL_INFO_EXCEPTION_DATA_SET(reg_val, value) \ + (((reg_val) & ~CS_FATAL_INFO_EXCEPTION_DATA_MASK) | \ + (((value) << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT) & CS_FATAL_INFO_EXCEPTION_DATA_MASK)) + +/* CS_HEAP_VT_START register */ +#define CS_HEAP_VT_START_VALUE_SHIFT 0 +#define CS_HEAP_VT_START_VALUE_MASK (0xFFFFFFFF << CS_HEAP_VT_START_VALUE_SHIFT) +#define CS_HEAP_VT_START_VALUE_GET(reg_val) (((reg_val)&CS_HEAP_VT_START_VALUE_MASK) >> CS_HEAP_VT_START_VALUE_SHIFT) +#define CS_HEAP_VT_START_VALUE_SET(reg_val, value) \ + (((reg_val) & ~CS_HEAP_VT_START_VALUE_MASK) | \ + (((value) << CS_HEAP_VT_START_VALUE_SHIFT) & CS_HEAP_VT_START_VALUE_MASK)) + +/* CS_HEAP_VT_END register */ +#define CS_HEAP_VT_END_VALUE_SHIFT 0 +#define CS_HEAP_VT_END_VALUE_MASK (0xFFFFFFFF << CS_HEAP_VT_END_VALUE_SHIFT) +#define CS_HEAP_VT_END_VALUE_GET(reg_val) (((reg_val)&CS_HEAP_VT_END_VALUE_MASK) >> CS_HEAP_VT_END_VALUE_SHIFT) +#define CS_HEAP_VT_END_VALUE_SET(reg_val, value) \ + (((reg_val) & ~CS_HEAP_VT_END_VALUE_MASK) | (((value) << CS_HEAP_VT_END_VALUE_SHIFT) & CS_HEAP_VT_END_VALUE_MASK)) + +/* CS_HEAP_FRAG_END register */ +#define CS_HEAP_FRAG_END_VALUE_SHIFT 0 +#define CS_HEAP_FRAG_END_VALUE_MASK (0xFFFFFFFF << CS_HEAP_FRAG_END_VALUE_SHIFT) +#define CS_HEAP_FRAG_END_VALUE_GET(reg_val) (((reg_val)&CS_HEAP_FRAG_END_VALUE_MASK) >> CS_HEAP_FRAG_END_VALUE_SHIFT) +#define CS_HEAP_FRAG_END_VALUE_SET(reg_val, value) \ + (((reg_val) & ~CS_HEAP_FRAG_END_VALUE_MASK) | \ + (((value) << CS_HEAP_FRAG_END_VALUE_SHIFT) & CS_HEAP_FRAG_END_VALUE_MASK)) + +/* CS_HEAP_ADDRESS register */ +#define CS_HEAP_ADDRESS_POINTER_SHIFT 0 +#define CS_HEAP_ADDRESS_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_HEAP_ADDRESS_POINTER_SHIFT) +#define CS_HEAP_ADDRESS_POINTER_GET(reg_val) (((reg_val)&CS_HEAP_ADDRESS_POINTER_MASK) >> CS_HEAP_ADDRESS_POINTER_SHIFT) +#define CS_HEAP_ADDRESS_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CS_HEAP_ADDRESS_POINTER_MASK) | \ + (((value) << CS_HEAP_ADDRESS_POINTER_SHIFT) & CS_HEAP_ADDRESS_POINTER_MASK)) +/* End of CS_KERNEL_OUTPUT_BLOCK register set definitions */
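The CS_FAULT/CS_FATAL pairs in the output block above are decoded with the same accessors they are defined with: the low 8 bits carry the exception type, bits 8-31 carry per-type data, and the 64-bit *_INFO registers carry an extended payload. A minimal decode sketch using only the macros from this header; the reporting function itself is illustrative, not part of the DDK:

static void report_cs_fault(__u32 cs_fault, __u64 cs_fault_info)
{
	/* Low 8 bits: exception type; bits 8..31: per-type extra data */
	__u32 type = CS_FAULT_EXCEPTION_TYPE_GET(cs_fault);
	__u32 data = CS_FAULT_EXCEPTION_DATA_GET(cs_fault);
	/* CS_FAULT_INFO supplies a full 64-bit payload for some types */
	__u64 info = CS_FAULT_INFO_EXCEPTION_DATA_GET(cs_fault_info);

	switch (type) {
	case CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT:
	case CS_FAULT_EXCEPTION_TYPE_IMPRECISE_FAULT:
		/* ... log type/data/info and recover per driver policy ... */
		break;
	default:
		break;
	}
	(void)data;
	(void)info;
}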
+ +/* CS_USER_INPUT_BLOCK register set definitions */ + +/* CS_INSERT register */ +#define CS_INSERT_VALUE_SHIFT 0 +#define CS_INSERT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_INSERT_VALUE_SHIFT) +#define CS_INSERT_VALUE_GET(reg_val) (((reg_val)&CS_INSERT_VALUE_MASK) >> CS_INSERT_VALUE_SHIFT) +#define CS_INSERT_VALUE_SET(reg_val, value) \ + (((reg_val) & ~CS_INSERT_VALUE_MASK) | (((value) << CS_INSERT_VALUE_SHIFT) & CS_INSERT_VALUE_MASK)) + +/* CS_EXTRACT_INIT register */ +#define CS_EXTRACT_INIT_VALUE_SHIFT 0 +#define CS_EXTRACT_INIT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_EXTRACT_INIT_VALUE_SHIFT) +#define CS_EXTRACT_INIT_VALUE_GET(reg_val) (((reg_val)&CS_EXTRACT_INIT_VALUE_MASK) >> CS_EXTRACT_INIT_VALUE_SHIFT) +#define CS_EXTRACT_INIT_VALUE_SET(reg_val, value) \ + (((reg_val) & ~CS_EXTRACT_INIT_VALUE_MASK) | \ + (((value) << CS_EXTRACT_INIT_VALUE_SHIFT) & CS_EXTRACT_INIT_VALUE_MASK)) +/* End of CS_USER_INPUT_BLOCK register set definitions */ + +/* CS_USER_OUTPUT_BLOCK register set definitions */ + +/* CS_EXTRACT register */ +#define CS_EXTRACT_VALUE_SHIFT 0 +#define CS_EXTRACT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_EXTRACT_VALUE_SHIFT) +#define CS_EXTRACT_VALUE_GET(reg_val) (((reg_val)&CS_EXTRACT_VALUE_MASK) >> CS_EXTRACT_VALUE_SHIFT) +#define CS_EXTRACT_VALUE_SET(reg_val, value) \ + (((reg_val) & ~CS_EXTRACT_VALUE_MASK) | (((value) << CS_EXTRACT_VALUE_SHIFT) & CS_EXTRACT_VALUE_MASK)) + +/* CS_ACTIVE register */ +#define CS_ACTIVE_HW_ACTIVE_SHIFT 0 +#define CS_ACTIVE_HW_ACTIVE_MASK (0x1 << CS_ACTIVE_HW_ACTIVE_SHIFT) +#define CS_ACTIVE_HW_ACTIVE_GET(reg_val) (((reg_val)&CS_ACTIVE_HW_ACTIVE_MASK) >> CS_ACTIVE_HW_ACTIVE_SHIFT) +#define CS_ACTIVE_HW_ACTIVE_SET(reg_val, value) \ + (((reg_val) & ~CS_ACTIVE_HW_ACTIVE_MASK) | (((value) << CS_ACTIVE_HW_ACTIVE_SHIFT) & CS_ACTIVE_HW_ACTIVE_MASK)) +/* End of CS_USER_OUTPUT_BLOCK register set definitions */ + +/* CSG_INPUT_BLOCK register set definitions */ + +/* CSG_REQ register */ +#define CSG_REQ_STATE_SHIFT 0 +#define CSG_REQ_STATE_MASK (0x7 << CSG_REQ_STATE_SHIFT) +#define CSG_REQ_STATE_GET(reg_val) (((reg_val)&CSG_REQ_STATE_MASK) >> CSG_REQ_STATE_SHIFT) +#define CSG_REQ_STATE_SET(reg_val, value) \ + (((reg_val) & ~CSG_REQ_STATE_MASK) | (((value) << CSG_REQ_STATE_SHIFT) & CSG_REQ_STATE_MASK)) +/* CSG_REQ_STATE values */ +#define CSG_REQ_STATE_TERMINATE 0x0 +#define CSG_REQ_STATE_START 0x1 +#define CSG_REQ_STATE_SUSPEND 0x2 +#define CSG_REQ_STATE_RESUME 0x3 +/* End of CSG_REQ_STATE values */ +#define CSG_REQ_EP_CFG_SHIFT 4 +#define CSG_REQ_EP_CFG_MASK (0x1 << CSG_REQ_EP_CFG_SHIFT) +#define CSG_REQ_EP_CFG_GET(reg_val) (((reg_val)&CSG_REQ_EP_CFG_MASK) >> CSG_REQ_EP_CFG_SHIFT) +#define CSG_REQ_EP_CFG_SET(reg_val, value) \ + (((reg_val) & ~CSG_REQ_EP_CFG_MASK) | (((value) << CSG_REQ_EP_CFG_SHIFT) & CSG_REQ_EP_CFG_MASK)) +#define CSG_REQ_STATUS_UPDATE_SHIFT 5 +#define CSG_REQ_STATUS_UPDATE_MASK (0x1 << CSG_REQ_STATUS_UPDATE_SHIFT) +#define CSG_REQ_STATUS_UPDATE_GET(reg_val) (((reg_val)&CSG_REQ_STATUS_UPDATE_MASK) >> CSG_REQ_STATUS_UPDATE_SHIFT) +#define CSG_REQ_STATUS_UPDATE_SET(reg_val, value) \ + (((reg_val) & ~CSG_REQ_STATUS_UPDATE_MASK) | \ + (((value) << CSG_REQ_STATUS_UPDATE_SHIFT) & CSG_REQ_STATUS_UPDATE_MASK)) +#define CSG_REQ_SYNC_UPDATE_SHIFT 28 +#define CSG_REQ_SYNC_UPDATE_MASK (0x1 << CSG_REQ_SYNC_UPDATE_SHIFT) +#define CSG_REQ_SYNC_UPDATE_GET(reg_val) (((reg_val)&CSG_REQ_SYNC_UPDATE_MASK) >> CSG_REQ_SYNC_UPDATE_SHIFT) +#define CSG_REQ_SYNC_UPDATE_SET(reg_val, value) \ + (((reg_val) & ~CSG_REQ_SYNC_UPDATE_MASK) | (((value) << CSG_REQ_SYNC_UPDATE_SHIFT) & CSG_REQ_SYNC_UPDATE_MASK)) +#define CSG_REQ_IDLE_SHIFT 29 +#define
CSG_REQ_IDLE_MASK (0x1 << CSG_REQ_IDLE_SHIFT) +#define CSG_REQ_IDLE_GET(reg_val) (((reg_val)&CSG_REQ_IDLE_MASK) >> CSG_REQ_IDLE_SHIFT) +#define CSG_REQ_IDLE_SET(reg_val, value) \ + (((reg_val) & ~CSG_REQ_IDLE_MASK) | (((value) << CSG_REQ_IDLE_SHIFT) & CSG_REQ_IDLE_MASK)) +#define CSG_REQ_DOORBELL_SHIFT 30 +#define CSG_REQ_DOORBELL_MASK (0x1 << CSG_REQ_DOORBELL_SHIFT) +#define CSG_REQ_DOORBELL_GET(reg_val) (((reg_val)&CSG_REQ_DOORBELL_MASK) >> CSG_REQ_DOORBELL_SHIFT) +#define CSG_REQ_DOORBELL_SET(reg_val, value) \ + (((reg_val) & ~CSG_REQ_DOORBELL_MASK) | (((value) << CSG_REQ_DOORBELL_SHIFT) & CSG_REQ_DOORBELL_MASK)) +#define CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT 31 +#define CSG_REQ_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT) +#define CSG_REQ_PROGRESS_TIMER_EVENT_GET(reg_val) \ + (((reg_val)&CSG_REQ_PROGRESS_TIMER_EVENT_MASK) >> CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT) +#define CSG_REQ_PROGRESS_TIMER_EVENT_SET(reg_val, value) \ + (((reg_val) & ~CSG_REQ_PROGRESS_TIMER_EVENT_MASK) | \ + (((value) << CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK)) + +/* CSG_ACK_IRQ_MASK register */ +#define CSG_ACK_IRQ_MASK_STATE_SHIFT 0 +#define CSG_ACK_IRQ_MASK_STATE_MASK (0x7 << CSG_ACK_IRQ_MASK_STATE_SHIFT) +#define CSG_ACK_IRQ_MASK_STATE_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_STATE_MASK) >> CSG_ACK_IRQ_MASK_STATE_SHIFT) +#define CSG_ACK_IRQ_MASK_STATE_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_IRQ_MASK_STATE_MASK) | \ + (((value) << CSG_ACK_IRQ_MASK_STATE_SHIFT) & CSG_ACK_IRQ_MASK_STATE_MASK)) +/* CSG_ACK_IRQ_MASK_STATE values */ +#define CSG_ACK_IRQ_MASK_STATE_DISABLED 0x0 +#define CSG_ACK_IRQ_MASK_STATE_ENABLED 0x7 +/* End of CSG_ACK_IRQ_MASK_STATE values */ +#define CSG_ACK_IRQ_MASK_EP_CFG_SHIFT 4 +#define CSG_ACK_IRQ_MASK_EP_CFG_MASK (0x1 << CSG_ACK_IRQ_MASK_EP_CFG_SHIFT) +#define CSG_ACK_IRQ_MASK_EP_CFG_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_EP_CFG_MASK) >> CSG_ACK_IRQ_MASK_EP_CFG_SHIFT) +#define CSG_ACK_IRQ_MASK_EP_CFG_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_IRQ_MASK_EP_CFG_MASK) | \ + (((value) << CSG_ACK_IRQ_MASK_EP_CFG_SHIFT) & CSG_ACK_IRQ_MASK_EP_CFG_MASK)) +#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT 5 +#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK (0x1 << CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT) +#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_GET(reg_val) \ + (((reg_val)&CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK) >> CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT) +#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK) | \ + (((value) << CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT) & CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK)) +#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT 28 +#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK (0x1 << CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT) +#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_GET(reg_val) \ + (((reg_val)&CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK) >> CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT) +#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK) | \ + (((value) << CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT) & CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK)) +#define CSG_ACK_IRQ_MASK_IDLE_SHIFT 29 +#define CSG_ACK_IRQ_MASK_IDLE_MASK (0x1 << CSG_ACK_IRQ_MASK_IDLE_SHIFT) +#define CSG_ACK_IRQ_MASK_IDLE_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_IDLE_MASK) >> CSG_ACK_IRQ_MASK_IDLE_SHIFT) +#define CSG_ACK_IRQ_MASK_IDLE_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_IRQ_MASK_IDLE_MASK) | \ + (((value) << CSG_ACK_IRQ_MASK_IDLE_SHIFT) & CSG_ACK_IRQ_MASK_IDLE_MASK)) +#define 
CSG_ACK_IRQ_MASK_DOORBELL_SHIFT 30 +#define CSG_ACK_IRQ_MASK_DOORBELL_MASK (0x1 << CSG_ACK_IRQ_MASK_DOORBELL_SHIFT) +#define CSG_ACK_IRQ_MASK_DOORBELL_GET(reg_val) \ + (((reg_val)&CSG_ACK_IRQ_MASK_DOORBELL_MASK) >> CSG_ACK_IRQ_MASK_DOORBELL_SHIFT) +#define CSG_ACK_IRQ_MASK_DOORBELL_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_IRQ_MASK_DOORBELL_MASK) | \ + (((value) << CSG_ACK_IRQ_MASK_DOORBELL_SHIFT) & CSG_ACK_IRQ_MASK_DOORBELL_MASK)) +#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT 31 +#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT) +#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_GET(reg_val) \ + (((reg_val)&CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK) >> CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT) +#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK) | \ + (((value) << CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT) & CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK)) + +/* CSG_EP_REQ register */ +#define CSG_EP_REQ_COMPUTE_EP_SHIFT 0 +#define CSG_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_EP_REQ_COMPUTE_EP_SHIFT) +#define CSG_EP_REQ_COMPUTE_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_COMPUTE_EP_MASK) >> CSG_EP_REQ_COMPUTE_EP_SHIFT) +#define CSG_EP_REQ_COMPUTE_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_COMPUTE_EP_MASK) | \ + (((value) << CSG_EP_REQ_COMPUTE_EP_SHIFT) & CSG_EP_REQ_COMPUTE_EP_MASK)) +#define CSG_EP_REQ_FRAGMENT_EP_SHIFT 8 +#define CSG_EP_REQ_FRAGMENT_EP_MASK (0xFF << CSG_EP_REQ_FRAGMENT_EP_SHIFT) +#define CSG_EP_REQ_FRAGMENT_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_FRAGMENT_EP_MASK) >> CSG_EP_REQ_FRAGMENT_EP_SHIFT) +#define CSG_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_FRAGMENT_EP_MASK) | \ + (((value) << CSG_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_EP_REQ_FRAGMENT_EP_MASK)) +#define CSG_EP_REQ_TILER_EP_SHIFT 16 +#define CSG_EP_REQ_TILER_EP_MASK (0xF << CSG_EP_REQ_TILER_EP_SHIFT) +#define CSG_EP_REQ_TILER_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_TILER_EP_MASK) >> CSG_EP_REQ_TILER_EP_SHIFT) +#define CSG_EP_REQ_TILER_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_TILER_EP_MASK) | (((value) << CSG_EP_REQ_TILER_EP_SHIFT) & CSG_EP_REQ_TILER_EP_MASK)) +#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT 20 +#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) +#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_GET(reg_val) \ + (((reg_val)&CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) >> CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) +#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \ + (((value) << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK)) +#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT 21 +#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) +#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_GET(reg_val) \ + (((reg_val)&CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) >> CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) +#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \ + (((value) << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK)) +#define CSG_EP_REQ_PRIORITY_SHIFT 28 +#define CSG_EP_REQ_PRIORITY_MASK (0xF << CSG_EP_REQ_PRIORITY_SHIFT) +#define CSG_EP_REQ_PRIORITY_GET(reg_val) (((reg_val)&CSG_EP_REQ_PRIORITY_MASK) >> CSG_EP_REQ_PRIORITY_SHIFT) +#define CSG_EP_REQ_PRIORITY_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_PRIORITY_MASK) | (((value) << 
CSG_EP_REQ_PRIORITY_SHIFT) & CSG_EP_REQ_PRIORITY_MASK)) + +/* CSG_SUSPEND_BUF register */ +#define CSG_SUSPEND_BUF_POINTER_SHIFT 0 +#define CSG_SUSPEND_BUF_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CSG_SUSPEND_BUF_POINTER_SHIFT) +#define CSG_SUSPEND_BUF_POINTER_GET(reg_val) (((reg_val)&CSG_SUSPEND_BUF_POINTER_MASK) >> CSG_SUSPEND_BUF_POINTER_SHIFT) +#define CSG_SUSPEND_BUF_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CSG_SUSPEND_BUF_POINTER_MASK) | \ + (((value) << CSG_SUSPEND_BUF_POINTER_SHIFT) & CSG_SUSPEND_BUF_POINTER_MASK)) + +/* CSG_PROTM_SUSPEND_BUF register */ +#define CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT 0 +#define CSG_PROTM_SUSPEND_BUF_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) +#define CSG_PROTM_SUSPEND_BUF_POINTER_GET(reg_val) \ + (((reg_val)&CSG_PROTM_SUSPEND_BUF_POINTER_MASK) >> CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) +#define CSG_PROTM_SUSPEND_BUF_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CSG_PROTM_SUSPEND_BUF_POINTER_MASK) | \ + (((value) << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) & CSG_PROTM_SUSPEND_BUF_POINTER_MASK)) + +/* End of CSG_INPUT_BLOCK register set definitions */ + +/* CSG_OUTPUT_BLOCK register set definitions */ + +/* CSG_ACK register */ +#define CSG_ACK_STATE_SHIFT 0 +#define CSG_ACK_STATE_MASK (0x7 << CSG_ACK_STATE_SHIFT) +#define CSG_ACK_STATE_GET(reg_val) (((reg_val)&CSG_ACK_STATE_MASK) >> CSG_ACK_STATE_SHIFT) +#define CSG_ACK_STATE_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_STATE_MASK) | (((value) << CSG_ACK_STATE_SHIFT) & CSG_ACK_STATE_MASK)) +/* CSG_ACK_STATE values */ +#define CSG_ACK_STATE_TERMINATE 0x0 +#define CSG_ACK_STATE_START 0x1 +#define CSG_ACK_STATE_SUSPEND 0x2 +#define CSG_ACK_STATE_RESUME 0x3 +/* End of CSG_ACK_STATE values */ +#define CSG_ACK_EP_CFG_SHIFT 4 +#define CSG_ACK_EP_CFG_MASK (0x1 << CSG_ACK_EP_CFG_SHIFT) +#define CSG_ACK_EP_CFG_GET(reg_val) (((reg_val)&CSG_ACK_EP_CFG_MASK) >> CSG_ACK_EP_CFG_SHIFT) +#define CSG_ACK_EP_CFG_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_EP_CFG_MASK) | (((value) << CSG_ACK_EP_CFG_SHIFT) & CSG_ACK_EP_CFG_MASK)) +#define CSG_ACK_STATUS_UPDATE_SHIFT 5 +#define CSG_ACK_STATUS_UPDATE_MASK (0x1 << CSG_ACK_STATUS_UPDATE_SHIFT) +#define CSG_ACK_STATUS_UPDATE_GET(reg_val) (((reg_val)&CSG_ACK_STATUS_UPDATE_MASK) >> CSG_ACK_STATUS_UPDATE_SHIFT) +#define CSG_ACK_STATUS_UPDATE_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_STATUS_UPDATE_MASK) | \ + (((value) << CSG_ACK_STATUS_UPDATE_SHIFT) & CSG_ACK_STATUS_UPDATE_MASK)) +#define CSG_ACK_SYNC_UPDATE_SHIFT 28 +#define CSG_ACK_SYNC_UPDATE_MASK (0x1 << CSG_ACK_SYNC_UPDATE_SHIFT) +#define CSG_ACK_SYNC_UPDATE_GET(reg_val) (((reg_val)&CSG_ACK_SYNC_UPDATE_MASK) >> CSG_ACK_SYNC_UPDATE_SHIFT) +#define CSG_ACK_SYNC_UPDATE_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_SYNC_UPDATE_MASK) | (((value) << CSG_ACK_SYNC_UPDATE_SHIFT) & CSG_ACK_SYNC_UPDATE_MASK)) +#define CSG_ACK_IDLE_SHIFT 29 +#define CSG_ACK_IDLE_MASK (0x1 << CSG_ACK_IDLE_SHIFT) +#define CSG_ACK_IDLE_GET(reg_val) (((reg_val)&CSG_ACK_IDLE_MASK) >> CSG_ACK_IDLE_SHIFT) +#define CSG_ACK_IDLE_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_IDLE_MASK) | (((value) << CSG_ACK_IDLE_SHIFT) & CSG_ACK_IDLE_MASK)) +#define CSG_ACK_DOORBELL_SHIFT 30 +#define CSG_ACK_DOORBELL_MASK (0x1 << CSG_ACK_DOORBELL_SHIFT) +#define CSG_ACK_DOORBELL_GET(reg_val) (((reg_val)&CSG_ACK_DOORBELL_MASK) >> CSG_ACK_DOORBELL_SHIFT) +#define CSG_ACK_DOORBELL_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_DOORBELL_MASK) | (((value) << CSG_ACK_DOORBELL_SHIFT) & CSG_ACK_DOORBELL_MASK)) +#define 
CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT 31 +#define CSG_ACK_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT) +#define CSG_ACK_PROGRESS_TIMER_EVENT_GET(reg_val) \ + (((reg_val)&CSG_ACK_PROGRESS_TIMER_EVENT_MASK) >> CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT) +#define CSG_ACK_PROGRESS_TIMER_EVENT_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_PROGRESS_TIMER_EVENT_MASK) | \ + (((value) << CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT) & CSG_ACK_PROGRESS_TIMER_EVENT_MASK)) + +/* CSG_STATUS_EP_CURRENT register */ +#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT 0 +#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK (0xFF << CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) +#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(reg_val) \ + (((reg_val)&CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK) >> CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) +#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK) | \ + (((value) << CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) & CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK)) +#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT 8 +#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK (0xFF << CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) +#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(reg_val) \ + (((reg_val)&CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK) >> CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) +#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK) | \ + (((value) << CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) & CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK)) +#define CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT 16 +#define CSG_STATUS_EP_CURRENT_TILER_EP_MASK (0xF << CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) +#define CSG_STATUS_EP_CURRENT_TILER_EP_GET(reg_val) \ + (((reg_val)&CSG_STATUS_EP_CURRENT_TILER_EP_MASK) >> CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) +#define CSG_STATUS_EP_CURRENT_TILER_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_EP_CURRENT_TILER_EP_MASK) | \ + (((value) << CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) & CSG_STATUS_EP_CURRENT_TILER_EP_MASK)) + +/* CSG_STATUS_EP_REQ register */ +#define CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT 0 +#define CSG_STATUS_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) +#define CSG_STATUS_EP_REQ_COMPUTE_EP_GET(reg_val) \ + (((reg_val)&CSG_STATUS_EP_REQ_COMPUTE_EP_MASK) >> CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) +#define CSG_STATUS_EP_REQ_COMPUTE_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_EP_REQ_COMPUTE_EP_MASK) | \ + (((value) << CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) & CSG_STATUS_EP_REQ_COMPUTE_EP_MASK)) +#define CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT 8 +#define CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK (0xFF << CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT) +#define CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(reg_val) \ + (((reg_val)&CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK) >> CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT) +#define CSG_STATUS_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK) | \ + (((value) << CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK)) +#define CSG_STATUS_EP_REQ_TILER_EP_SHIFT 16 +#define CSG_STATUS_EP_REQ_TILER_EP_MASK (0xF << CSG_STATUS_EP_REQ_TILER_EP_SHIFT) +#define CSG_STATUS_EP_REQ_TILER_EP_GET(reg_val) \ + (((reg_val)&CSG_STATUS_EP_REQ_TILER_EP_MASK) >> CSG_STATUS_EP_REQ_TILER_EP_SHIFT) +#define CSG_STATUS_EP_REQ_TILER_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_EP_REQ_TILER_EP_MASK) | \ + (((value) << CSG_STATUS_EP_REQ_TILER_EP_SHIFT) & CSG_STATUS_EP_REQ_TILER_EP_MASK)) +#define 
CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT 20 +#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK (0x1 << CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) +#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_GET(reg_val) \ + (((reg_val)&CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK) >> CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) +#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \ + (((value) << CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK)) +#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT 21 +#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK (0x1 << CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) +#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_GET(reg_val) \ + (((reg_val)&CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) >> CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) +#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \ + (((value) << CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK)) + +/* End of CSG_OUTPUT_BLOCK register set definitions */ + +/* STREAM_CONTROL_BLOCK register set definitions */ + +/* STREAM_FEATURES register */ +#define STREAM_FEATURES_WORK_REGISTERS_SHIFT 0 +#define STREAM_FEATURES_WORK_REGISTERS_MASK (0xFF << STREAM_FEATURES_WORK_REGISTERS_SHIFT) +#define STREAM_FEATURES_WORK_REGISTERS_GET(reg_val) \ + (((reg_val)&STREAM_FEATURES_WORK_REGISTERS_MASK) >> STREAM_FEATURES_WORK_REGISTERS_SHIFT) +#define STREAM_FEATURES_WORK_REGISTERS_SET(reg_val, value) \ + (((reg_val) & ~STREAM_FEATURES_WORK_REGISTERS_MASK) | \ + (((value) << STREAM_FEATURES_WORK_REGISTERS_SHIFT) & STREAM_FEATURES_WORK_REGISTERS_MASK)) +#define STREAM_FEATURES_SCOREBOARDS_SHIFT 8 +#define STREAM_FEATURES_SCOREBOARDS_MASK (0xFF << STREAM_FEATURES_SCOREBOARDS_SHIFT) +#define STREAM_FEATURES_SCOREBOARDS_GET(reg_val) \ + (((reg_val)&STREAM_FEATURES_SCOREBOARDS_MASK) >> STREAM_FEATURES_SCOREBOARDS_SHIFT) +#define STREAM_FEATURES_SCOREBOARDS_SET(reg_val, value) \ + (((reg_val) & ~STREAM_FEATURES_SCOREBOARDS_MASK) | \ + (((value) << STREAM_FEATURES_SCOREBOARDS_SHIFT) & STREAM_FEATURES_SCOREBOARDS_MASK)) +#define STREAM_FEATURES_COMPUTE_SHIFT 16 +#define STREAM_FEATURES_COMPUTE_MASK (0x1 << STREAM_FEATURES_COMPUTE_SHIFT) +#define STREAM_FEATURES_COMPUTE_GET(reg_val) (((reg_val)&STREAM_FEATURES_COMPUTE_MASK) >> STREAM_FEATURES_COMPUTE_SHIFT) +#define STREAM_FEATURES_COMPUTE_SET(reg_val, value) \ + (((reg_val) & ~STREAM_FEATURES_COMPUTE_MASK) | \ + (((value) << STREAM_FEATURES_COMPUTE_SHIFT) & STREAM_FEATURES_COMPUTE_MASK)) +#define STREAM_FEATURES_FRAGMENT_SHIFT 17 +#define STREAM_FEATURES_FRAGMENT_MASK (0x1 << STREAM_FEATURES_FRAGMENT_SHIFT) +#define STREAM_FEATURES_FRAGMENT_GET(reg_val) \ + (((reg_val)&STREAM_FEATURES_FRAGMENT_MASK) >> STREAM_FEATURES_FRAGMENT_SHIFT) +#define STREAM_FEATURES_FRAGMENT_SET(reg_val, value) \ + (((reg_val) & ~STREAM_FEATURES_FRAGMENT_MASK) | \ + (((value) << STREAM_FEATURES_FRAGMENT_SHIFT) & STREAM_FEATURES_FRAGMENT_MASK)) +#define STREAM_FEATURES_TILER_SHIFT 18 +#define STREAM_FEATURES_TILER_MASK (0x1 << STREAM_FEATURES_TILER_SHIFT) +#define STREAM_FEATURES_TILER_GET(reg_val) (((reg_val)&STREAM_FEATURES_TILER_MASK) >> STREAM_FEATURES_TILER_SHIFT) +#define STREAM_FEATURES_TILER_SET(reg_val, value) \ + (((reg_val) & ~STREAM_FEATURES_TILER_MASK) | \ + (((value) << STREAM_FEATURES_TILER_SHIFT) & STREAM_FEATURES_TILER_MASK)) + +/* STREAM_INPUT_VA register */ +#define 
STREAM_INPUT_VA_VALUE_SHIFT 0 +#define STREAM_INPUT_VA_VALUE_MASK (0xFFFFFFFF << STREAM_INPUT_VA_VALUE_SHIFT) +#define STREAM_INPUT_VA_VALUE_GET(reg_val) (((reg_val)&STREAM_INPUT_VA_VALUE_MASK) >> STREAM_INPUT_VA_VALUE_SHIFT) +#define STREAM_INPUT_VA_VALUE_SET(reg_val, value) \ + (((reg_val) & ~STREAM_INPUT_VA_VALUE_MASK) | \ + (((value) << STREAM_INPUT_VA_VALUE_SHIFT) & STREAM_INPUT_VA_VALUE_MASK)) + +/* STREAM_OUTPUT_VA register */ +#define STREAM_OUTPUT_VA_VALUE_SHIFT 0 +#define STREAM_OUTPUT_VA_VALUE_MASK (0xFFFFFFFF << STREAM_OUTPUT_VA_VALUE_SHIFT) +#define STREAM_OUTPUT_VA_VALUE_GET(reg_val) (((reg_val)&STREAM_OUTPUT_VA_VALUE_MASK) >> STREAM_OUTPUT_VA_VALUE_SHIFT) +#define STREAM_OUTPUT_VA_VALUE_SET(reg_val, value) \ + (((reg_val) & ~STREAM_OUTPUT_VA_VALUE_MASK) | \ + (((value) << STREAM_OUTPUT_VA_VALUE_SHIFT) & STREAM_OUTPUT_VA_VALUE_MASK)) +/* End of STREAM_CONTROL_BLOCK register set definitions */ + +/* GLB_INPUT_BLOCK register set definitions */ + +/* GLB_REQ register */ +#define GLB_REQ_HALT_SHIFT 0 +#define GLB_REQ_HALT_MASK (0x1 << GLB_REQ_HALT_SHIFT) +#define GLB_REQ_HALT_GET(reg_val) (((reg_val)&GLB_REQ_HALT_MASK) >> GLB_REQ_HALT_SHIFT) +#define GLB_REQ_HALT_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_HALT_MASK) | (((value) << GLB_REQ_HALT_SHIFT) & GLB_REQ_HALT_MASK)) +#define GLB_REQ_CFG_PROGRESS_TIMER_SHIFT 1 +#define GLB_REQ_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_REQ_CFG_PROGRESS_TIMER_SHIFT) +#define GLB_REQ_CFG_PROGRESS_TIMER_GET(reg_val) \ + (((reg_val)&GLB_REQ_CFG_PROGRESS_TIMER_MASK) >> GLB_REQ_CFG_PROGRESS_TIMER_SHIFT) +#define GLB_REQ_CFG_PROGRESS_TIMER_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_CFG_PROGRESS_TIMER_MASK) | \ + (((value) << GLB_REQ_CFG_PROGRESS_TIMER_SHIFT) & GLB_REQ_CFG_PROGRESS_TIMER_MASK)) +#define GLB_REQ_CFG_ALLOC_EN_SHIFT 2 +#define GLB_REQ_CFG_ALLOC_EN_MASK (0x1 << GLB_REQ_CFG_ALLOC_EN_SHIFT) +#define GLB_REQ_CFG_ALLOC_EN_GET(reg_val) (((reg_val)&GLB_REQ_CFG_ALLOC_EN_MASK) >> GLB_REQ_CFG_ALLOC_EN_SHIFT) +#define GLB_REQ_CFG_ALLOC_EN_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_CFG_ALLOC_EN_MASK) | (((value) << GLB_REQ_CFG_ALLOC_EN_SHIFT) & GLB_REQ_CFG_ALLOC_EN_MASK)) +#define GLB_REQ_CFG_PWROFF_TIMER_SHIFT 3 +#define GLB_REQ_CFG_PWROFF_TIMER_MASK (0x1 << GLB_REQ_CFG_PWROFF_TIMER_SHIFT) +#define GLB_REQ_CFG_PWROFF_TIMER_GET(reg_val) \ + (((reg_val)&GLB_REQ_CFG_PWROFF_TIMER_MASK) >> GLB_REQ_CFG_PWROFF_TIMER_SHIFT) +#define GLB_REQ_CFG_PWROFF_TIMER_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_CFG_PWROFF_TIMER_MASK) | \ + (((value) << GLB_REQ_CFG_PWROFF_TIMER_SHIFT) & GLB_REQ_CFG_PWROFF_TIMER_MASK)) +#define GLB_REQ_PROTM_ENTER_SHIFT 4 +#define GLB_REQ_PROTM_ENTER_MASK (0x1 << GLB_REQ_PROTM_ENTER_SHIFT) +#define GLB_REQ_PROTM_ENTER_GET(reg_val) (((reg_val)&GLB_REQ_PROTM_ENTER_MASK) >> GLB_REQ_PROTM_ENTER_SHIFT) +#define GLB_REQ_PROTM_ENTER_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_PROTM_ENTER_MASK) | (((value) << GLB_REQ_PROTM_ENTER_SHIFT) & GLB_REQ_PROTM_ENTER_MASK)) +#define GLB_REQ_PRFCNT_ENABLE_SHIFT 5 +#define GLB_REQ_PRFCNT_ENABLE_MASK (0x1 << GLB_REQ_PRFCNT_ENABLE_SHIFT) +#define GLB_REQ_PRFCNT_ENABLE_GET(reg_val) (((reg_val)&GLB_REQ_PRFCNT_ENABLE_MASK) >> GLB_REQ_PRFCNT_ENABLE_SHIFT) +#define GLB_REQ_PRFCNT_ENABLE_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_PRFCNT_ENABLE_MASK) | \ + (((value) << GLB_REQ_PRFCNT_ENABLE_SHIFT) & GLB_REQ_PRFCNT_ENABLE_MASK)) +#define GLB_REQ_PRFCNT_SAMPLE_SHIFT 6 +#define GLB_REQ_PRFCNT_SAMPLE_MASK (0x1 << GLB_REQ_PRFCNT_SAMPLE_SHIFT) +#define GLB_REQ_PRFCNT_SAMPLE_GET(reg_val) 
(((reg_val)&GLB_REQ_PRFCNT_SAMPLE_MASK) >> GLB_REQ_PRFCNT_SAMPLE_SHIFT) +#define GLB_REQ_PRFCNT_SAMPLE_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_PRFCNT_SAMPLE_MASK) | \ + (((value) << GLB_REQ_PRFCNT_SAMPLE_SHIFT) & GLB_REQ_PRFCNT_SAMPLE_MASK)) +#define GLB_REQ_COUNTER_ENABLE_SHIFT 7 +#define GLB_REQ_COUNTER_ENABLE_MASK (0x1 << GLB_REQ_COUNTER_ENABLE_SHIFT) +#define GLB_REQ_COUNTER_ENABLE_GET(reg_val) (((reg_val)&GLB_REQ_COUNTER_ENABLE_MASK) >> GLB_REQ_COUNTER_ENABLE_SHIFT) +#define GLB_REQ_COUNTER_ENABLE_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_COUNTER_ENABLE_MASK) | \ + (((value) << GLB_REQ_COUNTER_ENABLE_SHIFT) & GLB_REQ_COUNTER_ENABLE_MASK)) +#define GLB_REQ_PING_SHIFT 8 +#define GLB_REQ_PING_MASK (0x1 << GLB_REQ_PING_SHIFT) +#define GLB_REQ_PING_GET(reg_val) (((reg_val)&GLB_REQ_PING_MASK) >> GLB_REQ_PING_SHIFT) +#define GLB_REQ_PING_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_PING_MASK) | (((value) << GLB_REQ_PING_SHIFT) & GLB_REQ_PING_MASK)) +#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT 9 +#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK \ + (0x1 << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) +#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_GET(reg_val) \ + (((reg_val)&GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) >> \ + GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) +#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) | \ + (((value) << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) & \ + GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK)) +#define GLB_REQ_INACTIVE_COMPUTE_SHIFT 20 +#define GLB_REQ_INACTIVE_COMPUTE_MASK (0x1 << GLB_REQ_INACTIVE_COMPUTE_SHIFT) +#define GLB_REQ_INACTIVE_COMPUTE_GET(reg_val) \ + (((reg_val)&GLB_REQ_INACTIVE_COMPUTE_MASK) >> GLB_REQ_INACTIVE_COMPUTE_SHIFT) +#define GLB_REQ_INACTIVE_COMPUTE_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_INACTIVE_COMPUTE_MASK) | \ + (((value) << GLB_REQ_INACTIVE_COMPUTE_SHIFT) & GLB_REQ_INACTIVE_COMPUTE_MASK)) +#define GLB_REQ_INACTIVE_FRAGMENT_SHIFT 21 +#define GLB_REQ_INACTIVE_FRAGMENT_MASK (0x1 << GLB_REQ_INACTIVE_FRAGMENT_SHIFT) +#define GLB_REQ_INACTIVE_FRAGMENT_GET(reg_val) \ + (((reg_val)&GLB_REQ_INACTIVE_FRAGMENT_MASK) >> GLB_REQ_INACTIVE_FRAGMENT_SHIFT) +#define GLB_REQ_INACTIVE_FRAGMENT_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_INACTIVE_FRAGMENT_MASK) | \ + (((value) << GLB_REQ_INACTIVE_FRAGMENT_SHIFT) & GLB_REQ_INACTIVE_FRAGMENT_MASK)) +#define GLB_REQ_INACTIVE_TILER_SHIFT 22 +#define GLB_REQ_INACTIVE_TILER_MASK (0x1 << GLB_REQ_INACTIVE_TILER_SHIFT) +#define GLB_REQ_INACTIVE_TILER_GET(reg_val) (((reg_val)&GLB_REQ_INACTIVE_TILER_MASK) >> GLB_REQ_INACTIVE_TILER_SHIFT) +#define GLB_REQ_INACTIVE_TILER_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_INACTIVE_TILER_MASK) | \ + (((value) << GLB_REQ_INACTIVE_TILER_SHIFT) & GLB_REQ_INACTIVE_TILER_MASK)) +#define GLB_REQ_PROTM_EXIT_SHIFT 23 +#define GLB_REQ_PROTM_EXIT_MASK (0x1 << GLB_REQ_PROTM_EXIT_SHIFT) +#define GLB_REQ_PROTM_EXIT_GET(reg_val) (((reg_val)&GLB_REQ_PROTM_EXIT_MASK) >> GLB_REQ_PROTM_EXIT_SHIFT) +#define GLB_REQ_PROTM_EXIT_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_PROTM_EXIT_MASK) | (((value) << GLB_REQ_PROTM_EXIT_SHIFT) & GLB_REQ_PROTM_EXIT_MASK)) +#define GLB_REQ_PRFCNT_THRESHOLD_SHIFT 24 +#define GLB_REQ_PRFCNT_THRESHOLD_MASK (0x1 << GLB_REQ_PRFCNT_THRESHOLD_SHIFT) +#define GLB_REQ_PRFCNT_THRESHOLD_GET(reg_val) \ + (((reg_val)&GLB_REQ_PRFCNT_THRESHOLD_MASK) >> \ + GLB_REQ_PRFCNT_THRESHOLD_SHIFT) +#define GLB_REQ_PRFCNT_THRESHOLD_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_PRFCNT_THRESHOLD_MASK) | \ + (((value) << 
GLB_REQ_PRFCNT_THRESHOLD_SHIFT) & \ + GLB_REQ_PRFCNT_THRESHOLD_MASK)) +#define GLB_REQ_PRFCNT_OVERFLOW_SHIFT 25 +#define GLB_REQ_PRFCNT_OVERFLOW_MASK (0x1 << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) +#define GLB_REQ_PRFCNT_OVERFLOW_GET(reg_val) \ + (((reg_val)&GLB_REQ_PRFCNT_OVERFLOW_MASK) >> \ + GLB_REQ_PRFCNT_OVERFLOW_SHIFT) +#define GLB_REQ_PRFCNT_OVERFLOW_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_PRFCNT_OVERFLOW_MASK) | \ + (((value) << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) & \ + GLB_REQ_PRFCNT_OVERFLOW_MASK)) +#define GLB_REQ_DEBUG_CSF_REQ_SHIFT 30 +#define GLB_REQ_DEBUG_CSF_REQ_MASK (0x1 << GLB_REQ_DEBUG_CSF_REQ_SHIFT) +#define GLB_REQ_DEBUG_CSF_REQ_GET(reg_val) (((reg_val)&GLB_REQ_DEBUG_CSF_REQ_MASK) >> GLB_REQ_DEBUG_CSF_REQ_SHIFT) +#define GLB_REQ_DEBUG_CSF_REQ_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_DEBUG_CSF_REQ_MASK) | \ + (((value) << GLB_REQ_DEBUG_CSF_REQ_SHIFT) & GLB_REQ_DEBUG_CSF_REQ_MASK)) +#define GLB_REQ_DEBUG_HOST_REQ_SHIFT 31 +#define GLB_REQ_DEBUG_HOST_REQ_MASK (0x1 << GLB_REQ_DEBUG_HOST_REQ_SHIFT) +#define GLB_REQ_DEBUG_HOST_REQ_GET(reg_val) (((reg_val)&GLB_REQ_DEBUG_HOST_REQ_MASK) >> GLB_REQ_DEBUG_HOST_REQ_SHIFT) +#define GLB_REQ_DEBUG_HOST_REQ_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_DEBUG_HOST_REQ_MASK) | \ + (((value) << GLB_REQ_DEBUG_HOST_REQ_SHIFT) & GLB_REQ_DEBUG_HOST_REQ_MASK)) + +/* GLB_ACK_IRQ_MASK register */ +#define GLB_ACK_IRQ_MASK_HALT_SHIFT 0 +#define GLB_ACK_IRQ_MASK_HALT_MASK (0x1 << GLB_ACK_IRQ_MASK_HALT_SHIFT) +#define GLB_ACK_IRQ_MASK_HALT_GET(reg_val) (((reg_val)&GLB_ACK_IRQ_MASK_HALT_MASK) >> GLB_ACK_IRQ_MASK_HALT_SHIFT) +#define GLB_ACK_IRQ_MASK_HALT_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_HALT_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_HALT_SHIFT) & GLB_ACK_IRQ_MASK_HALT_MASK)) +#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT 1 +#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT) +#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_GET(reg_val) \ + (((reg_val)&GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK) >> GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT) +#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT) & GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK)) +#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT 2 +#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT) +#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_GET(reg_val) \ + (((reg_val)&GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK) >> GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT) +#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT) & GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK)) +#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT 3 +#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT) +#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_GET(reg_val) \ + (((reg_val)&GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK) >> GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT) +#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT) & GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK)) +#define GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT 4 +#define GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK (0x1 << GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT) +#define GLB_ACK_IRQ_MASK_PROTM_ENTER_GET(reg_val) \ + 
(((reg_val)&GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK) >> GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT) +#define GLB_ACK_IRQ_MASK_PROTM_ENTER_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT) & GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK)) +#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT 5 +#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT) +#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_GET(reg_val) \ + (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT) +#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK)) +#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT 6 +#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT) +#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_GET(reg_val) \ + (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT) +#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK)) +#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT 7 +#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK (0x1 << GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT) +#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_GET(reg_val) \ + (((reg_val)&GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK) >> GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT) +#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT) & GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK)) +#define GLB_ACK_IRQ_MASK_PING_SHIFT 8 +#define GLB_ACK_IRQ_MASK_PING_MASK (0x1 << GLB_ACK_IRQ_MASK_PING_SHIFT) +#define GLB_ACK_IRQ_MASK_PING_GET(reg_val) (((reg_val)&GLB_ACK_IRQ_MASK_PING_MASK) >> GLB_ACK_IRQ_MASK_PING_SHIFT) +#define GLB_ACK_IRQ_MASK_PING_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_PING_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_PING_SHIFT) & GLB_ACK_IRQ_MASK_PING_MASK)) +#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT 9 +#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK \ + (0x1 << GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) +#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_GET(reg_val) \ + (((reg_val)&GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) >> \ + GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) +#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) & \ + GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK)) +#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT 20 +#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT) +#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_GET(reg_val) \ + (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT) +#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK)) +#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT 21 +#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT) +#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_GET(reg_val) \ + 
(((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT) +#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK)) +#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT 22 +#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT) +#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_GET(reg_val) \ + (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT) +#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK)) +#define GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT 23 +#define GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK (0x1 << GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT) +#define GLB_ACK_IRQ_MASK_PROTM_EXIT_GET(reg_val) \ + (((reg_val)&GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK) >> GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT) +#define GLB_ACK_IRQ_MASK_PROTM_EXIT_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT) & GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK)) +#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT 24 +#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK \ + (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) +#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_GET(reg_val) \ + (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) >> \ + GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) +#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) & \ + GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK)) +#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT 25 +#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK \ + (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) +#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_GET(reg_val) \ + (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) >> \ + GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) +#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) & \ + GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK)) +#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT 30 +#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK (0x1 << GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT) +#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_GET(reg_val) \ + (((reg_val)&GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK) >> GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT) +#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT) & GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK)) +#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT 31 +#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK (0x1 << GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT) +#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_GET(reg_val) \ + (((reg_val)&GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK) >> GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT) +#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK) | \ + (((value) << GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT) & GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK)) + +/* GLB_PROGRESS_TIMER register */ +#define GLB_PROGRESS_TIMER_TIMEOUT_SHIFT 0 +#define GLB_PROGRESS_TIMER_TIMEOUT_MASK (0xFFFFFFFF << 
GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) +#define GLB_PROGRESS_TIMER_TIMEOUT_GET(reg_val) \ + (((reg_val)&GLB_PROGRESS_TIMER_TIMEOUT_MASK) >> GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) +#define GLB_PROGRESS_TIMER_TIMEOUT_SET(reg_val, value) \ + (((reg_val) & ~GLB_PROGRESS_TIMER_TIMEOUT_MASK) | \ + (((value) << GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) & GLB_PROGRESS_TIMER_TIMEOUT_MASK)) + +/* GLB_PWROFF_TIMER register */ +#define GLB_PWROFF_TIMER_TIMEOUT_SHIFT 0 +#define GLB_PWROFF_TIMER_TIMEOUT_MASK (0x7FFFFFFF << GLB_PWROFF_TIMER_TIMEOUT_SHIFT) +#define GLB_PWROFF_TIMER_TIMEOUT_GET(reg_val) \ + (((reg_val)&GLB_PWROFF_TIMER_TIMEOUT_MASK) >> GLB_PWROFF_TIMER_TIMEOUT_SHIFT) +#define GLB_PWROFF_TIMER_TIMEOUT_SET(reg_val, value) \ + (((reg_val) & ~GLB_PWROFF_TIMER_TIMEOUT_MASK) | \ + (((value) << GLB_PWROFF_TIMER_TIMEOUT_SHIFT) & GLB_PWROFF_TIMER_TIMEOUT_MASK)) +#define GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT 31 +#define GLB_PWROFF_TIMER_TIMER_SOURCE_MASK (0x1 << GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT) +#define GLB_PWROFF_TIMER_TIMER_SOURCE_GET(reg_val) \ + (((reg_val)&GLB_PWROFF_TIMER_TIMER_SOURCE_MASK) >> GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT) +#define GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val, value) \ + (((reg_val) & ~GLB_PWROFF_TIMER_TIMER_SOURCE_MASK) | \ + (((value) << GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT) & GLB_PWROFF_TIMER_TIMER_SOURCE_MASK)) +/* GLB_PWROFF_TIMER_TIMER_SOURCE values */ +#define GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0 +#define GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1 +/* End of GLB_PWROFF_TIMER_TIMER_SOURCE values */ + +/* GLB_ALLOC_EN register */ +#define GLB_ALLOC_EN_MASK_SHIFT 0 +#define GLB_ALLOC_EN_MASK_MASK (0xFFFFFFFFFFFFFFFF << GLB_ALLOC_EN_MASK_SHIFT) +#define GLB_ALLOC_EN_MASK_GET(reg_val) (((reg_val)&GLB_ALLOC_EN_MASK_MASK) >> GLB_ALLOC_EN_MASK_SHIFT) +#define GLB_ALLOC_EN_MASK_SET(reg_val, value) \ + (((reg_val) & ~GLB_ALLOC_EN_MASK_MASK) | (((value) << GLB_ALLOC_EN_MASK_SHIFT) & GLB_ALLOC_EN_MASK_MASK)) + +/* GLB_PROTM_COHERENCY register */ +#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT 0 +#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK \ + (0xFFFFFFFF << GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT) +#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_GET(reg_val) \ + (((reg_val)&GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK) >> \ + GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT) +#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SET(reg_val, value) \ + (((reg_val) & ~GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK) | \ + (((value) << GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT) & \ + GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK)) +/* End of GLB_INPUT_BLOCK register set definitions */ + +/* GLB_OUTPUT_BLOCK register set definitions */ + +/* GLB_ACK register */ +#define GLB_ACK_CFG_PROGRESS_TIMER_SHIFT 1 +#define GLB_ACK_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_ACK_CFG_PROGRESS_TIMER_SHIFT) +#define GLB_ACK_CFG_PROGRESS_TIMER_GET(reg_val) \ + (((reg_val)&GLB_ACK_CFG_PROGRESS_TIMER_MASK) >> GLB_ACK_CFG_PROGRESS_TIMER_SHIFT) +#define GLB_ACK_CFG_PROGRESS_TIMER_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_CFG_PROGRESS_TIMER_MASK) | \ + (((value) << GLB_ACK_CFG_PROGRESS_TIMER_SHIFT) & GLB_ACK_CFG_PROGRESS_TIMER_MASK)) +#define GLB_ACK_CFG_ALLOC_EN_SHIFT 2 +#define GLB_ACK_CFG_ALLOC_EN_MASK (0x1 << GLB_ACK_CFG_ALLOC_EN_SHIFT) +#define GLB_ACK_CFG_ALLOC_EN_GET(reg_val) (((reg_val)&GLB_ACK_CFG_ALLOC_EN_MASK) >> GLB_ACK_CFG_ALLOC_EN_SHIFT) +#define GLB_ACK_CFG_ALLOC_EN_SET(reg_val, value) \ + (((reg_val) & 
~GLB_ACK_CFG_ALLOC_EN_MASK) | (((value) << GLB_ACK_CFG_ALLOC_EN_SHIFT) & GLB_ACK_CFG_ALLOC_EN_MASK)) +/* End of GLB_OUTPUT_BLOCK register set definitions */ + +/* The following register and fields are for headers before 10.x.7/11.x.4 */ +#define GLB_REQ_IDLE_ENABLE_SHIFT (10) +#define GLB_REQ_REQ_IDLE_ENABLE (1 << GLB_REQ_IDLE_ENABLE_SHIFT) +#define GLB_REQ_REQ_IDLE_DISABLE (0 << GLB_REQ_IDLE_ENABLE_SHIFT) +#define GLB_REQ_IDLE_ENABLE_MASK (0x1 << GLB_REQ_IDLE_ENABLE_SHIFT) +#define GLB_REQ_IDLE_DISABLE_MASK (0x1 << GLB_REQ_IDLE_ENABLE_SHIFT) +#define GLB_REQ_IDLE_EVENT_SHIFT (26) +#define GLB_REQ_IDLE_EVENT_MASK (0x1 << GLB_REQ_IDLE_EVENT_SHIFT) +#define GLB_ACK_IDLE_ENABLE_SHIFT (10) +#define GLB_ACK_ACK_IDLE_ENABLE (1 << GLB_ACK_IDLE_ENABLE_SHIFT) +#define GLB_ACK_ACK_IDLE_DISABLE (0 << GLB_ACK_IDLE_ENABLE_SHIFT) +#define GLB_ACK_IDLE_ENABLE_MASK (0x1 << GLB_ACK_IDLE_ENABLE_SHIFT) +#define GLB_ACK_IDLE_EVENT_SHIFT (26) +#define GLB_ACK_IDLE_EVENT_MASK (0x1 << GLB_ACK_IDLE_EVENT_SHIFT) + +#define GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT (26) +#define GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK (0x1 << GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT) + +#define GLB_IDLE_TIMER (0x0080) +/* GLB_IDLE_TIMER register */ +#define GLB_IDLE_TIMER_TIMEOUT_SHIFT (0) +#define GLB_IDLE_TIMER_TIMEOUT_MASK ((0x7FFFFFFF) << GLB_IDLE_TIMER_TIMEOUT_SHIFT) +#define GLB_IDLE_TIMER_TIMEOUT_GET(reg_val) (((reg_val)&GLB_IDLE_TIMER_TIMEOUT_MASK) >> GLB_IDLE_TIMER_TIMEOUT_SHIFT) +#define GLB_IDLE_TIMER_TIMEOUT_SET(reg_val, value) \ + (((reg_val) & ~GLB_IDLE_TIMER_TIMEOUT_MASK) | \ + (((value) << GLB_IDLE_TIMER_TIMEOUT_SHIFT) & GLB_IDLE_TIMER_TIMEOUT_MASK)) +#define GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT (31) +#define GLB_IDLE_TIMER_TIMER_SOURCE_MASK ((0x1) << GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) +#define GLB_IDLE_TIMER_TIMER_SOURCE_GET(reg_val) \ + (((reg_val)&GLB_IDLE_TIMER_TIMER_SOURCE_MASK) >> GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) +#define GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val, value) \ + (((reg_val) & ~GLB_IDLE_TIMER_TIMER_SOURCE_MASK) | \ + (((value) << GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) & GLB_IDLE_TIMER_TIMER_SOURCE_MASK)) +/* GLB_IDLE_TIMER_TIMER_SOURCE values */ +#define GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0 +#define GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1 +/* End of GLB_IDLE_TIMER_TIMER_SOURCE values */ + +#define CSG_STATUS_STATE (0x0018) /* CSG state status register */ +/* CSG_STATUS_STATE register */ +#define CSG_STATUS_STATE_IDLE_SHIFT (0) +#define CSG_STATUS_STATE_IDLE_MASK ((0x1) << CSG_STATUS_STATE_IDLE_SHIFT) +#define CSG_STATUS_STATE_IDLE_GET(reg_val) \ + (((reg_val)&CSG_STATUS_STATE_IDLE_MASK) >> CSG_STATUS_STATE_IDLE_SHIFT) +#define CSG_STATUS_STATE_IDLE_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_STATE_IDLE_MASK) | \ + (((value) << CSG_STATUS_STATE_IDLE_SHIFT) & CSG_STATUS_STATE_IDLE_MASK)) + +#endif /* _UAPI_GPU_CSF_REGISTERS_H_ */
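Everything in this header follows one convention: a *_SHIFT/*_MASK pair per field plus GET/SET read-modify-write accessors, and each *_REQ/*_ACK register pair forms a toggle handshake in which the host flips a request bit and the firmware acknowledges by making the corresponding ack bit equal again. A rough sketch of a global ping request under stated assumptions: kbase_csf_fw_read()/kbase_csf_fw_write() are hypothetical stand-ins for the driver's firmware-interface accessors, and GLB_REQ/GLB_ACK are the register offsets defined earlier in this header.

/* Hypothetical helpers standing in for the real FW interface accessors */
extern __u32 kbase_csf_fw_read(__u32 offset);
extern void kbase_csf_fw_write(__u32 offset, __u32 value);

static void glb_request_ping(void)
{
	__u32 req = kbase_csf_fw_read(GLB_REQ);
	__u32 ack = kbase_csf_fw_read(GLB_ACK);

	/* Raise the request: make REQ.PING differ from ACK.PING */
	req = GLB_REQ_PING_SET(req, !GLB_REQ_PING_GET(ack));
	kbase_csf_fw_write(GLB_REQ, req);

	/* The firmware completes the handshake by updating GLB_ACK so
	 * that the two PING bits match again.
	 */
}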
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _UAPI_KBASE_CSF_IOCTL_H_ +#define _UAPI_KBASE_CSF_IOCTL_H_ + +#include <asm/ioctl.h> +#include <linux/types.h> + +/* + * 1.0: + * - CSF IOCTL header separated from JM + * 1.1: + * - Add a new priority level BASE_QUEUE_GROUP_PRIORITY_REALTIME + * - Add ioctl 54: This controls the priority setting. + * 1.2: + * - Add new CSF GPU_FEATURES register into the property structure + * returned by KBASE_IOCTL_GET_GPUPROPS + * 1.3: + * - Add __u32 group_uid member to + * &struct kbase_ioctl_cs_queue_group_create.out + * 1.4: + * - Replace padding in kbase_ioctl_cs_get_glb_iface with + * instr_features member of same size + */ + +#define BASE_UK_VERSION_MAJOR 1 +#define BASE_UK_VERSION_MINOR 4 + +/** + * struct kbase_ioctl_version_check - Check version compatibility between + * kernel and userspace + * + * @major: Major version number + * @minor: Minor version number + */ +struct kbase_ioctl_version_check { + __u16 major; + __u16 minor; +}; + +#define KBASE_IOCTL_VERSION_CHECK_RESERVED \ + _IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check) + + +/** + * struct kbase_ioctl_cs_queue_register - Register a GPU command queue with the + * base back-end + * + * @buffer_gpu_addr: GPU address of the buffer backing the queue + * @buffer_size: Size of the buffer in bytes + * @priority: Priority of the queue within a group when run within a process + * @padding: Currently unused, must be zero + */ +struct kbase_ioctl_cs_queue_register { + __u64 buffer_gpu_addr; + __u32 buffer_size; + __u8 priority; + __u8 padding[3]; +}; + +#define KBASE_IOCTL_CS_QUEUE_REGISTER \ + _IOW(KBASE_IOCTL_TYPE, 36, struct kbase_ioctl_cs_queue_register) + +/** + * struct kbase_ioctl_cs_queue_kick - Kick the GPU command queue group scheduler + * to notify that a queue has been updated + * + * @buffer_gpu_addr: GPU address of the buffer backing the queue + */ +struct kbase_ioctl_cs_queue_kick { + __u64 buffer_gpu_addr; +}; + +#define KBASE_IOCTL_CS_QUEUE_KICK \ + _IOW(KBASE_IOCTL_TYPE, 37, struct kbase_ioctl_cs_queue_kick) + +/** + * union kbase_ioctl_cs_queue_bind - Bind a GPU command queue to a group + * + * @in: Input parameters + * @in.buffer_gpu_addr: GPU address of the buffer backing the queue + * @in.group_handle: Handle of the group to which the queue should be bound + * @in.csi_index: Index of the CSF interface the queue should be bound to + * @in.padding: Currently unused, must be zero + * @out: Output parameters + * @out.mmap_handle: Handle to be used for creating the mapping of CS + * input/output pages + */ +union kbase_ioctl_cs_queue_bind { + struct { + __u64 buffer_gpu_addr; + __u8 group_handle; + __u8 csi_index; + __u8 padding[6]; + } in; + struct { + __u64 mmap_handle; + } out; +}; + +#define KBASE_IOCTL_CS_QUEUE_BIND \ + _IOWR(KBASE_IOCTL_TYPE, 39, union kbase_ioctl_cs_queue_bind) + +/* ioctl 40 is free to use */ + +/** + * struct kbase_ioctl_cs_queue_terminate - Terminate a GPU command queue + * + * @buffer_gpu_addr: GPU address of the buffer backing the queue + */ +struct kbase_ioctl_cs_queue_terminate { + __u64 buffer_gpu_addr; +}; + +#define KBASE_IOCTL_CS_QUEUE_TERMINATE \ +
_IOW(KBASE_IOCTL_TYPE, 41, struct kbase_ioctl_cs_queue_terminate) + +/** + * union kbase_ioctl_cs_queue_group_create - Create a GPU command queue group + * @in: Input parameters + * @in.tiler_mask: Mask of tiler endpoints the group is allowed to use. + * @in.fragment_mask: Mask of fragment endpoints the group is allowed to use. + * @in.compute_mask: Mask of compute endpoints the group is allowed to use. + * @in.cs_min: Minimum number of CSs required. + * @in.priority: Queue group's priority within a process. + * @in.tiler_max: Maximum number of tiler endpoints the group is allowed + * to use. + * @in.fragment_max: Maximum number of fragment endpoints the group is + * allowed to use. + * @in.compute_max: Maximum number of compute endpoints the group is allowed + * to use. + * @in.padding: Currently unused, must be zero + * @out: Output parameters + * @out.group_handle: Handle of a newly created queue group. + * @out.padding: Currently unused, must be zero + * @out.group_uid: UID of the queue group available to base. + */ +union kbase_ioctl_cs_queue_group_create { + struct { + __u64 tiler_mask; + __u64 fragment_mask; + __u64 compute_mask; + __u8 cs_min; + __u8 priority; + __u8 tiler_max; + __u8 fragment_max; + __u8 compute_max; + __u8 padding[3]; + + } in; + struct { + __u8 group_handle; + __u8 padding[3]; + __u32 group_uid; + } out; +}; + +#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE \ + _IOWR(KBASE_IOCTL_TYPE, 42, union kbase_ioctl_cs_queue_group_create) + +/** + * struct kbase_ioctl_cs_queue_group_term - Terminate a GPU command queue group + * + * @group_handle: Handle of the queue group to be terminated + * @padding: Padding to round up to a multiple of 8 bytes, must be zero + */ +struct kbase_ioctl_cs_queue_group_term { + __u8 group_handle; + __u8 padding[7]; +}; + +#define KBASE_IOCTL_CS_QUEUE_GROUP_TERMINATE \ + _IOW(KBASE_IOCTL_TYPE, 43, struct kbase_ioctl_cs_queue_group_term) + +#define KBASE_IOCTL_CS_EVENT_SIGNAL \ + _IO(KBASE_IOCTL_TYPE, 44) + +typedef __u8 base_kcpu_queue_id; /* We support up to 256 active KCPU queues */ + +/** + * struct kbase_ioctl_kcpu_queue_new - Create a KCPU command queue + * + * @id: ID of the new command queue returned by the kernel + * @padding: Padding to round up to a multiple of 8 bytes, must be zero + */ +struct kbase_ioctl_kcpu_queue_new { + base_kcpu_queue_id id; + __u8 padding[7]; +}; + +#define KBASE_IOCTL_KCPU_QUEUE_CREATE \ + _IOR(KBASE_IOCTL_TYPE, 45, struct kbase_ioctl_kcpu_queue_new) + +/** + * struct kbase_ioctl_kcpu_queue_delete - Destroy a KCPU command queue + * + * @id: ID of the command queue to be destroyed + * @padding: Padding to round up to a multiple of 8 bytes, must be zero + */ +struct kbase_ioctl_kcpu_queue_delete { + base_kcpu_queue_id id; + __u8 padding[7]; +}; + +#define KBASE_IOCTL_KCPU_QUEUE_DELETE \ + _IOW(KBASE_IOCTL_TYPE, 46, struct kbase_ioctl_kcpu_queue_delete) + +/** + * struct kbase_ioctl_kcpu_queue_enqueue - Enqueue commands into the KCPU queue + * + * @addr: Memory address of an array of struct base_kcpu_queue_command + * @nr_commands: Number of commands in the array + * @id: kcpu queue identifier, returned by KBASE_IOCTL_KCPU_QUEUE_CREATE ioctl + * @padding: Padding to round up to a multiple of 8 bytes, must be zero + */ +struct kbase_ioctl_kcpu_queue_enqueue { + __u64 addr; + __u32 nr_commands; + base_kcpu_queue_id id; + __u8 padding[3]; +}; + +#define KBASE_IOCTL_KCPU_QUEUE_ENQUEUE \ + _IOW(KBASE_IOCTL_TYPE, 47, struct kbase_ioctl_kcpu_queue_enqueue) + +/** + * union kbase_ioctl_cs_tiler_heap_init - Initialize 
chunked tiler memory heap + * @in: Input parameters + * @in.chunk_size: Size of each chunk. + * @in.initial_chunks: Initial number of chunks that heap will be created with. + * @in.max_chunks: Maximum number of chunks that the heap is allowed to use. + * @in.target_in_flight: Number of render-passes that the driver should attempt to + * keep in flight for which allocation of new chunks is + * allowed. + * @in.group_id: Group ID to be used for physical allocations. + * @in.padding: Padding + * @out: Output parameters + * @out.gpu_heap_va: GPU VA (virtual address) of Heap context that was set up + * for the heap. + * @out.first_chunk_va: GPU VA of the first chunk allocated for the heap, + * actually points to the header of heap chunk and not to + * the low address of free memory in the chunk. + */ +union kbase_ioctl_cs_tiler_heap_init { + struct { + __u32 chunk_size; + __u32 initial_chunks; + __u32 max_chunks; + __u16 target_in_flight; + __u8 group_id; + __u8 padding; + } in; + struct { + __u64 gpu_heap_va; + __u64 first_chunk_va; + } out; +}; + +#define KBASE_IOCTL_CS_TILER_HEAP_INIT \ + _IOWR(KBASE_IOCTL_TYPE, 48, union kbase_ioctl_cs_tiler_heap_init) + +/** + * struct kbase_ioctl_cs_tiler_heap_term - Terminate a chunked tiler heap + * instance + * + * @gpu_heap_va: GPU VA of Heap context that was set up for the heap. + */ +struct kbase_ioctl_cs_tiler_heap_term { + __u64 gpu_heap_va; +}; + +#define KBASE_IOCTL_CS_TILER_HEAP_TERM \ + _IOW(KBASE_IOCTL_TYPE, 49, struct kbase_ioctl_cs_tiler_heap_term) + +/** + * union kbase_ioctl_cs_get_glb_iface - Request the global control block + * of CSF interface capabilities + * + * @in: Input parameters + * @in.max_group_num: The maximum number of groups to be read. Can be 0, in + * which case groups_ptr is unused. + * @in.max_total_stream_num: The maximum number of CSs to be read. Can be 0, in + * which case streams_ptr is unused. + * @in.groups_ptr: Pointer where to store all the group data (sequentially). + * @in.streams_ptr: Pointer where to store all the CS data (sequentially). + * @out: Output parameters + * @out.glb_version: Global interface version. + * @out.features: Bit mask of features (e.g. whether certain types of job + * can be suspended). + * @out.group_num: Number of CSGs supported. + * @out.prfcnt_size: Size of CSF performance counters, in bytes. Bits 31:16 + * hold the size of firmware performance counter data + * and 15:0 hold the size of hardware performance counter + * data. + * @out.total_stream_num: Total number of CSs, summed across all groups. + * @out.instr_features: Instrumentation features. Bits 7:4 hold the maximum + * size of events. Bits 3:0 hold the offset update rate.
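+ *
+ * A hypothetical userspace call sequence is sketched below; the file
+ * descriptor and variable names are invented for illustration, and the
+ * prfcnt_size decode simply follows the 31:16/15:0 split described above:
+ *
+ *   union kbase_ioctl_cs_get_glb_iface iface = {0};
+ *
+ *   if (ioctl(kbase_fd, KBASE_IOCTL_CS_GET_GLB_IFACE, &iface) == 0) {
+ *           __u32 fw_prfcnt_bytes = iface.out.prfcnt_size >> 16;
+ *           __u32 hw_prfcnt_bytes = iface.out.prfcnt_size & 0xFFFF;
+ *   }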
+ * + */ +union kbase_ioctl_cs_get_glb_iface { + struct { + __u32 max_group_num; + __u32 max_total_stream_num; + __u64 groups_ptr; + __u64 streams_ptr; + } in; + struct { + __u32 glb_version; + __u32 features; + __u32 group_num; + __u32 prfcnt_size; + __u32 total_stream_num; + __u32 instr_features; + } out; +}; + +#define KBASE_IOCTL_CS_GET_GLB_IFACE \ + _IOWR(KBASE_IOCTL_TYPE, 51, union kbase_ioctl_cs_get_glb_iface) + +struct kbase_ioctl_cs_cpu_queue_info { + __u64 buffer; + __u64 size; +}; + +#define KBASE_IOCTL_VERSION_CHECK \ + _IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check) + +#define KBASE_IOCTL_CS_CPU_QUEUE_DUMP \ + _IOW(KBASE_IOCTL_TYPE, 53, struct kbase_ioctl_cs_cpu_queue_info) + +/*************** + * test ioctls * + ***************/ +#if MALI_UNIT_TEST +/* These ioctls are purely for test purposes and are not used in the production + * driver, they therefore may change without notice + */ + +/** + * struct kbase_ioctl_cs_event_memory_write - Write an event memory address + * @cpu_addr: Memory address to write + * @value: Value to write + * @padding: Currently unused, must be zero + */ +struct kbase_ioctl_cs_event_memory_write { + __u64 cpu_addr; + __u8 value; + __u8 padding[7]; +}; + +/** + * union kbase_ioctl_cs_event_memory_read - Read an event memory address + * @in: Input parameters + * @in.cpu_addr: Memory address to read + * @out: Output parameters + * @out.value: Value read + * @out.padding: Currently unused, must be zero + */ +union kbase_ioctl_cs_event_memory_read { + struct { + __u64 cpu_addr; + } in; + struct { + __u8 value; + __u8 padding[7]; + } out; +}; + +#endif /* MALI_UNIT_TEST */ + +#endif /* _UAPI_KBASE_CSF_IOCTL_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h new file mode 100644 index 0000000..c87154f --- /dev/null +++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h @@ -0,0 +1,335 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#ifndef _UAPI_KBASE_GPU_REGMAP_CSF_H_ +#define _UAPI_KBASE_GPU_REGMAP_CSF_H_ + +#include <linux/types.h> + +#if !MALI_USE_CSF && defined(__KERNEL__) +#error "Cannot be compiled with JM" +#endif + +/* IPA control registers */ + +#define IPA_CONTROL_BASE 0x40000 +#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE+(r)) +#define COMMAND 0x000 /* (WO) Command register */ +#define STATUS 0x004 /* (RO) Status register */ +#define TIMER 0x008 /* (RW) Timer control register */ + +#define SELECT_CSHW_LO 0x010 /* (RW) Counter select for CS hardware, low word */ +#define SELECT_CSHW_HI 0x014 /* (RW) Counter select for CS hardware, high word */ +#define SELECT_MEMSYS_LO 0x018 /* (RW) Counter select for Memory system, low word */ +#define SELECT_MEMSYS_HI 0x01C /* (RW) Counter select for Memory system, high word */ +#define SELECT_TILER_LO 0x020 /* (RW) Counter select for Tiler cores, low word */ +#define SELECT_TILER_HI 0x024 /* (RW) Counter select for Tiler cores, high word */ +#define SELECT_SHADER_LO 0x028 /* (RW) Counter select for Shader cores, low word */ +#define SELECT_SHADER_HI 0x02C /* (RW) Counter select for Shader cores, high word */ + +/* Accumulated counter values for CS hardware */ +#define VALUE_CSHW_BASE 0x100 +#define VALUE_CSHW_REG_LO(n) (VALUE_CSHW_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ +#define VALUE_CSHW_REG_HI(n) (VALUE_CSHW_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ + +/* Accumulated counter values for memory system */ +#define VALUE_MEMSYS_BASE 0x140 +#define VALUE_MEMSYS_REG_LO(n) (VALUE_MEMSYS_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ +#define VALUE_MEMSYS_REG_HI(n) (VALUE_MEMSYS_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ + +#define VALUE_TILER_BASE 0x180 +#define VALUE_TILER_REG_LO(n) (VALUE_TILER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ +#define VALUE_TILER_REG_HI(n) (VALUE_TILER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ + +#define VALUE_SHADER_BASE 0x1C0 +#define VALUE_SHADER_REG_LO(n) (VALUE_SHADER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ +#define VALUE_SHADER_REG_HI(n) (VALUE_SHADER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ + +#include "../../csf/mali_gpu_csf_control_registers.h" + +/* Set to implementation defined, outer caching */ +#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull +/* Set to write back memory, outer caching */ +#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull +/* Set to inner non-cacheable, outer-non-cacheable + * Setting defined by the alloc bits is ignored, but set to a valid encoding: + * - no-alloc on read + * - no alloc on write + */ +#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull +/* Set to shared memory, that is inner cacheable on ACE and inner or outer + * shared, otherwise inner non-cacheable. + * Outer cacheable if inner or outer shared, otherwise outer non-cacheable.
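+ *
+ * Illustrative sketch, assuming the convention that each AS_MEMATTR_INDEX_*
+ * defined below selects one byte of the 64-bit AS_MEMATTR value ("memattr"
+ * is an invented variable):
+ *
+ *   memattr |= AS_MEMATTR_AARCH64_SHARED << (8 * AS_MEMATTR_INDEX_SHARED);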
+ */ +#define AS_MEMATTR_AARCH64_SHARED 0x8ull + +/* Symbols for default MEMATTR to use + * Default is - HW implementation defined caching + */ +#define AS_MEMATTR_INDEX_DEFAULT 0 +#define AS_MEMATTR_INDEX_DEFAULT_ACE 3 + +/* HW implementation defined caching */ +#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0 +/* Force cache on */ +#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1 +/* Write-alloc */ +#define AS_MEMATTR_INDEX_WRITE_ALLOC 2 +/* Outer coherent, inner implementation defined policy */ +#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3 +/* Outer coherent, write alloc inner */ +#define AS_MEMATTR_INDEX_OUTER_WA 4 +/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */ +#define AS_MEMATTR_INDEX_NON_CACHEABLE 5 +/* Normal memory, shared between MCU and Host */ +#define AS_MEMATTR_INDEX_SHARED 6 + +/* Configuration bits for the CSF. */ +#define CSF_CONFIG 0xF00 + +/* CSF_CONFIG register */ +#define CSF_CONFIG_FORCE_COHERENCY_FEATURES_SHIFT 2 + +/* GPU control registers */ +#define CORE_FEATURES 0x008 /* (RO) Shader Core Features */ +#define MCU_CONTROL 0x700 +#define MCU_STATUS 0x704 + +#define MCU_CNTRL_ENABLE (1 << 0) +#define MCU_CNTRL_AUTO (1 << 1) +#define MCU_CNTRL_DISABLE (0) + +#define MCU_STATUS_HALTED (1 << 1) + +#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory + * region base address, low word + */ +#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory + * region base address, high word + */ +#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter + * configuration + */ + +#define PRFCNT_CSHW_EN 0x06C /* (RW) Performance counter + * enable for CS Hardware + */ + +#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable + * flags for shader cores + */ +#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable + * flags for tiler + */ +#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable + * flags for MMU/L2 cache + */ + +/* JOB IRQ flags */ +#define JOB_IRQ_GLOBAL_IF (1 << 31) /* Global interface interrupt received */ + +/* GPU_COMMAND codes */ +#define GPU_COMMAND_CODE_NOP 0x00 /* No operation, nothing happens */ +#define GPU_COMMAND_CODE_RESET 0x01 /* Reset the GPU */ +#define GPU_COMMAND_CODE_PRFCNT 0x02 /* Clear or sample performance counters */ +#define GPU_COMMAND_CODE_TIME 0x03 /* Configure time sources */ +#define GPU_COMMAND_CODE_FLUSH_CACHES 0x04 /* Flush caches */ +#define GPU_COMMAND_CODE_SET_PROTECTED_MODE 0x05 /* Places the GPU in protected mode */ +#define GPU_COMMAND_CODE_FINISH_HALT 0x06 /* Halt CSF */ +#define GPU_COMMAND_CODE_CLEAR_FAULT 0x07 /* Clear GPU_FAULTSTATUS and GPU_FAULTADDRESS, TODX */ + +/* GPU_COMMAND_RESET payloads */ + +/* This will leave the state of active jobs UNDEFINED, but will leave the external bus in a defined and idle state. + * Power domains will remain powered on. + */ +#define GPU_COMMAND_RESET_PAYLOAD_FAST_RESET 0x00 + +/* This will leave the state of active CSs UNDEFINED, but will leave the external bus in a defined and + * idle state. + */ +#define GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET 0x01 + +/* This reset will leave the state of currently active streams UNDEFINED, will likely lose data, and may leave + * the system bus in an inconsistent state. Use only as a last resort when nothing else works.
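+ *
+ * (Sketch: reset payloads are combined with GPU_COMMAND_CODE_RESET via the
+ * GPU_COMMAND_CODE_PAYLOAD() helper defined further below; for example,
+ * GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, 0x02) evaluates to
+ * 0x0201, which is exactly the GPU_COMMAND_HARD_RESET command word.)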
+ */ +#define GPU_COMMAND_RESET_PAYLOAD_HARD_RESET 0x02 + +/* GPU_COMMAND_PRFCNT payloads */ +#define GPU_COMMAND_PRFCNT_PAYLOAD_SAMPLE 0x01 /* Sample performance counters */ +#define GPU_COMMAND_PRFCNT_PAYLOAD_CLEAR 0x02 /* Clear performance counters */ + +/* GPU_COMMAND_TIME payloads */ +#define GPU_COMMAND_TIME_DISABLE 0x00 /* Disable cycle counter */ +#define GPU_COMMAND_TIME_ENABLE 0x01 /* Enable cycle counter */ + +/* GPU_COMMAND_FLUSH_CACHES payloads */ +#define GPU_COMMAND_FLUSH_PAYLOAD_NONE 0x00 /* No flush */ +#define GPU_COMMAND_FLUSH_PAYLOAD_CLEAN 0x01 /* Clean the caches */ +#define GPU_COMMAND_FLUSH_PAYLOAD_INVALIDATE 0x02 /* Invalidate the caches */ +#define GPU_COMMAND_FLUSH_PAYLOAD_CLEAN_INVALIDATE 0x03 /* Clean and invalidate the caches */ + +/* GPU_COMMAND command + payload */ +#define GPU_COMMAND_CODE_PAYLOAD(opcode, payload) \ + ((__u32)opcode | ((__u32)payload << 8)) + +/* Final GPU_COMMAND form */ +/* No operation, nothing happens */ +#define GPU_COMMAND_NOP \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_NOP, 0) + +/* Stop all external bus interfaces, and then reset the entire GPU. */ +#define GPU_COMMAND_SOFT_RESET \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET) + +/* Immediately reset the entire GPU. */ +#define GPU_COMMAND_HARD_RESET \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_HARD_RESET) + +/* Clear all performance counters, setting them all to zero. */ +#define GPU_COMMAND_PRFCNT_CLEAR \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_PRFCNT, GPU_COMMAND_PRFCNT_PAYLOAD_CLEAR) + +/* Sample all performance counters, writing them out to memory */ +#define GPU_COMMAND_PRFCNT_SAMPLE \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_PRFCNT, GPU_COMMAND_PRFCNT_PAYLOAD_SAMPLE) + +/* Starts the cycle counter, and system timestamp propagation */ +#define GPU_COMMAND_CYCLE_COUNT_START \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_ENABLE) + +/* Stops the cycle counter, and system timestamp propagation */ +#define GPU_COMMAND_CYCLE_COUNT_STOP \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_DISABLE) + +/* Clean all caches */ +#define GPU_COMMAND_CLEAN_CACHES \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, GPU_COMMAND_FLUSH_PAYLOAD_CLEAN) + +/* Clean and invalidate all caches */ +#define GPU_COMMAND_CLEAN_INV_CACHES \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, GPU_COMMAND_FLUSH_PAYLOAD_CLEAN_INVALIDATE) + +/* Places the GPU in protected mode */ +#define GPU_COMMAND_SET_PROTECTED_MODE \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_SET_PROTECTED_MODE, 0) + +/* Halt CSF */ +#define GPU_COMMAND_FINISH_HALT \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FINISH_HALT, 0) + +/* Clear GPU faults */ +#define GPU_COMMAND_CLEAR_FAULT \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_CLEAR_FAULT, 0) + +/* End Command Values */ + +/* GPU_FAULTSTATUS register */ +#define GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0 +#define GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFFul) +#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \ + (((reg_val)&GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK) \ + >> GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) +#define GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT 8 +#define GPU_FAULTSTATUS_ACCESS_TYPE_MASK \ + (0x3ul << GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT) + +#define GPU_FAULTSTATUS_ADDR_VALID_SHIFT 10 +#define GPU_FAULTSTATUS_ADDR_VALID_FLAG \ + (1ul << GPU_FAULTSTATUS_ADDR_VALID_SHIFT) + +#define GPU_FAULTSTATUS_JASID_VALID_SHIFT 11 +#define 
GPU_FAULTSTATUS_JASID_VALID_FLAG \ + (1ul << GPU_FAULTSTATUS_JASID_VALID_SHIFT) + +#define GPU_FAULTSTATUS_JASID_SHIFT 12 +#define GPU_FAULTSTATUS_JASID_MASK (0xF << GPU_FAULTSTATUS_JASID_SHIFT) +#define GPU_FAULTSTATUS_JASID_GET(reg_val) \ + (((reg_val)&GPU_FAULTSTATUS_JASID_MASK) >> GPU_FAULTSTATUS_JASID_SHIFT) +#define GPU_FAULTSTATUS_JASID_SET(reg_val, value) \ + (((reg_val) & ~GPU_FAULTSTATUS_JASID_MASK) | \ + (((value) << GPU_FAULTSTATUS_JASID_SHIFT) & GPU_FAULTSTATUS_JASID_MASK)) + +#define GPU_FAULTSTATUS_SOURCE_ID_SHIFT 16 +#define GPU_FAULTSTATUS_SOURCE_ID_MASK \ + (0xFFFFul << GPU_FAULTSTATUS_SOURCE_ID_SHIFT) +/* End GPU_FAULTSTATUS register */ + +/* GPU_FAULTSTATUS_ACCESS_TYPE values */ +#define GPU_FAULTSTATUS_ACCESS_TYPE_ATOMIC 0x0 +#define GPU_FAULTSTATUS_ACCESS_TYPE_EXECUTE 0x1 +#define GPU_FAULTSTATUS_ACCESS_TYPE_READ 0x2 +#define GPU_FAULTSTATUS_ACCESS_TYPE_WRITE 0x3 +/* End of GPU_FAULTSTATUS_ACCESS_TYPE values */ + +/* Implementation-dependent exception codes used to indicate CSG + * and CS errors that are not specified in the specs. + */ +#define GPU_EXCEPTION_TYPE_SW_FAULT_0 ((__u8)0x70) +#define GPU_EXCEPTION_TYPE_SW_FAULT_1 ((__u8)0x71) +#define GPU_EXCEPTION_TYPE_SW_FAULT_2 ((__u8)0x72) + +/* GPU_FAULTSTATUS_EXCEPTION_TYPE values */ +#define GPU_FAULTSTATUS_EXCEPTION_TYPE_OK 0x00 +#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_BUS_FAULT 0x80 +#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_SHAREABILITY_FAULT 0x88 +#define GPU_FAULTSTATUS_EXCEPTION_TYPE_SYSTEM_SHAREABILITY_FAULT 0x89 +#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_CACHEABILITY_FAULT 0x8A +/* End of GPU_FAULTSTATUS_EXCEPTION_TYPE values */ + +#define GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT GPU_U(10) +#define GPU_FAULTSTATUS_ADDRESS_VALID_MASK (GPU_U(0x1) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) +#define GPU_FAULTSTATUS_ADDRESS_VALID_GET(reg_val) \ + (((reg_val)&GPU_FAULTSTATUS_ADDRESS_VALID_MASK) >> GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) +#define GPU_FAULTSTATUS_ADDRESS_VALID_SET(reg_val, value) \ + (((reg_val) & ~GPU_FAULTSTATUS_ADDRESS_VALID_MASK) | \ + (((value) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) & GPU_FAULTSTATUS_ADDRESS_VALID_MASK)) + +/* IRQ flags */ +#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ +#define GPU_PROTECTED_FAULT (1 << 1) /* A GPU fault has occurred in protected mode */ +#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */ +#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ +#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */ +#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ +#define DOORBELL_MIRROR (1 << 18) /* Mirrors the doorbell interrupt line to the CPU */ +#define MCU_STATUS_GPU_IRQ (1 << 19) /* MCU requires attention */ + +/* + * In Debug build, + * GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE is used to clear and unmask interrupt sources of GPU_IRQ + * by writing to the GPU_IRQ_CLEAR/MASK registers. + * + * In Release build, + * GPU_IRQ_REG_COMMON is used. + * + * Note: + * CLEAN_CACHES_COMPLETED - Used separately for cache operation.
+ * DOORBELL_MIRROR - Not included in GPU_IRQ_REG_COMMON because it cannot be + * cleared by GPU_IRQ_CLEAR; including it could cause an interrupt storm + */ +#define GPU_IRQ_REG_COMMON (GPU_FAULT | GPU_PROTECTED_FAULT | RESET_COMPLETED \ + | POWER_CHANGED_ALL | MCU_STATUS_GPU_IRQ) + +/* GPU_CONTROL_MCU.GPU_IRQ_RAWSTAT */ +#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when performance count sample has completed */ + +#endif /* _UAPI_KBASE_GPU_REGMAP_CSF_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h new file mode 100644 index 0000000..1982668 --- /dev/null +++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h @@ -0,0 +1,287 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _UAPI_KBASE_GPU_REGMAP_JM_H_ +#define _UAPI_KBASE_GPU_REGMAP_JM_H_ + +#if MALI_USE_CSF && defined(__KERNEL__) +#error "Cannot be compiled with CSF" +#endif + +/* Set to implementation defined, outer caching */ +#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull +/* Set to write back memory, outer caching */ +#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull +/* Set to inner non-cacheable, outer-non-cacheable + * Setting defined by the alloc bits is ignored, but set to a valid encoding: + * - no-alloc on read + * - no alloc on write + */ +#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull + +/* Symbols for default MEMATTR to use + * Default is - HW implementation defined caching + */ +#define AS_MEMATTR_INDEX_DEFAULT 0 +#define AS_MEMATTR_INDEX_DEFAULT_ACE 3 + +/* HW implementation defined caching */ +#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0 +/* Force cache on */ +#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1 +/* Write-alloc */ +#define AS_MEMATTR_INDEX_WRITE_ALLOC 2 +/* Outer coherent, inner implementation defined policy */ +#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3 +/* Outer coherent, write alloc inner */ +#define AS_MEMATTR_INDEX_OUTER_WA 4 +/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */ +#define AS_MEMATTR_INDEX_NON_CACHEABLE 5 + +/* GPU control registers */ + +#define CORE_FEATURES 0x008 /* (RO) Shader Core Features */ +#define JS_PRESENT 0x01C /* (RO) Job slots present */ +#define LATEST_FLUSH 0x038 /* (RO) Flush ID of latest + * clean-and-invalidate operation + */ + +#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory + * region base address, low word + */ +#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory + * region base address, high word + */ +#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter + * configuration + */ +#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable + * flags for Job Manager + */ +#define PRFCNT_SHADER_EN 0x070
/* (RW) Performance counter enable + * flags for shader cores + */ +#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable + * flags for tiler + */ +#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable + * flags for MMU/L2 cache + */ + +#define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */ +#define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */ +#define JS2_FEATURES 0x0C8 /* (RO) Features of job slot 2 */ +#define JS3_FEATURES 0x0CC /* (RO) Features of job slot 3 */ +#define JS4_FEATURES 0x0D0 /* (RO) Features of job slot 4 */ +#define JS5_FEATURES 0x0D4 /* (RO) Features of job slot 5 */ +#define JS6_FEATURES 0x0D8 /* (RO) Features of job slot 6 */ +#define JS7_FEATURES 0x0DC /* (RO) Features of job slot 7 */ +#define JS8_FEATURES 0x0E0 /* (RO) Features of job slot 8 */ +#define JS9_FEATURES 0x0E4 /* (RO) Features of job slot 9 */ +#define JS10_FEATURES 0x0E8 /* (RO) Features of job slot 10 */ +#define JS11_FEATURES 0x0EC /* (RO) Features of job slot 11 */ +#define JS12_FEATURES 0x0F0 /* (RO) Features of job slot 12 */ +#define JS13_FEATURES 0x0F4 /* (RO) Features of job slot 13 */ +#define JS14_FEATURES 0x0F8 /* (RO) Features of job slot 14 */ +#define JS15_FEATURES 0x0FC /* (RO) Features of job slot 15 */ + +#define JS_FEATURES_REG(n) GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2)) + +#define JM_CONFIG 0xF00 /* (RW) Job manager configuration (implementation-specific) */ + +/* Job control registers */ + +#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */ +#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. */ + +#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */ +#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */ +#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */ +#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */ +#define JOB_SLOT4 0xA00 /* Configuration registers for job slot 4 */ +#define JOB_SLOT5 0xA80 /* Configuration registers for job slot 5 */ +#define JOB_SLOT6 0xB00 /* Configuration registers for job slot 6 */ +#define JOB_SLOT7 0xB80 /* Configuration registers for job slot 7 */ +#define JOB_SLOT8 0xC00 /* Configuration registers for job slot 8 */ +#define JOB_SLOT9 0xC80 /* Configuration registers for job slot 9 */ +#define JOB_SLOT10 0xD00 /* Configuration registers for job slot 10 */ +#define JOB_SLOT11 0xD80 /* Configuration registers for job slot 11 */ +#define JOB_SLOT12 0xE00 /* Configuration registers for job slot 12 */ +#define JOB_SLOT13 0xE80 /* Configuration registers for job slot 13 */ +#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */ +#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */ + +#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r)) + +#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */ +#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */ +#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */ +#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */ +#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */ +#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */ +#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */ +/* (RO) Extended affinity 
mask for job slot n */ +#define JS_XAFFINITY 0x1C + +#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */ +#define JS_STATUS 0x24 /* (RO) Status register for job slot n */ + +#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */ +#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */ + +#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */ +#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */ +#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */ +/* (RW) Next extended affinity mask for job slot n */ +#define JS_XAFFINITY_NEXT 0x5C + +#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */ + +#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */ + +/* No JM-specific MMU control registers */ +/* No JM-specific MMU address space control registers */ + +/* JS_COMMAND register commands */ +#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */ +#define JS_COMMAND_START 0x01 /* Start processing a job chain. Writing this value is ignored */ +#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */ +#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */ +#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */ +#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */ +#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */ +#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */ + +#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */ + +/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */ +#define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0) +#define JS_CONFIG_START_FLUSH_CLEAN (1u << 8) +#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8) +#define JS_CONFIG_START_MMU (1u << 10) +#define JS_CONFIG_JOB_CHAIN_FLAG (1u << 11) +#define JS_CONFIG_END_FLUSH_NO_ACTION JS_CONFIG_START_FLUSH_NO_ACTION +#define JS_CONFIG_END_FLUSH_CLEAN (1u << 12) +#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12) +#define JS_CONFIG_ENABLE_FLUSH_REDUCTION (1u << 14) +#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK (1u << 15) +#define JS_CONFIG_THREAD_PRI(n) ((n) << 16) + +/* JS_XAFFINITY register values */ +#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0) +#define JS_XAFFINITY_TILER_ENABLE (1u << 8) +#define JS_XAFFINITY_CACHE_ENABLE (1u << 16) + +/* JS_STATUS register values */ + +/* NOTE: Please keep these values in sync with enum base_jd_event_code in mali_base_kernel.h. + * The values are separated to avoid dependency of userspace and kernel code.
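+ *
+ * A minimal classification sketch ("js_status" is an invented variable
+ * holding a raw JS_STATUS read): values below JS_STATUS_FAULT_BASE describe
+ * the normal job life-cycle, everything from JS_STATUS_FAULT_BASE upwards
+ * is a fault:
+ *
+ *   int job_faulted = (js_status >= JS_STATUS_FAULT_BASE);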
+ */ + +/* Group of values representing the job status instead of a particular fault */ +#define JS_STATUS_NO_EXCEPTION_BASE 0x00 +#define JS_STATUS_INTERRUPTED (JS_STATUS_NO_EXCEPTION_BASE + 0x02) /* 0x02 means INTERRUPTED */ +#define JS_STATUS_STOPPED (JS_STATUS_NO_EXCEPTION_BASE + 0x03) /* 0x03 means STOPPED */ +#define JS_STATUS_TERMINATED (JS_STATUS_NO_EXCEPTION_BASE + 0x04) /* 0x04 means TERMINATED */ + +/* General fault values */ +#define JS_STATUS_FAULT_BASE 0x40 +#define JS_STATUS_CONFIG_FAULT (JS_STATUS_FAULT_BASE) /* 0x40 means CONFIG FAULT */ +#define JS_STATUS_POWER_FAULT (JS_STATUS_FAULT_BASE + 0x01) /* 0x41 means POWER FAULT */ +#define JS_STATUS_READ_FAULT (JS_STATUS_FAULT_BASE + 0x02) /* 0x42 means READ FAULT */ +#define JS_STATUS_WRITE_FAULT (JS_STATUS_FAULT_BASE + 0x03) /* 0x43 means WRITE FAULT */ +#define JS_STATUS_AFFINITY_FAULT (JS_STATUS_FAULT_BASE + 0x04) /* 0x44 means AFFINITY FAULT */ +#define JS_STATUS_BUS_FAULT (JS_STATUS_FAULT_BASE + 0x08) /* 0x48 means BUS FAULT */ + +/* Instruction or data faults */ +#define JS_STATUS_INSTRUCTION_FAULT_BASE 0x50 +#define JS_STATUS_INSTR_INVALID_PC (JS_STATUS_INSTRUCTION_FAULT_BASE) /* 0x50 means INSTR INVALID PC */ +#define JS_STATUS_INSTR_INVALID_ENC (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01) /* 0x51 means INSTR INVALID ENC */ +#define JS_STATUS_INSTR_TYPE_MISMATCH (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02) /* 0x52 means INSTR TYPE MISMATCH */ +#define JS_STATUS_INSTR_OPERAND_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03) /* 0x53 means INSTR OPERAND FAULT */ +#define JS_STATUS_INSTR_TLS_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04) /* 0x54 means INSTR TLS FAULT */ +#define JS_STATUS_INSTR_BARRIER_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05) /* 0x55 means INSTR BARRIER FAULT */ +#define JS_STATUS_INSTR_ALIGN_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06) /* 0x56 means INSTR ALIGN FAULT */ +/* NOTE: No fault with 0x57 code defined in spec. */ +#define JS_STATUS_DATA_INVALID_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08) /* 0x58 means DATA INVALID FAULT */ +#define JS_STATUS_TILE_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09) /* 0x59 means TILE RANGE FAULT */ +#define JS_STATUS_ADDRESS_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A) /* 0x5A means ADDRESS RANGE FAULT */ + +/* Other faults */ +#define JS_STATUS_MEMORY_FAULT_BASE 0x60 +#define JS_STATUS_OUT_OF_MEMORY (JS_STATUS_MEMORY_FAULT_BASE) /* 0x60 means OUT OF MEMORY */ +#define JS_STATUS_UNKNOWN 0x7F /* 0x7F means UNKNOWN */ + +/* JS_FEATURES register */ +#define JS_FEATURE_NULL_JOB (1u << 1) +#define JS_FEATURE_SET_VALUE_JOB (1u << 2) +#define JS_FEATURE_CACHE_FLUSH_JOB (1u << 3) +#define JS_FEATURE_COMPUTE_JOB (1u << 4) +#define JS_FEATURE_VERTEX_JOB (1u << 5) +#define JS_FEATURE_GEOMETRY_JOB (1u << 6) +#define JS_FEATURE_TILER_JOB (1u << 7) +#define JS_FEATURE_FUSED_JOB (1u << 8) +#define JS_FEATURE_FRAGMENT_JOB (1u << 9) + +/* JM_CONFIG register */ +#define JM_TIMESTAMP_OVERRIDE (1ul << 0) +#define JM_CLOCK_GATE_OVERRIDE (1ul << 1) +#define JM_JOB_THROTTLE_ENABLE (1ul << 2) +#define JM_JOB_THROTTLE_LIMIT_SHIFT (3) +#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F) +#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2) + +/* GPU_COMMAND values */ +#define GPU_COMMAND_NOP 0x00 /* No operation, nothing happens */ +#define GPU_COMMAND_SOFT_RESET 0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */ +#define GPU_COMMAND_HARD_RESET 0x02 /* Immediately reset the entire GPU. 
*/ +#define GPU_COMMAND_PRFCNT_CLEAR 0x03 /* Clear all performance counters, setting them all to zero. */ +#define GPU_COMMAND_PRFCNT_SAMPLE 0x04 /* Sample all performance counters, writing them out to memory */ +#define GPU_COMMAND_CYCLE_COUNT_START 0x05 /* Starts the cycle counter, and system timestamp propagation */ +#define GPU_COMMAND_CYCLE_COUNT_STOP 0x06 /* Stops the cycle counter, and system timestamp propagation */ +#define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */ +#define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */ +#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */ + +/* IRQ flags */ +#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ +#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */ +#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */ +#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ +#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */ +#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */ +#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ + +/* + * In Debug build, + * GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE is used to clear and enable interrupt sources of GPU_IRQ + * by writing to the GPU_IRQ_CLEAR/MASK registers. + * + * In Release build, + * GPU_IRQ_REG_COMMON is used. + * + * Note: + * CLEAN_CACHES_COMPLETED - Used separately for cache operation. + */ +#define GPU_IRQ_REG_COMMON (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \ + | POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED) + +#endif /* _UAPI_KBASE_GPU_REGMAP_JM_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h new file mode 100644 index 0000000..98186d2 --- /dev/null +++ b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _UAPI_KBASE_GPU_COHERENCY_H_ +#define _UAPI_KBASE_GPU_COHERENCY_H_ + +#define COHERENCY_ACE_LITE 0 +#define COHERENCY_ACE 1 +#define COHERENCY_NONE 31 +#define COHERENCY_FEATURE_BIT(x) (1 << (x)) + +#endif /* _UAPI_KBASE_GPU_COHERENCY_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h new file mode 100644 index 0000000..0145920 --- /dev/null +++ b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved.
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _UAPI_KBASE_GPU_ID_H_ +#define _UAPI_KBASE_GPU_ID_H_ + +#include <linux/types.h> + +/* GPU_ID register */ +#define GPU_ID_VERSION_STATUS_SHIFT 0 +#define GPU_ID_VERSION_MINOR_SHIFT 4 +#define GPU_ID_VERSION_MAJOR_SHIFT 12 +#define GPU_ID_VERSION_PRODUCT_ID_SHIFT 16 +#define GPU_ID_VERSION_STATUS (0xFu << GPU_ID_VERSION_STATUS_SHIFT) +#define GPU_ID_VERSION_MINOR (0xFFu << GPU_ID_VERSION_MINOR_SHIFT) +#define GPU_ID_VERSION_MAJOR (0xFu << GPU_ID_VERSION_MAJOR_SHIFT) +#define GPU_ID_VERSION_PRODUCT_ID (0xFFFFu << GPU_ID_VERSION_PRODUCT_ID_SHIFT) + +#define GPU_ID2_VERSION_STATUS_SHIFT 0 +#define GPU_ID2_VERSION_MINOR_SHIFT 4 +#define GPU_ID2_VERSION_MAJOR_SHIFT 12 +#define GPU_ID2_PRODUCT_MAJOR_SHIFT 16 +#define GPU_ID2_ARCH_REV_SHIFT 20 +#define GPU_ID2_ARCH_MINOR_SHIFT 24 +#define GPU_ID2_ARCH_MAJOR_SHIFT 28 +#define GPU_ID2_VERSION_STATUS (0xFu << GPU_ID2_VERSION_STATUS_SHIFT) +#define GPU_ID2_VERSION_MINOR (0xFFu << GPU_ID2_VERSION_MINOR_SHIFT) +#define GPU_ID2_VERSION_MAJOR (0xFu << GPU_ID2_VERSION_MAJOR_SHIFT) +#define GPU_ID2_PRODUCT_MAJOR (0xFu << GPU_ID2_PRODUCT_MAJOR_SHIFT) +#define GPU_ID2_ARCH_REV (0xFu << GPU_ID2_ARCH_REV_SHIFT) +#define GPU_ID2_ARCH_MINOR (0xFu << GPU_ID2_ARCH_MINOR_SHIFT) +#define GPU_ID2_ARCH_MAJOR (0xFu << GPU_ID2_ARCH_MAJOR_SHIFT) +#define GPU_ID2_PRODUCT_MODEL (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR) +#define GPU_ID2_VERSION (GPU_ID2_VERSION_MAJOR | \ + GPU_ID2_VERSION_MINOR | \ + GPU_ID2_VERSION_STATUS) + +/* Helper macro to create a partial GPU_ID (new format) that defines + * a product ignoring its version. + */ +#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \ + ((((__u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ + (((__u32)arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT) | \ + (((__u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT) | \ + (((__u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) + +/* Helper macro to create a partial GPU_ID (new format) that specifies the + * revision (major, minor, status) of a product + */ +#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \ + ((((__u32)version_major) << GPU_ID2_VERSION_MAJOR_SHIFT) | \ + (((__u32)version_minor) << GPU_ID2_VERSION_MINOR_SHIFT) | \ + (((__u32)version_status) << GPU_ID2_VERSION_STATUS_SHIFT)) + +/* Helper macro to create a complete GPU_ID (new format) */ +#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \ + version_major, version_minor, version_status) \ + (GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \ + product_major) | \ + GPU_ID2_VERSION_MAKE(version_major, version_minor, \ + version_status)) + +/* Helper macro to create a partial GPU_ID (new format) that identifies + * a particular GPU model by its arch_major and product_major.
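+ *
+ * For example (sketch, with "gpu_id" an invented variable holding a raw
+ * GPU_ID register value), GPU_ID2_MODEL_MAKE(9, 0) below equals
+ * GPU_ID2_PRODUCT_TTRX, and a model match can be tested with:
+ *
+ *   (gpu_id & GPU_ID2_PRODUCT_MODEL) == GPU_ID2_PRODUCT_TTRX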
+ */ +#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \ + ((((__u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ + (((__u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) + +/* Strip off the non-relevant bits from a product_id value and make it suitable + * for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU + * model. + */ +#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \ + ((((__u32)product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \ + GPU_ID2_PRODUCT_MODEL) + +#define GPU_ID2_PRODUCT_TMIX GPU_ID2_MODEL_MAKE(6, 0) +#define GPU_ID2_PRODUCT_THEX GPU_ID2_MODEL_MAKE(6, 1) +#define GPU_ID2_PRODUCT_TSIX GPU_ID2_MODEL_MAKE(7, 0) +#define GPU_ID2_PRODUCT_TDVX GPU_ID2_MODEL_MAKE(7, 3) +#define GPU_ID2_PRODUCT_TNOX GPU_ID2_MODEL_MAKE(7, 1) +#define GPU_ID2_PRODUCT_TGOX GPU_ID2_MODEL_MAKE(7, 2) +#define GPU_ID2_PRODUCT_TTRX GPU_ID2_MODEL_MAKE(9, 0) +#define GPU_ID2_PRODUCT_TNAX GPU_ID2_MODEL_MAKE(9, 1) +#define GPU_ID2_PRODUCT_TBEX GPU_ID2_MODEL_MAKE(9, 2) +#define GPU_ID2_PRODUCT_LBEX GPU_ID2_MODEL_MAKE(9, 4) +#define GPU_ID2_PRODUCT_TBAX GPU_ID2_MODEL_MAKE(9, 5) +#define GPU_ID2_PRODUCT_TDUX GPU_ID2_MODEL_MAKE(10, 1) +#define GPU_ID2_PRODUCT_TODX GPU_ID2_MODEL_MAKE(10, 2) +#define GPU_ID2_PRODUCT_LODX GPU_ID2_MODEL_MAKE(10, 7) + +/* Helper macro to create a GPU_ID assuming valid values for id, major, + * minor, status + */ +#define GPU_ID_MAKE(id, major, minor, status) \ + ((((__u32)id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \ + (((__u32)major) << GPU_ID_VERSION_MAJOR_SHIFT) | \ + (((__u32)minor) << GPU_ID_VERSION_MINOR_SHIFT) | \ + (((__u32)status) << GPU_ID_VERSION_STATUS_SHIFT)) + +#endif /* _UAPI_KBASE_GPU_ID_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h new file mode 100644 index 0000000..9977212 --- /dev/null +++ b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h @@ -0,0 +1,424 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#ifndef _UAPI_KBASE_GPU_REGMAP_H_ +#define _UAPI_KBASE_GPU_REGMAP_H_ + +#include "mali_kbase_gpu_coherency.h" +#include "mali_kbase_gpu_id.h" +#if MALI_USE_CSF +#include "backend/mali_kbase_gpu_regmap_csf.h" +#else +#include "backend/mali_kbase_gpu_regmap_jm.h" +#endif + +/* Begin Register Offsets */ +/* GPU control registers */ + +#define GPU_CONTROL_BASE 0x0000 +#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r)) +#define GPU_ID 0x000 /* (RO) GPU and revision identifier */ +#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */ +#define TILER_FEATURES 0x00C /* (RO) Tiler Features */ +#define MEM_FEATURES 0x010 /* (RO) Memory system features */ +#define MMU_FEATURES 0x014 /* (RO) MMU features */ +#define AS_PRESENT 0x018 /* (RO) Address space slots present */ +#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */ +#define GPU_IRQ_CLEAR 0x024 /* (WO) */ +#define GPU_IRQ_MASK 0x028 /* (RW) */ +#define GPU_IRQ_STATUS 0x02C /* (RO) */ + +#define GPU_COMMAND 0x030 /* (WO) */ +#define GPU_STATUS 0x034 /* (RO) */ + +#define GPU_DBGEN (1 << 8) /* DBGEN wire status */ + +#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */ +#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */ +#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */ + +#define L2_CONFIG 0x048 /* (RW) Level 2 cache configuration */ + +#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ +#define SUPER_L2_COHERENT (1 << 1) /* Shader cores within a core + * supergroup are l2 coherent + */ + +#define PWR_KEY 0x050 /* (WO) Power manager key register */ +#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */ +#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */ +#define GPU_FEATURES_LO 0x060 /* (RO) GPU features, low word */ +#define GPU_FEATURES_HI 0x064 /* (RO) GPU features, high word */ +#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */ +#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */ +#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */ +#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */ + +#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */ +#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */ +#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */ +#define THREAD_FEATURES 0x0AC /* (RO) Thread features */ +#define THREAD_TLS_ALLOC 0x310 /* (RO) Number of threads per core that TLS must be allocated for */ + +#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */ +#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */ +#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */ +#define TEXTURE_FEATURES_3 0x0BC /* (RO) Support flags for texture order */ + +#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2)) + +#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ +#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ + +#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */ +#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */ + +#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ +#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ + +#define STACK_PRESENT_LO 
0xE00 /* (RO) Core stack present bitmap, low word */ +#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */ + +#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ +#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ + +#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ +#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ + +#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ +#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ + +#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */ +#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */ + +#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ +#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ + +#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ +#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ + +#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ +#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ + +#define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */ +#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */ + +#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */ +#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */ + +#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */ +#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */ + +#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */ +#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */ + +#define STACK_PWROFF_LO 0xE30 /* (RO) Core stack power off bitmap, low word */ +#define STACK_PWROFF_HI 0xE34 /* (RO) Core stack power off bitmap, high word */ + +#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */ +#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */ + +#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */ +#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */ + +#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */ +#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */ + +#define ASN_HASH_0 0x02C0 /* (RW) ASN hash function argument 0 */ +#define ASN_HASH(n) (ASN_HASH_0 + (n)*4) +#define ASN_HASH_COUNT 3 + +#define STACK_PWRTRANS_LO 0xE40 /* (RO) Core stack power transition bitmap, low word */ +#define STACK_PWRTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */ + +#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */ +#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */ + +#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */ +#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */ + +#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ +#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ + +#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ +#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */ + +#define 
SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */ +#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */ +#define L2_MMU_CONFIG 0xF0C /* (RW) L2 cache and MMU configuration (implementation-specific) */ + +/* Job control registers */ + +#define JOB_CONTROL_BASE 0x1000 + +#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r)) + +#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */ +#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */ +#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */ +#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ + +/* MMU control registers */ + +#define MEMORY_MANAGEMENT_BASE 0x2000 +#define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r)) + +#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */ +#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */ +#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ +#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ + +#define MMU_AS0 0x400 /* Configuration registers for address space 0 */ +#define MMU_AS1 0x440 /* Configuration registers for address space 1 */ +#define MMU_AS2 0x480 /* Configuration registers for address space 2 */ +#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */ +#define MMU_AS4 0x500 /* Configuration registers for address space 4 */ +#define MMU_AS5 0x540 /* Configuration registers for address space 5 */ +#define MMU_AS6 0x580 /* Configuration registers for address space 6 */ +#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */ +#define MMU_AS8 0x600 /* Configuration registers for address space 8 */ +#define MMU_AS9 0x640 /* Configuration registers for address space 9 */ +#define MMU_AS10 0x680 /* Configuration registers for address space 10 */ +#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */ +#define MMU_AS12 0x700 /* Configuration registers for address space 12 */ +#define MMU_AS13 0x740 /* Configuration registers for address space 13 */ +#define MMU_AS14 0x780 /* Configuration registers for address space 14 */ +#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */ + +/* MMU address space control registers */ + +#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r)) + +#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */ +#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */ +#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */ +#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. 
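+ * (Worked example: using the MMU_AS_REG() helper above,
+ * MMU_AS_REG(3, AS_MEMATTR_LO) resolves to
+ * 0x2000 + 0x400 + (3 << 6) + 0x08 = 0x24C8.)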
*/ +#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */ +#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */ +#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */ +#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */ +#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */ +#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */ +#define AS_STATUS 0x28 /* (RO) Status flags for address space n */ + +/* (RW) Translation table configuration for address space n, low word */ +#define AS_TRANSCFG_LO 0x30 +/* (RW) Translation table configuration for address space n, high word */ +#define AS_TRANSCFG_HI 0x34 +/* (RO) Secondary fault address for address space n, low word */ +#define AS_FAULTEXTRA_LO 0x38 +/* (RO) Secondary fault address for address space n, high word */ +#define AS_FAULTEXTRA_HI 0x3C + +/* End Register Offsets */ + +#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON) + +/* + * MMU_IRQ_RAWSTAT register values. Values are valid also for + * MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers. + */ + +#define MMU_PAGE_FAULT_FLAGS 16 + +/* Macros returning a bitmask to retrieve page fault or bus error flags from + * MMU registers + */ +#define MMU_PAGE_FAULT(n) (1UL << (n)) +#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS)) + +/* + * Begin AARCH64 MMU TRANSTAB register values + */ +#define MMU_HW_OUTA_BITS 40 +#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4)) + +/* + * Begin MMU STATUS register values + */ +#define AS_STATUS_AS_ACTIVE 0x01 + +#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3) + +#define AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFF << AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) +#define AS_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \ + (((reg_val)&AS_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) +#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_0 0xC0 + +#define AS_FAULTSTATUS_ACCESS_TYPE_SHIFT 8 +#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) +#define AS_FAULTSTATUS_ACCESS_TYPE_GET(reg_val) \ + (((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) + +#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0) +#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1) +#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2) +#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3) + +#define AS_FAULTSTATUS_SOURCE_ID_SHIFT 16 +#define AS_FAULTSTATUS_SOURCE_ID_MASK (0xFFFF << AS_FAULTSTATUS_SOURCE_ID_SHIFT) +#define AS_FAULTSTATUS_SOURCE_ID_GET(reg_val) \ + (((reg_val)&AS_FAULTSTATUS_SOURCE_ID_MASK) >> AS_FAULTSTATUS_SOURCE_ID_SHIFT) + +/* + * Begin MMU TRANSCFG register values + */ +#define AS_TRANSCFG_ADRMODE_LEGACY 0 +#define AS_TRANSCFG_ADRMODE_UNMAPPED 1 +#define AS_TRANSCFG_ADRMODE_IDENTITY 2 +#define AS_TRANSCFG_ADRMODE_AARCH64_4K 6 +#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8 + +#define AS_TRANSCFG_ADRMODE_MASK 0xF + +/* + * Begin TRANSCFG register values + */ +#define 
AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24) +#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24) +#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24) + +#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28)) +#define AS_TRANSCFG_PTW_SH_OS (2ull << 28) +#define AS_TRANSCFG_PTW_SH_IS (3ull << 28) +#define AS_TRANSCFG_R_ALLOCATE (1ull << 30) + +/* + * Begin Command Values + */ + +/* AS_COMMAND register commands */ +#define AS_COMMAND_NOP 0x00 /* NOP Operation */ +#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */ +#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */ +#define AS_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */ +/* Flush all L2 caches then issue a flush region command to all MMUs + * (deprecated - only for use with T60x) + */ +#define AS_COMMAND_FLUSH 0x04 +/* Flush all L2 caches then issue a flush region command to all MMUs */ +#define AS_COMMAND_FLUSH_PT 0x04 +/* Wait for memory accesses to complete, flush all the L1s cache then flush all + * L2 caches then issue a flush region command to all MMUs + */ +#define AS_COMMAND_FLUSH_MEM 0x05 + +/* GPU_STATUS values */ +#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ +#define GPU_STATUS_CYCLE_COUNT_ACTIVE (1 << 6) /* Set if the cycle counter is active. */ +#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ + +/* PRFCNT_CONFIG register values */ +#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */ +#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */ +#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */ + +/* The performance counters are disabled. */ +#define PRFCNT_CONFIG_MODE_OFF 0 +/* The performance counters are enabled, but are only written out when a + * PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. + */ +#define PRFCNT_CONFIG_MODE_MANUAL 1 +/* The performance counters are enabled, and are written out each time a tile + * finishes rendering. + */ +#define PRFCNT_CONFIG_MODE_TILE 2 + +/* AS_MEMATTR values from MMU_MEMATTR_STAGE1: */ +/* Use GPU implementation-defined caching policy. */ +#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull +/* The attribute set to force all resources to be cached. */ +#define AS_MEMATTR_FORCE_TO_CACHE_ALL 0x8Full +/* Inner write-alloc cache setup, no outer caching */ +#define AS_MEMATTR_WRITE_ALLOC 0x8Dull + +/* Use GPU implementation-defined caching policy. */ +#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull +/* The attribute set to force all resources to be cached. */ +#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL 0x4Full +/* Inner write-alloc cache setup, no outer caching */ +#define AS_MEMATTR_LPAE_WRITE_ALLOC 0x4Dull +/* Set to implementation defined, outer caching */ +#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF 0x88ull +/* Set to write back memory, outer caching */ +#define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull +/* There is no LPAE support for non-cacheable, since the memory type is always + * write-back. 
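A little further up, the AS_FAULTSTATUS field-extraction macros and the FAULTADDRESS LO/HI pair are everything needed to decode an MMU fault report. A minimal user-side sketch (the function and parameter names are illustrative, and it assumes the three raw register values have already been read out):

#include <stdint.h>
#include <stdio.h>

/* Decode an AS_FAULTSTATUS value and a FAULTADDRESS_LO/HI pair using the
 * macros defined in this header. All LO/HI register pairs combine the same
 * way: HI supplies bits 63:32, LO supplies bits 31:0.
 */
static void decode_as_fault(uint32_t status, uint32_t addr_lo, uint32_t addr_hi)
{
        uint64_t addr = ((uint64_t)addr_hi << 32) | addr_lo;
        uint32_t access = AS_FAULTSTATUS_ACCESS_TYPE_GET(status);

        printf("fault at 0x%016llx: exception type 0x%02x, source id 0x%04x, %s access\n",
               (unsigned long long)addr,
               (unsigned)AS_FAULTSTATUS_EXCEPTION_TYPE_GET(status),
               (unsigned)AS_FAULTSTATUS_SOURCE_ID_GET(status),
               access == AS_FAULTSTATUS_ACCESS_TYPE_READ ? "read" :
               access == AS_FAULTSTATUS_ACCESS_TYPE_WRITE ? "write" :
               access == AS_FAULTSTATUS_ACCESS_TYPE_EX ? "execute" : "atomic");
}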
+ * Marking this setting as reserved for LPAE + */ +#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED + +/* L2_MMU_CONFIG register */ +#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23) +#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT) + +/* End L2_MMU_CONFIG register */ + +/* THREAD_* registers */ + +/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */ +#define IMPLEMENTATION_UNSPECIFIED 0 +#define IMPLEMENTATION_SILICON 1 +#define IMPLEMENTATION_FPGA 2 +#define IMPLEMENTATION_MODEL 3 + +/* Default values when registers are not supported by the implemented hardware */ +#define THREAD_MT_DEFAULT 256 +#define THREAD_MWS_DEFAULT 256 +#define THREAD_MBS_DEFAULT 256 +#define THREAD_MR_DEFAULT 1024 +#define THREAD_MTQ_DEFAULT 4 +#define THREAD_MTGS_DEFAULT 10 + +/* End THREAD_* registers */ + +/* SHADER_CONFIG register */ +#define SC_LS_ALLOW_ATTR_TYPES (1ul << 16) +#define SC_TLS_HASH_ENABLE (1ul << 17) +#define SC_LS_ATTR_CHECK_DISABLE (1ul << 18) +#define SC_VAR_ALGORITHM (1ul << 29) +/* End SHADER_CONFIG register */ + +/* TILER_CONFIG register */ +#define TC_CLOCK_GATE_OVERRIDE (1ul << 0) +/* End TILER_CONFIG register */ + +/* L2_CONFIG register */ +#define L2_CONFIG_SIZE_SHIFT 16 +#define L2_CONFIG_SIZE_MASK (0xFFul << L2_CONFIG_SIZE_SHIFT) +#define L2_CONFIG_HASH_SHIFT 24 +#define L2_CONFIG_HASH_MASK (0xFFul << L2_CONFIG_HASH_SHIFT) +#define L2_CONFIG_ASN_HASH_ENABLE_SHIFT 24 +#define L2_CONFIG_ASN_HASH_ENABLE_MASK (1ul << L2_CONFIG_ASN_HASH_ENABLE_SHIFT) +/* End L2_CONFIG register */ + +/* IDVS_GROUP register */ +#define IDVS_GROUP_SIZE_SHIFT (16) +#define IDVS_GROUP_MAX_SIZE (0x3F) + +#endif /* _UAPI_KBASE_GPU_REGMAP_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h b/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h new file mode 100644 index 0000000..cd81421 --- /dev/null +++ b/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h @@ -0,0 +1,1200 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _UAPI_BASE_JM_KERNEL_H_ +#define _UAPI_BASE_JM_KERNEL_H_ + +#include <linux/types.h> + +/* Memory allocation, access/hint flags. + * + * See base_mem_alloc_flags. + */ + +/* IN */ +/* Read access CPU side + */ +#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0) + +/* Write access CPU side + */ +#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1) + +/* Read access GPU side + */ +#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2) + +/* Write access GPU side + */ +#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3) + +/* Execute allowed on the GPU side + */ +#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4) + +/* Will be permanently mapped in kernel space. 
+ * Flag is only allowed on allocations originating from kbase. + */ +#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5) + +/* The allocation will completely reside within the same 4GB chunk in the GPU + * virtual space. + * Since this flag is primarily required only for the TLS memory which will + * not be used to contain executable code and also not used for Tiler heap, + * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags. + */ +#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6) + +/* Userspace is not allowed to free this memory. + * Flag is only allowed on allocations originating from kbase. + */ +#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7) + +#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8) + +/* Grow backing store on GPU Page Fault + */ +#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9) + +/* Page coherence Outer shareable, if available + */ +#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10) + +/* Page coherence Inner shareable + */ +#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11) + +/* IN/OUT */ +/* Should be cached on the CPU, returned if actually cached + */ +#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12) + +/* IN/OUT */ +/* Must have same VA on both the GPU and the CPU + */ +#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13) + +/* OUT */ +/* Must call mmap to acquire a GPU address for the allocation + */ +#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14) + +/* IN */ +/* Page coherence Outer shareable, required. + */ +#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15) + +/* Protected memory + */ +#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16) + +/* Not needed physical memory + */ +#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17) + +/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the + * addresses to be the same + */ +#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18) + +/** + * Bit 19 is reserved. + * + * Do not remove, use the next unreserved bit for new flags + */ +#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19) + +/** + * Memory starting from the end of the initial commit is aligned to 'extension' + * pages, where 'extension' must be a power of 2 and no more than + * BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES + */ +#define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20) + +/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu + * mode. Some components within the GPU might only be able to access memory + * that is GPU cacheable. Refer to the specific GPU implementation for more + * details. The 3 shareability flags will be ignored for GPU uncached memory. + * If used while importing USER_BUFFER type memory, then the import will fail + * if the memory is not aligned to GPU and CPU cache line width. + */ +#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21) + +/* + * Bits [22:25] for group_id (0~15). + * + * base_mem_group_id_set() should be used to pack a memory group ID into a + * base_mem_alloc_flags value instead of accessing the bits directly. + * base_mem_group_id_get() should be used to extract the memory group ID from + * a base_mem_alloc_flags value. + */ +#define BASEP_MEM_GROUP_ID_SHIFT 22 +#define BASE_MEM_GROUP_ID_MASK \ + ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT) + +/* Must do CPU cache maintenance when imported memory is mapped/unmapped + * on GPU. 
Currently applicable to dma-buf type only. + */ +#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26) + +/* Use the GPU VA chosen by the kernel client */ +#define BASE_MEM_FLAG_MAP_FIXED ((base_mem_alloc_flags)1 << 27) + +/* OUT */ +/* Kernel side cache sync ops required */ +#define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28) + +/* Force trimming of JIT allocations when creating a new allocation */ +#define BASEP_MEM_PERFORM_JIT_TRIM ((base_mem_alloc_flags)1 << 29) + +/* Number of bits used as flags for base memory management + * + * Must be kept in sync with the base_mem_alloc_flags flags + */ +#define BASE_MEM_FLAGS_NR_BITS 30 + +/* A mask of all the flags which are only valid for allocations within kbase, + * and may not be passed from user space. + */ +#define BASEP_MEM_FLAGS_KERNEL_ONLY \ + (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | \ + BASE_MEM_FLAG_MAP_FIXED | BASEP_MEM_PERFORM_JIT_TRIM) + +/* A mask for all output bits, excluding IN/OUT bits. + */ +#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP + +/* A mask for all input bits, including IN/OUT bits. + */ +#define BASE_MEM_FLAGS_INPUT_MASK \ + (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) + +/* A mask of all currently reserved flags + */ +#define BASE_MEM_FLAGS_RESERVED \ + (BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_19) + +#define BASEP_MEM_INVALID_HANDLE (0ull << 12) +#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12) +#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12) +#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12) +#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) +/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */ +#define BASE_MEM_COOKIE_BASE (64ul << 12) +#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \ + BASE_MEM_COOKIE_BASE) + +/* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the + * initial commit is aligned to 'extension' pages, where 'extension' must be a power + * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES + */ +#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0) + +/** + * If set, the heap info address points to a __u32 holding the used size in bytes; + * otherwise it points to a __u64 holding the lowest address of unused memory. + */ +#define BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE (1 << 1) + +/** + * Valid set of just-in-time memory allocation flags + * + * Note: BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE cannot be set if heap_info_gpu_addr + * in %base_jit_alloc_info is 0 (atom with BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE set + * and heap_info_gpu_addr being 0 will be rejected). + */ +#define BASE_JIT_ALLOC_VALID_FLAGS \ + (BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP | BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) + +/** + * typedef base_context_create_flags - Flags to pass to ::base_context_init. + * + * Flags can be ORed together to enable multiple things. + * + * These share the same space as BASEP_CONTEXT_FLAG_*, and so must + * not collide with them. + */ +typedef __u32 base_context_create_flags; + +/* No flags set */ +#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0) + +/* Base context is embedded in a cctx object (flag used for CINSTR + * software counter macros) + */ +#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0) + +/* Base context is a 'System Monitor' context for Hardware counters. + * + * One important side effect of this is that job submission is disabled. 
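Stepping back to the memory-allocation flags: bits [22:25] of base_mem_alloc_flags carry the memory group ID documented earlier, and the header text refers to base_mem_group_id_set()/base_mem_group_id_get() for packing and unpacking it. Equivalent helpers might look like the following sketch (the my_* names are illustrative; base_mem_alloc_flags itself is typedef'd in mali_base_kernel.h):

/* Pack a group ID (0-15) into an allocation-flags word. */
static inline base_mem_alloc_flags my_mem_group_id_set(int id)
{
        return ((base_mem_alloc_flags)id << BASEP_MEM_GROUP_ID_SHIFT) &
               BASE_MEM_GROUP_ID_MASK;
}

/* Recover the group ID from an allocation-flags word. */
static inline int my_mem_group_id_get(base_mem_alloc_flags flags)
{
        return (int)((flags & BASE_MEM_GROUP_ID_MASK) >>
                     BASEP_MEM_GROUP_ID_SHIFT);
}

For example, a CPU-readable, GPU-read/write allocation placed in group 3 would be requested with BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | my_mem_group_id_set(3).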
+ */ +#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \ + ((base_context_create_flags)1 << 1) + +/* Bit-shift used to encode a memory group ID in base_context_create_flags + */ +#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3) + +/* Bitmask used to encode a memory group ID in base_context_create_flags + */ +#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \ + ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) + +/* Bitpattern describing the base_context_create_flags that can be + * passed to the kernel + */ +#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \ + (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \ + BASEP_CONTEXT_MMU_GROUP_ID_MASK) + +/* Bitpattern describing the ::base_context_create_flags that can be + * passed to base_context_init() + */ +#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \ + (BASE_CONTEXT_CCTX_EMBEDDED | BASEP_CONTEXT_CREATE_KERNEL_FLAGS) + +/* + * Private flags used on the base context + * + * These start at bit 31, and run down to zero. + * + * They share the same space as base_context_create_flags, and so must + * not collide with them. + */ + +/* Private flag tracking whether job descriptor dumping is disabled */ +#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED \ + ((base_context_create_flags)(1 << 31)) + +/* Enable additional tracepoints for latency measurements (TL_ATOM_READY, + * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) + */ +#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0) + +/* Indicate that job dumping is enabled. This could affect certain timers + * to account for the performance impact. + */ +#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1) + +#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \ + BASE_TLSTREAM_JOB_DUMPING_ENABLED) +/* + * Dependency stuff, keep it private for now. May want to expose it if + * we decide to make the number of semaphores a configurable + * option. + */ +#define BASE_JD_ATOM_COUNT 256 + +/* Maximum number of concurrent render passes. + */ +#define BASE_JD_RP_COUNT (256) + +/* Set/reset values for a software event */ +#define BASE_JD_SOFT_EVENT_SET ((unsigned char)1) +#define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0) + +/** + * struct base_jd_udata - Per-job data + * + * This structure is used to store per-job data, and is completely unused + * by the Base driver. It can be used to store things such as a callback + * function pointer or data needed to handle job completion. It is guaranteed + * to be untouched by the Base driver. + * + * @blob: per-job data array + */ +struct base_jd_udata { + __u64 blob[2]; +}; + +/** + * typedef base_jd_dep_type - Job dependency type. + * + * A flags field will be inserted into the atom structure to specify whether a + * dependency is a data or ordering dependency (by putting it before/after + * 'core_req' in the structure it should be possible to add without changing + * the structure size). + * When the flag is set for a particular dependency to signal that it is an + * ordering only dependency then errors will not be propagated. + */ +typedef __u8 base_jd_dep_type; + +#define BASE_JD_DEP_TYPE_INVALID (0) /**< Invalid dependency */ +#define BASE_JD_DEP_TYPE_DATA (1U << 0) /**< Data dependency */ +#define BASE_JD_DEP_TYPE_ORDER (1U << 1) /**< Order dependency */ + +/** + * typedef base_jd_core_req - Job chain hardware requirements. + * + * A job chain must specify what GPU features it needs to allow the + * driver to schedule the job correctly. Not specifying the + * correct settings will cause early job termination. 
Multiple + * values can be ORed together to specify multiple requirements. + * Special case is ::BASE_JD_REQ_DEP, which is used to express complex + * dependencies, and that doesn't execute anything on the hardware. + */ +typedef __u32 base_jd_core_req; + +/* Requirements that come from the HW */ + +/* No requirement, dependency only + */ +#define BASE_JD_REQ_DEP ((base_jd_core_req)0) + +/* Requires fragment shaders + */ +#define BASE_JD_REQ_FS ((base_jd_core_req)1 << 0) + +/* Requires compute shaders + * + * This covers any of the following GPU job types: + * - Vertex Shader Job + * - Geometry Shader Job + * - An actual Compute Shader Job + * + * Compare this with BASE_JD_REQ_ONLY_COMPUTE, which specifies that the + * job is specifically just the "Compute Shader" job type, and not the "Vertex + * Shader" nor the "Geometry Shader" job type. + */ +#define BASE_JD_REQ_CS ((base_jd_core_req)1 << 1) + +/* Requires tiling */ +#define BASE_JD_REQ_T ((base_jd_core_req)1 << 2) + +/* Requires cache flushes */ +#define BASE_JD_REQ_CF ((base_jd_core_req)1 << 3) + +/* Requires value writeback */ +#define BASE_JD_REQ_V ((base_jd_core_req)1 << 4) + +/* SW-only requirements - the HW does not expose these as part of the job slot + * capabilities + */ + +/* Requires fragment job with AFBC encoding */ +#define BASE_JD_REQ_FS_AFBC ((base_jd_core_req)1 << 13) + +/* SW-only requirement: coalesce completion events. + * If this bit is set then completion of this atom will not cause an event to + * be sent to userspace, whether successful or not; completion events will be + * deferred until an atom completes which does not have this bit set. + * + * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES. + */ +#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5) + +/* SW Only requirement: the job chain requires a coherent core group. We don't + * mind which coherent core group is used. + */ +#define BASE_JD_REQ_COHERENT_GROUP ((base_jd_core_req)1 << 6) + +/* SW Only requirement: The performance counters should be enabled only when + * they are needed, to reduce power consumption. + */ +#define BASE_JD_REQ_PERMON ((base_jd_core_req)1 << 7) + +/* SW Only requirement: External resources are referenced by this atom. + * + * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE and + * BASE_JD_REQ_SOFT_EVENT_WAIT. + */ +#define BASE_JD_REQ_EXTERNAL_RESOURCES ((base_jd_core_req)1 << 8) + +/* SW Only requirement: Software defined job. Jobs with this bit set will not be + * submitted to the hardware but will cause some action to happen within the + * driver + */ +#define BASE_JD_REQ_SOFT_JOB ((base_jd_core_req)1 << 9) + +#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1) +#define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2) +#define BASE_JD_REQ_SOFT_FENCE_WAIT (BASE_JD_REQ_SOFT_JOB | 0x3) + +/* 0x4 RESERVED for now */ + +/* SW only requirement: event wait/trigger job. + * + * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set. + * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the + * other waiting jobs. It completes immediately. + * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it + * possible for other jobs to wait upon. It completes immediately. 
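Because every soft-job requirement is built as BASE_JD_REQ_SOFT_JOB plus a small subtype, userspace can classify an atom by masking with BASE_JD_REQ_SOFT_JOB_TYPE (defined further down). A sketch using the soft-event subtypes defined just below (the helper name is illustrative):

/* Returns non-zero if core_req describes one of the soft-event jobs. */
static inline int is_soft_event_job(base_jd_core_req core_req)
{
        base_jd_core_req type = core_req & BASE_JD_REQ_SOFT_JOB_TYPE;

        return type == BASE_JD_REQ_SOFT_EVENT_WAIT ||
               type == BASE_JD_REQ_SOFT_EVENT_SET ||
               type == BASE_JD_REQ_SOFT_EVENT_RESET;
}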
+ */ +#define BASE_JD_REQ_SOFT_EVENT_WAIT (BASE_JD_REQ_SOFT_JOB | 0x5) +#define BASE_JD_REQ_SOFT_EVENT_SET (BASE_JD_REQ_SOFT_JOB | 0x6) +#define BASE_JD_REQ_SOFT_EVENT_RESET (BASE_JD_REQ_SOFT_JOB | 0x7) + +#define BASE_JD_REQ_SOFT_DEBUG_COPY (BASE_JD_REQ_SOFT_JOB | 0x8) + +/* SW only requirement: Just In Time allocation + * + * This job requests a single or multiple just-in-time allocations through a + * list of base_jit_alloc_info structure which is passed via the jc element of + * the atom. The number of base_jit_alloc_info structures present in the + * list is passed via the nr_extres element of the atom + * + * It should be noted that the id entry in base_jit_alloc_info must not + * be reused until it has been released via BASE_JD_REQ_SOFT_JIT_FREE. + * + * Should this soft job fail it is expected that a BASE_JD_REQ_SOFT_JIT_FREE + * soft job to free the JIT allocation is still made. + * + * The job will complete immediately. + */ +#define BASE_JD_REQ_SOFT_JIT_ALLOC (BASE_JD_REQ_SOFT_JOB | 0x9) + +/* SW only requirement: Just In Time free + * + * This job requests a single or multiple just-in-time allocations created by + * BASE_JD_REQ_SOFT_JIT_ALLOC to be freed. The ID list of the just-in-time + * allocations is passed via the jc element of the atom. + * + * The job will complete immediately. + */ +#define BASE_JD_REQ_SOFT_JIT_FREE (BASE_JD_REQ_SOFT_JOB | 0xa) + +/* SW only requirement: Map external resource + * + * This job requests external resource(s) are mapped once the dependencies + * of the job have been satisfied. The list of external resources are + * passed via the jc element of the atom which is a pointer to a + * base_external_resource_list. + */ +#define BASE_JD_REQ_SOFT_EXT_RES_MAP (BASE_JD_REQ_SOFT_JOB | 0xb) + +/* SW only requirement: Unmap external resource + * + * This job requests external resource(s) are unmapped once the dependencies + * of the job has been satisfied. The list of external resources are + * passed via the jc element of the atom which is a pointer to a + * base_external_resource_list. + */ +#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP (BASE_JD_REQ_SOFT_JOB | 0xc) + +/* HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders) + * + * This indicates that the Job Chain contains GPU jobs of the 'Compute + * Shaders' type. + * + * In contrast to BASE_JD_REQ_CS, this does not indicate that the Job + * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs. + */ +#define BASE_JD_REQ_ONLY_COMPUTE ((base_jd_core_req)1 << 10) + +/* HW Requirement: Use the base_jd_atom::device_nr field to specify a + * particular core group + * + * If both BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag + * takes priority + * + * This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms. + * + * If the core availability policy is keeping the required core group turned + * off, then the job will fail with a BASE_JD_EVENT_PM_EVENT error code. + */ +#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11) + +/* SW Flag: If this bit is set then the successful completion of this atom + * will not cause an event to be sent to userspace + */ +#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE ((base_jd_core_req)1 << 12) + +/* SW Flag: If this bit is set then completion of this atom will not cause an + * event to be sent to userspace, whether successful or not. + */ +#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14) + +/* SW Flag: Skip GPU cache clean and invalidation before starting a GPU job. 
+ * + * If this bit is set then the GPU's cache will not be cleaned and invalidated + * until a GPU job starts which does not have this bit set or a job completes + * which does not have the BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use + * if the CPU may have written to memory addressed by the job since the last job + * without this bit set was submitted. + */ +#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15) + +/* SW Flag: Skip GPU cache clean and invalidation after a GPU job completes. + * + * If this bit is set then the GPU's cache will not be cleaned and invalidated + * until a GPU job completes which does not have this bit set or a job starts + * which does not have the BASE_JD_REQ_SKIP_CACHE_START bit set. Do not use + * if the CPU may read from or partially overwrite memory addressed by the job + * before the next job without this bit set completes. + */ +#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16) + +/* Request the atom be executed on a specific job slot. + * + * When this flag is specified, it takes precedence over any existing job slot + * selection logic. + */ +#define BASE_JD_REQ_JOB_SLOT ((base_jd_core_req)1 << 17) + +/* SW-only requirement: The atom is the start of a renderpass. + * + * If this bit is set then the job chain will be soft-stopped if it causes the + * GPU to write beyond the end of the physical pages backing the tiler heap, and + * committing more memory to the heap would exceed an internal threshold. It may + * be resumed after running one of the job chains attached to an atom with + * BASE_JD_REQ_END_RENDERPASS set and the same renderpass ID. It may be + * resumed multiple times until it completes without memory usage exceeding the + * threshold. + * + * Usually used with BASE_JD_REQ_T. + */ +#define BASE_JD_REQ_START_RENDERPASS ((base_jd_core_req)1 << 18) + +/* SW-only requirement: The atom is the end of a renderpass. + * + * If this bit is set then the atom incorporates the CPU address of a + * base_jd_fragment object instead of the GPU address of a job chain. + * + * Which job chain is run depends upon whether the atom with the same renderpass + * ID and the BASE_JD_REQ_START_RENDERPASS bit set completed normally or + * was soft-stopped when it exceeded an upper threshold for tiler heap memory + * usage. + * + * It also depends upon whether one of the job chains attached to the atom has + * already been run as part of the same renderpass (in which case it would have + * written unresolved multisampled and otherwise-discarded output to temporary + * buffers that need to be read back). The job chain for doing a forced read and + * forced write (from/to temporary buffers) is run as many times as necessary. + * + * Usually used with BASE_JD_REQ_FS. + */ +#define BASE_JD_REQ_END_RENDERPASS ((base_jd_core_req)1 << 19) + +/* SW-only requirement: The atom needs to run on a limited core mask affinity. + * + * If this bit is set then the kbase_context.limited_core_mask will be applied + * to the affinity. 
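For incremental rendering, the START/END renderpass flags above pair a tiler atom with a fragment atom through a shared renderpass_id, with the fragment atom's jc holding the CPU address of a base_jd_fragment (both struct base_jd_atom_v2 and struct base_jd_fragment are defined later in this header). A minimal sketch of how userspace might fill in such a pair, assuming illustrative tiler_jc/frag_jcs inputs and <linux/types.h> for the __u types:

#include <stdint.h>

/* Pair a tiler atom and a fragment atom for one renderpass (illustrative). */
static void setup_renderpass_atoms(struct base_jd_atom_v2 *tiler_atom,
                                   struct base_jd_atom_v2 *frag_atom,
                                   __u64 tiler_jc,
                                   const struct base_jd_fragment *frag_jcs,
                                   __u8 rp_id)
{
        /* Tiler atom: may be soft-stopped and resumed if the tiler heap
         * grows past the driver's internal threshold.
         */
        tiler_atom->jc = tiler_jc;      /* GPU address of the job chain */
        tiler_atom->core_req = BASE_JD_REQ_T | BASE_JD_REQ_START_RENDERPASS;
        tiler_atom->renderpass_id = rp_id;

        /* Fragment atom: with END_RENDERPASS set, jc is the CPU address of
         * a base_jd_fragment describing the four candidate job chains.
         */
        frag_atom->jc = (__u64)(uintptr_t)frag_jcs;
        frag_atom->core_req = BASE_JD_REQ_FS | BASE_JD_REQ_END_RENDERPASS;
        frag_atom->renderpass_id = rp_id;
}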
+ */ +#define BASE_JD_REQ_LIMITED_CORE_MASK ((base_jd_core_req)1 << 20) + +/* These requirement bits are currently unused in base_jd_core_req + */ +#define BASEP_JD_REQ_RESERVED \ + (~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \ + BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \ + BASE_JD_REQ_EVENT_COALESCE | \ + BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \ + BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \ + BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END | \ + BASE_JD_REQ_JOB_SLOT | BASE_JD_REQ_START_RENDERPASS | \ + BASE_JD_REQ_END_RENDERPASS | BASE_JD_REQ_LIMITED_CORE_MASK)) + +/* Mask of all bits in base_jd_core_req that control the type of the atom. + * + * This allows dependency only atoms to have flags set + */ +#define BASE_JD_REQ_ATOM_TYPE \ + (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \ + BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE) + +/** + * Mask of all bits in base_jd_core_req that control the type of a soft job. + */ +#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f) + +/* Returns non-zero value if core requirements passed define a soft job or + * a dependency only job. + */ +#define BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req) \ + (((core_req) & BASE_JD_REQ_SOFT_JOB) || \ + ((core_req) & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) + +/** + * enum kbase_jd_atom_state + * + * @KBASE_JD_ATOM_STATE_UNUSED: Atom is not used. + * @KBASE_JD_ATOM_STATE_QUEUED: Atom is queued in JD. + * @KBASE_JD_ATOM_STATE_IN_JS: Atom has been given to JS (is runnable/running). + * @KBASE_JD_ATOM_STATE_HW_COMPLETED: Atom has been completed, but not yet + * handed back to job dispatcher for + * dependency resolution. + * @KBASE_JD_ATOM_STATE_COMPLETED: Atom has been completed, but not yet handed + * back to userspace. + */ +enum kbase_jd_atom_state { + KBASE_JD_ATOM_STATE_UNUSED, + KBASE_JD_ATOM_STATE_QUEUED, + KBASE_JD_ATOM_STATE_IN_JS, + KBASE_JD_ATOM_STATE_HW_COMPLETED, + KBASE_JD_ATOM_STATE_COMPLETED +}; + +/** + * typedef base_atom_id - Type big enough to store an atom number in. + */ +typedef __u8 base_atom_id; + +/** + * struct base_dependency - + * + * @atom_id: An atom number + * @dependency_type: Dependency type + */ +struct base_dependency { + base_atom_id atom_id; + base_jd_dep_type dependency_type; +}; + +/** + * struct base_jd_fragment - Set of GPU fragment job chains used for rendering. + * + * @norm_read_norm_write: Job chain for full rendering. + * GPU address of a fragment job chain to render in the + * circumstance where the tiler job chain did not exceed + * its memory usage threshold and no fragment job chain + * was previously run for the same renderpass. + * It is used no more than once per renderpass. + * @norm_read_forced_write: Job chain for starting incremental + * rendering. + * GPU address of a fragment job chain to render in + * the circumstance where the tiler job chain exceeded + * its memory usage threshold for the first time and + * no fragment job chain was previously run for the + * same renderpass. + * Writes unresolved multisampled and normally- + * discarded output to temporary buffers that must be + * read back by a subsequent forced_read job chain + * before the renderpass is complete. + * It is used no more than once per renderpass. + * @forced_read_forced_write: Job chain for continuing incremental + * rendering. 
+ * GPU address of a fragment job chain to render in + * the circumstance where the tiler job chain + * exceeded its memory usage threshold again + * and a fragment job chain was previously run for + * the same renderpass. + * Reads unresolved multisampled and + * normally-discarded output from temporary buffers + * written by a previous forced_write job chain and + * writes the same to temporary buffers again. + * It is used as many times as required until + * rendering completes. + * @forced_read_norm_write: Job chain for ending incremental rendering. + * GPU address of a fragment job chain to render in the + * circumstance where the tiler job chain did not + * exceed its memory usage threshold this time and a + * fragment job chain was previously run for the same + * renderpass. + * Reads unresolved multisampled and normally-discarded + * output from temporary buffers written by a previous + * forced_write job chain in order to complete a + * renderpass. + * It is used no more than once per renderpass. + * + * This structure is referenced by the main atom structure if + * BASE_JD_REQ_END_RENDERPASS is set in the base_jd_core_req. + */ +struct base_jd_fragment { + __u64 norm_read_norm_write; + __u64 norm_read_forced_write; + __u64 forced_read_forced_write; + __u64 forced_read_norm_write; +}; + +/** + * typedef base_jd_prio - Base Atom priority. + * + * Only certain priority levels are actually implemented, as specified by the + * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority + * level that is not one of those defined below. + * + * Priority levels only affect scheduling after the atoms have had dependencies + * resolved. For example, a low priority atom that has had its dependencies + * resolved might run before a higher priority atom that has not had its + * dependencies resolved. + * + * In general, fragment atoms do not affect non-fragment atoms with + * lower priorities, and vice versa. One exception is that there is only one + * priority value for each context. So a high-priority (e.g.) fragment atom + * could increase its context priority, causing its non-fragment atoms to also + * be scheduled sooner. + * + * The atoms are scheduled as follows with respect to their priorities: + * * Let atoms 'X' and 'Y' be for the same job slot who have dependencies + * resolved, and atom 'X' has a higher priority than atom 'Y' + * * If atom 'Y' is currently running on the HW, then it is interrupted to + * allow atom 'X' to run soon after + * * If instead neither atom 'Y' nor atom 'X' are running, then when choosing + * the next atom to run, atom 'X' will always be chosen instead of atom 'Y' + * * Any two atoms that have the same priority could run in any order with + * respect to each other. That is, there is no ordering constraint between + * atoms of the same priority. + * + * The sysfs file 'js_ctx_scheduling_mode' is used to control how atoms are + * scheduled between contexts. The default value, 0, will cause higher-priority + * atoms to be scheduled first, regardless of their context. The value 1 will + * use a round-robin algorithm when deciding which context's atoms to schedule + * next, so higher-priority atoms can only preempt lower priority atoms within + * the same context. See KBASE_JS_SYSTEM_PRIORITY_MODE and + * KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE for more details. + */ +typedef __u8 base_jd_prio; + +/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */ +#define BASE_JD_PRIO_MEDIUM ((base_jd_prio)0) +/* High atom priority. 
This is a priority higher than BASE_JD_PRIO_MEDIUM and + * BASE_JD_PRIO_LOW + */ +#define BASE_JD_PRIO_HIGH ((base_jd_prio)1) +/* Low atom priority. */ +#define BASE_JD_PRIO_LOW ((base_jd_prio)2) +/* Real-Time atom priority. This is a priority higher than BASE_JD_PRIO_HIGH, + * BASE_JD_PRIO_MEDIUM, and BASE_JD_PRIO_LOW + */ +#define BASE_JD_PRIO_REALTIME ((base_jd_prio)3) + +/* Count of the number of priority levels. This itself is not a valid + * base_jd_prio setting + */ +#define BASE_JD_NR_PRIO_LEVELS 4 + +/** + * struct base_jd_atom_v2 - Node of a dependency graph used to submit a + * GPU job chain or soft-job to the kernel driver. + * + * @jc: GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS + * is set in the base_jd_core_req) the CPU address of a + * base_jd_fragment object. + * @udata: User data. + * @extres_list: List of external resources. + * @nr_extres: Number of external resources or JIT allocations. + * @jit_id: Zero-terminated array of IDs of just-in-time memory + * allocations written to by the atom. When the atom + * completes, the value stored at the + * &struct_base_jit_alloc_info.heap_info_gpu_addr of + * each allocation is read in order to enforce an + * overall physical memory usage limit. + * @pre_dep: Pre-dependencies. One need to use SETTER function to assign + * this field; this is done in order to reduce possibility of + * improper assignment of a dependency field. + * @atom_number: Unique number to identify the atom. + * @prio: Atom priority. Refer to base_jd_prio for more details. + * @device_nr: Core group when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP + * specified. + * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified. + * @core_req: Core requirements. + * @renderpass_id: Renderpass identifier used to associate an atom that has + * BASE_JD_REQ_START_RENDERPASS set in its core requirements + * with an atom that has BASE_JD_REQ_END_RENDERPASS set. + * @padding: Unused. Must be zero. + * + * This structure has changed since UK 10.2 for which base_jd_core_req was a + * __u16 value. + * + * In UK 10.3 a core_req field of a __u32 type was added to the end of the + * structure, and the place in the structure previously occupied by __u16 + * core_req was kept but renamed to compat_core_req. + * + * From UK 11.20 - compat_core_req is now occupied by __u8 jit_id[2]. + * Compatibility with UK 10.x from UK 11.y is not handled because + * the major version increase prevents this. + * + * For UK 11.20 jit_id[2] must be initialized to zero. + */ +struct base_jd_atom_v2 { + __u64 jc; + struct base_jd_udata udata; + __u64 extres_list; + __u16 nr_extres; + __u8 jit_id[2]; + struct base_dependency pre_dep[2]; + base_atom_id atom_number; + base_jd_prio prio; + __u8 device_nr; + __u8 jobslot; + base_jd_core_req core_req; + __u8 renderpass_id; + __u8 padding[7]; +}; + +/** + * struct base_jd_atom - Same as base_jd_atom_v2, but has an extra seq_nr + * at the beginning. + * + * @seq_nr: Sequence number of logical grouping of atoms. + * @jc: GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS + * is set in the base_jd_core_req) the CPU address of a + * base_jd_fragment object. + * @udata: User data. + * @extres_list: List of external resources. + * @nr_extres: Number of external resources or JIT allocations. + * @jit_id: Zero-terminated array of IDs of just-in-time memory + * allocations written to by the atom. 
When the atom + * completes, the value stored at the + * &struct_base_jit_alloc_info.heap_info_gpu_addr of + * each allocation is read in order to enforce an + * overall physical memory usage limit. + * @pre_dep: Pre-dependencies. One need to use SETTER function to assign + * this field; this is done in order to reduce possibility of + * improper assignment of a dependency field. + * @atom_number: Unique number to identify the atom. + * @prio: Atom priority. Refer to base_jd_prio for more details. + * @device_nr: Core group when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP + * specified. + * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified. + * @core_req: Core requirements. + * @renderpass_id: Renderpass identifier used to associate an atom that has + * BASE_JD_REQ_START_RENDERPASS set in its core requirements + * with an atom that has BASE_JD_REQ_END_RENDERPASS set. + * @padding: Unused. Must be zero. + */ +typedef struct base_jd_atom { + __u64 seq_nr; + __u64 jc; + struct base_jd_udata udata; + __u64 extres_list; + __u16 nr_extres; + __u8 jit_id[2]; + struct base_dependency pre_dep[2]; + base_atom_id atom_number; + base_jd_prio prio; + __u8 device_nr; + __u8 jobslot; + base_jd_core_req core_req; + __u8 renderpass_id; + __u8 padding[7]; +} base_jd_atom; + +/* Job chain event code bits + * Defines the bits used to create ::base_jd_event_code + */ +enum { + BASE_JD_SW_EVENT_KERNEL = (1u << 15), /* Kernel side event */ + BASE_JD_SW_EVENT = (1u << 14), /* SW defined event */ + /* Event indicates success (SW events only) */ + BASE_JD_SW_EVENT_SUCCESS = (1u << 13), + BASE_JD_SW_EVENT_JOB = (0u << 11), /* Job related event */ + BASE_JD_SW_EVENT_BAG = (1u << 11), /* Bag related event */ + BASE_JD_SW_EVENT_INFO = (2u << 11), /* Misc/info event */ + BASE_JD_SW_EVENT_RESERVED = (3u << 11), /* Reserved event type */ + /* Mask to extract the type from an event code */ + BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11) +}; + +/** + * enum base_jd_event_code - Job chain event codes + * + * @BASE_JD_EVENT_RANGE_HW_NONFAULT_START: Start of hardware non-fault status + * codes. + * Obscurely, BASE_JD_EVENT_TERMINATED + * indicates a real fault, because the + * job was hard-stopped. + * @BASE_JD_EVENT_NOT_STARTED: Can't be seen by userspace, treated as + * 'previous job done'. + * @BASE_JD_EVENT_STOPPED: Can't be seen by userspace, becomes + * TERMINATED, DONE or JOB_CANCELLED. + * @BASE_JD_EVENT_TERMINATED: This is actually a fault status code - the job + * was hard stopped. + * @BASE_JD_EVENT_ACTIVE: Can't be seen by userspace, jobs only returned on + * complete/fail/cancel. + * @BASE_JD_EVENT_RANGE_HW_NONFAULT_END: End of hardware non-fault status codes. + * Obscurely, BASE_JD_EVENT_TERMINATED + * indicates a real fault, + * because the job was hard-stopped. + * @BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START: Start of hardware fault and + * software error status codes. + * @BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END: End of hardware fault and + * software error status codes. + * @BASE_JD_EVENT_RANGE_SW_SUCCESS_START: Start of software success status + * codes. + * @BASE_JD_EVENT_RANGE_SW_SUCCESS_END: End of software success status codes. + * @BASE_JD_EVENT_RANGE_KERNEL_ONLY_START: Start of kernel-only status codes. + * Such codes are never returned to + * user-space. + * @BASE_JD_EVENT_RANGE_KERNEL_ONLY_END: End of kernel-only status codes. 
+ * @BASE_JD_EVENT_DONE: atom has completed successfully + * @BASE_JD_EVENT_JOB_CONFIG_FAULT: Atom dependencies configuration error which + * shall result in a failed atom + * @BASE_JD_EVENT_JOB_POWER_FAULT: The job could not be executed because the + * part of the memory system required to access + * job descriptors was not powered on + * @BASE_JD_EVENT_JOB_READ_FAULT: Reading a job descriptor into the Job + * manager failed + * @BASE_JD_EVENT_JOB_WRITE_FAULT: Writing a job descriptor from the Job + * manager failed + * @BASE_JD_EVENT_JOB_AFFINITY_FAULT: The job could not be executed because the + * specified affinity mask does not intersect + * any available cores + * @BASE_JD_EVENT_JOB_BUS_FAULT: A bus access failed while executing a job + * @BASE_JD_EVENT_INSTR_INVALID_PC: A shader instruction with an illegal program + * counter was executed. + * @BASE_JD_EVENT_INSTR_INVALID_ENC: A shader instruction with an illegal + * encoding was executed. + * @BASE_JD_EVENT_INSTR_TYPE_MISMATCH: A shader instruction was executed where + * the instruction encoding did not match the + * instruction type encoded in the program + * counter. + * @BASE_JD_EVENT_INSTR_OPERAND_FAULT: A shader instruction was executed that + * contained invalid combinations of operands. + * @BASE_JD_EVENT_INSTR_TLS_FAULT: A shader instruction was executed that tried + * to access the thread local storage section + * of another thread. + * @BASE_JD_EVENT_INSTR_ALIGN_FAULT: A shader instruction was executed that + * tried to do an unsupported unaligned memory + * access. + * @BASE_JD_EVENT_INSTR_BARRIER_FAULT: A shader instruction was executed that + * failed to complete an instruction barrier. + * @BASE_JD_EVENT_DATA_INVALID_FAULT: Any data structure read as part of the job + * contains invalid combinations of data. + * @BASE_JD_EVENT_TILE_RANGE_FAULT: Tile or fragment shading was asked to + * process a tile that is entirely outside the + * bounding box of the frame. + * @BASE_JD_EVENT_STATE_FAULT: Matches ADDR_RANGE_FAULT. A virtual address + * has been found that exceeds the virtual + * address range. + * @BASE_JD_EVENT_OUT_OF_MEMORY: The tiler ran out of memory when executing a job. + * @BASE_JD_EVENT_UNKNOWN: If multiple jobs in a job chain fail, only + * the first one that reports an error will set + * and return full error information. + * Subsequent failing jobs will not update the + * error status registers, and may write an + * error status of UNKNOWN. + * @BASE_JD_EVENT_DELAYED_BUS_FAULT: The GPU received a bus fault for access to + * physical memory where the original virtual + * address is no longer available. + * @BASE_JD_EVENT_SHAREABILITY_FAULT: Matches GPU_SHAREABILITY_FAULT. A cache + * has detected that the same line has been + * accessed as both shareable and non-shareable + * memory from inside the GPU. + * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1: A memory access hit an invalid table + * entry at level 1 of the translation table. + * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2: A memory access hit an invalid table + * entry at level 2 of the translation table. + * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3: A memory access hit an invalid table + * entry at level 3 of the translation table. + * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4: A memory access hit an invalid table + * entry at level 4 of the translation table. 
+ * @BASE_JD_EVENT_PERMISSION_FAULT: A memory access could not be allowed due to + * the permission flags set in translation + * table + * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1: A bus fault occurred while reading + * level 0 of the translation tables. + * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2: A bus fault occurred while reading + * level 1 of the translation tables. + * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3: A bus fault occurred while reading + * level 2 of the translation tables. + * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4: A bus fault occurred while reading + * level 3 of the translation tables. + * @BASE_JD_EVENT_ACCESS_FLAG: Matches ACCESS_FLAG_0. A memory access hit a + * translation table entry with the ACCESS_FLAG + * bit set to zero in level 0 of the + * page table, and the DISABLE_AF_FAULT flag + * was not set. + * @BASE_JD_EVENT_MEM_GROWTH_FAILED: raised for JIT_ALLOC atoms that failed to + * grow memory on demand + * @BASE_JD_EVENT_JOB_CANCELLED: raised when this atom was hard-stopped or its + * dependencies failed + * @BASE_JD_EVENT_JOB_INVALID: raised for many reasons, including invalid data + * in the atom which overlaps with + * BASE_JD_EVENT_JOB_CONFIG_FAULT, or if the + * platform doesn't support the feature specified in + * the atom. + * @BASE_JD_EVENT_PM_EVENT: TODO: remove as it's not used + * @BASE_JD_EVENT_TIMED_OUT: TODO: remove as it's not used + * @BASE_JD_EVENT_BAG_INVALID: TODO: remove as it's not used + * @BASE_JD_EVENT_PROGRESS_REPORT: TODO: remove as it's not used + * @BASE_JD_EVENT_BAG_DONE: TODO: remove as it's not used + * @BASE_JD_EVENT_DRV_TERMINATED: this is a special event generated to indicate + * to userspace that the KBase context has been + * destroyed and Base should stop listening for + * further events + * @BASE_JD_EVENT_REMOVED_FROM_NEXT: raised when an atom that was configured in + * the GPU has to be retried (but it has not + * started) due to e.g., GPU reset + * @BASE_JD_EVENT_END_RP_DONE: this is used for incremental rendering to signal + * the completion of a renderpass. This value + * shouldn't be returned to userspace but I haven't + * seen where it is reset back to JD_EVENT_DONE. + * + * HW and low-level SW events are represented by event codes. + * The status of jobs which succeeded is also represented by + * an event code (see @BASE_JD_EVENT_DONE). + * Events are usually reported as part of a &struct base_jd_event. + * + * The event codes are encoded in the following way: + * * 10:0 - subtype + * * 12:11 - type + * * 13 - SW success (only valid if the SW bit is set) + * * 14 - SW event (HW event if not set) + * * 15 - Kernel event (should never be seen in userspace) + * + * Events are split up into ranges as follows: + * * BASE_JD_EVENT_RANGE_<description>_START + * * BASE_JD_EVENT_RANGE_<description>_END + * + * code is in <description>'s range when: + * BASE_JD_EVENT_RANGE_<description>_START <= code < + * BASE_JD_EVENT_RANGE_<description>_END + * + * Ranges can be asserted for adjacency by testing that the END of the previous + * is equal to the START of the next. This is useful for optimizing some tests + * for range. + * + * A limitation is that the last member of this enum must explicitly be handled + * (with an assert-unreachable statement) in switch statements that use + * variables of this type. Otherwise, the compiler warns that we have not + * handled that enum value. 
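Given that encoding, event classification reduces to simple mask tests. A sketch of the kind of helpers userspace might define (names are illustrative; __u32 is used so the helpers can sit ahead of the enum declared just below, and the BASE_JD_SW_EVENT_* bits come from the anonymous enum above):

/* True for SW-defined events (bit 14). */
static inline int jd_event_is_sw(__u32 code)
{
        return (code & BASE_JD_SW_EVENT) != 0;
}

/* True for successful SW events; bit 13 is only valid when bit 14 is set. */
static inline int jd_event_is_sw_success(__u32 code)
{
        return jd_event_is_sw(code) && (code & BASE_JD_SW_EVENT_SUCCESS) != 0;
}

/* Extract the event type field (bits 12:11). */
static inline __u32 jd_event_type(__u32 code)
{
        return code & BASE_JD_SW_EVENT_TYPE_MASK;
}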
+ */ +enum base_jd_event_code { + /* HW defined exceptions */ + BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0, + + /* non-fatal exceptions */ + BASE_JD_EVENT_NOT_STARTED = 0x00, + BASE_JD_EVENT_DONE = 0x01, + BASE_JD_EVENT_STOPPED = 0x03, + BASE_JD_EVENT_TERMINATED = 0x04, + BASE_JD_EVENT_ACTIVE = 0x08, + + BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40, + BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40, + + /* job exceptions */ + BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40, + BASE_JD_EVENT_JOB_POWER_FAULT = 0x41, + BASE_JD_EVENT_JOB_READ_FAULT = 0x42, + BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43, + BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44, + BASE_JD_EVENT_JOB_BUS_FAULT = 0x48, + BASE_JD_EVENT_INSTR_INVALID_PC = 0x50, + BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51, + BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52, + BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53, + BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54, + BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55, + BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56, + BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58, + BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59, + BASE_JD_EVENT_STATE_FAULT = 0x5A, + BASE_JD_EVENT_OUT_OF_MEMORY = 0x60, + BASE_JD_EVENT_UNKNOWN = 0x7F, + + /* GPU exceptions */ + BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80, + BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88, + + /* MMU exceptions */ + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1, + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2, + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3, + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4, + BASE_JD_EVENT_PERMISSION_FAULT = 0xC8, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4, + BASE_JD_EVENT_ACCESS_FLAG = 0xD8, + + /* SW defined exceptions */ + BASE_JD_EVENT_MEM_GROWTH_FAILED = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000, + BASE_JD_EVENT_TIMED_OUT = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001, + BASE_JD_EVENT_JOB_CANCELLED = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002, + BASE_JD_EVENT_JOB_INVALID = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003, + BASE_JD_EVENT_PM_EVENT = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004, + + BASE_JD_EVENT_BAG_INVALID = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003, + + BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_RESERVED | 0x3FF, + + BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_SUCCESS | 0x000, + + BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000, + BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | + BASE_JD_SW_EVENT_BAG | 0x000, + BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000, + + BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF, + + BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_KERNEL | 0x000, + BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000, + BASE_JD_EVENT_END_RP_DONE = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x001, + + BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF +}; + +/** + * struct base_jd_event_v2 - Event reporting structure + * + * @event_code: event code. + * @atom_number: the atom number that has completed. 
+ * @udata: user data. + * + * This structure is used by the kernel driver to report information + * about GPU events. These can either be HW-specific events or low-level + * SW events, such as job-chain completion. + * + * The event code contains an event type field which can be extracted + * by ANDing with BASE_JD_SW_EVENT_TYPE_MASK. + */ +struct base_jd_event_v2 { + enum base_jd_event_code event_code; + base_atom_id atom_number; + struct base_jd_udata udata; +}; + +/** + * struct base_dump_cpu_gpu_counters - Structure for + * BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME + * jobs. + * @system_time: gpu timestamp + * @cycle_counter: gpu cycle count + * @sec: cpu time (sec) + * @usec: cpu time (usec) + * @padding: padding + * + * This structure is stored into the memory pointed to by the @jc field + * of &struct base_jd_atom. + * + * It must not occupy the same CPU cache line(s) as any neighboring data. + * This is to avoid cases where access to pages containing the structure + * is shared between cached and un-cached memory regions, which would + * cause memory corruption. + */ +struct base_dump_cpu_gpu_counters { + __u64 system_time; + __u64 cycle_counter; + __u64 sec; + __u32 usec; + __u8 padding[36]; +}; + +#endif /* _UAPI_BASE_JM_KERNEL_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h new file mode 100644 index 0000000..1eb6bcb --- /dev/null +++ b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h @@ -0,0 +1,223 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _UAPI_KBASE_JM_IOCTL_H_ +#define _UAPI_KBASE_JM_IOCTL_H_ + +#include <asm-generic/ioctl.h> +#include <linux/types.h> + +/* + * 11.1: + * - Add BASE_MEM_TILER_ALIGN_TOP under base_mem_alloc_flags + * 11.2: + * - KBASE_MEM_QUERY_FLAGS can return KBASE_REG_PF_GROW and KBASE_REG_PROTECTED, + * on which some user-side clients prior to 11.2 might fault if they received + * them + * 11.3: + * - New ioctls KBASE_IOCTL_STICKY_RESOURCE_MAP and + * KBASE_IOCTL_STICKY_RESOURCE_UNMAP + * 11.4: + * - New ioctl KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET + * 11.5: + * - New ioctl: KBASE_IOCTL_MEM_JIT_INIT (old ioctl renamed to _OLD) + * 11.6: + * - Added flags field to base_jit_alloc_info structure, which can be used to + * specify pseudo chunked tiler alignment for JIT allocations. + * 11.7: + * - Removed UMP support + * 11.8: + * - Added BASE_MEM_UNCACHED_GPU under base_mem_alloc_flags + * 11.9: + * - Added BASE_MEM_PERMANENT_KERNEL_MAPPING and BASE_MEM_FLAGS_KERNEL_ONLY + * under base_mem_alloc_flags + * 11.10: + * - Enabled the use of nr_extres field of base_jd_atom_v2 structure for + * JIT_ALLOC and JIT_FREE type softjobs to enable multiple JIT allocations + * with one softjob. 
+ * 11.11:
+ * - Added BASE_MEM_GPU_VA_SAME_4GB_PAGE under base_mem_alloc_flags
+ * 11.12:
+ * - Removed ioctl: KBASE_IOCTL_GET_PROFILING_CONTROLS
+ * 11.13:
+ * - New ioctl: KBASE_IOCTL_MEM_EXEC_INIT
+ * 11.14:
+ * - Add BASE_MEM_GROUP_ID_MASK, base_mem_group_id_get, base_mem_group_id_set
+ *   under base_mem_alloc_flags
+ * 11.15:
+ * - Added BASEP_CONTEXT_MMU_GROUP_ID_MASK under base_context_create_flags.
+ * - Require KBASE_IOCTL_SET_FLAGS before BASE_MEM_MAP_TRACKING_HANDLE can be
+ *   passed to mmap().
+ * 11.16:
+ * - Extended ioctl KBASE_IOCTL_MEM_SYNC to accept imported dma-buf.
+ * - Modified (backwards compatible) ioctl KBASE_IOCTL_MEM_IMPORT behavior for
+ *   dma-buf. Now, buffers are mapped on GPU when first imported, no longer
+ *   requiring external resource or sticky resource tracking, unless
+ *   CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND is enabled.
+ * 11.17:
+ * - Added BASE_JD_REQ_JOB_SLOT.
+ * - Reused padding field in base_jd_atom_v2 to pass job slot number.
+ * - New ioctl: KBASE_IOCTL_GET_CPU_GPU_TIMEINFO
+ * 11.18:
+ * - Added BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP under base_mem_alloc_flags
+ * 11.19:
+ * - Extended base_jd_atom_v2 to allow a renderpass ID to be specified.
+ * 11.20:
+ * - Added new phys_pages member to kbase_ioctl_mem_jit_init for
+ *   KBASE_IOCTL_MEM_JIT_INIT, previous variants of this renamed to use _10_2
+ *   (replacing '_OLD') and _11_5 suffixes
+ * - Replaced compat_core_req (deprecated in 10.3) with jit_id[2] in
+ *   base_jd_atom_v2. It must currently be initialized to zero.
+ * - Added heap_info_gpu_addr to base_jit_alloc_info, and
+ *   BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE allowable in base_jit_alloc_info's
+ *   flags member. Previous variants of this structure are kept and given _10_2
+ *   and _11_5 suffixes.
+ * - The above changes are checked for safe values in usual builds
+ * 11.21:
+ * - v2.0 of mali_trace debugfs file, which now versions the file separately
+ * 11.22:
+ * - Added base_jd_atom (v3), which is seq_nr + base_jd_atom_v2.
+ *   KBASE_IOCTL_JOB_SUBMIT supports both in parallel.
+ * 11.23:
+ * - Modified KBASE_IOCTL_MEM_COMMIT behavior to reject requests to modify
+ *   the physical memory backing of JIT allocations. This was not supposed
+ *   to be a valid use case, but it was allowed by the previous implementation.
+ * 11.24:
+ * - Added a sysfs file 'serialize_jobs' inside a new sub-directory
+ *   'scheduling'.
+ * 11.25:
+ * - Enabled JIT pressure limit in base/kbase by default
+ * 11.26:
+ * - Added kinstr_jm API
+ * 11.27:
+ * - Backwards compatible extension to HWC ioctl.
+ * 11.28:
+ * - Added kernel side cache ops needed hint
+ * 11.29:
+ * - Reserve ioctl 52
+ * 11.30:
+ * - Add a new priority level BASE_JD_PRIO_REALTIME
+ * - Add ioctl 54: This controls the priority setting.
+ * 11.31:
+ * - Added BASE_JD_REQ_LIMITED_CORE_MASK.
+ * - Added ioctl 55: set_limited_core_count.
+ */
+#define BASE_UK_VERSION_MAJOR 11
+#define BASE_UK_VERSION_MINOR 31
+
+/**
+ * struct kbase_ioctl_version_check - Check version compatibility between
+ * kernel and userspace
+ *
+ * @major: Major version number
+ * @minor: Minor version number
+ */
+struct kbase_ioctl_version_check {
+	__u16 major;
+	__u16 minor;
+};
+
+#define KBASE_IOCTL_VERSION_CHECK \
+	_IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check)
+
+
+/**
+ * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel
+ *
+ * @addr: Memory address of an array of struct base_jd_atom_v2 or v3
+ * @nr_atoms: Number of entries in the array
+ * @stride: sizeof(struct base_jd_atom_v2) or sizeof(struct base_jd_atom)
+ */
+struct kbase_ioctl_job_submit {
+	__u64 addr;
+	__u32 nr_atoms;
+	__u32 stride;
+};
+
+#define KBASE_IOCTL_JOB_SUBMIT \
+	_IOW(KBASE_IOCTL_TYPE, 2, struct kbase_ioctl_job_submit)
+
+#define KBASE_IOCTL_POST_TERM \
+	_IO(KBASE_IOCTL_TYPE, 4)
+
+/**
+ * struct kbase_ioctl_soft_event_update - Update the status of a soft-event
+ * @event: GPU address of the event which has been updated
+ * @new_status: The new status to set
+ * @flags: Flags for future expansion
+ */
+struct kbase_ioctl_soft_event_update {
+	__u64 event;
+	__u32 new_status;
+	__u32 flags;
+};
+
+#define KBASE_IOCTL_SOFT_EVENT_UPDATE \
+	_IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update)
+
+/**
+ * struct kbase_kinstr_jm_fd_out - Explains the compatibility information for
+ * the `struct kbase_kinstr_jm_atom_state_change` structure returned from the
+ * kernel
+ *
+ * @size: The size of the `struct kbase_kinstr_jm_atom_state_change`
+ * @version: Represents a breaking change in the
+ * `struct kbase_kinstr_jm_atom_state_change`
+ * @padding: Explicit padding to get the structure up to 64 bits. See
+ * https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst
+ *
+ * The `struct kbase_kinstr_jm_atom_state_change` may have extra members at the
+ * end of the structure that older user space might not understand. If the
+ * `version` is the same, the structure is still compatible with newer kernels.
+ * The `size` can be used to cast the opaque memory returned from the kernel.
+ */
+struct kbase_kinstr_jm_fd_out {
+	__u16 size;
+	__u8 version;
+	__u8 padding[5];
+};
+
+/**
+ * struct kbase_kinstr_jm_fd_in - Options when creating the file descriptor
+ *
+ * @count: Number of atom states that can be stored in the kernel circular
+ *         buffer. Must be a power of two
+ * @padding: Explicit padding to get the structure up to 64 bits. See
+ * https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst
+ */
+struct kbase_kinstr_jm_fd_in {
+	__u16 count;
+	__u8 padding[6];
+};
+
+union kbase_kinstr_jm_fd {
+	struct kbase_kinstr_jm_fd_in in;
+	struct kbase_kinstr_jm_fd_out out;
+};
+
+#define KBASE_IOCTL_KINSTR_JM_FD \
+	_IOWR(KBASE_IOCTL_TYPE, 51, union kbase_kinstr_jm_fd)
+
+
+#define KBASE_IOCTL_VERSION_CHECK_RESERVED \
+	_IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check)
+
+#endif /* _UAPI_KBASE_JM_IOCTL_H_ */
diff --git a/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h b/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h
new file mode 100644
index 0000000..554c5a3
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h
@@ -0,0 +1,826 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Base structures shared with the kernel.
+ */
+
+#ifndef _UAPI_BASE_KERNEL_H_
+#define _UAPI_BASE_KERNEL_H_
+
+#include <linux/types.h>
+
+struct base_mem_handle {
+	struct {
+		__u64 handle;
+	} basep;
+};
+
+#include "mali_base_mem_priv.h"
+#include "gpu/mali_kbase_gpu_id.h"
+#include "gpu/mali_kbase_gpu_coherency.h"
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+#if defined(CDBG_ASSERT)
+#define LOCAL_ASSERT CDBG_ASSERT
+#elif defined(KBASE_DEBUG_ASSERT)
+#define LOCAL_ASSERT KBASE_DEBUG_ASSERT
+#else
+#if defined(__KERNEL__)
+#error assert macro not defined!
+#else
+#define LOCAL_ASSERT(...)	((void)#__VA_ARGS__)
+#endif
+#endif
+
+#if defined(PAGE_MASK) && defined(PAGE_SHIFT)
+#define LOCAL_PAGE_SHIFT PAGE_SHIFT
+#define LOCAL_PAGE_LSB ~PAGE_MASK
+#else
+#ifndef OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+#define OSU_CONFIG_CPU_PAGE_SIZE_LOG2 12
+#endif
+
+#if defined(OSU_CONFIG_CPU_PAGE_SIZE_LOG2)
+#define LOCAL_PAGE_SHIFT OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1)
+#else
+#error Failed to find page size
+#endif
+#endif
+
+/* Physical memory group ID for normal usage.
+ */
+#define BASE_MEM_GROUP_DEFAULT (0)
+
+/* Number of physical memory groups.
+ */
+#define BASE_MEM_GROUP_COUNT (16)
+
+/**
+ * typedef base_mem_alloc_flags - Memory allocation, access/hint flags.
+ *
+ * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator
+ * in order to determine the best cache policy. Some combinations are
+ * of course invalid (e.g. MEM_PROT_CPU_WR | MEM_HINT_CPU_RD, which would
+ * define a write-only region on the CPU side that is nevertheless heavily
+ * read by the CPU).
+ * Other flags are only meaningful to a particular allocator.
+ * More flags can be added to this list, as long as they don't clash
+ * (see BASE_MEM_FLAGS_NR_BITS for the number of the first free bit).
+ */
+typedef __u32 base_mem_alloc_flags;
+
+/* A mask for all the flags which are modifiable via the base_mem_set_flags
+ * interface.
+ */
+#define BASE_MEM_FLAGS_MODIFIABLE \
+	(BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \
+	 BASE_MEM_COHERENT_LOCAL)
+
+/* A mask of all the flags that can be returned via the base_mem_get_flags()
+ * interface.
+ */
+#define BASE_MEM_FLAGS_QUERYABLE \
+	(BASE_MEM_FLAGS_INPUT_MASK & ~(BASE_MEM_SAME_VA | \
+		BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_DONT_NEED | \
+		BASE_MEM_IMPORT_SHARED | BASE_MEM_FLAGS_RESERVED | \
+		BASEP_MEM_FLAGS_KERNEL_ONLY))
+
+/**
+ * enum base_mem_import_type - Memory types supported by @a base_mem_import
+ *
+ * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type
+ * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int)
+ * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a
+ *                                    base_mem_import_user_buffer
+ *
+ * Each type defines what the supported handle type is.
+ *
+ * If any new type is added here, ARM must be contacted
+ * to allocate a numeric value for it.
+ * Do not just add a new type without synchronizing with ARM
+ * as future releases from ARM might include other new types
+ * which could clash with your custom types.
+ */
+enum base_mem_import_type {
+	BASE_MEM_IMPORT_TYPE_INVALID = 0,
+	/*
+	 * Import type with value 1 is deprecated.
+	 */
+	BASE_MEM_IMPORT_TYPE_UMM = 2,
+	BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3
+};
+
+/**
+ * struct base_mem_import_user_buffer - Handle of an imported user buffer
+ *
+ * @ptr:    address of imported user buffer
+ * @length: length of imported user buffer in bytes
+ *
+ * This structure is used to represent a handle of an imported user buffer.
+ */
+struct base_mem_import_user_buffer {
+	__u64 ptr;
+	__u64 length;
+};
+
+/* Mask to detect 4GB boundary alignment */
+#define BASE_MEM_MASK_4GB 0xfffff000UL
+/* Mask to detect 4GB boundary (in page units) alignment */
+#define BASE_MEM_PFN_MASK_4GB (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT)
+
+/* Limit on the 'extension' parameter for an allocation with the
+ * BASE_MEM_TILER_ALIGN_TOP flag set
+ *
+ * This is the same as the maximum limit for a Buffer Descriptor's chunk size
+ */
+#define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2 \
+	(21u - (LOCAL_PAGE_SHIFT))
+#define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES \
+	(1ull << (BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2))
+
+/* Bit mask of cookies used for memory allocation setup */
+#define KBASE_COOKIE_MASK  ~1UL /* bit 0 is reserved */
+
+/* Maximum size allowed in a single KBASE_IOCTL_MEM_ALLOC call */
+#define KBASE_MEM_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */
+
+/*
+ * struct base_fence - Cross-device synchronisation fence.
+ *
+ * A fence is used to signal when the GPU has finished accessing a resource that
+ * may be shared with other devices, and also to delay work done asynchronously
+ * by the GPU until other devices have finished accessing a shared resource.
+ */
+struct base_fence {
+	struct {
+		int fd;
+		int stream_fd;
+	} basep;
+};
+
+/**
+ * struct base_mem_aliasing_info - Memory aliasing info
+ *
+ * Describes a memory handle to be aliased.
+ * A subset of the handle can be chosen for aliasing, given an offset and a
+ * length.
+ * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a
+ * region where a special page is mapped with a write-alloc cache setup,
+ * typically used when the write result of the GPU isn't needed, but the GPU
+ * must write anyway.
+ *
+ * Offset and length are specified in pages.
+ * Offset must be within the size of the handle.
+ * Offset+length must not overrun the size of the handle.
+ *
+ * @handle: Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ * @offset: Offset within the handle to start aliasing from, in pages.
+ *          Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE.
+ * @length: Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ *          specifies the number of times the special page is needed.
+ */
+struct base_mem_aliasing_info {
+	struct base_mem_handle handle;
+	__u64 offset;
+	__u64 length;
+};
+
+/* Maximum percentage of just-in-time memory allocation trimming to perform
+ * on free.
+ */
+#define BASE_JIT_MAX_TRIM_LEVEL (100)
+
+/* Maximum number of concurrent just-in-time memory allocations.
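+ * This matches the number of valid non-zero values of the __u8 @id field
+ * in struct base_jit_alloc_info below, where an id of zero is invalid.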
+ */
+#define BASE_JIT_ALLOC_COUNT (255)
+
+/* base_jit_alloc_info in use for kernel driver versions 10.2 to early 11.5
+ *
+ * jit_version is 1
+ *
+ * Because no explicit padding was specified, 32-bit and 64-bit user clients
+ * may have assumed different sizes for the struct
+ *
+ * An array of structures was not supported
+ */
+struct base_jit_alloc_info_10_2 {
+	__u64 gpu_alloc_addr;
+	__u64 va_pages;
+	__u64 commit_pages;
+	__u64 extension;
+	__u8 id;
+};
+
+/* base_jit_alloc_info introduced by kernel driver version 11.5, and in use up
+ * to 11.19
+ *
+ * This structure had a number of modifications during and after kernel driver
+ * version 11.5, but remains size-compatible throughout its version history;
+ * earlier variants stay compatible with later ones because the unused space
+ * in the structure must be zero-initialized.
+ *
+ * jit_version is 2
+ *
+ * Kernel driver version history:
+ * 11.5: Initial introduction with 'usage_id' and padding[5]. All padding bytes
+ *       must be zero. Kbase minor version was not incremented, so some
+ *       versions of 11.5 do not have this change.
+ * 11.5: Added 'bin_id' and 'max_allocations', replacing 2 padding bytes (Kbase
+ *       minor version not incremented)
+ * 11.6: Added 'flags', replacing 1 padding byte
+ * 11.10: Arrays of this structure are supported
+ */
+struct base_jit_alloc_info_11_5 {
+	__u64 gpu_alloc_addr;
+	__u64 va_pages;
+	__u64 commit_pages;
+	__u64 extension;
+	__u8 id;
+	__u8 bin_id;
+	__u8 max_allocations;
+	__u8 flags;
+	__u8 padding[2];
+	__u16 usage_id;
+};
+
+/**
+ * struct base_jit_alloc_info - Structure which describes a JIT allocation
+ *                              request.
+ * @gpu_alloc_addr:     The GPU virtual address to write the JIT
+ *                      allocated GPU virtual address to.
+ * @va_pages:           The minimum number of virtual pages required.
+ * @commit_pages:       The minimum number of physical pages which
+ *                      should back the allocation.
+ * @extension:          Granularity of physical pages to grow the
+ *                      allocation by during a fault.
+ * @id:                 Unique ID provided by the caller, this is used
+ *                      to pair allocation and free requests.
+ *                      Zero is not a valid value.
+ * @bin_id:             The JIT allocation bin, used in conjunction with
+ *                      @max_allocations to limit the number of each
+ *                      type of JIT allocation.
+ * @max_allocations:    The maximum number of allocations allowed within
+ *                      the bin specified by @bin_id. Should be the same
+ *                      for all allocations within the same bin.
+ * @flags:              flags specifying the special requirements for
+ *                      the JIT allocation, see
+ *                      %BASE_JIT_ALLOC_VALID_FLAGS
+ * @padding:            Expansion space - should be initialised to zero
+ * @usage_id:           A hint about which allocation should be reused.
+ *                      The kernel should attempt to use a previous
+ *                      allocation with the same usage_id
+ * @heap_info_gpu_addr: Pointer to an object in GPU memory describing
+ *                      the actual usage of the region.
+ *
+ * jit_version is 3.
+ *
+ * When modifications are made to this structure, it is still compatible with
+ * jit_version 3 when: a) the size is unchanged, and b) new members only
+ * replace the padding bytes.
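+ *
+ * As an illustrative sketch only (the values below are arbitrary, and
+ * result_gpu_va stands for a caller-owned GPU address), a client can stay
+ * compatible with variants that replace padding bytes by zero-initialising
+ * the whole structure before filling in the fields it uses:
+ *
+ *   struct base_jit_alloc_info info = { 0 };
+ *
+ *   info.gpu_alloc_addr = result_gpu_va;
+ *   info.va_pages = 16;
+ *   info.commit_pages = 1;
+ *   info.extension = 1;
+ *   info.id = 1;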
+ *
+ * Previous jit_version history:
+ * jit_version == 1, refer to &base_jit_alloc_info_10_2
+ * jit_version == 2, refer to &base_jit_alloc_info_11_5
+ *
+ * Kbase version history:
+ * 11.20: added @heap_info_gpu_addr
+ */
+struct base_jit_alloc_info {
+	__u64 gpu_alloc_addr;
+	__u64 va_pages;
+	__u64 commit_pages;
+	__u64 extension;
+	__u8 id;
+	__u8 bin_id;
+	__u8 max_allocations;
+	__u8 flags;
+	__u8 padding[2];
+	__u16 usage_id;
+	__u64 heap_info_gpu_addr;
+};
+
+enum base_external_resource_access {
+	BASE_EXT_RES_ACCESS_SHARED,
+	BASE_EXT_RES_ACCESS_EXCLUSIVE
+};
+
+struct base_external_resource {
+	__u64 ext_resource;
+};
+
+
+/**
+ * The maximum number of external resources which can be mapped/unmapped
+ * in a single request.
+ */
+#define BASE_EXT_RES_COUNT_MAX 10
+
+/**
+ * struct base_external_resource_list - Structure which describes a list of
+ *                                      external resources.
+ * @count:   The number of resources.
+ * @ext_res: Array of external resources which is
+ *           sized at allocation time.
+ */
+struct base_external_resource_list {
+	__u64 count;
+	struct base_external_resource ext_res[1];
+};
+
+struct base_jd_debug_copy_buffer {
+	__u64 address;
+	__u64 size;
+	struct base_external_resource extres;
+};
+
+#define GPU_MAX_JOB_SLOTS 16
+
+/**
+ * User-side Base GPU Property Queries
+ *
+ * The User-side Base GPU Property Query interface encapsulates two
+ * sub-modules:
+ *
+ * - "Dynamic GPU Properties"
+ * - "Base Platform Config GPU Properties"
+ *
+ * Base only deals with properties that vary between different GPU
+ * implementations - the Dynamic GPU properties and the Platform Config
+ * properties.
+ *
+ * For properties that are constant for the GPU Architecture, refer to the
+ * GPU module. However, we will discuss their relevance here just to
+ * provide background information.
+ *
+ * About the GPU Properties in Base and GPU modules
+ *
+ * The compile-time properties (Platform Config, GPU Compile-time
+ * properties) are exposed as pre-processor macros.
+ *
+ * Complementing the compile-time properties are the Dynamic GPU
+ * Properties, which act as a conduit for the GPU Configuration
+ * Discovery.
+ *
+ * In general, the dynamic properties are present to verify that the platform
+ * has been configured correctly with the right set of Platform Config
+ * Compile-time Properties.
+ *
+ * As a consistent guide across the entire DDK, the choice for dynamic or
+ * compile-time should consider the following, in order:
+ * 1. Can the code be written so that it doesn't need to know the
+ *    implementation limits at all?
+ * 2. If you need the limits, get the information from the Dynamic Property
+ *    lookup. This should be done once as you fetch the context, and then cached
+ *    as part of the context data structure, so it's cheap to access.
+ * 3. If there's a clear and arguable inefficiency in using Dynamic Properties,
+ *    then use a Compile-Time Property (Platform Config, or GPU Compile-time
+ *    property). Examples of where this might be sensible follow:
+ *    - Part of a critical inner-loop
+ *    - Frequent re-use throughout the driver, causing significant extra load
+ *      instructions or control flow that would be worthwhile optimizing out.
+ *
+ * We cannot provide an exhaustive set of examples, nor can we provide a
+ * rule for every possible situation. Use common sense, and think about: what
+ * the rest of the driver will be doing; how the compiler might represent the
+ * value if it is a compile-time constant; whether an OEM shipping multiple
+ * devices would benefit much more from a single DDK binary, instead of
+ * insignificant micro-optimizations.
+ *
+ * Dynamic GPU Properties
+ *
+ * Dynamic GPU properties are presented in two sets:
+ * 1. the commonly used properties in @ref base_gpu_props, which have been
+ *    unpacked from GPU register bitfields.
+ * 2. The full set of raw, unprocessed properties in gpu_raw_gpu_props
+ *    (also a member of base_gpu_props). All of these are presented in
+ *    the packed form, as presented by the GPU registers themselves.
+ *
+ * The raw properties in gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it does not need to be processed
+ * by the driver. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ * The properties returned extend the GPU Configuration Discovery
+ * registers. For example, GPU clock speed is not specified in the GPU
+ * Architecture, but is necessary for OpenCL's clGetDeviceInfo() function.
+ *
+ * The GPU properties are obtained by a call to
+ * base_get_gpu_props(). This simply returns a pointer to a const
+ * base_gpu_props structure. It is constant for the life of a base
+ * context. Multiple calls to base_get_gpu_props() on a base context
+ * return the same pointer to a constant structure. This avoids cache pollution
+ * of the common data.
+ *
+ * This pointer must not be freed, because it does not point to the start of a
+ * region allocated by the memory allocator; instead, just close the @ref
+ * base_context.
+ *
+ *
+ * Kernel Operation
+ *
+ * During Base Context Create time, user-side makes a single kernel call:
+ * - A call to fill user memory with GPU information structures
+ *
+ * The kernel-side will fill the provided memory with the entire processed
+ * base_gpu_props structure, because this information is required in both
+ * user and kernel side; it does not make sense to decode it twice.
+ *
+ * Coherency groups must be derived from the bitmasks, but this can be done
+ * kernel side, and just once at kernel startup: Coherency groups must already
+ * be known kernel-side, to support chains that specify an 'Only Coherent Group'
+ * SW requirement, or an 'Only Coherent Group with Tiler' SW requirement.
+ *
+ * Coherency Group calculation
+ *
+ * Creation of the coherent group data is done at device-driver startup, and so
+ * is one-time. This will most likely involve a loop with CLZ, shifting, and
+ * bit clearing on the L2_PRESENT mask, depending on whether the
+ * system is L2 Coherent. The number of shader cores is determined by a
+ * population count, since faulty cores may be disabled during production,
+ * producing a non-contiguous mask.
+ *
+ * The memory requirements for this algorithm can be determined either by a __u64
+ * population count on the L2_PRESENT mask (a LUT helper is already
+ * required for the above), or by the simple assumption that there can be no
+ * more than 16 coherent groups, since core groups are typically 4 cores.
+ */
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
+
+#define BASE_MAX_COHERENT_GROUPS 16
+/**
+ * struct mali_base_gpu_core_props - GPU core props info
+ * @product_id: Product specific value.
+ * @version_status: Status of the GPU release. No defined values, but starts at
+ *   0 and increases by one for each release status (alpha, beta, EAC, etc.).
+ *   4 bit values (0-15).
+ * @minor_revision: Minor release number of the GPU. "P" part of an "RnPn"
+ *   release number.
+ *   8 bit values (0-255).
+ * @major_revision: Major release number of the GPU. "R" part of an "RnPn"
+ *   release number.
+ *   4 bit values (0-15).
+ * @padding: padding to align to 8 bytes
+ * @gpu_freq_khz_max: The maximum GPU frequency. Reported to applications by
+ *   clGetDeviceInfo()
+ * @log2_program_counter_size: Size of the shader program counter, in bits.
+ * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU. This
+ *   is a bitpattern where a set bit indicates that the format is supported.
+ *   Before using a texture format, it is recommended that the corresponding
+ *   bit be checked.
+ * @gpu_available_memory_size: Theoretical maximum memory available to the GPU.
+ *   It is unlikely that a client will be able to allocate all of this memory
+ *   for their own purposes, but this at least provides an upper bound on the
+ *   memory available to the GPU.
+ *   This is required for OpenCL's clGetDeviceInfo() call when
+ *   CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
+ *   client will not be expecting to allocate anywhere near this value.
+ * @num_exec_engines: The number of execution engines.
+ */
+struct mali_base_gpu_core_props {
+	__u32 product_id;
+	__u16 version_status;
+	__u16 minor_revision;
+	__u16 major_revision;
+	__u16 padding;
+	__u32 gpu_freq_khz_max;
+	__u32 log2_program_counter_size;
+	__u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+	__u64 gpu_available_memory_size;
+	__u8 num_exec_engines;
+};
+
+/*
+ * More information is possible - but associativity and bus width are not
+ * required by upper-level APIs.
+ */
+struct mali_base_gpu_l2_cache_props {
+	__u8 log2_line_size;
+	__u8 log2_cache_size;
+	__u8 num_l2_slices; /* Number of L2C slices. 1 or higher */
+	__u8 padding[5];
+};
+
+struct mali_base_gpu_tiler_props {
+	__u32 bin_size_bytes;	/* Max is 4*2^15 */
+	__u32 max_active_levels;	/* Max is 2^15 */
+};
+
+/**
+ * struct mali_base_gpu_thread_props - GPU threading system details.
+ * @max_threads: Max. number of threads per core
+ * @max_workgroup_size: Max. number of threads per workgroup
+ * @max_barrier_size: Max. number of threads that can synchronize on a
+ *   simple barrier
+ * @max_registers: Total size [1..65535] of the register file available
+ *   per core.
+ * @max_task_queue: Max. tasks [1..255] which may be sent to a core
+ *   before it becomes blocked.
+ * @max_thread_group_split: Max. allowed value [1..15] of the Thread Group Split
+ *   field.
+ * @impl_tech: 0 = Not specified, 1 = Silicon, 2 = FPGA,
+ *   3 = SW Model/Emulation
+ * @padding: padding to align to 8 bytes
+ * @tls_alloc: Number of threads per core that TLS must be
+ *   allocated for
+ */
+struct mali_base_gpu_thread_props {
+	__u32 max_threads;
+	__u32 max_workgroup_size;
+	__u32 max_barrier_size;
+	__u16 max_registers;
+	__u8 max_task_queue;
+	__u8 max_thread_group_split;
+	__u8 impl_tech;
+	__u8 padding[3];
+	__u32 tls_alloc;
+};
+
+/**
+ * struct mali_base_gpu_coherent_group - descriptor for a coherent group
+ * @core_mask: Core restriction mask required for the group
+ * @num_cores: Number of cores in the group
+ * @padding: padding to align to 8 bytes
+ *
+ * \c core_mask exposes all cores in that coherent group, and \c num_cores
+ * provides a cached population-count for that mask.
+ *
+ * @note Whilst all cores are exposed in the mask, not all may be available to
+ * the application, depending on the Kernel Power policy.
+ *
+ * @note If u64s must be 8-byte aligned, then this structure has 32 bits of
+ * wastage.
+ */
+struct mali_base_gpu_coherent_group {
+	__u64 core_mask;
+	__u16 num_cores;
+	__u16 padding[3];
+};
+
+/**
+ * struct mali_base_gpu_coherent_group_info - Coherency group information
+ * @num_groups: Number of coherent groups in the GPU.
+ * @num_core_groups: Number of core groups (coherent or not) in the GPU.
+ *   Equivalent to the number of L2 Caches.
+ *   The GPU Counter dumping writes 2048 bytes per core group, regardless
+ *   of whether the core groups are coherent or not. Hence this member is
+ *   needed to calculate how much memory is required for dumping.
+ *   @note Do not use it to work out how many valid elements are in the
+ *   group[] member. Use num_groups instead.
+ * @coherency: Coherency features of the memory, accessed by gpu_mem_features
+ *   methods
+ * @padding: padding to align to 8 bytes
+ * @group: Descriptors of coherent groups
+ *
+ * Note that the sizes of the members could be reduced. However, the \c group
+ * member might be 8-byte aligned to ensure the __u64 core_mask is 8-byte
+ * aligned, thus leading to wastage if the other members' sizes were reduced.
+ *
+ * The groups are sorted by core mask. The core masks are non-repeating and do
+ * not intersect.
+ */
+struct mali_base_gpu_coherent_group_info {
+	__u32 num_groups;
+	__u32 num_core_groups;
+	__u32 coherency;
+	__u32 padding;
+	struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
+};
+
+/**
+ * struct gpu_raw_gpu_props - A complete description of the GPU's Hardware
+ *                            Configuration Discovery registers.
+ * @shader_present: Shader core present bitmap
+ * @tiler_present: Tiler core present bitmap
+ * @l2_present: Level 2 cache present bitmap
+ * @stack_present: Core stack present bitmap
+ * @l2_features: L2 features
+ * @core_features: Core features
+ * @mem_features: Mem features
+ * @mmu_features: Mmu features
+ * @as_present: Bitmap of address spaces present
+ * @js_present: Job slots present
+ * @js_features: Array of job slot features.
+ * @tiler_features: Tiler features
+ * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU
+ * @gpu_id: GPU and revision identifier
+ * @thread_max_threads: Maximum number of threads per core
+ * @thread_max_workgroup_size: Maximum number of threads per workgroup
+ * @thread_max_barrier_size: Maximum number of threads per barrier
+ * @thread_features: Thread features
+ * @coherency_mode: Note: This is the _selected_ coherency mode rather than the
+ *   available modes as exposed in the coherency_features register
+ * @thread_tls_alloc: Number of threads per core that TLS must be allocated for
+ * @gpu_features: GPU features
+ *
+ * The information is presented inefficiently for access. For frequent access,
+ * the values should be better expressed in an unpacked form in the
+ * base_gpu_props structure.
+ *
+ * The raw properties in gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it does not need to be processed
+ * by the driver. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ */
+struct gpu_raw_gpu_props {
+	__u64 shader_present;
+	__u64 tiler_present;
+	__u64 l2_present;
+	__u64 stack_present;
+	__u32 l2_features;
+	__u32 core_features;
+	__u32 mem_features;
+	__u32 mmu_features;
+
+	__u32 as_present;
+
+	__u32 js_present;
+	__u32 js_features[GPU_MAX_JOB_SLOTS];
+	__u32 tiler_features;
+	__u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+
+	__u32 gpu_id;
+
+	__u32 thread_max_threads;
+	__u32 thread_max_workgroup_size;
+	__u32 thread_max_barrier_size;
+	__u32 thread_features;
+
+	/*
+	 * Note: This is the _selected_ coherency mode rather than the
+	 * available modes as exposed in the coherency_features register.
+	 */
+	__u32 coherency_mode;
+
+	__u32 thread_tls_alloc;
+	__u64 gpu_features;
+};
+
+/**
+ * struct base_gpu_props - Return structure for base_get_gpu_props().
+ * @core_props: Core props.
+ * @l2_props: L2 props.
+ * @unused_1: Keep for backwards compatibility.
+ * @tiler_props: Tiler props.
+ * @thread_props: Thread props.
+ * @raw_props: This member is large, likely to be 128 bytes.
+ * @coherency_info: This must be the last member of the structure.
+ *
+ * NOTE: the raw_props member in this data structure contains the register
+ * values from which the values of the other members are derived. The derived
+ * members exist to allow for efficient access and/or shielding the details
+ * of the layout of the registers.
+ */
+struct base_gpu_props {
+	struct mali_base_gpu_core_props core_props;
+	struct mali_base_gpu_l2_cache_props l2_props;
+	__u64 unused_1;
+	struct mali_base_gpu_tiler_props tiler_props;
+	struct mali_base_gpu_thread_props thread_props;
+	struct gpu_raw_gpu_props raw_props;
+	struct mali_base_gpu_coherent_group_info coherency_info;
+};
+
+#if MALI_USE_CSF
+#include "csf/mali_base_csf_kernel.h"
+#else
+#include "jm/mali_base_jm_kernel.h"
+#endif
+
+/**
+ * base_mem_group_id_get() - Get group ID from flags
+ * @flags: Flags to pass to base_mem_alloc
+ *
+ * This inline function extracts the encoded group ID from flags
+ * and converts it into a numeric value (0~15).
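+ *
+ * The encoding round-trips with base_mem_group_id_set(); for any id in the
+ * range 0..(BASE_MEM_GROUP_COUNT - 1):
+ *
+ *   base_mem_group_id_get(base_mem_group_id_set(id)) == id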
+ *
+ * Return: group ID (0~15) extracted from the parameter
+ */
+static __inline__ int base_mem_group_id_get(base_mem_alloc_flags flags)
+{
+	LOCAL_ASSERT((flags & ~BASE_MEM_FLAGS_INPUT_MASK) == 0);
+	return (int)((flags & BASE_MEM_GROUP_ID_MASK) >>
+			BASEP_MEM_GROUP_ID_SHIFT);
+}
+
+/**
+ * base_mem_group_id_set() - Set group ID into base_mem_alloc_flags
+ * @id: group ID (0~15) you want to encode
+ *
+ * This inline function encodes a specific group ID into base_mem_alloc_flags.
+ * Parameter 'id' should lie between 0 and 15.
+ *
+ * Return: base_mem_alloc_flags with the group ID (id) encoded
+ *
+ * The return value can be combined with other flags passed to base_mem_alloc
+ * to identify a specific memory group.
+ */
+static __inline__ base_mem_alloc_flags base_mem_group_id_set(int id)
+{
+	if ((id < 0) || (id >= BASE_MEM_GROUP_COUNT)) {
+		/* Set to default value when id is out of range. */
+		id = BASE_MEM_GROUP_DEFAULT;
+	}
+
+	return ((base_mem_alloc_flags)id << BASEP_MEM_GROUP_ID_SHIFT) &
+		BASE_MEM_GROUP_ID_MASK;
+}
+
+/**
+ * base_context_mmu_group_id_set - Encode a memory group ID in
+ *                                 base_context_create_flags
+ *
+ * Memory allocated for GPU page tables will come from the specified group.
+ *
+ * @group_id: Physical memory group ID. Range is 0..(BASE_MEM_GROUP_COUNT-1).
+ *
+ * Return: Bitmask of flags to pass to base_context_init.
+ */
+static __inline__ base_context_create_flags base_context_mmu_group_id_set(
+	int const group_id)
+{
+	LOCAL_ASSERT(group_id >= 0);
+	LOCAL_ASSERT(group_id < BASE_MEM_GROUP_COUNT);
+	return BASEP_CONTEXT_MMU_GROUP_ID_MASK &
+		((base_context_create_flags)group_id <<
+		 BASEP_CONTEXT_MMU_GROUP_ID_SHIFT);
+}
+
+/**
+ * base_context_mmu_group_id_get - Decode a memory group ID from
+ *                                 base_context_create_flags
+ *
+ * Memory allocated for GPU page tables will come from the returned group.
+ *
+ * @flags: Bitmask of flags to pass to base_context_init.
+ *
+ * Return: Physical memory group ID. Valid range is 0..(BASE_MEM_GROUP_COUNT-1).
+ */
+static __inline__ int base_context_mmu_group_id_get(
+	base_context_create_flags const flags)
+{
+	LOCAL_ASSERT(flags == (flags & BASEP_CONTEXT_CREATE_ALLOWED_FLAGS));
+	return (int)((flags & BASEP_CONTEXT_MMU_GROUP_ID_MASK) >>
+			BASEP_CONTEXT_MMU_GROUP_ID_SHIFT);
+}
+
+/*
+ * A number of bit flags are defined for requesting cpu_gpu_timeinfo. These
+ * flags are also used, where applicable, for specifying which fields
+ * are valid following the request operation.
+ */
+
+/* For monotonic (counter) time field */
+#define BASE_TIMEINFO_MONOTONIC_FLAG (1UL << 0)
+/* For system wide timestamp */
+#define BASE_TIMEINFO_TIMESTAMP_FLAG (1UL << 1)
+/* For GPU cycle counter */
+#define BASE_TIMEINFO_CYCLE_COUNTER_FLAG (1UL << 2)
+/* Specify kernel GPU register timestamp */
+#define BASE_TIMEINFO_KERNEL_SOURCE_FLAG (1UL << 30)
+/* Specify userspace cntvct_el0 timestamp source */
+#define BASE_TIMEINFO_USER_SOURCE_FLAG (1UL << 31)
+
+#define BASE_TIMEREQUEST_ALLOWED_FLAGS (\
+		BASE_TIMEINFO_MONOTONIC_FLAG | \
+		BASE_TIMEINFO_TIMESTAMP_FLAG | \
+		BASE_TIMEINFO_CYCLE_COUNTER_FLAG | \
+		BASE_TIMEINFO_KERNEL_SOURCE_FLAG | \
+		BASE_TIMEINFO_USER_SOURCE_FLAG)
+
+/* Maximum number of source allocations allowed to create an alias allocation.
+ * This needs to be 4096 * 6 to allow cube map arrays with up to 4096 array
+ * layers, since each cube map in the array will have 6 faces.
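+ * That is, 4096 layers * 6 faces = 24576 alias entries.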
+ */
+#define BASE_MEM_ALIAS_MAX_ENTS ((size_t)24576)
+
+#endif /* _UAPI_BASE_KERNEL_H_ */
diff --git a/common/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h b/common/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h
new file mode 100644
index 0000000..982bd3d
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2010-2015, 2020-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_BASE_MEM_PRIV_H_
+#define _UAPI_BASE_MEM_PRIV_H_
+
+#include <linux/types.h>
+
+#include "mali_base_kernel.h"
+
+#define BASE_SYNCSET_OP_MSYNC	(1U << 0)
+#define BASE_SYNCSET_OP_CSYNC	(1U << 1)
+
+/*
+ * This structure describes a basic memory coherency operation.
+ * It can either be:
+ * @li a sync from CPU to Memory:
+ *	- type = ::BASE_SYNCSET_OP_MSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes
+ *	- offset is ignored.
+ * @li a sync from Memory to CPU:
+ *	- type = ::BASE_SYNCSET_OP_CSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes.
+ *	- offset is ignored.
+ */
+struct basep_syncset {
+	struct base_mem_handle mem_handle;
+	__u64 user_addr;
+	__u64 size;
+	__u8 type;
+	__u8 padding[7];
+};
+
+#endif /* _UAPI_BASE_MEM_PRIV_H_ */
diff --git a/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h b/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
new file mode 100644
index 0000000..615dbb0
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2015, 2020-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_HWCNT_READER_H_
+#define _UAPI_KBASE_HWCNT_READER_H_
+
+#include <stddef.h>
+#include <linux/types.h>
+
+/* The ids of ioctl commands.
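+ *
+ * These are issued on the file descriptor returned by
+ * KBASE_IOCTL_HWCNT_READER_SETUP. As an illustrative sketch only (not a
+ * normative sequence), one manual dump might look like this, typically
+ * after waiting for the fd to become readable:
+ *
+ *   struct kbase_hwcnt_reader_metadata meta;
+ *
+ *   ioctl(fd, KBASE_HWCNT_READER_DUMP, 0);
+ *   ioctl(fd, KBASE_HWCNT_READER_GET_BUFFER, &meta);
+ *   (read the sample stored at position meta.buffer_idx)
+ *   ioctl(fd, KBASE_HWCNT_READER_PUT_BUFFER, &meta);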
*/ +#define KBASE_HWCNT_READER 0xBE +#define KBASE_HWCNT_READER_GET_HWVER _IOR(KBASE_HWCNT_READER, 0x00, __u32) +#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, __u32) +#define KBASE_HWCNT_READER_DUMP _IOW(KBASE_HWCNT_READER, 0x10, __u32) +#define KBASE_HWCNT_READER_CLEAR _IOW(KBASE_HWCNT_READER, 0x11, __u32) +#define KBASE_HWCNT_READER_GET_BUFFER _IOC(_IOC_READ, KBASE_HWCNT_READER, 0x20,\ + offsetof(struct kbase_hwcnt_reader_metadata, cycles)) +#define KBASE_HWCNT_READER_GET_BUFFER_WITH_CYCLES _IOR(KBASE_HWCNT_READER, 0x20,\ + struct kbase_hwcnt_reader_metadata) +#define KBASE_HWCNT_READER_PUT_BUFFER _IOC(_IOC_WRITE, KBASE_HWCNT_READER, 0x21,\ + offsetof(struct kbase_hwcnt_reader_metadata, cycles)) +#define KBASE_HWCNT_READER_PUT_BUFFER_WITH_CYCLES _IOW(KBASE_HWCNT_READER, 0x21,\ + struct kbase_hwcnt_reader_metadata) +#define KBASE_HWCNT_READER_SET_INTERVAL _IOW(KBASE_HWCNT_READER, 0x30, __u32) +#define KBASE_HWCNT_READER_ENABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x40, __u32) +#define KBASE_HWCNT_READER_DISABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x41, __u32) +#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, __u32) +#define KBASE_HWCNT_READER_GET_API_VERSION_WITH_FEATURES \ + _IOW(KBASE_HWCNT_READER, 0xFF, \ + struct kbase_hwcnt_reader_api_version) + +/** + * struct kbase_hwcnt_reader_metadata_cycles - GPU clock cycles + * @top: the number of cycles associated with the main clock for the + * GPU + * @shader_cores: the cycles that have elapsed on the GPU shader cores + */ +struct kbase_hwcnt_reader_metadata_cycles { + __u64 top; + __u64 shader_cores; +}; + +/** + * struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata + * @timestamp: time when sample was collected + * @event_id: id of an event that triggered sample collection + * @buffer_idx: position in sampling area where sample buffer was stored + * @cycles: the GPU cycles that occurred since the last sample + */ +struct kbase_hwcnt_reader_metadata { + __u64 timestamp; + __u32 event_id; + __u32 buffer_idx; + struct kbase_hwcnt_reader_metadata_cycles cycles; +}; + +/** + * enum base_hwcnt_reader_event - hwcnt dumping events + * @BASE_HWCNT_READER_EVENT_MANUAL: manual request for dump + * @BASE_HWCNT_READER_EVENT_PERIODIC: periodic dump + * @BASE_HWCNT_READER_EVENT_PREJOB: prejob dump request + * @BASE_HWCNT_READER_EVENT_POSTJOB: postjob dump request + * @BASE_HWCNT_READER_EVENT_COUNT: number of supported events + */ +enum base_hwcnt_reader_event { + BASE_HWCNT_READER_EVENT_MANUAL, + BASE_HWCNT_READER_EVENT_PERIODIC, + BASE_HWCNT_READER_EVENT_PREJOB, + BASE_HWCNT_READER_EVENT_POSTJOB, + BASE_HWCNT_READER_EVENT_COUNT +}; + +#define KBASE_HWCNT_READER_API_VERSION_NO_FEATURE (0) +#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP (1 << 0) +#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES (1 << 1) +/** + * struct kbase_hwcnt_reader_api_version - hwcnt reader API version + * @version: API version + * @features: available features in this API version + */ +struct kbase_hwcnt_reader_api_version { + __u32 version; + __u32 features; +}; + +#endif /* _UAPI_KBASE_HWCNT_READER_H_ */ + diff --git a/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h b/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h new file mode 100644 index 0000000..5ca528a --- /dev/null +++ b/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h @@ -0,0 +1,836 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved. 
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_IOCTL_H_
+#define _UAPI_KBASE_IOCTL_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <asm-generic/ioctl.h>
+#include <linux/types.h>
+
+#if MALI_USE_CSF
+#include "csf/mali_kbase_csf_ioctl.h"
+#else
+#include "jm/mali_kbase_jm_ioctl.h"
+#endif /* MALI_USE_CSF */
+
+#define KBASE_IOCTL_TYPE 0x80
+
+/**
+ * struct kbase_ioctl_set_flags - Set kernel context creation flags
+ *
+ * @create_flags: Flags - see base_context_create_flags
+ */
+struct kbase_ioctl_set_flags {
+	__u32 create_flags;
+};
+
+#define KBASE_IOCTL_SET_FLAGS \
+	_IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags)
+
+/**
+ * struct kbase_ioctl_get_gpuprops - Read GPU properties from the kernel
+ *
+ * @buffer: Pointer to the buffer to store properties into
+ * @size: Size of the buffer
+ * @flags: Flags - must be zero for now
+ *
+ * The ioctl will return the number of bytes stored into @buffer or an error
+ * on failure (e.g. @size is too small). If @size is specified as 0 then no
+ * data will be written but the return value will be the number of bytes needed
+ * for all the properties.
+ *
+ * @flags may be used in the future to request a different format for the
+ * buffer. With @flags == 0 the following format is used.
+ *
+ * The buffer will be filled with pairs of values, a __u32 key identifying the
+ * property followed by the value. The size of the value is identified using
+ * the bottom bits of the key. The value then immediately follows the key and
+ * is tightly packed (there is no padding). All keys and values are
+ * little-endian.
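+ *
+ * As an illustrative sketch only (not normative; assumes a little-endian
+ * host and omits error handling), a caller could walk a returned buffer
+ * 'buf' of 'size' bytes like this:
+ *
+ *   __u8 *p = buf;
+ *
+ *   while (p < buf + size) {
+ *           __u32 key;
+ *           __u64 value = 0;
+ *
+ *           memcpy(&key, p, sizeof(key));
+ *           p += sizeof(key);
+ *           memcpy(&value, p, 1u << (key & 3));
+ *           p += 1u << (key & 3);
+ *   }
+ *
+ * The value size encoding in the bottom two bits of each key is: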
+ *
+ * 00 = __u8
+ * 01 = __u16
+ * 10 = __u32
+ * 11 = __u64
+ */
+struct kbase_ioctl_get_gpuprops {
+	__u64 buffer;
+	__u32 size;
+	__u32 flags;
+};
+
+#define KBASE_IOCTL_GET_GPUPROPS \
+	_IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops)
+
+/**
+ * union kbase_ioctl_mem_alloc - Allocate memory on the GPU
+ * @in: Input parameters
+ * @in.va_pages: The number of pages of virtual address space to reserve
+ * @in.commit_pages: The number of physical pages to allocate
+ * @in.extension: The number of extra pages to allocate on each GPU fault which grows the region
+ * @in.flags: Flags
+ * @out: Output parameters
+ * @out.flags: Flags
+ * @out.gpu_va: The GPU virtual address which is allocated
+ */
+union kbase_ioctl_mem_alloc {
+	struct {
+		__u64 va_pages;
+		__u64 commit_pages;
+		__u64 extension;
+		__u64 flags;
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_ALLOC \
+	_IOWR(KBASE_IOCTL_TYPE, 5, union kbase_ioctl_mem_alloc)
+
+/**
+ * union kbase_ioctl_mem_query - Query properties of a GPU memory region
+ * @in: Input parameters
+ * @in.gpu_addr: A GPU address contained within the region
+ * @in.query: The type of query
+ * @out: Output parameters
+ * @out.value: The result of the query
+ *
+ * Use a %KBASE_MEM_QUERY_xxx flag as input for @query.
+ */
+union kbase_ioctl_mem_query {
+	struct {
+		__u64 gpu_addr;
+		__u64 query;
+	} in;
+	struct {
+		__u64 value;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_QUERY \
+	_IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query)
+
+#define KBASE_MEM_QUERY_COMMIT_SIZE	((__u64)1)
+#define KBASE_MEM_QUERY_VA_SIZE		((__u64)2)
+#define KBASE_MEM_QUERY_FLAGS		((__u64)3)
+
+/**
+ * struct kbase_ioctl_mem_free - Free a memory region
+ * @gpu_addr: Handle to the region to free
+ */
+struct kbase_ioctl_mem_free {
+	__u64 gpu_addr;
+};
+
+#define KBASE_IOCTL_MEM_FREE \
+	_IOW(KBASE_IOCTL_TYPE, 7, struct kbase_ioctl_mem_free)
+
+/**
+ * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader
+ * @buffer_count: requested number of dumping buffers
+ * @fe_bm: counters selection bitmask (Front end)
+ * @shader_bm: counters selection bitmask (Shader)
+ * @tiler_bm: counters selection bitmask (Tiler)
+ * @mmu_l2_bm: counters selection bitmask (MMU_L2)
+ *
+ * An fd is returned from the ioctl if successful, or a negative value on error
+ */
+struct kbase_ioctl_hwcnt_reader_setup {
+	__u32 buffer_count;
+	__u32 fe_bm;
+	__u32 shader_bm;
+	__u32 tiler_bm;
+	__u32 mmu_l2_bm;
+};
+
+#define KBASE_IOCTL_HWCNT_READER_SETUP \
+	_IOW(KBASE_IOCTL_TYPE, 8, struct kbase_ioctl_hwcnt_reader_setup)
+
+/**
+ * struct kbase_ioctl_hwcnt_enable - Enable hardware counter collection
+ * @dump_buffer: GPU address to write counters to
+ * @fe_bm: counters selection bitmask (Front end)
+ * @shader_bm: counters selection bitmask (Shader)
+ * @tiler_bm: counters selection bitmask (Tiler)
+ * @mmu_l2_bm: counters selection bitmask (MMU_L2)
+ */
+struct kbase_ioctl_hwcnt_enable {
+	__u64 dump_buffer;
+	__u32 fe_bm;
+	__u32 shader_bm;
+	__u32 tiler_bm;
+	__u32 mmu_l2_bm;
+};
+
+#define KBASE_IOCTL_HWCNT_ENABLE \
+	_IOW(KBASE_IOCTL_TYPE, 9, struct kbase_ioctl_hwcnt_enable)
+
+#define KBASE_IOCTL_HWCNT_DUMP \
+	_IO(KBASE_IOCTL_TYPE, 10)
+
+#define KBASE_IOCTL_HWCNT_CLEAR \
+	_IO(KBASE_IOCTL_TYPE, 11)
+
+/**
+ * struct kbase_ioctl_hwcnt_values - Values to set the dummy counters to.
+ * @data: Counter samples for the dummy model.
+ * @size: Size of the counter sample data.
+ * @padding: Padding.
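+ *
+ * As an illustrative sketch only, assuming 'samples' points to 'len' bytes
+ * of counter data prepared by the caller for the dummy model:
+ *
+ *   struct kbase_ioctl_hwcnt_values vals = { 0 };
+ *
+ *   vals.data = (__u64)(uintptr_t)samples;
+ *   vals.size = len;
+ *   ioctl(fd, KBASE_IOCTL_HWCNT_SET, &vals);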
+ */
+struct kbase_ioctl_hwcnt_values {
+	__u64 data;
+	__u32 size;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_HWCNT_SET \
+	_IOW(KBASE_IOCTL_TYPE, 32, struct kbase_ioctl_hwcnt_values)
+
+/**
+ * struct kbase_ioctl_disjoint_query - Query the disjoint counter
+ * @counter: A counter of disjoint events in the kernel
+ */
+struct kbase_ioctl_disjoint_query {
+	__u32 counter;
+};
+
+#define KBASE_IOCTL_DISJOINT_QUERY \
+	_IOR(KBASE_IOCTL_TYPE, 12, struct kbase_ioctl_disjoint_query)
+
+/**
+ * struct kbase_ioctl_get_ddk_version - Query the kernel version
+ * @version_buffer: Buffer to receive the kernel version string
+ * @size: Size of the buffer
+ * @padding: Padding
+ *
+ * The ioctl will return the number of bytes written into version_buffer
+ * (which includes a NULL byte) or a negative error code
+ *
+ * The ioctl request code has to be _IOW because the data in the ioctl struct
+ * is being copied to the kernel, even though the kernel then writes out the
+ * version info to the buffer specified in the ioctl.
+ */
+struct kbase_ioctl_get_ddk_version {
+	__u64 version_buffer;
+	__u32 size;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_GET_DDK_VERSION \
+	_IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version)
+
+/**
+ * struct kbase_ioctl_mem_jit_init_10_2 - Initialize the just-in-time memory
+ *                                        allocator (kernel driver
+ *                                        versions 10.2--11.4)
+ * @va_pages: Number of VA pages to reserve for JIT
+ *
+ * Note that depending on the VA size of the application and GPU, the value
+ * specified in @va_pages may be ignored.
+ *
+ * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for
+ * backwards compatibility.
+ */
+struct kbase_ioctl_mem_jit_init_10_2 {
+	__u64 va_pages;
+};
+
+#define KBASE_IOCTL_MEM_JIT_INIT_10_2 \
+	_IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_10_2)
+
+/**
+ * struct kbase_ioctl_mem_jit_init_11_5 - Initialize the just-in-time memory
+ *                                        allocator (kernel driver
+ *                                        versions 11.5--11.19)
+ * @va_pages: Number of VA pages to reserve for JIT
+ * @max_allocations: Maximum number of concurrent allocations
+ * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%)
+ * @group_id: Group ID to be used for physical allocations
+ * @padding: Currently unused, must be zero
+ *
+ * Note that depending on the VA size of the application and GPU, the value
+ * specified in @va_pages may be ignored.
+ *
+ * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for
+ * backwards compatibility.
+ */
+struct kbase_ioctl_mem_jit_init_11_5 {
+	__u64 va_pages;
+	__u8 max_allocations;
+	__u8 trim_level;
+	__u8 group_id;
+	__u8 padding[5];
+};
+
+#define KBASE_IOCTL_MEM_JIT_INIT_11_5 \
+	_IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_11_5)
+
+/**
+ * struct kbase_ioctl_mem_jit_init - Initialize the just-in-time memory
+ *                                   allocator
+ * @va_pages: Number of GPU virtual address pages to reserve for just-in-time
+ *            memory allocations
+ * @max_allocations: Maximum number of concurrent allocations
+ * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%)
+ * @group_id: Group ID to be used for physical allocations
+ * @padding: Currently unused, must be zero
+ * @phys_pages: Maximum number of physical pages to allocate just-in-time
+ *
+ * Note that depending on the VA size of the application and GPU, the value
+ * specified in @va_pages may be ignored.
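+ *
+ * As an illustrative sketch only (all values below are arbitrary examples,
+ * not recommendations), assuming 'fd' is an open kbase context:
+ *
+ *   struct kbase_ioctl_mem_jit_init init = { 0 };
+ *
+ *   init.va_pages = 65536;
+ *   init.phys_pages = 65536;
+ *   init.max_allocations = 16;
+ *   ioctl(fd, KBASE_IOCTL_MEM_JIT_INIT, &init);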
+ */
+struct kbase_ioctl_mem_jit_init {
+	__u64 va_pages;
+	__u8 max_allocations;
+	__u8 trim_level;
+	__u8 group_id;
+	__u8 padding[5];
+	__u64 phys_pages;
+};
+
+#define KBASE_IOCTL_MEM_JIT_INIT \
+	_IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init)
+
+/**
+ * struct kbase_ioctl_mem_sync - Perform cache maintenance on memory
+ *
+ * @handle: GPU memory handle (GPU VA)
+ * @user_addr: The address where it is mapped in user space
+ * @size: The number of bytes to synchronise
+ * @type: The direction to synchronise: 0 is sync to memory (clean),
+ *        1 is sync from memory (invalidate). Use the BASE_SYNCSET_OP_xxx constants.
+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero
+ */
+struct kbase_ioctl_mem_sync {
+	__u64 handle;
+	__u64 user_addr;
+	__u64 size;
+	__u8 type;
+	__u8 padding[7];
+};
+
+#define KBASE_IOCTL_MEM_SYNC \
+	_IOW(KBASE_IOCTL_TYPE, 15, struct kbase_ioctl_mem_sync)
+
+/**
+ * union kbase_ioctl_mem_find_cpu_offset - Find the offset of a CPU pointer
+ *
+ * @in: Input parameters
+ * @in.gpu_addr: The GPU address of the memory region
+ * @in.cpu_addr: The CPU address to locate
+ * @in.size: A size in bytes to validate is contained within the region
+ * @out: Output parameters
+ * @out.offset: The offset from the start of the memory region to @cpu_addr
+ */
+union kbase_ioctl_mem_find_cpu_offset {
+	struct {
+		__u64 gpu_addr;
+		__u64 cpu_addr;
+		__u64 size;
+	} in;
+	struct {
+		__u64 offset;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_FIND_CPU_OFFSET \
+	_IOWR(KBASE_IOCTL_TYPE, 16, union kbase_ioctl_mem_find_cpu_offset)
+
+/**
+ * struct kbase_ioctl_get_context_id - Get the kernel context ID
+ *
+ * @id: The kernel context ID
+ */
+struct kbase_ioctl_get_context_id {
+	__u32 id;
+};
+
+#define KBASE_IOCTL_GET_CONTEXT_ID \
+	_IOR(KBASE_IOCTL_TYPE, 17, struct kbase_ioctl_get_context_id)
+
+/**
+ * struct kbase_ioctl_tlstream_acquire - Acquire a tlstream fd
+ *
+ * @flags: Flags
+ *
+ * The ioctl returns a file descriptor when successful
+ */
+struct kbase_ioctl_tlstream_acquire {
+	__u32 flags;
+};
+
+#define KBASE_IOCTL_TLSTREAM_ACQUIRE \
+	_IOW(KBASE_IOCTL_TYPE, 18, struct kbase_ioctl_tlstream_acquire)
+
+#define KBASE_IOCTL_TLSTREAM_FLUSH \
+	_IO(KBASE_IOCTL_TYPE, 19)
+
+/**
+ * struct kbase_ioctl_mem_commit - Change the amount of memory backing a region
+ *
+ * @gpu_addr: The memory region to modify
+ * @pages:    The number of physical pages that should be present
+ *
+ * The ioctl may return one of the following error codes, or 0 for success:
+ * -ENOMEM: Out of memory
+ * -EINVAL: Invalid arguments
+ */
+struct kbase_ioctl_mem_commit {
+	__u64 gpu_addr;
+	__u64 pages;
+};
+
+#define KBASE_IOCTL_MEM_COMMIT \
+	_IOW(KBASE_IOCTL_TYPE, 20, struct kbase_ioctl_mem_commit)
+
+/**
+ * union kbase_ioctl_mem_alias - Create an alias of memory regions
+ * @in: Input parameters
+ * @in.flags: Flags, see BASE_MEM_xxx
+ * @in.stride: Bytes between start of each memory region
+ * @in.nents: The number of regions to pack together into the alias
+ * @in.aliasing_info: Pointer to an array of struct base_mem_aliasing_info
+ * @out: Output parameters
+ * @out.flags: Flags, see BASE_MEM_xxx
+ * @out.gpu_va: Address of the new alias
+ * @out.va_pages: Size of the new alias
+ */
+union kbase_ioctl_mem_alias {
+	struct {
+		__u64 flags;
+		__u64 stride;
+		__u64 nents;
+		__u64 aliasing_info;
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+		__u64 va_pages;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_ALIAS \
+	_IOWR(KBASE_IOCTL_TYPE, 21, union kbase_ioctl_mem_alias)
+
+/**
+ * union kbase_ioctl_mem_import - Import memory for use by the GPU
+ * @in: Input parameters
+ * @in.flags: Flags, see BASE_MEM_xxx
+ * @in.phandle: Handle to the external memory
+ * @in.type: Type of external memory, see base_mem_import_type
+ * @in.padding: Amount of extra VA pages to append to the imported buffer
+ * @out: Output parameters
+ * @out.flags: Flags, see BASE_MEM_xxx
+ * @out.gpu_va: Address of the imported memory
+ * @out.va_pages: Size of the imported memory, in pages
+ */
+union kbase_ioctl_mem_import {
+	struct {
+		__u64 flags;
+		__u64 phandle;
+		__u32 type;
+		__u32 padding;
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+		__u64 va_pages;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_IMPORT \
+	_IOWR(KBASE_IOCTL_TYPE, 22, union kbase_ioctl_mem_import)
+
+/**
+ * struct kbase_ioctl_mem_flags_change - Change the flags for a memory region
+ * @gpu_va: The GPU region to modify
+ * @flags: The new flags to set
+ * @mask: Mask of the flags to modify
+ */
+struct kbase_ioctl_mem_flags_change {
+	__u64 gpu_va;
+	__u64 flags;
+	__u64 mask;
+};
+
+#define KBASE_IOCTL_MEM_FLAGS_CHANGE \
+	_IOW(KBASE_IOCTL_TYPE, 23, struct kbase_ioctl_mem_flags_change)
+
+/**
+ * struct kbase_ioctl_stream_create - Create a synchronisation stream
+ * @name: A name to identify this stream. Must be NULL-terminated.
+ *
+ * Note that this is also called a "timeline", but is named stream to avoid
+ * confusion with other uses of the word.
+ *
+ * Unused bytes in @name (after the first NULL byte) must also be NULL bytes.
+ *
+ * The ioctl returns a file descriptor.
+ */
+struct kbase_ioctl_stream_create {
+	char name[32];
+};
+
+#define KBASE_IOCTL_STREAM_CREATE \
+	_IOW(KBASE_IOCTL_TYPE, 24, struct kbase_ioctl_stream_create)
+
+/**
+ * struct kbase_ioctl_fence_validate - Validate a fd refers to a fence
+ * @fd: The file descriptor to validate
+ */
+struct kbase_ioctl_fence_validate {
+	int fd;
+};
+
+#define KBASE_IOCTL_FENCE_VALIDATE \
+	_IOW(KBASE_IOCTL_TYPE, 25, struct kbase_ioctl_fence_validate)
+
+/**
+ * struct kbase_ioctl_mem_profile_add - Provide profiling information to kernel
+ * @buffer: Pointer to the information
+ * @len: Length
+ * @padding: Padding
+ *
+ * The data provided is accessible through a debugfs file
+ */
+struct kbase_ioctl_mem_profile_add {
+	__u64 buffer;
+	__u32 len;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_MEM_PROFILE_ADD \
+	_IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add)
+
+/**
+ * struct kbase_ioctl_sticky_resource_map - Permanently map an external resource
+ * @count: Number of resources
+ * @address: Array of __u64 GPU addresses of the external resources to map
+ */
+struct kbase_ioctl_sticky_resource_map {
+	__u64 count;
+	__u64 address;
+};
+
+#define KBASE_IOCTL_STICKY_RESOURCE_MAP \
+	_IOW(KBASE_IOCTL_TYPE, 29, struct kbase_ioctl_sticky_resource_map)
+
+/**
+ * struct kbase_ioctl_sticky_resource_unmap - Unmap a resource which was
+ *                                            previously permanently mapped
+ * @count: Number of resources
+ * @address: Array of __u64 GPU addresses of the external resources to unmap
+ */
+struct kbase_ioctl_sticky_resource_unmap {
+	__u64 count;
+	__u64 address;
+};
+
+#define KBASE_IOCTL_STICKY_RESOURCE_UNMAP \
+	_IOW(KBASE_IOCTL_TYPE, 30, struct kbase_ioctl_sticky_resource_unmap)
+
+/**
+ * union kbase_ioctl_mem_find_gpu_start_and_offset - Find the start address of
+ *                                                   the GPU memory region for
+ *                                                   the given gpu address and
+ *                                                   the offset of that address
+ *                                                   into the region
+ * @in: Input parameters
+ * @in.gpu_addr: GPU virtual address
+ * @in.size: Size in bytes within the region
+
+/**
+ * union kbase_ioctl_mem_find_gpu_start_and_offset - Find the start address of
+ *                                                   the GPU memory region for
+ *                                                   the given gpu address and
+ *                                                   the offset of that address
+ *                                                   into the region
+ * @in: Input parameters
+ * @in.gpu_addr: GPU virtual address
+ * @in.size: Size in bytes within the region
+ * @out: Output parameters
+ * @out.start: Address of the beginning of the memory region enclosing @gpu_addr
+ *             for the length of @offset bytes
+ * @out.offset: The offset from the start of the memory region to @gpu_addr
+ */
+union kbase_ioctl_mem_find_gpu_start_and_offset {
+	struct {
+		__u64 gpu_addr;
+		__u64 size;
+	} in;
+	struct {
+		__u64 start;
+		__u64 offset;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \
+	_IOWR(KBASE_IOCTL_TYPE, 31, union kbase_ioctl_mem_find_gpu_start_and_offset)
+
+#define KBASE_IOCTL_CINSTR_GWT_START \
+	_IO(KBASE_IOCTL_TYPE, 33)
+
+#define KBASE_IOCTL_CINSTR_GWT_STOP \
+	_IO(KBASE_IOCTL_TYPE, 34)
+
+/**
+ * union kbase_ioctl_cinstr_gwt_dump - Used to collect all GPU write fault
+ *                                     addresses.
+ * @in: Input parameters
+ * @in.addr_buffer: Address of buffer to hold addresses of GPU-modified areas.
+ * @in.size_buffer: Address of buffer to hold sizes of modified areas (in pages)
+ * @in.len: Number of addresses the buffers can hold.
+ * @in.padding: Padding
+ * @out: Output parameters
+ * @out.no_of_addr_collected: Number of addresses collected into addr_buffer.
+ * @out.more_data_available: Status indicating if more addresses are available.
+ * @out.padding: Padding
+ *
+ * This structure is used when performing a call to dump GPU write fault
+ * addresses.
+ */
+union kbase_ioctl_cinstr_gwt_dump {
+	struct {
+		__u64 addr_buffer;
+		__u64 size_buffer;
+		__u32 len;
+		__u32 padding;
+	} in;
+	struct {
+		__u32 no_of_addr_collected;
+		__u8 more_data_available;
+		__u8 padding[27];
+	} out;
+};
+
+#define KBASE_IOCTL_CINSTR_GWT_DUMP \
+	_IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump)
+
+/**
+ * struct kbase_ioctl_mem_exec_init - Initialise the EXEC_VA memory zone
+ * @va_pages: Number of VA pages to reserve for EXEC_VA
+ */
+struct kbase_ioctl_mem_exec_init {
+	__u64 va_pages;
+};
+
+#define KBASE_IOCTL_MEM_EXEC_INIT \
+	_IOW(KBASE_IOCTL_TYPE, 38, struct kbase_ioctl_mem_exec_init)
+
+/**
+ * union kbase_ioctl_get_cpu_gpu_timeinfo - Request zero or more types of
+ *                                          cpu/gpu time (counter values)
+ * @in: Input parameters
+ * @in.request_flags: Bit-flags indicating the requested types.
+ * @in.paddings: Unused; pads the in view to match the size of the out view.
+ * @out: Output parameters
+ * @out.sec: Integer part of the monotonic time, in seconds.
+ * @out.nsec: Fractional part of the monotonic time, in nanoseconds.
+ * @out.padding: Unused, for __u64 alignment
+ * @out.timestamp: System wide timestamp (counter) value.
+ * @out.cycle_counter: GPU cycle counter value.
+ */
+union kbase_ioctl_get_cpu_gpu_timeinfo {
+	struct {
+		__u32 request_flags;
+		__u32 paddings[7];
+	} in;
+	struct {
+		__u64 sec;
+		__u32 nsec;
+		__u32 padding;
+		__u64 timestamp;
+		__u64 cycle_counter;
+	} out;
+};
+
+#define KBASE_IOCTL_GET_CPU_GPU_TIMEINFO \
+	_IOWR(KBASE_IOCTL_TYPE, 50, union kbase_ioctl_get_cpu_gpu_timeinfo)
+
+/**
+ * struct kbase_ioctl_context_priority_check - Check the max possible priority
+ * @priority: Requested priority on input; on return, the highest priority
+ *            that would actually be granted (which may be lower)
+ */
+struct kbase_ioctl_context_priority_check {
+	__u8 priority;
+};
+
+#define KBASE_IOCTL_CONTEXT_PRIORITY_CHECK \
+	_IOWR(KBASE_IOCTL_TYPE, 54, struct kbase_ioctl_context_priority_check)
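
The @out.more_data_available flag of kbase_ioctl_cinstr_gwt_dump above implies a drain loop: userspace keeps dumping until the kernel reports nothing further is pending. A sketch of that loop, assuming write tracking was already started with KBASE_IOCTL_CINSTR_GWT_START; the buffer size of 256 and the function name are arbitrary:

        /* Illustrative only -- not part of this patch. */
        #include <stdint.h>
        #include <string.h>
        #include <sys/ioctl.h>
        #include "mali_kbase_ioctl.h"

        static void drain_gwt(int kbase_fd)
        {
                __u64 addrs[256], sizes[256];
                union kbase_ioctl_cinstr_gwt_dump dump;

                do {
                        memset(&dump, 0, sizeof(dump));
                        dump.in.addr_buffer = (__u64)(uintptr_t)addrs;
                        dump.in.size_buffer = (__u64)(uintptr_t)sizes;
                        dump.in.len = 256;
                        if (ioctl(kbase_fd, KBASE_IOCTL_CINSTR_GWT_DUMP, &dump) < 0)
                                break;
                        /* dump.out.no_of_addr_collected entries of addrs[] and
                         * sizes[] are now valid and can be consumed here. */
                } while (dump.out.more_data_available);
        }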
+
+/**
+ * struct kbase_ioctl_set_limited_core_count - Set the limited core count
+ * @max_core_count: Maximum core count
+ */
+struct kbase_ioctl_set_limited_core_count {
+	__u8 max_core_count;
+};
+
+#define KBASE_IOCTL_SET_LIMITED_CORE_COUNT \
+	_IOW(KBASE_IOCTL_TYPE, 55, struct kbase_ioctl_set_limited_core_count)
+
+/***************
+ * test ioctls *
+ ***************/
+#if MALI_UNIT_TEST
+/* These ioctls are purely for test purposes and are not used in the
+ * production driver, so they may change without notice.
+ */
+
+#define KBASE_IOCTL_TEST_TYPE (KBASE_IOCTL_TYPE + 1)
+
+/**
+ * struct kbase_ioctl_tlstream_stats - Read tlstream stats for test purposes
+ * @bytes_collected: number of bytes read by user
+ * @bytes_generated: number of bytes generated by tracepoints
+ */
+struct kbase_ioctl_tlstream_stats {
+	__u32 bytes_collected;
+	__u32 bytes_generated;
+};
+
+#define KBASE_IOCTL_TLSTREAM_STATS \
+	_IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats)
+
+#endif /* MALI_UNIT_TEST */
+
+/* Customer extension range */
+#define KBASE_IOCTL_EXTRA_TYPE (KBASE_IOCTL_TYPE + 2)
+
+/* If the integration needs extra ioctls, add them here like this:
+ *
+ * struct my_ioctl_args {
+ *	....
+ * };
+ *
+ * #define KBASE_IOCTL_MY_IOCTL \
+ *	_IOWR(KBASE_IOCTL_EXTRA_TYPE, 0, struct my_ioctl_args)
+ */
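
As a concrete, purely hypothetical instance of that template (names are illustrative only and not part of this patch), an integration could declare:

        /* Hypothetical integration-specific ioctl: read one vendor counter. */
        struct my_counter_query_args {
                __u64 counter_id; /* in: which integration counter to read */
                __u64 value;      /* out: its current value */
        };

        #define KBASE_IOCTL_MY_COUNTER_QUERY \
                _IOWR(KBASE_IOCTL_EXTRA_TYPE, 0, struct my_counter_query_args)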
+
+/**********************************
+ * Definitions for GPU properties *
+ **********************************/
+#define KBASE_GPUPROP_VALUE_SIZE_U8 (0x0)
+#define KBASE_GPUPROP_VALUE_SIZE_U16 (0x1)
+#define KBASE_GPUPROP_VALUE_SIZE_U32 (0x2)
+#define KBASE_GPUPROP_VALUE_SIZE_U64 (0x3)
+
+#define KBASE_GPUPROP_PRODUCT_ID 1
+#define KBASE_GPUPROP_VERSION_STATUS 2
+#define KBASE_GPUPROP_MINOR_REVISION 3
+#define KBASE_GPUPROP_MAJOR_REVISION 4
+/* 5 previously used for GPU speed */
+#define KBASE_GPUPROP_GPU_FREQ_KHZ_MAX 6
+/* 7 previously used for minimum GPU speed */
+#define KBASE_GPUPROP_LOG2_PROGRAM_COUNTER_SIZE 8
+#define KBASE_GPUPROP_TEXTURE_FEATURES_0 9
+#define KBASE_GPUPROP_TEXTURE_FEATURES_1 10
+#define KBASE_GPUPROP_TEXTURE_FEATURES_2 11
+#define KBASE_GPUPROP_GPU_AVAILABLE_MEMORY_SIZE 12
+
+#define KBASE_GPUPROP_L2_LOG2_LINE_SIZE 13
+#define KBASE_GPUPROP_L2_LOG2_CACHE_SIZE 14
+#define KBASE_GPUPROP_L2_NUM_L2_SLICES 15
+
+#define KBASE_GPUPROP_TILER_BIN_SIZE_BYTES 16
+#define KBASE_GPUPROP_TILER_MAX_ACTIVE_LEVELS 17
+
+#define KBASE_GPUPROP_MAX_THREADS 18
+#define KBASE_GPUPROP_MAX_WORKGROUP_SIZE 19
+#define KBASE_GPUPROP_MAX_BARRIER_SIZE 20
+#define KBASE_GPUPROP_MAX_REGISTERS 21
+#define KBASE_GPUPROP_MAX_TASK_QUEUE 22
+#define KBASE_GPUPROP_MAX_THREAD_GROUP_SPLIT 23
+#define KBASE_GPUPROP_IMPL_TECH 24
+
+#define KBASE_GPUPROP_RAW_SHADER_PRESENT 25
+#define KBASE_GPUPROP_RAW_TILER_PRESENT 26
+#define KBASE_GPUPROP_RAW_L2_PRESENT 27
+#define KBASE_GPUPROP_RAW_STACK_PRESENT 28
+#define KBASE_GPUPROP_RAW_L2_FEATURES 29
+#define KBASE_GPUPROP_RAW_CORE_FEATURES 30
+#define KBASE_GPUPROP_RAW_MEM_FEATURES 31
+#define KBASE_GPUPROP_RAW_MMU_FEATURES 32
+#define KBASE_GPUPROP_RAW_AS_PRESENT 33
+#define KBASE_GPUPROP_RAW_JS_PRESENT 34
+#define KBASE_GPUPROP_RAW_JS_FEATURES_0 35
+#define KBASE_GPUPROP_RAW_JS_FEATURES_1 36
+#define KBASE_GPUPROP_RAW_JS_FEATURES_2 37
+#define KBASE_GPUPROP_RAW_JS_FEATURES_3 38
+#define KBASE_GPUPROP_RAW_JS_FEATURES_4 39
+#define KBASE_GPUPROP_RAW_JS_FEATURES_5 40
+#define KBASE_GPUPROP_RAW_JS_FEATURES_6 41
+#define KBASE_GPUPROP_RAW_JS_FEATURES_7 42
+#define KBASE_GPUPROP_RAW_JS_FEATURES_8 43
+#define KBASE_GPUPROP_RAW_JS_FEATURES_9 44
+#define KBASE_GPUPROP_RAW_JS_FEATURES_10 45
+#define KBASE_GPUPROP_RAW_JS_FEATURES_11 46
+#define KBASE_GPUPROP_RAW_JS_FEATURES_12 47
+#define KBASE_GPUPROP_RAW_JS_FEATURES_13 48
+#define KBASE_GPUPROP_RAW_JS_FEATURES_14 49
+#define KBASE_GPUPROP_RAW_JS_FEATURES_15 50
+#define KBASE_GPUPROP_RAW_TILER_FEATURES 51
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_0 52
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_1 53
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_2 54
+#define KBASE_GPUPROP_RAW_GPU_ID 55
+#define KBASE_GPUPROP_RAW_THREAD_MAX_THREADS 56
+#define KBASE_GPUPROP_RAW_THREAD_MAX_WORKGROUP_SIZE 57
+#define KBASE_GPUPROP_RAW_THREAD_MAX_BARRIER_SIZE 58
+#define KBASE_GPUPROP_RAW_THREAD_FEATURES 59
+#define KBASE_GPUPROP_RAW_COHERENCY_MODE 60
+
+#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS 61
+#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS 62
+#define KBASE_GPUPROP_COHERENCY_COHERENCY 63
+#define KBASE_GPUPROP_COHERENCY_GROUP_0 64
+#define KBASE_GPUPROP_COHERENCY_GROUP_1 65
+#define KBASE_GPUPROP_COHERENCY_GROUP_2 66
+#define KBASE_GPUPROP_COHERENCY_GROUP_3 67
+#define KBASE_GPUPROP_COHERENCY_GROUP_4 68
+#define KBASE_GPUPROP_COHERENCY_GROUP_5 69
+#define KBASE_GPUPROP_COHERENCY_GROUP_6 70
+#define KBASE_GPUPROP_COHERENCY_GROUP_7 71
+#define KBASE_GPUPROP_COHERENCY_GROUP_8 72
+#define KBASE_GPUPROP_COHERENCY_GROUP_9 73
+#define KBASE_GPUPROP_COHERENCY_GROUP_10 74
+#define KBASE_GPUPROP_COHERENCY_GROUP_11 75
+#define KBASE_GPUPROP_COHERENCY_GROUP_12 76
+#define KBASE_GPUPROP_COHERENCY_GROUP_13 77
+#define KBASE_GPUPROP_COHERENCY_GROUP_14 78
+#define KBASE_GPUPROP_COHERENCY_GROUP_15 79
+
+#define KBASE_GPUPROP_TEXTURE_FEATURES_3 80
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3 81
+
+#define KBASE_GPUPROP_NUM_EXEC_ENGINES 82
+
+#define KBASE_GPUPROP_RAW_THREAD_TLS_ALLOC 83
+#define KBASE_GPUPROP_TLS_ALLOC 84
+#define KBASE_GPUPROP_RAW_GPU_FEATURES 85
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _UAPI_KBASE_IOCTL_H_ */
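
These property IDs are consumed through the GPU-properties ioctl declared earlier in this header (outside this hunk), which fills a user buffer with packed {key, value} pairs: the low two bits of each 32-bit key are assumed to carry one of the KBASE_GPUPROP_VALUE_SIZE_* codes above, and the remaining bits the property ID. A hedged parser sketch under that assumption, for a little-endian host:

        /* Illustrative only -- not part of this patch. */
        #include <stddef.h>
        #include <stdio.h>
        #include <string.h>
        #include <linux/types.h>
        #include "mali_kbase_ioctl.h"

        static void parse_gpuprops(const __u8 *buf, size_t len)
        {
                size_t pos = 0;

                while (pos + 4 <= len) {
                        __u32 key;
                        __u64 value = 0;
                        unsigned int size_code, prop_id, width;

                        memcpy(&key, buf + pos, 4);
                        pos += 4;
                        size_code = key & 3;          /* KBASE_GPUPROP_VALUE_SIZE_* */
                        prop_id = key >> 2;
                        width = 1u << size_code;      /* U8=1, U16=2, U32=4, U64=8 */
                        if (pos + width > len)
                                break;
                        memcpy(&value, buf + pos, width); /* little-endian assumed */
                        pos += width;
                        if (prop_id == KBASE_GPUPROP_PRODUCT_ID)
                                printf("product id: 0x%llx\n",
                                       (unsigned long long)value);
                }
        }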
diff --git a/common/include/uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h b/common/include/uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h
new file mode 100644
index 0000000..cb782bd
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * mali_kbase_kinstr_jm_reader.h
+ * Provides an ioctl API to read kernel atom state changes. The flow of the
+ * API is:
+ * 1. Obtain the file descriptor with ``KBASE_IOCTL_KINSTR_JM_FD``
+ * 2. Determine the buffer structure layout via the above ioctl's returned
+ *    size and version fields in ``struct kbase_kinstr_jm_fd_out``
+ * 3. Poll the file descriptor for ``POLLIN``
+ * 4. Get data with read() on the fd
+ * 5. Use the structure version to understand how to read the data from the
+ *    buffer
+ * 6. Repeat 3-5
+ * 7. Close the file descriptor
+ */
+
+#ifndef _UAPI_KBASE_KINSTR_JM_READER_H_
+#define _UAPI_KBASE_KINSTR_JM_READER_H_
+
+/**
+ * enum kbase_kinstr_jm_reader_atom_state - Determines the work state of an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE: Signifies that an atom has
+ * entered a hardware queue
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_START: Signifies that work has started
+ * on an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_STOP: Signifies that work has stopped
+ * on an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE: Signifies that work has
+ * completed on an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT: The number of state enumerations
+ *
+ * We can add new states to the end of this if they do not break the existing
+ * state machine. Old user mode code can gracefully ignore states it does not
+ * understand.
+ *
+ * If we need to make a breaking change to the state machine, we can do that
+ * by changing the version reported by KBASE_IOCTL_KINSTR_JM_FD. This will
+ * mean that old user mode code will fail to understand the new state field in
+ * the structure and gracefully not use the state change API.
+ */
+enum kbase_kinstr_jm_reader_atom_state {
+	KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE,
+	KBASE_KINSTR_JM_READER_ATOM_STATE_START,
+	KBASE_KINSTR_JM_READER_ATOM_STATE_STOP,
+	KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE,
+	KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT
+};
+
+#endif /* _UAPI_KBASE_KINSTR_JM_READER_H_ */
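
A sketch of the numbered flow above, assuming the fd-request argument is the in/out union declared for KBASE_IOCTL_KINSTR_JM_FD in mali_kbase_ioctl.h (not reproduced in this hunk) and that its out view carries the documented version and size fields; the function name, the count of 64, and the 4 KiB buffer are illustrative:

        /* Illustrative only -- not part of this patch. Error handling trimmed. */
        #include <poll.h>
        #include <unistd.h>
        #include <sys/ioctl.h>
        #include "mali_kbase_ioctl.h"

        static int watch_atom_states(int kbase_fd)
        {
                union kbase_kinstr_jm_fd jm = { .in = { .count = 64 } };
                __u8 buf[4096];
                int fd;

                /* Steps 1-2: obtain the reader fd, note version and size. */
                fd = ioctl(kbase_fd, KBASE_IOCTL_KINSTR_JM_FD, &jm);
                if (fd < 0)
                        return -1;

                for (;;) {
                        struct pollfd p = { .fd = fd, .events = POLLIN };
                        ssize_t n;

                        if (poll(&p, 1, -1) < 0)        /* step 3 */
                                break;
                        n = read(fd, buf, sizeof(buf)); /* step 4 */
                        if (n <= 0)
                                break;
                        /* Step 5: buf holds n / jm.out.size records; check
                         * jm.out.version before interpreting any layout. */
                }
                return close(fd);                       /* step 7 */
        }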
diff --git a/common/include/uapi/gpu/arm/midgard/mali_uk.h b/common/include/uapi/gpu/arm/midgard/mali_uk.h
new file mode 100644
index 0000000..81cbb9e
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/mali_uk.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2015, 2018, 2020-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/**
+ * Types and definitions that are common across OSs for both the user
+ * and kernel side of the User-Kernel interface.
+ */
+
+#ifndef _UAPI_UK_H_
+#define _UAPI_UK_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * DOC: uk_api User-Kernel Interface API
+ *
+ * The User-Kernel Interface abstracts the communication mechanism between the
+ * user and kernel-side code of device drivers developed as part of the
+ * Midgard DDK. Currently that includes the Base driver.
+ *
+ * It exposes an OS-independent API to user-side code (UKU) which routes
+ * function calls to an OS-independent kernel-side API (UKK) via an
+ * OS-specific communication mechanism.
+ *
+ * This API is internal to the Midgard DDK and is not exposed to any
+ * applications.
+ */
+
+/**
+ * enum uk_client_id - Identifiers for kernel-side drivers implementing a UK
+ *                     interface, aka UKK clients.
+ * @UK_CLIENT_MALI_T600_BASE: Value used to identify the Base driver UK client.
+ * @UK_CLIENT_COUNT: The number of UK clients supported. This must be the last
+ *                   member of the enum.
+ *
+ * The UK module maps this identifier to an OS-specific device name, e.g.
+ * "gpu_base" -> "GPU0:". Pass the identifier to the uku_open() function to
+ * select a UKK client.
+ *
+ * When a new UKK client driver is created, a new identifier needs to be added
+ * to the uk_client_id enumeration, and the uku_open() implementations for the
+ * various OS ports need to be updated to provide a mapping from the
+ * identifier to the OS-specific device name.
+ */
+enum uk_client_id {
+	UK_CLIENT_MALI_T600_BASE,
+	UK_CLIENT_COUNT
+};
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+#endif /* _UAPI_UK_H_ */
-- 
cgit v1.2.3