author    Sidath Senanayake <sidaths@google.com>  2021-06-15 13:39:30 +0100
committer Sidath Senanayake <sidaths@google.com>  2021-06-15 14:11:16 +0100
commit    fca8613cfcf585bf9113dca96a05daea9fd89794 (patch)
tree      f2baa14910f83edf00450bc30d3703eb255a0bba /common
parent    8037b534570814775d79aeddd06b76e5ee941f59 (diff)
download  gpu-fca8613cfcf585bf9113dca96a05daea9fd89794.tar.gz
Mali Valhall DDK r31p0 KMD
Provenance: 2ea0ef9bd (collaborate/EAC/v_r31p0)
  VX504X08X-BU-00000-r31p0-01eac0 - Valhall Android DDK
  VX504X08X-BU-60000-r31p0-01eac0 - Valhall Android Document Bundle
  VX504X08X-DC-11001-r31p0-01eac0 - Valhall Android DDK Software Errata
  VX504X08X-SW-99006-r31p0-01eac0 - Valhall Android Renderscript AOSP parts

Signed-off-by: Sidath Senanayake <sidaths@google.com>
Change-Id: Ide9d5fdc6d9c95fa66a3546b01f619b43c09496d
Diffstat (limited to 'common')
-rw-r--r--common/include/linux/priority_control_manager.h4
-rw-r--r--common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h53
-rw-r--r--common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h765
-rw-r--r--common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_control_registers.h32
-rw-r--r--common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h1414
-rw-r--r--common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h390
-rw-r--r--common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h335
-rw-r--r--common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h287
-rw-r--r--common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h30
-rw-r--r--common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h120
-rw-r--r--common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h424
-rw-r--r--common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h1200
-rw-r--r--common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h223
-rw-r--r--common/include/uapi/gpu/arm/midgard/mali_base_kernel.h826
-rw-r--r--common/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h58
-rw-r--r--common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h105
-rw-r--r--common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h836
-rw-r--r--common/include/uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h69
-rw-r--r--common/include/uapi/gpu/arm/midgard/mali_uk.h70
19 files changed, 7240 insertions, 1 deletions
diff --git a/common/include/linux/priority_control_manager.h b/common/include/linux/priority_control_manager.h
index d3e22f5..df3b3cd 100644
--- a/common/include/linux/priority_control_manager.h
+++ b/common/include/linux/priority_control_manager.h
@@ -53,7 +53,7 @@ struct priority_control_manager_ops {
* Return: The priority that would actually be given, could be lower than requested_priority
*/
int (*pcm_scheduler_priority_check)(
- struct priority_control_manager_device *mgm_dev,
+ struct priority_control_manager_device *pcm_dev,
struct task_struct *task, int requested_priority);
};
@@ -62,6 +62,7 @@ struct priority_control_manager_ops {
* control manager
*
* @ops: Callbacks associated with this device
+ * @data: Pointer to device private data
* @owner: Pointer to the module owner
*
* This structure should be registered with the platform device using
@@ -69,6 +70,7 @@ struct priority_control_manager_ops {
*/
struct priority_control_manager_device {
struct priority_control_manager_ops ops;
+ void *data;
struct module *owner;
};
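
For illustration, a minimal sketch of how a platform module might implement this callback and make use of the new @data field; the pcm_example_* names and the clamping policy are hypothetical, and registration with kbase is not shown:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/priority_control_manager.h>

/* Hypothetical per-device state reached through the new @data pointer. */
struct pcm_example_data {
	int max_allowed_priority;
};

static int pcm_example_priority_check(struct priority_control_manager_device *pcm_dev,
				      struct task_struct *task, int requested_priority)
{
	struct pcm_example_data *pdata = pcm_dev->data;

	/* Illustrative policy: never grant more than the platform allows. */
	return min(requested_priority, pdata->max_allowed_priority);
}

static struct pcm_example_data pcm_example = { .max_allowed_priority = 1 };

static struct priority_control_manager_device pcm_example_dev = {
	.ops   = { .pcm_scheduler_priority_check = pcm_example_priority_check },
	.data  = &pcm_example,
	.owner = THIS_MODULE,
};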
diff --git a/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h
new file mode 100644
index 0000000..61da071
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Dummy Model interface
+ */
+
+#ifndef _UAPI_KBASE_MODEL_DUMMY_H_
+#define _UAPI_KBASE_MODEL_DUMMY_H_
+
+#include <linux/types.h>
+
+#define KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS (4)
+#define KBASE_DUMMY_MODEL_COUNTER_PER_CORE (60)
+#define KBASE_DUMMY_MODEL_COUNTER_PER_CORE_TYPE \
+ (64*KBASE_DUMMY_MODEL_COUNTER_PER_CORE)
+#define KBASE_DUMMY_MODEL_COUNTERS_PER_BIT (4)
+#define KBASE_DUMMY_MODEL_COUNTER_ENABLED(enable_mask, ctr_idx) \
+ (enable_mask & (1 << (ctr_idx / KBASE_DUMMY_MODEL_COUNTERS_PER_BIT)))
+
+#define KBASE_DUMMY_MODEL_HEADERS_PER_BLOCK 4
+#define KBASE_DUMMY_MODEL_COUNTERS_PER_BLOCK 60
+#define KBASE_DUMMY_MODEL_VALUES_PER_BLOCK \
+ (KBASE_DUMMY_MODEL_COUNTERS_PER_BLOCK + \
+ KBASE_DUMMY_MODEL_HEADERS_PER_BLOCK)
+#define KBASE_DUMMY_MODEL_BLOCK_SIZE \
+ (KBASE_DUMMY_MODEL_VALUES_PER_BLOCK * sizeof(__u32))
+#define KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS 8
+#define KBASE_DUMMY_MODEL_MAX_SHADER_CORES 32
+#define KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS \
+ (1 + 1 + KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS + KBASE_DUMMY_MODEL_MAX_SHADER_CORES)
+#define KBASE_DUMMY_MODEL_COUNTER_TOTAL \
+ (KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * KBASE_DUMMY_MODEL_COUNTER_PER_CORE_TYPE)
+
+#endif /* _UAPI_KBASE_MODEL_DUMMY_H_ */
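
As a quick sanity check of the arithmetic above, a sketch (using only the macros from this header) of the worst-case counter dump size and the per-counter enable test; the include path follows the layout added by this patch:

#include <linux/types.h>
#include <uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h>

/* Worst case: (1 + 1 + 8 + 32) = 42 blocks of 64 __u32 values each. */
static const size_t dummy_model_max_dump_bytes =
	KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE;
	/* = 42 * 64 * sizeof(__u32) = 10752 bytes */

static inline bool dummy_model_counter_enabled(__u32 enable_mask, unsigned int idx)
{
	/* Each enable bit covers KBASE_DUMMY_MODEL_COUNTERS_PER_BIT (4) counters. */
	return KBASE_DUMMY_MODEL_COUNTER_ENABLED(enable_mask, idx) != 0;
}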
diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h
new file mode 100644
index 0000000..7fa874b
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h
@@ -0,0 +1,765 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_BASE_CSF_KERNEL_H_
+#define _UAPI_BASE_CSF_KERNEL_H_
+
+#include <linux/types.h>
+
+/* Memory allocation, access/hint flags.
+ *
+ * See base_mem_alloc_flags.
+ */
+
+/* IN */
+/* Read access CPU side
+ */
+#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0)
+
+/* Write access CPU side
+ */
+#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1)
+
+/* Read access GPU side
+ */
+#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2)
+
+/* Write access GPU side
+ */
+#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3)
+
+/* Execute allowed on the GPU side
+ */
+#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4)
+
+/* Will be permanently mapped in kernel space.
+ * Flag is only allowed on allocations originating from kbase.
+ */
+#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5)
+
+/* The allocation will completely reside within the same 4GB chunk in the GPU
+ * virtual space.
+ * Since this flag is primarily required only for the TLS memory which will
+ * not be used to contain executable code and also not used for Tiler heap,
+ * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags.
+ */
+#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6)
+
+/* Userspace is not allowed to free this memory.
+ * Flag is only allowed on allocations originating from kbase.
+ */
+#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7)
+
+#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8)
+
+/* Grow backing store on GPU Page Fault
+ */
+#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9)
+
+/* Page coherence Outer shareable, if available
+ */
+#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10)
+
+/* Page coherence Inner shareable
+ */
+#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11)
+
+/* IN/OUT */
+/* Should be cached on the CPU, returned if actually cached
+ */
+#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12)
+
+/* IN/OUT */
+/* Must have same VA on both the GPU and the CPU
+ */
+#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13)
+
+/* OUT */
+/* Must call mmap to acquire a GPU address for the alloc
+ */
+#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14)
+
+/* IN */
+/* Page coherence Outer shareable, required.
+ */
+#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15)
+
+/* Protected memory
+ */
+#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16)
+
+/* Not needed physical memory
+ */
+#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17)
+
+/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the
+ * addresses to be the same
+ */
+#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18)
+
+/* CSF event memory
+ *
+ * If Outer shareable coherence is not specified or not available, then on
+ * allocation kbase will automatically use the uncached GPU mapping.
+ * There is no need for the client to specify BASE_MEM_UNCACHED_GPU
+ * themselves when allocating memory with the BASE_MEM_CSF_EVENT flag.
+ *
+ * This memory requires a permanent mapping
+ *
+ * See also kbase_reg_needs_kernel_mapping()
+ */
+#define BASE_MEM_CSF_EVENT ((base_mem_alloc_flags)1 << 19)
+
+#define BASE_MEM_RESERVED_BIT_20 ((base_mem_alloc_flags)1 << 20)
+
+/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu
+ * mode. Some components within the GPU might only be able to access memory
+ * that is GPU cacheable. Refer to the specific GPU implementation for more
+ * details. The 3 shareability flags will be ignored for GPU uncached memory.
+ * If used while importing USER_BUFFER type memory, then the import will fail
+ * if the memory is not aligned to GPU and CPU cache line width.
+ */
+#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21)
+
+/*
+ * Bits [22:25] for group_id (0~15).
+ *
+ * base_mem_group_id_set() should be used to pack a memory group ID into a
+ * base_mem_alloc_flags value instead of accessing the bits directly.
+ * base_mem_group_id_get() should be used to extract the memory group ID from
+ * a base_mem_alloc_flags value.
+ */
+#define BASEP_MEM_GROUP_ID_SHIFT 22
+#define BASE_MEM_GROUP_ID_MASK \
+ ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT)
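
The two helpers named in the comment are defined alongside the flags typedef elsewhere in these headers; roughly, the packing they describe looks like the sketch below (assuming base_mem_alloc_flags is the __u64 flags typedef from mali_base_kernel.h):

/* Sketch of the pack/extract behaviour described above. */
static inline base_mem_alloc_flags base_mem_group_id_set(int id)
{
	return ((base_mem_alloc_flags)id << BASEP_MEM_GROUP_ID_SHIFT) &
	       BASE_MEM_GROUP_ID_MASK;
}

static inline int base_mem_group_id_get(base_mem_alloc_flags flags)
{
	return (int)((flags & BASE_MEM_GROUP_ID_MASK) >> BASEP_MEM_GROUP_ID_SHIFT);
}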
+
+/* Must do CPU cache maintenance when imported memory is mapped/unmapped
+ * on GPU. Currently applicable to dma-buf type only.
+ */
+#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26)
+
+/* OUT */
+/* Kernel side cache sync ops required */
+#define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28)
+
+/* Number of bits used as flags for base memory management
+ *
+ * Must be kept in sync with the base_mem_alloc_flags flags
+ */
+#define BASE_MEM_FLAGS_NR_BITS 29
+
+/* A mask of all the flags which are only valid for allocations within kbase,
+ * and may not be passed from user space.
+ */
+#define BASEP_MEM_FLAGS_KERNEL_ONLY \
+ (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE)
+
+/* A mask for all output bits, excluding IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
+
+/* A mask for all input bits, including IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_INPUT_MASK \
+ (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
+
+/* A mask of all currently reserved flags
+ */
+#define BASE_MEM_FLAGS_RESERVED \
+	(BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_20)
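
Taken together, these masks describe the screening a kernel entry point would apply to user-supplied allocation flags; a sketch, with an illustrative function name:

/* Sketch: reject flag patterns that userspace must not pass in. */
static inline bool base_mem_flags_valid_from_user(base_mem_alloc_flags flags)
{
	if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
		return false;	/* unknown or output-only bits */
	if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY)
		return false;	/* kbase-internal bits */
	if (flags & BASE_MEM_FLAGS_RESERVED)
		return false;	/* reserved bits */
	return true;
}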
+
+#define BASEP_MEM_INVALID_HANDLE (0ull << 12)
+#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12)
+#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12)
+#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12)
+#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12)
+/* reserved handles ..-47<<PAGE_SHIFT for future special handles */
+#define BASEP_MEM_CSF_USER_REG_PAGE_HANDLE (47ul << 12)
+#define BASEP_MEM_CSF_USER_IO_PAGES_HANDLE (48ul << 12)
+#define BASE_MEM_COOKIE_BASE (64ul << 12)
+#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \
+ BASE_MEM_COOKIE_BASE)
+
+#define KBASE_CSF_NUM_USER_IO_PAGES_HANDLE \
+ ((BASE_MEM_COOKIE_BASE - BASEP_MEM_CSF_USER_IO_PAGES_HANDLE) >> \
+ LOCAL_PAGE_SHIFT)
+
+/**
+ * Valid set of just-in-time memory allocation flags
+ */
+#define BASE_JIT_ALLOC_VALID_FLAGS ((__u8)0)
+
+/* Flags to pass to ::base_context_init.
+ * Flags can be ORed together to enable multiple things.
+ *
+ * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
+ * not collide with them.
+ */
+typedef __u32 base_context_create_flags;
+
+/* No flags set */
+#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0)
+
+/* Base context is embedded in a cctx object (flag used for CINSTR
+ * software counter macros)
+ */
+#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0)
+
+/* Base context is a 'System Monitor' context for Hardware counters.
+ *
+ * One important side effect of this is that job submission is disabled.
+ */
+#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \
+ ((base_context_create_flags)1 << 1)
+
+/* Base context creates a CSF event notification thread.
+ *
+ * Creation of the CSF event notification thread is optional, but it is
+ * required for CSF events to be handled.
+ */
+#define BASE_CONTEXT_CSF_EVENT_THREAD ((base_context_create_flags)1 << 2)
+
+/* Bit-shift used to encode a memory group ID in base_context_create_flags
+ */
+#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3)
+
+/* Bitmask used to encode a memory group ID in base_context_create_flags
+ */
+#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \
+ ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)
+
+/* Bitpattern describing the base_context_create_flags that can be
+ * passed to the kernel
+ */
+#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \
+ (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \
+ BASEP_CONTEXT_MMU_GROUP_ID_MASK)
+
+/* Bitpattern describing the ::base_context_create_flags that can be
+ * passed to base_context_init()
+ */
+#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \
+ (BASE_CONTEXT_CCTX_EMBEDDED | \
+ BASE_CONTEXT_CSF_EVENT_THREAD | \
+ BASEP_CONTEXT_CREATE_KERNEL_FLAGS)
+
+/* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
+ * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST)
+ */
+#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0)
+
+/* Indicate that job dumping is enabled. This could affect certain timers
+ * to account for the performance impact.
+ */
+#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1)
+
+/* Enable KBase tracepoints for CSF builds */
+#define BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS (1 << 2)
+
+/* Enable additional CSF Firmware side tracepoints */
+#define BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS (1 << 3)
+
+#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \
+ BASE_TLSTREAM_JOB_DUMPING_ENABLED | \
+ BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS | \
+ BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS)
+
+/* Number of pages mapped into the process address space for a bound GPU
+ * command queue. A pair of input/output pages and a Hw doorbell page
+ * are mapped to enable direct submission of commands to Hw.
+ */
+#define BASEP_QUEUE_NR_MMAP_USER_PAGES ((size_t)3)
+
+#define BASE_QUEUE_MAX_PRIORITY (15U)
+
+/* CQS Sync object is an array of __u32 event_mem[2], error field index is 1 */
+#define BASEP_EVENT_VAL_INDEX (0U)
+#define BASEP_EVENT_ERR_INDEX (1U)
+
+/* The upper limit for number of objects that could be waited/set per command.
+ * This limit is now enforced as internally the error inherit inputs are
+ * converted to 32-bit flags in a __u32 variable occupying a previously padding
+ * field.
+ */
+#define BASEP_KCPU_CQS_MAX_NUM_OBJS ((size_t)32)
+
+#if MALI_UNIT_TEST
+/**
+ * enum base_kcpu_command_type - Kernel CPU queue command type.
+ * @BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: fence_signal,
+ * @BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: fence_wait,
+ * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT: cqs_wait,
+ * @BASE_KCPU_COMMAND_TYPE_CQS_SET: cqs_set,
+ * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: cqs_wait_operation,
+ * @BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: cqs_set_operation,
+ * @BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: map_import,
+ * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: unmap_import,
+ * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: unmap_import_force,
+ * @BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: jit_alloc,
+ * @BASE_KCPU_COMMAND_TYPE_JIT_FREE: jit_free,
+ * @BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: group_suspend,
+ * @BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: error_barrier,
+ * @BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME: sample_time,
+ */
+enum base_kcpu_command_type {
+ BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL,
+ BASE_KCPU_COMMAND_TYPE_FENCE_WAIT,
+ BASE_KCPU_COMMAND_TYPE_CQS_WAIT,
+ BASE_KCPU_COMMAND_TYPE_CQS_SET,
+ BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION,
+ BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION,
+ BASE_KCPU_COMMAND_TYPE_MAP_IMPORT,
+ BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT,
+ BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE,
+ BASE_KCPU_COMMAND_TYPE_JIT_ALLOC,
+ BASE_KCPU_COMMAND_TYPE_JIT_FREE,
+ BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND,
+ BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER,
+ BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME,
+};
+#else
+/**
+ * enum base_kcpu_command_type - Kernel CPU queue command type.
+ * @BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: fence_signal,
+ * @BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: fence_wait,
+ * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT: cqs_wait,
+ * @BASE_KCPU_COMMAND_TYPE_CQS_SET: cqs_set,
+ * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: cqs_wait_operation,
+ * @BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: cqs_set_operation,
+ * @BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: map_import,
+ * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: unmap_import,
+ * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: unmap_import_force,
+ * @BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: jit_alloc,
+ * @BASE_KCPU_COMMAND_TYPE_JIT_FREE: jit_free,
+ * @BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: group_suspend,
+ * @BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: error_barrier,
+ */
+enum base_kcpu_command_type {
+ BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL,
+ BASE_KCPU_COMMAND_TYPE_FENCE_WAIT,
+ BASE_KCPU_COMMAND_TYPE_CQS_WAIT,
+ BASE_KCPU_COMMAND_TYPE_CQS_SET,
+ BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION,
+ BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION,
+ BASE_KCPU_COMMAND_TYPE_MAP_IMPORT,
+ BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT,
+ BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE,
+ BASE_KCPU_COMMAND_TYPE_JIT_ALLOC,
+ BASE_KCPU_COMMAND_TYPE_JIT_FREE,
+ BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND,
+ BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER,
+};
+#endif /* MALI_UNIT_TEST */
+
+/**
+ * enum base_queue_group_priority - Priority of a GPU Command Queue Group.
+ * @BASE_QUEUE_GROUP_PRIORITY_HIGH: GPU Command Queue Group is of high
+ * priority.
+ * @BASE_QUEUE_GROUP_PRIORITY_MEDIUM: GPU Command Queue Group is of medium
+ * priority.
+ * @BASE_QUEUE_GROUP_PRIORITY_LOW: GPU Command Queue Group is of low
+ * priority.
+ * @BASE_QUEUE_GROUP_PRIORITY_REALTIME: GPU Command Queue Group is of real-time
+ * priority.
+ * @BASE_QUEUE_GROUP_PRIORITY_COUNT: Number of GPU Command Queue Group
+ * priority levels.
+ *
+ * Currently this is in order of highest to lowest, but if new levels are added
+ * then those new levels may be out of order to preserve the ABI compatibility
+ * with previous releases. At that point, ensure assignment to
+ * the 'priority' member in &kbase_queue_group is updated to ensure it remains
+ * a linear ordering.
+ *
+ * There should be no gaps in the enum, otherwise use of
+ * BASE_QUEUE_GROUP_PRIORITY_COUNT in kbase must be updated.
+ */
+enum base_queue_group_priority {
+ BASE_QUEUE_GROUP_PRIORITY_HIGH = 0,
+ BASE_QUEUE_GROUP_PRIORITY_MEDIUM,
+ BASE_QUEUE_GROUP_PRIORITY_LOW,
+ BASE_QUEUE_GROUP_PRIORITY_REALTIME,
+ BASE_QUEUE_GROUP_PRIORITY_COUNT
+};
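
The note about linear ordering amounts to: kbase translates these ABI values into an internal rank rather than using them directly, so new out-of-order enumerators (such as REALTIME above) can still slot into the right place. A hypothetical translation table (ranks illustrative, 0 = highest):

/* Hypothetical ABI-priority -> internal linear rank mapping (0 = highest). */
static const unsigned int example_group_priority_rank[BASE_QUEUE_GROUP_PRIORITY_COUNT] = {
	[BASE_QUEUE_GROUP_PRIORITY_REALTIME] = 0,
	[BASE_QUEUE_GROUP_PRIORITY_HIGH]     = 1,
	[BASE_QUEUE_GROUP_PRIORITY_MEDIUM]   = 2,
	[BASE_QUEUE_GROUP_PRIORITY_LOW]      = 3,
};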
+
+struct base_kcpu_command_fence_info {
+ __u64 fence;
+};
+
+struct base_cqs_wait_info {
+ __u64 addr;
+ __u32 val;
+ __u32 padding;
+};
+
+struct base_kcpu_command_cqs_wait_info {
+ __u64 objs;
+ __u32 nr_objs;
+ __u32 inherit_err_flags;
+};
+
+struct base_cqs_set {
+ __u64 addr;
+};
+
+struct base_kcpu_command_cqs_set_info {
+ __u64 objs;
+ __u32 nr_objs;
+ __u32 padding;
+};
+
+/**
+ * basep_cqs_data_type - Enumeration of CQS Data Types
+ *
+ * @BASEP_CQS_DATA_TYPE_U32: The Data Type of a CQS Object's value
+ * is an unsigned 32-bit integer
+ * @BASEP_CQS_DATA_TYPE_U64: The Data Type of a CQS Object's value
+ * is an unsigned 64-bit integer
+ */
+typedef enum PACKED {
+ BASEP_CQS_DATA_TYPE_U32 = 0,
+ BASEP_CQS_DATA_TYPE_U64 = 1,
+} basep_cqs_data_type;
+
+/**
+ * basep_cqs_wait_operation_op - Enumeration of CQS Object Wait
+ * Operation conditions
+ *
+ * @BASEP_CQS_WAIT_OPERATION_LE: CQS Wait Operation indicating that a
+ * wait will be satisfied when a CQS Object's
+ * value is Less than or Equal to
+ * the Wait Operation value
+ * @BASEP_CQS_WAIT_OPERATION_GT: CQS Wait Operation indicating that a
+ * wait will be satisfied when a CQS Object's
+ * value is Greater than the Wait Operation value
+ */
+typedef enum {
+ BASEP_CQS_WAIT_OPERATION_LE = 0,
+ BASEP_CQS_WAIT_OPERATION_GT = 1,
+} basep_cqs_wait_operation_op;
+
+struct base_cqs_wait_operation_info {
+ __u64 addr;
+ __u64 val;
+ __u8 operation;
+ __u8 data_type;
+ __u8 padding[6];
+};
+
+/**
+ * struct base_kcpu_command_cqs_wait_operation_info - structure which contains information
+ * about the Timeline CQS wait objects
+ *
+ * @objs: An array of Timeline CQS waits.
+ * @nr_objs: Number of Timeline CQS waits in the array.
+ * @inherit_err_flags: Bit-pattern for the CQSs in the array whose error field
+ *                     is to be used as the source for importing into the
+ *                     queue's error-state.
+ */
+struct base_kcpu_command_cqs_wait_operation_info {
+ __u64 objs;
+ __u32 nr_objs;
+ __u32 inherit_err_flags;
+};
+
+/**
+ * basep_cqs_set_operation_op - Enumeration of CQS Set Operations
+ *
+ * @BASEP_CQS_SET_OPERATION_ADD: CQS Set operation for adding a value
+ * to a synchronization object
+ * @BASEP_CQS_SET_OPERATION_SET: CQS Set operation for setting the value
+ * of a synchronization object
+ */
+typedef enum {
+ BASEP_CQS_SET_OPERATION_ADD = 0,
+ BASEP_CQS_SET_OPERATION_SET = 1,
+} basep_cqs_set_operation_op;
+
+struct base_cqs_set_operation_info {
+ __u64 addr;
+ __u64 val;
+ __u8 operation;
+ __u8 data_type;
+ __u8 padding[6];
+};
+
+/**
+ * struct base_kcpu_command_cqs_set_operation_info - structure which contains information
+ * about the Timeline CQS set objects
+ *
+ * @objs: An array of Timeline CQS sets.
+ * @nr_objs: Number of Timeline CQS sets in the array.
+ * @padding: Structure padding, unused bytes.
+ */
+struct base_kcpu_command_cqs_set_operation_info {
+ __u64 objs;
+ __u32 nr_objs;
+ __u32 padding;
+};
+
+/**
+ * struct base_kcpu_command_import_info - structure which contains information
+ * about the imported buffer.
+ *
+ * @handle: Address of imported user buffer.
+ */
+struct base_kcpu_command_import_info {
+ __u64 handle;
+};
+
+/**
+ * struct base_kcpu_command_jit_alloc_info - structure which contains
+ * information about jit memory allocation.
+ *
+ * @info: An array of elements of the
+ * struct base_jit_alloc_info type.
+ * @count: The number of elements in the info array.
+ * @padding: Padding to a multiple of 64 bits.
+ */
+struct base_kcpu_command_jit_alloc_info {
+ __u64 info;
+ __u8 count;
+ __u8 padding[7];
+};
+
+/**
+ * struct base_kcpu_command_jit_free_info - structure which contains
+ * information about jit memory which is to be freed.
+ *
+ * @ids: An array containing the JIT IDs to free.
+ * @count: The number of elements in the ids array.
+ * @padding: Padding to a multiple of 64 bits.
+ */
+struct base_kcpu_command_jit_free_info {
+ __u64 ids;
+ __u8 count;
+ __u8 padding[7];
+};
+
+/**
+ * struct base_kcpu_command_group_suspend_info - structure which contains
+ * suspend buffer data captured for a suspended queue group.
+ *
+ * @buffer: Pointer to an array of elements of the type char.
+ * @size: Number of elements in the @buffer array.
+ * @group_handle: Handle to the mapping of CSG.
+ * @padding: padding to a multiple of 64 bits.
+ */
+struct base_kcpu_command_group_suspend_info {
+ __u64 buffer;
+ __u32 size;
+ __u8 group_handle;
+ __u8 padding[3];
+};
+
+#if MALI_UNIT_TEST
+struct base_kcpu_command_sample_time_info {
+ __u64 time;
+};
+#endif /* MALI_UNIT_TEST */
+
+/**
+ * struct base_kcpu_command - kcpu command.
+ * @type: type of the kcpu command, one enum base_kcpu_command_type
+ * @padding: padding to a multiple of 64 bits
+ * @info: structure which contains information about the kcpu command;
+ * actual type is determined by @p type
+ * @info.fence: Fence
+ * @info.cqs_wait: CQS wait
+ * @info.cqs_set: CQS set
+ * @info.import: import
+ * @info.jit_alloc: jit allocation
+ * @info.jit_free: jit deallocation
+ * @info.suspend_buf_copy: suspend buffer copy
+ * @info.sample_time: sample time
+ * @info.padding: padding
+ */
+struct base_kcpu_command {
+ __u8 type;
+ __u8 padding[sizeof(__u64) - sizeof(__u8)];
+ union {
+ struct base_kcpu_command_fence_info fence;
+ struct base_kcpu_command_cqs_wait_info cqs_wait;
+ struct base_kcpu_command_cqs_set_info cqs_set;
+ struct base_kcpu_command_cqs_wait_operation_info cqs_wait_operation;
+ struct base_kcpu_command_cqs_set_operation_info cqs_set_operation;
+ struct base_kcpu_command_import_info import;
+ struct base_kcpu_command_jit_alloc_info jit_alloc;
+ struct base_kcpu_command_jit_free_info jit_free;
+ struct base_kcpu_command_group_suspend_info suspend_buf_copy;
+#if MALI_UNIT_TEST
+ struct base_kcpu_command_sample_time_info sample_time;
+#endif /* MALI_UNIT_TEST */
+ __u64 padding[2]; /* No sub-struct should be larger */
+ } info;
+};
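
To make the union layout concrete, a sketch of how userspace might populate one of these commands for a CQS set-operation; the enqueue ioctl, memory lifetime and the surrounding build environment are outside this excerpt, so only the structure filling is shown:

#include <stdint.h>
#include <string.h>
#include "mali_base_csf_kernel.h"	/* this header, per the path above */

/* Illustrative: add 1 to a single 64-bit CQS object at cqs_gpu_addr. */
static void fill_cqs_set_operation(struct base_kcpu_command *cmd,
				   struct base_cqs_set_operation_info *obj,
				   __u64 cqs_gpu_addr)
{
	memset(obj, 0, sizeof(*obj));
	obj->addr      = cqs_gpu_addr;
	obj->val       = 1;
	obj->operation = BASEP_CQS_SET_OPERATION_ADD;
	obj->data_type = BASEP_CQS_DATA_TYPE_U64;

	memset(cmd, 0, sizeof(*cmd));
	cmd->type = BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION;
	cmd->info.cqs_set_operation.objs    = (__u64)(uintptr_t)obj;
	cmd->info.cqs_set_operation.nr_objs = 1;
}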
+
+/**
+ * struct basep_cs_stream_control - CSI capabilities.
+ *
+ * @features: Features of this stream
+ * @padding: Padding to a multiple of 64 bits.
+ */
+struct basep_cs_stream_control {
+ __u32 features;
+ __u32 padding;
+};
+
+/**
+ * struct basep_cs_group_control - CSG interface capabilities.
+ *
+ * @features: Features of this group
+ * @stream_num: Number of streams in this group
+ * @suspend_size: Size in bytes of the suspend buffer for this group
+ * @padding: Padding to a multiple of 64 bits.
+ */
+struct basep_cs_group_control {
+ __u32 features;
+ __u32 stream_num;
+ __u32 suspend_size;
+ __u32 padding;
+};
+
+/**
+ * struct base_gpu_queue_group_error_fatal_payload - Unrecoverable fault
+ * error information associated with GPU command queue group.
+ *
+ * @sideband: Additional information of the unrecoverable fault.
+ * @status: Unrecoverable fault information.
+ * This consists of exception type (least significant byte) and
+ * data (remaining bytes). One example of exception type is
+ * CS_INVALID_INSTRUCTION (0x49).
+ * @padding: Padding to make multiple of 64bits
+ */
+struct base_gpu_queue_group_error_fatal_payload {
+ __u64 sideband;
+ __u32 status;
+ __u32 padding;
+};
+
+/**
+ * struct base_gpu_queue_error_fatal_payload - Unrecoverable fault
+ * error information related to GPU command queue.
+ *
+ * @sideband: Additional information about this unrecoverable fault.
+ * @status: Unrecoverable fault information.
+ * This consists of exception type (least significant byte) and
+ * data (remaining bytes). One example of exception type is
+ * CS_INVALID_INSTRUCTION (0x49).
+ * @csi_index: Index of the CSF interface the queue is bound to.
+ * @padding: Padding to make multiple of 64bits
+ */
+struct base_gpu_queue_error_fatal_payload {
+ __u64 sideband;
+ __u32 status;
+ __u8 csi_index;
+ __u8 padding[3];
+};
+
+/**
+ * enum base_gpu_queue_group_error_type - GPU Fatal error type.
+ *
+ * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL: Fatal error associated with GPU
+ * command queue group.
+ * @BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL: Fatal error associated with GPU
+ * command queue.
+ * @BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT: Fatal error associated with
+ * progress timeout.
+ * @BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM: Fatal error due to running out
+ * of tiler heap memory.
+ * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT: The number of fatal error types
+ *
+ * This type is used for &struct_base_gpu_queue_group_error.error_type.
+ */
+enum base_gpu_queue_group_error_type {
+ BASE_GPU_QUEUE_GROUP_ERROR_FATAL = 0,
+ BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL,
+ BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT,
+ BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM,
+ BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT
+};
+
+/**
+ * struct base_gpu_queue_group_error - Unrecoverable fault information
+ * @error_type: Error type of @base_gpu_queue_group_error_type
+ * indicating which field in union payload is filled
+ * @padding: Unused bytes for 64bit boundary
+ * @payload: Input Payload
+ * @payload.fatal_group: Unrecoverable fault error associated with
+ * GPU command queue group
+ * @payload.fatal_queue: Unrecoverable fault error associated with command queue
+ */
+struct base_gpu_queue_group_error {
+ __u8 error_type;
+ __u8 padding[7];
+ union {
+ struct base_gpu_queue_group_error_fatal_payload fatal_group;
+ struct base_gpu_queue_error_fatal_payload fatal_queue;
+ } payload;
+};
+
+/**
+ * enum base_csf_notification_type - Notification type
+ *
+ * @BASE_CSF_NOTIFICATION_EVENT: Notification with kernel event
+ * @BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR: Notification with GPU fatal
+ * error
+ * @BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP: Notification with dumping cpu
+ * queue
+ * @BASE_CSF_NOTIFICATION_COUNT: The number of notification type
+ *
+ * This type is used for &struct_base_csf_notification.type.
+ */
+enum base_csf_notification_type {
+ BASE_CSF_NOTIFICATION_EVENT = 0,
+ BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
+ BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP,
+ BASE_CSF_NOTIFICATION_COUNT
+};
+
+/**
+ * struct base_csf_notification - Event or error notification
+ *
+ * @type: Notification type of @base_csf_notification_type
+ * @padding: Padding for 64bit boundary
+ * @payload: Input Payload
+ * @payload.align: To fit the struct into a 64-byte cache line
+ * @payload.csg_error: CSG error
+ * @payload.csg_error.handle: Handle of GPU command queue group associated with
+ * fatal error
+ * @payload.csg_error.padding: Padding
+ * @payload.csg_error.error: Unrecoverable fault error
+ *
+ */
+struct base_csf_notification {
+ __u8 type;
+ __u8 padding[7];
+ union {
+ struct {
+ __u8 handle;
+ __u8 padding[7];
+ struct base_gpu_queue_group_error error;
+ } csg_error;
+
+ __u8 align[56];
+ } payload;
+};
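
A sketch of how a client might decode one of these notifications once it has been received from the kernel; the 0xFF extraction of the exception-type byte follows the @status description above, and the function name is illustrative:

/* Illustrative decode of a base_csf_notification received from kbase. */
static void handle_csf_notification(const struct base_csf_notification *n)
{
	if (n->type != BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR)
		return;	/* kernel event or CPU-queue dump request */

	const struct base_gpu_queue_group_error *err =
		&n->payload.csg_error.error;

	if (err->error_type == BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL) {
		__u32 status = err->payload.fatal_queue.status;
		__u8 exception_type = status & 0xFF; /* e.g. CS_INVALID_INSTRUCTION (0x49) */
		__u8 csi = err->payload.fatal_queue.csi_index;

		/* ... report csg_error.handle, csi, exception_type, sideband ... */
		(void)exception_type;
		(void)csi;
	}
}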
+
+#endif /* _UAPI_BASE_CSF_KERNEL_H_ */
diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_control_registers.h b/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_control_registers.h
new file mode 100644
index 0000000..570cba8
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_control_registers.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _UAPI_GPU_CSF_CONTROL_REGISTERS_H_
+#define _UAPI_GPU_CSF_CONTROL_REGISTERS_H_
+
+/* GPU_REGISTERS register offsets */
+#define GPU_CONTROL_MCU 0x3000 /* () MCU control registers */
+
+#endif /* _UAPI_GPU_CSF_CONTROL_REGISTERS_H_ */
diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h b/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h
new file mode 100644
index 0000000..f233a0d
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h
@@ -0,0 +1,1414 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _UAPI_GPU_CSF_REGISTERS_H_
+#define _UAPI_GPU_CSF_REGISTERS_H_
+
+/*
+ * Begin register sets
+ */
+
+/* DOORBELLS base address */
+#define DOORBELLS_BASE 0x0080000
+#define DOORBELLS_REG(r) (DOORBELLS_BASE + (r))
+
+/* CS_KERNEL_INPUT_BLOCK base address */
+#define CS_KERNEL_INPUT_BLOCK_BASE 0x0000
+#define CS_KERNEL_INPUT_BLOCK_REG(r) (CS_KERNEL_INPUT_BLOCK_BASE + (r))
+
+/* CS_KERNEL_OUTPUT_BLOCK base address */
+#define CS_KERNEL_OUTPUT_BLOCK_BASE 0x0000
+#define CS_KERNEL_OUTPUT_BLOCK_REG(r) (CS_KERNEL_OUTPUT_BLOCK_BASE + (r))
+
+/* CS_USER_INPUT_BLOCK base address */
+#define CS_USER_INPUT_BLOCK_BASE 0x0000
+#define CS_USER_INPUT_BLOCK_REG(r) (CS_USER_INPUT_BLOCK_BASE + (r))
+
+/* CS_USER_OUTPUT_BLOCK base address */
+#define CS_USER_OUTPUT_BLOCK_BASE 0x0000
+#define CS_USER_OUTPUT_BLOCK_REG(r) (CS_USER_OUTPUT_BLOCK_BASE + (r))
+
+/* CSG_INPUT_BLOCK base address */
+#define CSG_INPUT_BLOCK_BASE 0x0000
+#define CSG_INPUT_BLOCK_REG(r) (CSG_INPUT_BLOCK_BASE + (r))
+
+/* CSG_OUTPUT_BLOCK base address */
+#define CSG_OUTPUT_BLOCK_BASE 0x0000
+#define CSG_OUTPUT_BLOCK_REG(r) (CSG_OUTPUT_BLOCK_BASE + (r))
+
+/* GLB_CONTROL_BLOCK base address */
+#define GLB_CONTROL_BLOCK_BASE 0x04000000
+#define GLB_CONTROL_BLOCK_REG(r) (GLB_CONTROL_BLOCK_BASE + (r))
+
+/* GLB_INPUT_BLOCK base address */
+#define GLB_INPUT_BLOCK_BASE 0x0000
+#define GLB_INPUT_BLOCK_REG(r) (GLB_INPUT_BLOCK_BASE + (r))
+
+/* GLB_OUTPUT_BLOCK base address */
+#define GLB_OUTPUT_BLOCK_BASE 0x0000
+#define GLB_OUTPUT_BLOCK_REG(r) (GLB_OUTPUT_BLOCK_BASE + (r))
+
+/* USER base address */
+#define USER_BASE 0x0010000
+#define USER_REG(r) (USER_BASE + (r))
+
+/* End register sets */
+
+/*
+ * Begin register offsets
+ */
+
+/* DOORBELLS register offsets */
+#define DOORBELL_0 0x0000 /* () Doorbell 0 register */
+#define DOORBELL(n) (DOORBELL_0 + (n)*65536)
+#define DOORBELL_REG(n, r) (DOORBELL(n) + DOORBELL_BLOCK_REG(r))
+#define DOORBELL_COUNT 1024
+
+/* DOORBELL_BLOCK register offsets */
+#define DB_BLK_DOORBELL 0x0000 /* (WO) Doorbell request */
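
Combining the macros above, the offset of doorbell n's request register from the GPU register base works out as below; a sketch assuming the per-doorbell block register offset is simply added to DOORBELL(n):

/* Sketch: MMIO offset of doorbell n's (write-only) request register.
 * n must be below DOORBELL_COUNT (1024).
 */
static inline __u32 doorbell_request_offset(unsigned int n)
{
	/* 0x0080000 + n * 65536 + 0x0000 */
	return DOORBELLS_REG(DOORBELL(n) + DB_BLK_DOORBELL);
}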
+
+/* CS_KERNEL_INPUT_BLOCK register offsets */
+#define CS_REQ 0x0000 /* () CS request flags */
+#define CS_CONFIG 0x0004 /* () CS configuration */
+#define CS_ACK_IRQ_MASK 0x000C /* () Command stream interrupt mask */
+#define CS_BASE_LO 0x0010 /* () Base pointer for the ring buffer, low word */
+#define CS_BASE_HI 0x0014 /* () Base pointer for the ring buffer, high word */
+#define CS_SIZE 0x0018 /* () Size of the ring buffer */
+#define CS_TILER_HEAP_START_LO 0x0020 /* () Pointer to heap start, low word */
+#define CS_TILER_HEAP_START_HI 0x0024 /* () Pointer to heap start, high word */
+#define CS_TILER_HEAP_END_LO 0x0028 /* () Tiler heap descriptor address, low word */
+#define CS_TILER_HEAP_END_HI 0x002C /* () Tiler heap descriptor address, high word */
+#define CS_USER_INPUT_LO 0x0030 /* () CS user mode input page address, low word */
+#define CS_USER_INPUT_HI 0x0034 /* () CS user mode input page address, high word */
+#define CS_USER_OUTPUT_LO 0x0038 /* () CS user mode output page address, low word */
+#define CS_USER_OUTPUT_HI 0x003C /* () CS user mode output page address, high word */
+
+/* CS_KERNEL_OUTPUT_BLOCK register offsets */
+#define CS_ACK 0x0000 /* () CS acknowledge flags */
+#define CS_STATUS_CMD_PTR_LO 0x0040 /* () Program pointer current value, low word */
+#define CS_STATUS_CMD_PTR_HI 0x0044 /* () Program pointer current value, high word */
+#define CS_STATUS_WAIT 0x0048 /* () Wait condition status register */
+#define CS_STATUS_REQ_RESOURCE 0x004C /* () Indicates the resources requested by the CS */
+#define CS_STATUS_WAIT_SYNC_POINTER_LO 0x0050 /* () Sync object pointer, low word */
+#define CS_STATUS_WAIT_SYNC_POINTER_HI 0x0054 /* () Sync object pointer, high word */
+#define CS_STATUS_WAIT_SYNC_VALUE 0x0058 /* () Sync object test value */
+#define CS_STATUS_SCOREBOARDS 0x005C /* () Scoreboard status */
+#define CS_STATUS_BLOCKED_REASON 0x0060 /* () Blocked reason */
+#define CS_FAULT 0x0080 /* () Recoverable fault information */
+#define CS_FATAL 0x0084 /* () Unrecoverable fault information */
+#define CS_FAULT_INFO_LO 0x0088 /* () Additional information about a recoverable fault, low word */
+#define CS_FAULT_INFO_HI 0x008C /* () Additional information about a recoverable fault, high word */
+#define CS_FATAL_INFO_LO 0x0090 /* () Additional information about a non-recoverable fault, low word */
+#define CS_FATAL_INFO_HI 0x0094 /* () Additional information about a non-recoverable fault, high word */
+#define CS_HEAP_VT_START 0x00C0 /* () Number of vertex/tiling operations started */
+#define CS_HEAP_VT_END 0x00C4 /* () Number of vertex/tiling operations completed */
+#define CS_HEAP_FRAG_END 0x00CC /* () Number of fragment operations completed */
+#define CS_HEAP_ADDRESS_LO 0x00D0 /* () Heap address, low word */
+#define CS_HEAP_ADDRESS_HI 0x00D4 /* () Heap address, high word */
+
+/* CS_USER_INPUT_BLOCK register offsets */
+#define CS_INSERT_LO 0x0000 /* () Current insert offset for ring buffer, low word */
+#define CS_INSERT_HI 0x0004 /* () Current insert offset for ring buffer, high word */
+#define CS_EXTRACT_INIT_LO 0x0008 /* () Initial extract offset for ring buffer, low word */
+#define CS_EXTRACT_INIT_HI 0x000C /* () Initial extract offset for ring buffer, high word */
+
+/* CS_USER_OUTPUT_BLOCK register offsets */
+#define CS_EXTRACT_LO 0x0000 /* () Current extract offset for ring buffer, low word */
+#define CS_EXTRACT_HI 0x0004 /* () Current extract offset for ring buffer, high word */
+#define CS_ACTIVE 0x0008 /* () Initial extract offset when the CS is started */
+
+/* CSG_INPUT_BLOCK register offsets */
+#define CSG_REQ 0x0000 /* () CSG request */
+#define CSG_ACK_IRQ_MASK 0x0004 /* () Global acknowledge interrupt mask */
+#define CSG_DB_REQ 0x0008 /* () Global doorbell request */
+#define CSG_IRQ_ACK 0x000C /* () CS IRQ acknowledge */
+#define CSG_ALLOW_COMPUTE_LO 0x0020 /* () Allowed compute endpoints, low word */
+#define CSG_ALLOW_COMPUTE_HI 0x0024 /* () Allowed compute endpoints, high word */
+#define CSG_ALLOW_FRAGMENT_LO 0x0028 /* () Allowed fragment endpoints, low word */
+#define CSG_ALLOW_FRAGMENT_HI 0x002C /* () Allowed fragment endpoints, high word */
+#define CSG_ALLOW_OTHER 0x0030 /* () Allowed other endpoints */
+#define CSG_EP_REQ 0x0034 /* () Maximum number of endpoints allowed */
+#define CSG_SUSPEND_BUF_LO 0x0040 /* () Normal mode suspend buffer, low word */
+#define CSG_SUSPEND_BUF_HI 0x0044 /* () Normal mode suspend buffer, high word */
+#define CSG_PROTM_SUSPEND_BUF_LO 0x0048 /* () Protected mode suspend buffer, low word */
+#define CSG_PROTM_SUSPEND_BUF_HI 0x004C /* () Protected mode suspend buffer, high word */
+#define CSG_CONFIG 0x0050 /* () CSG configuration options */
+#define CSG_ITER_TRACE_CONFIG 0x0054 /* () CSG trace configuration */
+
+/* CSG_OUTPUT_BLOCK register offsets */
+#define CSG_ACK 0x0000 /* () CSG acknowledge flags */
+#define CSG_DB_ACK 0x0008 /* () CS kernel doorbell acknowledge flags */
+#define CSG_IRQ_REQ 0x000C /* () CS interrupt request flags */
+#define CSG_STATUS_EP_CURRENT 0x0010 /* () Endpoint allocation status register */
+#define CSG_STATUS_EP_REQ 0x0014 /* () Endpoint request status register */
+#define CSG_RESOURCE_DEP 0x001C /* () Current resource dependencies */
+
+/* GLB_CONTROL_BLOCK register offsets */
+#define GLB_VERSION 0x0000 /* () Global interface version */
+#define GLB_FEATURES 0x0004 /* () Global interface features */
+#define GLB_INPUT_VA 0x0008 /* () Address of GLB_INPUT_BLOCK */
+#define GLB_OUTPUT_VA 0x000C /* () Address of GLB_OUTPUT_BLOCK */
+#define GLB_GROUP_NUM 0x0010 /* () Number of CSG interfaces */
+#define GLB_GROUP_STRIDE 0x0014 /* () Stride between CSG interfaces */
+#define GLB_PRFCNT_SIZE 0x0018 /* () Size of CSF performance counters */
+#define GLB_INSTR_FEATURES 0x001C /* () TRACE_POINT instrumentation features */
+#define GROUP_CONTROL_0 0x1000 /* () CSG control and capabilities */
+#define GROUP_CONTROL(n) (GROUP_CONTROL_0 + (n)*256)
+#define GROUP_CONTROL_REG(n, r) (GROUP_CONTROL(n) + GROUP_CONTROL_BLOCK_REG(r))
+#define GROUP_CONTROL_COUNT 16
+
+/* STREAM_CONTROL_BLOCK register offsets */
+#define STREAM_FEATURES 0x0000 /* () CSI features */
+#define STREAM_INPUT_VA 0x0004 /* () Address of CS_KERNEL_INPUT_BLOCK */
+#define STREAM_OUTPUT_VA 0x0008 /* () Address of CS_KERNEL_OUTPUT_BLOCK */
+
+/* GROUP_CONTROL_BLOCK register offsets */
+#define GROUP_FEATURES 0x0000 /* () CSG interface features */
+#define GROUP_INPUT_VA 0x0004 /* () Address of CSG_INPUT_BLOCK */
+#define GROUP_OUTPUT_VA 0x0008 /* () Address of CSG_OUTPUT_BLOCK */
+#define GROUP_SUSPEND_SIZE 0x000C /* () Size of CSG suspend buffer */
+#define GROUP_PROTM_SUSPEND_SIZE 0x0010 /* () Size of CSG protected-mode suspend buffer */
+#define GROUP_STREAM_NUM 0x0014 /* () Number of CS interfaces */
+#define GROUP_STREAM_STRIDE 0x0018 /* () Stride between CS interfaces */
+#define STREAM_CONTROL_0 0x0040 /* () CS control and capabilities */
+#define STREAM_CONTROL(n) (STREAM_CONTROL_0 + (n)*12)
+#define STREAM_CONTROL_REG(n, r) (STREAM_CONTROL(n) + STREAM_CONTROL_BLOCK_REG(r))
+#define STREAM_CONTROL_COUNT 16
+
+/* GLB_INPUT_BLOCK register offsets */
+#define GLB_REQ 0x0000 /* () Global request */
+#define GLB_ACK_IRQ_MASK 0x0004 /* () Global acknowledge interrupt mask */
+#define GLB_DB_REQ 0x0008 /* () Global doorbell request */
+#define GLB_PROGRESS_TIMER 0x0010 /* () Global progress timeout */
+#define GLB_PWROFF_TIMER 0x0014 /* () Global shader core power off timer */
+#define GLB_ALLOC_EN_LO 0x0018 /* () Global shader core allocation enable mask, low word */
+#define GLB_ALLOC_EN_HI 0x001C /* () Global shader core allocation enable mask, high word */
+#define GLB_PROTM_COHERENCY 0x0020 /* () Configure COHERENCY_ENABLE register value to use in protected mode execution */
+
+#define GLB_PRFCNT_JASID 0x0024 /* () Performance counter address space */
+#define GLB_PRFCNT_BASE_LO 0x0028 /* () Performance counter buffer address, low word */
+#define GLB_PRFCNT_BASE_HI 0x002C /* () Performance counter buffer address, high word */
+#define GLB_PRFCNT_EXTRACT 0x0030 /* () Performance counter buffer extract index */
+#define GLB_PRFCNT_CONFIG 0x0040 /* () Performance counter configuration */
+#define GLB_PRFCNT_CSG_SELECT 0x0044 /* () CSG performance counting enable */
+#define GLB_PRFCNT_FW_EN 0x0048 /* () Performance counter enable for firmware */
+#define GLB_PRFCNT_CSG_EN 0x004C /* () Performance counter enable for CSG */
+#define GLB_PRFCNT_CSF_EN 0x0050 /* () Performance counter enable for CSF */
+#define GLB_PRFCNT_SHADER_EN 0x0054 /* () Performance counter enable for shader cores */
+#define GLB_PRFCNT_TILER_EN 0x0058 /* () Performance counter enable for tiler */
+#define GLB_PRFCNT_MMU_L2_EN 0x005C /* () Performance counter enable for MMU/L2 cache */
+
+#define GLB_DEBUG_FWUTF_DESTROY 0x0FE0 /* () Test fixture destroy function address */
+#define GLB_DEBUG_FWUTF_TEST 0x0FE4 /* () Test index */
+#define GLB_DEBUG_FWUTF_FIXTURE 0x0FE8 /* () Test fixture index */
+#define GLB_DEBUG_FWUTF_CREATE 0x0FEC /* () Test fixture create function address */
+#define GLB_DEBUG_ACK_IRQ_MASK 0x0FF8 /* () Global debug acknowledge interrupt mask */
+#define GLB_DEBUG_REQ 0x0FFC /* () Global debug request */
+
+/* GLB_OUTPUT_BLOCK register offsets */
+#define GLB_ACK 0x0000 /* () Global acknowledge */
+#define GLB_DB_ACK 0x0008 /* () Global doorbell acknowledge */
+#define GLB_HALT_STATUS 0x0010 /* () Global halt status */
+#define GLB_PRFCNT_STATUS 0x0014 /* () Performance counter status */
+#define GLB_PRFCNT_INSERT 0x0018 /* () Performance counter buffer insert index */
+#define GLB_DEBUG_FWUTF_RESULT 0x0FE0 /* () Firmware debug test result */
+#define GLB_DEBUG_ACK 0x0FFC /* () Global debug acknowledge */
+
+/* USER register offsets */
+#define LATEST_FLUSH 0x0000 /* () Flush ID of latest clean-and-invalidate operation */
+
+/* End register offsets */
+
+/* CS_KERNEL_INPUT_BLOCK register set definitions */
+/* GLB_VERSION register */
+#define GLB_VERSION_PATCH_SHIFT (0)
+#define GLB_VERSION_MINOR_SHIFT (16)
+#define GLB_VERSION_MAJOR_SHIFT (24)
+
+/* CS_REQ register */
+#define CS_REQ_STATE_SHIFT 0
+#define CS_REQ_STATE_MASK (0x7 << CS_REQ_STATE_SHIFT)
+#define CS_REQ_STATE_GET(reg_val) (((reg_val)&CS_REQ_STATE_MASK) >> CS_REQ_STATE_SHIFT)
+#define CS_REQ_STATE_SET(reg_val, value) \
+ (((reg_val) & ~CS_REQ_STATE_MASK) | (((value) << CS_REQ_STATE_SHIFT) & CS_REQ_STATE_MASK))
+/* CS_REQ_STATE values */
+#define CS_REQ_STATE_STOP 0x0
+#define CS_REQ_STATE_START 0x1
+/* End of CS_REQ_STATE values */
+#define CS_REQ_EXTRACT_EVENT_SHIFT 4
+#define CS_REQ_EXTRACT_EVENT_MASK (0x1 << CS_REQ_EXTRACT_EVENT_SHIFT)
+#define CS_REQ_EXTRACT_EVENT_GET(reg_val) (((reg_val)&CS_REQ_EXTRACT_EVENT_MASK) >> CS_REQ_EXTRACT_EVENT_SHIFT)
+#define CS_REQ_EXTRACT_EVENT_SET(reg_val, value) \
+ (((reg_val) & ~CS_REQ_EXTRACT_EVENT_MASK) | (((value) << CS_REQ_EXTRACT_EVENT_SHIFT) & CS_REQ_EXTRACT_EVENT_MASK))
+
+#define CS_REQ_IDLE_SYNC_WAIT_SHIFT 8
+#define CS_REQ_IDLE_SYNC_WAIT_MASK (0x1 << CS_REQ_IDLE_SYNC_WAIT_SHIFT)
+#define CS_REQ_IDLE_SYNC_WAIT_GET(reg_val) (((reg_val)&CS_REQ_IDLE_SYNC_WAIT_MASK) >> CS_REQ_IDLE_SYNC_WAIT_SHIFT)
+#define CS_REQ_IDLE_SYNC_WAIT_SET(reg_val, value) \
+ (((reg_val) & ~CS_REQ_IDLE_SYNC_WAIT_MASK) | \
+ (((value) << CS_REQ_IDLE_SYNC_WAIT_SHIFT) & CS_REQ_IDLE_SYNC_WAIT_MASK))
+#define CS_REQ_IDLE_PROTM_PEND_SHIFT 9
+#define CS_REQ_IDLE_PROTM_PEND_MASK (0x1 << CS_REQ_IDLE_PROTM_PEND_SHIFT)
+#define CS_REQ_IDLE_PROTM_PEND_GET(reg_val) (((reg_val)&CS_REQ_IDLE_PROTM_PEND_MASK) >> CS_REQ_IDLE_PROTM_PEND_SHIFT)
+#define CS_REQ_IDLE_PROTM_PEND_SET(reg_val, value) \
+ (((reg_val) & ~CS_REQ_IDLE_PROTM_PEND_MASK) | \
+ (((value) << CS_REQ_IDLE_PROTM_PEND_SHIFT) & CS_REQ_IDLE_PROTM_PEND_MASK))
+#define CS_REQ_IDLE_EMPTY_SHIFT 10
+#define CS_REQ_IDLE_EMPTY_MASK (0x1 << CS_REQ_IDLE_EMPTY_SHIFT)
+#define CS_REQ_IDLE_EMPTY_GET(reg_val) (((reg_val)&CS_REQ_IDLE_EMPTY_MASK) >> CS_REQ_IDLE_EMPTY_SHIFT)
+#define CS_REQ_IDLE_EMPTY_SET(reg_val, value) \
+ (((reg_val) & ~CS_REQ_IDLE_EMPTY_MASK) | (((value) << CS_REQ_IDLE_EMPTY_SHIFT) & CS_REQ_IDLE_EMPTY_MASK))
+#define CS_REQ_IDLE_RESOURCE_REQ_SHIFT 11
+#define CS_REQ_IDLE_RESOURCE_REQ_MASK (0x1 << CS_REQ_IDLE_RESOURCE_REQ_SHIFT)
+#define CS_REQ_IDLE_RESOURCE_REQ_GET(reg_val) \
+ (((reg_val)&CS_REQ_IDLE_RESOURCE_REQ_MASK) >> CS_REQ_IDLE_RESOURCE_REQ_SHIFT)
+#define CS_REQ_IDLE_RESOURCE_REQ_SET(reg_val, value) \
+ (((reg_val) & ~CS_REQ_IDLE_RESOURCE_REQ_MASK) | \
+ (((value) << CS_REQ_IDLE_RESOURCE_REQ_SHIFT) & CS_REQ_IDLE_RESOURCE_REQ_MASK))
+#define CS_REQ_TILER_OOM_SHIFT 26
+#define CS_REQ_TILER_OOM_MASK (0x1 << CS_REQ_TILER_OOM_SHIFT)
+#define CS_REQ_TILER_OOM_GET(reg_val) (((reg_val)&CS_REQ_TILER_OOM_MASK) >> CS_REQ_TILER_OOM_SHIFT)
+#define CS_REQ_TILER_OOM_SET(reg_val, value) \
+ (((reg_val) & ~CS_REQ_TILER_OOM_MASK) | (((value) << CS_REQ_TILER_OOM_SHIFT) & CS_REQ_TILER_OOM_MASK))
+#define CS_REQ_PROTM_PEND_SHIFT 27
+#define CS_REQ_PROTM_PEND_MASK (0x1 << CS_REQ_PROTM_PEND_SHIFT)
+#define CS_REQ_PROTM_PEND_GET(reg_val) (((reg_val)&CS_REQ_PROTM_PEND_MASK) >> CS_REQ_PROTM_PEND_SHIFT)
+#define CS_REQ_PROTM_PEND_SET(reg_val, value) \
+ (((reg_val) & ~CS_REQ_PROTM_PEND_MASK) | (((value) << CS_REQ_PROTM_PEND_SHIFT) & CS_REQ_PROTM_PEND_MASK))
+#define CS_REQ_FATAL_SHIFT 30
+#define CS_REQ_FATAL_MASK (0x1 << CS_REQ_FATAL_SHIFT)
+#define CS_REQ_FATAL_GET(reg_val) (((reg_val)&CS_REQ_FATAL_MASK) >> CS_REQ_FATAL_SHIFT)
+#define CS_REQ_FATAL_SET(reg_val, value) \
+ (((reg_val) & ~CS_REQ_FATAL_MASK) | (((value) << CS_REQ_FATAL_SHIFT) & CS_REQ_FATAL_MASK))
+#define CS_REQ_FAULT_SHIFT 31
+#define CS_REQ_FAULT_MASK (0x1 << CS_REQ_FAULT_SHIFT)
+#define CS_REQ_FAULT_GET(reg_val) (((reg_val)&CS_REQ_FAULT_MASK) >> CS_REQ_FAULT_SHIFT)
+#define CS_REQ_FAULT_SET(reg_val, value) \
+ (((reg_val) & ~CS_REQ_FAULT_MASK) | (((value) << CS_REQ_FAULT_SHIFT) & CS_REQ_FAULT_MASK))
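
Every *_GET/*_SET pair in this file follows the same read-modify-write pattern; for example, a sketch of building a CS_REQ value that asks the firmware to start a stream (writing it back to the kernel input page and ringing the doorbell are not shown):

static inline __u32 cs_req_request_start(__u32 cs_req)
{
	/* Read-modify-write: change only the STATE field, keep other bits. */
	return CS_REQ_STATE_SET(cs_req, CS_REQ_STATE_START);
}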
+
+/* CS_CONFIG register */
+#define CS_CONFIG_PRIORITY_SHIFT 0
+#define CS_CONFIG_PRIORITY_MASK (0xF << CS_CONFIG_PRIORITY_SHIFT)
+#define CS_CONFIG_PRIORITY_GET(reg_val) (((reg_val)&CS_CONFIG_PRIORITY_MASK) >> CS_CONFIG_PRIORITY_SHIFT)
+#define CS_CONFIG_PRIORITY_SET(reg_val, value) \
+ (((reg_val) & ~CS_CONFIG_PRIORITY_MASK) | (((value) << CS_CONFIG_PRIORITY_SHIFT) & CS_CONFIG_PRIORITY_MASK))
+#define CS_CONFIG_USER_DOORBELL_SHIFT 8
+#define CS_CONFIG_USER_DOORBELL_MASK (0xFF << CS_CONFIG_USER_DOORBELL_SHIFT)
+#define CS_CONFIG_USER_DOORBELL_GET(reg_val) (((reg_val)&CS_CONFIG_USER_DOORBELL_MASK) >> CS_CONFIG_USER_DOORBELL_SHIFT)
+#define CS_CONFIG_USER_DOORBELL_SET(reg_val, value) \
+ (((reg_val) & ~CS_CONFIG_USER_DOORBELL_MASK) | \
+ (((value) << CS_CONFIG_USER_DOORBELL_SHIFT) & CS_CONFIG_USER_DOORBELL_MASK))
+
+/* CS_ACK_IRQ_MASK register */
+#define CS_ACK_IRQ_MASK_STATE_SHIFT 0
+#define CS_ACK_IRQ_MASK_STATE_MASK (0x7 << CS_ACK_IRQ_MASK_STATE_SHIFT)
+#define CS_ACK_IRQ_MASK_STATE_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_STATE_MASK) >> CS_ACK_IRQ_MASK_STATE_SHIFT)
+#define CS_ACK_IRQ_MASK_STATE_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACK_IRQ_MASK_STATE_MASK) | \
+ (((value) << CS_ACK_IRQ_MASK_STATE_SHIFT) & CS_ACK_IRQ_MASK_STATE_MASK))
+/* CS_ACK_IRQ_MASK_STATE values */
+#define CS_ACK_IRQ_MASK_STATE_DISABLED 0x0
+#define CS_ACK_IRQ_MASK_STATE_ENABLED 0x7
+/* End of CS_ACK_IRQ_MASK_STATE values */
+#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT 4
+#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK (0x1 << CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT)
+#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_GET(reg_val) \
+ (((reg_val)&CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK) >> CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT)
+#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK) | \
+ (((value) << CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT) & CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK))
+#define CS_ACK_IRQ_MASK_TILER_OOM_SHIFT 26
+#define CS_ACK_IRQ_MASK_TILER_OOM_MASK (0x1 << CS_ACK_IRQ_MASK_TILER_OOM_SHIFT)
+#define CS_ACK_IRQ_MASK_TILER_OOM_GET(reg_val) \
+ (((reg_val)&CS_ACK_IRQ_MASK_TILER_OOM_MASK) >> CS_ACK_IRQ_MASK_TILER_OOM_SHIFT)
+#define CS_ACK_IRQ_MASK_TILER_OOM_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACK_IRQ_MASK_TILER_OOM_MASK) | \
+ (((value) << CS_ACK_IRQ_MASK_TILER_OOM_SHIFT) & CS_ACK_IRQ_MASK_TILER_OOM_MASK))
+#define CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT 27
+#define CS_ACK_IRQ_MASK_PROTM_PEND_MASK (0x1 << CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT)
+#define CS_ACK_IRQ_MASK_PROTM_PEND_GET(reg_val) \
+ (((reg_val)&CS_ACK_IRQ_MASK_PROTM_PEND_MASK) >> CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT)
+#define CS_ACK_IRQ_MASK_PROTM_PEND_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACK_IRQ_MASK_PROTM_PEND_MASK) | \
+ (((value) << CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT) & CS_ACK_IRQ_MASK_PROTM_PEND_MASK))
+#define CS_ACK_IRQ_MASK_FATAL_SHIFT 30
+#define CS_ACK_IRQ_MASK_FATAL_MASK (0x1 << CS_ACK_IRQ_MASK_FATAL_SHIFT)
+#define CS_ACK_IRQ_MASK_FATAL_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_FATAL_MASK) >> CS_ACK_IRQ_MASK_FATAL_SHIFT)
+#define CS_ACK_IRQ_MASK_FATAL_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACK_IRQ_MASK_FATAL_MASK) | \
+ (((value) << CS_ACK_IRQ_MASK_FATAL_SHIFT) & CS_ACK_IRQ_MASK_FATAL_MASK))
+#define CS_ACK_IRQ_MASK_FAULT_SHIFT 31
+#define CS_ACK_IRQ_MASK_FAULT_MASK (0x1 << CS_ACK_IRQ_MASK_FAULT_SHIFT)
+#define CS_ACK_IRQ_MASK_FAULT_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_FAULT_MASK) >> CS_ACK_IRQ_MASK_FAULT_SHIFT)
+#define CS_ACK_IRQ_MASK_FAULT_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACK_IRQ_MASK_FAULT_MASK) | \
+ (((value) << CS_ACK_IRQ_MASK_FAULT_SHIFT) & CS_ACK_IRQ_MASK_FAULT_MASK))
+
+/* CS_BASE register */
+#define CS_BASE_POINTER_SHIFT 0
+#define CS_BASE_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_BASE_POINTER_SHIFT)
+#define CS_BASE_POINTER_GET(reg_val) (((reg_val)&CS_BASE_POINTER_MASK) >> CS_BASE_POINTER_SHIFT)
+#define CS_BASE_POINTER_SET(reg_val, value) \
+ (((reg_val) & ~CS_BASE_POINTER_MASK) | (((value) << CS_BASE_POINTER_SHIFT) & CS_BASE_POINTER_MASK))
+
+/* CS_SIZE register */
+#define CS_SIZE_SIZE_SHIFT 0
+#define CS_SIZE_SIZE_MASK (0xFFFFFFFF << CS_SIZE_SIZE_SHIFT)
+#define CS_SIZE_SIZE_GET(reg_val) (((reg_val)&CS_SIZE_SIZE_MASK) >> CS_SIZE_SIZE_SHIFT)
+#define CS_SIZE_SIZE_SET(reg_val, value) \
+ (((reg_val) & ~CS_SIZE_SIZE_MASK) | (((value) << CS_SIZE_SIZE_SHIFT) & CS_SIZE_SIZE_MASK))
+
+/* CS_TILER_HEAP_START register */
+#define CS_TILER_HEAP_START_POINTER_SHIFT 0
+#define CS_TILER_HEAP_START_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_TILER_HEAP_START_POINTER_SHIFT)
+#define CS_TILER_HEAP_START_POINTER_GET(reg_val) \
+ (((reg_val)&CS_TILER_HEAP_START_POINTER_MASK) >> CS_TILER_HEAP_START_POINTER_SHIFT)
+#define CS_TILER_HEAP_START_POINTER_SET(reg_val, value) \
+ (((reg_val) & ~CS_TILER_HEAP_START_POINTER_MASK) | \
+ (((value) << CS_TILER_HEAP_START_POINTER_SHIFT) & CS_TILER_HEAP_START_POINTER_MASK))
+/* HeapChunkPointer nested in CS_TILER_HEAP_START_POINTER */
+/* End of HeapChunkPointer nested in CS_TILER_HEAP_START_POINTER */
+
+/* CS_TILER_HEAP_END register */
+#define CS_TILER_HEAP_END_POINTER_SHIFT 0
+#define CS_TILER_HEAP_END_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_TILER_HEAP_END_POINTER_SHIFT)
+#define CS_TILER_HEAP_END_POINTER_GET(reg_val) \
+ (((reg_val)&CS_TILER_HEAP_END_POINTER_MASK) >> CS_TILER_HEAP_END_POINTER_SHIFT)
+#define CS_TILER_HEAP_END_POINTER_SET(reg_val, value) \
+ (((reg_val) & ~CS_TILER_HEAP_END_POINTER_MASK) | \
+ (((value) << CS_TILER_HEAP_END_POINTER_SHIFT) & CS_TILER_HEAP_END_POINTER_MASK))
+/* HeapChunkPointer nested in CS_TILER_HEAP_END_POINTER */
+/* End of HeapChunkPointer nested in CS_TILER_HEAP_END_POINTER */
+
+/* CS_USER_INPUT register */
+#define CS_USER_INPUT_POINTER_SHIFT 0
+#define CS_USER_INPUT_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_USER_INPUT_POINTER_SHIFT)
+#define CS_USER_INPUT_POINTER_GET(reg_val) (((reg_val)&CS_USER_INPUT_POINTER_MASK) >> CS_USER_INPUT_POINTER_SHIFT)
+#define CS_USER_INPUT_POINTER_SET(reg_val, value) \
+ (((reg_val) & ~CS_USER_INPUT_POINTER_MASK) | \
+ (((value) << CS_USER_INPUT_POINTER_SHIFT) & CS_USER_INPUT_POINTER_MASK))
+
+/* CS_USER_OUTPUT register */
+#define CS_USER_OUTPUT_POINTER_SHIFT 0
+#define CS_USER_OUTPUT_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_USER_OUTPUT_POINTER_SHIFT)
+#define CS_USER_OUTPUT_POINTER_GET(reg_val) (((reg_val)&CS_USER_OUTPUT_POINTER_MASK) >> CS_USER_OUTPUT_POINTER_SHIFT)
+#define CS_USER_OUTPUT_POINTER_SET(reg_val, value) \
+ (((reg_val) & ~CS_USER_OUTPUT_POINTER_MASK) | \
+ (((value) << CS_USER_OUTPUT_POINTER_SHIFT) & CS_USER_OUTPUT_POINTER_MASK))
+/* End of CS_KERNEL_INPUT_BLOCK register set definitions */
+
+/* CS_KERNEL_OUTPUT_BLOCK register set definitions */
+
+/* CS_ACK register */
+#define CS_ACK_STATE_SHIFT 0
+#define CS_ACK_STATE_MASK (0x7 << CS_ACK_STATE_SHIFT)
+#define CS_ACK_STATE_GET(reg_val) (((reg_val)&CS_ACK_STATE_MASK) >> CS_ACK_STATE_SHIFT)
+#define CS_ACK_STATE_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACK_STATE_MASK) | (((value) << CS_ACK_STATE_SHIFT) & CS_ACK_STATE_MASK))
+/* CS_ACK_STATE values */
+#define CS_ACK_STATE_STOP 0x0
+#define CS_ACK_STATE_START 0x1
+/* End of CS_ACK_STATE values */
+#define CS_ACK_EXTRACT_EVENT_SHIFT 4
+#define CS_ACK_EXTRACT_EVENT_MASK (0x1 << CS_ACK_EXTRACT_EVENT_SHIFT)
+#define CS_ACK_EXTRACT_EVENT_GET(reg_val) (((reg_val)&CS_ACK_EXTRACT_EVENT_MASK) >> CS_ACK_EXTRACT_EVENT_SHIFT)
+#define CS_ACK_EXTRACT_EVENT_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACK_EXTRACT_EVENT_MASK) | (((value) << CS_ACK_EXTRACT_EVENT_SHIFT) & CS_ACK_EXTRACT_EVENT_MASK))
+#define CS_ACK_TILER_OOM_SHIFT 26
+#define CS_ACK_TILER_OOM_MASK (0x1 << CS_ACK_TILER_OOM_SHIFT)
+#define CS_ACK_TILER_OOM_GET(reg_val) (((reg_val)&CS_ACK_TILER_OOM_MASK) >> CS_ACK_TILER_OOM_SHIFT)
+#define CS_ACK_TILER_OOM_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACK_TILER_OOM_MASK) | (((value) << CS_ACK_TILER_OOM_SHIFT) & CS_ACK_TILER_OOM_MASK))
+#define CS_ACK_PROTM_PEND_SHIFT 27
+#define CS_ACK_PROTM_PEND_MASK (0x1 << CS_ACK_PROTM_PEND_SHIFT)
+#define CS_ACK_PROTM_PEND_GET(reg_val) (((reg_val)&CS_ACK_PROTM_PEND_MASK) >> CS_ACK_PROTM_PEND_SHIFT)
+#define CS_ACK_PROTM_PEND_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACK_PROTM_PEND_MASK) | (((value) << CS_ACK_PROTM_PEND_SHIFT) & CS_ACK_PROTM_PEND_MASK))
+#define CS_ACK_FATAL_SHIFT 30
+#define CS_ACK_FATAL_MASK (0x1 << CS_ACK_FATAL_SHIFT)
+#define CS_ACK_FATAL_GET(reg_val) (((reg_val)&CS_ACK_FATAL_MASK) >> CS_ACK_FATAL_SHIFT)
+#define CS_ACK_FATAL_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACK_FATAL_MASK) | (((value) << CS_ACK_FATAL_SHIFT) & CS_ACK_FATAL_MASK))
+#define CS_ACK_FAULT_SHIFT 31
+#define CS_ACK_FAULT_MASK (0x1 << CS_ACK_FAULT_SHIFT)
+#define CS_ACK_FAULT_GET(reg_val) (((reg_val)&CS_ACK_FAULT_MASK) >> CS_ACK_FAULT_SHIFT)
+#define CS_ACK_FAULT_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACK_FAULT_MASK) | (((value) << CS_ACK_FAULT_SHIFT) & CS_ACK_FAULT_MASK))
+
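+/*
+ * Usage sketch (illustrative; the variable and accessor names are
+ * hypothetical): the GET/SET helpers above operate on a register value
+ * held in a local variable, read-modify-write style, e.g.
+ *
+ *   u32 cs_ack = read_cs_ack_snapshot();
+ *   if (CS_ACK_FAULT_GET(cs_ack))
+ *           cs_ack = CS_ACK_FAULT_SET(cs_ack, 0);
+ */
+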
+/* CS_STATUS_CMD_PTR register */
+#define CS_STATUS_CMD_PTR_POINTER_SHIFT 0
+#define CS_STATUS_CMD_PTR_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_STATUS_CMD_PTR_POINTER_SHIFT)
+#define CS_STATUS_CMD_PTR_POINTER_GET(reg_val) \
+ (((reg_val)&CS_STATUS_CMD_PTR_POINTER_MASK) >> CS_STATUS_CMD_PTR_POINTER_SHIFT)
+#define CS_STATUS_CMD_PTR_POINTER_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_CMD_PTR_POINTER_MASK) | \
+ (((value) << CS_STATUS_CMD_PTR_POINTER_SHIFT) & CS_STATUS_CMD_PTR_POINTER_MASK))
+
+/* CS_STATUS_WAIT register */
+#define CS_STATUS_WAIT_SB_MASK_SHIFT 0
+#define CS_STATUS_WAIT_SB_MASK_MASK (0xFFFF << CS_STATUS_WAIT_SB_MASK_SHIFT)
+#define CS_STATUS_WAIT_SB_MASK_GET(reg_val) (((reg_val)&CS_STATUS_WAIT_SB_MASK_MASK) >> CS_STATUS_WAIT_SB_MASK_SHIFT)
+#define CS_STATUS_WAIT_SB_MASK_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_WAIT_SB_MASK_MASK) | \
+ (((value) << CS_STATUS_WAIT_SB_MASK_SHIFT) & CS_STATUS_WAIT_SB_MASK_MASK))
+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT 24
+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK (0xF << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT)
+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(reg_val) \
+ (((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT)
+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK) | \
+ (((value) << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK))
+/* CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */
+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE 0x0
+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT 0x1
+/* End of CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */
+#define CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT 28
+#define CS_STATUS_WAIT_PROGRESS_WAIT_MASK (0x1 << CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT)
+#define CS_STATUS_WAIT_PROGRESS_WAIT_GET(reg_val) \
+ (((reg_val)&CS_STATUS_WAIT_PROGRESS_WAIT_MASK) >> CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT)
+#define CS_STATUS_WAIT_PROGRESS_WAIT_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_WAIT_PROGRESS_WAIT_MASK) | \
+ (((value) << CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT) & CS_STATUS_WAIT_PROGRESS_WAIT_MASK))
+#define CS_STATUS_WAIT_PROTM_PEND_SHIFT 29
+#define CS_STATUS_WAIT_PROTM_PEND_MASK (0x1 << CS_STATUS_WAIT_PROTM_PEND_SHIFT)
+#define CS_STATUS_WAIT_PROTM_PEND_GET(reg_val) \
+ (((reg_val)&CS_STATUS_WAIT_PROTM_PEND_MASK) >> CS_STATUS_WAIT_PROTM_PEND_SHIFT)
+#define CS_STATUS_WAIT_PROTM_PEND_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_WAIT_PROTM_PEND_MASK) | \
+ (((value) << CS_STATUS_WAIT_PROTM_PEND_SHIFT) & CS_STATUS_WAIT_PROTM_PEND_MASK))
+#define CS_STATUS_WAIT_SYNC_WAIT_SHIFT 31
+#define CS_STATUS_WAIT_SYNC_WAIT_MASK (0x1 << CS_STATUS_WAIT_SYNC_WAIT_SHIFT)
+#define CS_STATUS_WAIT_SYNC_WAIT_GET(reg_val) \
+ (((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_SHIFT)
+#define CS_STATUS_WAIT_SYNC_WAIT_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_MASK) | \
+ (((value) << CS_STATUS_WAIT_SYNC_WAIT_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_MASK))
+
+/* CS_STATUS_REQ_RESOURCE register */
+#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT 0
+#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT)
+#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_GET(reg_val) \
+ (((reg_val)&CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT)
+#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK) | \
+ (((value) << CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK))
+#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT 1
+#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT)
+#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_GET(reg_val) \
+ (((reg_val)&CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT)
+#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK) | \
+ (((value) << CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK))
+#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT 2
+#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT)
+#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_GET(reg_val) \
+ (((reg_val)&CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT)
+#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK) | \
+ (((value) << CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK))
+#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT 3
+#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT)
+#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_GET(reg_val) \
+ (((reg_val)&CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT)
+#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK) | \
+ (((value) << CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK))
+
+/* CS_STATUS_WAIT_SYNC_POINTER register */
+#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT 0
+#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT)
+#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_GET(reg_val) \
+ (((reg_val)&CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK) >> CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT)
+#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK) | \
+ (((value) << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) & CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK))
+
+/* CS_STATUS_WAIT_SYNC_VALUE register */
+#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT 0
+#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK (0xFFFFFFFF << CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT)
+#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_GET(reg_val) \
+ (((reg_val)&CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK) >> CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT)
+#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK) | \
+ (((value) << CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT) & CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK))
+
+/* CS_STATUS_SCOREBOARDS register */
+#define CS_STATUS_SCOREBOARDS_NONZERO_SHIFT (0)
+#define CS_STATUS_SCOREBOARDS_NONZERO_MASK \
+ ((0xFFFF) << CS_STATUS_SCOREBOARDS_NONZERO_SHIFT)
+#define CS_STATUS_SCOREBOARDS_NONZERO_GET(reg_val) \
+ (((reg_val)&CS_STATUS_SCOREBOARDS_NONZERO_MASK) >> \
+ CS_STATUS_SCOREBOARDS_NONZERO_SHIFT)
+#define CS_STATUS_SCOREBOARDS_NONZERO_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_SCOREBOARDS_NONZERO_MASK) | \
+ (((value) << CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) & \
+ CS_STATUS_SCOREBOARDS_NONZERO_MASK))
+
+/* CS_STATUS_BLOCKED_REASON register */
+#define CS_STATUS_BLOCKED_REASON_REASON_SHIFT (0)
+#define CS_STATUS_BLOCKED_REASON_REASON_MASK \
+ ((0xF) << CS_STATUS_BLOCKED_REASON_REASON_SHIFT)
+#define CS_STATUS_BLOCKED_REASON_REASON_GET(reg_val) \
+ (((reg_val)&CS_STATUS_BLOCKED_REASON_REASON_MASK) >> \
+ CS_STATUS_BLOCKED_REASON_REASON_SHIFT)
+#define CS_STATUS_BLOCKED_REASON_REASON_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_BLOCKED_REASON_REASON_MASK) | \
+ (((value) << CS_STATUS_BLOCKED_REASON_REASON_SHIFT) & \
+ CS_STATUS_BLOCKED_REASON_REASON_MASK))
+/* CS_STATUS_BLOCKED_REASON_REASON values */
+#define CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED 0x0
+#define CS_STATUS_BLOCKED_REASON_REASON_WAIT 0x1
+#define CS_STATUS_BLOCKED_REASON_REASON_PROGRESS_WAIT 0x2
+#define CS_STATUS_BLOCKED_REASON_REASON_SYNC_WAIT 0x3
+#define CS_STATUS_BLOCKED_REASON_REASON_DEFERRED 0x4
+#define CS_STATUS_BLOCKED_REASON_REASON_RESOURCE 0x5
+#define CS_STATUS_BLOCKED_REASON_REASON_FLUSH 0x6
+/* End of CS_STATUS_BLOCKED_REASON_REASON values */
+
+/* CS_FAULT register */
+#define CS_FAULT_EXCEPTION_TYPE_SHIFT 0
+#define CS_FAULT_EXCEPTION_TYPE_MASK (0xFF << CS_FAULT_EXCEPTION_TYPE_SHIFT)
+#define CS_FAULT_EXCEPTION_TYPE_GET(reg_val) (((reg_val)&CS_FAULT_EXCEPTION_TYPE_MASK) >> CS_FAULT_EXCEPTION_TYPE_SHIFT)
+#define CS_FAULT_EXCEPTION_TYPE_SET(reg_val, value) \
+ (((reg_val) & ~CS_FAULT_EXCEPTION_TYPE_MASK) | \
+ (((value) << CS_FAULT_EXCEPTION_TYPE_SHIFT) & CS_FAULT_EXCEPTION_TYPE_MASK))
+/* CS_FAULT_EXCEPTION_TYPE values */
+#define CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED 0x0F
+#define CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT 0x4B
+#define CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_PC 0x50
+#define CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_ENC 0x51
+#define CS_FAULT_EXCEPTION_TYPE_INSTR_BARRIER_FAULT 0x55
+#define CS_FAULT_EXCEPTION_TYPE_DATA_INVALID_FAULT 0x58
+#define CS_FAULT_EXCEPTION_TYPE_TILE_RANGE_FAULT 0x59
+#define CS_FAULT_EXCEPTION_TYPE_ADDR_RANGE_FAULT 0x5A
+#define CS_FAULT_EXCEPTION_TYPE_IMPRECISE_FAULT 0x5B
+#define CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT 0x69
+/* End of CS_FAULT_EXCEPTION_TYPE values */
+#define CS_FAULT_EXCEPTION_DATA_SHIFT 8
+#define CS_FAULT_EXCEPTION_DATA_MASK (0xFFFFFF << CS_FAULT_EXCEPTION_DATA_SHIFT)
+#define CS_FAULT_EXCEPTION_DATA_GET(reg_val) (((reg_val)&CS_FAULT_EXCEPTION_DATA_MASK) >> CS_FAULT_EXCEPTION_DATA_SHIFT)
+#define CS_FAULT_EXCEPTION_DATA_SET(reg_val, value) \
+ (((reg_val) & ~CS_FAULT_EXCEPTION_DATA_MASK) | \
+ (((value) << CS_FAULT_EXCEPTION_DATA_SHIFT) & CS_FAULT_EXCEPTION_DATA_MASK))
+
+/* CS_FATAL register */
+#define CS_FATAL_EXCEPTION_TYPE_SHIFT 0
+#define CS_FATAL_EXCEPTION_TYPE_MASK (0xFF << CS_FATAL_EXCEPTION_TYPE_SHIFT)
+#define CS_FATAL_EXCEPTION_TYPE_GET(reg_val) (((reg_val)&CS_FATAL_EXCEPTION_TYPE_MASK) >> CS_FATAL_EXCEPTION_TYPE_SHIFT)
+#define CS_FATAL_EXCEPTION_TYPE_SET(reg_val, value) \
+ (((reg_val) & ~CS_FATAL_EXCEPTION_TYPE_MASK) | \
+ (((value) << CS_FATAL_EXCEPTION_TYPE_SHIFT) & CS_FATAL_EXCEPTION_TYPE_MASK))
+/* CS_FATAL_EXCEPTION_TYPE values */
+#define CS_FATAL_EXCEPTION_TYPE_CS_CONFIG_FAULT 0x40
+#define CS_FATAL_EXCEPTION_TYPE_CS_ENDPOINT_FAULT 0x44
+#define CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT 0x48
+#define CS_FATAL_EXCEPTION_TYPE_CS_INVALID_INSTRUCTION 0x49
+#define CS_FATAL_EXCEPTION_TYPE_CS_CALL_STACK_OVERFLOW 0x4A
+#define CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR 0x68
+/* End of CS_FATAL_EXCEPTION_TYPE values */
+#define CS_FATAL_EXCEPTION_DATA_SHIFT 8
+#define CS_FATAL_EXCEPTION_DATA_MASK (0xFFFFFF << CS_FATAL_EXCEPTION_DATA_SHIFT)
+#define CS_FATAL_EXCEPTION_DATA_GET(reg_val) (((reg_val)&CS_FATAL_EXCEPTION_DATA_MASK) >> CS_FATAL_EXCEPTION_DATA_SHIFT)
+#define CS_FATAL_EXCEPTION_DATA_SET(reg_val, value) \
+ (((reg_val) & ~CS_FATAL_EXCEPTION_DATA_MASK) | \
+ (((value) << CS_FATAL_EXCEPTION_DATA_SHIFT) & CS_FATAL_EXCEPTION_DATA_MASK))
+
+/* CS_FAULT_INFO register */
+#define CS_FAULT_INFO_EXCEPTION_DATA_SHIFT 0
+#define CS_FAULT_INFO_EXCEPTION_DATA_MASK (0xFFFFFFFFFFFFFFFF << CS_FAULT_INFO_EXCEPTION_DATA_SHIFT)
+#define CS_FAULT_INFO_EXCEPTION_DATA_GET(reg_val) \
+ (((reg_val)&CS_FAULT_INFO_EXCEPTION_DATA_MASK) >> CS_FAULT_INFO_EXCEPTION_DATA_SHIFT)
+#define CS_FAULT_INFO_EXCEPTION_DATA_SET(reg_val, value) \
+ (((reg_val) & ~CS_FAULT_INFO_EXCEPTION_DATA_MASK) | \
+ (((value) << CS_FAULT_INFO_EXCEPTION_DATA_SHIFT) & CS_FAULT_INFO_EXCEPTION_DATA_MASK))
+
+/* CS_FATAL_INFO register */
+#define CS_FATAL_INFO_EXCEPTION_DATA_SHIFT 0
+#define CS_FATAL_INFO_EXCEPTION_DATA_MASK (0xFFFFFFFFFFFFFFFF << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT)
+#define CS_FATAL_INFO_EXCEPTION_DATA_GET(reg_val) \
+ (((reg_val)&CS_FATAL_INFO_EXCEPTION_DATA_MASK) >> CS_FATAL_INFO_EXCEPTION_DATA_SHIFT)
+#define CS_FATAL_INFO_EXCEPTION_DATA_SET(reg_val, value) \
+ (((reg_val) & ~CS_FATAL_INFO_EXCEPTION_DATA_MASK) | \
+ (((value) << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT) & CS_FATAL_INFO_EXCEPTION_DATA_MASK))
+
+/* CS_HEAP_VT_START register */
+#define CS_HEAP_VT_START_VALUE_SHIFT 0
+#define CS_HEAP_VT_START_VALUE_MASK (0xFFFFFFFF << CS_HEAP_VT_START_VALUE_SHIFT)
+#define CS_HEAP_VT_START_VALUE_GET(reg_val) (((reg_val)&CS_HEAP_VT_START_VALUE_MASK) >> CS_HEAP_VT_START_VALUE_SHIFT)
+#define CS_HEAP_VT_START_VALUE_SET(reg_val, value) \
+ (((reg_val) & ~CS_HEAP_VT_START_VALUE_MASK) | \
+ (((value) << CS_HEAP_VT_START_VALUE_SHIFT) & CS_HEAP_VT_START_VALUE_MASK))
+
+/* CS_HEAP_VT_END register */
+#define CS_HEAP_VT_END_VALUE_SHIFT 0
+#define CS_HEAP_VT_END_VALUE_MASK (0xFFFFFFFF << CS_HEAP_VT_END_VALUE_SHIFT)
+#define CS_HEAP_VT_END_VALUE_GET(reg_val) (((reg_val)&CS_HEAP_VT_END_VALUE_MASK) >> CS_HEAP_VT_END_VALUE_SHIFT)
+#define CS_HEAP_VT_END_VALUE_SET(reg_val, value) \
+ (((reg_val) & ~CS_HEAP_VT_END_VALUE_MASK) | (((value) << CS_HEAP_VT_END_VALUE_SHIFT) & CS_HEAP_VT_END_VALUE_MASK))
+
+/* CS_HEAP_FRAG_END register */
+#define CS_HEAP_FRAG_END_VALUE_SHIFT 0
+#define CS_HEAP_FRAG_END_VALUE_MASK (0xFFFFFFFF << CS_HEAP_FRAG_END_VALUE_SHIFT)
+#define CS_HEAP_FRAG_END_VALUE_GET(reg_val) (((reg_val)&CS_HEAP_FRAG_END_VALUE_MASK) >> CS_HEAP_FRAG_END_VALUE_SHIFT)
+#define CS_HEAP_FRAG_END_VALUE_SET(reg_val, value) \
+ (((reg_val) & ~CS_HEAP_FRAG_END_VALUE_MASK) | \
+ (((value) << CS_HEAP_FRAG_END_VALUE_SHIFT) & CS_HEAP_FRAG_END_VALUE_MASK))
+
+/* CS_HEAP_ADDRESS register */
+#define CS_HEAP_ADDRESS_POINTER_SHIFT 0
+#define CS_HEAP_ADDRESS_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_HEAP_ADDRESS_POINTER_SHIFT)
+#define CS_HEAP_ADDRESS_POINTER_GET(reg_val) (((reg_val)&CS_HEAP_ADDRESS_POINTER_MASK) >> CS_HEAP_ADDRESS_POINTER_SHIFT)
+#define CS_HEAP_ADDRESS_POINTER_SET(reg_val, value) \
+ (((reg_val) & ~CS_HEAP_ADDRESS_POINTER_MASK) | \
+ (((value) << CS_HEAP_ADDRESS_POINTER_SHIFT) & CS_HEAP_ADDRESS_POINTER_MASK))
+/* End of CS_KERNEL_OUTPUT_BLOCK register set definitions */
+
+/* CS_USER_INPUT_BLOCK register set definitions */
+
+/* CS_INSERT register */
+#define CS_INSERT_VALUE_SHIFT 0
+#define CS_INSERT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_INSERT_VALUE_SHIFT)
+#define CS_INSERT_VALUE_GET(reg_val) (((reg_val)&CS_INSERT_VALUE_MASK) >> CS_INSERT_VALUE_SHIFT)
+#define CS_INSERT_VALUE_SET(reg_val, value) \
+ (((reg_val) & ~CS_INSERT_VALUE_MASK) | (((value) << CS_INSERT_VALUE_SHIFT) & CS_INSERT_VALUE_MASK))
+
+/* CS_EXTRACT_INIT register */
+#define CS_EXTRACT_INIT_VALUE_SHIFT 0
+#define CS_EXTRACT_INIT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_EXTRACT_INIT_VALUE_SHIFT)
+#define CS_EXTRACT_INIT_VALUE_GET(reg_val) (((reg_val)&CS_EXTRACT_INIT_VALUE_MASK) >> CS_EXTRACT_INIT_VALUE_SHIFT)
+#define CS_EXTRACT_INIT_VALUE_SET(reg_val, value) \
+ (((reg_val) & ~CS_EXTRACT_INIT_VALUE_MASK) | \
+ (((value) << CS_EXTRACT_INIT_VALUE_SHIFT) & CS_EXTRACT_INIT_VALUE_MASK))
+/* End of CS_USER_INPUT_BLOCK register set definitions */
+
+/* CS_USER_OUTPUT_BLOCK register set definitions */
+
+/* CS_EXTRACT register */
+#define CS_EXTRACT_VALUE_SHIFT 0
+#define CS_EXTRACT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_EXTRACT_VALUE_SHIFT)
+#define CS_EXTRACT_VALUE_GET(reg_val) (((reg_val)&CS_EXTRACT_VALUE_MASK) >> CS_EXTRACT_VALUE_SHIFT)
+#define CS_EXTRACT_VALUE_SET(reg_val, value) \
+ (((reg_val) & ~CS_EXTRACT_VALUE_MASK) | (((value) << CS_EXTRACT_VALUE_SHIFT) & CS_EXTRACT_VALUE_MASK))
+
+/* CS_ACTIVE register */
+#define CS_ACTIVE_HW_ACTIVE_SHIFT 0
+#define CS_ACTIVE_HW_ACTIVE_MASK (0x1 << CS_ACTIVE_HW_ACTIVE_SHIFT)
+#define CS_ACTIVE_HW_ACTIVE_GET(reg_val) (((reg_val)&CS_ACTIVE_HW_ACTIVE_MASK) >> CS_ACTIVE_HW_ACTIVE_SHIFT)
+#define CS_ACTIVE_HW_ACTIVE_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACTIVE_HW_ACTIVE_MASK) | (((value) << CS_ACTIVE_HW_ACTIVE_SHIFT) & CS_ACTIVE_HW_ACTIVE_MASK))
+/* End of CS_USER_OUTPUT_BLOCK register set definitions */
+
+/* CSG_INPUT_BLOCK register set definitions */
+
+/* CSG_REQ register */
+#define CSG_REQ_STATE_SHIFT 0
+#define CSG_REQ_STATE_MASK (0x7 << CSG_REQ_STATE_SHIFT)
+#define CSG_REQ_STATE_GET(reg_val) (((reg_val)&CSG_REQ_STATE_MASK) >> CSG_REQ_STATE_SHIFT)
+#define CSG_REQ_STATE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_REQ_STATE_MASK) | (((value) << CSG_REQ_STATE_SHIFT) & CSG_REQ_STATE_MASK))
+/* CSG_REQ_STATE values */
+#define CSG_REQ_STATE_TERMINATE 0x0
+#define CSG_REQ_STATE_START 0x1
+#define CSG_REQ_STATE_SUSPEND 0x2
+#define CSG_REQ_STATE_RESUME 0x3
+/* End of CSG_REQ_STATE values */
+#define CSG_REQ_EP_CFG_SHIFT 4
+#define CSG_REQ_EP_CFG_MASK (0x1 << CSG_REQ_EP_CFG_SHIFT)
+#define CSG_REQ_EP_CFG_GET(reg_val) (((reg_val)&CSG_REQ_EP_CFG_MASK) >> CSG_REQ_EP_CFG_SHIFT)
+#define CSG_REQ_EP_CFG_SET(reg_val, value) \
+ (((reg_val) & ~CSG_REQ_EP_CFG_MASK) | (((value) << CSG_REQ_EP_CFG_SHIFT) & CSG_REQ_EP_CFG_MASK))
+#define CSG_REQ_STATUS_UPDATE_SHIFT 5
+#define CSG_REQ_STATUS_UPDATE_MASK (0x1 << CSG_REQ_STATUS_UPDATE_SHIFT)
+#define CSG_REQ_STATUS_UPDATE_GET(reg_val) (((reg_val)&CSG_REQ_STATUS_UPDATE_MASK) >> CSG_REQ_STATUS_UPDATE_SHIFT)
+#define CSG_REQ_STATUS_UPDATE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_REQ_STATUS_UPDATE_MASK) | \
+ (((value) << CSG_REQ_STATUS_UPDATE_SHIFT) & CSG_REQ_STATUS_UPDATE_MASK))
+#define CSG_REQ_SYNC_UPDATE_SHIFT 28
+#define CSG_REQ_SYNC_UPDATE_MASK (0x1 << CSG_REQ_SYNC_UPDATE_SHIFT)
+#define CSG_REQ_SYNC_UPDATE_GET(reg_val) (((reg_val)&CSG_REQ_SYNC_UPDATE_MASK) >> CSG_REQ_SYNC_UPDATE_SHIFT)
+#define CSG_REQ_SYNC_UPDATE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_REQ_SYNC_UPDATE_MASK) | (((value) << CSG_REQ_SYNC_UPDATE_SHIFT) & CSG_REQ_SYNC_UPDATE_MASK))
+#define CSG_REQ_IDLE_SHIFT 29
+#define CSG_REQ_IDLE_MASK (0x1 << CSG_REQ_IDLE_SHIFT)
+#define CSG_REQ_IDLE_GET(reg_val) (((reg_val)&CSG_REQ_IDLE_MASK) >> CSG_REQ_IDLE_SHIFT)
+#define CSG_REQ_IDLE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_REQ_IDLE_MASK) | (((value) << CSG_REQ_IDLE_SHIFT) & CSG_REQ_IDLE_MASK))
+#define CSG_REQ_DOORBELL_SHIFT 30
+#define CSG_REQ_DOORBELL_MASK (0x1 << CSG_REQ_DOORBELL_SHIFT)
+#define CSG_REQ_DOORBELL_GET(reg_val) (((reg_val)&CSG_REQ_DOORBELL_MASK) >> CSG_REQ_DOORBELL_SHIFT)
+#define CSG_REQ_DOORBELL_SET(reg_val, value) \
+ (((reg_val) & ~CSG_REQ_DOORBELL_MASK) | (((value) << CSG_REQ_DOORBELL_SHIFT) & CSG_REQ_DOORBELL_MASK))
+#define CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT 31
+#define CSG_REQ_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT)
+#define CSG_REQ_PROGRESS_TIMER_EVENT_GET(reg_val) \
+ (((reg_val)&CSG_REQ_PROGRESS_TIMER_EVENT_MASK) >> CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT)
+#define CSG_REQ_PROGRESS_TIMER_EVENT_SET(reg_val, value) \
+ (((reg_val) & ~CSG_REQ_PROGRESS_TIMER_EVENT_MASK) | \
+ (((value) << CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK))
+
+/* CSG_ACK_IRQ_MASK register */
+#define CSG_ACK_IRQ_MASK_STATE_SHIFT 0
+#define CSG_ACK_IRQ_MASK_STATE_MASK (0x7 << CSG_ACK_IRQ_MASK_STATE_SHIFT)
+#define CSG_ACK_IRQ_MASK_STATE_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_STATE_MASK) >> CSG_ACK_IRQ_MASK_STATE_SHIFT)
+#define CSG_ACK_IRQ_MASK_STATE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_IRQ_MASK_STATE_MASK) | \
+ (((value) << CSG_ACK_IRQ_MASK_STATE_SHIFT) & CSG_ACK_IRQ_MASK_STATE_MASK))
+/* CSG_ACK_IRQ_MASK_STATE values */
+#define CSG_ACK_IRQ_MASK_STATE_DISABLED 0x0
+#define CSG_ACK_IRQ_MASK_STATE_ENABLED 0x7
+/* End of CSG_ACK_IRQ_MASK_STATE values */
+#define CSG_ACK_IRQ_MASK_EP_CFG_SHIFT 4
+#define CSG_ACK_IRQ_MASK_EP_CFG_MASK (0x1 << CSG_ACK_IRQ_MASK_EP_CFG_SHIFT)
+#define CSG_ACK_IRQ_MASK_EP_CFG_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_EP_CFG_MASK) >> CSG_ACK_IRQ_MASK_EP_CFG_SHIFT)
+#define CSG_ACK_IRQ_MASK_EP_CFG_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_IRQ_MASK_EP_CFG_MASK) | \
+ (((value) << CSG_ACK_IRQ_MASK_EP_CFG_SHIFT) & CSG_ACK_IRQ_MASK_EP_CFG_MASK))
+#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT 5
+#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK (0x1 << CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT)
+#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_GET(reg_val) \
+ (((reg_val)&CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK) >> CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT)
+#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK) | \
+ (((value) << CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT) & CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK))
+#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT 28
+#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK (0x1 << CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT)
+#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_GET(reg_val) \
+ (((reg_val)&CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK) >> CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT)
+#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK) | \
+ (((value) << CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT) & CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK))
+#define CSG_ACK_IRQ_MASK_IDLE_SHIFT 29
+#define CSG_ACK_IRQ_MASK_IDLE_MASK (0x1 << CSG_ACK_IRQ_MASK_IDLE_SHIFT)
+#define CSG_ACK_IRQ_MASK_IDLE_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_IDLE_MASK) >> CSG_ACK_IRQ_MASK_IDLE_SHIFT)
+#define CSG_ACK_IRQ_MASK_IDLE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_IRQ_MASK_IDLE_MASK) | \
+ (((value) << CSG_ACK_IRQ_MASK_IDLE_SHIFT) & CSG_ACK_IRQ_MASK_IDLE_MASK))
+#define CSG_ACK_IRQ_MASK_DOORBELL_SHIFT 30
+#define CSG_ACK_IRQ_MASK_DOORBELL_MASK (0x1 << CSG_ACK_IRQ_MASK_DOORBELL_SHIFT)
+#define CSG_ACK_IRQ_MASK_DOORBELL_GET(reg_val) \
+ (((reg_val)&CSG_ACK_IRQ_MASK_DOORBELL_MASK) >> CSG_ACK_IRQ_MASK_DOORBELL_SHIFT)
+#define CSG_ACK_IRQ_MASK_DOORBELL_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_IRQ_MASK_DOORBELL_MASK) | \
+ (((value) << CSG_ACK_IRQ_MASK_DOORBELL_SHIFT) & CSG_ACK_IRQ_MASK_DOORBELL_MASK))
+#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT 31
+#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT)
+#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_GET(reg_val) \
+ (((reg_val)&CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK) >> CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT)
+#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK) | \
+ (((value) << CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT) & CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK))
+
+/* CSG_EP_REQ register */
+#define CSG_EP_REQ_COMPUTE_EP_SHIFT 0
+#define CSG_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_EP_REQ_COMPUTE_EP_SHIFT)
+#define CSG_EP_REQ_COMPUTE_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_COMPUTE_EP_MASK) >> CSG_EP_REQ_COMPUTE_EP_SHIFT)
+#define CSG_EP_REQ_COMPUTE_EP_SET(reg_val, value) \
+ (((reg_val) & ~CSG_EP_REQ_COMPUTE_EP_MASK) | \
+ (((value) << CSG_EP_REQ_COMPUTE_EP_SHIFT) & CSG_EP_REQ_COMPUTE_EP_MASK))
+#define CSG_EP_REQ_FRAGMENT_EP_SHIFT 8
+#define CSG_EP_REQ_FRAGMENT_EP_MASK (0xFF << CSG_EP_REQ_FRAGMENT_EP_SHIFT)
+#define CSG_EP_REQ_FRAGMENT_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_FRAGMENT_EP_MASK) >> CSG_EP_REQ_FRAGMENT_EP_SHIFT)
+#define CSG_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \
+ (((reg_val) & ~CSG_EP_REQ_FRAGMENT_EP_MASK) | \
+ (((value) << CSG_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_EP_REQ_FRAGMENT_EP_MASK))
+#define CSG_EP_REQ_TILER_EP_SHIFT 16
+#define CSG_EP_REQ_TILER_EP_MASK (0xF << CSG_EP_REQ_TILER_EP_SHIFT)
+#define CSG_EP_REQ_TILER_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_TILER_EP_MASK) >> CSG_EP_REQ_TILER_EP_SHIFT)
+#define CSG_EP_REQ_TILER_EP_SET(reg_val, value) \
+ (((reg_val) & ~CSG_EP_REQ_TILER_EP_MASK) | (((value) << CSG_EP_REQ_TILER_EP_SHIFT) & CSG_EP_REQ_TILER_EP_MASK))
+#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT 20
+#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT)
+#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_GET(reg_val) \
+ (((reg_val)&CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) >> CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT)
+#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \
+ (((value) << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK))
+#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT 21
+#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT)
+#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_GET(reg_val) \
+ (((reg_val)&CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) >> CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT)
+#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \
+ (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \
+ (((value) << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK))
+#define CSG_EP_REQ_PRIORITY_SHIFT 28
+#define CSG_EP_REQ_PRIORITY_MASK (0xF << CSG_EP_REQ_PRIORITY_SHIFT)
+#define CSG_EP_REQ_PRIORITY_GET(reg_val) (((reg_val)&CSG_EP_REQ_PRIORITY_MASK) >> CSG_EP_REQ_PRIORITY_SHIFT)
+#define CSG_EP_REQ_PRIORITY_SET(reg_val, value) \
+ (((reg_val) & ~CSG_EP_REQ_PRIORITY_MASK) | (((value) << CSG_EP_REQ_PRIORITY_SHIFT) & CSG_EP_REQ_PRIORITY_MASK))
+
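+/*
+ * Sketch (illustrative; the endpoint counts are arbitrary): a complete
+ * CSG_EP_REQ value can be composed by chaining the SET helpers on a
+ * zeroed word, e.g.
+ *
+ *   u32 ep_req = 0;
+ *   ep_req = CSG_EP_REQ_COMPUTE_EP_SET(ep_req, 8);
+ *   ep_req = CSG_EP_REQ_FRAGMENT_EP_SET(ep_req, 8);
+ *   ep_req = CSG_EP_REQ_TILER_EP_SET(ep_req, 1);
+ *   ep_req = CSG_EP_REQ_PRIORITY_SET(ep_req, 0xF);
+ */
+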
+/* CSG_SUSPEND_BUF register */
+#define CSG_SUSPEND_BUF_POINTER_SHIFT 0
+#define CSG_SUSPEND_BUF_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CSG_SUSPEND_BUF_POINTER_SHIFT)
+#define CSG_SUSPEND_BUF_POINTER_GET(reg_val) (((reg_val)&CSG_SUSPEND_BUF_POINTER_MASK) >> CSG_SUSPEND_BUF_POINTER_SHIFT)
+#define CSG_SUSPEND_BUF_POINTER_SET(reg_val, value) \
+ (((reg_val) & ~CSG_SUSPEND_BUF_POINTER_MASK) | \
+ (((value) << CSG_SUSPEND_BUF_POINTER_SHIFT) & CSG_SUSPEND_BUF_POINTER_MASK))
+
+/* CSG_PROTM_SUSPEND_BUF register */
+#define CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT 0
+#define CSG_PROTM_SUSPEND_BUF_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT)
+#define CSG_PROTM_SUSPEND_BUF_POINTER_GET(reg_val) \
+ (((reg_val)&CSG_PROTM_SUSPEND_BUF_POINTER_MASK) >> CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT)
+#define CSG_PROTM_SUSPEND_BUF_POINTER_SET(reg_val, value) \
+ (((reg_val) & ~CSG_PROTM_SUSPEND_BUF_POINTER_MASK) | \
+ (((value) << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) & CSG_PROTM_SUSPEND_BUF_POINTER_MASK))
+
+/* End of CSG_INPUT_BLOCK register set definitions */
+
+/* CSG_OUTPUT_BLOCK register set definitions */
+
+/* CSG_ACK register */
+#define CSG_ACK_STATE_SHIFT 0
+#define CSG_ACK_STATE_MASK (0x7 << CSG_ACK_STATE_SHIFT)
+#define CSG_ACK_STATE_GET(reg_val) (((reg_val)&CSG_ACK_STATE_MASK) >> CSG_ACK_STATE_SHIFT)
+#define CSG_ACK_STATE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_STATE_MASK) | (((value) << CSG_ACK_STATE_SHIFT) & CSG_ACK_STATE_MASK))
+/* CSG_ACK_STATE values */
+#define CSG_ACK_STATE_TERMINATE 0x0
+#define CSG_ACK_STATE_START 0x1
+#define CSG_ACK_STATE_SUSPEND 0x2
+#define CSG_ACK_STATE_RESUME 0x3
+/* End of CSG_ACK_STATE values */
+#define CSG_ACK_EP_CFG_SHIFT 4
+#define CSG_ACK_EP_CFG_MASK (0x1 << CSG_ACK_EP_CFG_SHIFT)
+#define CSG_ACK_EP_CFG_GET(reg_val) (((reg_val)&CSG_ACK_EP_CFG_MASK) >> CSG_ACK_EP_CFG_SHIFT)
+#define CSG_ACK_EP_CFG_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_EP_CFG_MASK) | (((value) << CSG_ACK_EP_CFG_SHIFT) & CSG_ACK_EP_CFG_MASK))
+#define CSG_ACK_STATUS_UPDATE_SHIFT 5
+#define CSG_ACK_STATUS_UPDATE_MASK (0x1 << CSG_ACK_STATUS_UPDATE_SHIFT)
+#define CSG_ACK_STATUS_UPDATE_GET(reg_val) (((reg_val)&CSG_ACK_STATUS_UPDATE_MASK) >> CSG_ACK_STATUS_UPDATE_SHIFT)
+#define CSG_ACK_STATUS_UPDATE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_STATUS_UPDATE_MASK) | \
+ (((value) << CSG_ACK_STATUS_UPDATE_SHIFT) & CSG_ACK_STATUS_UPDATE_MASK))
+#define CSG_ACK_SYNC_UPDATE_SHIFT 28
+#define CSG_ACK_SYNC_UPDATE_MASK (0x1 << CSG_ACK_SYNC_UPDATE_SHIFT)
+#define CSG_ACK_SYNC_UPDATE_GET(reg_val) (((reg_val)&CSG_ACK_SYNC_UPDATE_MASK) >> CSG_ACK_SYNC_UPDATE_SHIFT)
+#define CSG_ACK_SYNC_UPDATE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_SYNC_UPDATE_MASK) | (((value) << CSG_ACK_SYNC_UPDATE_SHIFT) & CSG_ACK_SYNC_UPDATE_MASK))
+#define CSG_ACK_IDLE_SHIFT 29
+#define CSG_ACK_IDLE_MASK (0x1 << CSG_ACK_IDLE_SHIFT)
+#define CSG_ACK_IDLE_GET(reg_val) (((reg_val)&CSG_ACK_IDLE_MASK) >> CSG_ACK_IDLE_SHIFT)
+#define CSG_ACK_IDLE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_IDLE_MASK) | (((value) << CSG_ACK_IDLE_SHIFT) & CSG_ACK_IDLE_MASK))
+#define CSG_ACK_DOORBELL_SHIFT 30
+#define CSG_ACK_DOORBELL_MASK (0x1 << CSG_ACK_DOORBELL_SHIFT)
+#define CSG_ACK_DOORBELL_GET(reg_val) (((reg_val)&CSG_ACK_DOORBELL_MASK) >> CSG_ACK_DOORBELL_SHIFT)
+#define CSG_ACK_DOORBELL_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_DOORBELL_MASK) | (((value) << CSG_ACK_DOORBELL_SHIFT) & CSG_ACK_DOORBELL_MASK))
+#define CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT 31
+#define CSG_ACK_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT)
+#define CSG_ACK_PROGRESS_TIMER_EVENT_GET(reg_val) \
+ (((reg_val)&CSG_ACK_PROGRESS_TIMER_EVENT_MASK) >> CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT)
+#define CSG_ACK_PROGRESS_TIMER_EVENT_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_PROGRESS_TIMER_EVENT_MASK) | \
+ (((value) << CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT) & CSG_ACK_PROGRESS_TIMER_EVENT_MASK))
+
+/* CSG_STATUS_EP_CURRENT register */
+#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT 0
+#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK (0xFF << CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT)
+#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(reg_val) \
+ (((reg_val)&CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK) >> CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT)
+#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_SET(reg_val, value) \
+ (((reg_val) & ~CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK) | \
+ (((value) << CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) & CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK))
+#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT 8
+#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK (0xFF << CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT)
+#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(reg_val) \
+ (((reg_val)&CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK) >> CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT)
+#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SET(reg_val, value) \
+ (((reg_val) & ~CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK) | \
+ (((value) << CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) & CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK))
+#define CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT 16
+#define CSG_STATUS_EP_CURRENT_TILER_EP_MASK (0xF << CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT)
+#define CSG_STATUS_EP_CURRENT_TILER_EP_GET(reg_val) \
+ (((reg_val)&CSG_STATUS_EP_CURRENT_TILER_EP_MASK) >> CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT)
+#define CSG_STATUS_EP_CURRENT_TILER_EP_SET(reg_val, value) \
+ (((reg_val) & ~CSG_STATUS_EP_CURRENT_TILER_EP_MASK) | \
+ (((value) << CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) & CSG_STATUS_EP_CURRENT_TILER_EP_MASK))
+
+/* CSG_STATUS_EP_REQ register */
+#define CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT 0
+#define CSG_STATUS_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT)
+#define CSG_STATUS_EP_REQ_COMPUTE_EP_GET(reg_val) \
+ (((reg_val)&CSG_STATUS_EP_REQ_COMPUTE_EP_MASK) >> CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT)
+#define CSG_STATUS_EP_REQ_COMPUTE_EP_SET(reg_val, value) \
+ (((reg_val) & ~CSG_STATUS_EP_REQ_COMPUTE_EP_MASK) | \
+ (((value) << CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) & CSG_STATUS_EP_REQ_COMPUTE_EP_MASK))
+#define CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT 8
+#define CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK (0xFF << CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT)
+#define CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(reg_val) \
+ (((reg_val)&CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK) >> CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT)
+#define CSG_STATUS_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \
+ (((reg_val) & ~CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK) | \
+ (((value) << CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK))
+#define CSG_STATUS_EP_REQ_TILER_EP_SHIFT 16
+#define CSG_STATUS_EP_REQ_TILER_EP_MASK (0xF << CSG_STATUS_EP_REQ_TILER_EP_SHIFT)
+#define CSG_STATUS_EP_REQ_TILER_EP_GET(reg_val) \
+ (((reg_val)&CSG_STATUS_EP_REQ_TILER_EP_MASK) >> CSG_STATUS_EP_REQ_TILER_EP_SHIFT)
+#define CSG_STATUS_EP_REQ_TILER_EP_SET(reg_val, value) \
+ (((reg_val) & ~CSG_STATUS_EP_REQ_TILER_EP_MASK) | \
+ (((value) << CSG_STATUS_EP_REQ_TILER_EP_SHIFT) & CSG_STATUS_EP_REQ_TILER_EP_MASK))
+#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT 20
+#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK (0x1 << CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT)
+#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_GET(reg_val) \
+ (((reg_val)&CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK) >> CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT)
+#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \
+ (((value) << CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK))
+#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT 21
+#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK (0x1 << CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT)
+#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_GET(reg_val) \
+ (((reg_val)&CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) >> CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT)
+#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \
+ (((reg_val) & ~CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \
+ (((value) << CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK))
+
+/* End of CSG_OUTPUT_BLOCK register set definitions */
+
+/* STREAM_CONTROL_BLOCK register set definitions */
+
+/* STREAM_FEATURES register */
+#define STREAM_FEATURES_WORK_REGISTERS_SHIFT 0
+#define STREAM_FEATURES_WORK_REGISTERS_MASK (0xFF << STREAM_FEATURES_WORK_REGISTERS_SHIFT)
+#define STREAM_FEATURES_WORK_REGISTERS_GET(reg_val) \
+ (((reg_val)&STREAM_FEATURES_WORK_REGISTERS_MASK) >> STREAM_FEATURES_WORK_REGISTERS_SHIFT)
+#define STREAM_FEATURES_WORK_REGISTERS_SET(reg_val, value) \
+ (((reg_val) & ~STREAM_FEATURES_WORK_REGISTERS_MASK) | \
+ (((value) << STREAM_FEATURES_WORK_REGISTERS_SHIFT) & STREAM_FEATURES_WORK_REGISTERS_MASK))
+#define STREAM_FEATURES_SCOREBOARDS_SHIFT 8
+#define STREAM_FEATURES_SCOREBOARDS_MASK (0xFF << STREAM_FEATURES_SCOREBOARDS_SHIFT)
+#define STREAM_FEATURES_SCOREBOARDS_GET(reg_val) \
+ (((reg_val)&STREAM_FEATURES_SCOREBOARDS_MASK) >> STREAM_FEATURES_SCOREBOARDS_SHIFT)
+#define STREAM_FEATURES_SCOREBOARDS_SET(reg_val, value) \
+ (((reg_val) & ~STREAM_FEATURES_SCOREBOARDS_MASK) | \
+ (((value) << STREAM_FEATURES_SCOREBOARDS_SHIFT) & STREAM_FEATURES_SCOREBOARDS_MASK))
+#define STREAM_FEATURES_COMPUTE_SHIFT 16
+#define STREAM_FEATURES_COMPUTE_MASK (0x1 << STREAM_FEATURES_COMPUTE_SHIFT)
+#define STREAM_FEATURES_COMPUTE_GET(reg_val) (((reg_val)&STREAM_FEATURES_COMPUTE_MASK) >> STREAM_FEATURES_COMPUTE_SHIFT)
+#define STREAM_FEATURES_COMPUTE_SET(reg_val, value) \
+ (((reg_val) & ~STREAM_FEATURES_COMPUTE_MASK) | \
+ (((value) << STREAM_FEATURES_COMPUTE_SHIFT) & STREAM_FEATURES_COMPUTE_MASK))
+#define STREAM_FEATURES_FRAGMENT_SHIFT 17
+#define STREAM_FEATURES_FRAGMENT_MASK (0x1 << STREAM_FEATURES_FRAGMENT_SHIFT)
+#define STREAM_FEATURES_FRAGMENT_GET(reg_val) \
+ (((reg_val)&STREAM_FEATURES_FRAGMENT_MASK) >> STREAM_FEATURES_FRAGMENT_SHIFT)
+#define STREAM_FEATURES_FRAGMENT_SET(reg_val, value) \
+ (((reg_val) & ~STREAM_FEATURES_FRAGMENT_MASK) | \
+ (((value) << STREAM_FEATURES_FRAGMENT_SHIFT) & STREAM_FEATURES_FRAGMENT_MASK))
+#define STREAM_FEATURES_TILER_SHIFT 18
+#define STREAM_FEATURES_TILER_MASK (0x1 << STREAM_FEATURES_TILER_SHIFT)
+#define STREAM_FEATURES_TILER_GET(reg_val) (((reg_val)&STREAM_FEATURES_TILER_MASK) >> STREAM_FEATURES_TILER_SHIFT)
+#define STREAM_FEATURES_TILER_SET(reg_val, value) \
+ (((reg_val) & ~STREAM_FEATURES_TILER_MASK) | \
+ (((value) << STREAM_FEATURES_TILER_SHIFT) & STREAM_FEATURES_TILER_MASK))
+
+/* STREAM_INPUT_VA register */
+#define STREAM_INPUT_VA_VALUE_SHIFT 0
+#define STREAM_INPUT_VA_VALUE_MASK (0xFFFFFFFF << STREAM_INPUT_VA_VALUE_SHIFT)
+#define STREAM_INPUT_VA_VALUE_GET(reg_val) (((reg_val)&STREAM_INPUT_VA_VALUE_MASK) >> STREAM_INPUT_VA_VALUE_SHIFT)
+#define STREAM_INPUT_VA_VALUE_SET(reg_val, value) \
+ (((reg_val) & ~STREAM_INPUT_VA_VALUE_MASK) | \
+ (((value) << STREAM_INPUT_VA_VALUE_SHIFT) & STREAM_INPUT_VA_VALUE_MASK))
+
+/* STREAM_OUTPUT_VA register */
+#define STREAM_OUTPUT_VA_VALUE_SHIFT 0
+#define STREAM_OUTPUT_VA_VALUE_MASK (0xFFFFFFFF << STREAM_OUTPUT_VA_VALUE_SHIFT)
+#define STREAM_OUTPUT_VA_VALUE_GET(reg_val) (((reg_val)&STREAM_OUTPUT_VA_VALUE_MASK) >> STREAM_OUTPUT_VA_VALUE_SHIFT)
+#define STREAM_OUTPUT_VA_VALUE_SET(reg_val, value) \
+ (((reg_val) & ~STREAM_OUTPUT_VA_VALUE_MASK) | \
+ (((value) << STREAM_OUTPUT_VA_VALUE_SHIFT) & STREAM_OUTPUT_VA_VALUE_MASK))
+/* End of STREAM_CONTROL_BLOCK register set definitions */
+
+/* GLB_INPUT_BLOCK register set definitions */
+
+/* GLB_REQ register */
+#define GLB_REQ_HALT_SHIFT 0
+#define GLB_REQ_HALT_MASK (0x1 << GLB_REQ_HALT_SHIFT)
+#define GLB_REQ_HALT_GET(reg_val) (((reg_val)&GLB_REQ_HALT_MASK) >> GLB_REQ_HALT_SHIFT)
+#define GLB_REQ_HALT_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_HALT_MASK) | (((value) << GLB_REQ_HALT_SHIFT) & GLB_REQ_HALT_MASK))
+#define GLB_REQ_CFG_PROGRESS_TIMER_SHIFT 1
+#define GLB_REQ_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_REQ_CFG_PROGRESS_TIMER_SHIFT)
+#define GLB_REQ_CFG_PROGRESS_TIMER_GET(reg_val) \
+ (((reg_val)&GLB_REQ_CFG_PROGRESS_TIMER_MASK) >> GLB_REQ_CFG_PROGRESS_TIMER_SHIFT)
+#define GLB_REQ_CFG_PROGRESS_TIMER_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_CFG_PROGRESS_TIMER_MASK) | \
+ (((value) << GLB_REQ_CFG_PROGRESS_TIMER_SHIFT) & GLB_REQ_CFG_PROGRESS_TIMER_MASK))
+#define GLB_REQ_CFG_ALLOC_EN_SHIFT 2
+#define GLB_REQ_CFG_ALLOC_EN_MASK (0x1 << GLB_REQ_CFG_ALLOC_EN_SHIFT)
+#define GLB_REQ_CFG_ALLOC_EN_GET(reg_val) (((reg_val)&GLB_REQ_CFG_ALLOC_EN_MASK) >> GLB_REQ_CFG_ALLOC_EN_SHIFT)
+#define GLB_REQ_CFG_ALLOC_EN_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_CFG_ALLOC_EN_MASK) | (((value) << GLB_REQ_CFG_ALLOC_EN_SHIFT) & GLB_REQ_CFG_ALLOC_EN_MASK))
+#define GLB_REQ_CFG_PWROFF_TIMER_SHIFT 3
+#define GLB_REQ_CFG_PWROFF_TIMER_MASK (0x1 << GLB_REQ_CFG_PWROFF_TIMER_SHIFT)
+#define GLB_REQ_CFG_PWROFF_TIMER_GET(reg_val) \
+ (((reg_val)&GLB_REQ_CFG_PWROFF_TIMER_MASK) >> GLB_REQ_CFG_PWROFF_TIMER_SHIFT)
+#define GLB_REQ_CFG_PWROFF_TIMER_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_CFG_PWROFF_TIMER_MASK) | \
+ (((value) << GLB_REQ_CFG_PWROFF_TIMER_SHIFT) & GLB_REQ_CFG_PWROFF_TIMER_MASK))
+#define GLB_REQ_PROTM_ENTER_SHIFT 4
+#define GLB_REQ_PROTM_ENTER_MASK (0x1 << GLB_REQ_PROTM_ENTER_SHIFT)
+#define GLB_REQ_PROTM_ENTER_GET(reg_val) (((reg_val)&GLB_REQ_PROTM_ENTER_MASK) >> GLB_REQ_PROTM_ENTER_SHIFT)
+#define GLB_REQ_PROTM_ENTER_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_PROTM_ENTER_MASK) | (((value) << GLB_REQ_PROTM_ENTER_SHIFT) & GLB_REQ_PROTM_ENTER_MASK))
+#define GLB_REQ_PRFCNT_ENABLE_SHIFT 5
+#define GLB_REQ_PRFCNT_ENABLE_MASK (0x1 << GLB_REQ_PRFCNT_ENABLE_SHIFT)
+#define GLB_REQ_PRFCNT_ENABLE_GET(reg_val) (((reg_val)&GLB_REQ_PRFCNT_ENABLE_MASK) >> GLB_REQ_PRFCNT_ENABLE_SHIFT)
+#define GLB_REQ_PRFCNT_ENABLE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_PRFCNT_ENABLE_MASK) | \
+ (((value) << GLB_REQ_PRFCNT_ENABLE_SHIFT) & GLB_REQ_PRFCNT_ENABLE_MASK))
+#define GLB_REQ_PRFCNT_SAMPLE_SHIFT 6
+#define GLB_REQ_PRFCNT_SAMPLE_MASK (0x1 << GLB_REQ_PRFCNT_SAMPLE_SHIFT)
+#define GLB_REQ_PRFCNT_SAMPLE_GET(reg_val) (((reg_val)&GLB_REQ_PRFCNT_SAMPLE_MASK) >> GLB_REQ_PRFCNT_SAMPLE_SHIFT)
+#define GLB_REQ_PRFCNT_SAMPLE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_PRFCNT_SAMPLE_MASK) | \
+ (((value) << GLB_REQ_PRFCNT_SAMPLE_SHIFT) & GLB_REQ_PRFCNT_SAMPLE_MASK))
+#define GLB_REQ_COUNTER_ENABLE_SHIFT 7
+#define GLB_REQ_COUNTER_ENABLE_MASK (0x1 << GLB_REQ_COUNTER_ENABLE_SHIFT)
+#define GLB_REQ_COUNTER_ENABLE_GET(reg_val) (((reg_val)&GLB_REQ_COUNTER_ENABLE_MASK) >> GLB_REQ_COUNTER_ENABLE_SHIFT)
+#define GLB_REQ_COUNTER_ENABLE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_COUNTER_ENABLE_MASK) | \
+ (((value) << GLB_REQ_COUNTER_ENABLE_SHIFT) & GLB_REQ_COUNTER_ENABLE_MASK))
+#define GLB_REQ_PING_SHIFT 8
+#define GLB_REQ_PING_MASK (0x1 << GLB_REQ_PING_SHIFT)
+#define GLB_REQ_PING_GET(reg_val) (((reg_val)&GLB_REQ_PING_MASK) >> GLB_REQ_PING_SHIFT)
+#define GLB_REQ_PING_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_PING_MASK) | (((value) << GLB_REQ_PING_SHIFT) & GLB_REQ_PING_MASK))
+#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT 9
+#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK \
+ (0x1 << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT)
+#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_GET(reg_val) \
+ (((reg_val)&GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) >> \
+ GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT)
+#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) | \
+ (((value) << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) & \
+ GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK))
+#define GLB_REQ_INACTIVE_COMPUTE_SHIFT 20
+#define GLB_REQ_INACTIVE_COMPUTE_MASK (0x1 << GLB_REQ_INACTIVE_COMPUTE_SHIFT)
+#define GLB_REQ_INACTIVE_COMPUTE_GET(reg_val) \
+ (((reg_val)&GLB_REQ_INACTIVE_COMPUTE_MASK) >> GLB_REQ_INACTIVE_COMPUTE_SHIFT)
+#define GLB_REQ_INACTIVE_COMPUTE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_INACTIVE_COMPUTE_MASK) | \
+ (((value) << GLB_REQ_INACTIVE_COMPUTE_SHIFT) & GLB_REQ_INACTIVE_COMPUTE_MASK))
+#define GLB_REQ_INACTIVE_FRAGMENT_SHIFT 21
+#define GLB_REQ_INACTIVE_FRAGMENT_MASK (0x1 << GLB_REQ_INACTIVE_FRAGMENT_SHIFT)
+#define GLB_REQ_INACTIVE_FRAGMENT_GET(reg_val) \
+ (((reg_val)&GLB_REQ_INACTIVE_FRAGMENT_MASK) >> GLB_REQ_INACTIVE_FRAGMENT_SHIFT)
+#define GLB_REQ_INACTIVE_FRAGMENT_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_INACTIVE_FRAGMENT_MASK) | \
+ (((value) << GLB_REQ_INACTIVE_FRAGMENT_SHIFT) & GLB_REQ_INACTIVE_FRAGMENT_MASK))
+#define GLB_REQ_INACTIVE_TILER_SHIFT 22
+#define GLB_REQ_INACTIVE_TILER_MASK (0x1 << GLB_REQ_INACTIVE_TILER_SHIFT)
+#define GLB_REQ_INACTIVE_TILER_GET(reg_val) (((reg_val)&GLB_REQ_INACTIVE_TILER_MASK) >> GLB_REQ_INACTIVE_TILER_SHIFT)
+#define GLB_REQ_INACTIVE_TILER_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_INACTIVE_TILER_MASK) | \
+ (((value) << GLB_REQ_INACTIVE_TILER_SHIFT) & GLB_REQ_INACTIVE_TILER_MASK))
+#define GLB_REQ_PROTM_EXIT_SHIFT 23
+#define GLB_REQ_PROTM_EXIT_MASK (0x1 << GLB_REQ_PROTM_EXIT_SHIFT)
+#define GLB_REQ_PROTM_EXIT_GET(reg_val) (((reg_val)&GLB_REQ_PROTM_EXIT_MASK) >> GLB_REQ_PROTM_EXIT_SHIFT)
+#define GLB_REQ_PROTM_EXIT_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_PROTM_EXIT_MASK) | (((value) << GLB_REQ_PROTM_EXIT_SHIFT) & GLB_REQ_PROTM_EXIT_MASK))
+#define GLB_REQ_PRFCNT_THRESHOLD_SHIFT 24
+#define GLB_REQ_PRFCNT_THRESHOLD_MASK (0x1 << GLB_REQ_PRFCNT_THRESHOLD_SHIFT)
+#define GLB_REQ_PRFCNT_THRESHOLD_GET(reg_val) \
+ (((reg_val)&GLB_REQ_PRFCNT_THRESHOLD_MASK) >> \
+ GLB_REQ_PRFCNT_THRESHOLD_SHIFT)
+#define GLB_REQ_PRFCNT_THRESHOLD_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_PRFCNT_THRESHOLD_MASK) | \
+ (((value) << GLB_REQ_PRFCNT_THRESHOLD_SHIFT) & \
+ GLB_REQ_PRFCNT_THRESHOLD_MASK))
+#define GLB_REQ_PRFCNT_OVERFLOW_SHIFT 25
+#define GLB_REQ_PRFCNT_OVERFLOW_MASK (0x1 << GLB_REQ_PRFCNT_OVERFLOW_SHIFT)
+#define GLB_REQ_PRFCNT_OVERFLOW_GET(reg_val) \
+ (((reg_val)&GLB_REQ_PRFCNT_OVERFLOW_MASK) >> \
+ GLB_REQ_PRFCNT_OVERFLOW_SHIFT)
+#define GLB_REQ_PRFCNT_OVERFLOW_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_PRFCNT_OVERFLOW_MASK) | \
+ (((value) << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) & \
+ GLB_REQ_PRFCNT_OVERFLOW_MASK))
+#define GLB_REQ_DEBUG_CSF_REQ_SHIFT 30
+#define GLB_REQ_DEBUG_CSF_REQ_MASK (0x1 << GLB_REQ_DEBUG_CSF_REQ_SHIFT)
+#define GLB_REQ_DEBUG_CSF_REQ_GET(reg_val) (((reg_val)&GLB_REQ_DEBUG_CSF_REQ_MASK) >> GLB_REQ_DEBUG_CSF_REQ_SHIFT)
+#define GLB_REQ_DEBUG_CSF_REQ_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_DEBUG_CSF_REQ_MASK) | \
+ (((value) << GLB_REQ_DEBUG_CSF_REQ_SHIFT) & GLB_REQ_DEBUG_CSF_REQ_MASK))
+#define GLB_REQ_DEBUG_HOST_REQ_SHIFT 31
+#define GLB_REQ_DEBUG_HOST_REQ_MASK (0x1 << GLB_REQ_DEBUG_HOST_REQ_SHIFT)
+#define GLB_REQ_DEBUG_HOST_REQ_GET(reg_val) (((reg_val)&GLB_REQ_DEBUG_HOST_REQ_MASK) >> GLB_REQ_DEBUG_HOST_REQ_SHIFT)
+#define GLB_REQ_DEBUG_HOST_REQ_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_DEBUG_HOST_REQ_MASK) | \
+ (((value) << GLB_REQ_DEBUG_HOST_REQ_SHIFT) & GLB_REQ_DEBUG_HOST_REQ_MASK))
+
+/* GLB_ACK_IRQ_MASK register */
+#define GLB_ACK_IRQ_MASK_HALT_SHIFT 0
+#define GLB_ACK_IRQ_MASK_HALT_MASK (0x1 << GLB_ACK_IRQ_MASK_HALT_SHIFT)
+#define GLB_ACK_IRQ_MASK_HALT_GET(reg_val) (((reg_val)&GLB_ACK_IRQ_MASK_HALT_MASK) >> GLB_ACK_IRQ_MASK_HALT_SHIFT)
+#define GLB_ACK_IRQ_MASK_HALT_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_HALT_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_HALT_SHIFT) & GLB_ACK_IRQ_MASK_HALT_MASK))
+#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT 1
+#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT)
+#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK) >> GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT)
+#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT) & GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK))
+#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT 2
+#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT)
+#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK) >> GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT)
+#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT) & GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK))
+#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT 3
+#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT)
+#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK) >> GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT)
+#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT) & GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK))
+#define GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT 4
+#define GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK (0x1 << GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT)
+#define GLB_ACK_IRQ_MASK_PROTM_ENTER_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK) >> GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT)
+#define GLB_ACK_IRQ_MASK_PROTM_ENTER_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT) & GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK))
+#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT 5
+#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK))
+#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT 6
+#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK))
+#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT 7
+#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK (0x1 << GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT)
+#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK) >> GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT)
+#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT) & GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK))
+#define GLB_ACK_IRQ_MASK_PING_SHIFT 8
+#define GLB_ACK_IRQ_MASK_PING_MASK (0x1 << GLB_ACK_IRQ_MASK_PING_SHIFT)
+#define GLB_ACK_IRQ_MASK_PING_GET(reg_val) (((reg_val)&GLB_ACK_IRQ_MASK_PING_MASK) >> GLB_ACK_IRQ_MASK_PING_SHIFT)
+#define GLB_ACK_IRQ_MASK_PING_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_PING_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_PING_SHIFT) & GLB_ACK_IRQ_MASK_PING_MASK))
+#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT 9
+#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK \
+ (0x1 << GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT)
+#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) >> \
+ GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT)
+#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) & \
+ GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK))
+#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT 20
+#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT)
+#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT)
+#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK))
+#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT 21
+#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT)
+#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT)
+#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK))
+#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT 22
+#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT)
+#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT)
+#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK))
+#define GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT 23
+#define GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK (0x1 << GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT)
+#define GLB_ACK_IRQ_MASK_PROTM_EXIT_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK) >> GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT)
+#define GLB_ACK_IRQ_MASK_PROTM_EXIT_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT) & GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK))
+#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT 24
+#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK \
+ (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) >> \
+ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) & \
+ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK))
+#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT 25
+#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK \
+ (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) >> \
+ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) & \
+ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK))
+#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT 30
+#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK (0x1 << GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT)
+#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK) >> GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT)
+#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT) & GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK))
+#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT 31
+#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK (0x1 << GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT)
+#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK) >> GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT)
+#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT) & GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK))
+
+/* GLB_PROGRESS_TIMER register */
+#define GLB_PROGRESS_TIMER_TIMEOUT_SHIFT 0
+#define GLB_PROGRESS_TIMER_TIMEOUT_MASK (0xFFFFFFFF << GLB_PROGRESS_TIMER_TIMEOUT_SHIFT)
+#define GLB_PROGRESS_TIMER_TIMEOUT_GET(reg_val) \
+ (((reg_val)&GLB_PROGRESS_TIMER_TIMEOUT_MASK) >> GLB_PROGRESS_TIMER_TIMEOUT_SHIFT)
+#define GLB_PROGRESS_TIMER_TIMEOUT_SET(reg_val, value) \
+ (((reg_val) & ~GLB_PROGRESS_TIMER_TIMEOUT_MASK) | \
+ (((value) << GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) & GLB_PROGRESS_TIMER_TIMEOUT_MASK))
+
+/* GLB_PWROFF_TIMER register */
+#define GLB_PWROFF_TIMER_TIMEOUT_SHIFT 0
+#define GLB_PWROFF_TIMER_TIMEOUT_MASK (0x7FFFFFFF << GLB_PWROFF_TIMER_TIMEOUT_SHIFT)
+#define GLB_PWROFF_TIMER_TIMEOUT_GET(reg_val) \
+ (((reg_val)&GLB_PWROFF_TIMER_TIMEOUT_MASK) >> GLB_PWROFF_TIMER_TIMEOUT_SHIFT)
+#define GLB_PWROFF_TIMER_TIMEOUT_SET(reg_val, value) \
+ (((reg_val) & ~GLB_PWROFF_TIMER_TIMEOUT_MASK) | \
+ (((value) << GLB_PWROFF_TIMER_TIMEOUT_SHIFT) & GLB_PWROFF_TIMER_TIMEOUT_MASK))
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT 31
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_MASK (0x1 << GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT)
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_GET(reg_val) \
+ (((reg_val)&GLB_PWROFF_TIMER_TIMER_SOURCE_MASK) >> GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT)
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_PWROFF_TIMER_TIMER_SOURCE_MASK) | \
+ (((value) << GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT) & GLB_PWROFF_TIMER_TIMER_SOURCE_MASK))
+/* GLB_PWROFF_TIMER_TIMER_SOURCE values */
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1
+/* End of GLB_PWROFF_TIMER_TIMER_SOURCE values */
+
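+/*
+ * Sketch (illustrative; 'timeout_cycles' is hypothetical): GLB_PWROFF_TIMER
+ * packs a 31-bit timeout with a 1-bit source select, so a configuration
+ * might be built as
+ *
+ *   u32 pwroff = 0;
+ *   pwroff = GLB_PWROFF_TIMER_TIMEOUT_SET(pwroff, timeout_cycles);
+ *   pwroff = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(
+ *           pwroff, GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER);
+ */
+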
+/* GLB_ALLOC_EN register */
+#define GLB_ALLOC_EN_MASK_SHIFT 0
+#define GLB_ALLOC_EN_MASK_MASK (0xFFFFFFFFFFFFFFFF << GLB_ALLOC_EN_MASK_SHIFT)
+#define GLB_ALLOC_EN_MASK_GET(reg_val) (((reg_val)&GLB_ALLOC_EN_MASK_MASK) >> GLB_ALLOC_EN_MASK_SHIFT)
+#define GLB_ALLOC_EN_MASK_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ALLOC_EN_MASK_MASK) | (((value) << GLB_ALLOC_EN_MASK_SHIFT) & GLB_ALLOC_EN_MASK_MASK))
+
+/* GLB_PROTM_COHERENCY register */
+#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT 0
+#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK \
+ (0xFFFFFFFF << GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT)
+#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_GET(reg_val) \
+ (((reg_val)&GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK) >> \
+ GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT)
+#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SET(reg_val, value) \
+ (((reg_val) & ~GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK) | \
+ (((value) << GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT) & \
+ GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK))
+/* End of GLB_INPUT_BLOCK register set definitions */
+
+/* GLB_OUTPUT_BLOCK register set definitions */
+
+/* GLB_ACK register */
+#define GLB_ACK_CFG_PROGRESS_TIMER_SHIFT 1
+#define GLB_ACK_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_ACK_CFG_PROGRESS_TIMER_SHIFT)
+#define GLB_ACK_CFG_PROGRESS_TIMER_GET(reg_val) \
+ (((reg_val)&GLB_ACK_CFG_PROGRESS_TIMER_MASK) >> GLB_ACK_CFG_PROGRESS_TIMER_SHIFT)
+#define GLB_ACK_CFG_PROGRESS_TIMER_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_CFG_PROGRESS_TIMER_MASK) | \
+ (((value) << GLB_ACK_CFG_PROGRESS_TIMER_SHIFT) & GLB_ACK_CFG_PROGRESS_TIMER_MASK))
+#define GLB_ACK_CFG_ALLOC_EN_SHIFT 2
+#define GLB_ACK_CFG_ALLOC_EN_MASK (0x1 << GLB_ACK_CFG_ALLOC_EN_SHIFT)
+#define GLB_ACK_CFG_ALLOC_EN_GET(reg_val) (((reg_val)&GLB_ACK_CFG_ALLOC_EN_MASK) >> GLB_ACK_CFG_ALLOC_EN_SHIFT)
+#define GLB_ACK_CFG_ALLOC_EN_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_CFG_ALLOC_EN_MASK) | (((value) << GLB_ACK_CFG_ALLOC_EN_SHIFT) & GLB_ACK_CFG_ALLOC_EN_MASK))
+/* End of GLB_OUTPUT_BLOCK register set definitions */
+
+/* The following registers and fields are for headers before 10.x.7/11.x.4 */
+#define GLB_REQ_IDLE_ENABLE_SHIFT (10)
+#define GLB_REQ_REQ_IDLE_ENABLE (1 << GLB_REQ_IDLE_ENABLE_SHIFT)
+#define GLB_REQ_REQ_IDLE_DISABLE (0 << GLB_REQ_IDLE_ENABLE_SHIFT)
+#define GLB_REQ_IDLE_ENABLE_MASK (0x1 << GLB_REQ_IDLE_ENABLE_SHIFT)
+#define GLB_REQ_IDLE_DISABLE_MASK (0x1 << GLB_REQ_IDLE_ENABLE_SHIFT)
+#define GLB_REQ_IDLE_EVENT_SHIFT (26)
+#define GLB_REQ_IDLE_EVENT_MASK (0x1 << GLB_REQ_IDLE_EVENT_SHIFT)
+#define GLB_ACK_IDLE_ENABLE_SHIFT (10)
+#define GLB_ACK_ACK_IDLE_ENABLE (1 << GLB_ACK_IDLE_ENABLE_SHIFT)
+#define GLB_ACK_ACK_IDLE_DISABLE (0 << GLB_ACK_IDLE_ENABLE_SHIFT)
+#define GLB_ACK_IDLE_ENABLE_MASK (0x1 << GLB_ACK_IDLE_ENABLE_SHIFT)
+#define GLB_ACK_IDLE_EVENT_SHIFT (26)
+#define GLB_ACK_IDLE_EVENT_MASK (0x1 << GLB_REQ_IDLE_EVENT_SHIFT)
+
+#define GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT (26)
+#define GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK (0x1 << GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT)
+
+#define GLB_IDLE_TIMER (0x0080)
+/* GLB_IDLE_TIMER register */
+#define GLB_IDLE_TIMER_TIMEOUT_SHIFT (0)
+#define GLB_IDLE_TIMER_TIMEOUT_MASK ((0x7FFFFFFF) << GLB_IDLE_TIMER_TIMEOUT_SHIFT)
+#define GLB_IDLE_TIMER_TIMEOUT_GET(reg_val) (((reg_val)&GLB_IDLE_TIMER_TIMEOUT_MASK) >> GLB_IDLE_TIMER_TIMEOUT_SHIFT)
+#define GLB_IDLE_TIMER_TIMEOUT_SET(reg_val, value) \
+ (((reg_val) & ~GLB_IDLE_TIMER_TIMEOUT_MASK) | \
+ (((value) << GLB_IDLE_TIMER_TIMEOUT_SHIFT) & GLB_IDLE_TIMER_TIMEOUT_MASK))
+#define GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT (31)
+#define GLB_IDLE_TIMER_TIMER_SOURCE_MASK ((0x1) << GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT)
+#define GLB_IDLE_TIMER_TIMER_SOURCE_GET(reg_val) \
+ (((reg_val)&GLB_IDLE_TIMER_TIMER_SOURCE_MASK) >> GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT)
+#define GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_IDLE_TIMER_TIMER_SOURCE_MASK) | \
+ (((value) << GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) & GLB_IDLE_TIMER_TIMER_SOURCE_MASK))
+/* GLB_IDLE_TIMER_TIMER_SOURCE values */
+#define GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0
+#define GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1
+/* End of GLB_IDLE_TIMER_TIMER_SOURCE values */
+
+#define CSG_STATUS_STATE (0x0018) /* CSG state status register */
+/* CSG_STATUS_STATE register */
+#define CSG_STATUS_STATE_IDLE_SHIFT (0)
+#define CSG_STATUS_STATE_IDLE_MASK ((0x1) << CSG_STATUS_STATE_IDLE_SHIFT)
+#define CSG_STATUS_STATE_IDLE_GET(reg_val) \
+ (((reg_val)&CSG_STATUS_STATE_IDLE_MASK) >> CSG_STATUS_STATE_IDLE_SHIFT)
+#define CSG_STATUS_STATE_IDLE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_STATUS_STATE_IDLE_MASK) | \
+ (((value) << CSG_STATUS_STATE_IDLE_SHIFT) & CSG_STATUS_STATE_IDLE_MASK))
+
+#endif /* _UAPI_GPU_CSF_REGISTERS_H_ */
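[Editorial note] All of the field accessors in this header follow the same pattern: _GET() masks and shifts a field out of a register value, and _SET() clears the field with ~MASK and ORs in the new, shifted value without disturbing the other bits. A minimal sketch of how the GLB_PWROFF_TIMER fields defined above would be combined; the build_pwroff_timer() helper is invented for illustration and is not part of the driver:

#include <linux/types.h>

/* Sketch: start from a previously read GLB_PWROFF_TIMER value, update the
 * TIMEOUT field and select the GPU cycle counter as the timer source. The
 * result is what would then be written back to the register.
 */
static __u32 build_pwroff_timer(__u32 reg_val, __u32 timeout_cycles)
{
	/* _SET() preserves all bits outside the named field */
	reg_val = GLB_PWROFF_TIMER_TIMEOUT_SET(reg_val, timeout_cycles);
	reg_val = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(
		reg_val, GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER);
	return reg_val;
}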
diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h
new file mode 100644
index 0000000..237cc2e
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h
@@ -0,0 +1,390 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_CSF_IOCTL_H_
+#define _UAPI_KBASE_CSF_IOCTL_H_
+
+#include <asm-generic/ioctl.h>
+#include <linux/types.h>
+
+/*
+ * 1.0:
+ * - CSF IOCTL header separated from JM
+ * 1.1:
+ * - Add a new priority level BASE_QUEUE_GROUP_PRIORITY_REALTIME
+ * - Add ioctl 54: This controls the priority setting.
+ * 1.2:
+ * - Add new CSF GPU_FEATURES register into the property structure
+ * returned by KBASE_IOCTL_GET_GPUPROPS
+ * 1.3:
+ * - Add __u32 group_uid member to
+ * &struct_kbase_ioctl_cs_queue_group_create.out
+ * 1.4:
+ * - Replace padding in kbase_ioctl_cs_get_glb_iface with
+ * instr_features member of same size
+ */
+
+#define BASE_UK_VERSION_MAJOR 1
+#define BASE_UK_VERSION_MINOR 4
+
+/**
+ * struct kbase_ioctl_version_check - Check version compatibility between
+ * kernel and userspace
+ *
+ * @major: Major version number
+ * @minor: Minor version number
+ */
+struct kbase_ioctl_version_check {
+ __u16 major;
+ __u16 minor;
+};
+
+#define KBASE_IOCTL_VERSION_CHECK_RESERVED \
+ _IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check)
+
+
+/**
+ * struct kbase_ioctl_cs_queue_register - Register a GPU command queue with the
+ * base back-end
+ *
+ * @buffer_gpu_addr: GPU address of the buffer backing the queue
+ * @buffer_size: Size of the buffer in bytes
+ * @priority: Priority of the queue within a group when run within a process
+ * @padding: Currently unused, must be zero
+ */
+struct kbase_ioctl_cs_queue_register {
+ __u64 buffer_gpu_addr;
+ __u32 buffer_size;
+ __u8 priority;
+ __u8 padding[3];
+};
+
+#define KBASE_IOCTL_CS_QUEUE_REGISTER \
+ _IOW(KBASE_IOCTL_TYPE, 36, struct kbase_ioctl_cs_queue_register)
+
+/**
+ * struct kbase_ioctl_cs_queue_kick - Kick the GPU command queue group scheduler
+ * to notify that a queue has been updated
+ *
+ * @buffer_gpu_addr: GPU address of the buffer backing the queue
+ */
+struct kbase_ioctl_cs_queue_kick {
+ __u64 buffer_gpu_addr;
+};
+
+#define KBASE_IOCTL_CS_QUEUE_KICK \
+ _IOW(KBASE_IOCTL_TYPE, 37, struct kbase_ioctl_cs_queue_kick)
+
+/**
+ * union kbase_ioctl_cs_queue_bind - Bind a GPU command queue to a group
+ *
+ * @in: Input parameters
+ * @in.buffer_gpu_addr: GPU address of the buffer backing the queue
+ * @in.group_handle: Handle of the group to which the queue should be bound
+ * @in.csi_index: Index of the CSF interface the queue should be bound to
+ * @in.padding: Currently unused, must be zero
+ * @out: Output parameters
+ * @out.mmap_handle: Handle to be used for creating the mapping of CS
+ * input/output pages
+ */
+union kbase_ioctl_cs_queue_bind {
+ struct {
+ __u64 buffer_gpu_addr;
+ __u8 group_handle;
+ __u8 csi_index;
+ __u8 padding[6];
+ } in;
+ struct {
+ __u64 mmap_handle;
+ } out;
+};
+
+#define KBASE_IOCTL_CS_QUEUE_BIND \
+ _IOWR(KBASE_IOCTL_TYPE, 39, union kbase_ioctl_cs_queue_bind)
+
+/* ioctl 40 is free to use */
+
+/**
+ * struct kbase_ioctl_cs_queue_terminate - Terminate a GPU command queue
+ *
+ * @buffer_gpu_addr: GPU address of the buffer backing the queue
+ */
+struct kbase_ioctl_cs_queue_terminate {
+ __u64 buffer_gpu_addr;
+};
+
+#define KBASE_IOCTL_CS_QUEUE_TERMINATE \
+ _IOW(KBASE_IOCTL_TYPE, 41, struct kbase_ioctl_cs_queue_terminate)
+
+/**
+ * union kbase_ioctl_cs_queue_group_create - Create a GPU command queue group
+ * @in: Input parameters
+ * @in.tiler_mask: Mask of tiler endpoints the group is allowed to use.
+ * @in.fragment_mask: Mask of fragment endpoints the group is allowed to use.
+ * @in.compute_mask: Mask of compute endpoints the group is allowed to use.
+ * @in.cs_min: Minimum number of CSs required.
+ * @in.priority: Queue group's priority within a process.
+ * @in.tiler_max: Maximum number of tiler endpoints the group is allowed
+ * to use.
+ * @in.fragment_max: Maximum number of fragment endpoints the group is
+ * allowed to use.
+ * @in.compute_max: Maximum number of compute endpoints the group is allowed
+ * to use.
+ * @in.padding: Currently unused, must be zero
+ * @out: Output parameters
+ * @out.group_handle: Handle of a newly created queue group.
+ * @out.padding: Currently unused, must be zero
+ * @out.group_uid: UID of the queue group available to base.
+ */
+union kbase_ioctl_cs_queue_group_create {
+ struct {
+ __u64 tiler_mask;
+ __u64 fragment_mask;
+ __u64 compute_mask;
+ __u8 cs_min;
+ __u8 priority;
+ __u8 tiler_max;
+ __u8 fragment_max;
+ __u8 compute_max;
+ __u8 padding[3];
+
+ } in;
+ struct {
+ __u8 group_handle;
+ __u8 padding[3];
+ __u32 group_uid;
+ } out;
+};
+
+#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE \
+ _IOWR(KBASE_IOCTL_TYPE, 42, union kbase_ioctl_cs_queue_group_create)
+
+/**
+ * struct kbase_ioctl_cs_queue_group_term - Terminate a GPU command queue group
+ *
+ * @group_handle: Handle of the queue group to be terminated
+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero
+ */
+struct kbase_ioctl_cs_queue_group_term {
+ __u8 group_handle;
+ __u8 padding[7];
+};
+
+#define KBASE_IOCTL_CS_QUEUE_GROUP_TERMINATE \
+ _IOW(KBASE_IOCTL_TYPE, 43, struct kbase_ioctl_cs_queue_group_term)
+
+#define KBASE_IOCTL_CS_EVENT_SIGNAL \
+ _IO(KBASE_IOCTL_TYPE, 44)
+
+typedef __u8 base_kcpu_queue_id; /* We support up to 256 active KCPU queues */
+
+/**
+ * struct kbase_ioctl_kcpu_queue_new - Create a KCPU command queue
+ *
+ * @id: ID of the new command queue returned by the kernel
+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero
+ */
+struct kbase_ioctl_kcpu_queue_new {
+ base_kcpu_queue_id id;
+ __u8 padding[7];
+};
+
+#define KBASE_IOCTL_KCPU_QUEUE_CREATE \
+ _IOR(KBASE_IOCTL_TYPE, 45, struct kbase_ioctl_kcpu_queue_new)
+
+/**
+ * struct kbase_ioctl_kcpu_queue_delete - Destroy a KCPU command queue
+ *
+ * @id: ID of the command queue to be destroyed
+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero
+ */
+struct kbase_ioctl_kcpu_queue_delete {
+ base_kcpu_queue_id id;
+ __u8 padding[7];
+};
+
+#define KBASE_IOCTL_KCPU_QUEUE_DELETE \
+ _IOW(KBASE_IOCTL_TYPE, 46, struct kbase_ioctl_kcpu_queue_delete)
+
+/**
+ * struct kbase_ioctl_kcpu_queue_enqueue - Enqueue commands into the KCPU queue
+ *
+ * @addr: Memory address of an array of struct base_kcpu_queue_command
+ * @nr_commands: Number of commands in the array
+ * @id: kcpu queue identifier, returned by KBASE_IOCTL_KCPU_QUEUE_CREATE ioctl
+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero
+ */
+struct kbase_ioctl_kcpu_queue_enqueue {
+ __u64 addr;
+ __u32 nr_commands;
+ base_kcpu_queue_id id;
+ __u8 padding[3];
+};
+
+#define KBASE_IOCTL_KCPU_QUEUE_ENQUEUE \
+ _IOW(KBASE_IOCTL_TYPE, 47, struct kbase_ioctl_kcpu_queue_enqueue)
+
+/**
+ * union kbase_ioctl_cs_tiler_heap_init - Initialize chunked tiler memory heap
+ * @in: Input parameters
+ * @in.chunk_size: Size of each chunk.
+ * @in.initial_chunks: Initial number of chunks that heap will be created with.
+ * @in.max_chunks: Maximum number of chunks that the heap is allowed to use.
+ * @in.target_in_flight: Number of render-passes that the driver should attempt to
+ * keep in flight for which allocation of new chunks is
+ * allowed.
+ * @in.group_id: Group ID to be used for physical allocations.
+ * @in.padding: Padding
+ * @out: Output parameters
+ * @out.gpu_heap_va: GPU VA (virtual address) of Heap context that was set up
+ * for the heap.
+ * @out.first_chunk_va: GPU VA of the first chunk allocated for the heap,
+ * actually points to the header of heap chunk and not to
+ * the low address of free memory in the chunk.
+ */
+union kbase_ioctl_cs_tiler_heap_init {
+ struct {
+ __u32 chunk_size;
+ __u32 initial_chunks;
+ __u32 max_chunks;
+ __u16 target_in_flight;
+ __u8 group_id;
+ __u8 padding;
+ } in;
+ struct {
+ __u64 gpu_heap_va;
+ __u64 first_chunk_va;
+ } out;
+};
+
+#define KBASE_IOCTL_CS_TILER_HEAP_INIT \
+ _IOWR(KBASE_IOCTL_TYPE, 48, union kbase_ioctl_cs_tiler_heap_init)
+
+/**
+ * struct kbase_ioctl_cs_tiler_heap_term - Terminate a chunked tiler heap
+ * instance
+ *
+ * @gpu_heap_va: GPU VA of Heap context that was set up for the heap.
+ */
+struct kbase_ioctl_cs_tiler_heap_term {
+ __u64 gpu_heap_va;
+};
+
+#define KBASE_IOCTL_CS_TILER_HEAP_TERM \
+ _IOW(KBASE_IOCTL_TYPE, 49, struct kbase_ioctl_cs_tiler_heap_term)
+
+/**
+ * union kbase_ioctl_cs_get_glb_iface - Request the global control block
+ * of CSF interface capabilities
+ *
+ * @in: Input parameters
+ * @in.max_group_num: The maximum number of groups to be read. Can be 0, in
+ * which case groups_ptr is unused.
+ * @in.max_total_stream_num: The maximum number of CSs to be read. Can be 0, in
+ *                           which case streams_ptr is unused.
+ * @in.groups_ptr: Pointer where to store all the group data (sequentially).
+ * @in.streams_ptr: Pointer where to store all the CS data (sequentially).
+ * @out: Output parameters
+ * @out.glb_version: Global interface version.
+ * @out.features: Bit mask of features (e.g. whether certain types of job
+ * can be suspended).
+ * @out.group_num: Number of CSGs supported.
+ * @out.prfcnt_size: Size of CSF performance counters, in bytes. Bits 31:16
+ * hold the size of firmware performance counter data
+ * and 15:0 hold the size of hardware performance counter
+ * data.
+ * @out.total_stream_num: Total number of CSs, summed across all groups.
+ * @out.instr_features: Instrumentation features. Bits 7:4 hold the maximum
+ * size of events. Bits 3:0 hold the offset update rate.
+ *
+ */
+union kbase_ioctl_cs_get_glb_iface {
+ struct {
+ __u32 max_group_num;
+ __u32 max_total_stream_num;
+ __u64 groups_ptr;
+ __u64 streams_ptr;
+ } in;
+ struct {
+ __u32 glb_version;
+ __u32 features;
+ __u32 group_num;
+ __u32 prfcnt_size;
+ __u32 total_stream_num;
+ __u32 instr_features;
+ } out;
+};
+
+#define KBASE_IOCTL_CS_GET_GLB_IFACE \
+ _IOWR(KBASE_IOCTL_TYPE, 51, union kbase_ioctl_cs_get_glb_iface)
+
+/**
+ * struct kbase_ioctl_cs_cpu_queue_info - Buffer of CPU queue information
+ *
+ * @buffer: Address of the buffer containing the CPU queue information
+ * @size: Size of the buffer, in bytes
+ */
+struct kbase_ioctl_cs_cpu_queue_info {
+ __u64 buffer;
+ __u64 size;
+};
+
+#define KBASE_IOCTL_VERSION_CHECK \
+ _IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check)
+
+#define KBASE_IOCTL_CS_CPU_QUEUE_DUMP \
+ _IOW(KBASE_IOCTL_TYPE, 53, struct kbase_ioctl_cs_cpu_queue_info)
+
+/***************
+ * test ioctls *
+ ***************/
+#if MALI_UNIT_TEST
+/* These ioctls are purely for test purposes and are not used in the production
+ * driver; they may therefore change without notice.
+ */
+
+/**
+ * struct kbase_ioctl_cs_event_memory_write - Write an event memory address
+ * @cpu_addr: Memory address to write
+ * @value: Value to write
+ * @padding: Currently unused, must be zero
+ */
+struct kbase_ioctl_cs_event_memory_write {
+ __u64 cpu_addr;
+ __u8 value;
+ __u8 padding[7];
+};
+
+/**
+ * union kbase_ioctl_cs_event_memory_read - Read an event memory address
+ * @in: Input parameters
+ * @in.cpu_addr: Memory address to read
+ * @out: Output parameters
+ * @out.value: Value read
+ * @out.padding: Currently unused, must be zero
+ */
+union kbase_ioctl_cs_event_memory_read {
+ struct {
+ __u64 cpu_addr;
+ } in;
+ struct {
+ __u8 value;
+ __u8 padding[7];
+ } out;
+};
+
+#endif /* MALI_UNIT_TEST */
+
+#endif /* _UAPI_KBASE_CSF_IOCTL_H_ */
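[Editorial note] For orientation, a userspace sketch of how these ioctls are issued. It assumes the kbase device node is /dev/mali0 and that the core mali_kbase_ioctl.h header (which defines KBASE_IOCTL_TYPE and pulls in this CSF header on CSF builds) is on the include path; a real client would also have to complete the usual context setup before most CSF ioctls succeed. This only shows the calling convention:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include "mali_kbase_ioctl.h"	/* KBASE_IOCTL_TYPE, CSF ioctls */

int main(void)
{
	struct kbase_ioctl_version_check vc = {
		.major = BASE_UK_VERSION_MAJOR,
		.minor = BASE_UK_VERSION_MINOR,
	};
	struct kbase_ioctl_kcpu_queue_new q = { 0 };
	int fd = open("/dev/mali0", O_RDWR);	/* device node path is an assumption */

	if (fd < 0)
		return 1;
	/* The kernel overwrites vc with the interface version it actually supports */
	if (ioctl(fd, KBASE_IOCTL_VERSION_CHECK, &vc) == 0 &&
	    ioctl(fd, KBASE_IOCTL_KCPU_QUEUE_CREATE, &q) == 0)
		printf("UK %u.%u, new KCPU queue id %u\n", vc.major, vc.minor, q.id);
	close(fd);
	return 0;
}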
diff --git a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h
new file mode 100644
index 0000000..c87154f
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h
@@ -0,0 +1,335 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_GPU_REGMAP_CSF_H_
+#define _UAPI_KBASE_GPU_REGMAP_CSF_H_
+
+#include <linux/types.h>
+
+#if !MALI_USE_CSF && defined(__KERNEL__)
+#error "Cannot be compiled with JM"
+#endif
+
+/* IPA control registers */
+
+#define IPA_CONTROL_BASE 0x40000
+#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE+(r))
+#define COMMAND 0x000 /* (WO) Command register */
+#define STATUS 0x004 /* (RO) Status register */
+#define TIMER 0x008 /* (RW) Timer control register */
+
+#define SELECT_CSHW_LO 0x010 /* (RW) Counter select for CS hardware, low word */
+#define SELECT_CSHW_HI 0x014 /* (RW) Counter select for CS hardware, high word */
+#define SELECT_MEMSYS_LO 0x018 /* (RW) Counter select for Memory system, low word */
+#define SELECT_MEMSYS_HI 0x01C /* (RW) Counter select for Memory system, high word */
+#define SELECT_TILER_LO 0x020 /* (RW) Counter select for Tiler cores, low word */
+#define SELECT_TILER_HI 0x024 /* (RW) Counter select for Tiler cores, high word */
+#define SELECT_SHADER_LO 0x028 /* (RW) Counter select for Shader cores, low word */
+#define SELECT_SHADER_HI 0x02C /* (RW) Counter select for Shader cores, high word */
+
+/* Accumulated counter values for CS hardware */
+#define VALUE_CSHW_BASE 0x100
+#define VALUE_CSHW_REG_LO(n) (VALUE_CSHW_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
+#define VALUE_CSHW_REG_HI(n) (VALUE_CSHW_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
+
+/* Accumulated counter values for memory system */
+#define VALUE_MEMSYS_BASE 0x140
+#define VALUE_MEMSYS_REG_LO(n) (VALUE_MEMSYS_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
+#define VALUE_MEMSYS_REG_HI(n) (VALUE_MEMSYS_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
+
+#define VALUE_TILER_BASE 0x180
+#define VALUE_TILER_REG_LO(n) (VALUE_TILER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
+#define VALUE_TILER_REG_HI(n) (VALUE_TILER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
+
+#define VALUE_SHADER_BASE 0x1C0
+#define VALUE_SHADER_REG_LO(n) (VALUE_SHADER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
+#define VALUE_SHADER_REG_HI(n) (VALUE_SHADER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
+
+#include "../../csf/mali_gpu_csf_control_registers.h"
+
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull
+/* Set to inner non-cacheable, outer non-cacheable.
+ * The setting defined by the alloc bits is ignored, but set to a valid encoding:
+ * - no allocate on read
+ * - no allocate on write
+ */
+#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull
+/* Set to shared memory, that is inner cacheable on ACE and inner or outer
+ * shared, otherwise inner non-cacheable.
+ * Outer cacheable if inner or outer shared, otherwise outer non-cacheable.
+ */
+#define AS_MEMATTR_AARCH64_SHARED 0x8ull
+
+/* Symbols for default MEMATTR to use
+ * Default is HW implementation-defined caching
+ */
+#define AS_MEMATTR_INDEX_DEFAULT 0
+#define AS_MEMATTR_INDEX_DEFAULT_ACE 3
+
+/* HW implementation defined caching */
+#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0
+/* Force cache on */
+#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1
+/* Write-alloc */
+#define AS_MEMATTR_INDEX_WRITE_ALLOC 2
+/* Outer coherent, inner implementation defined policy */
+#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3
+/* Outer coherent, write alloc inner */
+#define AS_MEMATTR_INDEX_OUTER_WA 4
+/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */
+#define AS_MEMATTR_INDEX_NON_CACHEABLE 5
+/* Normal memory, shared between MCU and Host */
+#define AS_MEMATTR_INDEX_SHARED 6
+
+/* Configuration bits for the CSF. */
+#define CSF_CONFIG 0xF00
+
+/* CSF_CONFIG register */
+#define CSF_CONFIG_FORCE_COHERENCY_FEATURES_SHIFT 2
+
+/* GPU control registers */
+#define CORE_FEATURES 0x008 /* (RO) Shader Core Features */
+#define MCU_CONTROL 0x700
+#define MCU_STATUS 0x704
+
+#define MCU_CNTRL_ENABLE (1 << 0)
+#define MCU_CNTRL_AUTO (1 << 1)
+#define MCU_CNTRL_DISABLE (0)
+
+#define MCU_STATUS_HALTED (1 << 1)
+
+#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory
+ * region base address, low word
+ */
+#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory
+ * region base address, high word
+ */
+#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter
+ * configuration
+ */
+
+#define PRFCNT_CSHW_EN 0x06C /* (RW) Performance counter
+ * enable for CS Hardware
+ */
+
+#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable
+ * flags for shader cores
+ */
+#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable
+ * flags for tiler
+ */
+#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable
+ * flags for MMU/L2 cache
+ */
+
+/* JOB IRQ flags */
+#define JOB_IRQ_GLOBAL_IF (1 << 31) /* Global interface interrupt received */
+
+/* GPU_COMMAND codes */
+#define GPU_COMMAND_CODE_NOP 0x00 /* No operation, nothing happens */
+#define GPU_COMMAND_CODE_RESET 0x01 /* Reset the GPU */
+#define GPU_COMMAND_CODE_PRFCNT 0x02 /* Clear or sample performance counters */
+#define GPU_COMMAND_CODE_TIME 0x03 /* Configure time sources */
+#define GPU_COMMAND_CODE_FLUSH_CACHES 0x04 /* Flush caches */
+#define GPU_COMMAND_CODE_SET_PROTECTED_MODE 0x05 /* Places the GPU in protected mode */
+#define GPU_COMMAND_CODE_FINISH_HALT 0x06 /* Halt CSF */
+#define GPU_COMMAND_CODE_CLEAR_FAULT 0x07 /* Clear GPU_FAULTSTATUS and GPU_FAULTADDRESS, TODX */
+
+/* GPU_COMMAND_RESET payloads */
+
+/* This will leave the state of active jobs UNDEFINED, but will leave the external bus in a defined and idle state.
+ * Power domains will remain powered on.
+ */
+#define GPU_COMMAND_RESET_PAYLOAD_FAST_RESET 0x00
+
+/* This will leave the state of active CSs UNDEFINED, but will leave the external bus in a defined and
+ * idle state.
+ */
+#define GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET 0x01
+
+/* This reset will leave the state of currently active streams UNDEFINED, will likely lose data, and may leave
+ * the system bus in an inconsistent state. Use only as a last resort when nothing else works.
+ */
+#define GPU_COMMAND_RESET_PAYLOAD_HARD_RESET 0x02
+
+/* GPU_COMMAND_PRFCNT payloads */
+#define GPU_COMMAND_PRFCNT_PAYLOAD_SAMPLE 0x01 /* Sample performance counters */
+#define GPU_COMMAND_PRFCNT_PAYLOAD_CLEAR 0x02 /* Clear performance counters */
+
+/* GPU_COMMAND_TIME payloads */
+#define GPU_COMMAND_TIME_DISABLE 0x00 /* Disable cycle counter */
+#define GPU_COMMAND_TIME_ENABLE 0x01 /* Enable cycle counter */
+
+/* GPU_COMMAND_FLUSH_CACHES payloads */
+#define GPU_COMMAND_FLUSH_PAYLOAD_NONE 0x00 /* No flush */
+#define GPU_COMMAND_FLUSH_PAYLOAD_CLEAN 0x01 /* Clean the caches */
+#define GPU_COMMAND_FLUSH_PAYLOAD_INVALIDATE 0x02 /* Invalidate the caches */
+#define GPU_COMMAND_FLUSH_PAYLOAD_CLEAN_INVALIDATE 0x03 /* Clean and invalidate the caches */
+
+/* GPU_COMMAND command + payload */
+#define GPU_COMMAND_CODE_PAYLOAD(opcode, payload) \
+ ((__u32)opcode | ((__u32)payload << 8))
+
+/* Final GPU_COMMAND form */
+/* No operation, nothing happens */
+#define GPU_COMMAND_NOP \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_NOP, 0)
+
+/* Stop all external bus interfaces, and then reset the entire GPU. */
+#define GPU_COMMAND_SOFT_RESET \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET)
+
+/* Immediately reset the entire GPU. */
+#define GPU_COMMAND_HARD_RESET \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_HARD_RESET)
+
+/* Clear all performance counters, setting them all to zero. */
+#define GPU_COMMAND_PRFCNT_CLEAR \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_PRFCNT, GPU_COMMAND_PRFCNT_PAYLOAD_CLEAR)
+
+/* Sample all performance counters, writing them out to memory */
+#define GPU_COMMAND_PRFCNT_SAMPLE \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_PRFCNT, GPU_COMMAND_PRFCNT_PAYLOAD_SAMPLE)
+
+/* Starts the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CYCLE_COUNT_START \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_ENABLE)
+
+/* Stops the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CYCLE_COUNT_STOP \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_DISABLE)
+
+/* Clean all caches */
+#define GPU_COMMAND_CLEAN_CACHES \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, GPU_COMMAND_FLUSH_PAYLOAD_CLEAN)
+
+/* Clean and invalidate all caches */
+#define GPU_COMMAND_CLEAN_INV_CACHES \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, GPU_COMMAND_FLUSH_PAYLOAD_CLEAN_INVALIDATE)
+
+/* Places the GPU in protected mode */
+#define GPU_COMMAND_SET_PROTECTED_MODE \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_SET_PROTECTED_MODE, 0)
+
+/* Halt CSF */
+#define GPU_COMMAND_FINISH_HALT \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FINISH_HALT, 0)
+
+/* Clear GPU faults */
+#define GPU_COMMAND_CLEAR_FAULT \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_CLEAR_FAULT, 0)
+
+/* End Command Values */
+
+/* GPU_FAULTSTATUS register */
+#define GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0
+#define GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFFul)
+#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \
+ (((reg_val)&GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK) \
+ >> GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT)
+#define GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT 8
+#define GPU_FAULTSTATUS_ACCESS_TYPE_MASK \
+ (0x3ul << GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT)
+
+#define GPU_FAULTSTATUS_ADDR_VALID_SHIFT 10
+#define GPU_FAULTSTATUS_ADDR_VALID_FLAG \
+ (1ul << GPU_FAULTSTATUS_ADDR_VALID_SHIFT)
+
+#define GPU_FAULTSTATUS_JASID_VALID_SHIFT 11
+#define GPU_FAULTSTATUS_JASID_VALID_FLAG \
+ (1ul << GPU_FAULTSTATUS_JASID_VALID_SHIFT)
+
+#define GPU_FAULTSTATUS_JASID_SHIFT 12
+#define GPU_FAULTSTATUS_JASID_MASK (0xF << GPU_FAULTSTATUS_JASID_SHIFT)
+#define GPU_FAULTSTATUS_JASID_GET(reg_val) \
+ (((reg_val)&GPU_FAULTSTATUS_JASID_MASK) >> GPU_FAULTSTATUS_JASID_SHIFT)
+#define GPU_FAULTSTATUS_JASID_SET(reg_val, value) \
+ (((reg_val) & ~GPU_FAULTSTATUS_JASID_MASK) | \
+ (((value) << GPU_FAULTSTATUS_JASID_SHIFT) & GPU_FAULTSTATUS_JASID_MASK))
+
+#define GPU_FAULTSTATUS_SOURCE_ID_SHIFT 16
+#define GPU_FAULTSTATUS_SOURCE_ID_MASK \
+ (0xFFFFul << GPU_FAULTSTATUS_SOURCE_ID_SHIFT)
+/* End GPU_FAULTSTATUS register */
+
+/* GPU_FAULTSTATUS_ACCESS_TYPE values */
+#define GPU_FAULTSTATUS_ACCESS_TYPE_ATOMIC 0x0
+#define GPU_FAULTSTATUS_ACCESS_TYPE_EXECUTE 0x1
+#define GPU_FAULTSTATUS_ACCESS_TYPE_READ 0x2
+#define GPU_FAULTSTATUS_ACCESS_TYPE_WRITE 0x3
+/* End of GPU_FAULTSTATUS_ACCESS_TYPE values */
+
+/* Implementation-dependent exception codes used to indicate CSG
+ * and CS errors that are not specified in the specs.
+ */
+#define GPU_EXCEPTION_TYPE_SW_FAULT_0 ((__u8)0x70)
+#define GPU_EXCEPTION_TYPE_SW_FAULT_1 ((__u8)0x71)
+#define GPU_EXCEPTION_TYPE_SW_FAULT_2 ((__u8)0x72)
+
+/* GPU_FAULTSTATUS_EXCEPTION_TYPE values */
+#define GPU_FAULTSTATUS_EXCEPTION_TYPE_OK 0x00
+#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_BUS_FAULT 0x80
+#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_SHAREABILITY_FAULT 0x88
+#define GPU_FAULTSTATUS_EXCEPTION_TYPE_SYSTEM_SHAREABILITY_FAULT 0x89
+#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_CACHEABILITY_FAULT 0x8A
+/* End of GPU_FAULTSTATUS_EXCEPTION_TYPE values */
+
+#define GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT GPU_U(10)
+#define GPU_FAULTSTATUS_ADDRESS_VALID_MASK (GPU_U(0x1) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT)
+#define GPU_FAULTSTATUS_ADDRESS_VALID_GET(reg_val) \
+ (((reg_val)&GPU_FAULTSTATUS_ADDRESS_VALID_MASK) >> GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT)
+#define GPU_FAULTSTATUS_ADDRESS_VALID_SET(reg_val, value) \
+ (((reg_val) & ~GPU_FAULTSTATUS_ADDRESS_VALID_MASK) | \
+ (((value) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) & GPU_FAULTSTATUS_ADDRESS_VALID_MASK))
+
+/* IRQ flags */
+#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */
+#define GPU_PROTECTED_FAULT (1 << 1) /* A GPU fault has occurred in protected mode */
+#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */
+#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */
+#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */
+#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */
+#define DOORBELL_MIRROR (1 << 18) /* Mirrors the doorbell interrupt line to the CPU */
+#define MCU_STATUS_GPU_IRQ (1 << 19) /* MCU requires attention */
+
+/*
+ * In Debug build,
+ * GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE is used to clear and unmask interrupt sources of GPU_IRQ
+ * by writing it onto GPU_IRQ_CLEAR/MASK registers.
+ *
+ * In Release build,
+ * GPU_IRQ_REG_COMMON is used.
+ *
+ * Note:
+ * CLEAN_CACHES_COMPLETED - Used separately for cache operation.
+ * DOORBELL_MIRROR - Not included in GPU_IRQ_REG_COMMON because it cannot be
+ * cleared by GPU_IRQ_CLEAR, so including it could cause an interrupt storm.
+ */
+#define GPU_IRQ_REG_COMMON (GPU_FAULT | GPU_PROTECTED_FAULT | RESET_COMPLETED \
+ | POWER_CHANGED_ALL | MCU_STATUS_GPU_IRQ)
+
+/* GPU_CONTROL_MCU.GPU_IRQ_RAWSTAT */
+#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when performance count sample has completed */
+
+#endif /* _UAPI_KBASE_GPU_REGMAP_CSF_H_ */
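[Editorial note] To make the bit layouts above concrete, a short sketch of how a consumer of this header might decode a GPU_FAULTSTATUS value and compose a GPU_COMMAND word. The helper names are invented for illustration; the real handling lives in the kbase driver.

#include <linux/types.h>

static int is_gpu_bus_write_fault(__u32 fault_status)
{
	__u8 exception = GPU_FAULTSTATUS_EXCEPTION_TYPE_GET(fault_status);
	__u32 access = (fault_status & GPU_FAULTSTATUS_ACCESS_TYPE_MASK) >>
		       GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT;

	/* e.g. a GPU bus fault raised by a write access */
	return exception == GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_BUS_FAULT &&
	       access == GPU_FAULTSTATUS_ACCESS_TYPE_WRITE;
}

/* GPU_COMMAND packs the opcode in bits 7:0 and the payload in bits 15:8, so
 * this expands to the same value as GPU_COMMAND_CLEAN_INV_CACHES above.
 */
static const __u32 clean_inv_cmd =
	GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES,
				 GPU_COMMAND_FLUSH_PAYLOAD_CLEAN_INVALIDATE);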
diff --git a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h
new file mode 100644
index 0000000..1982668
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h
@@ -0,0 +1,287 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_GPU_REGMAP_JM_H_
+#define _UAPI_KBASE_GPU_REGMAP_JM_H_
+
+#if MALI_USE_CSF && defined(__KERNEL__)
+#error "Cannot be compiled with CSF"
+#endif
+
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull
+/* Set to inner non-cacheable, outer non-cacheable.
+ * The setting defined by the alloc bits is ignored, but set to a valid encoding:
+ * - no allocate on read
+ * - no allocate on write
+ */
+#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull
+
+/* Symbols for default MEMATTR to use
+ * Default is HW implementation-defined caching
+ */
+#define AS_MEMATTR_INDEX_DEFAULT 0
+#define AS_MEMATTR_INDEX_DEFAULT_ACE 3
+
+/* HW implementation defined caching */
+#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0
+/* Force cache on */
+#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1
+/* Write-alloc */
+#define AS_MEMATTR_INDEX_WRITE_ALLOC 2
+/* Outer coherent, inner implementation defined policy */
+#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3
+/* Outer coherent, write alloc inner */
+#define AS_MEMATTR_INDEX_OUTER_WA 4
+/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */
+#define AS_MEMATTR_INDEX_NON_CACHEABLE 5
+
+/* GPU control registers */
+
+#define CORE_FEATURES 0x008 /* (RO) Shader Core Features */
+#define JS_PRESENT 0x01C /* (RO) Job slots present */
+#define LATEST_FLUSH 0x038 /* (RO) Flush ID of latest
+ * clean-and-invalidate operation
+ */
+
+#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory
+ * region base address, low word
+ */
+#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory
+ * region base address, high word
+ */
+#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter
+ * configuration
+ */
+#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable
+ * flags for Job Manager
+ */
+#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable
+ * flags for shader cores
+ */
+#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable
+ * flags for tiler
+ */
+#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable
+ * flags for MMU/L2 cache
+ */
+
+#define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */
+#define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */
+#define JS2_FEATURES 0x0C8 /* (RO) Features of job slot 2 */
+#define JS3_FEATURES 0x0CC /* (RO) Features of job slot 3 */
+#define JS4_FEATURES 0x0D0 /* (RO) Features of job slot 4 */
+#define JS5_FEATURES 0x0D4 /* (RO) Features of job slot 5 */
+#define JS6_FEATURES 0x0D8 /* (RO) Features of job slot 6 */
+#define JS7_FEATURES 0x0DC /* (RO) Features of job slot 7 */
+#define JS8_FEATURES 0x0E0 /* (RO) Features of job slot 8 */
+#define JS9_FEATURES 0x0E4 /* (RO) Features of job slot 9 */
+#define JS10_FEATURES 0x0E8 /* (RO) Features of job slot 10 */
+#define JS11_FEATURES 0x0EC /* (RO) Features of job slot 11 */
+#define JS12_FEATURES 0x0F0 /* (RO) Features of job slot 12 */
+#define JS13_FEATURES 0x0F4 /* (RO) Features of job slot 13 */
+#define JS14_FEATURES 0x0F8 /* (RO) Features of job slot 14 */
+#define JS15_FEATURES 0x0FC /* (RO) Features of job slot 15 */
+
+#define JS_FEATURES_REG(n) GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2))
+
+#define JM_CONFIG 0xF00 /* (RW) Job manager configuration (implementation-specific) */
+
+/* Job control registers */
+
+#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
+#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. */
+
+#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */
+#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */
+#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */
+#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */
+#define JOB_SLOT4 0xA00 /* Configuration registers for job slot 4 */
+#define JOB_SLOT5 0xA80 /* Configuration registers for job slot 5 */
+#define JOB_SLOT6 0xB00 /* Configuration registers for job slot 6 */
+#define JOB_SLOT7 0xB80 /* Configuration registers for job slot 7 */
+#define JOB_SLOT8 0xC00 /* Configuration registers for job slot 8 */
+#define JOB_SLOT9 0xC80 /* Configuration registers for job slot 9 */
+#define JOB_SLOT10 0xD00 /* Configuration registers for job slot 10 */
+#define JOB_SLOT11 0xD80 /* Configuration registers for job slot 11 */
+#define JOB_SLOT12 0xE00 /* Configuration registers for job slot 12 */
+#define JOB_SLOT13 0xE80 /* Configuration registers for job slot 13 */
+#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */
+#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */
+
+#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
+
+#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */
+#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */
+#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */
+#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */
+#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */
+#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */
+#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */
+/* (RO) Extended affinity mask for job slot n */
+#define JS_XAFFINITY 0x1C
+
+#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */
+#define JS_STATUS 0x24 /* (RO) Status register for job slot n */
+
+#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */
+#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */
+
+#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */
+#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */
+#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */
+/* (RW) Next extended affinity mask for job slot n */
+#define JS_XAFFINITY_NEXT 0x5C
+
+#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */
+
+#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */
+
+/* No JM-specific MMU control registers */
+/* No JM-specific MMU address space control registers */
+
+/* JS_COMMAND register commands */
+#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */
+#define JS_COMMAND_START 0x01 /* Start processing a job chain. Writing this value is ignored */
+#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */
+#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */
+#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
+#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
+
+#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */
+
+/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
+#define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0)
+#define JS_CONFIG_START_FLUSH_CLEAN (1u << 8)
+#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8)
+#define JS_CONFIG_START_MMU (1u << 10)
+#define JS_CONFIG_JOB_CHAIN_FLAG (1u << 11)
+#define JS_CONFIG_END_FLUSH_NO_ACTION JS_CONFIG_START_FLUSH_NO_ACTION
+#define JS_CONFIG_END_FLUSH_CLEAN (1u << 12)
+#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12)
+#define JS_CONFIG_ENABLE_FLUSH_REDUCTION (1u << 14)
+#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK (1u << 15)
+#define JS_CONFIG_THREAD_PRI(n) ((n) << 16)
+
+/* JS_XAFFINITY register values */
+#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0)
+#define JS_XAFFINITY_TILER_ENABLE (1u << 8)
+#define JS_XAFFINITY_CACHE_ENABLE (1u << 16)
+
+/* JS_STATUS register values */
+
+/* NOTE: Please keep these values in sync with enum base_jd_event_code in mali_base_kernel.h.
+ * The values are kept separate to avoid a dependency between userspace and kernel code.
+ */
+
+/* Group of values representing the job status instead of a particular fault */
+#define JS_STATUS_NO_EXCEPTION_BASE 0x00
+#define JS_STATUS_INTERRUPTED (JS_STATUS_NO_EXCEPTION_BASE + 0x02) /* 0x02 means INTERRUPTED */
+#define JS_STATUS_STOPPED (JS_STATUS_NO_EXCEPTION_BASE + 0x03) /* 0x03 means STOPPED */
+#define JS_STATUS_TERMINATED (JS_STATUS_NO_EXCEPTION_BASE + 0x04) /* 0x04 means TERMINATED */
+
+/* General fault values */
+#define JS_STATUS_FAULT_BASE 0x40
+#define JS_STATUS_CONFIG_FAULT (JS_STATUS_FAULT_BASE) /* 0x40 means CONFIG FAULT */
+#define JS_STATUS_POWER_FAULT (JS_STATUS_FAULT_BASE + 0x01) /* 0x41 means POWER FAULT */
+#define JS_STATUS_READ_FAULT (JS_STATUS_FAULT_BASE + 0x02) /* 0x42 means READ FAULT */
+#define JS_STATUS_WRITE_FAULT (JS_STATUS_FAULT_BASE + 0x03) /* 0x43 means WRITE FAULT */
+#define JS_STATUS_AFFINITY_FAULT (JS_STATUS_FAULT_BASE + 0x04) /* 0x44 means AFFINITY FAULT */
+#define JS_STATUS_BUS_FAULT (JS_STATUS_FAULT_BASE + 0x08) /* 0x48 means BUS FAULT */
+
+/* Instruction or data faults */
+#define JS_STATUS_INSTRUCTION_FAULT_BASE 0x50
+#define JS_STATUS_INSTR_INVALID_PC (JS_STATUS_INSTRUCTION_FAULT_BASE) /* 0x50 means INSTR INVALID PC */
+#define JS_STATUS_INSTR_INVALID_ENC (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01) /* 0x51 means INSTR INVALID ENC */
+#define JS_STATUS_INSTR_TYPE_MISMATCH (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02) /* 0x52 means INSTR TYPE MISMATCH */
+#define JS_STATUS_INSTR_OPERAND_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03) /* 0x53 means INSTR OPERAND FAULT */
+#define JS_STATUS_INSTR_TLS_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04) /* 0x54 means INSTR TLS FAULT */
+#define JS_STATUS_INSTR_BARRIER_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05) /* 0x55 means INSTR BARRIER FAULT */
+#define JS_STATUS_INSTR_ALIGN_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06) /* 0x56 means INSTR ALIGN FAULT */
+/* NOTE: No fault with 0x57 code defined in spec. */
+#define JS_STATUS_DATA_INVALID_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08) /* 0x58 means DATA INVALID FAULT */
+#define JS_STATUS_TILE_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09) /* 0x59 means TILE RANGE FAULT */
+#define JS_STATUS_ADDRESS_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A) /* 0x5A means ADDRESS RANGE FAULT */
+
+/* Other faults */
+#define JS_STATUS_MEMORY_FAULT_BASE 0x60
+#define JS_STATUS_OUT_OF_MEMORY (JS_STATUS_MEMORY_FAULT_BASE) /* 0x60 means OUT OF MEMORY */
+#define JS_STATUS_UNKNOWN 0x7F /* 0x7F means UNKNOWN */
+
+/* JS<n>_FEATURES register */
+#define JS_FEATURE_NULL_JOB (1u << 1)
+#define JS_FEATURE_SET_VALUE_JOB (1u << 2)
+#define JS_FEATURE_CACHE_FLUSH_JOB (1u << 3)
+#define JS_FEATURE_COMPUTE_JOB (1u << 4)
+#define JS_FEATURE_VERTEX_JOB (1u << 5)
+#define JS_FEATURE_GEOMETRY_JOB (1u << 6)
+#define JS_FEATURE_TILER_JOB (1u << 7)
+#define JS_FEATURE_FUSED_JOB (1u << 8)
+#define JS_FEATURE_FRAGMENT_JOB (1u << 9)
+
+/* JM_CONFIG register */
+#define JM_TIMESTAMP_OVERRIDE (1ul << 0)
+#define JM_CLOCK_GATE_OVERRIDE (1ul << 1)
+#define JM_JOB_THROTTLE_ENABLE (1ul << 2)
+#define JM_JOB_THROTTLE_LIMIT_SHIFT (3)
+#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F)
+#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2)
+
+/* GPU_COMMAND values */
+#define GPU_COMMAND_NOP 0x00 /* No operation, nothing happens */
+#define GPU_COMMAND_SOFT_RESET 0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */
+#define GPU_COMMAND_HARD_RESET 0x02 /* Immediately reset the entire GPU. */
+#define GPU_COMMAND_PRFCNT_CLEAR 0x03 /* Clear all performance counters, setting them all to zero. */
+#define GPU_COMMAND_PRFCNT_SAMPLE 0x04 /* Sample all performance counters, writing them out to memory */
+#define GPU_COMMAND_CYCLE_COUNT_START 0x05 /* Starts the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CYCLE_COUNT_STOP 0x06 /* Stops the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */
+#define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */
+#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */
+
+/* IRQ flags */
+#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */
+#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */
+#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */
+#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */
+#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */
+#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */
+#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */
+
+/*
+ * In Debug build,
+ * GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE is used to clear and enable interrupt sources of GPU_IRQ
+ * by writing it onto GPU_IRQ_CLEAR/MASK registers.
+ *
+ * In Release build,
+ * GPU_IRQ_REG_COMMON is used.
+ *
+ * Note:
+ * CLEAN_CACHES_COMPLETED - Used separately for cache operation.
+ */
+#define GPU_IRQ_REG_COMMON (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \
+ | POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED)
+
+#endif /* _UAPI_KBASE_GPU_REGMAP_JM_H_ */
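[Editorial note] As a concrete illustration of the JS_CONFIG flags above, the value below is the kind of word a driver might program into JS_CONFIG_NEXT before writing JS_COMMAND_START to JS_COMMAND_NEXT: clean-and-invalidate flushes on start and end, flush-ID based flush reduction enabled, and thread priority 8. Purely a sketch; the actual policy is decided by the kbase job-slot code.

#include <linux/types.h>

/* Illustrative JS_CONFIG_NEXT value; not taken from the driver */
static const __u32 js_config_example =
	JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE |
	JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE |
	JS_CONFIG_ENABLE_FLUSH_REDUCTION |
	JS_CONFIG_THREAD_PRI(8);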
diff --git a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h
new file mode 100644
index 0000000..98186d2
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_GPU_COHERENCY_H_
+#define _UAPI_KBASE_GPU_COHERENCY_H_
+
+#define COHERENCY_ACE_LITE 0
+#define COHERENCY_ACE 1
+#define COHERENCY_NONE 31
+#define COHERENCY_FEATURE_BIT(x) (1 << (x))
+
+#endif /* _UAPI_KBASE_GPU_COHERENCY_H_ */
diff --git a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h
new file mode 100644
index 0000000..0145920
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_GPU_ID_H_
+#define _UAPI_KBASE_GPU_ID_H_
+
+#include <linux/types.h>
+
+/* GPU_ID register */
+#define GPU_ID_VERSION_STATUS_SHIFT 0
+#define GPU_ID_VERSION_MINOR_SHIFT 4
+#define GPU_ID_VERSION_MAJOR_SHIFT 12
+#define GPU_ID_VERSION_PRODUCT_ID_SHIFT 16
+#define GPU_ID_VERSION_STATUS (0xFu << GPU_ID_VERSION_STATUS_SHIFT)
+#define GPU_ID_VERSION_MINOR (0xFFu << GPU_ID_VERSION_MINOR_SHIFT)
+#define GPU_ID_VERSION_MAJOR (0xFu << GPU_ID_VERSION_MAJOR_SHIFT)
+#define GPU_ID_VERSION_PRODUCT_ID (0xFFFFu << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
+
+#define GPU_ID2_VERSION_STATUS_SHIFT 0
+#define GPU_ID2_VERSION_MINOR_SHIFT 4
+#define GPU_ID2_VERSION_MAJOR_SHIFT 12
+#define GPU_ID2_PRODUCT_MAJOR_SHIFT 16
+#define GPU_ID2_ARCH_REV_SHIFT 20
+#define GPU_ID2_ARCH_MINOR_SHIFT 24
+#define GPU_ID2_ARCH_MAJOR_SHIFT 28
+#define GPU_ID2_VERSION_STATUS (0xFu << GPU_ID2_VERSION_STATUS_SHIFT)
+#define GPU_ID2_VERSION_MINOR (0xFFu << GPU_ID2_VERSION_MINOR_SHIFT)
+#define GPU_ID2_VERSION_MAJOR (0xFu << GPU_ID2_VERSION_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MAJOR (0xFu << GPU_ID2_PRODUCT_MAJOR_SHIFT)
+#define GPU_ID2_ARCH_REV (0xFu << GPU_ID2_ARCH_REV_SHIFT)
+#define GPU_ID2_ARCH_MINOR (0xFu << GPU_ID2_ARCH_MINOR_SHIFT)
+#define GPU_ID2_ARCH_MAJOR (0xFu << GPU_ID2_ARCH_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MODEL (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR)
+#define GPU_ID2_VERSION (GPU_ID2_VERSION_MAJOR | \
+ GPU_ID2_VERSION_MINOR | \
+ GPU_ID2_VERSION_STATUS)
+
+/* Helper macro to create a partial GPU_ID (new format) that defines
+ * a product ignoring its version.
+ */
+#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \
+ ((((__u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \
+ (((__u32)arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT) | \
+ (((__u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT) | \
+ (((__u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Helper macro to create a partial GPU_ID (new format) that specifies the
+ * revision (major, minor, status) of a product
+ */
+#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \
+ ((((__u32)version_major) << GPU_ID2_VERSION_MAJOR_SHIFT) | \
+ (((__u32)version_minor) << GPU_ID2_VERSION_MINOR_SHIFT) | \
+ (((__u32)version_status) << GPU_ID2_VERSION_STATUS_SHIFT))
+
+/* Helper macro to create a complete GPU_ID (new format) */
+#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \
+ version_major, version_minor, version_status) \
+ (GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \
+ product_major) | \
+ GPU_ID2_VERSION_MAKE(version_major, version_minor, \
+ version_status))
+
+/* Helper macro to create a partial GPU_ID (new format) that identifies
+ * a particular GPU model by its arch_major and product_major.
+ */
+#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \
+ ((((__u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \
+ (((__u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Strip off the non-relevant bits from a product_id value and make it suitable
+ * for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU
+ * model.
+ */
+#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \
+ ((((__u32)product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \
+ GPU_ID2_PRODUCT_MODEL)
+
+#define GPU_ID2_PRODUCT_TMIX GPU_ID2_MODEL_MAKE(6, 0)
+#define GPU_ID2_PRODUCT_THEX GPU_ID2_MODEL_MAKE(6, 1)
+#define GPU_ID2_PRODUCT_TSIX GPU_ID2_MODEL_MAKE(7, 0)
+#define GPU_ID2_PRODUCT_TDVX GPU_ID2_MODEL_MAKE(7, 3)
+#define GPU_ID2_PRODUCT_TNOX GPU_ID2_MODEL_MAKE(7, 1)
+#define GPU_ID2_PRODUCT_TGOX GPU_ID2_MODEL_MAKE(7, 2)
+#define GPU_ID2_PRODUCT_TTRX GPU_ID2_MODEL_MAKE(9, 0)
+#define GPU_ID2_PRODUCT_TNAX GPU_ID2_MODEL_MAKE(9, 1)
+#define GPU_ID2_PRODUCT_TBEX GPU_ID2_MODEL_MAKE(9, 2)
+#define GPU_ID2_PRODUCT_LBEX GPU_ID2_MODEL_MAKE(9, 4)
+#define GPU_ID2_PRODUCT_TBAX GPU_ID2_MODEL_MAKE(9, 5)
+#define GPU_ID2_PRODUCT_TDUX GPU_ID2_MODEL_MAKE(10, 1)
+#define GPU_ID2_PRODUCT_TODX GPU_ID2_MODEL_MAKE(10, 2)
+#define GPU_ID2_PRODUCT_LODX GPU_ID2_MODEL_MAKE(10, 7)
+
+/* Helper macro to create a GPU_ID assuming valid values for id, major,
+ * minor, status
+ */
+#define GPU_ID_MAKE(id, major, minor, status) \
+ ((((__u32)id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
+ (((__u32)major) << GPU_ID_VERSION_MAJOR_SHIFT) | \
+ (((__u32)minor) << GPU_ID_VERSION_MINOR_SHIFT) | \
+ (((__u32)status) << GPU_ID_VERSION_STATUS_SHIFT))
+
+#endif /* _UAPI_KBASE_GPU_ID_H_ */
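[Editorial note] A short sketch of how the GPU_ID2 helpers above are meant to be combined: mask the raw GPU_ID register value down to its model bits and compare against one of the GPU_ID2_PRODUCT_xxx constants, or strip it down to the version triple. The function names are invented for illustration.

#include <linux/types.h>

static int gpu_is_ttrx(__u32 gpu_id)
{
	/* Keep only ARCH_MAJOR and PRODUCT_MAJOR, which identify the model */
	return (gpu_id & GPU_ID2_PRODUCT_MODEL) == GPU_ID2_PRODUCT_TTRX;
}

static __u32 gpu_version(__u32 gpu_id)
{
	/* VERSION_MAJOR, VERSION_MINOR and VERSION_STATUS, left in place */
	return gpu_id & GPU_ID2_VERSION;
}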
diff --git a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h
new file mode 100644
index 0000000..9977212
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h
@@ -0,0 +1,424 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_GPU_REGMAP_H_
+#define _UAPI_KBASE_GPU_REGMAP_H_
+
+#include "mali_kbase_gpu_coherency.h"
+#include "mali_kbase_gpu_id.h"
+#if MALI_USE_CSF
+#include "backend/mali_kbase_gpu_regmap_csf.h"
+#else
+#include "backend/mali_kbase_gpu_regmap_jm.h"
+#endif
+
+/* Begin Register Offsets */
+/* GPU control registers */
+
+#define GPU_CONTROL_BASE 0x0000
+#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r))
+#define GPU_ID 0x000 /* (RO) GPU and revision identifier */
+#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */
+#define TILER_FEATURES 0x00C /* (RO) Tiler Features */
+#define MEM_FEATURES 0x010 /* (RO) Memory system features */
+#define MMU_FEATURES 0x014 /* (RO) MMU features */
+#define AS_PRESENT 0x018 /* (RO) Address space slots present */
+#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */
+#define GPU_IRQ_CLEAR 0x024 /* (WO) */
+#define GPU_IRQ_MASK 0x028 /* (RW) */
+#define GPU_IRQ_STATUS 0x02C /* (RO) */
+
+#define GPU_COMMAND 0x030 /* (WO) */
+#define GPU_STATUS 0x034 /* (RO) */
+
+#define GPU_DBGEN (1 << 8) /* DBGEN wire status */
+
+#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */
+#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */
+#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */
+
+#define L2_CONFIG 0x048 /* (RW) Level 2 cache configuration */
+
+#define GROUPS_L2_COHERENT (1 << 0) /* Core groups are l2 coherent */
+#define SUPER_L2_COHERENT (1 << 1) /* Shader cores within a core
+ * supergroup are l2 coherent
+ */
+
+#define PWR_KEY 0x050 /* (WO) Power manager key register */
+#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */
+#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */
+#define GPU_FEATURES_LO 0x060 /* (RO) GPU features, low word */
+#define GPU_FEATURES_HI 0x064 /* (RO) GPU features, high word */
+#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */
+#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */
+#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */
+#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */
+
+#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */
+#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */
+#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */
+#define THREAD_FEATURES 0x0AC /* (RO) Thread features */
+#define THREAD_TLS_ALLOC 0x310 /* (RO) Number of threads per core that TLS must be allocated for */
+
+#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */
+#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */
+#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */
+#define TEXTURE_FEATURES_3 0x0BC /* (RO) Support flags for texture order */
+
+#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
+
+#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */
+#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */
+
+#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */
+#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */
+
+#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */
+#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */
+
+#define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */
+#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */
+
+#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */
+#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */
+
+#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */
+#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */
+
+#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */
+#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */
+
+#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */
+#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */
+
+#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */
+#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */
+
+#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */
+#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */
+
+#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */
+#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */
+
+#define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */
+#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */
+
+#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */
+#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */
+
+#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */
+#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */
+
+#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */
+#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */
+
+#define STACK_PWROFF_LO 0xE30 /* (RO) Core stack power off bitmap, low word */
+#define STACK_PWROFF_HI 0xE34 /* (RO) Core stack power off bitmap, high word */
+
+#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */
+#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */
+
+#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */
+#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */
+
+#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */
+#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */
+
+#define ASN_HASH_0 0x02C0 /* (RW) ASN hash function argument 0 */
+#define ASN_HASH(n) (ASN_HASH_0 + (n)*4)
+#define ASN_HASH_COUNT 3
+
+#define STACK_PWRTRANS_LO 0xE40 /* (RO) Core stack power transition bitmap, low word */
+#define STACK_PWRTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */
+
+#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */
+#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */
+
+#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */
+#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */
+
+#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */
+#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */
+
+#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */
+#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */
+
+#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */
+#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */
+#define L2_MMU_CONFIG 0xF0C /* (RW) L2 cache and MMU configuration (implementation-specific) */
+
+/* Job control registers */
+
+#define JOB_CONTROL_BASE 0x1000
+
+#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r))
+
+#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */
+#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */
+#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */
+#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */
+
+/* MMU control registers */
+
+#define MEMORY_MANAGEMENT_BASE 0x2000
+#define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r))
+
+#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */
+#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */
+#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */
+#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */
+
+#define MMU_AS0 0x400 /* Configuration registers for address space 0 */
+#define MMU_AS1 0x440 /* Configuration registers for address space 1 */
+#define MMU_AS2 0x480 /* Configuration registers for address space 2 */
+#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */
+#define MMU_AS4 0x500 /* Configuration registers for address space 4 */
+#define MMU_AS5 0x540 /* Configuration registers for address space 5 */
+#define MMU_AS6 0x580 /* Configuration registers for address space 6 */
+#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */
+#define MMU_AS8 0x600 /* Configuration registers for address space 8 */
+#define MMU_AS9 0x640 /* Configuration registers for address space 9 */
+#define MMU_AS10 0x680 /* Configuration registers for address space 10 */
+#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */
+#define MMU_AS12 0x700 /* Configuration registers for address space 12 */
+#define MMU_AS13 0x740 /* Configuration registers for address space 13 */
+#define MMU_AS14 0x780 /* Configuration registers for address space 14 */
+#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */
+
+/* MMU address space control registers */
+
+#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
+
+#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */
+#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */
+#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */
+#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. */
+#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */
+#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */
+#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */
+#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */
+#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */
+#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */
+#define AS_STATUS 0x28 /* (RO) Status flags for address space n */
+
+/* (RW) Translation table configuration for address space n, low word */
+#define AS_TRANSCFG_LO 0x30
+/* (RW) Translation table configuration for address space n, high word */
+#define AS_TRANSCFG_HI 0x34
+/* (RO) Secondary fault address for address space n, low word */
+#define AS_FAULTEXTRA_LO 0x38
+/* (RO) Secondary fault address for address space n, high word */
+#define AS_FAULTEXTRA_HI 0x3C
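+
+/* Illustrative sketch (not part of the original header): computing the
+ * absolute offset of a per-address-space register with MMU_AS_REG(). For
+ * example, the low word of the translation table base for address space 3:
+ *
+ *   __u32 off = MMU_AS_REG(3, AS_TRANSTAB_LO);
+ *   // = MEMORY_MANAGEMENT_BASE + MMU_AS0 + (3 << 6) + AS_TRANSTAB_LO
+ *   // = 0x2000 + 0x400 + 0xC0 + 0x00 = 0x24C0
+ */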
+
+/* End Register Offsets */
+
+#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON)
+
+/*
+ * MMU_IRQ_RAWSTAT register values. Values are valid also for
+ * MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers.
+ */
+
+#define MMU_PAGE_FAULT_FLAGS 16
+
+/* Macros returning a bitmask to retrieve page fault or bus error flags from
+ * MMU registers
+ */
+#define MMU_PAGE_FAULT(n) (1UL << (n))
+#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))
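+
+/* Illustrative sketch (not part of the original header): testing the per-AS
+ * fault bits of an MMU_IRQ_STATUS value 'status', where 'as_nr' is an
+ * address space number in the range 0..15:
+ *
+ *   if (status & MMU_PAGE_FAULT(as_nr))
+ *           ; // page fault pending on address space 'as_nr'
+ *   if (status & MMU_BUS_ERROR(as_nr))
+ *           ; // bus error pending on address space 'as_nr'
+ */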
+
+/*
+ * Begin AARCH64 MMU TRANSTAB register values
+ */
+#define MMU_HW_OUTA_BITS 40
+#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4))
+
+/*
+ * Begin MMU STATUS register values
+ */
+#define AS_STATUS_AS_ACTIVE 0x01
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3)
+
+#define AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0
+#define AS_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFF << AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT)
+#define AS_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \
+ (((reg_val)&AS_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT)
+#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_0 0xC0
+
+#define AS_FAULTSTATUS_ACCESS_TYPE_SHIFT 8
+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << AS_FAULTSTATUS_ACCESS_TYPE_SHIFT)
+#define AS_FAULTSTATUS_ACCESS_TYPE_GET(reg_val) \
+ (((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT)
+
+#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0)
+#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1)
+#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2)
+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3)
+
+#define AS_FAULTSTATUS_SOURCE_ID_SHIFT 16
+#define AS_FAULTSTATUS_SOURCE_ID_MASK (0xFFFF << AS_FAULTSTATUS_SOURCE_ID_SHIFT)
+#define AS_FAULTSTATUS_SOURCE_ID_GET(reg_val) \
+ (((reg_val)&AS_FAULTSTATUS_SOURCE_ID_MASK) >> AS_FAULTSTATUS_SOURCE_ID_SHIFT)
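+
+/* Illustrative sketch (not part of the original header): decoding a value
+ * 'fs' read from AS_FAULTSTATUS using the GET macros above:
+ *
+ *   __u32 exception = AS_FAULTSTATUS_EXCEPTION_TYPE_GET(fs);
+ *   __u32 access    = AS_FAULTSTATUS_ACCESS_TYPE_GET(fs);
+ *   __u32 source    = AS_FAULTSTATUS_SOURCE_ID_GET(fs);
+ *
+ *   if (access == AS_FAULTSTATUS_ACCESS_TYPE_WRITE)
+ *           ; // the faulting access was a write
+ */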
+
+/*
+ * Begin MMU TRANSCFG register values
+ */
+#define AS_TRANSCFG_ADRMODE_LEGACY 0
+#define AS_TRANSCFG_ADRMODE_UNMAPPED 1
+#define AS_TRANSCFG_ADRMODE_IDENTITY 2
+#define AS_TRANSCFG_ADRMODE_AARCH64_4K 6
+#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8
+
+#define AS_TRANSCFG_ADRMODE_MASK 0xF
+
+/*
+ * Begin TRANSCFG register values
+ */
+#define AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24)
+
+#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28))
+#define AS_TRANSCFG_PTW_SH_OS (2ull << 28)
+#define AS_TRANSCFG_PTW_SH_IS (3ull << 28)
+#define AS_TRANSCFG_R_ALLOCATE (1ull << 30)
+
+/*
+ * Begin Command Values
+ */
+
+/* AS_COMMAND register commands */
+#define AS_COMMAND_NOP 0x00 /* NOP Operation */
+#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and AS_MEMATTR to all MMUs */
+#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */
+#define AS_COMMAND_UNLOCK 0x03 /* Issue an unlock region command to all MMUs */
+/* Flush all L2 caches then issue a flush region command to all MMUs
+ * (deprecated - only for use with T60x)
+ */
+#define AS_COMMAND_FLUSH 0x04
+/* Flush all L2 caches then issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH_PT 0x04
+/* Wait for memory accesses to complete, flush all the L1 caches, then flush
+ * all L2 caches and issue a flush region command to all MMUs
+ */
+#define AS_COMMAND_FLUSH_MEM 0x05
+
+/* GPU_STATUS values */
+#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */
+#define GPU_STATUS_CYCLE_COUNT_ACTIVE (1 << 6) /* Set if the cycle counter is active. */
+#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */
+
+/* PRFCNT_CONFIG register values */
+#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */
+#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */
+#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */
+
+/* The performance counters are disabled. */
+#define PRFCNT_CONFIG_MODE_OFF 0
+/* The performance counters are enabled, but are only written out when a
+ * PRFCNT_SAMPLE command is issued using the GPU_COMMAND register.
+ */
+#define PRFCNT_CONFIG_MODE_MANUAL 1
+/* The performance counters are enabled, and are written out each time a tile
+ * finishes rendering.
+ */
+#define PRFCNT_CONFIG_MODE_TILE 2
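+
+/* Illustrative sketch (not part of the original header): composing a
+ * PRFCNT_CONFIG value for manual sampling, where 'as_nr' (address space) and
+ * 'setselect' are caller-chosen values:
+ *
+ *   __u32 cfg = (PRFCNT_CONFIG_MODE_MANUAL << PRFCNT_CONFIG_MODE_SHIFT) |
+ *               (as_nr << PRFCNT_CONFIG_AS_SHIFT) |
+ *               (setselect << PRFCNT_CONFIG_SETSELECT_SHIFT);
+ */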
+
+/* AS<n>_MEMATTR values from MMU_MEMATTR_STAGE1: */
+/* Use GPU implementation-defined caching policy. */
+#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_FORCE_TO_CACHE_ALL 0x8Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_WRITE_ALLOC 0x8Dull
+
+/* Use GPU implementation-defined caching policy. */
+#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL 0x4Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_LPAE_WRITE_ALLOC 0x4Dull
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF 0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull
+/* There is no LPAE support for non-cacheable, since the memory type is always
+ * write-back.
+ * Marking this setting as reserved for LPAE
+ */
+#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED
+
+/* L2_MMU_CONFIG register */
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23)
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT)
+
+/* End L2_MMU_CONFIG register */
+
+/* THREAD_* registers */
+
+/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */
+#define IMPLEMENTATION_UNSPECIFIED 0
+#define IMPLEMENTATION_SILICON 1
+#define IMPLEMENTATION_FPGA 2
+#define IMPLEMENTATION_MODEL 3
+
+/* Default values when registers are not supported by the implemented hardware */
+#define THREAD_MT_DEFAULT 256
+#define THREAD_MWS_DEFAULT 256
+#define THREAD_MBS_DEFAULT 256
+#define THREAD_MR_DEFAULT 1024
+#define THREAD_MTQ_DEFAULT 4
+#define THREAD_MTGS_DEFAULT 10
+
+/* End THREAD_* registers */
+
+/* SHADER_CONFIG register */
+#define SC_LS_ALLOW_ATTR_TYPES (1ul << 16)
+#define SC_TLS_HASH_ENABLE (1ul << 17)
+#define SC_LS_ATTR_CHECK_DISABLE (1ul << 18)
+#define SC_VAR_ALGORITHM (1ul << 29)
+/* End SHADER_CONFIG register */
+
+/* TILER_CONFIG register */
+#define TC_CLOCK_GATE_OVERRIDE (1ul << 0)
+/* End TILER_CONFIG register */
+
+/* L2_CONFIG register */
+#define L2_CONFIG_SIZE_SHIFT 16
+#define L2_CONFIG_SIZE_MASK (0xFFul << L2_CONFIG_SIZE_SHIFT)
+#define L2_CONFIG_HASH_SHIFT 24
+#define L2_CONFIG_HASH_MASK (0xFFul << L2_CONFIG_HASH_SHIFT)
+#define L2_CONFIG_ASN_HASH_ENABLE_SHIFT 24
+#define L2_CONFIG_ASN_HASH_ENABLE_MASK (1ul << L2_CONFIG_ASN_HASH_ENABLE_SHIFT)
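+
+/* Illustrative sketch (not part of the original header): extracting the
+ * fields of a value 'l2_config' read from the L2_CONFIG register:
+ *
+ *   __u32 l2_size = (l2_config & L2_CONFIG_SIZE_MASK) >> L2_CONFIG_SIZE_SHIFT;
+ *   __u32 l2_hash = (l2_config & L2_CONFIG_HASH_MASK) >> L2_CONFIG_HASH_SHIFT;
+ */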
+/* End L2_CONFIG register */
+
+/* IDVS_GROUP register */
+#define IDVS_GROUP_SIZE_SHIFT (16)
+#define IDVS_GROUP_MAX_SIZE (0x3F)
+
+#endif /* _UAPI_KBASE_GPU_REGMAP_H_ */
diff --git a/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h b/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h
new file mode 100644
index 0000000..cd81421
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h
@@ -0,0 +1,1200 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_BASE_JM_KERNEL_H_
+#define _UAPI_BASE_JM_KERNEL_H_
+
+#include <linux/types.h>
+
+/* Memory allocation, access/hint flags.
+ *
+ * See base_mem_alloc_flags.
+ */
+
+/* IN */
+/* Read access CPU side
+ */
+#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0)
+
+/* Write access CPU side
+ */
+#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1)
+
+/* Read access GPU side
+ */
+#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2)
+
+/* Write access GPU side
+ */
+#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3)
+
+/* Execute allowed on the GPU side
+ */
+#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4)
+
+/* Will be permanently mapped in kernel space.
+ * Flag is only allowed on allocations originating from kbase.
+ */
+#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5)
+
+/* The allocation will completely reside within the same 4GB chunk in the GPU
+ * virtual space.
+ * Since this flag is primarily required only for TLS memory, which will
+ * neither contain executable code nor be used for the Tiler heap,
+ * it can't be used along with the BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags.
+ */
+#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6)
+
+/* Userspace is not allowed to free this memory.
+ * Flag is only allowed on allocations originating from kbase.
+ */
+#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7)
+
+#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8)
+
+/* Grow backing store on GPU Page Fault
+ */
+#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9)
+
+/* Page coherence Outer shareable, if available
+ */
+#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10)
+
+/* Page coherence Inner shareable
+ */
+#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11)
+
+/* IN/OUT */
+/* Should be cached on the CPU, returned if actually cached
+ */
+#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12)
+
+/* IN/OUT */
+/* Must have same VA on both the GPU and the CPU
+ */
+#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13)
+
+/* OUT */
+/* Must call mmap to acquire a GPU address for the allocation
+ */
+#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14)
+
+/* IN */
+/* Page coherence Outer shareable, required.
+ */
+#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15)
+
+/* Protected memory
+ */
+#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16)
+
+/* Not needed physical memory
+ */
+#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17)
+
+/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the
+ * addresses to be the same
+ */
+#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18)
+
+/**
+ * Bit 19 is reserved.
+ *
+ * Do not remove, use the next unreserved bit for new flags
+ */
+#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19)
+
+/**
+ * Memory starting from the end of the initial commit is aligned to 'extension'
+ * pages, where 'extension' must be a power of 2 and no more than
+ * BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES
+ */
+#define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20)
+
+/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu
+ * mode. Some components within the GPU might only be able to access memory
+ * that is GPU cacheable. Refer to the specific GPU implementation for more
+ * details. The 3 shareability flags will be ignored for GPU uncached memory.
+ * If used while importing USER_BUFFER type memory, then the import will fail
+ * if the memory is not aligned to GPU and CPU cache line width.
+ */
+#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21)
+
+/*
+ * Bits [22:25] for group_id (0~15).
+ *
+ * base_mem_group_id_set() should be used to pack a memory group ID into a
+ * base_mem_alloc_flags value instead of accessing the bits directly.
+ * base_mem_group_id_get() should be used to extract the memory group ID from
+ * a base_mem_alloc_flags value.
+ */
+#define BASEP_MEM_GROUP_ID_SHIFT 22
+#define BASE_MEM_GROUP_ID_MASK \
+ ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT)
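+
+/* Illustrative sketch (not part of the original header), mirroring what the
+ * base_mem_group_id_set()/get() helpers mentioned above are expected to do:
+ *
+ *   flags |= ((base_mem_alloc_flags)group_id << BASEP_MEM_GROUP_ID_SHIFT) &
+ *            BASE_MEM_GROUP_ID_MASK;                       // pack
+ *   group_id = (flags & BASE_MEM_GROUP_ID_MASK) >>
+ *              BASEP_MEM_GROUP_ID_SHIFT;                   // extract
+ */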
+
+/* Must do CPU cache maintenance when imported memory is mapped/unmapped
+ * on GPU. Currently applicable to dma-buf type only.
+ */
+#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26)
+
+/* Use the GPU VA chosen by the kernel client */
+#define BASE_MEM_FLAG_MAP_FIXED ((base_mem_alloc_flags)1 << 27)
+
+/* OUT */
+/* Kernel side cache sync ops required */
+#define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28)
+
+/* Force trimming of JIT allocations when creating a new allocation */
+#define BASEP_MEM_PERFORM_JIT_TRIM ((base_mem_alloc_flags)1 << 29)
+
+/* Number of bits used as flags for base memory management
+ *
+ * Must be kept in sync with the base_mem_alloc_flags flags
+ */
+#define BASE_MEM_FLAGS_NR_BITS 30
+
+/* A mask of all the flags which are only valid for allocations within kbase,
+ * and may not be passed from user space.
+ */
+#define BASEP_MEM_FLAGS_KERNEL_ONLY \
+ (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | \
+ BASE_MEM_FLAG_MAP_FIXED | BASEP_MEM_PERFORM_JIT_TRIM)
+
+/* A mask for all output bits, excluding IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
+
+/* A mask for all input bits, including IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_INPUT_MASK \
+ (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
+
+/* A mask of all currently reserved flags
+ */
+#define BASE_MEM_FLAGS_RESERVED \
+ (BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_19)
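+
+/* Illustrative sketch (not part of the original header): a validity check a
+ * caller might apply to user-supplied allocation flags, using the masks above:
+ *
+ *   if ((flags & ~BASE_MEM_FLAGS_INPUT_MASK) ||
+ *       (flags & BASEP_MEM_FLAGS_KERNEL_ONLY) ||
+ *       (flags & BASE_MEM_FLAGS_RESERVED))
+ *           ; // reject the request
+ */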
+
+#define BASEP_MEM_INVALID_HANDLE (0ull << 12)
+#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12)
+#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12)
+#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12)
+#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12)
+/* reserved handles ..47<<PAGE_SHIFT for future special handles */
+#define BASE_MEM_COOKIE_BASE (64ul << 12)
+#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \
+ BASE_MEM_COOKIE_BASE)
+
+/* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the
+ * initial commit is aligned to 'extension' pages, where 'extension' must be a power
+ * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES
+ */
+#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0)
+
+/**
+ * If set, the heap info address points to a __u32 holding the used size in bytes;
+ * otherwise it points to a __u64 holding the lowest address of unused memory.
+ */
+#define BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE (1 << 1)
+
+/**
+ * Valid set of just-in-time memory allocation flags
+ *
+ * Note: BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE cannot be set if heap_info_gpu_addr
+ * in %base_jit_alloc_info is 0 (atom with BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE set
+ * and heap_info_gpu_addr being 0 will be rejected).
+ */
+#define BASE_JIT_ALLOC_VALID_FLAGS \
+ (BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP | BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE)
+
+/**
+ * typedef base_context_create_flags - Flags to pass to ::base_context_init.
+ *
+ * Flags can be ORed together to enable multiple things.
+ *
+ * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
+ * not collide with them.
+ */
+typedef __u32 base_context_create_flags;
+
+/* No flags set */
+#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0)
+
+/* Base context is embedded in a cctx object (flag used for CINSTR
+ * software counter macros)
+ */
+#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0)
+
+/* Base context is a 'System Monitor' context for Hardware counters.
+ *
+ * One important side effect of this is that job submission is disabled.
+ */
+#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \
+ ((base_context_create_flags)1 << 1)
+
+/* Bit-shift used to encode a memory group ID in base_context_create_flags
+ */
+#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3)
+
+/* Bitmask used to encode a memory group ID in base_context_create_flags
+ */
+#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \
+ ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)
+
+/* Bitpattern describing the base_context_create_flags that can be
+ * passed to the kernel
+ */
+#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \
+ (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \
+ BASEP_CONTEXT_MMU_GROUP_ID_MASK)
+
+/* Bitpattern describing the ::base_context_create_flags that can be
+ * passed to base_context_init()
+ */
+#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \
+ (BASE_CONTEXT_CCTX_EMBEDDED | BASEP_CONTEXT_CREATE_KERNEL_FLAGS)
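+
+/* Illustrative sketch (not part of the original header): building context
+ * creation flags that disable job submission and select memory group
+ * 'group_id', then checking them against the kernel-allowed mask:
+ *
+ *   base_context_create_flags flags =
+ *           BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED |
+ *           (((base_context_create_flags)group_id <<
+ *             BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) &
+ *            BASEP_CONTEXT_MMU_GROUP_ID_MASK);
+ *
+ *   if (flags & ~BASEP_CONTEXT_CREATE_KERNEL_FLAGS)
+ *           ; // not acceptable for the kernel
+ */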
+
+/*
+ * Private flags used on the base context
+ *
+ * These start at bit 31, and run down to zero.
+ *
+ * They share the same space as base_context_create_flags, and so must
+ * not collide with them.
+ */
+
+/* Private flag tracking whether job descriptor dumping is disabled */
+#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED \
+ ((base_context_create_flags)(1 << 31))
+
+/* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
+ * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST)
+ */
+#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0)
+
+/* Indicate that job dumping is enabled. This could affect certain timers
+ * to account for the performance impact.
+ */
+#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1)
+
+#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \
+ BASE_TLSTREAM_JOB_DUMPING_ENABLED)
+/*
+ * Dependency stuff, keep it private for now. May want to expose it if
+ * we decide to make the number of semaphores a configurable
+ * option.
+ */
+#define BASE_JD_ATOM_COUNT 256
+
+/* Maximum number of concurrent render passes.
+ */
+#define BASE_JD_RP_COUNT (256)
+
+/* Set/reset values for a software event */
+#define BASE_JD_SOFT_EVENT_SET ((unsigned char)1)
+#define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0)
+
+/**
+ * struct base_jd_udata - Per-job data
+ *
+ * This structure is used to store per-job data, and is completely unused
+ * by the Base driver. It can be used to store things such as a callback
+ * function pointer or data needed to handle job completion. It is guaranteed to be
+ * untouched by the Base driver.
+ *
+ * @blob: per-job data array
+ */
+struct base_jd_udata {
+ __u64 blob[2];
+};
+
+/**
+ * typedef base_jd_dep_type - Job dependency type.
+ *
+ * A flags field will be inserted into the atom structure to specify whether a
+ * dependency is a data or ordering dependency (by putting it before/after
+ * 'core_req' in the structure it should be possible to add without changing
+ * the structure size).
+ * When the flag is set for a particular dependency, to signal that it is an
+ * ordering-only dependency, errors will not be propagated.
+ */
+typedef __u8 base_jd_dep_type;
+
+#define BASE_JD_DEP_TYPE_INVALID (0) /**< Invalid dependency */
+#define BASE_JD_DEP_TYPE_DATA (1U << 0) /**< Data dependency */
+#define BASE_JD_DEP_TYPE_ORDER (1U << 1) /**< Order dependency */
+
+/**
+ * typedef base_jd_core_req - Job chain hardware requirements.
+ *
+ * A job chain must specify what GPU features it needs to allow the
+ * driver to schedule the job correctly. Not specifying the
+ * correct settings can/will cause early job termination. Multiple
+ * values can be ORed together to specify multiple requirements.
+ * Special case is ::BASE_JD_REQ_DEP, which is used to express complex
+ * dependencies, and that doesn't execute anything on the hardware.
+ */
+typedef __u32 base_jd_core_req;
+
+/* Requirements that come from the HW */
+
+/* No requirement, dependency only
+ */
+#define BASE_JD_REQ_DEP ((base_jd_core_req)0)
+
+/* Requires fragment shaders
+ */
+#define BASE_JD_REQ_FS ((base_jd_core_req)1 << 0)
+
+/* Requires compute shaders
+ *
+ * This covers any of the following GPU job types:
+ * - Vertex Shader Job
+ * - Geometry Shader Job
+ * - An actual Compute Shader Job
+ *
+ * Compare this with BASE_JD_REQ_ONLY_COMPUTE, which specifies that the
+ * job is specifically just the "Compute Shader" job type, and not the "Vertex
+ * Shader" nor the "Geometry Shader" job type.
+ */
+#define BASE_JD_REQ_CS ((base_jd_core_req)1 << 1)
+
+/* Requires tiling */
+#define BASE_JD_REQ_T ((base_jd_core_req)1 << 2)
+
+/* Requires cache flushes */
+#define BASE_JD_REQ_CF ((base_jd_core_req)1 << 3)
+
+/* Requires value writeback */
+#define BASE_JD_REQ_V ((base_jd_core_req)1 << 4)
+
+/* SW-only requirements - the HW does not expose these as part of the job slot
+ * capabilities
+ */
+
+/* Requires fragment job with AFBC encoding */
+#define BASE_JD_REQ_FS_AFBC ((base_jd_core_req)1 << 13)
+
+/* SW-only requirement: coalesce completion events.
+ * If this bit is set then completion of this atom will not cause an event to
+ * be sent to userspace, whether successful or not; completion events will be
+ * deferred until an atom completes which does not have this bit set.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES.
+ */
+#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5)
+
+/* SW Only requirement: the job chain requires a coherent core group. We don't
+ * mind which coherent core group is used.
+ */
+#define BASE_JD_REQ_COHERENT_GROUP ((base_jd_core_req)1 << 6)
+
+/* SW Only requirement: The performance counters should be enabled only when
+ * they are needed, to reduce power consumption.
+ */
+#define BASE_JD_REQ_PERMON ((base_jd_core_req)1 << 7)
+
+/* SW Only requirement: External resources are referenced by this atom.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE and
+ * BASE_JD_REQ_SOFT_EVENT_WAIT.
+ */
+#define BASE_JD_REQ_EXTERNAL_RESOURCES ((base_jd_core_req)1 << 8)
+
+/* SW Only requirement: Software defined job. Jobs with this bit set will not be
+ * submitted to the hardware but will cause some action to happen within the
+ * driver
+ */
+#define BASE_JD_REQ_SOFT_JOB ((base_jd_core_req)1 << 9)
+
+#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1)
+#define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2)
+#define BASE_JD_REQ_SOFT_FENCE_WAIT (BASE_JD_REQ_SOFT_JOB | 0x3)
+
+/* 0x4 RESERVED for now */
+
+/* SW only requirement: event wait/trigger job.
+ *
+ * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set.
+ * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the
+ * other waiting jobs. It completes immediately.
+ * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it
+ * possible for other jobs to wait upon. It completes immediately.
+ */
+#define BASE_JD_REQ_SOFT_EVENT_WAIT (BASE_JD_REQ_SOFT_JOB | 0x5)
+#define BASE_JD_REQ_SOFT_EVENT_SET (BASE_JD_REQ_SOFT_JOB | 0x6)
+#define BASE_JD_REQ_SOFT_EVENT_RESET (BASE_JD_REQ_SOFT_JOB | 0x7)
+
+#define BASE_JD_REQ_SOFT_DEBUG_COPY (BASE_JD_REQ_SOFT_JOB | 0x8)
+
+/* SW only requirement: Just In Time allocation
+ *
+ * This job requests a single or multiple just-in-time allocations through a
+ * list of base_jit_alloc_info structure which is passed via the jc element of
+ * the atom. The number of base_jit_alloc_info structures present in the
+ * list is passed via the nr_extres element of the atom
+ *
+ * It should be noted that the id entry in base_jit_alloc_info must not
+ * be reused until it has been released via BASE_JD_REQ_SOFT_JIT_FREE.
+ *
+ * Should this soft job fail, it is expected that a BASE_JD_REQ_SOFT_JIT_FREE
+ * soft job will still be submitted to free the JIT allocation.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_ALLOC (BASE_JD_REQ_SOFT_JOB | 0x9)
+
+/* SW only requirement: Just In Time free
+ *
+ * This job requests a single or multiple just-in-time allocations created by
+ * BASE_JD_REQ_SOFT_JIT_ALLOC to be freed. The ID list of the just-in-time
+ * allocations is passed via the jc element of the atom.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_FREE (BASE_JD_REQ_SOFT_JOB | 0xa)
+
+/* SW only requirement: Map external resource
+ *
+ * This job requests that external resource(s) be mapped once the dependencies
+ * of the job have been satisfied. The list of external resources is
+ * passed via the jc element of the atom which is a pointer to a
+ * base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_MAP (BASE_JD_REQ_SOFT_JOB | 0xb)
+
+/* SW only requirement: Unmap external resource
+ *
+ * This job requests that external resource(s) be unmapped once the dependencies
+ * of the job have been satisfied. The list of external resources is
+ * passed via the jc element of the atom which is a pointer to a
+ * base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP (BASE_JD_REQ_SOFT_JOB | 0xc)
+
+/* HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders)
+ *
+ * This indicates that the Job Chain contains GPU jobs of the 'Compute
+ * Shaders' type.
+ *
+ * In contrast to BASE_JD_REQ_CS, this does not indicate that the Job
+ * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs.
+ */
+#define BASE_JD_REQ_ONLY_COMPUTE ((base_jd_core_req)1 << 10)
+
+/* HW Requirement: Use the base_jd_atom::device_nr field to specify a
+ * particular core group
+ *
+ * If both BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag
+ * takes priority
+ *
+ * This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms.
+ *
+ * If the core availability policy is keeping the required core group turned
+ * off, then the job will fail with a BASE_JD_EVENT_PM_EVENT error code.
+ */
+#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11)
+
+/* SW Flag: If this bit is set then the successful completion of this atom
+ * will not cause an event to be sent to userspace
+ */
+#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE ((base_jd_core_req)1 << 12)
+
+/* SW Flag: If this bit is set then completion of this atom will not cause an
+ * event to be sent to userspace, whether successful or not.
+ */
+#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14)
+
+/* SW Flag: Skip GPU cache clean and invalidation before starting a GPU job.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job starts which does not have this bit set or a job completes
+ * which does not have the BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use
+ * if the CPU may have written to memory addressed by the job since the last job
+ * without this bit set was submitted.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15)
+
+/* SW Flag: Skip GPU cache clean and invalidation after a GPU job completes.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job completes which does not have this bit set or a job starts
+ * which does not have the BASE_JD_REQ_SKIP_CACHE_START bit set. Do not use
+ * if the CPU may read from or partially overwrite memory addressed by the job
+ * before the next job without this bit set completes.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16)
+
+/* Request the atom be executed on a specific job slot.
+ *
+ * When this flag is specified, it takes precedence over any existing job slot
+ * selection logic.
+ */
+#define BASE_JD_REQ_JOB_SLOT ((base_jd_core_req)1 << 17)
+
+/* SW-only requirement: The atom is the start of a renderpass.
+ *
+ * If this bit is set then the job chain will be soft-stopped if it causes the
+ * GPU to write beyond the end of the physical pages backing the tiler heap, and
+ * committing more memory to the heap would exceed an internal threshold. It may
+ * be resumed after running one of the job chains attached to an atom with
+ * BASE_JD_REQ_END_RENDERPASS set and the same renderpass ID. It may be
+ * resumed multiple times until it completes without memory usage exceeding the
+ * threshold.
+ *
+ * Usually used with BASE_JD_REQ_T.
+ */
+#define BASE_JD_REQ_START_RENDERPASS ((base_jd_core_req)1 << 18)
+
+/* SW-only requirement: The atom is the end of a renderpass.
+ *
+ * If this bit is set then the atom incorporates the CPU address of a
+ * base_jd_fragment object instead of the GPU address of a job chain.
+ *
+ * Which job chain is run depends upon whether the atom with the same renderpass
+ * ID and the BASE_JD_REQ_START_RENDERPASS bit set completed normally or
+ * was soft-stopped when it exceeded an upper threshold for tiler heap memory
+ * usage.
+ *
+ * It also depends upon whether one of the job chains attached to the atom has
+ * already been run as part of the same renderpass (in which case it would have
+ * written unresolved multisampled and otherwise-discarded output to temporary
+ * buffers that need to be read back). The job chain for doing a forced read and
+ * forced write (from/to temporary buffers) is run as many times as necessary.
+ *
+ * Usually used with BASE_JD_REQ_FS.
+ */
+#define BASE_JD_REQ_END_RENDERPASS ((base_jd_core_req)1 << 19)
+
+/* SW-only requirement: The atom needs to run on a limited core mask affinity.
+ *
+ * If this bit is set then the kbase_context.limited_core_mask will be applied
+ * to the affinity.
+ */
+#define BASE_JD_REQ_LIMITED_CORE_MASK ((base_jd_core_req)1 << 20)
+
+/* These requirement bits are currently unused in base_jd_core_req
+ */
+#define BASEP_JD_REQ_RESERVED \
+ (~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \
+ BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \
+ BASE_JD_REQ_EVENT_COALESCE | \
+ BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \
+ BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \
+ BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END | \
+ BASE_JD_REQ_JOB_SLOT | BASE_JD_REQ_START_RENDERPASS | \
+ BASE_JD_REQ_END_RENDERPASS | BASE_JD_REQ_LIMITED_CORE_MASK))
+
+/* Mask of all bits in base_jd_core_req that control the type of the atom.
+ *
+ * This allows dependency only atoms to have flags set
+ */
+#define BASE_JD_REQ_ATOM_TYPE \
+ (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \
+ BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE)
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of a soft job.
+ */
+#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f)
+
+/* Returns non-zero value if core requirements passed define a soft job or
+ * a dependency only job.
+ */
+#define BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req) \
+ (((core_req) & BASE_JD_REQ_SOFT_JOB) || \
+ ((core_req) & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP)
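+
+/* Illustrative sketch (not part of the original header): classifying a
+ * core_req value using the masks and macros above:
+ *
+ *   if (BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req))
+ *           ; // soft job or dependency-only atom, nothing runs on the HW
+ *   else if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+ *           ; // pure compute job chain
+ *
+ *   // For soft jobs, the exact soft job type is identified by:
+ *   __u32 soft_type = core_req & BASE_JD_REQ_SOFT_JOB_TYPE;
+ */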
+
+/**
+ * enum kbase_jd_atom_state
+ *
+ * @KBASE_JD_ATOM_STATE_UNUSED: Atom is not used.
+ * @KBASE_JD_ATOM_STATE_QUEUED: Atom is queued in JD.
+ * @KBASE_JD_ATOM_STATE_IN_JS: Atom has been given to JS (is runnable/running).
+ * @KBASE_JD_ATOM_STATE_HW_COMPLETED: Atom has been completed, but not yet
+ * handed back to job dispatcher for
+ * dependency resolution.
+ * @KBASE_JD_ATOM_STATE_COMPLETED: Atom has been completed, but not yet handed
+ * back to userspace.
+ */
+enum kbase_jd_atom_state {
+ KBASE_JD_ATOM_STATE_UNUSED,
+ KBASE_JD_ATOM_STATE_QUEUED,
+ KBASE_JD_ATOM_STATE_IN_JS,
+ KBASE_JD_ATOM_STATE_HW_COMPLETED,
+ KBASE_JD_ATOM_STATE_COMPLETED
+};
+
+/**
+ * typedef base_atom_id - Type big enough to store an atom number in.
+ */
+typedef __u8 base_atom_id;
+
+/**
+ * struct base_dependency -
+ *
+ * @atom_id: An atom number
+ * @dependency_type: Dependency type
+ */
+struct base_dependency {
+ base_atom_id atom_id;
+ base_jd_dep_type dependency_type;
+};
+
+/**
+ * struct base_jd_fragment - Set of GPU fragment job chains used for rendering.
+ *
+ * @norm_read_norm_write: Job chain for full rendering.
+ * GPU address of a fragment job chain to render in the
+ * circumstance where the tiler job chain did not exceed
+ * its memory usage threshold and no fragment job chain
+ * was previously run for the same renderpass.
+ * It is used no more than once per renderpass.
+ * @norm_read_forced_write: Job chain for starting incremental
+ * rendering.
+ * GPU address of a fragment job chain to render in
+ * the circumstance where the tiler job chain exceeded
+ * its memory usage threshold for the first time and
+ * no fragment job chain was previously run for the
+ * same renderpass.
+ * Writes unresolved multisampled and normally-
+ * discarded output to temporary buffers that must be
+ * read back by a subsequent forced_read job chain
+ * before the renderpass is complete.
+ * It is used no more than once per renderpass.
+ * @forced_read_forced_write: Job chain for continuing incremental
+ * rendering.
+ * GPU address of a fragment job chain to render in
+ * the circumstance where the tiler job chain
+ * exceeded its memory usage threshold again
+ * and a fragment job chain was previously run for
+ * the same renderpass.
+ * Reads unresolved multisampled and
+ * normally-discarded output from temporary buffers
+ * written by a previous forced_write job chain and
+ * writes the same to temporary buffers again.
+ * It is used as many times as required until
+ * rendering completes.
+ * @forced_read_norm_write: Job chain for ending incremental rendering.
+ * GPU address of a fragment job chain to render in the
+ * circumstance where the tiler job chain did not
+ * exceed its memory usage threshold this time and a
+ * fragment job chain was previously run for the same
+ * renderpass.
+ * Reads unresolved multisampled and normally-discarded
+ * output from temporary buffers written by a previous
+ * forced_write job chain in order to complete a
+ * renderpass.
+ * It is used no more than once per renderpass.
+ *
+ * This structure is referenced by the main atom structure if
+ * BASE_JD_REQ_END_RENDERPASS is set in the base_jd_core_req.
+ */
+struct base_jd_fragment {
+ __u64 norm_read_norm_write;
+ __u64 norm_read_forced_write;
+ __u64 forced_read_forced_write;
+ __u64 forced_read_norm_write;
+};
+
+/**
+ * typedef base_jd_prio - Base Atom priority.
+ *
+ * Only certain priority levels are actually implemented, as specified by the
+ * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority
+ * level that is not one of those defined below.
+ *
+ * Priority levels only affect scheduling after the atoms have had dependencies
+ * resolved. For example, a low priority atom that has had its dependencies
+ * resolved might run before a higher priority atom that has not had its
+ * dependencies resolved.
+ *
+ * In general, fragment atoms do not affect non-fragment atoms with
+ * lower priorities, and vice versa. One exception is that there is only one
+ * priority value for each context. So a high-priority (e.g.) fragment atom
+ * could increase its context priority, causing its non-fragment atoms to also
+ * be scheduled sooner.
+ *
+ * The atoms are scheduled as follows with respect to their priorities:
+ * * Let atoms 'X' and 'Y' be for the same job slot, both with dependencies
+ * resolved, and let atom 'X' have a higher priority than atom 'Y'
+ * * If atom 'Y' is currently running on the HW, then it is interrupted to
+ * allow atom 'X' to run soon after
+ * * If instead neither atom 'Y' nor atom 'X' are running, then when choosing
+ * the next atom to run, atom 'X' will always be chosen instead of atom 'Y'
+ * * Any two atoms that have the same priority could run in any order with
+ * respect to each other. That is, there is no ordering constraint between
+ * atoms of the same priority.
+ *
+ * The sysfs file 'js_ctx_scheduling_mode' is used to control how atoms are
+ * scheduled between contexts. The default value, 0, will cause higher-priority
+ * atoms to be scheduled first, regardless of their context. The value 1 will
+ * use a round-robin algorithm when deciding which context's atoms to schedule
+ * next, so higher-priority atoms can only preempt lower priority atoms within
+ * the same context. See KBASE_JS_SYSTEM_PRIORITY_MODE and
+ * KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE for more details.
+ */
+typedef __u8 base_jd_prio;
+
+/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_MEDIUM ((base_jd_prio)0)
+/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and
+ * BASE_JD_PRIO_LOW
+ */
+#define BASE_JD_PRIO_HIGH ((base_jd_prio)1)
+/* Low atom priority. */
+#define BASE_JD_PRIO_LOW ((base_jd_prio)2)
+/* Real-Time atom priority. This is a priority higher than BASE_JD_PRIO_HIGH,
+ * BASE_JD_PRIO_MEDIUM, and BASE_JD_PRIO_LOW
+ */
+#define BASE_JD_PRIO_REALTIME ((base_jd_prio)3)
+
+/* Count of the number of priority levels. This itself is not a valid
+ * base_jd_prio setting
+ */
+#define BASE_JD_NR_PRIO_LEVELS 4
+
+/**
+ * struct base_jd_atom_v2 - Node of a dependency graph used to submit a
+ * GPU job chain or soft-job to the kernel driver.
+ *
+ * @jc: GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS
+ * is set in the base_jd_core_req) the CPU address of a
+ * base_jd_fragment object.
+ * @udata: User data.
+ * @extres_list: List of external resources.
+ * @nr_extres: Number of external resources or JIT allocations.
+ * @jit_id: Zero-terminated array of IDs of just-in-time memory
+ * allocations written to by the atom. When the atom
+ * completes, the value stored at the
+ * &struct_base_jit_alloc_info.heap_info_gpu_addr of
+ * each allocation is read in order to enforce an
+ * overall physical memory usage limit.
+ * @pre_dep: Pre-dependencies. One needs to use the SETTER function to assign
+ * this field; this is done in order to reduce the possibility of
+ * improper assignment of a dependency field.
+ * @atom_number: Unique number to identify the atom.
+ * @prio: Atom priority. Refer to base_jd_prio for more details.
+ * @device_nr: Core group when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP
+ * specified.
+ * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified.
+ * @core_req: Core requirements.
+ * @renderpass_id: Renderpass identifier used to associate an atom that has
+ * BASE_JD_REQ_START_RENDERPASS set in its core requirements
+ * with an atom that has BASE_JD_REQ_END_RENDERPASS set.
+ * @padding: Unused. Must be zero.
+ *
+ * This structure has changed since UK 10.2 for which base_jd_core_req was a
+ * __u16 value.
+ *
+ * In UK 10.3 a core_req field of a __u32 type was added to the end of the
+ * structure, and the place in the structure previously occupied by __u16
+ * core_req was kept but renamed to compat_core_req.
+ *
+ * From UK 11.20 - compat_core_req is now occupied by __u8 jit_id[2].
+ * Compatibility with UK 10.x from UK 11.y is not handled because
+ * the major version increase prevents this.
+ *
+ * For UK 11.20 jit_id[2] must be initialized to zero.
+ */
+struct base_jd_atom_v2 {
+ __u64 jc;
+ struct base_jd_udata udata;
+ __u64 extres_list;
+ __u16 nr_extres;
+ __u8 jit_id[2];
+ struct base_dependency pre_dep[2];
+ base_atom_id atom_number;
+ base_jd_prio prio;
+ __u8 device_nr;
+ __u8 jobslot;
+ base_jd_core_req core_req;
+ __u8 renderpass_id;
+ __u8 padding[7];
+};
+
+/**
+ * struct base_jd_atom - Same as base_jd_atom_v2, but has an extra seq_nr
+ * at the beginning.
+ *
+ * @seq_nr: Sequence number of logical grouping of atoms.
+ * @jc: GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS
+ * is set in the base_jd_core_req) the CPU address of a
+ * base_jd_fragment object.
+ * @udata: User data.
+ * @extres_list: List of external resources.
+ * @nr_extres: Number of external resources or JIT allocations.
+ * @jit_id: Zero-terminated array of IDs of just-in-time memory
+ * allocations written to by the atom. When the atom
+ * completes, the value stored at the
+ * &struct_base_jit_alloc_info.heap_info_gpu_addr of
+ * each allocation is read in order to enforce an
+ * overall physical memory usage limit.
+ * @pre_dep: Pre-dependencies. One needs to use the SETTER function to assign
+ * this field; this is done in order to reduce the possibility of
+ * improper assignment of a dependency field.
+ * @atom_number: Unique number to identify the atom.
+ * @prio: Atom priority. Refer to base_jd_prio for more details.
+ * @device_nr: Core group when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP
+ * specified.
+ * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified.
+ * @core_req: Core requirements.
+ * @renderpass_id: Renderpass identifier used to associate an atom that has
+ * BASE_JD_REQ_START_RENDERPASS set in its core requirements
+ * with an atom that has BASE_JD_REQ_END_RENDERPASS set.
+ * @padding: Unused. Must be zero.
+ */
+typedef struct base_jd_atom {
+ __u64 seq_nr;
+ __u64 jc;
+ struct base_jd_udata udata;
+ __u64 extres_list;
+ __u16 nr_extres;
+ __u8 jit_id[2];
+ struct base_dependency pre_dep[2];
+ base_atom_id atom_number;
+ base_jd_prio prio;
+ __u8 device_nr;
+ __u8 jobslot;
+ base_jd_core_req core_req;
+ __u8 renderpass_id;
+ __u8 padding[7];
+} base_jd_atom;
+
+/* Job chain event code bits
+ * Defines the bits used to create ::base_jd_event_code
+ */
+enum {
+ BASE_JD_SW_EVENT_KERNEL = (1u << 15), /* Kernel side event */
+ BASE_JD_SW_EVENT = (1u << 14), /* SW defined event */
+ /* Event indicates success (SW events only) */
+ BASE_JD_SW_EVENT_SUCCESS = (1u << 13),
+ BASE_JD_SW_EVENT_JOB = (0u << 11), /* Job related event */
+ BASE_JD_SW_EVENT_BAG = (1u << 11), /* Bag related event */
+ BASE_JD_SW_EVENT_INFO = (2u << 11), /* Misc/info event */
+ BASE_JD_SW_EVENT_RESERVED = (3u << 11), /* Reserved event type */
+ /* Mask to extract the type from an event code */
+ BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11)
+};
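+
+/* Illustrative sketch (not part of the original header): decomposing an event
+ * code 'code' using the bits above:
+ *
+ *   if (code & BASE_JD_SW_EVENT) {
+ *           // software-defined event
+ *           int success = !!(code & BASE_JD_SW_EVENT_SUCCESS);
+ *           __u32 type = code & BASE_JD_SW_EVENT_TYPE_MASK;
+ *           // type is one of BASE_JD_SW_EVENT_JOB/_BAG/_INFO/_RESERVED
+ *   } else {
+ *           // hardware event
+ *   }
+ */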
+
+/**
+ * enum base_jd_event_code - Job chain event codes
+ *
+ * @BASE_JD_EVENT_RANGE_HW_NONFAULT_START: Start of hardware non-fault status
+ * codes.
+ * Obscurely, BASE_JD_EVENT_TERMINATED
+ * indicates a real fault, because the
+ * job was hard-stopped.
+ * @BASE_JD_EVENT_NOT_STARTED: Can't be seen by userspace, treated as
+ * 'previous job done'.
+ * @BASE_JD_EVENT_STOPPED: Can't be seen by userspace, becomes
+ * TERMINATED, DONE or JOB_CANCELLED.
+ * @BASE_JD_EVENT_TERMINATED: This is actually a fault status code - the job
+ * was hard stopped.
+ * @BASE_JD_EVENT_ACTIVE: Can't be seen by userspace, jobs only returned on
+ * complete/fail/cancel.
+ * @BASE_JD_EVENT_RANGE_HW_NONFAULT_END: End of hardware non-fault status codes.
+ * Obscurely, BASE_JD_EVENT_TERMINATED
+ * indicates a real fault,
+ * because the job was hard-stopped.
+ * @BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START: Start of hardware fault and
+ * software error status codes.
+ * @BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END: End of hardware fault and
+ * software error status codes.
+ * @BASE_JD_EVENT_RANGE_SW_SUCCESS_START: Start of software success status
+ * codes.
+ * @BASE_JD_EVENT_RANGE_SW_SUCCESS_END: End of software success status codes.
+ * @BASE_JD_EVENT_RANGE_KERNEL_ONLY_START: Start of kernel-only status codes.
+ * Such codes are never returned to
+ * user-space.
+ * @BASE_JD_EVENT_RANGE_KERNEL_ONLY_END: End of kernel-only status codes.
+ * @BASE_JD_EVENT_DONE: atom has completed successfully
+ * @BASE_JD_EVENT_JOB_CONFIG_FAULT: Atom dependencies configuration error which
+ * shall result in a failed atom
+ * @BASE_JD_EVENT_JOB_POWER_FAULT: The job could not be executed because the
+ * part of the memory system required to access
+ * job descriptors was not powered on
+ * @BASE_JD_EVENT_JOB_READ_FAULT: Reading a job descriptor into the Job
+ * manager failed
+ * @BASE_JD_EVENT_JOB_WRITE_FAULT: Writing a job descriptor from the Job
+ * manager failed
+ * @BASE_JD_EVENT_JOB_AFFINITY_FAULT: The job could not be executed because the
+ * specified affinity mask does not intersect
+ * any available cores
+ * @BASE_JD_EVENT_JOB_BUS_FAULT: A bus access failed while executing a job
+ * @BASE_JD_EVENT_INSTR_INVALID_PC: A shader instruction with an illegal program
+ * counter was executed.
+ * @BASE_JD_EVENT_INSTR_INVALID_ENC: A shader instruction with an illegal
+ * encoding was executed.
+ * @BASE_JD_EVENT_INSTR_TYPE_MISMATCH: A shader instruction was executed where
+ * the instruction encoding did not match the
+ * instruction type encoded in the program
+ * counter.
+ * @BASE_JD_EVENT_INSTR_OPERAND_FAULT: A shader instruction was executed that
+ * contained invalid combinations of operands.
+ * @BASE_JD_EVENT_INSTR_TLS_FAULT: A shader instruction was executed that tried
+ * to access the thread local storage section
+ * of another thread.
+ * @BASE_JD_EVENT_INSTR_ALIGN_FAULT: A shader instruction was executed that
+ * tried to do an unsupported unaligned memory
+ * access.
+ * @BASE_JD_EVENT_INSTR_BARRIER_FAULT: A shader instruction was executed that
+ * failed to complete an instruction barrier.
+ * @BASE_JD_EVENT_DATA_INVALID_FAULT: Any data structure read as part of the job
+ * contains invalid combinations of data.
+ * @BASE_JD_EVENT_TILE_RANGE_FAULT: Tile or fragment shading was asked to
+ * process a tile that is entirely outside the
+ * bounding box of the frame.
+ * @BASE_JD_EVENT_STATE_FAULT: Matches ADDR_RANGE_FAULT. A virtual address
+ * has been found that exceeds the virtual
+ * address range.
+ * @BASE_JD_EVENT_OUT_OF_MEMORY: The tiler ran out of memory when executing a job.
+ * @BASE_JD_EVENT_UNKNOWN: If multiple jobs in a job chain fail, only
+ * the first one that reports an error will set
+ * and return full error information.
+ * Subsequent failing jobs will not update the
+ * error status registers, and may write an
+ * error status of UNKNOWN.
+ * @BASE_JD_EVENT_DELAYED_BUS_FAULT: The GPU received a bus fault for access to
+ * physical memory where the original virtual
+ * address is no longer available.
+ * @BASE_JD_EVENT_SHAREABILITY_FAULT: Matches GPU_SHAREABILITY_FAULT. A cache
+ * has detected that the same line has been
+ * accessed as both shareable and non-shareable
+ * memory from inside the GPU.
+ * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1: A memory access hit an invalid table
+ * entry at level 1 of the translation table.
+ * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2: A memory access hit an invalid table
+ * entry at level 2 of the translation table.
+ * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3: A memory access hit an invalid table
+ * entry at level 3 of the translation table.
+ * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4: A memory access hit an invalid table
+ * entry at level 4 of the translation table.
+ * @BASE_JD_EVENT_PERMISSION_FAULT: A memory access could not be allowed due to
+ * the permission flags set in translation
+ * table
+ * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1: A bus fault occurred while reading
+ * level 0 of the translation tables.
+ * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2: A bus fault occurred while reading
+ * level 1 of the translation tables.
+ * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3: A bus fault occurred while reading
+ * level 2 of the translation tables.
+ * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4: A bus fault occurred while reading
+ * level 3 of the translation tables.
+ * @BASE_JD_EVENT_ACCESS_FLAG: Matches ACCESS_FLAG_0. A memory access hit a
+ * translation table entry with the ACCESS_FLAG
+ * bit set to zero in level 0 of the
+ * page table, and the DISABLE_AF_FAULT flag
+ * was not set.
+ * @BASE_JD_EVENT_MEM_GROWTH_FAILED: raised for JIT_ALLOC atoms that failed to
+ * grow memory on demand
+ * @BASE_JD_EVENT_JOB_CANCELLED: raised when this atom was hard-stopped or its
+ * dependencies failed
+ * @BASE_JD_EVENT_JOB_INVALID: raised for many reasons, including invalid data
+ * in the atom which overlaps with
+ * BASE_JD_EVENT_JOB_CONFIG_FAULT, or if the
+ * platform doesn't support the feature specified in
+ * the atom.
+ * @BASE_JD_EVENT_PM_EVENT: TODO: remove as it's not used
+ * @BASE_JD_EVENT_TIMED_OUT: TODO: remove as it's not used
+ * @BASE_JD_EVENT_BAG_INVALID: TODO: remove as it's not used
+ * @BASE_JD_EVENT_PROGRESS_REPORT: TODO: remove as it's not used
+ * @BASE_JD_EVENT_BAG_DONE: TODO: remove as it's not used
+ * @BASE_JD_EVENT_DRV_TERMINATED: this is a special event generated to indicate
+ * to userspace that the KBase context has been
+ * destroyed and Base should stop listening for
+ * further events
+ * @BASE_JD_EVENT_REMOVED_FROM_NEXT: raised when an atom that was configured in
+ * the GPU has to be retried (but it has not
+ * started) due to e.g., GPU reset
+ * @BASE_JD_EVENT_END_RP_DONE: this is used for incremental rendering to signal
+ * the completion of a renderpass. This value
+ * shouldn't be returned to userspace; it is not
+ * clear where it is reset back to BASE_JD_EVENT_DONE.
+ *
+ * HW and low-level SW events are represented by event codes.
+ * The status of jobs which succeeded are also represented by
+ * an event code (see @BASE_JD_EVENT_DONE).
+ * Events are usually reported as part of a &struct base_jd_event.
+ *
+ * The event codes are encoded in the following way:
+ * * 10:0 - subtype
+ * * 12:11 - type
+ * * 13 - SW success (only valid if the SW bit is set)
+ * * 14 - SW event (HW event if not set)
+ * * 15 - Kernel event (should never be seen in userspace)
+ *
+ * Events are split up into ranges as follows:
+ * * BASE_JD_EVENT_RANGE_<description>_START
+ * * BASE_JD_EVENT_RANGE_<description>_END
+ *
+ * code is in <description>'s range when:
+ * BASE_JD_EVENT_RANGE_<description>_START <= code <
+ * BASE_JD_EVENT_RANGE_<description>_END
+ *
+ * Ranges can be asserted for adjacency by testing that the END of the previous
+ * is equal to the START of the next. This is useful for optimizing some tests
+ * for range.
+ *
+ * A limitation is that the last member of this enum must explicitly be handled
+ * (with an assert-unreachable statement) in switch statements that use
+ * variables of this type. Otherwise, the compiler warns that we have not
+ * handled that enum value.
+ */
+enum base_jd_event_code {
+ /* HW defined exceptions */
+ BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0,
+
+ /* non-fatal exceptions */
+ BASE_JD_EVENT_NOT_STARTED = 0x00,
+ BASE_JD_EVENT_DONE = 0x01,
+ BASE_JD_EVENT_STOPPED = 0x03,
+ BASE_JD_EVENT_TERMINATED = 0x04,
+ BASE_JD_EVENT_ACTIVE = 0x08,
+
+ BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40,
+ BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40,
+
+ /* job exceptions */
+ BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40,
+ BASE_JD_EVENT_JOB_POWER_FAULT = 0x41,
+ BASE_JD_EVENT_JOB_READ_FAULT = 0x42,
+ BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43,
+ BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44,
+ BASE_JD_EVENT_JOB_BUS_FAULT = 0x48,
+ BASE_JD_EVENT_INSTR_INVALID_PC = 0x50,
+ BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51,
+ BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52,
+ BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53,
+ BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54,
+ BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55,
+ BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56,
+ BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58,
+ BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59,
+ BASE_JD_EVENT_STATE_FAULT = 0x5A,
+ BASE_JD_EVENT_OUT_OF_MEMORY = 0x60,
+ BASE_JD_EVENT_UNKNOWN = 0x7F,
+
+ /* GPU exceptions */
+ BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80,
+ BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88,
+
+ /* MMU exceptions */
+ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1,
+ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2,
+ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3,
+ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4,
+ BASE_JD_EVENT_PERMISSION_FAULT = 0xC8,
+ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1,
+ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2,
+ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3,
+ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4,
+ BASE_JD_EVENT_ACCESS_FLAG = 0xD8,
+
+ /* SW defined exceptions */
+ BASE_JD_EVENT_MEM_GROWTH_FAILED =
+ BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000,
+ BASE_JD_EVENT_TIMED_OUT =
+ BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001,
+ BASE_JD_EVENT_JOB_CANCELLED =
+ BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002,
+ BASE_JD_EVENT_JOB_INVALID =
+ BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003,
+ BASE_JD_EVENT_PM_EVENT =
+ BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004,
+
+ BASE_JD_EVENT_BAG_INVALID =
+ BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003,
+
+ BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT |
+ BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+ BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT |
+ BASE_JD_SW_EVENT_SUCCESS | 0x000,
+
+ BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT |
+ BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000,
+ BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS |
+ BASE_JD_SW_EVENT_BAG | 0x000,
+ BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT |
+ BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000,
+
+ BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT |
+ BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+ BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT |
+ BASE_JD_SW_EVENT_KERNEL | 0x000,
+ BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT |
+ BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000,
+ BASE_JD_EVENT_END_RP_DONE = BASE_JD_SW_EVENT |
+ BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x001,
+
+ BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT |
+ BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF
+};
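+
+/*
+ * Illustrative helpers (editorial sketch, not part of the original header):
+ * the range membership test and the SW-success bit test described above can
+ * be expressed directly in terms of the markers and flags defined in this
+ * file.
+ */
+static __inline__ int base_jd_event_code_in_kernel_range(
+	enum base_jd_event_code code)
+{
+	return (code >= BASE_JD_EVENT_RANGE_KERNEL_ONLY_START) &&
+	       (code < BASE_JD_EVENT_RANGE_KERNEL_ONLY_END);
+}
+
+static __inline__ int base_jd_event_code_is_sw_success(
+	enum base_jd_event_code code)
+{
+	return (code & (BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS)) ==
+	       (BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS);
+}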
+
+/**
+ * struct base_jd_event_v2 - Event reporting structure
+ *
+ * @event_code: event code.
+ * @atom_number: the atom number that has completed.
+ * @udata: user data.
+ *
+ * This structure is used by the kernel driver to report information
+ * about GPU events. They can either be HW-specific events or low-level
+ * SW events, such as job-chain completion.
+ *
+ * The event code contains an event type field which can be extracted
+ * by ANDing with BASE_JD_SW_EVENT_TYPE_MASK.
+ */
+struct base_jd_event_v2 {
+ enum base_jd_event_code event_code;
+ base_atom_id atom_number;
+ struct base_jd_udata udata;
+};
+
+/**
+ * struct base_dump_cpu_gpu_counters - Structure for
+ * BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS
+ * jobs.
+ * @system_time: gpu timestamp
+ * @cycle_counter: gpu cycle count
+ * @sec: cpu time(sec)
+ * @usec: cpu time(usec)
+ * @padding: padding
+ *
+ * This structure is stored into the memory pointed to by the @jc field
+ * of &struct base_jd_atom.
+ *
+ * It must not occupy the same CPU cache line(s) as any neighboring data.
+ * This is to avoid cases where access to pages containing the structure
+ * is shared between cached and un-cached memory regions, which would
+ * cause memory corruption.
+ */
+
+struct base_dump_cpu_gpu_counters {
+ __u64 system_time;
+ __u64 cycle_counter;
+ __u64 sec;
+ __u32 usec;
+ __u8 padding[36];
+};
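+
+/*
+ * Illustrative sketch (editorial, not part of the original header). To honour
+ * the cache-line requirement above, the structure would typically be placed
+ * in its own allocation, padded and aligned to the cache line size. The
+ * 64-byte cache line and the use of aligned_alloc() are assumptions of this
+ * example, and in practice the memory must also be a GPU-accessible
+ * allocation referenced by the atom's jc field:
+ *
+ *   size_t sz = (sizeof(struct base_dump_cpu_gpu_counters) + 63) & ~(size_t)63;
+ *   struct base_dump_cpu_gpu_counters *dump = aligned_alloc(64, sz);
+ */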
+
+#endif /* _UAPI_BASE_JM_KERNEL_H_ */
diff --git a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h
new file mode 100644
index 0000000..1eb6bcb
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h
@@ -0,0 +1,223 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_JM_IOCTL_H_
+#define _UAPI_KBASE_JM_IOCTL_H_
+
+#include <asm-generic/ioctl.h>
+#include <linux/types.h>
+
+/*
+ * 11.1:
+ * - Add BASE_MEM_TILER_ALIGN_TOP under base_mem_alloc_flags
+ * 11.2:
+ * - KBASE_MEM_QUERY_FLAGS can return KBASE_REG_PF_GROW and KBASE_REG_PROTECTED,
+ * which some user-side clients prior to 11.2 might fault if they received
+ * them
+ * 11.3:
+ * - New ioctls KBASE_IOCTL_STICKY_RESOURCE_MAP and
+ * KBASE_IOCTL_STICKY_RESOURCE_UNMAP
+ * 11.4:
+ * - New ioctl KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET
+ * 11.5:
+ * - New ioctl: KBASE_IOCTL_MEM_JIT_INIT (old ioctl renamed to _OLD)
+ * 11.6:
+ * - Added flags field to base_jit_alloc_info structure, which can be used to
+ * specify pseudo chunked tiler alignment for JIT allocations.
+ * 11.7:
+ * - Removed UMP support
+ * 11.8:
+ * - Added BASE_MEM_UNCACHED_GPU under base_mem_alloc_flags
+ * 11.9:
+ * - Added BASE_MEM_PERMANENT_KERNEL_MAPPING and BASE_MEM_FLAGS_KERNEL_ONLY
+ * under base_mem_alloc_flags
+ * 11.10:
+ * - Enabled the use of nr_extres field of base_jd_atom_v2 structure for
+ * JIT_ALLOC and JIT_FREE type softjobs to enable multiple JIT allocations
+ * with one softjob.
+ * 11.11:
+ * - Added BASE_MEM_GPU_VA_SAME_4GB_PAGE under base_mem_alloc_flags
+ * 11.12:
+ * - Removed ioctl: KBASE_IOCTL_GET_PROFILING_CONTROLS
+ * 11.13:
+ * - New ioctl: KBASE_IOCTL_MEM_EXEC_INIT
+ * 11.14:
+ * - Add BASE_MEM_GROUP_ID_MASK, base_mem_group_id_get, base_mem_group_id_set
+ * under base_mem_alloc_flags
+ * 11.15:
+ * - Added BASEP_CONTEXT_MMU_GROUP_ID_MASK under base_context_create_flags.
+ * - Require KBASE_IOCTL_SET_FLAGS before BASE_MEM_MAP_TRACKING_HANDLE can be
+ * passed to mmap().
+ * 11.16:
+ * - Extended ioctl KBASE_IOCTL_MEM_SYNC to accept imported dma-buf.
+ * - Modified (backwards compatible) ioctl KBASE_IOCTL_MEM_IMPORT behavior for
+ * dma-buf. Now, buffers are mapped on GPU when first imported, no longer
+ * requiring external resource or sticky resource tracking. UNLESS,
+ * CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND is enabled.
+ * 11.17:
+ * - Added BASE_JD_REQ_JOB_SLOT.
+ * - Reused padding field in base_jd_atom_v2 to pass job slot number.
+ * - New ioctl: KBASE_IOCTL_GET_CPU_GPU_TIMEINFO
+ * 11.18:
+ * - Added BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP under base_mem_alloc_flags
+ * 11.19:
+ * - Extended base_jd_atom_v2 to allow a renderpass ID to be specified.
+ * 11.20:
+ * - Added new phys_pages member to kbase_ioctl_mem_jit_init for
+ * KBASE_IOCTL_MEM_JIT_INIT, previous variants of this renamed to use _10_2
+ * (replacing '_OLD') and _11_5 suffixes
+ * - Replaced compat_core_req (deprecated in 10.3) with jit_id[2] in
+ * base_jd_atom_v2. It must currently be initialized to zero.
+ * - Added heap_info_gpu_addr to base_jit_alloc_info, and
+ * BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE allowable in base_jit_alloc_info's
+ * flags member. Previous variants of this structure are kept and given _10_2
+ * and _11_5 suffixes.
+ * - The above changes are checked for safe values in usual builds
+ * 11.21:
+ * - v2.0 of mali_trace debugfs file, which now versions the file separately
+ * 11.22:
+ * - Added base_jd_atom (v3), which is seq_nr + base_jd_atom_v2.
+ * KBASE_IOCTL_JOB_SUBMIT supports both in parallel.
+ * 11.23:
+ * - Modified KBASE_IOCTL_MEM_COMMIT behavior to reject requests to modify
+ * the physical memory backing of JIT allocations. This was not supposed
+ * to be a valid use case, but it was allowed by the previous implementation.
+ * 11.24:
+ * - Added a sysfs file 'serialize_jobs' inside a new sub-directory
+ * 'scheduling'.
+ * 11.25:
+ * - Enabled JIT pressure limit in base/kbase by default
+ * 11.26:
+ * - Added kinstr_jm API
+ * 11.27:
+ * - Backwards compatible extension to HWC ioctl.
+ * 11.28:
+ * - Added kernel side cache ops needed hint
+ * 11.29:
+ * - Reserve ioctl 52
+ * 11.30:
+ * - Add a new priority level BASE_JD_PRIO_REALTIME
+ * - Add ioctl 54: This controls the priority setting.
+ * 11.31:
+ * - Added BASE_JD_REQ_LIMITED_CORE_MASK.
+ * - Added ioctl 55: set_limited_core_count.
+ */
+#define BASE_UK_VERSION_MAJOR 11
+#define BASE_UK_VERSION_MINOR 31
+
+/**
+ * struct kbase_ioctl_version_check - Check version compatibility between
+ * kernel and userspace
+ *
+ * @major: Major version number
+ * @minor: Minor version number
+ */
+struct kbase_ioctl_version_check {
+ __u16 major;
+ __u16 minor;
+};
+
+#define KBASE_IOCTL_VERSION_CHECK \
+ _IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check)
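+
+/*
+ * Illustrative usage sketch (editorial, not part of the original header);
+ * the device node path and the error handling are assumptions of this
+ * example:
+ *
+ *   int fd = open("/dev/mali0", O_RDWR | O_CLOEXEC);
+ *   struct kbase_ioctl_version_check vc = {
+ *           .major = BASE_UK_VERSION_MAJOR,
+ *           .minor = BASE_UK_VERSION_MINOR,
+ *   };
+ *   if (ioctl(fd, KBASE_IOCTL_VERSION_CHECK, &vc) < 0)
+ *           return -1;
+ *   // on success the kernel writes back the version it actually supports
+ */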
+
+
+/**
+ * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel
+ *
+ * @addr: Memory address of an array of struct base_jd_atom_v2 or v3
+ * @nr_atoms: Number of entries in the array
+ * @stride: sizeof(struct base_jd_atom_v2) or sizeof(struct base_jd_atom)
+ */
+struct kbase_ioctl_job_submit {
+ __u64 addr;
+ __u32 nr_atoms;
+ __u32 stride;
+};
+
+#define KBASE_IOCTL_JOB_SUBMIT \
+ _IOW(KBASE_IOCTL_TYPE, 2, struct kbase_ioctl_job_submit)
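+
+/*
+ * Illustrative usage sketch (editorial, not part of the original header).
+ * @stride tells the kernel which atom layout the array uses; the atom setup
+ * itself is elided here:
+ *
+ *   struct base_jd_atom_v2 atoms[2];   // populated by the caller
+ *   struct kbase_ioctl_job_submit js = {
+ *           .addr = (__u64)(uintptr_t)atoms,
+ *           .nr_atoms = 2,
+ *           .stride = sizeof(struct base_jd_atom_v2),
+ *   };
+ *   ioctl(fd, KBASE_IOCTL_JOB_SUBMIT, &js);
+ */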
+
+#define KBASE_IOCTL_POST_TERM \
+ _IO(KBASE_IOCTL_TYPE, 4)
+
+/**
+ * struct kbase_ioctl_soft_event_update - Update the status of a soft-event
+ * @event: GPU address of the event which has been updated
+ * @new_status: The new status to set
+ * @flags: Flags for future expansion
+ */
+struct kbase_ioctl_soft_event_update {
+ __u64 event;
+ __u32 new_status;
+ __u32 flags;
+};
+
+#define KBASE_IOCTL_SOFT_EVENT_UPDATE \
+ _IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update)
+
+/**
+ * struct kbase_kinstr_jm_fd_out - Explains the compatibility information for
+ * the `struct kbase_kinstr_jm_atom_state_change` structure returned from the
+ * kernel
+ *
+ * @size: The size of the `struct kbase_kinstr_jm_atom_state_change`
+ * @version: Represents a breaking change in the
+ * `struct kbase_kinstr_jm_atom_state_change`
+ * @padding: Explicit padding to get the structure up to 64bits. See
+ * https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst
+ *
+ * The `struct kbase_kinstr_jm_atom_state_change` may have extra members at the
+ * end of the structure that older user space might not understand. If the
+ * `version` is the same, the structure is still compatible with newer kernels.
+ * The `size` can be used to cast the opaque memory returned from the kernel.
+ */
+struct kbase_kinstr_jm_fd_out {
+ __u16 size;
+ __u8 version;
+ __u8 padding[5];
+};
+
+/**
+ * struct kbase_kinstr_jm_fd_in - Options when creating the file descriptor
+ *
+ * @count: Number of atom states that can be stored in the kernel circular
+ * buffer. Must be a power of two
+ * @padding: Explicit padding to get the structure up to 64bits. See
+ * https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst
+ */
+struct kbase_kinstr_jm_fd_in {
+ __u16 count;
+ __u8 padding[6];
+};
+
+union kbase_kinstr_jm_fd {
+ struct kbase_kinstr_jm_fd_in in;
+ struct kbase_kinstr_jm_fd_out out;
+};
+
+#define KBASE_IOCTL_KINSTR_JM_FD \
+ _IOWR(KBASE_IOCTL_TYPE, 51, union kbase_kinstr_jm_fd)
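+
+/*
+ * Illustrative usage sketch (editorial, not part of the original header).
+ * @count must be a power of two; treating the ioctl return value as the new
+ * kinstr_jm file descriptor is an assumption of this example:
+ *
+ *   union kbase_kinstr_jm_fd jm = { .in = { .count = 64 } };
+ *   int jm_fd = ioctl(kbase_fd, KBASE_IOCTL_KINSTR_JM_FD, &jm);
+ *   // jm.out.size and jm.out.version now describe the record layout
+ */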
+
+
+#define KBASE_IOCTL_VERSION_CHECK_RESERVED \
+ _IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check)
+
+#endif /* _UAPI_KBASE_JM_IOCTL_H_ */
diff --git a/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h b/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h
new file mode 100644
index 0000000..554c5a3
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h
@@ -0,0 +1,826 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Base structures shared with the kernel.
+ */
+
+#ifndef _UAPI_BASE_KERNEL_H_
+#define _UAPI_BASE_KERNEL_H_
+
+#include <linux/types.h>
+
+struct base_mem_handle {
+ struct {
+ __u64 handle;
+ } basep;
+};
+
+#include "mali_base_mem_priv.h"
+#include "gpu/mali_kbase_gpu_id.h"
+#include "gpu/mali_kbase_gpu_coherency.h"
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+#if defined(CDBG_ASSERT)
+#define LOCAL_ASSERT CDBG_ASSERT
+#elif defined(KBASE_DEBUG_ASSERT)
+#define LOCAL_ASSERT KBASE_DEBUG_ASSERT
+#else
+#if defined(__KERNEL__)
+#error assert macro not defined!
+#else
+#define LOCAL_ASSERT(...) ((void)#__VA_ARGS__)
+#endif
+#endif
+
+#if defined(PAGE_MASK) && defined(PAGE_SHIFT)
+#define LOCAL_PAGE_SHIFT PAGE_SHIFT
+#define LOCAL_PAGE_LSB ~PAGE_MASK
+#else
+#ifndef OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+#define OSU_CONFIG_CPU_PAGE_SIZE_LOG2 12
+#endif
+
+#if defined(OSU_CONFIG_CPU_PAGE_SIZE_LOG2)
+#define LOCAL_PAGE_SHIFT OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1)
+#else
+#error Failed to find page size
+#endif
+#endif
+
+/* Physical memory group ID for normal usage.
+ */
+#define BASE_MEM_GROUP_DEFAULT (0)
+
+/* Number of physical memory groups.
+ */
+#define BASE_MEM_GROUP_COUNT (16)
+
+/**
+ * typedef base_mem_alloc_flags - Memory allocation, access/hint flags.
+ *
+ * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator
+ * in order to determine the best cache policy. Some combinations are
+ * invalid (e.g. MEM_PROT_CPU_WR | MEM_HINT_CPU_RD, which would define a
+ * region that is write-only for the CPU yet hinted as heavily read by the
+ * CPU).
+ * Other flags are only meaningful to a particular allocator.
+ * More flags can be added to this list, as long as they don't clash
+ * (see BASE_MEM_FLAGS_NR_BITS for the number of the first free bit).
+ */
+typedef __u32 base_mem_alloc_flags;
+
+/* A mask for all the flags which are modifiable via the base_mem_set_flags
+ * interface.
+ */
+#define BASE_MEM_FLAGS_MODIFIABLE \
+ (BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \
+ BASE_MEM_COHERENT_LOCAL)
+
+/* A mask of all the flags that can be returned via the base_mem_get_flags()
+ * interface.
+ */
+#define BASE_MEM_FLAGS_QUERYABLE \
+ (BASE_MEM_FLAGS_INPUT_MASK & ~(BASE_MEM_SAME_VA | \
+ BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_DONT_NEED | \
+ BASE_MEM_IMPORT_SHARED | BASE_MEM_FLAGS_RESERVED | \
+ BASEP_MEM_FLAGS_KERNEL_ONLY))
+
+/**
+ * enum base_mem_import_type - Memory types supported by @a base_mem_import
+ *
+ * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type
+ * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int)
+ * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a
+ * base_mem_import_user_buffer
+ *
+ * Each type defines what the supported handle type is.
+ *
+ * If any new type is added here ARM must be contacted
+ * to allocate a numeric value for it.
+ * Do not just add a new type without synchronizing with ARM
+ * as future releases from ARM might include other new types
+ * which could clash with your custom types.
+ */
+enum base_mem_import_type {
+ BASE_MEM_IMPORT_TYPE_INVALID = 0,
+ /*
+ * Import type with value 1 is deprecated.
+ */
+ BASE_MEM_IMPORT_TYPE_UMM = 2,
+ BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3
+};
+
+/**
+ * struct base_mem_import_user_buffer - Handle of an imported user buffer
+ *
+ * @ptr: address of imported user buffer
+ * @length: length of imported user buffer in bytes
+ *
+ * This structure is used to represent a handle of an imported user buffer.
+ */
+
+struct base_mem_import_user_buffer {
+ __u64 ptr;
+ __u64 length;
+};
+
+/* Mask to detect 4GB boundary alignment */
+#define BASE_MEM_MASK_4GB 0xfffff000UL
+/* Mask to detect 4GB boundary (in page units) alignment */
+#define BASE_MEM_PFN_MASK_4GB (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT)
+
+/* Limit on the 'extension' parameter for an allocation with the
+ * BASE_MEM_TILER_ALIGN_TOP flag set
+ *
+ * This is the same as the maximum limit for a Buffer Descriptor's chunk size
+ */
+#define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2 \
+ (21u - (LOCAL_PAGE_SHIFT))
+#define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES \
+ (1ull << (BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2))
+
+/* Bit mask of cookies used for memory allocation setup */
+#define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */
+
+/* Maximum size allowed in a single KBASE_IOCTL_MEM_ALLOC call */
+#define KBASE_MEM_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */
+
+/*
+ * struct base_fence - Cross-device synchronisation fence.
+ *
+ * A fence is used to signal when the GPU has finished accessing a resource that
+ * may be shared with other devices, and also to delay work done asynchronously
+ * by the GPU until other devices have finished accessing a shared resource.
+ */
+struct base_fence {
+ struct {
+ int fd;
+ int stream_fd;
+ } basep;
+};
+
+/**
+ * struct base_mem_aliasing_info - Memory aliasing info
+ *
+ * Describes a memory handle to be aliased.
+ * A subset of the handle can be chosen for aliasing, given an offset and a
+ * length.
+ * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a
+ * region where a special page is mapped with a write-alloc cache setup,
+ * typically used when the write result of the GPU isn't needed, but the GPU
+ * must write anyway.
+ *
+ * Offset and length are specified in pages.
+ * Offset must be within the size of the handle.
+ * Offset+length must not overrun the size of the handle.
+ *
+ * @handle: Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ * @offset: Offset within the handle to start aliasing from, in pages.
+ * Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE.
+ * @length: Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ * specifies the number of times the special page is needed.
+ */
+struct base_mem_aliasing_info {
+ struct base_mem_handle handle;
+ __u64 offset;
+ __u64 length;
+};
+
+/* Maximum percentage of just-in-time memory allocation trimming to perform
+ * on free.
+ */
+#define BASE_JIT_MAX_TRIM_LEVEL (100)
+
+/* Maximum number of concurrent just-in-time memory allocations.
+ */
+#define BASE_JIT_ALLOC_COUNT (255)
+
+/* base_jit_alloc_info in use for kernel driver versions 10.2 to early 11.5
+ *
+ * jit_version is 1
+ *
+ * Due to the lack of padding specified, 32-bit and 64-bit user clients may
+ * have assumed different sizes for the struct
+ *
+ * An array of structures was not supported
+ */
+struct base_jit_alloc_info_10_2 {
+ __u64 gpu_alloc_addr;
+ __u64 va_pages;
+ __u64 commit_pages;
+ __u64 extension;
+ __u8 id;
+};
+
+/* base_jit_alloc_info introduced by kernel driver version 11.5, and in use up
+ * to 11.19
+ *
+ * This structure had a number of modifications during and after kernel driver
+ * version 11.5, but remains size-compatible throughout its version history;
+ * earlier variants stay compatible with later ones because the unused space
+ * in the structure must be zero-initialized.
+ *
+ * jit_version is 2
+ *
+ * Kernel driver version history:
+ * 11.5: Initial introduction with 'usage_id' and padding[5]. All padding bytes
+ * must be zero. Kbase minor version was not incremented, so some
+ * versions of 11.5 do not have this change.
+ * 11.5: Added 'bin_id' and 'max_allocations', replacing 2 padding bytes (Kbase
+ * minor version not incremented)
+ * 11.6: Added 'flags', replacing 1 padding byte
+ * 11.10: Arrays of this structure are supported
+ */
+struct base_jit_alloc_info_11_5 {
+ __u64 gpu_alloc_addr;
+ __u64 va_pages;
+ __u64 commit_pages;
+ __u64 extension;
+ __u8 id;
+ __u8 bin_id;
+ __u8 max_allocations;
+ __u8 flags;
+ __u8 padding[2];
+ __u16 usage_id;
+};
+
+/**
+ * struct base_jit_alloc_info - Structure which describes a JIT allocation
+ * request.
+ * @gpu_alloc_addr: The GPU virtual address to write the JIT
+ * allocated GPU virtual address to.
+ * @va_pages: The minimum number of virtual pages required.
+ * @commit_pages: The minimum number of physical pages which
+ * should back the allocation.
+ * @extension: Granularity of physical pages to grow the
+ * allocation by during a fault.
+ * @id: Unique ID provided by the caller, this is used
+ * to pair allocation and free requests.
+ * Zero is not a valid value.
+ * @bin_id: The JIT allocation bin, used in conjunction with
+ * @max_allocations to limit the number of each
+ * type of JIT allocation.
+ * @max_allocations: The maximum number of allocations allowed within
+ * the bin specified by @bin_id. Should be the same
+ * for all allocations within the same bin.
+ * @flags: flags specifying the special requirements for
+ * the JIT allocation, see
+ * %BASE_JIT_ALLOC_VALID_FLAGS
+ * @padding: Expansion space - should be initialised to zero
+ * @usage_id: A hint about which allocation should be reused.
+ * The kernel should attempt to use a previous
+ * allocation with the same usage_id
+ * @heap_info_gpu_addr: Pointer to an object in GPU memory describing
+ * the actual usage of the region.
+ *
+ * jit_version is 3.
+ *
+ * When modifications are made to this structure, it is still compatible with
+ * jit_version 3 when: a) the size is unchanged, and b) new members only
+ * replace the padding bytes.
+ *
+ * Previous jit_version history:
+ * jit_version == 1, refer to &base_jit_alloc_info_10_2
+ * jit_version == 2, refer to &base_jit_alloc_info_11_5
+ *
+ * Kbase version history:
+ * 11.20: added @heap_info_gpu_addr
+ */
+struct base_jit_alloc_info {
+ __u64 gpu_alloc_addr;
+ __u64 va_pages;
+ __u64 commit_pages;
+ __u64 extension;
+ __u8 id;
+ __u8 bin_id;
+ __u8 max_allocations;
+ __u8 flags;
+ __u8 padding[2];
+ __u16 usage_id;
+ __u64 heap_info_gpu_addr;
+};
+
+enum base_external_resource_access {
+ BASE_EXT_RES_ACCESS_SHARED,
+ BASE_EXT_RES_ACCESS_EXCLUSIVE
+};
+
+struct base_external_resource {
+ __u64 ext_resource;
+};
+
+
+/**
+ * The maximum number of external resources which can be mapped/unmapped
+ * in a single request.
+ */
+#define BASE_EXT_RES_COUNT_MAX 10
+
+/**
+ * struct base_external_resource_list - Structure which describes a list of
+ * external resources.
+ * @count: The number of resources.
+ * @ext_res: Array of external resources which is
+ * sized at allocation time.
+ */
+struct base_external_resource_list {
+ __u64 count;
+ struct base_external_resource ext_res[1];
+};
+
+struct base_jd_debug_copy_buffer {
+ __u64 address;
+ __u64 size;
+ struct base_external_resource extres;
+};
+
+#define GPU_MAX_JOB_SLOTS 16
+
+/**
+ * User-side Base GPU Property Queries
+ *
+ * The User-side Base GPU Property Query interface encapsulates two
+ * sub-modules:
+ *
+ * - "Dynamic GPU Properties"
+ * - "Base Platform Config GPU Properties"
+ *
+ * Base only deals with properties that vary between different GPU
+ * implementations - the Dynamic GPU properties and the Platform Config
+ * properties.
+ *
+ * For properties that are constant for the GPU Architecture, refer to the
+ * GPU module. However, we will discuss their relevance here just to
+ * provide background information.
+ *
+ * About the GPU Properties in Base and GPU modules
+ *
+ * The compile-time properties (Platform Config, GPU Compile-time
+ * properties) are exposed as pre-processor macros.
+ *
+ * Complementing the compile-time properties are the Dynamic GPU
+ * Properties, which act as a conduit for the GPU Configuration
+ * Discovery.
+ *
+ * In general, the dynamic properties are present to verify that the platform
+ * has been configured correctly with the right set of Platform Config
+ * Compile-time Properties.
+ *
+ * As a consistent guide across the entire DDK, the choice for dynamic or
+ * compile-time should consider the following, in order:
+ * 1. Can the code be written so that it doesn't need to know the
+ * implementation limits at all?
+ * 2. If you need the limits, get the information from the Dynamic Property
+ * lookup. This should be done once as you fetch the context, and then cached
+ * as part of the context data structure, so it's cheap to access.
+ * 3. If there's a clear and arguable inefficiency in using Dynamic Properties,
+ * then use a Compile-Time Property (Platform Config, or GPU Compile-time
+ * property). Examples of where this might be sensible follow:
+ * - Part of a critical inner-loop
+ * - Frequent re-use throughout the driver, causing significant extra load
+ * instructions or control flow that would be worthwhile optimizing out.
+ *
+ * We cannot provide an exhaustive set of examples, neither can we provide a
+ * rule for every possible situation. Use common sense, and think about: what
+ * the rest of the driver will be doing; how the compiler might represent the
+ * value if it is a compile-time constant; whether an OEM shipping multiple
+ * devices would benefit much more from a single DDK binary, instead of
+ * insignificant micro-optimizations.
+ *
+ * Dynamic GPU Properties
+ *
+ * Dynamic GPU properties are presented in two sets:
+ * 1. the commonly used properties in @ref base_gpu_props, which have been
+ * unpacked from GPU register bitfields.
+ * 2. The full set of raw, unprocessed properties in gpu_raw_gpu_props
+ * (also a member of base_gpu_props). All of these are presented in
+ * the packed form, as presented by the GPU registers themselves.
+ *
+ * The raw properties in gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it does not need to be processed
+ * by the driver. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ * The properties returned extend the GPU Configuration Discovery
+ * registers. For example, GPU clock speed is not specified in the GPU
+ * Architecture, but is necessary for OpenCL's clGetDeviceInfo() function.
+ *
+ * The GPU properties are obtained by a call to
+ * base_get_gpu_props(). This simply returns a pointer to a const
+ * base_gpu_props structure. It is constant for the life of a base
+ * context. Multiple calls to base_get_gpu_props() to a base context
+ * return the same pointer to a constant structure. This avoids cache pollution
+ * of the common data.
+ *
+ * This pointer must not be freed, because it does not point to the start of a
+ * region allocated by the memory allocator; instead, just close the @ref
+ * base_context.
+ *
+ *
+ * Kernel Operation
+ *
+ * During Base Context Create time, user-side makes a single kernel call:
+ * - A call to fill user memory with GPU information structures
+ *
+ * The kernel side will fill in the entire processed base_gpu_props structure
+ * that is provided, because this information is required on both the user
+ * and kernel sides; it does not make sense to decode it twice.
+ *
+ * Coherency groups must be derived from the bitmasks, but this can be done
+ * kernel side, and just once at kernel startup: Coherency groups must already
+ * be known kernel-side, to support chains that specify a 'Only Coherent Group'
+ * SW requirement, or 'Only Coherent Group with Tiler' SW requirement.
+ *
+ * Coherency Group calculation
+ *
+ * Creation of the coherent group data is done at device-driver startup, and so
+ * is one-time. This will most likely involve a loop with CLZ, shifting, and
+ * bit clearing on the L2_PRESENT mask, depending on whether the
+ * system is L2 Coherent. The number of shader cores is done by a
+ * population count, since faulty cores may be disabled during production,
+ * producing a non-contiguous mask.
+ *
+ * The memory requirements for this algorithm can be determined either by a __u64
+ * population count on the L2_PRESENT mask (a LUT helper already is
+ * required for the above), or simple assumption that there can be no more than
+ * 16 coherent groups, since core groups are typically 4 cores.
+ */
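+
+/*
+ * Illustrative sketch (editorial, not part of the original header): the
+ * coherent-group derivation described above comes down to population counts
+ * on the present bitmasks. __builtin_popcountll is a GCC/Clang builtin and
+ * is an assumption of this example.
+ */
+static __inline__ unsigned int base_example_count_core_groups(__u64 l2_present)
+{
+	/* One core group per L2 slice reported as present. */
+	return (unsigned int)__builtin_popcountll(l2_present);
+}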
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
+
+#define BASE_MAX_COHERENT_GROUPS 16
+/**
+ * struct mali_base_gpu_core_props - GPU core props info
+ * @product_id: Product specific value.
+ * @version_status: Status of the GPU release. No defined values, but starts at
+ * 0 and increases by one for each release status (alpha, beta, EAC, etc.).
+ * 4 bit values (0-15).
+ * @minor_revision: Minor release number of the GPU. "P" part of an "RnPn"
+ * release number.
+ * 8 bit values (0-255).
+ * @major_revision: Major release number of the GPU. "R" part of an "RnPn"
+ * release number.
+ * 4 bit values (0-15).
+ * @padding: padding to align to 8 bytes
+ * @gpu_freq_khz_max: The maximum GPU frequency. Reported to applications by
+ * clGetDeviceInfo()
+ * @log2_program_counter_size: Size of the shader program counter, in bits.
+ * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU. This
+ * is a bitpattern where a set bit indicates that the format is supported.
+ * Before using a texture format, it is recommended that the corresponding
+ * bit be checked.
+ * @gpu_available_memory_size: Theoretical maximum memory available to the GPU.
+ * It is unlikely that a client will be able to allocate all of this memory
+ * for their own purposes, but this at least provides an upper bound on the
+ * memory available to the GPU.
+ * This is required for OpenCL's clGetDeviceInfo() call when
+ * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
+ * client will not be expecting to allocate anywhere near this value.
+ * @num_exec_engines: The number of execution engines.
+ */
+struct mali_base_gpu_core_props {
+ __u32 product_id;
+ __u16 version_status;
+ __u16 minor_revision;
+ __u16 major_revision;
+ __u16 padding;
+ __u32 gpu_freq_khz_max;
+ __u32 log2_program_counter_size;
+ __u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+ __u64 gpu_available_memory_size;
+ __u8 num_exec_engines;
+};
+
+/*
+ * More information is possible - but associativity and bus width are not
+ * required by upper-level apis.
+ */
+struct mali_base_gpu_l2_cache_props {
+ __u8 log2_line_size;
+ __u8 log2_cache_size;
+ __u8 num_l2_slices; /* Number of L2C slices. 1 or higher */
+ __u8 padding[5];
+};
+
+struct mali_base_gpu_tiler_props {
+ __u32 bin_size_bytes; /* Max is 4*2^15 */
+ __u32 max_active_levels; /* Max is 2^15 */
+};
+
+/**
+ * struct mali_base_gpu_thread_props - GPU threading system details.
+ * @max_threads: Max. number of threads per core
+ * @max_workgroup_size: Max. number of threads per workgroup
+ * @max_barrier_size: Max. number of threads that can synchronize on a
+ * simple barrier
+ * @max_registers: Total size [1..65535] of the register file available
+ * per core.
+ * @max_task_queue: Max. tasks [1..255] which may be sent to a core
+ * before it becomes blocked.
+ * @max_thread_group_split: Max. allowed value [1..15] of the Thread Group Split
+ * field.
+ * @impl_tech: 0 = Not specified, 1 = Silicon, 2 = FPGA,
+ * 3 = SW Model/Emulation
+ * @padding: padding to align to 8 bytes
+ * @tls_alloc: Number of threads per core that TLS must be
+ * allocated for
+ */
+struct mali_base_gpu_thread_props {
+ __u32 max_threads;
+ __u32 max_workgroup_size;
+ __u32 max_barrier_size;
+ __u16 max_registers;
+ __u8 max_task_queue;
+ __u8 max_thread_group_split;
+ __u8 impl_tech;
+ __u8 padding[3];
+ __u32 tls_alloc;
+};
+
+/**
+ * struct mali_base_gpu_coherent_group - descriptor for a coherent group
+ * @core_mask: Core restriction mask required for the group
+ * @num_cores: Number of cores in the group
+ * @padding: padding to align to 8 bytes
+ *
+ * \c core_mask exposes all cores in that coherent group, and \c num_cores
+ * provides a cached population-count for that mask.
+ *
+ * @note Whilst all cores are exposed in the mask, not all may be available to
+ * the application, depending on the Kernel Power policy.
+ *
+ * @note if u64s must be 8-byte aligned, then this structure has 32-bits of
+ * wastage.
+ */
+struct mali_base_gpu_coherent_group {
+ __u64 core_mask;
+ __u16 num_cores;
+ __u16 padding[3];
+};
+
+/**
+ * struct mali_base_gpu_coherent_group_info - Coherency group information
+ * @num_groups: Number of coherent groups in the GPU.
+ * @num_core_groups: Number of core groups (coherent or not) in the GPU.
+ * Equivalent to the number of L2 Caches.
+ * The GPU Counter dumping writes 2048 bytes per core group, regardless
+ * of whether the core groups are coherent or not. Hence this member is
+ * needed to calculate how much memory is required for dumping.
+ * @note Do not use it to work out how many valid elements are in the
+ * group[] member. Use num_groups instead.
+ * @coherency: Coherency features of the memory, accessed by gpu_mem_features
+ * methods
+ * @padding: padding to align to 8 bytes
+ * @group: Descriptors of coherent groups
+ *
+ * Note that the sizes of the members could be reduced. However, the \c group
+ * member might be 8-byte aligned to ensure the __u64 core_mask is 8-byte
+ * aligned, thus leading to wastage if the other members sizes were reduced.
+ *
+ * The groups are sorted by core mask. The core masks are non-repeating and do
+ * not intersect.
+ */
+struct mali_base_gpu_coherent_group_info {
+ __u32 num_groups;
+ __u32 num_core_groups;
+ __u32 coherency;
+ __u32 padding;
+ struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
+};
+
+/**
+ * struct gpu_raw_gpu_props - A complete description of the GPU's Hardware
+ * Configuration Discovery registers.
+ * @shader_present: Shader core present bitmap
+ * @tiler_present: Tiler core present bitmap
+ * @l2_present: Level 2 cache present bitmap
+ * @stack_present: Core stack present bitmap
+ * @l2_features: L2 features
+ * @core_features: Core features
+ * @mem_features: Mem features
+ * @mmu_features: Mmu features
+ * @as_present: Bitmap of address spaces present
+ * @js_present: Job slots present
+ * @js_features: Array of job slot features.
+ * @tiler_features: Tiler features
+ * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU
+ * @gpu_id: GPU and revision identifier
+ * @thread_max_threads: Maximum number of threads per core
+ * @thread_max_workgroup_size: Maximum number of threads per workgroup
+ * @thread_max_barrier_size: Maximum number of threads per barrier
+ * @thread_features: Thread features
+ * @coherency_mode: Note: This is the _selected_ coherency mode rather than the
+ * available modes as exposed in the coherency_features register
+ * @thread_tls_alloc: Number of threads per core that TLS must be allocated for
+ * @gpu_features: GPU features
+ *
+ * The information is presented inefficiently for access. For frequent access,
+ * the values should be better expressed in an unpacked form in the
+ * base_gpu_props structure.
+ *
+ * The raw properties in gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it does not need to be processed
+ * by the driver. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ */
+struct gpu_raw_gpu_props {
+ __u64 shader_present;
+ __u64 tiler_present;
+ __u64 l2_present;
+ __u64 stack_present;
+ __u32 l2_features;
+ __u32 core_features;
+ __u32 mem_features;
+ __u32 mmu_features;
+
+ __u32 as_present;
+
+ __u32 js_present;
+ __u32 js_features[GPU_MAX_JOB_SLOTS];
+ __u32 tiler_features;
+ __u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+
+ __u32 gpu_id;
+
+ __u32 thread_max_threads;
+ __u32 thread_max_workgroup_size;
+ __u32 thread_max_barrier_size;
+ __u32 thread_features;
+
+ /*
+ * Note: This is the _selected_ coherency mode rather than the
+ * available modes as exposed in the coherency_features register.
+ */
+ __u32 coherency_mode;
+
+ __u32 thread_tls_alloc;
+ __u64 gpu_features;
+};
+
+/**
+ * struct base_gpu_props - Return structure for base_get_gpu_props().
+ * @core_props: Core props.
+ * @l2_props: L2 props.
+ * @unused_1: Keep for backwards compatibility.
+ * @tiler_props: Tiler props.
+ * @thread_props: Thread props.
+ * @raw_props: This member is large, likely to be 128 bytes.
+ * @coherency_info: This must be last member of the structure.
+ *
+ * NOTE: the raw_props member in this data structure contains the register
+ * values from which the value of the other members are derived. The derived
+ * members exist to allow for efficient access and/or shielding the details
+ * of the layout of the registers.
+ */
+struct base_gpu_props {
+ struct mali_base_gpu_core_props core_props;
+ struct mali_base_gpu_l2_cache_props l2_props;
+ __u64 unused_1;
+ struct mali_base_gpu_tiler_props tiler_props;
+ struct mali_base_gpu_thread_props thread_props;
+ struct gpu_raw_gpu_props raw_props;
+ struct mali_base_gpu_coherent_group_info coherency_info;
+};
+
+#if MALI_USE_CSF
+#include "csf/mali_base_csf_kernel.h"
+#else
+#include "jm/mali_base_jm_kernel.h"
+#endif
+
+/**
+ * base_mem_group_id_get() - Get group ID from flags
+ * @flags: Flags to pass to base_mem_alloc
+ *
+ * This inline function extracts the encoded group ID from flags
+ * and converts it into numeric value (0~15).
+ *
+ * Return: group ID(0~15) extracted from the parameter
+ */
+static __inline__ int base_mem_group_id_get(base_mem_alloc_flags flags)
+{
+ LOCAL_ASSERT((flags & ~BASE_MEM_FLAGS_INPUT_MASK) == 0);
+ return (int)((flags & BASE_MEM_GROUP_ID_MASK) >>
+ BASEP_MEM_GROUP_ID_SHIFT);
+}
+
+/**
+ * base_mem_group_id_set() - Set group ID into base_mem_alloc_flags
+ * @id: group ID(0~15) you want to encode
+ *
+ * This inline function encodes a specific group ID into base_mem_alloc_flags.
+ * Parameter 'id' should lie between 0 and 15.
+ *
+ * Return: base_mem_alloc_flags with the group ID (id) encoded
+ *
+ * The return value can be combined with other flags against base_mem_alloc
+ * to identify a specific memory group.
+ */
+static __inline__ base_mem_alloc_flags base_mem_group_id_set(int id)
+{
+ if ((id < 0) || (id >= BASE_MEM_GROUP_COUNT)) {
+ /* Set to default value when id is out of range. */
+ id = BASE_MEM_GROUP_DEFAULT;
+ }
+
+ return ((base_mem_alloc_flags)id << BASEP_MEM_GROUP_ID_SHIFT) &
+ BASE_MEM_GROUP_ID_MASK;
+}
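+
+/*
+ * Illustrative usage sketch (editorial, not part of the original header);
+ * the BASE_MEM_PROT_* flag names come from the backend-specific base kernel
+ * header included above and are assumptions of this example:
+ *
+ *   base_mem_alloc_flags flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD |
+ *                                base_mem_group_id_set(1);
+ *   // base_mem_group_id_get(flags) now returns 1
+ */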
+
+/**
+ * base_context_mmu_group_id_set - Encode a memory group ID in
+ * base_context_create_flags
+ *
+ * Memory allocated for GPU page tables will come from the specified group.
+ *
+ * @group_id: Physical memory group ID. Range is 0..(BASE_MEM_GROUP_COUNT-1).
+ *
+ * Return: Bitmask of flags to pass to base_context_init.
+ */
+static __inline__ base_context_create_flags base_context_mmu_group_id_set(
+ int const group_id)
+{
+ LOCAL_ASSERT(group_id >= 0);
+ LOCAL_ASSERT(group_id < BASE_MEM_GROUP_COUNT);
+ return BASEP_CONTEXT_MMU_GROUP_ID_MASK &
+ ((base_context_create_flags)group_id <<
+ BASEP_CONTEXT_MMU_GROUP_ID_SHIFT);
+}
+
+/**
+ * base_context_mmu_group_id_get - Decode a memory group ID from
+ * base_context_create_flags
+ *
+ * Memory allocated for GPU page tables will come from the returned group.
+ *
+ * @flags: Bitmask of flags to pass to base_context_init.
+ *
+ * Return: Physical memory group ID. Valid range is 0..(BASE_MEM_GROUP_COUNT-1).
+ */
+static __inline__ int base_context_mmu_group_id_get(
+ base_context_create_flags const flags)
+{
+ LOCAL_ASSERT(flags == (flags & BASEP_CONTEXT_CREATE_ALLOWED_FLAGS));
+ return (int)((flags & BASEP_CONTEXT_MMU_GROUP_ID_MASK) >>
+ BASEP_CONTEXT_MMU_GROUP_ID_SHIFT);
+}
+
+/*
+ * A number of bit flags are defined for requesting cpu_gpu_timeinfo. These
+ * flags are also used, where applicable, for specifying which fields
+ * are valid following the request operation.
+ */
+
+/* For monotonic (counter) timefield */
+#define BASE_TIMEINFO_MONOTONIC_FLAG (1UL << 0)
+/* For system wide timestamp */
+#define BASE_TIMEINFO_TIMESTAMP_FLAG (1UL << 1)
+/* For GPU cycle counter */
+#define BASE_TIMEINFO_CYCLE_COUNTER_FLAG (1UL << 2)
+/* Specify kernel GPU register timestamp */
+#define BASE_TIMEINFO_KERNEL_SOURCE_FLAG (1UL << 30)
+/* Specify userspace cntvct_el0 timestamp source */
+#define BASE_TIMEINFO_USER_SOURCE_FLAG (1UL << 31)
+
+#define BASE_TIMEREQUEST_ALLOWED_FLAGS (\
+ BASE_TIMEINFO_MONOTONIC_FLAG | \
+ BASE_TIMEINFO_TIMESTAMP_FLAG | \
+ BASE_TIMEINFO_CYCLE_COUNTER_FLAG | \
+ BASE_TIMEINFO_KERNEL_SOURCE_FLAG | \
+ BASE_TIMEINFO_USER_SOURCE_FLAG)
+
+/* Maximum number of source allocations allowed to create an alias allocation.
+ * This needs to be 4096 * 6 to allow cube map arrays with up to 4096 array
+ * layers, since each cube map in the array will have 6 faces.
+ */
+#define BASE_MEM_ALIAS_MAX_ENTS ((size_t)24576)
+
+#endif /* _UAPI_BASE_KERNEL_H_ */
diff --git a/common/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h b/common/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h
new file mode 100644
index 0000000..982bd3d
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2010-2015, 2020-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_BASE_MEM_PRIV_H_
+#define _UAPI_BASE_MEM_PRIV_H_
+
+#include <linux/types.h>
+
+#include "mali_base_kernel.h"
+
+#define BASE_SYNCSET_OP_MSYNC (1U << 0)
+#define BASE_SYNCSET_OP_CSYNC (1U << 1)
+
+/*
+ * This structure describe a basic memory coherency operation.
+ * It can either be:
+ * @li a sync from CPU to Memory:
+ * - type = ::BASE_SYNCSET_OP_MSYNC
+ * - mem_handle = a handle to the memory object on which the operation
+ * is taking place
+ * - user_addr = the address of the range to be synced
+ * - size = the amount of data to be synced, in bytes
+ * - offset is ignored.
+ * @li a sync from Memory to CPU:
+ * - type = ::BASE_SYNCSET_OP_CSYNC
+ * - mem_handle = a handle to the memory object on which the operation
+ * is taking place
+ * - user_addr = the address of the range to be synced
+ * - size = the amount of data to be synced, in bytes.
+ * - offset is ignored.
+ */
+struct basep_syncset {
+ struct base_mem_handle mem_handle;
+ __u64 user_addr;
+ __u64 size;
+ __u8 type;
+ __u8 padding[7];
+};
+
+#endif /* _UAPI_BASE_MEM_PRIV_H_ */
diff --git a/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h b/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
new file mode 100644
index 0000000..615dbb0
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2015, 2020-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_HWCNT_READER_H_
+#define _UAPI_KBASE_HWCNT_READER_H_
+
+#include <stddef.h>
+#include <linux/types.h>
+
+/* The ids of ioctl commands. */
+#define KBASE_HWCNT_READER 0xBE
+#define KBASE_HWCNT_READER_GET_HWVER _IOR(KBASE_HWCNT_READER, 0x00, __u32)
+#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, __u32)
+#define KBASE_HWCNT_READER_DUMP _IOW(KBASE_HWCNT_READER, 0x10, __u32)
+#define KBASE_HWCNT_READER_CLEAR _IOW(KBASE_HWCNT_READER, 0x11, __u32)
+#define KBASE_HWCNT_READER_GET_BUFFER _IOC(_IOC_READ, KBASE_HWCNT_READER, 0x20,\
+ offsetof(struct kbase_hwcnt_reader_metadata, cycles))
+#define KBASE_HWCNT_READER_GET_BUFFER_WITH_CYCLES _IOR(KBASE_HWCNT_READER, 0x20,\
+ struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_PUT_BUFFER _IOC(_IOC_WRITE, KBASE_HWCNT_READER, 0x21,\
+ offsetof(struct kbase_hwcnt_reader_metadata, cycles))
+#define KBASE_HWCNT_READER_PUT_BUFFER_WITH_CYCLES _IOW(KBASE_HWCNT_READER, 0x21,\
+ struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_SET_INTERVAL _IOW(KBASE_HWCNT_READER, 0x30, __u32)
+#define KBASE_HWCNT_READER_ENABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x40, __u32)
+#define KBASE_HWCNT_READER_DISABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x41, __u32)
+#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, __u32)
+#define KBASE_HWCNT_READER_GET_API_VERSION_WITH_FEATURES \
+ _IOW(KBASE_HWCNT_READER, 0xFF, \
+ struct kbase_hwcnt_reader_api_version)
+
+/**
+ * struct kbase_hwcnt_reader_metadata_cycles - GPU clock cycles
+ * @top: the number of cycles associated with the main clock for the
+ * GPU
+ * @shader_cores: the cycles that have elapsed on the GPU shader cores
+ */
+struct kbase_hwcnt_reader_metadata_cycles {
+ __u64 top;
+ __u64 shader_cores;
+};
+
+/**
+ * struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata
+ * @timestamp: time when sample was collected
+ * @event_id: id of an event that triggered sample collection
+ * @buffer_idx: position in sampling area where sample buffer was stored
+ * @cycles: the GPU cycles that occurred since the last sample
+ */
+struct kbase_hwcnt_reader_metadata {
+ __u64 timestamp;
+ __u32 event_id;
+ __u32 buffer_idx;
+ struct kbase_hwcnt_reader_metadata_cycles cycles;
+};
+
+/**
+ * enum base_hwcnt_reader_event - hwcnt dumping events
+ * @BASE_HWCNT_READER_EVENT_MANUAL: manual request for dump
+ * @BASE_HWCNT_READER_EVENT_PERIODIC: periodic dump
+ * @BASE_HWCNT_READER_EVENT_PREJOB: prejob dump request
+ * @BASE_HWCNT_READER_EVENT_POSTJOB: postjob dump request
+ * @BASE_HWCNT_READER_EVENT_COUNT: number of supported events
+ */
+enum base_hwcnt_reader_event {
+ BASE_HWCNT_READER_EVENT_MANUAL,
+ BASE_HWCNT_READER_EVENT_PERIODIC,
+ BASE_HWCNT_READER_EVENT_PREJOB,
+ BASE_HWCNT_READER_EVENT_POSTJOB,
+ BASE_HWCNT_READER_EVENT_COUNT
+};
+
+#define KBASE_HWCNT_READER_API_VERSION_NO_FEATURE (0)
+#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP (1 << 0)
+#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES (1 << 1)
+/**
+ * struct kbase_hwcnt_reader_api_version - hwcnt reader API version
+ * @version: API version
+ * @features: available features in this API version
+ */
+struct kbase_hwcnt_reader_api_version {
+ __u32 version;
+ __u32 features;
+};
+
+#endif /* _UAPI_KBASE_HWCNT_READER_H_ */
+
diff --git a/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h b/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h
new file mode 100644
index 0000000..5ca528a
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h
@@ -0,0 +1,836 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_IOCTL_H_
+#define _UAPI_KBASE_IOCTL_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <asm-generic/ioctl.h>
+#include <linux/types.h>
+
+#if MALI_USE_CSF
+#include "csf/mali_kbase_csf_ioctl.h"
+#else
+#include "jm/mali_kbase_jm_ioctl.h"
+#endif /* MALI_USE_CSF */
+
+#define KBASE_IOCTL_TYPE 0x80
+
+/**
+ * struct kbase_ioctl_set_flags - Set kernel context creation flags
+ *
+ * @create_flags: Flags - see base_context_create_flags
+ */
+struct kbase_ioctl_set_flags {
+ __u32 create_flags;
+};
+
+#define KBASE_IOCTL_SET_FLAGS \
+ _IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags)
+
+/**
+ * struct kbase_ioctl_get_gpuprops - Read GPU properties from the kernel
+ *
+ * @buffer: Pointer to the buffer to store properties into
+ * @size: Size of the buffer
+ * @flags: Flags - must be zero for now
+ *
+ * The ioctl will return the number of bytes stored into @buffer or an error
+ * on failure (e.g. @size is too small). If @size is specified as 0 then no
+ * data will be written but the return value will be the number of bytes needed
+ * for all the properties.
+ *
+ * @flags may be used in the future to request a different format for the
+ * buffer. With @flags == 0 the following format is used.
+ *
+ * The buffer will be filled with pairs of values, a __u32 key identifying the
+ * property followed by the value. The size of the value is identified using
+ * the bottom bits of the key. The value then immediately follows the key and
+ * is tightly packed (there is no padding). All keys and values are
+ * little-endian.
+ *
+ * 00 = __u8
+ * 01 = __u16
+ * 10 = __u32
+ * 11 = __u64
+ */
+struct kbase_ioctl_get_gpuprops {
+ __u64 buffer;
+ __u32 size;
+ __u32 flags;
+};
+
+#define KBASE_IOCTL_GET_GPUPROPS \
+ _IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops)
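+
+/*
+ * Illustrative parsing sketch (editorial, not part of the original header)
+ * for the key/value encoding described above; buf and size are assumed to be
+ * the property buffer and the byte count returned by the ioctl:
+ *
+ *   const __u8 *buf = ...;
+ *   size_t i = 0;
+ *   while (i + sizeof(__u32) <= size) {
+ *           __u32 key;
+ *           memcpy(&key, buf + i, sizeof(key));
+ *           i += sizeof(key);
+ *           size_t value_size = (size_t)1 << (key & 3); // 1, 2, 4 or 8 bytes
+ *           // value bytes live at buf[i] .. buf[i + value_size - 1], little-endian
+ *           i += value_size;
+ *   }
+ */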
+
+/**
+ * union kbase_ioctl_mem_alloc - Allocate memory on the GPU
+ * @in: Input parameters
+ * @in.va_pages: The number of pages of virtual address space to reserve
+ * @in.commit_pages: The number of physical pages to allocate
+ * @in.extension: The number of extra pages to allocate on each GPU fault which grows the region
+ * @in.flags: Flags
+ * @out: Output parameters
+ * @out.flags: Flags
+ * @out.gpu_va: The GPU virtual address which is allocated
+ */
+union kbase_ioctl_mem_alloc {
+ struct {
+ __u64 va_pages;
+ __u64 commit_pages;
+ __u64 extension;
+ __u64 flags;
+ } in;
+ struct {
+ __u64 flags;
+ __u64 gpu_va;
+ } out;
+};
+
+#define KBASE_IOCTL_MEM_ALLOC \
+ _IOWR(KBASE_IOCTL_TYPE, 5, union kbase_ioctl_mem_alloc)
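+
+/*
+ * Illustrative usage sketch (editorial, not part of the original header);
+ * the BASE_MEM_PROT_* flag names come from the base kernel headers and the
+ * page counts are arbitrary values chosen for this example:
+ *
+ *   union kbase_ioctl_mem_alloc alloc = { 0 };
+ *   alloc.in.va_pages = 16;
+ *   alloc.in.commit_pages = 16;
+ *   alloc.in.flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR |
+ *                    BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR;
+ *   if (ioctl(fd, KBASE_IOCTL_MEM_ALLOC, &alloc) == 0)
+ *           // use alloc.out.gpu_va and alloc.out.flags from here
+ */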
+
+/**
+ * struct kbase_ioctl_mem_query - Query properties of a GPU memory region
+ * @in: Input parameters
+ * @in.gpu_addr: A GPU address contained within the region
+ * @in.query: The type of query
+ * @out: Output parameters
+ * @out.value: The result of the query
+ *
+ * Use a %KBASE_MEM_QUERY_xxx flag as input for @query.
+ */
+union kbase_ioctl_mem_query {
+ struct {
+ __u64 gpu_addr;
+ __u64 query;
+ } in;
+ struct {
+ __u64 value;
+ } out;
+};
+
+#define KBASE_IOCTL_MEM_QUERY \
+ _IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query)
+
+#define KBASE_MEM_QUERY_COMMIT_SIZE ((__u64)1)
+#define KBASE_MEM_QUERY_VA_SIZE ((__u64)2)
+#define KBASE_MEM_QUERY_FLAGS ((__u64)3)
+
+/**
+ * struct kbase_ioctl_mem_free - Free a memory region
+ * @gpu_addr: Handle to the region to free
+ */
+struct kbase_ioctl_mem_free {
+ __u64 gpu_addr;
+};
+
+#define KBASE_IOCTL_MEM_FREE \
+ _IOW(KBASE_IOCTL_TYPE, 7, struct kbase_ioctl_mem_free)
+
+/**
+ * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader
+ * @buffer_count: requested number of dumping buffers
+ * @fe_bm: counters selection bitmask (Front end)
+ * @shader_bm: counters selection bitmask (Shader)
+ * @tiler_bm: counters selection bitmask (Tiler)
+ * @mmu_l2_bm: counters selection bitmask (MMU_L2)
+ *
+ * An fd is returned from the ioctl if successful, or a negative value on error
+ */
+struct kbase_ioctl_hwcnt_reader_setup {
+ __u32 buffer_count;
+ __u32 fe_bm;
+ __u32 shader_bm;
+ __u32 tiler_bm;
+ __u32 mmu_l2_bm;
+};
+
+#define KBASE_IOCTL_HWCNT_READER_SETUP \
+ _IOW(KBASE_IOCTL_TYPE, 8, struct kbase_ioctl_hwcnt_reader_setup)
+
+/**
+ * struct kbase_ioctl_hwcnt_enable - Enable hardware counter collection
+ * @dump_buffer: GPU address to write counters to
+ * @fe_bm: counters selection bitmask (Front end)
+ * @shader_bm: counters selection bitmask (Shader)
+ * @tiler_bm: counters selection bitmask (Tiler)
+ * @mmu_l2_bm: counters selection bitmask (MMU_L2)
+ */
+struct kbase_ioctl_hwcnt_enable {
+ __u64 dump_buffer;
+ __u32 fe_bm;
+ __u32 shader_bm;
+ __u32 tiler_bm;
+ __u32 mmu_l2_bm;
+};
+
+#define KBASE_IOCTL_HWCNT_ENABLE \
+ _IOW(KBASE_IOCTL_TYPE, 9, struct kbase_ioctl_hwcnt_enable)
+
+#define KBASE_IOCTL_HWCNT_DUMP \
+ _IO(KBASE_IOCTL_TYPE, 10)
+
+#define KBASE_IOCTL_HWCNT_CLEAR \
+ _IO(KBASE_IOCTL_TYPE, 11)
+
+/**
+ * struct kbase_ioctl_hwcnt_values - Values to set the dummy counters to.
+ * @data: Counter samples for the dummy model.
+ * @size: Size of the counter sample data.
+ * @padding: Padding.
+ */
+struct kbase_ioctl_hwcnt_values {
+ __u64 data;
+ __u32 size;
+ __u32 padding;
+};
+
+#define KBASE_IOCTL_HWCNT_SET \
+ _IOW(KBASE_IOCTL_TYPE, 32, struct kbase_ioctl_hwcnt_values)
+
+/**
+ * struct kbase_ioctl_disjoint_query - Query the disjoint counter
+ * @counter: A counter of disjoint events in the kernel
+ */
+struct kbase_ioctl_disjoint_query {
+ __u32 counter;
+};
+
+#define KBASE_IOCTL_DISJOINT_QUERY \
+ _IOR(KBASE_IOCTL_TYPE, 12, struct kbase_ioctl_disjoint_query)
+
+/**
+ * struct kbase_ioctl_get_ddk_version - Query the kernel version
+ * @version_buffer: Buffer to receive the kernel version string
+ * @size: Size of the buffer
+ * @padding: Padding
+ *
+ * The ioctl will return the number of bytes written into version_buffer
+ * (which includes a NULL byte) or a negative error code
+ *
+ * The ioctl request code has to be _IOW because the data in ioctl struct is
+ * being copied to the kernel, even though the kernel then writes out the
+ * version info to the buffer specified in the ioctl.
+ */
+struct kbase_ioctl_get_ddk_version {
+ __u64 version_buffer;
+ __u32 size;
+ __u32 padding;
+};
+
+#define KBASE_IOCTL_GET_DDK_VERSION \
+ _IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version)
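+
+/* Illustrative user-space sketch (not part of this header): reading the DDK
+ * version string. Assumes @fd is an open kbase context and <stdint.h> is
+ * included for uintptr_t:
+ *
+ *     char buf[64] = { 0 };
+ *     struct kbase_ioctl_get_ddk_version ver = {
+ *         .version_buffer = (__u64)(uintptr_t)buf,
+ *         .size = sizeof(buf),
+ *     };
+ *     int len = ioctl(fd, KBASE_IOCTL_GET_DDK_VERSION, &ver);
+ *
+ *     if (len > 0)
+ *         printf("DDK: %s\n", buf);
+ */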
+
+/**
+ * struct kbase_ioctl_mem_jit_init_10_2 - Initialize the just-in-time memory
+ * allocator (between kernel driver
+ * version 10.2--11.4)
+ * @va_pages: Number of VA pages to reserve for JIT
+ *
+ * Note that depending on the VA size of the application and GPU, the value
+ * specified in @va_pages may be ignored.
+ *
+ * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for
+ * backwards compatibility.
+ */
+struct kbase_ioctl_mem_jit_init_10_2 {
+ __u64 va_pages;
+};
+
+#define KBASE_IOCTL_MEM_JIT_INIT_10_2 \
+ _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_10_2)
+
+/**
+ * struct kbase_ioctl_mem_jit_init_11_5 - Initialize the just-in-time memory
+ * allocator (between kernel driver
+ * version 11.5--11.19)
+ * @va_pages: Number of VA pages to reserve for JIT
+ * @max_allocations: Maximum number of concurrent allocations
+ * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%)
+ * @group_id: Group ID to be used for physical allocations
+ * @padding: Currently unused, must be zero
+ *
+ * Note that depending on the VA size of the application and GPU, the value
+ * specified in @va_pages may be ignored.
+ *
+ * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for
+ * backwards compatibility.
+ */
+struct kbase_ioctl_mem_jit_init_11_5 {
+ __u64 va_pages;
+ __u8 max_allocations;
+ __u8 trim_level;
+ __u8 group_id;
+ __u8 padding[5];
+};
+
+#define KBASE_IOCTL_MEM_JIT_INIT_11_5 \
+ _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_11_5)
+
+/**
+ * struct kbase_ioctl_mem_jit_init - Initialize the just-in-time memory
+ * allocator
+ * @va_pages: Number of GPU virtual address pages to reserve for just-in-time
+ * memory allocations
+ * @max_allocations: Maximum number of concurrent allocations
+ * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%)
+ * @group_id: Group ID to be used for physical allocations
+ * @padding: Currently unused, must be zero
+ * @phys_pages: Maximum number of physical pages to allocate just-in-time
+ *
+ * Note that depending on the VA size of the application and GPU, the value
+ * specified in @va_pages may be ignored.
+ */
+struct kbase_ioctl_mem_jit_init {
+ __u64 va_pages;
+ __u8 max_allocations;
+ __u8 trim_level;
+ __u8 group_id;
+ __u8 padding[5];
+ __u64 phys_pages;
+};
+
+#define KBASE_IOCTL_MEM_JIT_INIT \
+ _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init)
+
+/**
+ * struct kbase_ioctl_mem_sync - Perform cache maintenance on memory
+ *
+ * @handle: GPU memory handle (GPU VA)
+ * @user_addr: The address where it is mapped in user space
+ * @size: The number of bytes to synchronise
+ * @type: The direction to synchronise: 0 is sync to memory (clean),
+ * 1 is sync from memory (invalidate). Use the BASE_SYNCSET_OP_xxx constants.
+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero
+ */
+struct kbase_ioctl_mem_sync {
+ __u64 handle;
+ __u64 user_addr;
+ __u64 size;
+ __u8 type;
+ __u8 padding[7];
+};
+
+#define KBASE_IOCTL_MEM_SYNC \
+ _IOW(KBASE_IOCTL_TYPE, 15, struct kbase_ioctl_mem_sync)
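+
+/* Illustrative user-space sketch (not part of this header): cleaning CPU
+ * caches for a buffer the GPU is about to read. Assumes @gpu_va and @cpu_ptr
+ * describe the same mapped region of @buffer_size bytes, and that the
+ * BASE_SYNCSET_OP_* constants come from the base kernel headers:
+ *
+ *     struct kbase_ioctl_mem_sync sync = {
+ *         .handle = gpu_va,
+ *         .user_addr = (__u64)(uintptr_t)cpu_ptr,
+ *         .size = buffer_size,
+ *         .type = BASE_SYNCSET_OP_MSYNC,
+ *     };
+ *     ioctl(fd, KBASE_IOCTL_MEM_SYNC, &sync);
+ */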
+
+/**
+ * union kbase_ioctl_mem_find_cpu_offset - Find the offset of a CPU pointer
+ *
+ * @in: Input parameters
+ * @in.gpu_addr: The GPU address of the memory region
+ * @in.cpu_addr: The CPU address to locate
+ * @in.size: A size in bytes to validate is contained within the region
+ * @out: Output parameters
+ * @out.offset: The offset from the start of the memory region to @cpu_addr
+ */
+union kbase_ioctl_mem_find_cpu_offset {
+ struct {
+ __u64 gpu_addr;
+ __u64 cpu_addr;
+ __u64 size;
+ } in;
+ struct {
+ __u64 offset;
+ } out;
+};
+
+#define KBASE_IOCTL_MEM_FIND_CPU_OFFSET \
+ _IOWR(KBASE_IOCTL_TYPE, 16, union kbase_ioctl_mem_find_cpu_offset)
+
+/**
+ * struct kbase_ioctl_get_context_id - Get the kernel context ID
+ *
+ * @id: The kernel context ID
+ */
+struct kbase_ioctl_get_context_id {
+ __u32 id;
+};
+
+#define KBASE_IOCTL_GET_CONTEXT_ID \
+ _IOR(KBASE_IOCTL_TYPE, 17, struct kbase_ioctl_get_context_id)
+
+/**
+ * struct kbase_ioctl_tlstream_acquire - Acquire a tlstream fd
+ *
+ * @flags: Flags
+ *
+ * The ioctl returns a file descriptor when successful
+ */
+struct kbase_ioctl_tlstream_acquire {
+ __u32 flags;
+};
+
+#define KBASE_IOCTL_TLSTREAM_ACQUIRE \
+ _IOW(KBASE_IOCTL_TYPE, 18, struct kbase_ioctl_tlstream_acquire)
+
+#define KBASE_IOCTL_TLSTREAM_FLUSH \
+ _IO(KBASE_IOCTL_TYPE, 19)
+
+/**
+ * struct kbase_ioctl_mem_commit - Change the amount of memory backing a region
+ *
+ * @gpu_addr: The memory region to modify
+ * @pages: The number of physical pages that should be present
+ *
+ * The ioctl returns 0 on success, or one of the following error codes:
+ * -ENOMEM: Out of memory
+ * -EINVAL: Invalid arguments
+ */
+struct kbase_ioctl_mem_commit {
+ __u64 gpu_addr;
+ __u64 pages;
+};
+
+#define KBASE_IOCTL_MEM_COMMIT \
+ _IOW(KBASE_IOCTL_TYPE, 20, struct kbase_ioctl_mem_commit)
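+
+/* Illustrative user-space sketch (not part of this header): growing the
+ * physical backing of a region that was created with fewer committed pages
+ * than its VA reservation:
+ *
+ *     struct kbase_ioctl_mem_commit commit = {
+ *         .gpu_addr = gpu_va,
+ *         .pages = 64,
+ *     };
+ *     if (ioctl(fd, KBASE_IOCTL_MEM_COMMIT, &commit) != 0)
+ *         ... handle -ENOMEM / -EINVAL ...
+ */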
+
+/**
+ * union kbase_ioctl_mem_alias - Create an alias of memory regions
+ * @in: Input parameters
+ * @in.flags: Flags, see BASE_MEM_xxx
+ * @in.stride: Bytes between start of each memory region
+ * @in.nents: The number of regions to pack together into the alias
+ * @in.aliasing_info: Pointer to an array of struct base_mem_aliasing_info
+ * @out: Output parameters
+ * @out.flags: Flags, see BASE_MEM_xxx
+ * @out.gpu_va: Address of the new alias
+ * @out.va_pages: Size of the new alias
+ */
+union kbase_ioctl_mem_alias {
+ struct {
+ __u64 flags;
+ __u64 stride;
+ __u64 nents;
+ __u64 aliasing_info;
+ } in;
+ struct {
+ __u64 flags;
+ __u64 gpu_va;
+ __u64 va_pages;
+ } out;
+};
+
+#define KBASE_IOCTL_MEM_ALIAS \
+ _IOWR(KBASE_IOCTL_TYPE, 21, union kbase_ioctl_mem_alias)
+
+/**
+ * union kbase_ioctl_mem_import - Import memory for use by the GPU
+ * @in: Input parameters
+ * @in.flags: Flags, see BASE_MEM_xxx
+ * @in.phandle: Handle to the external memory
+ * @in.type: Type of external memory, see base_mem_import_type
+ * @in.padding: Amount of extra VA pages to append to the imported buffer
+ * @out: Output parameters
+ * @out.flags: Flags, see BASE_MEM_xxx
+ * @out.gpu_va: GPU virtual address of the imported memory
+ * @out.va_pages: Size of the imported memory, in pages
+ */
+union kbase_ioctl_mem_import {
+ struct {
+ __u64 flags;
+ __u64 phandle;
+ __u32 type;
+ __u32 padding;
+ } in;
+ struct {
+ __u64 flags;
+ __u64 gpu_va;
+ __u64 va_pages;
+ } out;
+};
+
+#define KBASE_IOCTL_MEM_IMPORT \
+ _IOWR(KBASE_IOCTL_TYPE, 22, union kbase_ioctl_mem_import)
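+
+/* Illustrative user-space sketch (not part of this header): importing a
+ * dma-buf for GPU access. Assumes @dmabuf_fd refers to an exported dma-buf,
+ * that BASE_MEM_IMPORT_TYPE_UMM and the BASE_MEM_PROT_* flags come from the
+ * base kernel headers, and use_imported() stands in for the caller's own
+ * code:
+ *
+ *     union kbase_ioctl_mem_import import = { 0 };
+ *
+ *     import.in.phandle = (__u64)(uintptr_t)&dmabuf_fd;
+ *     import.in.type = BASE_MEM_IMPORT_TYPE_UMM;
+ *     import.in.flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR;
+ *     if (ioctl(fd, KBASE_IOCTL_MEM_IMPORT, &import) == 0)
+ *         use_imported(import.out.gpu_va, import.out.va_pages);
+ */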
+
+/**
+ * struct kbase_ioctl_mem_flags_change - Change the flags for a memory region
+ * @gpu_va: The GPU region to modify
+ * @flags: The new flags to set
+ * @mask: Mask of the flags to modify
+ */
+struct kbase_ioctl_mem_flags_change {
+ __u64 gpu_va;
+ __u64 flags;
+ __u64 mask;
+};
+
+#define KBASE_IOCTL_MEM_FLAGS_CHANGE \
+ _IOW(KBASE_IOCTL_TYPE, 23, struct kbase_ioctl_mem_flags_change)
+
+/**
+ * struct kbase_ioctl_stream_create - Create a synchronisation stream
+ * @name: A name to identify this stream. Must be NULL-terminated.
+ *
+ * Note that this is also called a "timeline", but is named stream to avoid
+ * confusion with other uses of the word.
+ *
+ * Unused bytes in @name (after the first NULL byte) must also be NULL bytes.
+ *
+ * The ioctl returns a file descriptor.
+ */
+struct kbase_ioctl_stream_create {
+ char name[32];
+};
+
+#define KBASE_IOCTL_STREAM_CREATE \
+ _IOW(KBASE_IOCTL_TYPE, 24, struct kbase_ioctl_stream_create)
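+
+/* Illustrative user-space sketch (not part of this header): creating a named
+ * synchronisation stream and obtaining its file descriptor. The unused bytes
+ * of @name are zero-initialised as required:
+ *
+ *     struct kbase_ioctl_stream_create stream = { .name = "my-timeline" };
+ *     int stream_fd = ioctl(fd, KBASE_IOCTL_STREAM_CREATE, &stream);
+ *
+ *     if (stream_fd >= 0)
+ *         ... use stream_fd when setting up fence-based synchronisation ...
+ */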
+
+/**
+ * struct kbase_ioctl_fence_validate - Validate a fd refers to a fence
+ * @fd: The file descriptor to validate
+ */
+struct kbase_ioctl_fence_validate {
+ int fd;
+};
+
+#define KBASE_IOCTL_FENCE_VALIDATE \
+ _IOW(KBASE_IOCTL_TYPE, 25, struct kbase_ioctl_fence_validate)
+
+/**
+ * struct kbase_ioctl_mem_profile_add - Provide profiling information to kernel
+ * @buffer: Pointer to the information
+ * @len: Length
+ * @padding: Padding
+ *
+ * The data provided is accessible through a debugfs file
+ */
+struct kbase_ioctl_mem_profile_add {
+ __u64 buffer;
+ __u32 len;
+ __u32 padding;
+};
+
+#define KBASE_IOCTL_MEM_PROFILE_ADD \
+ _IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add)
+
+/**
+ * struct kbase_ioctl_sticky_resource_map - Permanently map an external resource
+ * @count: Number of resources
+ * @address: Array of __u64 GPU addresses of the external resources to map
+ */
+struct kbase_ioctl_sticky_resource_map {
+ __u64 count;
+ __u64 address;
+};
+
+#define KBASE_IOCTL_STICKY_RESOURCE_MAP \
+ _IOW(KBASE_IOCTL_TYPE, 29, struct kbase_ioctl_sticky_resource_map)
+
+/**
+ * struct kbase_ioctl_sticky_resource_unmap - Unmap an external resource which
+ *                                            was previously permanently mapped
+ * @count: Number of resources
+ * @address: Array of __u64 GPU addresses of the external resources to unmap
+ */
+struct kbase_ioctl_sticky_resource_unmap {
+ __u64 count;
+ __u64 address;
+};
+
+#define KBASE_IOCTL_STICKY_RESOURCE_UNMAP \
+ _IOW(KBASE_IOCTL_TYPE, 30, struct kbase_ioctl_sticky_resource_unmap)
+
+/**
+ * union kbase_ioctl_mem_find_gpu_start_and_offset - Find the start address of
+ * the GPU memory region for
+ * the given gpu address and
+ * the offset of that address
+ * into the region
+ * @in: Input parameters
+ * @in.gpu_addr: GPU virtual address
+ * @in.size: Size in bytes within the region
+ * @out: Output parameters
+ * @out.start: Address of the beginning of the memory region enclosing @gpu_addr
+ * for the length of @in.size bytes
+ * @out.offset: The offset from the start of the memory region to @gpu_addr
+ */
+union kbase_ioctl_mem_find_gpu_start_and_offset {
+ struct {
+ __u64 gpu_addr;
+ __u64 size;
+ } in;
+ struct {
+ __u64 start;
+ __u64 offset;
+ } out;
+};
+
+#define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \
+ _IOWR(KBASE_IOCTL_TYPE, 31, union kbase_ioctl_mem_find_gpu_start_and_offset)
+
+#define KBASE_IOCTL_CINSTR_GWT_START \
+ _IO(KBASE_IOCTL_TYPE, 33)
+
+#define KBASE_IOCTL_CINSTR_GWT_STOP \
+ _IO(KBASE_IOCTL_TYPE, 34)
+
+/**
+ * union kbase_ioctl_cinstr_gwt_dump - Collect all GPU write fault addresses.
+ * @in: Input parameters
+ * @in.addr_buffer: Address of buffer to hold addresses of gpu modified areas.
+ * @in.size_buffer: Address of buffer to hold size of modified areas (in pages)
+ * @in.len: Number of addresses the buffers can hold.
+ * @in.padding: padding
+ * @out: Output parameters
+ * @out.no_of_addr_collected: Number of addresses collected into addr_buffer.
+ * @out.more_data_available: Status indicating if more addresses are available.
+ * @out.padding: padding
+ *
+ * This structure is used when performing a call to dump GPU write fault
+ * addresses.
+ */
+union kbase_ioctl_cinstr_gwt_dump {
+ struct {
+ __u64 addr_buffer;
+ __u64 size_buffer;
+ __u32 len;
+ __u32 padding;
+ } in;
+ struct {
+ __u32 no_of_addr_collected;
+ __u8 more_data_available;
+ __u8 padding[27];
+ } out;
+};
+
+#define KBASE_IOCTL_CINSTR_GWT_DUMP \
+ _IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump)
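+
+/* Illustrative user-space sketch (not part of this header): draining the GPU
+ * write fault addresses collected after KBASE_IOCTL_CINSTR_GWT_START.
+ * process() stands in for the caller's own handling code:
+ *
+ *     __u64 addrs[256], sizes[256];
+ *     union kbase_ioctl_cinstr_gwt_dump dump = { 0 };
+ *
+ *     do {
+ *         dump.in.addr_buffer = (__u64)(uintptr_t)addrs;
+ *         dump.in.size_buffer = (__u64)(uintptr_t)sizes;
+ *         dump.in.len = 256;
+ *         if (ioctl(fd, KBASE_IOCTL_CINSTR_GWT_DUMP, &dump) != 0)
+ *             break;
+ *         process(addrs, sizes, dump.out.no_of_addr_collected);
+ *     } while (dump.out.more_data_available);
+ */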
+
+/**
+ * struct kbase_ioctl_mem_exec_init - Initialise the EXEC_VA memory zone
+ *
+ * @va_pages: Number of VA pages to reserve for EXEC_VA
+ */
+struct kbase_ioctl_mem_exec_init {
+ __u64 va_pages;
+};
+
+#define KBASE_IOCTL_MEM_EXEC_INIT \
+ _IOW(KBASE_IOCTL_TYPE, 38, struct kbase_ioctl_mem_exec_init)
+
+/**
+ * union kbase_ioctl_get_cpu_gpu_timeinfo - Request zero or more types of
+ * cpu/gpu time (counter values)
+ * @in: Input parameters
+ * @in.request_flags: Bit-flags indicating the requested types.
+ * @in.paddings: Unused; pads the input to the same size as the output struct.
+ * @out: Output parameters
+ * @out.sec: Integer part of the monotonic time, in seconds.
+ * @out.nsec: Fractional part of the monotonic time, in nanoseconds.
+ * @out.padding: Unused; for __u64 alignment
+ * @out.timestamp: System wide timestamp (counter) value.
+ * @out.cycle_counter: GPU cycle counter value.
+ */
+union kbase_ioctl_get_cpu_gpu_timeinfo {
+ struct {
+ __u32 request_flags;
+ __u32 paddings[7];
+ } in;
+ struct {
+ __u64 sec;
+ __u32 nsec;
+ __u32 padding;
+ __u64 timestamp;
+ __u64 cycle_counter;
+ } out;
+};
+
+#define KBASE_IOCTL_GET_CPU_GPU_TIMEINFO \
+ _IOWR(KBASE_IOCTL_TYPE, 50, union kbase_ioctl_get_cpu_gpu_timeinfo)
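+
+/* Illustrative user-space sketch (not part of this header): sampling the GPU
+ * cycle counter and the matching monotonic time in one call. The request
+ * flag bits used in @requested_types are defined alongside the base kernel
+ * headers and are assumed to have been set by the caller:
+ *
+ *     union kbase_ioctl_get_cpu_gpu_timeinfo info = { 0 };
+ *
+ *     info.in.request_flags = requested_types;
+ *     if (ioctl(fd, KBASE_IOCTL_GET_CPU_GPU_TIMEINFO, &info) == 0)
+ *         printf("%llu.%09u s, cycles %llu\n",
+ *                (unsigned long long)info.out.sec, info.out.nsec,
+ *                (unsigned long long)info.out.cycle_counter);
+ */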
+
+/**
+ * struct kbase_ioctl_context_priority_check - Check the max possible priority
+ * @priority: Requested priority on input; the highest priority actually
+ *            available to the context on output
+ */
+struct kbase_ioctl_context_priority_check {
+ __u8 priority;
+};
+
+#define KBASE_IOCTL_CONTEXT_PRIORITY_CHECK \
+ _IOWR(KBASE_IOCTL_TYPE, 54, struct kbase_ioctl_context_priority_check)
+
+/**
+ * struct kbase_ioctl_set_limited_core_count - Set the limited core count.
+ *
+ * @max_core_count: Maximum core count
+ */
+struct kbase_ioctl_set_limited_core_count {
+ __u8 max_core_count;
+};
+
+#define KBASE_IOCTL_SET_LIMITED_CORE_COUNT \
+ _IOW(KBASE_IOCTL_TYPE, 55, struct kbase_ioctl_set_limited_core_count)
+
+
+/***************
+ * test ioctls *
+ ***************/
+#if MALI_UNIT_TEST
+/* These ioctls are purely for test purposes and are not used in the production
+ * driver; they may therefore change without notice.
+ */
+
+#define KBASE_IOCTL_TEST_TYPE (KBASE_IOCTL_TYPE + 1)
+
+
+/**
+ * struct kbase_ioctl_tlstream_stats - Read tlstream stats for test purposes
+ * @bytes_collected: number of bytes read by user
+ * @bytes_generated: number of bytes generated by tracepoints
+ */
+struct kbase_ioctl_tlstream_stats {
+ __u32 bytes_collected;
+ __u32 bytes_generated;
+};
+
+#define KBASE_IOCTL_TLSTREAM_STATS \
+ _IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats)
+
+#endif /* MALI_UNIT_TEST */
+
+/* Customer extension range */
+#define KBASE_IOCTL_EXTRA_TYPE (KBASE_IOCTL_TYPE + 2)
+
+/* If the integration needs extra ioctls, add them here
+ * like this:
+ *
+ * struct my_ioctl_args {
+ * ....
+ * }
+ *
+ * #define KBASE_IOCTL_MY_IOCTL \
+ * _IOWR(KBASE_IOCTL_EXTRA_TYPE, 0, struct my_ioctl_args)
+ */
+
+
+/**********************************
+ * Definitions for GPU properties *
+ **********************************/
+#define KBASE_GPUPROP_VALUE_SIZE_U8 (0x0)
+#define KBASE_GPUPROP_VALUE_SIZE_U16 (0x1)
+#define KBASE_GPUPROP_VALUE_SIZE_U32 (0x2)
+#define KBASE_GPUPROP_VALUE_SIZE_U64 (0x3)
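+
+/* Illustrative user-space sketch (not part of this header): walking the
+ * packed property buffer returned by the KBASE_IOCTL_GET_GPUPROPS ioctl
+ * defined earlier in this header, assuming the key/value encoding documented
+ * with that ioctl: each entry is a little-endian 32-bit key whose two low
+ * bits hold one of the size codes above and whose remaining bits hold the
+ * property id, followed by the value in 1, 2, 4 or 8 little-endian bytes.
+ * handle_property() stands in for the caller's own code:
+ *
+ *     const __u8 *p = buf, *end = buf + len;
+ *
+ *     while (p + 4 <= end) {
+ *         __u32 key = p[0] | (p[1] << 8) | (p[2] << 16) | ((__u32)p[3] << 24);
+ *         __u32 prop = key >> 2;
+ *         unsigned int bytes = 1u << (key & 3);
+ *         __u64 value = 0;
+ *         unsigned int i;
+ *
+ *         p += 4;
+ *         for (i = 0; i < bytes && p < end; i++)
+ *             value |= (__u64)*p++ << (8 * i);
+ *         handle_property(prop, value);
+ *     }
+ */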
+
+#define KBASE_GPUPROP_PRODUCT_ID 1
+#define KBASE_GPUPROP_VERSION_STATUS 2
+#define KBASE_GPUPROP_MINOR_REVISION 3
+#define KBASE_GPUPROP_MAJOR_REVISION 4
+/* 5 previously used for GPU speed */
+#define KBASE_GPUPROP_GPU_FREQ_KHZ_MAX 6
+/* 7 previously used for minimum GPU speed */
+#define KBASE_GPUPROP_LOG2_PROGRAM_COUNTER_SIZE 8
+#define KBASE_GPUPROP_TEXTURE_FEATURES_0 9
+#define KBASE_GPUPROP_TEXTURE_FEATURES_1 10
+#define KBASE_GPUPROP_TEXTURE_FEATURES_2 11
+#define KBASE_GPUPROP_GPU_AVAILABLE_MEMORY_SIZE 12
+
+#define KBASE_GPUPROP_L2_LOG2_LINE_SIZE 13
+#define KBASE_GPUPROP_L2_LOG2_CACHE_SIZE 14
+#define KBASE_GPUPROP_L2_NUM_L2_SLICES 15
+
+#define KBASE_GPUPROP_TILER_BIN_SIZE_BYTES 16
+#define KBASE_GPUPROP_TILER_MAX_ACTIVE_LEVELS 17
+
+#define KBASE_GPUPROP_MAX_THREADS 18
+#define KBASE_GPUPROP_MAX_WORKGROUP_SIZE 19
+#define KBASE_GPUPROP_MAX_BARRIER_SIZE 20
+#define KBASE_GPUPROP_MAX_REGISTERS 21
+#define KBASE_GPUPROP_MAX_TASK_QUEUE 22
+#define KBASE_GPUPROP_MAX_THREAD_GROUP_SPLIT 23
+#define KBASE_GPUPROP_IMPL_TECH 24
+
+#define KBASE_GPUPROP_RAW_SHADER_PRESENT 25
+#define KBASE_GPUPROP_RAW_TILER_PRESENT 26
+#define KBASE_GPUPROP_RAW_L2_PRESENT 27
+#define KBASE_GPUPROP_RAW_STACK_PRESENT 28
+#define KBASE_GPUPROP_RAW_L2_FEATURES 29
+#define KBASE_GPUPROP_RAW_CORE_FEATURES 30
+#define KBASE_GPUPROP_RAW_MEM_FEATURES 31
+#define KBASE_GPUPROP_RAW_MMU_FEATURES 32
+#define KBASE_GPUPROP_RAW_AS_PRESENT 33
+#define KBASE_GPUPROP_RAW_JS_PRESENT 34
+#define KBASE_GPUPROP_RAW_JS_FEATURES_0 35
+#define KBASE_GPUPROP_RAW_JS_FEATURES_1 36
+#define KBASE_GPUPROP_RAW_JS_FEATURES_2 37
+#define KBASE_GPUPROP_RAW_JS_FEATURES_3 38
+#define KBASE_GPUPROP_RAW_JS_FEATURES_4 39
+#define KBASE_GPUPROP_RAW_JS_FEATURES_5 40
+#define KBASE_GPUPROP_RAW_JS_FEATURES_6 41
+#define KBASE_GPUPROP_RAW_JS_FEATURES_7 42
+#define KBASE_GPUPROP_RAW_JS_FEATURES_8 43
+#define KBASE_GPUPROP_RAW_JS_FEATURES_9 44
+#define KBASE_GPUPROP_RAW_JS_FEATURES_10 45
+#define KBASE_GPUPROP_RAW_JS_FEATURES_11 46
+#define KBASE_GPUPROP_RAW_JS_FEATURES_12 47
+#define KBASE_GPUPROP_RAW_JS_FEATURES_13 48
+#define KBASE_GPUPROP_RAW_JS_FEATURES_14 49
+#define KBASE_GPUPROP_RAW_JS_FEATURES_15 50
+#define KBASE_GPUPROP_RAW_TILER_FEATURES 51
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_0 52
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_1 53
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_2 54
+#define KBASE_GPUPROP_RAW_GPU_ID 55
+#define KBASE_GPUPROP_RAW_THREAD_MAX_THREADS 56
+#define KBASE_GPUPROP_RAW_THREAD_MAX_WORKGROUP_SIZE 57
+#define KBASE_GPUPROP_RAW_THREAD_MAX_BARRIER_SIZE 58
+#define KBASE_GPUPROP_RAW_THREAD_FEATURES 59
+#define KBASE_GPUPROP_RAW_COHERENCY_MODE 60
+
+#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS 61
+#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS 62
+#define KBASE_GPUPROP_COHERENCY_COHERENCY 63
+#define KBASE_GPUPROP_COHERENCY_GROUP_0 64
+#define KBASE_GPUPROP_COHERENCY_GROUP_1 65
+#define KBASE_GPUPROP_COHERENCY_GROUP_2 66
+#define KBASE_GPUPROP_COHERENCY_GROUP_3 67
+#define KBASE_GPUPROP_COHERENCY_GROUP_4 68
+#define KBASE_GPUPROP_COHERENCY_GROUP_5 69
+#define KBASE_GPUPROP_COHERENCY_GROUP_6 70
+#define KBASE_GPUPROP_COHERENCY_GROUP_7 71
+#define KBASE_GPUPROP_COHERENCY_GROUP_8 72
+#define KBASE_GPUPROP_COHERENCY_GROUP_9 73
+#define KBASE_GPUPROP_COHERENCY_GROUP_10 74
+#define KBASE_GPUPROP_COHERENCY_GROUP_11 75
+#define KBASE_GPUPROP_COHERENCY_GROUP_12 76
+#define KBASE_GPUPROP_COHERENCY_GROUP_13 77
+#define KBASE_GPUPROP_COHERENCY_GROUP_14 78
+#define KBASE_GPUPROP_COHERENCY_GROUP_15 79
+
+#define KBASE_GPUPROP_TEXTURE_FEATURES_3 80
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3 81
+
+#define KBASE_GPUPROP_NUM_EXEC_ENGINES 82
+
+#define KBASE_GPUPROP_RAW_THREAD_TLS_ALLOC 83
+#define KBASE_GPUPROP_TLS_ALLOC 84
+#define KBASE_GPUPROP_RAW_GPU_FEATURES 85
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _UAPI_KBASE_IOCTL_H_ */
diff --git a/common/include/uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h b/common/include/uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h
new file mode 100644
index 0000000..cb782bd
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * mali_kbase_kinstr_jm_reader.h
+ * Provides an ioctl API to read kernel atom state changes. The flow of the
+ * API is:
+ * 1. Obtain the file descriptor with ``KBASE_IOCTL_KINSTR_JM_FD``
+ * 2. Determine the buffer structure layout via the above ioctl's returned
+ * size and version fields in ``struct kbase_kinstr_jm_fd_out``
+ * 3. Poll the file descriptor for ``POLLIN``
+ * 4. Get data with read() on the fd
+ * 5. Use the structure version to understand how to read the data from the
+ *    buffer
+ * 6. Repeat steps 3-5
+ * 7. Close the file descriptor
+ */
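+
+/* Illustrative user-space sketch (not part of this header) of the flow above.
+ * Assumes <poll.h> and <unistd.h> are included, @jm_fd was obtained via
+ * KBASE_IOCTL_KINSTR_JM_FD, and @elem_size is the element size reported by
+ * that ioctl:
+ *
+ *     struct pollfd pfd = { .fd = jm_fd, .events = POLLIN };
+ *     __u8 buf[4096];
+ *
+ *     while (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
+ *         ssize_t n = read(jm_fd, buf, sizeof(buf) - sizeof(buf) % elem_size);
+ *         ssize_t i;
+ *
+ *         for (i = 0; i + (ssize_t)elem_size <= n; i += elem_size)
+ *             ... decode one element at &buf[i] using the reported
+ *                 structure version ...
+ *     }
+ */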
+
+#ifndef _UAPI_KBASE_KINSTR_JM_READER_H_
+#define _UAPI_KBASE_KINSTR_JM_READER_H_
+
+/**
+ * enum kbase_kinstr_jm_reader_atom_state - Determines the work state of an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE: Signifies that an atom has
+ * entered a hardware queue
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_START: Signifies that work has started
+ * on an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_STOP: Signifies that work has stopped
+ * on an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE: Signifies that work has
+ * completed on an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT: The number of state enumerations
+ *
+ * We can add new states to the end of this if they do not break the existing
+ * state machine. Old user mode code can gracefully ignore states they do not
+ * understand.
+ *
+ * If we need to make a breaking change to the state machine, we can do that by
+ * changing the version reported by KBASE_IOCTL_KINSTR_JM_FD. This will
+ * mean that old user mode code will fail to understand the new state field in
+ * the structure and gracefully not use the state change API.
+ */
+enum kbase_kinstr_jm_reader_atom_state {
+ KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE,
+ KBASE_KINSTR_JM_READER_ATOM_STATE_START,
+ KBASE_KINSTR_JM_READER_ATOM_STATE_STOP,
+ KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE,
+ KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT
+};
+
+#endif /* _UAPI_KBASE_KINSTR_JM_READER_H_ */
diff --git a/common/include/uapi/gpu/arm/midgard/mali_uk.h b/common/include/uapi/gpu/arm/midgard/mali_uk.h
new file mode 100644
index 0000000..81cbb9e
--- /dev/null
+++ b/common/include/uapi/gpu/arm/midgard/mali_uk.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2015, 2018, 2020-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/**
+ * Types and definitions that are common across OSs for both the user
+ * and kernel side of the User-Kernel interface.
+ */
+
+#ifndef _UAPI_UK_H_
+#define _UAPI_UK_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * DOC: uk_api User-Kernel Interface API
+ *
+ * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device
+ * drivers developed as part of the Midgard DDK. Currently that includes the Base driver.
+ *
+ * It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent
+ * kernel-side API (UKK) via an OS-specific communication mechanism.
+ *
+ * This API is internal to the Midgard DDK and is not exposed to any applications.
+ *
+ */
+
+/**
+ * enum uk_client_id - These are identifiers for kernel-side drivers
+ * implementing a UK interface, aka UKK clients.
+ * @UK_CLIENT_MALI_T600_BASE: Value used to identify the Base driver UK client.
+ * @UK_CLIENT_COUNT: The number of uk clients supported. This must be
+ * the last member of the enum
+ *
+ * The UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Pass this
+ * identifier to the uku_open() function to select a UKK client.
+ *
+ * When a new UKK client driver is created, a new identifier needs to be added to the uk_client_id
+ * enumeration and the uku_open() implementation for the various OS ports needs to be updated to
+ * provide a mapping of the identifier to the OS specific device name.
+ *
+ */
+enum uk_client_id {
+ UK_CLIENT_MALI_T600_BASE,
+ UK_CLIENT_COUNT
+};
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+#endif /* _UAPI_UK_H_ */