/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
 *
 * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

/**
 * DOC: Definitions (types, defines, etc.) common to Kbase. They are placed here
 * to allow the hierarchy of header files to work.
 */

#ifndef _KBASE_DEFS_H_
#define _KBASE_DEFS_H_

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#if MALI_USE_CSF
#include
#else
#include
#include
#endif

#include "debug/mali_kbase_debug_ktrace_defs.h"

#include
#include

#if IS_ENABLED(CONFIG_DEBUG_FS)
#include <linux/debugfs.h>
#endif /* CONFIG_DEBUG_FS */

#ifdef CONFIG_MALI_DEVFREQ
#include <linux/devfreq.h>
#endif /* CONFIG_MALI_DEVFREQ */

#ifdef CONFIG_MALI_ARBITER_SUPPORT
#include <arbiter/mali_kbase_arbiter_defs.h>
#endif /* CONFIG_MALI_ARBITER_SUPPORT */

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/** Number of milliseconds before we time out on a GPU soft/hard reset */
#define RESET_TIMEOUT 500

/**
 * BASE_JM_MAX_NR_SLOTS - The maximum number of Job Slots to support in the Hardware.
 *
 * You can optimize this down if your target devices will only ever support a
 * small number of job slots.
 */
#define BASE_JM_MAX_NR_SLOTS 3

/**
 * BASE_MAX_NR_AS - The maximum number of Address Spaces to support in the Hardware.
 *
 * You can optimize this down if your target devices will only ever support a
 * small number of Address Spaces.
 */
#define BASE_MAX_NR_AS 16

/* mmu */
#define MIDGARD_MMU_LEVEL(x) (x)

#define MIDGARD_MMU_TOPLEVEL MIDGARD_MMU_LEVEL(0)

#define MIDGARD_MMU_BOTTOMLEVEL MIDGARD_MMU_LEVEL(3)

#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR)

/** setting in kbase_context::as_nr that indicates it's invalid */
#define KBASEP_AS_NR_INVALID (-1)

/**
 * KBASE_LOCK_REGION_MAX_SIZE_LOG2 - Maximum size in bytes of an MMU lock region,
 *                                   as a logarithm
 */
#define KBASE_LOCK_REGION_MAX_SIZE_LOG2 (48) /* 256 TB */

/**
 * Priority level for realtime worker threads
 */
#define KBASE_RT_THREAD_PRIO (2)

/* TODO(b/181145264) get the following two numbers from device tree */
/**
 * First CPU in the contiguous CPU mask used for realtime worker threads.
 */
#define KBASE_RT_THREAD_CPUMASK_MIN (0)

/**
 * Last CPU in the contiguous CPU mask used for realtime worker threads.
 */
#define KBASE_RT_THREAD_CPUMASK_MAX (3)

/**
 * Minimum allowed wake duration in usec for apc request.
 */
#define KBASE_APC_MIN_DUR_USEC (100)

/**
 * Maximum allowed wake duration in usec for apc request.
 */
#define KBASE_APC_MAX_DUR_USEC (4000)

#include "mali_kbase_hwaccess_defs.h"

/* Maximum number of pages of memory that require a permanent mapping, per
 * kbase_context
 */
#define KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES ((64 * 1024ul * 1024ul) >> PAGE_SHIFT)
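/*
 * Illustrative sketch only, not part of the driver: an APC wake-duration
 * request would have to be clamped to the allowed [min, max] range defined
 * above. kbase_apc_clamp_dur_usec() is a hypothetical helper name, and
 * clamp_t() is assumed to be available via <linux/minmax.h>.
 */
static inline u32 kbase_apc_clamp_dur_usec(u32 dur_usec)
{
	/* Bound the requested duration to the range the APC accepts. */
	return clamp_t(u32, dur_usec, KBASE_APC_MIN_DUR_USEC, KBASE_APC_MAX_DUR_USEC);
}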
/* Minimum threshold period for hwcnt dumps between different hwcnt virtualizer
 * clients, to reduce undesired system load.
 * If a virtualizer client requests a dump within this threshold period after
 * some other client has performed a dump, a new dump won't be performed and
 * the accumulated counter values for that client will be returned instead.
 */
#define KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS (200 * NSEC_PER_USEC)
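/*
 * Illustrative sketch only: the throttling decision implied by the threshold
 * above. kbasep_hwcnt_dump_allowed() is a hypothetical helper; the real
 * decision is made inside the hwcnt virtualizer implementation.
 */
static inline bool kbasep_hwcnt_dump_allowed(u64 now_ns, u64 last_dump_ns)
{
	/* A new dump is only performed once the threshold has elapsed since
	 * the previous one; otherwise accumulated values are returned.
	 */
	return (now_ns - last_dump_ns) >= KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS;
}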
#if MALI_USE_CSF
/* The buffer count of the CSF hwcnt backend ring buffer, which is used when the
 * CSF hwcnt backend allocates the ring buffer to communicate with CSF firmware
 * for HWC dump samples.
 * To meet the hardware requirement, this number MUST be a power of two,
 * otherwise CSF hwcnt backend creation will fail.
 */
#define KBASE_HWCNT_BACKEND_CSF_RING_BUFFER_COUNT (128)
#endif

/* Maximum number of clock/regulator pairs that may be referenced by
 * the device node.
 * This is dependent on support for of_property_read_u64_array() in the
 * kernel.
 */
#define BASE_MAX_NR_CLOCKS_REGULATORS (2)

/* Forward declarations */
struct kbase_context;
struct kbase_device;
struct kbase_as;
struct kbase_mmu_setup;
struct kbase_kinstr_jm;

#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
/**
 * struct kbase_gpu_metrics - Object containing members that are used to emit
 *                            GPU metrics tracepoints for all applications that
 *                            created Kbase context(s) for a GPU.
 *
 * @active_list:   List of applications that did some GPU activity in the recent work period.
 * @inactive_list: List of applications that didn't do any GPU activity in the recent work period.
 */
struct kbase_gpu_metrics {
	struct list_head active_list;
	struct list_head inactive_list;
};

/**
 * struct kbase_gpu_metrics_ctx - Object created for every application, that created
 *                                Kbase context(s), containing members that are used
 *                                to emit GPU metrics tracepoints for the application.
 *
 * @link:                    Links the object in kbase_device::gpu_metrics::active_list
 *                           or kbase_device::gpu_metrics::inactive_list.
 * @first_active_start_time: Records the time at which the application first became
 *                           active in the current work period.
 * @last_active_start_time:  Records the time at which the application last became
 *                           active in the current work period.
 * @last_active_end_time:    Records the time at which the application last became
 *                           inactive in the current work period.
 * @total_active:            Tracks the time for which the application has been active
 *                           in the current work period.
 * @prev_wp_active_end_time: Records the time at which the application last became
 *                           inactive in the previous work period.
 * @aid:                     Unique identifier for an application.
 * @kctx_count:              Counter to keep track of the number of Kbase contexts
 *                           created for an application. There may be multiple Kbase
 *                           contexts contributing GPU activity data to a single GPU
 *                           metrics context.
 * @active_cnt:              Counter that is updated every time the GPU activity starts
 *                           and ends in the current work period for an application.
 * @flags:                   Flags to track the state of GPU metrics context.
 */
struct kbase_gpu_metrics_ctx {
	struct list_head link;
	u64 first_active_start_time;
	u64 last_active_start_time;
	u64 last_active_end_time;
	u64 total_active;
	u64 prev_wp_active_end_time;
	unsigned int aid;
	unsigned int kctx_count;
	u8 active_cnt;
	u8 flags;
};
#endif
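#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
/*
 * Illustrative sketch only: moving a metrics context between the two lists
 * documented above. kbase_gpu_metrics_ctx_set_active() is a hypothetical
 * helper; the real list handling is done by the GPU metrics implementation.
 */
static inline void kbase_gpu_metrics_ctx_set_active(struct kbase_gpu_metrics *metrics,
						    struct kbase_gpu_metrics_ctx *gpu_metrics_ctx,
						    bool active)
{
	/* list_move_tail() unlinks the node and appends it to the new list. */
	list_move_tail(&gpu_metrics_ctx->link,
		       active ? &metrics->active_list : &metrics->inactive_list);
}
#endif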
/**
 * struct kbase_io_access - holds information about 1 register access
 *
 * @addr: first bit indicates r/w (r=0, w=1)
 * @value: value written or read
 */
struct kbase_io_access {
	uintptr_t addr;
	u32 value;
};

/**
 * struct kbase_io_history - keeps track of all recent register accesses
 *
 * @enabled: true if register accesses are recorded, false otherwise
 * @lock: spinlock protecting kbase_io_access array
 * @count: number of registers read/written
 * @size: number of elements in kbase_io_access array
 * @buf: array of kbase_io_access
 */
struct kbase_io_history {
	bool enabled;
	spinlock_t lock;
	size_t count;
	u16 size;
	struct kbase_io_access *buf;
};

/**
 * struct kbase_debug_copy_buffer - information about the buffer to be copied.
 *
 * @size: size of the buffer in bytes
 * @pages: pointer to an array of pointers to the pages which contain
 *         the buffer
 * @is_vmalloc: true if @pages was allocated with vzalloc, false if @pages was
 *              allocated with kcalloc
 * @nr_pages: number of pages
 * @offset: offset into the pages
 * @gpu_alloc: pointer to physical memory allocated by the GPU
 * @extres_pages: array of pointers to the pages containing external resources
 *                for this buffer
 * @nr_extres_pages: number of pages in @extres_pages
 */
struct kbase_debug_copy_buffer {
	size_t size;
	struct page **pages;
	bool is_vmalloc;
	unsigned int nr_pages;
	size_t offset;
	struct kbase_mem_phy_alloc *gpu_alloc;

	struct page **extres_pages;
	unsigned int nr_extres_pages;
};

struct kbase_device_info {
	u32 features;
};

struct kbase_mmu_setup {
	u64 transtab;
	u64 memattr;
	u64 transcfg;
};

/**
 * struct kbase_fault - object containing data relating to a page or bus fault.
 * @addr: Records the faulting address.
 * @extra_addr: Records the secondary fault address.
 * @status: Records the fault status as reported by Hw.
 * @protected_mode: Flag indicating whether the fault occurred in protected mode
 *                  or not.
 */
struct kbase_fault {
	u64 addr;
	u64 extra_addr;
	u32 status;
	bool protected_mode;
};

/* Maximum number of memory pages that should be allocated for the array
 * of pointers to free PGDs.
 *
 * This number has been pre-calculated to deal with the maximum allocation
 * size expressed by the default value of KBASE_MEM_ALLOC_MAX_SIZE.
 * This is supposed to be enough for almost the entirety of MMU operations.
 * Any size greater than KBASE_MEM_ALLOC_MAX_SIZE requires being broken down
 * into multiple iterations, each dealing with at most KBASE_MEM_ALLOC_MAX_SIZE
 * bytes.
 *
 * Please update this value if KBASE_MEM_ALLOC_MAX_SIZE changes.
 */
#define MAX_PAGES_FOR_FREE_PGDS ((size_t)9)

/* Maximum number of pointers to free PGDs */
#define MAX_FREE_PGDS ((PAGE_SIZE / sizeof(struct page *)) * MAX_PAGES_FOR_FREE_PGDS)

/**
 * struct kbase_mmu_table - object representing a set of GPU page tables
 * @mmu_lock: Lock to serialize the accesses made to multi level GPU
 *            page tables
 * @pgd: Physical address of the page allocated for the top
 *       level page table of the context, this is used for
 *       MMU HW programming as the address translation will
 *       start from the top level page table.
 * @group_id: A memory group ID to be passed to a platform-specific
 *            memory group manager.
 *            Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
 * @kctx: If this set of MMU tables belongs to a context then
 *        this is a back-reference to the context, otherwise
 *        it is NULL.
 * @scratch_mem: Scratch memory used for MMU operations, which are
 *               serialized by the @mmu_lock.
 */
struct kbase_mmu_table {
	struct rt_mutex mmu_lock;
	phys_addr_t pgd;
	u8 group_id;
	struct kbase_context *kctx;
	union {
		/**
		 * @teardown_pages: Scratch memory used for backup copies of whole
		 *                  PGD pages when tearing down levels upon
		 *                  termination of the MMU table.
		 */
		struct {
			/**
			 * @levels: Array of PGD pages, large enough to copy one PGD
			 *          for each level of the MMU table.
			 */
			u64 levels[MIDGARD_MMU_BOTTOMLEVEL][PAGE_SIZE / sizeof(u64)];
		} teardown_pages;
		/**
		 * @free_pgds: Scratch memory used for insertion, update and teardown
		 *             operations to store a temporary list of PGDs to be freed
		 *             at the end of the operation.
		 */
		struct {
			/** @pgds: Array of pointers to PGDs to free. */
			struct page *pgds[MAX_FREE_PGDS];
			/** @head_index: Index of first free element in the PGDs array. */
			size_t head_index;
		} free_pgds;
	} scratch_mem;
};
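/*
 * Illustrative sketch only: how the @free_pgds scratch area might be used to
 * stage PGD pages for deferred freeing. kbase_mmu_stage_free_pgd() is a
 * hypothetical helper; callers would hold @mmu_lock, which serializes all
 * users of @scratch_mem as documented above.
 */
static inline bool kbase_mmu_stage_free_pgd(struct kbase_mmu_table *mmut, struct page *pgd_page)
{
	if (mmut->scratch_mem.free_pgds.head_index >= MAX_FREE_PGDS)
		return false; /* Scratch list exhausted; caller must flush it first. */

	/* @head_index is the index of the first free element in @pgds. */
	mmut->scratch_mem.free_pgds.pgds[mmut->scratch_mem.free_pgds.head_index++] = pgd_page;
	return true;
}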
#if MALI_USE_CSF
#include "csf/mali_kbase_csf_defs.h"
#else
#include "jm/mali_kbase_jm_defs.h"
#endif

#include "mali_kbase_hwaccess_time.h"

static inline int kbase_as_has_bus_fault(struct kbase_as *as, struct kbase_fault *fault)
{
	return (fault == &as->bf_data);
}

static inline int kbase_as_has_page_fault(struct kbase_as *as, struct kbase_fault *fault)
{
	return (fault == &as->pf_data);
}

/**
 * struct kbasep_mem_device - Data stored per device for memory allocation
 *
 * @used_pages: Tracks usage of OS shared memory. Updated when OS memory is
 *              allocated/freed.
 * @ir_threshold: Fraction of the maximum size of an allocation that grows
 *                on GPU page fault that can be used before the driver
 *                switches to incremental rendering, in 1/256ths.
 *                0 means disabled.
 */
struct kbasep_mem_device {
	atomic_t used_pages;
	atomic_t ir_threshold;
};

struct kbase_clk_rate_listener;

/**
 * typedef kbase_clk_rate_listener_on_change_t() - Frequency change callback
 *
 * @listener: Clock frequency change listener.
 * @clk_index: Index of the clock for which the change has occurred.
 * @clk_rate_hz: Clock frequency (Hz).
 *
 * A callback to call when the clock rate changes. The function must not
 * sleep. No clock rate manager functions must be called from here, as
 * its lock is taken.
 */
typedef void kbase_clk_rate_listener_on_change_t(struct kbase_clk_rate_listener *listener,
						 u32 clk_index, u32 clk_rate_hz);

/**
 * struct kbase_clk_rate_listener - Clock frequency listener
 *
 * @node: List node.
 * @notify: Callback to be called when GPU frequency changes.
 */
struct kbase_clk_rate_listener {
	struct list_head node;
	kbase_clk_rate_listener_on_change_t *notify;
};

/**
 * struct kbase_clk_rate_trace_manager - Data stored per device for GPU clock
 *                                       rate trace manager.
 *
 * @gpu_idle: Tracks the idle state of GPU.
 * @clks: Array of pointer to structures storing data for every
 *        enumerated GPU clock.
 * @clk_rate_trace_ops: Pointer to the platform specific GPU clock rate trace
 *                      operations.
 * @listeners: List of attached listeners.
 * @lock: Lock to serialize the actions of GPU clock rate trace
 *        manager.
 */
struct kbase_clk_rate_trace_manager {
	bool gpu_idle;
	struct kbase_clk_data *clks[BASE_MAX_NR_CLOCKS_REGULATORS];
	struct kbase_clk_rate_trace_op_conf *clk_rate_trace_ops;
	struct list_head listeners;
	spinlock_t lock;
};
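/*
 * Illustrative sketch only: what a clock rate listener might look like. As
 * documented above, the callback must not sleep and must not call back into
 * the clock rate trace manager, whose lock is held. All names below are
 * hypothetical.
 */
struct example_freq_tracker {
	struct kbase_clk_rate_listener listener;
	atomic_t last_rate_hz;
};

static void example_freq_tracker_notify(struct kbase_clk_rate_listener *listener, u32 clk_index,
					u32 clk_rate_hz)
{
	struct example_freq_tracker *tracker =
		container_of(listener, struct example_freq_tracker, listener);

	/* Atomic update only: no sleeping, no manager calls from here. */
	atomic_set(&tracker->last_rate_hz, clk_rate_hz);
}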
/**
 * struct kbase_pm_device_data - Data stored per device for power management.
 * @lock: The lock protecting Power Management structures accessed
 *        outside of IRQ.
 *        This lock must also be held whenever the GPU is being
 *        powered on or off.
 * @active_count: The reference count of active contexts on this device.
 *                Note that some code paths keep shaders/the tiler
 *                powered whilst this is 0.
 *                Use kbase_pm_is_active() instead to check for such cases.
 * @suspending: Flag set to true when System suspend of GPU device begins and
 *              set to false only when System resume of GPU device starts.
 *              So GPU device could be in suspended state while the flag is set.
 *              The flag is updated with @lock held.
 * @resuming: Flag set to true when System resume of GPU device starts and is set
 *            to false when resume ends. The flag is set to true at the same time
 *            when @suspending is set to false with @lock held.
 *            The flag is currently used only to prevent Kbase context termination
 *            during System resume of GPU device.
 * @runtime_active: Flag to track if the GPU is in runtime suspended or active
 *                  state. This ensures that runtime_put and runtime_get
 *                  functions are called in pairs. For example if runtime_get
 *                  has already been called from the power_on callback, then
 *                  the call to it from runtime_gpu_active callback can be
 *                  skipped.
 * @gpu_lost: Flag indicating gpu lost
 * @zero_active_count_wait: Wait queue set when active_count == 0
 * @resume_wait: Wait queue to wait for the System suspend/resume of GPU device.
 * @debug_core_mask: Bit masks identifying the available shader cores that are
 *                   specified via sysfs. One mask per job slot.
 * @debug_core_mask_all: Bit masks identifying the available shader cores that
 *                       are specified via sysfs.
 * @callback_power_runtime_init: Callback for initializing the runtime power
 *                               management. Return 0 on success, else error code
 * @callback_power_runtime_term: Callback for terminating the runtime power
 *                               management.
 * @dvfs_period: Time in milliseconds between each dvfs sample
 * @backend: KBase PM backend data
 * @arb_vm_state: The state of the arbiter VM machine
 * @gpu_users_waiting: Used by virtualization to notify the arbiter that there
 *                     are users waiting for the GPU so that it can request
 *                     and resume the driver.
 * @clk_rtm: The state of the GPU clock rate trace manager
 *
 * This structure contains data for the power management framework.
 * There is one instance of this structure per device in the system.
 */
struct kbase_pm_device_data {
	struct rt_mutex lock;
	int active_count;
	bool suspending;
	bool resuming;
#if MALI_USE_CSF
	bool runtime_active;
#endif
#ifdef CONFIG_MALI_ARBITER_SUPPORT
	atomic_t gpu_lost;
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
	wait_queue_head_t zero_active_count_wait;
	wait_queue_head_t resume_wait;

#if MALI_USE_CSF
	u64 debug_core_mask;
#else
	/* One mask per job slot. */
	u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS];
	u64 debug_core_mask_all;
#endif /* MALI_USE_CSF */

	int (*callback_power_runtime_init)(struct kbase_device *kbdev);
	void (*callback_power_runtime_term)(struct kbase_device *kbdev);
	u32 dvfs_period;
	struct kbase_pm_backend_data backend;

#ifdef CONFIG_MALI_ARBITER_SUPPORT
	struct kbase_arbiter_vm_state *arb_vm_state;
	atomic_t gpu_users_waiting;
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
	struct kbase_clk_rate_trace_manager clk_rtm;
};
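/*
 * Illustrative sketch only: waiting for the device to go idle via the
 * @zero_active_count_wait queue documented above. example_pm_wait_for_idle()
 * is a hypothetical helper; real driver code also takes the appropriate
 * locks and uses kbase_pm_is_active() for the cases where shaders stay
 * powered while @active_count is 0.
 */
static inline void example_pm_wait_for_idle(struct kbase_pm_device_data *pm)
{
	/* The queue is woken whenever active_count drops to 0. */
	wait_event(pm->zero_active_count_wait, pm->active_count == 0);
}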
/**
 * struct kbase_mem_pool - Page based memory pool for kctx/kbdev
 * @kbdev: Kbase device where memory is used
 * @cur_size: Number of free pages currently in the pool (may exceed
 *            @max_size in some corner cases)
 * @max_size: Maximum number of free pages in the pool
 * @order: order = 0 refers to a pool of small pages
 *         order != 0 refers to a pool of 2 MB pages, so
 *         order = 9 (when small page size is 4KB, 2^9 * 4KB = 2 MB)
 *         order = 7 (when small page size is 16KB, 2^7 * 16KB = 2 MB)
 *         order = 5 (when small page size is 64KB, 2^5 * 64KB = 2 MB)
 * @group_id: A memory group ID to be passed to a platform-specific
 *            memory group manager, if present. Immutable.
 *            Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
 * @pool_lock: Lock protecting the pool - must be held when modifying
 *             @cur_size and @page_list
 * @page_list: List of free pages in the pool
 * @reclaim: Shrinker for kernel reclaim of free pages
 * @isolation_in_progress_cnt: Number of pages in pool undergoing page isolation.
 *                             This is used to avoid race condition between pool termination
 *                             and page isolation for page migration.
 * @next_pool: Pointer to next pool where pages can be allocated when this
 *             pool is empty. Pages will spill over to the next pool when
 *             this pool is full. Can be NULL if there is no next pool.
 * @dying: true if the pool is being terminated, and any ongoing
 *         operations should be abandoned
 * @dont_reclaim: true if the shrinker is forbidden from reclaiming memory from
 *                this pool, e.g. during a grow operation
 */
struct kbase_mem_pool {
	struct kbase_device *kbdev;
	size_t cur_size;
	size_t max_size;
	u8 order;
	u8 group_id;
	spinlock_t pool_lock;
	struct list_head page_list;
	struct shrinker reclaim;
	atomic_t isolation_in_progress_cnt;

	struct kbase_mem_pool *next_pool;

	bool dying;
	bool dont_reclaim;
};

/**
 * struct kbase_mem_pool_group - a complete set of physical memory pools.
 *
 * @small: Array of objects containing the state for pools of small size
 *         physical pages.
 * @large: Array of objects containing the state for pools of 2 MiB size
 *         physical pages.
 *
 * Memory pools are used to allow efficient reallocation of previously-freed
 * physical pages. A pair of memory pools is initialized for each physical
 * memory group: one for small pages and one for 2 MiB pages. These arrays
 * should be indexed by physical memory group ID, the meaning of which is
 * defined by the systems integrator.
 */
struct kbase_mem_pool_group {
	struct kbase_mem_pool small[MEMORY_GROUP_MANAGER_NR_GROUPS];
	struct kbase_mem_pool large[MEMORY_GROUP_MANAGER_NR_GROUPS];
};

/**
 * struct kbase_mem_pool_config - Initial configuration for a physical memory
 *                                pool
 *
 * @max_size: Maximum number of free pages that the pool can hold.
 */
struct kbase_mem_pool_config {
	size_t max_size;
};

/**
 * struct kbase_mem_pool_group_config - Initial configuration for a complete
 *                                      set of physical memory pools
 *
 * @small: Array of initial configuration for pools of small pages.
 * @large: Array of initial configuration for pools of 2 MiB pages.
 *
 * This array should be indexed by physical memory group ID, the meaning
 * of which is defined by the systems integrator.
 */
struct kbase_mem_pool_group_config {
	struct kbase_mem_pool_config small[MEMORY_GROUP_MANAGER_NR_GROUPS];
	struct kbase_mem_pool_config large[MEMORY_GROUP_MANAGER_NR_GROUPS];
};
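/*
 * Illustrative sketch only: filling a pool group configuration so that every
 * memory group gets the same limits. The helper name and the limit values
 * are hypothetical; real defaults come from the platform configuration.
 */
static inline void example_mem_pool_group_config_init(struct kbase_mem_pool_group_config *cfg,
						      size_t small_max, size_t large_max)
{
	size_t gid;

	/* One small-page and one large-page pool per memory group. */
	for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; gid++) {
		cfg->small[gid].max_size = small_max;
		cfg->large[gid].max_size = large_max;
	}
}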
/**
 * struct kbase_devfreq_opp - Lookup table for converting between nominal OPP
 *                            frequency, real frequencies and core mask
 * @real_freqs: Real GPU frequencies.
 * @opp_volts: OPP voltages.
 * @opp_freq: Nominal OPP frequency
 * @core_mask: Shader core mask
 */
struct kbase_devfreq_opp {
	u64 opp_freq;
	u64 core_mask;
	u64 real_freqs[BASE_MAX_NR_CLOCKS_REGULATORS];
	u32 opp_volts[BASE_MAX_NR_CLOCKS_REGULATORS];
};

/* MMU mode flags */
#define KBASE_MMU_MODE_HAS_NON_CACHEABLE (1ul << 0) /* Has NON_CACHEABLE MEMATTR */

/**
 * struct kbase_mmu_mode - object containing pointer to methods invoked for
 *                         programming the MMU, as per the MMU mode supported
 *                         by Hw.
 * @update: enable & setup/configure one of the GPU address space.
 * @get_as_setup: retrieve the configuration of one of the GPU address space.
 * @disable_as: disable one of the GPU address space.
 * @pte_to_phy_addr: retrieve the physical address encoded in the page table entry.
 * @ate_is_valid: check if the pte is a valid address translation entry
 *                encoding the physical address of the actual mapped page.
 * @pte_is_valid: check if the pte is a valid entry encoding the physical
 *                address of the next lower level page table.
 * @entry_set_ate: program the pte to be a valid address translation entry to
 *                 encode the physical address of the actual page being mapped.
 * @entry_set_pte: program the pte to be a valid entry to encode the physical
 *                 address of the next lower level page table and also update
 *                 the number of valid entries.
 * @entries_invalidate: clear out or invalidate a range of ptes.
 * @get_num_valid_entries: returns the number of valid entries for a specific pgd.
 * @set_num_valid_entries: sets the number of valid entries for a specific pgd
 * @flags: bitmask of MMU mode flags. Refer to KBASE_MMU_MODE_ constants.
 */
struct kbase_mmu_mode {
	void (*update)(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, int as_nr);
	void (*get_as_setup)(struct kbase_mmu_table *mmut, struct kbase_mmu_setup *const setup);
	void (*disable_as)(struct kbase_device *kbdev, int as_nr);
	phys_addr_t (*pte_to_phy_addr)(u64 entry);
	int (*ate_is_valid)(u64 ate, int level);
	int (*pte_is_valid)(u64 pte, int level);
	void (*entry_set_ate)(u64 *entry, struct tagged_addr phy, unsigned long flags, int level);
	void (*entry_set_pte)(u64 *entry, phys_addr_t phy);
	void (*entries_invalidate)(u64 *entry, u32 count);
	unsigned int (*get_num_valid_entries)(u64 *pgd);
	void (*set_num_valid_entries)(u64 *pgd, unsigned int num_of_valid_entries);
	unsigned long flags;
};

struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void);

#define DEVNAME_SIZE 16

/**
 * enum kbase_devfreq_work_type - The type of work to perform in the devfreq
 *                                suspend/resume worker.
 * @DEVFREQ_WORK_NONE: Initialisation state.
 * @DEVFREQ_WORK_SUSPEND: Call devfreq_suspend_device().
 * @DEVFREQ_WORK_RESUME: Call devfreq_resume_device().
 */
enum kbase_devfreq_work_type { DEVFREQ_WORK_NONE, DEVFREQ_WORK_SUSPEND, DEVFREQ_WORK_RESUME };

/**
 * struct kbase_devfreq_queue_info - Object representing an instance for managing
 *                                   the queued devfreq suspend/resume works.
 * @workq: Workqueue for devfreq suspend/resume requests
 * @work: Work item for devfreq suspend & resume
 * @req_type: Requested work type to be performed by the devfreq
 *            suspend/resume worker
 * @acted_type: Work type that has been acted on by the worker, i.e. the
 *              internal recorded state of the suspend/resume
 */
struct kbase_devfreq_queue_info {
	struct workqueue_struct *workq;
	struct work_struct work;
	enum kbase_devfreq_work_type req_type;
	enum kbase_devfreq_work_type acted_type;
};

/**
 * struct kbase_process - Representing an object of a kbase process instantiated
 *                        when the first kbase context is created under it.
 * @tgid: Thread group ID.
 * @total_gpu_pages: Total gpu pages allocated across all the contexts
 *                   of this process, it accounts for both native allocations
 *                   and dma_buf imported allocations.
 * @dma_buf_pages: Total dma_buf pages allocated across all the contexts
 *                 of this process, native allocations can be accounted for
 *                 by subtracting this from &total_gpu_pages.
 * @kctx_list: List of kbase contexts created for the process.
 * @kprcs_node: Node to a rb_tree, kbase_device will maintain a rb_tree
 *              based on key tgid, kprcs_node is the node link to
 *              &struct_kbase_device.process_root.
 * @dma_buf_root: RB tree of the dma-buf imported allocations, imported
 *                across all the contexts created for this process.
 *                Used to ensure that pages of allocation are accounted
 *                only once for the process, even if the allocation gets
 *                imported multiple times for the process.
 * @kobj: Links to the per-process sysfs node
 *        &kbase_device.proc_sysfs_node.
 */
struct kbase_process {
	pid_t tgid;
	size_t total_gpu_pages;
	size_t dma_buf_pages;
	struct list_head kctx_list;
	struct rb_node kprcs_node;
	struct rb_root dma_buf_root;

	struct kobject kobj;
};
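/*
 * Illustrative sketch only: looking up a kbase_process by tgid in the rb_tree
 * keyed on @tgid, as described for @kprcs_node above. example_find_process()
 * is a hypothetical helper; the root would be kbase_device::process_root.
 */
static inline struct kbase_process *example_find_process(struct rb_root *process_root, pid_t tgid)
{
	struct rb_node *node = process_root->rb_node;

	while (node) {
		struct kbase_process *kprcs = rb_entry(node, struct kbase_process, kprcs_node);

		if (tgid < kprcs->tgid)
			node = node->rb_left;
		else if (tgid > kprcs->tgid)
			node = node->rb_right;
		else
			return kprcs;
	}

	return NULL;
}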
/**
 * struct kbase_mem_migrate - Object representing an instance for managing
 *                            page migration.
 *
 * @free_pages_list: List of deferred pages to free. Mostly used when page migration
 *                   is enabled. Pages in memory pool that require migrating
 *                   will be freed instead. However page cannot be freed
 *                   right away as Linux will need to release the page lock.
 *                   Therefore page will be added to this list and freed later.
 * @free_pages_lock: This lock should be held when adding or removing pages
 *                   from @free_pages_list.
 * @free_pages_workq: Work queue to process the work items queued to free
 *                    pages in @free_pages_list.
 * @free_pages_work: Work item to free pages in @free_pages_list.
 * @inode: Pointer to inode whose address space operations are used
 *         for page migration purposes.
 */
struct kbase_mem_migrate {
	struct list_head free_pages_list;
	spinlock_t free_pages_lock;
	struct workqueue_struct *free_pages_workq;
	struct work_struct free_pages_work;
#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE)
	struct inode *inode;
#endif
};

/**
 * struct kbase_device - Object representing an instance of GPU platform device,
 *                       allocated from the probe method of mali driver.
 * @hw_quirks_sc: Configuration to be used for the shader cores as per
 *                the HW issues present in the GPU.
 * @hw_quirks_tiler: Configuration to be used for the Tiler as per the HW
 *                   issues present in the GPU.
 * @hw_quirks_mmu: Configuration to be used for the MMU as per the HW
 *                 issues present in the GPU.
 * @hw_quirks_gpu: Configuration to be used for the Job Manager or CSF/MCU
 *                 subsystems as per the HW issues present in the GPU.
 * @entry: Links the device instance to the global list of GPU
 *         devices. The list would have as many entries as there
 *         are GPU device instances.
 * @dev: Pointer to the kernel's generic/base representation
 *       of the GPU platform device.
 * @mdev: Pointer to the miscellaneous device registered to
 *        provide Userspace access to kernel driver through the
 *        device file /dev/malixx.
 * @reg_start: Base address of the region in physical address space
 *             where GPU registers have been mapped.
 * @reg_size: Size of the region containing GPU registers.
 * @reg: Kernel virtual address of the region containing GPU
 *       registers, using which Driver will access the registers.
 * @regmap: Top level structure for hw_access regmaps, containing
 *          the size of the regmap, pointers to Look-Up Tables (LUT).
 * @regmap.regs: Pointer to regmap LUT of precomputed iomem pointers from
 *               GPU register offsets and @reg.
 * @regmap.flags: Pointer to regmap LUT of flags detailing each register
 *                access type and width (32/64-bit).
 * @regmap.size: Size of the regs and flags LUT.
 * @irqs: Array containing IRQ resource info for 3 types of
 *        interrupts : Job scheduling, MMU & GPU events (like
 *        power management, cache etc.)
 * @irqs.irq: irq number
 * @irqs.flags: irq flags
 * @nr_irqs: The number of interrupt entries.
 * @clocks: Pointer to the input clock resources referenced by
 *          the GPU device node.
 * @nr_clocks: Number of clocks set in the clocks array.
 * @regulators: Pointer to the structs corresponding to the
 *              regulators referenced by the GPU device node.
 * @nr_regulators: Number of regulators set in the regulators array.
 * @opp_table: Pointer to the device OPP structure maintaining the
 *             link to OPPs attached to a device. This is obtained
 *             after setting regulator names for the device.
 * @token: Integer replacement for opp_table in kernel versions
 *         6 and greater. Value is a token id number when 0 or greater,
 *         and a linux errno when negative. Must be initialised
 *         to a non-zero value as 0 is a valid token id.
 * @devname: string containing the name used for GPU device instance,
 *           miscellaneous device is registered using the same name.
 * @id: Unique identifier for the device, indicates the number of
 *      devices which have been created so far.
 * @model: Pointer, valid only when Driver is compiled to not access
 *         the real GPU Hw, to the dummy model which tries to mimic
 *         to some extent the state & behavior of GPU Hw in response
 *         to the register accesses made by the Driver.
 * @irq_slab: slab cache for allocating the work items queued when
 *            model mimics raising of IRQ to cause an interrupt on CPU.
 * @irq_workq: workqueue for processing the irq work items.
 * @serving_job_irq: function to execute work items queued when model mimics
 *                   the raising of JS irq, mimics the interrupt handler
 *                   processing JS interrupts.
 * @serving_gpu_irq: function to execute work items queued when model mimics
 *                   the raising of GPU irq, mimics the interrupt handler
 *                   processing GPU interrupts.
 * @serving_mmu_irq: function to execute work items queued when model mimics
 *                   the raising of MMU irq, mimics the interrupt handler
 *                   processing MMU interrupts.
 * @reg_op_lock: lock used by model to serialize the handling of register
 *               accesses made by the driver.
 * @pm: Per device object for storing data for power management
 *      framework.
 * @fw_load_lock: Mutex to protect firmware loading in @ref kbase_open.
 * @csf: CSF object for the GPU device.
 * @js_data: Per device object encapsulating the current context of
 *           Job Scheduler, which is global to the device and is not
 *           tied to any particular struct kbase_context running on
 *           the device
 * @mem_pools: Global pools of free physical memory pages which can
 *             be used by all the contexts.
 * @memdev: keeps track of the in use physical pages allocated by
 *          the Driver.
 * @mmu_mode: Pointer to the object containing methods for programming
 *            the MMU, depending on the type of MMU supported by Hw.
 * @mgm_dev: Pointer to the memory group manager device attached
 *           to the GPU device. This points to an internal memory
 *           group manager if no platform-specific memory group
 *           manager was retrieved through device tree.
 * @mmu_unresponsive: Flag to indicate MMU is not responding.
 *                    Set if an MMU command isn't completed within the
 *                    MMU_AS_INACTIVE_WAIT_TIMEOUT scaled timeout.
 *                    Cleared by kbase_ctx_sched_restore_all_as() after GPU reset completes.
 * @as: Array of objects representing address spaces of GPU.
 * @as_to_kctx: Array of pointers to struct kbase_context, having
 *              GPU address spaces assigned to them.
 * @as_free: Bitpattern of free/available GPU address spaces.
 * @mmu_mask_change: Lock to serialize the access to MMU interrupt mask
 *                   register used in the handling of Bus & Page faults.
 * @pagesize_2mb: Boolean to determine whether 2MiB page sizes are
 *                supported and used where possible.
 * @gpu_props: Object containing complete information about the
 *             configuration/properties of GPU HW device in use.
 * @hw_issues_mask: List of SW workarounds for HW issues
 * @hw_features_mask: List of available HW features.
 * @disjoint_event: struct for keeping track of the disjoint information,
 *                  that whether the GPU is in a disjoint state and the
 *                  number of disjoint events that have occurred on GPU.
 * @disjoint_event.count: disjoint event count
 * @disjoint_event.state: disjoint event state
 * @nr_hw_address_spaces: Number of address spaces actually available in the
 *                        GPU, remains constant after driver initialisation.
 * @nr_user_address_spaces: Number of address spaces available to user contexts
 * @hwcnt_backend_csf_if_fw: Firmware interface to access CSF GPU performance
 *                           counters.
 * @hwcnt: Structure used for instrumentation and HW counters
 *         dumping
 * @hwcnt.lock: The lock should be used when accessing any of the
 *              following members
 * @hwcnt.kctx: kbase context
 * @hwcnt.addr: HW counter address
 * @hwcnt.addr_bytes: HW counter size in bytes
 * @hwcnt.backend: Kbase instrumentation backend
 * @hwcnt_gpu_jm_backend: Job manager GPU backend interface, used as superclass reference
 *                        pointer by hwcnt_gpu_iface, which wraps this implementation in
 *                        order to extend it with periodic dumping functionality.
 * @hwcnt_gpu_iface: Backend interface for GPU hardware counter access.
 * @hwcnt_watchdog_timer: Watchdog interface, used by the GPU backend hwcnt_gpu_iface to
 *                        perform periodic dumps in order to prevent hardware counter value
 *                        overflow or saturation.
 * @hwcnt_gpu_ctx: Context for GPU hardware counter access.
 *                 @hwaccess_lock must be held when calling
 *                 kbase_hwcnt_context_enable() with @hwcnt_gpu_ctx.
 * @hwcnt_gpu_virt: Virtualizer for GPU hardware counters.
 * @kinstr_prfcnt_ctx: kinstr_prfcnt context created per device.
 * @timeline_flags: Bitmask defining which sets of timeline tracepoints
 *                  are enabled. If zero, there is no timeline client and
 *                  therefore timeline is disabled.
 * @timeline: Timeline context created per device.
 * @ktrace: kbase device's ktrace
 * @reset_timeout_ms: Number of milliseconds to wait for the soft stop to
 *                    complete for the GPU jobs before proceeding with the
 *                    GPU reset.
 * @lowest_gpu_freq_khz: Lowest frequency in kHz that the GPU can run at. Used
 *                       to calculate suitable timeouts for wait operations.
 * @backend_time: Kbase backend time related attributes.
 * @cache_clean_in_progress: Set when a cache clean has been started, and
 *                           cleared when it has finished. This prevents multiple
 *                           cache cleans being done simultaneously.
 * @cache_clean_queued: Pending cache clean operations invoked while another is
 *                      in progress. If this is not 0, another cache clean needs
 *                      to be triggered immediately after completion of the
 *                      current one.
 * @cache_clean_wait: Signalled when a cache clean has finished.
 * @platform_context: Platform specific private data to be accessed by
 *                    platform specific config files only.
 * @kctx_list: List of kbase_contexts created for the device,
 *             including any contexts that might be created for
 *             hardware counters.
 * @kctx_list_lock: Lock protecting concurrent accesses to @kctx_list.
 * @devfreq_profile: Describes devfreq profile for the Mali GPU device, passed
 *                   to devfreq_add_device() to add devfreq feature to Mali
 *                   GPU device.
 * @devfreq: Pointer to devfreq structure for Mali GPU device,
 *           returned on the call to devfreq_add_device().
 * @current_freqs: The real frequencies, corresponding to
 *                 @current_nominal_freq, at which the Mali GPU device
 *                 is currently operating, as retrieved from
 *                 @devfreq_table in the target callback of
 *                 @devfreq_profile.
 * @current_nominal_freq: The nominal frequency currently used for the Mali GPU
 *                        device as retrieved through devfreq_recommended_opp()
 *                        using the freq value passed as an argument to target
 *                        callback of @devfreq_profile
 * @current_voltages: The voltages corresponding to @current_nominal_freq,
 *                    as retrieved from @devfreq_table in the target
 *                    callback of @devfreq_profile.
 * @current_core_mask: bitmask of shader cores that are currently desired &
 *                     enabled, corresponding to @current_nominal_freq as
 *                     retrieved from @devfreq_table in the target callback
 *                     of @devfreq_profile.
 * @devfreq_table: Pointer to the lookup table for converting between
 *                 nominal OPP (operating performance point) frequency,
 *                 and real frequency and core mask. This table is
 *                 constructed according to operating-points-v2-mali
 *                 table in devicetree.
 * @num_opps: Number of operating performance points available for the Mali
 *            GPU device.
 * @last_devfreq_metrics: last PM metrics
 * @devfreq_queue: Per device object for storing data that manages devfreq
 *                 suspend & resume request queue and the related items.
 * @devfreq_cooling: Pointer returned on registering devfreq cooling device
 *                   corresponding to @devfreq.
 * @ipa_protection_mode_switched: is set to TRUE when GPU is put into protected
 *                                mode. It is a sticky flag which is cleared by IPA
 *                                once it has made use of information that GPU had
 *                                previously entered protected mode.
 * @ipa: Top level structure for IPA, containing pointers to both
 *       configured & fallback models.
 * @ipa.lock: Access to this struct must be with ipa.lock held
 * @ipa.configured_model: ipa model to use
 * @ipa.fallback_model: ipa fallback model
 * @ipa.last_metrics: Values of the PM utilization metrics from last time
 *                    the power model was invoked. The utilization is
 *                    calculated as the difference between last_metrics
 *                    and the current values.
 * @ipa.force_fallback_model: true if use of fallback model has been forced by
 *                            the User
 * @ipa.last_sample_time: Records the time when counters, used for dynamic
 *                        energy estimation, were last sampled.
 * @previous_frequency: Previous frequency of GPU clock used for
 *                      BASE_HW_ISSUE_GPU2017_1336 workaround. This clock is
 *                      restored when L2 is powered on.
 * @job_fault_debug: Flag to control the dumping of debug data for job faults,
 *                   set when the 'job_fault' debugfs file is opened.
 * @mali_debugfs_directory: Root directory for the debugfs files created by the driver
 * @debugfs_ctx_directory: Directory inside the @mali_debugfs_directory containing
 *                         a sub-directory for every context.
 * @debugfs_instr_directory: Instrumentation debugfs directory
 * @debugfs_as_read_bitmap: bitmap of address spaces for which the bus or page fault
 *                          has occurred.
 * @job_fault_wq: Waitqueue to block the job fault dumping daemon till the
 *                occurrence of a job fault.
 * @job_fault_resume_wq: Waitqueue on which every context with a faulty job waits
 *                       for the job fault dumping to complete before it can
 *                       do the bottom half of job done for the atoms which
 *                       followed the faulty atom.
 * @job_fault_resume_workq: workqueue to process the work items queued for the faulty
 *                          atoms, whereby the work item function waits for the dumping
 *                          to get completed.
 * @job_fault_event_list: List of atoms, each belonging to a different context, which
 *                        generated a job fault.
 * @job_fault_event_lock: Lock to protect concurrent accesses to @job_fault_event_list
 * @regs_dump_debugfs_data: Contains the offset of register to be read through debugfs
 *                          file "read_register".
 * @regs_dump_debugfs_data.reg_offset: Contains the offset of register to be
 *                                     read through debugfs file "read_register".
 * @ctx_num: Total number of contexts created for the device.
 * @io_history: Pointer to an object keeping track of all recent
 *              register accesses. The history of register accesses
 *              can be read through the "regs_history" debugfs file.
 * @hwaccess: Contains a pointer to active kbase context and GPU
 *            backend specific data for HW access layer.
 * @faults_pending: Count of page/bus faults waiting for bottom half processing
 *                  via workqueues.
 * @mmu_page_migrate_in_progress: Set before starting an MMU page migration transaction
 *                                and cleared after the transaction completes. PM L2 state is
 *                                prevented from entering powering up/down transitions when the
 *                                flag is set, @hwaccess_lock is used to serialize the access.
 * @poweroff_pending: Set when power off operation for GPU is started, reset when
 *                    power on for GPU is started.
 * @infinite_cache_active_default: Set to enable using infinite cache for all the
 *                                 allocations of a new context.
 * @mem_pool_defaults: Default configuration for the group of memory pools
 *                     created for a new context.
 * @current_gpu_coherency_mode: coherency mode in use, which can be different
 *                              from @system_coherency, when using protected mode.
 * @system_coherency: coherency mode as retrieved from the device tree.
 * @cci_snoop_enabled: Flag to track when CCI snoops have been enabled.
 * @snoop_enable_smc: SMC function ID to call into Trusted firmware to
 *                    enable cache snooping. Value of 0 indicates that it
 *                    is not used.
 * @snoop_disable_smc: SMC function ID to call to disable cache snooping.
 * @protected_ops: Pointer to the methods for switching in or out of the
 *                 protected mode, as per the @protected_dev being used.
 * @protected_dev: Pointer to the protected mode switcher device attached
 *                 to the GPU device retrieved through device tree if
 *                 GPU does not support protected mode switching natively.
 * @protected_mode: set to TRUE when GPU is put into protected mode
 * @protected_mode_transition: set to TRUE when GPU is transitioning into or
 *                             out of protected mode.
 * @protected_mode_hwcnt_desired: True if we want GPU hardware counters to be
 *                                enabled. Counters must be disabled before transition
 *                                into protected mode.
 * @protected_mode_hwcnt_disabled: True if GPU hardware counters are not
 *                                 enabled.
 * @protected_mode_hwcnt_disable_work: Work item to disable GPU hardware
 *                                     counters, used if atomic disable is not possible.
 * @irq_reset_flush: Flag to indicate that GPU reset is in-flight and flush of
 *                   IRQ + bottom half is being done, to prevent the writes
 *                   to MMU_IRQ_CLEAR & MMU_IRQ_MASK registers.
 * @inited_subsys: Bitmap of inited sub systems at the time of device probe.
 *                 Used during device remove or for handling error in probe.
 * @hwaccess_lock: Lock, which can be taken from IRQ context, to serialize
 *                 the updates made to Job dispatcher + scheduler states.
 * @mmu_hw_mutex: Protects access to MMU operations and address space
 *                related state.
 * @serialize_jobs: Currently used mode for serialization of jobs, both
 *                  intra & inter slots serialization is supported.
 * @backup_serialize_jobs: Copy of the original value of @serialize_jobs taken
 *                         when GWT is enabled. Used to restore the original value
 *                         on disabling of GWT.
 * @js_ctx_scheduling_mode: Context scheduling mode currently being used by
 *                          Job Scheduler
 * @l2_size_override: Used to set L2 cache size via device tree blob
 * @l2_hash_override: Used to set L2 cache hash via device tree blob
 * @l2_hash_values_override: true if @l2_hash_values is valid.
 * @l2_hash_values: Used to set L2 cache slice hash via device tree blob
 * @sysc_alloc: Array containing values to be programmed into
 *              SYSC_ALLOC[0..7] GPU registers on L2 cache
 *              power down. These come from either DTB or
 *              via DebugFS (if it is available in kernel).
 * @process_root: rb_tree root node for maintaining a rb_tree of
 *                kbase_process based on key tgid (thread group ID).
 * @dma_buf_root: rb_tree root node for maintaining a rb_tree of
 *                &struct kbase_dma_buf based on key dma_buf.
 *                We maintain a rb_tree of dma_buf mappings under
 *                kbase_device and kbase_process, one indicates a
 *                mapping and gpu memory usage at device level and
 *                other one at process level.
 * @total_gpu_pages: Total GPU pages used for the complete GPU device.
 * @dma_buf_pages: Total dma_buf pages used for GPU platform device.
 * @dma_buf_lock: This mutex should be held while accounting for
 *                @total_gpu_pages from imported dma buffers.
 * @gpu_mem_usage_lock: This spinlock should be held while accounting
 *                      @total_gpu_pages for both native and dma-buf imported
 *                      allocations.
 * @job_done_worker: Worker for job_done work.
 * @event_worker: Worker for event work.
 * @apc.worker: Worker for async power control work.
 * @apc.power_on_work: Work struct for powering on the GPU.
 * @apc.power_off_work: Work struct for powering off the GPU.
 * @apc.end_ts: The latest end timestamp to power off the GPU.
 * @apc.timer: A hrtimer for powering off based on wake duration.
 * @apc.pending: Whether an APC power on request is active and not handled yet.
 * @apc.lock: Lock for @apc.end_ts, @apc.timer and @apc.pending.
 * @dummy_job_wa: struct for dummy job execution workaround for the
 *                GPU hang issue
 * @dummy_job_wa.kctx: dummy job workaround context
 * @dummy_job_wa.jc: dummy job workaround job
 * @dummy_job_wa.slot: dummy job workaround slot
 * @dummy_job_wa.flags: dummy job workaround flags
 * @dummy_job_wa_loaded: Flag for indicating that the workaround blob has
 *                       been loaded. Protected by @fw_load_lock.
 * @arb: Pointer to the arbiter device
 * @pcm_dev: The priority control manager device.
 * @oom_notifier_block: notifier_block containing kernel-registered
 *                      out-of-memory handler.
 * @proc_sysfs_node: Sysfs directory node to store per-process stats.
 * @mem_migrate: Per device object for managing page migration.
 * @live_fence_metadata: Count of live fence metadata structures created by
 *                       KCPU queue. These structures may outlive kbase module
 *                       itself. Therefore, in such a case, a warning should
 *                       be produced.
 * @va_region_slab: kmem_cache (slab) for allocated kbase_va_region structures.
 * @fence_signal_timeout_enabled: Global flag for whether fence signal timeout tracking
 *                                is enabled.
 * @pcm_prioritized_process_nb: Notifier block for the Priority Control Manager
 *                              driver, this is used to be informed of the
 *                              changes in the list of prioritized processes.
 */
struct kbase_device {
	u32 hw_quirks_sc;
	u32 hw_quirks_tiler;
	u32 hw_quirks_mmu;
	u32 hw_quirks_gpu;

	struct list_head entry;
	struct device *dev;
	struct miscdevice mdev;
	u64 reg_start;
	size_t reg_size;
	void __iomem *reg;
	struct {
		void __iomem **regs;
		u32 *flags;
		size_t size;
	} regmap;

	struct {
		u32 irq;
		u32 flags;
	} irqs[3];
	u32 nr_irqs;

	struct clk *clocks[BASE_MAX_NR_CLOCKS_REGULATORS];
	unsigned int nr_clocks;
#if IS_ENABLED(CONFIG_REGULATOR)
	struct regulator *regulators[BASE_MAX_NR_CLOCKS_REGULATORS];
	unsigned int nr_regulators;
#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE)
	int token;
#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE)
	struct opp_table *opp_table;
#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */
#endif /* CONFIG_REGULATOR */
	char devname[DEVNAME_SIZE];
	u32 id;

#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
	void *model;
	struct kmem_cache *irq_slab;
	struct workqueue_struct *irq_workq;
	atomic_t serving_job_irq;
	atomic_t serving_gpu_irq;
	atomic_t serving_mmu_irq;
	spinlock_t reg_op_lock;
#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */

	struct kbase_pm_device_data pm;

	struct kbase_mem_pool_group mem_pools;
	struct kbasep_mem_device memdev;
	struct kbase_mmu_mode const *mmu_mode;

	struct memory_group_manager_device *mgm_dev;

	bool mmu_unresponsive;
	struct kbase_as as[BASE_MAX_NR_AS];
	struct kbase_context *as_to_kctx[BASE_MAX_NR_AS];
	u16 as_free;

	spinlock_t mmu_mask_change;

	bool pagesize_2mb;

	struct kbase_gpu_props gpu_props;

	unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
	unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];

	struct {
		atomic_t count;
		atomic_t state;
	} disjoint_event;

	s8 nr_hw_address_spaces;
	s8 nr_user_address_spaces;

	/**
	 * @pbha_propagate_bits: Record of Page-Based Hardware Attribute Propagate bits to
	 *                       restore to L2_CONFIG upon GPU reset.
	 */
	u8 pbha_propagate_bits;

#if MALI_USE_CSF
	struct kbase_hwcnt_backend_csf_if hwcnt_backend_csf_if_fw;
#else
	struct kbase_hwcnt {
		spinlock_t lock;
		struct kbase_context *kctx;
		u64 addr;
		u64 addr_bytes;

		struct kbase_instr_backend backend;
	} hwcnt;

	struct kbase_hwcnt_backend_interface hwcnt_gpu_jm_backend;
#endif

	struct kbase_hwcnt_backend_interface hwcnt_gpu_iface;
	struct kbase_hwcnt_watchdog_interface hwcnt_watchdog_timer;

	struct kbase_hwcnt_context *hwcnt_gpu_ctx;
	struct kbase_hwcnt_virtualizer *hwcnt_gpu_virt;
	struct kbase_kinstr_prfcnt_context *kinstr_prfcnt_ctx;

	atomic_t timeline_flags;
	struct kbase_timeline *timeline;

#if KBASE_KTRACE_TARGET_RBUF
	struct kbase_ktrace ktrace;
#endif
	u32 reset_timeout_ms;

	u64 lowest_gpu_freq_khz;

	struct kbase_backend_time backend_time;

	bool cache_clean_in_progress;
	u32 cache_clean_queued;
	wait_queue_head_t cache_clean_wait;

	void *platform_context;

	struct list_head kctx_list;
	struct mutex kctx_list_lock;

#ifdef CONFIG_MALI_DEVFREQ
	struct devfreq_dev_profile devfreq_profile;
	struct devfreq *devfreq;
	unsigned long current_freqs[BASE_MAX_NR_CLOCKS_REGULATORS];
	unsigned long current_nominal_freq;
	unsigned long current_voltages[BASE_MAX_NR_CLOCKS_REGULATORS];
	u64 current_core_mask;
	struct kbase_devfreq_opp *devfreq_table;
	unsigned int num_opps;
	struct kbasep_pm_metrics last_devfreq_metrics;
	struct kbase_devfreq_queue_info devfreq_queue;

#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL)
	struct thermal_cooling_device *devfreq_cooling;
	bool ipa_protection_mode_switched;
	struct {
		/* Access to this struct must be with ipa.lock held */
		struct mutex lock;
		struct kbase_ipa_model *configured_model;
		struct kbase_ipa_model *fallback_model;

		/* Values of the PM utilization metrics from last time the
		 * power model was invoked. The utilization is calculated as
		 * the difference between last_metrics and the current values.
		 */
		struct kbasep_pm_metrics last_metrics;

		/* true if use of fallback model has been forced by the User */
		bool force_fallback_model;

		/* Records the time when counters, used for dynamic energy
		 * estimation, were last sampled.
		 */
		ktime_t last_sample_time;
	} ipa;
#endif /* CONFIG_DEVFREQ_THERMAL */
#endif /* CONFIG_MALI_DEVFREQ */

	unsigned long previous_frequency;

#if !MALI_USE_CSF
	atomic_t job_fault_debug;
#endif /* !MALI_USE_CSF */

#if IS_ENABLED(CONFIG_DEBUG_FS)
	struct dentry *mali_debugfs_directory;
	struct dentry *debugfs_ctx_directory;
	struct dentry *debugfs_instr_directory;

#ifdef CONFIG_MALI_DEBUG
	u64 debugfs_as_read_bitmap;
#endif /* CONFIG_MALI_DEBUG */

#if !MALI_USE_CSF
	wait_queue_head_t job_fault_wq;
	wait_queue_head_t job_fault_resume_wq;
	struct workqueue_struct *job_fault_resume_workq;
	struct list_head job_fault_event_list;
	spinlock_t job_fault_event_lock;
#endif /* !MALI_USE_CSF */

#if !MALI_CUSTOMER_RELEASE
	struct {
		u32 reg_offset;
	} regs_dump_debugfs_data;
#endif /* !MALI_CUSTOMER_RELEASE */
#endif /* CONFIG_DEBUG_FS */

	atomic_t ctx_num;

#if IS_ENABLED(CONFIG_DEBUG_FS)
	struct kbase_io_history io_history;
#endif /* CONFIG_DEBUG_FS */

	struct kbase_hwaccess_data hwaccess;

	atomic_t faults_pending;

	bool mmu_page_migrate_in_progress;

	bool poweroff_pending;

	bool infinite_cache_active_default;

	struct kbase_mem_pool_group_config mem_pool_defaults;

	u32 current_gpu_coherency_mode;
	u32 system_coherency;

	bool cci_snoop_enabled;

	u32 snoop_enable_smc;
	u32 snoop_disable_smc;

	const struct protected_mode_ops *protected_ops;

	struct protected_mode_device *protected_dev;

	bool protected_mode;

	bool protected_mode_transition;

	bool protected_mode_hwcnt_desired;

	bool protected_mode_hwcnt_disabled;

	struct work_struct protected_mode_hwcnt_disable_work;

	bool irq_reset_flush;

	u32 inited_subsys;

	spinlock_t hwaccess_lock;

	struct mutex mmu_hw_mutex;

	u8 l2_size_override;
	u8 l2_hash_override;
	bool l2_hash_values_override;
	u32 l2_hash_values[GPU_L2_SLICE_HASH_COUNT];

	u32 sysc_alloc[GPU_SYSC_ALLOC_COUNT];

	struct mutex fw_load_lock;

#if MALI_USE_CSF
	/* CSF object for the GPU device. */
	struct kbase_csf_device csf;
#else
	struct kbasep_js_device_data js_data;

	struct kthread_worker job_done_worker;

	struct kthread_worker event_worker;

	/* See KBASE_JS_*_PRIORITY_MODE for details. */
	u32 js_ctx_scheduling_mode;

	/* See KBASE_SERIALIZE_* for details */
	u8 serialize_jobs;

#ifdef CONFIG_MALI_CINSTR_GWT
	u8 backup_serialize_jobs;
#endif /* CONFIG_MALI_CINSTR_GWT */

#endif /* MALI_USE_CSF */

	struct {
		struct kthread_worker worker;
		struct kthread_work power_on_work;
		struct kthread_work power_off_work;
		ktime_t end_ts;
		struct hrtimer timer;
		bool pending;
		struct mutex lock;
	} apc;

	struct rb_root process_root;
	struct rb_root dma_buf_root;

	size_t total_gpu_pages;
	size_t dma_buf_pages;
	struct mutex dma_buf_lock;
	spinlock_t gpu_mem_usage_lock;

	struct {
		struct kbase_context *kctx;
		u64 jc;
		u32 slot;
		u64 flags;
	} dummy_job_wa;
	bool dummy_job_wa_loaded;

#ifdef CONFIG_MALI_ARBITER_SUPPORT
	struct kbase_arbiter_device arb;
#endif
	/* Priority Control Manager device */
	struct priority_control_manager_device *pcm_dev;

	struct notifier_block oom_notifier_block;

	struct kobject *proc_sysfs_node;

	struct kbase_mem_migrate mem_migrate;

#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE)
	atomic_t live_fence_metadata;
#endif
	struct kmem_cache *va_region_slab;

#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
	/**
	 * @gpu_metrics: GPU device wide structure used for emitting GPU metrics tracepoints.
	 */
	struct kbase_gpu_metrics gpu_metrics;
#endif

#if MALI_USE_CSF
	atomic_t fence_signal_timeout_enabled;
#endif

	struct notifier_block pcm_prioritized_process_nb;
};
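/*
 * Illustrative sketch only: @as_free is documented as a bit-pattern of
 * free/available GPU address spaces. A free AS could be claimed as below.
 * example_claim_free_as() is a hypothetical helper; the real driver
 * serializes this through the context scheduler with the appropriate locks.
 */
static inline int example_claim_free_as(struct kbase_device *kbdev)
{
	int as_nr;

	if (!kbdev->as_free)
		return KBASEP_AS_NR_INVALID;

	/* Pick the lowest set bit and mark that AS as in use. */
	as_nr = ffs(kbdev->as_free) - 1;
	kbdev->as_free &= ~(1u << as_nr);
	return as_nr;
}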
/**
 * enum kbase_file_state - Initialization state of a file opened by @kbase_open
 *
 * @KBASE_FILE_NEED_VSN: Initial state, awaiting API version.
 * @KBASE_FILE_VSN_IN_PROGRESS: Indicates if setting an API version is in
 *                              progress and other setup calls shall be
 *                              rejected.
 * @KBASE_FILE_NEED_CTX: Indicates if the API version handshake has
 *                       completed, awaiting context creation flags.
 * @KBASE_FILE_CTX_IN_PROGRESS: Indicates if the context's setup is in progress
 *                              and other setup calls shall be rejected.
 * @KBASE_FILE_COMPLETE: Indicates if the setup for context has
 *                       completed, i.e. flags have been set for the
 *                       context.
 * @KBASE_FILE_DESTROY_CTX: Indicates that destroying of context has begun or
 *                          is complete. This state can only be reached after
 *                          @KBASE_FILE_COMPLETE.
 *
 * The driver allows only limited interaction with user-space until setup
 * is complete.
 */
enum kbase_file_state {
	KBASE_FILE_NEED_VSN,
	KBASE_FILE_VSN_IN_PROGRESS,
	KBASE_FILE_NEED_CTX,
	KBASE_FILE_CTX_IN_PROGRESS,
	KBASE_FILE_COMPLETE,
	KBASE_FILE_DESTROY_CTX
};

/**
 * struct kbase_file - Object representing a file opened by @kbase_open
 *
 * @kbdev: Object representing an instance of GPU platform device,
 *         allocated from the probe method of the Mali driver.
 * @filp: Pointer to the struct file corresponding to device file
 *        /dev/malixx instance, passed to the file's open method.
 * @owner: Pointer to the file table structure of a process that
 *         created the instance of /dev/malixx device file. Set to
 *         NULL when that process closes the file instance. No more
 *         file operations would be allowed once set to NULL.
 *         It would be updated only in the Userspace context, i.e.
 *         when @kbase_open or @kbase_flush is called.
 * @kctx: Object representing an entity, among which GPU is
 *        scheduled and which gets its own GPU address space.
 *        Invalid until @setup_state is KBASE_FILE_COMPLETE.
 * @api_version: Contains the version number for User/kernel interface,
 *               used for compatibility check. Invalid until
 *               @setup_state is KBASE_FILE_NEED_CTX.
 * @setup_state: Initialization state of the file. Values come from
 *               the kbase_file_state enumeration.
 * @destroy_kctx_work: Work item for destroying the @kctx, enqueued only when
 *                     @fops_count and @map_count becomes zero after /dev/malixx
 *                     file was previously closed by the @owner.
 * @lock: Lock to serialize the access to members like @owner, @fops_count,
 *        @map_count.
 * @fops_count: Counter that is incremented at the beginning of a method
 *              defined for @kbase_fops and is decremented at the end.
 *              So the counter keeps a track of the file operations in progress
 *              for /dev/malixx file, that are being handled by the Kbase.
 *              The counter is needed to defer the context termination as
 *              Userspace can close the /dev/malixx file and flush() method
 *              can get called when some other file operation is in progress.
 * @map_count: Counter to keep a track of the memory mappings present on
 *             /dev/malixx file instance. The counter is needed to defer the
 *             context termination as Userspace can close the /dev/malixx
 *             file and flush() method can get called when mappings are still
 *             present.
 * @zero_fops_count_wait: Waitqueue used to wait for the @fops_count to become 0.
 *                        Currently needed only for the "mem_view" debugfs file.
 * @event_queue: Wait queue used for blocking the thread, which consumes
 *               the base_jd_event corresponding to an atom, when there
 *               are no more posted events.
 */
struct kbase_file {
	struct kbase_device *kbdev;
	struct file *filp;
	fl_owner_t owner;
	struct kbase_context *kctx;
	unsigned long api_version;
	atomic_t setup_state;
	struct work_struct destroy_kctx_work;
	spinlock_t lock;
	int fops_count;
	int map_count;
#if IS_ENABLED(CONFIG_DEBUG_FS)
	wait_queue_head_t zero_fops_count_wait;
#endif
	wait_queue_head_t event_queue;
};
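/*
 * Illustrative sketch only: the @fops_count / @lock pairing described above.
 * example_file_get() is a hypothetical helper showing how a file operation
 * could be accounted so that context termination can be deferred until all
 * in-flight operations have drained.
 */
static inline bool example_file_get(struct kbase_file *kfile)
{
	bool allowed;

	spin_lock(&kfile->lock);
	/* Operations are only allowed while the file still has an owner. */
	allowed = (kfile->owner != NULL);
	if (allowed)
		kfile->fops_count++;
	spin_unlock(&kfile->lock);
	return allowed;
}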
 */
enum kbase_context_flags {
	KCTX_COMPAT = 1U << 0,
	KCTX_RUNNABLE_REF = 1U << 1,
	KCTX_ACTIVE = 1U << 2,
	KCTX_PULLED = 1U << 3,
	KCTX_MEM_PROFILE_INITIALIZED = 1U << 4,
	KCTX_INFINITE_CACHE = 1U << 5,
	KCTX_SUBMIT_DISABLED = 1U << 6,
	KCTX_PRIVILEGED = 1U << 7,
	KCTX_SCHEDULED = 1U << 8,
	KCTX_DYING = 1U << 9,
	KCTX_FORCE_SAME_VA = 1U << 11,
	KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12,
	KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13,
	KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14,
	KCTX_AS_DISABLED_ON_FAULT = 1U << 15,
	KCTX_JPL_ENABLED = 1U << 16,
	KCTX_PAGE_FAULT_REPORT_SKIP = 1U << 17,
};
#else
/**
 * enum kbase_context_flags - Flags for kbase contexts
 *
 * @KCTX_COMPAT: Set when the context process is a compat process, a 32-bit
 * process on a 64-bit kernel.
 *
 * @KCTX_RUNNABLE_REF: Set when the context is counted in
 * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing.
 *
 * @KCTX_ACTIVE: Set when the context is active.
 *
 * @KCTX_PULLED: Set when the last kick() caused atoms to be pulled from this
 * context.
 *
 * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been
 * initialized.
 *
 * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new
 * allocations. Existing allocations will not change.
 *
 * @KCTX_SUBMIT_DISABLED: Set to prevent the context from submitting any jobs.
 *
 * @KCTX_PRIVILEGED: Set if the context uses an address space and should be kept
 * scheduled in.
 *
 * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool.
 * This is only ever updated whilst the jsctx_mutex is held.
 *
 * @KCTX_DYING: Set when the context's process is being evicted.
 *
 * @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory
 * allocations. For 64-bit clients it is enabled by default, and disabled by
 * default on 32-bit clients. The ability to clear this flag is only used for
 * testing the custom zone allocation on 64-bit user-space builds, where we
 * also require more control than is available through e.g. the JIT
 * allocation mechanism. However, the 64-bit user-space client must still
 * reserve a JIT region using KBASE_IOCTL_MEM_JIT_INIT.
 *
 * @KCTX_PULLED_SINCE_ACTIVE_JS0: Set when the context has had an atom pulled
 * from it for job slot 0. This is reset when the context first goes active or
 * is re-activated on that slot.
 *
 * @KCTX_PULLED_SINCE_ACTIVE_JS1: Set when the context has had an atom pulled
 * from it for job slot 1. This is reset when the context first goes active or
 * is re-activated on that slot.
 *
 * @KCTX_PULLED_SINCE_ACTIVE_JS2: Set when the context has had an atom pulled
 * from it for job slot 2. This is reset when the context first goes active or
 * is re-activated on that slot.
 *
 * @KCTX_AS_DISABLED_ON_FAULT: Set when the GPU address space is disabled for
 * the context due to an unhandled page (or bus) fault. It is cleared when the
 * refcount for the context drops to 0 or when the address spaces are
 * re-enabled on GPU reset or power cycle.
 *
 * @KCTX_PAGE_FAULT_REPORT_SKIP: Set when the GPU page fault handler is not
 * allowed to allocate a physical page due to process exit or context
 * termination. It is used to suppress the error messages that would otherwise
 * ensue because the page fault didn't get handled.
 *
 * All members need to be separate bits. This enum is intended for use in a
 * bitmask where multiple values get OR-ed together.
 */
enum kbase_context_flags {
	KCTX_COMPAT = 1U << 0,
	KCTX_RUNNABLE_REF = 1U << 1,
	KCTX_ACTIVE = 1U << 2,
	KCTX_PULLED = 1U << 3,
	KCTX_MEM_PROFILE_INITIALIZED = 1U << 4,
	KCTX_INFINITE_CACHE = 1U << 5,
	KCTX_SUBMIT_DISABLED = 1U << 6,
	KCTX_PRIVILEGED = 1U << 7,
	KCTX_SCHEDULED = 1U << 8,
	KCTX_DYING = 1U << 9,
	KCTX_FORCE_SAME_VA = 1U << 11,
	KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12,
	KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13,
	KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14,
	KCTX_AS_DISABLED_ON_FAULT = 1U << 15,
	KCTX_PAGE_FAULT_REPORT_SKIP = 1U << 16,
};
#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */

/**
 * struct kbase_sub_alloc - Tracks a 2MB large page that has been split into
 *                          small pages for partial allocation.
 * @link:      Link into the per-context list (@mem_partials) of partially
 *             used large pages.
 * @page:      Pointer to the 2MB large page being sub-allocated.
 * @sub_pages: Bitmap tracking which small pages within @page are in use.
 */
struct kbase_sub_alloc {
	struct list_head link;
	struct page *page;
	DECLARE_BITMAP(sub_pages, NUM_PAGES_IN_2MB_LARGE_PAGE);
};

/**
 * struct kbase_context - Kernel base context
 *
 * @kfile:                Pointer to the object representing the /dev/malixx device
 *                        file instance.
 * @kbdev:                Pointer to the Kbase device for which the context is created.
 * @kctx_list_link:       Node into the Kbase device list of contexts.
 * @mmu:                  Structure holding details of the MMU tables for this
 *                        context.
 * @id:                   Unique identifier for the context, indicates the number of
 *                        contexts which have been created for the device so far.
 * @api_version:          Contains the version number for User/kernel interface,
 *                        used for compatibility check.
 * @event_list:           List of posted events about completed atoms, to be sent to
 *                        the event handling thread of Userspace.
 * @event_coalesce_list:  List containing events corresponding to successive atoms
 *                        which have requested deferred delivery of the completion
 *                        events to Userspace.
 * @event_mutex:          Lock to protect the concurrent access to @event_list &
 *                        @event_coalesce_list.
 * @event_closed:         Flag set through POST_TERM ioctl, indicates that the Driver
 *                        should stop posting events and also inform the event
 *                        handling thread that context termination is in progress.
 * @event_count:          Count of the posted events to be consumed by Userspace.
 * @event_coalesce_count: Count of the events present in @event_coalesce_list.
 * @flags:                Bitmap of enums from kbase_context_flags, indicating the
 *                        state & attributes for the context.
 * @aliasing_sink_page:   Special page used for KBASE_MEM_TYPE_ALIAS allocations,
 *                        which can alias a number of memory regions. The page
 *                        represents a region mapped with a write-alloc cache
 *                        setup, typically used when the write result of the
 *                        GPU isn't needed, but the GPU must write anyway.
 * @mem_partials_lock:    Lock for protecting the operations done on the elements
 *                        added to the @mem_partials list.
 * @mem_partials:         List head for the list of large pages, 2MB in size, which
 *                        have been split into small pages and are used partially
 *                        for allocations >= 2 MB in size.
 * @reg_lock:             Lock used for GPU virtual address space management operations,
 *                        like adding/freeing a memory region in the address space.
 * @num_fixable_allocs:   A count of the number of memory allocations with the
 *                        BASE_MEM_FIXABLE property.
 * @num_fixed_allocs:     A count of the number of memory allocations with the
 *                        BASE_MEM_FIXED property.
 * @reg_zone:             Zone information for the reg_rbtree_<...> members.
 * @cookies:              Bitmask of BITS_PER_LONG bits, used mainly for
 *                        SAME_VA allocations to defer the reservation of a memory
 *                        region (from the GPU virtual address space) from the
 *                        base_mem_alloc ioctl to the mmap system call. This helps
 *                        return unique handles, disguised as GPU VA, to Userspace
 *                        from base_mem_alloc and later retrieve the pointer to
 *                        the memory region structure in the mmap handler.
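 *                        For example (illustrative; the exact handle encoding
 *                        is defined elsewhere in the driver): base_mem_alloc
 *                        may pick a free bit N from @cookies, stash the new
 *                        region in @pending_regions[N] and return a handle
 *                        derived from N; Userspace then passes that handle as
 *                        the mmap() offset, letting the handler recover
 *                        @pending_regions[N] and map the region at the CPU VA
 *                        chosen by mmap, so CPU & GPU virtual addresses match.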
 * @pending_regions:      Array containing pointers to memory region structures,
 *                        used in conjunction with the @cookies bitmask mainly for
 *                        providing a mechanism to have the same value for CPU &
 *                        GPU virtual address.
 * @tgid:                 Thread group ID of the process whose thread created
 *                        the context (by calling KBASE_IOCTL_VERSION_CHECK or
 *                        KBASE_IOCTL_SET_FLAGS, depending on the @api_version).
 *                        This is usually, but not necessarily, the same as the
 *                        process whose thread opened the device file
 *                        /dev/malixx instance.
 * @pid:                  ID of the thread, corresponding to process @tgid,
 *                        which actually created the context. This is usually,
 *                        but not necessarily, the same as the thread which
 *                        opened the device file /dev/malixx instance.
 * @prioritized:          Indicates whether work items originating from this
 *                        context should be treated with a higher priority
 *                        level relative to work items with the same priority
 *                        from other contexts. This value could change multiple
 *                        times over the lifetime of the context, such as when
 *                        an application becomes foreground or goes to the
 *                        background.
 * @csf:                  Kbase CSF context.
 * @jctx:                 Object encapsulating all the Job dispatcher related state,
 *                        including the array of atoms.
 * @used_pages:           Keeps track of the number of small physical pages in use
 *                        for the context.
 * @nonmapped_pages:      Updated in the same way as @used_pages, except for the case
 *                        when the special tracking page is freed by userspace, where
 *                        it is reset to 0.
 * @permanent_mapped_pages: Usage count of permanently mapped memory.
 * @mem_pools:            Context-specific pools of free physical memory pages.
 * @reclaim:              Shrinker object registered with the kernel, containing
 *                        the pointer to the callback function which is invoked under
 *                        low memory conditions. In the callback function the Driver
 *                        frees up the memory for allocations marked as
 *                        evictable/reclaimable.
 * @evict_list:           List head for the list containing the allocations which
 *                        can be evicted or freed up in the shrinker callback.
 * @evict_nents:          Total number of pages allocated by the allocations within
 *                        @evict_list (atomic).
 * @waiting_soft_jobs:    List head for the list containing softjob atoms, which
 *                        are either waiting for the event set operation, waiting
 *                        for the signaling of an input fence, or waiting for the
 *                        GPU device to be powered on so as to dump the CPU/GPU
 *                        timestamps.
 * @waiting_soft_jobs_lock: Lock to protect the @waiting_soft_jobs list from
 *                        concurrent accesses.
 * @dma_fence:            Object containing the list head for the list of dma-buf
 *                        fence waiting atoms and the waitqueue to process the work
 *                        item queued for the atoms blocked on the signaling of
 *                        dma-buf fences.
 * @dma_fence.waiting_resource: List head for the list of dma-buf fence waiting atoms.
 * @dma_fence.wq:         Waitqueue to process the work item queued for the blocked
 *                        atoms.
 * @as_nr:                ID of the address space being used for the scheduled-in
 *                        context. This is effectively part of the Run Pool, because
 *                        it only has a valid setting (!= KBASEP_AS_NR_INVALID) whilst
 *                        the context is scheduled in. The hwaccess_lock must be held
 *                        whilst accessing this.
 *                        If the context relating to this value of as_nr is required,
 *                        then the context must be retained to ensure that it doesn't
 *                        disappear whilst it is being used. Alternatively, hwaccess_lock
 *                        can be held to ensure the context doesn't disappear (but this
 *                        has restrictions on what other locks can be taken
 *                        simultaneously).
 * @refcount:             Keeps track of the number of users of this context. A user
 *                        can be a job that is available for execution, instrumentation
 *                        needing to 'pin' a context for counter collection, etc.
 *                        If the refcount reaches 0 then this context is considered
 *                        inactive and the previously programmed AS might be cleared
 *                        at any point.
 *                        Generally the reference count is incremented when the
 *                        context is scheduled in and an atom is pulled from the
 *                        context's per-slot runnable tree (JM GPUs), or when a GPU
 *                        command queue group is programmed on a CSG slot (CSF GPUs).
 * @process_mm:           Pointer to the memory descriptor of the process which
 *                        created the context. Used for accounting the physical
 *                        pages used for GPU allocations, done for the context,
 *                        to the memory consumed by the process. A reference is taken
 *                        on this descriptor for the Userspace created contexts so that
 *                        Kbase can safely access it to update the memory usage counters.
 *                        The reference is dropped on context termination.
 * @gpu_va_end:           End address of the GPU VA space (in small page units).
 * @running_total_tiler_heap_nr_chunks: Running total of the number of chunks in all
 *                        tiler heaps of the kbase context.
 * @running_total_tiler_heap_memory: Running total of the tiler heap memory in the
 *                        kbase context.
 * @peak_total_tiler_heap_memory: Peak value of the total tiler heap memory in the
 *                        kbase context.
 * @jit_va:               Indicates if a JIT_VA zone has been created.
 * @mem_profile_data:     Buffer containing the profiling information provided by
 *                        Userspace, can be read through the mem_profile debugfs file.
 * @mem_profile_size:     Size of the @mem_profile_data.
 * @mem_profile_lock:     Lock to serialize the operations related to the mem_profile
 *                        debugfs file.
 * @kctx_dentry:          Pointer to the debugfs directory created for every context,
 *                        inside kbase_device::debugfs_ctx_directory, containing
 *                        context specific files.
 * @reg_dump:             Buffer containing a register offset & value pair, used
 *                        for dumping job fault debug info.
 * @job_fault_count:      Indicates that a job fault occurred for the context and
 *                        dumping of its debug info is in progress.
 * @job_fault_resume_event_list: List containing atoms completed after the faulty
 *                        atom but before the debug data for the faulty atom was
 *                        dumped.
 * @mem_view_column_width: Controls the number of bytes shown in every column of the
 *                        output of the "mem_view" debugfs file.
 * @job_fault_work:       Tracks the latest fault dump work item for assisting the
 *                        operation of the job-fault-dump debug process.
 * @jsctx_queue:          Per slot & priority arrays of objects containing the root
 *                        of the RB-tree holding currently runnable atoms on the job
 *                        slot and the head item of the linked list of atoms blocked
 *                        on cross-slot dependencies.
 * @slot_tracking:        Tracking and control of this context's use of all job
 *                        slots.
 * @atoms_pulled_all_slots: Total number of atoms currently pulled from the
 *                        context, across all slots.
 * @slots_pullable:       Bitmask of slots, indicating the slots for which the
 *                        context has pullable atoms in the runnable tree.
 * @work:                 Work structure used for deferred ASID assignment.
 * @completed_jobs:       List containing completed atoms for which base_jd_event is
 *                        to be posted.
 * @work_count:           Number of work items, corresponding to atoms, currently
 *                        pending on the job_done kthread of @jctx.
 * @soft_job_timeout:     Timer object used for failing/cancelling waiting soft-jobs
 *                        which have been blocked for longer than the timeout value
 *                        used for the soft-jobs.
 * @jit_alloc:            Array of 256 pointers to GPU memory regions, used for
 *                        just-in-time memory allocations.
 * @jit_max_allocations:  Maximum allowed number of in-flight
 *                        just-in-time memory allocations.
 * @jit_current_allocations: Current number of in-flight just-in-time
 *                        memory allocations.
 * @jit_current_allocations_per_bin: Current number of in-flight just-in-time
 *                        memory allocations per bin.
 * @jit_group_id:         A memory group ID to be passed to a platform-specific
 *                        memory group manager.
 *                        Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
 * @jit_phys_pages_limit: Limit of physical pages to apply across all
 *                        just-in-time memory allocations, applied to
 *                        @jit_current_phys_pressure.
 * @jit_current_phys_pressure: Current 'pressure' on physical pages, which is
 *                        the sum of the worst case estimate of pages that
 *                        could be used (i.e. the
 *                        &struct kbase_va_region.nr_pages for all in-use
 *                        just-in-time memory regions that have not yet had
 *                        a usage report) and the actual number of pages
 *                        that were used (i.e. the
 *                        &struct kbase_va_region.used_pages for regions
 *                        that have had a usage report). For example, an in-use
 *                        16-page JIT region with no usage report yet contributes
 *                        16 pages to the pressure; once it reports that only 5
 *                        pages were used, its contribution drops to 5.
 * @jit_phys_pages_to_be_allocated: Count of the physical pages that are currently
 *                        being allocated for just-in-time memory
 *                        allocations of a context (across all threads).
 *                        This is supposed to be updated with @reg_lock
 *                        held before allocating the backing pages. This
 *                        helps ensure that the total physical memory
 *                        usage for just-in-time memory allocations
 *                        remains within the @jit_phys_pages_limit in
 *                        multi-threaded scenarios.
 * @jit_active_head:      List containing the just-in-time memory allocations
 *                        which are in use.
 * @jit_pool_head:        List containing the just-in-time memory allocations
 *                        which have been freed up by userspace and so are not
 *                        being used by it.
 *                        The driver caches them to quickly fulfill requests for
 *                        new JIT allocations. They are released in case of memory
 *                        pressure, as they are put on the @evict_list when they
 *                        are freed up by userspace.
 * @jit_destroy_head:     List containing the just-in-time memory allocations
 *                        which were moved to it from @jit_pool_head, in the
 *                        shrinker callback, after freeing their backing
 *                        physical pages.
 * @jit_evict_lock:       Lock used for operations done on just-in-time memory
 *                        allocations and also for accessing @evict_list.
 * @jit_work:             Work item queued to defer the freeing of a memory
 *                        region when a just-in-time memory allocation is moved
 *                        to @jit_destroy_head.
 * @ext_res_meta_head:    A list of sticky external resources which were requested
 *                        to be mapped on the GPU side, through a softjob atom of
 *                        type EXT_RES_MAP or the STICKY_RESOURCE_MAP ioctl.
 * @age_count:            Counter incremented on every call to jd_submit_atom;
 *                        the atom is assigned a snapshot of this counter, which
 *                        is used to determine the atom's age when it is added to
 *                        the runnable RB-tree.
 * @trim_level:           Level of JIT allocation trimming to perform on free (0-100%).
 * @kprcs:                Reference to the &struct kbase_process that the current
 *                        kbase_context belongs to.
 * @kprcs_link:           List link for the list of kbase contexts maintained
 *                        under a kbase_process.
 * @gwt_enabled:          Indicates if tracking of GPU writes is enabled, protected by
 *                        kbase_context.reg_lock.
 * @gwt_was_enabled:      Simple sticky bit flag to know if GWT was ever enabled.
 * @gwt_current_list:     A list of addresses for which the GPU has generated write
 *                        faults, after the last snapshot of it was sent to userspace.
 * @gwt_snapshot_list:    Snapshot of the @gwt_current_list for sending to user space.
 * @priority:             Indicates the context priority. Used along with @atoms_count
 *                        for context scheduling, protected by hwaccess_lock.
 * @atoms_count:          Number of GPU atoms currently in use, per priority.
 * @create_flags:         Flags used in context creation.
 * @kinstr_jm:            Kernel job manager instrumentation context handle.
 * @tl_kctx_list_node:    List item into the device timeline's list of
 *                        contexts, for timeline summarization.
 * @limited_core_mask:    The mask that is applied to the affinity in case of atoms
 *                        marked with BASE_JD_REQ_LIMITED_CORE_MASK.
 * @platform_data:        Pointer to platform specific per-context data.
 * @task:                 Pointer to the task structure of the main thread of the
 *                        process that created the Kbase context. It is set only
 *                        for the contexts created by Userspace and not for the
 *                        contexts created internally by Kbase.
 * @comm:                 Records the process name.
 *
 * A kernel base context is an entity among which the GPU is scheduled.
 * Each context has its own GPU address space.
 * Up to one context can be created for each client that opens the device file
 * /dev/malixx. Context creation is deferred until a special ioctl() system call
 * is made on the device file.
 */
struct kbase_context {
	struct kbase_file *kfile;
	struct kbase_device *kbdev;
	struct list_head kctx_list_link;
	struct kbase_mmu_table mmu;

	u32 id;
	unsigned long api_version;
	struct list_head event_list;
	struct list_head event_coalesce_list;
	struct mutex event_mutex;
#if !MALI_USE_CSF
	atomic_t event_closed;
#endif
	struct workqueue_struct *event_workq;
	atomic_t event_count;
	int event_coalesce_count;

	atomic_t flags;

	struct tagged_addr aliasing_sink_page;

	spinlock_t mem_partials_lock;
	struct list_head mem_partials;

	struct mutex reg_lock;
#if MALI_USE_CSF
	atomic64_t num_fixable_allocs;
	atomic64_t num_fixed_allocs;
#endif
	struct kbase_reg_zone reg_zone[CONTEXT_ZONE_MAX];

#if MALI_USE_CSF
	struct kbase_csf_context csf;
#else
	struct kbase_jd_context jctx;
	struct jsctx_queue jsctx_queue[KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS];
	struct kbase_jsctx_slot_tracking slot_tracking[BASE_JM_MAX_NR_SLOTS];
	atomic_t atoms_pulled_all_slots;

	struct list_head completed_jobs;
	atomic_t work_count;
	struct timer_list soft_job_timeout;

	int priority;
	s16 atoms_count[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
	u32 slots_pullable;
	u32 age_count;
#endif /* MALI_USE_CSF */

	DECLARE_BITMAP(cookies, BITS_PER_LONG);
	struct kbase_va_region *pending_regions[BITS_PER_LONG];

	pid_t tgid;
	pid_t pid;
	atomic_t prioritized;
	atomic_t used_pages;
	atomic_t nonmapped_pages;
	atomic_t permanent_mapped_pages;

	struct kbase_mem_pool_group mem_pools;

	struct shrinker reclaim;
	struct list_head evict_list;
	atomic_t evict_nents;

	struct list_head waiting_soft_jobs;
	spinlock_t waiting_soft_jobs_lock;

	int as_nr;
	atomic_t refcount;

	struct mm_struct *process_mm;
	u64 gpu_va_end;
#if MALI_USE_CSF
	u32 running_total_tiler_heap_nr_chunks;
	u64 running_total_tiler_heap_memory;
	u64 peak_total_tiler_heap_memory;
#endif
	bool jit_va;

#if IS_ENABLED(CONFIG_DEBUG_FS)
	char *mem_profile_data;
	size_t mem_profile_size;
	struct mutex mem_profile_lock;
	struct dentry *kctx_dentry;

	unsigned int *reg_dump;
	atomic_t job_fault_count;
	struct list_head job_fault_resume_event_list;
	unsigned int mem_view_column_width;
	struct work_struct *job_fault_work;
#endif /* CONFIG_DEBUG_FS */

	struct kbase_va_region *jit_alloc[1 + BASE_JIT_ALLOC_COUNT];
	u8 jit_max_allocations;
	u8 jit_current_allocations;
	u8 jit_current_allocations_per_bin[256];
	u8 jit_group_id;
#if MALI_JIT_PRESSURE_LIMIT_BASE
	u64 jit_phys_pages_limit;
	u64 jit_current_phys_pressure;
	u64 jit_phys_pages_to_be_allocated;
#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
	struct list_head jit_active_head;
	struct list_head jit_pool_head;
	struct list_head jit_destroy_head;
	struct mutex jit_evict_lock;
	struct work_struct jit_work;
	struct list_head ext_res_meta_head;

	u8 trim_level;

	struct kbase_process *kprcs;
	struct list_head kprcs_link;

#ifdef CONFIG_MALI_CINSTR_GWT
	bool gwt_enabled;
	bool gwt_was_enabled;
	struct list_head gwt_current_list;
	struct list_head gwt_snapshot_list;
#endif

	base_context_create_flags create_flags;

#if !MALI_USE_CSF
	struct kbase_kinstr_jm *kinstr_jm;
#endif
	struct list_head tl_kctx_list_node;

	u64 limited_core_mask;

	void *platform_data;

	struct task_struct *task;

#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
	/**
	 * @gpu_metrics_ctx: Pointer to the GPU metrics context corresponding to
	 *                   the application that created the Kbase context.
	 */
	struct kbase_gpu_metrics_ctx *gpu_metrics_ctx;
#endif

	char comm[TASK_COMM_LEN];
};

#ifdef CONFIG_MALI_CINSTR_GWT
/**
 * struct kbasep_gwt_list_element - Structure used to collect GPU
 *                                  write faults.
 * @link:      List head for adding write faults.
 * @region:    Details of the region where the faulting
 *             page address lies.
 * @page_addr: Page address where the GPU write fault occurred.
 * @num_pages: The number of pages modified.
 *
 * Using this structure all GPU write faults are stored in a list.
 */
struct kbasep_gwt_list_element {
	struct list_head link;
	struct kbase_va_region *region;
	u64 page_addr;
	u64 num_pages;
};
#endif

/**
 * struct kbase_ctx_ext_res_meta - Structure which binds an external resource
 *                                 to a @kbase_context.
 * @ext_res_node: List head for adding the metadata to a
 *                @kbase_context.
 * @reg:          External resource information, containing
 *                the corresponding VA region.
 * @ref:          Reference count.
 *
 * External resources can be mapped into multiple contexts as well as the same
 * context multiple times.
 * As kbase_va_region is refcounted, we guarantee that it will be available
 * for the duration of the external resource, meaning it is sufficient to use
 * it to rederive any additional data, like the GPU address.
 * This metadata structure binds a single external resource to a single
 * context, ensuring that per-context mapping is tracked separately, so it can
 * be overridden when needed and abuses by the application (freeing the resource
 * multiple times) don't affect the refcount of the physical allocation.
 */
struct kbase_ctx_ext_res_meta {
	struct list_head ext_res_node;
	struct kbase_va_region *reg;
	u32 ref;
};

enum kbase_reg_access_type { REG_READ, REG_WRITE };

enum kbase_share_attr_bits {
	/* (1ULL << 8) bit is reserved */
	SHARE_BOTH_BITS = (2ULL << 8), /* inner and outer shareable coherency */
	SHARE_INNER_BITS = (3ULL << 8) /* inner shareable coherency */
};

/**
 * kbase_device_is_cpu_coherent - Returns whether the device is CPU coherent.
 * @kbdev: kbase device
 *
 * Return: true if device accesses are coherent, false if not.
 */
static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
{
	if ((kbdev->system_coherency == COHERENCY_ACE_LITE) ||
	    (kbdev->system_coherency == COHERENCY_ACE))
		return true;

	return false;
}

/**
 * kbase_get_lock_region_min_size_log2 - Returns the minimum size of the MMU lock
 *                                       region, as a logarithm
 *
 * @gpu_props: GPU properties
 *
 * Return: the minimum size of the MMU lock region as dictated by the corresponding
 * arch spec.
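 *
 * For example, a return value of 15 means the smallest lock region spans
 * 2^15 bytes = 32 kB; together with KBASE_LOCK_REGION_MAX_SIZE_LOG2 (48,
 * i.e. 256 TB) this bounds the log2 size of any MMU lock region.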
 */
static inline u64 kbase_get_lock_region_min_size_log2(struct kbase_gpu_props const *gpu_props)
{
	if (gpu_props->gpu_id.product_model >= GPU_ID_MODEL_MAKE(12, 0))
		return 12; /* 4 kB */

	return 15; /* 32 kB */
}

/* Conversion helpers for setting up high resolution timers */
#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x)) * 1000000U))
#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x))

#endif /* _KBASE_DEFS_H_ */
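
/*
 * Illustrative usage note (not part of the original interface): the conversion
 * helpers above produce a ktime_t suitable for arming a high resolution timer,
 * e.g., assuming an initialized struct hrtimer 'timer':
 *
 *   hrtimer_start(&timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
 *                 HRTIMER_MODE_REL);
 */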